Skip to content

Commit e7494c6

Browse files
committed
Separate path and query in URI API
1 parent 697fe92 commit e7494c6

File tree

9 files changed

+295
-182
lines changed

9 files changed

+295
-182
lines changed

src/anyp/Uri.cc

Lines changed: 129 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,19 @@ PathChars()
6969
return pathValid;
7070
}
7171

72+
/// Characters which are valid within a URI query section
73+
static const CharacterSet &
74+
QueryChars()
75+
{
76+
/*
77+
* RFC 3986 section 3.4
78+
*
79+
* query = *( pchar / "/" / "?" )
80+
*/
81+
static const auto queryValid = CharacterSet("query", "/?") + PathChars();
82+
return queryValid;
83+
}
84+
7285
/**
7386
* Governed by RFC 3986 section 2.1
7487
*/
@@ -208,6 +221,22 @@ AnyP::Uri::path() const
208221
return path_;
209222
}
210223

224+
const SBuf &
225+
AnyP::Uri::query() const
226+
{
227+
/*
228+
* RFC 3986 section 3.4:
229+
*
230+
* query = *( pchar / "/" / "?" )
231+
*/
232+
233+
static SBuf empty;
234+
if (scheme_ == AnyP::PROTO_FTP)
235+
return empty; // FTP does not have a query section
236+
237+
return query_;
238+
}
239+
211240
void
212241
urlInitialize(void)
213242
{
@@ -310,6 +339,47 @@ urlAppendDomain(char *host)
310339
return true;
311340
}
312341

342+
/// returns whether whitespace has 'chopped' the URL apart.
343+
static bool
344+
StripAnyWsp(char *buf)
345+
{
346+
if (stringHasWhitespace(buf)) {
347+
switch (Config.uri_whitespace) {
348+
349+
case URI_WHITESPACE_DENY:
350+
throw TextException("URI has whitespace", Here());
351+
352+
case URI_WHITESPACE_ALLOW:
353+
break;
354+
355+
case URI_WHITESPACE_ENCODE: {
356+
auto *t = rfc1738_escape_unescaped(buf);
357+
xstrncpy(buf, t, MAX_URL);
358+
}
359+
break;
360+
361+
case URI_WHITESPACE_CHOP:
362+
*(buf + strcspn(buf, w_space)) = '\0';
363+
return true;
364+
365+
case URI_WHITESPACE_STRIP:
366+
default: {
367+
char *t = buf;
368+
char *q = buf;
369+
while (*t) {
370+
if (!xisspace(*t)) {
371+
*q = *t;
372+
++q;
373+
}
374+
++t;
375+
}
376+
*q = '\0';
377+
}
378+
}
379+
}
380+
return false;
381+
}
382+
313383
/*
314384
* Parse a URI/URL.
315385
*
@@ -330,14 +400,14 @@ AnyP::Uri::parse(const HttpRequestMethod& method, const SBuf &rawUrl)
330400
LOCAL_ARRAY(char, login, MAX_URL);
331401
LOCAL_ARRAY(char, foundHost, MAX_URL);
332402
LOCAL_ARRAY(char, urlpath, MAX_URL);
403+
LOCAL_ARRAY(char, foundQuery, MAX_URL);
333404
char *t = nullptr;
334-
char *q = nullptr;
335405
int foundPort;
336406
int l;
337407
int i;
338408
const char *src;
339409
char *dst;
340-
foundHost[0] = urlpath[0] = login[0] = '\0';
410+
foundHost[0] = urlpath[0] = foundQuery[0] = login[0] = '\0';
341411

342412
if ((l = rawUrl.length()) + Config.appendDomainLen > (MAX_URL - 1)) {
343413
debugs(23, DBG_IMPORTANT, MYNAME << "URL too large (" << l << " bytes)");
@@ -411,23 +481,56 @@ AnyP::Uri::parse(const HttpRequestMethod& method, const SBuf &rawUrl)
411481
return false;
412482
*dst = '\0';
413483

414-
// We are looking at path-abempty.
415-
if (*src != '/') {
416-
// path-empty, including the end of the `src` c-string cases
417-
urlpath[0] = '/';
418-
dst = &urlpath[1];
419-
} else {
484+
bool chopped = false;
485+
if (*src == '/') {
420486
dst = urlpath;
421-
}
422-
/* Then everything from / (inclusive) until \r\n or \0 - that's urlpath */
423-
for (; i < l && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) {
424-
*dst = *src;
487+
/* Then everything from / (inclusive) until '?', '#', '\r\n' or '\0' - that's urlpath */
488+
for (; i < l && *src != '?' && *src != '#' && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) {
489+
*dst = *src;
490+
}
491+
*dst = '\0';
492+
chopped = StripAnyWsp(urlpath);
493+
} else {
494+
// We should be looking at path-abempty. relative-path is not supported (yet).
495+
urlpath[0] = '/';
496+
urlpath[1] = '\0';
425497
}
426498

427499
/* We -could- be at the end of the buffer here */
428500
if (i > l)
429501
return false;
430-
*dst = '\0';
502+
503+
if (*src == '?') {
504+
dst = foundQuery;
505+
++src; // skip '?' delimiter
506+
++i; // keep track of bytes 'consumed' from src
507+
/* Then everything from '?' (exclusive) until '#', '\r\n' or '\0' - that's query */
508+
for (; i < l && *src != '#' && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) {
509+
*dst = *src;
510+
}
511+
*dst = '\0';
512+
if (chopped)
513+
*foundQuery = '\0';
514+
else
515+
chopped = StripAnyWsp(foundQuery);
516+
}
517+
if (i > l)
518+
return false;
519+
520+
if (*src == '#') {
521+
++src; // skip '#' delimiter
522+
++i; // keep track of bytes 'consumed' from src
523+
/* Then everything from '#' (exclusive) until '\r\n' or '\0' - that's fragment */
524+
for (; i < l && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src) {
525+
; // fragment component is not expected in network protocols. discard for now.
526+
}
527+
}
528+
529+
if (i > l)
530+
return false;
531+
532+
if (*src != '\r' && *src != '\n' && *src != '\0')
533+
throw TextException("invalid URL", Here());
431534

432535
// If the parsed scheme has no (known) default port, and there is no
433536
// explicit port, then we will reject the zero port during foundPort
@@ -490,27 +593,20 @@ AnyP::Uri::parse(const HttpRequestMethod& method, const SBuf &rawUrl)
490593
*t = '\0';
491594
++t;
492595
foundPort = atoi(t);
596+
// BUG: fails to detect non-DIGIT garbage like "http://localhost:80fubar"
493597
}
494598
}
495599

496600
for (t = foundHost; *t; ++t)
497601
*t = xtolower(*t);
498602

499-
if (stringHasWhitespace(foundHost)) {
500-
if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
501-
t = q = foundHost;
502-
while (*t) {
503-
if (!xisspace(*t)) {
504-
*q = *t;
505-
++q;
506-
}
507-
++t;
508-
}
509-
*q = '\0';
510-
}
603+
if (StripAnyWsp(foundHost)) {
604+
urlpath[0] = '/';
605+
urlpath[1] = '\0';
606+
*foundQuery = '\0';
511607
}
512608

513-
debugs(23, 3, "Split URL '" << rawUrl << "' into proto='" << scheme.image() << "', host='" << foundHost << "', port='" << foundPort << "', path='" << urlpath << "'");
609+
debugs(23, 3, "Split URL '" << rawUrl << "' into proto='" << scheme.image() << "', host='" << foundHost << "', port='" << foundPort << "', path='" << urlpath << "', query='" << foundQuery << "'");
514610

515611
if (Config.onoff.check_hostnames &&
516612
strspn(foundHost, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(foundHost)) {
@@ -536,41 +632,8 @@ AnyP::Uri::parse(const HttpRequestMethod& method, const SBuf &rawUrl)
536632
return false;
537633
}
538634

539-
if (stringHasWhitespace(urlpath)) {
540-
debugs(23, 2, "URI has whitespace: {" << rawUrl << "}");
541-
542-
switch (Config.uri_whitespace) {
543-
544-
case URI_WHITESPACE_DENY:
545-
return false;
546-
547-
case URI_WHITESPACE_ALLOW:
548-
break;
549-
550-
case URI_WHITESPACE_ENCODE:
551-
t = rfc1738_escape_unescaped(urlpath);
552-
xstrncpy(urlpath, t, MAX_URL);
553-
break;
554-
555-
case URI_WHITESPACE_CHOP:
556-
*(urlpath + strcspn(urlpath, w_space)) = '\0';
557-
break;
558-
559-
case URI_WHITESPACE_STRIP:
560-
default:
561-
t = q = urlpath;
562-
while (*t) {
563-
if (!xisspace(*t)) {
564-
*q = *t;
565-
++q;
566-
}
567-
++t;
568-
}
569-
*q = '\0';
570-
}
571-
}
572-
573635
setScheme(scheme);
636+
query(SBuf(foundQuery));
574637
path(urlpath);
575638
host(foundHost);
576639
userInfo(SBuf(login));
@@ -775,8 +838,12 @@ SBuf &
775838
AnyP::Uri::absolutePath() const
776839
{
777840
if (absolutePath_.isEmpty()) {
778-
// TODO: Encode each URI subcomponent in path_ as needed.
779-
absolutePath_ = Encode(path(), PathChars());
841+
if (!path().isEmpty())
842+
absolutePath_ = Encode(path(), PathChars());
843+
if (!query().isEmpty()) {
844+
absolutePath_.append("?");
845+
absolutePath_.append(Encode(query(), QueryChars()));
846+
}
780847
}
781848

782849
return absolutePath_;

src/anyp/Uri.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ class Uri
9797
void path(const SBuf &p) {path_=p; touch();}
9898
const SBuf &path() const;
9999

100+
void query(const SBuf &p) { query_=p; touch(); }
101+
const SBuf &query() const;
102+
100103
/**
101104
* Merge a relative-path URL into the existing URI details.
102105
* Implements RFC 3986 section 5.2.3
@@ -191,8 +194,8 @@ class Uri
191194

192195
Port port_; ///< authority port subcomponent
193196

194-
// XXX: for now includes query-string.
195-
SBuf path_; ///< URI path segment
197+
SBuf path_; ///< URI path component
198+
SBuf query_; ///< URI query component
196199

197200
// pre-assembled URI forms
198201
mutable SBuf authorityHttp_; ///< RFC 7230 section 5.3.3 authority, maybe without default-port

src/cache_manager.cc

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -125,17 +125,14 @@ CacheManager::WellKnownUrlPathPrefix()
125125
Mgr::Command::Pointer
126126
CacheManager::ParseUrl(const AnyP::Uri &uri)
127127
{
128-
Parser::Tokenizer tok(uri.path());
129-
130-
Assure(tok.skip(WellKnownUrlPathPrefix()));
128+
auto upath = uri.path();
129+
Assure(upath.startsWith(WellKnownUrlPathPrefix()));
131130

132131
Mgr::Command::Pointer cmd = new Mgr::Command();
133132
cmd->params.httpUri = SBufToString(uri.absolute());
134133

135-
static const auto fieldChars = CharacterSet("mgr-field", "?#").complement();
136-
137-
SBuf action;
138-
if (!tok.prefix(action, fieldChars)) {
134+
SBuf action = upath.substr(WellKnownUrlPathPrefix().length()); // path without the prefix
135+
if (action.isEmpty()) {
139136
static const SBuf indexReport("index");
140137
action = indexReport;
141138
}
@@ -150,16 +147,9 @@ CacheManager::ParseUrl(const AnyP::Uri &uri)
150147
throw TextException(ToSBuf("action '", action, "' is ", prot), Here());
151148
cmd->profile = profile;
152149

153-
// TODO: fix when AnyP::Uri::parse() separates path?query#fragment
154-
SBuf params;
155-
if (tok.skip('?')) {
156-
params = tok.remaining();
157-
Mgr::QueryParams::Parse(tok, cmd->params.queryParams);
158-
}
159-
160-
if (!tok.skip('#') && !tok.atEnd())
161-
throw TextException("invalid characters in URL", Here());
162-
// else ignore #fragment (if any)
150+
SBuf params = uri.query();
151+
if (!params.isEmpty())
152+
Mgr::QueryParams::Parse(params, cmd->params.queryParams);
163153

164154
debugs(16, 3, "MGR request: host=" << uri.host() << ", action=" << action << ", params=" << params);
165155

src/carp.cc

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -185,14 +185,11 @@ carpSelectParent(PeerSelector *ps)
185185
key.appendf(":%hu", request->url.port().value_or(0));
186186
}
187187
if (tp->options.carp_key.path) {
188-
// XXX: fix when path and query are separate
189-
key.append(request->url.absolutePath().substr(0,request->url.absolutePath().find('?'))); // 0..N
188+
key.append(request->url.path());
190189
}
191190
if (tp->options.carp_key.params) {
192-
// XXX: fix when path and query are separate
193-
SBuf::size_type pos;
194-
if ((pos=request->url.absolutePath().find('?')) != SBuf::npos)
195-
key.append(request->url.absolutePath().substr(pos)); // N..npos
191+
key.append('?');
192+
key.append(request->url.query());
196193
}
197194
}
198195
// if the url-based key is empty, e.g. because the user is

src/mgr/QueryParams.cc

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -109,18 +109,15 @@ ParseParamValue(const SBuf &rawValue)
109109
* value = *pchar | ( 1*DIGIT *( ',' 1*DIGIT ) )
110110
*/
111111
void
112-
Mgr::QueryParams::Parse(Parser::Tokenizer &tok, QueryParams &aParams)
112+
Mgr::QueryParams::Parse(SBuf &buf, QueryParams &aParams)
113113
{
114114
static const CharacterSet nameChars = CharacterSet("param-name", "_") + CharacterSet::ALPHA + CharacterSet::DIGIT;
115115
static const CharacterSet valueChars = CharacterSet("param-value", "&= #").complement();
116116
static const CharacterSet delimChars("param-delim", "&");
117117

118-
while (!tok.atEnd()) {
118+
Parser::Tokenizer tok(buf);
119119

120-
// TODO: remove '#' processing when AnyP::Uri splits 'query#fragment' properly
121-
// #fragment handled by caller. Do not throw.
122-
if (tok.remaining()[0] == '#')
123-
return;
120+
while (!tok.atEnd()) {
124121

125122
if (tok.skipAll(delimChars))
126123
continue;

src/mgr/QueryParams.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
#include "ipc/forward.h"
1515
#include "mgr/QueryParam.h"
16-
#include "parser/forward.h"
16+
#include "sbuf/forward.h"
1717
#include "SquidString.h"
1818

1919
#include <utility>
@@ -34,7 +34,7 @@ class QueryParams
3434
void pack(Ipc::TypedMsgHdr& msg) const; ///< store params into msg
3535
void unpack(const Ipc::TypedMsgHdr& msg); ///< load params from msg
3636
/// parses the query string parameters
37-
static void Parse(Parser::Tokenizer &, QueryParams &);
37+
static void Parse(SBuf &, QueryParams &);
3838

3939
private:
4040
/// find query parameter by name

src/tests/stub_libmgr.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ void Mgr::IoAction::dump(StoreEntry*) STUB
176176
Mgr::QueryParam::Pointer Mgr::QueryParams::get(const String&) const STUB_RETVAL(Mgr::QueryParam::Pointer(nullptr))
177177
void Mgr::QueryParams::pack(Ipc::TypedMsgHdr&) const STUB
178178
void Mgr::QueryParams::unpack(const Ipc::TypedMsgHdr&) STUB
179-
void Mgr::QueryParams::Parse(Parser::Tokenizer &, QueryParams &) STUB
179+
void Mgr::QueryParams::Parse(SBuf &, QueryParams &) STUB
180180
//private:
181181
//Params::const_iterator Mgr::QueryParams::find(const String&) const STUB_RETVAL(new Mgr::Params::const_iterator(*this))
182182
Mgr::QueryParam::Pointer Mgr::QueryParams::CreateParam(QueryParam::Type) STUB_RETVAL(Mgr::QueryParam::Pointer(nullptr))

0 commit comments

Comments
 (0)