hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 26fa7f8a86940e49eca619ec0b65102337e5bb00
parent abaf8ec95392523fc7e1155fd2fdbdbff717cdb8
Author: Haelwenn (lanodan) Monnier <contact+sr.ht@hacktivis.me>
Date:   Wed,  7 Dec 2022 17:17:57 +0100

net/uri: Acknowledge the different allowed characters

- path wasn't percent-encoded
- query and fragment had allowed characters being percent-encoded

Signed-off-by: Haelwenn (lanodan) Monnier <contact+sr.ht@hacktivis.me>

Diffstat:
M net/uri/+test.ha | 14 ++++++++++++--
M net/uri/fmt.ha | 35 +++++++++++++++++++++++++++--------
M net/uri/query.ha | 13 +++++++++----
3 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/net/uri/+test.ha b/net/uri/+test.ha @@ -68,7 +68,17 @@ use net::ip; ... }, )!; - + test_uri_roundtrip( + "https://sr.ht/projects?search=%23risc-v&sort=longest-active#foo", + uri { + scheme = "https", + host = "sr.ht", + path = "/projects", + query = "search=%23risc-v&sort=longest-active", + fragment = "foo", + ... + }, + )!; }; @test fn invalid() void = { @@ -101,7 +111,7 @@ use net::ip; query = "objectClass?one", ... }, - "ldap://[2001:db8::7]/c=GB?objectClass%3Fone", + "ldap://[2001:db8::7]/c=GB?objectClass?one", )!; // https://bugs.chromium.org/p/chromium/issues/detail?id=841105 diff --git a/net/uri/fmt.ha b/net/uri/fmt.ha @@ -7,6 +7,21 @@ use strconv; use strings; use strio; + +// Extract from RFC3986 ABNF +// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" +// reg-name = *( unreserved / pct-encoded / sub-delims ) +// host = IP-literal / IPv4address / reg-name +// pchar = unreserved / pct-encoded / sub-delims / ":" / "@" +// query = *( pchar / "/" / "?" ) +// fragment = *( pchar / "/" / "?" ) + +def unres_host: str = "-._~!$&'()*+,;="; +def unres_query_frag: str = "-._~!$&'()*+,;=:@/?"; +def unres_path: str = "-._~!$&'()*+,;=:@/"; + // Writes a formatted [[uri]] to an [[io::handle]]. Returns the number of bytes // written. 
export fn fmt(out: io::handle, u: *const uri) (size | io::error) = { @@ -25,7 +40,12 @@ export fn fmt(out: io::handle, u: *const uri) (size | io::error) = { if (!slashes_w) { n += fmt::fprint(out, "//")?; }; - n += percent_encode(out, host)?; + let unres = if(u.scheme == "file") { + yield unres_path; + } else { + yield unres_host; + }; + n += percent_encode(out, host, unres)?; }; case let addr: ip::addr => if (!slashes_w) { @@ -36,14 +56,14 @@ export fn fmt(out: io::handle, u: *const uri) (size | io::error) = { if (u.port != 0) { n += fmt::fprintf(out, ":{}", u.port)?; }; - n += fmt::fprint(out, u.path)?; + n += percent_encode(out, u.path, unres_path)?; if (len(u.query) > 0) { - n += fmt::fprint(out, "?")?; - n += percent_encode(out, u.query)?; + // Always percent-encoded, see parse and encodequery/decodequery + n += fmt::fprintf(out, "?{}", u.query)?; }; if (len(u.fragment) > 0) { n += fmt::fprint(out, "#")?; - n += percent_encode(out, u.fragment)?; + n += percent_encode(out, u.fragment, unres_query_frag)?; }; return n; @@ -62,7 +82,7 @@ fn fmtaddr(out: io::handle, addr: ip::addr) (size | io::error) = { return n; }; -fn percent_encode(out: io::handle, src: str) (size | io::error) = { +fn percent_encode(out: io::handle, src: str, allowed: str) (size | io::error) = { let iter = strings::iter(src); let n = 0z; for (true) { @@ -72,8 +92,7 @@ fn percent_encode(out: io::handle, src: str) (size | io::error) = { case => break; }; - // unreserved - if (ascii::isalnum(r) || strings::contains("-._~", r)) { + if (ascii::isalnum(r) || strings::contains(allowed, r)) { n += fmt::fprint(out, r)?; } else { const en = utf8::encoderune(r); diff --git a/net/uri/query.ha b/net/uri/query.ha @@ -50,10 +50,10 @@ export fn encodequery(pairs: [](str, str)) str = { if (i > 0) strio::appendrune(&buf, '&')!; assert(len(pair.0) > 0); - percent_encode(&buf, pair.0)!; + percent_encode(&buf, pair.0, unres_query_frag)!; if (len(pair.1) > 0) { strio::appendrune(&buf, '=')!; - percent_encode(&buf, 
pair.1)!; + percent_encode(&buf, pair.1, unres_query_frag)!; }; }; @@ -61,7 +61,7 @@ export fn encodequery(pairs: [](str, str)) str = { }; @test fn decodequery() void = { - const u = parse("https://sr.ht/projects?search=%23risc-v&sort=longest-active")!; + const u = parse("https://sr.ht/projects?search=%23risc-v&sort=longest-active&quantity=100%25")!; defer finish(&u); const query = decodequery(u.query); @@ -73,15 +73,20 @@ export fn encodequery(pairs: [](str, str)) str = { const pair = query_next(&query)! as (str, str); assert(pair.0 == "sort"); assert(pair.1 == "longest-active"); + + const pair = query_next(&query)! as (str, str); + assert(pair.0 == "quantity"); + assert(pair.1 == "100%"); }; @test fn encodequery() void = { const pairs = [ ("search", "#risc-v"), ("sort", "longest-active"), + ("quantity", "100%") ]; const encoded = encodequery(pairs); defer free(encoded); - assert(encoded == "search=%23risc-v&sort=longest-active"); + assert(encoded == "search=%23risc-v&sort=longest-active&quantity=100%25"); };