commit 26fa7f8a86940e49eca619ec0b65102337e5bb00
parent abaf8ec95392523fc7e1155fd2fdbdbff717cdb8
Author: Haelwenn (lanodan) Monnier <contact+sr.ht@hacktivis.me>
Date: Wed, 7 Dec 2022 17:17:57 +0100
net/uri: Acknowledge the different allowed characters
- the path was written without any percent-encoding
- the query and fragment percent-encoded characters that RFC 3986 allows unescaped
Signed-off-by: Haelwenn (lanodan) Monnier <contact+sr.ht@hacktivis.me>
Diffstat:
3 files changed, 48 insertions(+), 14 deletions(-)
diff --git a/net/uri/+test.ha b/net/uri/+test.ha
@@ -68,7 +68,17 @@ use net::ip;
...
},
)!;
-
+ test_uri_roundtrip(
+ "https://sr.ht/projects?search=%23risc-v&sort=longest-active#foo",
+ uri {
+ scheme = "https",
+ host = "sr.ht",
+ path = "/projects",
+ query = "search=%23risc-v&sort=longest-active",
+ fragment = "foo",
+ ...
+ },
+ )!;
};
@test fn invalid() void = {
@@ -101,7 +111,7 @@ use net::ip;
query = "objectClass?one",
...
},
- "ldap://[2001:db8::7]/c=GB?objectClass%3Fone",
+ "ldap://[2001:db8::7]/c=GB?objectClass?one",
)!;
// https://bugs.chromium.org/p/chromium/issues/detail?id=841105
diff --git a/net/uri/fmt.ha b/net/uri/fmt.ha
@@ -7,6 +7,21 @@ use strconv;
use strings;
use strio;
+
+// Excerpt from the RFC 3986 ABNF (Appendix A):
+// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+// reg-name = *( unreserved / pct-encoded / sub-delims )
+// host = IP-literal / IPv4address / reg-name
+// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+// query = *( pchar / "/" / "?" )
+// fragment = *( pchar / "/" / "?" )
+
+def unres_host: str = "-._~!$&'()*+,;=";
+def unres_query_frag: str = "-._~!$&'()*+,;=:@/?";
+def unres_path: str = "-._~!$&'()*+,;=:@/";
+
// Writes a formatted [[uri]] to an [[io::handle]]. Returns the number of bytes
// written.
export fn fmt(out: io::handle, u: *const uri) (size | io::error) = {
@@ -25,7 +40,12 @@ export fn fmt(out: io::handle, u: *const uri) (size | io::error) = {
if (!slashes_w) {
n += fmt::fprint(out, "//")?;
};
- n += percent_encode(out, host)?;
+ let unres = if(u.scheme == "file") {
+ yield unres_path;
+ } else {
+ yield unres_host;
+ };
+ n += percent_encode(out, host, unres)?;
};
case let addr: ip::addr =>
if (!slashes_w) {
@@ -36,14 +56,14 @@ export fn fmt(out: io::handle, u: *const uri) (size | io::error) = {
if (u.port != 0) {
n += fmt::fprintf(out, ":{}", u.port)?;
};
- n += fmt::fprint(out, u.path)?;
+ n += percent_encode(out, u.path, unres_path)?;
if (len(u.query) > 0) {
- n += fmt::fprint(out, "?")?;
- n += percent_encode(out, u.query)?;
+ // u.query is always stored percent-encoded (see parse and encodequery/decodequery), so it is written verbatim
+ n += fmt::fprintf(out, "?{}", u.query)?;
};
if (len(u.fragment) > 0) {
n += fmt::fprint(out, "#")?;
- n += percent_encode(out, u.fragment)?;
+ n += percent_encode(out, u.fragment, unres_query_frag)?;
};
return n;
@@ -62,7 +82,7 @@ fn fmtaddr(out: io::handle, addr: ip::addr) (size | io::error) = {
return n;
};
-fn percent_encode(out: io::handle, src: str) (size | io::error) = {
+fn percent_encode(out: io::handle, src: str, allowed: str) (size | io::error) = {
let iter = strings::iter(src);
let n = 0z;
for (true) {
@@ -72,8 +92,7 @@ fn percent_encode(out: io::handle, src: str) (size | io::error) = {
case =>
break;
};
- // unreserved
- if (ascii::isalnum(r) || strings::contains("-._~", r)) {
+ if (ascii::isalnum(r) || strings::contains(allowed, r)) {
n += fmt::fprint(out, r)?;
} else {
const en = utf8::encoderune(r);
diff --git a/net/uri/query.ha b/net/uri/query.ha
@@ -50,10 +50,10 @@ export fn encodequery(pairs: [](str, str)) str = {
if (i > 0) strio::appendrune(&buf, '&')!;
assert(len(pair.0) > 0);
- percent_encode(&buf, pair.0)!;
+ percent_encode(&buf, pair.0, unres_query_frag)!;
if (len(pair.1) > 0) {
strio::appendrune(&buf, '=')!;
- percent_encode(&buf, pair.1)!;
+ percent_encode(&buf, pair.1, unres_query_frag)!;
};
};
@@ -61,7 +61,7 @@ export fn encodequery(pairs: [](str, str)) str = {
};
@test fn decodequery() void = {
- const u = parse("https://sr.ht/projects?search=%23risc-v&sort=longest-active")!;
+ const u = parse("https://sr.ht/projects?search=%23risc-v&sort=longest-active&quantity=100%25")!;
defer finish(&u);
const query = decodequery(u.query);
@@ -73,15 +73,20 @@ export fn encodequery(pairs: [](str, str)) str = {
const pair = query_next(&query)! as (str, str);
assert(pair.0 == "sort");
assert(pair.1 == "longest-active");
+
+ const pair = query_next(&query)! as (str, str);
+ assert(pair.0 == "quantity");
+ assert(pair.1 == "100%");
};
@test fn encodequery() void = {
const pairs = [
("search", "#risc-v"),
("sort", "longest-active"),
+ ("quantity", "100%")
];
const encoded = encodequery(pairs);
defer free(encoded);
- assert(encoded == "search=%23risc-v&sort=longest-active");
+ assert(encoded == "search=%23risc-v&sort=longest-active&quantity=100%25");
};