commit a3749ad1ff35cfcd356b9aae2682001121853390
parent 26fa7f8a86940e49eca619ec0b65102337e5bb00
Author: Haelwenn (lanodan) Monnier <contact+sr.ht@hacktivis.me>
Date: Wed, 7 Dec 2022 17:17:58 +0100
net/uri: Fix decoding of multi-byte percent-encoded data
Technically, percent-encoded data can be non-UTF-8, but Hare heavily assumes UTF-8, so decoded byte sequences that are not valid UTF-8 are rejected as invalid.
Signed-off-by: Haelwenn (lanodan) Monnier <contact+sr.ht@hacktivis.me>
Diffstat:
4 files changed, 38 insertions(+), 4 deletions(-)
diff --git a/net/uri/+test.ha b/net/uri/+test.ha
@@ -79,6 +79,16 @@ use net::ip;
...
},
)!;
+ test_uri_roundtrip(
+ "https://en.wiktionary.org/wiki/%E3%81%8A%E3%81%AF%E3%82%88%E3%81%86#Japanese",
+ uri {
+ scheme = "https",
+ host = "en.wiktionary.org",
+ path = "/wiki/おはよう",
+ fragment = "Japanese",
+ ...
+ }
+ )!;
};
@test fn invalid() void = {
diff --git a/net/uri/parse.ha b/net/uri/parse.ha
@@ -2,6 +2,7 @@
// (c) 2022 Alexey Yerin <yyp@disroot.org>
// (c) 2022 Umar Getagazov <umar@handlerug.me>
use ascii;
+use encoding::utf8;
use io;
use net::ip;
use strconv;
@@ -329,6 +330,7 @@ fn percent_decode_static(out: io::handle, s: str) (void | invalid) = {
let iter = strings::iter(s);
let tmp = strio::dynamic();
defer io::close(&tmp)!;
+ let percent_data: []u8 = [];
for (true) {
match (strings::next(&iter)) {
case let r: rune =>
@@ -342,14 +344,36 @@ fn percent_decode_static(out: io::handle, s: str) (void | invalid) = {
match (strconv::stou8b(strio::string(&tmp),
strconv::base::HEX)) {
case let ord: u8 =>
- strio::appendrune(out, ord: u32: rune)!;
+ append(percent_data, ord);
case =>
return invalid;
};
} else {
+ if(len(percent_data) > 0) {
+ match(strings::fromutf8(percent_data)) {
+ case let stro: str =>
+ strio::concat(out, stro)!;
+ case utf8::invalid =>
+ return invalid;
+ };
+
+ percent_data = [];
+ };
+
strio::appendrune(out, r)!;
};
case void =>
+ if(len(percent_data) > 0) {
+ match(strings::fromutf8(percent_data)) {
+ case let stro: str =>
+ strio::concat(out, stro)!;
+ case utf8::invalid =>
+ return invalid;
+ };
+
+ percent_data = [];
+ };
+
break;
};
};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -1100,7 +1100,7 @@ net_uri() {
+test.ha
fi
gen_ssa net::uri \
- ascii ip net::ip strconv strings strio
+ ascii encoding::utf8 ip net::ip strconv strings strio
}
gensrcs_math_complex() {
diff --git a/stdlib.mk b/stdlib.mk
@@ -1755,7 +1755,7 @@ stdlib_net_uri_any_srcs = \
$(STDLIB)/net/uri/query.ha \
$(STDLIB)/net/uri/uri.ha
-$(HARECACHE)/net/uri/net_uri-any.ssa: $(stdlib_net_uri_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_ip_$(PLATFORM)) $(stdlib_net_ip_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strio_$(PLATFORM))
+$(HARECACHE)/net/uri/net_uri-any.ssa: $(stdlib_net_uri_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_ip_$(PLATFORM)) $(stdlib_net_ip_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strio_$(PLATFORM))
@printf 'HAREC \t$@\n'
@mkdir -p $(HARECACHE)/net/uri
@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nnet::uri \
@@ -4010,7 +4010,7 @@ testlib_net_uri_any_srcs = \
$(STDLIB)/net/uri/uri.ha \
$(STDLIB)/net/uri/+test.ha
-$(TESTCACHE)/net/uri/net_uri-any.ssa: $(testlib_net_uri_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_ip_$(PLATFORM)) $(testlib_net_ip_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strio_$(PLATFORM))
+$(TESTCACHE)/net/uri/net_uri-any.ssa: $(testlib_net_uri_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_ip_$(PLATFORM)) $(testlib_net_ip_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strio_$(PLATFORM))
@printf 'HAREC \t$@\n'
@mkdir -p $(TESTCACHE)/net/uri
@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nnet::uri \