hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit a3749ad1ff35cfcd356b9aae2682001121853390
parent 26fa7f8a86940e49eca619ec0b65102337e5bb00
Author: Haelwenn (lanodan) Monnier <contact+sr.ht@hacktivis.me>
Date:   Wed,  7 Dec 2022 17:17:58 +0100

net/uri: Fix decoding multi-byte percent-data

Technically, percent-encoded data can be non-UTF-8, but Hare heavily assumes UTF-8.

Signed-off-by: Haelwenn (lanodan) Monnier <contact+sr.ht@hacktivis.me>

Diffstat:
M net/uri/+test.ha | 10 ++++++++++
M net/uri/parse.ha | 26 +++++++++++++++++++++++++-
M scripts/gen-stdlib | 2 +-
M stdlib.mk | 4 ++--
4 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/net/uri/+test.ha b/net/uri/+test.ha @@ -79,6 +79,16 @@ use net::ip; ... }, )!; + test_uri_roundtrip( + "https://en.wiktionary.org/wiki/%E3%81%8A%E3%81%AF%E3%82%88%E3%81%86#Japanese", + uri { + scheme = "https", + host = "en.wiktionary.org", + path = "/wiki/おはよう", + fragment = "Japanese", + ... + } + )!; }; @test fn invalid() void = { diff --git a/net/uri/parse.ha b/net/uri/parse.ha @@ -2,6 +2,7 @@ // (c) 2022 Alexey Yerin <yyp@disroot.org> // (c) 2022 Umar Getagazov <umar@handlerug.me> use ascii; +use encoding::utf8; use io; use net::ip; use strconv; @@ -329,6 +330,7 @@ fn percent_decode_static(out: io::handle, s: str) (void | invalid) = { let iter = strings::iter(s); let tmp = strio::dynamic(); defer io::close(&tmp)!; + let percent_data: []u8 = []; for (true) { match (strings::next(&iter)) { case let r: rune => @@ -342,14 +344,36 @@ fn percent_decode_static(out: io::handle, s: str) (void | invalid) = { match (strconv::stou8b(strio::string(&tmp), strconv::base::HEX)) { case let ord: u8 => - strio::appendrune(out, ord: u32: rune)!; + append(percent_data, ord); case => return invalid; }; } else { + if(len(percent_data) > 0) { + match(strings::fromutf8(percent_data)) { + case let stro: str => + strio::concat(out, stro)!; + case utf8::invalid => + return invalid; + }; + + percent_data = []; + }; + strio::appendrune(out, r)!; }; case void => + if(len(percent_data) > 0) { + match(strings::fromutf8(percent_data)) { + case let stro: str => + strio::concat(out, stro)!; + case utf8::invalid => + return invalid; + }; + + percent_data = []; + }; + break; }; }; diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -1100,7 +1100,7 @@ net_uri() { +test.ha fi gen_ssa net::uri \ - ascii ip net::ip strconv strings strio + ascii encoding::utf8 ip net::ip strconv strings strio } gensrcs_math_complex() { diff --git a/stdlib.mk b/stdlib.mk @@ -1755,7 +1755,7 @@ stdlib_net_uri_any_srcs = \ $(STDLIB)/net/uri/query.ha \ $(STDLIB)/net/uri/uri.ha 
-$(HARECACHE)/net/uri/net_uri-any.ssa: $(stdlib_net_uri_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_ip_$(PLATFORM)) $(stdlib_net_ip_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) +$(HARECACHE)/net/uri/net_uri-any.ssa: $(stdlib_net_uri_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_ip_$(PLATFORM)) $(stdlib_net_ip_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(HARECACHE)/net/uri @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nnet::uri \ @@ -4010,7 +4010,7 @@ testlib_net_uri_any_srcs = \ $(STDLIB)/net/uri/uri.ha \ $(STDLIB)/net/uri/+test.ha -$(TESTCACHE)/net/uri/net_uri-any.ssa: $(testlib_net_uri_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_ip_$(PLATFORM)) $(testlib_net_ip_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) +$(TESTCACHE)/net/uri/net_uri-any.ssa: $(testlib_net_uri_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_ip_$(PLATFORM)) $(testlib_net_ip_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(TESTCACHE)/net/uri @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nnet::uri \