hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit d756778d288097017515ddb4e60f6f7c41ed96f9
parent d594cbee291e9b9032f461611c367b44d538d672
Author: Alexey Yerin <yyp@disroot.org>
Date:   Thu, 17 Mar 2022 21:44:10 +0300

Implement net::uri

Implements: https://todo.sr.ht/~sircmpwn/hare/329
Signed-off-by: Alexey Yerin <yyp@disroot.org>

Diffstat:
Anet/uri/+test.ha | 121+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anet/uri/README | 2++
Anet/uri/fmt.ha | 80+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anet/uri/parse.ha | 375+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anet/uri/uri.ha | 28++++++++++++++++++++++++++++
Mscripts/gen-stdlib | 22++++++++++++++++++++++
Mstdlib.mk | 37+++++++++++++++++++++++++++++++++++++
7 files changed, 665 insertions(+), 0 deletions(-)

diff --git a/net/uri/+test.ha b/net/uri/+test.ha
@@ -0,0 +1,121 @@
+use net::ip;
+
+// Checks that well-formed URIs are split into the expected components. Fields
+// omitted from an expected [[uri]] literal are left at their defaults by "...".
+@test fn parse() void = {
+	test_uri(
+		"http://harelang.org/",
+		uri {
+			scheme = "http",
+			host = "harelang.org",
+			path = "/",
+			...
+		},
+	)!;
+	test_uri(
+		"irc+insecure://chat.sr.ht:6667",
+		uri {
+			scheme = "irc+insecure",
+			host = "chat.sr.ht",
+			port = 6667,
+			...
+		},
+	)!;
+	test_uri(
+		"ldap://13.37.73.31:1234/",
+		uri {
+			scheme = "ldap",
+			host = [13, 37, 73, 31]: ip::addr4,
+			port = 1234,
+			path = "/",
+			...
+		},
+	)!;
+	test_uri(
+		"http://[::1]/test",
+		uri {
+			scheme = "http",
+			host = ip::parse("::1")!,
+			path = "/test",
+			...
+		},
+	)!;
+
+	// Test percent decoding in various places
+	test_uri(
+		"https://git%2esr.ht/~sircmpw%6e/hare#Build%20status",
+		uri {
+			scheme = "https",
+			host = "git.sr.ht",
+			path = "/~sircmpwn/hare",
+			fragment = "Build status",
+			...
+		},
+	)!;
+
+	// IPv6
+	test_uri(
+		"ldap://[2001:db8::7]/c=GB?objectClass?one",
+		uri {
+			scheme = "ldap",
+			host = ip::parse("2001:db8::7")!,
+			path = "/c=GB",
+			query = "objectClass?one",
+			...
+		},
+	)!;
+
+	// Some non-URL variants like mailto: or URN
+	test_uri(
+		"urn:example:animal:ferret:nose",
+		uri {
+			scheme = "urn",
+			host = "",
+			path = "example:animal:ferret:nose",
+			...
+		},
+	)!;
+	test_uri(
+		"mailto:~sircmpwn/hare-dev@lists.sr.ht",
+		uri {
+			scheme = "mailto",
+			host = "",
+			path = "~sircmpwn/hare-dev@lists.sr.ht",
+			...
+		},
+	)!;
+
+	// https://bugs.chromium.org/p/chromium/issues/detail?id=841105
+	// Everything after the first '/' must end up in the path, even when it
+	// looks like userinfo and a port; the escape %34 decodes to '4'.
+	test_uri(
+		"https://web-safety.net/..;@www.google.com:%3443",
+		uri {
+			scheme = "https",
+			host = "web-safety.net",
+			path = "/..;@www.google.com:443",
+			...
+		},
+	)!;
+};
+
+// Checks that inputs with a missing or malformed scheme are rejected.
+@test fn invalid() void = {
+	// Scheme
+	assert(parse(":") is invalid);
+	assert(parse("hello*:") is invalid);
+	assert(parse("hello") is invalid);
+};
+
+// Parses "in" and asserts that every component of the result equals the
+// matching field of "expected". A parse failure is returned to the caller
+// as [[invalid]] rather than asserted on here.
+fn test_uri(in: str, expected: uri) (void | invalid) = {
+	const u = parse(in)?;
+	defer finish(&u);
+
+	assert(u.scheme == expected.scheme);
+	// The host is a tagged union: compare using whichever variant was
+	// parsed and require the expectation to hold the same variant.
+	match (u.host) {
+	case let s: str =>
+		assert(s == expected.host as str);
+	case let i: ip::addr =>
+		assert(ip::equal(i, expected.host as ip::addr));
+	};
+	assert(u.port == expected.port);
+	assert(u.userinfo == expected.userinfo);
+	assert(u.path == expected.path);
+	assert(u.query == expected.query);
+	assert(u.fragment == expected.fragment);
+};
diff --git a/net/uri/README b/net/uri/README
@@ -0,0 +1,2 @@
+The net::uri module provides utilities for parsing Uniform Resource Identifiers
+(RFC 3986).
diff --git a/net/uri/fmt.ha b/net/uri/fmt.ha
@@ -0,0 +1,80 @@
+use ascii;
+use encoding::utf8;
+use fmt;
+use io;
+use net::ip;
+use strconv;
+use strings;
+use strio;
+
+// Writes a formatted [[uri]] to an [[io::handle]]. Returns the number of bytes
+// written.
+export fn fmt(out: io::handle, u: *const uri) (size | io::error) = {
+	let n = 0z;
+	// Set once the "//" authority prefix has been written, so that it is
+	// emitted at most once whether it is triggered by userinfo or by host.
+	let slashes_w = false;
+	n += fmt::fprintf(out, "{}:", u.scheme)?;
+	if (len(u.userinfo) > 0) {
+		// Userinfo is only meaningful in front of a non-empty host.
+		assert(!(u.host is str) || len(u.host as str) > 0);
+		n += fmt::fprintf(out, "//{}@", u.userinfo)?;
+		slashes_w = true;
+	};
+	match (u.host) {
+	case let host: str =>
+		if (len(host) > 0) {
+			if (!slashes_w) {
+				n += fmt::fprint(out, "//")?;
+			};
+			n += percent_encode(out, host)?;
+		};
+	case let addr: ip::addr =>
+		if (!slashes_w) {
+			n += fmt::fprint(out, "//")?;
+		};
+		// An IPv6 literal has to be wrapped in brackets: its colons
+		// would otherwise be read as the port separator, and
+		// parse_authority only accepts the bracketed form.
+		const v6 = addr is ip::addr6;
+		if (v6) {
+			n += fmt::fprint(out, "[")?;
+		};
+		n += ip::fmt(out, addr)?;
+		if (v6) {
+			n += fmt::fprint(out, "]")?;
+		};
+	};
+	if (u.port != 0) {
+		// Port 0 means "no port"; parse_port never produces it.
+		n += fmt::fprintf(out, ":{}", u.port)?;
+	};
+	// NOTE(review): the path is written verbatim although parse() stores it
+	// percent-decoded, so reserved characters in u.path are not re-escaped
+	// here - confirm whether that is intended.
+	n += fmt::fprint(out, u.path)?;
+	if (len(u.query) > 0) {
+		// parse_query keeps the query exactly as it appeared in the
+		// input (no percent-decoding), so it is written back unchanged
+		// after its '?' separator.
+		n += fmt::fprintf(out, "?{}", u.query)?;
+	};
+	if (len(u.fragment) > 0) {
+		n += fmt::fprint(out, "#")?;
+		n += percent_encode(out, u.fragment)?;
+	};
+
+	return n;
+};
+
+// Writes src to out, leaving RFC 3986 "unreserved" characters (ASCII letters,
+// digits and "-._~") as they are and replacing every other rune with one
+// "%XX" escape per byte of its UTF-8 encoding. Returns the number of bytes
+// written.
+fn percent_encode(out: io::handle, src: str) (size | io::error) = {
+	let iter = strings::iter(src);
+	let n = 0z;
+	for (true) {
+		const r = match (strings::next(&iter)) {
+		case let r: rune =>
+			yield r;
+		case =>
+			break;
+		};
+		// unreserved
+		if (ascii::isalnum(r) || strings::contains("-._~", r)) {
+			n += fmt::fprint(out, r)?;
+		} else {
+			const en = utf8::encoderune(r);
+			for (let i = 0z; i < len(en); i += 1) {
+				const hex = strconv::u8tosb(en[i],
+					strconv::base::HEX);
+				// An escape is always '%' plus exactly two hex
+				// digits (percent_decode reads two), so bytes
+				// below 0x10 need a leading zero.
+				const pad = if (len(hex) < 2) "0" else "";
+				n += fmt::fprintf(out, "%{}{}", pad, hex)?;
+			};
+		};
+	};
+	return n;
+};
+
+// Formats a [[uri]] into a string. The result must be freed by the caller.
+export fn string(u: *const uri) str = {
+	const st = strio::dynamic();
+	fmt(&st, u)!;
+	return strio::string(&st);
+};
diff --git a/net/uri/parse.ha b/net/uri/parse.ha
@@ -0,0 +1,375 @@
+use ascii;
+use io;
+use net::ip;
+use strconv;
+use strings;
+use strio;
+
+// The URI provided to [[parse]] is invalid.
+export type invalid = !void;
+
+// Parses a URI string into [[uri]] structure. The return value must be freed
+// using [[finish]].
+export fn parse(in: str) (uri | invalid) = {
+	// Shadow the input with a rune iterator; every helper below advances
+	// this same iterator and pushes back any delimiter it does not own.
+	let in = strings::iter(in);
+
+	const scheme = parse_scheme(&in)?;
+
+	// Determine hier-part variant
+	let path = "";
+	// (host, port, userinfo), as returned by parse_authority
+	let authority = ("", 0u16, "");
+	match (strings::next(&in)) {
+	case let r: rune =>
+		switch (r) {
+		case '/' =>
+			// Either "//"+authority+path-abempty or path-absolute
+			switch (wantrune(&in)?) {
+			case '/' =>
+				// "//" + authority + path-abempty
+				authority = parse_authority(&in)?;
+				match (strings::next(&in)) {
+				case let r: rune =>
+					switch (r) {
+					case '?', '#' =>
+						// path-empty
+						strings::prev(&in);
+					case '/' =>
+						// path-absolute
+						strings::prev(&in);
+						path = parse_path(&in,
+							path_mode::ABSOLUTE)?;
+					case =>
+						return invalid;
+					};
+				case => void; // path-empty
+				};
+			case =>
+				// path-absolute
+				// NOTE(review): only the rune after '/' is
+				// pushed back, so the leading '/' itself looks
+				// like it is dropped from the path, and a URI
+				// ending right after that '/' is rejected by
+				// wantrune above - confirm.
+				strings::prev(&in);
+				path = parse_path(&in, path_mode::ABSOLUTE)?;
+			};
+		case =>
+			// path-rootless
+			strings::prev(&in);
+			path = parse_path(&in, path_mode::ROOTLESS)?;
+		};
+	case => void; // path-empty
+	};
+
+	// The path parser stops in front of '?' or '#'; consume the delimiter
+	// here or put back whatever else was read.
+	let query = "";
+	match (strings::next(&in)) {
+	case let r: rune =>
+		if (r == '?') {
+			query = parse_query(&in)?;
+		} else {
+			strings::prev(&in);
+		};
+	case => void;
+	};
+
+	let fragment = "";
+	match (strings::next(&in)) {
+	case let r: rune =>
+		if (r == '#') {
+			fragment = parse_fragment(&in)?;
+		} else {
+			strings::prev(&in);
+		};
+	case => void;
+	};
+
+	// NOTE(review): when a later component fails with "?", the strings
+	// already produced (scheme, authority, path, query) do not appear to
+	// be freed - confirm.
+	return uri {
+		scheme = scheme,
+
+		host = match (ip::parse(authority.0)) {
+		case let a: ip::addr =>
+			// The host is stored as an address, so the decoded
+			// host string is no longer referenced by the result
+			// and finish() would never free it.
+			free(authority.0);
+			yield a;
+		case ip::invalid =>
+			yield authority.0;
+		},
+		port = authority.1,
+		userinfo = authority.2,
+
+		path = path,
+		query = query,
+		fragment = fragment,
+	};
+};
+
+// Reads the scheme up to and including the ':' that terminates it and returns
+// the scheme without the ':'. The first rune must be an ASCII letter; later
+// runes must be ASCII alphanumerics or one of "+-.". Running out of input
+// before a ':' is found yields [[invalid]]. The returned string is owned by
+// the caller.
+fn parse_scheme(in: *strings::iterator) (str | invalid) = {
+	// NOTE(review): buf is not closed on the invalid returns below, so the
+	// partially read scheme looks leaked on failure - confirm.
+	let buf = strio::dynamic();
+
+	for (let i = 0z; true; i += 1) {
+		const r = wantrune(in)?;
+		if (i > 0 && r == ':') {
+			break;
+		};
+		if (i == 0) {
+			if (!ascii::isalpha(r)) {
+				return invalid;
+			};
+		} else {
+			if (!ascii::isalnum(r) && !strings::contains("+-.", r)) {
+				return invalid;
+			};
+		};
+		strio::appendrune(&buf, r)!;
+	};
+
+	return strio::string(&buf);
+};
+
+// Parses the authority component that follows "//" and returns it as a
+// (host, port, userinfo) tuple; port is 0 and userinfo is empty when absent.
+// Host and userinfo are percent-decoded. Parsing stops in front of the '/',
+// '?' or '#' that ends the authority (the delimiter is left for the caller),
+// after a port, or at the end of the input.
+fn parse_authority(in: *strings::iterator) ((str, u16, str) | invalid) = {
+	// Scan everything until '@' or ':' or '/', then decide what it is
+	let buf = strio::dynamic();
+	defer io::close(&buf);
+	let host = "";
+	let port = 0u16;
+	let userinfo = "";
+
+	for (true) {
+		const r = match (strings::next(in)) {
+		case let r: rune =>
+			yield r;
+		case void =>
+			break;
+		};
+
+		if (r == '[') {
+			// Bracketed IP literal. Anything collected before the
+			// '[' is taken as userinfo.
+			if (len(strio::string(&buf)) > 0) {
+				if (len(userinfo) > 0) {
+					return invalid;
+				} else {
+					userinfo = percent_decode(
+						strio::string(&buf))?;
+				};
+			};
+			strio::reset(&buf);
+
+			// Copy everything up to the closing ']'; a missing
+			// ']' makes wantrune fail with invalid.
+			for (true) {
+				const r = wantrune(in)?;
+				if (r == ']') {
+					break;
+				};
+				strio::appendrune(&buf, r)!;
+			};
+
+			host = percent_decode(strio::string(&buf))?;
+		} else if (r == ':' || !is_userinfo(r) && !is_host(r)) {
+			// NOTE(review): ':' satisfies is_userinfo, so a port
+			// that follows userinfo ("user@host:80") appears to hit
+			// this rejection - confirm.
+			if (len(userinfo) > 0 && is_userinfo(r)) {
+				return invalid;
+			};
+
+			if (r == '@') {
+				// This was userinfo+host[+port]
+				userinfo = percent_decode(strio::string(&buf))?;
+				strio::reset(&buf);
+			};
+			if (r == '/' || r == '?' || r == '#') {
+				// This was just host. The delimiter belongs to
+				// the path, query or fragment, so hand it back
+				// to the caller instead of swallowing it.
+				strings::prev(in);
+				host = percent_decode(strio::string(&buf))?;
+				break;
+			};
+			if (r == ':') {
+				// This was host+port
+				host = percent_decode(strio::string(&buf))?;
+				port = parse_port(in)?;
+				break;
+			};
+		} else {
+			strio::appendrune(&buf, r)!;
+		};
+	};
+
+	// In end of string case
+	if (len(host) == 0) {
+		host = percent_decode(strio::string(&buf))?;
+	};
+
+	return (host, port, userinfo);
+};
+
+// Selects how [[parse_path]] treats the start of the path.
+type path_mode = enum {
+	// No extra check is applied to the first segment.
+	ABSOLUTE,
+	// The path must not begin with '/'.
+	ROOTLESS,
+};
+
+// Reads a path up to (but not including) the first '?' or '#', or to the end
+// of the input, and returns it percent-decoded. Any rune that is neither a
+// pchar nor '/' yields [[invalid]].
+fn parse_path(in: *strings::iterator, mode: path_mode) (str | invalid) = {
+	let buf = strio::dynamic();
+	defer io::close(&buf);
+
+	// With rootless path, we need at least one segment
+	if (mode == path_mode::ROOTLESS) {
+		// Reads the first segment only; a '/' as the very first rune
+		// is rejected, a later '/' ends the segment.
+		for (let i = 0z; true; i += 1) {
+			match (strings::next(in)) {
+			case let r: rune =>
+				if (r == '?' || r == '#') {
+					strings::prev(in);
+					break;
+				};
+				if (r == '/') {
+					if (i == 0) {
+						return invalid;
+					} else {
+						strio::appendrune(&buf, '/')!;
+						break;
+					};
+				};
+				if (!is_pchar(r)) {
+					return invalid;
+				};
+				strio::appendrune(&buf, r)!;
+			case void =>
+				break;
+			};
+		};
+	};
+
+	// Remaining segments (or the whole path in ABSOLUTE mode).
+	for (true) {
+		match (strings::next(in)) {
+		case let r: rune =>
+			if (r == '?' || r == '#') {
+				strings::prev(in);
+				break;
+			};
+			if (!is_pchar(r) && r != '/') {
+				return invalid;
+			};
+			strio::appendrune(&buf, r)!;
+		case void =>
+			break;
+		};
+	};
+
+	return percent_decode(strio::string(&buf));
+};
+
+// Reads the query that follows '?', up to (but not including) '#' or to the
+// end of the input. The query is returned exactly as written, without
+// percent-decoding, and is owned by the caller.
+fn parse_query(in: *strings::iterator) (str | invalid) = {
+	// Not closed here: the buffer's string is the return value.
+	let buf = strio::dynamic();
+
+	for (true) {
+		match (strings::next(in)) {
+		case let r: rune =>
+			if (r == '#') {
+				strings::prev(in);
+				break;
+			};
+			if (!is_pchar(r) && r != '/' && r != '?') {
+				return invalid;
+			};
+			strio::appendrune(&buf, r)!;
+		case void =>
+			break;
+		};
+	};
+
+	return strio::string(&buf);
+};
+
+// Reads the fragment that follows '#' to the end of the input and returns it
+// percent-decoded.
+fn parse_fragment(in: *strings::iterator) (str | invalid) = {
+	let buf = strio::dynamic();
+	defer io::close(&buf);
+
+	for (true) {
+		match (strings::next(in)) {
+		case let r: rune =>
+			if (!is_pchar(r) && r != '/' && r != '?') {
+				return invalid;
+			};
+			strio::appendrune(&buf, r)!;
+		case void =>
+			break;
+		};
+	};
+
+	return percent_decode(strio::string(&buf))?;
+};
+
+// Reads the run of ASCII digits that follows ':' and converts it to a port
+// number. The first non-digit is pushed back for the caller. An empty run, a
+// value that does not fit in a u16, and port 0 all yield [[invalid]].
+fn parse_port(in: *strings::iterator) (u16 | invalid) = {
+	let buf = strio::dynamic();
+	defer io::close(&buf);
+	for (true) {
+		const r = match (strings::next(in)) {
+		case let r: rune =>
+			yield r;
+		case void =>
+			break;
+		};
+
+		if (!ascii::isdigit(r)) {
+			strings::prev(in);
+			break;
+		};
+		strio::appendrune(&buf, r)!;
+	};
+
+	match (strconv::stou16(strio::string(&buf))) {
+	case let port: u16 =>
+		if (port == 0) {
+			// There's no port 0
+			return invalid;
+		};
+		return port;
+	case =>
+		return invalid;
+	};
+};
+
+// Returns a copy of s in which every "%XX" escape is replaced by the value of
+// its two hexadecimal digits. A truncated or non-hexadecimal escape yields
+// [[invalid]]. The returned string is owned by the caller.
+fn percent_decode(s: str) (str | invalid) = {
+	// NOTE(review): buf is not closed on the invalid returns below, so the
+	// partial result looks leaked on failure - confirm.
+	let buf = strio::dynamic();
+	let iter = strings::iter(s);
+	for (true) {
+		match (strings::next(&iter)) {
+		case let r: rune =>
+			if (r == '%') {
+				let tmp = strio::dynamic();
+				defer io::close(&tmp);
+				for (let i = 0z; i < 2; i += 1) {
+					const r = wantrune(&iter)?;
+					strio::appendrune(&tmp, r)!;
+				};
+
+				match (strconv::stou8b(strio::string(&tmp),
+					strconv::base::HEX)) {
+				case let ord: u8 =>
+					// NOTE(review): the byte is appended as
+					// the rune with that code point, so a
+					// multi-byte UTF-8 escape sequence such
+					// as %C3%A9 presumably decodes to two
+					// runes rather than one - confirm.
+					strio::appendrune(&buf, ord: u32: rune)!;
+				case =>
+					return invalid;
+				};
+			} else {
+				strio::appendrune(&buf, r)!;
+			};
+		case void =>
+			break;
+		};
+	};
+	return strio::string(&buf);
+};
+
+// Returns the next rune from iter, or [[invalid]] if the input is exhausted.
+fn wantrune(iter: *strings::iterator) (rune | invalid) = {
+	match (strings::next(iter)) {
+	case let r: rune =>
+		return r;
+	case =>
+		return invalid;
+	};
+};
+
+// Reports whether r may appear in the userinfo part of an authority.
+fn is_userinfo(r: rune) bool =
+	// unreserved + sub-delim + ":"
+	ascii::isalnum(r) || strings::contains("-._~!$&'()*+,;=:", r)
+	// %-encoded
+	|| r == '%' || ascii::isxdigit(r);
+
+// Reports whether r may appear in a registered host name.
+fn is_host(r: rune) bool =
+	// unreserved + sub-delim
+	ascii::isalnum(r) || strings::contains("-._~!$&'()*+,;=", r)
+	// %-encoded
+	|| r == '%' || ascii::isxdigit(r);
+
+// Reports whether r is a path character ("pchar" in RFC 3986).
+fn is_pchar(r: rune) bool =
+	// unreserved + sub-delim + ":"/"@"
+	ascii::isalnum(r) || strings::contains("-._~!$&'()*+,;=:@", r)
+	// %-encoded
+	|| r == '%' || ascii::isxdigit(r);
diff --git a/net/uri/uri.ha b/net/uri/uri.ha
@@ -0,0 +1,28 @@
+use net::ip;
+
+// Representation of a parsed URI.
+export type uri = struct {
+	scheme: str,
+
+	// Either a host name or a parsed IP address.
+	host: (str | ip::addr),
+	// 0 when the URI has no port.
+	port: u16,
+	userinfo: str,
+
+	path: str,
+	query: str,
+	fragment: str,
+};
+
+// Frees resources associated with a [[uri]].
+export fn finish(u: *uri) void = {
+	free(u.scheme);
+	// Only a host stored as a string is freed; nothing is released for
+	// the ip::addr variant.
+	match (u.host) {
+	case let s: str =>
+		free(s);
+	case => void;
+	};
+	free(u.userinfo);
+	free(u.path);
+	free(u.query);
+	free(u.fragment);
+};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -933,6 +933,27 @@ net_unix() {
 	gen_ssa -pfreebsd net::unix net errors os io strings types fmt net::dial
 }
 
+gensrcs_net_uri() {
+	gen_srcs net::uri \
+		fmt.ha \
+		parse.ha \
+		uri.ha \
+		$*
+}
+
+net_uri() {
+	if [ $testing -eq 0 ]
+	then
+		gensrcs_net_uri
+	else
+		gensrcs_net_uri \
+			+test.ha
+	fi
+	gen_ssa net::uri \
+		ascii encoding::utf8 fmt io net::ip strconv strings strio
+}
+
+
 math_random() {
 	gen_srcs math::random \
 		random.ha
@@ -1251,6 +1272,7 @@ net::ip linux freebsd
 net::tcp linux freebsd
 net::udp linux freebsd
 net::unix linux freebsd
+net::uri
 os linux freebsd
 os::exec linux freebsd
 path
diff --git a/stdlib.mk b/stdlib.mk
@@ -510,6 +510,12 @@ stdlib_deps_linux+=$(stdlib_net_unix_linux)
 stdlib_net_unix_freebsd=$(HARECACHE)/net/unix/net_unix-freebsd.o
 stdlib_deps_freebsd+=$(stdlib_net_unix_freebsd)
 
+# gen_lib net::uri (any)
+stdlib_net_uri_any=$(HARECACHE)/net/uri/net_uri-any.o
+stdlib_deps_any+=$(stdlib_net_uri_any)
+stdlib_net_uri_linux=$(stdlib_net_uri_any)
+stdlib_net_uri_freebsd=$(stdlib_net_uri_any)
+
 # gen_lib os (linux)
 stdlib_os_linux=$(HARECACHE)/os/os-linux.o
 stdlib_deps_linux+=$(stdlib_os_linux)
@@ -1491,6 +1497,18 @@ $(HARECACHE)/net/unix/net_unix-freebsd.ssa: $(stdlib_net_unix_freebsd_srcs) $(st
 	@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nnet::unix \
 		-t$(HARECACHE)/net/unix/net_unix.td $(stdlib_net_unix_freebsd_srcs)
 
+# net::uri (+any)
+stdlib_net_uri_any_srcs= \
+	$(STDLIB)/net/uri/fmt.ha \
+	$(STDLIB)/net/uri/parse.ha \
+	$(STDLIB)/net/uri/uri.ha
+
+$(HARECACHE)/net/uri/net_uri-any.ssa: $(stdlib_net_uri_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_fmt_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_net_ip_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strio_$(PLATFORM))
+	@printf 'HAREC \t$@\n'
+	@mkdir -p $(HARECACHE)/net/uri
+	@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nnet::uri \
+		-t$(HARECACHE)/net/uri/net_uri.td $(stdlib_net_uri_any_srcs)
+
 # os (+linux)
 stdlib_os_linux_srcs= \
 	$(STDLIB)/os/+linux/environ.ha \
@@ -2339,6 +2357,12 @@ testlib_deps_linux+=$(testlib_net_unix_linux)
 testlib_net_unix_freebsd=$(TESTCACHE)/net/unix/net_unix-freebsd.o
 testlib_deps_freebsd+=$(testlib_net_unix_freebsd)
 
+# gen_lib net::uri (any)
+testlib_net_uri_any=$(TESTCACHE)/net/uri/net_uri-any.o
+testlib_deps_any+=$(testlib_net_uri_any)
+testlib_net_uri_linux=$(testlib_net_uri_any)
+testlib_net_uri_freebsd=$(testlib_net_uri_any)
+
 # gen_lib os (linux)
 testlib_os_linux=$(TESTCACHE)/os/os-linux.o
 testlib_deps_linux+=$(testlib_os_linux)
@@ -3358,6 +3382,19 @@ $(TESTCACHE)/net/unix/net_unix-freebsd.ssa: $(testlib_net_unix_freebsd_srcs) $(t
 	@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nnet::unix \
 		-t$(TESTCACHE)/net/unix/net_unix.td $(testlib_net_unix_freebsd_srcs)
 
+# net::uri (+any)
+testlib_net_uri_any_srcs= \
+	$(STDLIB)/net/uri/fmt.ha \
+	$(STDLIB)/net/uri/parse.ha \
+	$(STDLIB)/net/uri/uri.ha \
+	$(STDLIB)/net/uri/+test.ha
+
+$(TESTCACHE)/net/uri/net_uri-any.ssa: $(testlib_net_uri_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_fmt_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_net_ip_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strio_$(PLATFORM))
+	@printf 'HAREC \t$@\n'
+	@mkdir -p $(TESTCACHE)/net/uri
+	@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nnet::uri \
+		-t$(TESTCACHE)/net/uri/net_uri.td $(testlib_net_uri_any_srcs)
+
 # os (+linux)
 testlib_os_linux_srcs= \
 	$(STDLIB)/os/+linux/environ.ha \