hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

parse.ha (7966B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use ascii;
      5 use encoding::utf8;
      6 use io;
      7 use memio;
      8 use net::ip;
      9 use strconv;
     10 use strings;
     11 
// Returned by [[parse]] when its input is not a well-formed URI, for example
// when a component contains a character that is not allowed there or a
// malformed percent-escape.
export type invalid = !void;
     14 
     15 // Parses a URI string into [[uri]] structure. The return value must be freed
     16 // using [[finish]].
     17 export fn parse(in: str) (uri | invalid) = {
     18 	let in = strings::iter(in);
     19 
     20 	const scheme = parse_scheme(&in)?;
     21 
     22 	// Determine hier-part variant
     23 	let path = "";
     24 	let authority: ((str | ip::addr6), u16, str) = ("", 0u16, "");
     25 	match (strings::next(&in)) {
     26 	case let r: rune =>
     27 		switch (r) {
     28 		case '/' =>
     29 			// Either "//"+authority+path-abempty or path-absolute
     30 			switch (wantrune(&in)?) {
     31 			case '/' =>
     32 				// "//" + authority + path-abempty
     33 				authority = parse_authority(&in)?;
     34 				match (strings::next(&in)) {
     35 				case let r: rune =>
     36 					switch (r) {
     37 					case '?', '#' =>
     38 						// path-empty
     39 						strings::prev(&in);
     40 					case '/' =>
     41 						// path-absolute
     42 						strings::prev(&in);
     43 						path = parse_path(&in,
     44 							path_mode::ABSOLUTE)?;
     45 					case =>
     46 						return invalid;
     47 					};
     48 				case => void; // path-empty
     49 				};
     50 			case =>
     51 				// path-absolute
     52 				strings::prev(&in);
     53 				path = parse_path(&in, path_mode::ABSOLUTE)?;
     54 			};
     55 		case =>
     56 			// path-rootless
     57 			strings::prev(&in);
     58 			path = parse_path(&in, path_mode::ROOTLESS)?;
     59 		};
     60 	case => void; // path-empty
     61 	};
     62 
     63 	let query = "";
     64 	match (strings::next(&in)) {
     65 	case let r: rune =>
     66 		if (r == '?') {
     67 			query = parse_query(&in)?;
     68 		} else {
     69 			strings::prev(&in);
     70 		};
     71 	case => void;
     72 	};
     73 
     74 	let fragment = "";
     75 	match (strings::next(&in)) {
     76 	case let r: rune =>
     77 		if (r == '#') {
     78 			fragment = parse_fragment(&in)?;
     79 		} else {
     80 			strings::prev(&in);
     81 		};
     82 	case => void;
     83 	};
     84 
     85 	return uri {
     86 		scheme = scheme,
     87 
     88 		host = match (authority.0) {
     89 		case let ip: ip::addr6 =>
     90 			yield ip;
     91 		case let s: str =>
     92 			yield match (ip::parse(s)) {
     93 			case let a: ip::addr =>
     94 				yield a;
     95 			case ip::invalid =>
     96 				yield s;
     97 			};
     98 		},
     99 		port = authority.1,
    100 		userinfo = authority.2,
    101 
    102 		path = path,
    103 		query = query,
    104 		fragment = fragment,
    105 	};
    106 };
    107 
    108 fn parse_scheme(in: *strings::iterator) (str | invalid) = {
    109 	let copy = *in;
    110 	for (let i = 0z; true; i += 1) {
    111 		const r = wantrune(in)?;
    112 		if (i > 0 && r == ':') {
    113 			strings::prev(in);
    114 			break;
    115 		};
    116 		if (i == 0) {
    117 			if (!ascii::isalpha(r)) {
    118 				return invalid;
    119 			};
    120 		} else {
    121 			if (!ascii::isalnum(r) && !strings::contains("+-.", r)) {
    122 				return invalid;
    123 			};
    124 		};
    125 	};
    126 	let s = strings::dup(strings::slice(&copy, in));
    127 	strings::next(in);
    128 	return s;
    129 };
    130 
    131 fn parse_authority(
    132 	in: *strings::iterator,
    133 ) (((str | ip::addr6), u16, str) | invalid) = {
    134 	// Scan everything until '@' or ':' or '/', then decide what it is
    135 	let buf = memio::dynamic();
    136 	defer io::close(&buf)!;
    137 	let host: (str | ip::addr6) = "";
    138 	let port = 0u16;
    139 	let userinfo = "";
    140 	let has_userinfo = false;
    141 
    142 	for (let r => strings::next(in)) {
    143 		if (r == '[') {
    144 			if (len(memio::string(&buf)!) > 0) {
    145 				if (len(userinfo) > 0) {
    146 					return invalid;
    147 				} else {
    148 					userinfo = percent_decode(
    149 						memio::string(&buf)!)?;
    150 				};
    151 			};
    152 			memio::reset(&buf);
    153 
    154 			for (true) {
    155 				const r = wantrune(in)?;
    156 				if (r == ']') {
    157 					break;
    158 				};
    159 				memio::appendrune(&buf, r)!;
    160 			};
    161 
    162 			const addr = percent_decode(memio::string(&buf)!)?;
    163 			match (ip::parse(addr)) {
    164 			case let v6: ip::addr6 =>
    165 				host = v6;
    166 			case =>
    167 				return invalid;
    168 			};
    169 		} else if (r == ':' || !is_userinfo(r) && !is_host(r)) {
    170 			switch (r) {
    171 			case '@' =>
    172 				if (has_userinfo) {
    173 					return invalid;
    174 				};
    175 				// This was userinfo+host[+port]
    176 				userinfo = percent_decode(memio::string(&buf)!)?;
    177 				memio::reset(&buf);
    178 				has_userinfo = true;
    179 			case '/' =>
    180 				// This was just host
    181 				strings::prev(in);
    182 				host = percent_decode(memio::string(&buf)!)?;
    183 				break;
    184 			case ':' =>
    185 				// This was host+port
    186 				host = percent_decode(memio::string(&buf)!)?;
    187 				port = parse_port(in)?;
    188 				break;
    189 			case =>
    190 				return invalid;
    191 			};
    192 		} else {
    193 			memio::appendrune(&buf, r)!;
    194 		};
    195 	};
    196 
    197 	match (host) {
    198 	case let s: str =>
    199 		// In end of string case
    200 		if (len(s) == 0) {
    201 			host = percent_decode(memio::string(&buf)!)?;
    202 		};
    203 	case => void;
    204 	};
    205 
    206 	return (host, port, userinfo);
    207 };
    208 
// Selects which kind of path [[parse_path]] should accept.
type path_mode = enum {
	// No constraint is placed on the first segment of the path.
	ABSOLUTE,
	// The path begins directly with a segment; a '/' as the very first
	// character is rejected.
	ROOTLESS,
};
    213 
    214 fn parse_path(in: *strings::iterator, mode: path_mode) (str | invalid) = {
    215 	let copy = *in;
    216 	// With rootless path, we need at least one segment
    217 	if (mode == path_mode::ROOTLESS) {
    218 		for (let i = 0z; true; i += 1) {
    219 			match (strings::next(in)) {
    220 			case let r: rune =>
    221 				if (r == '?' || r == '#') {
    222 					strings::prev(in);
    223 					break;
    224 				};
    225 				if (r == '/') {
    226 					if (i == 0) {
    227 						return invalid;
    228 					} else {
    229 						break;
    230 					};
    231 				};
    232 				if (!is_pchar(r)) {
    233 					return invalid;
    234 				};
    235 			case done =>
    236 				break;
    237 			};
    238 		};
    239 	};
    240 
    241 	for (let r => strings::next(in)) {
    242 		if (r == '?' || r == '#') {
    243 			strings::prev(in);
    244 			break;
    245 		};
    246 		if (!is_pchar(r) && r != '/') {
    247 			return invalid;
    248 		};
    249 	};
    250 
    251 	return percent_decode(strings::slice(&copy, in));
    252 };
    253 
    254 fn parse_query(in: *strings::iterator) (str | invalid) = {
    255 	let copy = *in;
    256 	for (let r => strings::next(in)) {
    257 		if (r == '#') {
    258 			strings::prev(in);
    259 			break;
    260 		};
    261 		if (!is_pchar(r) && r != '/' && r != '?') {
    262 			return invalid;
    263 		};
    264 	};
    265 	return strings::dup(strings::slice(&copy, in));
    266 };
    267 
    268 fn parse_fragment(in: *strings::iterator) (str | invalid) = {
    269 	let copy = *in;
    270 	for (let r => strings::next(in)) {
    271 		if (!is_pchar(r) && r != '/' && r != '?') {
    272 			return invalid;
    273 		};
    274 	};
    275 
    276 	return percent_decode(strings::slice(&copy, in))?;
    277 };
    278 
    279 fn parse_port(in: *strings::iterator) (u16 | invalid) = {
    280 	let copy = *in;
    281 	for (let r => strings::next(in)) {
    282 		if (!ascii::isdigit(r)) {
    283 			strings::prev(in);
    284 			break;
    285 		};
    286 	};
    287 
    288 	match (strconv::stou16(strings::slice(&copy, in))) {
    289 	case let port: u16 =>
    290 		if (port == 0) {
    291 			// There's no port 0
    292 			return invalid;
    293 		};
    294 		return port;
    295 	case =>
    296 		return invalid;
    297 	};
    298 };
    299 
    300 fn percent_decode(s: str) (str | invalid) = {
    301 	let buf = memio::dynamic();
    302 	percent_decode_static(&buf, s)?;
    303 	return memio::string(&buf)!;
    304 };
    305 
    306 fn percent_decode_static(out: io::handle, s: str) (void | invalid) = {
    307 	let iter = strings::iter(s);
    308 	let tmp = memio::dynamic();
    309 	defer io::close(&tmp)!;
    310 	let percent_data: []u8 = [];
    311 	for (true) {
    312 		match (strings::next(&iter)) {
    313 		case let r: rune =>
    314 			if (r == '%') {
    315 				memio::reset(&tmp);
    316 				for (let i = 0z; i < 2; i += 1) {
    317 					const r = wantrune(&iter)?;
    318 					memio::appendrune(&tmp, r)!;
    319 				};
    320 
    321 				match (strconv::stou8(memio::string(&tmp)!,
    322 					strconv::base::HEX)) {
    323 				case let ord: u8 =>
    324 					append(percent_data, ord);
    325 				case =>
    326 					return invalid;
    327 				};
    328 			} else {
    329 				if(len(percent_data) > 0) {
    330 					match(strings::fromutf8(percent_data)) {
    331 					case let stro: str =>
    332 						memio::concat(out, stro)!;
    333 					case utf8::invalid =>
    334 						return invalid;
    335 					};
    336 
    337 					percent_data = [];
    338 				};
    339 
    340 				memio::appendrune(out, r)!;
    341 			};
    342 		case done =>
    343 			if(len(percent_data) > 0) {
    344 				match(strings::fromutf8(percent_data)) {
    345 				case let stro: str =>
    346 					memio::concat(out, stro)!;
    347 				case utf8::invalid =>
    348 					return invalid;
    349 				};
    350 
    351 				percent_data = [];
    352 			};
    353 
    354 			break;
    355 		};
    356 	};
    357 };
    358 
    359 fn wantrune(iter: *strings::iterator) (rune | invalid) = {
    360 	match (strings::next(iter)) {
    361 	case let r: rune =>
    362 		return r;
    363 	case =>
    364 		return invalid;
    365 	};
    366 };
    367 
    368 fn is_userinfo(r: rune) bool =
    369 	// unreserved + sub-delim + ":"
    370 	ascii::isalnum(r) || strings::contains("-._~!$&'()*+,;=:", r)
    371 	// %-encoded
    372 	|| r == '%' || ascii::isxdigit(r);
    373 
    374 fn is_host(r: rune) bool =
    375 	// unreserved + sub-delim
    376 	ascii::isalnum(r) || strings::contains("-._~!$&'()*+,;=", r)
    377 	// %-encoded
    378 	|| r == '%' || ascii::isxdigit(r);
    379 
    380 fn is_pchar(r: rune) bool =
    381 	// unreserved + sub-delim + ":"/"@"
    382 	ascii::isalnum(r) || strings::contains("-._~!$&'()*+,;=:@", r)
    383 	// %-encoded
    384 	|| r == '%' || ascii::isxdigit(r);