hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

parse.ha (7748B)


      1 // License: MPL-2.0
      2 // (c) 2022 Alexey Yerin <yyp@disroot.org>
      3 // (c) 2022 Umar Getagazov <umar@handlerug.me>
      4 use ascii;
      5 use io;
      6 use net::ip;
      7 use strconv;
      8 use strings;
      9 use strio;
     10 
     11 // Error type returned by [[parse]] when its input is not a valid URI.
     12 export type invalid = !void;
     13 
     14 // Parses a URI string into [[uri]] structure. The return value must be freed
     15 // using [[finish]].
     16 export fn parse(in: str) (uri | invalid) = {
     17 	let in = strings::iter(in);
     18 
     19 	const scheme = parse_scheme(&in)?;
     20 
     21 	// Determine hier-part variant
     22 	let path = "";
     23 	let authority = ("", 0u16, "");
     24 	match (strings::next(&in)) {
     25 	case let r: rune =>
     26 		switch (r) {
     27 		case '/' =>
     28 			// Either "//"+authority+path-abempty or path-absolute
     29 			switch (wantrune(&in)?) {
     30 			case '/' =>
     31 				// "//" + authority + path-abempty
     32 				authority = parse_authority(&in)?;
     33 				match (strings::next(&in)) {
     34 				case let r: rune =>
     35 					switch (r) {
     36 					case '?', '#' =>
     37 						// path-empty
     38 						strings::prev(&in);
     39 					case '/' =>
     40 						// path-absolute
     41 						strings::prev(&in);
     42 						path = parse_path(&in,
     43 							path_mode::ABSOLUTE)?;
     44 					case =>
     45 						return invalid;
     46 					};
     47 				case => void; // path-empty
     48 				};
     49 			case =>
     50 				// path-absolute
     51 				strings::prev(&in);
     52 				path = parse_path(&in, path_mode::ABSOLUTE)?;
     53 			};
     54 		case =>
     55 			// path-rootless
     56 			strings::prev(&in);
     57 			path = parse_path(&in, path_mode::ROOTLESS)?;
     58 		};
     59 	case => void; // path-empty
     60 	};
     61 
     62 	let query = "";
     63 	match (strings::next(&in)) {
     64 	case let r: rune =>
     65 		if (r == '?') {
     66 			query = parse_query(&in)?;
     67 		} else {
     68 			strings::prev(&in);
     69 		};
     70 	case => void;
     71 	};
     72 
     73 	let fragment = "";
     74 	match (strings::next(&in)) {
     75 	case let r: rune =>
     76 		if (r == '#') {
     77 			fragment = parse_fragment(&in)?;
     78 		} else {
     79 			strings::prev(&in);
     80 		};
     81 	case => void;
     82 	};
     83 
     84 	return uri {
     85 		scheme = scheme,
     86 
     87 		host = match (ip::parse(authority.0)) {
     88 		case let a: ip::addr =>
     89 			yield a;
     90 		case ip::invalid =>
     91 			yield authority.0;
     92 		},
     93 		port = authority.1,
     94 		userinfo = authority.2,
     95 
     96 		path = path,
     97 		query = query,
     98 		fragment = fragment,
     99 	};
    100 };
    101 
    102 fn parse_scheme(in: *strings::iterator) (str | invalid) = {
    103 	let buf = strio::dynamic();
    104 
    105 	for (let i = 0z; true; i += 1) {
    106 		const r = wantrune(in)?;
    107 		if (i > 0 && r == ':') {
    108 			break;
    109 		};
    110 		if (i == 0) {
    111 			if (!ascii::isalpha(r)) {
    112 				return invalid;
    113 			};
    114 		} else {
    115 			if (!ascii::isalnum(r) && !strings::contains("+-.", r)) {
    116 				return invalid;
    117 			};
    118 		};
    119 		strio::appendrune(&buf, r)!;
    120 	};
    121 
    122 	return strio::string(&buf);
    123 };
    124 
    125 fn parse_authority(in: *strings::iterator) ((str, u16, str) | invalid) = {
    126 	// Scan everything until '@' or ':' or '/', then decide what it is
    127 	let buf = strio::dynamic();
    128 	defer io::close(&buf)!;
    129 	let host = "";
    130 	let port = 0u16;
    131 	let userinfo = "";
    132 
    133 	for (true) {
    134 		const r = match (strings::next(in)) {
    135 		case let r: rune =>
    136 			yield r;
    137 		case void =>
    138 			break;
    139 		};
    140 
    141 		if (r == '[') {
    142 			if (len(strio::string(&buf)) > 0) {
    143 				if (len(userinfo) > 0) {
    144 					return invalid;
    145 				} else {
    146 					userinfo = percent_decode(
    147 						strio::string(&buf))?;
    148 				};
    149 			};
    150 			strio::reset(&buf);
    151 
    152 			for (true) {
    153 				const r = wantrune(in)?;
    154 				if (r == ']') {
    155 					break;
    156 				};
    157 				strio::appendrune(&buf, r)!;
    158 			};
    159 
    160 			host = percent_decode(strio::string(&buf))?;
    161 		} else if (r == ':' || !is_userinfo(r) && !is_host(r)) {
    162 			if (len(userinfo) > 0 && is_userinfo(r)) {
    163 				return invalid;
    164 			};
    165 
    166 			if (r == '@') {
    167 				// This was userinfo+host[+port]
    168 				userinfo = percent_decode(strio::string(&buf))?;
    169 				strio::reset(&buf);
    170 			};
    171 			if (r == '/') {
    172 				// This was just host
    173 				strings::prev(in);
    174 				host = percent_decode(strio::string(&buf))?;
    175 				break;
    176 			};
    177 			if (r == ':') {
    178 				// This was host+port
    179 				host = percent_decode(strio::string(&buf))?;
    180 				port = parse_port(in)?;
    181 				break;
    182 			};
    183 		} else {
    184 			strio::appendrune(&buf, r)!;
    185 		};
    186 	};
    187 
    188 	// In end of string case
    189 	if (len(host) == 0) {
    190 		host = percent_decode(strio::string(&buf))?;
    191 	};
    192 
    193 	return (host, port, userinfo);
    194 };
    195 
    196 type path_mode = enum { // Selects how parse_path treats the first path segment.
    197 	ABSOLUTE, // parse_path applies no extra check to the first segment
    198 	ROOTLESS, // parse_path rejects a '/' that comes before any other path rune
    199 };
    200 
    201 fn parse_path(in: *strings::iterator, mode: path_mode) (str | invalid) = {
    202 	let buf = strio::dynamic();
    203 	defer io::close(&buf)!;
    204 
    205 	// With rootless path, we need at least one segment
    206 	if (mode == path_mode::ROOTLESS) {
    207 		for (let i = 0z; true; i += 1) {
    208 			match (strings::next(in)) {
    209 			case let r: rune =>
    210 				if (r == '?' || r == '#') {
    211 					strings::prev(in);
    212 					break;
    213 				};
    214 				if (r == '/') {
    215 					if (i == 0) {
    216 						return invalid;
    217 					} else {
    218 						strio::appendrune(&buf, '/')!;
    219 						break;
    220 					};
    221 				};
    222 				if (!is_pchar(r)) {
    223 					return invalid;
    224 				};
    225 				strio::appendrune(&buf, r)!;
    226 			case void =>
    227 				break;
    228 			};
    229 		};
    230 	};
    231 
    232 	for (true) {
    233 		match (strings::next(in)) {
    234 		case let r: rune =>
    235 			if (r == '?' || r == '#') {
    236 				strings::prev(in);
    237 				break;
    238 			};
    239 			if (!is_pchar(r) && r != '/') {
    240 				return invalid;
    241 			};
    242 			strio::appendrune(&buf, r)!;
    243 		case void =>
    244 			break;
    245 		};
    246 	};
    247 
    248 	return percent_decode(strio::string(&buf));
    249 };
    250 
    251 fn parse_query(in: *strings::iterator) (str | invalid) = {
    252 	let buf = strio::dynamic();
    253 
    254 	for (true) {
    255 		match (strings::next(in)) {
    256 		case let r: rune =>
    257 			if (r == '#') {
    258 				strings::prev(in);
    259 				break;
    260 			};
    261 			if (!is_pchar(r) && r != '/' && r != '?') {
    262 				return invalid;
    263 			};
    264 			strio::appendrune(&buf, r)!;
    265 		case void =>
    266 			break;
    267 		};
    268 	};
    269 
    270 	return strio::string(&buf);
    271 };
    272 
    273 fn parse_fragment(in: *strings::iterator) (str | invalid) = {
    274 	let buf = strio::dynamic();
    275 	defer io::close(&buf)!;
    276 
    277 	for (true) {
    278 		match (strings::next(in)) {
    279 		case let r: rune =>
    280 			if (!is_pchar(r) && r != '/' && r != '?') {
    281 				return invalid;
    282 			};
    283 			strio::appendrune(&buf, r)!;
    284 		case void =>
    285 			break;
    286 		};
    287 	};
    288 
    289 	return percent_decode(strio::string(&buf))?;
    290 };
    291 
    292 fn parse_port(in: *strings::iterator) (u16 | invalid) = {
    293 	let buf = strio::dynamic();
    294 	defer io::close(&buf)!;
    295 	for (true) {
    296 		const r = match (strings::next(in)) {
    297 		case let r: rune =>
    298 			yield r;
    299 		case void =>
    300 			break;
    301 		};
    302 
    303 		if (!ascii::isdigit(r)) {
    304 			strings::prev(in);
    305 			break;
    306 		};
    307 		strio::appendrune(&buf, r)!;
    308 	};
    309 
    310 	match (strconv::stou16(strio::string(&buf))) {
    311 	case let port: u16 =>
    312 		if (port == 0) {
    313 			// There's no port 0
    314 			return invalid;
    315 		};
    316 		return port;
    317 	case =>
    318 		return invalid;
    319 	};
    320 };
    321 
    322 fn percent_decode(s: str) (str | invalid) = {
    323 	let buf = strio::dynamic();
    324 	percent_decode_static(&buf, s)?;
    325 	return strio::string(&buf);
    326 };
    327 
    328 fn percent_decode_static(out: io::handle, s: str) (void | invalid) = {
    329 	let iter = strings::iter(s);
    330 	let tmp = strio::dynamic();
    331 	defer io::close(&tmp)!;
    332 	for (true) {
    333 		match (strings::next(&iter)) {
    334 		case let r: rune =>
    335 			if (r == '%') {
    336 				strio::reset(&tmp);
    337 				for (let i = 0z; i < 2; i += 1) {
    338 					const r = wantrune(&iter)?;
    339 					strio::appendrune(&tmp, r)!;
    340 				};
    341 
    342 				match (strconv::stou8b(strio::string(&tmp),
    343 					strconv::base::HEX)) {
    344 				case let ord: u8 =>
    345 					strio::appendrune(out, ord: u32: rune)!;
    346 				case =>
    347 					return invalid;
    348 				};
    349 			} else {
    350 				strio::appendrune(out, r)!;
    351 			};
    352 		case void =>
    353 			break;
    354 		};
    355 	};
    356 };
    357 
    358 fn wantrune(iter: *strings::iterator) (rune | invalid) = {
    359 	match (strings::next(iter)) {
    360 	case let r: rune =>
    361 		return r;
    362 	case =>
    363 		return invalid;
    364 	};
    365 };
    366 
    367 fn is_userinfo(r: rune) bool =
    368 	// unreserved + sub-delim + ":"
    369 	ascii::isalnum(r) || strings::contains("-._~!$&'()*+,;=:", r)
    370 	// %-encoded
    371 	|| r == '%' || ascii::isxdigit(r);
    372 
    373 fn is_host(r: rune) bool =
    374 	// unreserved + sub-delim
    375 	ascii::isalnum(r) || strings::contains("-._~!$&'()*+,;=", r)
    376 	// %-encoded
    377 	|| r == '%' || ascii::isxdigit(r);
    378 
    379 fn is_pchar(r: rune) bool =
    380 	// unreserved + sub-delim + ":"/"@"
    381 	ascii::isalnum(r) || strings::contains("-._~!$&'()*+,;=:@", r)
    382 	// %-encoded
    383 	|| r == '%' || ascii::isxdigit(r);