parse.ha (8749B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use ascii;
use encoding::utf8;
use io;
use memio;
use net::ip;
use strconv;
use strings;

// The URI provided to [[parse]] is invalid.
export type invalid = !void;

// Parses a URI string into [[uri]] structure. The return value must be freed
// using [[finish]].
//
// The input is consumed left to right: scheme, then the hier-part (optional
// authority and path), then an optional query and an optional fragment. If any
// component is rejected, everything allocated so far is freed and [[invalid]]
// is returned.
export fn parse(in: str) (uri | invalid) = {
	// Cleared only right before the successful return; every deferred
	// cleanup below is conditional on it.
	let success = false;
	let in = strings::iter(in);

	const scheme = parse_scheme(&in)?;
	defer if (!success) free(scheme);

	// Determine hier-part variant
	let path = "";
	let authority: ((str | ip::addr6), u16, str) = ("", 0u16, "");
	defer if (!success) {
		free(path);
		free_host(authority.0);
		free(authority.2);
	};

	match (strings::next(&in)) {
	case let r: rune =>
		switch (r) {
		case '/' =>
			// Either "//"+authority+path-abempty or path-absolute
			match (strings::next(&in)) {
			case let r: rune =>
				switch (r) {
				case '/' =>
					// "//" + authority + path-abempty
					authority = parse_authority(&in)?;
					match (strings::next(&in)) {
					case let r: rune =>
						switch (r) {
						case '?', '#' =>
							// path-empty
							strings::prev(&in);
						case '/' =>
							// path-absolute
							strings::prev(&in);
							path = parse_path(&in,
								path_mode::ABSOLUTE)?;
						case =>
							return invalid;
						};
					case => void; // path-empty
					};
				case =>
					// path-absolute
					strings::prev(&in); // return current token
					strings::prev(&in); // return leading slash
					path = parse_path(&in, path_mode::ABSOLUTE)?;
				};
			case =>
				// path-absolute (just '/')
				strings::prev(&in); // return leading slash
				path = parse_path(&in, path_mode::ABSOLUTE)?;
			};
		case =>
			// path-rootless
			strings::prev(&in);
			path = parse_path(&in, path_mode::ROOTLESS)?;
		};
	case => void; // path-empty
	};

	let query = "";
	defer if (!success) free(query);
	match (strings::next(&in)) {
	case let r: rune =>
		if (r == '?') {
			query = parse_query(&in)?;
		} else {
			strings::prev(&in);
		};
	case => void;
	};

	// The fragment is the last allocation, so nothing after it can fail
	// and it needs no deferred cleanup of its own.
	let fragment = "";
	match (strings::next(&in)) {
	case let r: rune =>
		if (r == '#') {
			fragment = parse_fragment(&in)?;
		} else {
			strings::prev(&in);
		};
	case => void;
	};

	success = true;
	return uri {
		scheme = scheme,

		// A textual host that ip::parse accepts is stored as the parsed
		// address and its string is freed; any other text is kept
		// as-is.
		host = match (authority.0) {
		case let ip: ip::addr6 =>
			yield ip;
		case let s: str =>
			yield match (ip::parse(s)) {
			case let a: ip::addr =>
				free(s);
				yield a;
			case ip::invalid =>
				yield s;
			};
		},
		port = authority.1,
		userinfo = authority.2,

		path = path,
		query = query,
		fragment = fragment,
	};
};

// Consumes the scheme and the ':' that terminates it, returning a newly
// allocated copy of the scheme text (without the ':'). The first rune must be
// an ASCII letter and every following rune an ASCII alphanumeric or one of
// "+", "-", "."; otherwise, or if the input ends before a ':' is found,
// [[invalid]] is returned.
fn parse_scheme(in: *strings::iterator) (str | invalid) = {
	// Snapshot of the start position, used to slice out the scheme.
	let copy = *in;
	for (let i = 0z; true; i += 1) {
		const r = wantrune(in)?;
		if (i > 0 && r == ':') {
			// Step back so the slice below excludes the ':'.
			strings::prev(in);
			break;
		};
		if (i == 0) {
			if (!ascii::isalpha(r)) {
				return invalid;
			};
		} else {
			if (!ascii::isalnum(r) && !strings::contains("+-.", r)) {
				return invalid;
			};
		};
	};
	let s = strings::dup(strings::slice(&copy, in));
	// Skip over the ':' again.
	strings::next(in);
	return s;
};

// Parses the authority component that follows "//" and returns a
// (host, port, userinfo) tuple. The host is an [[ip::addr6]] when it was given
// in brackets and a percent-decoded string otherwise; the port is 0 when none
// was given. Parsing stops in front of the '/' that starts the path, after the
// port, or at the end of the input. Strings in the result are allocated and
// owned by the caller; on failure they are freed here.
fn parse_authority(
	in: *strings::iterator,
) (((str | ip::addr6), u16, str) | invalid) = {
	// Scan everything until '@' or ':' or '/', then decide what it is
	let success = false;
	let buf = memio::dynamic();
	defer io::close(&buf)!;
	let host: (str | ip::addr6) = "";
	let port = 0u16;
	let userinfo = "";
	let has_userinfo = false;
	defer if (!success) {
		free_host(host);
		free(userinfo);
	};

	for (let r => strings::next(in)) {
		if (r == '[') {
			// Anything buffered before the bracket is taken as
			// userinfo, unless userinfo was already set.
			if (len(memio::string(&buf)!) > 0) {
				if (len(userinfo) > 0) {
					return invalid;
				} else {
					userinfo = percent_decode(
						memio::string(&buf)!)?;
				};
			};
			memio::reset(&buf);

			// Collect the bracketed text; a missing ']' is an error.
			for (true) {
				const r = wantrune(in)?;
				if (r == ']') {
					break;
				};
				memio::appendrune(&buf, r)!;
			};

			const addr = percent_decode(memio::string(&buf)!)?;
			defer free(addr);
			match (ip::parse(addr)) {
			case let v6: ip::addr6 =>
				host = v6;
			case =>
				return invalid;
			};
		} else if (r == ':' || !is_userinfo(r) && !is_host(r)) {
			switch (r) {
			case '@' =>
				if (has_userinfo) {
					return invalid;
				};
				// This was userinfo+host[+port]
				userinfo = percent_decode(memio::string(&buf)!)?;
				memio::reset(&buf);
				has_userinfo = true;
			case '/' =>
				// This was just host
				strings::prev(in);
				host = percent_decode(memio::string(&buf)!)?;
				break;
			case ':' =>
				// This was host+port
				host = percent_decode(memio::string(&buf)!)?;
				port = parse_port(in)?;
				break;
			case =>
				return invalid;
			};
		} else {
			memio::appendrune(&buf, r)!;
		};
	};

	match (host) {
	case let s: str =>
		// In end of string case
		if (len(s) == 0) {
			host = percent_decode(memio::string(&buf)!)?;
		};
	case => void;
	};

	success = true;
	return (host, port, userinfo);
};

// Selects which path grammar [[parse_path]] enforces.
type path_mode = enum {
	ABSOLUTE,
	ROOTLESS,
};

// Consumes a path, stopping in front of '?' or '#' or at the end of the input,
// and returns its percent-decoded form as a newly allocated string. In
// ROOTLESS mode the path may not begin with '/'. Any rune that is neither a
// pchar nor '/' yields [[invalid]].
fn parse_path(in: *strings::iterator, mode: path_mode) (str | invalid) = {
	// Snapshot of the start position, used to slice out the raw path.
	let copy = *in;
	// With rootless path, we need at least one segment
	if (mode == path_mode::ROOTLESS) {
		for (let i = 0z; true; i += 1) {
			match (strings::next(in)) {
			case let r: rune =>
				if (r == '?' || r == '#') {
					strings::prev(in);
					break;
				};
				if (r == '/') {
					if (i == 0) {
						return invalid;
					} else {
						break;
					};
				};
				if (!is_pchar(r)) {
					return invalid;
				};
			case done =>
				break;
			};
		};
	};

	for (let r => strings::next(in)) {
		if (r == '?' || r == '#') {
			strings::prev(in);
			break;
		};
		if (!is_pchar(r) && r != '/') {
			return invalid;
		};
	};

	return percent_decode(strings::slice(&copy, in));
};

// Consumes a query (the text after '?'), stopping in front of '#' or at the
// end of the input. The query is returned as a newly allocated string and is
// NOT percent-decoded. Any rune that is not a pchar, '/' or '?' yields
// [[invalid]].
fn parse_query(in: *strings::iterator) (str | invalid) = {
	let copy = *in;
	for (let r => strings::next(in)) {
		if (r == '#') {
			strings::prev(in);
			break;
		};
		if (!is_pchar(r) && r != '/' && r != '?') {
			return invalid;
		};
	};
	return strings::dup(strings::slice(&copy, in));
};

// Consumes a fragment (the text after '#') up to the end of the input and
// returns its percent-decoded form as a newly allocated string. Any rune that
// is not a pchar, '/' or '?' yields [[invalid]].
fn parse_fragment(in: *strings::iterator) (str | invalid) = {
	let copy = *in;
	for (let r => strings::next(in)) {
		if (!is_pchar(r) && r != '/' && r != '?') {
			return invalid;
		};
	};

	return percent_decode(strings::slice(&copy, in))?;
};

// Consumes a run of ASCII digits and converts it with [[strconv::stou16]],
// leaving the iterator in front of the first non-digit. Returns [[invalid]]
// when the conversion fails or the port is 0.
fn parse_port(in: *strings::iterator) (u16 | invalid) = {
	let copy = *in;
	for (let r => strings::next(in)) {
		if (!ascii::isdigit(r)) {
			strings::prev(in);
			break;
		};
	};

	match (strconv::stou16(strings::slice(&copy, in))) {
	case let port: u16 =>
		if (port == 0) {
			// There's no port 0
			return invalid;
		};
		return port;
	case =>
		return invalid;
	};
};

// Percent-decodes s into a newly allocated string which the caller must free.
// Returns [[invalid]] if s contains a malformed escape or the decoded octets
// are not valid UTF-8.
fn percent_decode(s: str) (str | invalid) = {
	let success = false;
	let buf = memio::dynamic();
	// On failure the partially written buffer would otherwise be leaked;
	// on success its storage is handed to the caller as the result.
	defer if (!success) io::close(&buf)!;
	percent_decode_static(&buf, s)?;
	success = true;
	return memio::string(&buf)!;
};

// Percent-decodes s and writes the result to out. Consecutive "%XX" escapes
// are collected as raw octets and validated as UTF-8 as a group, so that a
// multi-byte character spread over several escapes decodes correctly. Returns
// [[invalid]] on a truncated escape, on non-hexadecimal escape digits, or when
// a group of octets is not valid UTF-8.
fn percent_decode_static(out: io::handle, s: str) (void | invalid) = {
	let iter = strings::iter(s);
	let tmp = memio::dynamic();
	defer io::close(&tmp)!;
	// Octets of the current run of escapes, not yet written to out.
	let percent_data: []u8 = [];
	// Covers every error return below; after a successful flush the slice
	// has been reset to empty, which makes this a no-op.
	defer free(percent_data);
	for (true) {
		match (strings::next(&iter)) {
		case let r: rune =>
			if (r == '%') {
				// Read exactly two runes and parse them as a
				// hexadecimal octet.
				memio::reset(&tmp);
				for (let i = 0z; i < 2; i += 1) {
					const r = wantrune(&iter)?;
					memio::appendrune(&tmp, r)!;
				};

				match (strconv::stou8(memio::string(&tmp)!,
					strconv::base::HEX)) {
				case let ord: u8 =>
					append(percent_data, ord)!;
				case =>
					return invalid;
				};
			} else {
				// A literal rune ends the current run of escapes.
				if (len(percent_data) > 0) {
					flush_percent_data(out, percent_data)?;
					free(percent_data);
					percent_data = [];
				};

				memio::appendrune(out, r)!;
			};
		case done =>
			if (len(percent_data) > 0) {
				flush_percent_data(out, percent_data)?;
				free(percent_data);
				percent_data = [];
			};

			break;
		};
	};
};

// Validates data as UTF-8 and writes it to out, returning [[invalid]] if the
// validation fails. The slice is borrowed; the caller keeps ownership of it.
fn flush_percent_data(out: io::handle, data: []u8) (void | invalid) = {
	match (strings::fromutf8(data)) {
	case let decoded: str =>
		memio::concat(out, decoded)!;
	case utf8::invalid =>
		return invalid;
	};
};

// Returns the next rune from iter, or [[invalid]] if the input is exhausted.
fn wantrune(iter: *strings::iterator) (rune | invalid) = {
	match (strings::next(iter)) {
	case let r: rune =>
		return r;
	case =>
		return invalid;
	};
};

// Frees a host value if it is a string; an [[ip::addr6]] owns no memory.
fn free_host(in: (str | ip::addr6)) void = {
	match (in) {
	case let s: str =>
		free(s);
	case => void;
	};
};

// Reports whether r may appear in the userinfo component. The hexadecimal
// digits of a %-encoded octet are already accepted by ascii::isalnum, so only
// the '%' sign itself needs a separate check.
fn is_userinfo(r: rune) bool =
	// unreserved + sub-delim + ":"
	ascii::isalnum(r) || strings::contains("-._~!$&'()*+,;=:", r)
	// %-encoded
	|| r == '%';

// Reports whether r may appear in a registered-name host. The hexadecimal
// digits of a %-encoded octet are already accepted by ascii::isalnum, so only
// the '%' sign itself needs a separate check.
fn is_host(r: rune) bool =
	// unreserved + sub-delim
	ascii::isalnum(r) || strings::contains("-._~!$&'()*+,;=", r)
	// %-encoded
	|| r == '%';

// Reports whether r is a path character (pchar). The hexadecimal digits of a
// %-encoded octet are already accepted by ascii::isalnum, so only the '%' sign
// itself needs a separate check.
fn is_pchar(r: rune) bool =
	// unreserved + sub-delim + ":"/"@"
	ascii::isalnum(r) || strings::contains("-._~!$&'()*+,;=:@", r)
	// %-encoded
	|| r == '%';