hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

parse.ha (3726B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use ascii;
      5 use errors;
      6 use strings;
      7 
      8 const tspecial: str = "()<>@,;:\\\"/[]?="; // RFC 2045 tspecials
      9 export type type_params = strings::tokenizer; // consumed by [[next_param]]
     10 
     11 // Parses a Media Type string such as "text/plain; charset=utf-8". On success,
     12 // returns a tuple of the bare content type (e.g. "text/plain") and a
     13 // [[type_params]] object covering everything after the first ";". Returns
     14 // [[errors::invalid]] if the type or the subtype is empty or contains a
     15 // character which is not permitted in a token.
     16 //
     17 // The parameter list is not examined here: syntax errors after the first ";"
     18 // are only reported by [[next_param]] as each parameter is enumerated. The
     19 // [[type_params]] object may be discarded if the parameters are not needed.
     20 export fn parse(in: str) ((str, type_params) | errors::invalid) = {
     21 	const (mtype, params) = strings::cut(in, ";");
     22 	const (kind, subtype) = strings::cut(mtype, "/");
     23 	// Both halves of "type/subtype" are mandatory.
     24 	if (len(kind) == 0 || len(subtype) == 0) {
     25 		return errors::invalid;
     26 	};
     27 	typevalid(kind)?;
     28 	typevalid(subtype)?;
     29 	return (mtype, strings::tokenize(params, ";"));
     30 };
     31 
     32 // Reads the next parameter from a [[type_params]] object obtained from
     33 // [[parse]]. Returns a (key, value) tuple, void once the parameter list is
     34 // exhausted, or [[errors::invalid]] if the parameter is malformed.
     35 export fn next_param(in: *type_params) ((str, str) | void | errors::invalid) = {
     36 	const tok = match (strings::next_token(in: *strings::tokenizer)) {
     37 	case done =>
     38 		return;
     39 	case let s: str =>
     40 		yield s;
     41 	};
     42 	if (tok == "") {
     43 		// an empty token means an empty parameter
     44 		return errors::invalid;
     45 	};
     46 
     47 	// Whitespace around the key and the value is not permitted by the RFC,
     48 	// but it is very common in the wild, so it is stripped here.
     49 	const (rawkey, rawvalue) = strings::cut(tok, "=");
     50 	const key = strings::trim(rawkey);
     51 	const value = strings::trim(rawvalue);
     52 	if (len(key) == 0 || len(value) == 0) {
     53 		return errors::invalid;
     54 	};
     55 
     56 	// Only values which open with a double quote need unquoting.
     57 	if (!strings::hasprefix(value, "\"")) {
     58 		return (key, value);
     59 	};
     60 	return (key, quoted(value)?);
     61 };
     62 
     63 fn quoted(in: str) (str | errors::invalid) = {
     64 	// Basic quoted-string support: returns the text between the enclosing
     65 	// double quotes. quoted-pair (backslash escapes) is rejected rather
     66 	// than decoded, since unescaping would require allocating a new string.
     67 	if (len(in) < 2 || !strings::hasprefix(in, "\"")
     68 			|| !strings::hassuffix(in, "\"")) {
     69 		// the opening or the closing quote is missing
     70 		return errors::invalid;
     71 	};
     72 	const body = strings::trimsuffix(strings::trimprefix(in, "\""), "\"");
     73 	// A bare quote inside the value cannot be valid without quoted-pair.
     74 	if (strings::contains(body, "\\") || strings::contains(body, "\"")
     75 			|| strings::contains(body, "\r")
     76 			|| strings::contains(body, "\n")) {
     77 		return errors::invalid;
     78 	};
     79 	return body;
     80 };
     81 
     82 fn typevalid(in: str) (void | errors::invalid) = {
     83 	// Every rune must be ASCII, not a space or control, and not in tspecial.
     84 	let it = strings::iter(in);
     85 	for (let rn => strings::next(&it)) {
     86 		const plain = ascii::valid(rn) && rn != ' ' && !ascii::iscntrl(rn);
     87 		if (!plain || strings::contains(tspecial, rn)) {
     88 			return errors::invalid;
     89 		};
     90 	};
     91 };
     92 
     93 @test fn parse() void = {
     94 	// Bare content types carry no parameters.
     95 	const plain = parse("text/plain")!;
     96 	assert(plain.0 == "text/plain");
     97 	const png = parse("image/png")!;
     98 	assert(png.0 == "image/png");
     99 	// Parameters may be padded with whitespace and may be quoted.
    100 	const svg = parse("application/svg+xml; charset=utf-8; foo=\"bar baz\"")!;
    101 	assert(svg.0 == "application/svg+xml");
    102 	const params = svg.1;
    103 	const charset = next_param(&params)! as (str, str);
    104 	assert(charset.0 == "charset" && charset.1 == "utf-8");
    105 	const foo = next_param(&params)! as (str, str);
    106 	assert(foo.0 == "foo" && foo.1 == "bar baz");
    107 	assert(next_param(&params) is void);
    108 	// A missing subtype, whitespace, and tspecial characters are rejected.
    109 	assert(parse("hi") is errors::invalid);
    110 	assert(parse("text/ spaces ") is errors::invalid);
    111 	assert(parse("text/@") is errors::invalid);
    112 	// A parameter with no "=" only fails once it is enumerated.
    113 	const bare = parse("text/plain;charset")!;
    114 	assert(bare.0 == "text/plain");
    115 	assert(next_param(&bare.1) is errors::invalid);
    116 };