hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

parse.ha (3739B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use ascii;
      5 use strings;
      6 
      7 const tspecial: str = "()<>@,;:\\/[]?=";
      8 export type type_params = strings::tokenizer;
      9 
     10 // A syntax error.
     11 export type syntax = !void;
     12 
     13 // Converts an error into a human-friendly string.
     14 export fn strerror(err: syntax) str = "Can't parse Media Type";
     15 
     16 // Parses a Media Type, returning a tuple of the content type (e.g.
     17 // "text/plain") and a parameter parser object, or [[syntax]] if the input
     18 // cannot be parsed.
     19 //
     20 // To enumerate the Media Type parameter list, pass the type_params object into
     21 // [[next_param]]. If you do not need the parameter list, you can safely discard
     22 // the object. Note that any format errors following the ";" token will not
     23 // cause [[syntax]] to be returned unless [[next_param]] is used to enumerate
     24 // all of the parameters.
     25 export fn parse(in: str) ((str, type_params) | syntax) = {
     26 	const items = strings::cut(in, ";");
     27 	const mtype = items.0, params = items.1;
     28 	const items = strings::cut(mtype, "/");
     29 	if (len(items.0) < 1 || len(items.1) < 1) {
     30 		return syntax;
     31 	};
     32 	typevalid(items.0)?;
     33 	typevalid(items.1)?;
     34 	return (mtype, strings::tokenize(params, ";"));
     35 };
     36 
     37 // Returns the next parameter as a (key, value) tuple from a [[type_params]]
     38 // object that was prepared via [[parse]], done if there are no remaining
     39 // parameters, and [[syntax]] if a syntax error was encountered.
     40 export fn next_param(in: *type_params) ((str, str) | done | syntax) = {
     41 	const tok = match (strings::next_token(in: *strings::tokenizer)) {
     42 	case let s: str =>
     43 		if (s == "") {
     44 			// empty parameter
     45 			return syntax;
     46 		};
     47 		yield s;
     48 	case done =>
     49 		return done;
     50 	};
     51 
     52 	const items = strings::cut(tok, "=");
     53 	// The RFC does not permit whitespace here, but whitespace is very
     54 	// common in the wild. ¯\_(ツ)_/¯
     55 	items.0 = strings::trim(items.0);
     56 	items.1 = strings::trim(items.1);
     57 	if (len(items.0) == 0 || len(items.1) == 0) {
     58 		return syntax;
     59 	};
     60 
     61 	if (strings::hasprefix(items.1, "\"")) {
     62 		items.1 = quoted(items.1)?;
     63 	};
     64 
     65 	return (items.0, items.1);
     66 };
     67 
     68 fn quoted(in: str) (str | syntax) = {
     69 	// We have only a basic implementation of quoted-string. It has a couple
     70 	// of problems:
     71 	//
     72 	// 1. The RFC does not define it very well
     73 	// 2. The parts of the RFC which are ill-defined are rarely used
     74 	// 3. Implementing quoted-pair would require allocating a new string
     75 	//
     76 	// This implementation should handle most Media Types seen in practice
     77 	// unless they're doing something weird and ill-advised with them.
     78 	in = strings::trim(in, '"');
     79 	if (strings::contains(in, "\\")
     80 			|| strings::contains(in, "\r")
     81 			|| strings::contains(in, "\n")) {
     82 		return syntax;
     83 	};
     84 	return in;
     85 };
     86 
     87 fn typevalid(in: str) (void | syntax) = {
     88 	const miter = strings::iter(in);
     89 	for (let rn => strings::next(&miter)) {
     90 		if (!ascii::valid(rn) || rn == ' '
     91 				|| ascii::iscntrl(rn)
     92 				|| strings::contains(tspecial, rn)) {
     93 			return syntax;
     94 		};
     95 	};
     96 };
     97 
     98 @test fn parse() void = {
     99 	const res = parse("text/plain")!;
    100 	assert(res.0 == "text/plain");
    101 
    102 	const res = parse("image/png")!;
    103 	assert(res.0 == "image/png");
    104 
    105 	const res = parse("application/svg+xml; charset=utf-8; foo=\"bar baz\"")!;
    106 	assert(res.0 == "application/svg+xml");
    107 	const params = res.1;
    108 	const param = next_param(&params)! as (str, str);
    109 	assert(param.0 == "charset" && param.1 == "utf-8");
    110 	const param = next_param(&params)! as (str, str);
    111 	assert(param.0 == "foo" && param.1 == "bar baz");
    112 	assert(next_param(&params) is done);
    113 
    114 	assert(parse("hi") is syntax);
    115 	assert(parse("text/ spaces ") is syntax);
    116 	assert(parse("text/@") is syntax);
    117 
    118 	const res = parse("text/plain;charset")!;
    119 	assert(res.0 == "text/plain");
    120 	assert(next_param(&res.1) is syntax);
    121 };