hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

fnmatch.ha (9780B)


      1 // License: MPL-2.0
      2 // (c) 2021 Alexey Yerin <yyp@disroot.org>
      3 // (c) 2021 Bor Grošelj Simić <bor.groseljsimic@telemach.net>
      4 // (c) 2021 Drew DeVault <sir@cmpwn.com>
      5 // (c) 2021 Eyal Sawady <ecs@d2evs.net>
      6 // (c) 2021 Miccah Castorina <contact@miccah.io>
      7 // (c) 2021 Thomas Bracht Laumann Jespersen <t@laumann.xyz>
      8 use ascii;
      9 use errors;
     10 use strings;
     11 use sort;
     12 
     13 // A set of flags that alter the matching behavior of [[fnmatch]]
     14 export type flags = enum uint {
     15 	NONE = 0,
     16 
     17 	// If this flag is set, slashes in the string will only be matched by
     18 	// literal slashes in the pattern
     19 	PATHNAME = 1u << 0,
     20 	// If this flag is set, backslash will be treated as an ordinary
     21 	// character
     22 	NOESCAPE = 1u << 1,
     23 	// If this flag is set, a '.' at the beginning of the string can only
     24 	// be matched by a literal '.' in the pattern. If [[flags::PATHNAME]] is
     25 	// set simultaneously, this behavior also apply to any periods
     26 	// immediately following a slash.
     27 	PERIOD = 1u << 2,
     28 };
     29 
     30 type bracket = void;
     31 type star = void;
     32 type question = void;
     33 type end = void;
     34 type token = (rune | bracket | star | question | end);
     35 
     36 // Check whether the 'string' matches the 'pattern', which is a shell wildcard
     37 // pattern with the following matching rules:
     38 //
     39 // - '?' matches any single character
     40 // - '*' matches any string, including the empty string
     41 // - '[' and ']' enclose a bracket expression. Matching rules for bracket
     42 //   expressions are identical to those of bracket subexpressions in regular
     43 //   expressions, except that '!' takes the role of '^' when placed right after
     44 //   the opening '['.
     45 // - '\' escapes the following character, e. g. "\*" only matches literal '*'
     46 //   and has no special meaning
     47 // - all other characters only match themselves
     48 //
     49 // A set of flags that alter the matching behavior may be passed to
     50 // [[fnmatch]]. For an explanation of their meaning, see [[flags]].
     51 export fn fnmatch(pattern: str, string: str, flag: flags...) bool = {
     52 	let fl: flags = 0;
     53 	for (let i = 0z; i < len(flag); i += 1) {
     54 		fl |= flag[i];
     55 	};
     56 	if (fl & flags::PATHNAME != 0) {
     57 		match (fnmatch_pathname(pattern, string, fl)) {
     58 		case let b: bool =>
     59 			return b;
     60 		case =>
     61 			return false;
     62 		};
     63 	} else {
     64 		match (fnmatch_internal(pattern, string, fl)) {
     65 		case let b: bool =>
     66 			return b;
     67 		case =>
     68 			return false;
     69 		};
     70 	};
     71 };
     72 
     73 // Split the pattern and the string on every '/' and process each part
     74 // separately
     75 fn fnmatch_pathname(
     76 	pattern: str,
     77 	string: str,
     78 	fl: flags,
     79 ) (bool | errors::unsupported | errors::invalid) = {
     80 	let tok = strings::tokenize(string, "/");
     81 	let p_iter = strings::iter(pattern);
     82 	let start = p_iter;
     83 	for (true) :outer {
     84 		start = p_iter;
     85 		for (true) match (pat_next(&p_iter, fl)?) {
     86 		case end =>
     87 			break :outer;
     88 		case let r: rune =>
     89 			if (r == '/') break;
     90 		case bracket =>
     91 			match_bracket(&p_iter, '\0')?;
     92 		case (question | star) => void;
     93 		};
     94 		let s = match (strings::next_token(&tok)) {
     95 		case void =>
     96 			return false;
     97 		case let s: str =>
     98 			yield s;
     99 		};
    100 		strings::prev(&p_iter);
    101 		let p = cut_tail(strings::iterstr(&start), &p_iter);
    102 		strings::next(&p_iter);
    103 		if (!fnmatch_internal(p, s, fl)?) {
    104 			return false;
    105 		};
    106 	};
    107 	let s = match(strings::next_token(&tok)) {
    108 	case void =>
    109 		return false;
    110 	case let s: str =>
    111 		yield s;
    112 	};
    113 	let p = strings::iterstr(&start);
    114 	return fnmatch_internal(p, s, fl)? && strings::next_token(&tok) is void;
    115 };
    116 
    117 // Core fnmatch function, implementing the "Sea of stars" algorithm that is also
    118 // used in Musl libc. First we make sure the parts before the first star and
    119 // after the last star produce exact matches and then proceed to greedily match
    120 // everything in between. Because of the greedy property this algorithm does not
    121 // have exponential corner cases.
    122 fn fnmatch_internal(
    123 	pattern: str,
    124 	string: str,
    125 	fl: flags,
    126 ) (bool | errors::invalid | errors::unsupported) = {
    127 	if (fl & flags::PERIOD != 0) {
    128 		if (strings::hasprefix(string, ".")
    129 				&& !strings::hasprefix(pattern, ".")) {
    130 			return false;
    131 		};
    132 	};
    133 
    134 	let p = strings::iter(pattern);
    135 	let s = strings::iter(string);
    136 
    137 	// match up to the first *
    138 	for (true) {
    139 		let copy = s;
    140 		let rn = strings::next(&copy);
    141 		let t = match (pat_next(&p, fl)?) {
    142 		case star =>
    143 			break;
    144 		case end =>
    145 			return rn is void;
    146 		case question =>
    147 			yield rn is rune;
    148 		case bracket =>
    149 			yield rn is rune && match_bracket(&p, rn: rune)?;
    150 		case let r: rune =>
    151 			yield rn is rune && rn: rune == r;
    152 		};
    153 		if (!t) {
    154 			return false;
    155 		};
    156 		s = copy;
    157 	};
    158 
    159 	// find the tail of the pattern
    160 	let p_copy = p, p_last = (p, 0z);
    161 	let cnt = 0z;
    162 	for (true; cnt += 1) {
    163 		match (pat_next(&p, fl)?) {
    164 		case end =>
    165 			break;
    166 		case star =>
    167 			p_last = (p, cnt + 1);
    168 		case bracket =>
    169 			match_bracket(&p, '\0')?;
    170 		case (question | rune) => void;
    171 		};
    172 	};
    173 	p = p_last.0;
    174 	cnt = cnt - p_last.1;
    175 	let s_copy = s;
    176 	s = strings::riter(string);
    177 	for (let i = 0z; i < cnt; i += 1) {
    178 		strings::next(&s);
    179 	};
    180 
    181 	// match the tail
    182 	let s_last = s;
    183 	for (true) {
    184 		let rn = strings::prev(&s);
    185 		let matches = match (pat_next(&p, fl)?) {
    186 		case end =>
    187 			if (rn is void) {
    188 				break;
    189 			} else {
    190 				return false;
    191 			};
    192 		case question =>
    193 			yield rn is rune;
    194 		case bracket =>
    195 			yield rn is rune && match_bracket(&p, rn: rune)?;
    196 		case let r: rune =>
    197 			yield rn is rune && rn: rune == r;
    198 		case star =>
    199 			abort();
    200 		};
    201 		if (!matches) {
    202 			return false;
    203 		};
    204 	};
    205 
    206 	// match the "sea of stars" in the middle
    207 	s_copy = strings::iter(cut_tail(strings::iterstr(&s_copy), &s_last));
    208 	p_copy = strings::iter(cut_tail(strings::iterstr(&p_copy), &p_last.0));
    209 	for (true) :outer {
    210 		p = p_copy;
    211 		if (len(strings::iterstr(&p)) == 0) {
    212 			return true;
    213 		};
    214 		s = s_copy;
    215 		for (true) :inner {
    216 			let copy = s;
    217 			let rn = strings::next(&copy);
    218 			let matched = match (pat_next(&p, fl)?) {
    219 			case end =>
    220 				abort();
    221 			case question =>
    222 				yield rn is rune;
    223 			case bracket =>
    224 				yield rn is rune && match_bracket(&p, rn: rune)?;
    225 			case let r: rune =>
    226 				yield rn is rune && r == rn: rune;
    227 			case star =>
    228 				p_copy = p;
    229 				s_copy = s;
    230 				continue :outer;
    231 			};
    232 			if (!matched) {
    233 				break :inner;
    234 			};
    235 			s = copy;
    236 		};
    237 		match (strings::next(&s_copy)) {
    238 		case void =>
    239 			return false;
    240 		case rune => void;
    241 		};
    242 	};
    243 	abort();
    244 };
    245 
    246 fn match_bracket(
    247 	it: *strings::iterator,
    248 	c: rune,
    249 ) (bool | errors::invalid | errors::unsupported) = {
    250 	let old = *it;
    251 	let first = advance_or_err(it)?;
    252 	let inv = false;
    253 	if (first == '^') {
    254 		return errors::invalid;
    255 	};
    256 	if (first == '!') {
    257 		inv = true;
    258 		first = advance_or_err(it)?;
    259 	};
    260 	let found = (first != '[' && first == c);
    261 	let last: (rune | void) = first;
    262 	if (first == ']') {
    263 		first = advance_or_err(it)?;
    264 	};
    265 	for (let r = first; true; r = advance_or_err(it)?) {
    266 		switch (r) {
    267 		case ']' =>
    268 			break;
    269 		case '-' =>
    270 			let end = advance_or_err(it)?;
    271 			if (end == ']') {
    272 				// '-' at the end matches itself
    273 				strings::push(it, ']');
    274 				last = '-';
    275 				found ||= (c == '-');
    276 				continue;
    277 			};
    278 			if (last is void) {
    279 				return errors::invalid;
    280 			};
    281 			let l = last: rune;
    282 			found ||= (l: u32 <= c: u32 && c: u32 <= end: u32);
    283 			last = void; // forbid 'a-f-n'
    284 		case '[' =>
    285 			let next_rune = advance_or_err(it)?;
    286 			switch (next_rune) { // TODO localization
    287 			case '=', '.' =>
    288 				return errors::unsupported;
    289 			case ':' =>
    290 				let t = match_ctype(it, c)?;
    291 				found ||= t;
    292 			case =>
    293 				strings::push(it, next_rune);
    294 				found ||= (c == '[');
    295 			};
    296 			last = '[';
    297 		case =>
    298 			found ||= (c == r);
    299 			last = r;
    300 		};
    301 	};
    302 
    303 	let cnt = len(strings::iterstr(&old)) - len(strings::iterstr(it));
    304 	if (last is rune && first == last: rune && cnt >= 4) {
    305 		switch (first) {
    306 		case '=', '.', ':' =>
    307 			return errors::invalid;
    308 		case => void;
    309 		};
    310 	};
    311 	return found ^^ inv;
    312 };
    313 
    314 fn match_ctype(it: *strings::iterator, c: rune) (bool | errors::invalid) = {
    315 	let s = strings::iterstr(it);
    316 	let i = 0z;
    317 	for (let r = '\0'; r != ':'; i += 1) {
    318 		r = advance_or_err(it)?;
    319 		if (!ascii::valid(r)) {
    320 			return errors::invalid;
    321 		};
    322 	};
    323 	if (advance_or_err(it)? != ']') {
    324 		return errors::invalid;
    325 	};
    326 	let name = strings::sub(s, 0, i - 1);
    327 	match (ctype_name_to_func(name)) {
    328 	case null =>
    329 		return errors::invalid;
    330 	case let f: *fn(c: rune) bool =>
    331 		return f(c);
    332 	};
    333 };
    334 
    335 type funcmap = (str, *fn(c: rune) bool);
    336 
    337 fn cmp(a: const *void, b: const *void) int =
    338 	strings::compare((a: *funcmap).0, *(b: const *str));
    339 
    340 fn ctype_name_to_func(name: str) nullable *fn(c: rune) bool = {
    341 	const map: [_]funcmap = [
    342 		("alnum", &ascii::isalnum), ("alpha", &ascii::isalpha),
    343 		("blank", &ascii::isblank), ("cntrl", &ascii::iscntrl),
    344 		("digit", &ascii::isdigit), ("graph", &ascii::isgraph),
    345 		("lower", &ascii::islower), ("print", &ascii::isprint),
    346 		("punct", &ascii::ispunct), ("space", &ascii::isspace),
    347 		("upper", &ascii::isupper), ("xdigit",&ascii::isxdigit),
    348 	];
    349 	match (sort::search(map, size(funcmap), &name, &cmp)) {
    350 	case void =>
    351 		return null: nullable *fn(c: rune) bool;
    352 	case let ind: size =>
    353 		return map[ind].1;
    354 	};
    355 };
    356 
    357 fn pat_next(pat: *strings::iterator, fl: flags) (token | errors::invalid) = {
    358 	let r = match (strings::next(pat)) {
    359 	case void =>
    360 		return end;
    361 	case let r: rune =>
    362 		yield r;
    363 	};
    364 	switch (r) {
    365 	case '*' =>
    366 		return star;
    367 	case '?' =>
    368 		return question;
    369 	case '[' =>
    370 		return bracket;
    371 	case '\\' =>
    372 		// TODO: remove ? (harec bug workaround)
    373 		return if (fl & flags::NOESCAPE == 0) advance_or_err(pat)?
    374 			else '\\';
    375 	case =>
    376 		return r;
    377 	};
    378 };
    379 
    380 fn advance_or_err(it: *strings::iterator) (rune | errors::invalid) = {
    381 	match (strings::next(it)) {
    382 	case let r: rune =>
    383 		return r;
    384 	case void =>
    385 		return errors::invalid;
    386 	};
    387 };
    388 
    389 fn cut_tail(s: str, it: *strings::iterator) str = {
    390 	let s_len = len(s), t_len = len(strings::iterstr(it));
    391 	let b = strings::toutf8(s);
    392 	return strings::fromutf8(b[..s_len - t_len]);
    393 };
    394