hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

fnmatch.ha (8732B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use ascii;
      5 use errors;
      6 use strings;
      7 
      8 // A set of flags that alter the matching behavior of [[fnmatch]]
      9 export type flag = enum uint {
     10 	NONE = 0,
     11 
     12 	// If this flag is set, slashes in the string will only be matched by
     13 	// literal slashes in the pattern
     14 	PATHNAME = 1u << 0,
     15 	// If this flag is set, backslash will be treated as an ordinary
     16 	// character
     17 	NOESCAPE = 1u << 1,
     18 	// If this flag is set, a '.' at the beginning of the string can only
     19 	// be matched by a literal '.' in the pattern. If [[flag::PATHNAME]] is
     20 	// set simultaneously, this behavior also apply to any periods
     21 	// immediately following a slash.
     22 	PERIOD = 1u << 2,
     23 };
     24 
     25 type bracket = void;
     26 type star = void;
     27 type question = void;
     28 type end = void;
     29 type token = (rune | bracket | star | question | end);
     30 
     31 // Check whether the 'string' matches the 'pattern', which is a shell wildcard
     32 // pattern with the following matching rules:
     33 //
     34 // - '?' matches any single character
     35 // - '*' matches any string, including the empty string
     36 // - '[' and ']' enclose a bracket expression. Matching rules for bracket
     37 //   expressions are identical to those of bracket subexpressions in regular
     38 //   expressions, except that '!' takes the role of '^' when placed right after
     39 //   the opening '['.
     40 // - '\' escapes the following character, e. g. "\*" only matches literal '*'
     41 //   and has no special meaning
     42 // - all other characters only match themselves
     43 //
     44 // A set of flags that alter the matching behavior may be passed to
     45 // [[fnmatch]]. For an explanation of their meaning, see [[flag]].
     46 export fn fnmatch(pattern: str, string: str, flags: flag = flag::NONE) bool = {
     47 	let b = if (flags & flag::PATHNAME != 0) {
     48 		yield fnmatch_pathname(pattern, string, flags);
     49 	} else {
     50 		yield fnmatch_internal(pattern, string, flags);
     51 	};
     52 	return b is bool && b: bool;
     53 };
     54 
     55 // Split the pattern and the string on every '/' and process each part
     56 // separately
     57 fn fnmatch_pathname(
     58 	pattern: str,
     59 	string: str,
     60 	fl: flag,
     61 ) (bool | errors::unsupported | errors::invalid) = {
     62 	let tok = strings::tokenize(string, "/");
     63 	let p_iter = strings::iter(pattern);
     64 	let start = p_iter;
     65 	for :outer (true) {
     66 		start = p_iter;
     67 		for (true) match (pat_next(&p_iter, fl)?) {
     68 		case end =>
     69 			break :outer;
     70 		case let r: rune =>
     71 			if (r == '/') break;
     72 		case bracket =>
     73 			match_bracket(&p_iter, '\0')?;
     74 		case (question | star) => void;
     75 		};
     76 		let s = match (strings::next_token(&tok)) {
     77 		case done =>
     78 			return false;
     79 		case let s: str =>
     80 			yield s;
     81 		};
     82 		strings::prev(&p_iter);
     83 		let p = strings::slice(&start, &p_iter);
     84 		strings::next(&p_iter);
     85 		if (!fnmatch_internal(p, s, fl)?) {
     86 			return false;
     87 		};
     88 	};
     89 	let s = match(strings::next_token(&tok)) {
     90 	case done =>
     91 		return false;
     92 	case let s: str =>
     93 		yield s;
     94 	};
     95 	let p = strings::iterstr(&start);
     96 	return fnmatch_internal(p, s, fl)? && strings::next_token(&tok) is done;
     97 };
     98 
     99 // Core fnmatch function, implementing the "Sea of stars" algorithm that is also
    100 // used in Musl libc. First we make sure the parts before the first star and
    101 // after the last star produce exact matches and then proceed to greedily match
    102 // everything in between. Because of the greedy property this algorithm does not
    103 // have exponential corner cases.
    104 fn fnmatch_internal(
    105 	pattern: str,
    106 	string: str,
    107 	fl: flag,
    108 ) (bool | errors::invalid | errors::unsupported) = {
    109 	if (fl & flag::PERIOD != 0) {
    110 		if (strings::hasprefix(string, ".")
    111 				&& !strings::hasprefix(pattern, ".")) {
    112 			return false;
    113 		};
    114 	};
    115 
    116 	let p = strings::iter(pattern);
    117 	let s = strings::iter(string);
    118 
    119 	// match up to the first *
    120 	for (true) {
    121 		let copy = s;
    122 		let rn = strings::next(&copy);
    123 		let t = match (pat_next(&p, fl)?) {
    124 		case star =>
    125 			break;
    126 		case end =>
    127 			return rn is done;
    128 		case question =>
    129 			yield rn is rune;
    130 		case bracket =>
    131 			yield rn is rune && match_bracket(&p, rn: rune)?;
    132 		case let r: rune =>
    133 			yield rn is rune && rn: rune == r;
    134 		};
    135 		if (!t) {
    136 			return false;
    137 		};
    138 		s = copy;
    139 	};
    140 
    141 	// find the tail of the pattern
    142 	let p_copy = p, p_last = (p, 0z);
    143 	let cnt = 0z;
    144 	for (true; cnt += 1) {
    145 		match (pat_next(&p, fl)?) {
    146 		case end =>
    147 			break;
    148 		case star =>
    149 			p_last = (p, cnt + 1);
    150 		case bracket =>
    151 			match_bracket(&p, '\0')?;
    152 		case (question | rune) => void;
    153 		};
    154 	};
    155 	p = p_last.0;
    156 	cnt = cnt - p_last.1;
    157 	let s_copy = s;
    158 	s = strings::riter(string);
    159 	for (let i = 0z; i < cnt; i += 1) {
    160 		strings::next(&s);
    161 	};
    162 
    163 	// match the tail
    164 	let s_last = s;
    165 	for (true) {
    166 		let rn = strings::prev(&s);
    167 		let matches = match (pat_next(&p, fl)?) {
    168 		case end =>
    169 			if (rn is done) {
    170 				break;
    171 			} else {
    172 				return false;
    173 			};
    174 		case question =>
    175 			yield rn is rune;
    176 		case bracket =>
    177 			yield rn is rune && match_bracket(&p, rn: rune)?;
    178 		case let r: rune =>
    179 			yield rn is rune && rn: rune == r;
    180 		case star =>
    181 			abort();
    182 		};
    183 		if (!matches) {
    184 			return false;
    185 		};
    186 	};
    187 
    188 	// match the "sea of stars" in the middle
    189 	s_copy = strings::iter(strings::slice(&s_copy, &s_last));
    190 	p_copy = strings::iter(strings::slice(&p_copy, &p_last.0));
    191 	for :outer (true) {
    192 		p = p_copy;
    193 		if (len(strings::iterstr(&p)) == 0) {
    194 			return true;
    195 		};
    196 		s = s_copy;
    197 		for (true) {
    198 			let copy = s;
    199 			let rn = strings::next(&copy);
    200 			let matched = match (pat_next(&p, fl)?) {
    201 			case end =>
    202 				abort();
    203 			case question =>
    204 				yield rn is rune;
    205 			case bracket =>
    206 				yield rn is rune && match_bracket(&p, rn: rune)?;
    207 			case let r: rune =>
    208 				yield rn is rune && r == rn: rune;
    209 			case star =>
    210 				p_copy = p;
    211 				s_copy = s;
    212 				continue :outer;
    213 			};
    214 			if (!matched) {
    215 				break;
    216 			};
    217 			s = copy;
    218 		};
    219 		match (strings::next(&s_copy)) {
    220 		case done =>
    221 			return false;
    222 		case rune => void;
    223 		};
    224 	};
    225 };
    226 
    227 fn match_bracket(
    228 	it: *strings::iterator,
    229 	c: rune,
    230 ) (bool | errors::invalid | errors::unsupported) = {
    231 	let old = *it;
    232 	let first = advance_or_err(it)?;
    233 	let inv = false;
    234 	if (first == '^') {
    235 		return errors::invalid;
    236 	};
    237 	if (first == '!') {
    238 		inv = true;
    239 		first = advance_or_err(it)?;
    240 	};
    241 	let found = (first != '[' && first == c);
    242 	let last: (rune | void) = first;
    243 	if (first == ']') {
    244 		first = advance_or_err(it)?;
    245 	};
    246 
    247 	for (let r = first; true; r = advance_or_err(it)?) {
    248 		switch (r) {
    249 		case ']' =>
    250 			break;
    251 		case '-' =>
    252 			let end = advance_or_err(it)?;
    253 			if (end == ']') {
    254 				// '-' at the end matches itself
    255 				strings::prev(it);
    256 				last = '-';
    257 				found ||= (c == '-');
    258 				continue;
    259 			};
    260 			match (last) {
    261 			case void =>
    262 				return errors::invalid;
    263 			case let l: rune =>
    264 				found ||= (l: u32 <= c: u32 && c: u32 <= end: u32);
    265 				last = void; // forbid 'a-f-n'
    266 			};
    267 		case '[' =>
    268 			let next_rune = advance_or_err(it)?;
    269 			switch (next_rune) { // TODO localization
    270 			case '=', '.' =>
    271 				return errors::unsupported;
    272 			case ':' =>
    273 				let t = match_ctype(it, c)?;
    274 				found ||= t;
    275 			case =>
    276 				strings::prev(it);
    277 				found ||= (c == '[');
    278 			};
    279 			last = '[';
    280 		case =>
    281 			found ||= (c == r);
    282 			last = r;
    283 		};
    284 	};
    285 
    286 	let cnt = len(strings::iterstr(&old)) - len(strings::iterstr(it));
    287 	if (last is rune && first == last: rune && cnt >= 4
    288 			&& (first == '=' || first == '.' || first == ':')) {
    289 		return errors::invalid;
    290 	};
    291 	return found ^^ inv;
    292 };
    293 
    294 fn match_ctype(it: *strings::iterator, c: rune) (bool | errors::invalid) = {
    295 	let s = strings::iterstr(it);
    296 	let i = 0z;
    297 	for (let r = '\0'; r != ':'; i += 1) {
    298 		r = advance_or_err(it)?;
    299 		if (!ascii::valid(r)) {
    300 			return errors::invalid;
    301 		};
    302 	};
    303 	if (advance_or_err(it)? != ']') {
    304 		return errors::invalid;
    305 	};
    306 	let name = strings::sub(s, 0, i - 1);
    307 	const map: [_](str, *fn(rune) bool) = [
    308 		("alnum", &ascii::isalnum), ("alpha", &ascii::isalpha),
    309 		("blank", &ascii::isblank), ("cntrl", &ascii::iscntrl),
    310 		("digit", &ascii::isdigit), ("graph", &ascii::isgraph),
    311 		("lower", &ascii::islower), ("print", &ascii::isprint),
    312 		("punct", &ascii::ispunct), ("space", &ascii::isspace),
    313 		("upper", &ascii::isupper), ("xdigit",&ascii::isxdigit),
    314 	];
    315 	for (let i = 0z; i < len(map); i += 1) {
    316 		if (map[i].0 == name) {
    317 			return map[i].1(c);
    318 		};
    319 	};
    320 	return errors::invalid;
    321 };
    322 
    323 fn pat_next(pat: *strings::iterator, fl: flag) (token | errors::invalid) = {
    324 	let r = match (strings::next(pat)) {
    325 	case done =>
    326 		return end;
    327 	case let r: rune =>
    328 		yield r;
    329 	};
    330 	switch (r) {
    331 	case '*' =>
    332 		return star;
    333 	case '?' =>
    334 		return question;
    335 	case '[' =>
    336 		return bracket;
    337 	case '\\' =>
    338 		// TODO: remove ? (harec bug workaround)
    339 		return if (fl & flag::NOESCAPE == 0) advance_or_err(pat)?
    340 			else '\\';
    341 	case =>
    342 		return r;
    343 	};
    344 };
    345 
    346 fn advance_or_err(it: *strings::iterator) (rune | errors::invalid) = {
    347 	match (strings::next(it)) {
    348 	case let r: rune =>
    349 		return r;
    350 	case done =>
    351 		return errors::invalid;
    352 	};
    353 };