hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

fnmatch.ha (8544B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use ascii;
      5 use strings;
      6 
      7 // A set of flags that alter the matching behavior of [[fnmatch]]
      8 export type flag = enum uint {
      9 	NONE = 0,
     10 
     11 	// If this flag is set, slashes in the string will only be matched by
     12 	// literal slashes in the pattern
     13 	PATHNAME = 1u << 0,
     14 	// If this flag is set, backslash will be treated as an ordinary
     15 	// character
     16 	NOESCAPE = 1u << 1,
     17 	// If this flag is set, a '.' at the beginning of the string can only
     18 	// be matched by a literal '.' in the pattern. If [[flag::PATHNAME]] is
     19 	// set simultaneously, this behavior also apply to any periods
     20 	// immediately following a slash.
     21 	PERIOD = 1u << 2,
     22 };
     23 
     24 type bracket = void;
     25 type star = void;
     26 type question = void;
     27 type end = void;
     28 type token = (rune | bracket | star | question | end);
     29 
     30 type invalid = !void;
     31 
     32 // Check whether the 'string' matches the 'pattern', which is a shell wildcard
     33 // pattern with the following matching rules:
     34 //
     35 // - '?' matches any single character
     36 // - '*' matches any string, including the empty string
     37 // - '[' and ']' enclose a bracket expression. Matching rules for bracket
     38 //   expressions are identical to those of bracket subexpressions in regular
     39 //   expressions, except that '!' takes the role of '^' when placed right after
     40 //   the opening '['.
     41 // - '\' escapes the following character, e. g. "\*" only matches literal '*'
     42 //   and has no special meaning
     43 // - all other characters only match themselves
     44 //
     45 // A set of flags that alter the matching behavior may be passed to
     46 // [[fnmatch]]. For an explanation of their meaning, see [[flag]].
     47 export fn fnmatch(pattern: str, string: str, flags: flag = flag::NONE) bool = {
     48 	let b = if (flags & flag::PATHNAME != 0) {
     49 		yield fnmatch_pathname(pattern, string, flags);
     50 	} else {
     51 		yield fnmatch_internal(pattern, string, flags);
     52 	};
     53 	return b is bool && b: bool;
     54 };
     55 
     56 // Split the pattern and the string on every '/' and process each part
     57 // separately
     58 fn fnmatch_pathname(pattern: str, string: str, fl: flag) (bool | invalid) = {
     59 	let tok = strings::tokenize(string, "/");
     60 	let p_iter = strings::iter(pattern);
     61 	let start = p_iter;
     62 	for :outer (true) {
     63 		start = p_iter;
     64 		for (true) match (pat_next(&p_iter, fl)?) {
     65 		case end =>
     66 			break :outer;
     67 		case let r: rune =>
     68 			if (r == '/') break;
     69 		case bracket =>
     70 			match_bracket(&p_iter, '\0')?;
     71 		case (question | star) => void;
     72 		};
     73 		let s = match (strings::next_token(&tok)) {
     74 		case done =>
     75 			return false;
     76 		case let s: str =>
     77 			yield s;
     78 		};
     79 		strings::prev(&p_iter);
     80 		let p = strings::slice(&start, &p_iter);
     81 		strings::next(&p_iter);
     82 		if (!fnmatch_internal(p, s, fl)?) {
     83 			return false;
     84 		};
     85 	};
     86 	let s = match(strings::next_token(&tok)) {
     87 	case done =>
     88 		return false;
     89 	case let s: str =>
     90 		yield s;
     91 	};
     92 	let p = strings::iterstr(&start);
     93 	return fnmatch_internal(p, s, fl)? && strings::next_token(&tok) is done;
     94 };
     95 
     96 // Core fnmatch function, implementing the "Sea of stars" algorithm that is also
     97 // used in Musl libc. First we make sure the parts before the first star and
     98 // after the last star produce exact matches and then proceed to greedily match
     99 // everything in between. Because of the greedy property this algorithm does not
    100 // have exponential corner cases.
    101 fn fnmatch_internal(pattern: str, string: str, fl: flag) (bool | invalid) = {
    102 	if (fl & flag::PERIOD != 0) {
    103 		if (strings::hasprefix(string, ".")
    104 				&& !strings::hasprefix(pattern, ".")) {
    105 			return false;
    106 		};
    107 	};
    108 
    109 	let p = strings::iter(pattern);
    110 	let s = strings::iter(string);
    111 
    112 	// match up to the first *
    113 	for (true) {
    114 		let copy = s;
    115 		let rn = strings::next(&copy);
    116 		let t = match (pat_next(&p, fl)?) {
    117 		case star =>
    118 			break;
    119 		case end =>
    120 			return rn is done;
    121 		case question =>
    122 			yield rn is rune;
    123 		case bracket =>
    124 			yield rn is rune && match_bracket(&p, rn: rune)?;
    125 		case let r: rune =>
    126 			yield rn is rune && rn: rune == r;
    127 		};
    128 		if (!t) {
    129 			return false;
    130 		};
    131 		s = copy;
    132 	};
    133 
    134 	// find the tail of the pattern
    135 	let p_copy = p, p_last = (p, 0z);
    136 	let cnt = 0z;
    137 	for (true; cnt += 1) {
    138 		match (pat_next(&p, fl)?) {
    139 		case end =>
    140 			break;
    141 		case star =>
    142 			p_last = (p, cnt + 1);
    143 		case bracket =>
    144 			match_bracket(&p, '\0')?;
    145 		case (question | rune) => void;
    146 		};
    147 	};
    148 	p = p_last.0;
    149 	cnt = cnt - p_last.1;
    150 	let s_copy = s;
    151 	s = strings::riter(string);
    152 	for (let i = 0z; i < cnt; i += 1) {
    153 		strings::next(&s);
    154 	};
    155 
    156 	// match the tail
    157 	let s_last = s;
    158 	for (true) {
    159 		let rn = strings::prev(&s);
    160 		let matches = match (pat_next(&p, fl)?) {
    161 		case end =>
    162 			if (rn is done) {
    163 				break;
    164 			} else {
    165 				return false;
    166 			};
    167 		case question =>
    168 			yield rn is rune;
    169 		case bracket =>
    170 			yield rn is rune && match_bracket(&p, rn: rune)?;
    171 		case let r: rune =>
    172 			yield rn is rune && rn: rune == r;
    173 		case star =>
    174 			abort();
    175 		};
    176 		if (!matches) {
    177 			return false;
    178 		};
    179 	};
    180 
    181 	// match the "sea of stars" in the middle
    182 	s_copy = strings::iter(strings::slice(&s_copy, &s_last));
    183 	p_copy = strings::iter(strings::slice(&p_copy, &p_last.0));
    184 	for :outer (true) {
    185 		p = p_copy;
    186 		if (len(strings::iterstr(&p)) == 0) {
    187 			return true;
    188 		};
    189 		s = s_copy;
    190 		for (true) {
    191 			let copy = s;
    192 			let rn = strings::next(&copy);
    193 			let matched = match (pat_next(&p, fl)?) {
    194 			case end =>
    195 				abort();
    196 			case question =>
    197 				yield rn is rune;
    198 			case bracket =>
    199 				yield rn is rune && match_bracket(&p, rn: rune)?;
    200 			case let r: rune =>
    201 				yield rn is rune && r == rn: rune;
    202 			case star =>
    203 				p_copy = p;
    204 				s_copy = s;
    205 				continue :outer;
    206 			};
    207 			if (!matched) {
    208 				break;
    209 			};
    210 			s = copy;
    211 		};
    212 		match (strings::next(&s_copy)) {
    213 		case done =>
    214 			return false;
    215 		case rune => void;
    216 		};
    217 	};
    218 };
    219 
    220 fn match_bracket(it: *strings::iterator, c: rune) (bool | invalid) = {
    221 	let old = *it;
    222 	let first = advance_or_err(it)?;
    223 	let inv = false;
    224 	if (first == '^') {
    225 		return invalid;
    226 	};
    227 	if (first == '!') {
    228 		inv = true;
    229 		first = advance_or_err(it)?;
    230 	};
    231 	let found = (first != '[' && first == c);
    232 	let last: (rune | void) = first;
    233 	if (first == ']') {
    234 		first = advance_or_err(it)?;
    235 	};
    236 
    237 	for (let r = first; true; r = advance_or_err(it)?) {
    238 		switch (r) {
    239 		case ']' =>
    240 			break;
    241 		case '-' =>
    242 			let end = advance_or_err(it)?;
    243 			if (end == ']') {
    244 				// '-' at the end matches itself
    245 				strings::prev(it);
    246 				last = '-';
    247 				found ||= (c == '-');
    248 				continue;
    249 			};
    250 			match (last) {
    251 			case void =>
    252 				return invalid;
    253 			case let l: rune =>
    254 				found ||= (l: u32 <= c: u32 && c: u32 <= end: u32);
    255 				last = void; // forbid 'a-f-n'
    256 			};
    257 		case '[' =>
    258 			let next_rune = advance_or_err(it)?;
    259 			switch (next_rune) { // TODO localization
    260 			case '=', '.' =>
    261 				return invalid;
    262 			case ':' =>
    263 				let t = match_ctype(it, c)?;
    264 				found ||= t;
    265 			case =>
    266 				strings::prev(it);
    267 				found ||= (c == '[');
    268 			};
    269 			last = '[';
    270 		case =>
    271 			found ||= (c == r);
    272 			last = r;
    273 		};
    274 	};
    275 
    276 	let cnt = len(strings::iterstr(&old)) - len(strings::iterstr(it));
    277 	if (last is rune && first == last: rune && cnt >= 4
    278 			&& (first == '=' || first == '.' || first == ':')) {
    279 		return invalid;
    280 	};
    281 	return found ^^ inv;
    282 };
    283 
    284 fn match_ctype(it: *strings::iterator, c: rune) (bool | invalid) = {
    285 	let s = strings::iterstr(it);
    286 	let i = 0z;
    287 	for (let r = '\0'; r != ':'; i += 1) {
    288 		r = advance_or_err(it)?;
    289 		if (!ascii::valid(r)) {
    290 			return invalid;
    291 		};
    292 	};
    293 	if (advance_or_err(it)? != ']') {
    294 		return invalid;
    295 	};
    296 	let name = strings::sub(s, 0, i - 1);
    297 	const map: [_](str, *fn(rune) bool) = [
    298 		("alnum", &ascii::isalnum), ("alpha", &ascii::isalpha),
    299 		("blank", &ascii::isblank), ("cntrl", &ascii::iscntrl),
    300 		("digit", &ascii::isdigit), ("graph", &ascii::isgraph),
    301 		("lower", &ascii::islower), ("print", &ascii::isprint),
    302 		("punct", &ascii::ispunct), ("space", &ascii::isspace),
    303 		("upper", &ascii::isupper), ("xdigit",&ascii::isxdigit),
    304 	];
    305 	for (let i = 0z; i < len(map); i += 1) {
    306 		if (map[i].0 == name) {
    307 			return map[i].1(c);
    308 		};
    309 	};
    310 	return invalid;
    311 };
    312 
    313 fn pat_next(pat: *strings::iterator, fl: flag) (token | invalid) = {
    314 	let r = match (strings::next(pat)) {
    315 	case done =>
    316 		return end;
    317 	case let r: rune =>
    318 		yield r;
    319 	};
    320 	switch (r) {
    321 	case '*' =>
    322 		return star;
    323 	case '?' =>
    324 		return question;
    325 	case '[' =>
    326 		return bracket;
    327 	case '\\' =>
    328 		// TODO: remove ? (harec bug workaround)
    329 		return if (fl & flag::NOESCAPE == 0) advance_or_err(pat)?
    330 			else '\\';
    331 	case =>
    332 		return r;
    333 	};
    334 };
    335 
    336 fn advance_or_err(it: *strings::iterator) (rune | invalid) = {
    337 	match (strings::next(it)) {
    338 	case let r: rune =>
    339 		return r;
    340 	case done =>
    341 		return invalid;
    342 	};
    343 };