fnmatch.ha (9766B)
1 // License: MPL-2.0 2 // (c) 2021 Alexey Yerin <yyp@disroot.org> 3 // (c) 2021 Bor Grošelj Simić <bor.groseljsimic@telemach.net> 4 // (c) 2021 Drew DeVault <sir@cmpwn.com> 5 // (c) 2021 Ember Sawady <ecs@d2evs.net> 6 // (c) 2021 Miccah Castorina <contact@miccah.io> 7 // (c) 2021 Thomas Bracht Laumann Jespersen <t@laumann.xyz> 8 use ascii; 9 use errors; 10 use strings; 11 use sort; 12 13 // A set of flags that alter the matching behavior of [[fnmatch]] 14 export type flags = enum uint { 15 NONE = 0, 16 17 // If this flag is set, slashes in the string will only be matched by 18 // literal slashes in the pattern 19 PATHNAME = 1u << 0, 20 // If this flag is set, backslash will be treated as an ordinary 21 // character 22 NOESCAPE = 1u << 1, 23 // If this flag is set, a '.' at the beginning of the string can only 24 // be matched by a literal '.' in the pattern. If [[flags::PATHNAME]] is 25 // set simultaneously, this behavior also apply to any periods 26 // immediately following a slash. 27 PERIOD = 1u << 2, 28 }; 29 30 type bracket = void; 31 type star = void; 32 type question = void; 33 type end = void; 34 type token = (rune | bracket | star | question | end); 35 36 // Check whether the 'string' matches the 'pattern', which is a shell wildcard 37 // pattern with the following matching rules: 38 // 39 // - '?' matches any single character 40 // - '*' matches any string, including the empty string 41 // - '[' and ']' enclose a bracket expression. Matching rules for bracket 42 // expressions are identical to those of bracket subexpressions in regular 43 // expressions, except that '!' takes the role of '^' when placed right after 44 // the opening '['. 45 // - '\' escapes the following character, e. g. "\*" only matches literal '*' 46 // and has no special meaning 47 // - all other characters only match themselves 48 // 49 // A set of flags that alter the matching behavior may be passed to 50 // [[fnmatch]]. For an explanation of their meaning, see [[flags]]. 51 export fn fnmatch(pattern: str, string: str, flag: flags...) bool = { 52 let fl: flags = 0; 53 for (let i = 0z; i < len(flag); i += 1) { 54 fl |= flag[i]; 55 }; 56 if (fl & flags::PATHNAME != 0) { 57 match (fnmatch_pathname(pattern, string, fl)) { 58 case let b: bool => 59 return b; 60 case => 61 return false; 62 }; 63 } else { 64 match (fnmatch_internal(pattern, string, fl)) { 65 case let b: bool => 66 return b; 67 case => 68 return false; 69 }; 70 }; 71 }; 72 73 // Split the pattern and the string on every '/' and process each part 74 // separately 75 fn fnmatch_pathname( 76 pattern: str, 77 string: str, 78 fl: flags, 79 ) (bool | errors::unsupported | errors::invalid) = { 80 let tok = strings::tokenize(string, "/"); 81 let p_iter = strings::iter(pattern); 82 let start = p_iter; 83 for (true) :outer { 84 start = p_iter; 85 for (true) match (pat_next(&p_iter, fl)?) { 86 case end => 87 break :outer; 88 case let r: rune => 89 if (r == '/') break; 90 case bracket => 91 match_bracket(&p_iter, '\0')?; 92 case (question | star) => void; 93 }; 94 let s = match (strings::next_token(&tok)) { 95 case void => 96 return false; 97 case let s: str => 98 yield s; 99 }; 100 strings::prev(&p_iter); 101 let p = cut_tail(strings::iterstr(&start), &p_iter); 102 strings::next(&p_iter); 103 if (!fnmatch_internal(p, s, fl)?) { 104 return false; 105 }; 106 }; 107 let s = match(strings::next_token(&tok)) { 108 case void => 109 return false; 110 case let s: str => 111 yield s; 112 }; 113 let p = strings::iterstr(&start); 114 return fnmatch_internal(p, s, fl)? && strings::next_token(&tok) is void; 115 }; 116 117 // Core fnmatch function, implementing the "Sea of stars" algorithm that is also 118 // used in Musl libc. First we make sure the parts before the first star and 119 // after the last star produce exact matches and then proceed to greedily match 120 // everything in between. Because of the greedy property this algorithm does not 121 // have exponential corner cases. 122 fn fnmatch_internal( 123 pattern: str, 124 string: str, 125 fl: flags, 126 ) (bool | errors::invalid | errors::unsupported) = { 127 if (fl & flags::PERIOD != 0) { 128 if (strings::hasprefix(string, ".") 129 && !strings::hasprefix(pattern, ".")) { 130 return false; 131 }; 132 }; 133 134 let p = strings::iter(pattern); 135 let s = strings::iter(string); 136 137 // match up to the first * 138 for (true) { 139 let copy = s; 140 let rn = strings::next(©); 141 let t = match (pat_next(&p, fl)?) { 142 case star => 143 break; 144 case end => 145 return rn is void; 146 case question => 147 yield rn is rune; 148 case bracket => 149 yield rn is rune && match_bracket(&p, rn: rune)?; 150 case let r: rune => 151 yield rn is rune && rn: rune == r; 152 }; 153 if (!t) { 154 return false; 155 }; 156 s = copy; 157 }; 158 159 // find the tail of the pattern 160 let p_copy = p, p_last = (p, 0z); 161 let cnt = 0z; 162 for (true; cnt += 1) { 163 match (pat_next(&p, fl)?) { 164 case end => 165 break; 166 case star => 167 p_last = (p, cnt + 1); 168 case bracket => 169 match_bracket(&p, '\0')?; 170 case (question | rune) => void; 171 }; 172 }; 173 p = p_last.0; 174 cnt = cnt - p_last.1; 175 let s_copy = s; 176 s = strings::riter(string); 177 for (let i = 0z; i < cnt; i += 1) { 178 strings::next(&s); 179 }; 180 181 // match the tail 182 let s_last = s; 183 for (true) { 184 let rn = strings::prev(&s); 185 let matches = match (pat_next(&p, fl)?) { 186 case end => 187 if (rn is void) { 188 break; 189 } else { 190 return false; 191 }; 192 case question => 193 yield rn is rune; 194 case bracket => 195 yield rn is rune && match_bracket(&p, rn: rune)?; 196 case let r: rune => 197 yield rn is rune && rn: rune == r; 198 case star => 199 abort(); 200 }; 201 if (!matches) { 202 return false; 203 }; 204 }; 205 206 // match the "sea of stars" in the middle 207 s_copy = strings::iter(cut_tail(strings::iterstr(&s_copy), &s_last)); 208 p_copy = strings::iter(cut_tail(strings::iterstr(&p_copy), &p_last.0)); 209 for (true) :outer { 210 p = p_copy; 211 if (len(strings::iterstr(&p)) == 0) { 212 return true; 213 }; 214 s = s_copy; 215 for (true) :inner { 216 let copy = s; 217 let rn = strings::next(©); 218 let matched = match (pat_next(&p, fl)?) { 219 case end => 220 abort(); 221 case question => 222 yield rn is rune; 223 case bracket => 224 yield rn is rune && match_bracket(&p, rn: rune)?; 225 case let r: rune => 226 yield rn is rune && r == rn: rune; 227 case star => 228 p_copy = p; 229 s_copy = s; 230 continue :outer; 231 }; 232 if (!matched) { 233 break :inner; 234 }; 235 s = copy; 236 }; 237 match (strings::next(&s_copy)) { 238 case void => 239 return false; 240 case rune => void; 241 }; 242 }; 243 abort(); 244 }; 245 246 fn match_bracket( 247 it: *strings::iterator, 248 c: rune, 249 ) (bool | errors::invalid | errors::unsupported) = { 250 let old = *it; 251 let first = advance_or_err(it)?; 252 let inv = false; 253 if (first == '^') { 254 return errors::invalid; 255 }; 256 if (first == '!') { 257 inv = true; 258 first = advance_or_err(it)?; 259 }; 260 let found = (first != '[' && first == c); 261 let last: (rune | void) = first; 262 if (first == ']') { 263 first = advance_or_err(it)?; 264 }; 265 for (let r = first; true; r = advance_or_err(it)?) { 266 switch (r) { 267 case ']' => 268 break; 269 case '-' => 270 let end = advance_or_err(it)?; 271 if (end == ']') { 272 // '-' at the end matches itself 273 strings::prev(it); 274 last = '-'; 275 found ||= (c == '-'); 276 continue; 277 }; 278 if (last is void) { 279 return errors::invalid; 280 }; 281 let l = last: rune; 282 found ||= (l: u32 <= c: u32 && c: u32 <= end: u32); 283 last = void; // forbid 'a-f-n' 284 case '[' => 285 let next_rune = advance_or_err(it)?; 286 switch (next_rune) { // TODO localization 287 case '=', '.' => 288 return errors::unsupported; 289 case ':' => 290 let t = match_ctype(it, c)?; 291 found ||= t; 292 case => 293 strings::prev(it); 294 found ||= (c == '['); 295 }; 296 last = '['; 297 case => 298 found ||= (c == r); 299 last = r; 300 }; 301 }; 302 303 let cnt = len(strings::iterstr(&old)) - len(strings::iterstr(it)); 304 if (last is rune && first == last: rune && cnt >= 4) { 305 switch (first) { 306 case '=', '.', ':' => 307 return errors::invalid; 308 case => void; 309 }; 310 }; 311 return found ^^ inv; 312 }; 313 314 fn match_ctype(it: *strings::iterator, c: rune) (bool | errors::invalid) = { 315 let s = strings::iterstr(it); 316 let i = 0z; 317 for (let r = '\0'; r != ':'; i += 1) { 318 r = advance_or_err(it)?; 319 if (!ascii::valid(r)) { 320 return errors::invalid; 321 }; 322 }; 323 if (advance_or_err(it)? != ']') { 324 return errors::invalid; 325 }; 326 let name = strings::sub(s, 0, i - 1); 327 match (ctype_name_to_func(name)) { 328 case null => 329 return errors::invalid; 330 case let f: *fn(c: rune) bool => 331 return f(c); 332 }; 333 }; 334 335 type funcmap = (str, *fn(c: rune) bool); 336 337 fn cmp(a: const *void, b: const *void) int = 338 strings::compare((a: *funcmap).0, *(b: const *str)); 339 340 fn ctype_name_to_func(name: str) nullable *fn(c: rune) bool = { 341 const map: [_]funcmap = [ 342 ("alnum", &ascii::isalnum), ("alpha", &ascii::isalpha), 343 ("blank", &ascii::isblank), ("cntrl", &ascii::iscntrl), 344 ("digit", &ascii::isdigit), ("graph", &ascii::isgraph), 345 ("lower", &ascii::islower), ("print", &ascii::isprint), 346 ("punct", &ascii::ispunct), ("space", &ascii::isspace), 347 ("upper", &ascii::isupper), ("xdigit",&ascii::isxdigit), 348 ]; 349 match (sort::search(map, size(funcmap), &name, &cmp)) { 350 case void => 351 return null: nullable *fn(c: rune) bool; 352 case let ind: size => 353 return map[ind].1; 354 }; 355 }; 356 357 fn pat_next(pat: *strings::iterator, fl: flags) (token | errors::invalid) = { 358 let r = match (strings::next(pat)) { 359 case void => 360 return end; 361 case let r: rune => 362 yield r; 363 }; 364 switch (r) { 365 case '*' => 366 return star; 367 case '?' => 368 return question; 369 case '[' => 370 return bracket; 371 case '\\' => 372 // TODO: remove ? (harec bug workaround) 373 return if (fl & flags::NOESCAPE == 0) advance_or_err(pat)? 374 else '\\'; 375 case => 376 return r; 377 }; 378 }; 379 380 fn advance_or_err(it: *strings::iterator) (rune | errors::invalid) = { 381 match (strings::next(it)) { 382 case let r: rune => 383 return r; 384 case void => 385 return errors::invalid; 386 }; 387 }; 388 389 fn cut_tail(s: str, it: *strings::iterator) str = { 390 let s_len = len(s), t_len = len(strings::iterstr(it)); 391 let b = strings::toutf8(s); 392 return strings::fromutf8(b[..s_len - t_len])!; 393 }; 394