fnmatch.ha (8732B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 use ascii; 5 use errors; 6 use strings; 7 8 // A set of flags that alter the matching behavior of [[fnmatch]] 9 export type flag = enum uint { 10 NONE = 0, 11 12 // If this flag is set, slashes in the string will only be matched by 13 // literal slashes in the pattern 14 PATHNAME = 1u << 0, 15 // If this flag is set, backslash will be treated as an ordinary 16 // character 17 NOESCAPE = 1u << 1, 18 // If this flag is set, a '.' at the beginning of the string can only 19 // be matched by a literal '.' in the pattern. If [[flag::PATHNAME]] is 20 // set simultaneously, this behavior also apply to any periods 21 // immediately following a slash. 22 PERIOD = 1u << 2, 23 }; 24 25 type bracket = void; 26 type star = void; 27 type question = void; 28 type end = void; 29 type token = (rune | bracket | star | question | end); 30 31 // Check whether the 'string' matches the 'pattern', which is a shell wildcard 32 // pattern with the following matching rules: 33 // 34 // - '?' matches any single character 35 // - '*' matches any string, including the empty string 36 // - '[' and ']' enclose a bracket expression. Matching rules for bracket 37 // expressions are identical to those of bracket subexpressions in regular 38 // expressions, except that '!' takes the role of '^' when placed right after 39 // the opening '['. 40 // - '\' escapes the following character, e. g. "\*" only matches literal '*' 41 // and has no special meaning 42 // - all other characters only match themselves 43 // 44 // A set of flags that alter the matching behavior may be passed to 45 // [[fnmatch]]. For an explanation of their meaning, see [[flag]]. 46 export fn fnmatch(pattern: str, string: str, flags: flag = flag::NONE) bool = { 47 let b = if (flags & flag::PATHNAME != 0) { 48 yield fnmatch_pathname(pattern, string, flags); 49 } else { 50 yield fnmatch_internal(pattern, string, flags); 51 }; 52 return b is bool && b: bool; 53 }; 54 55 // Split the pattern and the string on every '/' and process each part 56 // separately 57 fn fnmatch_pathname( 58 pattern: str, 59 string: str, 60 fl: flag, 61 ) (bool | errors::unsupported | errors::invalid) = { 62 let tok = strings::tokenize(string, "/"); 63 let p_iter = strings::iter(pattern); 64 let start = p_iter; 65 for :outer (true) { 66 start = p_iter; 67 for (true) match (pat_next(&p_iter, fl)?) { 68 case end => 69 break :outer; 70 case let r: rune => 71 if (r == '/') break; 72 case bracket => 73 match_bracket(&p_iter, '\0')?; 74 case (question | star) => void; 75 }; 76 let s = match (strings::next_token(&tok)) { 77 case done => 78 return false; 79 case let s: str => 80 yield s; 81 }; 82 strings::prev(&p_iter); 83 let p = strings::slice(&start, &p_iter); 84 strings::next(&p_iter); 85 if (!fnmatch_internal(p, s, fl)?) { 86 return false; 87 }; 88 }; 89 let s = match(strings::next_token(&tok)) { 90 case done => 91 return false; 92 case let s: str => 93 yield s; 94 }; 95 let p = strings::iterstr(&start); 96 return fnmatch_internal(p, s, fl)? && strings::next_token(&tok) is done; 97 }; 98 99 // Core fnmatch function, implementing the "Sea of stars" algorithm that is also 100 // used in Musl libc. First we make sure the parts before the first star and 101 // after the last star produce exact matches and then proceed to greedily match 102 // everything in between. Because of the greedy property this algorithm does not 103 // have exponential corner cases. 104 fn fnmatch_internal( 105 pattern: str, 106 string: str, 107 fl: flag, 108 ) (bool | errors::invalid | errors::unsupported) = { 109 if (fl & flag::PERIOD != 0) { 110 if (strings::hasprefix(string, ".") 111 && !strings::hasprefix(pattern, ".")) { 112 return false; 113 }; 114 }; 115 116 let p = strings::iter(pattern); 117 let s = strings::iter(string); 118 119 // match up to the first * 120 for (true) { 121 let copy = s; 122 let rn = strings::next(©); 123 let t = match (pat_next(&p, fl)?) { 124 case star => 125 break; 126 case end => 127 return rn is done; 128 case question => 129 yield rn is rune; 130 case bracket => 131 yield rn is rune && match_bracket(&p, rn: rune)?; 132 case let r: rune => 133 yield rn is rune && rn: rune == r; 134 }; 135 if (!t) { 136 return false; 137 }; 138 s = copy; 139 }; 140 141 // find the tail of the pattern 142 let p_copy = p, p_last = (p, 0z); 143 let cnt = 0z; 144 for (true; cnt += 1) { 145 match (pat_next(&p, fl)?) { 146 case end => 147 break; 148 case star => 149 p_last = (p, cnt + 1); 150 case bracket => 151 match_bracket(&p, '\0')?; 152 case (question | rune) => void; 153 }; 154 }; 155 p = p_last.0; 156 cnt = cnt - p_last.1; 157 let s_copy = s; 158 s = strings::riter(string); 159 for (let i = 0z; i < cnt; i += 1) { 160 strings::next(&s); 161 }; 162 163 // match the tail 164 let s_last = s; 165 for (true) { 166 let rn = strings::prev(&s); 167 let matches = match (pat_next(&p, fl)?) { 168 case end => 169 if (rn is done) { 170 break; 171 } else { 172 return false; 173 }; 174 case question => 175 yield rn is rune; 176 case bracket => 177 yield rn is rune && match_bracket(&p, rn: rune)?; 178 case let r: rune => 179 yield rn is rune && rn: rune == r; 180 case star => 181 abort(); 182 }; 183 if (!matches) { 184 return false; 185 }; 186 }; 187 188 // match the "sea of stars" in the middle 189 s_copy = strings::iter(strings::slice(&s_copy, &s_last)); 190 p_copy = strings::iter(strings::slice(&p_copy, &p_last.0)); 191 for :outer (true) { 192 p = p_copy; 193 if (len(strings::iterstr(&p)) == 0) { 194 return true; 195 }; 196 s = s_copy; 197 for (true) { 198 let copy = s; 199 let rn = strings::next(©); 200 let matched = match (pat_next(&p, fl)?) { 201 case end => 202 abort(); 203 case question => 204 yield rn is rune; 205 case bracket => 206 yield rn is rune && match_bracket(&p, rn: rune)?; 207 case let r: rune => 208 yield rn is rune && r == rn: rune; 209 case star => 210 p_copy = p; 211 s_copy = s; 212 continue :outer; 213 }; 214 if (!matched) { 215 break; 216 }; 217 s = copy; 218 }; 219 match (strings::next(&s_copy)) { 220 case done => 221 return false; 222 case rune => void; 223 }; 224 }; 225 }; 226 227 fn match_bracket( 228 it: *strings::iterator, 229 c: rune, 230 ) (bool | errors::invalid | errors::unsupported) = { 231 let old = *it; 232 let first = advance_or_err(it)?; 233 let inv = false; 234 if (first == '^') { 235 return errors::invalid; 236 }; 237 if (first == '!') { 238 inv = true; 239 first = advance_or_err(it)?; 240 }; 241 let found = (first != '[' && first == c); 242 let last: (rune | void) = first; 243 if (first == ']') { 244 first = advance_or_err(it)?; 245 }; 246 247 for (let r = first; true; r = advance_or_err(it)?) { 248 switch (r) { 249 case ']' => 250 break; 251 case '-' => 252 let end = advance_or_err(it)?; 253 if (end == ']') { 254 // '-' at the end matches itself 255 strings::prev(it); 256 last = '-'; 257 found ||= (c == '-'); 258 continue; 259 }; 260 match (last) { 261 case void => 262 return errors::invalid; 263 case let l: rune => 264 found ||= (l: u32 <= c: u32 && c: u32 <= end: u32); 265 last = void; // forbid 'a-f-n' 266 }; 267 case '[' => 268 let next_rune = advance_or_err(it)?; 269 switch (next_rune) { // TODO localization 270 case '=', '.' => 271 return errors::unsupported; 272 case ':' => 273 let t = match_ctype(it, c)?; 274 found ||= t; 275 case => 276 strings::prev(it); 277 found ||= (c == '['); 278 }; 279 last = '['; 280 case => 281 found ||= (c == r); 282 last = r; 283 }; 284 }; 285 286 let cnt = len(strings::iterstr(&old)) - len(strings::iterstr(it)); 287 if (last is rune && first == last: rune && cnt >= 4 288 && (first == '=' || first == '.' || first == ':')) { 289 return errors::invalid; 290 }; 291 return found ^^ inv; 292 }; 293 294 fn match_ctype(it: *strings::iterator, c: rune) (bool | errors::invalid) = { 295 let s = strings::iterstr(it); 296 let i = 0z; 297 for (let r = '\0'; r != ':'; i += 1) { 298 r = advance_or_err(it)?; 299 if (!ascii::valid(r)) { 300 return errors::invalid; 301 }; 302 }; 303 if (advance_or_err(it)? != ']') { 304 return errors::invalid; 305 }; 306 let name = strings::sub(s, 0, i - 1); 307 const map: [_](str, *fn(rune) bool) = [ 308 ("alnum", &ascii::isalnum), ("alpha", &ascii::isalpha), 309 ("blank", &ascii::isblank), ("cntrl", &ascii::iscntrl), 310 ("digit", &ascii::isdigit), ("graph", &ascii::isgraph), 311 ("lower", &ascii::islower), ("print", &ascii::isprint), 312 ("punct", &ascii::ispunct), ("space", &ascii::isspace), 313 ("upper", &ascii::isupper), ("xdigit",&ascii::isxdigit), 314 ]; 315 for (let i = 0z; i < len(map); i += 1) { 316 if (map[i].0 == name) { 317 return map[i].1(c); 318 }; 319 }; 320 return errors::invalid; 321 }; 322 323 fn pat_next(pat: *strings::iterator, fl: flag) (token | errors::invalid) = { 324 let r = match (strings::next(pat)) { 325 case done => 326 return end; 327 case let r: rune => 328 yield r; 329 }; 330 switch (r) { 331 case '*' => 332 return star; 333 case '?' => 334 return question; 335 case '[' => 336 return bracket; 337 case '\\' => 338 // TODO: remove ? (harec bug workaround) 339 return if (fl & flag::NOESCAPE == 0) advance_or_err(pat)? 340 else '\\'; 341 case => 342 return r; 343 }; 344 }; 345 346 fn advance_or_err(it: *strings::iterator) (rune | errors::invalid) = { 347 match (strings::next(it)) { 348 case let r: rune => 349 return r; 350 case done => 351 return errors::invalid; 352 }; 353 };