fnmatch.ha (8544B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 use ascii; 5 use strings; 6 7 // A set of flags that alter the matching behavior of [[fnmatch]] 8 export type flag = enum uint { 9 NONE = 0, 10 11 // If this flag is set, slashes in the string will only be matched by 12 // literal slashes in the pattern 13 PATHNAME = 1u << 0, 14 // If this flag is set, backslash will be treated as an ordinary 15 // character 16 NOESCAPE = 1u << 1, 17 // If this flag is set, a '.' at the beginning of the string can only 18 // be matched by a literal '.' in the pattern. If [[flag::PATHNAME]] is 19 // set simultaneously, this behavior also apply to any periods 20 // immediately following a slash. 21 PERIOD = 1u << 2, 22 }; 23 24 type bracket = void; 25 type star = void; 26 type question = void; 27 type end = void; 28 type token = (rune | bracket | star | question | end); 29 30 type invalid = !void; 31 32 // Check whether the 'string' matches the 'pattern', which is a shell wildcard 33 // pattern with the following matching rules: 34 // 35 // - '?' matches any single character 36 // - '*' matches any string, including the empty string 37 // - '[' and ']' enclose a bracket expression. Matching rules for bracket 38 // expressions are identical to those of bracket subexpressions in regular 39 // expressions, except that '!' takes the role of '^' when placed right after 40 // the opening '['. 41 // - '\' escapes the following character, e. g. "\*" only matches literal '*' 42 // and has no special meaning 43 // - all other characters only match themselves 44 // 45 // A set of flags that alter the matching behavior may be passed to 46 // [[fnmatch]]. For an explanation of their meaning, see [[flag]]. 47 export fn fnmatch(pattern: str, string: str, flags: flag = flag::NONE) bool = { 48 let b = if (flags & flag::PATHNAME != 0) { 49 yield fnmatch_pathname(pattern, string, flags); 50 } else { 51 yield fnmatch_internal(pattern, string, flags); 52 }; 53 return b is bool && b: bool; 54 }; 55 56 // Split the pattern and the string on every '/' and process each part 57 // separately 58 fn fnmatch_pathname(pattern: str, string: str, fl: flag) (bool | invalid) = { 59 let tok = strings::tokenize(string, "/"); 60 let p_iter = strings::iter(pattern); 61 let start = p_iter; 62 for :outer (true) { 63 start = p_iter; 64 for (true) match (pat_next(&p_iter, fl)?) { 65 case end => 66 break :outer; 67 case let r: rune => 68 if (r == '/') break; 69 case bracket => 70 match_bracket(&p_iter, '\0')?; 71 case (question | star) => void; 72 }; 73 let s = match (strings::next_token(&tok)) { 74 case done => 75 return false; 76 case let s: str => 77 yield s; 78 }; 79 strings::prev(&p_iter); 80 let p = strings::slice(&start, &p_iter); 81 strings::next(&p_iter); 82 if (!fnmatch_internal(p, s, fl)?) { 83 return false; 84 }; 85 }; 86 let s = match(strings::next_token(&tok)) { 87 case done => 88 return false; 89 case let s: str => 90 yield s; 91 }; 92 let p = strings::iterstr(&start); 93 return fnmatch_internal(p, s, fl)? && strings::next_token(&tok) is done; 94 }; 95 96 // Core fnmatch function, implementing the "Sea of stars" algorithm that is also 97 // used in Musl libc. First we make sure the parts before the first star and 98 // after the last star produce exact matches and then proceed to greedily match 99 // everything in between. Because of the greedy property this algorithm does not 100 // have exponential corner cases. 101 fn fnmatch_internal(pattern: str, string: str, fl: flag) (bool | invalid) = { 102 if (fl & flag::PERIOD != 0) { 103 if (strings::hasprefix(string, ".") 104 && !strings::hasprefix(pattern, ".")) { 105 return false; 106 }; 107 }; 108 109 let p = strings::iter(pattern); 110 let s = strings::iter(string); 111 112 // match up to the first * 113 for (true) { 114 let copy = s; 115 let rn = strings::next(©); 116 let t = match (pat_next(&p, fl)?) { 117 case star => 118 break; 119 case end => 120 return rn is done; 121 case question => 122 yield rn is rune; 123 case bracket => 124 yield rn is rune && match_bracket(&p, rn: rune)?; 125 case let r: rune => 126 yield rn is rune && rn: rune == r; 127 }; 128 if (!t) { 129 return false; 130 }; 131 s = copy; 132 }; 133 134 // find the tail of the pattern 135 let p_copy = p, p_last = (p, 0z); 136 let cnt = 0z; 137 for (true; cnt += 1) { 138 match (pat_next(&p, fl)?) { 139 case end => 140 break; 141 case star => 142 p_last = (p, cnt + 1); 143 case bracket => 144 match_bracket(&p, '\0')?; 145 case (question | rune) => void; 146 }; 147 }; 148 p = p_last.0; 149 cnt = cnt - p_last.1; 150 let s_copy = s; 151 s = strings::riter(string); 152 for (let i = 0z; i < cnt; i += 1) { 153 strings::next(&s); 154 }; 155 156 // match the tail 157 let s_last = s; 158 for (true) { 159 let rn = strings::prev(&s); 160 let matches = match (pat_next(&p, fl)?) { 161 case end => 162 if (rn is done) { 163 break; 164 } else { 165 return false; 166 }; 167 case question => 168 yield rn is rune; 169 case bracket => 170 yield rn is rune && match_bracket(&p, rn: rune)?; 171 case let r: rune => 172 yield rn is rune && rn: rune == r; 173 case star => 174 abort(); 175 }; 176 if (!matches) { 177 return false; 178 }; 179 }; 180 181 // match the "sea of stars" in the middle 182 s_copy = strings::iter(strings::slice(&s_copy, &s_last)); 183 p_copy = strings::iter(strings::slice(&p_copy, &p_last.0)); 184 for :outer (true) { 185 p = p_copy; 186 if (len(strings::iterstr(&p)) == 0) { 187 return true; 188 }; 189 s = s_copy; 190 for (true) { 191 let copy = s; 192 let rn = strings::next(©); 193 let matched = match (pat_next(&p, fl)?) { 194 case end => 195 abort(); 196 case question => 197 yield rn is rune; 198 case bracket => 199 yield rn is rune && match_bracket(&p, rn: rune)?; 200 case let r: rune => 201 yield rn is rune && r == rn: rune; 202 case star => 203 p_copy = p; 204 s_copy = s; 205 continue :outer; 206 }; 207 if (!matched) { 208 break; 209 }; 210 s = copy; 211 }; 212 match (strings::next(&s_copy)) { 213 case done => 214 return false; 215 case rune => void; 216 }; 217 }; 218 }; 219 220 fn match_bracket(it: *strings::iterator, c: rune) (bool | invalid) = { 221 let old = *it; 222 let first = advance_or_err(it)?; 223 let inv = false; 224 if (first == '^') { 225 return invalid; 226 }; 227 if (first == '!') { 228 inv = true; 229 first = advance_or_err(it)?; 230 }; 231 let found = (first != '[' && first == c); 232 let last: (rune | void) = first; 233 if (first == ']') { 234 first = advance_or_err(it)?; 235 }; 236 237 for (let r = first; true; r = advance_or_err(it)?) { 238 switch (r) { 239 case ']' => 240 break; 241 case '-' => 242 let end = advance_or_err(it)?; 243 if (end == ']') { 244 // '-' at the end matches itself 245 strings::prev(it); 246 last = '-'; 247 found ||= (c == '-'); 248 continue; 249 }; 250 match (last) { 251 case void => 252 return invalid; 253 case let l: rune => 254 found ||= (l: u32 <= c: u32 && c: u32 <= end: u32); 255 last = void; // forbid 'a-f-n' 256 }; 257 case '[' => 258 let next_rune = advance_or_err(it)?; 259 switch (next_rune) { // TODO localization 260 case '=', '.' => 261 return invalid; 262 case ':' => 263 let t = match_ctype(it, c)?; 264 found ||= t; 265 case => 266 strings::prev(it); 267 found ||= (c == '['); 268 }; 269 last = '['; 270 case => 271 found ||= (c == r); 272 last = r; 273 }; 274 }; 275 276 let cnt = len(strings::iterstr(&old)) - len(strings::iterstr(it)); 277 if (last is rune && first == last: rune && cnt >= 4 278 && (first == '=' || first == '.' || first == ':')) { 279 return invalid; 280 }; 281 return found ^^ inv; 282 }; 283 284 fn match_ctype(it: *strings::iterator, c: rune) (bool | invalid) = { 285 let s = strings::iterstr(it); 286 let i = 0z; 287 for (let r = '\0'; r != ':'; i += 1) { 288 r = advance_or_err(it)?; 289 if (!ascii::valid(r)) { 290 return invalid; 291 }; 292 }; 293 if (advance_or_err(it)? != ']') { 294 return invalid; 295 }; 296 let name = strings::sub(s, 0, i - 1); 297 const map: [_](str, *fn(rune) bool) = [ 298 ("alnum", &ascii::isalnum), ("alpha", &ascii::isalpha), 299 ("blank", &ascii::isblank), ("cntrl", &ascii::iscntrl), 300 ("digit", &ascii::isdigit), ("graph", &ascii::isgraph), 301 ("lower", &ascii::islower), ("print", &ascii::isprint), 302 ("punct", &ascii::ispunct), ("space", &ascii::isspace), 303 ("upper", &ascii::isupper), ("xdigit",&ascii::isxdigit), 304 ]; 305 for (let i = 0z; i < len(map); i += 1) { 306 if (map[i].0 == name) { 307 return map[i].1(c); 308 }; 309 }; 310 return invalid; 311 }; 312 313 fn pat_next(pat: *strings::iterator, fl: flag) (token | invalid) = { 314 let r = match (strings::next(pat)) { 315 case done => 316 return end; 317 case let r: rune => 318 yield r; 319 }; 320 switch (r) { 321 case '*' => 322 return star; 323 case '?' => 324 return question; 325 case '[' => 326 return bracket; 327 case '\\' => 328 // TODO: remove ? (harec bug workaround) 329 return if (fl & flag::NOESCAPE == 0) advance_or_err(pat)? 330 else '\\'; 331 case => 332 return r; 333 }; 334 }; 335 336 fn advance_or_err(it: *strings::iterator) (rune | invalid) = { 337 match (strings::next(it)) { 338 case let r: rune => 339 return r; 340 case done => 341 return invalid; 342 }; 343 };