glob.ha (7215B)
1 // License: MPL-2.0 2 // (c) 2022 Yasumasa Tada <ytada@spartan.dev> 3 use fnmatch; 4 use fs; 5 use io; 6 use os; 7 use sort; 8 use strings; 9 use strio; 10 11 // Flags used to control the behavior of [[next]]. 12 export type flags = enum uint { 13 NONE = 0, 14 // Slash appending is enabled. A slash character is appended to each 15 // pathname that is a directory that matches the pattern. 16 MARK = 1, 17 // If the pattern does not match any pathname, the pattern string is 18 // returned. 19 NOCHECK = 1 << 1, 20 // Backslash escaping is disabled. A backslash character is treated as 21 // an ordinary character. 22 NOESCAPE = 1 << 2, 23 // Pathname sorting is disabled. The order of pathnames returned is 24 // unspecified. 25 NOSORT = 1 << 3, 26 }; 27 28 export type generator = struct { 29 pats: strstack, 30 matc: size, 31 flgs: flags, 32 tmpp: pattern, 33 }; 34 35 export type strstack = struct { 36 bufv: []strio::stream, 37 bufc: size, 38 }; 39 40 export type pattern = struct { 41 dir: strio::stream, 42 pat: strio::stream, 43 rem: strio::stream, 44 }; 45 46 // Information about an unsuccessful search. 47 export type failure = !struct { 48 // The path that cannot be opened or read. 49 path: str, 50 // The actual filesystem error. 51 error: fs::error, 52 }; 53 54 // Returns a generator of pathnames matching a pattern. The result must be 55 // freed using [[finish]]. 56 export fn glob(pattern: str, flags: flags...) generator = { 57 let ss = strstack_init(); 58 strio::concat(strstack_push(&ss), pattern)!; 59 let bs = flags::NONE; 60 for (let i = 0z; i < len(flags); i += 1) { 61 bs |= flags[i]; 62 }; 63 return generator { 64 pats = ss, 65 matc = 0, 66 flgs = bs, 67 tmpp = pattern_init(), 68 }; 69 }; 70 71 // Frees all memory allocated by the generator. 72 export fn finish(gen: *generator) void = { 73 strstack_free(&gen.pats); 74 pattern_free(&gen.tmpp); 75 }; 76 77 // Returns a generated pathname. The returned string is valid until [[next]] 78 // is called again. If, during the search, a directory is encountered that 79 // cannot be opened or read, a [[failure]] object is returned instead. 80 // [[next]] can be repeatedly called until void is returned. 81 export fn next(gen: *generator) (str | void | failure) = { 82 const init = strstack_size(&gen.pats) == 1 83 && len(strio::string(&gen.tmpp.dir)) == 0 84 && len(strio::string(&gen.tmpp.pat)) == 0 85 && len(strio::string(&gen.tmpp.rem)) == 0; 86 match (next_match(os::cwd, gen)) { 87 case let s: str => 88 return s; 89 case let f: failure => 90 return f; 91 case void => void; 92 }; 93 if (init && gen.flgs & flags::NOCHECK != 0) { 94 return strio::string(&gen.pats.bufv[0]); 95 }; 96 }; 97 98 fn next_match(fs: *fs::fs, gen: *generator) (str | void | failure) = { 99 match (strstack_pop(&gen.pats)) { 100 case void => 101 return; 102 case let s: str => 103 if (gen.matc > 0) { 104 gen.matc -= 1; 105 return s; 106 }; 107 pattern_parse(&gen.tmpp, s, gen.flgs & flags::NOESCAPE != 0); 108 }; 109 const l = strstack_size(&gen.pats); 110 111 const dir = pattern_dir(&gen.tmpp); 112 let pat = pattern_pat(&gen.tmpp); 113 const patm = strings::hassuffix(pat, '/'); 114 if (patm) { 115 pat = strings::sub(pat, 0, len(pat) - 1); 116 }; 117 const rem = pattern_rem(&gen.tmpp); 118 119 let flgs = fnmatch::flags::PERIOD; 120 if (gen.flgs & flags::NOESCAPE != 0) { 121 flgs |= fnmatch::flags::NOESCAPE; 122 }; 123 let it = match(fs::iter(fs, if (len(dir) > 0) dir else ".")) { 124 case let i: *fs::iterator => 125 yield i; 126 case let e: fs::error => 127 return failure { 128 path = dir, 129 error = e, 130 }; 131 }; 132 defer fs::finish(it); 133 134 for (true) match (fs::next(it)) { 135 case void => 136 break; 137 case let de: fs::dirent => 138 if (patm && !fs::isdir(de.ftype) && !fs::islink(de.ftype)) { 139 continue; 140 }; 141 if (!fnmatch::fnmatch(pat, de.name, flgs)) { 142 continue; 143 }; 144 145 let b = strstack_push(&gen.pats); 146 if (len(rem) > 0) { 147 strio::concat(b, dir, de.name, "/", rem)!; 148 continue; 149 }; 150 strio::concat(b, dir, de.name)!; 151 if (patm || gen.flgs & flags::MARK != 0) { 152 let m = fs::isdir(de.ftype); 153 // POSIX does not specify the behavior when a pathname 154 // that matches the pattern is a symlink to a 155 // directory. But in major implementation a slash 156 // character is appended in this case. 157 if (fs::islink(de.ftype)) { 158 match (fs::realpath(fs, strio::string(b))) { 159 case let r: str => 160 match (fs::stat(fs, r)) { 161 case let s: fs::filestat => 162 m = fs::isdir(s.mode); 163 case fs::error => void; 164 }; 165 case fs::error => void; 166 }; 167 }; 168 if (m) { 169 strio::concat(b, "/")!; 170 } else if (patm) { 171 strstack_pop(&gen.pats); 172 continue; 173 }; 174 }; 175 gen.matc += 1; 176 }; 177 if (gen.flgs & flags::NOSORT == 0) { 178 strstack_sort(&gen.pats, l); 179 }; 180 181 return next_match(fs, gen); 182 }; 183 184 fn pattern_init() pattern = pattern { 185 dir = strio::dynamic(), 186 pat = strio::dynamic(), 187 rem = strio::dynamic(), 188 }; 189 190 fn pattern_free(p: *pattern) void = { 191 io::close(&p.dir)!; 192 io::close(&p.pat)!; 193 io::close(&p.rem)!; 194 }; 195 196 fn pattern_reset(p: *pattern) void = { 197 strio::reset(&p.dir); 198 strio::reset(&p.pat); 199 strio::reset(&p.rem); 200 }; 201 202 fn pattern_dir(p: *pattern) str = strio::string(&p.dir); 203 204 fn pattern_pat(p: *pattern) str = strio::string(&p.pat); 205 206 fn pattern_rem(p: *pattern) str = strio::string(&p.rem); 207 208 fn pattern_parse(p: *pattern, pstr: str, noesc: bool) void = { 209 pattern_reset(p); 210 211 let itdir = strings::iter(pstr); 212 let itpat = itdir; 213 214 // p.dir is the longest directory name which contains no special 215 // characters. 216 for (let brk = false, esc = false; true) { 217 const r = match (strings::next(&itdir)) { 218 case void => 219 return; 220 case let r: rune => 221 yield r; 222 }; 223 224 if (!esc) switch (r) { 225 case '*', '?' => 226 break; 227 case '[' => 228 brk = true; 229 case ']' => 230 if (brk) { 231 break; 232 }; 233 case '\\' => 234 if (!noesc) { 235 esc = true; 236 continue; 237 }; 238 case => void; 239 }; 240 241 strio::appendrune(&p.pat, r)!; 242 if (r == '/') { 243 strio::concat(&p.dir, strio::string(&p.pat))!; 244 strio::reset(&p.pat); 245 itpat = itdir; 246 }; 247 esc = false; 248 }; 249 250 // p.pat is the first path component which contains special 251 // characters. 252 strio::reset(&p.pat); 253 for (let esc = false; true) { 254 const r = match (strings::next(&itpat)) { 255 case void => 256 return; 257 case let r: rune => 258 yield r; 259 }; 260 261 if (!esc && r == '\\' && !noesc) { 262 esc = true; 263 continue; 264 }; 265 266 if (esc && r != '/') { 267 strio::appendrune(&p.pat, '\\')!; 268 }; 269 strio::appendrune(&p.pat, r)!; 270 if (r == '/') { 271 break; 272 }; 273 esc = false; 274 }; 275 276 strio::concat(&p.rem, strings::iterstr(&itpat))!; 277 }; 278 279 fn strstack_init() strstack = strstack { 280 bufv = [], 281 bufc = 0, 282 }; 283 284 fn strstack_free(ss: *strstack) void = { 285 for (let i = 0z; i < len(ss.bufv); i += 1) { 286 io::close(&ss.bufv[i])!; 287 }; 288 free(ss.bufv); 289 }; 290 291 fn strstack_size(ss: *strstack) size = ss.bufc; 292 293 fn strstack_push(ss: *strstack) *strio::stream = { 294 if (ss.bufc == len(ss.bufv)) { 295 append(ss.bufv, strio::dynamic()); 296 }; 297 let b = &ss.bufv[ss.bufc]; 298 strio::reset(b); 299 ss.bufc += 1; 300 return b; 301 }; 302 303 fn strstack_pop(ss: *strstack) (str | void) = { 304 if (ss.bufc == 0) { 305 return; 306 }; 307 ss.bufc -= 1; 308 return strio::string(&ss.bufv[ss.bufc]); 309 }; 310 311 fn strstack_sort(ss: *strstack, pos: size) void = { 312 if (pos > ss.bufc) { 313 return; 314 }; 315 let s = ss.bufv[pos..ss.bufc]; 316 sort::sort(s, size(strio::stream), &bufcmp); 317 }; 318 319 fn bufcmp(a: const *void, b: const *void) int = 320 strings::compare( 321 strio::string(b: *strio::stream), 322 strio::string(a: *strio::stream), 323 );