glob.ha (7604B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use fmt;
use fnmatch;
use fs;
use io;
use memio;
use os;
use path;
use sort;
use strings;

// Flags used to control the behavior of [[next]].
export type flag = enum uint {
	NONE = 0,
	// Slash appending is enabled. A slash character is appended to each
	// pathname that is a directory that matches the pattern.
	MARK = 1 << 0,
	// If the pattern does not match any pathname, the pattern string is
	// returned.
	NOCHECK = 1 << 1,
	// Backslash escaping is disabled. A backslash character is treated as
	// an ordinary character.
	NOESCAPE = 1 << 2,
	// Pathname sorting is disabled. The order of pathnames returned is
	// unspecified.
	NOSORT = 1 << 3,
};

// State of an in-progress pattern expansion. Created by [[glob]], advanced by
// [[next]] and released by [[finish]].
export type generator = struct {
	// Stack of pending strings: patterns that still have to be expanded
	// and, on top of them, pathnames that are ready to be returned.
	pats: strstack,
	// Number of entries on top of the stack that are finished pathnames.
	matc: size,
	// Flags supplied by the caller.
	flgs: flag,
	// Scratch space holding the most recently parsed pattern.
	tmpp: pattern,
};

// A stack of strings backed by reusable in-memory streams.
export type strstack = struct {
	// Every buffer allocated so far, including ones not currently in use.
	bufv: []memio::stream,
	// Number of buffers currently on the stack, counted from bufv[0].
	bufc: size,
};

// A pattern split into the three parts the expansion works on.
export type pattern = struct {
	// TODO: look into working with a couple of string iterators instead
	// The longest leading directory name which contains no special
	// characters.
	dir: memio::stream,
	// The first path component which contains special characters.
	pat: memio::stream,
	// Whatever follows that component.
	rem: memio::stream,
};

// Information about an unsuccessful search.
export type failure = !struct {
	// The path that cannot be opened or read.
	path: str,
	// The actual filesystem error.
	error: fs::error,
};

// Converts an error into a human-friendly string. The result is statically
// allocated.
export fn strerror(err: failure) str = {
	static let msgbuf: [path::MAX + 1024]u8 = [0...];
	const reason = fs::strerror(err.error);
	return fmt::bsprintf(msgbuf, "{}: {}", err.path, reason);
};

// Returns a generator of pathnames matching a pattern. The result must be
// freed using [[finish]].
export fn glob(pattern: str, flags: flag = flag::NONE) generator = {
	// The stack starts out holding only the caller's pattern; it is
	// expanded lazily by [[next]].
	let ss = strstack_init();
	memio::concat(strstack_push(&ss), pattern)!;
	return generator {
		pats = ss,
		matc = 0,
		flgs = flags,
		tmpp = pattern_init(),
	};
};

// Frees all memory allocated by the generator.
export fn finish(gen: *generator) void = {
	strstack_free(&gen.pats);
	pattern_free(&gen.tmpp);
};

// Returns a generated pathname. The returned string is valid until [[next]]
// is called again. If, during the search, a directory is encountered that
// cannot be opened or read, a [[failure]] object is returned instead.
// [[next]] can be repeatedly called until done is returned.
export fn next(gen: *generator) (str | done | failure) = {
	// This is the first call if the stack still holds exactly the
	// original pattern and nothing has been parsed yet.
	const init = strstack_size(&gen.pats) == 1
		&& len(memio::string(&gen.tmpp.dir)!) == 0
		&& len(memio::string(&gen.tmpp.pat)!) == 0
		&& len(memio::string(&gen.tmpp.rem)!) == 0;
	const nocheck = init && (gen.flgs & flag::NOCHECK) != 0;

	// The buffer holding the original pattern is reused for intermediate
	// candidates during the expansion, so keep a copy of the pattern in
	// case it has to be returned verbatim.
	let orig = memio::dynamic();
	defer io::close(&orig)!;
	if (nocheck) {
		memio::concat(&orig, memio::string(&gen.pats.bufv[0])!)!;
	};

	match (next_match(gen)?) {
	case let s: str =>
		return s;
	case void => void;
	};

	if (nocheck) {
		// Nothing matched: hand the pattern itself back. It is copied
		// into a buffer owned by the generator so that it remains
		// valid after this function returns.
		const b = &gen.pats.bufv[0];
		memio::reset(b);
		memio::concat(b, memio::string(&orig)!)!;
		return memio::string(b)!;
	};
	return done;
};

// Pops entries off the pattern stack until a pathname can be returned.
// Returns void once the stack is exhausted, or a [[failure]] if a directory
// cannot be opened or read.
fn next_match(gen: *generator) (str | void | failure) = {
	for (true) {
		match (strstack_pop(&gen.pats)) {
		case void =>
			break;
		case let s: str =>
			// The topmost gen.matc entries are finished pathnames.
			if (gen.matc > 0) {
				gen.matc -= 1;
				return s;
			};
			pattern_parse(&gen.tmpp, s,
				gen.flgs & flag::NOESCAPE != 0);
		};

		if (pattern_pat(&gen.tmpp) == "") {
			// No special characters are left, so the candidate is
			// a literal path which matches only if it exists.
			assert(pattern_rem(&gen.tmpp) == "");
			const dir = pattern_dir(&gen.tmpp);
			if (os::exists(dir)) {
				return dir;
			};
			// Keep going: other candidates may still be pending
			// on the stack.
			continue;
		};

		expand_pattern(gen)?;
	};
	return;
};

// Expands the first special component of the pattern held in gen.tmpp against
// the entries of its directory. Every match is pushed onto the stack, either
// as a finished pathname (counted in gen.matc) or, when more of the pattern
// remains, as a new pattern to be expanded later. The directory iterator is
// closed before this function returns.
fn expand_pattern(gen: *generator) (void | failure) = {
	// Stack height before this expansion; only the entries pushed from
	// here on are sorted.
	const l = strstack_size(&gen.pats);

	const dir = pattern_dir(&gen.tmpp);
	let pat = pattern_pat(&gen.tmpp);
	// A trailing slash on the component means it has to be a directory.
	const patm = strings::hassuffix(pat, '/');
	if (patm) {
		pat = strings::sub(pat, 0, len(pat) - 1);
	};
	const rem = pattern_rem(&gen.tmpp);

	let flgs = fnmatch::flag::PERIOD;
	if (gen.flgs & flag::NOESCAPE != 0) {
		flgs |= fnmatch::flag::NOESCAPE;
	};

	let it = match (os::iter(if (len(dir) > 0) dir else ".")) {
	case let i: *fs::iterator =>
		yield i;
	case let e: fs::error =>
		return failure {
			path = dir,
			error = e,
		};
	};
	defer fs::finish(it);

	for (true) match (fs::next(it)) {
	case done =>
		break;
	case let de: fs::dirent =>
		if (patm && !fs::isdir(de.ftype) && !fs::islink(de.ftype)) {
			continue;
		};
		if (!fnmatch::fnmatch(pat, de.name, flgs)) {
			continue;
		};

		let b = strstack_push(&gen.pats);
		if (len(rem) > 0) {
			// More components follow: push a new pattern rather
			// than a finished pathname.
			memio::concat(b, dir, de.name, "/", rem)!;
			continue;
		};
		memio::concat(b, dir, de.name)!;
		if (patm || gen.flgs & flag::MARK != 0) {
			let m = fs::isdir(de.ftype);
			// POSIX does not specify the behavior when a pathname
			// that matches the pattern is a symlink to a
			// directory. But in major implementations a slash
			// character is appended in this case.
			if (fs::islink(de.ftype)) {
				match (os::realpath(memio::string(b)!)) {
				case let r: str =>
					match (os::stat(r)) {
					case let s: fs::filestat =>
						m = fs::isdir(s.mode);
					case fs::error => void;
					};
				case fs::error => void;
				};
			};
			if (m) {
				memio::concat(b, "/")!;
			} else if (patm) {
				// A directory was required but this entry is
				// not one: discard it.
				strstack_pop(&gen.pats);
				continue;
			};
		};
		gen.matc += 1;
	case let e: fs::error =>
		return failure {
			path = dir,
			error = e,
		};
	};

	if (gen.flgs & flag::NOSORT == 0) {
		strstack_sort(&gen.pats, l);
	};
};

// Creates a pattern whose three parts are empty dynamic streams.
fn pattern_init() pattern = pattern {
	dir = memio::dynamic(),
	pat = memio::dynamic(),
	rem = memio::dynamic(),
};

// Closes the streams backing a pattern.
fn pattern_free(p: *pattern) void = {
	io::close(&p.dir)!;
	io::close(&p.pat)!;
	io::close(&p.rem)!;
};

// Empties all three parts of a pattern so that it can be reused.
fn pattern_reset(p: *pattern) void = {
	memio::reset(&p.dir);
	memio::reset(&p.pat);
	memio::reset(&p.rem);
};

// Returns the directory part of a pattern. The result is borrowed from the
// pattern.
fn pattern_dir(p: *pattern) str = memio::string(&p.dir)!;

// Returns the first component with special characters. The result is borrowed
// from the pattern.
fn pattern_pat(p: *pattern) str = memio::string(&p.pat)!;

// Returns the part of the pattern following that component. The result is
// borrowed from the pattern.
fn pattern_rem(p: *pattern) str = memio::string(&p.rem)!;

// Splits pstr into the dir, pat and rem parts of p, replacing whatever p held
// before. If noesc is true, a backslash is treated as an ordinary character.
fn pattern_parse(p: *pattern, pstr: str, noesc: bool) void = {
	pattern_reset(p);

	let itdir = strings::iter(pstr);
	// Marks the start of the path component currently being scanned.
	let itpat = itdir;

	// p.dir is the longest directory name which contains no special
	// characters.
	for (let brk = false, esc = false; true) {
		const r = match (strings::next(&itdir)) {
		case done =>
			// No special characters at all: the whole string is a
			// literal path.
			memio::concat(&p.dir, memio::string(&p.pat)!)!;
			memio::reset(&p.pat);
			return;
		case let r: rune =>
			yield r;
		};

		if (!esc) switch (r) {
		case '*', '?' =>
			break;
		case '[' =>
			brk = true;
		case ']' =>
			if (brk) {
				break;
			};
		case '\\' =>
			if (!noesc) {
				esc = true;
				continue;
			};
		case => void;
		};

		// p.pat temporarily accumulates the current component; it is
		// moved over to p.dir each time a slash completes it.
		memio::appendrune(&p.pat, r)!;
		if (r == '/') {
			memio::concat(&p.dir, memio::string(&p.pat)!)!;
			memio::reset(&p.pat);
			itpat = itdir;
		};
		esc = false;
	};

	// p.pat is the first path component which contains special
	// characters.
	memio::reset(&p.pat);

	let esc = false;
	for (let r => strings::next(&itpat)) {
		if (!esc && r == '\\' && !noesc) {
			esc = true;
			continue;
		};

		// Escapes are kept in p.pat, except in front of a slash.
		if (esc && r != '/') {
			memio::appendrune(&p.pat, '\\')!;
		};
		memio::appendrune(&p.pat, r)!;
		if (r == '/') {
			break;
		};
		esc = false;
	};

	memio::concat(&p.rem, strings::iterstr(&itpat))!;
};

// Creates an empty string stack.
fn strstack_init() strstack = strstack {
	bufv = [],
	bufc = 0,
};

// Closes every buffer of the stack and frees the stack itself.
fn strstack_free(ss: *strstack) void = {
	for (let stream &.. ss.bufv) {
		io::close(stream)!;
	};
	free(ss.bufv);
};

// Returns the number of strings currently on the stack.
fn strstack_size(ss: *strstack) size = ss.bufc;

// Pushes an empty buffer onto the stack and returns it so that the caller can
// fill it in. A previously allocated buffer is reused when one is available.
fn strstack_push(ss: *strstack) *memio::stream = {
	if (ss.bufc == len(ss.bufv)) {
		append(ss.bufv, memio::dynamic());
	};
	let b = &ss.bufv[ss.bufc];
	memio::reset(b);
	ss.bufc += 1;
	return b;
};

// Pops the topmost string off the stack, or returns void if the stack is
// empty. The buffer stays allocated, so the result remains readable until the
// buffer is reused by [[strstack_push]].
fn strstack_pop(ss: *strstack) (str | void) = {
	if (ss.bufc == 0) {
		return;
	};
	ss.bufc -= 1;
	return memio::string(&ss.bufv[ss.bufc])!;
};

// Sorts the stack entries from index pos up to the top of the stack, using
// [[bufcmp]].
fn strstack_sort(ss: *strstack, pos: size) void = {
	if (pos > ss.bufc) {
		return;
	};
	let s = ss.bufv[pos..ss.bufc];
	sort::sort(s, size(memio::stream), &bufcmp);
};

// Compares two stack buffers by their contents, in reverse order: entries are
// popped from the end of the stack, so sorting them in descending order makes
// them come off in ascending order.
fn bufcmp(a: const *opaque, b: const *opaque) int =
	strings::compare(
		memio::string(b: *memio::stream)!,
		memio::string(a: *memio::stream)!,
	);