glob.ha (7544B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use fmt;
use fnmatch;
use fs;
use io;
use memio;
use os;
use path;
use sort;
use strings;

// Bit flags which adjust how [[next]] produces pathnames. Values may be
// combined with the bitwise OR operator.
export type flag = enum uint {
	// Default behavior; no flag is set.
	NONE = 0,
	// Append a slash to every matching pathname that is a directory.
	MARK = 1 << 0,
	// When the pattern matches no pathname at all, return the pattern
	// string itself.
	NOCHECK = 1 << 1,
	// Disable backslash escaping: a backslash in the pattern is treated as
	// an ordinary character.
	NOESCAPE = 1 << 2,
	// Disable sorting of the results. The order in which pathnames are
	// returned is then unspecified.
	NOSORT = 1 << 3,
};

// State of a search started by [[glob]]. Release it with [[finish]].
export type generator = struct {
	// Stack of pending patterns and of pathnames which already matched.
	pats: strstack,
	// Number of entries on top of "pats" which are finished matches and
	// are returned as-is when popped.
	matc: size,
	// Flags the search was started with.
	flgs: flag,
	// Scratch storage for the pattern currently being expanded.
	tmpp: pattern,
};

// A stack of strings backed by reusable in-memory streams.
export type strstack = struct {
	// Every stream allocated so far. Entries at index "bufc" and above are
	// spare buffers kept around for reuse.
	bufv: []memio::stream,
	// Number of entries which are currently on the stack.
	bufc: size,
};

// A pattern split into the three parts the search works with.
export type pattern = struct {
	// TODO: look into working with a couple of string iterators instead
	// Leading part of the pattern which contains no special characters.
	dir: memio::stream,
	// First path component which contains special characters.
	pat: memio::stream,
	// Everything which follows that component.
	rem: memio::stream,
};

// Details about a search step which could not be completed.
export type failure = !struct {
	// The path that cannot be opened or read.
	path: str,
	// The underlying filesystem error.
	error: fs::error,
};

// Converts an error into a human-friendly string. The result is statically
// allocated, so it is overwritten by the next call.
export fn strerror(err: failure) str = {
	static let buf: [path::MAX + 1024]u8 = [0...];
	const reason = fs::strerror(err.error);
	return fmt::bsprintf(buf, "{}: {}", err.path, reason);
};

// Returns a generator of pathnames matching a pattern. The result must be
// freed using [[finish]].
export fn glob(pattern: str, flags: flag = flag::NONE) generator = {
	// The stack starts out holding only the caller's pattern. It is
	// expanded lazily, one step per call to next().
	let ss = strstack_init();
	memio::concat(strstack_push(&ss), pattern)!;
	return generator {
		pats = ss,
		matc = 0,
		flgs = flags,
		tmpp = pattern_init(),
	};
};

// Frees all memory allocated by the generator.
export fn finish(gen: *generator) void = {
	strstack_free(&gen.pats);
	pattern_free(&gen.tmpp);
};

// Returns a generated pathname. The returned string is valid until [[next]]
// is called again. If, during the search, a directory is encountered that
// cannot be opened or read, a [[failure]] object is returned instead; the
// search may be resumed afterwards by calling [[next]] again.
//
// If [[flag::NOCHECK]] is set and the very first call finds no pathname, the
// original pattern string is returned once.
export fn next(gen: *generator) (str | done | failure) = {
	// True only while the pattern passed to glob() is the sole stack entry
	// and nothing has been parsed yet.
	const init = strstack_size(&gen.pats) == 1
		&& len(pattern_dir(&gen.tmpp)) == 0
		&& len(pattern_pat(&gen.tmpp)) == 0
		&& len(pattern_rem(&gen.tmpp)) == 0;
	const nocheck = init && gen.flgs & flag::NOCHECK != 0;

	// The stack slot holding the original pattern is reused as soon as
	// candidates are pushed, so keep a private copy of the pattern for the
	// NOCHECK fallback.
	let saved = memio::dynamic();
	defer io::close(&saved)!;
	if (nocheck) {
		memio::concat(&saved, memio::string(&gen.pats.bufv[0])!)!;
	};

	match (next_match(gen)?) {
	case let s: str =>
		return s;
	case void => void;
	};
	if (!nocheck) {
		return done;
	};

	// Nothing matched: the stack is empty at this point, so its first
	// buffer can carry the original pattern back to the caller.
	let slot = &gen.pats.bufv[0];
	memio::reset(slot);
	memio::concat(slot, memio::string(&saved)!)!;
	return memio::string(slot)!;
};

// Pops entries off the pattern stack until a pathname can be returned.
// Returns void only once the stack is exhausted, and a [[failure]] if a
// directory cannot be opened or read.
fn next_match(gen: *generator) (str | void | failure) = {
	for (true) {
		match (strstack_pop(&gen.pats)) {
		case void =>
			return;
		case let s: str =>
			// The topmost "matc" entries are finished matches.
			if (gen.matc > 0) {
				gen.matc -= 1;
				return s;
			};
			pattern_parse(&gen.tmpp, s,
				gen.flgs & flag::NOESCAPE != 0);
		};

		if (pattern_pat(&gen.tmpp) != "") {
			// Replace the candidate with the directory entries
			// matching its first special component, then look at
			// the stack again.
			expand_dir(gen)?;
			continue;
		};

		// The candidate contains no special characters: it names at
		// most one path.
		assert(pattern_rem(&gen.tmpp) == "");
		const dir = pattern_dir(&gen.tmpp);
		if (os::exists(dir)) {
			return dir;
		};
		// A missing path must not end the search: other candidates may
		// still be waiting on the stack.
	};
};

// Reads the directory named by the parsed pattern in gen.tmpp and pushes one
// stack entry per directory entry matching the pattern's first special
// component. Entries which complete the pattern are counted in gen.matc;
// entries which still have a remainder to expand are pushed as new patterns.
fn expand_dir(gen: *generator) (void | failure) = {
	// Stack position from which the new entries are sorted afterwards.
	const base = strstack_size(&gen.pats);

	const dir = pattern_dir(&gen.tmpp);
	const rem = pattern_rem(&gen.tmpp);
	let pat = pattern_pat(&gen.tmpp);

	// A trailing slash means that only directories (or symlinks to them)
	// may match; fnmatch is given the component without the slash.
	const patm = strings::hassuffix(pat, '/');
	if (patm) {
		pat = strings::sub(pat, 0, len(pat) - 1);
	};

	let flgs = fnmatch::flag::PERIOD;
	if (gen.flgs & flag::NOESCAPE != 0) {
		flgs |= fnmatch::flag::NOESCAPE;
	};

	// An empty directory part refers to the current directory.
	let it = match(os::iter(if (len(dir) > 0) dir else ".")) {
	case let i: *fs::iterator =>
		yield i;
	case let e: fs::error =>
		return failure {
			path = dir,
			error = e,
		};
	};
	defer fs::finish(it);

	for (true) match (fs::next(it)) {
	case done =>
		break;
	case let de: fs::dirent =>
		if (patm && !fs::isdir(de.ftype) && !fs::islink(de.ftype)) {
			continue;
		};
		if (!fnmatch::fnmatch(pat, de.name, flgs)) {
			continue;
		};

		let b = strstack_push(&gen.pats);
		if (len(rem) > 0) {
			// More components follow: push a new pattern to be
			// expanded later.
			memio::concat(b, dir, de.name, "/", rem)!;
			continue;
		};
		memio::concat(b, dir, de.name)!;
		if (patm || gen.flgs & flag::MARK != 0) {
			let m = fs::isdir(de.ftype);
			// POSIX does not specify the behavior when a pathname
			// that matches the pattern is a symlink to a
			// directory. But in major implementation a slash
			// character is appended in this case.
			if (fs::islink(de.ftype)) {
				match (os::realpath(memio::string(b)!)) {
				case let r: str =>
					match (os::stat(r)) {
					case let s: fs::filestat =>
						m = fs::isdir(s.mode);
					case fs::error => void;
					};
				case fs::error => void;
				};
			};
			if (m) {
				memio::concat(b, "/")!;
			} else if (patm) {
				// The pattern demands a directory and this is
				// not one: drop the entry again.
				strstack_pop(&gen.pats);
				continue;
			};
		};
		gen.matc += 1;
	case let e: fs::error =>
		return failure {
			path = dir,
			error = e,
		};
	};

	if (gen.flgs & flag::NOSORT == 0) {
		strstack_sort(&gen.pats, base);
	};
};

// Creates a pattern whose three parts are empty dynamic streams.
fn pattern_init() pattern = pattern {
	dir = memio::dynamic(),
	pat = memio::dynamic(),
	rem = memio::dynamic(),
};

// Closes the three streams of a pattern.
fn pattern_free(p: *pattern) void = {
	io::close(&p.dir)!;
	io::close(&p.pat)!;
	io::close(&p.rem)!;
};

// Empties the three parts of a pattern so that it can be reused.
fn pattern_reset(p: *pattern) void = {
	memio::reset(&p.dir);
	memio::reset(&p.pat);
	memio::reset(&p.rem);
};

// Returns the directory part of a pattern. The string is borrowed from the
// pattern.
fn pattern_dir(p: *pattern) str = memio::string(&p.dir)!;

// Returns the first component with special characters. The string is borrowed
// from the pattern.
fn pattern_pat(p: *pattern) str = memio::string(&p.pat)!;

// Returns what follows the first special component. The string is borrowed
// from the pattern.
fn pattern_rem(p: *pattern) str = memio::string(&p.rem)!;

// Splits "pstr" into the dir, pat and rem parts of "p", replacing its previous
// contents. If "noesc" is true, a backslash is an ordinary character rather
// than an escape.
fn pattern_parse(p: *pattern, pstr: str, noesc: bool) void = {
	pattern_reset(p);

	let itdir = strings::iter(pstr);
	let itpat = itdir;

	// p.dir is the longest directory name which contains no special
	// characters.
	for (let brk = false, esc = false; true) {
		const r = match (strings::next(&itdir)) {
		case done =>
			// No special character at all: the whole string is a
			// literal path and pat/rem stay empty.
			memio::concat(&p.dir, memio::string(&p.pat)!)!;
			memio::reset(&p.pat);
			return;
		case let r: rune =>
			yield r;
		};

		if (!esc) switch (r) {
		case '*', '?' =>
			break;
		case '[' =>
			brk = true;
		case ']' =>
			if (brk) {
				break;
			};
		case '\\' =>
			if (!noesc) {
				esc = true;
				continue;
			};
		case => void;
		};

		// p.pat temporarily collects the current literal component;
		// it is moved into p.dir at each slash.
		memio::appendrune(&p.pat, r)!;
		if (r == '/') {
			memio::concat(&p.dir, memio::string(&p.pat)!)!;
			memio::reset(&p.pat);
			itpat = itdir;
		};
		esc = false;
	};

	// p.pat is the first path component which contains special
	// characters.
	memio::reset(&p.pat);

	let esc = false;
	for (let r => strings::next(&itpat)) {
		if (!esc && r == '\\' && !noesc) {
			esc = true;
			continue;
		};

		// Escapes are kept for fnmatch, except in front of a slash.
		if (esc && r != '/') {
			memio::appendrune(&p.pat, '\\')!;
		};
		memio::appendrune(&p.pat, r)!;
		if (r == '/') {
			break;
		};
		esc = false;
	};

	// p.rem is whatever has not been consumed yet.
	memio::concat(&p.rem, strings::iterstr(&itpat))!;
};

// Creates an empty string stack.
fn strstack_init() strstack = strstack {
	bufv = [],
	bufc = 0,
};

// Closes every stream the stack ever allocated and frees the stack itself.
fn strstack_free(ss: *strstack) void = {
	for (let stream &.. ss.bufv) {
		io::close(stream)!;
	};
	free(ss.bufv);
};

// Returns the number of entries currently on the stack.
fn strstack_size(ss: *strstack) size = ss.bufc;

// Pushes an empty entry and returns its stream for the caller to fill in. A
// spare stream is reused if one is available.
fn strstack_push(ss: *strstack) *memio::stream = {
	if (ss.bufc == len(ss.bufv)) {
		append(ss.bufv, memio::dynamic())!;
	};
	let b = &ss.bufv[ss.bufc];
	memio::reset(b);
	ss.bufc += 1;
	return b;
};

// Pops the top entry, or returns void if the stack is empty. The string is
// borrowed from a stream which the next push will reuse.
fn strstack_pop(ss: *strstack) (str | void) = {
	if (ss.bufc == 0) {
		return;
	};
	ss.bufc -= 1;
	return memio::string(&ss.bufv[ss.bufc])!;
};

// Sorts the entries from index "pos" up to the top of the stack, using
// [[bufcmp]] as the ordering.
fn strstack_sort(ss: *strstack, pos: size) void = {
	if (pos > ss.bufc) {
		return;
	};
	let s = ss.bufv[pos..ss.bufc];
	sort::sort(s, size(memio::stream), &bufcmp);
};

// Compares two streams by their contents with the operands swapped, which
// yields a descending order: the entry which compares lowest ends up on top of
// the stack and is popped first.
fn bufcmp(a: const *opaque, b: const *opaque) int =
	strings::compare(
		memio::string(b: *memio::stream)!,
		memio::string(a: *memio::stream)!,
	);