hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

glob.ha (7544B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use fmt;
      5 use fnmatch;
      6 use fs;
      7 use io;
      8 use memio;
      9 use os;
     10 use path;
     11 use sort;
     12 use strings;
     13 
     14 // Bit flags, passed to [[glob]], that adjust the behavior of [[next]].
     15 export type flag = enum uint {
        	// No special behavior is requested.
     16 	NONE = 0,
     17 	// Append a slash character to every matching pathname that is a
     18 	// directory.
     19 	MARK = 1 << 0,
     20 	// When the pattern matches no pathname at all, return the pattern
     21 	// string itself.
     22 	NOCHECK = 1 << 1,
     23 	// Disable backslash escaping: a backslash is treated as an ordinary
     24 	// character.
     25 	NOESCAPE = 1 << 2,
     26 	// Disable sorting: pathnames are returned in an unspecified
     27 	// order.
     28 	NOSORT = 1 << 3,
     29 };
     30 
        // State of one glob search. It is created by [[glob]], advanced by [[next]]
        // and released by [[finish]].
     31 export type generator = struct {
        	// Stack of pending strings: patterns that still need to be
        	// expanded and pathnames that are ready to be returned.
     32 	pats: strstack,
        	// Number of entries on top of pats that are finished matches;
        	// these are returned as-is when popped.
     33 	matc: size,
        	// Flags supplied to [[glob]].
     34 	flgs: flag,
        	// Scratch pattern, re-parsed for every popped entry that is not a
        	// finished match.
     35 	tmpp: pattern,
     36 };
     37 
        // A stack of strings, each held in its own buffer. Buffers are kept after a
        // pop so that a later push can reuse them.
     38 export type strstack = struct {
        	// Every buffer created so far, including those not in use.
     39 	bufv: []memio::stream,
        	// Number of buffers currently in use, i.e. the stack height.
     40 	bufc: size,
     41 };
     42 
        // A pattern string split into three consecutive parts by pattern_parse.
     43 export type pattern = struct {
     44 	// TODO: look into working with a couple of string iterators instead
        	//
        	// Longest leading directory name which contains no special
        	// characters.
     45 	dir: memio::stream,
        	// First path component which contains special characters.
     46 	pat: memio::stream,
        	// The rest of the pattern string, following pat.
     47 	rem: memio::stream,
     48 };
     49 
     50 // Information about an unsuccessful search.
        // Returned by [[next]] when a directory cannot be opened or read; use
        // [[strerror]] to format it.
     51 export type failure = !struct {
     52 	// The path that cannot be opened or read.
        	// The string is borrowed from the generator's scratch pattern, and
        	// it is empty when the directory being searched was given as "".
     53 	path: str,
     54 	// The actual filesystem error.
     55 	error: fs::error,
     56 };
     57 
     58 // Converts an error into a human-friendly string of the form
     59 // "path: reason". The result is statically allocated, so it is overwritten
        // by the next call.
     60 export fn strerror(err: failure) str = {
     61 	static let buf: [path::MAX + 1024]u8 = [0...];
        	const reason = fs::strerror(err.error);
     62 	return fmt::bsprintf(buf, "{}: {}", err.path, reason);
     63 };
     64 
     65 // Creates a generator that yields the pathnames matching a pattern. The
     66 // result must be freed using [[finish]].
     67 export fn glob(pattern: str, flags: flag = flag::NONE) generator = {
     68 	let gen = generator {
     69 		pats = strstack_init(),
     70 		matc = 0,
     71 		flgs = flags,
     72 		tmpp = pattern_init(),
     73 	};
        	// The pattern itself is the first entry to be expanded.
     74 	memio::concat(strstack_push(&gen.pats), pattern)!;
     75 	return gen;
     76 };
     77 
     78 // Releases every buffer owned by the generator: the scratch pattern and the
        // string stack.
     79 export fn finish(gen: *generator) void = {
     80 	pattern_free(&gen.tmpp);
     81 	strstack_free(&gen.pats);
     82 };
     83 
     84 // Returns a generated pathname. The returned string is valid until [[next]]
     85 // is called again. If, during the search, a directory is encountered that
     86 // cannot be opened or read, a [[failure]] object is returned instead.
        // With [[flag::NOCHECK]], a search whose first call finds no pathname
        // returns the original pattern string once, followed by done.
     87 export fn next(gen: *generator) (str | done | failure) = {
        	// True only on the first call: the stack holds nothing but the
        	// original pattern and no pattern has been parsed yet.
     88 	const init = strstack_size(&gen.pats) == 1
     89 		&& len(memio::string(&gen.tmpp.dir)!) == 0
     90 		&& len(memio::string(&gen.tmpp.pat)!) == 0
     91 		&& len(memio::string(&gen.tmpp.rem)!) == 0;
        	const nocheck = init && gen.flgs & flag::NOCHECK != 0;
        	// The search reuses the stack buffer holding the original pattern
        	// for intermediate candidates, so the pattern is copied beforehand.
        	let orig = "";
        	if (nocheck) {
        		orig = strings::dup(memio::string(&gen.pats.bufv[0])!)!;
        	};
        	defer if (nocheck) free(orig);
     92 	match (next_match(gen)?) {
     93 	case let s: str =>
     94 		return s;
     95 	case void => void;
     96 	};
     97 	if (nocheck) {
        		// The parsed pattern is no longer needed; its dir buffer keeps
        		// the result alive until the next call.
        		memio::reset(&gen.tmpp.dir);
        		memio::concat(&gen.tmpp.dir, orig)!;
     98 		return memio::string(&gen.tmpp.dir)!;
     99 	};
    100 	return done;
    101 };
    102 
        // Pops the top of the stack and either returns it, when it is a finished
        // match, or expands it by one path component and tries again. Returns void
        // once the stack is exhausted, and a [[failure]] if a directory cannot be
        // opened or read.
    103 fn next_match(gen: *generator) (str | void | failure) = {
    104 	match (strstack_pop(&gen.pats)) {
    105 	case void =>
    106 		return;
    107 	case let s: str =>
        		// The topmost matc entries are finished pathnames.
    108 		if (gen.matc > 0) {
    109 			gen.matc -= 1;
    110 			return s;
    111 		};
    112 		pattern_parse(&gen.tmpp, s, gen.flgs & flag::NOESCAPE != 0);
    113 	};
        	// Entries pushed below are stored from this index upwards.
    114 	const l = strstack_size(&gen.pats);
    115 
    116 	const dir = pattern_dir(&gen.tmpp);
    117 	let pat = pattern_pat(&gen.tmpp);
    118 	if (pat == "") {
        		// Nothing is left to expand: dir is a literal pathname.
    119 		assert(pattern_rem(&gen.tmpp) == "");
        		if (os::exists(dir)) {
        			return dir;
        		};
        		// A missing candidate must not end the search, since other
        		// candidates may still be waiting on the stack.
    120 		return next_match(gen);
    121 	};
        	// A trailing slash means that only directories may match.
    122 	const patm = strings::hassuffix(pat, '/');
    123 	if (patm) {
    124 		pat = strings::sub(pat, 0, len(pat) - 1);
    125 	};
    126 	const rem = pattern_rem(&gen.tmpp);
    127 
    128 	let flgs = fnmatch::flag::PERIOD;
    129 	if (gen.flgs & flag::NOESCAPE != 0) {
    130 		flgs |= fnmatch::flag::NOESCAPE;
    131 	};
        	// An empty dir stands for the current working directory.
    132 	let it = match(os::iter(if (len(dir) > 0) dir else ".")) {
    133 	case let i: *fs::iterator =>
    134 		yield i;
    135 	case let e: fs::error =>
    136 		return failure {
    137 			path = dir,
    138 			error = e,
    139 		};
    140 	};
    141 	defer fs::finish(it);
    142 
    143 	for (true) match (fs::next(it)) {
    144 	case done =>
    145 		break;
    146 	case let de: fs::dirent =>
    147 		if (patm && !fs::isdir(de.ftype) && !fs::islink(de.ftype)) {
    148 			continue;
    149 		};
    150 		if (!fnmatch::fnmatch(pat, de.name, flgs)) {
    151 			continue;
    152 		};
    153 
    154 		let b = strstack_push(&gen.pats);
    155 		if (len(rem) > 0) {
        			// More components follow: queue a new pattern to be
        			// expanded later. It does not count as a match.
    156 			memio::concat(b, dir, de.name, "/", rem)!;
    157 			continue;
    158 		};
    159 		memio::concat(b, dir, de.name)!;
    160 		if (patm || gen.flgs & flag::MARK != 0) {
    161 			let m = fs::isdir(de.ftype);
    162 			// POSIX does not specify the behavior when a pathname
    163 			// that matches the pattern is a symlink to a
    164 			// directory. But in major implementation a slash
    165 			// character is appended in this case.
    166 			if (fs::islink(de.ftype)) {
    167 				match (os::realpath(memio::string(b)!)) {
    168 				case let r: str =>
    169 					match (os::stat(r)) {
    170 					case let s: fs::filestat =>
    171 						m = fs::isdir(s.mode);
    172 					case fs::error => void;
    173 					};
    174 				case fs::error => void;
    175 				};
    176 			};
    177 			if (m) {
    178 				memio::concat(b, "/")!;
    179 			} else if (patm) {
        				// The pattern demands a directory: discard the
        				// entry that was just pushed.
    180 				strstack_pop(&gen.pats);
    181 				continue;
    182 			};
    183 		};
    184 		gen.matc += 1;
    185 	case let e: fs::error =>
    186 		return failure {
    187 			path = dir,
    188 			error = e,
    189 		};
    190 	};
        	// Only the entries pushed by this call are sorted.
    191 	if (gen.flgs & flag::NOSORT == 0) {
    192 		strstack_sort(&gen.pats, l);
    193 	};
    194 
    195 	return next_match(gen);
    196 };
    197 
        // Returns a pattern whose three parts are fresh dynamic buffers.
    198 fn pattern_init() pattern = {
        	return pattern {
    199 		dir = memio::dynamic(),
    200 		pat = memio::dynamic(),
    201 		rem = memio::dynamic(),
        	};
    202 };
    203 
        // Closes the three buffers of a pattern, in the order dir, pat, rem.
    204 fn pattern_free(p: *pattern) void = {
        	const parts = [&p.dir, &p.pat, &p.rem];
        	for (let part .. parts) {
    205 		io::close(part)!;
        	};
    208 };
    209 
        // Resets the three buffers of a pattern so that it can be parsed anew.
    210 fn pattern_reset(p: *pattern) void = {
        	const parts = [&p.dir, &p.pat, &p.rem];
        	for (let part .. parts) {
    211 		memio::reset(part);
        	};
    214 };
    215 
        // Returns the current contents of the dir buffer of p as a string.
    216 fn pattern_dir(p: *pattern) str = {
        	return memio::string(&p.dir)!;
        };
    217 
        // Returns the current contents of the pat buffer of p as a string.
    218 fn pattern_pat(p: *pattern) str = {
        	return memio::string(&p.pat)!;
        };
    219 
        // Returns the current contents of the rem buffer of p as a string.
    220 fn pattern_rem(p: *pattern) str = {
        	return memio::string(&p.rem)!;
        };
    221 
        // Splits pstr into p.dir, p.pat and p.rem, discarding whatever p held
        // before. If noesc is true, a backslash is an ordinary character; otherwise
        // it escapes the character that follows it. A string without any special
        // character ends up entirely in p.dir, leaving p.pat and p.rem empty.
    222 fn pattern_parse(p: *pattern, pstr: str, noesc: bool) void = {
    223 	pattern_reset(p);
    224 
    225 	let itdir = strings::iter(pstr);
        	// Marks where the path component currently being scanned begins.
    226 	let itpat = itdir;
    227 
    228 	// p.dir is the longest directory name which contains no special
    229 	// characters.
    230 	for (let brk = false, esc = false; true) {
    231 		const r = match (strings::next(&itdir)) {
    232 		case done =>
        			// End of string without a special character: the
        			// pending component is literal too and joins p.dir.
    233 			memio::concat(&p.dir, memio::string(&p.pat)!)!;
    234 			memio::reset(&p.pat);
    235 			return;
    236 		case let r: rune =>
    237 			yield r;
    238 		};
    239 
        		// An unescaped '*' or '?', or a ']' seen after an unescaped
        		// '[', ends the literal prefix.
    240 		if (!esc) switch (r) {
    241 		case '*', '?' =>
    242 			break;
    243 		case '[' =>
    244 			brk = true;
    245 		case ']' =>
    246 			if (brk) {
    247 				break;
    248 			};
    249 		case '\\' =>
    250 			if (!noesc) {
        				// The escaping backslash itself is not copied.
    251 				esc = true;
    252 				continue;
    253 			};
    254 		case => void;
    255 		};
    256 
        		// p.pat temporarily collects the current literal component; it
        		// is moved into p.dir once its closing slash is seen.
    257 		memio::appendrune(&p.pat, r)!;
    258 		if (r == '/') {
    259 			memio::concat(&p.dir, memio::string(&p.pat)!)!;
    260 			memio::reset(&p.pat);
    261 			itpat = itdir;
    262 		};
    263 		esc = false;
    264 	};
    265 
    266 	// p.pat is the first path component which contains special
    267 	// characters.
    268 	memio::reset(&p.pat);
    269 
        	// The component is scanned again from its start. This time escaping
        	// backslashes are kept, except in front of a slash.
    270 	let esc = false;
    271 	for (let r => strings::next(&itpat)) {
    272 		if (!esc && r == '\\' && !noesc) {
    273 			esc = true;
    274 			continue;
    275 		};
    276 
    277 		if (esc && r != '/') {
    278 			memio::appendrune(&p.pat, '\\')!;
    279 		};
    280 		memio::appendrune(&p.pat, r)!;
        		// The component ends with, and includes, its slash.
    281 		if (r == '/') {
    282 			break;
    283 		};
    284 		esc = false;
    285 	};
    286 
        	// Everything that has not been consumed yet becomes p.rem.
    287 	memio::concat(&p.rem, strings::iterstr(&itpat))!;
    288 };
    289 
        // Returns an empty string stack that owns no buffers yet.
    290 fn strstack_init() strstack = {
        	return strstack {
    291 		bufv = [],
    292 		bufc = 0,
        	};
    293 };
    294 
        // Closes every buffer of the stack, whether in use or not, and frees the
        // slice that holds them.
    295 fn strstack_free(ss: *strstack) void = {
    296 	for (let i = 0z; i < len(ss.bufv); i += 1) {
    297 		io::close(&ss.bufv[i])!;
    298 	};
    299 	free(ss.bufv);
    300 };
    301 
        // Returns the number of strings currently on the stack.
    302 fn strstack_size(ss: *strstack) size = {
        	return ss.bufc;
        };
    303 
        // Pushes an empty string onto the stack and returns its buffer for the
        // caller to fill. A buffer left over from an earlier pop is reused when
        // there is one; otherwise a new buffer is appended.
    304 fn strstack_push(ss: *strstack) *memio::stream = {
        	const top = ss.bufc;
    305 	if (top == len(ss.bufv)) {
    306 		append(ss.bufv, memio::dynamic())!;
    307 	};
    310 	ss.bufc = top + 1;
    308 	const slot = &ss.bufv[top];
    309 	memio::reset(slot);
    311 	return slot;
    312 };
    313 
        // Removes the topmost string from the stack and returns it, or returns void
        // if the stack is empty. The buffer behind the string is kept for reuse, so
        // the string is overwritten by the push that reuses that buffer.
    314 fn strstack_pop(ss: *strstack) (str | void) = {
    315 	if (ss.bufc > 0) {
    318 		ss.bufc -= 1;
    319 		return memio::string(&ss.bufv[ss.bufc])!;
    317 	};
    316 	return;
    320 };
    321 
        // Sorts the strings from index pos up to the top of the stack using
        // bufcmp. Does nothing if pos lies above the top.
    322 fn strstack_sort(ss: *strstack, pos: size) void = {
    323 	if (pos <= ss.bufc) {
    326 		const part = ss.bufv[pos..ss.bufc];
    327 		sort::sort(part, size(memio::stream), &bufcmp);
    325 	};
    328 };
    329 
        // Comparison function for sort::sort over memio streams. The operands are
        // deliberately swapped, so the streams are sorted in descending order of
        // their contents.
    330 fn bufcmp(a: const *opaque, b: const *opaque) int = {
        	const lhs = memio::string(a: *memio::stream)!;
        	const rhs = memio::string(b: *memio::stream)!;
    331 	return strings::compare(rhs, lhs);
        };