hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

glob.ha (7604B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use fmt;
      5 use fnmatch;
      6 use fs;
      7 use io;
      8 use memio;
      9 use os;
     10 use path;
     11 use sort;
     12 use strings;
     13 
// Flags used to control the behavior of [[next]]. Every flag other than NONE
// occupies its own bit, so several flags can be combined with bitwise OR.
export type flag = enum uint {
	// No special behavior is requested.
	NONE = 0,
	// Slash appending is enabled. A slash character is appended to each
	// pathname that is a directory that matches the pattern.
	MARK = 1,
	// If the pattern does not match any pathname, the pattern string is
	// returned.
	NOCHECK = 1 << 1,
	// Backslash escaping is disabled. A backslash character is treated as
	// an ordinary character.
	NOESCAPE = 1 << 2,
	// Pathname sorting is disabled. The order of pathnames returned is
	// unspecified.
	NOSORT = 1 << 3,
};
     30 
// State of a search in progress. It is created by [[glob]], advanced by
// [[next]] and released by [[finish]].
export type generator = struct {
	// Stack of patterns that still have to be expanded and of pathnames
	// that are ready to be returned.
	pats: strstack,
	// Number of entries on top of pats that are finished pathnames rather
	// than patterns.
	matc: size,
	// Flags that were passed to [[glob]].
	flgs: flag,
	// Scratch storage holding the parts of the pattern that was parsed
	// most recently.
	tmpp: pattern,
};
     37 
// A stack of strings, each of which is stored in its own [[memio::stream]].
export type strstack = struct {
	// All buffers created so far. Buffers at index bufc and above are not
	// on the stack at the moment, but are kept so that a later push can
	// reuse them.
	bufv: []memio::stream,
	// Number of buffers that are currently on the stack. The top of the
	// stack is bufv[bufc - 1].
	bufc: size,
};
     42 
// A pattern string split into three consecutive parts by pattern_parse.
export type pattern = struct {
	// TODO: look into working with a couple of string iterators instead

	// The longest leading directory name which contains no special
	// characters. When the whole pattern contains no special characters,
	// this holds all of it and the other two parts are empty.
	dir: memio::stream,
	// The first path component which contains special characters,
	// including the slash that terminates it, if there is one.
	pat: memio::stream,
	// Everything that follows pat.
	rem: memio::stream,
};
     49 
// Information about an unsuccessful search. [[next]] returns it when a
// directory cannot be opened or read; [[strerror]] turns it into a message.
export type failure = !struct {
	// The path that cannot be opened or read.
	path: str,
	// The actual filesystem error.
	error: fs::error,
};
     57 
     58 // Converts an error info a human-friendly string. The result is statically
     59 // allocated.
     60 export fn strerror(err: failure) str = {
     61 	static let buf: [path::MAX + 1024]u8 = [0...];
     62 	return fmt::bsprintf(buf, "{}: {}", err.path, fs::strerror(err.error));
     63 };
     64 
     65 // Returns a generator of pathnames matching a pattern. The result must be
     66 // freed using [[finish]].
     67 export fn glob(pattern: str, flags: flag = flag::NONE) generator = {
     68 	let ss = strstack_init();
     69 	memio::concat(strstack_push(&ss), pattern)!;
     70 	return generator {
     71 		pats = ss,
     72 		matc = 0,
     73 		flgs = flags,
     74 		tmpp = pattern_init(),
     75 	};
     76 };
     77 
     78 // Frees all memory allocated by the generator.
     79 export fn finish(gen: *generator) void = {
     80 	strstack_free(&gen.pats);
     81 	pattern_free(&gen.tmpp);
     82 };
     83 
     84 // Returns a generated pathname. The returned string is valid until [[next]]
     85 // is called again. If, during the search, a directory is encountered that
     86 // cannot be opened or read, a [[failure]] object is returned instead.
     87 // [[next]] can be repeatedly called until void is returned.
     88 export fn next(gen: *generator) (str | done | failure) = {
     89 	const init = strstack_size(&gen.pats) == 1
     90 		&& len(memio::string(&gen.tmpp.dir)!) == 0
     91 		&& len(memio::string(&gen.tmpp.pat)!) == 0
     92 		&& len(memio::string(&gen.tmpp.rem)!) == 0;
     93 	match (next_match(gen)?) {
     94 	case let s: str =>
     95 		return s;
     96 	case void => void;
     97 	};
     98 	if (init && gen.flgs & flag::NOCHECK != 0) {
     99 		return memio::string(&gen.pats.bufv[0])!;
    100 	};
    101 	return done;
    102 };
    103 
    104 fn next_match(gen: *generator) (str | void | failure) = {
    105 	match (strstack_pop(&gen.pats)) {
    106 	case void =>
    107 		return;
    108 	case let s: str =>
    109 		if (gen.matc > 0) {
    110 			gen.matc -= 1;
    111 			return s;
    112 		};
    113 		pattern_parse(&gen.tmpp, s, gen.flgs & flag::NOESCAPE != 0);
    114 	};
    115 	const l = strstack_size(&gen.pats);
    116 
    117 	const dir = pattern_dir(&gen.tmpp);
    118 	let pat = pattern_pat(&gen.tmpp);
    119 	if (pat == "") {
    120 		assert(pattern_rem(&gen.tmpp) == "");
    121 		return if (os::exists(dir)) dir else void;
    122 	};
    123 	const patm = strings::hassuffix(pat, '/');
    124 	if (patm) {
    125 		pat = strings::sub(pat, 0, len(pat) - 1);
    126 	};
    127 	const rem = pattern_rem(&gen.tmpp);
    128 
    129 	let flgs = fnmatch::flag::PERIOD;
    130 	if (gen.flgs & flag::NOESCAPE != 0) {
    131 		flgs |= fnmatch::flag::NOESCAPE;
    132 	};
    133 	let it = match(os::iter(if (len(dir) > 0) dir else ".")) {
    134 	case let i: *fs::iterator =>
    135 		yield i;
    136 	case let e: fs::error =>
    137 		return failure {
    138 			path = dir,
    139 			error = e,
    140 		};
    141 	};
    142 	defer fs::finish(it);
    143 
    144 	for (true) match (fs::next(it)) {
    145 	case done =>
    146 		break;
    147 	case let de: fs::dirent =>
    148 		if (patm && !fs::isdir(de.ftype) && !fs::islink(de.ftype)) {
    149 			continue;
    150 		};
    151 		if (!fnmatch::fnmatch(pat, de.name, flgs)) {
    152 			continue;
    153 		};
    154 
    155 		let b = strstack_push(&gen.pats);
    156 		if (len(rem) > 0) {
    157 			memio::concat(b, dir, de.name, "/", rem)!;
    158 			continue;
    159 		};
    160 		memio::concat(b, dir, de.name)!;
    161 		if (patm || gen.flgs & flag::MARK != 0) {
    162 			let m = fs::isdir(de.ftype);
    163 			// POSIX does not specify the behavior when a pathname
    164 			// that matches the pattern is a symlink to a
    165 			// directory. But in major implementation a slash
    166 			// character is appended in this case.
    167 			if (fs::islink(de.ftype)) {
    168 				match (os::realpath(memio::string(b)!)) {
    169 				case let r: str =>
    170 					match (os::stat(r)) {
    171 					case let s: fs::filestat =>
    172 						m = fs::isdir(s.mode);
    173 					case fs::error => void;
    174 					};
    175 				case fs::error => void;
    176 				};
    177 			};
    178 			if (m) {
    179 				memio::concat(b, "/")!;
    180 			} else if (patm) {
    181 				strstack_pop(&gen.pats);
    182 				continue;
    183 			};
    184 		};
    185 		gen.matc += 1;
    186 	case let e: fs::error =>
    187 		return failure {
    188 			path = dir,
    189 			error = e,
    190 		};
    191 	};
    192 	if (gen.flgs & flag::NOSORT == 0) {
    193 		strstack_sort(&gen.pats, l);
    194 	};
    195 
    196 	return next_match(gen);
    197 };
    198 
    199 fn pattern_init() pattern = pattern {
    200 	dir = memio::dynamic(),
    201 	pat = memio::dynamic(),
    202 	rem = memio::dynamic(),
    203 };
    204 
    205 fn pattern_free(p: *pattern) void = {
    206 	io::close(&p.dir)!;
    207 	io::close(&p.pat)!;
    208 	io::close(&p.rem)!;
    209 };
    210 
    211 fn pattern_reset(p: *pattern) void = {
    212 	memio::reset(&p.dir);
    213 	memio::reset(&p.pat);
    214 	memio::reset(&p.rem);
    215 };
    216 
    217 fn pattern_dir(p: *pattern) str = memio::string(&p.dir)!;
    218 
    219 fn pattern_pat(p: *pattern) str = memio::string(&p.pat)!;
    220 
    221 fn pattern_rem(p: *pattern) str = memio::string(&p.rem)!;
    222 
// Splits pstr into the three parts of p: dir, pat and rem. Any previous
// contents of p are discarded. If noesc is true, a backslash is treated as an
// ordinary character instead of as an escape.
fn pattern_parse(p: *pattern, pstr: str, noesc: bool) void = {
	pattern_reset(p);

	// itdir scans ahead looking for a special character, while itpat
	// stays at the start of the path component that itdir is in.
	let itdir = strings::iter(pstr);
	let itpat = itdir;

	// p.dir is the longest directory name which contains no special
	// characters.
	for (let brk = false, esc = false; true) {
		const r = match (strings::next(&itdir)) {
		case done =>
			// The end was reached without finding a special
			// character: the whole string goes into p.dir, and
			// p.pat and p.rem stay empty.
			memio::concat(&p.dir, memio::string(&p.pat)!)!;
			memio::reset(&p.pat);
			return;
		case let r: rune =>
			yield r;
		};

		// An escaped rune is never special. Breaking out of the loop
		// means that the current component contains a special
		// character.
		if (!esc) switch (r) {
		case '*', '?' =>
			break;
		case '[' =>
			brk = true;
		case ']' =>
			// A closing bracket is special only if an opening
			// bracket was seen before it.
			if (brk) {
				break;
			};
		case '\\' =>
			// The escaping backslash itself is not copied.
			if (!noesc) {
				esc = true;
				continue;
			};
		case => void;
		};

		// p.pat serves as temporary storage for the current component
		// until a slash completes it.
		memio::appendrune(&p.pat, r)!;
		if (r == '/') {
			memio::concat(&p.dir, memio::string(&p.pat)!)!;
			memio::reset(&p.pat);
			itpat = itdir;
		};
		esc = false;
	};

	// p.pat is the first path component which contains special
	// characters.
	memio::reset(&p.pat);

	// The component is copied again from its start, this time keeping the
	// escaping backslashes, except for one in front of a slash.
	let esc = false;
	for (let r => strings::next(&itpat)) {
		if (!esc && r == '\\' && !noesc) {
			esc = true;
			continue;
		};

		if (esc && r != '/') {
			memio::appendrune(&p.pat, '\\')!;
		};
		memio::appendrune(&p.pat, r)!;
		// The slash that ends the component is kept as part of p.pat.
		if (r == '/') {
			break;
		};
		esc = false;
	};

	// Whatever is left after the component goes into p.rem unchanged.
	memio::concat(&p.rem, strings::iterstr(&itpat))!;
};
    290 
    291 fn strstack_init() strstack = strstack {
    292 	bufv = [],
    293 	bufc = 0,
    294 };
    295 
    296 fn strstack_free(ss: *strstack) void = {
    297 	for (let stream &.. ss.bufv) {
    298 		io::close(stream)!;
    299 	};
    300 	free(ss.bufv);
    301 };
    302 
    303 fn strstack_size(ss: *strstack) size = ss.bufc;
    304 
    305 fn strstack_push(ss: *strstack) *memio::stream = {
    306 	if (ss.bufc == len(ss.bufv)) {
    307 		append(ss.bufv, memio::dynamic());
    308 	};
    309 	let b = &ss.bufv[ss.bufc];
    310 	memio::reset(b);
    311 	ss.bufc += 1;
    312 	return b;
    313 };
    314 
    315 fn strstack_pop(ss: *strstack) (str | void) = {
    316 	if (ss.bufc == 0) {
    317 		return;
    318 	};
    319 	ss.bufc -= 1;
    320 	return memio::string(&ss.bufv[ss.bufc])!;
    321 };
    322 
    323 fn strstack_sort(ss: *strstack, pos: size) void = {
    324 	if (pos > ss.bufc) {
    325 		return;
    326 	};
    327 	let s = ss.bufv[pos..ss.bufc];
    328 	sort::sort(s, size(memio::stream), &bufcmp);
    329 };
    330 
    331 fn bufcmp(a: const *opaque, b: const *opaque) int =
    332 	strings::compare(
    333 		memio::string(b: *memio::stream)!,
    334 		memio::string(a: *memio::stream)!,
    335 	);