hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

glob.ha (7759B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use fmt;
      5 use fnmatch;
      6 use fs;
      7 use io;
      8 use memio;
      9 use os;
     10 use path;
     11 use sort;
     12 use strings;
     13 
     14 // Flags used to control the behavior of [[next]].
     15 export type flag = enum uint {
     16 	NONE = 0,
     17 	// Slash appending is enabled. A slash character is appended to each
     18 	// pathname that is a directory that matches the pattern.
     19 	MARK = 1,
     20 	// If the pattern does not match any pathname, the pattern string is
     21 	// returned.
     22 	NOCHECK = 1 << 1,
     23 	// Backslash escaping is disabled. A backslash character is treated as
     24 	// an ordinary character.
     25 	NOESCAPE = 1 << 2,
     26 	// Pathname sorting is disabled. The order of pathnames returned is
     27 	// unspecified.
     28 	NOSORT = 1 << 3,
     29 };
     30 
// The state of a glob search. It is created by [[glob]], advanced by [[next]]
// and released by [[finish]].
export type generator = struct {
	// Stack holding both patterns that still have to be expanded and
	// pathnames that have already been matched.
	pats: strstack,
	// Number of entries on top of pats that are finished pathnames; they
	// are returned as-is before any further pattern is expanded.
	matc: size,
	// All flags passed to [[glob]], OR-ed together.
	flgs: flag,
	// Scratch storage for the pattern currently being expanded. It is
	// overwritten every time a pattern is parsed.
	tmpp: pattern,
};
     37 
// A stack of string buffers. Popping only lowers the element count; the
// buffers themselves stay allocated so that later pushes can reuse them.
export type strstack = struct {
	// Every buffer ever allocated for this stack. May hold more buffers
	// than are currently in use.
	bufv: []memio::stream,
	// Number of buffers in use, i.e. the current height of the stack. The
	// top of the stack is bufv[bufc - 1].
	bufc: size,
};
     42 
// A glob pattern split into three consecutive parts by pattern_parse.
export type pattern = struct {
	// TODO: look into working with a couple of string iterators instead
	// Leading directory prefix that contains no special characters. It is
	// stored with escaping backslashes removed, unless escaping is
	// disabled.
	dir: memio::stream,
	// First path component that contains a special character, including
	// its trailing slash if it has one. Empty if the pattern is entirely
	// literal.
	pat: memio::stream,
	// Everything that follows pat, left unparsed.
	rem: memio::stream,
};
     49 
// Information about an unsuccessful search. Use [[strerror]] to turn it into
// a printable message.
export type failure = !struct {
	// The path that cannot be opened or read.
	// NOTE(review): this string appears to borrow from the generator's
	// scratch buffers, so it is presumably only valid until the generator
	// is advanced or finished -- confirm before storing it.
	path: str,
	// The actual filesystem error.
	error: fs::error,
};
     57 
     58 // Converts an error info a human-friendly string. The result is statically
     59 // allocated.
     60 export fn strerror(err: failure) str = {
     61 	let buf: [path::MAX + 1024]u8 = [0...];
     62 	return fmt::bsprintf(buf, "{}: {}", err.path, fs::strerror(err.error));
     63 };
     64 
     65 // Returns a generator of pathnames matching a pattern. The result must be
     66 // freed using [[finish]].
     67 export fn glob(pattern: str, flags: flag...) generator = {
     68 	let ss = strstack_init();
     69 	memio::concat(strstack_push(&ss), pattern)!;
     70 	let bs = flag::NONE;
     71 	for (let i = 0z; i < len(flags); i += 1) {
     72 		bs |= flags[i];
     73 	};
     74 	return generator {
     75 		pats = ss,
     76 		matc = 0,
     77 		flgs = bs,
     78 		tmpp = pattern_init(),
     79 	};
     80 };
     81 
     82 // Frees all memory allocated by the generator.
     83 export fn finish(gen: *generator) void = {
     84 	strstack_free(&gen.pats);
     85 	pattern_free(&gen.tmpp);
     86 };
     87 
     88 // Returns a generated pathname. The returned string is valid until [[next]]
     89 // is called again. If, during the search, a directory is encountered that
     90 // cannot be opened or read, a [[failure]] object is returned instead.
     91 // [[next]] can be repeatedly called until void is returned.
     92 export fn next(gen: *generator) (str | void | failure) = {
     93 	const init = strstack_size(&gen.pats) == 1
     94 		&& len(memio::string(&gen.tmpp.dir)!) == 0
     95 		&& len(memio::string(&gen.tmpp.pat)!) == 0
     96 		&& len(memio::string(&gen.tmpp.rem)!) == 0;
     97 	match (next_match(gen)?) {
     98 	case let s: str =>
     99 		return s;
    100 	case void => void;
    101 	};
    102 	if (init && gen.flgs & flag::NOCHECK != 0) {
    103 		return memio::string(&gen.pats.bufv[0])!;
    104 	};
    105 };
    106 
// Pops the top entry of the pattern stack and either returns it as a finished
// pathname, or expands it by one directory level, pushes the results back onto
// the stack and recurses. Returns void once the stack is empty, or a
// [[failure]] if a directory cannot be opened or read.
//
// gen.matc is the number of entries on top of the stack that are finished
// pathnames rather than patterns awaiting expansion.
fn next_match(gen: *generator) (str | void | failure) = {
	match (strstack_pop(&gen.pats)) {
	case void =>
		// Nothing left on the stack: the search is over.
		return;
	case let s: str =>
		// Finished pathnames pushed by an earlier expansion are handed
		// out before anything else is expanded.
		if (gen.matc > 0) {
			gen.matc -= 1;
			return s;
		};
		// Otherwise s is a pattern that still has to be expanded.
		pattern_parse(&gen.tmpp, s, gen.flgs & flag::NOESCAPE != 0);
	};
	// Stack height before expansion. Everything pushed below ends up at
	// index l or above and is sorted as one group at the end.
	const l = strstack_size(&gen.pats);

	const dir = pattern_dir(&gen.tmpp);
	let pat = pattern_pat(&gen.tmpp);
	if (pat == "") {
		// No component with special characters: pattern_parse stored
		// the whole string in dir, so it is a literal path that matches
		// if and only if it exists.
		assert(pattern_rem(&gen.tmpp) == "");
		return if (os::exists(dir)) dir else void;
	};
	// A trailing slash on the wildcard component means that only
	// directories may match. It is stripped before matching names.
	const patm = strings::hassuffix(pat, '/');
	if (patm) {
		pat = strings::sub(pat, 0, len(pat) - 1);
	};
	const rem = pattern_rem(&gen.tmpp);

	// Translate the glob flags into the flags used for name matching.
	let flgs = fnmatch::flag::PERIOD;
	if (gen.flgs & flag::NOESCAPE != 0) {
		flgs |= fnmatch::flag::NOESCAPE;
	};
	// An empty directory prefix means the current directory. Note that the
	// failure below still reports the empty prefix as its path.
	let it = match(os::iter(if (len(dir) > 0) dir else ".")) {
	case let i: *fs::iterator =>
		yield i;
	case let e: fs::error =>
		return failure {
			path = dir,
			error = e,
		};
	};
	defer fs::finish(it);

	for (true) match (fs::next(it)) {
	case void =>
		break;
	case let de: fs::dirent =>
		// With a trailing slash, plain files can be rejected right
		// away. Symlinks are kept for now because they may point to a
		// directory; that is resolved further down.
		if (patm && !fs::isdir(de.ftype) && !fs::islink(de.ftype)) {
			continue;
		};
		if (!fnmatch::fnmatch(pat, de.name, flgs)) {
			continue;
		};

		let b = strstack_push(&gen.pats);
		if (len(rem) > 0) {
			// More components follow: push a new pattern made of the
			// matched name plus the remainder. It is expanded by a
			// later call and is not counted in gen.matc.
			memio::concat(b, dir, de.name, "/", rem)!;
			continue;
		};
		// Last component: the entry is a finished pathname.
		memio::concat(b, dir, de.name)!;
		if (patm || gen.flgs & flag::MARK != 0) {
			let m = fs::isdir(de.ftype);
			// POSIX does not specify the behavior when a pathname
			// that matches the pattern is a symlink to a
			// directory. But in major implementation a slash
			// character is appended in this case.
			// If the link cannot be resolved or inspected, m is left
			// as it is.
			if (fs::islink(de.ftype)) {
				match (os::realpath(memio::string(b)!)) {
				case let r: str =>
					match (os::stat(r)) {
					case let s: fs::filestat =>
						m = fs::isdir(s.mode);
					case fs::error => void;
					};
				case fs::error => void;
				};
			};
			if (m) {
				memio::concat(b, "/")!;
			} else if (patm) {
				// A directory was required but this entry is not
				// one: discard the pathname that was just pushed.
				strstack_pop(&gen.pats);
				continue;
			};
		};
		gen.matc += 1;
	case let e: fs::error =>
		return failure {
			path = dir,
			error = e,
		};
	};
	// Order the entries pushed by this expansion, unless sorting is
	// disabled.
	if (gen.flgs & flag::NOSORT == 0) {
		strstack_sort(&gen.pats, l);
	};

	// Continue with whatever is now on top of the stack: a finished
	// pathname to return, or another pattern to expand.
	return next_match(gen);
};
    201 
    202 fn pattern_init() pattern = pattern {
    203 	dir = memio::dynamic(),
    204 	pat = memio::dynamic(),
    205 	rem = memio::dynamic(),
    206 };
    207 
    208 fn pattern_free(p: *pattern) void = {
    209 	io::close(&p.dir)!;
    210 	io::close(&p.pat)!;
    211 	io::close(&p.rem)!;
    212 };
    213 
    214 fn pattern_reset(p: *pattern) void = {
    215 	memio::reset(&p.dir);
    216 	memio::reset(&p.pat);
    217 	memio::reset(&p.rem);
    218 };
    219 
    220 fn pattern_dir(p: *pattern) str = memio::string(&p.dir)!;
    221 
    222 fn pattern_pat(p: *pattern) str = memio::string(&p.pat)!;
    223 
    224 fn pattern_rem(p: *pattern) str = memio::string(&p.rem)!;
    225 
// Splits the pattern string pstr into the three parts of p, discarding
// whatever p held before:
//
// - dir: the leading run of path components without special characters
// - pat: the first path component that contains a special character,
//   including its trailing slash if there is one
// - rem: everything after pat
//
// If pstr contains no special character at all, the whole string ends up in
// dir and both pat and rem stay empty. If noesc is true, a backslash is an
// ordinary character instead of an escape.
fn pattern_parse(p: *pattern, pstr: str, noesc: bool) void = {
	pattern_reset(p);

	// itdir scans ahead rune by rune, while itpat stays at the start of the
	// path component that itdir is currently inside.
	let itdir = strings::iter(pstr);
	let itpat = itdir;

	// p.dir is the longest directory name which contains no special
	// characters.
	for (let brk = false, esc = false; true) {
		const r = match (strings::next(&itdir)) {
		case void =>
			// End of input without any special character: the
			// pending component is literal too, so it joins dir.
			memio::concat(&p.dir, memio::string(&p.pat)!)!;
			memio::reset(&p.pat);
			return;
		case let r: rune =>
			yield r;
		};

		// An unescaped '*' or '?', or a ']' that follows an earlier
		// '[', makes the current component special and ends this scan.
		if (!esc) switch (r) {
		case '*', '?' =>
			break;
		case '[' =>
			brk = true;
		case ']' =>
			if (brk) {
				break;
			};
		case '\\' =>
			// The backslash itself is dropped; only the rune it
			// escapes is stored on the next iteration.
			if (!noesc) {
				esc = true;
				continue;
			};
		case => void;
		};

		// p.pat temporarily collects the current component. Once a
		// slash completes it, the component is moved over to p.dir and
		// itpat advances to the start of the next component.
		memio::appendrune(&p.pat, r)!;
		if (r == '/') {
			memio::concat(&p.dir, memio::string(&p.pat)!)!;
			memio::reset(&p.pat);
			itpat = itdir;
		};
		esc = false;
	};

	// p.pat is the first path component which contains special
	// characters.
	// The partial copy collected above had its escapes removed, so it is
	// thrown away and the component is read again from its start.
	memio::reset(&p.pat);
	for (let esc = false; true) {
		const r = match (strings::next(&itpat)) {
		case void =>
			return;
		case let r: rune =>
			yield r;
		};

		if (!esc && r == '\\' && !noesc) {
			esc = true;
			continue;
		};

		// Unlike in p.dir, escapes are kept here by writing the
		// backslash back, except in front of a slash.
		if (esc && r != '/') {
			memio::appendrune(&p.pat, '\\')!;
		};
		memio::appendrune(&p.pat, r)!;
		// The component ends after its trailing slash.
		if (r == '/') {
			break;
		};
		esc = false;
	};

	// Whatever follows the special component is stored unparsed.
	memio::concat(&p.rem, strings::iterstr(&itpat))!;
};
    298 
    299 fn strstack_init() strstack = strstack {
    300 	bufv = [],
    301 	bufc = 0,
    302 };
    303 
    304 fn strstack_free(ss: *strstack) void = {
    305 	for (let i = 0z; i < len(ss.bufv); i += 1) {
    306 		io::close(&ss.bufv[i])!;
    307 	};
    308 	free(ss.bufv);
    309 };
    310 
    311 fn strstack_size(ss: *strstack) size = ss.bufc;
    312 
    313 fn strstack_push(ss: *strstack) *memio::stream = {
    314 	if (ss.bufc == len(ss.bufv)) {
    315 		append(ss.bufv, memio::dynamic());
    316 	};
    317 	let b = &ss.bufv[ss.bufc];
    318 	memio::reset(b);
    319 	ss.bufc += 1;
    320 	return b;
    321 };
    322 
    323 fn strstack_pop(ss: *strstack) (str | void) = {
    324 	if (ss.bufc == 0) {
    325 		return;
    326 	};
    327 	ss.bufc -= 1;
    328 	return memio::string(&ss.bufv[ss.bufc])!;
    329 };
    330 
    331 fn strstack_sort(ss: *strstack, pos: size) void = {
    332 	if (pos > ss.bufc) {
    333 		return;
    334 	};
    335 	let s = ss.bufv[pos..ss.bufc];
    336 	sort::sort(s, size(memio::stream), &bufcmp);
    337 };
    338 
    339 fn bufcmp(a: const *opaque, b: const *opaque) int =
    340 	strings::compare(
    341 		memio::string(b: *memio::stream)!,
    342 		memio::string(a: *memio::stream)!,
    343 	);