hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

glob.ha (7215B)


      1 // License: MPL-2.0
      2 // (c) 2022 Yasumasa Tada <ytada@spartan.dev>
      3 use fnmatch;
      4 use fs;
      5 use io;
      6 use os;
      7 use sort;
      8 use strings;
      9 use strio;
     10 
     11 // Flags used to control the behavior of [[next]].
     12 export type flags = enum uint {
     13 	NONE = 0,
     14 	// Slash appending is enabled. A slash character is appended to each
     15 	// pathname that is a directory that matches the pattern.
     16 	MARK = 1,
     17 	// If the pattern does not match any pathname, the pattern string is
     18 	// returned.
     19 	NOCHECK = 1 << 1,
     20 	// Backslash escaping is disabled. A backslash character is treated as 
     21 	// an ordinary character.
     22 	NOESCAPE = 1 << 2,
     23 	// Pathname sorting is disabled. The order of pathnames returned is
     24 	// unspecified.
     25 	NOSORT = 1 << 3,
     26 };
     27 
     28 export type generator = struct {
     29 	pats: strstack,
     30 	matc: size,
     31 	flgs: flags,
     32 	tmpp: pattern,
     33 };
     34 
     35 export type strstack = struct {
     36 	bufv: []strio::stream,
     37 	bufc: size,
     38 };
     39 
     40 export type pattern = struct {
     41 	dir: strio::stream,
     42 	pat: strio::stream,
     43 	rem: strio::stream,
     44 };
     45 
     46 // Information about an unsuccessful search.
     47 export type failure = !struct {
     48 	// The path that cannot be opened or read.
     49 	path: str,
     50 	// The actual filesystem error.
     51 	error: fs::error,
     52 };
     53 
     54 // Returns a generator of pathnames matching a pattern. The result must be
     55 // freed using [[finish]].
     56 export fn glob(pattern: str, flags: flags...) generator = {
     57 	let ss = strstack_init();
     58 	strio::concat(strstack_push(&ss), pattern)!;
     59 	let bs = flags::NONE;
     60 	for (let i = 0z; i < len(flags); i += 1) {
     61 		bs |= flags[i];
     62 	};
     63 	return generator {
     64 		pats = ss,
     65 		matc = 0,
     66 		flgs = bs,
     67 		tmpp = pattern_init(),
     68 	};
     69 };
     70 
     71 // Frees all memory allocated by the generator.
     72 export fn finish(gen: *generator) void = {
     73 	strstack_free(&gen.pats);
     74 	pattern_free(&gen.tmpp);
     75 };
     76 
     77 // Returns a generated pathname. The returned string is valid until [[next]]
     78 // is called again. If, during the search, a directory is encountered that
     79 // cannot be opened or read, a [[failure]] object is returned instead.
     80 // [[next]] can be repeatedly called until void is returned.
     81 export fn next(gen: *generator) (str | void | failure) = {
     82 	const init = strstack_size(&gen.pats) == 1
     83 		&& len(strio::string(&gen.tmpp.dir)) == 0
     84 		&& len(strio::string(&gen.tmpp.pat)) == 0
     85 		&& len(strio::string(&gen.tmpp.rem)) == 0;
     86 	match (next_match(os::cwd, gen)) {
     87 	case let s: str =>
     88 		return s;
     89 	case let f: failure =>
     90 		return f;
     91 	case void => void;
     92 	};
     93 	if (init && gen.flgs & flags::NOCHECK != 0) {
     94 		return strio::string(&gen.pats.bufv[0]);
     95 	};
     96 };
     97 
     98 fn next_match(fs: *fs::fs, gen: *generator) (str | void | failure) = {
     99 	match (strstack_pop(&gen.pats)) {
    100 	case void =>
    101 		return;
    102 	case let s: str =>
    103 		if (gen.matc > 0) {
    104 			gen.matc -= 1;
    105 			return s;
    106 		};
    107 		pattern_parse(&gen.tmpp, s, gen.flgs & flags::NOESCAPE != 0);
    108 	};
    109 	const l = strstack_size(&gen.pats);
    110 
    111 	const dir = pattern_dir(&gen.tmpp);
    112 	let pat = pattern_pat(&gen.tmpp);
    113 	const patm = strings::hassuffix(pat, '/');
    114 	if (patm) {
    115 		pat = strings::sub(pat, 0, len(pat) - 1);
    116 	};
    117 	const rem = pattern_rem(&gen.tmpp);
    118 
    119 	let flgs = fnmatch::flags::PERIOD;
    120 	if (gen.flgs & flags::NOESCAPE != 0) {
    121 		flgs |= fnmatch::flags::NOESCAPE;
    122 	};
    123 	let it = match(fs::iter(fs, if (len(dir) > 0) dir else ".")) {
    124 	case let i: *fs::iterator =>
    125 		yield i;
    126 	case let e: fs::error =>
    127 		return failure {
    128 			path = dir,
    129 			error = e,
    130 		};
    131 	};
    132 	defer fs::finish(it);
    133 
    134 	for (true) match (fs::next(it)) {
    135 	case void =>
    136 		break;
    137 	case let de: fs::dirent =>
    138 		if (patm && !fs::isdir(de.ftype) && !fs::islink(de.ftype)) {
    139 			continue;
    140 		};
    141 		if (!fnmatch::fnmatch(pat, de.name, flgs)) {
    142 			continue;
    143 		};
    144 
    145 		let b = strstack_push(&gen.pats);
    146 		if (len(rem) > 0) {
    147 			strio::concat(b, dir, de.name, "/", rem)!;
    148 			continue;
    149 		};
    150 		strio::concat(b, dir, de.name)!;
    151 		if (patm || gen.flgs & flags::MARK != 0) {
    152 			let m = fs::isdir(de.ftype);
    153 			// POSIX does not specify the behavior when a pathname
    154 			// that matches the pattern is a symlink to a
    155 			// directory. But in major implementation a slash
    156 			// character is appended in this case.
    157 			if (fs::islink(de.ftype)) {
    158 				match (fs::realpath(fs, strio::string(b))) {
    159 				case let r: str =>
    160 					match (fs::stat(fs, r)) {
    161 					case let s: fs::filestat =>
    162 						m = fs::isdir(s.mode);
    163 					case fs::error => void;
    164 					};
    165 				case fs::error => void;
    166 				};
    167 			};
    168 			if (m) {
    169 				strio::concat(b, "/")!;
    170 			} else if (patm) {
    171 				strstack_pop(&gen.pats);
    172 				continue;
    173 			};
    174 		};
    175 		gen.matc += 1;
    176 	};
    177 	if (gen.flgs & flags::NOSORT == 0) {
    178 		strstack_sort(&gen.pats, l);
    179 	};
    180 
    181 	return next_match(fs, gen);
    182 };
    183 
    184 fn pattern_init() pattern = pattern {
    185 	dir = strio::dynamic(),
    186 	pat = strio::dynamic(),
    187 	rem = strio::dynamic(),
    188 };
    189 
    190 fn pattern_free(p: *pattern) void = {
    191 	io::close(&p.dir)!;
    192 	io::close(&p.pat)!;
    193 	io::close(&p.rem)!;
    194 };
    195 
    196 fn pattern_reset(p: *pattern) void = {
    197 	strio::reset(&p.dir);
    198 	strio::reset(&p.pat);
    199 	strio::reset(&p.rem);
    200 };
    201 
    202 fn pattern_dir(p: *pattern) str = strio::string(&p.dir);
    203 
    204 fn pattern_pat(p: *pattern) str = strio::string(&p.pat);
    205 
    206 fn pattern_rem(p: *pattern) str = strio::string(&p.rem);
    207 
    208 fn pattern_parse(p: *pattern, pstr: str, noesc: bool) void = {
    209 	pattern_reset(p);
    210 
    211 	let itdir = strings::iter(pstr);
    212 	let itpat = itdir;
    213 
    214 	// p.dir is the longest directory name which contains no special
    215 	// characters.
    216 	for (let brk = false, esc = false; true) {
    217 		const r = match (strings::next(&itdir)) {
    218 		case void =>
    219 			return;
    220 		case let r: rune =>
    221 			yield r;
    222 		};
    223 
    224 		if (!esc) switch (r) {
    225 		case '*', '?' =>
    226 			break;
    227 		case '[' =>
    228 			brk = true;
    229 		case ']' =>
    230 			if (brk) {
    231 				break;
    232 			};
    233 		case '\\' =>
    234 			if (!noesc) {
    235 				esc = true;
    236 				continue;
    237 			};
    238 		case => void;
    239 		};
    240 
    241 		strio::appendrune(&p.pat, r)!;
    242 		if (r == '/') {
    243 			strio::concat(&p.dir, strio::string(&p.pat))!;
    244 			strio::reset(&p.pat);
    245 			itpat = itdir;
    246 		};
    247 		esc = false;
    248 	};
    249 	
    250 	// p.pat is the first path component which contains special
    251 	// characters.
    252 	strio::reset(&p.pat);
    253 	for (let esc = false; true) {
    254 		const r = match (strings::next(&itpat)) {
    255 		case void =>
    256 			return;
    257 		case let r: rune =>
    258 			yield r;
    259 		};
    260 
    261 		if (!esc && r == '\\' && !noesc) {
    262 			esc = true;
    263 			continue;
    264 		};
    265 
    266 		if (esc && r != '/') {
    267 			strio::appendrune(&p.pat, '\\')!;
    268 		};
    269 		strio::appendrune(&p.pat, r)!;
    270 		if (r == '/') {
    271 			break;
    272 		};
    273 		esc = false;
    274 	};
    275 
    276 	strio::concat(&p.rem, strings::iterstr(&itpat))!;
    277 };
    278 
    279 fn strstack_init() strstack = strstack {
    280 	bufv = [],
    281 	bufc = 0,
    282 };
    283 
    284 fn strstack_free(ss: *strstack) void = {
    285 	for (let i = 0z; i < len(ss.bufv); i += 1) {
    286 		io::close(&ss.bufv[i])!;
    287 	};
    288 	free(ss.bufv);
    289 };
    290 
    291 fn strstack_size(ss: *strstack) size = ss.bufc;
    292 
    293 fn strstack_push(ss: *strstack) *strio::stream = {
    294 	if (ss.bufc == len(ss.bufv)) {
    295 		append(ss.bufv, strio::dynamic());
    296 	};
    297 	let b = &ss.bufv[ss.bufc];
    298 	strio::reset(b);
    299 	ss.bufc += 1;
    300 	return b;
    301 };
    302 
    303 fn strstack_pop(ss: *strstack) (str | void) = {
    304 	if (ss.bufc == 0) {
    305 		return;
    306 	};
    307 	ss.bufc -= 1;
    308 	return strio::string(&ss.bufv[ss.bufc]);
    309 };
    310 
    311 fn strstack_sort(ss: *strstack, pos: size) void = {
    312 	if (pos > ss.bufc) {
    313 		return;
    314 	};
    315 	let s = ss.bufv[pos..ss.bufc];
    316 	sort::sort(s, size(strio::stream), &bufcmp);
    317 };
    318 
    319 fn bufcmp(a: const *void, b: const *void) int =
    320 	strings::compare(
    321 		strio::string(b: *strio::stream),
    322 		strio::string(a: *strio::stream),
    323 	);