hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit a22dee2b8ad1a33f6cd8c9829c9d6b98deb6df98
parent 5fae03cb0900fdea7a14b5665e1a2d6ac0f90e55
Author: Yasumasa Tada <ytada@spartan.dev>
Date:   Sun, 27 Mar 2022 18:57:27 +0900

glob: new module

Signed-off-by: Yasumasa Tada <ytada@spartan.dev>

Diffstat:
A glob/+test.ha      |  50 ++++++++++++++++++++++++++++++++++++++++++++++++++
A glob/glob.ha       | 185 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M scripts/gen-stdlib |  11 +++++++++++
M stdlib.mk          |  33 +++++++++++++++++++++++++++++++++
4 files changed, 279 insertions(+), 0 deletions(-)

diff --git a/glob/+test.ha b/glob/+test.ha
@@ -0,0 +1,50 @@
+use fnmatch;
+
+@test fn glob() void = {
+	const cases: [_]str = [
+		"/u??/*in/a*",
+		"/?sr/[sb]in/*[[:digit:]]*",
+		"/*.?a",
+		"./*.[[:alpha:]]a",
+		"[[:punct:]]*",
+		"/",
+		"//",
+		".",
+		"..",
+	];
+	const flags = [fnmatch::flags::PATHNAME];
+	for (let i = 0z; i < len(cases); i += 1) {
+		let gen = glob(cases[i]);
+		defer globfree(&gen);
+		for (true) match (next(&gen)) {
+		case void =>
+			break;
+		case let s: const str =>
+			assert(fnmatch::fnmatch(cases[i], s, flags...));
+		};
+	};
+};
+
+@test fn split_pattern() void = {
+	const cases: [_](str, size, size) = [
+		("foo/bar/baz", 8, 11),
+		("/foo/bar/baz", 9, 12),
+		("/foobarbaz", 1, 10),
+		("foo/bar/baz/", 12, 12),
+		("foobarbaz/", 10, 10),
+		("foobarbaz", 0, 9),
+		("foo/b?r/baz", 4, 8),
+		("foob*rbaz/", 0, 10),
+		("foo[bar]baz", 0, 11),
+		("fo[o/ba[r/baz", 10, 13),
+		("fo]o/bar/b[az", 9, 13),
+		("foo/ba]r/b]az", 9, 13),
+		("foo/ba[r/b]az", 9, 13),
+		("fo[o/bar/b]az", 9, 13),
+	];
+	for (let i = 0z; i < len(cases); i += 1) {
+		const ends = split_pattern(cases[i].0);
+		assert(ends.0 == cases[i].1);
+		assert(ends.1 == cases[i].2);
+	};
+};
diff --git a/glob/glob.ha b/glob/glob.ha
@@ -0,0 +1,185 @@
+use fnmatch;
+use fs;
+use io;
+use os;
+use sort;
+use strings;
+use strio;
+
+// Currently flags are not supported.
+export type flag = enum uint {
+	NONE = 0,
+	ERR = 1u << 1,
+	MARK = 1u << 2,
+	NOCHECK = 1u << 3,
+	NOESCAPE = 1u << 4,
+	NOSORT = 1u << 5,
+};
+
+export type generator = struct {
+	pats: strstack,
+	matc: size,
+	tmps: strio::dynamic_stream,
+};
+
+export type strstack = struct {
+	bufv: []strio::dynamic_stream,
+	bufc: size,
+};
+
+// Returns a generator of pathnames matching a pattern. The result must be
+// freed using [[globfree]].
+export fn glob(pattern: const str, flags: flag...) generator = {
+	let init = strstack_init();
+	strstack_push(&init, pattern);
+	return generator {
+		pats = init,
+		matc = 0,
+		tmps = strio::dynamic(),
+	};
+};
+
+// Frees all memory allocated by the generator.
+export fn globfree(gen: *generator) void = {
+	strstack_free(&gen.pats);
+	io::close(&gen.tmps);
+};
+
+// Returns a generated pathname. The returned string is valid until [[next]]
+// is called again.
+export fn next(gen: *generator) (const str | void) = {
+	match (next_match(os::cwd, gen)) {
+	case fs::error =>
+		return next(gen); // TODO: Handle errors.
+	case void =>
+		return;
+	case let m: const str =>
+		return m;
+	};
+};
+
+fn next_match(fs: *fs::fs, gen: *generator) (const str | void | fs::error) = {
+	const p = match (strstack_pop(&gen.pats)) {
+	case void =>
+		return;
+	case let s: const str =>
+		if (gen.matc > 0) {
+			gen.matc -= 1;
+			return s;
+		};
+		// Avoids copying overlapping memory area.
+		strio::reset(&gen.tmps);
+		strio::concat(&gen.tmps, s)!;
+		yield strio::string(&gen.tmps);
+	};
+	const l = strstack_size(&gen.pats);
+
+	const ends = split_pattern(p);
+	let dir = strings::sub(p, 0, ends.0);
+	let pat = strings::sub(p, ends.0, ends.1);
+	if (strings::hassuffix(pat, '/')) {
+		pat = strings::sub(pat, 0, len(pat) - 1);
+	};
+	let rem = "";
+	if (ends.1 < len(p)) {
+		rem = strings::sub(p, ends.1, strings::end);
+	};
+
+	let it = fs::iter(fs, if (len(dir) > 0) dir else ".")?;
+	for (true) match (fs::next(it)) {
+	case void =>
+		break;
+	case let de: fs::dirent =>
+		if (!fnmatch::fnmatch(pat, de.name))
+			continue;
+		if (len(rem) == 0) {
+			strstack_push(&gen.pats, dir, de.name);
+			gen.matc += 1;
+			continue;
+		};
+		strstack_push(&gen.pats, dir, de.name, "/", rem);
+	};
+
+	strstack_sort(&gen.pats, l);
+	return next_match(fs, gen);
+};
+
+fn split_pattern(p: const str) (size, size) = {
+	let pos = (strings::iter(p), 0z);
+
+	// p[0..dirend] is path components which have no special characters.
+	let dirend = 0z;
+	for (let brk = false; true) match (strings::next(&pos.0)) {
+	case void =>
+		break;
+	case let r: rune =>
+		pos.1 += 1;
+		switch (r) {
+		case '*', '?' =>
+			break;
+		case '[' =>
+			brk = true;
+		case ']' =>
+			if (brk) break;
+		case '/' =>
+			dirend = pos.1;
+		case => void;
+		};
+	};
+
+	// p[dirend..patend] is a path component which has special characters.
+	let patend = len(p);
+	for (true) match (strings::next(&pos.0)) {
+	case void =>
+		break;
+	case let r: rune =>
+		pos.1 += 1;
+		switch (r) {
+		case '/' =>
+			patend = pos.1;
+			break;
+		case => void;
+		};
+	};
+
+	return (dirend, patend);
+};
+
+fn strstack_init() strstack = strstack {
+	bufv = [],
+	bufc = 0,
+};
+
+fn strstack_free(ss: *strstack) void = {
+	for (let i = 0z; i < len(ss.bufv); i += 1)
+		io::close(&ss.bufv[i]);
+};
+
+fn strstack_size(ss: *strstack) size = ss.bufc;
+
+fn strstack_push(ss: *strstack, strs: const str...) void = {
+	if (ss.bufc == len(ss.bufv)) {
+		append(ss.bufv, strio::dynamic());
+	};
+	strio::reset(&ss.bufv[ss.bufc]);
+	strio::concat(&ss.bufv[ss.bufc], strs...)!;
+	ss.bufc += 1;
+};
+
+fn strstack_pop(ss: *strstack) (const str | void) = {
+	if (ss.bufc == 0) return;
+	ss.bufc -= 1;
+	return strio::string(&ss.bufv[ss.bufc]);
+};
+
+fn strstack_sort(ss: *strstack, pos: size) void = {
+	if (pos > ss.bufc) return;
+	let s = ss.bufv[pos..ss.bufc];
+	sort::sort(s, size(strio::dynamic_stream), &bufcmp);
+};
+
+fn bufcmp(a: const *void, b: const *void) int =
+	strings::strcmp(
+		strio::string(b: *strio::dynamic_stream),
+		strio::string(a: *strio::dynamic_stream),
+	);
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -537,6 +537,16 @@ getopt() {
 	gen_ssa getopt encoding::utf8 fmt io os strings
 }
 
+glob() {
+	if [ $testing -eq 0 ]
+	then
+		gen_srcs glob glob.ha
+	else
+		gen_srcs glob glob.ha +test.ha
+	fi
+	gen_ssa glob fnmatch fs io os sort strings strio
+}
+
 hare_ast() {
 	gen_srcs hare::ast \
 		decl.ha \
@@ -1252,6 +1262,7 @@ format::ini
 format::xml
 fs
 getopt
+glob
 hare::ast
 hare::lex
 hare::module
diff --git a/stdlib.mk b/stdlib.mk
@@ -340,6 +340,12 @@ stdlib_deps_any+=$(stdlib_getopt_any)
 stdlib_getopt_linux=$(stdlib_getopt_any)
 stdlib_getopt_freebsd=$(stdlib_getopt_any)
 
+# gen_lib glob (any)
+stdlib_glob_any=$(HARECACHE)/glob/glob-any.o
+stdlib_deps_any+=$(stdlib_glob_any)
+stdlib_glob_linux=$(stdlib_glob_any)
+stdlib_glob_freebsd=$(stdlib_glob_any)
+
 # gen_lib hare::ast (any)
 stdlib_hare_ast_any=$(HARECACHE)/hare/ast/hare_ast-any.o
 stdlib_deps_any+=$(stdlib_hare_ast_any)
@@ -1051,6 +1057,16 @@ $(HARECACHE)/getopt/getopt-any.ssa: $(stdlib_getopt_any_srcs) $(stdlib_rt) $(std
 	@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Ngetopt \
 		-t$(HARECACHE)/getopt/getopt.td $(stdlib_getopt_any_srcs)
 
+# glob (+any)
+stdlib_glob_any_srcs= \
+	$(STDLIB)/glob/glob.ha
+
+$(HARECACHE)/glob/glob-any.ssa: $(stdlib_glob_any_srcs) $(stdlib_rt) $(stdlib_fnmatch_$(PLATFORM)) $(stdlib_fs_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_os_$(PLATFORM)) $(stdlib_sort_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strio_$(PLATFORM))
+	@printf 'HAREC \t$@\n'
+	@mkdir -p $(HARECACHE)/glob
+	@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nglob \
+		-t$(HARECACHE)/glob/glob.td $(stdlib_glob_any_srcs)
+
 # hare::ast (+any)
 stdlib_hare_ast_any_srcs= \
 	$(STDLIB)/hare/ast/decl.ha \
@@ -2203,6 +2219,12 @@ testlib_deps_any+=$(testlib_getopt_any)
 testlib_getopt_linux=$(testlib_getopt_any)
 testlib_getopt_freebsd=$(testlib_getopt_any)
 
+# gen_lib glob (any)
+testlib_glob_any=$(TESTCACHE)/glob/glob-any.o
+testlib_deps_any+=$(testlib_glob_any)
+testlib_glob_linux=$(testlib_glob_any)
+testlib_glob_freebsd=$(testlib_glob_any)
+
 # gen_lib hare::ast (any)
 testlib_hare_ast_any=$(TESTCACHE)/hare/ast/hare_ast-any.o
 testlib_deps_any+=$(testlib_hare_ast_any)
@@ -2934,6 +2956,17 @@ $(TESTCACHE)/getopt/getopt-any.ssa: $(testlib_getopt_any_srcs) $(testlib_rt) $(t
 	@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Ngetopt \
 		-t$(TESTCACHE)/getopt/getopt.td $(testlib_getopt_any_srcs)
 
+# glob (+any)
+testlib_glob_any_srcs= \
+	$(STDLIB)/glob/glob.ha \
+	$(STDLIB)/glob/+test.ha
+
+$(TESTCACHE)/glob/glob-any.ssa: $(testlib_glob_any_srcs) $(testlib_rt) $(testlib_fnmatch_$(PLATFORM)) $(testlib_fs_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_sort_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strio_$(PLATFORM))
+	@printf 'HAREC \t$@\n'
+	@mkdir -p $(TESTCACHE)/glob
+	@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nglob \
+		-t$(TESTCACHE)/glob/glob.td $(testlib_glob_any_srcs)
+
 # hare::ast (+any)
 testlib_hare_ast_any_srcs= \
 	$(STDLIB)/hare/ast/decl.ha \