commit a22dee2b8ad1a33f6cd8c9829c9d6b98deb6df98
parent 5fae03cb0900fdea7a14b5665e1a2d6ac0f90e55
Author: Yasumasa Tada <ytada@spartan.dev>
Date: Sun, 27 Mar 2022 18:57:27 +0900
glob: new module
Signed-off-by: Yasumasa Tada <ytada@spartan.dev>
Diffstat:
4 files changed, 279 insertions(+), 0 deletions(-)
diff --git a/glob/+test.ha b/glob/+test.ha
@@ -0,0 +1,50 @@
+use fnmatch;
+
+@test fn glob() void = { // Each result must match its own pattern.
+	const patterns: [_]str = [
+		"/u??/*in/a*",
+		"/?sr/[sb]in/*[[:digit:]]*",
+		"/*.?a",
+		"./*.[[:alpha:]]a",
+		"[[:punct:]]*",
+		"/",
+		"//",
+		".",
+		"..",
+	];
+	const mode = fnmatch::flags::PATHNAME;
+	for (let n = 0z; n < len(patterns); n += 1) {
+		let g = glob(patterns[n]);
+		defer globfree(&g);
+		for (true) match (next(&g)) {
+		case let path: const str =>
+			assert(fnmatch::fnmatch(patterns[n], path, mode));
+		case void =>
+			break;
+		};
+	};
+};
+
+@test fn split_pattern() void = {
+	// Each row is (pattern, expected dirend, expected patend).
+	const table: [_](str, size, size) = [
+		("foo/bar/baz", 8, 11),
+		("/foo/bar/baz", 9, 12),
+		("/foobarbaz", 1, 10),
+		("foo/bar/baz/", 12, 12),
+		("foobarbaz/", 10, 10),
+		("foobarbaz", 0, 9),
+		("foo/b?r/baz", 4, 8),
+		("foob*rbaz/", 0, 10),
+		("foo[bar]baz", 0, 11),
+		("fo[o/ba[r/baz", 10, 13),
+		("fo]o/bar/b[az", 9, 13),
+		("foo/ba]r/b]az", 9, 13),
+		("foo/ba[r/b]az", 9, 13),
+		("fo[o/bar/b]az", 9, 13),
+	];
+	for (let n = 0z; n < len(table); n += 1) {
+		const got = split_pattern(table[n].0);
+		assert(got.0 == table[n].1 && got.1 == table[n].2);
+	};
+};
diff --git a/glob/glob.ha b/glob/glob.ha
@@ -0,0 +1,185 @@
+use fnmatch;
+use fs;
+use io;
+use os;
+use sort;
+use strings;
+use strio;
+
+// Flags for [[glob]]. Currently flags are not supported and are ignored.
+export type flag = enum uint {
+ NONE = 0,
+ ERR = 1u << 1, // NOTE(review): bit 0 is never assigned; confirm that is intended
+ MARK = 1u << 2,
+ NOCHECK = 1u << 3,
+ NOESCAPE = 1u << 4,
+ NOSORT = 1u << 5,
+};
+
+export type generator = struct { // Iteration state; made by [[glob]], released by [[globfree]].
+ pats: strstack, // Patterns still to expand, with finished matches on top.
+ matc: size, // Number of finished matches currently on top of pats.
+ tmps: strio::dynamic_stream, // Scratch copy of the pattern being expanded.
+};
+
+export type strstack = struct { // Stack of strings backed by reusable buffers.
+ bufv: []strio::dynamic_stream, // Buffers; those at index >= bufc are spare.
+ bufc: size, // Number of strings currently on the stack.
+};
+
+// Creates a generator yielding the pathnames which match the pattern. The
+// result must be freed using [[globfree]]. The flags are currently ignored.
+export fn glob(pattern: const str, flags: flag...) generator = {
+	let gen = generator {
+		pats = strstack_init(),
+		matc = 0,
+		tmps = strio::dynamic(),
+	};
+	strstack_push(&gen.pats, pattern);
+	return gen;
+};
+
+// Releases the generator's scratch buffer and its stack of pending strings.
+export fn globfree(gen: *generator) void = {
+	io::close(&gen.tmps);
+	strstack_free(&gen.pats);
+};
+
+// Returns the next matching pathname, or void once none are left. The
+// returned string is only valid until [[next]] is called again.
+export fn next(gen: *generator) (const str | void) = {
+	for (true) match (next_match(os::cwd, gen)) {
+	case fs::error =>
+		continue; // TODO: Handle errors; loop, don't recurse.
+	case let m: const str =>
+		return m;
+	case void =>
+		return;
+	};
+};
+// Pops gen.pats: returns a finished match, or expands a pattern and tries again.
+fn next_match(fs: *fs::fs, gen: *generator) (const str | void | fs::error) = {
+ const p = match (strstack_pop(&gen.pats)) {
+ case void =>
+ return;
+ case let s: const str =>
+ if (gen.matc > 0) { // The top matc entries are finished matches.
+ gen.matc -= 1;
+ return s;
+ };
+ // Copy the pattern: s aliases a stack buffer which the pushes below may reuse.
+ strio::reset(&gen.tmps);
+ strio::concat(&gen.tmps, s)!;
+ yield strio::string(&gen.tmps);
+ };
+ const l = strstack_size(&gen.pats); // Entries pushed below start at index l.
+
+ const ends = split_pattern(p);
+ let dir = strings::sub(p, 0, ends.0); // Literal directory prefix.
+ let pat = strings::sub(p, ends.0, ends.1); // Component matched against entries.
+ if (strings::hassuffix(pat, '/')) {
+ pat = strings::sub(pat, 0, len(pat) - 1);
+ };
+ let rem = ""; // Whatever follows that component.
+ if (ends.1 < len(p)) {
+ rem = strings::sub(p, ends.1, strings::end);
+ };
+
+ let it = fs::iter(fs, if (len(dir) > 0) dir else ".")?; // ? hands errors to the caller.
+ for (true) match (fs::next(it)) {
+ case void =>
+ break;
+ case let de: fs::dirent =>
+ if (!fnmatch::fnmatch(pat, de.name))
+ continue;
+ if (len(rem) == 0) { // Nothing left to expand: a finished match.
+ strstack_push(&gen.pats, dir, de.name);
+ gen.matc += 1;
+ continue;
+ };
+ strstack_push(&gen.pats, dir, de.name, "/", rem); // Still a pattern: expanded later.
+ };
+
+ strstack_sort(&gen.pats, l); // Only the entries pushed by this call.
+ return next_match(fs, gen);
+};
+// Returns (dirend, patend); both are explained at the loops that compute them.
+fn split_pattern(p: const str) (size, size) = {
+ let pos = (strings::iter(p), 0z); // (rune iterator, runes consumed so far)
+
+ // p[0..dirend] is path components which have no special characters.
+ let dirend = 0z;
+ for (let brk = false; true) match (strings::next(&pos.0)) {
+ case void =>
+ break;
+ case let r: rune =>
+ pos.1 += 1;
+ switch (r) {
+ case '*', '?' =>
+ break;
+ case '[' =>
+ brk = true; // From now on a ']' ends this loop.
+ case ']' =>
+ if (brk) break;
+ case '/' =>
+ dirend = pos.1; // Index just past the most recent '/'.
+ case => void;
+ };
+ };
+
+ // p[dirend..patend] is a path component which has special characters.
+ let patend = len(p); // NOTE(review): len(p) vs. the rune counter pos.1; confirm for non-ASCII
+ for (true) match (strings::next(&pos.0)) { // Resumes where the first loop stopped.
+ case void =>
+ break;
+ case let r: rune =>
+ pos.1 += 1;
+ switch (r) {
+ case '/' =>
+ patend = pos.1; // Index just past the '/' ending the component.
+ break;
+ case => void;
+ };
+ };
+
+ return (dirend, patend);
+};
+
+fn strstack_init() strstack = strstack { // Returns an empty stack with no buffers.
+ bufv = [],
+ bufc = 0,
+};
+
+fn strstack_free(ss: *strstack) void = { // Frees all memory owned by the stack.
+	for (let i = 0z; i < len(ss.bufv); i += 1) io::close(&ss.bufv[i]);
+	free(ss.bufv); // The slice itself was grown by append() in strstack_push.
+};
+
+fn strstack_size(ss: *strstack) size = ss.bufc; // Number of strings on the stack.
+
+fn strstack_push(ss: *strstack, strs: const str...) void = { // Pushes strs joined together.
+ if (ss.bufc == len(ss.bufv)) { // No spare buffer left: add a new one.
+ append(ss.bufv, strio::dynamic());
+ };
+ strio::reset(&ss.bufv[ss.bufc]); // The buffer may still hold a popped string.
+ strio::concat(&ss.bufv[ss.bufc], strs...)!;
+ ss.bufc += 1;
+};
+
+fn strstack_pop(ss: *strstack) (const str | void) = { // Pops the top string; void if empty.
+ if (ss.bufc == 0) return;
+ ss.bufc -= 1;
+ return strio::string(&ss.bufv[ss.bufc]); // Borrowed from a buffer the next push reuses.
+};
+
+fn strstack_sort(ss: *strstack, pos: size) void = { // Sorts the entries from index pos to the top.
+ if (pos > ss.bufc) return;
+ let s = ss.bufv[pos..ss.bufc];
+ sort::sort(s, size(strio::dynamic_stream), &bufcmp); // Ordered by bufcmp.
+};
+
+fn bufcmp(a: const *void, b: const *void) int = // Compares b's string against a's.
+ strings::strcmp(
+ strio::string(b: *strio::dynamic_stream), // b first: presumably so pops come out ascending
+ strio::string(a: *strio::dynamic_stream),
+ );
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -537,6 +537,16 @@ getopt() {
gen_ssa getopt encoding::utf8 fmt io os strings
}
+glob() {
+ if [ $testing -eq 0 ]
+ then
+ gen_srcs glob glob.ha
+ else
+ gen_srcs glob glob.ha +test.ha # +test.ha is listed only when $testing is non-zero
+ fi
+ gen_ssa glob fnmatch fs io os sort strings strio # the modules glob.ha brings in with "use"
+}
+
hare_ast() {
gen_srcs hare::ast \
decl.ha \
@@ -1252,6 +1262,7 @@ format::ini
format::xml
fs
getopt
+glob
hare::ast
hare::lex
hare::module
diff --git a/stdlib.mk b/stdlib.mk
@@ -340,6 +340,12 @@ stdlib_deps_any+=$(stdlib_getopt_any)
stdlib_getopt_linux=$(stdlib_getopt_any)
stdlib_getopt_freebsd=$(stdlib_getopt_any)
+# gen_lib glob (any)
+stdlib_glob_any=$(HARECACHE)/glob/glob-any.o
+stdlib_deps_any+=$(stdlib_glob_any)
+stdlib_glob_linux=$(stdlib_glob_any)
+stdlib_glob_freebsd=$(stdlib_glob_any)
+
# gen_lib hare::ast (any)
stdlib_hare_ast_any=$(HARECACHE)/hare/ast/hare_ast-any.o
stdlib_deps_any+=$(stdlib_hare_ast_any)
@@ -1051,6 +1057,16 @@ $(HARECACHE)/getopt/getopt-any.ssa: $(stdlib_getopt_any_srcs) $(stdlib_rt) $(std
@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Ngetopt \
-t$(HARECACHE)/getopt/getopt.td $(stdlib_getopt_any_srcs)
+# glob (+any)
+stdlib_glob_any_srcs= \
+ $(STDLIB)/glob/glob.ha
+
+$(HARECACHE)/glob/glob-any.ssa: $(stdlib_glob_any_srcs) $(stdlib_rt) $(stdlib_fnmatch_$(PLATFORM)) $(stdlib_fs_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_os_$(PLATFORM)) $(stdlib_sort_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strio_$(PLATFORM))
+ @printf 'HAREC \t$@\n'
+ @mkdir -p $(HARECACHE)/glob
+ @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nglob \
+ -t$(HARECACHE)/glob/glob.td $(stdlib_glob_any_srcs)
+
# hare::ast (+any)
stdlib_hare_ast_any_srcs= \
$(STDLIB)/hare/ast/decl.ha \
@@ -2203,6 +2219,12 @@ testlib_deps_any+=$(testlib_getopt_any)
testlib_getopt_linux=$(testlib_getopt_any)
testlib_getopt_freebsd=$(testlib_getopt_any)
+# gen_lib glob (any)
+testlib_glob_any=$(TESTCACHE)/glob/glob-any.o
+testlib_deps_any+=$(testlib_glob_any)
+testlib_glob_linux=$(testlib_glob_any)
+testlib_glob_freebsd=$(testlib_glob_any)
+
# gen_lib hare::ast (any)
testlib_hare_ast_any=$(TESTCACHE)/hare/ast/hare_ast-any.o
testlib_deps_any+=$(testlib_hare_ast_any)
@@ -2934,6 +2956,17 @@ $(TESTCACHE)/getopt/getopt-any.ssa: $(testlib_getopt_any_srcs) $(testlib_rt) $(t
@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Ngetopt \
-t$(TESTCACHE)/getopt/getopt.td $(testlib_getopt_any_srcs)
+# glob (+any)
+testlib_glob_any_srcs= \
+ $(STDLIB)/glob/glob.ha \
+ $(STDLIB)/glob/+test.ha
+
+$(TESTCACHE)/glob/glob-any.ssa: $(testlib_glob_any_srcs) $(testlib_rt) $(testlib_fnmatch_$(PLATFORM)) $(testlib_fs_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_sort_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strio_$(PLATFORM))
+ @printf 'HAREC \t$@\n'
+ @mkdir -p $(TESTCACHE)/glob
+ @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nglob \
+ -t$(TESTCACHE)/glob/glob.td $(testlib_glob_any_srcs)
+
# hare::ast (+any)
testlib_hare_ast_any_srcs= \
$(STDLIB)/hare/ast/decl.ha \