commit e0701955a144dc42bd8ce1ff3749fe8ce99c14c8
parent ba79464342f7aad8841f11b0b3b0eeee6ac99bfd
Author: Yasumasa Tada <ytada@spartan.dev>
Date: Wed, 4 May 2022 20:07:34 +0900
glob: escape characters correctly
Signed-off-by: Yasumasa Tada <ytada@spartan.dev>
Diffstat:
M | glob/+test.ha | | | 59 | ++++++++++++++++++++++++++++++++++++++++------------------- |
M | glob/glob.ha | | | 161 | +++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------- |
2 files changed, 144 insertions(+), 76 deletions(-)
diff --git a/glob/+test.ha b/glob/+test.ha
@@ -44,26 +44,47 @@ use strings;
};
};
-@test fn split_pattern() void = {
- const cases: [_](str, size, size) = [
- ("foo/bar/baz", 8, 11),
- ("/foo/bar/baz", 9, 12),
- ("/foobarbaz", 1, 10),
- ("foo/bar/baz/", 12, 12),
- ("foobarbaz/", 10, 10),
- ("foobarbaz", 0, 9),
- ("foo/b?r/baz", 4, 8),
- ("foob*rbaz/", 0, 10),
- ("foo[bar]baz", 0, 11),
- ("fo[o/ba[r/baz", 10, 13),
- ("fo]o/bar/b[az", 9, 13),
- ("foo/ba]r/b]az", 9, 13),
- ("foo/ba[r/b]az", 9, 13),
- ("fo[o/bar/b]az", 9, 13),
+// Table-driven check of pattern_parse. Each case is
+// (pattern string, noesc flag, expected dir, expected pat, expected rem);
+// the first two fields are passed to pattern_parse and the last three are
+// compared against pattern_dir, pattern_pat and pattern_rem.
+@test fn pattern_parse() void = {
+ const cases: [_](str, bool, str, str, str) = [
+ ("foo/bar/baz", true, "foo/bar/", "baz", ""),
+ ("foo/b\\ar/baz", true, "foo/b\\ar/", "baz", ""),
+ ("foo/b\\ar/baz", false, "foo/bar/", "baz", ""),
+ ("/foo/bar/baz", true, "/foo/bar/", "baz", ""),
+ ("/foo\\/bar/baz", true, "/foo\\/bar/", "baz", ""),
+ ("/foo\\/bar/baz", false, "/foo/bar/", "baz", ""),
+ ("/foo/bar\\/baz", true, "/foo/bar\\/", "baz", ""),
+ ("/foo/bar\\/baz", false, "/foo/bar/", "baz", ""),
+ ("/foobarbaz", true, "/", "foobarbaz", ""),
+ ("foo/bar/baz/", true, "foo/bar/baz/", "", ""),
+ ("foobarbaz/", true, "foobarbaz/", "", ""),
+ ("foobarbaz", true, "", "foobarbaz", ""),
+ ("foo/b?r/baz", true, "foo/", "b?r/", "baz"),
+ ("foo/b?r\\/baz", true, "foo/", "b?r\\/", "baz"),
+ ("foo/b?r\\/baz", false, "foo/", "b?r/", "baz"),
+ ("foob*rbaz/", true, "", "foob*rbaz/", ""),
+ ("foo[bar]baz", true, "", "foo[bar]baz", ""),
+ ("foo/b[ar]/baz/", true, "foo/", "b[ar]/", "baz/"),
+ ("foo/b[a\\r]/baz/", false, "foo/", "b[a\\r]/", "baz/"),
+ ("foo/b[a\\r]/baz/", true, "foo/", "b[a\\r]/", "baz/"),
+ ("foo/b[ar]/baz\\/", true, "foo/", "b[ar]/", "baz\\/"),
+ ("foo/b[ar]/baz\\/", false, "foo/", "b[ar]/", "baz\\/"),
+ ("foo/b\\[ar]/baz\\/", true, "foo/", "b\\[ar]/", "baz\\/"),
+ ("foo/b\\[ar]/baz\\/", false, "foo/b[ar]/baz/", "", ""),
+ ("fo[o/ba[r/baz", true, "fo[o/ba[r/", "baz", ""),
+ ("fo]o/bar/b[az", false, "fo]o/bar/", "b[az", ""),
+ ("foo/ba]r/b]az", true, "foo/ba]r/", "b]az", ""),
+ ("foo/ba[r/b]az", false, "foo/ba[r/", "b]az", ""),
+ ("fo[o/bar/b]az", true, "fo[o/bar/", "b]az", ""),
];
+ // One pattern is reused for every case; pattern_parse resets it before
+ // filling it in again.
+ let p = pattern_init();
+ defer pattern_free(&p);
for (let i = 0z; i < len(cases); i += 1) {
- const ends = split_pattern(cases[i].0);
- assert(ends.0 == cases[i].1);
- assert(ends.1 == cases[i].2);
+ pattern_parse(&p, cases[i].0, cases[i].1);
+ const dir = pattern_dir(&p);
+ const pat = pattern_pat(&p);
+ const rem = pattern_rem(&p);
+ assert(strings::compare(dir, cases[i].2) == 0);
+ assert(strings::compare(pat, cases[i].3) == 0);
+ assert(strings::compare(rem, cases[i].4) == 0);
};
};
diff --git a/glob/glob.ha b/glob/glob.ha
@@ -13,23 +13,23 @@ export type flags = enum uint {
NONE = 0,
// Slash appending is enabled. A slash character is appended to each
// pathname that is a directory that matches the pattern.
- MARK = 1 << 1,
+ MARK = 1,
// If the pattern does not match any pathname, the pattern string is
// returned.
- NOCHECK = 1 << 2,
+ NOCHECK = 1 << 1,
// Backslash escaping is disabled. A backslash character is treated as
// an ordinary character.
- NOESCAPE = 1 << 3,
+ NOESCAPE = 1 << 2,
// Pathname sorting is disabled. The order of pathnames returned is
// unspecified.
- NOSORT = 1 << 4,
+ NOSORT = 1 << 3,
};
export type generator = struct {
pats: strstack,
matc: size,
flgs: flags,
- tmps: strio::stream,
+ // Scratch pattern that next_match fills in through pattern_parse.
+ tmpp: pattern,
};
export type strstack = struct {
@@ -37,6 +37,12 @@ export type strstack = struct {
bufc: size,
};
+// The three parts that pattern_parse splits a pattern string into. Each part
+// is accumulated in its own strio stream.
+export type pattern = struct {
+ // The longest leading directory name which contains no special
+ // characters (empty when there is none).
+ dir: strio::stream,
+ // The first path component which contains special characters, or the
+ // trailing component when the pattern string has no special characters.
+ pat: strio::stream,
+ // The part of the pattern string that follows pat.
+ rem: strio::stream,
+};
+
// Information about an unsuccessful search.
export type failure = !struct {
// The path that cannot be opened or read.
@@ -58,14 +64,14 @@ export fn glob(pattern: str, flags: flags...) generator = {
pats = ss,
matc = 0,
flgs = bs,
- tmps = strio::dynamic(),
+ tmpp = pattern_init(),
};
};
// Frees all memory allocated by the generator.
export fn finish(gen: *generator) void = {
strstack_free(&gen.pats);
- io::close(&gen.tmps)!;
+ // Closes the dir, pat and rem streams of the scratch pattern.
+ pattern_free(&gen.tmpp);
};
// Returns a generated pathname. The returned string is valid until [[next]]
@@ -74,22 +80,23 @@ export fn finish(gen: *generator) void = {
// [[next]] can be repeatedly called until void is returned.
export fn next(gen: *generator) (str | void | failure) = {
+ // init is true only when exactly one string is on the stack and all
+ // three parts of gen.tmpp are still empty; presumably this identifies
+ // the first call on a fresh generator (confirm against glob()).
const init = strstack_size(&gen.pats) == 1
- && len(strio::string(&gen.tmps)) == 0;
- return match (next_match(os::cwd, gen)) {
- case void =>
- if (init && gen.flgs & flags::NOCHECK != 0) {
- return strio::string(&gen.tmps);
- };
- return void;
- case let f: failure =>
- return f;
+ && len(strio::string(&gen.tmpp.dir)) == 0
+ && len(strio::string(&gen.tmpp.pat)) == 0
+ && len(strio::string(&gen.tmpp.rem)) == 0;
+ match (next_match(os::cwd, gen)) {
case let s: str =>
return s;
+ case let f: failure =>
+ return f;
+ case void => void;
+ };
+ // Reached only when next_match produced void; the str and failure cases
+ // return above. With NOCHECK set on an initial call, hand back the
+ // contents of the first stack buffer, which presumably still holds the
+ // pattern string (verify against strstack). Otherwise the function falls
+ // through without a value, i.e. void.
+ if (init && gen.flgs & flags::NOCHECK != 0) {
+ return strio::string(&gen.pats.bufv[0]);
};
};
fn next_match(fs: *fs::fs, gen: *generator) (str | void | failure) = {
- const p = match (strstack_pop(&gen.pats)) {
+ match (strstack_pop(&gen.pats)) {
case void =>
return;
case let s: str =>
@@ -97,37 +104,30 @@ fn next_match(fs: *fs::fs, gen: *generator) (str | void | failure) = {
gen.matc -= 1;
return s;
};
- // Avoids copying overlapping memory area.
- strio::reset(&gen.tmps);
- strio::concat(&gen.tmps, s)!;
- yield strio::string(&gen.tmps);
+ pattern_parse(&gen.tmpp, s, gen.flgs & flags::NOESCAPE != 0);
};
const l = strstack_size(&gen.pats);
- const ends = split_pattern(p);
- const dir = strings::sub(p, 0, ends.0);
- let pat = strings::sub(p, ends.0, ends.1);
+ const dir = pattern_dir(&gen.tmpp);
+ let pat = pattern_pat(&gen.tmpp);
const patm = strings::hassuffix(pat, '/');
if (patm) {
pat = strings::sub(pat, 0, len(pat) - 1);
};
- let rem = "";
- if (ends.1 < len(p)) {
- rem = strings::sub(p, ends.1, strings::end);
- };
+ const rem = pattern_rem(&gen.tmpp);
let flgs = fnmatch::flags::PERIOD;
if (gen.flgs & flags::NOESCAPE != 0) {
flgs |= fnmatch::flags::NOESCAPE;
};
let it = match(fs::iter(fs, if (len(dir) > 0) dir else ".")) {
+ case let i: *fs::iterator =>
+ yield i;
case let e: fs::error =>
return failure {
path = dir,
error = e,
};
- case let i: *fs::iterator =>
- yield i;
};
defer fs::finish(it);
@@ -181,21 +181,47 @@ fn next_match(fs: *fs::fs, gen: *generator) (str | void | failure) = {
return next_match(fs, gen);
};
-fn split_pattern(p: str) (size, size) = {
- let pos = (strings::iter(p), 0z);
+// Returns a pattern whose dir, pat and rem parts are each backed by their own
+// strio::dynamic() stream. The caller releases it with pattern_free.
+fn pattern_init() pattern = pattern {
+ dir = strio::dynamic(),
+ pat = strio::dynamic(),
+ rem = strio::dynamic(),
+};
+
+// Closes the dir, pat and rem streams of p. Every close is followed by the
+// error assertion operator (!), so a failing close aborts rather than being
+// reported to the caller.
+fn pattern_free(p: *pattern) void = {
+ io::close(&p.dir)!;
+ io::close(&p.pat)!;
+ io::close(&p.rem)!;
+};
+
+// Resets the dir, pat and rem streams of p so that the same pattern can be
+// parsed into again. pattern_parse calls this before doing anything else.
+fn pattern_reset(p: *pattern) void = {
+ strio::reset(&p.dir);
+ strio::reset(&p.pat);
+ strio::reset(&p.rem);
+};
+
+// Returns the current contents of the dir part of p as a string.
+fn pattern_dir(p: *pattern) str = strio::string(&p.dir);
+
+// Returns the current contents of the pat part of p as a string.
+fn pattern_pat(p: *pattern) str = strio::string(&p.pat);
- // TODO: Handle '\' in brackets correctly.
- // TODO: Handle escaped '/' correctly.
+// Returns the current contents of the rem part of p as a string.
+fn pattern_rem(p: *pattern) str = strio::string(&p.rem);
- // p[0..dirend] is path components which contain no special
+fn pattern_parse(p: *pattern, pstr: str, noesc: bool) void = {
+ pattern_reset(p);
+
+ let itdir = strings::iter(pstr);
+ let itpat = itdir;
+
+ // p.dir is the longest directory name which contains no special
// characters.
- let dirend = 0z;
- for (let brk = false; true) match (strings::next(&pos.0)) {
- case void =>
- break;
- case let r: rune =>
- pos.1 += 1;
- switch (r) {
+ for (let brk = false, esc = false; true) {
+ const r = match (strings::next(&itdir)) {
+ case void =>
+ return;
+ case let r: rune =>
+ yield r;
+ };
+
+ if (!esc) switch (r) {
case '*', '?' =>
break;
case '[' =>
@@ -204,29 +230,50 @@ fn split_pattern(p: str) (size, size) = {
if (brk) {
break;
};
- case '/' =>
- dirend = pos.1;
+ case '\\' =>
+ if (!noesc) {
+ esc = true;
+ continue;
+ };
case => void;
};
+
+ strio::appendrune(&p.pat, r)!;
+ if (r == '/') {
+ strio::concat(&p.dir, strio::string(&p.pat))!;
+ strio::reset(&p.pat);
+ itpat = itdir;
+ };
+ esc = false;
};
- // p[dirend..patend] is the first path component which contains
- // special characters.
- let patend = len(p);
- for (true) match (strings::next(&pos.0)) {
- case void =>
- break;
- case let r: rune =>
- pos.1 += 1;
- switch (r) {
- case '/' =>
- patend = pos.1;
+ // p.pat is the first path component which contains special
+ // characters.
+ strio::reset(&p.pat);
+ for (let esc = false; true) {
+ const r = match (strings::next(&itpat)) {
+ case void =>
+ return;
+ case let r: rune =>
+ yield r;
+ };
+
+ if (!esc && r == '\\' && !noesc) {
+ esc = true;
+ continue;
+ };
+
+ if (esc && r != '/') {
+ strio::appendrune(&p.pat, '\\')!;
+ };
+ strio::appendrune(&p.pat, r)!;
+ if (r == '/') {
break;
- case => void;
};
+ esc = false;
};
- return (dirend, patend);
+ strio::concat(&p.rem, strings::iterstr(&itpat))!;
};
fn strstack_init() strstack = strstack {