hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit e0701955a144dc42bd8ce1ff3749fe8ce99c14c8
parent ba79464342f7aad8841f11b0b3b0eeee6ac99bfd
Author: Yasumasa Tada <ytada@spartan.dev>
Date:   Wed,  4 May 2022 20:07:34 +0900

glob: escape characters correctly

Signed-off-by: Yasumasa Tada <ytada@spartan.dev>

Diffstat:
M glob/+test.ha | 59 ++++++++++++++++++++++++++++++++++++++++-------------------
M glob/glob.ha | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
2 files changed, 144 insertions(+), 76 deletions(-)

diff --git a/glob/+test.ha b/glob/+test.ha @@ -44,26 +44,47 @@ use strings; }; }; -@test fn split_pattern() void = { - const cases: [_](str, size, size) = [ - ("foo/bar/baz", 8, 11), - ("/foo/bar/baz", 9, 12), - ("/foobarbaz", 1, 10), - ("foo/bar/baz/", 12, 12), - ("foobarbaz/", 10, 10), - ("foobarbaz", 0, 9), - ("foo/b?r/baz", 4, 8), - ("foob*rbaz/", 0, 10), - ("foo[bar]baz", 0, 11), - ("fo[o/ba[r/baz", 10, 13), - ("fo]o/bar/b[az", 9, 13), - ("foo/ba]r/b]az", 9, 13), - ("foo/ba[r/b]az", 9, 13), - ("fo[o/bar/b]az", 9, 13), +@test fn pattern_parse() void = { + const cases: [_](str, bool, str, str, str) = [ + ("foo/bar/baz", true, "foo/bar/", "baz", ""), + ("foo/b\\ar/baz", true, "foo/b\\ar/", "baz", ""), + ("foo/b\\ar/baz", false, "foo/bar/", "baz", ""), + ("/foo/bar/baz", true, "/foo/bar/", "baz", ""), + ("/foo\\/bar/baz", true, "/foo\\/bar/", "baz", ""), + ("/foo\\/bar/baz", false, "/foo/bar/", "baz", ""), + ("/foo/bar\\/baz", true, "/foo/bar\\/", "baz", ""), + ("/foo/bar\\/baz", false, "/foo/bar/", "baz", ""), + ("/foobarbaz", true, "/", "foobarbaz", ""), + ("foo/bar/baz/", true, "foo/bar/baz/", "", ""), + ("foobarbaz/", true, "foobarbaz/", "", ""), + ("foobarbaz", true, "", "foobarbaz", ""), + ("foo/b?r/baz", true, "foo/", "b?r/", "baz"), + ("foo/b?r\\/baz", true, "foo/", "b?r\\/", "baz"), + ("foo/b?r\\/baz", false, "foo/", "b?r/", "baz"), + ("foob*rbaz/", true, "", "foob*rbaz/", ""), + ("foo[bar]baz", true, "", "foo[bar]baz", ""), + ("foo/b[ar]/baz/", true, "foo/", "b[ar]/", "baz/"), + ("foo/b[a\\r]/baz/", false, "foo/", "b[a\\r]/", "baz/"), + ("foo/b[a\\r]/baz/", true, "foo/", "b[a\\r]/", "baz/"), + ("foo/b[ar]/baz\\/", true, "foo/", "b[ar]/", "baz\\/"), + ("foo/b[ar]/baz\\/", false, "foo/", "b[ar]/", "baz\\/"), + ("foo/b\\[ar]/baz\\/", true, "foo/", "b\\[ar]/", "baz\\/"), + ("foo/b\\[ar]/baz\\/", false, "foo/b[ar]/baz/", "", ""), + ("fo[o/ba[r/baz", true, "fo[o/ba[r/", "baz", ""), + ("fo]o/bar/b[az", false, "fo]o/bar/", "b[az", ""), + ("foo/ba]r/b]az", true, 
"foo/ba]r/", "b]az", ""), + ("foo/ba[r/b]az", false, "foo/ba[r/", "b]az", ""), + ("fo[o/bar/b]az", true, "fo[o/bar/", "b]az", ""), ]; + let p = pattern_init(); + defer pattern_free(&p); for (let i = 0z; i < len(cases); i += 1) { - const ends = split_pattern(cases[i].0); - assert(ends.0 == cases[i].1); - assert(ends.1 == cases[i].2); + pattern_parse(&p, cases[i].0, cases[i].1); + const dir = pattern_dir(&p); + const pat = pattern_pat(&p); + const rem = pattern_rem(&p); + assert(strings::compare(dir, cases[i].2) == 0); + assert(strings::compare(pat, cases[i].3) == 0); + assert(strings::compare(rem, cases[i].4) == 0); }; }; diff --git a/glob/glob.ha b/glob/glob.ha @@ -13,23 +13,23 @@ export type flags = enum uint { NONE = 0, // Slash appending is enabled. A slash character is appended to each // pathname that is a directory that matches the pattern. - MARK = 1 << 1, + MARK = 1, // If the pattern does not match any pathname, the pattern string is // returned. - NOCHECK = 1 << 2, + NOCHECK = 1 << 1, // Backslash escaping is disabled. A backslash character is treated as // an ordinary character. - NOESCAPE = 1 << 3, + NOESCAPE = 1 << 2, // Pathname sorting is disabled. The order of pathnames returned is // unspecified. - NOSORT = 1 << 4, + NOSORT = 1 << 3, }; export type generator = struct { pats: strstack, matc: size, flgs: flags, - tmps: strio::stream, + tmpp: pattern, }; export type strstack = struct { @@ -37,6 +37,12 @@ export type strstack = struct { bufc: size, }; +export type pattern = struct { + dir: strio::stream, + pat: strio::stream, + rem: strio::stream, +}; + // Information about an unsuccessful search. export type failure = !struct { // The path that cannot be opened or read. @@ -58,14 +64,14 @@ export fn glob(pattern: str, flags: flags...) generator = { pats = ss, matc = 0, flgs = bs, - tmps = strio::dynamic(), + tmpp = pattern_init(), }; }; // Frees all memory allocated by the generator. 
export fn finish(gen: *generator) void = { strstack_free(&gen.pats); - io::close(&gen.tmps)!; + pattern_free(&gen.tmpp); }; // Returns a generated pathname. The returned string is valid until [[next]] @@ -74,22 +80,23 @@ export fn finish(gen: *generator) void = { // [[next]] can be repeatedly called until void is returned. export fn next(gen: *generator) (str | void | failure) = { const init = strstack_size(&gen.pats) == 1 - && len(strio::string(&gen.tmps)) == 0; - return match (next_match(os::cwd, gen)) { - case void => - if (init && gen.flgs & flags::NOCHECK != 0) { - return strio::string(&gen.tmps); - }; - return void; - case let f: failure => - return f; + && len(strio::string(&gen.tmpp.dir)) == 0 + && len(strio::string(&gen.tmpp.pat)) == 0 + && len(strio::string(&gen.tmpp.rem)) == 0; + match (next_match(os::cwd, gen)) { case let s: str => return s; + case let f: failure => + return f; + case void => void; + }; + if (init && gen.flgs & flags::NOCHECK != 0) { + return strio::string(&gen.pats.bufv[0]); }; }; fn next_match(fs: *fs::fs, gen: *generator) (str | void | failure) = { - const p = match (strstack_pop(&gen.pats)) { + match (strstack_pop(&gen.pats)) { case void => return; case let s: str => @@ -97,37 +104,30 @@ fn next_match(fs: *fs::fs, gen: *generator) (str | void | failure) = { gen.matc -= 1; return s; }; - // Avoids copying overlapping memory area. 
- strio::reset(&gen.tmps); - strio::concat(&gen.tmps, s)!; - yield strio::string(&gen.tmps); + pattern_parse(&gen.tmpp, s, gen.flgs & flags::NOESCAPE != 0); }; const l = strstack_size(&gen.pats); - const ends = split_pattern(p); - const dir = strings::sub(p, 0, ends.0); - let pat = strings::sub(p, ends.0, ends.1); + const dir = pattern_dir(&gen.tmpp); + let pat = pattern_pat(&gen.tmpp); const patm = strings::hassuffix(pat, '/'); if (patm) { pat = strings::sub(pat, 0, len(pat) - 1); }; - let rem = ""; - if (ends.1 < len(p)) { - rem = strings::sub(p, ends.1, strings::end); - }; + const rem = pattern_rem(&gen.tmpp); let flgs = fnmatch::flags::PERIOD; if (gen.flgs & flags::NOESCAPE != 0) { flgs |= fnmatch::flags::NOESCAPE; }; let it = match(fs::iter(fs, if (len(dir) > 0) dir else ".")) { + case let i: *fs::iterator => + yield i; case let e: fs::error => return failure { path = dir, error = e, }; - case let i: *fs::iterator => - yield i; }; defer fs::finish(it); @@ -181,21 +181,47 @@ fn next_match(fs: *fs::fs, gen: *generator) (str | void | failure) = { return next_match(fs, gen); }; -fn split_pattern(p: str) (size, size) = { - let pos = (strings::iter(p), 0z); +fn pattern_init() pattern = pattern { + dir = strio::dynamic(), + pat = strio::dynamic(), + rem = strio::dynamic(), +}; + +fn pattern_free(p: *pattern) void = { + io::close(&p.dir)!; + io::close(&p.pat)!; + io::close(&p.rem)!; +}; + +fn pattern_reset(p: *pattern) void = { + strio::reset(&p.dir); + strio::reset(&p.pat); + strio::reset(&p.rem); +}; + +fn pattern_dir(p: *pattern) str = strio::string(&p.dir); + +fn pattern_pat(p: *pattern) str = strio::string(&p.pat); - // TODO: Handle '\' in brackets correctly. - // TODO: Handle escaped '/' correctly. 
+fn pattern_rem(p: *pattern) str = strio::string(&p.rem); - // p[0..dirend] is path components which contain no special +fn pattern_parse(p: *pattern, pstr: str, noesc: bool) void = { + pattern_reset(p); + + let itdir = strings::iter(pstr); + let itpat = itdir; + + // p.dir is the longest directory name which contains no special // characters. - let dirend = 0z; - for (let brk = false; true) match (strings::next(&pos.0)) { - case void => - break; - case let r: rune => - pos.1 += 1; - switch (r) { + for (let brk = false, esc = false; true) { + const r = match (strings::next(&itdir)) { + case void => + return; + case let r: rune => + yield r; + }; + + if (!esc) switch (r) { case '*', '?' => break; case '[' => @@ -204,29 +230,50 @@ fn split_pattern(p: str) (size, size) = { if (brk) { break; }; - case '/' => - dirend = pos.1; + case '\\' => + if (!noesc) { + esc = true; + continue; + }; case => void; }; + + strio::appendrune(&p.pat, r)!; + if (r == '/') { + strio::concat(&p.dir, strio::string(&p.pat))!; + strio::reset(&p.pat); + itpat = itdir; + }; + esc = false; }; - // p[dirend..patend] is the first path component which contains - // special characters. - let patend = len(p); - for (true) match (strings::next(&pos.0)) { - case void => - break; - case let r: rune => - pos.1 += 1; - switch (r) { - case '/' => - patend = pos.1; + // p.pat is the first path component which contains special + // characters. + strio::reset(&p.pat); + for (let esc = false; true) { + const r = match (strings::next(&itpat)) { + case void => + return; + case let r: rune => + yield r; + }; + + if (!esc && r == '\\' && !noesc) { + esc = true; + continue; + }; + + if (esc && r != '/') { + strio::appendrune(&p.pat, '\\')!; + }; + strio::appendrune(&p.pat, r)!; + if (r == '/') { break; - case => void; }; + esc = false; }; - return (dirend, patend); + strio::concat(&p.rem, strings::iterstr(&itpat))!; }; fn strstack_init() strstack = strstack {