commit e9387b648e91996f7d61825857224fc282b97c61
parent 1ce64002a02de07e29b395f56c4208c767357f19
Author: Alexey Yerin <yyp@disroot.org>
Date: Sun, 7 Nov 2021 11:44:41 +0300
shlex: new module
For now only containing split, next up is quote.
Signed-off-by: Alexey Yerin <yyp@disroot.org>
Diffstat:
5 files changed, 205 insertions(+), 0 deletions(-)
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -859,6 +859,23 @@ gensrcs_strconv() {
$*
}
+# Hands the shlex module's source list to gen_srcs: split.ha first, followed by
+# whatever extra file names the caller passed in.
+gensrcs_shlex() {
+	gen_srcs shlex split.ha $*
+}
+
+shlex() {
+ if [ $testing -eq 0 ]
+ then
+ gensrcs_shlex
+ else
+ gensrcs_shlex \
+ +test.ha
+ fi
+ gen_ssa shlex bufio io strings strio
+}
+
slice() {
gen_srcs slice \
reverse.ha \
@@ -1096,6 +1113,7 @@ math::random
os linux freebsd
os::exec linux freebsd
path
+shlex
slice
sort
strconv
diff --git a/shlex/+test.ha b/shlex/+test.ha
@@ -0,0 +1,39 @@
+// Exercises split() with each quoting style, with several words, and with
+// inputs that must be rejected as syntax errors.
+@test fn split() void = {
+	// An unquoted backslash escapes the following space.
+	const esc = split("hello\\ world")!;
+	defer splitfree(esc);
+	assert(len(esc) == 1);
+	assert(esc[0] == "hello world");
+
+	// Inside single quotes the backslash is an ordinary character.
+	const sq = split("'hello\\ world'")!;
+	defer splitfree(sq);
+	assert(len(sq) == 1);
+	assert(sq[0] == "hello\\ world");
+
+	// Inside double quotes an escaped backslash yields one backslash.
+	const dq = split("\"hello\\\\world\"")!;
+	defer splitfree(dq);
+	assert(len(dq) == 1);
+	assert(dq[0] == "hello\\world");
+
+	// Adjacent quoted sections are joined into a single word.
+	const joined = split("\"hello \"'\"'\"world\"'\"'")!;
+	defer splitfree(joined);
+	assert(len(joined) == 1);
+	assert(joined[0] == "hello \"world\"");
+
+	// One word per quoting style.
+	const words = split("with\\ backslashes 'single quoted' \"double quoted\"")!;
+	defer splitfree(words);
+	assert(len(words) == 3);
+	assert(words[0] == "with backslashes");
+	assert(words[1] == "single quoted");
+	assert(words[2] == "double quoted");
+
+	// A quoted word followed by an unquoted one.
+	const pair = split("'multiple spaces' 42")!;
+	defer splitfree(pair);
+	assert(len(pair) == 2);
+	assert(pair[0] == "multiple spaces");
+	assert(pair[1] == "42");
+
+	// Unterminated quotes and a trailing backslash are syntax errors.
+	assert(split("\"dangling double quote") is syntaxerr);
+	assert(split("'dangling single quote") is syntaxerr);
+	assert(split("unterminated\\ backslash \\") is syntaxerr);
+};
diff --git a/shlex/README b/shlex/README
@@ -0,0 +1 @@
+The shlex module provides lexical tools for working with POSIX shell grammar.
diff --git a/shlex/split.ha b/shlex/split.ha
@@ -0,0 +1,114 @@
+use bufio;
+use io;
+use strings;
+use strio;
+
+// Invalid shell syntax.
+export type syntaxerr = !void;
+
+// Splits a string of arguments according to shell quoting and returns the
+// resulting words. Unquoted spaces, tabs and newlines separate words;
+// backslashes, single quotes and double quotes are interpreted and removed. A
+// quoted empty string ('' or "") yields an empty word, as it does in a POSIX
+// shell. Returns [[syntaxerr]] if a quote is left unterminated or the input
+// ends right after a backslash; nothing is leaked in that case. On success the
+// result must be freed using [[splitfree]] when the caller is done processing
+// it.
+export fn split(in: const str) ([]str | syntaxerr) = {
+	let in = bufio::fixed(strings::toutf8(in), io::mode::READ);
+	defer io::close(in);
+
+	let s = strio::dynamic();
+	let slice: []str = [];
+
+	// The scan_* helpers leave this function through "?" on a syntax error.
+	// Until the result is complete, release the word under construction
+	// (closing a strio::dynamic stream frees its buffer) and every word
+	// collected so far.
+	let ok = false;
+	defer if (!ok) {
+		io::close(s);
+		splitfree(slice);
+	};
+
+	// Set once a quote has been opened in the current word, so that '' and
+	// "" produce an empty word instead of being dropped.
+	let quoted = false;
+
+	for (true) {
+		const r = match (bufio::scanrune(in)!) {
+		case r: rune =>
+			yield r;
+		case io::EOF =>
+			break;
+		};
+
+		switch (r) {
+		case '\\' =>
+			scan_backslash(s, in)?;
+		case '"' =>
+			quoted = true;
+			scan_double(s, in)?;
+		case '\'' =>
+			quoted = true;
+			scan_single(s, in)?;
+		case ' ', '\t', '\n' =>
+			// Runs of separators collapse: only finish a word if
+			// one has actually been started.
+			if (quoted || len(strio::string(s)) > 0) {
+				append(slice, strio::finish(s));
+				s = strio::dynamic();
+				quoted = false;
+			};
+		case =>
+			strio::appendrune(s, r)!;
+		};
+	};
+
+	// Nothing below can fail, and strio::finish hands the buffer over to
+	// us, so disarm the error cleanup before finishing the stream.
+	ok = true;
+
+	const buf = strio::finish(s);
+	if (quoted || len(buf) > 0) {
+		append(slice, buf);
+	};
+
+	return slice;
+};
+
+// Handles the character that follows an unquoted backslash (the backslash
+// itself has already been read from in). The character is appended to out
+// as-is, except for a newline, which is dropped. Returns [[syntaxerr]] if the
+// input ends right after the backslash.
+fn scan_backslash(out: io::handle, in: io::handle) (void | syntaxerr) = {
+	match (bufio::scanrune(in)!) {
+	case io::EOF =>
+		return syntaxerr;
+	case escaped: rune =>
+		// POSIX: a <backslash><newline> pair is removed before the
+		// input is split into tokens. It is not replaced by white
+		// space, so it cannot act as a token separator; we simply emit
+		// nothing for it.
+		if (escaped != '\n') {
+			strio::appendrune(out, escaped)!;
+		};
+	};
+};
+
+// Scans the remainder of a double-quoted section (the opening quote has already
+// been read from in) and appends its contents to out, up to but excluding the
+// closing quote. Following POSIX, a backslash inside double quotes only escapes
+// $, `, ", \ and <newline>; in front of any other character the backslash is
+// kept literally. Returns [[syntaxerr]] if the input ends before the closing
+// quote.
+fn scan_double(out: io::handle, in: io::handle) (void | syntaxerr) = {
+	for (true) {
+		const r = match (bufio::scanrune(in)!) {
+		case r: rune =>
+			yield r;
+		case io::EOF =>
+			return syntaxerr;
+		};
+
+		switch (r) {
+		case '"' =>
+			break;
+		case '\\' =>
+			const next = match (bufio::scanrune(in)!) {
+			case c: rune =>
+				yield c;
+			case io::EOF =>
+				return syntaxerr;
+			};
+
+			// <backslash><newline> is a line continuation and
+			// contributes nothing to the word.
+			if (next == '\n') {
+				continue;
+			};
+
+			// The backslash only disappears in front of the
+			// characters it is allowed to escape here.
+			if (next != '$' && next != '`'
+					&& next != '"' && next != '\\') {
+				strio::appendrune(out, '\\')!;
+			};
+			strio::appendrune(out, next)!;
+		case =>
+			strio::appendrune(out, r)!;
+		};
+	};
+};
+
+// Scans the remainder of a single-quoted section (the opening quote has already
+// been read from in), copying every character verbatim to out until the closing
+// quote. Returns [[syntaxerr]] if the input ends before the closing quote.
+fn scan_single(out: io::handle, in: io::handle) (void | syntaxerr) = {
+	for (true) {
+		match (bufio::scanrune(in)!) {
+		case io::EOF =>
+			return syntaxerr;
+		case c: rune =>
+			if (c == '\'') {
+				return;
+			};
+			strio::appendrune(out, c)!;
+		};
+	};
+};
+
+// TODO: rehome a more generic version into strings/?
+
+// Releases a result obtained from [[split]]: first each string in the slice,
+// then the slice itself.
+export fn splitfree(s: const []str) void = {
+	let idx = 0z;
+	for (idx < len(s); idx += 1) {
+		free(s[idx]);
+	};
+	free(s);
+};
diff --git a/stdlib.mk b/stdlib.mk
@@ -474,6 +474,12 @@ stdlib_deps_any+=$(stdlib_path_any)
stdlib_path_linux=$(stdlib_path_any)
stdlib_path_freebsd=$(stdlib_path_any)
+# gen_lib shlex (any)
+# shlex has a single platform-independent object under $(HARECACHE); it is
+# added to stdlib_deps_any and the linux/freebsd variables simply alias it.
+stdlib_shlex_any=$(HARECACHE)/shlex/shlex-any.o
+stdlib_deps_any+=$(stdlib_shlex_any)
+stdlib_shlex_linux=$(stdlib_shlex_any)
+stdlib_shlex_freebsd=$(stdlib_shlex_any)
+
# gen_lib slice (any)
stdlib_slice_any=$(HARECACHE)/slice/slice-any.o
stdlib_deps_any+=$(stdlib_slice_any)
@@ -1390,6 +1396,16 @@ $(HARECACHE)/path/path-any.ssa: $(stdlib_path_any_srcs) $(stdlib_rt) $(stdlib_st
@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Npath \
-t$(HARECACHE)/path/path.td $(stdlib_path_any_srcs)
+# shlex (+any)
+# Sources of the regular (non-test) build of shlex.
+stdlib_shlex_any_srcs= \
+ $(STDLIB)/shlex/split.ha
+
+# Runs $(HAREC) over the sources above to produce shlex-any.ssa, passing
+# shlex.td via -t (presumably the module's typedef output -- confirm against
+# harec). Prerequisites are the sources, $(stdlib_rt) and the bufio, io,
+# strings and strio objects for the selected $(PLATFORM).
+$(HARECACHE)/shlex/shlex-any.ssa: $(stdlib_shlex_any_srcs) $(stdlib_rt) $(stdlib_bufio_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strio_$(PLATFORM))
+ @printf 'HAREC \t$@\n'
+ @mkdir -p $(HARECACHE)/shlex
+ @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nshlex \
+ -t$(HARECACHE)/shlex/shlex.td $(stdlib_shlex_any_srcs)
+
# slice (+any)
stdlib_slice_any_srcs= \
$(STDLIB)/slice/reverse.ha \
@@ -2120,6 +2136,12 @@ testlib_deps_any+=$(testlib_path_any)
testlib_path_linux=$(testlib_path_any)
testlib_path_freebsd=$(testlib_path_any)
+# gen_lib shlex (any)
+# Test-build counterpart: a single platform-independent object under
+# $(TESTCACHE), added to testlib_deps_any and aliased by the linux/freebsd
+# variables.
+testlib_shlex_any=$(TESTCACHE)/shlex/shlex-any.o
+testlib_deps_any+=$(testlib_shlex_any)
+testlib_shlex_linux=$(testlib_shlex_any)
+testlib_shlex_freebsd=$(testlib_shlex_any)
+
# gen_lib slice (any)
testlib_slice_any=$(TESTCACHE)/slice/slice-any.o
testlib_deps_any+=$(testlib_slice_any)
@@ -3066,6 +3088,17 @@ $(TESTCACHE)/path/path-any.ssa: $(testlib_path_any_srcs) $(testlib_rt) $(testlib
@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Npath \
-t$(TESTCACHE)/path/path.td $(testlib_path_any_srcs)
+# shlex (+any)
+# Sources of the test build of shlex: the module itself plus its +test.ha.
+testlib_shlex_any_srcs= \
+ $(STDLIB)/shlex/split.ha \
+ $(STDLIB)/shlex/+test.ha
+
+# Same recipe as the regular build, but using $(TESTCACHE), $(TESTHAREFLAGS)
+# and the testlib_* prerequisites.
+$(TESTCACHE)/shlex/shlex-any.ssa: $(testlib_shlex_any_srcs) $(testlib_rt) $(testlib_bufio_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strio_$(PLATFORM))
+ @printf 'HAREC \t$@\n'
+ @mkdir -p $(TESTCACHE)/shlex
+ @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nshlex \
+ -t$(TESTCACHE)/shlex/shlex.td $(testlib_shlex_any_srcs)
+
# slice (+any)
testlib_slice_any_srcs= \
$(STDLIB)/slice/reverse.ha \