hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit e9387b648e91996f7d61825857224fc282b97c61
parent 1ce64002a02de07e29b395f56c4208c767357f19
Author: Alexey Yerin <yyp@disroot.org>
Date:   Sun,  7 Nov 2021 11:44:41 +0300

shlex: new module

For now only containing split, next up is quote.

Signed-off-by: Alexey Yerin <yyp@disroot.org>

Diffstat:
M scripts/gen-stdlib | 18 ++++++++++++++++++
A shlex/+test.ha | 39 +++++++++++++++++++++++++++++++++++++++
A shlex/README | 1 +
A shlex/split.ha | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M stdlib.mk | 33 +++++++++++++++++++++++++++++++++
5 files changed, 205 insertions(+), 0 deletions(-)

diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -859,6 +859,23 @@ gensrcs_strconv() { $* } +gensrcs_shlex() { + gen_srcs shlex \ + split.ha \ + $* +} + +shlex() { + if [ $testing -eq 0 ] + then + gensrcs_shlex + else + gensrcs_shlex \ + +test.ha + fi + gen_ssa shlex bufio io strings strio +} + slice() { gen_srcs slice \ reverse.ha \ @@ -1096,6 +1113,7 @@ math::random os linux freebsd os::exec linux freebsd path +shlex slice sort strconv diff --git a/shlex/+test.ha b/shlex/+test.ha @@ -0,0 +1,39 @@ +@test fn split() void = { + const s = split("hello\\ world")!; + defer splitfree(s); + assert(len(s) == 1); + assert(s[0] == "hello world"); + + const s = split("'hello\\ world'")!; + defer splitfree(s); + assert(len(s) == 1); + assert(s[0] == "hello\\ world"); + + const s = split("\"hello\\\\world\"")!; + defer splitfree(s); + assert(len(s) == 1); + assert(s[0] == "hello\\world"); + + const s = split("\"hello \"'\"'\"world\"'\"'")!; + defer splitfree(s); + assert(len(s) == 1); + assert(s[0] == "hello \"world\""); + + const s = split("with\\ backslashes 'single quoted' \"double quoted\"")!; + defer splitfree(s); + assert(len(s) == 3); + assert(s[0] == "with backslashes"); + assert(s[1] == "single quoted"); + assert(s[2] == "double quoted"); + + const s = split("'multiple spaces' 42")!; + defer splitfree(s); + assert(len(s) == 2); + assert(s[0] == "multiple spaces"); + assert(s[1] == "42"); + + // Invalid + assert(split("\"dangling double quote") is syntaxerr); + assert(split("'dangling single quote") is syntaxerr); + assert(split("unterminated\\ backslash \\") is syntaxerr); +}; diff --git a/shlex/README b/shlex/README @@ -0,0 +1 @@ +The shlex module provides lexical tools for working with POSIX shell grammar. diff --git a/shlex/split.ha b/shlex/split.ha @@ -0,0 +1,114 @@ +use bufio; +use io; +use strings; +use strio; + +// Invalid shell syntax. +export type syntaxerr = !void; + +// Splits a string of arguments according to shell quoting. 
The result must be +// freed using [[splitfree]] when the caller is done processing it. +export fn split(in: const str) ([]str | syntaxerr) = { + let in = bufio::fixed(strings::toutf8(in), io::mode::READ); + defer io::close(in); + + let s = strio::dynamic(); + let slice: []str = []; + + for (true) { + const r = match (bufio::scanrune(in)!) { + case r: rune => + yield r; + case io::EOF => + break; + }; + + switch (r) { + case '\\' => + scan_backslash(s, in)?; + case '"' => + scan_double(s, in)?; + case '\'' => + scan_single(s, in)?; + case ' ', '\t', '\n' => + if (len(strio::string(s)) > 0) { + append(slice, strio::finish(s)); + s = strio::dynamic(); + }; + case => + strio::appendrune(s, r)!; + }; + }; + + const buf = strio::finish(s); + if (len(buf) > 0) { + append(slice, buf); + }; + + return slice; +}; + +fn scan_backslash(out: io::handle, in: io::handle) (void | syntaxerr) = { + const r = match (bufio::scanrune(in)!) { + case r: rune => + yield r; + case io::EOF => + return syntaxerr; + }; + + // The <backslash> and <newline> shall be removed before splitting the + // input into tokens. Since the escaped <newline> is removed entirely + // from the input and is not replaced by any white space, it cannot + // serve as a token separator + if (r == '\n') { + return; + }; + + strio::appendrune(out, r)!; +}; + +fn scan_double(out: io::handle, in: io::handle) (void | syntaxerr) = { + for (true) { + const r = match (bufio::scanrune(in)!) { + case r: rune => + yield r; + case io::EOF => + return syntaxerr; + }; + + switch (r) { + case '"' => + break; + case '\\' => + scan_backslash(out, in)?; + case => + strio::appendrune(out, r)!; + }; + }; +}; + +fn scan_single(out: io::handle, in: io::handle) (void | syntaxerr) = { + for (true) { + const r = match (bufio::scanrune(in)!) 
{ + case r: rune => + yield r; + case io::EOF => + return syntaxerr; + }; + + if (r == '\'') { + break; + }; + strio::appendrune(out, r)!; + }; +}; + +// TODO: rehome a more generic version into strings/? + +// Frees a slice of allocated strings returned from [[split]]. +export fn splitfree(s: const []str) void = { + for (let i = 0z; i < len(s); i += 1) { + free(s[i]); + }; + free(s); +}; diff --git a/stdlib.mk b/stdlib.mk @@ -474,6 +474,12 @@ stdlib_deps_any+=$(stdlib_path_any) stdlib_path_linux=$(stdlib_path_any) stdlib_path_freebsd=$(stdlib_path_any) +# gen_lib shlex (any) +stdlib_shlex_any=$(HARECACHE)/shlex/shlex-any.o +stdlib_deps_any+=$(stdlib_shlex_any) +stdlib_shlex_linux=$(stdlib_shlex_any) +stdlib_shlex_freebsd=$(stdlib_shlex_any) + # gen_lib slice (any) stdlib_slice_any=$(HARECACHE)/slice/slice-any.o stdlib_deps_any+=$(stdlib_slice_any) @@ -1390,6 +1396,16 @@ $(HARECACHE)/path/path-any.ssa: $(stdlib_path_any_srcs) $(stdlib_rt) $(stdlib_st @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Npath \ -t$(HARECACHE)/path/path.td $(stdlib_path_any_srcs) +# shlex (+any) +stdlib_shlex_any_srcs= \ + $(STDLIB)/shlex/split.ha + +$(HARECACHE)/shlex/shlex-any.ssa: $(stdlib_shlex_any_srcs) $(stdlib_rt) $(stdlib_bufio_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) + @printf 'HAREC \t$@\n' + @mkdir -p $(HARECACHE)/shlex + @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nshlex \ + -t$(HARECACHE)/shlex/shlex.td $(stdlib_shlex_any_srcs) + # slice (+any) stdlib_slice_any_srcs= \ $(STDLIB)/slice/reverse.ha \ @@ -2120,6 +2136,12 @@ testlib_deps_any+=$(testlib_path_any) testlib_path_linux=$(testlib_path_any) testlib_path_freebsd=$(testlib_path_any) +# gen_lib shlex (any) +testlib_shlex_any=$(TESTCACHE)/shlex/shlex-any.o +testlib_deps_any+=$(testlib_shlex_any) +testlib_shlex_linux=$(testlib_shlex_any) +testlib_shlex_freebsd=$(testlib_shlex_any) + # gen_lib slice (any) testlib_slice_any=$(TESTCACHE)/slice/slice-any.o 
testlib_deps_any+=$(testlib_slice_any) @@ -3066,6 +3088,17 @@ $(TESTCACHE)/path/path-any.ssa: $(testlib_path_any_srcs) $(testlib_rt) $(testlib @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Npath \ -t$(TESTCACHE)/path/path.td $(testlib_path_any_srcs) +# shlex (+any) +testlib_shlex_any_srcs= \ + $(STDLIB)/shlex/split.ha \ + $(STDLIB)/shlex/+test.ha + +$(TESTCACHE)/shlex/shlex-any.ssa: $(testlib_shlex_any_srcs) $(testlib_rt) $(testlib_bufio_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) + @printf 'HAREC \t$@\n' + @mkdir -p $(TESTCACHE)/shlex + @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nshlex \ + -t$(TESTCACHE)/shlex/shlex.td $(testlib_shlex_any_srcs) + # slice (+any) testlib_slice_any_srcs= \ $(STDLIB)/slice/reverse.ha \