hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 08083db020f62d2d7fa6b65bf5b877f54aaa377a
parent e89800188d6151b03c6872835aecb555d0a54c51
Author: Sebastian <sebastian@sebsite.pw>
Date:   Wed,  7 Jun 2023 03:08:17 -0400

regex: add replacen and rawreplacen

Signed-off-by: Sebastian <sebastian@sebsite.pw>

Diffstat:
M regex/+test.ha | 62 +++++++++++++++++++++++++++++++++++++-------------------------
M regex/regex.ha | 42 +++++++++++++++++++++++++++++++++++-------
M scripts/gen-stdlib | 5 +++--
M stdlib.mk | 4 ++--
4 files changed, 77 insertions(+), 36 deletions(-)

diff --git a/regex/+test.ha b/regex/+test.ha @@ -2,6 +2,7 @@ // (c) 2022 Vlad-Stefan Harbuz <vlad@vladh.net> use fmt; use strings; +use types; type matchres = enum { MATCH, NOMATCH, ERROR }; @@ -137,6 +138,7 @@ fn run_replace_case( expr: str, string: str, target: str, + n: size, expected: (str | void), ) void = { const re = match (compile(expr)) { @@ -149,25 +151,25 @@ fn run_replace_case( }; defer finish(&re); - match (replace(&re, string, target)) { + match (replacen(&re, string, target, n)) { case let e: error => if (expected is str) { fmt::errorln(e)!; - fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" expected=\"{}\"", - expr, string, target, expected as str)!; + fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} expected=\"{}\"", + expr, string, target, n, expected as str)!; abort(); }; case let s: str => defer free(s); if (expected is void) { fmt::errorln("Expected replace to fail, but it did not")!; - fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" return=\"{}\"", - expr, string, target, s)!; + fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} return=\"{}\"", + expr, string, target, n, s)!; abort(); }; if (expected as str != s) { - fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" expected=\"{}\" return=\"{}\"", - expr, string, target, expected as str, s)!; + fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} expected=\"{}\" return=\"{}\"", + expr, string, target, n, expected as str, s)!; abort(); }; }; @@ -177,6 +179,7 @@ fn run_rawreplace_case( expr: str, string: str, target: str, + n: size, expected: str, ) void = { const re = match (compile(expr)) { @@ -189,11 +192,11 @@ fn run_rawreplace_case( }; defer finish(&re); - const s = rawreplace(&re, string, target); + const s = rawreplacen(&re, string, target, n); defer free(s); if (expected != s) { - fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" expected=\"{}\" return=\"{}\"", - expr, string, target, expected, s)!; + fmt::errorfln("expr=/{}/ string=\"{}\" 
target=\"{}\" n={} expected=\"{}\" return=\"{}\"", + expr, string, target, n, expected, s)!; abort(); }; }; @@ -676,46 +679,55 @@ fn run_rawreplace_case( }; @test fn replace() void = { - const cases: [_](str, str, str, (str | void)) = [ + const cases: [_](str, str, str, size, (str | void)) = [ (`ab.`, "hello abc and abあ test abq thanks", `xyz`, - "hello xyz and xyz test xyz thanks"), + types::SIZE_MAX, "hello xyz and xyz test xyz thanks"), (`([Hh])ello`, "Hello world and hello Hare.", `\1owdy`, - "Howdy world and howdy Hare."), + types::SIZE_MAX, "Howdy world and howdy Hare."), (`fo{2,}`, "fo foo fooofoof oofoo", `\0bar`, - "fo foobar fooobarfoobarf oofoobar"), + types::SIZE_MAX, "fo foobar fooobarfoobarf oofoobar"), (`(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)`, "12345678910", `\10`, - "10"), + types::SIZE_MAX, "10"), (`...?`, "abcdefgh", `\7\0\8`, - "abcdefgh"), - (`...?`, "abcdefgh", `\7\0\`, void), + types::SIZE_MAX, "abcdefgh"), + (`...?`, "abcdefgh", `\7\0\`, types::SIZE_MAX, void), + (`ab.`, "hello abc and abあ test abq thanks", `xyz`, + 2, "hello xyz and xyz test abq thanks"), + (`.`, "blablabla", `x`, 0, "blablabla"), + (`([[:digit:]])([[:digit:]])`, "1234", `\2`, 1, "234"), ]; for (let i = 0z; i < len(cases); i += 1) { const expr = cases[i].0; const string = cases[i].1; const target = cases[i].2; - const expected = cases[i].3; - run_replace_case(expr, string, target, expected); + const n = cases[i].3; + const expected = cases[i].4; + run_replace_case(expr, string, target, n, expected); }; }; @test fn rawreplace() void = { const cases = [ (`ab.`, "hello abc and abあ test abq thanks", "xyz", - "hello xyz and xyz test xyz thanks"), + types::SIZE_MAX, "hello xyz and xyz test xyz thanks"), (`([Hh])ello`, "Hello world and hello Hare.", `\howdy\`, - `\howdy\ world and \howdy\ Hare.`), + types::SIZE_MAX, `\howdy\ world and \howdy\ Hare.`), (`fo{2,}`, "fo foo fooofoof oofoo", `\0bar`, - `fo \0bar \0bar\0barf oo\0bar`), + types::SIZE_MAX, `fo \0bar \0bar\0barf oo\0bar`), 
(`\\\\`, `\\\\\\\\`, `\00\1`, - `\00\1\00\1\00\1\00\1`), + types::SIZE_MAX, `\00\1\00\1\00\1\00\1`), + (`ab.`, "hello abc and abあ test abq thanks", `xyz`, + 2, "hello xyz and xyz test abq thanks"), + (`.`, "blablabla", `x`, 0, "blablabla"), ]; for (let i = 0z; i < len(cases); i += 1) { const expr = cases[i].0; const string = cases[i].1; const target = cases[i].2; - const expected = cases[i].3; - run_rawreplace_case(expr, string, target, expected); + const n = cases[i].3; + const expected = cases[i].4; + run_rawreplace_case(expr, string, target, n, expected); }; }; diff --git a/regex/regex.ha b/regex/regex.ha @@ -7,6 +7,7 @@ use errors; use io; use strconv; use strings; +use types; // An error string describing a compilation error. export type error = !str; @@ -829,19 +830,35 @@ export fn findall(re: *regex, string: str) []result = { // // An error is only returned if 'targetstr' isn't formatted correctly. export fn replace(re: *regex, string: str, targetstr: str) (str | error) = { + return replacen(re, string, targetstr, types::SIZE_MAX); +}; + +// Replaces up to 'n' non-overlapping matches of a regular expression against a +// string with 'targetstr', in the same manner as [[replace]]. 
+export fn replacen( + re: *regex, + string: str, + targetstr: str, + n: size, +) (str | error) = { + const target = parse_replace_target(targetstr)?; + defer free(target); + // Check if n == 0 after parse_replace_target so errors are propagated + if (n == 0) { + return strings::dup(string); + }; + const matches = findall(re, string); if (len(matches) == 0) { return strings::dup(string); }; defer result_freeall(matches); - const target = parse_replace_target(targetstr)?; - defer free(target); - const bytes = strings::toutf8(string); let buf = alloc(bytes[..matches[0][0].start_bytesize]...); - for (let i = 0z; i < len(matches); i += 1) { + const n = if (len(matches) > n) n else len(matches); + for (let i = 0z; i < n; i += 1) { for (let j = 0z; j < len(target); j += 1) { match (target[j]) { case let b: []u8 => @@ -853,7 +870,7 @@ export fn replace(re: *regex, string: str, targetstr: str) (str | error) = { }; }; const start = matches[i][0].end_bytesize; - const end = if (i == len(matches) - 1) len(bytes) + const end = if (i == n - 1) len(bytes) else matches[i + 1][0].start_bytesize; append(buf, bytes[start..end]...); }; @@ -907,6 +924,16 @@ fn parse_replace_target(targetstr: str) ([]([]u8 | size) | error) = { // with 'targetstr'. 'targetstr' is isn't interpreted in any special way; all // backslashes are treated literally. export fn rawreplace(re: *regex, string: str, targetstr: str) str = { + return rawreplacen(re, string, targetstr, types::SIZE_MAX); +}; + +// Replaces up to 'n' non-overlapping matches of a regular expression against a +// string with 'targetstr', in the same manner as [[rawreplace]]. 
+export fn rawreplacen(re: *regex, string: str, targetstr: str, n: size) str = { + if (n == 0) { + return strings::dup(string); + }; + const matches = findall(re, string); if (len(matches) == 0) { return strings::dup(string); @@ -918,14 +945,15 @@ export fn rawreplace(re: *regex, string: str, targetstr: str) str = { let buf: []u8 = []; append(buf, bytes[..matches[0][0].start_bytesize]...); - for (let i = 1z; i < len(matches); i += 1) { + const n = if (len(matches) > n) n else len(matches); + for (let i = 1z; i < n; i += 1) { append(buf, target...); const start = matches[i - 1][0].end_bytesize; const end = matches[i][0].start_bytesize; append(buf, bytes[start..end]...); }; append(buf, target...); - append(buf, bytes[matches[len(matches) - 1][0].end_bytesize..]...); + append(buf, bytes[matches[n - 1][0].end_bytesize..]...); return strings::fromutf8(buf)!; }; diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -1226,10 +1226,11 @@ regex() { if [ $testing -eq 0 ]; then gen_srcs regex regex.ha gen_ssa regex ascii bufio encoding::utf8 errors io strconv \ - strings bufio + strings bufio types else gen_srcs regex regex.ha +test.ha - gen_ssa regex encoding::utf8 errors strconv strings fmt io os bufio + gen_ssa regex encoding::utf8 errors strconv strings fmt io os \ + bufio types fi } diff --git a/stdlib.mk b/stdlib.mk @@ -1971,7 +1971,7 @@ $(HARECACHE)/path/path-any.ssa: $(stdlib_path_any_srcs) $(stdlib_rt) $(stdlib_st stdlib_regex_any_srcs = \ $(STDLIB)/regex/regex.ha -$(HARECACHE)/regex/regex-any.ssa: $(stdlib_regex_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_errors_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) +$(HARECACHE)/regex/regex-any.ssa: $(stdlib_regex_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_errors_$(PLATFORM)) 
$(stdlib_io_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_types_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(HARECACHE)/regex @$(stdlib_env) $(HAREC) $(HAREFLAGS) -o $@ -Nregex \ @@ -4437,7 +4437,7 @@ testlib_regex_any_srcs = \ $(STDLIB)/regex/regex.ha \ $(STDLIB)/regex/+test.ha -$(TESTCACHE)/regex/regex-any.ssa: $(testlib_regex_any_srcs) $(testlib_rt) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_errors_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_fmt_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) +$(TESTCACHE)/regex/regex-any.ssa: $(testlib_regex_any_srcs) $(testlib_rt) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_errors_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_fmt_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_types_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(TESTCACHE)/regex @$(testlib_env) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nregex \