commit 08083db020f62d2d7fa6b65bf5b877f54aaa377a
parent e89800188d6151b03c6872835aecb555d0a54c51
Author: Sebastian <sebastian@sebsite.pw>
Date: Wed, 7 Jun 2023 03:08:17 -0400
regex: add replacen and rawreplacen
Signed-off-by: Sebastian <sebastian@sebsite.pw>
Diffstat:
4 files changed, 77 insertions(+), 36 deletions(-)
diff --git a/regex/+test.ha b/regex/+test.ha
@@ -2,6 +2,7 @@
// (c) 2022 Vlad-Stefan Harbuz <vlad@vladh.net>
use fmt;
use strings;
+use types;
type matchres = enum { MATCH, NOMATCH, ERROR };
@@ -137,6 +138,7 @@ fn run_replace_case(
expr: str,
string: str,
target: str,
+ n: size,
expected: (str | void),
) void = {
const re = match (compile(expr)) {
@@ -149,25 +151,25 @@ fn run_replace_case(
};
defer finish(&re);
- match (replace(&re, string, target)) {
+ match (replacen(&re, string, target, n)) {
case let e: error =>
if (expected is str) {
fmt::errorln(e)!;
- fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" expected=\"{}\"",
- expr, string, target, expected as str)!;
+ fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} expected=\"{}\"",
+ expr, string, target, n, expected as str)!;
abort();
};
case let s: str =>
defer free(s);
if (expected is void) {
fmt::errorln("Expected replace to fail, but it did not")!;
- fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" return=\"{}\"",
- expr, string, target, s)!;
+ fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} return=\"{}\"",
+ expr, string, target, n, s)!;
abort();
};
if (expected as str != s) {
- fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" expected=\"{}\" return=\"{}\"",
- expr, string, target, expected as str, s)!;
+ fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} expected=\"{}\" return=\"{}\"",
+ expr, string, target, n, expected as str, s)!;
abort();
};
};
@@ -177,6 +179,7 @@ fn run_rawreplace_case(
expr: str,
string: str,
target: str,
+ n: size,
expected: str,
) void = {
const re = match (compile(expr)) {
@@ -189,11 +192,11 @@ fn run_rawreplace_case(
};
defer finish(&re);
- const s = rawreplace(&re, string, target);
+ const s = rawreplacen(&re, string, target, n);
defer free(s);
if (expected != s) {
- fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" expected=\"{}\" return=\"{}\"",
- expr, string, target, expected, s)!;
+ fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} expected=\"{}\" return=\"{}\"",
+ expr, string, target, n, expected, s)!;
abort();
};
};
@@ -676,46 +679,55 @@ fn run_rawreplace_case(
};
@test fn replace() void = {
- const cases: [_](str, str, str, (str | void)) = [
+ const cases: [_](str, str, str, size, (str | void)) = [
(`ab.`, "hello abc and abあ test abq thanks", `xyz`,
- "hello xyz and xyz test xyz thanks"),
+ types::SIZE_MAX, "hello xyz and xyz test xyz thanks"),
(`([Hh])ello`, "Hello world and hello Hare.", `\1owdy`,
- "Howdy world and howdy Hare."),
+ types::SIZE_MAX, "Howdy world and howdy Hare."),
(`fo{2,}`, "fo foo fooofoof oofoo", `\0bar`,
- "fo foobar fooobarfoobarf oofoobar"),
+ types::SIZE_MAX, "fo foobar fooobarfoobarf oofoobar"),
(`(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)`, "12345678910", `\10`,
- "10"),
+ types::SIZE_MAX, "10"),
(`...?`, "abcdefgh", `\7\0\8`,
- "abcdefgh"),
- (`...?`, "abcdefgh", `\7\0\`, void),
+ types::SIZE_MAX, "abcdefgh"),
+ (`...?`, "abcdefgh", `\7\0\`, types::SIZE_MAX, void),
+ (`ab.`, "hello abc and abあ test abq thanks", `xyz`,
+ 2, "hello xyz and xyz test abq thanks"),
+ (`.`, "blablabla", `x`, 0, "blablabla"),
+ (`([[:digit:]])([[:digit:]])`, "1234", `\2`, 1, "234"),
];
for (let i = 0z; i < len(cases); i += 1) {
const expr = cases[i].0;
const string = cases[i].1;
const target = cases[i].2;
- const expected = cases[i].3;
- run_replace_case(expr, string, target, expected);
+ const n = cases[i].3;
+ const expected = cases[i].4;
+ run_replace_case(expr, string, target, n, expected);
};
};
@test fn rawreplace() void = {
const cases = [
(`ab.`, "hello abc and abあ test abq thanks", "xyz",
- "hello xyz and xyz test xyz thanks"),
+ types::SIZE_MAX, "hello xyz and xyz test xyz thanks"),
(`([Hh])ello`, "Hello world and hello Hare.", `\howdy\`,
- `\howdy\ world and \howdy\ Hare.`),
+ types::SIZE_MAX, `\howdy\ world and \howdy\ Hare.`),
(`fo{2,}`, "fo foo fooofoof oofoo", `\0bar`,
- `fo \0bar \0bar\0barf oo\0bar`),
+ types::SIZE_MAX, `fo \0bar \0bar\0barf oo\0bar`),
(`\\\\`, `\\\\\\\\`, `\00\1`,
- `\00\1\00\1\00\1\00\1`),
+ types::SIZE_MAX, `\00\1\00\1\00\1\00\1`),
+ (`ab.`, "hello abc and abあ test abq thanks", `xyz`,
+ 2, "hello xyz and xyz test abq thanks"),
+ (`.`, "blablabla", `x`, 0, "blablabla"),
];
for (let i = 0z; i < len(cases); i += 1) {
const expr = cases[i].0;
const string = cases[i].1;
const target = cases[i].2;
- const expected = cases[i].3;
- run_rawreplace_case(expr, string, target, expected);
+ const n = cases[i].3;
+ const expected = cases[i].4;
+ run_rawreplace_case(expr, string, target, n, expected);
};
};
diff --git a/regex/regex.ha b/regex/regex.ha
@@ -7,6 +7,7 @@ use errors;
use io;
use strconv;
use strings;
+use types;
// An error string describing a compilation error.
export type error = !str;
@@ -829,19 +830,35 @@ export fn findall(re: *regex, string: str) []result = {
//
// An error is only returned if 'targetstr' isn't formatted correctly.
export fn replace(re: *regex, string: str, targetstr: str) (str | error) = {
+ return replacen(re, string, targetstr, types::SIZE_MAX);
+};
+
+// Replaces up to 'n' non-overlapping matches of a regular expression against a
+// string with 'targetstr', in the same manner as [[replace]].
+export fn replacen(
+ re: *regex,
+ string: str,
+ targetstr: str,
+ n: size,
+) (str | error) = {
+ const target = parse_replace_target(targetstr)?;
+ defer free(target);
+ // Check if n == 0 after parse_replace_target so errors are propagated
+ if (n == 0) {
+ return strings::dup(string);
+ };
+
const matches = findall(re, string);
if (len(matches) == 0) {
return strings::dup(string);
};
defer result_freeall(matches);
- const target = parse_replace_target(targetstr)?;
- defer free(target);
-
const bytes = strings::toutf8(string);
let buf = alloc(bytes[..matches[0][0].start_bytesize]...);
- for (let i = 0z; i < len(matches); i += 1) {
+ const n = if (len(matches) > n) n else len(matches);
+ for (let i = 0z; i < n; i += 1) {
for (let j = 0z; j < len(target); j += 1) {
match (target[j]) {
case let b: []u8 =>
@@ -853,7 +870,7 @@ export fn replace(re: *regex, string: str, targetstr: str) (str | error) = {
};
};
const start = matches[i][0].end_bytesize;
- const end = if (i == len(matches) - 1) len(bytes)
+ const end = if (i == n - 1) len(bytes)
else matches[i + 1][0].start_bytesize;
append(buf, bytes[start..end]...);
};
@@ -907,6 +924,16 @@ fn parse_replace_target(targetstr: str) ([]([]u8 | size) | error) = {
// with 'targetstr'. 'targetstr' isn't interpreted in any special way; all
// backslashes are treated literally.
export fn rawreplace(re: *regex, string: str, targetstr: str) str = {
+ return rawreplacen(re, string, targetstr, types::SIZE_MAX);
+};
+
+// Replaces up to 'n' non-overlapping matches of a regular expression against a
+// string with 'targetstr', in the same manner as [[rawreplace]].
+export fn rawreplacen(re: *regex, string: str, targetstr: str, n: size) str = {
+ if (n == 0) {
+ return strings::dup(string);
+ };
+
const matches = findall(re, string);
if (len(matches) == 0) {
return strings::dup(string);
@@ -918,14 +945,15 @@ export fn rawreplace(re: *regex, string: str, targetstr: str) str = {
let buf: []u8 = [];
append(buf, bytes[..matches[0][0].start_bytesize]...);
- for (let i = 1z; i < len(matches); i += 1) {
+ const n = if (len(matches) > n) n else len(matches);
+ for (let i = 1z; i < n; i += 1) {
append(buf, target...);
const start = matches[i - 1][0].end_bytesize;
const end = matches[i][0].start_bytesize;
append(buf, bytes[start..end]...);
};
append(buf, target...);
- append(buf, bytes[matches[len(matches) - 1][0].end_bytesize..]...);
+ append(buf, bytes[matches[n - 1][0].end_bytesize..]...);
return strings::fromutf8(buf)!;
};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -1226,10 +1226,11 @@ regex() {
if [ $testing -eq 0 ]; then
gen_srcs regex regex.ha
gen_ssa regex ascii bufio encoding::utf8 errors io strconv \
- strings bufio
+ strings bufio types
else
gen_srcs regex regex.ha +test.ha
- gen_ssa regex encoding::utf8 errors strconv strings fmt io os bufio
+ gen_ssa regex encoding::utf8 errors strconv strings fmt io os \
+ bufio types
fi
}
diff --git a/stdlib.mk b/stdlib.mk
@@ -1971,7 +1971,7 @@ $(HARECACHE)/path/path-any.ssa: $(stdlib_path_any_srcs) $(stdlib_rt) $(stdlib_st
stdlib_regex_any_srcs = \
$(STDLIB)/regex/regex.ha
-$(HARECACHE)/regex/regex-any.ssa: $(stdlib_regex_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_errors_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM))
+$(HARECACHE)/regex/regex-any.ssa: $(stdlib_regex_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_errors_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_types_$(PLATFORM))
@printf 'HAREC \t$@\n'
@mkdir -p $(HARECACHE)/regex
@$(stdlib_env) $(HAREC) $(HAREFLAGS) -o $@ -Nregex \
@@ -4437,7 +4437,7 @@ testlib_regex_any_srcs = \
$(STDLIB)/regex/regex.ha \
$(STDLIB)/regex/+test.ha
-$(TESTCACHE)/regex/regex-any.ssa: $(testlib_regex_any_srcs) $(testlib_rt) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_errors_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_fmt_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_bufio_$(PLATFORM))
+$(TESTCACHE)/regex/regex-any.ssa: $(testlib_regex_any_srcs) $(testlib_rt) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_errors_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_fmt_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_types_$(PLATFORM))
@printf 'HAREC \t$@\n'
@mkdir -p $(TESTCACHE)/regex
@$(testlib_env) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nregex \