commit 650daab2586766bbe9d26d38c6ff35c5e99f2dae
parent 80bc1290bb1942bf498e1fff53e1d02e300acf75
Author: Vlad-Stefan Harbuz <vlad@vladh.net>
Date: Sun, 22 May 2022 17:44:11 +0100
regex: fix subcapture content and add tests
Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>
Diffstat:
2 files changed, 40 insertions(+), 6 deletions(-)
diff --git a/regex/+test.ha b/regex/+test.ha
@@ -57,11 +57,29 @@ fn run_find_case(
};
};
+fn run_submatch_case( // test helper: checks the captures find() yields for expr on string
+ expr: str,
+ string: str,
+ expected: matchres, // NOTE(review): not read anywhere in this body
+ count: size, // required len(captures)
+ targets: []str // expected capture contents, compared by index
+) void = {
+ const re = compile(expr)!; // abort if the expression fails to compile
+ defer finish(&re);
+ // The `as` assertion aborts unless find() returned a []capture.
+ const captures = find(&re, string) as []capture;
+ defer free_captures(captures);
+ assert(len(captures) == count, "Invalid number of captures");
+ for (let i = 0z; i < len(targets); i += 1) { // assumes len(targets) <= count
+ assert(targets[i] == captures[i].content, "Invalid capture");
+ };
+};
+
fn run_findall_case(
expr: str,
string: str,
expected: matchres,
- count: int,
+ count: size,
targets: []str
) void = {
const re = match (compile(expr)) {
@@ -99,7 +117,7 @@ fn run_findall_case(
fmt::fatalf("Expected expression /{}/ to not match string \"{}\", but it did",
expr, string);
};
- if (count: size != len(matches)) {
+ if (count != len(matches)) {
fmt::fatalf("Expected to find {} matches but found {}",
count, len(matches));
};
@@ -525,11 +543,26 @@ fn run_findall_case(
};
run_find_case(expr, string, should_match, start, end);
};
+
+ const submatch_cases = [
+ // capture groups
+ (`aaa ([^ ]*) (...)`, "aaa bbb ccc", matchres::MATCH, 3z,
+ ["aaa bbb ccc", "bbb", "ccc"]),
+ ];
+
+ for (let i = 0z; i < len(submatch_cases); i += 1) {
+ const expr = submatch_cases[i].0;
+ const string = submatch_cases[i].1;
+ const should_match = submatch_cases[i].2;
+ const count = submatch_cases[i].3;
+ const targets = submatch_cases[i].4;
+ run_submatch_case(expr, string, should_match, count, targets);
+ };
};
@test fn findall() void = {
const cases = [
- (`ab.`, "hello abc and abあ test abq thanks", matchres::MATCH, 3,
+ (`ab.`, "hello abc and abあ test abq thanks", matchres::MATCH, 3z,
["abc", "abあ", "abq"]),
];
diff --git a/regex/regex.ha b/regex/regex.ha
@@ -543,9 +543,10 @@ fn run_thread(
assert(threads[i].curr_capture_inited, `Found a groupend token ")" without having previously seen a groupstart token "(". Please report this as a bug`);
threads[i].curr_capture.end = str_idx: size;
threads[i].curr_capture.end_bytesize = str_bytesize;
- const content = strings::fromutf8_unsafe(str_bytes[
- threads[i].curr_capture.start_bytesize..
- threads[i].curr_capture.end_bytesize]);
+ threads[i].curr_capture.content =
+ strings::fromutf8_unsafe(str_bytes[
+ threads[i].curr_capture.start_bytesize..
+ threads[i].curr_capture.end_bytesize]);
append(threads[i].captures, threads[i].curr_capture);
threads[i].curr_capture = capture { ... };
threads[i].curr_capture_inited = false;