hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 650daab2586766bbe9d26d38c6ff35c5e99f2dae
parent 80bc1290bb1942bf498e1fff53e1d02e300acf75
Author: Vlad-Stefan Harbuz <vlad@vladh.net>
Date:   Sun, 22 May 2022 17:44:11 +0100

regex: fix subcapture content and add tests

Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>

Diffstat:
M regex/+test.ha | 39 ++++++++++++++++++++++++++++++++++++---
M regex/regex.ha | 7 ++++---
2 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/regex/+test.ha b/regex/+test.ha @@ -57,11 +57,29 @@ fn run_find_case( }; }; +fn run_submatch_case( + expr: str, + string: str, + expected: matchres, + count: size, + targets: []str +) void = { + const re = compile(expr)!; + defer finish(&re); + + const captures = find(&re, string) as []capture; + defer free_captures(captures); + assert(len(captures) == count, "Invalid number of captures"); + for (let i = 0z; i < len(targets); i += 1) { + assert(targets[i] == captures[i].content, "Invalid capture"); + }; +}; + fn run_findall_case( expr: str, string: str, expected: matchres, - count: int, + count: size, targets: []str ) void = { const re = match (compile(expr)) { @@ -99,7 +117,7 @@ fn run_findall_case( fmt::fatalf("Expected expression /{}/ to not match string \"{}\", but it did", expr, string); }; - if (count: size != len(matches)) { + if (count != len(matches)) { fmt::fatalf("Expected to find {} matches but found {}", count, len(matches)); }; @@ -525,11 +543,26 @@ fn run_findall_case( }; run_find_case(expr, string, should_match, start, end); }; + + const submatch_cases = [ + // literals + (`aaa ([^ ]*) (...)`, "aaa bbb ccc", matchres::MATCH, 3z, + ["aaa bbb ccc", "bbb", "ccc"]), + ]; + + for (let i = 0z; i < len(submatch_cases); i += 1) { + const expr = submatch_cases[i].0; + const string = submatch_cases[i].1; + const should_match = submatch_cases[i].2; + const count = submatch_cases[i].3; + const targets = submatch_cases[i].4; + run_submatch_case(expr, string, should_match, count, targets); + }; }; @test fn findall() void = { const cases = [ - (`ab.`, "hello abc and abあ test abq thanks", matchres::MATCH, 3, + (`ab.`, "hello abc and abあ test abq thanks", matchres::MATCH, 3z, ["abc", "abあ", "abq"]), ]; diff --git a/regex/regex.ha b/regex/regex.ha @@ -543,9 +543,10 @@ fn run_thread( assert(threads[i].curr_capture_inited, `Found a groupend token ")" without having previously seen a groupstart token "(". 
Please report this as a bug`); threads[i].curr_capture.end = str_idx: size; threads[i].curr_capture.end_bytesize = str_bytesize; - const content = strings::fromutf8_unsafe(str_bytes[ - threads[i].curr_capture.start_bytesize.. - threads[i].curr_capture.end_bytesize]); + threads[i].curr_capture.content = + strings::fromutf8_unsafe(str_bytes[ + threads[i].curr_capture.start_bytesize.. + threads[i].curr_capture.end_bytesize]); append(threads[i].captures, threads[i].curr_capture); threads[i].curr_capture = capture { ... }; threads[i].curr_capture_inited = false;