hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit bcc845b890e4dbb3f56ee75a8f519e44e26d425b
parent 68dc855c61e9898991b859596a8b4aaff9fcbe05
Author: Michael Tilli <pyfisch@posteo.org>
Date:   Tue,  2 Jan 2024 10:35:56 +0000

Advance after zero-length regex matches

Add two test cases.

Signed-off-by: Michael Tilli <pyfisch@posteo.org>

Diffstat:
M regex/+test.ha | 4 ++++
M regex/regex.ha | 15 +++++++++++++--
2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/regex/+test.ha b/regex/+test.ha @@ -668,6 +668,10 @@ fn run_rawreplace_case( ["a", "a"]: []str), (`fo{2,}`, "fo foo fooofoof oofoo", matchres::MATCH, ["foo", "fooo", "foo", "foo"]: []str), + (``, "abc", matchres::MATCH, + ["", "", "", ""]: []str), + (`a*`, "aaa", matchres::MATCH, + ["aaa", ""]: []str), ]; for (let i = 0z; i < len(cases); i += 1) { diff --git a/regex/regex.ha b/regex/regex.ha @@ -796,10 +796,11 @@ export fn find(re: *regex, string: str) result = { export fn findall(re: *regex, string: str) []result = { let res: []result = []; let str_idx = 0z, str_bytesize = 0z; - let substring = string; let strm = memio::fixed(strings::toutf8(string)); const str_bytes = strings::toutf8(string); for (true) { + let substring = strings::fromutf8_unsafe( + str_bytes[str_bytesize..]); match (search(re, substring, &strm, true)) { case let m: []capture => append(res, m); @@ -809,7 +810,17 @@ export fn findall(re: *regex, string: str) []result = { m[0].end_bytesize += str_bytesize; str_idx = m[0].end; str_bytesize = m[0].end_bytesize; - substring = strings::fromutf8(str_bytes[str_bytesize..])!; + if (m[0].start_bytesize == len(str_bytes)) { + // end-of-string reached + break; + }; + if (m[0].start_bytesize == m[0].end_bytesize) { + // zero-length match + // forward rune and byte indices + str_idx += 1; + str_bytesize += encoding::utf8::utf8sz( + str_bytes[str_bytesize])!; + }; io::seek(&strm, str_bytesize: io::off, io::whence::SET)!; case void => break;