commit bcc845b890e4dbb3f56ee75a8f519e44e26d425b
parent 68dc855c61e9898991b859596a8b4aaff9fcbe05
Author: Michael Tilli <pyfisch@posteo.org>
Date: Tue, 2 Jan 2024 10:35:56 +0000
Advance after zero-length regex matches
Add two test cases.
Signed-off-by: Michael Tilli <pyfisch@posteo.org>
Diffstat:
2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/regex/+test.ha b/regex/+test.ha
@@ -668,6 +668,10 @@ fn run_rawreplace_case(
["a", "a"]: []str),
(`fo{2,}`, "fo foo fooofoof oofoo", matchres::MATCH,
["foo", "fooo", "foo", "foo"]: []str),
+ (``, "abc", matchres::MATCH,
+ ["", "", "", ""]: []str),
+ (`a*`, "aaa", matchres::MATCH,
+ ["aaa", ""]: []str),
];
for (let i = 0z; i < len(cases); i += 1) {
diff --git a/regex/regex.ha b/regex/regex.ha
@@ -796,10 +796,11 @@ export fn find(re: *regex, string: str) result = {
export fn findall(re: *regex, string: str) []result = {
let res: []result = [];
let str_idx = 0z, str_bytesize = 0z;
- let substring = string;
let strm = memio::fixed(strings::toutf8(string));
const str_bytes = strings::toutf8(string);
for (true) {
+ let substring = strings::fromutf8_unsafe(
+ str_bytes[str_bytesize..]);
match (search(re, substring, &strm, true)) {
case let m: []capture =>
append(res, m);
@@ -809,7 +810,17 @@ export fn findall(re: *regex, string: str) []result = {
m[0].end_bytesize += str_bytesize;
str_idx = m[0].end;
str_bytesize = m[0].end_bytesize;
- substring = strings::fromutf8(str_bytes[str_bytesize..])!;
+ if (m[0].start_bytesize == len(str_bytes)) {
+ // end-of-string reached
+ break;
+ };
+ if (m[0].start_bytesize == m[0].end_bytesize) {
+ // zero-length match: advance the rune and byte indices
+ // by one rune so that the next search makes progress
+ str_idx += 1;
+ str_bytesize += encoding::utf8::utf8sz(
+ str_bytes[str_bytesize])!;
+ };
io::seek(&strm, str_bytesize: io::off,
io::whence::SET)!;
case void => break;