commit b8b8beb910da394e4d4991fd177ba660e940ec83
parent 7c4680cd316608e0cd9098575343b30fdfe27c06
Author: Vlad-Stefan Harbuz <vlad@vladh.net>
Date: Sat, 14 May 2022 17:25:01 +0100
regex: find/findall/test can no longer error
Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>
Diffstat:
3 files changed, 38 insertions(+), 72 deletions(-)
diff --git a/regex/+test.ha b/regex/+test.ha
@@ -16,37 +16,34 @@ fn run_find_case(
case let e: error =>
if (expected == matchres::MATCH) {
fmt::println(e)!;
- fmt::fatalf("Expected expression /{}/ to match, but it errored",
+ fmt::fatalf("Expected expression /{}/ to match string \"{}\", but it errored",
expr, string);
};
if (expected == matchres::NOMATCH) {
fmt::println(e)!;
- fmt::fatalf("Expected expression /{}/ to not match, but it errored",
+ fmt::fatalf("Expected expression /{}/ to not match string \"{}\", but it errored",
expr, string);
};
return;
};
+ if (expected == matchres::ERROR) {
+ fmt::fatalf("Expected expression /{}/ to have error caught during compilation, but it did not",
+ expr);
+ };
+
match (find(&re, string)) {
case void =>
if (expected == matchres::MATCH) {
fmt::fatalf("Expected expression /{}/ to match string \"{}\", but it did not",
expr, string);
};
- if (expected == matchres::ERROR) {
- fmt::fatalf("Expression /{}/ failed to match, but should have errored",
- expr, string);
- };
case let m: []capture =>
if (expected == matchres::NOMATCH) {
fmt::fatalf("Expected expression /{}/ to not match string \"{}\", but it did",
expr, string);
};
- if (expected == matchres::ERROR) {
- fmt::fatalf("Expression /{}/ matched, but should have errored",
- expr, string);
- };
if (start: size != m[0].start) {
fmt::fatalf("Expected start of main capture to be {} but it was {}",
start, m[0].start);
@@ -55,16 +52,6 @@ fn run_find_case(
fmt::fatalf("Expected end of main capture to be {} but it was {}",
end, m[0].end);
};
-
- case let e: error =>
- if (expected == matchres::MATCH) {
- fmt::fatalf("Expected expression /{}/ to match, but it errored",
- expr, string);
- };
- if (expected == matchres::NOMATCH) {
- fmt::fatalf("Expected expression /{}/ to not match, but it errored",
- expr, string);
- };
};
};
@@ -90,40 +77,27 @@ fn run_findall_case(
return;
};
+ if (expected == matchres::ERROR) {
+ fmt::fatalf("Expected expression /{}/ to have error caught during compilation, but it did not",
+ expr);
+ };
+
match (findall(&re, string)) {
case void =>
if (expected == matchres::MATCH) {
fmt::fatalf("Expected expression /{}/ to match string \"{}\", but it did not",
expr, string);
};
- if (expected == matchres::ERROR) {
- fmt::fatalf("Expression /{}/ failed to match, but should have errored",
- expr, string);
- };
case let groupsets: [][]capture =>
if (expected == matchres::NOMATCH) {
fmt::fatalf("Expected expression /{}/ to not match string \"{}\", but it did",
expr, string);
};
- if (expected == matchres::ERROR) {
- fmt::fatalf("Expression /{}/ matched, but should have errored",
- expr, string);
- };
if (count: size != len(groupsets)) {
fmt::fatalf("Expected to find {} matches but found {}",
count, len(groupsets));
};
-
- case let e: error =>
- if (expected == matchres::MATCH) {
- fmt::fatalf("Expected expression /{}/ to match, but it errored",
- expr, string);
- };
- if (expected == matchres::NOMATCH) {
- fmt::fatalf("Expected expression /{}/ to not match, but it errored",
- expr, string);
- };
};
};
@@ -309,9 +283,10 @@ fn run_findall_case(
(`^x(abc){1,2}$`, "xabc", matchres::MATCH, 0, -1),
(`^x(abc){1,2}$`, "xabcabc", matchres::MATCH, 0, -1),
(`^x(abc){1,2}$`, "xabcabcabc", matchres::NOMATCH, 0, -1),
- (`^x(abc){,2}$`, "xabc", matchres::ERROR, 0, -1),
- (`^x(abc){,2}$`, "xabcabc", matchres::ERROR, 0, -1),
- (`^x(abc){,2}$`, "xabcabcabc", matchres::ERROR, 0, -1),
+ (`^x(abc){,2}$`, "xabc", matchres::MATCH, 0, -1),
+ (`^x(abc){,2}$`, "xabcabc", matchres::MATCH, 0, -1),
+ (`^x(abc){,2}`, "xabcabcabc", matchres::MATCH, 0, 7),
+ (`^x(abc){,2}$`, "xabcabcabc", matchres::NOMATCH, 0, -1),
(`^x(abc){1,}$`, "xabc", matchres::MATCH, 0, -1),
(`^x(abc){1,}$`, "xabcabc", matchres::MATCH, 0, -1),
(`^x(abc){3,}$`, "xabcabc", matchres::NOMATCH, 0, -1),
@@ -481,6 +456,7 @@ fn run_findall_case(
// (`a|b|c|d|e`, "e", matchres::MATCH, 0, -1),
// (`(a|b|c|d|e)f`, "ef", matchres::MATCH, 0, -1),
// TODO: nested capture groups
+ (`((a))`, "abc", matchres::ERROR, 0, -1),
// (`((a))`, "abc", matchres::MATCH, 0, -1),
// (`((a)(b)c)(d)`, "abcd", matchres::MATCH, 0, -1),
// (`(bc+d$|ef*g.|h?i(j|k))`, "effgz", matchres::MATCH, 0, -1),
diff --git a/regex/README b/regex/README
@@ -20,10 +20,10 @@ the longest match among the leftmost matches.
const re = regex::compile(`[Hh]are`)!;
defer regex::finish(&re);
- const does_match = regex::test(&re, "Hello Hare, hello Hare.")!;
+ const does_match = regex::test(&re, "Hello Hare, hello Hare.");
fmt::printfln("matched? {}", does_match)!;
- const first_match = regex::find(&re, "Hello Hare, hello Hare.")!;
+ const first_match = regex::find(&re, "Hello Hare, hello Hare.");
match (first_match) {
case void => void;
case let captures: []regex::capture =>
@@ -35,7 +35,7 @@ the longest match among the leftmost matches.
captures[0].end)!;
};
- const all_matches = regex::findall(&re, "Hello Hare, hello Hare.")!;
+ const all_matches = regex::findall(&re, "Hello Hare, hello Hare.");
match (all_matches) {
case void => void;
case let matches: [][]regex::capture =>
diff --git a/regex/regex.ha b/regex/regex.ha
@@ -90,7 +90,6 @@ const charclass_fns: [](charclass, *fn(c: rune) bool) = [
(charclass::UPPER, &ascii::isupper),
(charclass::XDIGIT, &ascii::isxdigit),
];
-const multibyte_err: error = "Character ranges do not support characters larger than one byte.";
export type regex = struct {
insts: []inst,
@@ -188,15 +187,11 @@ fn handle_bracket(
};
} else if (is_range) {
const start_enc = utf8::encoderune(r);
- if (len(start_enc) > 1) {
- return multibyte_err;
- };
+ assert(len(start_enc) == 1, "Character ranges do not currently support characters larger than one byte");
const start_b = start_enc[0];
const end_enc = utf8::encoderune(range_end as rune);
- if (len(end_enc) > 1) {
- return multibyte_err;
- };
+ assert(len(end_enc) == 1, "Character ranges do not currently support characters larger than one byte");
const end_b = end_enc[0];
if (end_b < start_b) {
@@ -285,6 +280,9 @@ export fn compile(expr: str) (regex | error) = {
append(insts, r: inst_lit);
};
case '(' =>
+ if (n_groupstarts > 0) {
+ return "Found nested capture groups in expression, which are not supported": error;
+ };
append(insts, void: inst_groupstart);
n_groupstarts += 1;
case ')' =>
@@ -441,6 +439,8 @@ fn parse_repetition(
};
case => return "Invalid repetition minimum value": error;
};
+ } else {
+ min = 0;
};
if (len(max_str) > 0) {
@@ -455,10 +455,6 @@ fn parse_repetition(
};
};
- if (len(min_str) == 0 && len(max_str) > 0) {
- return "Invalid repetition minimum value": error;
- };
-
const rep_len = if (is_single_arg) {
yield len(min_str);
} else {
@@ -509,7 +505,7 @@ fn run_thread(
threads: *[]thread,
r_or_end: (rune | void),
str_idx: int
-) (void | error | newmatch) = {
+) (void | newmatch) = {
if (threads[i].matched) {
return;
};
@@ -546,16 +542,12 @@ fn run_thread(
threads[i].matched = true;
return newmatch;
case inst_groupstart =>
- if (threads[i].curr_capture_inited) {
- return "Found nested capture groups in expression, which are not supported": error;
- };
+ assert(!threads[i].curr_capture_inited, "Found nested capture groups in expression, which are not supported");
threads[i].curr_capture.start = str_idx: size;
threads[i].curr_capture_inited = true;
threads[i].pc += 1;
case inst_groupend =>
- if (!threads[i].curr_capture_inited) {
- return `Found a groupend token ")" without having previously seen a groupstart token "("`: error;
- };
+ assert(threads[i].curr_capture_inited, `Found a groupend token ")" without having previously seen a groupstart token "(". Please report this as a bug`);
threads[i].curr_capture.end = str_idx: size;
// TODO: This is a perf issue for large strings
threads[i].curr_capture.content = strings::sub(string,
@@ -615,9 +607,7 @@ fn run_thread(
};
case let range: charset_range_item =>
const r_enc = utf8::encoderune(r);
- if (len(r_enc) > 1) {
- return multibyte_err;
- };
+ assert(len(r_enc) == 1, "Character ranges do not currently support characters larger than one byte");
const r_b = r_enc[0];
if (r_b >= range.0 && r_b <= range.1) {
// Succeeded if positive match
@@ -653,7 +643,7 @@ fn search(
str_iter: *strings::iterator,
str_idx: *int,
need_captures: bool
-) (void | []capture | error) = {
+) (void | []capture) = {
let threads: []thread = alloc([
thread { captures = alloc([]), ... }
]);
@@ -712,7 +702,7 @@ fn search(
for (let i = 0z; i < len(threads); i += 1) {
const res = run_thread(i, re, string, &threads,
- r_or_end, *str_idx)?;
+ r_or_end, *str_idx);
const matchlen = threads[i].root_capture.end
- threads[i].root_capture.start;
if (res is newmatch && matchlen > 0 && !need_captures) {
@@ -773,10 +763,10 @@ fn search(
};
// Returns whether or not a regex matches a string.
-export fn test(re: *regex, string: str) (bool | error) = {
+export fn test(re: *regex, string: str) bool = {
let str_idx = -1;
let str_iter = strings::iter(string);
- match (search(re, string, &str_iter, &str_idx, false)?) {
+ match (search(re, string, &str_iter, &str_idx, false)) {
case void => return false;
case []capture => return true;
};
@@ -785,7 +775,7 @@ export fn test(re: *regex, string: str) (bool | error) = {
// Attempts to match a regular expression against a string and returns the
// longest leftmost match, or void if there is no match.
-export fn find(re: *regex, string: str) (void | []capture | error) = {
+export fn find(re: *regex, string: str) (void | []capture) = {
let str_idx = -1;
let str_iter = strings::iter(string);
return search(re, string, &str_iter, &str_idx, true);
@@ -793,12 +783,12 @@ export fn find(re: *regex, string: str) (void | []capture | error) = {
// Attempts to match a regular expression against a string and returns all
// non-overlapping matches, or void if there are no matches.
-export fn findall(re: *regex, string: str) (void | [][]capture | error) = {
+export fn findall(re: *regex, string: str) (void | [][]capture) = {
let res: [][]capture = alloc([]);
let str_idx = -1;
let str_iter = strings::iter(string);
for (true) {
- const findres = search(re, string, &str_iter, &str_idx, true)?;
+ const findres = search(re, string, &str_iter, &str_idx, true);
match (findres) {
case let m: []capture =>
append(res, m);