hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit b8b8beb910da394e4d4991fd177ba660e940ec83
parent 7c4680cd316608e0cd9098575343b30fdfe27c06
Author: Vlad-Stefan Harbuz <vlad@vladh.net>
Date:   Sat, 14 May 2022 17:25:01 +0100

regex: find/findall/test can no longer error

Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>

Diffstat:
M regex/+test.ha | 58 +++++++++++++++++-----------------------------------------
M regex/README | 6 +++---
M regex/regex.ha | 46 ++++++++++++++++++----------------------------
3 files changed, 38 insertions(+), 72 deletions(-)

diff --git a/regex/+test.ha b/regex/+test.ha @@ -16,37 +16,34 @@ fn run_find_case( case let e: error => if (expected == matchres::MATCH) { fmt::println(e)!; - fmt::fatalf("Expected expression /{}/ to match, but it errored", + fmt::fatalf("Expected expression /{}/ to match string \"{}\", but it errored", expr, string); }; if (expected == matchres::NOMATCH) { fmt::println(e)!; - fmt::fatalf("Expected expression /{}/ to not match, but it errored", + fmt::fatalf("Expected expression /{}/ to not match string \"{}\", but it errored", expr, string); }; return; }; + if (expected == matchres::ERROR) { + fmt::fatalf("Expected expression /{}/ to have error caught during compilation, but it did not", + expr); + }; + match (find(&re, string)) { case void => if (expected == matchres::MATCH) { fmt::fatalf("Expected expression /{}/ to match string \"{}\", but it did not", expr, string); }; - if (expected == matchres::ERROR) { - fmt::fatalf("Expression /{}/ failed to match, but should have errored", - expr, string); - }; case let m: []capture => if (expected == matchres::NOMATCH) { fmt::fatalf("Expected expression /{}/ to not match string \"{}\", but it did", expr, string); }; - if (expected == matchres::ERROR) { - fmt::fatalf("Expression /{}/ matched, but should have errored", - expr, string); - }; if (start: size != m[0].start) { fmt::fatalf("Expected start of main capture to be {} but it was {}", start, m[0].start); @@ -55,16 +52,6 @@ fn run_find_case( fmt::fatalf("Expected end of main capture to be {} but it was {}", end, m[0].end); }; - - case let e: error => - if (expected == matchres::MATCH) { - fmt::fatalf("Expected expression /{}/ to match, but it errored", - expr, string); - }; - if (expected == matchres::NOMATCH) { - fmt::fatalf("Expected expression /{}/ to not match, but it errored", - expr, string); - }; }; }; @@ -90,40 +77,27 @@ fn run_findall_case( return; }; + if (expected == matchres::ERROR) { + fmt::fatalf("Expected expression /{}/ to have error caught during 
compilation, but it did not", + expr); + }; + match (findall(&re, string)) { case void => if (expected == matchres::MATCH) { fmt::fatalf("Expected expression /{}/ to match string \"{}\", but it did not", expr, string); }; - if (expected == matchres::ERROR) { - fmt::fatalf("Expression /{}/ failed to match, but should have errored", - expr, string); - }; case let groupsets: [][]capture => if (expected == matchres::NOMATCH) { fmt::fatalf("Expected expression /{}/ to not match string \"{}\", but it did", expr, string); }; - if (expected == matchres::ERROR) { - fmt::fatalf("Expression /{}/ matched, but should have errored", - expr, string); - }; if (count: size != len(groupsets)) { fmt::fatalf("Expected to find {} matches but found {}", count, len(groupsets)); }; - - case let e: error => - if (expected == matchres::MATCH) { - fmt::fatalf("Expected expression /{}/ to match, but it errored", - expr, string); - }; - if (expected == matchres::NOMATCH) { - fmt::fatalf("Expected expression /{}/ to not match, but it errored", - expr, string); - }; }; }; @@ -309,9 +283,10 @@ fn run_findall_case( (`^x(abc){1,2}$`, "xabc", matchres::MATCH, 0, -1), (`^x(abc){1,2}$`, "xabcabc", matchres::MATCH, 0, -1), (`^x(abc){1,2}$`, "xabcabcabc", matchres::NOMATCH, 0, -1), - (`^x(abc){,2}$`, "xabc", matchres::ERROR, 0, -1), - (`^x(abc){,2}$`, "xabcabc", matchres::ERROR, 0, -1), - (`^x(abc){,2}$`, "xabcabcabc", matchres::ERROR, 0, -1), + (`^x(abc){,2}$`, "xabc", matchres::MATCH, 0, -1), + (`^x(abc){,2}$`, "xabcabc", matchres::MATCH, 0, -1), + (`^x(abc){,2}`, "xabcabcabc", matchres::MATCH, 0, 7), + (`^x(abc){,2}$`, "xabcabcabc", matchres::NOMATCH, 0, -1), (`^x(abc){1,}$`, "xabc", matchres::MATCH, 0, -1), (`^x(abc){1,}$`, "xabcabc", matchres::MATCH, 0, -1), (`^x(abc){3,}$`, "xabcabc", matchres::NOMATCH, 0, -1), @@ -481,6 +456,7 @@ fn run_findall_case( // (`a|b|c|d|e`, "e", matchres::MATCH, 0, -1), // (`(a|b|c|d|e)f`, "ef", matchres::MATCH, 0, -1), // TODO: nested capture groups + (`((a))`, "abc", 
matchres::ERROR, 0, -1), // (`((a))`, "abc", matchres::MATCH, 0, -1), // (`((a)(b)c)(d)`, "abcd", matchres::MATCH, 0, -1), // (`(bc+d$|ef*g.|h?i(j|k))`, "effgz", matchres::MATCH, 0, -1), diff --git a/regex/README b/regex/README @@ -20,10 +20,10 @@ the longest match among the leftmost matches. const re = regex::compile(`[Hh]are`)!; defer regex::finish(&re); - const does_match = regex::test(&re, "Hello Hare, hello Hare.")!; + const does_match = regex::test(&re, "Hello Hare, hello Hare."); fmt::printfln("matched? {}", does_match)!; - const first_match = regex::find(&re, "Hello Hare, hello Hare.")!; + const first_match = regex::find(&re, "Hello Hare, hello Hare."); match (first_match) { case void => void; case let captures: []regex::capture => @@ -35,7 +35,7 @@ the longest match among the leftmost matches. captures[0].end)!; }; - const all_matches = regex::findall(&re, "Hello Hare, hello Hare.")!; + const all_matches = regex::findall(&re, "Hello Hare, hello Hare."); match (all_matches) { case void => void; case let matches: [][]regex::capture => diff --git a/regex/regex.ha b/regex/regex.ha @@ -90,7 +90,6 @@ const charclass_fns: [](charclass, *fn(c: rune) bool) = [ (charclass::UPPER, &ascii::isupper), (charclass::XDIGIT, &ascii::isxdigit), ]; -const multibyte_err: error = "Character ranges do not support characters larger than one byte."; export type regex = struct { insts: []inst, @@ -188,15 +187,11 @@ fn handle_bracket( }; } else if (is_range) { const start_enc = utf8::encoderune(r); - if (len(start_enc) > 1) { - return multibyte_err; - }; + assert(len(start_enc) == 1, "Character ranges do not currently support characters larger than one byte"); const start_b = start_enc[0]; const end_enc = utf8::encoderune(range_end as rune); - if (len(end_enc) > 1) { - return multibyte_err; - }; + assert(len(end_enc) == 1, "Character ranges do not currently support characters larger than one byte"); const end_b = end_enc[0]; if (end_b < start_b) { @@ -285,6 +280,9 @@ export fn 
compile(expr: str) (regex | error) = { append(insts, r: inst_lit); }; case '(' => + if (n_groupstarts > 0) { + return "Found nested capture groups in expression, which are not supported": error; + }; append(insts, void: inst_groupstart); n_groupstarts += 1; case ')' => @@ -441,6 +439,8 @@ fn parse_repetition( }; case => return "Invalid repetition minimum value": error; }; + } else { + min = 0; }; if (len(max_str) > 0) { @@ -455,10 +455,6 @@ fn parse_repetition( }; }; - if (len(min_str) == 0 && len(max_str) > 0) { - return "Invalid repetition minimum value": error; - }; - const rep_len = if (is_single_arg) { yield len(min_str); } else { @@ -509,7 +505,7 @@ fn run_thread( threads: *[]thread, r_or_end: (rune | void), str_idx: int -) (void | error | newmatch) = { +) (void | newmatch) = { if (threads[i].matched) { return; }; @@ -546,16 +542,12 @@ fn run_thread( threads[i].matched = true; return newmatch; case inst_groupstart => - if (threads[i].curr_capture_inited) { - return "Found nested capture groups in expression, which are not supported": error; - }; + assert(!threads[i].curr_capture_inited, "Found nested capture groups in expression, which are not supported"); threads[i].curr_capture.start = str_idx: size; threads[i].curr_capture_inited = true; threads[i].pc += 1; case inst_groupend => - if (!threads[i].curr_capture_inited) { - return `Found a groupend token ")" without having previously seen a groupstart token "("`: error; - }; + assert(threads[i].curr_capture_inited, `Found a groupend token ")" without having previously seen a groupstart token "(". 
Please report this as a bug`); threads[i].curr_capture.end = str_idx: size; // TODO: This is a perf issue for large strings threads[i].curr_capture.content = strings::sub(string, @@ -615,9 +607,7 @@ fn run_thread( }; case let range: charset_range_item => const r_enc = utf8::encoderune(r); - if (len(r_enc) > 1) { - return multibyte_err; - }; + assert(len(r_enc) == 1, "Character ranges do not currently support characters larger than one byte"); const r_b = r_enc[0]; if (r_b >= range.0 && r_b <= range.1) { // Succeeded if positive match @@ -653,7 +643,7 @@ fn search( str_iter: *strings::iterator, str_idx: *int, need_captures: bool -) (void | []capture | error) = { +) (void | []capture) = { let threads: []thread = alloc([ thread { captures = alloc([]), ... } ]); @@ -712,7 +702,7 @@ fn search( for (let i = 0z; i < len(threads); i += 1) { const res = run_thread(i, re, string, &threads, - r_or_end, *str_idx)?; + r_or_end, *str_idx); const matchlen = threads[i].root_capture.end - threads[i].root_capture.start; if (res is newmatch && matchlen > 0 && !need_captures) { @@ -773,10 +763,10 @@ fn search( }; // Returns whether or not a regex matches a string. -export fn test(re: *regex, string: str) (bool | error) = { +export fn test(re: *regex, string: str) bool = { let str_idx = -1; let str_iter = strings::iter(string); - match (search(re, string, &str_iter, &str_idx, false)?) { + match (search(re, string, &str_iter, &str_idx, false)) { case void => return false; case []capture => return true; }; @@ -785,7 +775,7 @@ export fn test(re: *regex, string: str) (bool | error) = { // Attempts to match a regular expression against a string and returns the // longest leftmost match, or void if there is no match. 
-export fn find(re: *regex, string: str) (void | []capture | error) = { +export fn find(re: *regex, string: str) (void | []capture) = { let str_idx = -1; let str_iter = strings::iter(string); return search(re, string, &str_iter, &str_idx, true); @@ -793,12 +783,12 @@ export fn find(re: *regex, string: str) (void | []capture | error) = { // Attempts to match a regular expression against a string and returns all // non-overlapping matches, or void if there are no matches. -export fn findall(re: *regex, string: str) (void | [][]capture | error) = { +export fn findall(re: *regex, string: str) (void | [][]capture) = { let res: [][]capture = alloc([]); let str_idx = -1; let str_iter = strings::iter(string); for (true) { - const findres = search(re, string, &str_iter, &str_idx, true)?; + const findres = search(re, string, &str_iter, &str_idx, true); match (findres) { case let m: []capture => append(res, m);