hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 3e156307174d18e17a60a65a18e7ec40cf7a40fb
parent 8da4b7361169870e902eaa1ce4d98e84d22270a9
Author: Adnan Maolood <adnan@maolood.com>
Date:   Tue, 14 Mar 2023 12:56:15 -0400

regex: Handle escaped characters in brackets

Signed-off-by: Adnan Maolood <adnan@maolood.com>

Diffstat:
M regex/+test.ha | 3 +++
M regex/regex.ha | 20 +++++++++++++-------
2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/regex/+test.ha b/regex/+test.ha @@ -333,6 +333,9 @@ fn run_findall_case( (`^a\\b$`, "a\\b", matchres::MATCH, 0, -1), (`^x(abc)\{,2\}$`, "xabc{,2}", matchres::MATCH, 0, -1), (`^x(abc)\{,2\}$`, "xabcabc{,2}", matchres::NOMATCH, 0, -1), + (`^[\\]+$`, "\\", matchres::MATCH, 0, -1), + (`^[\]]+$`, "]", matchres::MATCH, 0, -1), + (`^[A-Za-z\[\]]+$`, "foo[bar]baz", matchres::MATCH, 0, -1), // {m,n} (`^x(abc){2}$`, "xabcabc", matchres::MATCH, 0, -1), (`^x(abc){3}$`, "xabcabc", matchres::NOMATCH, 0, -1), diff --git a/regex/regex.ha b/regex/regex.ha @@ -146,7 +146,17 @@ fn handle_bracket( const range_end = peek2; const is_first_char = *bracket_idx == 0 || *bracket_idx == 1 && !*is_charset_positive; - if (r == ']' && !is_first_char) { + + if (r == '\\') { + if (peek1 is void) { + return `Trailing backslash '\'`: error; + } else { + append(charsets[len(charsets) - 1], + peek1: charset_lit_item); + strings::next(iter); + *r_idx += 1; + }; + } else if (r == ']' && !is_first_char) { const newinst = inst_charset { idx = len(charsets) - 1, is_positive = *is_charset_positive, @@ -212,7 +222,7 @@ export fn compile(expr: str) (regex | error) = { const next = strings::next(&iter); if (r_idx == 0 && next is rune && next: rune != '^') { - append(insts, void: inst_skip); + append(insts, void: inst_skip); }; if (in_bracket) { @@ -256,11 +266,7 @@ export fn compile(expr: str) (regex | error) = { case '[' => in_bracket = true; case ']' => - if (in_bracket) { - in_bracket = false; - } else { - append(insts, r: inst_lit); - }; + append(insts, r: inst_lit); case '(' => if (n_groupstarts > 0) { return `Nested capture groups are unsupported`: error;