commit 3e156307174d18e17a60a65a18e7ec40cf7a40fb
parent 8da4b7361169870e902eaa1ce4d98e84d22270a9
Author: Adnan Maolood <adnan@maolood.com>
Date: Tue, 14 Mar 2023 12:56:15 -0400
regex: Handle escaped characters in brackets
Signed-off-by: Adnan Maolood <adnan@maolood.com>
Diffstat:
2 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/regex/+test.ha b/regex/+test.ha
@@ -333,6 +333,9 @@ fn run_findall_case(
(`^a\\b$`, "a\\b", matchres::MATCH, 0, -1),
(`^x(abc)\{,2\}$`, "xabc{,2}", matchres::MATCH, 0, -1),
(`^x(abc)\{,2\}$`, "xabcabc{,2}", matchres::NOMATCH, 0, -1),
+ (`^[\\]+$`, "\\", matchres::MATCH, 0, -1),
+ (`^[\]]+$`, "]", matchres::MATCH, 0, -1),
+ (`^[A-Za-z\[\]]+$`, "foo[bar]baz", matchres::MATCH, 0, -1),
// {m,n}
(`^x(abc){2}$`, "xabcabc", matchres::MATCH, 0, -1),
(`^x(abc){3}$`, "xabcabc", matchres::NOMATCH, 0, -1),
diff --git a/regex/regex.ha b/regex/regex.ha
@@ -146,7 +146,17 @@ fn handle_bracket(
const range_end = peek2;
const is_first_char = *bracket_idx == 0 || *bracket_idx == 1
&& !*is_charset_positive;
- if (r == ']' && !is_first_char) {
+
+ if (r == '\\') {
+ if (peek1 is void) {
+ return `Trailing backslash '\'`: error;
+ } else {
+ append(charsets[len(charsets) - 1],
+ peek1: charset_lit_item);
+ strings::next(iter);
+ *r_idx += 1;
+ };
+ } else if (r == ']' && !is_first_char) {
const newinst = inst_charset {
idx = len(charsets) - 1,
is_positive = *is_charset_positive,
@@ -212,7 +222,7 @@ export fn compile(expr: str) (regex | error) = {
const next = strings::next(&iter);
if (r_idx == 0 && next is rune && next: rune != '^') {
- append(insts, void: inst_skip);
+ append(insts, void: inst_skip);
};
if (in_bracket) {
@@ -256,11 +266,7 @@ export fn compile(expr: str) (regex | error) = {
case '[' =>
in_bracket = true;
case ']' =>
- if (in_bracket) {
- in_bracket = false;
- } else {
- append(insts, r: inst_lit);
- };
+ append(insts, r: inst_lit);
case '(' =>
if (n_groupstarts > 0) {
return `Nested capture groups are unsupported`: error;