hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit dc65431e3508369b64c30ea0561b1c5091bef392
parent cb7c2f4a4b3113561efd1e673400ee186aac1183
Author: Max Schillinger <max@mxsr.de>
Date:   Sun, 21 Jul 2024 09:57:22 +0200

regex: allow ^ at start of every whole-expression alternation

Signed-off-by: Max Schillinger <max@mxsr.de>

Diffstat:
M regex/+test.ha | 7 +++++++
M regex/regex.ha | 17 ++++++++++++++---
2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/regex/+test.ha b/regex/+test.ha @@ -585,6 +585,9 @@ fn run_rawreplace_case( (`ab$|cd`, "ab", matchres::MATCH, 0, 2), (`ab$|cd`, "abc", matchres::NOMATCH, 0, 0), (`ab|cd$`, "cde", matchres::NOMATCH, 0, 0), + (`ab|^cd`, "bcd", matchres::NOMATCH, 0, 0), + (`ab|^cd`, "cde", matchres::MATCH, 0, 2), + (`ab\|^cd`, "cde", matchres::ERROR, 0, 0), // multiple alternation (`a|b|c|d|e`, "e", matchres::MATCH, 0, -1), (`a|b|c|d|e`, "xe", matchres::MATCH, 1, -1), @@ -593,6 +596,10 @@ fn run_rawreplace_case( (`a|b$|c$|d$|e`, "ax", matchres::MATCH, 0, 1), (`a|b$|c$|d$|e`, "cx", matchres::NOMATCH, 0, 0), (`a|b$|c$|d$|e`, "ex", matchres::MATCH, 0, 1), + (`a|^b|^c|^d|e`, "cd", matchres::MATCH, 0, 1), + (`a|^b|^c|^d|e`, "xa", matchres::MATCH, 1, 2), + (`a|^b|^c|^d|e`, "xc", matchres::NOMATCH, 0, 0), + (`a|^b|^c|^d|e`, "xe", matchres::MATCH, 1, 2), // TODO: nested capture groups (`((a))`, "abc", matchres::ERROR, 0, -1), // (`((a))`, "abc", matchres::MATCH, 0, -1), diff --git a/regex/regex.ha b/regex/regex.ha @@ -227,6 +227,7 @@ export fn compile(expr: str) (regex | error) = { let skip_charclass_rest = false; let bracket_idx = -1; let is_charset_positive = true; + let was_prev_rune_pipe = false; let n_reps = 0z; let n_groupstarts = 0; @@ -267,8 +268,11 @@ export fn compile(expr: str) (regex | error) = { r_idx += 1; }; case '^' => - if (r_idx != 0) { - return `Anchor '^' not at start`: error; + if (n_groupstarts > 0) { + return `Anchor '^' in capture groups is unsupported`: error; + }; + if (!(r_idx == 0 || was_prev_rune_pipe)) { + return `Anchor '^' not at start of whole pattern or alternation`: error; }; case '$' => if (n_groupstarts > 0) { @@ -319,7 +323,13 @@ export fn compile(expr: str) (regex | error) = { append(jump_idxs, len(insts) - 1); // add skip if it's a whole-expression alternation if (origin == 0) { - append(insts, inst_skip); + const peek1 = strings::next(&iter); + if (peek1 is rune) { + if (peek1 as rune != '^') { + append(insts, inst_skip); + }; + 
strings::prev(&iter); + }; }; case '{' => let origin = len(insts) - 1; @@ -408,6 +418,7 @@ export fn compile(expr: str) (regex | error) = { case => append(insts, r: inst_lit); }; + was_prev_rune_pipe = (r == '|'); r_idx += 1; };