hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit cb7c2f4a4b3113561efd1e673400ee186aac1183
parent 2e40a9474e3e1c588ea0ba392c1c698dccf295f3
Author: Max Schillinger <max@mxsr.de>
Date:   Thu, 18 Jul 2024 20:47:14 +0200

regex: allow $ at end of every whole-expression alternation

Signed-off-by: Max Schillinger <max@mxsr.de>

Diffstat:
M regex/+test.ha | 7 +++++++
M regex/regex.ha | 18 +++++++++++++-----
2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/regex/+test.ha b/regex/+test.ha
@@ -582,10 +582,17 @@ fn run_rawreplace_case(
 		(`ab|cd`, "bcd", matchres::MATCH, 1, 3),
 		(`^ab|cd`, "bcd", matchres::MATCH, 1, 3),
 		(`^ab|cd`, "zab", matchres::NOMATCH, 0, 0),
+		(`ab$|cd`, "ab", matchres::MATCH, 0, 2),
+		(`ab$|cd`, "abc", matchres::NOMATCH, 0, 0),
+		(`ab|cd$`, "cde", matchres::NOMATCH, 0, 0),
 		// multiple alternation
 		(`a|b|c|d|e`, "e", matchres::MATCH, 0, -1),
 		(`a|b|c|d|e`, "xe", matchres::MATCH, 1, -1),
 		(`(a|b|c|d|e)f`, "ef", matchres::MATCH, 0, -1),
+		(`a|b$|c$|d$|e`, "cd", matchres::MATCH, 1, -1),
+		(`a|b$|c$|d$|e`, "ax", matchres::MATCH, 0, 1),
+		(`a|b$|c$|d$|e`, "cx", matchres::NOMATCH, 0, 0),
+		(`a|b$|c$|d$|e`, "ex", matchres::MATCH, 0, 1),
 		// TODO: nested capture groups
 		(`((a))`, "abc", matchres::ERROR, 0, -1),
 		// (`((a))`, "abc", matchres::MATCH, 0, -1),
diff --git a/regex/regex.ha b/regex/regex.ha
@@ -222,7 +222,6 @@ export fn compile(expr: str) (regex | error) = {
 	let charsets: []charset = [];
 	let iter = strings::iter(expr);
 	let r_idx = 0z;
-	let anchored = false;
 	let jump_idxs: []size = [];
 	let in_bracket = false;
 	let skip_charclass_rest = false;
@@ -272,10 +271,17 @@ export fn compile(expr: str) (regex | error) = {
 				return `Anchor '^' not at start`: error;
 			};
 		case '$' =>
-			if (r_idx != len(expr) - 1) {
-				return `Anchor '$' not at end`: error;
+			if (n_groupstarts > 0) {
+				return `Anchor '$' in capture groups is unsupported`: error;
 			};
-			anchored = true;
+			const peek1 = strings::next(&iter);
+			if (peek1 is rune) {
+				if (peek1 as rune != '|') {
+					return `Anchor '$' not at end of whole pattern or alternation`: error;
+				};
+				strings::prev(&iter);
+			};
+			append(insts, true: inst_match);
 		case '[' =>
 			in_bracket = true;
 		case ']' =>
@@ -412,7 +418,9 @@ export fn compile(expr: str) (regex | error) = {
 	};
 	jump_idxs = [];
 
-	append(insts, anchored: inst_match);
+	if (len(insts) == 0 || !(insts[len(insts) - 1] is inst_match)) {
+		append(insts, false: inst_match);
+	};
 
 	return regex {
 		insts = insts,