commit cb7c2f4a4b3113561efd1e673400ee186aac1183
parent 2e40a9474e3e1c588ea0ba392c1c698dccf295f3
Author: Max Schillinger <max@mxsr.de>
Date: Thu, 18 Jul 2024 20:47:14 +0200
regex: allow $ at end of every whole-expression alternation
Signed-off-by: Max Schillinger <max@mxsr.de>
Diffstat:
2 files changed, 20 insertions(+), 5 deletions(-)
diff --git a/regex/+test.ha b/regex/+test.ha
@@ -582,10 +582,17 @@ fn run_rawreplace_case(
(`ab|cd`, "bcd", matchres::MATCH, 1, 3),
(`^ab|cd`, "bcd", matchres::MATCH, 1, 3),
(`^ab|cd`, "zab", matchres::NOMATCH, 0, 0),
+ (`ab$|cd`, "ab", matchres::MATCH, 0, 2),
+ (`ab$|cd`, "abc", matchres::NOMATCH, 0, 0),
+ (`ab|cd$`, "cde", matchres::NOMATCH, 0, 0),
// multiple alternation
(`a|b|c|d|e`, "e", matchres::MATCH, 0, -1),
(`a|b|c|d|e`, "xe", matchres::MATCH, 1, -1),
(`(a|b|c|d|e)f`, "ef", matchres::MATCH, 0, -1),
+ (`a|b$|c$|d$|e`, "cd", matchres::MATCH, 1, -1),
+ (`a|b$|c$|d$|e`, "ax", matchres::MATCH, 0, 1),
+ (`a|b$|c$|d$|e`, "cx", matchres::NOMATCH, 0, 0),
+ (`a|b$|c$|d$|e`, "ex", matchres::MATCH, 0, 1),
// TODO: nested capture groups
(`((a))`, "abc", matchres::ERROR, 0, -1),
// (`((a))`, "abc", matchres::MATCH, 0, -1),
diff --git a/regex/regex.ha b/regex/regex.ha
@@ -222,7 +222,6 @@ export fn compile(expr: str) (regex | error) = {
let charsets: []charset = [];
let iter = strings::iter(expr);
let r_idx = 0z;
- let anchored = false;
let jump_idxs: []size = [];
let in_bracket = false;
let skip_charclass_rest = false;
@@ -272,10 +271,17 @@ export fn compile(expr: str) (regex | error) = {
return `Anchor '^' not at start`: error;
};
case '$' =>
- if (r_idx != len(expr) - 1) {
- return `Anchor '$' not at end`: error;
+ if (n_groupstarts > 0) {
+ return `Anchor '$' in capture groups is unsupported`: error;
};
- anchored = true;
+ const peek1 = strings::next(&iter);
+ if (peek1 is rune) {
+ if (peek1 as rune != '|') {
+ return `Anchor '$' not at end of whole pattern or alternation`: error;
+ };
+ strings::prev(&iter);
+ };
+ append(insts, true: inst_match);
case '[' =>
in_bracket = true;
case ']' =>
@@ -412,7 +418,9 @@ export fn compile(expr: str) (regex | error) = {
};
jump_idxs = [];
- append(insts, anchored: inst_match);
+ if (len(insts) == 0 || !(insts[len(insts) - 1] is inst_match)) {
+ append(insts, false: inst_match);
+ };
return regex {
insts = insts,