commit dc65431e3508369b64c30ea0561b1c5091bef392
parent cb7c2f4a4b3113561efd1e673400ee186aac1183
Author: Max Schillinger <max@mxsr.de>
Date: Sun, 21 Jul 2024 09:57:22 +0200
regex: allow ^ at start of every whole-expression alternation
Signed-off-by: Max Schillinger <max@mxsr.de>
Diffstat:
2 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/regex/+test.ha b/regex/+test.ha
@@ -585,6 +585,9 @@ fn run_rawreplace_case(
(`ab$|cd`, "ab", matchres::MATCH, 0, 2),
(`ab$|cd`, "abc", matchres::NOMATCH, 0, 0),
(`ab|cd$`, "cde", matchres::NOMATCH, 0, 0),
+ (`ab|^cd`, "bcd", matchres::NOMATCH, 0, 0),
+ (`ab|^cd`, "cde", matchres::MATCH, 0, 2),
+ (`ab\|^cd`, "cde", matchres::ERROR, 0, 0),
// multiple alternation
(`a|b|c|d|e`, "e", matchres::MATCH, 0, -1),
(`a|b|c|d|e`, "xe", matchres::MATCH, 1, -1),
@@ -593,6 +596,10 @@ fn run_rawreplace_case(
(`a|b$|c$|d$|e`, "ax", matchres::MATCH, 0, 1),
(`a|b$|c$|d$|e`, "cx", matchres::NOMATCH, 0, 0),
(`a|b$|c$|d$|e`, "ex", matchres::MATCH, 0, 1),
+ (`a|^b|^c|^d|e`, "cd", matchres::MATCH, 0, 1),
+ (`a|^b|^c|^d|e`, "xa", matchres::MATCH, 1, 2),
+ (`a|^b|^c|^d|e`, "xc", matchres::NOMATCH, 0, 0),
+ (`a|^b|^c|^d|e`, "xe", matchres::MATCH, 1, 2),
// TODO: nested capture groups
(`((a))`, "abc", matchres::ERROR, 0, -1),
// (`((a))`, "abc", matchres::MATCH, 0, -1),
diff --git a/regex/regex.ha b/regex/regex.ha
@@ -227,6 +227,7 @@ export fn compile(expr: str) (regex | error) = {
let skip_charclass_rest = false;
let bracket_idx = -1;
let is_charset_positive = true;
+ let was_prev_rune_pipe = false;
let n_reps = 0z;
let n_groupstarts = 0;
@@ -267,8 +268,11 @@ export fn compile(expr: str) (regex | error) = {
r_idx += 1;
};
case '^' =>
- if (r_idx != 0) {
- return `Anchor '^' not at start`: error;
+ if (n_groupstarts > 0) {
+ return `Anchor '^' in capture groups is unsupported`: error;
+ };
+ if (!(r_idx == 0 || was_prev_rune_pipe)) {
+ return `Anchor '^' not at start of whole pattern or alternation`: error;
};
case '$' =>
if (n_groupstarts > 0) {
@@ -319,7 +323,13 @@ export fn compile(expr: str) (regex | error) = {
append(jump_idxs, len(insts) - 1);
// add skip if it's a whole-expression alternation
if (origin == 0) {
- append(insts, inst_skip);
+ const peek1 = strings::next(&iter);
+ if (peek1 is rune) {
+ if (peek1 as rune != '^') {
+ append(insts, inst_skip);
+ };
+ strings::prev(&iter);
+ };
};
case '{' =>
let origin = len(insts) - 1;
@@ -408,6 +418,7 @@ export fn compile(expr: str) (regex | error) = {
case =>
append(insts, r: inst_lit);
};
+ was_prev_rune_pipe = (r == '|');
r_idx += 1;
};