commit 5d1151fafce314ddee1a2114072c25d12313a4f8
parent 9ea3ca7a283facd1ff7d29860920d8fc803b25a8
Author: Sebastian <sebastian@sebsite.pw>
Date: Wed, 13 Apr 2022 20:33:24 -0400
regex: remove WORD charclass
This isn't specified by POSIX. Its functionality is equivalent to the ALNUM
charclass plus '_' (underscore).
Signed-off-by: Sebastian <sebastian@sebsite.pw>
Diffstat:
2 files changed, 1 insertion(+), 7 deletions(-)
diff --git a/regex/+test.ha b/regex/+test.ha
@@ -286,8 +286,6 @@ fn run_findall_case(
(`^test[[:space:]]+$`, "test ", matchres::MATCH, 0, -1),
(`^test[[:upper:]]+$`, "testa", matchres::NOMATCH, 0, -1),
(`^test[[:upper:]]+$`, "testA", matchres::MATCH, 0, -1),
- (`^test[[:word:]]+$`, "test!2", matchres::NOMATCH, 0, -1),
- (`^test[[:word:]]+$`, "test_2", matchres::MATCH, 0, -1),
(`^test[[:xdigit:]]+$`, "testCAFE", matchres::MATCH, 0, -1),
// [:alpha:] etc. plus extra characters
(`^test[[:digit:]][[:alpha:]]$`, "test1a", matchres::MATCH, 0, -1),
diff --git a/regex/regex.ha b/regex/regex.ha
@@ -53,7 +53,7 @@ type newmatch = void;
export type charclass = enum {
ALNUM, ALPHA, BLANK, CNTRL, DIGIT, GRAPH, LOWER, PRINT, PUNCT, SPACE,
- UPPER, WORD, XDIGIT,
+ UPPER, XDIGIT,
};
export type charset = [](charset_lit_item | charset_range_item |
charset_class_item),
@@ -72,7 +72,6 @@ const charclass_names: [](charclass, str) = [
(charclass::PUNCT, ":punct:]"),
(charclass::SPACE, ":space:]"),
(charclass::UPPER, ":upper:]"),
- (charclass::WORD, ":word:]"),
(charclass::XDIGIT, ":xdigit:]"),
];
const charclass_fns: [](charclass, *fn(c: rune) bool) = [
@@ -87,7 +86,6 @@ const charclass_fns: [](charclass, *fn(c: rune) bool) = [
(charclass::PUNCT, &ascii::ispunct),
(charclass::SPACE, &ascii::isspace),
(charclass::UPPER, &ascii::isupper),
- (charclass::WORD, &isword),
(charclass::XDIGIT, &ascii::isxdigit),
];
const multibyte_err: error = "Character ranges do not support characters larger than one byte.";
@@ -116,8 +114,6 @@ fn find_last_groupstart(insts: *[]inst) (size | error) = {
return `Encountered ")" token without matching "("`: error;
};
-fn isword(c: rune) bool = ascii::isalnum(c) || c == '_';
-
fn handle_bracket(
insts: *[]inst,
r: rune,