hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 5d1151fafce314ddee1a2114072c25d12313a4f8
parent 9ea3ca7a283facd1ff7d29860920d8fc803b25a8
Author: Sebastian <sebastian@sebsite.pw>
Date:   Wed, 13 Apr 2022 20:33:24 -0400

regex: remove WORD charclass

This isn't specified by POSIX. Its functionality is equivalent to the ALNUM
charclass plus '_' (underscore).

Signed-off-by: Sebastian <sebastian@sebsite.pw>

Diffstat:
M regex/+test.ha | 2 --
M regex/regex.ha | 6 +-----
2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/regex/+test.ha b/regex/+test.ha @@ -286,8 +286,6 @@ fn run_findall_case( (`^test[[:space:]]+$`, "test ", matchres::MATCH, 0, -1), (`^test[[:upper:]]+$`, "testa", matchres::NOMATCH, 0, -1), (`^test[[:upper:]]+$`, "testA", matchres::MATCH, 0, -1), - (`^test[[:word:]]+$`, "test!2", matchres::NOMATCH, 0, -1), - (`^test[[:word:]]+$`, "test_2", matchres::MATCH, 0, -1), (`^test[[:xdigit:]]+$`, "testCAFE", matchres::MATCH, 0, -1), // [:alpha:] etc. plus extra characters (`^test[[:digit:]][[:alpha:]]$`, "test1a", matchres::MATCH, 0, -1), diff --git a/regex/regex.ha b/regex/regex.ha @@ -53,7 +53,7 @@ type newmatch = void; export type charclass = enum { ALNUM, ALPHA, BLANK, CNTRL, DIGIT, GRAPH, LOWER, PRINT, PUNCT, SPACE, - UPPER, WORD, XDIGIT, + UPPER, XDIGIT, }; export type charset = [](charset_lit_item | charset_range_item | charset_class_item), @@ -72,7 +72,6 @@ const charclass_names: [](charclass, str) = [ (charclass::PUNCT, ":punct:]"), (charclass::SPACE, ":space:]"), (charclass::UPPER, ":upper:]"), - (charclass::WORD, ":word:]"), (charclass::XDIGIT, ":xdigit:]"), ]; const charclass_fns: [](charclass, *fn(c: rune) bool) = [ @@ -87,7 +86,6 @@ const charclass_fns: [](charclass, *fn(c: rune) bool) = [ (charclass::PUNCT, &ascii::ispunct), (charclass::SPACE, &ascii::isspace), (charclass::UPPER, &ascii::isupper), - (charclass::WORD, &isword), (charclass::XDIGIT, &ascii::isxdigit), ]; const multibyte_err: error = "Character ranges do not support characters larger than one byte."; @@ -116,8 +114,6 @@ fn find_last_groupstart(insts: *[]inst) (size | error) = { return `Encountered ")" token without matching "("`: error; }; -fn isword(c: rune) bool = ascii::isalnum(c) || c == '_'; - fn handle_bracket( insts: *[]inst, r: rune,