hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit e753d663de9a8fdd7333094545309e68f0c7c79f
parent 6cf28f9723ec32d5b4727499b6567704265ca5a6
Author: Byron Torres <b@torresjrjr.com>
Date:   Sat, 11 Feb 2023 01:56:23 +0000

regex: introduce result type; fix and improve docs

New type `result = []capture` for better semantics; it is a synonym for "match".

Duplicated documentation has been consolidated into the appropriate
docstrings, making the docs terser. The README example now uses assert in
place of non-obvious prints, and demonstrates capture groups.

Exported changes:

* [[free_captures]] -> [[result_free]]
* [[free_matches]]  -> [[result_freeall]]

Signed-off-by: Byron Torres <b@torresjrjr.com>

Diffstat:
M cmd/haredoc/color.ha | 2 +-
M regex/+test.ha | 40 ++++++++++++++++++++--------------------
M regex/README | 53 +++++++++++++----------------------------------------
M regex/regex.ha | 45 ++++++++++++++++++++++++++++-----------------
4 files changed, 62 insertions(+), 78 deletions(-)

diff --git a/cmd/haredoc/color.ha b/cmd/haredoc/color.ha @@ -52,7 +52,7 @@ fn init_colors() void = { defer regex::finish(&expr); const matches = regex::findall(&expr, env_colors); - defer regex::free_matches(matches); + defer regex::result_freeall(matches); for (let i = 0z; i < len(matches); i += 1) :colors { const (k, v) = (matches[i][1].content, matches[i][2].content); diff --git a/regex/+test.ha b/regex/+test.ha @@ -37,9 +37,9 @@ fn run_find_case( }; defer finish(&re); - const captures = find(&re, string); - defer free_captures(captures); - if (len(captures) == 0) { + const result = find(&re, string); + defer result_free(result); + if (len(result) == 0) { if (expected == matchres::MATCH) { fmt::errorfln("Expected expression /{}/ to match string \"{}\", but it did not", expr, string)!; @@ -52,14 +52,14 @@ fn run_find_case( abort(); }; - if (start: size != captures[0].start) { + if (start: size != result[0].start) { fmt::errorfln("Expected start of main capture to be {} but it was {}", - start, captures[0].start)!; + start, result[0].start)!; abort(); }; - if (end: size != captures[0].end) { + if (end: size != result[0].end) { fmt::errorfln("Expected end of main capture to be {} but it was {}", - end, captures[0].end)!; + end, result[0].end)!; abort(); }; }; @@ -73,11 +73,11 @@ fn run_submatch_case( const re = compile(expr)!; defer finish(&re); - const captures = find(&re, string); - defer free_captures(captures); - assert(len(captures) == len(targets), "Invalid number of captures"); + const result = find(&re, string); + defer result_free(result); + assert(len(result) == len(targets), "Invalid number of captures"); for (let i = 0z; i < len(targets); i += 1) { - assert(targets[i] == captures[i].content, "Invalid capture"); + assert(targets[i] == result[i].content, "Invalid capture"); }; }; @@ -106,28 +106,28 @@ fn run_findall_case( abort(); }; - const matches = findall(&re, string); - if (len(matches) == 0 && expected == matchres::MATCH) { + const results = 
findall(&re, string); + if (len(results) == 0 && expected == matchres::MATCH) { fmt::errorfln("Expected expression /{}/ to match string \"{}\", but it did not", expr, string)!; abort(); }; - defer free_matches(matches); + defer result_freeall(results); if (expected == matchres::NOMATCH) { fmt::errorfln("Expected expression /{}/ to not match string \"{}\", but it did", expr, string)!; abort(); }; - if (len(targets) != len(matches)) { - fmt::errorfln("Expected expression /{}/ to find {} matches but found {}", - expr, len(targets), len(matches))!; + if (len(targets) != len(results)) { + fmt::errorfln("Expected expression /{}/ to find {} results but found {}", + expr, len(targets), len(results))!; abort(); }; - for (let i = 0z; i < len(matches); i += 1) { - if (matches[i][0].content != targets[i]) { + for (let i = 0z; i < len(results); i += 1) { + if (results[i][0].content != targets[i]) { fmt::errorfln("Expected submatch of expression /{}/ to be {} but it was {}", - expr, targets[i], matches[i][0].content)!; + expr, targets[i], results[i][0].content)!; abort(); }; }; diff --git a/regex/README b/regex/README @@ -1,48 +1,21 @@ The regex module provides an implementation of regular expressions which adheres -closely to the POSIX Extended Regular Expressions (ERE) specification[0]. This -implementation computes matches in linear time. +closely to the POSIX Extended Regular Expressions (ERE) specification. -By default, matches will be found anywhere in the given string. The ^ and $ -characters can be used to anchor the match to the beginning or end of the -string. +See https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04 -find() returns a slice of [[capture]]s for the first match. The first -[[capture]] represents the entire matching string, while the rest represent the -matching substrings for the subexpressions, specified in the regular expression -using parentheses. +This module refers to a regular expression "match" as a [[result]]. 
The POSIX +match disambiguation rules are used; the longest of the leftmost matches is +returned. This implementation computes matches in linear time. -findall() finds all non-overlapping matches in the given string and returns -a slice of slices of [[capture]]s. - -This module implements the POSIX match disambiguation rules by returning -the longest match among the leftmost matches. - - const re = regex::compile(`[Hh]are`)!; + const re = regex::compile(`[H|h]ar(e|riet)`)!; defer regex::finish(&re); - const does_match = regex::test(&re, "Hello Hare, hello Hare."); - fmt::printfln("matched? {}", does_match)!; - - const captures = regex::find(&re, "Hello Hare, hello Hare."); - if (len(captures) != 0) { - defer regex::free_captures(captures); - // captures[0]: The full matching string. - // captures[1...]: A capture for every capture group. - fmt::printfln("{} ({}, {})", captures[0].content, - captures[0].start, - captures[0].end)!; - }; + assert(regex::test(&re, "Let's all love Harriet and hare")); - const matches = regex::findall(&re, "Hello Hare, hello Hare."); - defer regex::free_matches(matches); - // matches[0]: All captures for the first match. - // matches[0][0]: The full matching string for the first match. - // matches[0][1...]: A capture for every capture group in the - // first match. - for (let i = 0z; i < len(matches); i += 1) { - fmt::printfln("{} ({}, {})", matches[i][0].content, - matches[i][0].start, - matches[i][0].end)!; - }; + // {"Harriet", "riet"} + const result = regex::find(&re, "Let's all love Harriet and hare"); + defer regex::result_free(result); -[0]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04 + // {{"Harriet", "riet"}, {"hare", "e"}} + const results = regex::findall(&re, "Let's all love Harriet and hare"); + defer regex::result_freeall(results); diff --git a/regex/regex.ha b/regex/regex.ha @@ -6,7 +6,7 @@ use errors; use strconv; use strings; -// A string describing the error the occurred. 
+// An error string describing a compilation error. export type error = !str; export type inst_lit = rune, @@ -30,7 +30,16 @@ export type inst = (inst_lit | inst_any | inst_split | inst_jump | inst_groupstart | inst_groupend | inst_repeat); -// A (sub)match found as a result of matching a certain string against a regex. +// The resulting match of a [[regex]] applied to a string. +// +// The first [[capture]] corresponds to the implicit zeroth capture group, +// i.e. the whole expression. +// +// The rest of the [[capture]]s correspond to the rest of the capture groups, +// i.e. the sub-expressions. +export type result = []capture; + +// A (sub)match corresponding to a regular expression's capture group. export type capture = struct { content: str, start: size, @@ -85,7 +94,7 @@ export type regex = struct { n_reps: size, }; -// Frees the memory used by a regex. +// Frees resources associated with a [[regex]]. export fn finish(re: *regex) void = { free(re.insts); for (let i = 0z; i < len(re.charsets); i += 1) { @@ -203,7 +212,7 @@ fn handle_bracket( *r_idx += 1; }; -// Compiles a string containing a regular expression into a regex struct. +// Compiles a regular expression string into a [[regex]]. export fn compile(expr: str) (regex | error) = { let insts: []inst = []; let charsets: []charset = []; @@ -759,7 +768,7 @@ fn search( return void; }; -// Returns whether or not a regex matches a string. +// Returns whether or not a [[regex]] matches any part of a given string. export fn test(re: *regex, string: str) bool = { let str_idx = -1; let str_iter = strings::iter(string); @@ -771,9 +780,10 @@ export fn test(re: *regex, string: str) bool = { }; -// Attempts to match a regular expression against a string and returns the -// longest leftmost match, or void if there is no match. -export fn find(re: *regex, string: str) []capture = { +// Attempts to match a [[regex]] against a string and returns the longest +// leftmost match as a [[result]]. 
The caller must free the return value with +// [[result_free]]. +export fn find(re: *regex, string: str) result = { let str_idx = -1; let str_iter = strings::iter(string); let str_bytesize = 0z; @@ -785,9 +795,10 @@ export fn find(re: *regex, string: str) []capture = { }; }; -// Attempts to match a regular expression against a string and returns all -// non-overlapping matches, or void if there are no matches. -export fn findall(re: *regex, string: str) [][]capture = { +// Attempts to match a [[regex]] against a string and returns all +// non-overlapping matches as a slice of [[result]]s. The caller must free the +// return value with [[result_freeall]]. +export fn findall(re: *regex, string: str) []result = { let res: [][]capture = []; let str_idx = -1; let str_iter = strings::iter(string); @@ -812,18 +823,18 @@ export fn findall(re: *regex, string: str) [][]capture = { return res; }; -// Frees a slice of captures. -export fn free_captures(s: []capture) void = { +// Frees a [[result]]. +export fn result_free(s: result) void = { free(s); }; -// Frees each match in a slice of matches, as well as the slice itself. -export fn free_matches(s: [][]capture) void = { +// Frees a slice of [[result]]s. +export fn result_freeall(s: []result) void = { for (let i = 0z; i < len(s); i += 1) { - free_captures(s[i]); + result_free(s[i]); }; free(s); }; -// Converts regex [[error]] into a user-friendly string. +// Converts an [[error]] into a user-friendly string. export fn strerror(err: error) str = err;