commit e753d663de9a8fdd7333094545309e68f0c7c79f
parent 6cf28f9723ec32d5b4727499b6567704265ca5a6
Author: Byron Torres <b@torresjrjr.com>
Date: Sat, 11 Feb 2023 01:56:23 +0000
regex: introduce result type; fix and improve docs
New type result = []capture for better semantics; synonym for "match".
Repeated docs information has been reorganized into appropriate
docstrings, improving terseness. A better README example is used which
replaces unobvious prints with assert, and demonstrates capture groups.
Exported changes:
* [[free_captures]] -> [[result_free]]
* [[free_matches]] -> [[result_freeall]]
Signed-off-by: Byron Torres <b@torresjrjr.com>
Diffstat:
4 files changed, 62 insertions(+), 78 deletions(-)
diff --git a/cmd/haredoc/color.ha b/cmd/haredoc/color.ha
@@ -52,7 +52,7 @@ fn init_colors() void = {
defer regex::finish(&expr);
const matches = regex::findall(&expr, env_colors);
- defer regex::free_matches(matches);
+ defer regex::result_freeall(matches);
for (let i = 0z; i < len(matches); i += 1) :colors {
const (k, v) = (matches[i][1].content, matches[i][2].content);
diff --git a/regex/+test.ha b/regex/+test.ha
@@ -37,9 +37,9 @@ fn run_find_case(
};
defer finish(&re);
- const captures = find(&re, string);
- defer free_captures(captures);
- if (len(captures) == 0) {
+ const result = find(&re, string);
+ defer result_free(result);
+ if (len(result) == 0) {
if (expected == matchres::MATCH) {
fmt::errorfln("Expected expression /{}/ to match string \"{}\", but it did not",
expr, string)!;
@@ -52,14 +52,14 @@ fn run_find_case(
abort();
};
- if (start: size != captures[0].start) {
+ if (start: size != result[0].start) {
fmt::errorfln("Expected start of main capture to be {} but it was {}",
- start, captures[0].start)!;
+ start, result[0].start)!;
abort();
};
- if (end: size != captures[0].end) {
+ if (end: size != result[0].end) {
fmt::errorfln("Expected end of main capture to be {} but it was {}",
- end, captures[0].end)!;
+ end, result[0].end)!;
abort();
};
};
@@ -73,11 +73,11 @@ fn run_submatch_case(
const re = compile(expr)!;
defer finish(&re);
- const captures = find(&re, string);
- defer free_captures(captures);
- assert(len(captures) == len(targets), "Invalid number of captures");
+ const result = find(&re, string);
+ defer result_free(result);
+ assert(len(result) == len(targets), "Invalid number of captures");
for (let i = 0z; i < len(targets); i += 1) {
- assert(targets[i] == captures[i].content, "Invalid capture");
+ assert(targets[i] == result[i].content, "Invalid capture");
};
};
@@ -106,28 +106,28 @@ fn run_findall_case(
abort();
};
- const matches = findall(&re, string);
- if (len(matches) == 0 && expected == matchres::MATCH) {
+ const results = findall(&re, string);
+ if (len(results) == 0 && expected == matchres::MATCH) {
fmt::errorfln("Expected expression /{}/ to match string \"{}\", but it did not",
expr, string)!;
abort();
};
- defer free_matches(matches);
+ defer result_freeall(results);
if (expected == matchres::NOMATCH) {
fmt::errorfln("Expected expression /{}/ to not match string \"{}\", but it did",
expr, string)!;
abort();
};
- if (len(targets) != len(matches)) {
- fmt::errorfln("Expected expression /{}/ to find {} matches but found {}",
- expr, len(targets), len(matches))!;
+ if (len(targets) != len(results)) {
+ fmt::errorfln("Expected expression /{}/ to find {} results but found {}",
+ expr, len(targets), len(results))!;
abort();
};
- for (let i = 0z; i < len(matches); i += 1) {
- if (matches[i][0].content != targets[i]) {
+ for (let i = 0z; i < len(results); i += 1) {
+ if (results[i][0].content != targets[i]) {
fmt::errorfln("Expected submatch of expression /{}/ to be {} but it was {}",
- expr, targets[i], matches[i][0].content)!;
+ expr, targets[i], results[i][0].content)!;
abort();
};
};
diff --git a/regex/README b/regex/README
@@ -1,48 +1,21 @@
The regex module provides an implementation of regular expressions which adheres
-closely to the POSIX Extended Regular Expressions (ERE) specification[0]. This
-implementation computes matches in linear time.
+closely to the POSIX Extended Regular Expressions (ERE) specification.
-By default, matches will be found anywhere in the given string. The ^ and $
-characters can be used to anchor the match to the beginning or end of the
-string.
+See https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04
-find() returns a slice of [[capture]]s for the first match. The first
-[[capture]] represents the entire matching string, while the rest represent the
-matching substrings for the subexpressions, specified in the regular expression
-using parentheses.
+This module refers to a regular expression "match" as a [[result]]. The POSIX
+match disambiguation rules are used; the longest of the leftmost matches is
+returned. This implementation computes matches in linear time.
-findall() finds all non-overlapping matches in the given string and returns
-a slice of slices of [[capture]]s.
-
-This module implements the POSIX match disambiguation rules by returning
-the longest match among the leftmost matches.
-
- const re = regex::compile(`[Hh]are`)!;
+ const re = regex::compile(`[Hh]ar(e|riet)`)!;
defer regex::finish(&re);
- const does_match = regex::test(&re, "Hello Hare, hello Hare.");
- fmt::printfln("matched? {}", does_match)!;
-
- const captures = regex::find(&re, "Hello Hare, hello Hare.");
- if (len(captures) != 0) {
- defer regex::free_captures(captures);
- // captures[0]: The full matching string.
- // captures[1...]: A capture for every capture group.
- fmt::printfln("{} ({}, {})", captures[0].content,
- captures[0].start,
- captures[0].end)!;
- };
+ assert(regex::test(&re, "Let's all love Harriet and hare"));
- const matches = regex::findall(&re, "Hello Hare, hello Hare.");
- defer regex::free_matches(matches);
- // matches[0]: All captures for the first match.
- // matches[0][0]: The full matching string for the first match.
- // matches[0][1...]: A capture for every capture group in the
- // first match.
- for (let i = 0z; i < len(matches); i += 1) {
- fmt::printfln("{} ({}, {})", matches[i][0].content,
- matches[i][0].start,
- matches[i][0].end)!;
- };
+ // {"Harriet", "riet"}
+ const result = regex::find(&re, "Let's all love Harriet and hare");
+ defer regex::result_free(result);
-[0]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04
+ // {{"Harriet", "riet"}, {"hare", "e"}}
+ const results = regex::findall(&re, "Let's all love Harriet and hare");
+ defer regex::result_freeall(results);
diff --git a/regex/regex.ha b/regex/regex.ha
@@ -6,7 +6,7 @@ use errors;
use strconv;
use strings;
-// A string describing the error the occurred.
+// An error string describing a compilation error.
export type error = !str;
export type inst_lit = rune,
@@ -30,7 +30,16 @@ export type inst = (inst_lit | inst_any | inst_split | inst_jump |
inst_groupstart | inst_groupend |
inst_repeat);
-// A (sub)match found as a result of matching a certain string against a regex.
+// The resulting match of a [[regex]] applied to a string.
+//
+// The first [[capture]] corresponds to the implicit zeroth capture group,
+// i.e. the whole expression.
+//
+// The rest of the [[capture]]s correspond to the rest of the capture groups,
+// i.e. the sub-expressions.
+export type result = []capture;
+
+// A (sub)match corresponding to a regular expression's capture group.
export type capture = struct {
content: str,
start: size,
@@ -85,7 +94,7 @@ export type regex = struct {
n_reps: size,
};
-// Frees the memory used by a regex.
+// Frees resources associated with a [[regex]].
export fn finish(re: *regex) void = {
free(re.insts);
for (let i = 0z; i < len(re.charsets); i += 1) {
@@ -203,7 +212,7 @@ fn handle_bracket(
*r_idx += 1;
};
-// Compiles a string containing a regular expression into a regex struct.
+// Compiles a regular expression string into a [[regex]].
export fn compile(expr: str) (regex | error) = {
let insts: []inst = [];
let charsets: []charset = [];
@@ -759,7 +768,7 @@ fn search(
return void;
};
-// Returns whether or not a regex matches a string.
+// Returns whether or not a [[regex]] matches any part of a given string.
export fn test(re: *regex, string: str) bool = {
let str_idx = -1;
let str_iter = strings::iter(string);
@@ -771,9 +780,10 @@ export fn test(re: *regex, string: str) bool = {
};
-// Attempts to match a regular expression against a string and returns the
-// longest leftmost match, or void if there is no match.
-export fn find(re: *regex, string: str) []capture = {
+// Attempts to match a [[regex]] against a string and returns the longest
+// leftmost match as a [[result]], which is empty if there is no match. The
+// caller must free the return value with [[result_free]].
+export fn find(re: *regex, string: str) result = {
let str_idx = -1;
let str_iter = strings::iter(string);
let str_bytesize = 0z;
@@ -785,9 +795,10 @@ export fn find(re: *regex, string: str) []capture = {
};
};
-// Attempts to match a regular expression against a string and returns all
-// non-overlapping matches, or void if there are no matches.
-export fn findall(re: *regex, string: str) [][]capture = {
+// Attempts to match a [[regex]] against a string and returns all
+// non-overlapping matches as a slice of [[result]]s (empty if none). The
+// caller must free the return value with [[result_freeall]].
+export fn findall(re: *regex, string: str) []result = {
let res: [][]capture = [];
let str_idx = -1;
let str_iter = strings::iter(string);
@@ -812,18 +823,18 @@ export fn findall(re: *regex, string: str) [][]capture = {
return res;
};
-// Frees a slice of captures.
-export fn free_captures(s: []capture) void = {
+// Frees a [[result]].
+export fn result_free(s: result) void = {
free(s);
};
-// Frees each match in a slice of matches, as well as the slice itself.
-export fn free_matches(s: [][]capture) void = {
+// Frees a slice of [[result]]s.
+export fn result_freeall(s: []result) void = {
for (let i = 0z; i < len(s); i += 1) {
- free_captures(s[i]);
+ result_free(s[i]);
};
free(s);
};
-// Converts regex [[error]] into a user-friendly string.
+// Converts an [[error]] into a user-friendly string.
export fn strerror(err: error) str = err;