commit 038037b0c83c3781fce3a184de3c148ad8cea1a4
parent 0806d8afc8ff8e22be9307098d432a499c67bfb2
Author: Drew DeVault <sir@cmpwn.com>
Date: Mon, 1 Feb 2021 17:56:53 -0500
strings: add strings::iter
strings::iter is similar to encoding::utf8::decoder, but it relies on
the invariant that all Hare strings are valid UTF-8, and saves you the
trouble of handling the case of a partial or invalid UTF-8 sequence.
Diffstat:
2 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/encoding/utf8/decode.ha b/encoding/utf8/decode.ha
@@ -1,6 +1,7 @@
-use strings;
use types;
+// Reinterprets the representation of a 'str' as a '[]u8' by casting a pointer
+// to the string to a pointer to a byte slice and dereferencing it.
+// NOTE(review): presumably a private copy of strings::to_utf8, kept local so
+// this module no longer needs to import strings (which now imports
+// encoding::utf8) -- confirm.
+fn to_utf8(in: str) []u8 = *(&in: *[]u8);
+
// The state for the UTF-8 decoder.
export type decoder = struct {
offs: size,
@@ -11,7 +12,7 @@ export type decoder = struct {
export fn decode(src: (str | []u8)) decoder = match (src) {
s: str => decoder {
offs = 0z,
- src = strings::to_utf8(s),
+ src = to_utf8(s),
},
b: []u8 => decoder {
offs = 0z,
diff --git a/strings/iter.ha b/strings/iter.ha
@@ -0,0 +1,24 @@
+use encoding::utf8;
+
+// An iterator which yields each rune from a string. Create one with [iter]
+// and advance it with [next].
+//
+// [iterator] is very similar to [encoding::utf8::decoder], and in fact is
+// defined as that type. However, an invariant of Hare is that a 'str' type
+// always contains a complete, valid UTF-8 string. [iterator] differs from
+// [encoding::utf8::decoder] in that [next] assumes that the string is valid,
+// and aborts the program otherwise. This reduces the number of error cases
+// your code has to handle when you know you're only dealing with valid
+// strings.
+export type iterator = utf8::decoder;
+
+// Initializes a string iterator over the runes of 'src' by wrapping it in an
+// [encoding::utf8::decoder].
+export fn iter(src: str) iterator = utf8::decode(src);
+
+// Get the next rune from an iterator, or void if there are none left.
+//
+// Aborts the program if the underlying decoder reports a partial
+// ([encoding::utf8::more]) or invalid ([encoding::utf8::invalid]) sequence.
+export fn next(iter: *iterator) (rune | void) = {
+ return match (utf8::next(iter)) {
+ r: rune => r,
+ void => void,
+ // A 'str' is required to hold complete, valid UTF-8, so neither of the
+ // following results should be reachable; both are treated as fatal.
+ utf8::more => abort("Invalid UTF-8 string (this should not happen)"),
+ utf8::invalid => abort("Invalid UTF-8 string (this should not happen)"),
+ };
+};