strings: add strings::iter - hare - The Hare programming language

commit 038037b0c83c3781fce3a184de3c148ad8cea1a4
parent 0806d8afc8ff8e22be9307098d432a499c67bfb2
Author: Drew DeVault <sir@cmpwn.com>
Date:   Mon,  1 Feb 2021 17:56:53 -0500

strings: add strings::iter

strings::iter is similar to encoding::utf8::decoder, but it relies on
the invariant that all Hare strings are valid UTF-8, and saves you the
trouble of handling the case of a partial or invalid UTF-8 sequence.

Diffstat:
M encoding/utf8/decode.ha  | 5 +++--
A strings/iter.ha  | 24 ++++++++++++++++++++++++

2 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/encoding/utf8/decode.ha b/encoding/utf8/decode.ha
@@ -1,6 +1,7 @@
-use strings;
 use types;
 
+fn to_utf8(in: str) []u8 = *(&in: *[]u8);
+
 // The state for the UTF-8 decoder.
 export type decoder = struct {
 	offs: size,
@@ -11,7 +12,7 @@ export type decoder = struct {
 export fn decode(src: (str | []u8)) decoder = match (src) {
 	s: str => decoder {
 		offs = 0z,
-		src  = strings::to_utf8(s),
+		src  = to_utf8(s),
 	},
 	b: []u8 => decoder {
 		offs = 0z,
diff --git a/strings/iter.ha b/strings/iter.ha
@@ -0,0 +1,24 @@
+use encoding::utf8;
+
+// An iterator which yields each rune from a string.
+//
+// [iterator] is very similar to [encoding::utf8::decoder], and in fact is based
+// on it. However, an invariant of Hare is that a 'str' type always contains a
+// complete, valid UTF-8 string. [iterator] differs from
+// [encoding::utf8::decoder] in that it assumes that the string is valid, and
+// aborts the program otherwise. This reduces the number of error cases your
+// code has to handle when you know you're only dealing with valid strings.
+export type iterator = utf8::decoder;
+
+// Initializes a string iterator.
+export fn iter(src: str) iterator = utf8::decode(src);
+
+// Get the next rune from an iterator, or void if there are none left.
+export fn next(iter: *iterator) (rune | void) = {
+	return match (utf8::next(iter)) {
+		r: rune       => r,
+		void          => void,
+		utf8::more    => abort("Invalid UTF-8 string (this should not happen)"),
+		utf8::invalid => abort("Invalid UTF-8 string (this should not happen)"),
+	};
+};

	hare The Hare programming language
	git clone https://git.torresjrjr.com/hare.git
	Log | Files | Refs | README | LICENSE

M	encoding/utf8/decode.ha	|	5	+++--
A	strings/iter.ha	|	24	++++++++++++++++++++++++