hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 038037b0c83c3781fce3a184de3c148ad8cea1a4
parent 0806d8afc8ff8e22be9307098d432a499c67bfb2
Author: Drew DeVault <sir@cmpwn.com>
Date:   Mon,  1 Feb 2021 17:56:53 -0500

strings: add strings::iter

strings::iter is similar to encoding::utf8::decoder, but it relies on
the invariant that all Hare strings are valid UTF-8, and saves you the
trouble of handling the case of a partial or invalid UTF-8 sequence.

Diffstat:
M encoding/utf8/decode.ha | 5 +++--
A strings/iter.ha | 24 ++++++++++++++++++++++++
2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/encoding/utf8/decode.ha b/encoding/utf8/decode.ha
@@ -1,6 +1,7 @@
-use strings;
 use types;
 
+fn to_utf8(in: str) []u8 = *(&in: *[]u8);
+
 // The state for the UTF-8 decoder.
 export type decoder = struct {
 	offs: size,
@@ -11,7 +12,7 @@ export type decoder = struct {
 export fn decode(src: (str | []u8)) decoder = match (src) {
 	s: str => decoder {
 		offs = 0z,
-		src = strings::to_utf8(s),
+		src = to_utf8(s),
 	},
 	b: []u8 => decoder {
 		offs = 0z,
diff --git a/strings/iter.ha b/strings/iter.ha
@@ -0,0 +1,24 @@
+use encoding::utf8;
+
+// An iterator which yields each rune from a string.
+//
+// [iterator] is very similar to [encoding::utf8::decoder], and in fact is based
+// on it. However, an invariant of Hare is that a 'str' type always contains a
+// complete, valid UTF-8 string. [iterator] differs from
+// [encoding::utf8::decoder] in that it assumes that the string is valid, and
+// aborts the program otherwise. This reduces the number of error cases your
+// code has to handle when you know you're only dealing with valid strings.
+export type iterator = utf8::decoder;
+
+// Initializes a string iterator.
+export fn iter(src: str) iterator = utf8::decode(src);
+
+// Get the next rune from an iterator, or void if there are none left.
+export fn next(iter: *iterator) (rune | void) = {
+	return match (utf8::next(iter)) {
+		r: rune => r,
+		void => void,
+		utf8::more => abort("Invalid UTF-8 string (this should not happen)"),
+		utf8::invalid => abort("Invalid UTF-8 string (this should not happen)"),
+	};
+};