hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 0173fccbb92e5cbed8bc8839a23b5946b8865031
parent 24f5278aeb1a92164a802b14ad2542e92c4cdb4a
Author: Sebastian <sebastian@sebsite.pw>
Date:   Mon, 28 Mar 2022 21:08:54 -0400

utf8sz: return void for invalid instead of SIZE_MAX

Signed-off-by: Sebastian <sebastian@sebsite.pw>

Diffstat:
M bufio/scanner.ha | 6 ++++--
M encoding/utf8/decode.ha | 21 +++++++++++++--------
2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/bufio/scanner.ha b/bufio/scanner.ha @@ -64,8 +64,10 @@ export fn scanrune( return io::EOF; }; - const sz = utf8::utf8sz(b[0]); - if (sz == types::SIZE_MAX) { + const sz = match (utf8::utf8sz(b[0])) { + case let z: size => + yield z; + case void => return utf8::invalid; }; diff --git a/encoding/utf8/decode.ha b/encoding/utf8/decode.ha @@ -35,10 +35,13 @@ export fn next(d: *decoder) (rune | void | more | invalid) = { }; // XXX: It would be faster if we decoded and measured at the same time. - const n = utf8sz(d.src[d.offs]); - if (n == types::SIZE_MAX) { + const n = match (utf8sz(d.src[d.offs])) { + case let z: size => + yield z; + case void => return invalid; - } else if (d.offs + n > len(d.src)) { + }; + if (d.offs + n > len(d.src)) { return more; }; let bytes = d.src[d.offs..d.offs+n]; @@ -84,10 +87,12 @@ export fn prev(d: *decoder) (rune | void | more | invalid) = { return more; }; d.offs -= n; - if (n != utf8sz(d.src[d.offs])) { + match (utf8sz(d.src[d.offs])) { + case let z: size => + return if (n == z) r: rune else invalid; + case void => return invalid; }; - return r: rune; }; @test fn decode() void = { @@ -150,12 +155,12 @@ export fn valid(src: (str | []u8)) bool = { abort(); }; -// Returns the expected length of a UTF-8 character in bytes. -export fn utf8sz(c: u8) size = { +// Returns the expected length of a UTF-8 codepoint in bytes given its first +// byte, or void if the given byte doesn't begin a valid UTF-8 sequence. +export fn utf8sz(c: u8) (size | void) = { for (let i = 0z; i < len(sizes); i += 1) { if (c & sizes[i].mask == sizes[i].result) { return sizes[i].octets; }; }; - return types::SIZE_MAX; };