commit 0173fccbb92e5cbed8bc8839a23b5946b8865031
parent 24f5278aeb1a92164a802b14ad2542e92c4cdb4a
Author: Sebastian <sebastian@sebsite.pw>
Date: Mon, 28 Mar 2022 21:08:54 -0400
utf8sz: return void for invalid instead of SIZE_MAX
Signed-off-by: Sebastian <sebastian@sebsite.pw>
Diffstat:
2 files changed, 17 insertions(+), 10 deletions(-)
diff --git a/bufio/scanner.ha b/bufio/scanner.ha
@@ -64,8 +64,10 @@ export fn scanrune(
return io::EOF;
};
- const sz = utf8::utf8sz(b[0]);
- if (sz == types::SIZE_MAX) {
+ const sz = match (utf8::utf8sz(b[0])) {
+ case let z: size =>
+ yield z;
+ case void =>
return utf8::invalid;
};
diff --git a/encoding/utf8/decode.ha b/encoding/utf8/decode.ha
@@ -35,10 +35,13 @@ export fn next(d: *decoder) (rune | void | more | invalid) = {
};
// XXX: It would be faster if we decoded and measured at the same time.
- const n = utf8sz(d.src[d.offs]);
- if (n == types::SIZE_MAX) {
+ const n = match (utf8sz(d.src[d.offs])) {
+ case let z: size =>
+ yield z;
+ case void =>
return invalid;
- } else if (d.offs + n > len(d.src)) {
+ };
+ if (d.offs + n > len(d.src)) {
return more;
};
let bytes = d.src[d.offs..d.offs+n];
@@ -84,10 +87,12 @@ export fn prev(d: *decoder) (rune | void | more | invalid) = {
return more;
};
d.offs -= n;
- if (n != utf8sz(d.src[d.offs])) {
+ match (utf8sz(d.src[d.offs])) {
+ case let z: size =>
+ return if (n == z) r: rune else invalid;
+ case void =>
return invalid;
};
- return r: rune;
};
@test fn decode() void = {
@@ -150,12 +155,12 @@ export fn valid(src: (str | []u8)) bool = {
abort();
};
-// Returns the expected length of a UTF-8 character in bytes.
-export fn utf8sz(c: u8) size = {
+// Returns the expected length of a UTF-8 codepoint in bytes given its first
+// byte, or void if the given byte doesn't begin a valid UTF-8 sequence.
+export fn utf8sz(c: u8) (size | void) = {
for (let i = 0z; i < len(sizes); i += 1) {
if (c & sizes[i].mask == sizes[i].result) {
return sizes[i].octets;
};
};
- return types::SIZE_MAX;
};