commit ccd1a08ab5cd081145da1da3dc22eb3c8ecd1f49
parent 3bb391b7fd0cd073f9fdd1858a58a6254938a911
Author: Alexey Yerin <yyp@disroot.org>
Date: Sat, 30 Dec 2023 23:28:34 +0300
encoding::utf8: Return invalid from utf8sz on invalid starting byte
Signed-off-by: Alexey Yerin <yyp@disroot.org>
Diffstat:
2 files changed, 5 insertions(+), 14 deletions(-)
diff --git a/bufio/scanner.ha b/bufio/scanner.ha
@@ -206,12 +206,7 @@ export fn scan_rune(
yield;
};
};
- const sz = match (utf8::utf8sz(scan.buffer[0])) {
- case let z: size =>
- yield z;
- case void =>
- return utf8::invalid;
- };
+ const sz = utf8::utf8sz(scan.buffer[0])?;
for (scan.pending < sz) {
match (scan_readahead(scan)?) {
@@ -337,12 +332,7 @@ export fn read_rune(
return io::EOF;
};
- const sz = match (utf8::utf8sz(b[0])) {
- case let z: size =>
- yield z;
- case void =>
- return utf8::invalid;
- };
+ const sz = utf8::utf8sz(b[0])?;
if (sz == 1) {
return b[0]: rune;
diff --git a/encoding/utf8/rune.ha b/encoding/utf8/rune.ha
@@ -24,11 +24,12 @@ export fn runesz(r: rune) size = {
};
// Returns the expected length of a UTF-8 codepoint in bytes given its first
-// byte, or void if the given byte doesn't begin a valid UTF-8 sequence.
-export fn utf8sz(c: u8) (size | void) = {
+// byte, or [[invalid]] if the given byte doesn't begin a valid UTF-8 sequence.
+export fn utf8sz(c: u8) (size | invalid) = {
for (let i = 0z; i < len(sizes); i += 1) {
if (c & sizes[i].mask == sizes[i].result) {
return sizes[i].octets;
};
};
+ return invalid;
};