commit 0806d8afc8ff8e22be9307098d432a499c67bfb2
parent 5c5cfb36e47cdaf80a3c292a32f6b9e9d9e48ab9
Author: Drew DeVault <sir@cmpwn.com>
Date: Mon, 1 Feb 2021 17:39:31 -0500
encoding::utf8: various improvements
Diffstat:
2 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/encoding/utf8/decode.ha b/encoding/utf8/decode.ha
@@ -44,30 +44,30 @@ export fn next(d: *decoder) (rune | void | more | invalid) = {
let bytes = d.src[d.offs..d.offs+n];
d.offs += n;
- let cp = 0u32;
+ let r = 0u32;
if (bytes[0] < 128u8) {
// ASCII
return bytes[0]: u32: rune;
};
const mask = masks[n - 1z];
- cp = bytes[0] & mask;
+ r = bytes[0] & mask;
for (let i = 1z; i < len(bytes); i += 1z) {
- cp <<= 6u8;
- cp |= bytes[i] & 0x3Fu8;
+ r <<= 6u8;
+ r |= bytes[i] & 0x3Fu8;
};
- return cp: rune;
+ return r: rune;
};
// Returns the length, in octets, of the UTF-8 sequence whose leading byte is
// src[0]. The length is taken from the first entry of the sizes table whose
// mask/result pair matches that byte; types::SIZE_MAX is returned when no
// entry matches. src must contain at least one byte.
fn utf8sz(src: []u8) size = {
- assert(sizes[0].octets == 1z);
-
// Only the first byte is inspected, so an empty slice is a caller error.
assert(len(src) > 0z);
+
let c = src[0];
// Entries are tried in table order and the first match wins.
// NOTE(review): in the visible part of the sizes table the 5- and 6-octet
// entries both use result 0xF8 (masks 0xFC and 0xFE), so every byte the
// 6-octet entry could match is claimed by the 5-octet entry first. The 6-octet
// result was presumably meant to be 0xFC; confirm against rune.ha.
for (let i = 0z; i < len(sizes); i += 1z) {
// Presumably parsed as (c & mask) == result; confirm against Hare's
// operator precedence rules.
if (c & sizes[i].mask == sizes[i].result) {
return sizes[i].octets;
};
};
+
// No table entry matched the leading byte; types::SIZE_MAX is the
// "no valid size" result handed back to the caller.
return types::SIZE_MAX;
};
diff --git a/encoding/utf8/rune.ha b/encoding/utf8/rune.ha
@@ -15,7 +15,6 @@ const sizes: [_]rsize = [
rsize { mask = 0xF8u8, result = 0xF0u8, octets = 4z },
rsize { mask = 0xFCu8, result = 0xF8u8, octets = 5z },
rsize { mask = 0xFEu8, result = 0xF8u8, octets = 6z },
- rsize { mask = 0x80u8, result = 0x80u8, octets = types::SIZE_MAX },
];
// Returns the size of a rune, in octets, when encoded as UTF-8.