rune.ha (938B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

// One row of the lead-byte classification table. A byte b matches the row when
// (b & mask) == result; a matching row reports a sequence length of "octets".
type rsize = struct {
	mask: u8,
	result: u8,
	octets: size,
};

// Lead-byte patterns consulted by [[utf8sz]], scanned from first to last.
const sizes: [_]rsize = [
	rsize { mask = 0x80, result = 0x00, octets = 1 }, // 0xxxxxxx
	rsize { mask = 0xE0, result = 0xC0, octets = 2 }, // 110xxxxx
	rsize { mask = 0xF0, result = 0xE0, octets = 3 }, // 1110xxxx
	rsize { mask = 0xF8, result = 0xF0, octets = 4 }, // 11110xxx
];

// Reports how many octets the given rune occupies once encoded as UTF-8.
export fn runesz(r: rune) size = {
	const cp = r: u32;
	if (cp < 0x80) {
		return 1;
	};
	if (cp < 0x800) {
		return 2;
	};
	if (cp < 0x10000) {
		return 3;
	};
	return 4;
};

// Given the first byte of a UTF-8 sequence, returns the number of bytes the
// whole sequence is expected to span. Returns [[invalid]] when the byte matches
// none of the known lead-byte patterns.
export fn utf8sz(c: u8) (size | invalid) = {
	for (let idx = 0z; idx < len(sizes); idx += 1) {
		const entry = sizes[idx];
		// Skip rows whose masked pattern differs from this byte.
		if (c & entry.mask != entry.result) {
			continue;
		};
		return entry.octets;
	};
	return invalid;
};