encode.ha (1107B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

// Encodes a rune as UTF-8 and returns the result as a slice of one to four
// bytes. The return value is statically allocated, and will not be consistent
// after subsequent calls to encoderune; copy it if it must outlive the next
// call.
//
// Aborts if the rune is a surrogate (U+D800 through U+DFFF) or is greater than
// U+10FFFF, since neither is a valid Unicode codepoint.
export fn encoderune(r: rune) []u8 = {
	let ch = r: u32;
	assert((ch < 0xD800 || ch > 0xDFFF) && ch <= 0x10FFFF,
		"the rune is not a valid Unicode codepoint");

	// Choose the encoded length (n) and the marker bits of the leading
	// byte (first). A single-byte encoding has no marker bits, so first
	// keeps its initial value of zero in that case.
	let n = 0z, first = 0u8;
	if (ch < 0x80) {
		n = 1;
	} else if (ch < 0x800) {
		first = 0xC0;
		n = 2;
	} else if (ch < 0x10000) {
		first = 0xE0;
		n = 3;
	} else {
		first = 0xF0;
		n = 4;
	};

	static let buf: [4]u8 = [0...];
	// Fill the continuation bytes from last to first: each one carries the
	// low six bits of what remains of the codepoint, tagged with 0b10xxxxxx.
	for (let i = n - 1; i > 0; i -= 1) {
		buf[i] = ch: u8 & 0x3F | 0x80;
		ch >>= 6;
	};
	// The bits left over fit in the payload of the leading byte.
	buf[0] = ch: u8 | first;
	return buf[..n];
};

@test fn encode() void = {
	// One entry per encoded length, plus the codepoints on either side of
	// each length boundary and the largest valid codepoint.
	const expected: [_][]u8 = [
		[0],
		[0x25],
		[0x7F],
		[0xC2, 0x80],
		[0xC3, 0xA9],
		[0xDF, 0xBF],
		[0xE0, 0xA0, 0x80],
		[0xE3, 0x81, 0x93],
		[0xEF, 0xBF, 0xBF],
		[0xF0, 0x90, 0x80, 0x80],
		[0xF0, 0x9F, 0x98, 0x80],
		[0xF4, 0x8F, 0xBF, 0xBF],
	];
	const inputs = [
		'\0',
		'%',
		'\u007F',
		'\u0080',
		'\u00E9',
		'\u07FF',
		'\u0800',
		'こ',
		'\uFFFF',
		'\U00010000',
		'\U0001F600',
		'\U0010FFFF',
	];
	assert(len(inputs) == len(expected));
	for (let i = 0z; i < len(inputs); i += 1) {
		// The result aliases a static buffer, so it is checked in full
		// before the next call to encoderune overwrites it.
		const out = encoderune(inputs[i]);
		// Compare lengths first so that surplus output bytes are caught.
		assert(len(out) == len(expected[i]));
		for (let j = 0z; j < len(expected[i]); j += 1) {
			assert(out[j] == expected[i][j]);
		};
	};
};