hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

encode.ha (1107B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 // Encodes a rune as UTF-8 and returns the result as a slice. The return value
      5 // is statically allocated, and will not be consistent after subsequent calls to
      6 // encoderune.
      7 export fn encoderune(r: rune) []u8 = {
      8 	let ch = r: u32, n = 0z, first = 0u8;
      9 	assert((ch < 0xD800 || ch > 0xDFFF) && ch <= 0x10FFFF,
     10 		"the rune is not a valid Unicode codepoint");
     11 
     12 	if (ch < 0x80) {
     13 		first = 0;
     14 		n = 1;
     15 	} else if (ch < 0x800) {
     16 		first = 0xC0;
     17 		n = 2;
     18 	} else if (ch < 0x10000) {
     19 		first = 0xE0;
     20 		n = 3;
     21 	} else {
     22 		first = 0xF0;
     23 		n = 4;
     24 	};
     25 
     26 	static let buf: [4]u8 = [0...];
     27 	for (let i = n - 1; i > 0; i -= 1) {
     28 		buf[i] = ch: u8 & 0x3F | 0x80;
     29 		ch >>= 6;
     30 	};
     31 	buf[0] = ch: u8 | first;
     32 	return buf[..n];
     33 };
     34 
     35 @test fn encode() void = {
     36 	const expected: [_][]u8 = [
     37 		[0],
     38 		[0x25],
     39 		[0xE3, 0x81, 0x93],
     40 	];
     41 	const inputs = ['\0', '%', 'こ'];
     42 	for (let i = 0z; i < len(inputs); i += 1) {
     43 		const out = encoderune(inputs[i]);
     44 		for (let j = 0z; j < len(expected[i]); j += 1) {
     45 			assert(out[j] == expected[i][j]);
     46 		};
     47 	};
     48 };