hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

encode.ha (1733B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
// An error type indicating that a rune's value is not a valid Unicode
// codepoint. The error value is the offending rune itself.
export type invalidcodepoint = !rune;
      6 
      7 fn isvalidcodepoint(cp: u32) bool = {
      8 	return (cp < 0xD800 || cp > 0xDFFF) && cp <= 0x10FFFF;
      9 };
     10 
     11 // Encodes a rune as UTF-8 and returns the result as a slice. The return value
     12 // is statically allocated, and will not be consistent after subsequent calls to
     13 // encoderune.
     14 export fn encoderune(r: rune) ([]u8 | invalidcodepoint) = {
     15 	let ch = r: u32, n = 0z, first = 0u8;
     16 	if (!isvalidcodepoint(ch)) {
     17 		return r: invalidcodepoint;
     18 	};
     19 
     20 	if (ch < 0x80) {
     21 		first = 0;
     22 		n = 1;
     23 	} else if (ch < 0x800) {
     24 		first = 0xC0;
     25 		n = 2;
     26 	} else if (ch < 0x10000) {
     27 		first = 0xE0;
     28 		n = 3;
     29 	} else {
     30 		first = 0xF0;
     31 		n = 4;
     32 	};
     33 
     34 	static let buf: [4]u8 = [0...];
     35 	for (let i = n - 1; i > 0; i -= 1) {
     36 		buf[i] = ch: u8 & 0x3F | 0x80;
     37 		ch >>= 6;
     38 	};
     39 	buf[0] = ch: u8 | first;
     40 	return buf[..n];
     41 };
     42 
     43 @test fn encode() void = {
     44 	const testcases: [](rune, bool, []u8) = [
     45 	//	input rune
     46 	//	|       expects error
     47 	//	|       |      expected encoding
     48 		('\0',
     49 			false, [0]),
     50 		('%',
     51 			false, [0x25]),
     52 		('こ',
     53 			false, [0xE3, 0x81, 0x93]),
     54 		(0xD800: rune,
     55 			true, []),
     56 		(0xDF00: rune,
     57 			true, []),
     58 		(0x110000: rune,
     59 			true, []),
     60 	];
     61 
     62 	for (let i = 0z; i < len(testcases); i += 1) {
     63 		const tc    = testcases[i];
     64 		const input = tc.0;
     65 		const want  = tc.2;
     66 
     67 		match(encoderune(input)) {
     68 		case invalidcodepoint =>
     69 			assert(tc.1, "want []u8, got invalidcodepoint");
     70 		case let got: []u8 =>
     71 			assert(!tc.1, "want invalidcodepoint, got []u8");
     72 			for (let j = 0z; j < len(want); j += 1) {
     73 				assert(got[j] == want[j],
     74 					"[]u8 mismatch");
     75 			};
     76 		};
     77 	};
     78 };