hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

t61.ha (10116B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
// Lookup table for T.61 bytes 0x00-0x7f, indexed directly by the byte value.
// An entry of 0 means the byte has no mapping; t61_chardecode reports such
// bytes as invalid. Every non-zero entry equals its own index, so mapped bytes
// pass through unchanged. Each row of eight entries is labelled with the high
// nibble of the bytes it covers; the two header rows below give the low nibble
// of each column.
//
// https://en.wikipedia.org/wiki/ITU_T.61
const t61toascii: [_]u8 = [
//	 0	 1	 2	 3	 4	 5	 6	 7
//	 8	 9	 a	 b	 c	 d	 e	 f
	0,	0,	0,	0,	0,	0,	0,	0,	// 0
	0,	0,	0x0a,	0,	0x0c,	0x0d,	0,	0,	// 0
	0,	0,	0,	0,	0,	0,	0,	0,	// 10
	0,	0,	0x1a,	0x1b,	0,	0,	0,	0,	// 10
	0x20,	0x21,	0x22,	0,	0,	0x25,	0x26,	0x27,	// 20
	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,	// 20
	0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,	// 30
	0x38,	0x39,	0x3a,	0x3b,	0x3c,	0x3d,	0x3e,	0x3f,	// 30
	0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,	// 40
	0x48,	0x49,	0x4a,	0x4b,	0x4c,	0x4d,	0x4e,	0x4f,	// 40
	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,	// 50
	0x58,	0x59,	0x5a,	0x5b,	0,	0x5d,	0,	0x5f,	// 50
	0,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,	// 60
	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,	// 60
	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,	// 70
	0x78,	0x79,	0x7a,	0,	0x7c,	0,	0,	0,	// 70
];
     25 
// Lookup table for T.61 bytes 0x80-0xff, indexed by (byte - 0x80). Despite the
// name, the entries are runes (Unicode code points), not UTF-8 byte sequences.
// An entry of '\u0000' means the byte has no mapping. The 0xc0 row holds the
// combining marks (U+03xx) that correspond to the T.61 diacritic prefix bytes.
const t61toutf8: [_]rune = [
	// 0x80
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u008b',
	'\u008c', '\u0000', '\u0000', '\u0000',

	// 0x90
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u009b',
	'\u0000', '\u0000', '\u0000', '\u0000',

	// 0xa0
	'\u00a0', '\u00a1', '\u00a2', '\u00a3',
	'\u0024', '\u00a5', '\u0023', '\u00a7',
	'\u00a4', '\u0000', '\u0000', '\u00ab',
	'\u0000', '\u0000', '\u0000', '\u0000',

	// 0xb0
	'\u00b0', '\u00b1', '\u00b2', '\u00b3',
	'\u00d7', '\u00b5', '\u00b6', '\u00b7',
	'\u00f7', '\u0000', '\u0000', '\u00bb',
	'\u00bc', '\u00bd', '\u00be', '\u00bf',

	// 0xc0 (diacritic prefixes, stored as combining marks)
	'\u0000', '\u0300', '\u0301', '\u0302',
	'\u0303', '\u0304', '\u0306', '\u0307',
	'\u0308', '\u0308', '\u030a', '\u0327',
	'\u0332', '\u030b', '\u0328', '\u030c',

	// 0xd0
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',

	// 0xe0
	'\u2126', '\u00c6', '\u00d0', '\u00aa',
	'\u0126', '\u0000', '\u0132', '\u013f',
	'\u0141', '\u00d8', '\u0152', '\u00ba',
	'\u00de', '\u0166', '\u014a', '\u0149',

	// 0xf0
	'\u0138', '\u00e6', '\u0111', '\u00f0',
	'\u0127', '\u0131', '\u0133', '\u0140',
	'\u0142', '\u00f8', '\u0153', '\u00df',
	'\u00fe', '\u0167', '\u014b', '\u0000',
];
     75 
     76 fn decode(out: []u8, in: []u8) void = {
     77 	for (let i = 0z; i < len(in); i += 1) {
     78 		const c = in[i];
     79 		const r: rune = if (c & 0x80 != 0) {
     80 			// TODO special cases
     81 			yield t61toutf8[c - 0x80];
     82 		} else {
     83 			const c = t61toascii[in[i]];
     84 			yield c: u32: rune;
     85 		};
     86 
     87 		// write r to out
     88 	};
     89 	return;
     90 };
     91 
// Error returned by [[t61_chardecode]] when the given byte is a combining
// (diacritic) prefix, so the byte that follows it is needed as well to decode
// the character.
export type insufficient = !void;
     93 
     94 export fn t61_chardecode(in: []u8) (rune | insufficient | invalid) = {
     95 	// 'in' is either one char or two if first is a combining character.
     96 	if (len(in) == 2) {
     97 		return t61_combine(in);
     98 	};
     99 
    100 	const in = in[0];
    101 
    102 	if (in & 0x80 == 0) {
    103 		const r = t61toascii[in];
    104 		return if (r == 0) invalid else r: u32: rune;
    105 	};
    106 
    107 	const c = t61toutf8[in - 0x80];
    108 	if (c == '\u0000') {
    109 		return invalid;
    110 	};
    111 
    112 	if (in == 0xcc) {
    113 		return invalid;
    114 	};
    115 	if (in > 0xc0 && in <= 0xcf) {
    116 		return insufficient;
    117 	};
    118 
    119 	return c;
    120 };
    121 
    122 fn t61_combine(in: []u8) (rune | invalid) = {
    123 	const comb = in[0];
    124 	const in = in[1];
    125 	switch (comb) {
    126 	case 0xc1 =>
    127 		switch (in: u32: rune) {
    128 		case 'A' =>
    129 			return '\u00c0';
    130 		case 'E' =>
    131 			return '\u00c8';
    132 		case 'I' =>
    133 			return '\u00cc';
    134 		case 'O' =>
    135 			return '\u00d2';
    136 		case 'U' =>
    137 			return '\u00d9';
    138 		case 'a' =>
    139 			return '\u00e0';
    140 		case 'e' =>
    141 			return '\u00e8';
    142 		case 'i' =>
    143 			return '\u00ec';
    144 		case 'o' =>
    145 			return '\u00f2';
    146 		case 'u' =>
    147 			return '\u00f9';
    148 		case =>
    149 			return invalid;
    150 		};
    151 	case 0xc2 =>
    152 		switch (in: u32: rune) {
    153 		case 'A' =>
    154 			return '\u00c1';
    155 		case 'C' =>
    156 			return '\u0106';
    157 		case 'E' =>
    158 			return '\u00c9';
    159 		case 'I' =>
    160 			return '\u00cd';
    161 		case 'L' =>
    162 			return '\u0139';
    163 		case 'N' =>
    164 			return '\u0143';
    165 		case 'O' =>
    166 			return '\u00d3';
    167 		case 'R' =>
    168 			return '\u0154';
    169 		case 'S' =>
    170 			return '\u015a';
    171 		case 'U' =>
    172 			return '\u00da';
    173 		case 'Y' =>
    174 			return '\u00dd';
    175 		case 'Z' =>
    176 			return '\u0179';
    177 		case 'a' =>
    178 			return '\u00e1';
    179 		case 'c' =>
    180 			return '\u0107';
    181 		case 'e' =>
    182 			return '\u00e9';
    183 		case 'g' =>
    184 			return '\u0123';
    185 		case 'i' =>
    186 			return '\u00ed';
    187 		case 'l' =>
    188 			return '\u013a';
    189 		case 'n' =>
    190 			return '\u0144';
    191 		case 'o' =>
    192 			return '\u00f3';
    193 		case 'r' =>
    194 			return '\u0155';
    195 		case 's' =>
    196 			return '\u015b';
    197 		case 'u' =>
    198 			return '\u00fa';
    199 		case 'y' =>
    200 			return '\u00fd';
    201 		case 'z' =>
    202 			return '\u017a';
    203 		case =>
    204 			return invalid;
    205 		};
    206 	case 0xc3 =>
    207 		switch (in: u32: rune) {
    208 		case 'A' =>
    209 			return '\u00c2';
    210 		case 'C' =>
    211 			return '\u0108';
    212 		case 'E' =>
    213 			return '\u00ca';
    214 		case 'G' =>
    215 			return '\u011c';
    216 		case 'H' =>
    217 			return '\u0124';
    218 		case 'I' =>
    219 			return '\u00ce';
    220 		case 'J' =>
    221 			return '\u0134';
    222 		case 'O' =>
    223 			return '\u00d4';
    224 		case 'S' =>
    225 			return '\u015c';
    226 		case 'U' =>
    227 			return '\u00db';
    228 		case 'W' =>
    229 			return '\u0174';
    230 		case 'Y' =>
    231 			return '\u0176';
    232 		case 'a' =>
    233 			return '\u00e2';
    234 		case 'c' =>
    235 			return '\u0109';
    236 		case 'e' =>
    237 			return '\u00ea';
    238 		case 'g' =>
    239 			return '\u011d';
    240 		case 'h' =>
    241 			return '\u0125';
    242 		case 'i' =>
    243 			return '\u00ee';
    244 		case 'j' =>
    245 			return '\u0135';
    246 		case 'o' =>
    247 			return '\u00f4';
    248 		case 's' =>
    249 			return '\u015d';
    250 		case 'u' =>
    251 			return '\u00fb';
    252 		case 'w' =>
    253 			return '\u0175';
    254 		case 'y' =>
    255 			return '\u0177';
    256 		case =>
    257 			return invalid;
    258 		};
    259 	case 0xc4 =>
    260 		switch (in: u32: rune) {
    261 		case 'A' =>
    262 			return '\u00c3';
    263 		case 'I' =>
    264 			return '\u0128';
    265 		case 'N' =>
    266 			return '\u00d1';
    267 		case 'O' =>
    268 			return '\u00d5';
    269 		case 'U' =>
    270 			return '\u0168';
    271 		case 'a' =>
    272 			return '\u00e3';
    273 		case 'i' =>
    274 			return '\u0129';
    275 		case 'n' =>
    276 			return '\u00f1';
    277 		case 'o' =>
    278 			return '\u00f5';
    279 		case 'u' =>
    280 			return '\u0169';
    281 		case =>
    282 			return invalid;
    283 		};
    284 	case 0xc5 =>
    285 		switch (in: u32: rune) {
    286 		case 'A' =>
    287 			return '\u0100';
    288 		case 'E' =>
    289 			return '\u0112';
    290 		case 'I' =>
    291 			return '\u012a';
    292 		case 'O' =>
    293 			return '\u014c';
    294 		case 'U' =>
    295 			return '\u016a';
    296 		case 'a' =>
    297 			return '\u0101';
    298 		case 'e' =>
    299 			return '\u0113';
    300 		case 'i' =>
    301 			return '\u012b';
    302 		case 'o' =>
    303 			return '\u014d';
    304 		case 'u' =>
    305 			return '\u016b';
    306 		case =>
    307 			return invalid;
    308 		};
    309 	case 0xc6 =>
    310 		switch (in: u32: rune) {
    311 		case 'A' =>
    312 			return '\u0102';
    313 		case 'G' =>
    314 			return '\u011e';
    315 		case 'U' =>
    316 			return '\u016c';
    317 		case 'a' =>
    318 			return '\u0103';
    319 		case 'g' =>
    320 			return '\u011f';
    321 		case 'u' =>
    322 			return '\u016d';
    323 		case =>
    324 			return invalid;
    325 		};
    326 	case 0xc7 =>
    327 		switch (in: u32: rune) {
    328 		case 'C' =>
    329 			return '\u010a';
    330 		case 'E' =>
    331 			return '\u0116';
    332 		case 'G' =>
    333 			return '\u0120';
    334 		case 'I' =>
    335 			return '\u0130';
    336 		case 'Z' =>
    337 			return '\u017b';
    338 		case 'c' =>
    339 			return '\u010b';
    340 		case 'e' =>
    341 			return '\u0117';
    342 		case 'g' =>
    343 			return '\u0121';
    344 		case 'z' =>
    345 			return '\u017c';
    346 		case =>
    347 			return invalid;
    348 		};
    349 	case 0xc8 =>
    350 		switch (in: u32: rune) {
    351 		case 'A' =>
    352 			return '\u00c4';
    353 		case 'E' =>
    354 			return '\u00cb';
    355 		case 'I' =>
    356 			return '\u00cf';
    357 		case 'O' =>
    358 			return '\u00d6';
    359 		case 'U' =>
    360 			return '\u00dc';
    361 		case 'Y' =>
    362 			return '\u0178';
    363 		case 'a' =>
    364 			return '\u00e4';
    365 		case 'e' =>
    366 			return '\u00eb';
    367 		case 'i' =>
    368 			return '\u00ef';
    369 		case 'o' =>
    370 			return '\u00f6';
    371 		case 'u' =>
    372 			return '\u00fc';
    373 		case 'y' =>
    374 			return '\u00ff';
    375 		case =>
    376 			return invalid;
    377 		};
    378 	case 0xc9 =>
    379 		switch (in: u32: rune) {
    380 		case 'A' =>
    381 			return '\u00c4';
    382 		case 'E' =>
    383 			return '\u00cb';
    384 		case 'I' =>
    385 			return '\u00cf';
    386 		case 'O' =>
    387 			return '\u00d6';
    388 		case 'U' =>
    389 			return '\u00dc';
    390 		case 'Y' =>
    391 			return '\u0178';
    392 		case 'a' =>
    393 			return '\u00e4';
    394 		case 'e' =>
    395 			return '\u00eb';
    396 		case 'i' =>
    397 			return '\u00ef';
    398 		case 'o' =>
    399 			return '\u00f6';
    400 		case 'u' =>
    401 			return '\u00fc';
    402 		case 'y' =>
    403 			return '\u00ff';
    404 		case =>
    405 			return invalid;
    406 		};
    407 	case 0xca =>
    408 		switch (in: u32: rune) {
    409 		case 'A' =>
    410 			return '\u00c5';
    411 		case 'U' =>
    412 			return '\u016e';
    413 		case 'a' =>
    414 			return '\u00e5';
    415 		case 'u' =>
    416 			return '\u016f';
    417 		case =>
    418 			return invalid;
    419 		};
    420 	case 0xcb =>
    421 		switch (in: u32: rune) {
    422 		case 'C' =>
    423 			return '\u00c7';
    424 		case 'G' =>
    425 			return '\u0122';
    426 		case 'K' =>
    427 			return '\u0136';
    428 		case 'L' =>
    429 			return '\u013b';
    430 		case 'N' =>
    431 			return '\u0145';
    432 		case 'R' =>
    433 			return '\u0156';
    434 		case 'S' =>
    435 			return '\u015e';
    436 		case 'T' =>
    437 			return '\u0162';
    438 		case 'c' =>
    439 			return '\u00e7';
    440 		case 'k' =>
    441 			return '\u0137';
    442 		case 'l' =>
    443 			return '\u013c';
    444 		case 'n' =>
    445 			return '\u0146';
    446 		case 'r' =>
    447 			return '\u0157';
    448 		case 's' =>
    449 			return '\u015f';
    450 		case 't' =>
    451 			return '\u0163';
    452 		case =>
    453 			return invalid;
    454 		};
    455 	case 0xcd =>
    456 		switch (in: u32: rune) {
    457 		case 'O' =>
    458 			return '\u0150';
    459 		case 'U' =>
    460 			return '\u0170';
    461 		case 'o' =>
    462 			return '\u0151';
    463 		case 'u' =>
    464 			return '\u0171';
    465 		case =>
    466 			return invalid;
    467 		};
    468 	case 0xce =>
    469 		switch (in: u32: rune) {
    470 		case 'A' =>
    471 			return '\u0104';
    472 		case 'E' =>
    473 			return '\u0118';
    474 		case 'I' =>
    475 			return '\u012e';
    476 		case 'U' =>
    477 			return '\u0172';
    478 		case 'a' =>
    479 			return '\u0105';
    480 		case 'e' =>
    481 			return '\u0119';
    482 		case 'i' =>
    483 			return '\u012f';
    484 		case 'u' =>
    485 			return '\u0173';
    486 		case =>
    487 			return invalid;
    488 		};
    489 	case 0xCf =>
    490 		switch (in: u32: rune) {
    491 		case 'C' =>
    492 			return '\u010c';
    493 		case 'D' =>
    494 			return '\u010e';
    495 		case 'E' =>
    496 			return '\u011a';
    497 		case 'L' =>
    498 			return '\u013d';
    499 		case 'N' =>
    500 			return '\u0147';
    501 		case 'R' =>
    502 			return '\u0158';
    503 		case 'S' =>
    504 			return '\u0160';
    505 		case 'T' =>
    506 			return '\u0164';
    507 		case 'Z' =>
    508 			return '\u017d';
    509 		case 'c' =>
    510 			return '\u010d';
    511 		case 'd' =>
    512 			return '\u010f';
    513 		case 'e' =>
    514 			return '\u011b';
    515 		case 'l' =>
    516 			return '\u013e';
    517 		case 'n' =>
    518 			return '\u0148';
    519 		case 'r' =>
    520 			return '\u0159';
    521 		case 's' =>
    522 			return '\u0161';
    523 		case 't' =>
    524 			return '\u0165';
    525 		case 'z' =>
    526 			return '\u017e';
    527 		case =>
    528 			return invalid;
    529 		};
    530 	case =>
    531 		return invalid;
    532 	};
    533 };