t61.ha (10116B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

// https://en.wikipedia.org/wiki/ITU_T.61

// Lookup table for T.61 bytes 0x00-0x7f, indexed directly by the byte value.
// Each entry is the ASCII code the byte decodes to; an entry of 0 marks a byte
// that [[t61_chardecode]] rejects as invalid.
const t61toascii: [_]u8 = [
	// 0     1     2     3     4     5     6     7
	// 8     9     a     b     c     d     e     f
	0, 0, 0, 0, 0, 0, 0, 0, // 0
	0, 0, 0x0a, 0, 0x0c, 0x0d, 0, 0, // 0
	0, 0, 0, 0, 0, 0, 0, 0, // 10
	0, 0, 0x1a, 0x1b, 0, 0, 0, 0, // 10
	0x20, 0x21, 0x22, 0, 0, 0x25, 0x26, 0x27, // 20
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, // 20
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 30
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, // 30
	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, // 40
	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, // 40
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, // 50
	0x58, 0x59, 0x5a, 0x5b, 0, 0x5d, 0, 0x5f, // 50
	0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, // 60
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, // 60
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, // 70
	0x78, 0x79, 0x7a, 0, 0x7c, 0, 0, 0, // 70
];

// Lookup table for T.61 bytes 0x80-0xff, indexed by (byte - 0x80). Despite the
// name, entries are runes (code points), not UTF-8 byte sequences. An entry of
// '\u0000' marks a byte that [[t61_chardecode]] rejects as invalid. The
// entries for 0xc1-0xcf are Unicode combining marks; [[t61_chardecode]] never
// returns those directly (see [[t61_combine]]).
const t61toutf8: [_]rune = [
	// 0x80
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u008b',
	'\u008c', '\u0000', '\u0000', '\u0000',

	// 0x90
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u009b',
	'\u0000', '\u0000', '\u0000', '\u0000',

	// 0xa0
	'\u00a0', '\u00a1', '\u00a2', '\u00a3',
	'\u0024', '\u00a5', '\u0023', '\u00a7',
	'\u00a4', '\u0000', '\u0000', '\u00ab',
	'\u0000', '\u0000', '\u0000', '\u0000',

	// 0xb0
	'\u00b0', '\u00b1', '\u00b2', '\u00b3',
	'\u00d7', '\u00b5', '\u00b6', '\u00b7',
	'\u00f7', '\u0000', '\u0000', '\u00bb',
	'\u00bc', '\u00bd', '\u00be', '\u00bf',

	// 0xc0
	'\u0000', '\u0300', '\u0301', '\u0302',
	'\u0303', '\u0304', '\u0306', '\u0307',
	'\u0308', '\u0308', '\u030a', '\u0327',
	'\u0332', '\u030b', '\u0328', '\u030c',

	// 0xd0
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',
	'\u0000', '\u0000', '\u0000', '\u0000',

	// 0xe0
	'\u2126', '\u00c6', '\u00d0', '\u00aa',
	'\u0126', '\u0000', '\u0132', '\u013f',
	'\u0141', '\u00d8', '\u0152', '\u00ba',
	'\u00de', '\u0166', '\u014a', '\u0149',

	// 0xf0
	'\u0138', '\u00e6', '\u0111', '\u00f0',
	'\u0127', '\u0131', '\u0133', '\u0140',
	'\u0142', '\u00f8', '\u0153', '\u00df',
	'\u00fe', '\u0167', '\u014b', '\u0000',
];

// Unfinished bulk decoder. For every byte of 'in' it looks up the matching
// rune in [[t61toutf8]] (high bit set) or [[t61toascii]] (high bit clear), but
// the result is not yet written to 'out', and neither invalid table entries
// nor combining sequences are handled. Use [[t61_chardecode]] instead.
fn decode(out: []u8, in: []u8) void = {
	for (let i = 0z; i < len(in); i += 1) {
		const c = in[i];
		const r: rune = if ((c & 0x80) != 0) {
			// TODO special cases
			yield t61toutf8[c - 0x80];
		} else {
			const ascii = t61toascii[c];
			yield ascii: u32: rune;
		};

		// TODO: write r to out
	};
};

// Returned by [[t61_chardecode]] when the input cannot be decoded without a
// following byte: the input is empty, or it is a single combining byte
// (0xc1-0xcf, excluding 0xcc).
export type insufficient = !void;

// Decodes one T.61 character from 'in'.
//
// 'in' holds either a single byte, or two bytes when the first one is a
// combining character. Two-byte input is handed to [[t61_combine]].
//
// Returns the decoded rune, [[insufficient]] if 'in' is empty or is a lone
// combining byte that needs the following byte, or invalid if the byte (or
// byte pair) has no mapping.
export fn t61_chardecode(in: []u8) (rune | insufficient | invalid) = {
	// Guard the in[0] access below: an empty slice simply needs more data.
	if (len(in) == 0) {
		return insufficient;
	};

	// 'in' is either one char or two if first is a combining character.
	if (len(in) == 2) {
		return t61_combine(in);
	};

	const in = in[0];

	if ((in & 0x80) == 0) {
		const r = t61toascii[in];
		return if (r == 0) invalid else r: u32: rune;
	};

	const c = t61toutf8[in - 0x80];
	if (c == '\u0000') {
		return invalid;
	};

	// 0xcc has a table entry (U+0332) but no case in t61_combine, so it is
	// rejected here rather than reported as insufficient.
	if (in == 0xcc) {
		return invalid;
	};
	// Every other byte in 0xc1-0xcf needs the base letter that follows it.
	if (in > 0xc0 && in <= 0xcf) {
		return insufficient;
	};

	return c;
};

// Maps a two-byte T.61 sequence (combining byte in[0], base letter in[1]) to
// the corresponding precomposed rune. Returns invalid when the combining byte
// is unknown or the base letter has no mapping for that combining byte.
//
// 'in' must hold at least two bytes; [[t61_chardecode]] only calls this with
// exactly two.
fn t61_combine(in: []u8) (rune | invalid) = {
	const comb = in[0];
	const in = in[1];
	switch (comb) {
	case 0xc1 => // grave (U+0300)
		switch (in: u32: rune) {
		case 'A' =>
			return '\u00c0';
		case 'E' =>
			return '\u00c8';
		case 'I' =>
			return '\u00cc';
		case 'O' =>
			return '\u00d2';
		case 'U' =>
			return '\u00d9';
		case 'a' =>
			return '\u00e0';
		case 'e' =>
			return '\u00e8';
		case 'i' =>
			return '\u00ec';
		case 'o' =>
			return '\u00f2';
		case 'u' =>
			return '\u00f9';
		case =>
			return invalid;
		};
	case 0xc2 => // acute (U+0301)
		switch (in: u32: rune) {
		case 'A' =>
			return '\u00c1';
		case 'C' =>
			return '\u0106';
		case 'E' =>
			return '\u00c9';
		case 'I' =>
			return '\u00cd';
		case 'L' =>
			return '\u0139';
		case 'N' =>
			return '\u0143';
		case 'O' =>
			return '\u00d3';
		case 'R' =>
			return '\u0154';
		case 'S' =>
			return '\u015a';
		case 'U' =>
			return '\u00da';
		case 'Y' =>
			return '\u00dd';
		case 'Z' =>
			return '\u0179';
		case 'a' =>
			return '\u00e1';
		case 'c' =>
			return '\u0107';
		case 'e' =>
			return '\u00e9';
		case 'g' =>
			// NOTE(review): U+0123 is g with cedilla, not g with
			// acute; presumably intentional (ISO 6937 encodes it
			// this way) - confirm.
			return '\u0123';
		case 'i' =>
			return '\u00ed';
		case 'l' =>
			return '\u013a';
		case 'n' =>
			return '\u0144';
		case 'o' =>
			return '\u00f3';
		case 'r' =>
			return '\u0155';
		case 's' =>
			return '\u015b';
		case 'u' =>
			return '\u00fa';
		case 'y' =>
			return '\u00fd';
		case 'z' =>
			return '\u017a';
		case =>
			return invalid;
		};
	case 0xc3 => // circumflex (U+0302)
		switch (in: u32: rune) {
		case 'A' =>
			return '\u00c2';
		case 'C' =>
			return '\u0108';
		case 'E' =>
			return '\u00ca';
		case 'G' =>
			return '\u011c';
		case 'H' =>
			return '\u0124';
		case 'I' =>
			return '\u00ce';
		case 'J' =>
			return '\u0134';
		case 'O' =>
			return '\u00d4';
		case 'S' =>
			return '\u015c';
		case 'U' =>
			return '\u00db';
		case 'W' =>
			return '\u0174';
		case 'Y' =>
			return '\u0176';
		case 'a' =>
			return '\u00e2';
		case 'c' =>
			return '\u0109';
		case 'e' =>
			return '\u00ea';
		case 'g' =>
			return '\u011d';
		case 'h' =>
			return '\u0125';
		case 'i' =>
			return '\u00ee';
		case 'j' =>
			return '\u0135';
		case 'o' =>
			return '\u00f4';
		case 's' =>
			return '\u015d';
		case 'u' =>
			return '\u00fb';
		case 'w' =>
			return '\u0175';
		case 'y' =>
			return '\u0177';
		case =>
			return invalid;
		};
	case 0xc4 => // tilde (U+0303)
		switch (in: u32: rune) {
		case 'A' =>
			return '\u00c3';
		case 'I' =>
			return '\u0128';
		case 'N' =>
			return '\u00d1';
		case 'O' =>
			return '\u00d5';
		case 'U' =>
			return '\u0168';
		case 'a' =>
			return '\u00e3';
		case 'i' =>
			return '\u0129';
		case 'n' =>
			return '\u00f1';
		case 'o' =>
			return '\u00f5';
		case 'u' =>
			return '\u0169';
		case =>
			return invalid;
		};
	case 0xc5 => // macron (U+0304)
		switch (in: u32: rune) {
		case 'A' =>
			return '\u0100';
		case 'E' =>
			return '\u0112';
		case 'I' =>
			return '\u012a';
		case 'O' =>
			return '\u014c';
		case 'U' =>
			return '\u016a';
		case 'a' =>
			return '\u0101';
		case 'e' =>
			return '\u0113';
		case 'i' =>
			return '\u012b';
		case 'o' =>
			return '\u014d';
		case 'u' =>
			return '\u016b';
		case =>
			return invalid;
		};
	case 0xc6 => // breve (U+0306)
		switch (in: u32: rune) {
		case 'A' =>
			return '\u0102';
		case 'G' =>
			return '\u011e';
		case 'U' =>
			return '\u016c';
		case 'a' =>
			return '\u0103';
		case 'g' =>
			return '\u011f';
		case 'u' =>
			return '\u016d';
		case =>
			return invalid;
		};
	case 0xc7 => // dot above (U+0307)
		switch (in: u32: rune) {
		case 'C' =>
			return '\u010a';
		case 'E' =>
			return '\u0116';
		case 'G' =>
			return '\u0120';
		case 'I' =>
			return '\u0130';
		case 'Z' =>
			return '\u017b';
		case 'c' =>
			return '\u010b';
		case 'e' =>
			return '\u0117';
		case 'g' =>
			return '\u0121';
		case 'z' =>
			return '\u017c';
		case =>
			return invalid;
		};
	case 0xc8, 0xc9 => // diaeresis (U+0308); both bytes map identically
		switch (in: u32: rune) {
		case 'A' =>
			return '\u00c4';
		case 'E' =>
			return '\u00cb';
		case 'I' =>
			return '\u00cf';
		case 'O' =>
			return '\u00d6';
		case 'U' =>
			return '\u00dc';
		case 'Y' =>
			return '\u0178';
		case 'a' =>
			return '\u00e4';
		case 'e' =>
			return '\u00eb';
		case 'i' =>
			return '\u00ef';
		case 'o' =>
			return '\u00f6';
		case 'u' =>
			return '\u00fc';
		case 'y' =>
			return '\u00ff';
		case =>
			return invalid;
		};
	case 0xca => // ring above (U+030a)
		switch (in: u32: rune) {
		case 'A' =>
			return '\u00c5';
		case 'U' =>
			return '\u016e';
		case 'a' =>
			return '\u00e5';
		case 'u' =>
			return '\u016f';
		case =>
			return invalid;
		};
	case 0xcb => // cedilla (U+0327)
		switch (in: u32: rune) {
		case 'C' =>
			return '\u00c7';
		case 'G' =>
			return '\u0122';
		case 'K' =>
			return '\u0136';
		case 'L' =>
			return '\u013b';
		case 'N' =>
			return '\u0145';
		case 'R' =>
			return '\u0156';
		case 'S' =>
			return '\u015e';
		case 'T' =>
			return '\u0162';
		case 'c' =>
			return '\u00e7';
		case 'k' =>
			return '\u0137';
		case 'l' =>
			return '\u013c';
		case 'n' =>
			return '\u0146';
		case 'r' =>
			return '\u0157';
		case 's' =>
			return '\u015f';
		case 't' =>
			return '\u0163';
		case =>
			return invalid;
		};
	case 0xcd => // double acute (U+030b)
		switch (in: u32: rune) {
		case 'O' =>
			return '\u0150';
		case 'U' =>
			return '\u0170';
		case 'o' =>
			return '\u0151';
		case 'u' =>
			return '\u0171';
		case =>
			return invalid;
		};
	case 0xce => // ogonek (U+0328)
		switch (in: u32: rune) {
		case 'A' =>
			return '\u0104';
		case 'E' =>
			return '\u0118';
		case 'I' =>
			return '\u012e';
		case 'U' =>
			return '\u0172';
		case 'a' =>
			return '\u0105';
		case 'e' =>
			return '\u0119';
		case 'i' =>
			return '\u012f';
		case 'u' =>
			return '\u0173';
		case =>
			return invalid;
		};
	case 0xcf => // caron (U+030c)
		switch (in: u32: rune) {
		case 'C' =>
			return '\u010c';
		case 'D' =>
			return '\u010e';
		case 'E' =>
			return '\u011a';
		case 'L' =>
			return '\u013d';
		case 'N' =>
			return '\u0147';
		case 'R' =>
			return '\u0158';
		case 'S' =>
			return '\u0160';
		case 'T' =>
			return '\u0164';
		case 'Z' =>
			return '\u017d';
		case 'c' =>
			return '\u010d';
		case 'd' =>
			return '\u010f';
		case 'e' =>
			return '\u011b';
		case 'l' =>
			return '\u013e';
		case 'n' =>
			return '\u0148';
		case 'r' =>
			return '\u0159';
		case 's' =>
			return '\u0161';
		case 't' =>
			return '\u0165';
		case 'z' =>
			return '\u017e';
		case =>
			return invalid;
		};
	case =>
		// Includes 0xcc and every non-combining first byte.
		return invalid;
	};
};