hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit b4f3bb3bfc208afcf5e3228a937e6d07f0b6e433
parent 75b51ad7502ee08bc8f8a1c6593d645f03b33aaf
Author: Armin Preiml <apreiml@strohwolke.at>
Date:   Wed,  7 Feb 2024 19:53:34 +0100

asn1: add decode support for non utf8 string types

NumericString and IA5String (the latter is essentially ASCII) are required
by some entries in X.509. UniversalString, BMPString and T61String are only
provided for legacy support.

Note that this only supports a subset of T.61. There are still some
certificates in the Mozilla trust store that have T.61-encoded strings,
though the characters they use may only be a subset of ASCII.

Signed-off-by: Armin Preiml <apreiml@strohwolke.at>

Diffstat:
Aencoding/asn1/+test/strings_test.ha | 164+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aencoding/asn1/charset+test.ha | 154+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aencoding/asn1/strings.ha | 362+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aencoding/asn1/t61.ha | 534+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 1214 insertions(+), 0 deletions(-)

diff --git a/encoding/asn1/+test/strings_test.ha b/encoding/asn1/+test/strings_test.ha @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: MPL-2.0 +// (c) Hare authors <https://harelang.org> + +use bytes; +use errors; +use fmt; +use io; +use strings; + + +fn c_checkrange(chars: []u8, f: *fn (c: u8) bool) void = { + for (let i = 0z; i < 256; i += 1) { + let expected = false; + for (let j = 0z; j < len(chars); j += 1) { + if (chars[j] == i: u8) { + expected = true; + break; + }; + }; + + if (f(i: u8) != expected) { + fmt::println(i, expected, f(i: u8))!; + }; + assert(f(i: u8) == expected); + }; +}; + +@test fn c_is_num() void = { + const chars: [_]u8 = [ + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ' ', + ]; + c_checkrange(chars, &c_is_num); +}; + +@test fn c_is_print() void = { + const chars: [_]u8 = [ + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ' ', '\'', + '(', ')', '+', ',', '-', '.', '/', ':', '=', '?', + ]; + c_checkrange(chars, &c_is_print); +}; + +@test fn utf8() void = { + let buf: [16]u8 = [0...]; + let b: [_]u8 = [ + 0x55, + 0x56, + 0xd0, 0x98, + 0xe0, 0xa4, 0xb9, + 0xf0, 0x90, 0x8d, 0x88 + ]; + const runesat: [_]size = [0, 1, 2, 2, 4, 4, 4, 7, 7, 7, 7, 8]; + + let expected: str = strings::fromutf8([0xf0, 0x90, 0x8d, 0x88])!; + assert(read_utf8str(&d([0x0c, 0x04, 0xf0, 0x90, 0x8d, 0x88]), buf)! + == expected); + assert(read_utf8str(&d([0x0c, 0x03, 0xf0, 0x90, 0x8d]), buf) is invalid); + + bytes::zero(buf); + let r = strreader(&d([0x0c, 0x04, 0xf0, 0x90, 0x8d, 0x88]), utag::UTF8_STRING)!; + assert(io::read(&r, buf)! 
== 4); + assert(bytes::equal(buf[..4], strings::toutf8(expected))); + + bytes::zero(buf); + let expected: str = strings::fromutf8([0x55, 0x56, 0xf0, 0x90, 0x8d, 0x88])!; + assert(read_utf8str(&d([0x0c, 0x06, 0x55, 0x56, 0xf0, 0x90, 0x8d, 0x88]), buf)! + == expected); + assert(read_utf8str(&d([0x0c, 0x05, 0x55, 0x56, 0xf0, 0x90, 0x8d]), buf) is invalid); + + bytes::zero(buf); + let r = strreader(&d([0x0c, 0x06, 0x55, 0x56, 0xf0, 0x90, 0x8d, 0x88]), utag::UTF8_STRING)!; + assert(io::read(&r, buf)! == 6); + assert(bytes::equal(buf[..6], strings::toutf8(expected))); + + let r = strreader(&d([0x0c, 0x05, 0x55, 0x56, 0xf0, 0x90, 0x8d]), utag::UTF8_STRING)!; + assert(unwrap_err(io::readall(&r, buf[2..]) as io::error) is invalid); + + bytes::zero(buf); + let r = strreader(&d([0x0c, 0x06, 0x55, 0x56, 0xf0, 0x90, 0x8d, 0x88]), utag::UTF8_STRING)!; + assert(io::read(&r, buf[..4])! == 2); + assert(io::read(&r, buf[2..])! == 4); + assert(bytes::equal(buf[..6], strings::toutf8(expected))); + + bytes::zero(buf); + let r = strreader(&d([0x0c, 0x05, 0x55, 0x56, 0xf0, 0x90, 0x8d]), utag::UTF8_STRING)!; + assert(io::read(&r, buf[..4])! 
== 2); + assert(unwrap_err(io::readall(&r, buf[2..]) as io::error) is invalid); +}; + +@test fn t61() void = { + let input: [_]u8 = [ + 0x14, 0x29, + 0x42, 0xc8, 0x61, 0x72, 0x65, 0x6e, 0x20, 0x76, 0x65, 0x72, + 0x7a, 0x65, 0x68, 0x72, 0x65, 0x6e, 0x20, 0x67, 0x65, 0x72, + 0x6e, 0x65, 0x20, 0xc8, 0x75, 0x62, 0x65, 0x72, 0x6d, 0xc8, + 0x61, 0xfb, 0x69, 0x67, 0x20, 0x48, 0x6f, 0x6e, 0x69, 0x67, + 0x0a, + ]; + + const expected: [_]u8 = [ + 0x42, 0xc3, 0xa4, 0x72, 0x65, 0x6e, 0x20, 0x76, 0x65, 0x72, + 0x7a, 0x65, 0x68, 0x72, 0x65, 0x6e, 0x20, 0x67, 0x65, 0x72, + 0x6e, 0x65, 0x20, 0xc3, 0xbc, 0x62, 0x65, 0x72, 0x6d, 0xc3, + 0xa4, 0xc3, 0x9f, 0x69, 0x67, 0x20, 0x48, 0x6f, 0x6e, 0x69, + 0x67, 0x0a, + ]; + + let dec = d(input); + let r = strreader(&dec, utag::TELETEX_STRING)!; + let result = io::drain(&r)!; + defer free(result); + assert(bytes::equal(expected, result)); + assert(trypeek(&dec) is io::EOF); + + // cut off multibyte char + input[1] = 0x2; + let r = strreader(&d(input[..4]), utag::TELETEX_STRING)!; + assert(unwrap_err(io::drain(&r) as io::error) is invalid); + + // not enough space for multibyte char + let buf: [24]u8 = [0...]; + let in = input[..27]; + in[1] = (len(in) - 2): u8; + let dec = d(in); + let r = strreader(&dec, utag::TELETEX_STRING)!; + assert(io::read(&r, buf)! == 23); + assert(trypeek(&dec) is badformat); + + let r = strreader(&d([ + 0x14, 0x0f, 0x63, 0x6c, 0xc2, 0x65, 0x73, 0x20, 0x70, 0x75, + 0x62, 0x6c, 0x69, 0x71, 0x75, 0x65, 0x73, + ]), utag::TELETEX_STRING)!; + let b = io::drain(&r)!; + defer free(b); + + assert(strings::fromutf8(b)! 
== "cl\u00e9s publiques"); +}; + +@test fn bmp() void = { + let input: [_]u8 = [ + 0x1e, 0x26, + 0x00, 0x48, 0x00, 0xe4, 0x00, 0x72, 0x00, 0x65, 0x00, 0x6c, + 0x00, 0x61, 0x00, 0x6e, 0x00, 0x67, 0x00, 0x20, 0x00, 0x69, + 0x01, 0x61, 0x00, 0x20, 0x00, 0x6e, 0x00, 0x65, 0x00, 0x61, + 0x00, 0x74, 0x00, 0x6f, 0x00, 0x20, 0x27, 0x64, + ]; + + const expected: [_]u8 = [ + 0x48, 0xc3, 0xa4, 0x72, 0x65, 0x6c, 0x61, 0x6e, 0x67, 0x20, + 0x69, 0xc5, 0xa1, 0x20, 0x6e, 0x65, 0x61, 0x74, 0x6f, 0x20, + 0xe2, 0x9d, 0xa4, + ]; + + let dec = d(input); + let r = strreader(&dec, utag::BMP_STRING)!; + let result = io::drain(&r)!; + defer free(result); + assert(bytes::equal(expected, result)); + assert(trypeek(&dec) is io::EOF); +}; diff --git a/encoding/asn1/charset+test.ha b/encoding/asn1/charset+test.ha @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: MPL-2.0 +// (c) Hare authors <https://harelang.org> + +use ascii; +use bytes; +use fmt; +use io; +use memio; + + +// Encodes all characters from 0x00 to 0xff separated by \t. Invalid characters +// will not be printed. 
All possible accents follow the table as defined in +// the two bytes chapter at https://en.wikipedia.org/wiki/T.51/ISO/IEC_6937 +// +// Every cell of the 16x16 table, the last one of each row included, is +// followed by a \t; t61_test_table encodes exactly that layout. +fn print_t61_table(dest: io::handle) void = { + // header row: the sixteen column digits 0..f + for (let i = 0z; i < 16; i +=1 ) { + fmt::fprintf(dest, "{:x}\t", i)!; + }; + fmt::fprintln(dest)!; + + for (let i = 0z; i < 256; i += 1) { + if (i % 16 == 0) { + fmt::fprintln(dest)!; + }; + match (t61_chardecode([i: u8])) { + case insufficient => + // combining prefix byte: the cell stays empty + yield; + case invalid => + yield; + case let r: rune => + if (i > 0xa0 || (ascii::isprint(r) && !ascii::isspace(r))) { + fmt::fprint(dest, r)!; + } else { + fmt::fprintf(dest, "x{:.4x}", r: u32)!; + }; + }; + + // The former guard 'i + 1 % 16 != 0' parsed as + // 'i + (1 % 16) != 0' and was therefore always true. The + // separator is written unconditionally so the output, and with + // it t61_test_table, stays unchanged. + fmt::fprint(dest, "\t")!; + }; + + fmt::fprintln(dest)!; + + // One row per combining prefix byte: the prefix in hex, followed by + // every character that t61_chardecode combines with it. + for (let i = 0xc1u8; i < 0xd0; i += 1) { + if (i == 0xcc) continue; + // a prefix byte on its own must ask for a second byte + assert(t61_chardecode([i: u8]) is insufficient); + fmt::fprintf(dest, "{:.2x}\t", i)!; + for (let j = 0x41u32; j < 0x7b; j += 1) { + if (!ascii::isprint(j: rune)) { + continue; + }; + match (t61_chardecode([i: u8, j: u8])) { + case let r: rune => + fmt::fprint(dest, r)!; + case => + yield; + }; + }; + fmt::fprintln(dest)!; + }; +}; + +// Renders the table into memory and compares it with t61_test_table. +@test fn t61encode() void = { + let table = memio::dynamic(); + defer io::close(&table)!; + print_t61_table(&table); + assert(bytes::equal(t61_test_table, memio::buffer(&table))); +}; + +// Print this table as UTF-8 to visually check the characters. 
+const t61_test_table: [_]u8 = [ + 0x30, 0x09, 0x31, 0x09, 0x32, 0x09, 0x33, 0x09, 0x34, 0x09, 0x35, 0x09, + 0x36, 0x09, 0x37, 0x09, 0x38, 0x09, 0x39, 0x09, 0x61, 0x09, 0x62, 0x09, + 0x63, 0x09, 0x64, 0x09, 0x65, 0x09, 0x66, 0x09, 0x0a, 0x0a, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x78, 0x30, 0x30, 0x30, + 0x61, 0x09, 0x09, 0x78, 0x30, 0x30, 0x30, 0x63, 0x09, 0x78, 0x30, 0x30, + 0x30, 0x64, 0x09, 0x09, 0x09, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x78, 0x30, 0x30, 0x31, 0x61, 0x09, 0x78, 0x30, + 0x30, 0x31, 0x62, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0a, 0x78, 0x30, 0x30, + 0x32, 0x30, 0x09, 0x21, 0x09, 0x22, 0x09, 0x09, 0x09, 0x25, 0x09, 0x26, + 0x09, 0x27, 0x09, 0x28, 0x09, 0x29, 0x09, 0x2a, 0x09, 0x2b, 0x09, 0x2c, + 0x09, 0x2d, 0x09, 0x2e, 0x09, 0x2f, 0x09, 0x0a, 0x30, 0x09, 0x31, 0x09, + 0x32, 0x09, 0x33, 0x09, 0x34, 0x09, 0x35, 0x09, 0x36, 0x09, 0x37, 0x09, + 0x38, 0x09, 0x39, 0x09, 0x3a, 0x09, 0x3b, 0x09, 0x3c, 0x09, 0x3d, 0x09, + 0x3e, 0x09, 0x3f, 0x09, 0x0a, 0x40, 0x09, 0x41, 0x09, 0x42, 0x09, 0x43, + 0x09, 0x44, 0x09, 0x45, 0x09, 0x46, 0x09, 0x47, 0x09, 0x48, 0x09, 0x49, + 0x09, 0x4a, 0x09, 0x4b, 0x09, 0x4c, 0x09, 0x4d, 0x09, 0x4e, 0x09, 0x4f, + 0x09, 0x0a, 0x50, 0x09, 0x51, 0x09, 0x52, 0x09, 0x53, 0x09, 0x54, 0x09, + 0x55, 0x09, 0x56, 0x09, 0x57, 0x09, 0x58, 0x09, 0x59, 0x09, 0x5a, 0x09, + 0x5b, 0x09, 0x09, 0x5d, 0x09, 0x09, 0x5f, 0x09, 0x0a, 0x09, 0x61, 0x09, + 0x62, 0x09, 0x63, 0x09, 0x64, 0x09, 0x65, 0x09, 0x66, 0x09, 0x67, 0x09, + 0x68, 0x09, 0x69, 0x09, 0x6a, 0x09, 0x6b, 0x09, 0x6c, 0x09, 0x6d, 0x09, + 0x6e, 0x09, 0x6f, 0x09, 0x0a, 0x70, 0x09, 0x71, 0x09, 0x72, 0x09, 0x73, + 0x09, 0x74, 0x09, 0x75, 0x09, 0x76, 0x09, 0x77, 0x09, 0x78, 0x09, 0x79, + 0x09, 0x7a, 0x09, 0x09, 0x7c, 0x09, 0x09, 0x09, 0x09, 0x0a, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x78, 0x30, 0x30, + 0x38, 0x62, 0x09, 0x78, 0x30, 0x30, 0x38, 0x63, 0x09, 0x09, 0x09, 0x09, + 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 
0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x78, 0x30, 0x30, 0x39, 0x62, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0a, 0x78, + 0x30, 0x30, 0x61, 0x30, 0x09, 0xc2, 0xa1, 0x09, 0xc2, 0xa2, 0x09, 0xc2, + 0xa3, 0x09, 0x24, 0x09, 0xc2, 0xa5, 0x09, 0x23, 0x09, 0xc2, 0xa7, 0x09, + 0xc2, 0xa4, 0x09, 0x09, 0x09, 0xc2, 0xab, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x0a, 0xc2, 0xb0, 0x09, 0xc2, 0xb1, 0x09, 0xc2, 0xb2, 0x09, 0xc2, 0xb3, + 0x09, 0xc3, 0x97, 0x09, 0xc2, 0xb5, 0x09, 0xc2, 0xb6, 0x09, 0xc2, 0xb7, + 0x09, 0xc3, 0xb7, 0x09, 0x09, 0x09, 0xc2, 0xbb, 0x09, 0xc2, 0xbc, 0x09, + 0xc2, 0xbd, 0x09, 0xc2, 0xbe, 0x09, 0xc2, 0xbf, 0x09, 0x0a, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x0a, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, + 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x0a, 0xe2, 0x84, 0xa6, 0x09, + 0xc3, 0x86, 0x09, 0xc3, 0x90, 0x09, 0xc2, 0xaa, 0x09, 0xc4, 0xa6, 0x09, + 0x09, 0xc4, 0xb2, 0x09, 0xc4, 0xbf, 0x09, 0xc5, 0x81, 0x09, 0xc3, 0x98, + 0x09, 0xc5, 0x92, 0x09, 0xc2, 0xba, 0x09, 0xc3, 0x9e, 0x09, 0xc5, 0xa6, + 0x09, 0xc5, 0x8a, 0x09, 0xc5, 0x89, 0x09, 0x0a, 0xc4, 0xb8, 0x09, 0xc3, + 0xa6, 0x09, 0xc4, 0x91, 0x09, 0xc3, 0xb0, 0x09, 0xc4, 0xa7, 0x09, 0xc4, + 0xb1, 0x09, 0xc4, 0xb3, 0x09, 0xc5, 0x80, 0x09, 0xc5, 0x82, 0x09, 0xc3, + 0xb8, 0x09, 0xc5, 0x93, 0x09, 0xc3, 0x9f, 0x09, 0xc3, 0xbe, 0x09, 0xc5, + 0xa7, 0x09, 0xc5, 0x8b, 0x09, 0x09, 0x0a, 0x63, 0x31, 0x09, 0xc3, 0x80, + 0xc3, 0x88, 0xc3, 0x8c, 0xc3, 0x92, 0xc3, 0x99, 0xc3, 0xa0, 0xc3, 0xa8, + 0xc3, 0xac, 0xc3, 0xb2, 0xc3, 0xb9, 0x0a, 0x63, 0x32, 0x09, 0xc3, 0x81, + 0xc4, 0x86, 0xc3, 0x89, 0xc3, 0x8d, 0xc4, 0xb9, 0xc5, 0x83, 0xc3, 0x93, + 0xc5, 0x94, 0xc5, 0x9a, 0xc3, 0x9a, 0xc3, 0x9d, 0xc5, 0xb9, 0xc3, 0xa1, + 0xc4, 0x87, 0xc3, 0xa9, 0xc4, 0xa3, 0xc3, 0xad, 0xc4, 0xba, 0xc5, 0x84, + 0xc3, 0xb3, 0xc5, 0x95, 0xc5, 0x9b, 0xc3, 0xba, 0xc3, 0xbd, 0xc5, 0xba, + 0x0a, 0x63, 0x33, 0x09, 0xc3, 0x82, 0xc4, 0x88, 0xc3, 0x8a, 0xc4, 0x9c, + 0xc4, 0xa4, 0xc3, 0x8e, 0xc4, 0xb4, 
0xc3, 0x94, 0xc5, 0x9c, 0xc3, 0x9b, + 0xc5, 0xb4, 0xc5, 0xb6, 0xc3, 0xa2, 0xc4, 0x89, 0xc3, 0xaa, 0xc4, 0x9d, + 0xc4, 0xa5, 0xc3, 0xae, 0xc4, 0xb5, 0xc3, 0xb4, 0xc5, 0x9d, 0xc3, 0xbb, + 0xc5, 0xb5, 0xc5, 0xb7, 0x0a, 0x63, 0x34, 0x09, 0xc3, 0x83, 0xc4, 0xa8, + 0xc3, 0x91, 0xc3, 0x95, 0xc5, 0xa8, 0xc3, 0xa3, 0xc4, 0xa9, 0xc3, 0xb1, + 0xc3, 0xb5, 0xc5, 0xa9, 0x0a, 0x63, 0x35, 0x09, 0xc4, 0x80, 0xc4, 0x92, + 0xc4, 0xaa, 0xc5, 0x8c, 0xc5, 0xaa, 0xc4, 0x81, 0xc4, 0x93, 0xc4, 0xab, + 0xc5, 0x8d, 0xc5, 0xab, 0x0a, 0x63, 0x36, 0x09, 0xc4, 0x82, 0xc4, 0x9e, + 0xc5, 0xac, 0xc4, 0x83, 0xc4, 0x9f, 0xc5, 0xad, 0x0a, 0x63, 0x37, 0x09, + 0xc4, 0x8a, 0xc4, 0x96, 0xc4, 0xa0, 0xc4, 0xb0, 0xc5, 0xbb, 0xc4, 0x8b, + 0xc4, 0x97, 0xc4, 0xa1, 0xc5, 0xbc, 0x0a, 0x63, 0x38, 0x09, 0xc3, 0x84, + 0xc3, 0x8b, 0xc3, 0x8f, 0xc3, 0x96, 0xc3, 0x9c, 0xc5, 0xb8, 0xc3, 0xa4, + 0xc3, 0xab, 0xc3, 0xaf, 0xc3, 0xb6, 0xc3, 0xbc, 0xc3, 0xbf, 0x0a, 0x63, + 0x39, 0x09, 0xc3, 0x84, 0xc3, 0x8b, 0xc3, 0x8f, 0xc3, 0x96, 0xc3, 0x9c, + 0xc5, 0xb8, 0xc3, 0xa4, 0xc3, 0xab, 0xc3, 0xaf, 0xc3, 0xb6, 0xc3, 0xbc, + 0xc3, 0xbf, 0x0a, 0x63, 0x61, 0x09, 0xc3, 0x85, 0xc5, 0xae, 0xc3, 0xa5, + 0xc5, 0xaf, 0x0a, 0x63, 0x62, 0x09, 0xc3, 0x87, 0xc4, 0xa2, 0xc4, 0xb6, + 0xc4, 0xbb, 0xc5, 0x85, 0xc5, 0x96, 0xc5, 0x9e, 0xc5, 0xa2, 0xc3, 0xa7, + 0xc4, 0xb7, 0xc4, 0xbc, 0xc5, 0x86, 0xc5, 0x97, 0xc5, 0x9f, 0xc5, 0xa3, + 0x0a, 0x63, 0x64, 0x09, 0xc5, 0x90, 0xc5, 0xb0, 0xc5, 0x91, 0xc5, 0xb1, + 0x0a, 0x63, 0x65, 0x09, 0xc4, 0x84, 0xc4, 0x98, 0xc4, 0xae, 0xc5, 0xb2, + 0xc4, 0x85, 0xc4, 0x99, 0xc4, 0xaf, 0xc5, 0xb3, 0x0a, 0x63, 0x66, 0x09, + 0xc4, 0x8c, 0xc4, 0x8e, 0xc4, 0x9a, 0xc4, 0xbd, 0xc5, 0x87, 0xc5, 0x98, + 0xc5, 0xa0, 0xc5, 0xa4, 0xc5, 0xbd, 0xc4, 0x8d, 0xc4, 0x8f, 0xc4, 0x9b, + 0xc4, 0xbe, 0xc5, 0x88, 0xc5, 0x99, 0xc5, 0xa1, 0xc5, 0xa5, 0xc5, 0xbe, + 0x0a, +]; + diff --git a/encoding/asn1/strings.ha b/encoding/asn1/strings.ha @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: MPL-2.0 +// (c) Hare authors <https://harelang.org> + +use 
encoding::utf8; +use endian; +use errors; +use io; +use strings; + + +// numeric string +def N: u8 = 0o1; + +// printable string +def P: u8 = 0o2; + +// LUT of bitfields with character attributes +const cclass: [_]u8 = [ +// 0 1 2 3 4 5 6 7 + 0, 0, 0, 0, 0, 0, 0, 0, // 0 + 0, 0, 0, 0, 0, 0, 0, 0, // 10 + 0, 0, 0, 0, 0, 0, 0, 0, // 20 + 0, 0, 0, 0, 0, 0, 0, 0, // 30 + N|P, 0, 0, 0, 0, 0, 0, P, // 40 + P, P, 0, P, P, P, P, P, // 50 + N|P, N|P, N|P, N|P, N|P, N|P, N|P, N|P, // 60 + N|P, N|P, P, 0, 0, P, 0, P, // 70 + 0, P, P, P, P, P, P, P, // 100 + P, P, P, P, P, P, P, P, // 110 + P, P, P, P, P, P, P, P, // 120 + P, P, P, 0, 0, 0, 0, 0, // 130 + 0, P, P, P, P, P, P, P, // 140 + P, P, P, P, P, P, P, P, // 150 + P, P, P, P, P, P, P, P, // 160 + P, P, P, 0, 0, 0, 0, 0, // 170 +]; + +type char_validator = fn (c: u8) bool; + +// Whether 'c' is valid in a NumericString +fn c_is_num(c: u8) bool = c & 0x80 == 0 && cclass[c] & N != 0; + +// Whether 'c' is valid in a PrintableString +fn c_is_print(c: u8) bool = c & 0x80 == 0 && cclass[c] & P != 0; + +fn c_is_ia5(c: u8) bool = c & 0x80 == 0; + +// Returns the number of bytes of the biggest complete utf8 chunk. Returns +// invalid, if the biggest complete chunk contains invalid utf8 characters. 
+fn validutf8(buf: []u8) (size | invalid) = { + if (len(buf) == 0) { + return 0z; + }; + + const min = if (len(buf) < 4) 0z else len(buf) - 4; + + let lastvalid = 0z; + let lastsz = 0z; + for (let i = min; i < len(buf); i += 1) { + match (utf8::utf8sz(buf[i])) { + case utf8::invalid => + yield; + case let s: size => + lastsz = s; + lastvalid = i; + }; + }; + + if (lastsz == 0) return invalid; + + const n = if (len(buf) - lastvalid == lastsz) len(buf) else lastvalid; + if (utf8::validate(buf[..n]) is utf8::invalid) { + return invalid; + }; + + return n; +}; + +@test fn validutf8() void = { + let b: [_]u8 = [ + 0x55, 0x56, 0xd0, 0x98, 0xe0, 0xa4, 0xb9, 0xf0, 0x90, 0x8d, 0x88 + ]; + const runesat: [_]size = [0, 1, 2, 2, 4, 4, 4, 7, 7, 7, 7, 8]; + + for (let i = 0z; i < len(b); i += 1) { + assert(validutf8(b[..i])! == runesat[i]); + }; + + b[10] = 0x55; + assert(validutf8(b[..10])! == 7); + assert(validutf8(b) is invalid); +}; + +// An io::stream reader that returns only valid utf8 chunks on read. +export type utf8stream = struct { + stream: io::stream, + d: *decoder, + strdec: *strdecoder, +}; + +const utf8stream_vtable = io::vtable { + reader = &utf8stream_reader, + ... +}; + +fn utf8stream_reader(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = { + // at least a rune must fit in buf + assert(len(buf) >= 4); + let s = s: *utf8stream; + let cur = match (s.d.cur) { + case void => + abort(); + case let dh: head => + yield dh; + }; + + match (s.strdec(s, buf)?) { + case let n: size => + return n; + case io::EOF => + return io::EOF; + }; +}; + +export type strdecoder = fn( + s: *utf8stream, + buf: []u8, +) (size | io::EOF | io::error); + +fn no_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = + dataread(s.d, buf); + +fn char_decoder( + s: *utf8stream, buf: []u8, + v: *char_validator, +) (size | io::EOF | io::error) = { + let n = match (dataread(s.d, buf)?) 
{ + case let n: size => + yield n; + case io::EOF => + return io::EOF; + }; + + for (let i = 0z; i < n; i += 1) { + if (!v(buf[i])) return wrap_err(invalid); + }; + return n; +}; + +fn num_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = + char_decoder(s, buf, &c_is_num); + +fn print_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = + char_decoder(s, buf, &c_is_print); + +fn ia5_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = + char_decoder(s, buf, &c_is_ia5); + +fn utf8_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = { + let n = 0z; + + n += match (dataread(s.d, buf)?) { + case let sz: size => + yield sz; + case io::EOF => + if (s.d.unbufn > 0) return wrap_err(invalid); + return io::EOF; + }; + + const max = match (validutf8(buf[..n])) { + case let s: size => + yield s; + case invalid => + return wrap_err(invalid); + }; + + if (max < n) { + if (dataeof(s.d)) { + // string ends with incomplete rune + return wrap_err(invalid); + }; + dataunread(s.d, buf[max..n]); + return max; + }; + + return n; +}; + +// A BMPString is a sequence of big-endian 16-bit units, each of which is one +// code point of the Basic Multilingual Plane (UCS-2). Every unit is re-encoded +// as UTF-8 into 'buf'. Surrogate units (0xd800-0xdfff) are not code points and +// are rejected as invalid, as is a trailing odd byte. +fn bmp_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = { + // TODO disallow control functions (X.690: 8.23.9) + + let n = 0z; + let rbuf: [2]u8 = [0...]; + for (true) { + match (dataread(s.d, rbuf)?) { + case let sz: size => + if (sz < 2) return wrap_err(invalid); + case io::EOF => + return if (n == 0) io::EOF else n; + }; + + const unit = endian::begetu16(rbuf); + if (unit >= 0xd800 && unit <= 0xdfff) { + // half of a surrogate pair has no UTF-8 encoding + return wrap_err(invalid); + }; + let rb = utf8::encoderune(unit: rune); + if (len(buf) - n < len(rb)) { + // no room left: hand the unit back for the next read + dataunread(s.d, rbuf); + return n; + }; + + buf[n..n + len(rb)] = rb; + n += len(rb); + }; +}; + +// Universal string is an UTF32BE string. +fn universal_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = { + const max = len(buf) - (len(buf) % 4); + + let n = 0z; + let rbuf: [4]u8 = [0...]; + for (true) { + match (dataread(s.d, rbuf)?) 
{ + case let sz: size => + if (sz < 4) return wrap_err(invalid); + case io::EOF => + return if (n == 0) io::EOF else n; + }; + + const cp = endian::begetu32(rbuf); + if (cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff)) { + // not a Unicode scalar value: it has no UTF-8 encoding + return wrap_err(invalid); + }; + let rb = utf8::encoderune(cp: rune); + if (len(buf) - n < len(rb)) { + dataunread(s.d, rbuf); + return n; + }; + + buf[n..n + len(rb)] = rb; + n += len(rb); + }; +}; + +// Decodes T.61 bytes into UTF-8 in 'buf'. Input is read one byte at a time; a +// combining byte is kept in 'in' until the following byte completes it. A +// character that no longer fits into 'buf' is pushed back with dataunread and +// the bytes written so far are returned. +fn t61_decoder(s: *utf8stream, buf: []u8) (size | io::EOF | io::error) = { + let inbuf: [2]u8 = [0...]; + let in = inbuf[..0]; + + let n = 0z; + + for (true) { + let chr: [1]u8 = [0]; + match (dataread(s.d, chr)?) { + case let sz: size => + assert(sz == 1); + static append(in, chr[0]); + case io::EOF => + // a combining byte without its second byte is an error + if (len(in) > 0) return wrap_err(invalid); + if (n > 0) return n; + return io::EOF; + }; + + match (t61_chardecode(in)) { + case let r: rune => + let raw = utf8::encoderune(r); + const bufremain = len(buf) - n; + // '<=': a character that exactly fills the rest of + // 'buf' still fits ('<' left the last bytes unused). + if (len(raw) <= bufremain) { + buf[n..n + len(raw)] = raw[..]; + n += len(raw); + in = inbuf[..0]; + } else { + dataunread(s.d, in); + break; + }; + case insufficient => + // leave combining char in in + yield; + case invalid => + return wrap_err(invalid); + }; + }; + + return n; +}; + +// Creates the utf8stream that decodes string type 't' from 'd'. No header is +// read here. Returns invalid if 't' is not a supported string type. +fn newstrreader(d: *decoder, t: utag) (utf8stream | error) = { + let strdec: *strdecoder = switch (t) { + case utag::NUMERIC_STRING => + yield &num_decoder; + case utag::PRINTABLE_STRING => + yield &print_decoder; + case utag::IA5_STRING => + yield &ia5_decoder; + case utag::UTF8_STRING => + yield &utf8_decoder; + case utag::TELETEX_STRING => + yield &t61_decoder; + case utag::BMP_STRING => + yield &bmp_decoder; + case utag::UNIVERSAL_STRING => + yield &universal_decoder; + case => + return invalid; + }; + + return utf8stream { + stream = &utf8stream_vtable, + d = d, + strdec = strdec, + ... 
+ }; +}; + +// Returns an [[utf8stream]] for a supported utag 't', which is one of: +// * utag::NUMERIC_STRING +// * utag::PRINTABLE_STRING +// * utag::IA5_STRING +// * utag::UTF8_STRING +// * utag::TELETEX_STRING +// * utag::BMP_STRING +// * utag::UNIVERSAL_STRING +// +// The header of the next element is read and checked against 't' first. If +// 't' is not one of the tags above, an error is returned. +export fn strreader(d: *decoder, t: utag) (utf8stream | error) = { + let dh = next(d)?; + expect_utag(dh, t)?; + // newstrreader fails for tags that are not string types; pass that + // error on to the caller instead of aborting with '!'. + return newstrreader(d, t); +}; + +// Reads a printable string into 'buf'. +export fn read_printstr(d: *decoder, buf: []u8) (size | error) = { + let dh = next(d)?; + expect_utag(dh, utag::PRINTABLE_STRING)?; + + const n = read_bytes(d, buf)?; + + for (let i = 0z; i < n; i += 1) { + if (!c_is_print(buf[i])) { + return invalid; + }; + }; + return n; +}; + +// Reads an utf8 string into 'buf' and returns a str that borrows from buf. +// +// NOTE(review): every pass hands buf[n..] to the stream, and +// utf8stream_reader asserts that its buffer holds at least 4 bytes, so 'buf' +// presumably needs 4 spare bytes beyond the string length -- confirm. +export fn read_utf8str(d: *decoder, buf: []u8) (str | error) = { + let dh = next(d)?; + expect_utag(dh, utag::UTF8_STRING)?; + + // UTF8_STRING is always supported, so this cannot fail + let r = newstrreader(d, utag::UTF8_STRING)!; + let n = 0z; + + for (true) { + n += match (io::read(&r, buf[n..])) { + case let sz: size => + yield sz; + case io::EOF => + break; + case let e: io::error => + return unwrap_err(e); + }; + }; + + return strings::fromutf8(buf[..n])!; +}; + diff --git a/encoding/asn1/t61.ha b/encoding/asn1/t61.ha @@ -0,0 +1,534 @@ +// SPDX-License-Identifier: MPL-2.0 +// (c) Hare authors <https://harelang.org> + +// https://en.wikipedia.org/wiki/ITU_T.61 +const t61toascii: [_]u8 = [ +// 0 1 2 3 4 5 6 7 +// 8 9 a b c d e f + 0, 0, 0, 0, 0, 0, 0, 0, // 0 + 0, 0, 0x0a, 0, 0x0c, 0x0d, 0, 0, // 0 + 0, 0, 0, 0, 0, 0, 0, 0, // 10 + 0, 0, 0x1a, 0x1b, 0, 0, 0, 0, // 10 + 0x20, 0x21, 0x22, 0, 0, 0x25, 0x26, 0x27, // 20 + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, // 20 + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, // 30 + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, // 30 + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, // 40 + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, // 40 + 0x50, 0x51, 0x52, 0x53, 
0x54, 0x55, 0x56, 0x57, // 50 + 0x58, 0x59, 0x5a, 0x5b, 0, 0x5d, 0, 0x5f, // 50 + 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, // 60 + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, // 60 + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, // 70 + 0x78, 0x79, 0x7a, 0, 0x7c, 0, 0, 0, // 70 +]; + +// Code points for the T.61 bytes 0x80..0xff, indexed by (byte - 0x80). +// '\u0000' marks a byte without a mapping; t61_chardecode reports it as +// invalid. The 0xc0 row holds the combining marks; t61_chardecode does not +// return them but asks for a second byte (or rejects 0xcc). +const t61toutf8: [_]rune = [ + // 0x80 + '\u0000', '\u0000', '\u0000', '\u0000', + '\u0000', '\u0000', '\u0000', '\u0000', + '\u0000', '\u0000', '\u0000', '\u008b', + '\u008c', '\u0000', '\u0000', '\u0000', + + // 0x90 + '\u0000', '\u0000', '\u0000', '\u0000', + '\u0000', '\u0000', '\u0000', '\u0000', + '\u0000', '\u0000', '\u0000', '\u009b', + '\u0000', '\u0000', '\u0000', '\u0000', + + // 0xa0 + '\u00a0', '\u00a1', '\u00a2', '\u00a3', + '\u0024', '\u00a5', '\u0023', '\u00a7', + '\u00a4', '\u0000', '\u0000', '\u00ab', + '\u0000', '\u0000', '\u0000', '\u0000', + + // 0xb0 + '\u00b0', '\u00b1', '\u00b2', '\u00b3', + '\u00d7', '\u00b5', '\u00b6', '\u00b7', + '\u00f7', '\u0000', '\u0000', '\u00bb', + '\u00bc', '\u00bd', '\u00be', '\u00bf', + + // 0xc0 + '\u0000', '\u0300', '\u0301', '\u0302', + '\u0303', '\u0304', '\u0306', '\u0307', + '\u0308', '\u0308', '\u030a', '\u0327', + '\u0332', '\u030b', '\u0328', '\u030c', + + // 0xd0 + '\u0000', '\u0000', '\u0000', '\u0000', + '\u0000', '\u0000', '\u0000', '\u0000', + '\u0000', '\u0000', '\u0000', '\u0000', + '\u0000', '\u0000', '\u0000', '\u0000', + + // 0xe0 + '\u2126', '\u00c6', '\u00d0', '\u00aa', + '\u0126', '\u0000', '\u0132', '\u013f', + '\u0141', '\u00d8', '\u0152', '\u00ba', + '\u00de', '\u0166', '\u014a', '\u0149', + + // 0xf0 + '\u0138', '\u00e6', '\u0111', '\u00f0', + '\u0127', '\u0131', '\u0133', '\u0140', + '\u0142', '\u00f8', '\u0153', '\u00df', + '\u00fe', '\u0167', '\u014b', '\u0000', +]; + +fn decode(out: []u8, in: []u8) void = { + for (let i = 0z; i < len(in); i += 1) { + const c = in[i]; + const r: rune = if (c & 0x80 != 0) { + // TODO special cases + yield t61toutf8[c - 0x80]; + } else { + const c = 
t61toascii[in[i]]; + yield c: u32: rune; + }; + + // write r to out + }; + return; +}; + +// Returned by [[t61_chardecode]] when more input is needed to decode a +// character. +export type insufficient = !void; + +// Decodes one T.61 character from 'in'. +// +// A single byte is looked up in t61toascii (below 0x80) or t61toutf8 (0x80 +// and above); a byte without a mapping, and 0xcc, is invalid. The combining +// bytes 0xc1..0xcf yield insufficient: call again with that byte followed by +// the next one. Two bytes are resolved by t61_combine. Empty input yields +// insufficient. +export fn t61_chardecode(in: []u8) (rune | insufficient | invalid) = { + if (len(in) == 0) { + // nothing to decode yet; indexing in[0] below would abort + return insufficient; + }; + + // 'in' is either one char or two if first is a combining character. + if (len(in) == 2) { + return t61_combine(in); + }; + + const in = in[0]; + + if (in & 0x80 == 0) { + const r = t61toascii[in]; + return if (r == 0) invalid else r: u32: rune; + }; + + const c = t61toutf8[in - 0x80]; + if (c == '\u0000') { + return invalid; + }; + + if (in == 0xcc) { + return invalid; + }; + if (in > 0xc0 && in <= 0xcf) { + return insufficient; + }; + + return c; +}; + +fn t61_combine(in: []u8) (rune | invalid) = { + const comb = in[0]; + const in = in[1]; + switch (comb) { + case 0xc1 => + switch (in: u32: rune) { + case 'A' => + return '\u00c0'; + case 'E' => + return '\u00c8'; + case 'I' => + return '\u00cc'; + case 'O' => + return '\u00d2'; + case 'U' => + return '\u00d9'; + case 'a' => + return '\u00e0'; + case 'e' => + return '\u00e8'; + case 'i' => + return '\u00ec'; + case 'o' => + return '\u00f2'; + case 'u' => + return '\u00f9'; + case => + return invalid; + }; + case 0xc2 => + switch (in: u32: rune) { + case 'A' => + return '\u00c1'; + case 'C' => + return '\u0106'; + case 'E' => + return '\u00c9'; + case 'I' => + return '\u00cd'; + case 'L' => + return '\u0139'; + case 'N' => + return '\u0143'; + case 'O' => + return '\u00d3'; + case 'R' => + return '\u0154'; + case 'S' => + return '\u015a'; + case 'U' => + return '\u00da'; + case 'Y' => + return '\u00dd'; + case 'Z' => + return '\u0179'; + case 'a' => + return '\u00e1'; + case 'c' => + return '\u0107'; + case 'e' => + return '\u00e9'; + case 'g' => + return '\u0123'; + case 'i' => + return '\u00ed'; + case 'l' => + return '\u013a'; + case 'n' => + return '\u0144'; + case 'o' => + return '\u00f3'; + case 'r' => + return '\u0155'; + case 's' => + return '\u015b'; + case 'u' => + return '\u00fa'; + case 'y' => + return 
'\u00fd'; + case 'z' => + return '\u017a'; + case => + return invalid; + }; + case 0xc3 => + switch (in: u32: rune) { + case 'A' => + return '\u00c2'; + case 'C' => + return '\u0108'; + case 'E' => + return '\u00ca'; + case 'G' => + return '\u011c'; + case 'H' => + return '\u0124'; + case 'I' => + return '\u00ce'; + case 'J' => + return '\u0134'; + case 'O' => + return '\u00d4'; + case 'S' => + return '\u015c'; + case 'U' => + return '\u00db'; + case 'W' => + return '\u0174'; + case 'Y' => + return '\u0176'; + case 'a' => + return '\u00e2'; + case 'c' => + return '\u0109'; + case 'e' => + return '\u00ea'; + case 'g' => + return '\u011d'; + case 'h' => + return '\u0125'; + case 'i' => + return '\u00ee'; + case 'j' => + return '\u0135'; + case 'o' => + return '\u00f4'; + case 's' => + return '\u015d'; + case 'u' => + return '\u00fb'; + case 'w' => + return '\u0175'; + case 'y' => + return '\u0177'; + case => + return invalid; + }; + case 0xc4 => + switch (in: u32: rune) { + case 'A' => + return '\u00c3'; + case 'I' => + return '\u0128'; + case 'N' => + return '\u00d1'; + case 'O' => + return '\u00d5'; + case 'U' => + return '\u0168'; + case 'a' => + return '\u00e3'; + case 'i' => + return '\u0129'; + case 'n' => + return '\u00f1'; + case 'o' => + return '\u00f5'; + case 'u' => + return '\u0169'; + case => + return invalid; + }; + case 0xc5 => + switch (in: u32: rune) { + case 'A' => + return '\u0100'; + case 'E' => + return '\u0112'; + case 'I' => + return '\u012a'; + case 'O' => + return '\u014c'; + case 'U' => + return '\u016a'; + case 'a' => + return '\u0101'; + case 'e' => + return '\u0113'; + case 'i' => + return '\u012b'; + case 'o' => + return '\u014d'; + case 'u' => + return '\u016b'; + case => + return invalid; + }; + case 0xc6 => + switch (in: u32: rune) { + case 'A' => + return '\u0102'; + case 'G' => + return '\u011e'; + case 'U' => + return '\u016c'; + case 'a' => + return '\u0103'; + case 'g' => + return '\u011f'; + case 'u' => + return '\u016d'; + case 
=> + return invalid; + }; + case 0xc7 => + switch (in: u32: rune) { + case 'C' => + return '\u010a'; + case 'E' => + return '\u0116'; + case 'G' => + return '\u0120'; + case 'I' => + return '\u0130'; + case 'Z' => + return '\u017b'; + case 'c' => + return '\u010b'; + case 'e' => + return '\u0117'; + case 'g' => + return '\u0121'; + case 'z' => + return '\u017c'; + case => + return invalid; + }; + case 0xc8 => + switch (in: u32: rune) { + case 'A' => + return '\u00c4'; + case 'E' => + return '\u00cb'; + case 'I' => + return '\u00cf'; + case 'O' => + return '\u00d6'; + case 'U' => + return '\u00dc'; + case 'Y' => + return '\u0178'; + case 'a' => + return '\u00e4'; + case 'e' => + return '\u00eb'; + case 'i' => + return '\u00ef'; + case 'o' => + return '\u00f6'; + case 'u' => + return '\u00fc'; + case 'y' => + return '\u00ff'; + case => + return invalid; + }; + case 0xc9 => + switch (in: u32: rune) { + case 'A' => + return '\u00c4'; + case 'E' => + return '\u00cb'; + case 'I' => + return '\u00cf'; + case 'O' => + return '\u00d6'; + case 'U' => + return '\u00dc'; + case 'Y' => + return '\u0178'; + case 'a' => + return '\u00e4'; + case 'e' => + return '\u00eb'; + case 'i' => + return '\u00ef'; + case 'o' => + return '\u00f6'; + case 'u' => + return '\u00fc'; + case 'y' => + return '\u00ff'; + case => + return invalid; + }; + case 0xca => + switch (in: u32: rune) { + case 'A' => + return '\u00c5'; + case 'U' => + return '\u016e'; + case 'a' => + return '\u00e5'; + case 'u' => + return '\u016f'; + case => + return invalid; + }; + case 0xcb => + switch (in: u32: rune) { + case 'C' => + return '\u00c7'; + case 'G' => + return '\u0122'; + case 'K' => + return '\u0136'; + case 'L' => + return '\u013b'; + case 'N' => + return '\u0145'; + case 'R' => + return '\u0156'; + case 'S' => + return '\u015e'; + case 'T' => + return '\u0162'; + case 'c' => + return '\u00e7'; + case 'k' => + return '\u0137'; + case 'l' => + return '\u013c'; + case 'n' => + return '\u0146'; + case 'r' => 
+ return '\u0157'; + case 's' => + return '\u015f'; + case 't' => + return '\u0163'; + case => + return invalid; + }; + case 0xcd => + switch (in: u32: rune) { + case 'O' => + return '\u0150'; + case 'U' => + return '\u0170'; + case 'o' => + return '\u0151'; + case 'u' => + return '\u0171'; + case => + return invalid; + }; + case 0xce => + switch (in: u32: rune) { + case 'A' => + return '\u0104'; + case 'E' => + return '\u0118'; + case 'I' => + return '\u012e'; + case 'U' => + return '\u0172'; + case 'a' => + return '\u0105'; + case 'e' => + return '\u0119'; + case 'i' => + return '\u012f'; + case 'u' => + return '\u0173'; + case => + return invalid; + }; + case 0xCf => + switch (in: u32: rune) { + case 'C' => + return '\u010c'; + case 'D' => + return '\u010e'; + case 'E' => + return '\u011a'; + case 'L' => + return '\u013d'; + case 'N' => + return '\u0147'; + case 'R' => + return '\u0158'; + case 'S' => + return '\u0160'; + case 'T' => + return '\u0164'; + case 'Z' => + return '\u017d'; + case 'c' => + return '\u010d'; + case 'd' => + return '\u010f'; + case 'e' => + return '\u011b'; + case 'l' => + return '\u013e'; + case 'n' => + return '\u0148'; + case 'r' => + return '\u0159'; + case 's' => + return '\u0161'; + case 't' => + return '\u0165'; + case 'z' => + return '\u017e'; + case => + return invalid; + }; + case => + return invalid; + }; +}; +