hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit b5d53a781b5512014e3d21f076e89b1208e2aeca
parent dd820bc807337a36e40d0d7a67d8982ac91b556e
Author: Armin Preiml <apreiml@strohwolke.at>
Date:   Wed,  7 Feb 2024 19:53:31 +0100

asn1: encoder

Signed-off-by: Armin Preiml <apreiml@strohwolke.at>

Diffstat:
A encoding/asn1/+test/encoder_test.ha | 147 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A encoding/asn1/encoder.ha | 422 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 569 insertions(+), 0 deletions(-)

diff --git a/encoding/asn1/+test/encoder_test.ha b/encoding/asn1/+test/encoder_test.ha
@@ -0,0 +1,147 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bufio;
use bytes;
use errors;
use io;
use memio;
use os;
use strings;
use time::date;
use types;


// Tag ids of 31 and above are written in multi-byte form. Checks the encoded
// bytes and that the decoder reads the same tag id back.
@test fn write_id() void = {
	let buf = memio::dynamic();
	// release the buffer like the other tests in this file do
	defer io::close(&buf)!;
	let e = derencoder(&buf);

	write_fixedprim(&e, class::UNIVERSAL, 0x2aa, [0x00])!;
	encode(&e)!;
	assert(bytes::equal([0x1f, 0x85, 0x2a, 0x01, 0x00],
			memio::buffer(&buf)));

	io::seek(&buf, 0, io::whence::SET)!;
	let d = derdecoder(&buf);
	let h = peek(&d)!;
	assert(h.tagid == 0x2aa);

	// same check for the largest tag id that fits into a u32
	let buf = memio::dynamic();
	defer io::close(&buf)!;
	let e = derencoder(&buf);

	write_fixedprim(&e, class::UNIVERSAL, types::U32_MAX, [0x00])!;
	encode(&e)!;
	assert(bytes::equal([0x1f, 0x8f, 0xff, 0xff, 0xff, 0x7f, 0x01, 0x00],
			memio::buffer(&buf)));

	io::seek(&buf, 0, io::whence::SET)!;
	let d = derdecoder(&buf);
	let h = peek(&d)!;
	assert(h.tagid == types::U32_MAX);
};

// A primitive created with create_prim is written with a size placeholder;
// the encoded result must carry the minimal one-byte length.
@test fn write_prim() void = {
	let buf = memio::dynamic();
	defer io::close(&buf)!;
	let dest = memio::dynamic();
	defer io::close(&dest)!;

	let enc = derencoder(&buf);

	create_prim(&enc, class::UNIVERSAL, utag::INTEGER)!;
	write(&enc, [0x01, 0x05, 0x07])!;
	finish_prim(&enc);

	assert(encodeto(&enc, &dest)! == 5);

	assert(bytes::equal(memio::buffer(&dest), [
		0x02, 0x03, 0x01, 0x05, 0x07
	]));
};

// Sizes below 128 take a single byte; larger sizes are prefixed with
// 0x80 | <number of size bytes>.
@test fn encode_dsz() void = {
	assert(bytes::equal([0x7f], encode_dsz(0x7f)));
	assert(bytes::equal([0x81, 0x8f], encode_dsz(0x8f)));
	assert(bytes::equal([0x81, 0xff], encode_dsz(0xff)));
	assert(bytes::equal([0x82, 0x01, 0x00], encode_dsz(0x100)));
};

// Nested sequences: the length of each sequence must cover everything written
// between create_seq and the matching finish_seq.
@test fn write_seq() void = {
	let buf = memio::dynamic();
	defer io::close(&buf)!;
	let dest = memio::dynamic();
	defer io::close(&dest)!;

	let enc = derencoder(&buf);

	create_seq(&enc)!;
	write_bool(&enc, false)!;
	create_seq(&enc)!;
	write_int(&enc, [0x01, 0x02, 0x03])!;
	finish_seq(&enc);
	finish_seq(&enc);
	assert(encodeto(&enc, &dest)! == 12);

	assert(bytes::equal(memio::buffer(&dest), [
		0x30, 0x0a, // seq
		0x01, 0x01, 0x00, // bool
		0x30, 0x05, // seq
		0x02, 0x03, 0x01, 0x02, 0x03, // int
	]));
};

// 'true' is encoded as a single 0xff content byte.
@test fn write_bool() void = {
	let dest = memio::dynamic();
	defer io::close(&dest)!;

	let buf = memio::dynamic();
	defer io::close(&buf)!;
	let enc = derencoder(&buf);

	write_bool(&enc, true)!;
	encodeto(&enc, &dest)!;

	assert(bytes::equal(memio::buffer(&dest), [0x01, 0x01, 0xff]));
};

// Integers are compacted: redundant leading 0x00 / 0xff bytes are dropped.
// write_uint prepends a 0x00 byte if the highest bit of the input is set.
@test fn write_int() void = {
	let dest = memio::dynamic();
	defer io::close(&dest)!;
	let buf = memio::dynamic();
	defer io::close(&buf)!;

	let enc = derencoder(&buf);

	// positive: one leading zero must stay, since the next byte is 0x80
	write_int(&enc, [0x00, 0x00, 0x00, 0x00, 0x80])!;
	encodeto(&enc, &dest)!;

	assert(bytes::equal(memio::buffer(&dest), [0x02, 0x02, 0x00, 0x80]));

	memio::reset(&dest);
	memio::reset(&buf);
	let enc = derencoder(&buf);

	// negative: leading 0xff bytes are dropped
	write_int(&enc, [0xff, 0xff, 0xff, 0x80, 0x10])!;
	encodeto(&enc, &dest)!;

	assert(bytes::equal(memio::buffer(&dest), [0x02, 0x02, 0x80, 0x10])); 

	memio::reset(&dest);
	memio::reset(&buf);
	let enc = derencoder(&buf);

	// zero keeps exactly one content byte
	write_int(&enc, [0x00, 0x00, 0x00])!;
	encodeto(&enc, &dest)!;

	assert(bytes::equal(memio::buffer(&dest), [0x02, 0x01, 0x00]));

	memio::reset(&dest);
	memio::reset(&buf);
	let enc = derencoder(&buf);

	// unsigned input with the highest bit set gets a 0x00 prepended
	write_uint(&enc, [0x8f, 0x01])!;
	encodeto(&enc, &dest)!;

	assert(bytes::equal(memio::buffer(&dest), [0x02, 0x03, 0x00, 0x8f, 0x01]));
};

diff --git a/encoding/asn1/encoder.ha b/encoding/asn1/encoder.ha
@@ -0,0 +1,422 @@
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bufio;
use bytes;
use endian;
use errors;
use io;
use math::{bit_size_u8,bit_size_u32};
use memio;
use strings;
use time::date;
use types;


// Integer type used for the data size of an entry.
export type datasz = u32; // XXX: might want to use size here
// Big endian put/get functions matching the width of [[datasz]].
let szput = &endian::beputu32;
let szget = &endian::begetu32;
// The largest data size that can be stored in a [[datasz]].
def DATASZ_MAX = types::U32_MAX;

// The maximum header size possible for u32 tag ids.
export def MAXHEADSZ = 1 + 5 + 1 + size(datasz);

// The maximum header size possible for entries of [[utag]].
export def MAXUTAGHEADSZ = 1 + 1 + size(datasz);

// State of a DER encoder. Use [[derencoder]] to create one.
export type encoder = struct {
	// buffer the entries are written to before they are encoded
	mem: *memio::stream,
	// offset within 'mem' at which the data of this encoder begins
	start: io::off,
	// number of bytes written by this encoder so far
	pos: size,
	// stack of open constructed entries: the position of the size
	// placeholder and the size accumulated so far
	bt: [MAX_CONS_DEPTH](size, datasz),
	// number of used entries in 'bt'
	btn: size,

	// value of 'pos' at which the data of the current primitive begins
	cur_dpos: size,
	// true, while a primitive entry is open
	cur_prim: bool,
	// true, if the current primitive was written with its final size
	// and hence has no size placeholder to update
	cur_fixed: bool,

	// set for nested encoders, see [[derencoder_nested]]
	parent: nullable *bytewstream,
};

// Creates a DER encoder. create_* methods are used to create constructed
// values. Functions to write primitive values start with write_ or end with
// writer. After the entries have been written, the result is encoded using
// [[encode]] or [[encodeto]].
//
// 'mem' is required to buffer the written data before encoding it. Each entry
// will have a maximum overhead of [[MAXUTAGHEADSZ]], if entries are written
// using only methods provided here; or [[MAXHEADSZ]], if custom tag ids are in
// use. The encoder doesn't close 'mem' after use, hence it's the caller's
// responsibility to manage its lifetime.
//
// 'mem' as memio::stream allows the caller to decide whether to use a static or
// a dynamically allocated buffer.
export fn derencoder(mem: *memio::stream) encoder = encoder {
	mem = mem,
	// data of this encoder begins at the current offset of 'mem'
	start = io::tell(mem)!,
	...
};

// Creates a DER encoder that is nested within another DER entry and hence can
// use the buffer of the parent.
export fn derencoder_nested(b: *bytewstream) encoder = encoder {
	mem = b.e.mem,
	start = io::tell(b.e.mem)!,
	parent = b,
	...
};

// Appends 'buf' to the buffer of the encoder and advances 'e.pos'. Returns
// overflow, if the total size would exceed DATASZ_MAX or if 'e.mem' can not
// take all of 'buf'.
fn write(e: *encoder, buf: []u8) (void | overflow) = {
	if (len(buf) > (DATASZ_MAX - e.pos)) return overflow;

	match (io::write(e.mem, buf)) {
	case let n: size =>
		if (n < len(buf)) {
			// short writes happen, if a fixed e.mem reaches its end
			return overflow;
		};
	case errors::overflow =>
		return overflow;
	case =>
		// writing to mem does not throw any other errors
		abort();
	};
	e.pos += len(buf);
};

// Writes the identifier of an entry with class 'c' and tag id 't'. 'cons'
// marks the entry as constructed. The size of the identifier is added to the
// enclosing constructed entry, if there is one.
fn write_id(e: *encoder, c: class, t: u32, cons: bool) (void | overflow) = {
	let head: u8 = c << 6;
	if (cons) {
		head |= (1 << 5);
	};

	if (t < 31) {
		// short form: the tag id fits into the first byte
		bt_add_sz(e, 1);
		return write(e, [head | t: u8]);
	};

	// Long form: the first byte has all five tag bits set. The tag id
	// follows in groups of 7 bits, most significant group first. All
	// groups except the last one have the highest bit set.
	const bsz = bit_size_u32(t);
	const n = ((bsz + 6) / 7) - 1;

	// Account for the first byte, 'n' leading groups and the last group.
	// Otherwise the size of an enclosing constructed entry would miss the
	// identifier bytes of this entry.
	bt_add_sz(e, n: size + 2);

	write(e, [head | 0x1f])?;
	for (let i = 0z; i < n; i += 1) {
		write(e, [0x80 | (t >> ((n - i) * 7)): u8])?;
	};
	write(e, [t: u8 & 0x7f])?;
};

// Writes a complete primitive entry with data 'b'. Since the size is known
// up front, it is written in its final form and no placeholder is required.
// An open primitive is finished first.
fn write_fixedprim(e: *encoder, c: class, t: u32, b: []u8) (void | overflow) = {
	if (e.cur_prim) {
		finish_prim(e);
	};

	e.cur_prim = true;
	e.cur_fixed = true;
	write_id(e, c, t, false)?;

	write(e, encode_dsz(len(b)))?;
	write(e, b)?;

	bt_add_dsz(e, len(b): datasz);
};

// Opens a primitive entry of unknown size. The data is added using [[write]]
// and the entry is closed by [[finish_prim]], which stores the actual size
// in the placeholder written here. An open primitive is finished first.
fn create_prim(e: *encoder, class: class, tag: u32) (void | overflow) = {
	if (e.cur_prim) {
		finish_prim(e);
	};

	e.cur_prim = true;
	e.cur_fixed = false;

	write_id(e, class, tag, false)?;

	// write size placeholder
	const placehsz = 0x80 | size(datasz): u8;
	let lbuf: [1 + size(datasz)]u8 = [placehsz, 0...];
	write(e, lbuf)?;

	// data of this primitive begins right after the placeholder
	e.cur_dpos = e.pos;
};

// Closes the current primitive. If it was opened by [[create_prim]], the
// size of the written data is stored in the size placeholder and added to the
// enclosing constructed entry.
fn finish_prim(e: *encoder) void = {
	e.cur_prim = false;
	if (e.pos == 0 || e.cur_fixed) {
		// nothing written yet or no placeholder to update
		return;
	};

	// restore the write offset of 'mem' when done
	const pos = io::tell(e.mem)!;
	defer io::seek(e.mem, pos, io::whence::SET)!;

	// write back size to placeholder
	const dszpos = e.start: size + e.cur_dpos - size(datasz);
	const dsz = e.pos - e.cur_dpos;
	let dszbuf: [size(datasz)]u8 = [0...];
	szput(dszbuf, dsz: datasz);

	io::seek(e.mem, dszpos: io::off, io::whence::SET)!;
	io::write(e.mem, dszbuf)!;

	bt_add_dsz(e, dsz: datasz);
};

// Pushes a new entry with size 0 onto the backtrack stack. 'pos' is stored
// along with the size and is returned by [[pop_bt]].
//
// NOTE(review): the check below returns overflow while the last slot of
// 'e.bt' is still unused. Confirm whether this limit is intended.
fn push_bt(e: *encoder, pos: size) (void | overflow) = {
	if (e.btn + 1 >= len(e.bt)) return overflow;

	e.bt[e.btn] = (pos, 0);
	e.btn += 1;
};

// Add 'sz' to the current value of the backtrack stack. Does nothing, if the
// stack is empty.
fn bt_add_sz(e: *encoder, sz: size) void = {
	if (e.btn == 0) return;
	const csz = e.bt[e.btn - 1].1;
	e.bt[e.btn - 1].1 = csz + sz: datasz;
};

// Add data size 'sz' + size length to current value of the backtrack stack
fn bt_add_dsz(e: *encoder, sz: datasz) void = {
	if (e.btn == 0) return;
	const lsz = lensz(sz);
	return bt_add_sz(e, lsz + sz);
};

// Pop current backtrace value from stack. The stack must not be empty.
fn pop_bt(e: *encoder) (size, datasz) = {
	e.btn -= 1;
	let x = e.bt[e.btn];
	e.bt[e.btn] = (0, 0);
	return x;
};

// Returns the number of bytes required to encode the size 'l': a single byte
// for sizes below 128, otherwise one byte plus the bytes of 'l' itself.
fn lensz(l: datasz) u8 = if (l < 128) 1: u8 else (1 + (bit_size_u32(l) + 7) / 8);

// Encodes the data size 'sz' using as few bytes as possible. The result is
// stored in a static buffer and is overwritten by the next call.
fn encode_dsz(sz: size) []u8 = {
	static let buf: [size(datasz) + 1]u8 = [0...];
	if (sz < 128) {
		buf[0] = sz: u8;
		return buf[..1];
	};

	// first byte: 0x80 | number of size bytes that follow
	let n = lensz(sz: datasz);
	buf[0] = (n - 1) | 0x80;
	// fill in the size bytes starting with the least significant one
	for (let i: size = n - 1; sz > 0; i -= 1) {
		buf[i] = sz: u8;
		sz >>= 8;
	};

	return buf[..n];
};

// Creates an explicit constructed entry. [[finish_explicit]] must be called
// to close the entry.
export fn create_explicit(e: *encoder, c: class, tag: u32) (void | overflow) =
	create_cons(e, c, tag);

// Finishes an explicit constructed entry.
export fn finish_explicit(e: *encoder) void = finish_cons(e);

// Opens a constructed entry: writes the identifier and a size placeholder and
// pushes the position of the placeholder onto the backtrack stack. An open
// primitive is finished first.
fn create_cons(e: *encoder, class: class, tagid: u32) (void | overflow) = {
	if (e.cur_prim) {
		finish_prim(e);
	};
	write_id(e, class, tagid, true)?;

	// write size placeholder
	const placehsz = 0x80 | size(datasz): u8;
	let lbuf: [1 + size(datasz)]u8 = [placehsz, 0...];
	write(e, lbuf)?;

	push_bt(e, e.pos - size(datasz))?;
};

// Closes the constructed entry opened last: stores the accumulated size in
// its size placeholder and adds it to the enclosing constructed entry, if
// there is one.
fn finish_cons(e: *encoder) void = {
	if (e.cur_prim) {
		finish_prim(e);
	};

	let (dszpos, sz) = pop_bt(e);
	let lbuf: [size(datasz)]u8 = [0...];
	szput(lbuf, sz);

	// restore the write offset of 'mem' when done
	const pos = io::tell(e.mem)!;
	defer io::seek(e.mem, pos, io::whence::SET)!;

	// 'dszpos' is relative to the start of this encoder's data
	dszpos += e.start: size;
	io::seek(e.mem, dszpos: io::off, io::whence::SET)!;
	io::write(e.mem, lbuf)!;
	bt_add_dsz(e, sz);
};

// Creates a sequence. [[finish_seq]] must be called to close it.
export fn create_seq(e: *encoder) (void | overflow) =
	create_cons(e, class::UNIVERSAL, utag::SEQUENCE);

// Finishes a sequence.
export fn finish_seq(e: *encoder) void = finish_cons(e);

// Writes a boolean.
export fn write_bool(e: *encoder, b: bool) (void | overflow) = {
	let v: u8 = if (b) 0xff else 0x00;
	write_fixedprim(e, class::UNIVERSAL, utag::BOOLEAN, [v])?;
};

// Writes a null value.
export fn write_null(e: *encoder) (void | overflow) = {
	write_fixedprim(e, class::UNIVERSAL, utag::NULL, [])?;
};

// A stream that appends written bytes to the primitive entry currently open
// in the encoder 'e'.
export type bytewstream = struct {
	stream: io::stream,
	e: *encoder,
};

// Opens a primitive entry with given class and tag id and returns a stream
// to write its data.
fn bytewriter(e: *encoder, c: class, tagid: u32) (bytewstream | overflow) = {
	create_prim(e, c, tagid)?;
	return bytewstream {
		stream = &bytewriter_vtable,
		e = e,
		...
	};
};

const bytewriter_vtable = io::vtable {
	writer = &bytewriter_write,
	...
};

// Writer implementation of [[bytewstream]]. Writes all of 'buf' or fails.
fn bytewriter_write(s: *io::stream, buf: const []u8) (size | io::error) = {
	let w = s: *bytewstream;
	if (write(w.e, buf) is overflow) {
		return wrap_err(overflow);
	};
	return len(buf);
};

// Creates an io::writer that adds written bytes as OctetString.
export fn octetstrwriter(e: *encoder) (bytewstream | overflow) = {
	return bytewriter(e, class::UNIVERSAL, utag::OCTET_STRING);
};

// Writes an integer. 'n' must be stored in big endian order. The highest bit of
// the first byte marks the sign. 'n' must not be empty.
export fn write_int(e: *encoder, n: []u8) (void | overflow) = {
	assert(len(n) > 0, "integer requires at least one byte");
	const neg = n[0] & 0x80 == 0x80;

	// compact according to X.690 Chapt. 8.3.2
	// A leading byte is dropped as long as it only repeats the sign bit
	// of the byte that follows. The last byte is always kept.
	let i = 0z;
	for (i < len(n) - 1; i += 1) {
		if (neg && (n[i] != 0xff || n[i+1] & 0x80 != 0x80)) {
			break;
		};

		if (!neg && (n[i] != 0x00 || n[i+1] & 0x80 == 0x80)) {
			break;
		};
	};

	write_fixedprim(e, class::UNIVERSAL, utag::INTEGER, n[i..])?;
};

// Writes an integer assuming 'n' is unsigned. 'n' must not be empty.
export fn write_uint(e: *encoder, n: []u8) (void | overflow) = {
	assert(len(n) > 0, "integer requires at least one byte");
	if (n[0] & 0x80 == 0) {
		return write_int(e, n);
	};

	// prepend 0 so that the highest valued bit is not interpreted as sign
	create_prim(e, class::UNIVERSAL, utag::INTEGER)?;
	write(e, [0])?;
	write(e, n)?;
	finish_prim(e);
};

// Writes 's' as Utf8String.
export fn write_utf8str(e: *encoder, s: str) (void | overflow) =
	write_fixedprim(e, class::UNIVERSAL, utag::UTF8_STRING,
		strings::toutf8(s))?;

// Encodes currently written data in given memio stream and returns the buffer
// containing the result borrowed from 'mem' provided for [[derencoder]].
//
// All constructed entries must have been finished before. An open primitive
// is finished here.
export fn encode(e: *encoder) ([]u8 | io::error) = {
	assert(e.btn == 0);
	assert(e.start >= 0);

	if (e.cur_prim) {
		finish_prim(e);
	};

	let buf = memio::buffer(e.mem)[e.start..];

	// iterate entries to minify tag ids and data sizes. 't' is the write
	// index and 'i' is the read index.
	let t = 0z;
	for (let i = 0z; i < e.pos) { // TODO cast seems off
		// encode id
		const id = buf[i];
		buf[t] = id;
		t += 1;
		i += 1;

		const cons = (id >> 5) & 1 == 1;
		if ((id & 0b11111) == 0b11111) {
			// id spans multiple bytes
			// copy up to and including the first byte without the
			// highest bit set
			let id: u8 = 0x80;
			for (id & 0x80 == 0x80) {
				id = buf[i];
				buf[t] = id;
				t += 1;
				i += 1;
			};
		};

		// encode dsz
		let dsz: datasz = 0;
		let l = buf[i];
		i += 1;
		if (l < 128) {
			// data size fits in a single byte
			dsz = l;
			buf[t] = l;
			t += 1;
		} else {
			// decode multibyte size and minimize, since not all
			// placeholder bytes may have been used.
			const dn = l & 0x7f;
			for (let j = 0z; j < dn; j += 1) {
				dsz <<= 8;
				dsz |= buf[i];
				i += 1;
			};

			let dszbuf = encode_dsz(dsz);
			buf[t..t + len(dszbuf)] = dszbuf;
			t += len(dszbuf);
		};

		if (cons) {
			// the content of a constructed entry consists of
			// entries that are handled by the next iterations
			continue;
		};

		// write data of primitive fields
		buf[t..t+dsz] = buf[i..i+dsz];
		t += dsz;
		i += dsz;
	};

	// clear what is left over behind the minimized data
	bytes::zero(buf[t..]);
	match (e.parent) {
	case null =>
		yield;
	case let s: *bytewstream =>
		// the encoded result becomes data of the parent's entry
		// NOTE(review): the write offset of 'mem' is not moved back to
		// the end of the minimized data here. Confirm that writes to
		// the parent after a nested encode end up where expected.
		s.e.pos += t;
	};
	return buf[..t];
};

// Encodes written data and writes it to 'dest'. Returns the number of bytes
// written.
export fn encodeto(e: *encoder, dest: io::handle) (size | io::error) = {
	const buf = encode(e)?;
	return io::writeall(dest, buf)?;
};