hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

encoder.ha (10481B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use bytes;
      5 use endian;
      6 use errors;
      7 use io;
      8 use math::{bit_size};
      9 use memio;
     10 use strings;
     11 use types;
     12 
     13 
// Integer type used to store the data size (length field) of a DER entry.
export type datasz = u32; // XXX: might want to use size here
// Stores a [[datasz]] into a buffer in big endian byte order. Used to fill in
// the size placeholders of entries once their data size is known.
let szput = &endian::beputu32;
// Reads a [[datasz]] from a buffer in big endian byte order.
let szget = &endian::begetu32;
// The largest value representable by [[datasz]].
def DATASZ_MAX = types::U32_MAX;
     18 
// The maximum header size possible for u32 tag ids: one leading identifier
// octet, up to five octets for the tag id in 7 bit groups, one length prefix
// octet and size(datasz) octets for the size placeholder.
export def MAXHEADSZ = 1 + 5 + 1 + size(datasz);

// The maximum header size possible for entries of [[utag]]: one identifier
// octet, one length prefix octet and size(datasz) octets for the size
// placeholder.
export def MAXUTAGHEADSZ = 1 + 1 + size(datasz);
     24 
// State of a DER encoder. Entries are buffered into 'mem' with size
// placeholders and are compacted into their final form by [[encode]].
export type encoder = struct {
	// Buffer that receives the entries.
	mem: *memio::stream,
	// Offset of 'mem' at the time the encoder was created.
	start: io::off,
	// Number of bytes written through this encoder so far.
	pos: size,
	// Stack of currently open constructed entries. Each item holds the
	// position of the entry's size placeholder (relative to 'start') and
	// the content size accumulated so far.
	bt: [MAX_CONS_DEPTH](size, datasz),
	// Number of items in use on 'bt'.
	btn: size,

	// Value of 'pos' at which the data of the current primitive begins.
	cur_dpos: size,
	// True while a primitive entry is open and has yet to be finished.
	cur_prim: bool,
	// True if the current primitive was written with a known size, so that
	// no size placeholder needs to be filled in when finishing it.
	cur_fixed: bool,

	// Set for nested encoders: the byte writer of the parent entry, whose
	// encoder is advanced by the encoded size in [[encode]].
	parent: nullable *bytewstream,
};
     38 
     39 // Creates a new DER encoder. The user must provide a [[memio::stream]] for
     40 // buffering data before it's encoded. The user may provide a dynamic or fixed
     41 // stream at their discretion; fixed may be preferred if the user knows the
     42 // required buffer size in advance.
     43 //
     44 // To encode DER data, the user must call one of the "create_" functions (e.g.
     45 // [[create_explicit]]), followed by the appropriate "write_" functions (e.g.
     46 // [[write_int]]). These operations will be buffered into the provided memio
     47 // buffer, and the encoded form may be finalized and retrieved via [[encode]] or
     48 // [[encodeto]].
     49 //
     50 // To determine the required buffer size for a fixed buffer, consider the
     51 // maximum length of the input data (e.g. integer, string, etc length) plus the
     52 // necessary overhead, which is given by [[MAXUTAGHEADSZ]] if only using the
     53 // provided encoder functions (e.g. "write_" functions), or [[MAXHEADSZ]] if
     54 // using custom tag IDs.
     55 //
     56 // The encoder does not close the provided [[memio::stream]] after use; the
     57 // caller should manage its lifetime accordingly.
     58 export fn derencoder(mem: *memio::stream) encoder = encoder {
     59 	mem = mem,
     60 	start = io::tell(mem)!,
     61 	...
     62 };
     63 
     64 // Creates a DER encoder nested within another DER entry, using the buffer of
     65 // the parent.
     66 export fn derencoder_nested(b: *bytewstream) encoder = encoder {
     67 	mem = b.e.mem,
     68 	start = io::tell(b.e.mem)!,
     69 	parent = b,
     70 	...
     71 };
     72 
     73 fn write(e: *encoder, buf: []u8) (void | overflow) = {
     74 	if (len(buf) > (DATASZ_MAX - e.pos)) return overflow;
     75 
     76 	match (io::write(e.mem, buf)) {
     77 	case let n: size =>
     78 		if (n < len(buf)) {
     79 			// short writes happen, if a fixed e.mem reaches its end
     80 			return overflow;
     81 		};
     82 	case errors::overflow =>
     83 		return overflow;
     84 	case =>
     85 		 // writing to mem does not throw any other errors
     86 		abort();
     87 	};
     88 	e.pos += len(buf);
     89 };
     90 
     91 fn write_id(e: *encoder, c: class, t: u32, cons: bool) (void | overflow) = {
     92 	let head: u8 = c << 6;
     93 	if (cons) {
     94 		head |= (1 << 5);
     95 	};
     96 
     97 	if (t < 31) {
     98 		bt_add_sz(e, 1);
     99 		return write(e, [head | t: u8]);
    100 	};
    101 
    102 	write(e, [head | 0x1f])?;
    103 
    104 	const bsz = bit_size(t);
    105 	const n = ((bsz + 6) / 7) - 1;
    106 	for (let i = 0z; i < n; i += 1) {
    107 		write(e, [0x80 | (t >> ((n - i) * 7)): u8])?;
    108 	};
    109 	write(e, [t: u8 & 0x7f])?;
    110 };
    111 
    112 fn write_fixedprim(e: *encoder, c: class, t: u32, b: []u8) (void | overflow) = {
    113 	if (e.cur_prim) {
    114 		finish_prim(e);
    115 	};
    116 
    117 	e.cur_prim = true;
    118 	e.cur_fixed = true;
    119 	write_id(e, c, t, false)?;
    120 
    121 	write(e, encode_dsz(len(b)))?;
    122 	write(e, b)?;
    123 
    124 	bt_add_dsz(e, len(b): datasz);
    125 };
    126 
    127 fn create_prim(e: *encoder, class: class, tag: u32) (void | overflow) = {
    128 	if (e.cur_prim) {
    129 		finish_prim(e);
    130 	};
    131 
    132 	e.cur_prim = true;
    133 	e.cur_fixed = false;
    134 
    135 	write_id(e, class, tag, false)?;
    136 
    137 	// write size placeholder
    138 	const placehsz = 0x80 | size(datasz): u8;
    139 	let lbuf: [1 + size(datasz)]u8 = [placehsz, 0...];
    140 	write(e, lbuf)?;
    141 
    142 	e.cur_dpos = e.pos;
    143 };
    144 
    145 fn finish_prim(e: *encoder) void = {
    146 	e.cur_prim = false;
    147 	if (e.pos == 0 || e.cur_fixed) {
    148 		return;
    149 	};
    150 
    151 	const pos = io::tell(e.mem)!;
    152 	defer io::seek(e.mem, pos, io::whence::SET)!;
    153 
    154 	// write back size to placeholder
    155 	const dszpos = e.start: size + e.cur_dpos - size(datasz);
    156 	const dsz = e.pos - e.cur_dpos;
    157 	let dszbuf: [size(datasz)]u8 = [0...];
    158 	szput(dszbuf, dsz: datasz);
    159 
    160 	io::seek(e.mem, dszpos: io::off, io::whence::SET)!;
    161 	io::write(e.mem, dszbuf)!;
    162 
    163 	bt_add_dsz(e, dsz: datasz);
    164 };
    165 
    166 // Push n empty size value to backtrace stack
    167 fn push_bt(e: *encoder, pos: size) (void | overflow) = {
    168 	if (e.btn + 1 >= len(e.bt)) return overflow;
    169 
    170 	e.bt[e.btn] = (pos, 0);
    171 	e.btn += 1;
    172 };
    173 
    174 // Add 'sz' to the current value of the backtrack stack
    175 fn bt_add_sz(e: *encoder, sz: size) void = {
    176 	if (e.btn == 0) return;
    177 	const csz = e.bt[e.btn - 1].1;
    178 	e.bt[e.btn - 1].1 = csz + sz: datasz;
    179 };
    180 
    181 // Add data size 'sz' + size length to current value of the backtrack stack
    182 fn bt_add_dsz(e: *encoder, sz: datasz) void = {
    183 	if (e.btn == 0) return;
    184 	const lsz = lensz(sz);
    185 	return bt_add_sz(e, lsz + sz);
    186 };
    187 
    188 // Pop current backtrace value from stack
    189 fn pop_bt(e: *encoder) (size, datasz) = {
    190 	e.btn -= 1;
    191 	let x = e.bt[e.btn];
    192 	e.bt[e.btn] = (0, 0);
    193 	return x;
    194 };
    195 
    196 fn lensz(l: datasz) u8 = if (l < 128) 1: u8 else (1 + (bit_size(l) + 7) / 8);
    197 
    198 fn encode_dsz(sz: size) []u8 = {
    199 	static let buf: [size(datasz) + 1]u8 = [0...];
    200 	if (sz < 128) {
    201 		buf[0] = sz: u8;
    202 		return buf[..1];
    203 	};
    204 
    205 	let n = lensz(sz: datasz);
    206 	buf[0] = (n - 1) | 0x80;
    207 	for (let i: size = n - 1; sz > 0; i -= 1) {
    208 		buf[i] = sz: u8;
    209 		sz >>= 8;
    210 	};
    211 
    212 	return buf[..n];
    213 };
    214 
    215 // Creates an explicit constructed entry. The user must call [[finish_explicit]]
    216 // to close the associated DER entry.
    217 export fn create_explicit(e: *encoder, c: class, tag: u32) (void | overflow) =
    218 	create_cons(e, c, tag);
    219 
    220 // Finishes an explicit constructed entry.
    221 export fn finish_explicit(e: *encoder) void = finish_cons(e);
    222 
    223 fn create_cons(e: *encoder, class: class, tagid: u32) (void | overflow) = {
    224 	if (e.cur_prim) {
    225 		finish_prim(e);
    226 	};
    227 	write_id(e, class, tagid, true)?;
    228 
    229 	const placehsz = 0x80 | size(datasz): u8;
    230 	let lbuf: [1 + size(datasz)]u8 = [placehsz, 0...];
    231 	write(e, lbuf)?;
    232 
    233 	push_bt(e, e.pos - size(datasz))?;
    234 	return;
    235 };
    236 
    237 fn finish_cons(e: *encoder) void = {
    238 	if (e.cur_prim) {
    239 		finish_prim(e);
    240 	};
    241 
    242 	let (dszpos, sz) = pop_bt(e);
    243 	let lbuf: [size(datasz)]u8 = [0...];
    244 	szput(lbuf, sz);
    245 
    246 	const pos = io::tell(e.mem)!;
    247 	defer io::seek(e.mem, pos, io::whence::SET)!;
    248 
    249 	dszpos += e.start: size;
    250 	io::seek(e.mem, dszpos: io::off, io::whence::SET)!;
    251 	io::write(e.mem, lbuf)!;
    252 	bt_add_dsz(e, sz);
    253 };
    254 
    255 // Creates a sequence. The user must call [[finish_seq]] to close the associated
    256 // DER entry.
    257 export fn create_seq(e: *encoder) (void | overflow) =
    258 	return create_cons(e, class::UNIVERSAL, utag::SEQUENCE);
    259 
    260 // Finishes a sequence.
    261 export fn finish_seq(e: *encoder) void = finish_cons(e);
    262 
    263 // Writes a boolean.
    264 export fn write_bool(e: *encoder, b: bool) (void | overflow) = {
    265 	let v: u8 = if (b) 0xff else 0x00;
    266 	write_fixedprim(e, class::UNIVERSAL, utag::BOOLEAN, [v])?;
    267 };
    268 
    269 // Writes a null value.
    270 export fn write_null(e: *encoder) (void | overflow) = {
    271 	write_fixedprim(e, class::UNIVERSAL, utag::NULL, [])?;
    272 };
    273 
// An [[io::stream]] that writes the data of a primitive entry into the buffer
// of an [[encoder]].
export type bytewstream = struct {
	// Stream interface; must be the first field, since the stream pointer
	// is cast back to a bytewstream when writing.
	stream: io::stream,
	// The encoder that receives the data written to this stream.
	e: *encoder,
};
    278 
    279 fn bytewriter(e: *encoder, c: class, tagid: u32) (bytewstream | overflow) = {
    280 	create_prim(e, c, tagid)?;
    281 	return bytewstream {
    282 		stream = &bytewriter_vtable,
    283 		e = e,
    284 		...
    285 	};
    286 };
    287 
// Stream operations of [[bytewstream]]. Only writing is provided.
const bytewriter_vtable = io::vtable {
	writer = &bytewriter_write,
	...
};
    292 
    293 fn bytewriter_write(s: *io::stream, buf: const []u8) (size | io::error) = {
    294 	let w = s: *bytewstream;
    295 	if (write(w.e, buf) is overflow) {
    296 		return wrap_err(overflow);
    297 	};
    298 	return len(buf);
    299 };
    300 
    301 // Creates an [[io::writer]] that encodes data written to it as an OctetString.
    302 export fn octetstrwriter(e: *encoder) (bytewstream | overflow) = {
    303 	return bytewriter(e, class::UNIVERSAL, utag::OCTET_STRING);
    304 };
    305 
    306 // Writes an integer. 'n' must be stored in big endian order. The highest bit of
    307 // the first byte marks the sign.
    308 export fn write_int(e: *encoder, n: []u8) (void | overflow) = {
    309 	const neg = n[0] & 0x80 == 0x80;
    310 
    311 	// compact according to X.690 Chapt. 8.3.2
    312 	let i = 0z;
    313 	for (i < len(n) - 1; i += 1) {
    314 		if (neg && (n[i] != 0xff || n[i+1] & 0x80 != 0x80)) {
    315 			break;
    316 		};
    317 
    318 		if (!neg && (n[i] != 0x00 || n[i+1] & 0x80 == 0x80)) {
    319 			break;
    320 		};
    321 	};
    322 
    323 	write_fixedprim(e, class::UNIVERSAL, utag::INTEGER, n[i..])?;
    324 };
    325 
    326 // Writes an integer asuming 'n' is unsigned.
    327 export fn write_uint(e: *encoder, n: []u8) (void | overflow) = {
    328 	if (n[0] & 0x80 == 0) {
    329 		return write_int(e, n);
    330 	};
    331 
    332 	// prepend 0 so that the highest valued bit is not interpreted as sign
    333 	create_prim(e, class::UNIVERSAL, utag::INTEGER)?;
    334 	write(e, [0])?;
    335 	write(e, n)?;
    336 	finish_prim(e);
    337 };
    338 
    339 // Writes 's' as Utf8String.
    340 export fn write_utf8str(e: *encoder, s: str) (void | overflow) =
    341 	write_fixedprim(e, class::UNIVERSAL, utag::UTF8_STRING,
    342 		strings::toutf8(s))?;
    343 
    344 // Encodes all buffered data in the [[encoder]] and returns a slice representing
    345 // the encoded entry, borrowed from the encoder's buffer.
    346 export fn encode(e: *encoder) ([]u8 | io::error) = {
    347 	assert(e.btn == 0);
    348 	assert(e.start >= 0);
    349 
    350 	if (e.cur_prim) {
    351 		finish_prim(e);
    352 	};
    353 
    354 	let n = 0z;
    355 	let buf = memio::buffer(e.mem)[e.start..];
    356 
    357 	// iterate entries to minify tag ids and data sizes. 't' is the write
    358 	// index and 'i' is the read index.
    359 	let t = 0z;
    360 	for (let i = 0z; i < e.pos) { // TODO cast seems off
    361 		// encode id
    362 		const id = buf[i];
    363 		buf[t] = id;
    364 		t += 1;
    365 		i += 1;
    366 
    367 		const cons = (id >> 5) & 1 == 1;
    368 		if ((id & 0b11111) == 0b11111) {
    369 			// id spans multiple bytes
    370 			let id: u8 = 0x80;
    371 			for (id & 0x80 == 0x80) {
    372 				id = buf[i];
    373 				buf[t] = id;
    374 				t += 1;
    375 				i += 1;
    376 			};
    377 		};
    378 
    379 		// encode dsz
    380 		let dsz: datasz = 0;
    381 		let l = buf[i];
    382 		i += 1;
    383 		if (l < 128) {
    384 			// data size fits in a single byte
    385 			dsz = l;
    386 			buf[t] = l;
    387 			t += 1;
    388 		} else {
    389 			// decode multibyte size and minimize, since not all
    390 			// placeholder bytes may have been used.
    391 			const dn = l & 0x7f;
    392 			for (let j = 0z; j < dn; j += 1) {
    393 				dsz <<= 8;
    394 				dsz |= buf[i];
    395 				i += 1;
    396 			};
    397 
    398 			let dszbuf = encode_dsz(dsz);
    399 			buf[t..t + len(dszbuf)] = dszbuf;
    400 			t += len(dszbuf);
    401 		};
    402 
    403 		if (cons) {
    404 			continue;
    405 		};
    406 
    407 		// write data of primitive fields
    408 		buf[t..t+dsz] = buf[i..i+dsz];
    409 		t += dsz;
    410 		i += dsz;
    411 	};
    412 
    413 	bytes::zero(buf[t..]);
    414 	match (e.parent) {
    415 	case null => void;
    416 	case let s: *bytewstream =>
    417 		s.e.pos += t;
    418 	};
    419 	return buf[..t];
    420 };
    421 
    422 // Encodes all buffered data in the [[encoder]] and writes it to the provided
    423 // [[io::handle]].
    424 export fn encodeto(e: *encoder, dest: io::handle) (size | io::error) = {
    425 	const buf = encode(e)?;
    426 	return io::writeall(dest, buf)?;
    427 };