hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

encoder.ha (10532B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use bufio;
      5 use bytes;
      6 use endian;
      7 use errors;
      8 use io;
      9 use math::{bit_size_u8,bit_size_u32};
     10 use memio;
     11 use strings;
     12 use time::date;
     13 use types;
     14 
     15 
// Integer type used to store the data size of an entry. Size placeholders
// written by the encoder are size(datasz) bytes wide.
export type datasz = u32; // XXX: might want to use size here
// Big endian store and load routines matching the width of [[datasz]].
let szput = &endian::beputu32;
let szget = &endian::begetu32;
// The largest value representable by [[datasz]].
def DATASZ_MAX = types::U32_MAX;
     20 
// The maximum header size possible for u32 tag ids: one leading identifier
// byte, up to five further identifier bytes carrying seven bits of the tag id
// each, one length prefix byte and size(datasz) length bytes.
export def MAXHEADSZ = 1 + 5 + 1 + size(datasz);

// The maximum header size possible for entries of [[utag]]: one identifier
// byte, one length prefix byte and size(datasz) length bytes.
export def MAXUTAGHEADSZ = 1 + 1 + size(datasz);
     26 
// State of a DER encoder. Create one with [[derencoder]] or
// [[derencoder_nested]].
export type encoder = struct {
	// Buffer stream all entries are written to.
	mem: *memio::stream,
	// Offset within 'mem' at the time the encoder was created.
	start: io::off,
	// Number of bytes written through this encoder so far.
	pos: size,
	// Backtrack stack of the open constructed entries. Each item holds the
	// position of the entry's size placeholder (relative to 'start') and
	// the content size accumulated so far.
	bt: [MAX_CONS_DEPTH](size, datasz),
	// Number of items in use on 'bt'.
	btn: size,

	// Value of 'pos' at which the data of the current primitive starts.
	cur_dpos: size,
	// True while a primitive entry is open and has not been finished.
	cur_prim: bool,
	// True if the current primitive was written with its final size, so
	// that no size placeholder needs to be patched.
	cur_fixed: bool,

	// The byte writer this encoder is nested in, if any.
	parent: nullable *bytewstream,
};
     40 
     41 // Creates a new DER encoder. The user must provide a [[memio::stream]] for
     42 // buffering data before it's encoded. The user may provide a dynamic or fixed
     43 // stream at their discretion; fixed may be preferred if the user knows the
     44 // required buffer size in advance.
     45 //
     46 // To encode DER data, the user must call one of the "create_" functions (e.g.
     47 // [[create_explicit]]), followed by the appropriate "write_" functions (e.g.
     48 // [[write_int]]). These operations will be buffered into the provided memio
     49 // buffer, and the encoded form may be finalized and retrieved via [[encode]] or
     50 // [[encodeto]].
     51 //
     52 // To determine the required buffer size for a fixed buffer, consider the
     53 // maximum length of the input data (e.g. integer, string, etc length) plus the
     54 // necessary overhead, which is given by [[MAXUTAGHEADSZ]] if only using the
     55 // provided encoder functions (e.g. "write_" functions), or [[MAXHEADSZ]] if
     56 // using custom tag IDs.
     57 //
     58 // The encoder does not close the provided [[memio::stream]] after use; the
     59 // caller should manage its lifetime accordingly.
     60 export fn derencoder(mem: *memio::stream) encoder = encoder {
     61 	mem = mem,
     62 	start = io::tell(mem)!,
     63 	...
     64 };
     65 
     66 // Creates a DER encoder nested within another DER entry, using the buffer of
     67 // the parent.
     68 export fn derencoder_nested(b: *bytewstream) encoder = encoder {
     69 	mem = b.e.mem,
     70 	start = io::tell(b.e.mem)!,
     71 	parent = b,
     72 	...
     73 };
     74 
     75 fn write(e: *encoder, buf: []u8) (void | overflow) = {
     76 	if (len(buf) > (DATASZ_MAX - e.pos)) return overflow;
     77 
     78 	match (io::write(e.mem, buf)) {
     79 	case let n: size =>
     80 		if (n < len(buf)) {
     81 			// short writes happen, if a fixed e.mem reaches its end
     82 			return overflow;
     83 		};
     84 	case errors::overflow =>
     85 		return overflow;
     86 	case =>
     87 		 // writing to mem does not throw any other errors
     88 		abort();
     89 	};
     90 	e.pos += len(buf);
     91 };
     92 
     93 fn write_id(e: *encoder, c: class, t: u32, cons: bool) (void | overflow) = {
     94 	let head: u8 = c << 6;
     95 	if (cons) {
     96 		head |= (1 << 5);
     97 	};
     98 
     99 	if (t < 31) {
    100 		bt_add_sz(e, 1);
    101 		return write(e, [head | t: u8]);
    102 	};
    103 
    104 	write(e, [head | 0x1f])?;
    105 
    106 	const bsz = bit_size_u32(t);
    107 	const n = ((bsz + 6) / 7) - 1;
    108 	for (let i = 0z; i < n; i += 1) {
    109 		write(e, [0x80 | (t >> ((n - i) * 7)): u8])?;
    110 	};
    111 	write(e, [t: u8 & 0x7f])?;
    112 };
    113 
    114 fn write_fixedprim(e: *encoder, c: class, t: u32, b: []u8) (void | overflow) = {
    115 	if (e.cur_prim) {
    116 		finish_prim(e);
    117 	};
    118 
    119 	e.cur_prim = true;
    120 	e.cur_fixed = true;
    121 	write_id(e, c, t, false)?;
    122 
    123 	write(e, encode_dsz(len(b)))?;
    124 	write(e, b)?;
    125 
    126 	bt_add_dsz(e, len(b): datasz);
    127 };
    128 
    129 fn create_prim(e: *encoder, class: class, tag: u32) (void | overflow) = {
    130 	if (e.cur_prim) {
    131 		finish_prim(e);
    132 	};
    133 
    134 	e.cur_prim = true;
    135 	e.cur_fixed = false;
    136 
    137 	write_id(e, class, tag, false)?;
    138 
    139 	// write size placeholder
    140 	const placehsz = 0x80 | size(datasz): u8;
    141 	let lbuf: [1 + size(datasz)]u8 = [placehsz, 0...];
    142 	write(e, lbuf)?;
    143 
    144 	e.cur_dpos = e.pos;
    145 };
    146 
    147 fn finish_prim(e: *encoder) void = {
    148 	e.cur_prim = false;
    149 	if (e.pos == 0 || e.cur_fixed) {
    150 		return;
    151 	};
    152 
    153 	const pos = io::tell(e.mem)!;
    154 	defer io::seek(e.mem, pos, io::whence::SET)!;
    155 
    156 	// write back size to placeholder
    157 	const dszpos = e.start: size + e.cur_dpos - size(datasz);
    158 	const dsz = e.pos - e.cur_dpos;
    159 	let dszbuf: [size(datasz)]u8 = [0...];
    160 	szput(dszbuf, dsz: datasz);
    161 
    162 	io::seek(e.mem, dszpos: io::off, io::whence::SET)!;
    163 	io::write(e.mem, dszbuf)!;
    164 
    165 	bt_add_dsz(e, dsz: datasz);
    166 };
    167 
    168 // Push n empty size value to backtrace stack
    169 fn push_bt(e: *encoder, pos: size) (void | overflow) = {
    170 	if (e.btn + 1 >= len(e.bt)) return overflow;
    171 
    172 	e.bt[e.btn] = (pos, 0);
    173 	e.btn += 1;
    174 };
    175 
    176 // Add 'sz' to the current value of the backtrack stack
    177 fn bt_add_sz(e: *encoder, sz: size) void = {
    178 	if (e.btn == 0) return;
    179 	const csz = e.bt[e.btn - 1].1;
    180 	e.bt[e.btn - 1].1 = csz + sz: datasz;
    181 };
    182 
    183 // Add data size 'sz' + size length to current value of the backtrack stack
    184 fn bt_add_dsz(e: *encoder, sz: datasz) void = {
    185 	if (e.btn == 0) return;
    186 	const lsz = lensz(sz);
    187 	return bt_add_sz(e, lsz + sz);
    188 };
    189 
    190 // Pop current backtrace value from stack
    191 fn pop_bt(e: *encoder) (size, datasz) = {
    192 	e.btn -= 1;
    193 	let x = e.bt[e.btn];
    194 	e.bt[e.btn] = (0, 0);
    195 	return x;
    196 };
    197 
    198 fn lensz(l: datasz) u8 = if (l < 128) 1: u8 else (1 + (bit_size_u32(l) + 7) / 8);
    199 
    200 fn encode_dsz(sz: size) []u8 = {
    201 	static let buf: [size(datasz) + 1]u8 = [0...];
    202 	if (sz < 128) {
    203 		buf[0] = sz: u8;
    204 		return buf[..1];
    205 	};
    206 
    207 	let n = lensz(sz: datasz);
    208 	buf[0] = (n - 1) | 0x80;
    209 	for (let i: size = n - 1; sz > 0; i -= 1) {
    210 		buf[i] = sz: u8;
    211 		sz >>= 8;
    212 	};
    213 
    214 	return buf[..n];
    215 };
    216 
    217 // Creates an explicit constructed entry. The user must call [[finish_explicit]]
    218 // to close the associated DER entry.
    219 export fn create_explicit(e: *encoder, c: class, tag: u32) (void | overflow) =
    220 	create_cons(e, c, tag);
    221 
    222 // Finishes an explicit constructed entry.
    223 export fn finish_explicit(e: *encoder) void = finish_cons(e);
    224 
    225 fn create_cons(e: *encoder, class: class, tagid: u32) (void | overflow) = {
    226 	if (e.cur_prim) {
    227 		finish_prim(e);
    228 	};
    229 	write_id(e, class, tagid, true)?;
    230 
    231 	const placehsz = 0x80 | size(datasz): u8;
    232 	let lbuf: [1 + size(datasz)]u8 = [placehsz, 0...];
    233 	write(e, lbuf)?;
    234 
    235 	push_bt(e, e.pos - size(datasz))?;
    236 	return;
    237 };
    238 
    239 fn finish_cons(e: *encoder) void = {
    240 	if (e.cur_prim) {
    241 		finish_prim(e);
    242 	};
    243 
    244 	let (dszpos, sz) = pop_bt(e);
    245 	let lbuf: [size(datasz)]u8 = [0...];
    246 	szput(lbuf, sz);
    247 
    248 	const pos = io::tell(e.mem)!;
    249 	defer io::seek(e.mem, pos, io::whence::SET)!;
    250 
    251 	dszpos += e.start: size;
    252 	io::seek(e.mem, dszpos: io::off, io::whence::SET)!;
    253 	io::write(e.mem, lbuf)!;
    254 	bt_add_dsz(e, sz);
    255 };
    256 
    257 // Creates a sequence. The user must call [[finish_seq]] to close the associated
    258 // DER entry.
    259 export fn create_seq(e: *encoder) (void | overflow) =
    260 	return create_cons(e, class::UNIVERSAL, utag::SEQUENCE);
    261 
    262 // Finishes a sequence.
    263 export fn finish_seq(e: *encoder) void = finish_cons(e);
    264 
    265 // Writes a boolean.
    266 export fn write_bool(e: *encoder, b: bool) (void | overflow) = {
    267 	let v: u8 = if (b) 0xff else 0x00;
    268 	write_fixedprim(e, class::UNIVERSAL, utag::BOOLEAN, [v])?;
    269 };
    270 
    271 // Writes a null value.
    272 export fn write_null(e: *encoder) (void | overflow) = {
    273 	write_fixedprim(e, class::UNIVERSAL, utag::NULL, [])?;
    274 };
    275 
// A writable stream that stores the bytes written to it as the data of a
// primitive entry of the associated encoder.
export type bytewstream = struct {
	// Stream vtable; must stay the first field, since the write callback
	// casts the *io::stream it receives to *bytewstream.
	stream: io::stream,
	// The encoder that receives the written data.
	e: *encoder,
};
    280 
    281 fn bytewriter(e: *encoder, c: class, tagid: u32) (bytewstream | overflow) = {
    282 	create_prim(e, c, tagid)?;
    283 	return bytewstream {
    284 		stream = &bytewriter_vtable,
    285 		e = e,
    286 		...
    287 	};
    288 };
    289 
// Stream operations of [[bytewstream]]; only writing is provided.
const bytewriter_vtable = io::vtable {
	writer = &bytewriter_write,
	...
};
    294 
    295 fn bytewriter_write(s: *io::stream, buf: const []u8) (size | io::error) = {
    296 	let w = s: *bytewstream;
    297 	if (write(w.e, buf) is overflow) {
    298 		return wrap_err(overflow);
    299 	};
    300 	return len(buf);
    301 };
    302 
    303 // Creates an [[io::writer]] that encodes data written to it as an OctetString.
    304 export fn octetstrwriter(e: *encoder) (bytewstream | overflow) = {
    305 	return bytewriter(e, class::UNIVERSAL, utag::OCTET_STRING);
    306 };
    307 
    308 // Writes an integer. 'n' must be stored in big endian order. The highest bit of
    309 // the first byte marks the sign.
    310 export fn write_int(e: *encoder, n: []u8) (void | overflow) = {
    311 	const neg = n[0] & 0x80 == 0x80;
    312 
    313 	// compact according to X.690 Chapt. 8.3.2
    314 	let i = 0z;
    315 	for (i < len(n) - 1; i += 1) {
    316 		if (neg && (n[i] != 0xff || n[i+1] & 0x80 != 0x80)) {
    317 			break;
    318 		};
    319 
    320 		if (!neg && (n[i] != 0x00 || n[i+1] & 0x80 == 0x80)) {
    321 			break;
    322 		};
    323 	};
    324 
    325 	write_fixedprim(e, class::UNIVERSAL, utag::INTEGER, n[i..])?;
    326 };
    327 
    328 // Writes an integer asuming 'n' is unsigned.
    329 export fn write_uint(e: *encoder, n: []u8) (void | overflow) = {
    330 	if (n[0] & 0x80 == 0) {
    331 		return write_int(e, n);
    332 	};
    333 
    334 	// prepend 0 so that the highest valued bit is not interpreted as sign
    335 	create_prim(e, class::UNIVERSAL, utag::INTEGER)?;
    336 	write(e, [0])?;
    337 	write(e, n)?;
    338 	finish_prim(e);
    339 };
    340 
    341 // Writes 's' as Utf8String.
    342 export fn write_utf8str(e: *encoder, s: str) (void | overflow) =
    343 	write_fixedprim(e, class::UNIVERSAL, utag::UTF8_STRING,
    344 		strings::toutf8(s))?;
    345 
    346 // Encodes all buffered data in the [[encoder]] and returns a slice representing
    347 // the encoded entry, borrowed from the encoder's buffer.
    348 export fn encode(e: *encoder) ([]u8 | io::error) = {
    349 	assert(e.btn == 0);
    350 	assert(e.start >= 0);
    351 
    352 	if (e.cur_prim) {
    353 		finish_prim(e);
    354 	};
    355 
    356 	let n = 0z;
    357 	let buf = memio::buffer(e.mem)[e.start..];
    358 
    359 	// iterate entries to minify tag ids and data sizes. 't' is the write
    360 	// index and 'i' is the read index.
    361 	let t = 0z;
    362 	for (let i = 0z; i < e.pos) { // TODO cast seems off
    363 		// encode id
    364 		const id = buf[i];
    365 		buf[t] = id;
    366 		t += 1;
    367 		i += 1;
    368 
    369 		const cons = (id >> 5) & 1 == 1;
    370 		if ((id & 0b11111) == 0b11111) {
    371 			// id spans multiple bytes
    372 			let id: u8 = 0x80;
    373 			for (id & 0x80 == 0x80) {
    374 				id = buf[i];
    375 				buf[t] = id;
    376 				t += 1;
    377 				i += 1;
    378 			};
    379 		};
    380 
    381 		// encode dsz
    382 		let dsz: datasz = 0;
    383 		let l = buf[i];
    384 		i += 1;
    385 		if (l < 128) {
    386 			// data size fits in a single byte
    387 			dsz = l;
    388 			buf[t] = l;
    389 			t += 1;
    390 		} else {
    391 			// decode multibyte size and minimize, since not all
    392 			// placeholder bytes may have been used.
    393 			const dn = l & 0x7f;
    394 			for (let j = 0z; j < dn; j += 1) {
    395 				dsz <<= 8;
    396 				dsz |= buf[i];
    397 				i += 1;
    398 			};
    399 
    400 			let dszbuf = encode_dsz(dsz);
    401 			buf[t..t + len(dszbuf)] = dszbuf;
    402 			t += len(dszbuf);
    403 		};
    404 
    405 		if (cons) {
    406 			continue;
    407 		};
    408 
    409 		// write data of primitive fields
    410 		buf[t..t+dsz] = buf[i..i+dsz];
    411 		t += dsz;
    412 		i += dsz;
    413 	};
    414 
    415 	bytes::zero(buf[t..]);
    416 	match (e.parent) {
    417 	case null => void;
    418 	case let s: *bytewstream =>
    419 		s.e.pos += t;
    420 	};
    421 	return buf[..t];
    422 };
    423 
    424 // Encodes all buffered data in the [[encoder]] and writes it to the provided
    425 // [[io::handle]].
    426 export fn encodeto(e: *encoder, dest: io::handle) (size | io::error) = {
    427 	const buf = encode(e)?;
    428 	return io::writeall(dest, buf)?;
    429 };