encoder.ha (10481B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bytes;
use endian;
use errors;
use io;
use math::{bit_size};
use memio;
use strings;
use types;


// Integer type used to store the data size of an entry while encoding.
export type datasz = u32; // XXX: might want to use size here
// Big endian store/load helpers whose width matches [[datasz]].
let szput = &endian::beputu32;
let szget = &endian::begetu32;
// Largest value that fits into [[datasz]].
def DATASZ_MAX = types::U32_MAX;

// The maximum header size possible for u32 tag ids.
export def MAXHEADSZ = 1 + 5 + 1 + size(datasz);

// The maximum header size possible for entries of [[utag]].
export def MAXUTAGHEADSZ = 1 + 1 + size(datasz);

// State of a DER encoder. Entries are first buffered into 'mem' with
// fixed-width size placeholders and are compacted when [[encode]] is called.
export type encoder = struct {
	// buffer the entries are written to
	mem: *memio::stream,
	// offset of 'mem' at the time the encoder was created
	start: io::off,
	// number of bytes written through this encoder so far
	pos: size,
	// stack of open constructed entries: (position of the size
	// placeholder relative to 'start', accumulated content size)
	bt: [MAX_CONS_DEPTH](size, datasz),
	// number of entries of 'bt' that are in use
	btn: size,

	// value of 'pos' at which the data of the current primitive begins
	cur_dpos: size,
	// true while a primitive entry is open
	cur_prim: bool,
	// true if the size of the current primitive has already been written
	// in its final form and does not need to be patched
	cur_fixed: bool,

	// set for nested encoders: the byte writer of the enclosing entry
	parent: nullable *bytewstream,
};

// Creates a new DER encoder. The user must provide a [[memio::stream]] for
// buffering data before it's encoded. The user may provide a dynamic or fixed
// stream at their discretion; fixed may be preferred if the user knows the
// required buffer size in advance.
//
// To encode DER data, the user must call one of the "create_" functions (e.g.
// [[create_explicit]]), followed by the appropriate "write_" functions (e.g.
// [[write_int]]). These operations will be buffered into the provided memio
// buffer, and the encoded form may be finalized and retrieved via [[encode]] or
// [[encodeto]].
//
// To determine the required buffer size for a fixed buffer, consider the
// maximum length of the input data (e.g. integer, string, etc length) plus the
// necessary overhead, which is given by [[MAXUTAGHEADSZ]] if only using the
// provided encoder functions (e.g. "write_" functions), or [[MAXHEADSZ]] if
// using custom tag IDs.
//
// The encoder does not close the provided [[memio::stream]] after use; the
// caller should manage its lifetime accordingly.
export fn derencoder(mem: *memio::stream) encoder = encoder {
	mem = mem,
	start = io::tell(mem)!,
	...
};

// Creates a DER encoder nested within another DER entry, using the buffer of
// the parent.
export fn derencoder_nested(b: *bytewstream) encoder = encoder {
	mem = b.e.mem,
	start = io::tell(b.e.mem)!,
	parent = b,
	...
};

// Appends 'buf' to the encoder's buffer and advances e.pos accordingly.
// Returns overflow if the total size would exceed DATASZ_MAX or if the
// underlying stream cannot store all of 'buf'.
fn write(e: *encoder, buf: []u8) (void | overflow) = {
	if (len(buf) > (DATASZ_MAX - e.pos)) return overflow;

	match (io::write(e.mem, buf)) {
	case let n: size =>
		if (n < len(buf)) {
			// short writes happen, if a fixed e.mem reaches its end
			return overflow;
		};
	case errors::overflow =>
		return overflow;
	case =>
		// writing to mem does not throw any other errors
		abort();
	};
	e.pos += len(buf);
};

// Writes the identifier octets for class 'c' and tag number 't'. 'cons' marks
// the entry as constructed. The size of the identifier is added to the
// enclosing constructed entry, if there is one.
fn write_id(e: *encoder, c: class, t: u32, cons: bool) (void | overflow) = {
	let head: u8 = c << 6;
	if (cons) {
		head |= (1 << 5);
	};

	if (t < 31) {
		// short form: the tag number fits into the leading octet
		bt_add_sz(e, 1);
		return write(e, [head | t: u8]);
	};

	// Long form: a leading octet with all five tag bits set, followed by
	// the tag number in groups of 7 bits, most significant group first.
	// 'n' is the number of groups that carry the continuation bit.
	const bsz = bit_size(t);
	const n = ((bsz + 6) / 7) - 1;

	// Account for the leading octet, the 'n' continuation octets and the
	// final octet, so that the size of an enclosing constructed entry
	// covers the complete identifier, just like in the short form.
	bt_add_sz(e, n: size + 2);

	write(e, [head | 0x1f])?;
	for (let i = 0z; i < n; i += 1) {
		write(e, [0x80 | (t >> ((n - i) * 7)): u8])?;
	};
	write(e, [t: u8 & 0x7f])?;
};

// Writes a complete primitive entry whose data 'b' is known up front. The size
// is written in its final form, so nothing has to be patched afterwards.
fn write_fixedprim(e: *encoder, c: class, t: u32, b: []u8) (void | overflow) = {
	if (e.cur_prim) {
		finish_prim(e);
	};

	e.cur_prim = true;
	e.cur_fixed = true;
	write_id(e, c, t, false)?;

	write(e, encode_dsz(len(b)))?;
	write(e, b)?;

	bt_add_dsz(e, len(b): datasz);
};

// Opens a primitive entry of unknown size. A size placeholder is written that
// is filled in by [[finish_prim]] once all data has been written.
fn create_prim(e: *encoder, class: class, tag: u32) (void | overflow) = {
	if (e.cur_prim) {
		finish_prim(e);
	};

	e.cur_prim = true;
	e.cur_fixed = false;

	write_id(e, class, tag, false)?;

	// write size placeholder: a long form size that uses size(datasz)
	// bytes, all of them zero for now
	const placehsz = 0x80 | size(datasz): u8;
	let lbuf: [1 + size(datasz)]u8 = [placehsz, 0...];
	write(e, lbuf)?;

	e.cur_dpos = e.pos;
};

// Closes the current primitive entry. Unless its size has been written up
// front (see [[write_fixedprim]]), the size placeholder is patched with the
// number of data bytes written since [[create_prim]].
fn finish_prim(e: *encoder) void = {
	e.cur_prim = false;
	if (e.pos == 0 || e.cur_fixed) {
		return;
	};

	// restore the current stream offset when done
	const pos = io::tell(e.mem)!;
	defer io::seek(e.mem, pos, io::whence::SET)!;

	// write back size to placeholder
	const dszpos = e.start: size + e.cur_dpos - size(datasz);
	const dsz = e.pos - e.cur_dpos;
	let dszbuf: [size(datasz)]u8 = [0...];
	szput(dszbuf, dsz: datasz);

	io::seek(e.mem, dszpos: io::off, io::whence::SET)!;
	io::write(e.mem, dszbuf)!;

	bt_add_dsz(e, dsz: datasz);
};

// Pushes a new entry with an accumulated size of zero onto the backtrack
// stack. 'pos' is the position of the size placeholder of the constructed
// entry being opened. Returns overflow if the stack is considered full.
//
// NOTE(review): the check rejects the push while one slot of e.bt is still
// unused, so at most len(e.bt) - 1 entries can be open; confirm whether this
// is intended.
fn push_bt(e: *encoder, pos: size) (void | overflow) = {
	if (e.btn + 1 >= len(e.bt)) return overflow;

	e.bt[e.btn] = (pos, 0);
	e.btn += 1;
};

// Add 'sz' to the current value of the backtrack stack. Does nothing if no
// constructed entry is open.
fn bt_add_sz(e: *encoder, sz: size) void = {
	if (e.btn == 0) return;
	const csz = e.bt[e.btn - 1].1;
	e.bt[e.btn - 1].1 = csz + sz: datasz;
};

// Add data size 'sz' + size length to current value of the backtrack stack
fn bt_add_dsz(e: *encoder, sz: datasz) void = {
	if (e.btn == 0) return;
	const lsz = lensz(sz);
	return bt_add_sz(e, lsz + sz);
};

// Pop current backtrack value from stack. The freed slot is reset to (0, 0).
fn pop_bt(e: *encoder) (size, datasz) = {
	e.btn -= 1;
	let x = e.bt[e.btn];
	e.bt[e.btn] = (0, 0);
	return x;
};

// Returns the number of bytes required to encode the data size 'l': one byte
// for sizes below 128, otherwise one prefix byte plus the bytes of the size.
fn lensz(l: datasz) u8 = if (l < 128) 1: u8 else (1 + (bit_size(l) + 7) / 8);

// Encodes the data size 'sz' using as few bytes as possible. The returned
// slice is borrowed from a static buffer and is overwritten by the next call.
fn encode_dsz(sz: size) []u8 = {
	static let buf: [size(datasz) + 1]u8 = [0...];
	if (sz < 128) {
		buf[0] = sz: u8;
		return buf[..1];
	};

	let n = lensz(sz: datasz);
	buf[0] = (n - 1) | 0x80;
	// store the size in big endian order, filling from the last byte
	for (let i: size = n - 1; sz > 0; i -= 1) {
		buf[i] = sz: u8;
		sz >>= 8;
	};

	return buf[..n];
};

// Creates an explicit constructed entry.
// The user must call [[finish_explicit]] to close the associated DER entry.
export fn create_explicit(e: *encoder, c: class, tag: u32) (void | overflow) = {
	return create_cons(e, c, tag);
};

// Closes the entry opened by the matching [[create_explicit]] call.
export fn finish_explicit(e: *encoder) void = {
	finish_cons(e);
};

// Opens a constructed entry: writes its identifier followed by a size
// placeholder and remembers the placeholder position on the backtrack stack.
fn create_cons(e: *encoder, class: class, tagid: u32) (void | overflow) = {
	if (e.cur_prim) finish_prim(e);

	write_id(e, class, tagid, true)?;

	// long form size of size(datasz) zero bytes, patched by finish_cons
	let placeholder: [1 + size(datasz)]u8 = [0...];
	placeholder[0] = 0x80 | size(datasz): u8;
	write(e, placeholder)?;

	return push_bt(e, e.pos - size(datasz));
};

// Closes the innermost constructed entry: stores the accumulated content size
// in its placeholder and adds the entry's size to the enclosing entry.
fn finish_cons(e: *encoder) void = {
	if (e.cur_prim) finish_prim(e);

	const entry = pop_bt(e);
	const total: datasz = entry.1;
	let encoded: [size(datasz)]u8 = [0...];
	szput(encoded, total);

	// jump back to where writing left off once the size is patched
	const resume = io::tell(e.mem)!;
	defer io::seek(e.mem, resume, io::whence::SET)!;

	const patchpos = entry.0 + e.start: size;
	io::seek(e.mem, patchpos: io::off, io::whence::SET)!;
	io::write(e.mem, encoded)!;
	bt_add_dsz(e, total);
};

// Opens a sequence entry. It stays open until [[finish_seq]] is called.
export fn create_seq(e: *encoder) (void | overflow) =
	create_cons(e, class::UNIVERSAL, utag::SEQUENCE);

// Closes the sequence opened by the matching [[create_seq]] call.
export fn finish_seq(e: *encoder) void = {
	finish_cons(e);
};

// Writes a boolean entry; true is stored as 0xff and false as 0x00.
export fn write_bool(e: *encoder, b: bool) (void | overflow) = {
	const octet: u8 = if (b) 0xff else 0x00;
	return write_fixedprim(e, class::UNIVERSAL, utag::BOOLEAN, [octet]);
};

// Writes a null value.
export fn write_null(e: *encoder) (void | overflow) =
	write_fixedprim(e, class::UNIVERSAL, utag::NULL, []);

// An [[io::stream]] that appends everything written to it to the data of a
// primitive entry of the encoder 'e'.
export type bytewstream = struct {
	stream: io::stream,
	e: *encoder,
};

// Opens a primitive entry with the given class and tag id and returns a
// stream that writes into it.
fn bytewriter(e: *encoder, c: class, tagid: u32) (bytewstream | overflow) = {
	create_prim(e, c, tagid)?;

	const w = bytewstream {
		stream = &bytewriter_vtable,
		e = e,
		...
	};
	return w;
};

const bytewriter_vtable = io::vtable {
	writer = &bytewriter_write,
	...
};

// Write handler of [[bytewstream]]: forwards 'buf' to the encoder and reports
// either all of it or nothing as written.
fn bytewriter_write(s: *io::stream, buf: const []u8) (size | io::error) = {
	const w = s: *bytewstream;
	match (write(w.e, buf)) {
	case overflow =>
		return wrap_err(overflow);
	case void =>
		return len(buf);
	};
};

// Returns an [[io::writer]]; the bytes written to it become the content of an
// OctetString entry.
export fn octetstrwriter(e: *encoder) (bytewstream | overflow) =
	bytewriter(e, class::UNIVERSAL, utag::OCTET_STRING);

// Writes an integer. 'n' holds the value in big endian order and the most
// significant bit of its first byte is the sign bit.
export fn write_int(e: *encoder, n: []u8) (void | overflow) = {
	const neg = n[0] & 0x80 == 0x80;

	// A leading byte is redundant if it consists of sign bits only and
	// the byte that follows starts with the same sign bit (X.690 Chapt.
	// 8.3.2). Skip all such bytes, but always keep the last one.
	const pad: u8 = if (neg) 0xff else 0x00;
	const signbit: u8 = if (neg) 0x80 else 0x00;

	let first = 0z;
	for (first + 1 < len(n); first += 1) {
		if (n[first] != pad || n[first + 1] & 0x80 != signbit) {
			break;
		};
	};

	return write_fixedprim(e, class::UNIVERSAL, utag::INTEGER, n[first..]);
};

// Writes an integer assuming 'n' is unsigned.
export fn write_uint(e: *encoder, n: []u8) (void | overflow) = {
	if (n[0] & 0x80 != 0) {
		// The top bit is set and would be read as the sign, so a
		// zero byte is put in front of the value.
		create_prim(e, class::UNIVERSAL, utag::INTEGER)?;
		write(e, [0])?;
		write(e, n)?;
		finish_prim(e);
		return;
	};

	return write_int(e, n);
};

// Writes 's' as Utf8String.
export fn write_utf8str(e: *encoder, s: str) (void | overflow) =
	write_fixedprim(e, class::UNIVERSAL, utag::UTF8_STRING,
		strings::toutf8(s))?;

// Encodes all buffered data in the [[encoder]] and returns a slice representing
// the encoded entry, borrowed from the encoder's buffer.
export fn encode(e: *encoder) ([]u8 | io::error) = {
	// all constructed entries must have been finished by the caller
	assert(e.btn == 0);
	assert(e.start >= 0);

	if (e.cur_prim) {
		finish_prim(e);
	};

	let n = 0z;
	// only the part of the buffer written by this encoder is processed
	let buf = memio::buffer(e.mem)[e.start..];

	// iterate entries to minify tag ids and data sizes. 't' is the write
	// index and 'i' is the read index.
	let t = 0z;
	for (let i = 0z; i < e.pos) { // TODO cast seems off
		// encode id
		const id = buf[i];
		buf[t] = id;
		t += 1;
		i += 1;

		// bit 5 of the first id byte marks constructed entries
		const cons = (id >> 5) & 1 == 1;
		if ((id & 0b11111) == 0b11111) {
			// id spans multiple bytes; copy until a byte without
			// the continuation bit has been copied
			let id: u8 = 0x80;
			for (id & 0x80 == 0x80) {
				id = buf[i];
				buf[t] = id;
				t += 1;
				i += 1;
			};
		};

		// encode dsz
		let dsz: datasz = 0;
		let l = buf[i];
		i += 1;
		if (l < 128) {
			// data size fits in a single byte
			dsz = l;
			buf[t] = l;
			t += 1;
		} else {
			// decode multibyte size and minimize, since not all
			// placeholder bytes may have been used.
			const dn = l & 0x7f;
			for (let j = 0z; j < dn; j += 1) {
				dsz <<= 8;
				dsz |= buf[i];
				i += 1;
			};

			let dszbuf = encode_dsz(dsz);
			buf[t..t + len(dszbuf)] = dszbuf;
			t += len(dszbuf);
		};

		// the content of a constructed entry consists of further
		// entries, which are handled by the following iterations
		if (cons) {
			continue;
		};

		// write data of primitive fields
		buf[t..t+dsz] = buf[i..i+dsz];
		t += dsz;
		i += dsz;
	};

	// clear the bytes that are left over after compaction
	bytes::zero(buf[t..]);
	match (e.parent) {
	case null => void;
	case let s: *bytewstream =>
		// account the encoded bytes as data written to the parent
		// NOTE(review): the offset of e.mem is not moved back to
		// e.start + t here; confirm that later writes of the parent
		// end up directly behind the compacted data.
		s.e.pos += t;
	};
	return buf[..t];
};

// Encodes all buffered data in the [[encoder]] and writes it to the provided
// [[io::handle]].
export fn encodeto(e: *encoder, dest: io::handle) (size | io::error) = {
	// finalize first; nothing is written to 'dest' if encoding fails
	const encoded = encode(e)?;
	const written = io::writeall(dest, encoded)?;
	return written;
};