encoder.ha (10532B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bufio;
use bytes;
use endian;
use errors;
use io;
use math::{bit_size_u8,bit_size_u32};
use memio;
use strings;
use time::date;
use types;


// Integer type used to store the data size of an entry while it is buffered.
export type datasz = u32; // XXX: might want to use size here
// Big-endian store/load helpers matching the width of [[datasz]].
let szput = &endian::beputu32;
let szget = &endian::begetu32;
// Largest value representable by [[datasz]]; the encoder refuses to buffer more
// bytes than this.
def DATASZ_MAX = types::U32_MAX;

// The maximum header size possible for u32 tag ids.
//
// Breakdown: 1 leading identifier byte, up to 5 further identifier bytes for a
// 32 bit tag id in 7 bit groups, 1 length prefix byte and size(datasz) length
// bytes.
export def MAXHEADSZ = 1 + 5 + 1 + size(datasz);

// The maximum header size possible for entries of [[utag]].
//
// Breakdown: 1 identifier byte, 1 length prefix byte and size(datasz) length
// bytes.
export def MAXUTAGHEADSZ = 1 + 1 + size(datasz);

// State of a DER encoder. Create one with [[derencoder]] or
// [[derencoder_nested]].
export type encoder = struct {
	// stream the entries are buffered in
	mem: *memio::stream,
	// offset of 'mem' at the time the encoder was created
	start: io::off,
	// number of bytes buffered through this encoder so far
	pos: size,
	// backtrack stack with one (position of size placeholder, accumulated
	// content size) tuple per open constructed entry
	bt: [MAX_CONS_DEPTH](size, datasz),
	// number of entries of 'bt' currently in use
	btn: size,

	// value of 'pos' at which the data of the current primitive begins
	cur_dpos: size,
	// true while a primitive entry has been started but not yet finished
	cur_prim: bool,
	// true if the current primitive was written with its final size, so
	// that no size placeholder needs to be patched when finishing it
	cur_fixed: bool,

	// stream this encoder is nested in, or null for a top level encoder
	parent: nullable *bytewstream,
};

// Creates a new DER encoder. The user must provide a [[memio::stream]] for
// buffering data before it's encoded. The user may provide a dynamic or fixed
// stream at their discretion; fixed may be preferred if the user knows the
// required buffer size in advance.
//
// To encode DER data, the user must call one of the "create_" functions (e.g.
// [[create_explicit]]), followed by the appropriate "write_" functions (e.g.
// [[write_int]]). These operations will be buffered into the provided memio
// buffer, and the encoded form may be finalized and retrieved via [[encode]] or
// [[encodeto]].
//
// To determine the required buffer size for a fixed buffer, consider the
// maximum length of the input data (e.g. integer, string, etc length) plus the
// necessary overhead, which is given by [[MAXUTAGHEADSZ]] if only using the
// provided encoder functions (e.g. "write_" functions), or [[MAXHEADSZ]] if
// using custom tag IDs.
//
// The encoder does not close the provided [[memio::stream]] after use; the
// caller should manage its lifetime accordingly.
export fn derencoder(mem: *memio::stream) encoder = encoder {
	mem = mem,
	// data of this encoder begins at the current offset of mem
	start = io::tell(mem)!,
	...
};

// Creates a DER encoder nested within another DER entry, using the buffer of
// the parent.
export fn derencoder_nested(b: *bytewstream) encoder = encoder {
	mem = b.e.mem,
	// nested data begins at the current offset of the parent's buffer
	start = io::tell(b.e.mem)!,
	parent = b,
	...
};

// Appends 'buf' to the encoder's buffer and advances e.pos by its length.
// Returns [[overflow]] if more than DATASZ_MAX bytes would be buffered in
// total, or if the underlying stream does not accept all of 'buf'.
fn write(e: *encoder, buf: []u8) (void | overflow) = {
	if (len(buf) > (DATASZ_MAX - e.pos)) return overflow;

	match (io::write(e.mem, buf)) {
	case let n: size =>
		if (n < len(buf)) {
			// short writes happen, if a fixed e.mem reaches its end
			return overflow;
		};
	case errors::overflow =>
		return overflow;
	case =>
		// writing to mem does not throw any other errors
		abort();
	};
	e.pos += len(buf);
};

// Writes the identifier bytes for class 'c' and tag id 't'. 'cons' selects the
// constructed bit. Tag ids below 31 are stored within the leading byte; larger
// ids set the low five bits of the leading byte and follow in groups of 7 bit,
// most significant group first, with the high bit set on every group but the
// last.
//
// The number of identifier bytes is added to the size of the enclosing
// constructed entry, if there is one.
fn write_id(e: *encoder, c: class, t: u32, cons: bool) (void | overflow) = {
	let head: u8 = c << 6;
	if (cons) {
		head |= (1 << 5);
	};

	if (t < 31) {
		bt_add_sz(e, 1);
		return write(e, [head | t: u8]);
	};

	// n is the amount of 7 bit groups that precede the last one
	const bsz = bit_size_u32(t);
	const n = ((bsz + 6) / 7) - 1;

	// Account for the leading byte, the n continuation bytes and the last
	// byte. Without this the enclosing constructed entry would be finished
	// with a size that is too small.
	bt_add_sz(e, n: size + 2);

	write(e, [head | 0x1f])?;
	for (let i = 0z; i < n; i += 1) {
		write(e, [0x80 | (t >> ((n - i) * 7)): u8])?;
	};
	write(e, [t: u8 & 0x7f])?;
};

// Writes a complete primitive entry whose data 'b' is known up front. The size
// is written in its final form, so no placeholder has to be patched later on.
// Returns [[overflow]] if 'b' is too large to be described by [[datasz]] or if
// the buffer is exhausted.
fn write_fixedprim(e: *encoder, c: class, t: u32, b: []u8) (void | overflow) = {
	// encode_dsz is only able to represent sizes that fit into datasz
	if (len(b) > DATASZ_MAX) return overflow;

	if (e.cur_prim) {
		finish_prim(e);
	};

	e.cur_prim = true;
	e.cur_fixed = true;
	write_id(e, c, t, false)?;

	write(e, encode_dsz(len(b)))?;
	write(e, b)?;

	// add size bytes and data to the enclosing constructed entry
	bt_add_dsz(e, len(b): datasz);
};

// Starts a primitive entry whose data size is not yet known. A size placeholder
// is written that [[finish_prim]] fills in once the data has been written.
fn create_prim(e: *encoder, class: class, tag: u32) (void | overflow) = {
	if (e.cur_prim) {
		finish_prim(e);
	};

	e.cur_prim = true;
	e.cur_fixed = false;

	write_id(e, class, tag, false)?;

	// write size placeholder
	const placehsz = 0x80 | size(datasz): u8;
	let lbuf: [1 + size(datasz)]u8 = [placehsz, 0...];
	write(e, lbuf)?;

	// data of the primitive starts right behind the placeholder
	e.cur_dpos = e.pos;
};

// Finishes the current primitive entry. For entries started by [[create_prim]]
// the amount of data written since then is stored into the size placeholder
// and added to the enclosing constructed entry. Entries written by
// [[write_fixedprim]] need no further work.
fn finish_prim(e: *encoder) void = {
	e.cur_prim = false;
	if (e.pos == 0 || e.cur_fixed) {
		return;
	};

	// restore the write offset of the buffer when done
	const pos = io::tell(e.mem)!;
	defer io::seek(e.mem, pos, io::whence::SET)!;

	// write back size to placeholder
	const dszpos = e.start: size + e.cur_dpos - size(datasz);
	const dsz = e.pos - e.cur_dpos;
	let dszbuf: [size(datasz)]u8 = [0...];
	szput(dszbuf, dsz: datasz);

	io::seek(e.mem, dszpos: io::off, io::whence::SET)!;
	io::write(e.mem, dszbuf)!;

	bt_add_dsz(e, dsz: datasz);
};

// Pushes a new entry with an accumulated size of zero onto the backtrack stack.
// 'pos' is the position of the size placeholder of the constructed entry,
// relative to the start of the encoder's data. Returns [[overflow]] if
// e.btn + 1 >= len(e.bt).
//
// NOTE(review): the check leaves the last slot of e.bt unused; confirm whether
// that is intended.
fn push_bt(e: *encoder, pos: size) (void | overflow) = {
	if (e.btn + 1 >= len(e.bt)) return overflow;

	e.bt[e.btn] = (pos, 0);
	e.btn += 1;
};

// Add 'sz' to the current value of the backtrack stack. Does nothing if no
// constructed entry is open.
fn bt_add_sz(e: *encoder, sz: size) void = {
	if (e.btn == 0) return;
	const csz = e.bt[e.btn - 1].1;
	e.bt[e.btn - 1].1 = csz + sz: datasz;
};

// Add data size 'sz' + size length to current value of the backtrack stack.
// The size length is the amount of bytes the minimal encoding of 'sz' takes.
fn bt_add_dsz(e: *encoder, sz: datasz) void = {
	if (e.btn == 0) return;
	const lsz = lensz(sz);
	return bt_add_sz(e, lsz + sz);
};

// Pops the current entry from the backtrack stack, clears its slot and returns
// it as (position of size placeholder, accumulated size). The stack must not
// be empty.
fn pop_bt(e: *encoder) (size, datasz) = {
	e.btn -= 1;
	let x = e.bt[e.btn];
	e.bt[e.btn] = (0, 0);
	return x;
};

// Returns the number of bytes required to encode the size 'l': a single byte
// for values below 128, otherwise one prefix byte plus the bytes of 'l'.
fn lensz(l: datasz) u8 = if (l < 128) 1: u8 else (1 + (bit_size_u32(l) + 7) / 8);

// Encodes 'sz' in its minimal form: a single byte for values below 128,
// otherwise a prefix byte holding 0x80 | amount of size bytes, followed by the
// size in big endian order. 'sz' must fit into [[datasz]].
//
// The returned slice is borrowed from a static buffer and is overwritten by
// the next call.
fn encode_dsz(sz: size) []u8 = {
	static let buf: [size(datasz) + 1]u8 = [0...];
	if (sz < 128) {
		buf[0] = sz: u8;
		return buf[..1];
	};

	let n = lensz(sz: datasz);
	buf[0] = (n - 1) | 0x80;
	// fill in the size bytes starting with the least significant one
	for (let i: size = n - 1; sz > 0; i -= 1) {
		buf[i] = sz: u8;
		sz >>= 8;
	};

	return buf[..n];
};

// Creates an explicit constructed entry. The user must call [[finish_explicit]]
// to close the associated DER entry.
export fn create_explicit(e: *encoder, c: class, tag: u32) (void | overflow) =
	create_cons(e, c, tag);

// Finishes an explicit constructed entry.
export fn finish_explicit(e: *encoder) void = finish_cons(e);

// Starts a constructed entry: writes identifier and size placeholder and
// pushes the position of the placeholder's size bytes onto the backtrack stack.
fn create_cons(e: *encoder, class: class, tagid: u32) (void | overflow) = {
	if (e.cur_prim) {
		finish_prim(e);
	};
	write_id(e, class, tagid, true)?;

	const placehsz = 0x80 | size(datasz): u8;
	let lbuf: [1 + size(datasz)]u8 = [placehsz, 0...];
	write(e, lbuf)?;

	// the size bytes are the last size(datasz) bytes written
	push_bt(e, e.pos - size(datasz))?;
	return;
};

// Finishes the innermost open constructed entry: stores its accumulated size
// into the size placeholder and adds size and size length to the entry that
// encloses it, if any. A constructed entry must be open.
fn finish_cons(e: *encoder) void = {
	if (e.cur_prim) {
		finish_prim(e);
	};

	let (dszpos, sz) = pop_bt(e);
	let lbuf: [size(datasz)]u8 = [0...];
	szput(lbuf, sz);

	// restore the write offset of the buffer when done
	const pos = io::tell(e.mem)!;
	defer io::seek(e.mem, pos, io::whence::SET)!;

	// dszpos is relative to the start of the encoder's data
	dszpos += e.start: size;
	io::seek(e.mem, dszpos: io::off, io::whence::SET)!;
	io::write(e.mem, lbuf)!;
	bt_add_dsz(e, sz);
};

// Creates a sequence. The user must call [[finish_seq]] to close the associated
// DER entry.
export fn create_seq(e: *encoder) (void | overflow) =
	return create_cons(e, class::UNIVERSAL, utag::SEQUENCE);

// Finishes a sequence.
export fn finish_seq(e: *encoder) void = finish_cons(e);

// Writes a boolean. True is stored as 0xff and false as 0x00.
export fn write_bool(e: *encoder, b: bool) (void | overflow) = {
	let v: u8 = if (b) 0xff else 0x00;
	write_fixedprim(e, class::UNIVERSAL, utag::BOOLEAN, [v])?;
};

// Writes a null value.
export fn write_null(e: *encoder) (void | overflow) =
	write_fixedprim(e, class::UNIVERSAL, utag::NULL, []);

// An [[io::stream]] whose written bytes become the data of a primitive entry
// of the encoder 'e'.
export type bytewstream = struct {
	stream: io::stream,
	e: *encoder,
};

// Starts a primitive entry with the given class and tag id and returns a
// stream that appends everything written to it to that entry.
fn bytewriter(e: *encoder, c: class, tagid: u32) (bytewstream | overflow) = {
	create_prim(e, c, tagid)?;

	const w = bytewstream {
		stream = &bytewriter_vtable,
		e = e,
		...
	};
	return w;
};

// Only writing is supported by a [[bytewstream]].
const bytewriter_vtable = io::vtable {
	writer = &bytewriter_write,
	...
};

// Writer implementation of [[bytewstream]]. Either all of 'buf' is buffered
// into the encoder, or the overflow is reported as an [[io::error]].
fn bytewriter_write(s: *io::stream, buf: const []u8) (size | io::error) = {
	const bw = s: *bytewstream;
	match (write(bw.e, buf)) {
	case void =>
		return len(buf);
	case overflow =>
		return wrap_err(overflow);
	};
};

// Creates an [[io::writer]] that encodes data written to it as an OctetString.
export fn octetstrwriter(e: *encoder) (bytewstream | overflow) =
	bytewriter(e, class::UNIVERSAL, utag::OCTET_STRING);

// Writes an integer. 'n' must be stored in big endian order and must not be
// empty. The highest bit of the first byte marks the sign.
export fn write_int(e: *encoder, n: []u8) (void | overflow) = {
	const neg = n[0] & 0x80 == 0x80;

	// A leading byte is redundant if it consists of sign bits only and the
	// byte following it carries the same sign bit (X.690 Chapt. 8.3.2).
	const pad: u8 = if (neg) 0xff else 0x00;
	const sign: u8 = if (neg) 0x80 else 0x00;

	let skip = 0z;
	for (skip + 1 < len(n); skip += 1) {
		if (n[skip] != pad || n[skip + 1] & 0x80 != sign) {
			break;
		};
	};

	return write_fixedprim(e, class::UNIVERSAL, utag::INTEGER, n[skip..]);
};

// Writes an integer assuming 'n' is unsigned. 'n' must be stored in big endian
// order and must not be empty.
export fn write_uint(e: *encoder, n: []u8) (void | overflow) = {
	const highbit = n[0] & 0x80 != 0;
	if (!highbit) {
		// already reads as a non-negative value
		return write_int(e, n);
	};

	// a leading zero byte keeps the highest bit of 'n' from being
	// interpreted as the sign
	create_prim(e, class::UNIVERSAL, utag::INTEGER)?;
	write(e, [0])?;
	write(e, n)?;
	finish_prim(e);
};

// Writes 's' as Utf8String.
export fn write_utf8str(e: *encoder, s: str) (void | overflow) =
	write_fixedprim(e, class::UNIVERSAL, utag::UTF8_STRING,
		strings::toutf8(s))?;

// Encodes all buffered data in the [[encoder]] and returns a slice representing
// the encoded entry, borrowed from the encoder's buffer.
//
// All constructed entries must have been finished before calling this function.
// A primitive that is still open is finished first. The buffered entries are
// then compacted in place: every size that was stored using a placeholder is
// rewritten in its minimal form and the following bytes are moved up. The
// unused remainder of the buffer is zeroed. If the encoder is nested, the
// length of the result is added to the position of the parent encoder.
export fn encode(e: *encoder) ([]u8 | io::error) = {
	// no constructed entry may be left open
	assert(e.btn == 0);
	assert(e.start >= 0);

	if (e.cur_prim) {
		finish_prim(e);
	};

	let n = 0z;
	// part of the buffer that belongs to this encoder
	let buf = memio::buffer(e.mem)[e.start..];

	// iterate entries to minify tag ids and data sizes. 't' is the write
	// index and 'i' is the read index.
	let t = 0z;
	for (let i = 0z; i < e.pos) { // TODO cast seems off
		// encode id
		const id = buf[i];
		buf[t] = id;
		t += 1;
		i += 1;

		// bit 5 of the leading id byte marks constructed entries
		const cons = (id >> 5) & 1 == 1;
		if ((id & 0b11111) == 0b11111) {
			// id spans multiple bytes
			// copy id bytes up to and including the first one
			// that has the high bit cleared
			let id: u8 = 0x80;
			for (id & 0x80 == 0x80) {
				id = buf[i];
				buf[t] = id;
				t += 1;
				i += 1;
			};
		};

		// encode dsz
		let dsz: datasz = 0;
		let l = buf[i];
		i += 1;
		if (l < 128) {
			// data size fits in a single byte
			dsz = l;
			buf[t] = l;
			t += 1;
		} else {
			// decode multibyte size and minimize, since not all
			// placeholder bytes may have been used.
			const dn = l & 0x7f;
			for (let j = 0z; j < dn; j += 1) {
				dsz <<= 8;
				dsz |= buf[i];
				i += 1;
			};

			let dszbuf = encode_dsz(dsz);
			buf[t..t + len(dszbuf)] = dszbuf;
			t += len(dszbuf);
		};

		// the content of a constructed entry consists of further
		// entries, which are handled by the following iterations
		if (cons) {
			continue;
		};

		// write data of primitive fields
		buf[t..t+dsz] = buf[i..i+dsz];
		t += dsz;
		i += dsz;
	};

	// clear the bytes left over behind the compacted data
	bytes::zero(buf[t..]);
	match (e.parent) {
	case null => void;
	case let s: *bytewstream =>
		// account for the nested data in the parent encoder
		s.e.pos += t;
	};
	return buf[..t];
};

// Encodes all buffered data in the [[encoder]] and writes it to the provided
// [[io::handle]].
export fn encodeto(e: *encoder, dest: io::handle) (size | io::error) = {
	// finalize the buffered entries first; errors are passed on as is
	const encoded: []u8 = encode(e)?;

	// hand the whole encoded entry to the destination and report the
	// amount of bytes written
	const written: size = io::writeall(dest, encoded)?;
	return written;
};