argon2.ha (13592B)
1 // (c) 2022 Alexey Yerin <yyp@disroot.org> 2 // (c) 2021-2022 Armin Preiml <apreiml@strohwolke.at> 3 // (c) 2021-2022 Drew DeVault <sir@cmpwn.com> 4 use bytes; 5 use crypto::blake2b; 6 use crypto::math; 7 use endian; 8 use errors::{nomem}; 9 use hash; 10 use io; 11 use memio; 12 use types; 13 14 // Latest version of argon2 supported by this implementation (1.3). 15 export def VERSION: u8 = 0x13; 16 17 // Number of u64 elements of one block. 18 export def BLOCKSIZE: u32 = 128; 19 20 def SLICES: size = 4; 21 22 type block64 = [BLOCKSIZE]u64; 23 24 const zeroblock: block64 = [0...]; 25 26 type mode = enum { 27 D = 0, 28 I = 1, 29 ID = 2, 30 }; 31 32 // This type provides configuration options for the argon2 algorithm. Most users 33 // will find [[default_config]] or [[low_mem_config]] suitable for their needs 34 // without providing a custom configuration. If writing a custom configuration, 35 // consult the RFC for advice on selecting suitable values for your use-case. 36 // 37 // 'parallel' specifies the number of parallel processes. 'pass' configures the 38 // number of iterations. Both values must be at least one. Note: the Hare 39 // implementation of argon2 does not process hashes in parallel, though it will 40 // still compute the correct hash if this value is greater than one. 41 // 42 // 'version' specifies the version of the argon2 function. The implementation 43 // currently only supports version 1.3. Use [[VERSION]] here. 44 // 45 // 'secret' and 'data' are optional byte arrays that are applied to the initial 46 // state. Consult the RFC for details. 47 // 48 // The 'mem' parameter is used to configure working memory used during the 49 // computation. The argon2 algorithm requires a large amount of memory to 50 // compute hashes. If 'mem' set to a u32, it is interpreted as the desired 51 // number of 1024-byte blocks the implementation shall allocate for you. If the 52 // caller wants to manage the allocation itself, provide a []u8 instead. The 53 // length of this slice must be at least 8 times the value of 'parallel' in 54 // blocks, and must be a multiple of [[BLOCKSIZE]]. To have the implementation 55 // allocate 64 KiB, set 'mem' to 64. To use the same amount of caller-provided 56 // memory, provide a slice of length 64 * [[BLOCKSIZE]]. 57 export type conf = struct { 58 mem: (u32 | []u64), 59 parallel: u32, 60 passes: u32, 61 version: u8, 62 secret: []u8, 63 data: []u8 64 }; 65 66 // The default recommended configuration for most use cases. This configuration 67 // uses 2 GiB of working memory. A 16-byte 'salt' and 32-byte 'dest' parameter 68 // is recommended in combination with this configuration. 69 export const default_conf: conf = conf { 70 mem = 2 * 1024 * 1024, 71 passes = 1, 72 parallel = 4, 73 version = 0x13, 74 ... 75 }; 76 77 // The default recommended configuration for memory-constrained use cases. This 78 // configuration uses 64 MiB of working memory. A 16-byte 'salt' and 32-byte 79 // 'dest' parameter is recommended in combination with this configuration. 80 export const low_mem_conf: conf = conf { 81 mem = 64 * 1024, 82 passes = 3, 83 parallel = 4, 84 version = 0x13, 85 ... 86 }; 87 88 type context = struct { 89 mode: mode, 90 cols: size, 91 rows: size, 92 sliceblocks: size, 93 mem: []u64, 94 pass: u32, 95 seedsinit: block64, 96 seedblock: block64, 97 }; 98 99 // Computes an argon2d hash, writing the digest to 'dest'. A 'salt' length of 16 100 // bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32 bytes 101 // is recommended, and 4 bytes is the minimum. 102 // 103 // The argon2d mode uses data-dependent memory access and is suitable for 104 // applications with no threats of side-channel timing attacks. 105 export fn argon2d( 106 dest: []u8, 107 password: []u8, 108 salt: []u8, 109 cfg: *conf, 110 ) (void | nomem) = { 111 return argon2(dest, password, salt, cfg, mode::D); 112 }; 113 114 // Computes an argon2i hash, writing the digest to 'dest'. A 'salt' length of 16 115 // bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32 bytes 116 // is recommended, and 4 bytes is the minimum. 117 // 118 // The argon2i mode uses data-independent memory access and is suitable for 119 // password hashing and key derivation. It makes more passes over memory to 120 // protect from trade-off attacks. 121 export fn argon2i( 122 dest: []u8, 123 password: []u8, 124 salt: []u8, 125 cfg: *conf, 126 ) (void | nomem) = { 127 return argon2(dest, password, salt, cfg, mode::I); 128 }; 129 130 // Computes an argon2id hash, writing the digest to 'dest'. A 'salt' length of 131 // 16 bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32 132 // bytes is recommended, and 4 bytes is the minimum. 133 // 134 // The argon2id mode works by using argon2i for the first half of the first pass 135 // and argon2d further on. It provides therefore protection from side-channel 136 // attacks and brute-force cost savings due to memory trade-offs. 137 // 138 // If you are unsure which variant to use, argon2id is recommended. 139 export fn argon2id( 140 dest: []u8, 141 password: []u8, 142 salt: []u8, 143 cfg: *conf, 144 ) (void | nomem) = { 145 return argon2(dest, password, salt, cfg, mode::ID); 146 }; 147 148 fn argon2( 149 dest: []u8, 150 password: []u8, 151 salt: []u8, 152 cfg: *conf, 153 mode: mode, 154 ) (void | nomem) = { 155 assert(endian::host == &endian::little, "TODO big endian support"); 156 157 assert(len(dest) >= 4 && len(dest) <= types::U32_MAX); 158 assert(len(password) <= types::U32_MAX); 159 assert(len(salt) >= 8 && len(salt) <= types::U32_MAX); 160 assert(cfg.parallel >= 1); 161 assert(cfg.passes >= 1); 162 assert(len(cfg.secret) <= types::U32_MAX); 163 assert(len(cfg.data) <= types::U32_MAX); 164 165 let initmemsize = 0u32; 166 let mem: []u64 = match (cfg.mem) { 167 case let mem: []u64 => 168 assert(len(mem) >= 8 * cfg.parallel * BLOCKSIZE 169 && len(mem) % BLOCKSIZE == 0 170 && len(mem) / BLOCKSIZE <= types::U32_MAX); 171 initmemsize = (len(mem) / BLOCKSIZE): u32; 172 173 // round down memory to nearest multiple of 4 times parallel 174 const memsize = len(mem) - len(mem) 175 % (4 * cfg.parallel * BLOCKSIZE); 176 yield mem[..memsize]; 177 case let memsize: u32 => 178 assert(memsize >= 8 * cfg.parallel 179 && memsize <= types::U32_MAX); 180 181 initmemsize = memsize; 182 const memsize = memsize - memsize % (4 * cfg.parallel); 183 yield alloc([0...], memsize * BLOCKSIZE): []u64; 184 }; 185 186 let h0: [64]u8 = [0...]; 187 inithash(&h0, len(dest): u32, password, salt, cfg, mode, initmemsize); 188 189 const memsize = (len(mem) / BLOCKSIZE): u32; 190 const cols = 4 * (memsize / (4 * cfg.parallel)); 191 let ctx = context { 192 rows = cfg.parallel, 193 cols = cols, 194 sliceblocks = cols / 4, 195 pass = 0, 196 mem = mem, 197 mode = mode, 198 seedsinit = [0...], 199 seedblock = [0...], 200 ... 201 }; 202 203 // hash first and second blocks of each row 204 for (let i = 0z; i < ctx.rows; i += 1) { 205 let src: [72]u8 = [0...]; 206 src[..64] = h0[..]; 207 208 endian::leputu32(src[64..68], 0); 209 endian::leputu32(src[68..], i: u32); 210 varhash(blocku8(&ctx, i, 0), src); 211 212 endian::leputu32(src[64..68], 1); 213 endian::leputu32(src[68..], i: u32); 214 varhash(blocku8(&ctx, i, 1), src); 215 }; 216 217 // process segments 218 for (ctx.pass < cfg.passes; ctx.pass += 1) { 219 for (let s = 0z; s < SLICES; s += 1) { 220 for (let i = 0z; i < ctx.rows; i += 1) { 221 segproc(cfg, &ctx, i, s); 222 }; 223 }; 224 }; 225 226 // final hash 227 let b = blocku8(&ctx, 0, ctx.cols - 1); 228 for (let i = 1z; i < ctx.rows; i += 1) { 229 math::xor(b, b, blocku8(&ctx, i, ctx.cols - 1)); 230 }; 231 232 varhash(dest, b); 233 234 bytes::zero(h0); 235 bytes::zero((ctx.mem: *[*]u8)[..len(ctx.mem) * size(u64)]); 236 237 if (cfg.mem is u32) { 238 // mem was allocated internally 239 free(ctx.mem); 240 }; 241 }; 242 243 fn block(ctx: *context, i: size, j: size) []u64 = { 244 let index = (ctx.cols * i + j) * BLOCKSIZE; 245 return ctx.mem[index..index + BLOCKSIZE]; 246 }; 247 248 fn blocku8(ctx: *context, i: size, j: size) []u8 = { 249 return (block(ctx, i, j): *[*]u8)[..BLOCKSIZE * size(u64)]; 250 }; 251 252 fn refblock(cfg: *conf, ctx: *context, seed: u64, i: size, j: size) []u64 = { 253 const segstart = (j - (j % ctx.sliceblocks)) / ctx.sliceblocks; 254 const index = j % ctx.sliceblocks; 255 256 const l: size = if (segstart == 0 && ctx.pass == 0) { 257 yield i; 258 } else { 259 yield (seed >> 32) % cfg.parallel; 260 }; 261 262 let poolstart: u64 = ((segstart + 1) % SLICES) * ctx.sliceblocks; 263 let poolsize: u64 = 3 * ctx.sliceblocks; 264 265 if (i == l) { 266 poolsize += index; 267 }; 268 269 if (ctx.pass == 0) { 270 poolstart = 0; 271 poolsize = segstart * ctx.sliceblocks; 272 if (segstart == 0 || i == l) { 273 poolsize += index; 274 }; 275 }; 276 277 if (index == 0 || i == l) { 278 poolsize -= 1; 279 }; 280 281 const j1: u64 = seed & 0xffffffff; 282 const x: u64 = (j1 * j1) >> 32; 283 const y: u64 = (poolsize * x) >> 32; 284 const z: u64 = (poolstart + poolsize - (y+1)) % ctx.cols: u64; 285 286 return block(ctx, l: size, z: size); 287 }; 288 289 fn inithash( 290 dest: *[64]u8, 291 taglen: u32, 292 password: []u8, 293 salt: []u8, 294 cfg: *conf, 295 mode: mode, 296 memsize: u32, 297 ) void = { 298 let u32buf: [4]u8 = [0...]; 299 let h = blake2b::blake2b([], 64); 300 defer hash::close(&h); 301 302 hash_leputu32(&h, cfg.parallel); 303 hash_leputu32(&h, taglen); 304 hash_leputu32(&h, memsize); 305 hash_leputu32(&h, cfg.passes); 306 hash_leputu32(&h, cfg.version); 307 hash_leputu32(&h, mode: u32); 308 hash_leputu32(&h, len(password): u32); 309 hash::write(&h, password); 310 311 hash_leputu32(&h, len(salt): u32); 312 hash::write(&h, salt); 313 314 hash_leputu32(&h, len(cfg.secret): u32); 315 hash::write(&h, cfg.secret); 316 317 hash_leputu32(&h, len(cfg.data): u32); 318 hash::write(&h, cfg.data); 319 320 hash::sum(&h, dest[..]); 321 }; 322 323 fn hash_leputu32(h: *hash::hash, u: u32) void = { 324 let buf: [4]u8 = [0...]; 325 endian::leputu32(buf, u); 326 hash::write(h, buf[..]); 327 }; 328 329 // The variable hash function H' 330 fn varhash(dest: []u8, block: []u8) void = { 331 let u32buf: [4]u8 = [0...]; 332 333 if (len(dest) <= 64) { 334 let h = blake2b::blake2b([], len(dest)); 335 defer hash::close(&h); 336 hash_leputu32(&h, len(dest): u32); 337 hash::write(&h, block); 338 hash::sum(&h, dest); 339 return; 340 }; 341 342 // TODO this may be replaced with a constant time divceil in future to 343 // avoid leaking the dest len. 344 const r = divceil(len(dest): u32, 32) - 2; 345 let v: [64]u8 = [0...]; 346 347 let destbuf = memio::fixed(dest); 348 349 let h = blake2b::blake2b([], 64); 350 hash_leputu32(&h, len(dest): u32); 351 hash::write(&h, block); 352 hash::sum(&h, v[..]); 353 hash::close(&h); 354 355 io::writeall(&destbuf, v[..32])!; 356 357 for (let i = 1z; i < r; i += 1) { 358 let h = blake2b::blake2b([], 64); 359 hash::write(&h, v[..]); 360 hash::sum(&h, v[..]); 361 hash::close(&h); 362 io::writeall(&destbuf, v[..32])!; 363 }; 364 365 const remainder = len(dest) - 32 * r; 366 let hend = blake2b::blake2b([], remainder); 367 defer hash::close(&hend); 368 hash::write(&hend, v[..]); 369 hash::sum(&hend, v[..remainder]); 370 io::writeall(&destbuf, v[..remainder])!; 371 }; 372 373 fn divceil(dividend: u32, divisor: u32) u32 = { 374 let result = dividend / divisor; 375 if (dividend % divisor > 0) { 376 result += 1; 377 }; 378 return result; 379 }; 380 381 fn xorblock(dest: []u64, x: []u64, y: []u64) void = { 382 math::xor((dest: *[*]u8)[..len(dest) * size(u64)], 383 (x: *[*]u8)[..len(dest) * size(u64)], 384 (y: *[*]u8)[..len(dest) * size(u64)]); 385 }; 386 387 fn segproc(cfg: *conf, ctx: *context, i: size, slice: size) void = { 388 const init = switch (ctx.mode) { 389 case mode::I => 390 yield true; 391 case mode::ID => 392 yield ctx.pass == 0 && slice < 2; 393 case mode::D => 394 yield false; 395 }; 396 if (init) { 397 ctx.seedsinit[0] = ctx.pass; 398 ctx.seedsinit[1] = i; 399 ctx.seedsinit[2] = slice; 400 ctx.seedsinit[3] = len(ctx.mem) / BLOCKSIZE; 401 ctx.seedsinit[4] = cfg.passes; 402 ctx.seedsinit[5] = ctx.mode: u64; 403 ctx.seedsinit[6] = 0; 404 405 if (ctx.pass == 0 && slice == 0) { 406 ctx.seedsinit[6] += 1; 407 compress(ctx.seedblock, ctx.seedsinit, zeroblock, 408 false); 409 compress(ctx.seedblock, ctx.seedblock, zeroblock, 410 false); 411 }; 412 }; 413 414 for (let b = 0z; b < ctx.sliceblocks; b += 1) { 415 const j = slice * ctx.sliceblocks + b; 416 if (ctx.pass == 0 && j < 2) { 417 continue; 418 }; 419 420 const dmodeseed = switch (ctx.mode) { 421 case mode::D => 422 yield true; 423 case mode::ID => 424 yield ctx.pass > 0 || slice > 1; 425 case mode::I => 426 yield false; 427 }; 428 429 const pj = if (j == 0) ctx.cols - 1 else j - 1; 430 let prev = block(ctx, i, pj); 431 432 const seed: u64 = if (dmodeseed) { 433 yield prev[0]; 434 } else { 435 if (b % BLOCKSIZE == 0) { 436 ctx.seedsinit[6] += 1; 437 compress(ctx.seedblock, ctx.seedsinit, 438 zeroblock, false); 439 compress(ctx.seedblock, ctx.seedblock, 440 zeroblock, false); 441 }; 442 yield ctx.seedblock[b % BLOCKSIZE]; 443 }; 444 445 let ref = refblock(cfg, ctx, seed, i, j); 446 compress(block(ctx, i, j), prev, ref, ctx.pass > 0); 447 }; 448 }; 449 450 fn compress(dest: []u64, x: []u64, y: []u64, xor: bool) void = { 451 let r: block64 = [0...]; 452 xorblock(r, x, y); 453 454 let z: block64 = [0...]; 455 z[..] = r[..]; 456 457 for (let i = 0z; i < 128; i += 16) { 458 perm(&z[i], &z[i + 1], &z[i + 2], &z[i + 3], &z[i + 4], 459 &z[i + 5], &z[i + 6], &z[i + 7], &z[i + 8], &z[i + 9], 460 &z[i + 10], &z[i + 11], &z[i + 12], &z[i + 13], 461 &z[i + 14], &z[i + 15]); 462 }; 463 464 for (let i = 0z; i < 16; i += 2) { 465 perm(&z[i], &z[i + 1], &z[i + 16], &z[i + 17], &z[i + 32], 466 &z[i + 33], &z[i + 48], &z[i + 49], &z[i + 64], 467 &z[i + 65], &z[i + 80], &z[i + 81], &z[i + 96], 468 &z[i + 97], &z[i + 112], &z[i + 113]); 469 }; 470 471 if (xor) { 472 xorblock(r, r, dest); 473 }; 474 475 xorblock(dest, z, r); 476 }; 477 478 fn perm( 479 x0: *u64, 480 x1: *u64, 481 x2: *u64, 482 x3: *u64, 483 x4: *u64, 484 x5: *u64, 485 x6: *u64, 486 x7: *u64, 487 x8: *u64, 488 x9: *u64, 489 x10: *u64, 490 x11: *u64, 491 x12: *u64, 492 x13: *u64, 493 x14: *u64, 494 x15: *u64, 495 ) void = { 496 mix(x0, x4, x8, x12); 497 mix(x1, x5, x9, x13); 498 mix(x2, x6, x10, x14); 499 mix(x3, x7, x11, x15); 500 501 mix(x0, x5, x10, x15); 502 mix(x1, x6, x11, x12); 503 mix(x2, x7, x8, x13); 504 mix(x3, x4, x9, x14); 505 }; 506 507 fn mix(a: *u64, b: *u64, c: *u64, d: *u64) void = { 508 *a = *a + *b + 2 * trunc(*a) * trunc(*b); 509 *d = math::rotr64(*d ^ *a, 32); 510 *c = *c + *d + 2 * trunc(*c) * trunc(*d); 511 *b = math::rotr64(*b ^ *c, 24); 512 513 *a = *a + *b + 2 * trunc(*a) * trunc(*b); 514 *d = math::rotr64(*d ^ *a, 16); 515 *c = *c + *d + 2 * trunc(*c) * trunc(*d); 516 *b = math::rotr64(*b ^ *c, 63); 517 }; 518 519 fn trunc(a: u64) u64 = { 520 return a & 0xffffffff; 521 };