argon2.ha (13421B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 use bytes; 5 use crypto::blake2b; 6 use crypto::math; 7 use endian; 8 use hash; 9 use io; 10 use memio; 11 use types; 12 13 // Latest version of argon2 supported by this implementation (1.3). 14 export def VERSION: u8 = 0x13; 15 16 // Number of u64 elements of one block. 17 export def BLOCKSZ: u32 = 128; 18 19 def SLICES: size = 4; 20 21 type block64 = [BLOCKSZ]u64; 22 23 const zeroblock: block64 = [0...]; 24 25 type mode = enum { 26 D = 0, 27 I = 1, 28 ID = 2, 29 }; 30 31 // This type provides configuration options for the argon2 algorithm. Most users 32 // will find [[default_conf]] or [[low_mem_conf]] suitable for their needs 33 // without providing a custom configuration. If writing a custom configuration, 34 // consult the RFC for advice on selecting suitable values for your use-case. 35 // 36 // 'parallel' specifies the number of parallel processes. 'pass' configures the 37 // number of iterations. Both values must be at least one. Note: the Hare 38 // implementation of argon2 does not process hashes in parallel, though it will 39 // still compute the correct hash if this value is greater than one. 40 // 41 // 'version' specifies the version of the argon2 function. The implementation 42 // currently only supports version 1.3. Use [[VERSION]] here. 43 // 44 // 'secret' and 'data' are optional byte arrays that are applied to the initial 45 // state. Consult the RFC for details. 46 // 47 // The 'mem' parameter is used to configure working memory used during the 48 // computation. The argon2 algorithm requires a large amount of memory to 49 // compute hashes. If 'mem' set to a u32, it is interpreted as the desired 50 // number of 1024-byte blocks the implementation shall allocate for you. If the 51 // caller wants to manage the allocation itself, provide a []u8 instead. The 52 // length of this slice must be at least 8 times the value of 'parallel' in 53 // blocks, and must be a multiple of [[BLOCKSZ]]. To have the implementation 54 // allocate 64 KiB, set 'mem' to 64. To use the same amount of caller-provided 55 // memory, provide a slice of length 64 * [[BLOCKSZ]]. 56 export type conf = struct { 57 mem: (u32 | []u64), 58 parallel: u32, 59 passes: u32, 60 version: u8, 61 secret: []u8, 62 data: []u8 63 }; 64 65 // The default recommended configuration for most use cases. This configuration 66 // uses 2 GiB of working memory. A 16-byte 'salt' and 32-byte 'dest' parameter 67 // is recommended in combination with this configuration. 68 export const default_conf: conf = conf { 69 mem = 2 * 1024 * 1024, 70 passes = 1, 71 parallel = 4, 72 version = 0x13, 73 ... 74 }; 75 76 // The default recommended configuration for memory-constrained use cases. This 77 // configuration uses 64 MiB of working memory. A 16-byte 'salt' and 32-byte 78 // 'dest' parameter is recommended in combination with this configuration. 79 export const low_mem_conf: conf = conf { 80 mem = 64 * 1024, 81 passes = 3, 82 parallel = 4, 83 version = 0x13, 84 ... 85 }; 86 87 type context = struct { 88 mode: mode, 89 cols: size, 90 rows: size, 91 sliceblocks: size, 92 mem: []u64, 93 pass: u32, 94 seedsinit: block64, 95 seedblock: block64, 96 }; 97 98 // Computes an argon2d hash, writing the digest to 'dest'. A 'salt' length of 16 99 // bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32 bytes 100 // is recommended, and 4 bytes is the minimum. 101 // 102 // The argon2d mode uses data-dependent memory access and is suitable for 103 // applications with no threats of side-channel timing attacks. 104 export fn argon2d( 105 dest: []u8, 106 password: []u8, 107 salt: []u8, 108 cfg: *conf, 109 ) (void | nomem) = { 110 return argon2(dest, password, salt, cfg, mode::D); 111 }; 112 113 // Computes an argon2i hash, writing the digest to 'dest'. A 'salt' length of 16 114 // bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32 bytes 115 // is recommended, and 4 bytes is the minimum. 116 // 117 // The argon2i mode uses data-independent memory access and is suitable for 118 // password hashing and key derivation. It makes more passes over memory to 119 // protect from trade-off attacks. 120 export fn argon2i( 121 dest: []u8, 122 password: []u8, 123 salt: []u8, 124 cfg: *conf, 125 ) (void | nomem) = { 126 return argon2(dest, password, salt, cfg, mode::I); 127 }; 128 129 // Computes an argon2id hash, writing the digest to 'dest'. A 'salt' length of 130 // 16 bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32 131 // bytes is recommended, and 4 bytes is the minimum. 132 // 133 // The argon2id mode works by using argon2i for the first half of the first pass 134 // and argon2d further on. It provides therefore protection from side-channel 135 // attacks and brute-force cost savings due to memory trade-offs. 136 // 137 // If you are unsure which variant to use, argon2id is recommended. 138 export fn argon2id( 139 dest: []u8, 140 password: []u8, 141 salt: []u8, 142 cfg: *conf, 143 ) (void | nomem) = { 144 return argon2(dest, password, salt, cfg, mode::ID); 145 }; 146 147 fn argon2( 148 dest: []u8, 149 password: []u8, 150 salt: []u8, 151 cfg: *conf, 152 mode: mode, 153 ) (void | nomem) = { 154 assert(endian::host == &endian::little, "TODO big endian support"); 155 156 assert(len(dest) >= 4 && len(dest) <= types::U32_MAX); 157 assert(len(password) <= types::U32_MAX); 158 assert(len(salt) >= 8 && len(salt) <= types::U32_MAX); 159 assert(cfg.parallel >= 1); 160 assert(cfg.passes >= 1); 161 assert(len(cfg.secret) <= types::U32_MAX); 162 assert(len(cfg.data) <= types::U32_MAX); 163 164 let initmemsize = 0u32; 165 let mem: []u64 = match (cfg.mem) { 166 case let mem: []u64 => 167 assert(len(mem) >= 8 * cfg.parallel * BLOCKSZ 168 && len(mem) % BLOCKSZ == 0 169 && len(mem) / BLOCKSZ <= types::U32_MAX); 170 initmemsize = (len(mem) / BLOCKSZ): u32; 171 172 // round down memory to nearest multiple of 4 times parallel 173 const memsize = len(mem) - len(mem) 174 % (4 * cfg.parallel * BLOCKSZ); 175 yield mem[..memsize]; 176 case let memsize: u32 => 177 assert(memsize >= 8 * cfg.parallel 178 && memsize <= types::U32_MAX); 179 180 initmemsize = memsize; 181 const memsize = memsize - memsize % (4 * cfg.parallel); 182 yield alloc([0...], memsize * BLOCKSZ)!: []u64; 183 }; 184 185 let h0: [64]u8 = [0...]; 186 inithash(&h0, len(dest): u32, password, salt, cfg, mode, initmemsize); 187 188 const memsize = (len(mem) / BLOCKSZ): u32; 189 const cols = 4 * (memsize / (4 * cfg.parallel)); 190 let ctx = context { 191 rows = cfg.parallel, 192 cols = cols, 193 sliceblocks = cols / 4, 194 pass = 0, 195 mem = mem, 196 mode = mode, 197 seedsinit = [0...], 198 seedblock = [0...], 199 ... 200 }; 201 202 // hash first and second blocks of each row 203 for (let i = 0z; i < ctx.rows; i += 1) { 204 let src: [72]u8 = [0...]; 205 src[..64] = h0[..]; 206 207 endian::leputu32(src[64..68], 0); 208 endian::leputu32(src[68..], i: u32); 209 varhash(blocku8(&ctx, i, 0), src); 210 211 endian::leputu32(src[64..68], 1); 212 endian::leputu32(src[68..], i: u32); 213 varhash(blocku8(&ctx, i, 1), src); 214 }; 215 216 // process segments 217 for (ctx.pass < cfg.passes; ctx.pass += 1) { 218 for (let s = 0z; s < SLICES; s += 1) { 219 for (let i = 0z; i < ctx.rows; i += 1) { 220 segproc(cfg, &ctx, i, s); 221 }; 222 }; 223 }; 224 225 // final hash 226 let b = blocku8(&ctx, 0, ctx.cols - 1); 227 for (let i = 1z; i < ctx.rows; i += 1) { 228 math::xor(b, b, blocku8(&ctx, i, ctx.cols - 1)); 229 }; 230 231 varhash(dest, b); 232 233 bytes::zero(h0); 234 bytes::zero((ctx.mem: *[*]u8)[..len(ctx.mem) * size(u64)]); 235 236 if (cfg.mem is u32) { 237 // mem was allocated internally 238 free(ctx.mem); 239 }; 240 }; 241 242 fn block(ctx: *context, i: size, j: size) []u64 = { 243 let index = (ctx.cols * i + j) * BLOCKSZ; 244 return ctx.mem[index..index + BLOCKSZ]; 245 }; 246 247 fn blocku8(ctx: *context, i: size, j: size) []u8 = { 248 return (block(ctx, i, j): *[*]u8)[..BLOCKSZ * size(u64)]; 249 }; 250 251 fn refblock(cfg: *conf, ctx: *context, seed: u64, i: size, j: size) []u64 = { 252 const segstart = (j - (j % ctx.sliceblocks)) / ctx.sliceblocks; 253 const index = j % ctx.sliceblocks; 254 255 const l: size = if (segstart == 0 && ctx.pass == 0) { 256 yield i; 257 } else { 258 yield (seed >> 32) % cfg.parallel; 259 }; 260 261 let poolstart: u64 = ((segstart + 1) % SLICES) * ctx.sliceblocks; 262 let poolsize: u64 = 3 * ctx.sliceblocks; 263 264 if (i == l) { 265 poolsize += index; 266 }; 267 268 if (ctx.pass == 0) { 269 poolstart = 0; 270 poolsize = segstart * ctx.sliceblocks; 271 if (segstart == 0 || i == l) { 272 poolsize += index; 273 }; 274 }; 275 276 if (index == 0 || i == l) { 277 poolsize -= 1; 278 }; 279 280 const j1: u64 = seed & 0xffffffff; 281 const x: u64 = (j1 * j1) >> 32; 282 const y: u64 = (poolsize * x) >> 32; 283 const z: u64 = (poolstart + poolsize - (y+1)) % ctx.cols: u64; 284 285 return block(ctx, l: size, z: size); 286 }; 287 288 fn inithash( 289 dest: *[64]u8, 290 taglen: u32, 291 password: []u8, 292 salt: []u8, 293 cfg: *conf, 294 mode: mode, 295 memsize: u32, 296 ) void = { 297 let u32buf: [4]u8 = [0...]; 298 let h = blake2b::blake2b([], 64); 299 defer hash::close(&h); 300 301 hash_leputu32(&h, cfg.parallel); 302 hash_leputu32(&h, taglen); 303 hash_leputu32(&h, memsize); 304 hash_leputu32(&h, cfg.passes); 305 hash_leputu32(&h, cfg.version); 306 hash_leputu32(&h, mode: u32); 307 hash_leputu32(&h, len(password): u32); 308 hash::write(&h, password); 309 310 hash_leputu32(&h, len(salt): u32); 311 hash::write(&h, salt); 312 313 hash_leputu32(&h, len(cfg.secret): u32); 314 hash::write(&h, cfg.secret); 315 316 hash_leputu32(&h, len(cfg.data): u32); 317 hash::write(&h, cfg.data); 318 319 hash::sum(&h, dest[..]); 320 }; 321 322 fn hash_leputu32(h: *hash::hash, u: u32) void = { 323 let buf: [4]u8 = [0...]; 324 endian::leputu32(buf, u); 325 hash::write(h, buf[..]); 326 }; 327 328 // The variable hash function H' 329 fn varhash(dest: []u8, block: []u8) void = { 330 let u32buf: [4]u8 = [0...]; 331 332 if (len(dest) <= 64) { 333 let h = blake2b::blake2b([], len(dest)); 334 defer hash::close(&h); 335 hash_leputu32(&h, len(dest): u32); 336 hash::write(&h, block); 337 hash::sum(&h, dest); 338 return; 339 }; 340 341 // TODO this may be replaced with a constant time divceil in future to 342 // avoid leaking the dest len. 343 const r = divceil(len(dest): u32, 32) - 2; 344 let v: [64]u8 = [0...]; 345 346 let destbuf = memio::fixed(dest); 347 348 let h = blake2b::blake2b([], 64); 349 hash_leputu32(&h, len(dest): u32); 350 hash::write(&h, block); 351 hash::sum(&h, v[..]); 352 hash::close(&h); 353 354 io::writeall(&destbuf, v[..32])!; 355 356 for (let i = 1z; i < r; i += 1) { 357 let h = blake2b::blake2b([], 64); 358 hash::write(&h, v[..]); 359 hash::sum(&h, v[..]); 360 hash::close(&h); 361 io::writeall(&destbuf, v[..32])!; 362 }; 363 364 const remainder = len(dest) - 32 * r; 365 let hend = blake2b::blake2b([], remainder); 366 defer hash::close(&hend); 367 hash::write(&hend, v[..]); 368 hash::sum(&hend, v[..remainder]); 369 io::writeall(&destbuf, v[..remainder])!; 370 }; 371 372 fn divceil(dividend: u32, divisor: u32) u32 = { 373 let result = dividend / divisor; 374 if (dividend % divisor > 0) { 375 result += 1; 376 }; 377 return result; 378 }; 379 380 fn xorblock(dest: []u64, x: []u64, y: []u64) void = { 381 for (let i = 0z; i < len(dest); i += 1) { 382 dest[i] = x[i] ^ y[i]; 383 }; 384 }; 385 386 fn segproc(cfg: *conf, ctx: *context, i: size, slice: size) void = { 387 const init = switch (ctx.mode) { 388 case mode::I => 389 yield true; 390 case mode::ID => 391 yield ctx.pass == 0 && slice < 2; 392 case mode::D => 393 yield false; 394 }; 395 if (init) { 396 ctx.seedsinit[0] = ctx.pass; 397 ctx.seedsinit[1] = i; 398 ctx.seedsinit[2] = slice; 399 ctx.seedsinit[3] = len(ctx.mem) / BLOCKSZ; 400 ctx.seedsinit[4] = cfg.passes; 401 ctx.seedsinit[5] = ctx.mode: u64; 402 ctx.seedsinit[6] = 0; 403 404 if (ctx.pass == 0 && slice == 0) { 405 ctx.seedsinit[6] += 1; 406 compress(ctx.seedblock, ctx.seedsinit, zeroblock, 407 false); 408 compress(ctx.seedblock, ctx.seedblock, zeroblock, 409 false); 410 }; 411 }; 412 413 for (let b = 0z; b < ctx.sliceblocks; b += 1) { 414 const j = slice * ctx.sliceblocks + b; 415 if (ctx.pass == 0 && j < 2) { 416 continue; 417 }; 418 419 const dmodeseed = switch (ctx.mode) { 420 case mode::D => 421 yield true; 422 case mode::ID => 423 yield ctx.pass > 0 || slice > 1; 424 case mode::I => 425 yield false; 426 }; 427 428 const pj = if (j == 0) ctx.cols - 1 else j - 1; 429 let prev = block(ctx, i, pj); 430 431 const seed: u64 = if (dmodeseed) { 432 yield prev[0]; 433 } else { 434 if (b % BLOCKSZ == 0) { 435 ctx.seedsinit[6] += 1; 436 compress(ctx.seedblock, ctx.seedsinit, 437 zeroblock, false); 438 compress(ctx.seedblock, ctx.seedblock, 439 zeroblock, false); 440 }; 441 yield ctx.seedblock[b % BLOCKSZ]; 442 }; 443 444 let ref = refblock(cfg, ctx, seed, i, j); 445 compress(block(ctx, i, j), prev, ref, ctx.pass > 0); 446 }; 447 }; 448 449 fn compress(dest: []u64, x: []u64, y: []u64, xor: bool) void = { 450 let r: block64 = [0...]; 451 xorblock(r, x, y); 452 453 let z: block64 = [0...]; 454 z[..] = r[..]; 455 456 for (let i = 0z; i < 128; i += 16) { 457 perm(&z[i], &z[i + 1], &z[i + 2], &z[i + 3], &z[i + 4], 458 &z[i + 5], &z[i + 6], &z[i + 7], &z[i + 8], &z[i + 9], 459 &z[i + 10], &z[i + 11], &z[i + 12], &z[i + 13], 460 &z[i + 14], &z[i + 15]); 461 }; 462 463 for (let i = 0z; i < 16; i += 2) { 464 perm(&z[i], &z[i + 1], &z[i + 16], &z[i + 17], &z[i + 32], 465 &z[i + 33], &z[i + 48], &z[i + 49], &z[i + 64], 466 &z[i + 65], &z[i + 80], &z[i + 81], &z[i + 96], 467 &z[i + 97], &z[i + 112], &z[i + 113]); 468 }; 469 470 if (xor) { 471 xorblock(r, r, dest); 472 }; 473 474 xorblock(dest, z, r); 475 }; 476 477 fn perm( 478 x0: *u64, 479 x1: *u64, 480 x2: *u64, 481 x3: *u64, 482 x4: *u64, 483 x5: *u64, 484 x6: *u64, 485 x7: *u64, 486 x8: *u64, 487 x9: *u64, 488 x10: *u64, 489 x11: *u64, 490 x12: *u64, 491 x13: *u64, 492 x14: *u64, 493 x15: *u64, 494 ) void = { 495 mix(x0, x4, x8, x12); 496 mix(x1, x5, x9, x13); 497 mix(x2, x6, x10, x14); 498 mix(x3, x7, x11, x15); 499 500 mix(x0, x5, x10, x15); 501 mix(x1, x6, x11, x12); 502 mix(x2, x7, x8, x13); 503 mix(x3, x4, x9, x14); 504 }; 505 506 fn mix(a: *u64, b: *u64, c: *u64, d: *u64) void = { 507 *a = *a + *b + 2 * (*a & 0xffffffff) * (*b & 0xffffffff); 508 *d = math::rotr64(*d ^ *a, 32); 509 *c = *c + *d + 2 * (*c & 0xffffffff) * (*d & 0xffffffff); 510 *b = math::rotr64(*b ^ *c, 24); 511 512 *a = *a + *b + 2 * (*a & 0xffffffff) * (*b & 0xffffffff); 513 *d = math::rotr64(*d ^ *a, 16); 514 *c = *c + *d + 2 * (*c & 0xffffffff) * (*d & 0xffffffff); 515 *b = math::rotr64(*b ^ *c, 63); 516 };