argon2.ha (13584B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 // (c) 2022 Alexey Yerin <yyp@disroot.org> 5 // (c) 2021-2022 Armin Preiml <apreiml@strohwolke.at> 6 // (c) 2021-2022 Drew DeVault <sir@cmpwn.com> 7 use bytes; 8 use crypto::blake2b; 9 use crypto::math; 10 use endian; 11 use errors::{nomem}; 12 use hash; 13 use io; 14 use memio; 15 use types; 16 17 // Latest version of argon2 supported by this implementation (1.3). 18 export def VERSION: u8 = 0x13; 19 20 // Number of u64 elements of one block. 21 export def BLOCKSZ: u32 = 128; 22 23 def SLICES: size = 4; 24 25 type block64 = [BLOCKSZ]u64; 26 27 const zeroblock: block64 = [0...]; 28 29 type mode = enum { 30 D = 0, 31 I = 1, 32 ID = 2, 33 }; 34 35 // This type provides configuration options for the argon2 algorithm. Most users 36 // will find [[default_conf]] or [[low_mem_conf]] suitable for their needs 37 // without providing a custom configuration. If writing a custom configuration, 38 // consult the RFC for advice on selecting suitable values for your use-case. 39 // 40 // 'parallel' specifies the number of parallel processes. 'pass' configures the 41 // number of iterations. Both values must be at least one. Note: the Hare 42 // implementation of argon2 does not process hashes in parallel, though it will 43 // still compute the correct hash if this value is greater than one. 44 // 45 // 'version' specifies the version of the argon2 function. The implementation 46 // currently only supports version 1.3. Use [[VERSION]] here. 47 // 48 // 'secret' and 'data' are optional byte arrays that are applied to the initial 49 // state. Consult the RFC for details. 50 // 51 // The 'mem' parameter is used to configure working memory used during the 52 // computation. The argon2 algorithm requires a large amount of memory to 53 // compute hashes. If 'mem' set to a u32, it is interpreted as the desired 54 // number of 1024-byte blocks the implementation shall allocate for you. If the 55 // caller wants to manage the allocation itself, provide a []u8 instead. The 56 // length of this slice must be at least 8 times the value of 'parallel' in 57 // blocks, and must be a multiple of [[BLOCKSZ]]. To have the implementation 58 // allocate 64 KiB, set 'mem' to 64. To use the same amount of caller-provided 59 // memory, provide a slice of length 64 * [[BLOCKSZ]]. 60 export type conf = struct { 61 mem: (u32 | []u64), 62 parallel: u32, 63 passes: u32, 64 version: u8, 65 secret: []u8, 66 data: []u8 67 }; 68 69 // The default recommended configuration for most use cases. This configuration 70 // uses 2 GiB of working memory. A 16-byte 'salt' and 32-byte 'dest' parameter 71 // is recommended in combination with this configuration. 72 export const default_conf: conf = conf { 73 mem = 2 * 1024 * 1024, 74 passes = 1, 75 parallel = 4, 76 version = 0x13, 77 ... 78 }; 79 80 // The default recommended configuration for memory-constrained use cases. This 81 // configuration uses 64 MiB of working memory. A 16-byte 'salt' and 32-byte 82 // 'dest' parameter is recommended in combination with this configuration. 83 export const low_mem_conf: conf = conf { 84 mem = 64 * 1024, 85 passes = 3, 86 parallel = 4, 87 version = 0x13, 88 ... 89 }; 90 91 type context = struct { 92 mode: mode, 93 cols: size, 94 rows: size, 95 sliceblocks: size, 96 mem: []u64, 97 pass: u32, 98 seedsinit: block64, 99 seedblock: block64, 100 }; 101 102 // Computes an argon2d hash, writing the digest to 'dest'. A 'salt' length of 16 103 // bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32 bytes 104 // is recommended, and 4 bytes is the minimum. 105 // 106 // The argon2d mode uses data-dependent memory access and is suitable for 107 // applications with no threats of side-channel timing attacks. 108 export fn argon2d( 109 dest: []u8, 110 password: []u8, 111 salt: []u8, 112 cfg: *conf, 113 ) (void | nomem) = { 114 return argon2(dest, password, salt, cfg, mode::D); 115 }; 116 117 // Computes an argon2i hash, writing the digest to 'dest'. A 'salt' length of 16 118 // bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32 bytes 119 // is recommended, and 4 bytes is the minimum. 120 // 121 // The argon2i mode uses data-independent memory access and is suitable for 122 // password hashing and key derivation. It makes more passes over memory to 123 // protect from trade-off attacks. 124 export fn argon2i( 125 dest: []u8, 126 password: []u8, 127 salt: []u8, 128 cfg: *conf, 129 ) (void | nomem) = { 130 return argon2(dest, password, salt, cfg, mode::I); 131 }; 132 133 // Computes an argon2id hash, writing the digest to 'dest'. A 'salt' length of 134 // 16 bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32 135 // bytes is recommended, and 4 bytes is the minimum. 136 // 137 // The argon2id mode works by using argon2i for the first half of the first pass 138 // and argon2d further on. It provides therefore protection from side-channel 139 // attacks and brute-force cost savings due to memory trade-offs. 140 // 141 // If you are unsure which variant to use, argon2id is recommended. 142 export fn argon2id( 143 dest: []u8, 144 password: []u8, 145 salt: []u8, 146 cfg: *conf, 147 ) (void | nomem) = { 148 return argon2(dest, password, salt, cfg, mode::ID); 149 }; 150 151 fn argon2( 152 dest: []u8, 153 password: []u8, 154 salt: []u8, 155 cfg: *conf, 156 mode: mode, 157 ) (void | nomem) = { 158 assert(endian::host == &endian::little, "TODO big endian support"); 159 160 assert(len(dest) >= 4 && len(dest) <= types::U32_MAX); 161 assert(len(password) <= types::U32_MAX); 162 assert(len(salt) >= 8 && len(salt) <= types::U32_MAX); 163 assert(cfg.parallel >= 1); 164 assert(cfg.passes >= 1); 165 assert(len(cfg.secret) <= types::U32_MAX); 166 assert(len(cfg.data) <= types::U32_MAX); 167 168 let initmemsize = 0u32; 169 let mem: []u64 = match (cfg.mem) { 170 case let mem: []u64 => 171 assert(len(mem) >= 8 * cfg.parallel * BLOCKSZ 172 && len(mem) % BLOCKSZ == 0 173 && len(mem) / BLOCKSZ <= types::U32_MAX); 174 initmemsize = (len(mem) / BLOCKSZ): u32; 175 176 // round down memory to nearest multiple of 4 times parallel 177 const memsize = len(mem) - len(mem) 178 % (4 * cfg.parallel * BLOCKSZ); 179 yield mem[..memsize]; 180 case let memsize: u32 => 181 assert(memsize >= 8 * cfg.parallel 182 && memsize <= types::U32_MAX); 183 184 initmemsize = memsize; 185 const memsize = memsize - memsize % (4 * cfg.parallel); 186 yield alloc([0...], memsize * BLOCKSZ): []u64; 187 }; 188 189 let h0: [64]u8 = [0...]; 190 inithash(&h0, len(dest): u32, password, salt, cfg, mode, initmemsize); 191 192 const memsize = (len(mem) / BLOCKSZ): u32; 193 const cols = 4 * (memsize / (4 * cfg.parallel)); 194 let ctx = context { 195 rows = cfg.parallel, 196 cols = cols, 197 sliceblocks = cols / 4, 198 pass = 0, 199 mem = mem, 200 mode = mode, 201 seedsinit = [0...], 202 seedblock = [0...], 203 ... 204 }; 205 206 // hash first and second blocks of each row 207 for (let i = 0z; i < ctx.rows; i += 1) { 208 let src: [72]u8 = [0...]; 209 src[..64] = h0[..]; 210 211 endian::leputu32(src[64..68], 0); 212 endian::leputu32(src[68..], i: u32); 213 varhash(blocku8(&ctx, i, 0), src); 214 215 endian::leputu32(src[64..68], 1); 216 endian::leputu32(src[68..], i: u32); 217 varhash(blocku8(&ctx, i, 1), src); 218 }; 219 220 // process segments 221 for (ctx.pass < cfg.passes; ctx.pass += 1) { 222 for (let s = 0z; s < SLICES; s += 1) { 223 for (let i = 0z; i < ctx.rows; i += 1) { 224 segproc(cfg, &ctx, i, s); 225 }; 226 }; 227 }; 228 229 // final hash 230 let b = blocku8(&ctx, 0, ctx.cols - 1); 231 for (let i = 1z; i < ctx.rows; i += 1) { 232 math::xor(b, b, blocku8(&ctx, i, ctx.cols - 1)); 233 }; 234 235 varhash(dest, b); 236 237 bytes::zero(h0); 238 bytes::zero((ctx.mem: *[*]u8)[..len(ctx.mem) * size(u64)]); 239 240 if (cfg.mem is u32) { 241 // mem was allocated internally 242 free(ctx.mem); 243 }; 244 }; 245 246 fn block(ctx: *context, i: size, j: size) []u64 = { 247 let index = (ctx.cols * i + j) * BLOCKSZ; 248 return ctx.mem[index..index + BLOCKSZ]; 249 }; 250 251 fn blocku8(ctx: *context, i: size, j: size) []u8 = { 252 return (block(ctx, i, j): *[*]u8)[..BLOCKSZ * size(u64)]; 253 }; 254 255 fn refblock(cfg: *conf, ctx: *context, seed: u64, i: size, j: size) []u64 = { 256 const segstart = (j - (j % ctx.sliceblocks)) / ctx.sliceblocks; 257 const index = j % ctx.sliceblocks; 258 259 const l: size = if (segstart == 0 && ctx.pass == 0) { 260 yield i; 261 } else { 262 yield (seed >> 32) % cfg.parallel; 263 }; 264 265 let poolstart: u64 = ((segstart + 1) % SLICES) * ctx.sliceblocks; 266 let poolsize: u64 = 3 * ctx.sliceblocks; 267 268 if (i == l) { 269 poolsize += index; 270 }; 271 272 if (ctx.pass == 0) { 273 poolstart = 0; 274 poolsize = segstart * ctx.sliceblocks; 275 if (segstart == 0 || i == l) { 276 poolsize += index; 277 }; 278 }; 279 280 if (index == 0 || i == l) { 281 poolsize -= 1; 282 }; 283 284 const j1: u64 = seed & 0xffffffff; 285 const x: u64 = (j1 * j1) >> 32; 286 const y: u64 = (poolsize * x) >> 32; 287 const z: u64 = (poolstart + poolsize - (y+1)) % ctx.cols: u64; 288 289 return block(ctx, l: size, z: size); 290 }; 291 292 fn inithash( 293 dest: *[64]u8, 294 taglen: u32, 295 password: []u8, 296 salt: []u8, 297 cfg: *conf, 298 mode: mode, 299 memsize: u32, 300 ) void = { 301 let u32buf: [4]u8 = [0...]; 302 let h = blake2b::blake2b([], 64); 303 defer hash::close(&h); 304 305 hash_leputu32(&h, cfg.parallel); 306 hash_leputu32(&h, taglen); 307 hash_leputu32(&h, memsize); 308 hash_leputu32(&h, cfg.passes); 309 hash_leputu32(&h, cfg.version); 310 hash_leputu32(&h, mode: u32); 311 hash_leputu32(&h, len(password): u32); 312 hash::write(&h, password); 313 314 hash_leputu32(&h, len(salt): u32); 315 hash::write(&h, salt); 316 317 hash_leputu32(&h, len(cfg.secret): u32); 318 hash::write(&h, cfg.secret); 319 320 hash_leputu32(&h, len(cfg.data): u32); 321 hash::write(&h, cfg.data); 322 323 hash::sum(&h, dest[..]); 324 }; 325 326 fn hash_leputu32(h: *hash::hash, u: u32) void = { 327 let buf: [4]u8 = [0...]; 328 endian::leputu32(buf, u); 329 hash::write(h, buf[..]); 330 }; 331 332 // The variable hash function H' 333 fn varhash(dest: []u8, block: []u8) void = { 334 let u32buf: [4]u8 = [0...]; 335 336 if (len(dest) <= 64) { 337 let h = blake2b::blake2b([], len(dest)); 338 defer hash::close(&h); 339 hash_leputu32(&h, len(dest): u32); 340 hash::write(&h, block); 341 hash::sum(&h, dest); 342 return; 343 }; 344 345 // TODO this may be replaced with a constant time divceil in future to 346 // avoid leaking the dest len. 347 const r = divceil(len(dest): u32, 32) - 2; 348 let v: [64]u8 = [0...]; 349 350 let destbuf = memio::fixed(dest); 351 352 let h = blake2b::blake2b([], 64); 353 hash_leputu32(&h, len(dest): u32); 354 hash::write(&h, block); 355 hash::sum(&h, v[..]); 356 hash::close(&h); 357 358 io::writeall(&destbuf, v[..32])!; 359 360 for (let i = 1z; i < r; i += 1) { 361 let h = blake2b::blake2b([], 64); 362 hash::write(&h, v[..]); 363 hash::sum(&h, v[..]); 364 hash::close(&h); 365 io::writeall(&destbuf, v[..32])!; 366 }; 367 368 const remainder = len(dest) - 32 * r; 369 let hend = blake2b::blake2b([], remainder); 370 defer hash::close(&hend); 371 hash::write(&hend, v[..]); 372 hash::sum(&hend, v[..remainder]); 373 io::writeall(&destbuf, v[..remainder])!; 374 }; 375 376 fn divceil(dividend: u32, divisor: u32) u32 = { 377 let result = dividend / divisor; 378 if (dividend % divisor > 0) { 379 result += 1; 380 }; 381 return result; 382 }; 383 384 fn xorblock(dest: []u64, x: []u64, y: []u64) void = { 385 for (let i = 0z; i < len(dest); i += 1) { 386 dest[i] = x[i] ^ y[i]; 387 }; 388 }; 389 390 fn segproc(cfg: *conf, ctx: *context, i: size, slice: size) void = { 391 const init = switch (ctx.mode) { 392 case mode::I => 393 yield true; 394 case mode::ID => 395 yield ctx.pass == 0 && slice < 2; 396 case mode::D => 397 yield false; 398 }; 399 if (init) { 400 ctx.seedsinit[0] = ctx.pass; 401 ctx.seedsinit[1] = i; 402 ctx.seedsinit[2] = slice; 403 ctx.seedsinit[3] = len(ctx.mem) / BLOCKSZ; 404 ctx.seedsinit[4] = cfg.passes; 405 ctx.seedsinit[5] = ctx.mode: u64; 406 ctx.seedsinit[6] = 0; 407 408 if (ctx.pass == 0 && slice == 0) { 409 ctx.seedsinit[6] += 1; 410 compress(ctx.seedblock, ctx.seedsinit, zeroblock, 411 false); 412 compress(ctx.seedblock, ctx.seedblock, zeroblock, 413 false); 414 }; 415 }; 416 417 for (let b = 0z; b < ctx.sliceblocks; b += 1) { 418 const j = slice * ctx.sliceblocks + b; 419 if (ctx.pass == 0 && j < 2) { 420 continue; 421 }; 422 423 const dmodeseed = switch (ctx.mode) { 424 case mode::D => 425 yield true; 426 case mode::ID => 427 yield ctx.pass > 0 || slice > 1; 428 case mode::I => 429 yield false; 430 }; 431 432 const pj = if (j == 0) ctx.cols - 1 else j - 1; 433 let prev = block(ctx, i, pj); 434 435 const seed: u64 = if (dmodeseed) { 436 yield prev[0]; 437 } else { 438 if (b % BLOCKSZ == 0) { 439 ctx.seedsinit[6] += 1; 440 compress(ctx.seedblock, ctx.seedsinit, 441 zeroblock, false); 442 compress(ctx.seedblock, ctx.seedblock, 443 zeroblock, false); 444 }; 445 yield ctx.seedblock[b % BLOCKSZ]; 446 }; 447 448 let ref = refblock(cfg, ctx, seed, i, j); 449 compress(block(ctx, i, j), prev, ref, ctx.pass > 0); 450 }; 451 }; 452 453 fn compress(dest: []u64, x: []u64, y: []u64, xor: bool) void = { 454 let r: block64 = [0...]; 455 xorblock(r, x, y); 456 457 let z: block64 = [0...]; 458 z[..] = r[..]; 459 460 for (let i = 0z; i < 128; i += 16) { 461 perm(&z[i], &z[i + 1], &z[i + 2], &z[i + 3], &z[i + 4], 462 &z[i + 5], &z[i + 6], &z[i + 7], &z[i + 8], &z[i + 9], 463 &z[i + 10], &z[i + 11], &z[i + 12], &z[i + 13], 464 &z[i + 14], &z[i + 15]); 465 }; 466 467 for (let i = 0z; i < 16; i += 2) { 468 perm(&z[i], &z[i + 1], &z[i + 16], &z[i + 17], &z[i + 32], 469 &z[i + 33], &z[i + 48], &z[i + 49], &z[i + 64], 470 &z[i + 65], &z[i + 80], &z[i + 81], &z[i + 96], 471 &z[i + 97], &z[i + 112], &z[i + 113]); 472 }; 473 474 if (xor) { 475 xorblock(r, r, dest); 476 }; 477 478 xorblock(dest, z, r); 479 }; 480 481 fn perm( 482 x0: *u64, 483 x1: *u64, 484 x2: *u64, 485 x3: *u64, 486 x4: *u64, 487 x5: *u64, 488 x6: *u64, 489 x7: *u64, 490 x8: *u64, 491 x9: *u64, 492 x10: *u64, 493 x11: *u64, 494 x12: *u64, 495 x13: *u64, 496 x14: *u64, 497 x15: *u64, 498 ) void = { 499 mix(x0, x4, x8, x12); 500 mix(x1, x5, x9, x13); 501 mix(x2, x6, x10, x14); 502 mix(x3, x7, x11, x15); 503 504 mix(x0, x5, x10, x15); 505 mix(x1, x6, x11, x12); 506 mix(x2, x7, x8, x13); 507 mix(x3, x4, x9, x14); 508 }; 509 510 fn mix(a: *u64, b: *u64, c: *u64, d: *u64) void = { 511 *a = *a + *b + 2 * (*a & 0xffffffff) * (*b & 0xffffffff); 512 *d = math::rotr64(*d ^ *a, 32); 513 *c = *c + *d + 2 * (*c & 0xffffffff) * (*d & 0xffffffff); 514 *b = math::rotr64(*b ^ *c, 24); 515 516 *a = *a + *b + 2 * (*a & 0xffffffff) * (*b & 0xffffffff); 517 *d = math::rotr64(*d ^ *a, 16); 518 *c = *c + *d + 2 * (*c & 0xffffffff) * (*d & 0xffffffff); 519 *b = math::rotr64(*b ^ *c, 63); 520 };