hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

argon2.ha (13592B)


      1 // (c) 2022 Alexey Yerin <yyp@disroot.org>
      2 // (c) 2021-2022 Armin Preiml <apreiml@strohwolke.at>
      3 // (c) 2021-2022 Drew DeVault <sir@cmpwn.com>
      4 use bytes;
      5 use crypto::blake2b;
      6 use crypto::math;
      7 use endian;
      8 use errors::{nomem};
      9 use hash;
     10 use io;
     11 use memio;
     12 use types;
     13 
     14 // Latest version of argon2 supported by this implementation (1.3).
     15 export def VERSION: u8 = 0x13;
     16 
     17 // Number of u64 elements of one block.
     18 export def BLOCKSIZE: u32 = 128;
     19 
     20 def SLICES: size = 4;
     21 
     22 type block64 = [BLOCKSIZE]u64;
     23 
     24 const zeroblock: block64 = [0...];
     25 
     26 type mode = enum {
     27 	D = 0,
     28 	I = 1,
     29 	ID = 2,
     30 };
     31 
     32 // This type provides configuration options for the argon2 algorithm. Most users
     33 // will find [[default_config]] or [[low_mem_config]] suitable for their needs
     34 // without providing a custom configuration. If writing a custom configuration,
     35 // consult the RFC for advice on selecting suitable values for your use-case.
     36 //
     37 // 'parallel' specifies the number of parallel processes. 'pass' configures the
     38 // number of iterations. Both values must be at least one. Note: the Hare
     39 // implementation of argon2 does not process hashes in parallel, though it will
     40 // still compute the correct hash if this value is greater than one.
     41 //
     42 // 'version' specifies the version of the argon2 function. The implementation
     43 // currently only supports version 1.3. Use [[VERSION]] here.
     44 //
     45 // 'secret' and 'data' are optional byte arrays that are applied to the initial
     46 // state. Consult the RFC for details.
     47 //
     48 // The 'mem' parameter is used to configure working memory used during the
     49 // computation. The argon2 algorithm requires a large amount of memory to
     50 // compute hashes. If 'mem' set to a u32, it is interpreted as the desired
     51 // number of 1024-byte blocks the implementation shall allocate for you. If the
     52 // caller wants to manage the allocation itself, provide a []u8 instead. The
     53 // length of this slice must be at least 8 times the value of 'parallel' in
     54 // blocks, and must be a multiple of [[BLOCKSIZE]]. To have the implementation
     55 // allocate 64 KiB, set 'mem' to 64. To use the same amount of caller-provided
     56 // memory, provide a slice of length 64 * [[BLOCKSIZE]].
     57 export type conf = struct {
     58 	mem: (u32 | []u64),
     59 	parallel: u32,
     60 	passes: u32,
     61 	version: u8,
     62 	secret: []u8,
     63 	data: []u8
     64 };
     65 
     66 // The default recommended configuration for most use cases. This configuration
     67 // uses 2 GiB of working memory. A 16-byte 'salt' and 32-byte 'dest' parameter
     68 // is recommended in combination with this configuration.
     69 export const default_conf: conf = conf {
     70 	mem = 2 * 1024 * 1024,
     71 	passes = 1,
     72 	parallel = 4,
     73 	version = 0x13,
     74 	...
     75 };
     76 
     77 // The default recommended configuration for memory-constrained use cases. This
     78 // configuration uses 64 MiB of working memory. A 16-byte 'salt' and 32-byte
     79 // 'dest' parameter is recommended in combination with this configuration.
     80 export const low_mem_conf: conf = conf {
     81 	mem = 64 * 1024,
     82 	passes = 3,
     83 	parallel = 4,
     84 	version = 0x13,
     85 	...
     86 };
     87 
     88 type context = struct {
     89 	mode: mode,
     90 	cols: size,
     91 	rows: size,
     92 	sliceblocks: size,
     93 	mem: []u64,
     94 	pass: u32,
     95 	seedsinit: block64,
     96 	seedblock: block64,
     97 };
     98 
     99 // Computes an argon2d hash, writing the digest to 'dest'. A 'salt' length of 16
    100 // bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32 bytes
    101 // is recommended, and 4 bytes is the minimum.
    102 //
    103 // The argon2d mode uses data-dependent memory access and is suitable for
    104 // applications with no threats of side-channel timing attacks.
    105 export fn argon2d(
    106 	dest: []u8,
    107 	password: []u8,
    108 	salt: []u8,
    109 	cfg: *conf,
    110 ) (void | nomem) = {
    111 	return argon2(dest, password, salt, cfg, mode::D);
    112 };
    113 
    114 // Computes an argon2i hash, writing the digest to 'dest'. A 'salt' length of 16
    115 // bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32 bytes
    116 // is recommended, and 4 bytes is the minimum.
    117 //
    118 // The argon2i mode uses data-independent memory access and is suitable for
    119 // password hashing and key derivation. It makes more passes over memory to
    120 // protect from trade-off attacks.
    121 export fn argon2i(
    122 	dest: []u8,
    123 	password: []u8,
    124 	salt: []u8,
    125 	cfg: *conf,
    126 ) (void | nomem) = {
    127 	return argon2(dest, password, salt, cfg, mode::I);
    128 };
    129 
    130 // Computes an argon2id hash, writing the digest to 'dest'. A 'salt' length of
    131 // 16 bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32
    132 // bytes is recommended, and 4 bytes is the minimum.
    133 //
    134 // The argon2id mode works by using argon2i for the first half of the first pass
    135 // and argon2d further on. It provides therefore protection from side-channel
    136 // attacks and brute-force cost savings due to memory trade-offs.
    137 //
    138 // If you are unsure which variant to use, argon2id is recommended.
    139 export fn argon2id(
    140 	dest: []u8,
    141 	password: []u8,
    142 	salt: []u8,
    143 	cfg: *conf,
    144 ) (void | nomem) = {
    145 	return argon2(dest, password, salt, cfg, mode::ID);
    146 };
    147 
    148 fn argon2(
    149 	dest: []u8,
    150 	password: []u8,
    151 	salt: []u8,
    152 	cfg: *conf,
    153 	mode: mode,
    154 ) (void | nomem) = {
    155 	assert(endian::host == &endian::little, "TODO big endian support");
    156 
    157 	assert(len(dest) >= 4 && len(dest) <= types::U32_MAX);
    158 	assert(len(password) <= types::U32_MAX);
    159 	assert(len(salt) >= 8 && len(salt) <= types::U32_MAX);
    160 	assert(cfg.parallel >= 1);
    161 	assert(cfg.passes >= 1);
    162 	assert(len(cfg.secret) <= types::U32_MAX);
    163 	assert(len(cfg.data) <= types::U32_MAX);
    164 
    165 	let initmemsize = 0u32;
    166 	let mem: []u64 = match (cfg.mem) {
    167 	case let mem: []u64 =>
    168 		assert(len(mem) >= 8 * cfg.parallel * BLOCKSIZE
    169 			&& len(mem) % BLOCKSIZE == 0
    170 			&& len(mem) / BLOCKSIZE <= types::U32_MAX);
    171 		initmemsize = (len(mem) / BLOCKSIZE): u32;
    172 
    173 		// round down memory to nearest multiple of 4 times parallel
    174 		const memsize = len(mem) - len(mem)
    175 			% (4 * cfg.parallel * BLOCKSIZE);
    176 		yield mem[..memsize];
    177 	case let memsize: u32 =>
    178 		assert(memsize >= 8 * cfg.parallel
    179 			&& memsize <= types::U32_MAX);
    180 
    181 		initmemsize = memsize;
    182 		const memsize = memsize - memsize % (4 * cfg.parallel);
    183 		yield alloc([0...], memsize * BLOCKSIZE): []u64;
    184 	};
    185 
    186 	let h0: [64]u8 = [0...];
    187 	inithash(&h0, len(dest): u32, password, salt, cfg, mode, initmemsize);
    188 
    189 	const memsize = (len(mem) / BLOCKSIZE): u32;
    190 	const cols = 4 * (memsize / (4 * cfg.parallel));
    191 	let ctx = context {
    192 		rows = cfg.parallel,
    193 		cols = cols,
    194 		sliceblocks = cols / 4,
    195 		pass = 0,
    196 		mem = mem,
    197 		mode = mode,
    198 		seedsinit = [0...],
    199 		seedblock = [0...],
    200 		...
    201 	};
    202 
    203 	// hash first and second blocks of each row
    204 	for (let i = 0z; i < ctx.rows; i += 1) {
    205 		let src: [72]u8 = [0...];
    206 		src[..64] = h0[..];
    207 
    208 		endian::leputu32(src[64..68], 0);
    209 		endian::leputu32(src[68..], i: u32);
    210 		varhash(blocku8(&ctx, i, 0), src);
    211 
    212 		endian::leputu32(src[64..68], 1);
    213 		endian::leputu32(src[68..], i: u32);
    214 		varhash(blocku8(&ctx, i, 1), src);
    215 	};
    216 
    217 	// process segments
    218 	for (ctx.pass < cfg.passes; ctx.pass += 1) {
    219 		for (let s = 0z; s < SLICES; s += 1) {
    220 			for (let i = 0z; i < ctx.rows; i += 1) {
    221 				segproc(cfg, &ctx, i, s);
    222 			};
    223 		};
    224 	};
    225 
    226 	// final hash
    227 	let b = blocku8(&ctx, 0, ctx.cols - 1);
    228 	for (let i = 1z; i < ctx.rows; i += 1) {
    229 		math::xor(b, b, blocku8(&ctx, i, ctx.cols - 1));
    230 	};
    231 
    232 	varhash(dest, b);
    233 
    234 	bytes::zero(h0);
    235 	bytes::zero((ctx.mem: *[*]u8)[..len(ctx.mem) * size(u64)]);
    236 
    237 	if (cfg.mem is u32) {
    238 		// mem was allocated internally
    239 		free(ctx.mem);
    240 	};
    241 };
    242 
    243 fn block(ctx: *context, i: size, j: size) []u64 = {
    244 	let index = (ctx.cols * i + j) * BLOCKSIZE;
    245 	return ctx.mem[index..index + BLOCKSIZE];
    246 };
    247 
    248 fn blocku8(ctx: *context, i: size, j: size) []u8 = {
    249 	return (block(ctx, i, j): *[*]u8)[..BLOCKSIZE * size(u64)];
    250 };
    251 
    252 fn refblock(cfg: *conf, ctx: *context, seed: u64, i: size, j: size) []u64 = {
    253 	const segstart = (j - (j % ctx.sliceblocks)) / ctx.sliceblocks;
    254 	const index = j % ctx.sliceblocks;
    255 
    256 	const l: size = if (segstart == 0 && ctx.pass == 0) {
    257 		yield i;
    258 	} else {
    259 		yield (seed >> 32) % cfg.parallel;
    260 	};
    261 
    262 	let poolstart: u64 = ((segstart + 1) % SLICES) * ctx.sliceblocks;
    263 	let poolsize: u64 = 3 * ctx.sliceblocks;
    264 
    265 	if (i == l) {
    266 		poolsize += index;
    267 	};
    268 
    269 	if (ctx.pass == 0) {
    270 		poolstart = 0;
    271 		poolsize = segstart * ctx.sliceblocks;
    272 		if (segstart == 0 || i == l) {
    273 			poolsize += index;
    274 		};
    275 	};
    276 
    277 	if (index == 0 || i == l) {
    278 		poolsize -= 1;
    279 	};
    280 
    281 	const j1: u64 = seed & 0xffffffff;
    282 	const x: u64 = (j1 * j1) >> 32;
    283 	const y: u64 = (poolsize * x) >> 32;
    284 	const z: u64 = (poolstart + poolsize - (y+1)) % ctx.cols: u64;
    285 
    286 	return block(ctx, l: size, z: size);
    287 };
    288 
    289 fn inithash(
    290 	dest: *[64]u8,
    291 	taglen: u32,
    292 	password: []u8,
    293 	salt: []u8,
    294 	cfg: *conf,
    295 	mode: mode,
    296 	memsize: u32,
    297 ) void = {
    298 	let u32buf: [4]u8 = [0...];
    299 	let h = blake2b::blake2b([], 64);
    300 	defer hash::close(&h);
    301 
    302 	hash_leputu32(&h, cfg.parallel);
    303 	hash_leputu32(&h, taglen);
    304 	hash_leputu32(&h, memsize);
    305 	hash_leputu32(&h, cfg.passes);
    306 	hash_leputu32(&h, cfg.version);
    307 	hash_leputu32(&h, mode: u32);
    308 	hash_leputu32(&h, len(password): u32);
    309 	hash::write(&h, password);
    310 
    311 	hash_leputu32(&h, len(salt): u32);
    312 	hash::write(&h, salt);
    313 
    314 	hash_leputu32(&h, len(cfg.secret): u32);
    315 	hash::write(&h, cfg.secret);
    316 
    317 	hash_leputu32(&h, len(cfg.data): u32);
    318 	hash::write(&h, cfg.data);
    319 
    320 	hash::sum(&h, dest[..]);
    321 };
    322 
    323 fn hash_leputu32(h: *hash::hash, u: u32) void = {
    324 	let buf: [4]u8 = [0...];
    325 	endian::leputu32(buf, u);
    326 	hash::write(h, buf[..]);
    327 };
    328 
    329 // The variable hash function H'
    330 fn varhash(dest: []u8, block: []u8) void = {
    331 	let u32buf: [4]u8 = [0...];
    332 
    333 	if (len(dest) <= 64) {
    334 		let h = blake2b::blake2b([], len(dest));
    335 		defer hash::close(&h);
    336 		hash_leputu32(&h, len(dest): u32);
    337 		hash::write(&h, block);
    338 		hash::sum(&h, dest);
    339 		return;
    340 	};
    341 
    342 	// TODO this may be replaced with a constant time divceil in future to
    343 	// avoid leaking the dest len.
    344 	const r = divceil(len(dest): u32, 32) - 2;
    345 	let v: [64]u8 = [0...];
    346 
    347 	let destbuf = memio::fixed(dest);
    348 
    349 	let h = blake2b::blake2b([], 64);
    350 	hash_leputu32(&h, len(dest): u32);
    351 	hash::write(&h, block);
    352 	hash::sum(&h, v[..]);
    353 	hash::close(&h);
    354 
    355 	io::writeall(&destbuf, v[..32])!;
    356 
    357 	for (let i = 1z; i < r; i += 1) {
    358 		let h = blake2b::blake2b([], 64);
    359 		hash::write(&h, v[..]);
    360 		hash::sum(&h, v[..]);
    361 		hash::close(&h);
    362 		io::writeall(&destbuf, v[..32])!;
    363 	};
    364 
    365 	const remainder = len(dest) - 32 * r;
    366 	let hend = blake2b::blake2b([], remainder);
    367 	defer hash::close(&hend);
    368 	hash::write(&hend, v[..]);
    369 	hash::sum(&hend, v[..remainder]);
    370 	io::writeall(&destbuf, v[..remainder])!;
    371 };
    372 
    373 fn divceil(dividend: u32, divisor: u32) u32 = {
    374 	let result = dividend / divisor;
    375 	if (dividend % divisor > 0) {
    376 		result += 1;
    377 	};
    378 	return result;
    379 };
    380 
    381 fn xorblock(dest: []u64, x: []u64, y: []u64) void = {
    382 	math::xor((dest: *[*]u8)[..len(dest) * size(u64)],
    383 		(x: *[*]u8)[..len(dest) * size(u64)],
    384 		(y: *[*]u8)[..len(dest) * size(u64)]);
    385 };
    386 
    387 fn segproc(cfg: *conf, ctx: *context, i: size, slice: size) void = {
    388 	const init = switch (ctx.mode) {
    389 	case mode::I =>
    390 		yield true;
    391 	case mode::ID =>
    392 		yield ctx.pass == 0 && slice < 2;
    393 	case mode::D =>
    394 		yield false;
    395 	};
    396 	if (init) {
    397 		ctx.seedsinit[0] = ctx.pass;
    398 		ctx.seedsinit[1] = i;
    399 		ctx.seedsinit[2] = slice;
    400 		ctx.seedsinit[3] = len(ctx.mem) / BLOCKSIZE;
    401 		ctx.seedsinit[4] = cfg.passes;
    402 		ctx.seedsinit[5] = ctx.mode: u64;
    403 		ctx.seedsinit[6] = 0;
    404 
    405 		if (ctx.pass == 0 && slice == 0) {
    406 			ctx.seedsinit[6] += 1;
    407 			compress(ctx.seedblock, ctx.seedsinit, zeroblock,
    408 				false);
    409 			compress(ctx.seedblock, ctx.seedblock, zeroblock,
    410 				false);
    411 		};
    412 	};
    413 
    414 	for (let b = 0z; b < ctx.sliceblocks; b += 1) {
    415 		const j = slice * ctx.sliceblocks + b;
    416 		if (ctx.pass == 0 && j < 2) {
    417 			continue;
    418 		};
    419 
    420 		const dmodeseed = switch (ctx.mode) {
    421 		case mode::D =>
    422 			yield true;
    423 		case mode::ID =>
    424 			yield ctx.pass > 0 || slice > 1;
    425 		case mode::I =>
    426 			yield false;
    427 		};
    428 
    429 		const pj = if (j == 0) ctx.cols - 1 else j - 1;
    430 		let prev = block(ctx, i, pj);
    431 
    432 		const seed: u64 = if (dmodeseed) {
    433 			yield prev[0];
    434 		} else {
    435 			if (b % BLOCKSIZE == 0) {
    436 				ctx.seedsinit[6] += 1;
    437 				compress(ctx.seedblock, ctx.seedsinit,
    438 					zeroblock, false);
    439 				compress(ctx.seedblock, ctx.seedblock,
    440 					zeroblock, false);
    441 			};
    442 			yield ctx.seedblock[b % BLOCKSIZE];
    443 		};
    444 
    445 		let ref = refblock(cfg, ctx, seed, i, j);
    446 		compress(block(ctx, i, j), prev, ref, ctx.pass > 0);
    447 	};
    448 };
    449 
    450 fn compress(dest: []u64, x: []u64, y: []u64, xor: bool) void = {
    451 	let r: block64 = [0...];
    452 	xorblock(r, x, y);
    453 
    454 	let z: block64 = [0...];
    455 	z[..] = r[..];
    456 
    457 	for (let i = 0z; i < 128; i += 16) {
    458 		perm(&z[i], &z[i + 1], &z[i + 2], &z[i + 3], &z[i + 4],
    459 			&z[i + 5], &z[i + 6], &z[i + 7], &z[i + 8], &z[i + 9],
    460 			&z[i + 10], &z[i + 11], &z[i + 12], &z[i + 13],
    461 			&z[i + 14], &z[i + 15]);
    462 	};
    463 
    464 	for (let i = 0z; i < 16; i += 2) {
    465 		perm(&z[i], &z[i + 1], &z[i + 16], &z[i + 17], &z[i + 32],
    466 			&z[i + 33], &z[i + 48], &z[i + 49], &z[i + 64],
    467 			&z[i + 65], &z[i + 80], &z[i + 81], &z[i + 96],
    468 			&z[i + 97], &z[i + 112], &z[i + 113]);
    469 	};
    470 
    471 	if (xor) {
    472 		xorblock(r, r, dest);
    473 	};
    474 
    475 	xorblock(dest, z, r);
    476 };
    477 
    478 fn perm(
    479 	x0: *u64,
    480 	x1: *u64,
    481 	x2: *u64,
    482 	x3: *u64,
    483 	x4: *u64,
    484 	x5: *u64,
    485 	x6: *u64,
    486 	x7: *u64,
    487 	x8: *u64,
    488 	x9: *u64,
    489 	x10: *u64,
    490 	x11: *u64,
    491 	x12: *u64,
    492 	x13: *u64,
    493 	x14: *u64,
    494 	x15: *u64,
    495 ) void = {
    496 	mix(x0, x4, x8, x12);
    497 	mix(x1, x5, x9, x13);
    498 	mix(x2, x6, x10, x14);
    499 	mix(x3, x7, x11, x15);
    500 
    501 	mix(x0, x5, x10, x15);
    502 	mix(x1, x6, x11, x12);
    503 	mix(x2, x7, x8, x13);
    504 	mix(x3, x4, x9, x14);
    505 };
    506 
    507 fn mix(a: *u64, b: *u64, c: *u64, d: *u64) void = {
    508 	*a = *a + *b + 2 * trunc(*a) * trunc(*b);
    509 	*d = math::rotr64(*d ^ *a, 32);
    510 	*c = *c + *d + 2 * trunc(*c) * trunc(*d);
    511 	*b = math::rotr64(*b ^ *c, 24);
    512 
    513 	*a = *a + *b + 2 * trunc(*a) * trunc(*b);
    514 	*d = math::rotr64(*d ^ *a, 16);
    515 	*c = *c + *d + 2 * trunc(*c) * trunc(*d);
    516 	*b = math::rotr64(*b ^ *c, 63);
    517 };
    518 
    519 fn trunc(a: u64) u64 = {
    520 	return a & 0xffffffff;
    521 };