hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

argon2.ha (13584B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 // (c) 2022 Alexey Yerin <yyp@disroot.org>
      5 // (c) 2021-2022 Armin Preiml <apreiml@strohwolke.at>
      6 // (c) 2021-2022 Drew DeVault <sir@cmpwn.com>
      7 use bytes;
      8 use crypto::blake2b;
      9 use crypto::math;
     10 use endian;
     11 use errors::{nomem};
     12 use hash;
     13 use io;
     14 use memio;
     15 use types;
     16 
     17 // Latest version of argon2 supported by this implementation (1.3).
     18 export def VERSION: u8 = 0x13;
     19 
     20 // Number of u64 elements of one block.
     21 export def BLOCKSZ: u32 = 128;
     22 
     23 def SLICES: size = 4;
     24 
     25 type block64 = [BLOCKSZ]u64;
     26 
     27 const zeroblock: block64 = [0...];
     28 
     29 type mode = enum {
     30 	D = 0,
     31 	I = 1,
     32 	ID = 2,
     33 };
     34 
     35 // This type provides configuration options for the argon2 algorithm. Most users
     36 // will find [[default_conf]] or [[low_mem_conf]] suitable for their needs
     37 // without providing a custom configuration. If writing a custom configuration,
     38 // consult the RFC for advice on selecting suitable values for your use-case.
     39 //
     40 // 'parallel' specifies the number of parallel processes. 'pass' configures the
     41 // number of iterations. Both values must be at least one. Note: the Hare
     42 // implementation of argon2 does not process hashes in parallel, though it will
     43 // still compute the correct hash if this value is greater than one.
     44 //
     45 // 'version' specifies the version of the argon2 function. The implementation
     46 // currently only supports version 1.3. Use [[VERSION]] here.
     47 //
     48 // 'secret' and 'data' are optional byte arrays that are applied to the initial
     49 // state. Consult the RFC for details.
     50 //
     51 // The 'mem' parameter is used to configure working memory used during the
     52 // computation. The argon2 algorithm requires a large amount of memory to
     53 // compute hashes. If 'mem' set to a u32, it is interpreted as the desired
     54 // number of 1024-byte blocks the implementation shall allocate for you. If the
     55 // caller wants to manage the allocation itself, provide a []u8 instead. The
     56 // length of this slice must be at least 8 times the value of 'parallel' in
     57 // blocks, and must be a multiple of [[BLOCKSZ]]. To have the implementation
     58 // allocate 64 KiB, set 'mem' to 64. To use the same amount of caller-provided
     59 // memory, provide a slice of length 64 * [[BLOCKSZ]].
     60 export type conf = struct {
     61 	mem: (u32 | []u64),
     62 	parallel: u32,
     63 	passes: u32,
     64 	version: u8,
     65 	secret: []u8,
     66 	data: []u8
     67 };
     68 
     69 // The default recommended configuration for most use cases. This configuration
     70 // uses 2 GiB of working memory. A 16-byte 'salt' and 32-byte 'dest' parameter
     71 // is recommended in combination with this configuration.
     72 export const default_conf: conf = conf {
     73 	mem = 2 * 1024 * 1024,
     74 	passes = 1,
     75 	parallel = 4,
     76 	version = 0x13,
     77 	...
     78 };
     79 
     80 // The default recommended configuration for memory-constrained use cases. This
     81 // configuration uses 64 MiB of working memory. A 16-byte 'salt' and 32-byte
     82 // 'dest' parameter is recommended in combination with this configuration.
     83 export const low_mem_conf: conf = conf {
     84 	mem = 64 * 1024,
     85 	passes = 3,
     86 	parallel = 4,
     87 	version = 0x13,
     88 	...
     89 };
     90 
     91 type context = struct {
     92 	mode: mode,
     93 	cols: size,
     94 	rows: size,
     95 	sliceblocks: size,
     96 	mem: []u64,
     97 	pass: u32,
     98 	seedsinit: block64,
     99 	seedblock: block64,
    100 };
    101 
    102 // Computes an argon2d hash, writing the digest to 'dest'. A 'salt' length of 16
    103 // bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32 bytes
    104 // is recommended, and 4 bytes is the minimum.
    105 //
    106 // The argon2d mode uses data-dependent memory access and is suitable for
    107 // applications with no threats of side-channel timing attacks.
    108 export fn argon2d(
    109 	dest: []u8,
    110 	password: []u8,
    111 	salt: []u8,
    112 	cfg: *conf,
    113 ) (void | nomem) = {
    114 	return argon2(dest, password, salt, cfg, mode::D);
    115 };
    116 
    117 // Computes an argon2i hash, writing the digest to 'dest'. A 'salt' length of 16
    118 // bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32 bytes
    119 // is recommended, and 4 bytes is the minimum.
    120 //
    121 // The argon2i mode uses data-independent memory access and is suitable for
    122 // password hashing and key derivation. It makes more passes over memory to
    123 // protect from trade-off attacks.
    124 export fn argon2i(
    125 	dest: []u8,
    126 	password: []u8,
    127 	salt: []u8,
    128 	cfg: *conf,
    129 ) (void | nomem) = {
    130 	return argon2(dest, password, salt, cfg, mode::I);
    131 };
    132 
    133 // Computes an argon2id hash, writing the digest to 'dest'. A 'salt' length of
    134 // 16 bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32
    135 // bytes is recommended, and 4 bytes is the minimum.
    136 //
    137 // The argon2id mode works by using argon2i for the first half of the first pass
    138 // and argon2d further on. It provides therefore protection from side-channel
    139 // attacks and brute-force cost savings due to memory trade-offs.
    140 //
    141 // If you are unsure which variant to use, argon2id is recommended.
    142 export fn argon2id(
    143 	dest: []u8,
    144 	password: []u8,
    145 	salt: []u8,
    146 	cfg: *conf,
    147 ) (void | nomem) = {
    148 	return argon2(dest, password, salt, cfg, mode::ID);
    149 };
    150 
    151 fn argon2(
    152 	dest: []u8,
    153 	password: []u8,
    154 	salt: []u8,
    155 	cfg: *conf,
    156 	mode: mode,
    157 ) (void | nomem) = {
    158 	assert(endian::host == &endian::little, "TODO big endian support");
    159 
    160 	assert(len(dest) >= 4 && len(dest) <= types::U32_MAX);
    161 	assert(len(password) <= types::U32_MAX);
    162 	assert(len(salt) >= 8 && len(salt) <= types::U32_MAX);
    163 	assert(cfg.parallel >= 1);
    164 	assert(cfg.passes >= 1);
    165 	assert(len(cfg.secret) <= types::U32_MAX);
    166 	assert(len(cfg.data) <= types::U32_MAX);
    167 
    168 	let initmemsize = 0u32;
    169 	let mem: []u64 = match (cfg.mem) {
    170 	case let mem: []u64 =>
    171 		assert(len(mem) >= 8 * cfg.parallel * BLOCKSZ
    172 			&& len(mem) % BLOCKSZ == 0
    173 			&& len(mem) / BLOCKSZ <= types::U32_MAX);
    174 		initmemsize = (len(mem) / BLOCKSZ): u32;
    175 
    176 		// round down memory to nearest multiple of 4 times parallel
    177 		const memsize = len(mem) - len(mem)
    178 			% (4 * cfg.parallel * BLOCKSZ);
    179 		yield mem[..memsize];
    180 	case let memsize: u32 =>
    181 		assert(memsize >= 8 * cfg.parallel
    182 			&& memsize <= types::U32_MAX);
    183 
    184 		initmemsize = memsize;
    185 		const memsize = memsize - memsize % (4 * cfg.parallel);
    186 		yield alloc([0...], memsize * BLOCKSZ): []u64;
    187 	};
    188 
    189 	let h0: [64]u8 = [0...];
    190 	inithash(&h0, len(dest): u32, password, salt, cfg, mode, initmemsize);
    191 
    192 	const memsize = (len(mem) / BLOCKSZ): u32;
    193 	const cols = 4 * (memsize / (4 * cfg.parallel));
    194 	let ctx = context {
    195 		rows = cfg.parallel,
    196 		cols = cols,
    197 		sliceblocks = cols / 4,
    198 		pass = 0,
    199 		mem = mem,
    200 		mode = mode,
    201 		seedsinit = [0...],
    202 		seedblock = [0...],
    203 		...
    204 	};
    205 
    206 	// hash first and second blocks of each row
    207 	for (let i = 0z; i < ctx.rows; i += 1) {
    208 		let src: [72]u8 = [0...];
    209 		src[..64] = h0[..];
    210 
    211 		endian::leputu32(src[64..68], 0);
    212 		endian::leputu32(src[68..], i: u32);
    213 		varhash(blocku8(&ctx, i, 0), src);
    214 
    215 		endian::leputu32(src[64..68], 1);
    216 		endian::leputu32(src[68..], i: u32);
    217 		varhash(blocku8(&ctx, i, 1), src);
    218 	};
    219 
    220 	// process segments
    221 	for (ctx.pass < cfg.passes; ctx.pass += 1) {
    222 		for (let s = 0z; s < SLICES; s += 1) {
    223 			for (let i = 0z; i < ctx.rows; i += 1) {
    224 				segproc(cfg, &ctx, i, s);
    225 			};
    226 		};
    227 	};
    228 
    229 	// final hash
    230 	let b = blocku8(&ctx, 0, ctx.cols - 1);
    231 	for (let i = 1z; i < ctx.rows; i += 1) {
    232 		math::xor(b, b, blocku8(&ctx, i, ctx.cols - 1));
    233 	};
    234 
    235 	varhash(dest, b);
    236 
    237 	bytes::zero(h0);
    238 	bytes::zero((ctx.mem: *[*]u8)[..len(ctx.mem) * size(u64)]);
    239 
    240 	if (cfg.mem is u32) {
    241 		// mem was allocated internally
    242 		free(ctx.mem);
    243 	};
    244 };
    245 
    246 fn block(ctx: *context, i: size, j: size) []u64 = {
    247 	let index = (ctx.cols * i + j) * BLOCKSZ;
    248 	return ctx.mem[index..index + BLOCKSZ];
    249 };
    250 
    251 fn blocku8(ctx: *context, i: size, j: size) []u8 = {
    252 	return (block(ctx, i, j): *[*]u8)[..BLOCKSZ * size(u64)];
    253 };
    254 
    255 fn refblock(cfg: *conf, ctx: *context, seed: u64, i: size, j: size) []u64 = {
    256 	const segstart = (j - (j % ctx.sliceblocks)) / ctx.sliceblocks;
    257 	const index = j % ctx.sliceblocks;
    258 
    259 	const l: size = if (segstart == 0 && ctx.pass == 0) {
    260 		yield i;
    261 	} else {
    262 		yield (seed >> 32) % cfg.parallel;
    263 	};
    264 
    265 	let poolstart: u64 = ((segstart + 1) % SLICES) * ctx.sliceblocks;
    266 	let poolsize: u64 = 3 * ctx.sliceblocks;
    267 
    268 	if (i == l) {
    269 		poolsize += index;
    270 	};
    271 
    272 	if (ctx.pass == 0) {
    273 		poolstart = 0;
    274 		poolsize = segstart * ctx.sliceblocks;
    275 		if (segstart == 0 || i == l) {
    276 			poolsize += index;
    277 		};
    278 	};
    279 
    280 	if (index == 0 || i == l) {
    281 		poolsize -= 1;
    282 	};
    283 
    284 	const j1: u64 = seed & 0xffffffff;
    285 	const x: u64 = (j1 * j1) >> 32;
    286 	const y: u64 = (poolsize * x) >> 32;
    287 	const z: u64 = (poolstart + poolsize - (y+1)) % ctx.cols: u64;
    288 
    289 	return block(ctx, l: size, z: size);
    290 };
    291 
    292 fn inithash(
    293 	dest: *[64]u8,
    294 	taglen: u32,
    295 	password: []u8,
    296 	salt: []u8,
    297 	cfg: *conf,
    298 	mode: mode,
    299 	memsize: u32,
    300 ) void = {
    301 	let u32buf: [4]u8 = [0...];
    302 	let h = blake2b::blake2b([], 64);
    303 	defer hash::close(&h);
    304 
    305 	hash_leputu32(&h, cfg.parallel);
    306 	hash_leputu32(&h, taglen);
    307 	hash_leputu32(&h, memsize);
    308 	hash_leputu32(&h, cfg.passes);
    309 	hash_leputu32(&h, cfg.version);
    310 	hash_leputu32(&h, mode: u32);
    311 	hash_leputu32(&h, len(password): u32);
    312 	hash::write(&h, password);
    313 
    314 	hash_leputu32(&h, len(salt): u32);
    315 	hash::write(&h, salt);
    316 
    317 	hash_leputu32(&h, len(cfg.secret): u32);
    318 	hash::write(&h, cfg.secret);
    319 
    320 	hash_leputu32(&h, len(cfg.data): u32);
    321 	hash::write(&h, cfg.data);
    322 
    323 	hash::sum(&h, dest[..]);
    324 };
    325 
    326 fn hash_leputu32(h: *hash::hash, u: u32) void = {
    327 	let buf: [4]u8 = [0...];
    328 	endian::leputu32(buf, u);
    329 	hash::write(h, buf[..]);
    330 };
    331 
    332 // The variable hash function H'
    333 fn varhash(dest: []u8, block: []u8) void = {
    334 	let u32buf: [4]u8 = [0...];
    335 
    336 	if (len(dest) <= 64) {
    337 		let h = blake2b::blake2b([], len(dest));
    338 		defer hash::close(&h);
    339 		hash_leputu32(&h, len(dest): u32);
    340 		hash::write(&h, block);
    341 		hash::sum(&h, dest);
    342 		return;
    343 	};
    344 
    345 	// TODO this may be replaced with a constant time divceil in future to
    346 	// avoid leaking the dest len.
    347 	const r = divceil(len(dest): u32, 32) - 2;
    348 	let v: [64]u8 = [0...];
    349 
    350 	let destbuf = memio::fixed(dest);
    351 
    352 	let h = blake2b::blake2b([], 64);
    353 	hash_leputu32(&h, len(dest): u32);
    354 	hash::write(&h, block);
    355 	hash::sum(&h, v[..]);
    356 	hash::close(&h);
    357 
    358 	io::writeall(&destbuf, v[..32])!;
    359 
    360 	for (let i = 1z; i < r; i += 1) {
    361 		let h = blake2b::blake2b([], 64);
    362 		hash::write(&h, v[..]);
    363 		hash::sum(&h, v[..]);
    364 		hash::close(&h);
    365 		io::writeall(&destbuf, v[..32])!;
    366 	};
    367 
    368 	const remainder = len(dest) - 32 * r;
    369 	let hend = blake2b::blake2b([], remainder);
    370 	defer hash::close(&hend);
    371 	hash::write(&hend, v[..]);
    372 	hash::sum(&hend, v[..remainder]);
    373 	io::writeall(&destbuf, v[..remainder])!;
    374 };
    375 
    376 fn divceil(dividend: u32, divisor: u32) u32 = {
    377 	let result = dividend / divisor;
    378 	if (dividend % divisor > 0) {
    379 		result += 1;
    380 	};
    381 	return result;
    382 };
    383 
    384 fn xorblock(dest: []u64, x: []u64, y: []u64) void = {
    385 	for (let i = 0z; i < len(dest); i += 1) {
    386 		dest[i] = x[i] ^ y[i];
    387 	};
    388 };
    389 
    390 fn segproc(cfg: *conf, ctx: *context, i: size, slice: size) void = {
    391 	const init = switch (ctx.mode) {
    392 	case mode::I =>
    393 		yield true;
    394 	case mode::ID =>
    395 		yield ctx.pass == 0 && slice < 2;
    396 	case mode::D =>
    397 		yield false;
    398 	};
    399 	if (init) {
    400 		ctx.seedsinit[0] = ctx.pass;
    401 		ctx.seedsinit[1] = i;
    402 		ctx.seedsinit[2] = slice;
    403 		ctx.seedsinit[3] = len(ctx.mem) / BLOCKSZ;
    404 		ctx.seedsinit[4] = cfg.passes;
    405 		ctx.seedsinit[5] = ctx.mode: u64;
    406 		ctx.seedsinit[6] = 0;
    407 
    408 		if (ctx.pass == 0 && slice == 0) {
    409 			ctx.seedsinit[6] += 1;
    410 			compress(ctx.seedblock, ctx.seedsinit, zeroblock,
    411 				false);
    412 			compress(ctx.seedblock, ctx.seedblock, zeroblock,
    413 				false);
    414 		};
    415 	};
    416 
    417 	for (let b = 0z; b < ctx.sliceblocks; b += 1) {
    418 		const j = slice * ctx.sliceblocks + b;
    419 		if (ctx.pass == 0 && j < 2) {
    420 			continue;
    421 		};
    422 
    423 		const dmodeseed = switch (ctx.mode) {
    424 		case mode::D =>
    425 			yield true;
    426 		case mode::ID =>
    427 			yield ctx.pass > 0 || slice > 1;
    428 		case mode::I =>
    429 			yield false;
    430 		};
    431 
    432 		const pj = if (j == 0) ctx.cols - 1 else j - 1;
    433 		let prev = block(ctx, i, pj);
    434 
    435 		const seed: u64 = if (dmodeseed) {
    436 			yield prev[0];
    437 		} else {
    438 			if (b % BLOCKSZ == 0) {
    439 				ctx.seedsinit[6] += 1;
    440 				compress(ctx.seedblock, ctx.seedsinit,
    441 					zeroblock, false);
    442 				compress(ctx.seedblock, ctx.seedblock,
    443 					zeroblock, false);
    444 			};
    445 			yield ctx.seedblock[b % BLOCKSZ];
    446 		};
    447 
    448 		let ref = refblock(cfg, ctx, seed, i, j);
    449 		compress(block(ctx, i, j), prev, ref, ctx.pass > 0);
    450 	};
    451 };
    452 
    453 fn compress(dest: []u64, x: []u64, y: []u64, xor: bool) void = {
    454 	let r: block64 = [0...];
    455 	xorblock(r, x, y);
    456 
    457 	let z: block64 = [0...];
    458 	z[..] = r[..];
    459 
    460 	for (let i = 0z; i < 128; i += 16) {
    461 		perm(&z[i], &z[i + 1], &z[i + 2], &z[i + 3], &z[i + 4],
    462 			&z[i + 5], &z[i + 6], &z[i + 7], &z[i + 8], &z[i + 9],
    463 			&z[i + 10], &z[i + 11], &z[i + 12], &z[i + 13],
    464 			&z[i + 14], &z[i + 15]);
    465 	};
    466 
    467 	for (let i = 0z; i < 16; i += 2) {
    468 		perm(&z[i], &z[i + 1], &z[i + 16], &z[i + 17], &z[i + 32],
    469 			&z[i + 33], &z[i + 48], &z[i + 49], &z[i + 64],
    470 			&z[i + 65], &z[i + 80], &z[i + 81], &z[i + 96],
    471 			&z[i + 97], &z[i + 112], &z[i + 113]);
    472 	};
    473 
    474 	if (xor) {
    475 		xorblock(r, r, dest);
    476 	};
    477 
    478 	xorblock(dest, z, r);
    479 };
    480 
    481 fn perm(
    482 	x0: *u64,
    483 	x1: *u64,
    484 	x2: *u64,
    485 	x3: *u64,
    486 	x4: *u64,
    487 	x5: *u64,
    488 	x6: *u64,
    489 	x7: *u64,
    490 	x8: *u64,
    491 	x9: *u64,
    492 	x10: *u64,
    493 	x11: *u64,
    494 	x12: *u64,
    495 	x13: *u64,
    496 	x14: *u64,
    497 	x15: *u64,
    498 ) void = {
    499 	mix(x0, x4, x8, x12);
    500 	mix(x1, x5, x9, x13);
    501 	mix(x2, x6, x10, x14);
    502 	mix(x3, x7, x11, x15);
    503 
    504 	mix(x0, x5, x10, x15);
    505 	mix(x1, x6, x11, x12);
    506 	mix(x2, x7, x8, x13);
    507 	mix(x3, x4, x9, x14);
    508 };
    509 
    510 fn mix(a: *u64, b: *u64, c: *u64, d: *u64) void = {
    511 	*a = *a + *b + 2 * (*a & 0xffffffff) * (*b & 0xffffffff);
    512 	*d = math::rotr64(*d ^ *a, 32);
    513 	*c = *c + *d + 2 * (*c & 0xffffffff) * (*d & 0xffffffff);
    514 	*b = math::rotr64(*b ^ *c, 24);
    515 
    516 	*a = *a + *b + 2 * (*a & 0xffffffff) * (*b & 0xffffffff);
    517 	*d = math::rotr64(*d ^ *a, 16);
    518 	*c = *c + *d + 2 * (*c & 0xffffffff) * (*d & 0xffffffff);
    519 	*b = math::rotr64(*b ^ *c, 63);
    520 };