hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

argon2.ha (13421B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use bytes;
      5 use crypto::blake2b;
      6 use crypto::math;
      7 use endian;
      8 use hash;
      9 use io;
     10 use memio;
     11 use types;
     12 
     13 // Latest version of argon2 supported by this implementation (1.3).
     14 export def VERSION: u8 = 0x13;
     15 
     16 // Number of u64 elements of one block.
     17 export def BLOCKSZ: u32 = 128;
     18 
     19 def SLICES: size = 4;
     20 
     21 type block64 = [BLOCKSZ]u64;
     22 
     23 const zeroblock: block64 = [0...];
     24 
     25 type mode = enum {
     26 	D = 0,
     27 	I = 1,
     28 	ID = 2,
     29 };
     30 
     31 // This type provides configuration options for the argon2 algorithm. Most users
     32 // will find [[default_conf]] or [[low_mem_conf]] suitable for their needs
     33 // without providing a custom configuration. If writing a custom configuration,
     34 // consult the RFC for advice on selecting suitable values for your use-case.
     35 //
     36 // 'parallel' specifies the number of parallel processes. 'pass' configures the
     37 // number of iterations. Both values must be at least one. Note: the Hare
     38 // implementation of argon2 does not process hashes in parallel, though it will
     39 // still compute the correct hash if this value is greater than one.
     40 //
     41 // 'version' specifies the version of the argon2 function. The implementation
     42 // currently only supports version 1.3. Use [[VERSION]] here.
     43 //
     44 // 'secret' and 'data' are optional byte arrays that are applied to the initial
     45 // state. Consult the RFC for details.
     46 //
     47 // The 'mem' parameter is used to configure working memory used during the
     48 // computation. The argon2 algorithm requires a large amount of memory to
     49 // compute hashes. If 'mem' set to a u32, it is interpreted as the desired
     50 // number of 1024-byte blocks the implementation shall allocate for you. If the
     51 // caller wants to manage the allocation itself, provide a []u8 instead. The
     52 // length of this slice must be at least 8 times the value of 'parallel' in
     53 // blocks, and must be a multiple of [[BLOCKSZ]]. To have the implementation
     54 // allocate 64 KiB, set 'mem' to 64. To use the same amount of caller-provided
     55 // memory, provide a slice of length 64 * [[BLOCKSZ]].
     56 export type conf = struct {
     57 	mem: (u32 | []u64),
     58 	parallel: u32,
     59 	passes: u32,
     60 	version: u8,
     61 	secret: []u8,
     62 	data: []u8
     63 };
     64 
     65 // The default recommended configuration for most use cases. This configuration
     66 // uses 2 GiB of working memory. A 16-byte 'salt' and 32-byte 'dest' parameter
     67 // is recommended in combination with this configuration.
     68 export const default_conf: conf = conf {
     69 	mem = 2 * 1024 * 1024,
     70 	passes = 1,
     71 	parallel = 4,
     72 	version = 0x13,
     73 	...
     74 };
     75 
     76 // The default recommended configuration for memory-constrained use cases. This
     77 // configuration uses 64 MiB of working memory. A 16-byte 'salt' and 32-byte
     78 // 'dest' parameter is recommended in combination with this configuration.
     79 export const low_mem_conf: conf = conf {
     80 	mem = 64 * 1024,
     81 	passes = 3,
     82 	parallel = 4,
     83 	version = 0x13,
     84 	...
     85 };
     86 
     87 type context = struct {
     88 	mode: mode,
     89 	cols: size,
     90 	rows: size,
     91 	sliceblocks: size,
     92 	mem: []u64,
     93 	pass: u32,
     94 	seedsinit: block64,
     95 	seedblock: block64,
     96 };
     97 
     98 // Computes an argon2d hash, writing the digest to 'dest'. A 'salt' length of 16
     99 // bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32 bytes
    100 // is recommended, and 4 bytes is the minimum.
    101 //
    102 // The argon2d mode uses data-dependent memory access and is suitable for
    103 // applications with no threats of side-channel timing attacks.
    104 export fn argon2d(
    105 	dest: []u8,
    106 	password: []u8,
    107 	salt: []u8,
    108 	cfg: *conf,
    109 ) (void | nomem) = {
    110 	return argon2(dest, password, salt, cfg, mode::D);
    111 };
    112 
    113 // Computes an argon2i hash, writing the digest to 'dest'. A 'salt' length of 16
    114 // bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32 bytes
    115 // is recommended, and 4 bytes is the minimum.
    116 //
    117 // The argon2i mode uses data-independent memory access and is suitable for
    118 // password hashing and key derivation. It makes more passes over memory to
    119 // protect from trade-off attacks.
    120 export fn argon2i(
    121 	dest: []u8,
    122 	password: []u8,
    123 	salt: []u8,
    124 	cfg: *conf,
    125 ) (void | nomem) = {
    126 	return argon2(dest, password, salt, cfg, mode::I);
    127 };
    128 
    129 // Computes an argon2id hash, writing the digest to 'dest'. A 'salt' length of
    130 // 16 bytes is recommended, and 8 bytes is the minimum. A 'dest' length of 32
    131 // bytes is recommended, and 4 bytes is the minimum.
    132 //
    133 // The argon2id mode works by using argon2i for the first half of the first pass
    134 // and argon2d further on. It provides therefore protection from side-channel
    135 // attacks and brute-force cost savings due to memory trade-offs.
    136 //
    137 // If you are unsure which variant to use, argon2id is recommended.
    138 export fn argon2id(
    139 	dest: []u8,
    140 	password: []u8,
    141 	salt: []u8,
    142 	cfg: *conf,
    143 ) (void | nomem) = {
    144 	return argon2(dest, password, salt, cfg, mode::ID);
    145 };
    146 
    147 fn argon2(
    148 	dest: []u8,
    149 	password: []u8,
    150 	salt: []u8,
    151 	cfg: *conf,
    152 	mode: mode,
    153 ) (void | nomem) = {
    154 	assert(endian::host == &endian::little, "TODO big endian support");
    155 
    156 	assert(len(dest) >= 4 && len(dest) <= types::U32_MAX);
    157 	assert(len(password) <= types::U32_MAX);
    158 	assert(len(salt) >= 8 && len(salt) <= types::U32_MAX);
    159 	assert(cfg.parallel >= 1);
    160 	assert(cfg.passes >= 1);
    161 	assert(len(cfg.secret) <= types::U32_MAX);
    162 	assert(len(cfg.data) <= types::U32_MAX);
    163 
    164 	let initmemsize = 0u32;
    165 	let mem: []u64 = match (cfg.mem) {
    166 	case let mem: []u64 =>
    167 		assert(len(mem) >= 8 * cfg.parallel * BLOCKSZ
    168 			&& len(mem) % BLOCKSZ == 0
    169 			&& len(mem) / BLOCKSZ <= types::U32_MAX);
    170 		initmemsize = (len(mem) / BLOCKSZ): u32;
    171 
    172 		// round down memory to nearest multiple of 4 times parallel
    173 		const memsize = len(mem) - len(mem)
    174 			% (4 * cfg.parallel * BLOCKSZ);
    175 		yield mem[..memsize];
    176 	case let memsize: u32 =>
    177 		assert(memsize >= 8 * cfg.parallel
    178 			&& memsize <= types::U32_MAX);
    179 
    180 		initmemsize = memsize;
    181 		const memsize = memsize - memsize % (4 * cfg.parallel);
    182 		yield alloc([0...], memsize * BLOCKSZ)!: []u64;
    183 	};
    184 
    185 	let h0: [64]u8 = [0...];
    186 	inithash(&h0, len(dest): u32, password, salt, cfg, mode, initmemsize);
    187 
    188 	const memsize = (len(mem) / BLOCKSZ): u32;
    189 	const cols = 4 * (memsize / (4 * cfg.parallel));
    190 	let ctx = context {
    191 		rows = cfg.parallel,
    192 		cols = cols,
    193 		sliceblocks = cols / 4,
    194 		pass = 0,
    195 		mem = mem,
    196 		mode = mode,
    197 		seedsinit = [0...],
    198 		seedblock = [0...],
    199 		...
    200 	};
    201 
    202 	// hash first and second blocks of each row
    203 	for (let i = 0z; i < ctx.rows; i += 1) {
    204 		let src: [72]u8 = [0...];
    205 		src[..64] = h0[..];
    206 
    207 		endian::leputu32(src[64..68], 0);
    208 		endian::leputu32(src[68..], i: u32);
    209 		varhash(blocku8(&ctx, i, 0), src);
    210 
    211 		endian::leputu32(src[64..68], 1);
    212 		endian::leputu32(src[68..], i: u32);
    213 		varhash(blocku8(&ctx, i, 1), src);
    214 	};
    215 
    216 	// process segments
    217 	for (ctx.pass < cfg.passes; ctx.pass += 1) {
    218 		for (let s = 0z; s < SLICES; s += 1) {
    219 			for (let i = 0z; i < ctx.rows; i += 1) {
    220 				segproc(cfg, &ctx, i, s);
    221 			};
    222 		};
    223 	};
    224 
    225 	// final hash
    226 	let b = blocku8(&ctx, 0, ctx.cols - 1);
    227 	for (let i = 1z; i < ctx.rows; i += 1) {
    228 		math::xor(b, b, blocku8(&ctx, i, ctx.cols - 1));
    229 	};
    230 
    231 	varhash(dest, b);
    232 
    233 	bytes::zero(h0);
    234 	bytes::zero((ctx.mem: *[*]u8)[..len(ctx.mem) * size(u64)]);
    235 
    236 	if (cfg.mem is u32) {
    237 		// mem was allocated internally
    238 		free(ctx.mem);
    239 	};
    240 };
    241 
    242 fn block(ctx: *context, i: size, j: size) []u64 = {
    243 	let index = (ctx.cols * i + j) * BLOCKSZ;
    244 	return ctx.mem[index..index + BLOCKSZ];
    245 };
    246 
    247 fn blocku8(ctx: *context, i: size, j: size) []u8 = {
    248 	return (block(ctx, i, j): *[*]u8)[..BLOCKSZ * size(u64)];
    249 };
    250 
    251 fn refblock(cfg: *conf, ctx: *context, seed: u64, i: size, j: size) []u64 = {
    252 	const segstart = (j - (j % ctx.sliceblocks)) / ctx.sliceblocks;
    253 	const index = j % ctx.sliceblocks;
    254 
    255 	const l: size = if (segstart == 0 && ctx.pass == 0) {
    256 		yield i;
    257 	} else {
    258 		yield (seed >> 32) % cfg.parallel;
    259 	};
    260 
    261 	let poolstart: u64 = ((segstart + 1) % SLICES) * ctx.sliceblocks;
    262 	let poolsize: u64 = 3 * ctx.sliceblocks;
    263 
    264 	if (i == l) {
    265 		poolsize += index;
    266 	};
    267 
    268 	if (ctx.pass == 0) {
    269 		poolstart = 0;
    270 		poolsize = segstart * ctx.sliceblocks;
    271 		if (segstart == 0 || i == l) {
    272 			poolsize += index;
    273 		};
    274 	};
    275 
    276 	if (index == 0 || i == l) {
    277 		poolsize -= 1;
    278 	};
    279 
    280 	const j1: u64 = seed & 0xffffffff;
    281 	const x: u64 = (j1 * j1) >> 32;
    282 	const y: u64 = (poolsize * x) >> 32;
    283 	const z: u64 = (poolstart + poolsize - (y+1)) % ctx.cols: u64;
    284 
    285 	return block(ctx, l: size, z: size);
    286 };
    287 
    288 fn inithash(
    289 	dest: *[64]u8,
    290 	taglen: u32,
    291 	password: []u8,
    292 	salt: []u8,
    293 	cfg: *conf,
    294 	mode: mode,
    295 	memsize: u32,
    296 ) void = {
    297 	let u32buf: [4]u8 = [0...];
    298 	let h = blake2b::blake2b([], 64);
    299 	defer hash::close(&h);
    300 
    301 	hash_leputu32(&h, cfg.parallel);
    302 	hash_leputu32(&h, taglen);
    303 	hash_leputu32(&h, memsize);
    304 	hash_leputu32(&h, cfg.passes);
    305 	hash_leputu32(&h, cfg.version);
    306 	hash_leputu32(&h, mode: u32);
    307 	hash_leputu32(&h, len(password): u32);
    308 	hash::write(&h, password);
    309 
    310 	hash_leputu32(&h, len(salt): u32);
    311 	hash::write(&h, salt);
    312 
    313 	hash_leputu32(&h, len(cfg.secret): u32);
    314 	hash::write(&h, cfg.secret);
    315 
    316 	hash_leputu32(&h, len(cfg.data): u32);
    317 	hash::write(&h, cfg.data);
    318 
    319 	hash::sum(&h, dest[..]);
    320 };
    321 
    322 fn hash_leputu32(h: *hash::hash, u: u32) void = {
    323 	let buf: [4]u8 = [0...];
    324 	endian::leputu32(buf, u);
    325 	hash::write(h, buf[..]);
    326 };
    327 
    328 // The variable hash function H'
    329 fn varhash(dest: []u8, block: []u8) void = {
    330 	let u32buf: [4]u8 = [0...];
    331 
    332 	if (len(dest) <= 64) {
    333 		let h = blake2b::blake2b([], len(dest));
    334 		defer hash::close(&h);
    335 		hash_leputu32(&h, len(dest): u32);
    336 		hash::write(&h, block);
    337 		hash::sum(&h, dest);
    338 		return;
    339 	};
    340 
    341 	// TODO this may be replaced with a constant time divceil in future to
    342 	// avoid leaking the dest len.
    343 	const r = divceil(len(dest): u32, 32) - 2;
    344 	let v: [64]u8 = [0...];
    345 
    346 	let destbuf = memio::fixed(dest);
    347 
    348 	let h = blake2b::blake2b([], 64);
    349 	hash_leputu32(&h, len(dest): u32);
    350 	hash::write(&h, block);
    351 	hash::sum(&h, v[..]);
    352 	hash::close(&h);
    353 
    354 	io::writeall(&destbuf, v[..32])!;
    355 
    356 	for (let i = 1z; i < r; i += 1) {
    357 		let h = blake2b::blake2b([], 64);
    358 		hash::write(&h, v[..]);
    359 		hash::sum(&h, v[..]);
    360 		hash::close(&h);
    361 		io::writeall(&destbuf, v[..32])!;
    362 	};
    363 
    364 	const remainder = len(dest) - 32 * r;
    365 	let hend = blake2b::blake2b([], remainder);
    366 	defer hash::close(&hend);
    367 	hash::write(&hend, v[..]);
    368 	hash::sum(&hend, v[..remainder]);
    369 	io::writeall(&destbuf, v[..remainder])!;
    370 };
    371 
    372 fn divceil(dividend: u32, divisor: u32) u32 = {
    373 	let result = dividend / divisor;
    374 	if (dividend % divisor > 0) {
    375 		result += 1;
    376 	};
    377 	return result;
    378 };
    379 
    380 fn xorblock(dest: []u64, x: []u64, y: []u64) void = {
    381 	for (let i = 0z; i < len(dest); i += 1) {
    382 		dest[i] = x[i] ^ y[i];
    383 	};
    384 };
    385 
    386 fn segproc(cfg: *conf, ctx: *context, i: size, slice: size) void = {
    387 	const init = switch (ctx.mode) {
    388 	case mode::I =>
    389 		yield true;
    390 	case mode::ID =>
    391 		yield ctx.pass == 0 && slice < 2;
    392 	case mode::D =>
    393 		yield false;
    394 	};
    395 	if (init) {
    396 		ctx.seedsinit[0] = ctx.pass;
    397 		ctx.seedsinit[1] = i;
    398 		ctx.seedsinit[2] = slice;
    399 		ctx.seedsinit[3] = len(ctx.mem) / BLOCKSZ;
    400 		ctx.seedsinit[4] = cfg.passes;
    401 		ctx.seedsinit[5] = ctx.mode: u64;
    402 		ctx.seedsinit[6] = 0;
    403 
    404 		if (ctx.pass == 0 && slice == 0) {
    405 			ctx.seedsinit[6] += 1;
    406 			compress(ctx.seedblock, ctx.seedsinit, zeroblock,
    407 				false);
    408 			compress(ctx.seedblock, ctx.seedblock, zeroblock,
    409 				false);
    410 		};
    411 	};
    412 
    413 	for (let b = 0z; b < ctx.sliceblocks; b += 1) {
    414 		const j = slice * ctx.sliceblocks + b;
    415 		if (ctx.pass == 0 && j < 2) {
    416 			continue;
    417 		};
    418 
    419 		const dmodeseed = switch (ctx.mode) {
    420 		case mode::D =>
    421 			yield true;
    422 		case mode::ID =>
    423 			yield ctx.pass > 0 || slice > 1;
    424 		case mode::I =>
    425 			yield false;
    426 		};
    427 
    428 		const pj = if (j == 0) ctx.cols - 1 else j - 1;
    429 		let prev = block(ctx, i, pj);
    430 
    431 		const seed: u64 = if (dmodeseed) {
    432 			yield prev[0];
    433 		} else {
    434 			if (b % BLOCKSZ == 0) {
    435 				ctx.seedsinit[6] += 1;
    436 				compress(ctx.seedblock, ctx.seedsinit,
    437 					zeroblock, false);
    438 				compress(ctx.seedblock, ctx.seedblock,
    439 					zeroblock, false);
    440 			};
    441 			yield ctx.seedblock[b % BLOCKSZ];
    442 		};
    443 
    444 		let ref = refblock(cfg, ctx, seed, i, j);
    445 		compress(block(ctx, i, j), prev, ref, ctx.pass > 0);
    446 	};
    447 };
    448 
    449 fn compress(dest: []u64, x: []u64, y: []u64, xor: bool) void = {
    450 	let r: block64 = [0...];
    451 	xorblock(r, x, y);
    452 
    453 	let z: block64 = [0...];
    454 	z[..] = r[..];
    455 
    456 	for (let i = 0z; i < 128; i += 16) {
    457 		perm(&z[i], &z[i + 1], &z[i + 2], &z[i + 3], &z[i + 4],
    458 			&z[i + 5], &z[i + 6], &z[i + 7], &z[i + 8], &z[i + 9],
    459 			&z[i + 10], &z[i + 11], &z[i + 12], &z[i + 13],
    460 			&z[i + 14], &z[i + 15]);
    461 	};
    462 
    463 	for (let i = 0z; i < 16; i += 2) {
    464 		perm(&z[i], &z[i + 1], &z[i + 16], &z[i + 17], &z[i + 32],
    465 			&z[i + 33], &z[i + 48], &z[i + 49], &z[i + 64],
    466 			&z[i + 65], &z[i + 80], &z[i + 81], &z[i + 96],
    467 			&z[i + 97], &z[i + 112], &z[i + 113]);
    468 	};
    469 
    470 	if (xor) {
    471 		xorblock(r, r, dest);
    472 	};
    473 
    474 	xorblock(dest, z, r);
    475 };
    476 
    477 fn perm(
    478 	x0: *u64,
    479 	x1: *u64,
    480 	x2: *u64,
    481 	x3: *u64,
    482 	x4: *u64,
    483 	x5: *u64,
    484 	x6: *u64,
    485 	x7: *u64,
    486 	x8: *u64,
    487 	x9: *u64,
    488 	x10: *u64,
    489 	x11: *u64,
    490 	x12: *u64,
    491 	x13: *u64,
    492 	x14: *u64,
    493 	x15: *u64,
    494 ) void = {
    495 	mix(x0, x4, x8, x12);
    496 	mix(x1, x5, x9, x13);
    497 	mix(x2, x6, x10, x14);
    498 	mix(x3, x7, x11, x15);
    499 
    500 	mix(x0, x5, x10, x15);
    501 	mix(x1, x6, x11, x12);
    502 	mix(x2, x7, x8, x13);
    503 	mix(x3, x4, x9, x14);
    504 };
    505 
    506 fn mix(a: *u64, b: *u64, c: *u64, d: *u64) void = {
    507 	*a = *a + *b + 2 * (*a & 0xffffffff) * (*b & 0xffffffff);
    508 	*d = math::rotr64(*d ^ *a, 32);
    509 	*c = *c + *d + 2 * (*c & 0xffffffff) * (*d & 0xffffffff);
    510 	*b = math::rotr64(*b ^ *c, 24);
    511 
    512 	*a = *a + *b + 2 * (*a & 0xffffffff) * (*b & 0xffffffff);
    513 	*d = math::rotr64(*d ^ *a, 16);
    514 	*c = *c + *d + 2 * (*c & 0xffffffff) * (*d & 0xffffffff);
    515 	*b = math::rotr64(*b ^ *c, 63);
    516 };