arithm.ha - hare - [hare] The Hare programming language

arithm.ha (14293B)
      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 // The following code was initially ported from BearSSL.
      5 //
      6 // Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
      7 //
      8 // Permission is hereby granted, free of charge, to any person obtaining a copy
      9 // of this software and associated documentation files (the "Software"), to deal
     10 // in the Software without restriction, including without limitation the rights
     11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     12 // copies of the Software, and to permit persons to whom the Software is
     13 // furnished to do so, subject to the following conditions:
     14 //
     15 // The above copyright notice and this permission notice shall be included in
     16 // all copies or substantial portions of the Software.
     17 //
     18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     24 // SOFTWARE.
     25 use crypto::math::*;
     26 
     27 // Adds 'b' to 'a', if 'ctl' is 1. If 'ctl' is 0, the result will be discarded.
     28 // Returns the carry, if the addition overflows.
     29 //
     30 // 'a' and 'b' must have the same effective word length.
     31 export fn add(a: []word, b: const []word, ctl: u32) u32 = {
     32 	const l = ewordlen(a);
     33 	assert(equ32(l, ewordlen(b)) == 1);
     34 
     35 	let carry: u32 = 0;
     36 	for (let i = 1z; i <= l; i += 1) {
     37 		const n: u32 = a[i] + b[i] + carry;
     38 		carry = n >> WORD_BITSZ;
     39 		a[i] = muxu32(ctl, n & WORD_BITMASK, a[i]): word;
     40 	};
     41 	return carry;
     42 };
     43 
     44 // Subtracts 'b' from 'a', if 'ctl' is 1. If 'ctl' is 0, the result will be
     45 // discared. Returns the carry, if the subtraction overflows.
     46 //
     47 // 'a' and 'b' must be of the same effective word length.
     48 export fn sub(a: []word, b: const []word, do: u32) u32 = {
     49 	const l = ewordlen(a);
     50 	assert(equ32(l, ewordlen(b)) == 1);
     51 
     52 	let carry: u32 = 0;
     53 	for (let i = 1z; i <= l; i += 1) {
     54 		let n: u32 = a[i] - b[i] - carry;
     55 		carry = n >> WORD_BITSZ;
     56 		a[i] = muxu32(do, n & WORD_BITMASK, a[i]): word;
     57 	};
     58 	return carry;
     59 };
     60 
     61 // Decrements 1 from an odd integer 'x'.
     62 export fn decrodd(x: []word) void = {
     63 	assert(isodd(x));
     64 	x[1] ^= 1;
     65 };
     66 
     67 // Right-shift an integer 'x' by 'count'. The shift amount must be less than
     68 // [[WORD_BITSZ]].
     69 export fn rshift(x: []word, count: word) void = {
     70 	assert(ltu32(count, WORD_BITSZ) == 1);
     71 
     72 	const l = ewordlen(x);
     73 	if (l == 0) {
     74 		return;
     75 	};
     76 
     77 	let r = x[1] >> count;
     78 	for (let i = 2z; i <= l; i += 1) {
     79 		const w = x[i];
     80 		x[i - 1] = (w << (WORD_BITSZ - count) | r) & WORD_BITMASK;
     81 		r = w >> count;
     82 	};
     83 	x[l] = r;
     84 };
     85 
     86 // Multiply 'x' by 2^WORD_BITSZ and then add integer 'z', modulo 'm'. This
     87 // function assumes that 'x' and 'm' have the same announced bit length, and the
     88 // announced bit length of 'm' matches its true bit length.
     89 //
     90 // 'x' and 'm' may not refer to the same array.
     91 //
     92 // This function runs in constant time for a given announced bit length of 'x'
     93 // and 'm'.
     94 fn muladd_small(x: []word, z: word, m: const []word) void = {
     95 	assert(&x[0] != &m[0], "'x' and 'm' may not refer to the same array");
     96 
     97 	let hi: u32 = 0;
     98 
     99 	// We can test on the modulus bit length since we accept to leak that
    100 	// length.
    101 	const m_bitlen = m[0];
    102 	if (m_bitlen == 0) {
    103 		return;
    104 	};
    105 
    106 	if (m_bitlen <= WORD_BITSZ) {
    107 		hi = x[1] >> 1;
    108 		const lo = (x[1] << WORD_BITSZ) | z;
    109 		x[1] = divu32(hi, lo, m[1]).1: word;
    110 		return;
    111 	};
    112 
    113 	let mlen = ewordlen(m);
    114 	let mblr = m_bitlen & WORD_BITSZ: word;
    115 
    116 	// Principle: we estimate the quotient (x*2^31+z)/m by
    117 	// doing a 64/32 division with the high words.
    118 	//
    119 	// Let:
    120 	//   w = 2^31
    121 	//   a = (w*a0 + a1) * w^N + a2
    122 	//   b = b0 * w^N + b2
    123 	// such that:
    124 	//   0 <= a0 < w
    125 	//   0 <= a1 < w
    126 	//   0 <= a2 < w^N
    127 	//   w/2 <= b0 < w
    128 	//   0 <= b2 < w^N
    129 	//   a < w*b
    130 	// I.e. the two top words of a are a0:a1, the top word of b is
    131 	// b0, we ensured that b0 is "full" (high bit set), and a is
    132 	// such that the quotient q = a/b fits on one word (0 <= q < w).
    133 	//
    134 	// If a = b*q + r (with 0 <= r < q), we can estimate q by
    135 	// doing an Euclidean division on the top words:
    136 	//   a0*w+a1 = b0*u + v  (with 0 <= v < b0)
    137 	// Then the following holds:
    138 	//   0 <= u <= w
    139 	//   u-2 <= q <= u
    140 
    141 	let a0: u32 = 0, a1: u32 = 0, b0: u32 = 0;
    142 	hi = x[mlen];
    143 	if (mblr == 0) {
    144 		a0 = x[mlen];
    145 		x[2..mlen+1] = x[1..mlen];
    146 		x[1] = z;
    147 		a1 = x[mlen];
    148 		b0 = m[mlen];
    149 	} else {
    150 		a0 = ((x[mlen] << (WORD_BITSZ: word - mblr))
    151 			| (x[mlen - 1] >> mblr)) & WORD_BITMASK;
    152 		x[2..mlen+1] = x[1..mlen];
    153 		x[1] = z;
    154 		a1 = ((x[mlen] << (WORD_BITSZ: word - mblr))
    155 			| (x[mlen - 1] >> mblr)) & WORD_BITMASK;
    156 		b0 = ((m[mlen] << (WORD_BITSZ: word - mblr))
    157 			| (m[mlen - 1] >> mblr)) & WORD_BITMASK;
    158 	};
    159 
    160 	// We estimate a divisor q. If the quotient returned by br_div()
    161 	// is g:
    162 	// -- If a0 == b0 then g == 0; we want q = 0x7FFFFFFF.
    163 	// -- Otherwise:
    164 	//    -- if g == 0 then we set q = 0;
    165 	//    -- otherwise, we set q = g - 1.
    166 	// The properties described above then ensure that the true
    167 	// quotient is q-1, q or q+1.
    168 	//
    169 	// Take care that a0, a1 and b0 are 31-bit words, not 32-bit. We
    170 	// must adjust the parameters to br_div() accordingly.
    171 	//
    172 	const (g, _) = divu32(a0 >> 1, a1 | (a0 << 31), b0);
    173 	const q = muxu32(equ32(a0, b0), WORD_BITMASK,
    174 		muxu32(equ32(g, 0), 0, g - 1));
    175 
    176 	// We subtract q*m from x (with the extra high word of value 'hi').
    177 	// Since q may be off by 1 (in either direction), we may have to
    178 	// add or subtract m afterwards.
    179 	//
    180 	// The 'tb' flag will be true (1) at the end of the loop if the
    181 	// result is greater than or equal to the modulus (not counting
    182 	// 'hi' or the carry).
    183 	let cc: u32 = 0;
    184 	let tb: u32 = 1;
    185 	for (let u = 1z; u <= mlen; u += 1) {
    186 		const mw = m[u];
    187 		const zl = mul31(mw, q) + cc;
    188 		cc = (zl >> WORD_BITSZ): word;
    189 		const zw = zl: word & WORD_BITMASK;
    190 		const xw = x[u];
    191 		let nxw = xw - zw;
    192 		cc += nxw >> WORD_BITSZ;
    193 		nxw &= WORD_BITMASK;
    194 		x[u] = nxw;
    195 		tb = muxu32(equ32(nxw, mw), tb, gtu32(nxw, mw));
    196 	};
    197 
    198 	// If we underestimated q, then either cc < hi (one extra bit
    199 	// beyond the top array word), or cc == hi and tb is true (no
    200 	// extra bit, but the result is not lower than the modulus). In
    201 	// these cases we must subtract m once.
    202 	//
    203 	// Otherwise, we may have overestimated, which will show as
    204 	// cc > hi (thus a negative result). Correction is adding m once.
    205 	const over = gtu32(cc, hi);
    206 	const under = ~over & (tb | ltu32(cc, hi));
    207 	add(x, m, over);
    208 	sub(x, m, under);
    209 };
    210 
    211 
    212 // Multiply two 31-bit integers, with a 62-bit result. This default
    213 // implementation assumes that the basic multiplication operator
    214 // yields constant-time code.
    215 fn mul31(x: u32, y: u32) u64 = x: u64 * y: u64;
    216 fn mul31_lo(x: u32, y: u32) u32 = (x: u32 * y: u32) & 0x7FFFFFFF;
    217 
    218 // Calculate "m0i" which is equal to -(1/m0) mod 2^WORD_BITSZ, where m0 is the
    219 // least significant value word of m: []word.
    220 fn ninv31(m0: u32) u32 = {
    221 	let y: u32 = 2 - m0;
    222 	y *= 2 - y * m0;
    223 	y *= 2 - y * m0;
    224 	y *= 2 - y * m0;
    225 	y *= 2 - y * m0;
    226 	return muxu32(m0 & 1, -y, 0) & 0x7FFFFFFF;
    227 };
    228 
    229 // Calculates "m0i" which is equal to -(1/'m0') mod 2^WORD_BITSZ. 'm0' is the
    230 // the first significant word of a big integer, which is the word at index 1.
    231 export fn ninv(m0: word) word = ninv31(m0);
    232 
    233 
    234 // Compute 'd' + 'a' * 'b', result in 'd'. The initial announced bit length of
    235 // 'd' MUST match that of 'a'. The 'd' array MUST be large enough to accommodate
    236 // the full result, plus (possibly) an extra word. The resulting announced bit
    237 // length of 'd' will be the sum of the announced bit lengths of 'a' and 'b'
    238 // (therefore, it may be larger than the actual bit length of the numerical
    239 // result).
    240 //
    241 // 'a' and 'b' may be the same array. 'd' must be disjoint from both 'a' and
    242 // 'b'.
    243 export fn mulacc(d: []word, a: const []word, b: const []word) void = {
    244 	assert(&a[0] != &d[0] && &b[0] != &d[0]);
    245 
    246 	const alen = ewordlen(a);
    247 	const blen = ewordlen(b);
    248 
    249 	// We want to add the two bit lengths, but these are encoded,
    250 	// which requires some extra care.
    251 	const dl: u32 = (a[0] & WORD_BITSZ) + (b[0] & WORD_BITSZ);
    252 	const dh: u32 = (a[0] >> WORD_SHIFT) + (b[0] >> WORD_SHIFT);
    253 	d[0] = (dh << WORD_SHIFT) + dl
    254 		+ (~(dl - WORD_BITSZ): u32 >> 31);
    255 
    256 	for (let u = 0z; u < blen; u += 1) {
    257 		// Carry always fits on 31 bits; we want to keep it in a
    258 		// 32-bit register on 32-bit architectures (on a 64-bit
    259 		// architecture, cast down from 64 to 32 bits means
    260 		// clearing the high bits, which is not free; on a 32-bit
    261 		// architecture, the same operation really means ignoring
    262 		// the top register, which has negative or zero cost).
    263 		const f: u32 = b[1 + u];
    264 		let cc: u64 = 0; // TODO #if BR_64 u32
    265 
    266 		for (let v = 0z; v < alen; v += 1) {
    267 
    268 			let z: u64 = d[1 + u + v]: u64 + mul31(f, a[1 + v]) + cc;
    269 			cc = z >> WORD_BITSZ;
    270 			d[1 + u + v] = z: word & WORD_BITMASK;
    271 		};
    272 		d[1 + u + alen] = cc: word;
    273 	};
    274 };
    275 
    276 // Reduce an integer 'a' modulo another 'm'. The result is written in 'x' and
    277 // its announced bit length is set to be equal to that of 'm'.
    278 //
    279 // 'x' must be distinct from 'a' and 'm'.
    280 //
    281 // This function runs in constant time for given announced bit lengths of 'a'
    282 // and 'm'.
    283 export fn reduce(x: []word, a: const []word, m: const []word) void = {
    284 	assert(&x[0] != &a[0] && &x[0] != &m[0]);
    285 
    286 	const m_bitlen = m[0];
    287 	const mlen = ewordlen(m);
    288 
    289 	x[0] = m_bitlen;
    290 	if (m_bitlen == 0) {
    291 		return;
    292 	};
    293 
    294 	// If the source is shorter, then simply copy all words from a[]
    295 	// and zero out the upper words.
    296 	const a_bitlen = a[0];
    297 	const alen = ewordlen(a);
    298 	if (a_bitlen < m_bitlen) {
    299 		x[1..1 + alen] = a[1..1 + alen];
    300 		for (let u = alen; u < mlen; u += 1) {
    301 			x[u + 1] = 0;
    302 		};
    303 		return;
    304 	};
    305 
    306 	// The source length is at least equal to that of the modulus.
    307 	// We must thus copy N-1 words, and input the remaining words
    308 	// one by one.
    309 	x[1..mlen] = a[2 + (alen - mlen).. 1 + mlen];
    310 	x[mlen] = 0;
    311 	for (let u = 1 + alen - mlen; u > 0; u -= 1) {
    312 		muladd_small(x, a[u], m);
    313 	};
    314 };
    315 
    316 // Copies 'src' to 'dest' if ctl is 1. The length of 'src' must match the length
    317 // of 'dest'.
    318 fn ccopyword(ctl: u32, dest: []word, src: const []word) void = {
    319 	for (let i = 0z; i < len(dest); i += 1) {
    320 		const x = src[i];
    321 		const y = dest[i];
    322 
    323 		dest[i] = muxu32(ctl, x, y);
    324 	};
    325 };
    326 
    327 // Compute a modular exponentiation. 'x' must be an integer modulo 'm' (same
    328 // announced bit length, lower value). 'm' must be odd. The exponent is in
    329 // big-endian unsigned notation, over len(e) bytes. The 'm0i' parameter is equal
    330 // to -(1/m0) mod 2^31, where m0 is the least significant value word of 'm'
    331 // (this works only if 'm' is an odd integer). See [[ninv]] on how to get this
    332 // value. The 'tmp' array is used for temporaries and MUST be large enough to
    333 // accommodate at least two temporary values with the same size as 'm'
    334 // (including the leading 'bit length' word). If there is room for more
    335 // temporaries, then this function may use the extra room for window-based
    336 // optimisation, resulting in faster computations.
    337 //
    338 // Returned value is 1 on success, 0 on error. An error is reported if the
    339 // provided 'tmp' array is too short.
    340 export fn modpow(
    341 	x: []word,
    342 	e: const []u8,
    343 	m: const []word,
    344 	m0i: u32,
    345 	tmp: []word,
    346 ) u32 = {
    347 	assert(m[1] & 1 == 1, "'m' must be odd");
    348 	assert(equ32(x[0], m[0]) == 1);
    349 
    350 	// Get modulus size.
    351 	let mwlen = ewordlen(m) + 1;
    352 	const tmplen = (mwlen & 1) + mwlen;
    353 	let t1 = tmp[..tmplen];
    354 	let t2 = tmp[tmplen..];
    355 
    356 	const twlen = len(tmp);
    357 	if (twlen < (mwlen << 1)) {
    358 		return 0;
    359 	};
    360 
    361 	// Compute possible window size, with a maximum of 5 bits.
    362 	// When the window has size 1 bit, we use a specific code
    363 	// that requires only two temporaries. Otherwise, for a
    364 	// window of k bits, we need 2^k+1 temporaries.
    365 	let win_len: word = 5;
    366 	for (win_len > 1; win_len -= 1) {
    367 		if (((1u32 << win_len: u32) + 1) * mwlen <= twlen) {
    368 			break;
    369 		};
    370 	};
    371 
    372 	// Everything is done in Montgomery representation.
    373 	tomonty(x, m);
    374 
    375 	// Compute window contents. If the window has size one bit only,
    376 	// then t2 is set to x; otherwise, t2[0] is left untouched, and
    377 	// t2[k] is set to x^k (for k >= 1).
    378 	if (win_len == 1) {
    379 		t2[..mwlen] = x[..mwlen];
    380 	} else {
    381 		let base = t2[mwlen..];
    382 		base[..mwlen] = x[..mwlen];
    383 		for (let u = 2z; u < (1u << win_len: uint); u += 1) {
    384 			montymul(base[mwlen..2 * mwlen],
    385 				base[..mwlen], x, m, m0i);
    386 			base = base[mwlen..];
    387 		};
    388 	};
    389 
    390 	// We need to set x to 1, in Montgomery representation. This can
    391 	// be done efficiently by setting the high word to 1, then doing
    392 	// one word-sized shift.
    393 	zero(x, m[0]);
    394 	x[ewordlen(m)] = 1;
    395 	muladd_small(x, 0, m);
    396 
    397 	let elen = len(e);
    398 	let e = e[..];
    399 
    400 	// We process bits from most to least significant. At each
    401 	// loop iteration, we have acc_len bits in acc.
    402 	let acc: word = 0;
    403 	let acc_len = 0i;
    404 	for (acc_len > 0 || elen > 0) {
    405 		// Get the next bits.
    406 		let k = win_len;
    407 		if (acc_len: u32 < win_len) {
    408 			if (elen > 0) {
    409 				acc = (acc << 8) | e[0];
    410 				e = e[1..];
    411 				elen -= 1;
    412 				acc_len += 8;
    413 			} else {
    414 				k = acc_len: u32;
    415 			};
    416 		};
    417 
    418 		let bits: u32 = (acc >> (acc_len: u32 - k: u32))
    419 			& ((1u32 << k: u32) - 1u32);
    420 		acc_len -= k: int;
    421 
    422 		// We could get exactly k bits. Compute k squarings.
    423 		for (let i = 0z; i < k: size; i += 1) {
    424 			montymul(t1, x, x, m, m0i);
    425 			// memcpy(x, t1, mlen);
    426 			x[..mwlen] = t1[..mwlen];
    427 		};
    428 
    429 		// Window lookup: we want to set t2 to the window
    430 		// lookup value, assuming the bits are non-zero. If
    431 		// the window length is 1 bit only, then t2 is
    432 		// already set; otherwise, we do a constant-time lookup.
    433 		if (win_len > 1) {
    434 			zero(t2, m[0]);
    435 			let base = t2[mwlen..];
    436 			for (let u = 1u32; u < (1 << k): size; u += 1) {
    437 				let mask: u32 = -equ32(u, bits);
    438 				for (let v = 1z; v < mwlen; v += 1) {
    439 					t2[v] |= mask & base[v];
    440 				};
    441 				base = base[mwlen..];
    442 			};
    443 		};
    444 
    445 		// Multiply with the looked-up value. We keep the
    446 		// product only if the exponent bits are not all-zero.
    447 		montymul(t1, x, t2, m, m0i);
    448 
    449 		ccopyword(nequ32(bits, 0), x[..mwlen], t1[..mwlen]);
    450 	};
    451 
    452 	// Convert back from Montgomery representation, and exit.
    453 	frommonty(x, m, m0i);
    454 	return 1;
    455 };
	hare [hare] The Hare programming language
	git clone https://git.torresjrjr.com/hare.git
	Log \| Files \| Refs \| README \| LICENSE