hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

monty.ha (5679B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 // The following code was initially ported from BearSSL.
      5 //
      6 // Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
      7 //
      8 // Permission is hereby granted, free of charge, to any person obtaining a copy
      9 // of this software and associated documentation files (the "Software"), to deal
     10 // in the Software without restriction, including without limitation the rights
     11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     12 // copies of the Software, and to permit persons to whom the Software is
     13 // furnished to do so, subject to the following conditions:
     14 //
     15 // The above copyright notice and this permission notice shall be included in
     16 // all copies or substantial portions of the Software.
     17 //
     18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     24 // SOFTWARE.
     25 use crypto::math::*;
     26 
     27 // Convert a modular integer to Montgomery representation. The integer 'x' must
     28 // be lower than 'm', but with the same announced bit length.
     29 export fn tomonty(x: []word, m: const []word) void = {
     30 	assert(equ32(x[0], m[0]) == 1);
     31 	for (let k: u32 = ewordlen(m); k > 0; k -= 1) {
     32 		muladd_small(x, 0, m);
     33 	};
     34 };
     35 
     36 // Convert a modular integer back from Montgomery representation. The integer
     37 // 'x' must be less than 'm', but with the same announced bit length. The 'm0i'
     38 // parameter is equal to -(1/m0) mod 2^32, where m0 is the least significant
     39 // value word of 'm' (this works only if 'm' is an odd integer). See [[ninv]]
     40 // for how to get this value.
     41 export fn frommonty(x: []word, m: const []word, m0i: word) void = {
     42 	assert(equ32(x[0], m[0]) == 1);
     43 	const mlen = ewordlen(m);
     44 	for (let u = 0z; u < mlen; u += 1) {
     45 		const f: u32 = mul31_lo(x[1], m0i);
     46 		let cc: u64 = 0;
     47 		for (let v = 0z; v < mlen; v += 1) {
     48 			const z: u64 = x[v + 1]: u64 + mul31(f, m[v + 1]) + cc;
     49 			cc = z >> 31;
     50 			if (v != 0) {
     51 				x[v] = z: word & WORD_BITMASK;
     52 			};
     53 		};
     54 		x[mlen] = cc: u32;
     55 	};
     56 
     57 	// We may have to do an extra subtraction, but only if the
     58 	// value in x[] is indeed greater than or equal to that of m[],
     59 	// which is why we must do two calls (first call computes the
     60 	// carry, second call performs the subtraction only if the carry
     61 	// is 0).
     62 	sub(x, m, notu32(sub(x, m, 0)));
     63 };
     64 
     65 // Compute a modular Montgomery multiplication. 'd' is filled with the value of
     66 // 'x'*'y'/R modulo 'm' (where R is the Montgomery factor). The array 'd' must
     67 // be distinct from 'x', 'y' and 'm'. 'x' and 'y' must be numerically lower than
     68 // 'm'. 'x' and 'y' may be the same array. The 'm0i' parameter is equal to
     69 // -(1/m0) mod 2^32, where m0 is the least significant value word of 'm' (this
     70 // works only if 'm' is an odd integer). See [[ninv]] for how to get this value.
     71 export fn montymul(
     72 	d: []word,
     73 	x: const []word,
     74 	y: const []word,
     75 	m: const []word,
     76 	m0i: u32
     77 ) void = {
     78 	// Each outer loop iteration computes:
     79 	//   d <- (d + xu*y + f*m) / 2^31
     80 	// We have xu <= 2^31-1 and f <= 2^31-1.
     81 	// Thus, if d <= 2*m-1 on input, then:
     82 	//   2*m-1 + 2*(2^31-1)*m <= (2^32)*m-1
     83 	// and the new d value is less than 2*m.
     84 	//
     85 	// We represent d over 31-bit words, with an extra word 'dh'
     86 	// which can thus be only 0 or 1.
     87 	let v: size = 0;
     88 
     89 	const mlen = ewordlen(m);
     90 	const len4 = mlen & ~3: size;
     91 	zero(d, m[0]);
     92 	let dh: u32 = 0;
     93 	for (let u = 0z; u < mlen; u += 1) {
     94 		// The carry for each operation fits on 32 bits:
     95 		//   d[v+1] <= 2^31-1
     96 		//   xu*y[v+1] <= (2^31-1)*(2^31-1)
     97 		//   f*m[v+1] <= (2^31-1)*(2^31-1)
     98 		//   r <= 2^32-1
     99 		//   (2^31-1) + 2*(2^31-1)*(2^31-1) + (2^32-1) = 2^63 - 2^31
    100 		// After division by 2^31, the new r is then at most 2^32-1
    101 		//
    102 		// Using a 32-bit carry has performance benefits on 32-bit
    103 		// systems; however, on 64-bit architectures, we prefer to
    104 		// keep the carry (r) in a 64-bit register, thus avoiding some
    105 		// "clear high bits" operations.
    106 
    107 		const xu: u32 = x[u + 1];
    108 		const f: u32 = mul31_lo((d[1] + mul31_lo(x[u + 1], y[1])), m0i);
    109 
    110 		let r: u64 = 0; // TODO if !BR_64 u32
    111 		v = 0;
    112 		for (v < len4; v += 4) {
    113 			let z: u64 = d[v + 1]: u64 + mul31(xu, y[v + 1])
    114 				+ mul31(f, m[v + 1]) + r;
    115 			r = z >> 31;
    116 			d[v + 0] = z: u32 & 0x7FFFFFFF;
    117 			z = d[v + 2]: u64 + mul31(xu, y[v + 2])
    118 				+ mul31(f, m[v + 2]) + r;
    119 			r = z >> 31;
    120 			d[v + 1] = z: u32 & 0x7FFFFFFF;
    121 			z = d[v + 3]: u64 + mul31(xu, y[v + 3])
    122 				+ mul31(f, m[v + 3]) + r;
    123 			r = z >> 31;
    124 			d[v + 2] = z: u32 & 0x7FFFFFFF;
    125 			z = d[v + 4]: u64 + mul31(xu, y[v + 4])
    126 				+ mul31(f, m[v + 4]) + r;
    127 			r = z >> 31;
    128 			d[v + 3] = z: u32 & 0x7FFFFFFF;
    129 		};
    130 		for (v < mlen; v += 1) {
    131 			const z: u64 = d[v + 1]: u64 + mul31(xu, y[v + 1])
    132 				+ mul31(f, m[v + 1]) + r;
    133 			r = z >> 31;
    134 			d[v] = z: u32 & 0x7FFFFFFF;
    135 		};
    136 
    137 		// Since the new dh can only be 0 or 1, the addition of
    138 		// the old dh with the carry MUST fit on 32 bits, and
    139 		// thus can be done into dh itself.
    140 		dh += r: u32;
    141 		d[mlen] = dh & 0x7FFFFFFF;
    142 		dh >>= 31;
    143 	};
    144 
    145 	// We must write back the bit length because it was overwritten in
    146 	// the loop (not overwriting it would require a test in the loop,
    147 	// which would yield bigger and slower code).
    148 	d[0] = m[0];
    149 
    150 	// d[] may still be greater than m[] at that point; notably, the
    151 	// 'dh' word may be non-zero.
    152 	sub(d, m, nequ32(dh, 0) | notu32(sub(d, m, 0)));
    153 };