arithm.ha (14293B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 // The following code was initially ported from BearSSL. 5 // 6 // Copyright (c) 2017 Thomas Pornin <pornin@bolet.org> 7 // 8 // Permission is hereby granted, free of charge, to any person obtaining a copy 9 // of this software and associated documentation files (the "Software"), to deal 10 // in the Software without restriction, including without limitation the rights 11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 // copies of the Software, and to permit persons to whom the Software is 13 // furnished to do so, subject to the following conditions: 14 // 15 // The above copyright notice and this permission notice shall be included in 16 // all copies or substantial portions of the Software. 17 // 18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 // SOFTWARE. 25 use crypto::math::*; 26 27 // Adds 'b' to 'a', if 'ctl' is 1. If 'ctl' is 0, the result will be discarded. 28 // Returns the carry, if the addition overflows. 29 // 30 // 'a' and 'b' must have the same effective word length. 31 export fn add(a: []word, b: const []word, ctl: u32) u32 = { 32 const l = ewordlen(a); 33 assert(equ32(l, ewordlen(b)) == 1); 34 35 let carry: u32 = 0; 36 for (let i = 1z; i <= l; i += 1) { 37 const n: u32 = a[i] + b[i] + carry; 38 carry = n >> WORD_BITSZ; 39 a[i] = muxu32(ctl, n & WORD_BITMASK, a[i]): word; 40 }; 41 return carry; 42 }; 43 44 // Subtracts 'b' from 'a', if 'ctl' is 1. If 'ctl' is 0, the result will be 45 // discared. Returns the carry, if the subtraction overflows. 46 // 47 // 'a' and 'b' must be of the same effective word length. 48 export fn sub(a: []word, b: const []word, do: u32) u32 = { 49 const l = ewordlen(a); 50 assert(equ32(l, ewordlen(b)) == 1); 51 52 let carry: u32 = 0; 53 for (let i = 1z; i <= l; i += 1) { 54 let n: u32 = a[i] - b[i] - carry; 55 carry = n >> WORD_BITSZ; 56 a[i] = muxu32(do, n & WORD_BITMASK, a[i]): word; 57 }; 58 return carry; 59 }; 60 61 // Decrements 1 from an odd integer 'x'. 62 export fn decrodd(x: []word) void = { 63 assert(isodd(x)); 64 x[1] ^= 1; 65 }; 66 67 // Right-shift an integer 'x' by 'count'. The shift amount must be less than 68 // [[WORD_BITSZ]]. 69 export fn rshift(x: []word, count: word) void = { 70 assert(ltu32(count, WORD_BITSZ) == 1); 71 72 const l = ewordlen(x); 73 if (l == 0) { 74 return; 75 }; 76 77 let r = x[1] >> count; 78 for (let i = 2z; i <= l; i += 1) { 79 const w = x[i]; 80 x[i - 1] = (w << (WORD_BITSZ - count) | r) & WORD_BITMASK; 81 r = w >> count; 82 }; 83 x[l] = r; 84 }; 85 86 // Multiply 'x' by 2^WORD_BITSZ and then add integer 'z', modulo 'm'. This 87 // function assumes that 'x' and 'm' have the same announced bit length, and the 88 // announced bit length of 'm' matches its true bit length. 89 // 90 // 'x' and 'm' may not refer to the same array. 91 // 92 // This function runs in constant time for a given announced bit length of 'x' 93 // and 'm'. 94 fn muladd_small(x: []word, z: word, m: const []word) void = { 95 assert(&x[0] != &m[0], "'x' and 'm' may not refer to the same array"); 96 97 let hi: u32 = 0; 98 99 // We can test on the modulus bit length since we accept to leak that 100 // length. 101 const m_bitlen = m[0]; 102 if (m_bitlen == 0) { 103 return; 104 }; 105 106 if (m_bitlen <= WORD_BITSZ) { 107 hi = x[1] >> 1; 108 const lo = (x[1] << WORD_BITSZ) | z; 109 x[1] = divu32(hi, lo, m[1]).1: word; 110 return; 111 }; 112 113 let mlen = ewordlen(m); 114 let mblr = m_bitlen & WORD_BITSZ: word; 115 116 // Principle: we estimate the quotient (x*2^31+z)/m by 117 // doing a 64/32 division with the high words. 118 // 119 // Let: 120 // w = 2^31 121 // a = (w*a0 + a1) * w^N + a2 122 // b = b0 * w^N + b2 123 // such that: 124 // 0 <= a0 < w 125 // 0 <= a1 < w 126 // 0 <= a2 < w^N 127 // w/2 <= b0 < w 128 // 0 <= b2 < w^N 129 // a < w*b 130 // I.e. the two top words of a are a0:a1, the top word of b is 131 // b0, we ensured that b0 is "full" (high bit set), and a is 132 // such that the quotient q = a/b fits on one word (0 <= q < w). 133 // 134 // If a = b*q + r (with 0 <= r < q), we can estimate q by 135 // doing an Euclidean division on the top words: 136 // a0*w+a1 = b0*u + v (with 0 <= v < b0) 137 // Then the following holds: 138 // 0 <= u <= w 139 // u-2 <= q <= u 140 141 let a0: u32 = 0, a1: u32 = 0, b0: u32 = 0; 142 hi = x[mlen]; 143 if (mblr == 0) { 144 a0 = x[mlen]; 145 x[2..mlen+1] = x[1..mlen]; 146 x[1] = z; 147 a1 = x[mlen]; 148 b0 = m[mlen]; 149 } else { 150 a0 = ((x[mlen] << (WORD_BITSZ: word - mblr)) 151 | (x[mlen - 1] >> mblr)) & WORD_BITMASK; 152 x[2..mlen+1] = x[1..mlen]; 153 x[1] = z; 154 a1 = ((x[mlen] << (WORD_BITSZ: word - mblr)) 155 | (x[mlen - 1] >> mblr)) & WORD_BITMASK; 156 b0 = ((m[mlen] << (WORD_BITSZ: word - mblr)) 157 | (m[mlen - 1] >> mblr)) & WORD_BITMASK; 158 }; 159 160 // We estimate a divisor q. If the quotient returned by br_div() 161 // is g: 162 // -- If a0 == b0 then g == 0; we want q = 0x7FFFFFFF. 163 // -- Otherwise: 164 // -- if g == 0 then we set q = 0; 165 // -- otherwise, we set q = g - 1. 166 // The properties described above then ensure that the true 167 // quotient is q-1, q or q+1. 168 // 169 // Take care that a0, a1 and b0 are 31-bit words, not 32-bit. We 170 // must adjust the parameters to br_div() accordingly. 171 // 172 const (g, _) = divu32(a0 >> 1, a1 | (a0 << 31), b0); 173 const q = muxu32(equ32(a0, b0), WORD_BITMASK, 174 muxu32(equ32(g, 0), 0, g - 1)); 175 176 // We subtract q*m from x (with the extra high word of value 'hi'). 177 // Since q may be off by 1 (in either direction), we may have to 178 // add or subtract m afterwards. 179 // 180 // The 'tb' flag will be true (1) at the end of the loop if the 181 // result is greater than or equal to the modulus (not counting 182 // 'hi' or the carry). 183 let cc: u32 = 0; 184 let tb: u32 = 1; 185 for (let u = 1z; u <= mlen; u += 1) { 186 const mw = m[u]; 187 const zl = mul31(mw, q) + cc; 188 cc = (zl >> WORD_BITSZ): word; 189 const zw = zl: word & WORD_BITMASK; 190 const xw = x[u]; 191 let nxw = xw - zw; 192 cc += nxw >> WORD_BITSZ; 193 nxw &= WORD_BITMASK; 194 x[u] = nxw; 195 tb = muxu32(equ32(nxw, mw), tb, gtu32(nxw, mw)); 196 }; 197 198 // If we underestimated q, then either cc < hi (one extra bit 199 // beyond the top array word), or cc == hi and tb is true (no 200 // extra bit, but the result is not lower than the modulus). In 201 // these cases we must subtract m once. 202 // 203 // Otherwise, we may have overestimated, which will show as 204 // cc > hi (thus a negative result). Correction is adding m once. 205 const over = gtu32(cc, hi); 206 const under = ~over & (tb | ltu32(cc, hi)); 207 add(x, m, over); 208 sub(x, m, under); 209 }; 210 211 212 // Multiply two 31-bit integers, with a 62-bit result. This default 213 // implementation assumes that the basic multiplication operator 214 // yields constant-time code. 215 fn mul31(x: u32, y: u32) u64 = x: u64 * y: u64; 216 fn mul31_lo(x: u32, y: u32) u32 = (x: u32 * y: u32) & 0x7FFFFFFF; 217 218 // Calculate "m0i" which is equal to -(1/m0) mod 2^WORD_BITSZ, where m0 is the 219 // least significant value word of m: []word. 220 fn ninv31(m0: u32) u32 = { 221 let y: u32 = 2 - m0; 222 y *= 2 - y * m0; 223 y *= 2 - y * m0; 224 y *= 2 - y * m0; 225 y *= 2 - y * m0; 226 return muxu32(m0 & 1, -y, 0) & 0x7FFFFFFF; 227 }; 228 229 // Calculates "m0i" which is equal to -(1/'m0') mod 2^WORD_BITSZ. 'm0' is the 230 // the first significant word of a big integer, which is the word at index 1. 231 export fn ninv(m0: word) word = ninv31(m0); 232 233 234 // Compute 'd' + 'a' * 'b', result in 'd'. The initial announced bit length of 235 // 'd' MUST match that of 'a'. The 'd' array MUST be large enough to accommodate 236 // the full result, plus (possibly) an extra word. The resulting announced bit 237 // length of 'd' will be the sum of the announced bit lengths of 'a' and 'b' 238 // (therefore, it may be larger than the actual bit length of the numerical 239 // result). 240 // 241 // 'a' and 'b' may be the same array. 'd' must be disjoint from both 'a' and 242 // 'b'. 243 export fn mulacc(d: []word, a: const []word, b: const []word) void = { 244 assert(&a[0] != &d[0] && &b[0] != &d[0]); 245 246 const alen = ewordlen(a); 247 const blen = ewordlen(b); 248 249 // We want to add the two bit lengths, but these are encoded, 250 // which requires some extra care. 251 const dl: u32 = (a[0] & WORD_BITSZ) + (b[0] & WORD_BITSZ); 252 const dh: u32 = (a[0] >> WORD_SHIFT) + (b[0] >> WORD_SHIFT); 253 d[0] = (dh << WORD_SHIFT) + dl 254 + (~(dl - WORD_BITSZ): u32 >> 31); 255 256 for (let u = 0z; u < blen; u += 1) { 257 // Carry always fits on 31 bits; we want to keep it in a 258 // 32-bit register on 32-bit architectures (on a 64-bit 259 // architecture, cast down from 64 to 32 bits means 260 // clearing the high bits, which is not free; on a 32-bit 261 // architecture, the same operation really means ignoring 262 // the top register, which has negative or zero cost). 263 const f: u32 = b[1 + u]; 264 let cc: u64 = 0; // TODO #if BR_64 u32 265 266 for (let v = 0z; v < alen; v += 1) { 267 268 let z: u64 = d[1 + u + v]: u64 + mul31(f, a[1 + v]) + cc; 269 cc = z >> WORD_BITSZ; 270 d[1 + u + v] = z: word & WORD_BITMASK; 271 }; 272 d[1 + u + alen] = cc: word; 273 }; 274 }; 275 276 // Reduce an integer 'a' modulo another 'm'. The result is written in 'x' and 277 // its announced bit length is set to be equal to that of 'm'. 278 // 279 // 'x' must be distinct from 'a' and 'm'. 280 // 281 // This function runs in constant time for given announced bit lengths of 'a' 282 // and 'm'. 283 export fn reduce(x: []word, a: const []word, m: const []word) void = { 284 assert(&x[0] != &a[0] && &x[0] != &m[0]); 285 286 const m_bitlen = m[0]; 287 const mlen = ewordlen(m); 288 289 x[0] = m_bitlen; 290 if (m_bitlen == 0) { 291 return; 292 }; 293 294 // If the source is shorter, then simply copy all words from a[] 295 // and zero out the upper words. 296 const a_bitlen = a[0]; 297 const alen = ewordlen(a); 298 if (a_bitlen < m_bitlen) { 299 x[1..1 + alen] = a[1..1 + alen]; 300 for (let u = alen; u < mlen; u += 1) { 301 x[u + 1] = 0; 302 }; 303 return; 304 }; 305 306 // The source length is at least equal to that of the modulus. 307 // We must thus copy N-1 words, and input the remaining words 308 // one by one. 309 x[1..mlen] = a[2 + (alen - mlen).. 1 + mlen]; 310 x[mlen] = 0; 311 for (let u = 1 + alen - mlen; u > 0; u -= 1) { 312 muladd_small(x, a[u], m); 313 }; 314 }; 315 316 // Copies 'src' to 'dest' if ctl is 1. The length of 'src' must match the length 317 // of 'dest'. 318 fn ccopyword(ctl: u32, dest: []word, src: const []word) void = { 319 for (let i = 0z; i < len(dest); i += 1) { 320 const x = src[i]; 321 const y = dest[i]; 322 323 dest[i] = muxu32(ctl, x, y); 324 }; 325 }; 326 327 // Compute a modular exponentiation. 'x' must be an integer modulo 'm' (same 328 // announced bit length, lower value). 'm' must be odd. The exponent is in 329 // big-endian unsigned notation, over len(e) bytes. The 'm0i' parameter is equal 330 // to -(1/m0) mod 2^31, where m0 is the least significant value word of 'm' 331 // (this works only if 'm' is an odd integer). See [[ninv]] on how to get this 332 // value. The 'tmp' array is used for temporaries and MUST be large enough to 333 // accommodate at least two temporary values with the same size as 'm' 334 // (including the leading 'bit length' word). If there is room for more 335 // temporaries, then this function may use the extra room for window-based 336 // optimisation, resulting in faster computations. 337 // 338 // Returned value is 1 on success, 0 on error. An error is reported if the 339 // provided 'tmp' array is too short. 340 export fn modpow( 341 x: []word, 342 e: const []u8, 343 m: const []word, 344 m0i: u32, 345 tmp: []word, 346 ) u32 = { 347 assert(m[1] & 1 == 1, "'m' must be odd"); 348 assert(equ32(x[0], m[0]) == 1); 349 350 // Get modulus size. 351 let mwlen = ewordlen(m) + 1; 352 const tmplen = (mwlen & 1) + mwlen; 353 let t1 = tmp[..tmplen]; 354 let t2 = tmp[tmplen..]; 355 356 const twlen = len(tmp); 357 if (twlen < (mwlen << 1)) { 358 return 0; 359 }; 360 361 // Compute possible window size, with a maximum of 5 bits. 362 // When the window has size 1 bit, we use a specific code 363 // that requires only two temporaries. Otherwise, for a 364 // window of k bits, we need 2^k+1 temporaries. 365 let win_len: word = 5; 366 for (win_len > 1; win_len -= 1) { 367 if (((1u32 << win_len: u32) + 1) * mwlen <= twlen) { 368 break; 369 }; 370 }; 371 372 // Everything is done in Montgomery representation. 373 tomonty(x, m); 374 375 // Compute window contents. If the window has size one bit only, 376 // then t2 is set to x; otherwise, t2[0] is left untouched, and 377 // t2[k] is set to x^k (for k >= 1). 378 if (win_len == 1) { 379 t2[..mwlen] = x[..mwlen]; 380 } else { 381 let base = t2[mwlen..]; 382 base[..mwlen] = x[..mwlen]; 383 for (let u = 2z; u < (1u << win_len: uint); u += 1) { 384 montymul(base[mwlen..2 * mwlen], 385 base[..mwlen], x, m, m0i); 386 base = base[mwlen..]; 387 }; 388 }; 389 390 // We need to set x to 1, in Montgomery representation. This can 391 // be done efficiently by setting the high word to 1, then doing 392 // one word-sized shift. 393 zero(x, m[0]); 394 x[ewordlen(m)] = 1; 395 muladd_small(x, 0, m); 396 397 let elen = len(e); 398 let e = e[..]; 399 400 // We process bits from most to least significant. At each 401 // loop iteration, we have acc_len bits in acc. 402 let acc: word = 0; 403 let acc_len = 0i; 404 for (acc_len > 0 || elen > 0) { 405 // Get the next bits. 406 let k = win_len; 407 if (acc_len: u32 < win_len) { 408 if (elen > 0) { 409 acc = (acc << 8) | e[0]; 410 e = e[1..]; 411 elen -= 1; 412 acc_len += 8; 413 } else { 414 k = acc_len: u32; 415 }; 416 }; 417 418 let bits: u32 = (acc >> (acc_len: u32 - k: u32)) 419 & ((1u32 << k: u32) - 1u32); 420 acc_len -= k: int; 421 422 // We could get exactly k bits. Compute k squarings. 423 for (let i = 0z; i < k: size; i += 1) { 424 montymul(t1, x, x, m, m0i); 425 // memcpy(x, t1, mlen); 426 x[..mwlen] = t1[..mwlen]; 427 }; 428 429 // Window lookup: we want to set t2 to the window 430 // lookup value, assuming the bits are non-zero. If 431 // the window length is 1 bit only, then t2 is 432 // already set; otherwise, we do a constant-time lookup. 433 if (win_len > 1) { 434 zero(t2, m[0]); 435 let base = t2[mwlen..]; 436 for (let u = 1u32; u < (1 << k): size; u += 1) { 437 let mask: u32 = -equ32(u, bits); 438 for (let v = 1z; v < mwlen; v += 1) { 439 t2[v] |= mask & base[v]; 440 }; 441 base = base[mwlen..]; 442 }; 443 }; 444 445 // Multiply with the looked-up value. We keep the 446 // product only if the exponent bits are not all-zero. 447 montymul(t1, x, t2, m, m0i); 448 449 ccopyword(nequ32(bits, 0), x[..mwlen], t1[..mwlen]); 450 }; 451 452 // Convert back from Montgomery representation, and exit. 453 frommonty(x, m, m0i); 454 return 1; 455 };