monty.ha (5679B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 // The following code was initially ported from BearSSL. 5 // 6 // Copyright (c) 2017 Thomas Pornin <pornin@bolet.org> 7 // 8 // Permission is hereby granted, free of charge, to any person obtaining a copy 9 // of this software and associated documentation files (the "Software"), to deal 10 // in the Software without restriction, including without limitation the rights 11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 // copies of the Software, and to permit persons to whom the Software is 13 // furnished to do so, subject to the following conditions: 14 // 15 // The above copyright notice and this permission notice shall be included in 16 // all copies or substantial portions of the Software. 17 // 18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 // SOFTWARE. 25 use crypto::math::*; 26 27 // Convert a modular integer to Montgomery representation. The integer 'x' must 28 // be lower than 'm', but with the same announced bit length. 29 export fn tomonty(x: []word, m: const []word) void = { 30 assert(equ32(x[0], m[0]) == 1); 31 for (let k: u32 = ewordlen(m); k > 0; k -= 1) { 32 muladd_small(x, 0, m); 33 }; 34 }; 35 36 // Convert a modular integer back from Montgomery representation. The integer 37 // 'x' must be less than 'm', but with the same announced bit length. The 'm0i' 38 // parameter is equal to -(1/m0) mod 2^32, where m0 is the least significant 39 // value word of 'm' (this works only if 'm' is an odd integer). See [[ninv]] 40 // for how to get this value. 41 export fn frommonty(x: []word, m: const []word, m0i: word) void = { 42 assert(equ32(x[0], m[0]) == 1); 43 const mlen = ewordlen(m); 44 for (let u = 0z; u < mlen; u += 1) { 45 const f: u32 = mul31_lo(x[1], m0i); 46 let cc: u64 = 0; 47 for (let v = 0z; v < mlen; v += 1) { 48 const z: u64 = x[v + 1]: u64 + mul31(f, m[v + 1]) + cc; 49 cc = z >> 31; 50 if (v != 0) { 51 x[v] = z: word & WORD_BITMASK; 52 }; 53 }; 54 x[mlen] = cc: u32; 55 }; 56 57 // We may have to do an extra subtraction, but only if the 58 // value in x[] is indeed greater than or equal to that of m[], 59 // which is why we must do two calls (first call computes the 60 // carry, second call performs the subtraction only if the carry 61 // is 0). 62 sub(x, m, notu32(sub(x, m, 0))); 63 }; 64 65 // Compute a modular Montgomery multiplication. 'd' is filled with the value of 66 // 'x'*'y'/R modulo 'm' (where R is the Montgomery factor). The array 'd' must 67 // be distinct from 'x', 'y' and 'm'. 'x' and 'y' must be numerically lower than 68 // 'm'. 'x' and 'y' may be the same array. The 'm0i' parameter is equal to 69 // -(1/m0) mod 2^32, where m0 is the least significant value word of 'm' (this 70 // works only if 'm' is an odd integer). See [[ninv]] for how to get this value. 71 export fn montymul( 72 d: []word, 73 x: const []word, 74 y: const []word, 75 m: const []word, 76 m0i: u32 77 ) void = { 78 // Each outer loop iteration computes: 79 // d <- (d + xu*y + f*m) / 2^31 80 // We have xu <= 2^31-1 and f <= 2^31-1. 81 // Thus, if d <= 2*m-1 on input, then: 82 // 2*m-1 + 2*(2^31-1)*m <= (2^32)*m-1 83 // and the new d value is less than 2*m. 84 // 85 // We represent d over 31-bit words, with an extra word 'dh' 86 // which can thus be only 0 or 1. 87 let v: size = 0; 88 89 const mlen = ewordlen(m); 90 const len4 = mlen & ~3: size; 91 zero(d, m[0]); 92 let dh: u32 = 0; 93 for (let u = 0z; u < mlen; u += 1) { 94 // The carry for each operation fits on 32 bits: 95 // d[v+1] <= 2^31-1 96 // xu*y[v+1] <= (2^31-1)*(2^31-1) 97 // f*m[v+1] <= (2^31-1)*(2^31-1) 98 // r <= 2^32-1 99 // (2^31-1) + 2*(2^31-1)*(2^31-1) + (2^32-1) = 2^63 - 2^31 100 // After division by 2^31, the new r is then at most 2^32-1 101 // 102 // Using a 32-bit carry has performance benefits on 32-bit 103 // systems; however, on 64-bit architectures, we prefer to 104 // keep the carry (r) in a 64-bit register, thus avoiding some 105 // "clear high bits" operations. 106 107 const xu: u32 = x[u + 1]; 108 const f: u32 = mul31_lo((d[1] + mul31_lo(x[u + 1], y[1])), m0i); 109 110 let r: u64 = 0; // TODO if !BR_64 u32 111 v = 0; 112 for (v < len4; v += 4) { 113 let z: u64 = d[v + 1]: u64 + mul31(xu, y[v + 1]) 114 + mul31(f, m[v + 1]) + r; 115 r = z >> 31; 116 d[v + 0] = z: u32 & 0x7FFFFFFF; 117 z = d[v + 2]: u64 + mul31(xu, y[v + 2]) 118 + mul31(f, m[v + 2]) + r; 119 r = z >> 31; 120 d[v + 1] = z: u32 & 0x7FFFFFFF; 121 z = d[v + 3]: u64 + mul31(xu, y[v + 3]) 122 + mul31(f, m[v + 3]) + r; 123 r = z >> 31; 124 d[v + 2] = z: u32 & 0x7FFFFFFF; 125 z = d[v + 4]: u64 + mul31(xu, y[v + 4]) 126 + mul31(f, m[v + 4]) + r; 127 r = z >> 31; 128 d[v + 3] = z: u32 & 0x7FFFFFFF; 129 }; 130 for (v < mlen; v += 1) { 131 const z: u64 = d[v + 1]: u64 + mul31(xu, y[v + 1]) 132 + mul31(f, m[v + 1]) + r; 133 r = z >> 31; 134 d[v] = z: u32 & 0x7FFFFFFF; 135 }; 136 137 // Since the new dh can only be 0 or 1, the addition of 138 // the old dh with the carry MUST fit on 32 bits, and 139 // thus can be done into dh itself. 140 dh += r: u32; 141 d[mlen] = dh & 0x7FFFFFFF; 142 dh >>= 31; 143 }; 144 145 // We must write back the bit length because it was overwritten in 146 // the loop (not overwriting it would require a test in the loop, 147 // which would yield bigger and slower code). 148 d[0] = m[0]; 149 150 // d[] may still be greater than m[] at that point; notably, the 151 // 'dh' word may be non-zero. 152 sub(d, m, nequ32(dh, 0) | notu32(sub(d, m, 0))); 153 };