hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

prime.ha (23598B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 // Ported from BearSSL
      5 //
      6 // Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
      7 //
      8 // Permission is hereby granted, free of charge, to any person obtaining
      9 // a copy of this software and associated documentation files (the
     10 // "Software"), to deal in the Software without restriction, including
     11 // without limitation the rights to use, copy, modify, merge, publish,
     12 // distribute, sublicense, and/or sell copies of the Software, and to
     13 // permit persons to whom the Software is furnished to do so, subject to
     14 // the following conditions:
     15 //
     16 // The above copyright notice and this permission notice shall be
     17 // included in all copies or substantial portions of the Software.
     18 //
     19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     20 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     22 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     23 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     24 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     25 // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     26 // SOFTWARE.
     27 
     28 use crypto::math::*;
     29 use crypto::bigint::*;
     30 
     31 
     32 type curveparams = struct {
     33 	p: []word,
     34 	b: []word,
     35 	r2: []word,
     36 	p0i: word,
     37 	pointlen: size,
     38 	g: []u8,
     39 };
     40 
     41 // Parameters for supported curves (field modulus, and 'b' equation
     42 // parameter; both values use the 'i31' format, and 'b' is in Montgomery
     43 // representation).
     44 const p384params = curveparams {
     45 	p = [
     46 		0x0000018C, 0x7FFFFFFF, 0x00000001, 0x00000000, 0x7FFFFFF8,
     47 		0x7FFFFFEF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
     48 		0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x00000FFF
     49 	],
     50 	b = [
     51 		0x0000018C, 0x6E666840, 0x070D0392, 0x5D810231, 0x7651D50C,
     52 		0x17E218D6, 0x1B192002, 0x44EFE441, 0x3A524E2B, 0x2719BA5F,
     53 		0x41F02209, 0x36C5643E, 0x5813EFFE, 0x000008A5
     54 	],
     55 	r2 = [
     56 		0x0000018C, 0x00000000, 0x00000080, 0x7FFFFE00, 0x000001FF,
     57 		0x00000800, 0x00000000, 0x7FFFE000, 0x00001FFF, 0x00008000,
     58 		0x00008000, 0x00000000, 0x00000000, 0x00000000
     59 	],
     60 	p0i = 0x00000001,
     61 	pointlen = 97,
     62 	g = [ // XXX: use P384_G, when possible
     63 		0x04, 0xaa, 0x87, 0xca, 0x22, 0xbe, 0x8b, 0x05, 0x37, 0x8e,
     64 		0xb1, 0xc7, 0x1e, 0xf3, 0x20, 0xad, 0x74, 0x6e, 0x1d, 0x3b,
     65 		0x62, 0x8b, 0xa7, 0x9b, 0x98, 0x59, 0xf7, 0x41, 0xe0, 0x82,
     66 		0x54, 0x2a, 0x38, 0x55, 0x02, 0xf2, 0x5d, 0xbf, 0x55, 0x29,
     67 		0x6c, 0x3a, 0x54, 0x5e, 0x38, 0x72, 0x76, 0x0a, 0xb7, 0x36,
     68 		0x17, 0xde, 0x4a, 0x96, 0x26, 0x2c, 0x6f, 0x5d, 0x9e, 0x98,
     69 		0xbf, 0x92, 0x92, 0xdc, 0x29, 0xf8, 0xf4, 0x1d, 0xbd, 0x28,
     70 		0x9a, 0x14, 0x7c, 0xe9, 0xda, 0x31, 0x13, 0xb5, 0xf0, 0xb8,
     71 		0xc0, 0x0a, 0x60, 0xb1, 0xce, 0x1d, 0x7e, 0x81, 0x9d, 0x7a,
     72 		0x43, 0x1d, 0x7c, 0x90, 0xea, 0x0e, 0x5f,
     73 	],
     74 };
     75 
     76 const p521params = curveparams {
     77 	p = [
     78 		0x00000219, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
     79 		0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
     80 		0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
     81 		0x7FFFFFFF, 0x7FFFFFFF, 0x01FFFFFF
     82 	],
     83 	b = [
     84 		0x00000219, 0x540FC00A, 0x228FEA35, 0x2C34F1EF, 0x67BF107A,
     85 		0x46FC1CD5, 0x1605E9DD, 0x6937B165, 0x272A3D8F, 0x42785586,
     86 		0x44C8C778, 0x15F3B8B4, 0x64B73366, 0x03BA8B69, 0x0D05B42A,
     87 		0x21F929A2, 0x2C31C393, 0x00654FAE
     88 	],
     89 	r2 = [
     90 		0x00000219, 0x00001000, 0x00000000, 0x00000000, 0x00000000,
     91 		0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
     92 		0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
     93 		0x00000000, 0x00000000, 0x00000000
     94 	],
     95 	p0i = 0x00000001,
     96 	pointlen = 133,
     97 	g = [ // XXX: use P384_G, when possible
     98 		0x04, 0x00, 0xC6, 0x85, 0x8E, 0x06, 0xB7, 0x04, 0x04, 0xE9,
     99 		0xCD, 0x9E, 0x3E, 0xCB, 0x66, 0x23, 0x95, 0xB4, 0x42, 0x9C,
    100 		0x64, 0x81, 0x39, 0x05, 0x3F, 0xB5, 0x21, 0xF8, 0x28, 0xAF,
    101 		0x60, 0x6B, 0x4D, 0x3D, 0xBA, 0xA1, 0x4B, 0x5E, 0x77, 0xEF,
    102 		0xE7, 0x59, 0x28, 0xFE, 0x1D, 0xC1, 0x27, 0xA2, 0xFF, 0xA8,
    103 		0xDE, 0x33, 0x48, 0xB3, 0xC1, 0x85, 0x6A, 0x42, 0x9B, 0xF9,
    104 		0x7E, 0x7E, 0x31, 0xC2, 0xE5, 0xBD, 0x66, 0x01, 0x18, 0x39,
    105 		0x29, 0x6A, 0x78, 0x9A, 0x3B, 0xC0, 0x04, 0x5C, 0x8A, 0x5F,
    106 		0xB4, 0x2C, 0x7D, 0x1B, 0xD9, 0x98, 0xF5, 0x44, 0x49, 0x57,
    107 		0x9B, 0x44, 0x68, 0x17, 0xAF, 0xBD, 0x17, 0x27, 0x3E, 0x66,
    108 		0x2C, 0x97, 0xEE, 0x72, 0x99, 0x5E, 0xF4, 0x26, 0x40, 0xC5,
    109 		0x50, 0xB9, 0x01, 0x3F, 0xAD, 0x07, 0x61, 0x35, 0x3C, 0x70,
    110 		0x86, 0xA2, 0x72, 0xC2, 0x40, 0x88, 0xBE, 0x94, 0x76, 0x9F,
    111 		0xD1, 0x66, 0x50,
    112 	],
    113 };
    114 
    115 @test fn bigint_support() void = {
    116 	// This is a port of the i31 variant from BearSSL. Word must be an u32
    117 	// in order for this code to work.
    118 	static assert(size(word) == 4);
    119 };
    120 
    121 def MAX_COORDSZ = (MAX_COORDBITSZ + 61) / 31;
    122 
    123 type prime_jacobian = struct {
    124 	// TODO things would be a lot easier if this is a flat array
    125 	c: [3][MAX_COORDSZ]word,
    126 };
    127 type reg = []word;
    128 
    129 // XXX: BearSSL is using memcpy. Can I do this here also?
    130 fn jcpy(d: *prime_jacobian, a: *prime_jacobian) void = {
    131 	for (let i = 0z; i < len(d.c); i += 1) {
    132 		d.c[i][..] = a.c[i][..]; // XXX: is this copy ct?
    133 	};
    134 };
    135 
    136 fn jccpy(ctl: u32, d: *prime_jacobian, a: *prime_jacobian) void = {
    137 	for (let i = 0z; i < len(d.c); i += 1) {
    138 		ccopyu32(ctl, d.c[i]: []u32, a.c[i][..]: []u32);
    139 	};
    140 };
    141 
    142 fn mset(d: reg, a: reg) void = {
    143 	for (let i = 0z; i < len(d); i += 1) {
    144 		d[i] = a[i];
    145 	};
    146 };
    147 
    148 fn madd(d: reg, a: reg, cc: *curveparams) void = {
    149 	let ctl = add(d, a, 1);
    150 	ctl |= notu32(sub(d, cc.p, 0));
    151 	sub(d, cc.p, ctl);
    152 };
    153 
    154 fn msub(d: reg, a: reg, cc: *curveparams) void = {
    155 	add(d, cc.p, sub(d, a, 1));
    156 };
    157 
    158 fn mmul(d: reg, a: reg, b: reg, cc: *curveparams) void = {
    159 	montymul(d, a, b, cc.p, cc.p0i);
    160 };
    161 
    162 fn minv(d: reg, a: reg, b: reg, cc: *curveparams) void = {
    163 	let tp: [MAX_POINTSZ / 2]u8 = [0...];
    164 	let plen = (cc.p[0] - (cc.p[0] >> 5) + 7) >> 3;
    165 	decode(tp[..plen], cc.p);
    166 	tp[plen - 1] -= 2;
    167 
    168 	// XXX: change modpow to use two bigints as buf, like it was intended
    169 	let buf: [2 * MAX_COORDSZ]word = [0...];
    170 	modpow(d, tp[..plen], cc.p, cc.p0i, buf);
    171 };
    172 
    173 fn prime_zero(p: *prime_jacobian, cc: *curveparams) void = {
    174 	for (let i = 0z; i < len(p.c); i += 1) {
    175 		for (let j = 0z; j < len(p.c[i]); j += 1) {
    176 			p.c[i][j] = 0;
    177 		};
    178 	};
    179 
    180 	p.c[0][0] = cc.p[0];
    181 	p.c[1][0] = cc.p[0];
    182 	p.c[2][0] = cc.p[0];
    183 };
    184 
    185 // Doubling formulas are:
    186 //
    187 //   s = 4*x*y^2
    188 //   m = 3*(x + z^2)*(x - z^2)
    189 //   x' = m^2 - 2*s
    190 //   y' = m*(s - x') - 8*y^4
    191 //   z' = 2*y*z
    192 //
    193 // If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
    194 // should. This case should not happen anyway, because our curves have
    195 // prime order, and thus do not contain any point of order 2.
    196 //
    197 // If P is infinity (z = 0), then again the formulas yield infinity,
    198 // which is correct. Thus, this code works for all points.
    199 //
    200 // Cost: 8 multiplications
    201 fn point_double(p: *prime_jacobian, cc: *curveparams) void = {
    202 	let px: [MAX_COORDSZ]word = p.c[0];
    203 	let py: [MAX_COORDSZ]word = p.c[1];
    204 	let pz: [MAX_COORDSZ]word = p.c[2];
    205 	let t1: [MAX_COORDSZ]word = [0...];
    206 	let t2: [MAX_COORDSZ]word = [0...];
    207 	let t3: [MAX_COORDSZ]word = [0...];
    208 	let t4: [MAX_COORDSZ]word = [0...];
    209 
    210 	// Compute z^2 (in t1).
    211 	mmul(t1, pz, pz, cc);
    212 
    213 	// Compute x-z^2 (in t2) and then x+z^2 (in t1).
    214 	mset(t2, px);
    215 	msub(t2, t1, cc);
    216 	madd(t1, px, cc);
    217 
    218 	// Compute m = 3*(x+z^2)*(x-z^2) (in t1).
    219 	mmul(t3, t1, t2, cc);
    220 	mset(t1, t3);
    221 	madd(t1, t3, cc);
    222 	madd(t1, t3, cc);
    223 
    224 	// Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
    225 	mmul(t3, py, py, cc);
    226 	madd(t3, t3, cc);
    227 	mmul(t2, px, t3, cc);
    228 	madd(t2, t2, cc);
    229 
    230 	// Compute x' = m^2 - 2*s.
    231 	mmul(px, t1, t1, cc);
    232 	msub(px, t2, cc);
    233 	msub(px, t2, cc);
    234 
    235 	// Compute z' = 2*y*z.
    236 	mmul(t4, py, pz, cc);
    237 	mset(pz, t4);
    238 	madd(pz, t4, cc);
    239 
    240 	// Compute y' = m*(s - x') - 8*y^4. Note that we already have
    241 	// 2*y^2 in t3.
    242 	msub(t2, px, cc);
    243 	mmul(py, t1, t2, cc);
    244 	mmul(t4, t3, t3, cc);
    245 	msub(py, t4, cc);
    246 	msub(py, t4, cc);
    247 
    248 	// copy back result
    249 	p.c[0][..] = px[..];
    250 	p.c[1][..] = py[..];
    251 	p.c[2][..] = pz[..];
    252 };
    253 
    254 // Addtions formulas are:
    255 //
    256 //   u1 = x1 * z2^2
    257 //   u2 = x2 * z1^2
    258 //   s1 = y1 * z2^3
    259 //   s2 = y2 * z1^3
    260 //   h = u2 - u1
    261 //   r = s2 - s1
    262 //   x3 = r^2 - h^3 - 2 * u1 * h^2
    263 //   y3 = r * (u1 * h^2 - x3) - s1 * h^3
    264 //   z3 = h * z1 * z2
    265 //
    266 // If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
    267 // z3 == 0, so the result is correct.
    268 // If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
    269 // not correct.
    270 // h == 0 only if u1 == u2; this happens in two cases:
    271 // -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
    272 // -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
    273 //
    274 // Thus, the following situations are not handled correctly:
    275 // -- P1 = 0 and P2 != 0
    276 // -- P1 != 0 and P2 = 0
    277 // -- P1 = P2
    278 // All other cases are properly computed. However, even in "incorrect"
    279 // situations, the three coordinates still are properly formed field
    280 // elements.
    281 //
    282 // The returned flag is cleared if r == 0. This happens in the following
    283 // cases:
    284 // -- Both points are on the same horizontal line (same Y coordinate).
    285 // -- Both points are infinity.
    286 // -- One point is infinity and the other is on line Y = 0.
    287 // The third case cannot happen with our curves (there is no valid point
    288 // on line Y = 0 since that would be a point of order 2). If the two
    289 // source points are non-infinity, then remains only the case where the
    290 // two points are on the same horizontal line.
    291 //
    292 // This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
    293 // P2 != 0:
    294 // -- If the returned value is not the point at infinity, then it was properly
    295 // computed.
    296 // -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
    297 // is indeed the point at infinity.
    298 // -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
    299 // use the 'double' code.
    300 //
    301 // Cost: 16 multiplications
    302 fn point_add(p1: *prime_jacobian, p2: *prime_jacobian, cc: *curveparams) u32 = {
    303 	let p1x: [MAX_COORDSZ]word = p1.c[0];
    304 	let p1y: [MAX_COORDSZ]word = p1.c[1];
    305 	let p1z: [MAX_COORDSZ]word = p1.c[2];
    306 	let p2x: [MAX_COORDSZ]word = p2.c[0];
    307 	let p2y: [MAX_COORDSZ]word = p2.c[1];
    308 	let p2z: [MAX_COORDSZ]word = p2.c[2];
    309 	let t1: [MAX_COORDSZ]word = [0...];
    310 	let t2: [MAX_COORDSZ]word = [0...];
    311 	let t3: [MAX_COORDSZ]word = [0...];
    312 	let t4: [MAX_COORDSZ]word = [0...];
    313 	let t5: [MAX_COORDSZ]word = [0...];
    314 	let t6: [MAX_COORDSZ]word = [0...];
    315 	let t7: [MAX_COORDSZ]word = [0...];
    316 
    317 	let r: u32 = 1;
    318 
    319 	// Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
    320 	mmul(t3, p2z, p2z, cc);
    321 	mmul(t1, p1x, t3, cc);
    322 	mmul(t4, p2z, t3, cc);
    323 	mmul(t3, p1y, t4, cc);
    324 
    325 	// Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
    326 	mmul(t4, p1z, p1z, cc);
    327 	mmul(t2, p2x, t4, cc);
    328 	mmul(t5, p1z, t4, cc);
    329 	mmul(t4, p2y, t5, cc);
    330 
    331 	// Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
    332 	msub(t2, t1, cc);
    333 	msub(t4, t3, cc);
    334 
    335 	// Report cases where r = 0 through the returned flag.
    336 	r &= ~iszero(t4);
    337 
    338 	// Compute u1*h^2 (in t6) and h^3 (in t5).
    339 	mmul(t7, t2, t2, cc);
    340 	mmul(t6, t1, t7, cc);
    341 	mmul(t5, t7, t2, cc);
    342 
    343 	// Compute x3 = r^2 - h^3 - 2*u1*h^2.
    344 	// t1 and t7 can be used as scratch registers.
    345 	mmul(p1x, t4, t4, cc);
    346 	msub(p1x, t5, cc);
    347 	msub(p1x, t6, cc);
    348 	msub(p1x, t6, cc);
    349 
    350 	// Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
    351 	msub(t6, p1x, cc);
    352 	mmul(p1y, t4, t6, cc);
    353 	mmul(t1, t5, t3, cc);
    354 	msub(p1y, t1, cc);
    355 
    356 	// Compute z3 = h*z1*z2.
    357 	mmul(t1, p1z, p2z, cc);
    358 	mmul(p1z, t1, t2, cc);
    359 
    360 	// copy back result
    361 	p1.c[0][..] = p1x[..];
    362 	p1.c[1][..] = p1y[..];
    363 	p1.c[2][..] = p1z[..];
    364 
    365 	return r;
    366 };
    367 
    368 // Check that the point is on the curve. This code snippet assumes the
    369 // following conventions:
    370 // -- Coordinates x and y have been freshly decoded in P1 (but not
    371 // converted to Montgomery coordinates yet).
    372 // -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
    373 fn prime_check(p1: *prime_jacobian, p2: *prime_jacobian, cc: *curveparams) u32 = {
    374 	let p1x: [MAX_COORDSZ]word = p1.c[0];
    375 	let p1y: [MAX_COORDSZ]word = p1.c[1];
    376 	let p1z: [MAX_COORDSZ]word = p1.c[2];
    377 	let p2x: [MAX_COORDSZ]word = p2.c[0];
    378 	let p2y: [MAX_COORDSZ]word = p2.c[1];
    379 	let p2z: [MAX_COORDSZ]word = p2.c[2];
    380 	let t1: [MAX_COORDSZ]word = [0...];
    381 	let t2: [MAX_COORDSZ]word = [0...];
    382 
    383 	let r: u32 = 1;
    384 
    385 	// Convert x and y to Montgomery representation.
    386 	mmul(t1, p1x, p2x, cc);
    387 	mmul(t2, p1y, p2x, cc);
    388 	mset(p1x, t1);
    389 	mset(p1y, t2);
    390 
    391 	// Compute x^3 in t1. */
    392 	mmul(t2, p1x, p1x, cc);
    393 	mmul(t1, p1x, t2, cc);
    394 
    395 	// Subtract 3*x from t1. */
    396 	msub(t1, p1x, cc);
    397 	msub(t1, p1x, cc);
    398 	msub(t1, p1x, cc);
    399 
    400 	// Add b. */
    401 	madd(t1, p2y, cc);
    402 
    403 	// Compute y^2 in t2. */
    404 	mmul(t2, p1y, p1y, cc);
    405 
    406 	// Compare y^2 with x^3 - 3*x + b; they must match. */
    407 	msub(t1, t2, cc);
    408 	r &= ~iszero(t1);
    409 
    410 	// Set z to 1 (in Montgomery representation). */
    411 	mmul(p1z, p2x, p2z, cc);
    412 
    413 	// copy back result
    414 	p1.c[0][..] = p1x[..];
    415 	p1.c[1][..] = p1y[..];
    416 	p1.c[2][..] = p1z[..];
    417 
    418 	return r;
    419 };
    420 
    421 // Conversion back to affine coordinates. This code snippet assumes that
    422 // the z coordinate of P2 is set to 1 (not in Montgomery representation).
    423 fn prime_affine(p1: *prime_jacobian, p2: *prime_jacobian, cc: *curveparams) void = {
    424 	let p1x: [MAX_COORDSZ]word = p1.c[0];
    425 	let p1y: [MAX_COORDSZ]word = p1.c[1];
    426 	let p1z: [MAX_COORDSZ]word = p1.c[2];
    427 	let p2x: [MAX_COORDSZ]word = p2.c[0];
    428 	let p2y: [MAX_COORDSZ]word = p2.c[1];
    429 	let p2z: [MAX_COORDSZ]word = p2.c[2];
    430 	let t1: [MAX_COORDSZ]word = [0...];
    431 	let t2: [MAX_COORDSZ]word = [0...];
    432 	let t3: [MAX_COORDSZ]word = [0...];
    433 	let t4: [MAX_COORDSZ]word = [0...];
    434 	let t5: [MAX_COORDSZ]word = [0...];
    435 	let t6: [MAX_COORDSZ]word = [0...];
    436 	let t7: [MAX_COORDSZ]word = [0...];
    437 
    438 	let r: u32 = 1;
    439 
    440 	// Save z*R in t1. */
    441 	mset(t1, p1z);
    442 
    443 	// Compute z^3 in t2. */
    444 	mmul(t2, p1z, p1z, cc);
    445 	mmul(t3, p1z, t2, cc);
    446 	mmul(t2, t3, p2z, cc);
    447 
    448 	// Invert to (1/z^3) in t2. */
    449 	minv(t2, t3, t4, cc);
    450 
    451 	// Compute y. */
    452 	mset(t3, p1y);
    453 	mmul(p1y, t2, t3, cc);
    454 
    455 	// Compute (1/z^2) in t3. */
    456 	mmul(t3, t2, t1, cc);
    457 
    458 	// Compute x. */
    459 	mset(t2, p1x);
    460 	mmul(p1x, t2, t3, cc);
    461 
    462 	// copy back result
    463 	p1.c[0][..] = p1x[..];
    464 	p1.c[1][..] = p1y[..];
    465 	p1.c[2][..] = p1z[..];
    466 };
    467 
    468 fn prime_setone(x: []word, p: []word) void = {
    469 	for (let i = 0z; i < len(x); i += 1) {
    470 		x[i] = 0;
    471 	};
    472 	x[0] = p[0];
    473 	x[1] = 1;
    474 };
    475 
    476 fn prime_pointzero(p: *prime_jacobian) void = {
    477 	// XXX: bytes::zero?
    478 	for (let i = 0z; i < len(p.c); i += 1) {
    479 		for (let j = 0z; j < len(p.c[i]); j += 1) {
    480 			p.c[i][j] = 0;
    481 		};
    482 	};
    483 };
    484 
    485 fn point_mul(p: *prime_jacobian, x: []u8, cc: *curveparams) void = {
    486 	// We do a simple double-and-add ladder with a 2-bit window
    487 	// to make only one add every two doublings. We thus first
    488 	// precompute 2P and 3P in some local buffers.
    489 	//
    490 	// We always perform two doublings and one addition; the
    491 	// addition is with P, 2P and 3P and is done in a temporary
    492 	// array.
    493 	//
    494 	// The addition code cannot handle cases where one of the
    495 	// operands is infinity, which is the case at the start of the
    496 	// ladder. We therefore need to maintain a flag that controls
    497 	// this situation.
    498 	let p2 = prime_jacobian { ... };
    499 	let p3 = prime_jacobian { ... };
    500 	let q = prime_jacobian { ... };
    501 	let t = prime_jacobian { ... };
    502 	let u = prime_jacobian { ... };
    503 
    504 	jcpy(&p2, p);
    505 	point_double(&p2, cc);
    506 
    507 	jcpy(&p3, p);
    508 	point_add(&p3, &p2, cc);
    509 
    510 	prime_zero(&q, cc);
    511 	let qz: u32 = 1;
    512 	for (let i = 0z; i < len(x); i += 1) {
    513 		for (let k: i32 = 6; k >= 0; k -= 2) {
    514 			point_double(&q, cc);
    515 			point_double(&q, cc);
    516 			jcpy(&t, p);
    517 			jcpy(&u, &q);
    518 			let bits: u32 = (x[i]: u32 >> k: u32) & 3;
    519 			let bnz: u32 = nequ32(bits, 0);
    520 			jccpy(equ32(bits, 2), &t, &p2);
    521 			jccpy(equ32(bits, 3), &t, &p3);
    522 			point_add(&u, &t, cc);
    523 			jccpy(bnz & qz, &q, &t);
    524 			jccpy(bnz & ~qz, &q, &u);
    525 			qz &= ~bnz;
    526 		};
    527 	};
    528 	jcpy(p, &q);
    529 };
    530 
    531 // Decode point into Jacobian coordinates. This function does not support
    532 // the point at infinity. If the point is invalid then this returns 0, but
    533 // the coordinates are still set to properly formed field elements.
    534 fn point_decode(p: *prime_jacobian, src: []u8, cc: *curveparams) u32 = {
    535 	// Points must use uncompressed format:
    536 	// -- first byte is 0x04;
    537 	// -- coordinates X and Y use unsigned big-endian, with the same
    538 	//    length as the field modulus.
    539 	//
    540 	// We don't support hybrid format (uncompressed, but first byte
    541 	// has value 0x06 or 0x07, depending on the least significant bit
    542 	// of Y) because it is rather useless, and explicitly forbidden
    543 	// by PKIX (RFC 5480, section 2.2).
    544 	//
    545 	// We don't support compressed format either, because it is not
    546 	// much used in practice (there are or were patent-related
    547 	// concerns about point compression, which explains the lack of
    548 	// generalised support). Also, point compression support would
    549 	// need a bit more code.
    550 	let q = prime_jacobian { ... };
    551 
    552 	let buf = src;
    553 	prime_pointzero(p);
    554 	let plen: size = (cc.p[0] - (cc.p[0] >> 5) + 7) >> 3;
    555 	if (len(src) != 1 + (plen << 1)) {
    556 		return 0;
    557 	};
    558 	let r: u32 = encodemod(p.c[0], buf[1..1 + plen], cc.p);
    559 	r &= encodemod(p.c[1], buf[1 + plen..1 + 2*plen], cc.p);
    560 
    561 	// Check first byte.
    562 	r &= equ32(buf[0], 0x04);
    563 
    564 	// Convert coordinates and check that the point is valid.
    565 	let zlen: size = ((cc.p[0] + 63) >> 5);
    566 
    567 	q.c[0][..zlen] = cc.r2[..zlen];
    568 	q.c[1][..zlen] = cc.b[..zlen];
    569 	prime_setone(q.c[2], cc.p);
    570 
    571 	r &= ~prime_check(p, &q, cc);
    572 	return r;
    573 };
    574 
    575 // Encode a point. This method assumes that the point is correct and is
    576 // not the point at infinity. Encoded size is always 1+2*plen, where
    577 // plen is the field modulus length, in bytes.
    578 fn point_encode(dest: []u8, p: *prime_jacobian, cc: *curveparams) void = {
    579 	let q = prime_jacobian { ... };
    580 	let t = prime_jacobian { ... };
    581 
    582 	let xbl: u32 = cc.p[0];
    583 	xbl -= (xbl >> 5);
    584 	let plen: size = (xbl + 7) >> 3;
    585 	dest[0] = 0x04;
    586 	jcpy(&q, p);
    587 	prime_setone(t.c[2], cc.p);
    588 
    589 	prime_affine(&q, &t, cc);
    590 	decode(dest[1..1+plen], q.c[0]);
    591 	decode(dest[1+plen..1+2*plen], q.c[1]);
    592 };
    593 
    594 fn prime_mul(g: []u8, x: []u8, cc: *curveparams) u32 = {
    595 	if (len(g) != cc.pointlen) {
    596 		return 0;
    597 	};
    598 	let p = prime_jacobian { ... };
    599 	let r = point_decode(&p, g, cc);
    600 	point_mul(&p, x, cc);
    601 	point_encode(g, &p, cc);
    602 
    603 	return r;
    604 };
    605 
    606 const P384_G: [_]u8 = [
    607 	0x04, 0xaa, 0x87, 0xca, 0x22, 0xbe, 0x8b, 0x05, 0x37, 0x8e, 0xb1, 0xc7,
    608 	0x1e, 0xf3, 0x20, 0xad, 0x74, 0x6e, 0x1d, 0x3b, 0x62, 0x8b, 0xa7, 0x9b,
    609 	0x98, 0x59, 0xf7, 0x41, 0xe0, 0x82, 0x54, 0x2a, 0x38, 0x55, 0x02, 0xf2,
    610 	0x5d, 0xbf, 0x55, 0x29, 0x6c, 0x3a, 0x54, 0x5e, 0x38, 0x72, 0x76, 0x0a,
    611 	0xb7, 0x36, 0x17, 0xde, 0x4a, 0x96, 0x26, 0x2c, 0x6f, 0x5d, 0x9e, 0x98,
    612 	0xbf, 0x92, 0x92, 0xdc, 0x29, 0xf8, 0xf4, 0x1d, 0xbd, 0x28, 0x9a, 0x14,
    613 	0x7c, 0xe9, 0xda, 0x31, 0x13, 0xb5, 0xf0, 0xb8, 0xc0, 0x0a, 0x60, 0xb1,
    614 	0xce, 0x1d, 0x7e, 0x81, 0x9d, 0x7a, 0x43, 0x1d, 0x7c, 0x90, 0xea, 0x0e,
    615 	0x5f,
    616 ];
    617 
    618 fn p384_generator() const []u8 = P384_G;
    619 
    620 const P384_N: [_]u8 = [
    621 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    622 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    623 	0xc7, 0x63, 0x4d, 0x81, 0xf4, 0x37, 0x2d, 0xdf, 0x58, 0x1a, 0x0d, 0xb2,
    624 	0x48, 0xb0, 0xa7, 0x7a, 0xec, 0xec, 0x19, 0x6a, 0xcc, 0xc5, 0x29, 0x73,
    625 ];
    626 
    627 fn p384_order() const []u8 = P384_N;
    628 
    629 fn p384_mul(p: []u8, x: []u8) u32 = {
    630 	return prime_mul(p, x, &p384params);
    631 };
    632 
    633 
    634 fn p384_mulgen(r: []u8, x: []u8) size = {
    635 	const g = P384_G[..];
    636 	r[..len(g)] = P384_G[..];
    637 	p384_mul(r[..len(g)], x);
    638 	return len(g);
    639 };
    640 
    641 fn prime_muladd(cc: *curveparams, a: []u8, b: []u8, x: []u8, y: []u8) u32 = {
    642 	let p = prime_jacobian { ... };
    643 	let q = prime_jacobian { ... };
    644 
    645 	// TODO: see about merging the two ladders. Right now, we do
    646 	// two independent point multiplications, which is a bit
    647 	// wasteful of CPU resources (but yields short code).
    648 
    649 	if (len(a) != cc.pointlen) {
    650 		return 0;
    651 	};
    652 	let r = point_decode(&p, a, cc);
    653 	if (len(b) == 0) {
    654 		b = cc.g[..];
    655 	};
    656 	r &= point_decode(&q, b, cc);
    657 	point_mul(&p, x, cc);
    658 	point_mul(&q, y, cc);
    659 
    660 	// We want to compute P+Q. Since the base points A and B are distinct
    661 	// from infinity, and the multipliers are non-zero and lower than the
    662 	// curve order, then we know that P and Q are non-infinity. This
    663 	// leaves two special situations to test for:
    664 	// -- If P = Q then we must use point_double().
    665 	// -- If P+Q = 0 then we must report an error.
    666 	let t = point_add(&p, &q, cc);
    667 	point_double(&q, cc);
    668 	let z = iszero(p.c[2]);
    669 
    670 	// If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
    671 	// have the following:
    672 	//
    673 	//   z = 0, t = 0   return P (normal addition)
    674 	//   z = 0, t = 1   return P (normal addition)
    675 	//   z = 1, t = 0   return Q (a 'double' case)
    676 	//   z = 1, t = 1   report an error (P+Q = 0)
    677 	jccpy(z & ~t, &p, &q);
    678 	point_encode(a, &p, cc);
    679 	r &= ~(z & t);
    680 
    681 	return r;
    682 };
    683 
    684 fn p384_muladd(a: []u8, b: []u8, x: []u8, y: []u8) u32 =
    685 	prime_muladd(&p384params, a, b, x, y);
    686 
    687 const _p384: curve = curve {
    688 	pointsz = P384_POINTSZ,
    689 	order = &p384_order,
    690 	generator = &p384_generator,
    691 	mul = &p384_mul,
    692 	mulgen = &p384_mulgen,
    693 	muladd = &p384_muladd,
    694 	keygen = &mask_keygen,
    695 };
    696 
    697 // Size of a [[p384]] point in bytes.
    698 export def P384_POINTSZ = 97;
    699 
    700 // Size of a [[p384]] scalar in bytes.
    701 export def P384_SCALARSZ = 48;
    702 
    703 // A [[curve]] implementation of P-384, also known as secp384r1;
    704 //
    705 // The point size is defined by [[P384_POINTSZ]] and the scalar size is defined
    706 // by [[P384_SCALARSZ]. See the documentation of [[curve]] on how to encode
    707 // such values.
    708 export const p384: *curve = &_p384;
    709 
    710 const P521_N: [_]u8 = [
    711 	0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    712 	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    713 	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFA, 0x51, 0x86,
    714 	0x87, 0x83, 0xBF, 0x2F, 0x96, 0x6B, 0x7F, 0xCC, 0x01, 0x48, 0xF7, 0x09,
    715 	0xA5, 0xD0, 0x3B, 0xB5, 0xC9, 0xB8, 0x89, 0x9C, 0x47, 0xAE, 0xBB, 0x6F,
    716 	0xB7, 0x1E, 0x91, 0x38, 0x64, 0x09,
    717 ];
    718 
    719 fn p521_order() const []u8 = P521_N;
    720 
    721 const P521_G: [_]u8 = [
    722 	0x04, 0x00, 0xC6, 0x85, 0x8E, 0x06, 0xB7, 0x04, 0x04, 0xE9, 0xCD, 0x9E,
    723 	0x3E, 0xCB, 0x66, 0x23, 0x95, 0xB4, 0x42, 0x9C, 0x64, 0x81, 0x39, 0x05,
    724 	0x3F, 0xB5, 0x21, 0xF8, 0x28, 0xAF, 0x60, 0x6B, 0x4D, 0x3D, 0xBA, 0xA1,
    725 	0x4B, 0x5E, 0x77, 0xEF, 0xE7, 0x59, 0x28, 0xFE, 0x1D, 0xC1, 0x27, 0xA2,
    726 	0xFF, 0xA8, 0xDE, 0x33, 0x48, 0xB3, 0xC1, 0x85, 0x6A, 0x42, 0x9B, 0xF9,
    727 	0x7E, 0x7E, 0x31, 0xC2, 0xE5, 0xBD, 0x66, 0x01, 0x18, 0x39, 0x29, 0x6A,
    728 	0x78, 0x9A, 0x3B, 0xC0, 0x04, 0x5C, 0x8A, 0x5F, 0xB4, 0x2C, 0x7D, 0x1B,
    729 	0xD9, 0x98, 0xF5, 0x44, 0x49, 0x57, 0x9B, 0x44, 0x68, 0x17, 0xAF, 0xBD,
    730 	0x17, 0x27, 0x3E, 0x66, 0x2C, 0x97, 0xEE, 0x72, 0x99, 0x5E, 0xF4, 0x26,
    731 	0x40, 0xC5, 0x50, 0xB9, 0x01, 0x3F, 0xAD, 0x07, 0x61, 0x35, 0x3C, 0x70,
    732 	0x86, 0xA2, 0x72, 0xC2, 0x40, 0x88, 0xBE, 0x94, 0x76, 0x9F, 0xD1, 0x66,
    733 	0x50
    734 ];
    735 
    736 fn p521_generator() const []u8 = P521_G;
    737 
    738 
    739 fn p521_mul(p: []u8, x: []u8) u32 = {
    740 	return prime_mul(p, x, &p521params);
    741 };
    742 
    743 fn p521_mulgen(r: []u8, x: []u8) size = {
    744 	const g = P521_G[..];
    745 	r[..len(g)] = P521_G[..];
    746 	p521_mul(r[..len(g)], x);
    747 	return len(g);
    748 };
    749 
    750 fn p521_muladd(a: []u8, b: []u8, x: []u8, y: []u8) u32 =
    751 	prime_muladd(&p521params, a, b, x, y);
    752 
    753 const _p521: curve = curve {
    754 	pointsz = P521_POINTSZ,
    755 	order = &p521_order,
    756 	generator = &p521_generator,
    757 	mul = &p521_mul,
    758 	mulgen = &p521_mulgen,
    759 	muladd = &p521_muladd,
    760 	keygen = &mask_keygen,
    761 };
    762 
    763 // Size of a [[p521]] point in bytes.
    764 export def P521_POINTSZ = 133;
    765 
    766 // Size of a [[p521]] scalar in bytes.
    767 export def P521_SCALARSZ = 66;
    768 
    769 // A [[curve]] implementation of P-521, also known as secp521r1;
    770 //
    771 // The point size is defined by [[P521_POINTSZ]] and the scalar size is defined
    772 // by [[P521_SCALARSZ]. See the documentation of [[curve]] on how to encode
    773 // such values.
    774 export const p521: *curve = &_p521;