hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

prime.ha (23302B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 // Ported from BearSSL
      5 //
      6 // Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
      7 //
      8 // Permission is hereby granted, free of charge, to any person obtaining
      9 // a copy of this software and associated documentation files (the
     10 // "Software"), to deal in the Software without restriction, including
     11 // without limitation the rights to use, copy, modify, merge, publish,
     12 // distribute, sublicense, and/or sell copies of the Software, and to
     13 // permit persons to whom the Software is furnished to do so, subject to
     14 // the following conditions:
     15 //
     16 // The above copyright notice and this permission notice shall be
     17 // included in all copies or substantial portions of the Software.
     18 //
     19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     20 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     22 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     23 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     24 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     25 // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     26 // SOFTWARE.
     27 
     28 use bytes;
     29 use crypto::math::*;
     30 use crypto::bigint::*;
     31 use io;
     32 
     33 
     34 type curveparams = struct {
     35 	p: []word,
     36 	b: []word,
     37 	r2: []word,
     38 	p0i: word,
     39 	pointlen: size,
     40 	g: []u8,
     41 };
     42 
     43 // Parameters for supported curves (field modulus, and 'b' equation
     44 // parameter; both values use the 'i31' format, and 'b' is in Montgomery
     45 // representation).
     46 const p384params = curveparams {
     47 	p = [
     48 		0x0000018C, 0x7FFFFFFF, 0x00000001, 0x00000000, 0x7FFFFFF8,
     49 		0x7FFFFFEF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
     50 		0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x00000FFF
     51 	],
     52 	b = [
     53 		0x0000018C, 0x6E666840, 0x070D0392, 0x5D810231, 0x7651D50C,
     54 		0x17E218D6, 0x1B192002, 0x44EFE441, 0x3A524E2B, 0x2719BA5F,
     55 		0x41F02209, 0x36C5643E, 0x5813EFFE, 0x000008A5
     56 	],
     57 	r2 = [
     58 		0x0000018C, 0x00000000, 0x00000080, 0x7FFFFE00, 0x000001FF,
     59 		0x00000800, 0x00000000, 0x7FFFE000, 0x00001FFF, 0x00008000,
     60 		0x00008000, 0x00000000, 0x00000000, 0x00000000
     61 	],
     62 	p0i = 0x00000001,
     63 	pointlen = 97,
     64 	g = [ // XXX: use P384_G, when possible
     65 		0x04, 0xaa, 0x87, 0xca, 0x22, 0xbe, 0x8b, 0x05, 0x37, 0x8e,
     66 		0xb1, 0xc7, 0x1e, 0xf3, 0x20, 0xad, 0x74, 0x6e, 0x1d, 0x3b,
     67 		0x62, 0x8b, 0xa7, 0x9b, 0x98, 0x59, 0xf7, 0x41, 0xe0, 0x82,
     68 		0x54, 0x2a, 0x38, 0x55, 0x02, 0xf2, 0x5d, 0xbf, 0x55, 0x29,
     69 		0x6c, 0x3a, 0x54, 0x5e, 0x38, 0x72, 0x76, 0x0a, 0xb7, 0x36,
     70 		0x17, 0xde, 0x4a, 0x96, 0x26, 0x2c, 0x6f, 0x5d, 0x9e, 0x98,
     71 		0xbf, 0x92, 0x92, 0xdc, 0x29, 0xf8, 0xf4, 0x1d, 0xbd, 0x28,
     72 		0x9a, 0x14, 0x7c, 0xe9, 0xda, 0x31, 0x13, 0xb5, 0xf0, 0xb8,
     73 		0xc0, 0x0a, 0x60, 0xb1, 0xce, 0x1d, 0x7e, 0x81, 0x9d, 0x7a,
     74 		0x43, 0x1d, 0x7c, 0x90, 0xea, 0x0e, 0x5f,
     75 	],
     76 };
     77 
     78 const p521params = curveparams {
     79 	p = [
     80 		0x00000219, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
     81 		0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
     82 		0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
     83 		0x7FFFFFFF, 0x7FFFFFFF, 0x01FFFFFF
     84 	],
     85 	b = [
     86 		0x00000219, 0x540FC00A, 0x228FEA35, 0x2C34F1EF, 0x67BF107A,
     87 		0x46FC1CD5, 0x1605E9DD, 0x6937B165, 0x272A3D8F, 0x42785586,
     88 		0x44C8C778, 0x15F3B8B4, 0x64B73366, 0x03BA8B69, 0x0D05B42A,
     89 		0x21F929A2, 0x2C31C393, 0x00654FAE
     90 	],
     91 	r2 = [
     92 		0x00000219, 0x00001000, 0x00000000, 0x00000000, 0x00000000,
     93 		0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
     94 		0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
     95 		0x00000000, 0x00000000, 0x00000000
     96 	],
     97 	p0i = 0x00000001,
     98 	pointlen = 133,
     99 	g = [ // XXX: use P384_G, when possible
    100 		0x04, 0x00, 0xC6, 0x85, 0x8E, 0x06, 0xB7, 0x04, 0x04, 0xE9,
    101 		0xCD, 0x9E, 0x3E, 0xCB, 0x66, 0x23, 0x95, 0xB4, 0x42, 0x9C,
    102 		0x64, 0x81, 0x39, 0x05, 0x3F, 0xB5, 0x21, 0xF8, 0x28, 0xAF,
    103 		0x60, 0x6B, 0x4D, 0x3D, 0xBA, 0xA1, 0x4B, 0x5E, 0x77, 0xEF,
    104 		0xE7, 0x59, 0x28, 0xFE, 0x1D, 0xC1, 0x27, 0xA2, 0xFF, 0xA8,
    105 		0xDE, 0x33, 0x48, 0xB3, 0xC1, 0x85, 0x6A, 0x42, 0x9B, 0xF9,
    106 		0x7E, 0x7E, 0x31, 0xC2, 0xE5, 0xBD, 0x66, 0x01, 0x18, 0x39,
    107 		0x29, 0x6A, 0x78, 0x9A, 0x3B, 0xC0, 0x04, 0x5C, 0x8A, 0x5F,
    108 		0xB4, 0x2C, 0x7D, 0x1B, 0xD9, 0x98, 0xF5, 0x44, 0x49, 0x57,
    109 		0x9B, 0x44, 0x68, 0x17, 0xAF, 0xBD, 0x17, 0x27, 0x3E, 0x66,
    110 		0x2C, 0x97, 0xEE, 0x72, 0x99, 0x5E, 0xF4, 0x26, 0x40, 0xC5,
    111 		0x50, 0xB9, 0x01, 0x3F, 0xAD, 0x07, 0x61, 0x35, 0x3C, 0x70,
    112 		0x86, 0xA2, 0x72, 0xC2, 0x40, 0x88, 0xBE, 0x94, 0x76, 0x9F,
    113 		0xD1, 0x66, 0x50,
    114 	],
    115 };
    116 
    117 @test fn bigint_support() void = {
    118 	// This is a port of the i31 variant from BearSSL. Word must be an u32
    119 	// in order for this code to work.
    120 	static assert(size(word) == 4);
    121 };
    122 
    123 def MAX_COORDSZ = (MAX_COORDBITSZ + 61) / 31;
    124 
    125 type prime_jacobian = struct {
    126 	// TODO things would be a lot easier if this is a flat array
    127 	c: [3][MAX_COORDSZ]word,
    128 };
    129 type reg = []word;
    130 
    131 // XXX: BearSSL is using memcpy. Can I do this here also?
    132 fn jcpy(d: *prime_jacobian, a: *prime_jacobian) void = {
    133 	for (let i = 0z; i < len(d.c); i += 1) {
    134 		d.c[i][..] = a.c[i][..]; // XXX: is this copy ct?
    135 	};
    136 };
    137 
    138 fn jccpy(ctl: u32, d: *prime_jacobian, a: *prime_jacobian) void = {
    139 	for (let i = 0z; i < len(d.c); i += 1) {
    140 		ccopyu32(ctl, d.c[i]: []u32, a.c[i][..]: []u32);
    141 	};
    142 };
    143 
    144 fn mset(d: reg, a: reg) void = {
    145 	for (let i = 0z; i < len(d); i += 1) {
    146 		d[i] = a[i];
    147 	};
    148 };
    149 
    150 fn madd(d: reg, a: reg, cc: *curveparams) void = {
    151 	let ctl = add(d, a, 1);
    152 	ctl |= notu32(sub(d, cc.p, 0));
    153 	sub(d, cc.p, ctl);
    154 };
    155 
    156 fn msub(d: reg, a: reg, cc: *curveparams) void = {
    157 	add(d, cc.p, sub(d, a, 1));
    158 };
    159 
    160 fn mmul(d: reg, a: reg, b: reg, cc: *curveparams) void = {
    161 	montymul(d, a, b, cc.p, cc.p0i);
    162 };
    163 
    164 fn minv(d: reg, a: reg, b: reg, cc: *curveparams) void = {
    165 	let tp: [MAX_POINTSZ / 2]u8 = [0...];
    166 	let plen = (cc.p[0] - (cc.p[0] >> 5) + 7) >> 3;
    167 	decode(tp[..plen], cc.p);
    168 	tp[plen - 1] -= 2;
    169 
    170 	// XXX: change modpow to use two bigints as buf, like it was intended
    171 	let buf: [2 * MAX_COORDSZ]word = [0...];
    172 	modpow(d, tp[..plen], cc.p, cc.p0i, buf);
    173 };
    174 
    175 fn prime_zero(p: *prime_jacobian, cc: *curveparams) void = {
    176 	for (let i = 0z; i < len(p.c); i += 1) {
    177 		for (let j = 0z; j < len(p.c[i]); j += 1) {
    178 			p.c[i][j] = 0;
    179 		};
    180 	};
    181 
    182 	p.c[0][0] = cc.p[0];
    183 	p.c[1][0] = cc.p[0];
    184 	p.c[2][0] = cc.p[0];
    185 };
    186 
    187 // Doubling formulas are:
    188 //
    189 //   s = 4*x*y^2
    190 //   m = 3*(x + z^2)*(x - z^2)
    191 //   x' = m^2 - 2*s
    192 //   y' = m*(s - x') - 8*y^4
    193 //   z' = 2*y*z
    194 //
    195 // If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
    196 // should. This case should not happen anyway, because our curves have
    197 // prime order, and thus do not contain any point of order 2.
    198 //
    199 // If P is infinity (z = 0), then again the formulas yield infinity,
    200 // which is correct. Thus, this code works for all points.
    201 //
    202 // Cost: 8 multiplications
    203 fn point_double(p: *prime_jacobian, cc: *curveparams) void = {
    204 	let px: [MAX_COORDSZ]word = p.c[0];
    205 	let py: [MAX_COORDSZ]word = p.c[1];
    206 	let pz: [MAX_COORDSZ]word = p.c[2];
    207 	let t1: [MAX_COORDSZ]word = [0...];
    208 	let t2: [MAX_COORDSZ]word = [0...];
    209 	let t3: [MAX_COORDSZ]word = [0...];
    210 	let t4: [MAX_COORDSZ]word = [0...];
    211 
    212 	// Compute z^2 (in t1).
    213 	mmul(t1, pz, pz, cc);
    214 
    215 	// Compute x-z^2 (in t2) and then x+z^2 (in t1).
    216 	mset(t2, px);
    217 	msub(t2, t1, cc);
    218 	madd(t1, px, cc);
    219 
    220 	// Compute m = 3*(x+z^2)*(x-z^2) (in t1).
    221 	mmul(t3, t1, t2, cc);
    222 	mset(t1, t3);
    223 	madd(t1, t3, cc);
    224 	madd(t1, t3, cc);
    225 
    226 	// Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
    227 	mmul(t3, py, py, cc);
    228 	madd(t3, t3, cc);
    229 	mmul(t2, px, t3, cc);
    230 	madd(t2, t2, cc);
    231 
    232 	// Compute x' = m^2 - 2*s.
    233 	mmul(px, t1, t1, cc);
    234 	msub(px, t2, cc);
    235 	msub(px, t2, cc);
    236 
    237 	// Compute z' = 2*y*z.
    238 	mmul(t4, py, pz, cc);
    239 	mset(pz, t4);
    240 	madd(pz, t4, cc);
    241 
    242 	// Compute y' = m*(s - x') - 8*y^4. Note that we already have
    243 	// 2*y^2 in t3.
    244 	msub(t2, px, cc);
    245 	mmul(py, t1, t2, cc);
    246 	mmul(t4, t3, t3, cc);
    247 	msub(py, t4, cc);
    248 	msub(py, t4, cc);
    249 
    250 	// copy back result
    251 	p.c[0][..] = px[..];
    252 	p.c[1][..] = py[..];
    253 	p.c[2][..] = pz[..];
    254 };
    255 
    256 // Addtions formulas are:
    257 //
    258 //   u1 = x1 * z2^2
    259 //   u2 = x2 * z1^2
    260 //   s1 = y1 * z2^3
    261 //   s2 = y2 * z1^3
    262 //   h = u2 - u1
    263 //   r = s2 - s1
    264 //   x3 = r^2 - h^3 - 2 * u1 * h^2
    265 //   y3 = r * (u1 * h^2 - x3) - s1 * h^3
    266 //   z3 = h * z1 * z2
    267 //
    268 // If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
    269 // z3 == 0, so the result is correct.
    270 // If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
    271 // not correct.
    272 // h == 0 only if u1 == u2; this happens in two cases:
    273 // -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
    274 // -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
    275 //
    276 // Thus, the following situations are not handled correctly:
    277 // -- P1 = 0 and P2 != 0
    278 // -- P1 != 0 and P2 = 0
    279 // -- P1 = P2
    280 // All other cases are properly computed. However, even in "incorrect"
    281 // situations, the three coordinates still are properly formed field
    282 // elements.
    283 //
    284 // The returned flag is cleared if r == 0. This happens in the following
    285 // cases:
    286 // -- Both points are on the same horizontal line (same Y coordinate).
    287 // -- Both points are infinity.
    288 // -- One point is infinity and the other is on line Y = 0.
    289 // The third case cannot happen with our curves (there is no valid point
    290 // on line Y = 0 since that would be a point of order 2). If the two
    291 // source points are non-infinity, then remains only the case where the
    292 // two points are on the same horizontal line.
    293 //
    294 // This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
    295 // P2 != 0:
    296 // -- If the returned value is not the point at infinity, then it was properly
    297 // computed.
    298 // -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
    299 // is indeed the point at infinity.
    300 // -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
    301 // use the 'double' code.
    302 //
    303 // Cost: 16 multiplications
    304 fn point_add(p1: *prime_jacobian, p2: *prime_jacobian, cc: *curveparams) u32 = {
    305 	let p1x: [MAX_COORDSZ]word = p1.c[0];
    306 	let p1y: [MAX_COORDSZ]word = p1.c[1];
    307 	let p1z: [MAX_COORDSZ]word = p1.c[2];
    308 	let p2x: [MAX_COORDSZ]word = p2.c[0];
    309 	let p2y: [MAX_COORDSZ]word = p2.c[1];
    310 	let p2z: [MAX_COORDSZ]word = p2.c[2];
    311 	let t1: [MAX_COORDSZ]word = [0...];
    312 	let t2: [MAX_COORDSZ]word = [0...];
    313 	let t3: [MAX_COORDSZ]word = [0...];
    314 	let t4: [MAX_COORDSZ]word = [0...];
    315 	let t5: [MAX_COORDSZ]word = [0...];
    316 	let t6: [MAX_COORDSZ]word = [0...];
    317 	let t7: [MAX_COORDSZ]word = [0...];
    318 
    319 	let r: u32 = 1;
    320 
    321 	// Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
    322 	mmul(t3, p2z, p2z, cc);
    323 	mmul(t1, p1x, t3, cc);
    324 	mmul(t4, p2z, t3, cc);
    325 	mmul(t3, p1y, t4, cc);
    326 
    327 	// Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
    328 	mmul(t4, p1z, p1z, cc);
    329 	mmul(t2, p2x, t4, cc);
    330 	mmul(t5, p1z, t4, cc);
    331 	mmul(t4, p2y, t5, cc);
    332 
    333 	// Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
    334 	msub(t2, t1, cc);
    335 	msub(t4, t3, cc);
    336 
    337 	// Report cases where r = 0 through the returned flag.
    338 	r &= ~iszero(t4);
    339 
    340 	// Compute u1*h^2 (in t6) and h^3 (in t5).
    341 	mmul(t7, t2, t2, cc);
    342 	mmul(t6, t1, t7, cc);
    343 	mmul(t5, t7, t2, cc);
    344 
    345 	// Compute x3 = r^2 - h^3 - 2*u1*h^2.
    346 	// t1 and t7 can be used as scratch registers.
    347 	mmul(p1x, t4, t4, cc);
    348 	msub(p1x, t5, cc);
    349 	msub(p1x, t6, cc);
    350 	msub(p1x, t6, cc);
    351 
    352 	// Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
    353 	msub(t6, p1x, cc);
    354 	mmul(p1y, t4, t6, cc);
    355 	mmul(t1, t5, t3, cc);
    356 	msub(p1y, t1, cc);
    357 
    358 	// Compute z3 = h*z1*z2.
    359 	mmul(t1, p1z, p2z, cc);
    360 	mmul(p1z, t1, t2, cc);
    361 
    362 	// copy back result
    363 	p1.c[0][..] = p1x[..];
    364 	p1.c[1][..] = p1y[..];
    365 	p1.c[2][..] = p1z[..];
    366 
    367 	return r;
    368 };
    369 
    370 // Check that the point is on the curve. This code snippet assumes the
    371 // following conventions:
    372 // -- Coordinates x and y have been freshly decoded in P1 (but not
    373 // converted to Montgomery coordinates yet).
    374 // -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
    375 fn prime_check(p1: *prime_jacobian, p2: *prime_jacobian, cc: *curveparams) u32 = {
    376 	let p1x: [MAX_COORDSZ]word = p1.c[0];
    377 	let p1y: [MAX_COORDSZ]word = p1.c[1];
    378 	let p1z: [MAX_COORDSZ]word = p1.c[2];
    379 	let p2x: [MAX_COORDSZ]word = p2.c[0];
    380 	let p2y: [MAX_COORDSZ]word = p2.c[1];
    381 	let p2z: [MAX_COORDSZ]word = p2.c[2];
    382 	let t1: [MAX_COORDSZ]word = [0...];
    383 	let t2: [MAX_COORDSZ]word = [0...];
    384 
    385 	let r: u32 = 1;
    386 
    387 	// Convert x and y to Montgomery representation.
    388 	mmul(t1, p1x, p2x, cc);
    389 	mmul(t2, p1y, p2x, cc);
    390 	mset(p1x, t1);
    391 	mset(p1y, t2);
    392 
    393 	// Compute x^3 in t1. */
    394 	mmul(t2, p1x, p1x, cc);
    395 	mmul(t1, p1x, t2, cc);
    396 
    397 	// Subtract 3*x from t1. */
    398 	msub(t1, p1x, cc);
    399 	msub(t1, p1x, cc);
    400 	msub(t1, p1x, cc);
    401 
    402 	// Add b. */
    403 	madd(t1, p2y, cc);
    404 
    405 	// Compute y^2 in t2. */
    406 	mmul(t2, p1y, p1y, cc);
    407 
    408 	// Compare y^2 with x^3 - 3*x + b; they must match. */
    409 	msub(t1, t2, cc);
    410 	r &= ~iszero(t1);
    411 
    412 	// Set z to 1 (in Montgomery representation). */
    413 	mmul(p1z, p2x, p2z, cc);
    414 
    415 	// copy back result
    416 	p1.c[0][..] = p1x[..];
    417 	p1.c[1][..] = p1y[..];
    418 	p1.c[2][..] = p1z[..];
    419 
    420 	return r;
    421 };
    422 
    423 // Conversion back to affine coordinates. This code snippet assumes that
    424 // the z coordinate of P2 is set to 1 (not in Montgomery representation).
    425 fn prime_affine(p1: *prime_jacobian, p2: *prime_jacobian, cc: *curveparams) void = {
    426 	let p1x: [MAX_COORDSZ]word = p1.c[0];
    427 	let p1y: [MAX_COORDSZ]word = p1.c[1];
    428 	let p1z: [MAX_COORDSZ]word = p1.c[2];
    429 	let p2x: [MAX_COORDSZ]word = p2.c[0];
    430 	let p2y: [MAX_COORDSZ]word = p2.c[1];
    431 	let p2z: [MAX_COORDSZ]word = p2.c[2];
    432 	let t1: [MAX_COORDSZ]word = [0...];
    433 	let t2: [MAX_COORDSZ]word = [0...];
    434 	let t3: [MAX_COORDSZ]word = [0...];
    435 	let t4: [MAX_COORDSZ]word = [0...];
    436 	let t5: [MAX_COORDSZ]word = [0...];
    437 	let t6: [MAX_COORDSZ]word = [0...];
    438 	let t7: [MAX_COORDSZ]word = [0...];
    439 
    440 	let r: u32 = 1;
    441 
    442 	// Save z*R in t1. */
    443 	mset(t1, p1z);
    444 
    445 	// Compute z^3 in t2. */
    446 	mmul(t2, p1z, p1z, cc);
    447 	mmul(t3, p1z, t2, cc);
    448 	mmul(t2, t3, p2z, cc);
    449 
    450 	// Invert to (1/z^3) in t2. */
    451 	minv(t2, t3, t4, cc);
    452 
    453 	// Compute y. */
    454 	mset(t3, p1y);
    455 	mmul(p1y, t2, t3, cc);
    456 
    457 	// Compute (1/z^2) in t3. */
    458 	mmul(t3, t2, t1, cc);
    459 
    460 	// Compute x. */
    461 	mset(t2, p1x);
    462 	mmul(p1x, t2, t3, cc);
    463 
    464 	// copy back result
    465 	p1.c[0][..] = p1x[..];
    466 	p1.c[1][..] = p1y[..];
    467 	p1.c[2][..] = p1z[..];
    468 };
    469 
    470 fn prime_setone(x: []word, p: []word) void = {
    471 	for (let i = 0z; i < len(x); i += 1) {
    472 		x[i] = 0;
    473 	};
    474 	x[0] = p[0];
    475 	x[1] = 1;
    476 };
    477 
    478 fn prime_pointzero(p: *prime_jacobian) void = {
    479 	// XXX: bytes::zero?
    480 	for (let i = 0z; i < len(p.c); i += 1) {
    481 		for (let j = 0z; j < len(p.c[i]); j += 1) {
    482 			p.c[i][j] = 0;
    483 		};
    484 	};
    485 };
    486 
    487 fn point_mul(p: *prime_jacobian, x: []u8, cc: *curveparams) void = {
    488 	// We do a simple double-and-add ladder with a 2-bit window
    489 	// to make only one add every two doublings. We thus first
    490 	// precompute 2P and 3P in some local buffers.
    491 	//
    492 	// We always perform two doublings and one addition; the
    493 	// addition is with P, 2P and 3P and is done in a temporary
    494 	// array.
    495 	//
    496 	// The addition code cannot handle cases where one of the
    497 	// operands is infinity, which is the case at the start of the
    498 	// ladder. We therefore need to maintain a flag that controls
    499 	// this situation.
    500 	let p2 = prime_jacobian { ... };
    501 	let p3 = prime_jacobian { ... };
    502 	let q = prime_jacobian { ... };
    503 	let t = prime_jacobian { ... };
    504 	let u = prime_jacobian { ... };
    505 
    506 	jcpy(&p2, p);
    507 	point_double(&p2, cc);
    508 
    509 	jcpy(&p3, p);
    510 	point_add(&p3, &p2, cc);
    511 
    512 	prime_zero(&q, cc);
    513 	let qz: u32 = 1;
    514 	for (let i = 0z; i < len(x); i += 1) {
    515 		for (let k: i32 = 6; k >= 0; k -= 2) {
    516 			point_double(&q, cc);
    517 			point_double(&q, cc);
    518 			jcpy(&t, p);
    519 			jcpy(&u, &q);
    520 			let bits: u32 = (x[i]: u32 >> k: u32) & 3;
    521 			let bnz: u32 = nequ32(bits, 0);
    522 			jccpy(equ32(bits, 2), &t, &p2);
    523 			jccpy(equ32(bits, 3), &t, &p3);
    524 			point_add(&u, &t, cc);
    525 			jccpy(bnz & qz, &q, &t);
    526 			jccpy(bnz & ~qz, &q, &u);
    527 			qz &= ~bnz;
    528 		};
    529 	};
    530 	jcpy(p, &q);
    531 };
    532 
    533 // Decode point into Jacobian coordinates. This function does not support
    534 // the point at infinity. If the point is invalid then this returns 0, but
    535 // the coordinates are still set to properly formed field elements.
    536 fn point_decode(p: *prime_jacobian, src: []u8, cc: *curveparams) u32 = {
    537 	// Points must use uncompressed format:
    538 	// -- first byte is 0x04;
    539 	// -- coordinates X and Y use unsigned big-endian, with the same
    540 	//    length as the field modulus.
    541 	//
    542 	// We don't support hybrid format (uncompressed, but first byte
    543 	// has value 0x06 or 0x07, depending on the least significant bit
    544 	// of Y) because it is rather useless, and explicitly forbidden
    545 	// by PKIX (RFC 5480, section 2.2).
    546 	//
    547 	// We don't support compressed format either, because it is not
    548 	// much used in practice (there are or were patent-related
    549 	// concerns about point compression, which explains the lack of
    550 	// generalised support). Also, point compression support would
    551 	// need a bit more code.
    552 	let q = prime_jacobian { ... };
    553 
    554 	let buf = src;
    555 	prime_pointzero(p);
    556 	let plen: size = (cc.p[0] - (cc.p[0] >> 5) + 7) >> 3;
    557 	if (len(src) != 1 + (plen << 1)) {
    558 		return 0;
    559 	};
    560 	let r: u32 = encodemod(p.c[0], buf[1..1 + plen], cc.p);
    561 	r &= encodemod(p.c[1], buf[1 + plen..1 + 2*plen], cc.p);
    562 
    563 	// Check first byte.
    564 	r &= equ32(buf[0], 0x04);
    565 
    566 	// Convert coordinates and check that the point is valid.
    567 	let zlen: size = ((cc.p[0] + 63) >> 5);
    568 
    569 	q.c[0][..zlen] = cc.r2[..zlen];
    570 	q.c[1][..zlen] = cc.b[..zlen];
    571 	prime_setone(q.c[2], cc.p);
    572 
    573 	r &= ~prime_check(p, &q, cc);
    574 	return r;
    575 };
    576 
    577 // Encode a point. This method assumes that the point is correct and is
    578 // not the point at infinity. Encoded size is always 1+2*plen, where
    579 // plen is the field modulus length, in bytes.
    580 fn point_encode(dest: []u8, p: *prime_jacobian, cc: *curveparams) void = {
    581 	let q = prime_jacobian { ... };
    582 	let t = prime_jacobian { ... };
    583 
    584 	let xbl: u32 = cc.p[0];
    585 	xbl -= (xbl >> 5);
    586 	let plen: size = (xbl + 7) >> 3;
    587 	dest[0] = 0x04;
    588 	jcpy(&q, p);
    589 	prime_setone(t.c[2], cc.p);
    590 
    591 	prime_affine(&q, &t, cc);
    592 	decode(dest[1..1+plen], q.c[0]);
    593 	decode(dest[1+plen..1+2*plen], q.c[1]);
    594 };
    595 
    596 fn prime_mul(g: []u8, x: []u8, cc: *curveparams) u32 = {
    597 	if (len(g) != cc.pointlen) {
    598 		return 0;
    599 	};
    600 	let p = prime_jacobian { ... };
    601 	let r = point_decode(&p, g, cc);
    602 	point_mul(&p, x, cc);
    603 	point_encode(g, &p, cc);
    604 
    605 	return r;
    606 };
    607 
    608 const P384_G: [_]u8 = [
    609 	0x04, 0xaa, 0x87, 0xca, 0x22, 0xbe, 0x8b, 0x05, 0x37, 0x8e, 0xb1, 0xc7,
    610 	0x1e, 0xf3, 0x20, 0xad, 0x74, 0x6e, 0x1d, 0x3b, 0x62, 0x8b, 0xa7, 0x9b,
    611 	0x98, 0x59, 0xf7, 0x41, 0xe0, 0x82, 0x54, 0x2a, 0x38, 0x55, 0x02, 0xf2,
    612 	0x5d, 0xbf, 0x55, 0x29, 0x6c, 0x3a, 0x54, 0x5e, 0x38, 0x72, 0x76, 0x0a,
    613 	0xb7, 0x36, 0x17, 0xde, 0x4a, 0x96, 0x26, 0x2c, 0x6f, 0x5d, 0x9e, 0x98,
    614 	0xbf, 0x92, 0x92, 0xdc, 0x29, 0xf8, 0xf4, 0x1d, 0xbd, 0x28, 0x9a, 0x14,
    615 	0x7c, 0xe9, 0xda, 0x31, 0x13, 0xb5, 0xf0, 0xb8, 0xc0, 0x0a, 0x60, 0xb1,
    616 	0xce, 0x1d, 0x7e, 0x81, 0x9d, 0x7a, 0x43, 0x1d, 0x7c, 0x90, 0xea, 0x0e,
    617 	0x5f,
    618 ];
    619 
    620 fn p384_generator() const []u8 = P384_G;
    621 
    622 const P384_N: [_]u8 = [
    623 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    624 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    625 	0xc7, 0x63, 0x4d, 0x81, 0xf4, 0x37, 0x2d, 0xdf, 0x58, 0x1a, 0x0d, 0xb2,
    626 	0x48, 0xb0, 0xa7, 0x7a, 0xec, 0xec, 0x19, 0x6a, 0xcc, 0xc5, 0x29, 0x73,
    627 ];
    628 
    629 fn p384_order() const []u8 = P384_N;
    630 
    631 fn p384_mul(p: []u8, x: []u8) u32 = {
    632 	return prime_mul(p, x, &p384params);
    633 };
    634 
    635 
    636 fn p384_mulgen(r: []u8, x: []u8) size = {
    637 	const g = P384_G[..];
    638 	r[..len(g)] = P384_G[..];
    639 	p384_mul(r[..len(g)], x);
    640 	return len(g);
    641 };
    642 
    643 fn prime_muladd(cc: *curveparams, a: []u8, b: []u8, x: []u8, y: []u8) u32 = {
    644 	let p = prime_jacobian { ... };
    645 	let q = prime_jacobian { ... };
    646 
    647 	// TODO: see about merging the two ladders. Right now, we do
    648 	// two independent point multiplications, which is a bit
    649 	// wasteful of CPU resources (but yields short code).
    650 
    651 	if (len(a) != cc.pointlen) {
    652 		return 0;
    653 	};
    654 	let r = point_decode(&p, a, cc);
    655 	if (len(b) == 0) {
    656 		b = cc.g[..];
    657 	};
    658 	r &= point_decode(&q, b, cc);
    659 	point_mul(&p, x, cc);
    660 	point_mul(&q, y, cc);
    661 
    662 	// We want to compute P+Q. Since the base points A and B are distinct
    663 	// from infinity, and the multipliers are non-zero and lower than the
    664 	// curve order, then we know that P and Q are non-infinity. This
    665 	// leaves two special situations to test for:
    666 	// -- If P = Q then we must use point_double().
    667 	// -- If P+Q = 0 then we must report an error.
    668 	let t = point_add(&p, &q, cc);
    669 	point_double(&q, cc);
    670 	let z = iszero(p.c[2]);
    671 
    672 	// If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
    673 	// have the following:
    674 	//
    675 	//   z = 0, t = 0   return P (normal addition)
    676 	//   z = 0, t = 1   return P (normal addition)
    677 	//   z = 1, t = 0   return Q (a 'double' case)
    678 	//   z = 1, t = 1   report an error (P+Q = 0)
    679 	jccpy(z & ~t, &p, &q);
    680 	point_encode(a, &p, cc);
    681 	r &= ~(z & t);
    682 
    683 	return r;
    684 };
    685 
    686 fn p384_muladd(a: []u8, b: []u8, x: []u8, y: []u8) u32 =
    687 	prime_muladd(&p384params, a, b, x, y);
    688 
    689 const _p384: curve = curve {
    690 	pointsz = P384_POINTSZ,
    691 	order = &p384_order,
    692 	generator = &p384_generator,
    693 	mul = &p384_mul,
    694 	mulgen = &p384_mulgen,
    695 	muladd = &p384_muladd,
    696 	keygen = &mask_keygen,
    697 };
    698 
    699 // Size of a [[p384]] point in bytes.
    700 export def P384_POINTSZ = len(P384_G);
    701 
    702 // Size of a [[p384]] scalar in bytes.
    703 export def P384_SCALARSZ = len(P384_N);
    704 
    705 // A [[curve]] implementation of P-384, also known as secp384r1;
    706 export const p384: *curve = &_p384;
    707 
    708 const P521_N: [_]u8 = [
    709 	0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    710 	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    711 	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFA, 0x51, 0x86,
    712 	0x87, 0x83, 0xBF, 0x2F, 0x96, 0x6B, 0x7F, 0xCC, 0x01, 0x48, 0xF7, 0x09,
    713 	0xA5, 0xD0, 0x3B, 0xB5, 0xC9, 0xB8, 0x89, 0x9C, 0x47, 0xAE, 0xBB, 0x6F,
    714 	0xB7, 0x1E, 0x91, 0x38, 0x64, 0x09,
    715 ];
    716 
    717 fn p521_order() const []u8 = P521_N;
    718 
    719 const P521_G: [_]u8 = [
    720 	0x04, 0x00, 0xC6, 0x85, 0x8E, 0x06, 0xB7, 0x04, 0x04, 0xE9, 0xCD, 0x9E,
    721 	0x3E, 0xCB, 0x66, 0x23, 0x95, 0xB4, 0x42, 0x9C, 0x64, 0x81, 0x39, 0x05,
    722 	0x3F, 0xB5, 0x21, 0xF8, 0x28, 0xAF, 0x60, 0x6B, 0x4D, 0x3D, 0xBA, 0xA1,
    723 	0x4B, 0x5E, 0x77, 0xEF, 0xE7, 0x59, 0x28, 0xFE, 0x1D, 0xC1, 0x27, 0xA2,
    724 	0xFF, 0xA8, 0xDE, 0x33, 0x48, 0xB3, 0xC1, 0x85, 0x6A, 0x42, 0x9B, 0xF9,
    725 	0x7E, 0x7E, 0x31, 0xC2, 0xE5, 0xBD, 0x66, 0x01, 0x18, 0x39, 0x29, 0x6A,
    726 	0x78, 0x9A, 0x3B, 0xC0, 0x04, 0x5C, 0x8A, 0x5F, 0xB4, 0x2C, 0x7D, 0x1B,
    727 	0xD9, 0x98, 0xF5, 0x44, 0x49, 0x57, 0x9B, 0x44, 0x68, 0x17, 0xAF, 0xBD,
    728 	0x17, 0x27, 0x3E, 0x66, 0x2C, 0x97, 0xEE, 0x72, 0x99, 0x5E, 0xF4, 0x26,
    729 	0x40, 0xC5, 0x50, 0xB9, 0x01, 0x3F, 0xAD, 0x07, 0x61, 0x35, 0x3C, 0x70,
    730 	0x86, 0xA2, 0x72, 0xC2, 0x40, 0x88, 0xBE, 0x94, 0x76, 0x9F, 0xD1, 0x66,
    731 	0x50
    732 ];
    733 
    734 fn p521_generator() const []u8 = P521_G;
    735 
    736 
    737 fn p521_mul(p: []u8, x: []u8) u32 = {
    738 	return prime_mul(p, x, &p521params);
    739 };
    740 
    741 fn p521_mulgen(r: []u8, x: []u8) size = {
    742 	const g = P521_G[..];
    743 	r[..len(g)] = P521_G[..];
    744 	p521_mul(r[..len(g)], x);
    745 	return len(g);
    746 };
    747 
    748 fn p521_muladd(a: []u8, b: []u8, x: []u8, y: []u8) u32 =
    749 	prime_muladd(&p521params, a, b, x, y);
    750 
    751 const _p521: curve = curve {
    752 	pointsz = P521_POINTSZ,
    753 	order = &p521_order,
    754 	generator = &p521_generator,
    755 	mul = &p521_mul,
    756 	mulgen = &p521_mulgen,
    757 	muladd = &p521_muladd,
    758 	keygen = &mask_keygen,
    759 };
    760 
    761 // Size of a [[p521]] point in bytes.
    762 export def P521_POINTSZ = len(P521_G);
    763 
    764 // Size of a [[p521]] scalar in bytes.
    765 export def P521_SCALARSZ = len(P521_N);
    766 
    767 // A [[curve]] implementation of P-521, also known as secp521r1;
    768 export const p521: *curve = &_p521;