p256.ha (35914B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 // Ported from BearSSL 5 // 6 // Copyright (c) 2017 Thomas Pornin <pornin@bolet.org> 7 // 8 // Permission is hereby granted, free of charge, to any person obtaining 9 // a copy of this software and associated documentation files (the 10 // "Software"), to deal in the Software without restriction, including 11 // without limitation the rights to use, copy, modify, merge, publish, 12 // distribute, sublicense, and/or sell copies of the Software, and to 13 // permit persons to whom the Software is furnished to do so, subject to 14 // the following conditions: 15 // 16 // The above copyright notice and this permission notice shall be 17 // included in all copies or substantial portions of the Software. 18 // 19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 22 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 23 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 24 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 25 // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 26 // SOFTWARE. 27 28 use bytes; 29 use crypto::math::*; 30 31 32 // Note from the BearSSL documentation: 33 // 34 // The ec_p256_m31 implementation supports P-256 with specialised code, 35 // including modular reduction routines that leverage the special format of the 36 // field modulus, and internal split of data as sequences of 30-bit words, which 37 // helps with carry propagation. ec_p256_m31 also includes fixed point 38 // optimisations, for the common case of multiplying the conventional generator 39 // point. These implementations are faster than the generic "i31" code, but with 40 // a larger code footprint. 41 // 42 // Convert an integer from unsigned big-endian encoding to a sequence of 30-bit 43 // words in little-endian order. The final "partial" word is returned. 44 fn be8tole30(dest: []u32, src: []u8) u32 = { 45 let acc: u32 = 0; 46 let acclen: u32 = 0; 47 let destpos = 0; 48 49 for (let i = len(src); i > 0; i -= 1) { 50 let b = src[i - 1]: u32; 51 if (acclen < 22) { 52 acc |= b << acclen; 53 acclen += 8; 54 } else { 55 dest[destpos] = (acc | (b << acclen)) & 0x3FFFFFFF; 56 destpos += 1; 57 acc = b >> (30 - acclen); 58 acclen -= 22; 59 }; 60 }; 61 return acc; 62 }; 63 64 // Convert an integer (30-bit words, little-endian) to unsigned 65 // big-endian encoding. The total encoding length is provided; all 66 // the destination bytes will be filled. 67 fn le30tobe8(dest: []u8, src: []u32) void = { 68 let acc: u32 = 0; 69 let acclen: u32 = 0; 70 let srcpos: size = 0; 71 72 for (let i = len(dest); i > 0; i -= 1) { 73 if (acclen < 8) { 74 let w = src[srcpos]; 75 srcpos += 1; 76 77 dest[i - 1] = (acc | (w << acclen)): u8; 78 acc = w >> (8 - acclen); 79 acclen += 22; 80 } else { 81 dest[i - 1] = acc: u8; 82 acc >>= 8; 83 acclen -= 8; 84 }; 85 }; 86 }; 87 88 @test fn be8tole30() void = { 89 let be8: [6]u8 = [0x11, 0x22, 0xF3, 0x44, 0x55, 0x66]; 90 let le30result: [2]u32 = [0...]; 91 let be8result: [6]u8 = [0...]; 92 93 le30result[1] = be8tole30(le30result, be8); 94 le30tobe8(be8result, le30result); 95 96 assert(bytes::equal(be8, be8result)); 97 }; 98 99 fn arsh(x: u32, n: u32) u32 = (x: i32 >> n: i32): u32; 100 fn arshw(x: u64, n: u32) u64 = (x: i64 >> n: i32): u64; 101 102 @test fn arsh() void = assert(arsh(0x80000000u32, 2) == 0xe0000000); 103 104 // Multiply two integers. Source integers are represented as arrays of 105 // nine 30-bit words, for values up to 2^270-1. Result is encoded over 106 // 18 words of 30 bits each. 107 fn mul9(d: []u32, a: []u32, b: []u32) void = { 108 // Maximum intermediate result is no more than 109 // 10376293531797946367, which fits in 64 bits. Reason: 110 // 111 // 10376293531797946367 = 9 * (2^30-1)^2 + 9663676406 112 // 10376293531797946367 < 9663676407 * 2^30 113 // 114 // Thus, adding together 9 products of 30-bit integers, with 115 // a carry of at most 9663676406, yields an integer that fits 116 // on 64 bits and generates a carry of at most 9663676406. 117 let t: [17]u64 = [0...]; 118 119 t[0] = mulu32(a[0], b[0]); 120 t[1] = mulu32(a[0], b[1]) 121 + mulu32(a[1], b[0]); 122 t[2] = mulu32(a[0], b[2]) 123 + mulu32(a[1], b[1]) 124 + mulu32(a[2], b[0]); 125 t[3] = mulu32(a[0], b[3]) 126 + mulu32(a[1], b[2]) 127 + mulu32(a[2], b[1]) 128 + mulu32(a[3], b[0]); 129 t[4] = mulu32(a[0], b[4]) 130 + mulu32(a[1], b[3]) 131 + mulu32(a[2], b[2]) 132 + mulu32(a[3], b[1]) 133 + mulu32(a[4], b[0]); 134 t[5] = mulu32(a[0], b[5]) 135 + mulu32(a[1], b[4]) 136 + mulu32(a[2], b[3]) 137 + mulu32(a[3], b[2]) 138 + mulu32(a[4], b[1]) 139 + mulu32(a[5], b[0]); 140 t[6] = mulu32(a[0], b[6]) 141 + mulu32(a[1], b[5]) 142 + mulu32(a[2], b[4]) 143 + mulu32(a[3], b[3]) 144 + mulu32(a[4], b[2]) 145 + mulu32(a[5], b[1]) 146 + mulu32(a[6], b[0]); 147 t[7] = mulu32(a[0], b[7]) 148 + mulu32(a[1], b[6]) 149 + mulu32(a[2], b[5]) 150 + mulu32(a[3], b[4]) 151 + mulu32(a[4], b[3]) 152 + mulu32(a[5], b[2]) 153 + mulu32(a[6], b[1]) 154 + mulu32(a[7], b[0]); 155 t[8] = mulu32(a[0], b[8]) 156 + mulu32(a[1], b[7]) 157 + mulu32(a[2], b[6]) 158 + mulu32(a[3], b[5]) 159 + mulu32(a[4], b[4]) 160 + mulu32(a[5], b[3]) 161 + mulu32(a[6], b[2]) 162 + mulu32(a[7], b[1]) 163 + mulu32(a[8], b[0]); 164 t[9] = mulu32(a[1], b[8]) 165 + mulu32(a[2], b[7]) 166 + mulu32(a[3], b[6]) 167 + mulu32(a[4], b[5]) 168 + mulu32(a[5], b[4]) 169 + mulu32(a[6], b[3]) 170 + mulu32(a[7], b[2]) 171 + mulu32(a[8], b[1]); 172 t[10] = mulu32(a[2], b[8]) 173 + mulu32(a[3], b[7]) 174 + mulu32(a[4], b[6]) 175 + mulu32(a[5], b[5]) 176 + mulu32(a[6], b[4]) 177 + mulu32(a[7], b[3]) 178 + mulu32(a[8], b[2]); 179 t[11] = mulu32(a[3], b[8]) 180 + mulu32(a[4], b[7]) 181 + mulu32(a[5], b[6]) 182 + mulu32(a[6], b[5]) 183 + mulu32(a[7], b[4]) 184 + mulu32(a[8], b[3]); 185 t[12] = mulu32(a[4], b[8]) 186 + mulu32(a[5], b[7]) 187 + mulu32(a[6], b[6]) 188 + mulu32(a[7], b[5]) 189 + mulu32(a[8], b[4]); 190 t[13] = mulu32(a[5], b[8]) 191 + mulu32(a[6], b[7]) 192 + mulu32(a[7], b[6]) 193 + mulu32(a[8], b[5]); 194 t[14] = mulu32(a[6], b[8]) 195 + mulu32(a[7], b[7]) 196 + mulu32(a[8], b[6]); 197 t[15] = mulu32(a[7], b[8]) 198 + mulu32(a[8], b[7]); 199 t[16] = mulu32(a[8], b[8]); 200 201 // Propagate carries. 202 let cc: u64 = 0; 203 for (let i = 0z; i < 17; i += 1) { 204 let w = t[i] + cc; 205 d[i] = w: u32 & 0x3FFFFFFF; 206 cc = w >> 30; 207 }; 208 d[17] = cc: u32; 209 }; 210 211 // Square a 270-bit integer, represented as an array of nine 30-bit words. 212 // Result uses 18 words of 30 bits each. 213 fn square9(d: []u32, a: []u32) void = { 214 let t: [17]u64 = [0...]; 215 216 t[0] = mulu32(a[0], a[0]); 217 t[1] = ((mulu32(a[0], a[1])) << 1); 218 t[2] = mulu32(a[1], a[1]) 219 + ((mulu32(a[0], a[2])) << 1); 220 t[3] = ((mulu32(a[0], a[3]) 221 + mulu32(a[1], a[2])) << 1); 222 t[4] = mulu32(a[2], a[2]) 223 + ((mulu32(a[0], a[4]) 224 + mulu32(a[1], a[3])) << 1); 225 t[5] = ((mulu32(a[0], a[5]) 226 + mulu32(a[1], a[4]) 227 + mulu32(a[2], a[3])) << 1); 228 t[6] = mulu32(a[3], a[3]) 229 + ((mulu32(a[0], a[6]) 230 + mulu32(a[1], a[5]) 231 + mulu32(a[2], a[4])) << 1); 232 t[7] = ((mulu32(a[0], a[7]) 233 + mulu32(a[1], a[6]) 234 + mulu32(a[2], a[5]) 235 + mulu32(a[3], a[4])) << 1); 236 t[8] = mulu32(a[4], a[4]) 237 + ((mulu32(a[0], a[8]) 238 + mulu32(a[1], a[7]) 239 + mulu32(a[2], a[6]) 240 + mulu32(a[3], a[5])) << 1); 241 t[9] = ((mulu32(a[1], a[8]) 242 + mulu32(a[2], a[7]) 243 + mulu32(a[3], a[6]) 244 + mulu32(a[4], a[5])) << 1); 245 t[10] = mulu32(a[5], a[5]) 246 + ((mulu32(a[2], a[8]) 247 + mulu32(a[3], a[7]) 248 + mulu32(a[4], a[6])) << 1); 249 t[11] = ((mulu32(a[3], a[8]) 250 + mulu32(a[4], a[7]) 251 + mulu32(a[5], a[6])) << 1); 252 t[12] = mulu32(a[6], a[6]) 253 + ((mulu32(a[4], a[8]) 254 + mulu32(a[5], a[7])) << 1); 255 t[13] = ((mulu32(a[5], a[8]) 256 + mulu32(a[6], a[7])) << 1); 257 t[14] = mulu32(a[7], a[7]) 258 + ((mulu32(a[6], a[8])) << 1); 259 t[15] = ((mulu32(a[7], a[8])) << 1); 260 t[16] = mulu32(a[8], a[8]); 261 262 // Propagate carries. 263 let cc: u64 = 0; 264 for (let i = 0z; i < 17; i += 1) { 265 let w = t[i] + cc; 266 d[i] = w: u32 & 0x3FFFFFFF; 267 cc = w >> 30; 268 }; 269 d[17] = cc: u32; 270 }; 271 272 // Base field modulus for P-256. 273 const F256: [_]u32 = [ 274 0x3FFFFFFF, 0x3FFFFFFF, 0x3FFFFFFF, 0x0000003F, 0x00000000, 0x00000000, 275 0x00001000, 0x3FFFC000, 0x0000FFFF 276 ]; 277 278 // The 'b' curve equation coefficient for P-256. 279 const P256_B: [_]u32 = [ 280 0x27D2604B, 0x2F38F0F8, 0x053B0F63, 0x0741AC33, 0x1886BC65, 0x2EF555DA, 281 0x293E7B3E, 0x0D762A8E, 0x00005AC6 282 ]; 283 284 // Addition in the field. Source operands shall fit on 257 bits; output 285 // will be lower than twice the modulus. 286 fn add_f256(d: []u32, a: []u32, b: []u32) void = { 287 let w: u32 = 0; 288 289 let cc: u32 = 0; 290 for (let i = 0z; i < 9; i += 1) { 291 w = a[i] + b[i] + cc; 292 d[i] = w & 0x3FFFFFFF; 293 cc = w >> 30; 294 }; 295 w >>= 16; 296 d[8] &= 0xFFFF; 297 d[3] -= w << 6; 298 d[6] -= w << 12; 299 d[7] += w << 14; 300 cc = w; 301 for (let i = 0z; i < 9; i += 1) { 302 w = d[i] + cc; 303 d[i] = w & 0x3FFFFFFF; 304 cc = arsh(w, 30); 305 }; 306 }; 307 308 // Subtraction in the field. Source operands shall be smaller than twice 309 // the modulus; the result will fulfil the same property. 310 fn sub_f256(d: []u32, a: []u32, b: []u32) void = { 311 let w: u32 = 0; 312 let cc: u32 = 0; 313 314 // We really compute a - b + 2*p to make sure that the result is 315 // positive. 316 w = a[0] - b[0] - 0x00002; 317 d[0] = w & 0x3FFFFFFF; 318 w = a[1] - b[1] + arsh(w, 30); 319 d[1] = w & 0x3FFFFFFF; 320 w = a[2] - b[2] + arsh(w, 30); 321 d[2] = w & 0x3FFFFFFF; 322 w = a[3] - b[3] + arsh(w, 30) + 0x00080; 323 d[3] = w & 0x3FFFFFFF; 324 w = a[4] - b[4] + arsh(w, 30); 325 d[4] = w & 0x3FFFFFFF; 326 w = a[5] - b[5] + arsh(w, 30); 327 d[5] = w & 0x3FFFFFFF; 328 w = a[6] - b[6] + arsh(w, 30) + 0x02000; 329 d[6] = w & 0x3FFFFFFF; 330 w = a[7] - b[7] + arsh(w, 30) - 0x08000; 331 d[7] = w & 0x3FFFFFFF; 332 w = a[8] - b[8] + arsh(w, 30) + 0x20000; 333 d[8] = w & 0xFFFF; 334 w >>= 16; 335 d[8] &= 0xFFFF; 336 d[3] -= w << 6; 337 d[6] -= w << 12; 338 d[7] += w << 14; 339 cc = w; 340 for (let i = 0z; i < 9; i += 1) { 341 w = d[i] + cc; 342 d[i] = w & 0x3FFFFFFF; 343 cc = arsh(w, 30); 344 }; 345 }; 346 347 // Compute a multiplication in F256. Source operands shall be less than 348 // twice the modulus. 349 fn mul_f256(d: []u32, a: []u32, b: []u32) void = { 350 let t: [18]u32 = [0...]; 351 let s: [18]u64 = [0...]; 352 let x: u64 = 0; 353 354 mul9(t, a, b); 355 356 // Modular reduction: each high word in added/subtracted where 357 // necessary. 358 // 359 // The modulus is: 360 // p = 2^256 - 2^224 + 2^192 + 2^96 - 1 361 // Therefore: 362 // 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p 363 // 364 // For a word x at bit offset n (n >= 256), we have: 365 // x*2^n = x*2^(n-32) - x*2^(n-64) 366 // - x*2^(n - 160) + x*2^(n-256) mod p 367 // 368 // Thus, we can nullify the high word if we reinject it at some 369 // proper emplacements. 370 // 371 // We use 64-bit intermediate words to allow for carries to 372 // accumulate easily, before performing the final propagation. 373 for (let i = 0; i < 18; i += 1) { 374 s[i] = t[i]; 375 }; 376 377 for (let i = 17; i >= 9; i -= 1) { 378 let y = s[i]; 379 s[i - 1] += arshw(y, 2); 380 s[i - 2] += (y << 28) & 0x3FFFFFFF; 381 s[i - 2] -= arshw(y, 4); 382 s[i - 3] -= (y << 26) & 0x3FFFFFFF; 383 s[i - 5] -= arshw(y, 10); 384 s[i - 6] -= (y << 20) & 0x3FFFFFFF; 385 s[i - 8] += arshw(y, 16); 386 s[i - 9] += (y << 14) & 0x3FFFFFFF; 387 }; 388 389 // Carry propagation must be signed. Moreover, we may have overdone 390 // it a bit, and obtain a negative result. 391 // 392 // The loop above ran 9 times; each time, each word was augmented 393 // by at most one extra word (in absolute value). Thus, the top 394 // word must in fine fit in 39 bits, so the carry below will fit 395 // on 9 bits. 396 let cc: u64 = 0; 397 for (let i = 0z; i < 9; i += 1) { 398 x = s[i] + cc; 399 d[i] = x: u32 & 0x3FFFFFFF; 400 cc = arshw(x, 30); 401 }; 402 403 // All nine words fit on 30 bits, but there may be an extra 404 // carry for a few bits (at most 9), and that carry may be 405 // negative. Moreover, we want the result to fit on 257 bits. 406 // The two lines below ensure that the word in d[] has length 407 // 256 bits, and the (signed) carry (beyond 2^256) is in cc. The 408 // significant length of cc is less than 24 bits, so we will be 409 // able to switch to 32-bit operations. 410 cc = arshw(x, 16); 411 d[8] &= 0xFFFF; 412 413 // One extra round of reduction, for cc*2^256, which means 414 // adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative) 415 // value. If cc is negative, then it may happen (rarely, but 416 // not neglectibly so) that the result would be negative. In 417 // order to avoid that, if cc is negative, then we add the 418 // modulus once. Note that if cc is negative, then propagating 419 // that carry must yield a value lower than the modulus, so 420 // adding the modulus once will keep the final result under 421 // twice the modulus. 422 let z = cc: u32; 423 d[3] -= z << 6; 424 d[6] -= (z << 12) & 0x3FFFFFFF; 425 d[7] -= arsh(z, 18); 426 d[7] += (z << 14) & 0x3FFFFFFF; 427 d[8] += arsh(z, 16); 428 let c = z >> 31; 429 d[0] -= c; 430 d[3] += c << 6; 431 d[6] += c << 12; 432 d[7] -= c << 14; 433 d[8] += c << 16; 434 435 for (let i = 0z; i < 9; i += 1) { 436 let w = d[i] + z; 437 d[i] = w & 0x3FFFFFFF; 438 z = arsh(w, 30); 439 }; 440 }; 441 442 443 // Compute a square in F256. Source operand shall be less than 444 // twice the modulus. 445 fn square_f256(d: []u32, a: []u32) void = { 446 let t: [18]u32 = [0...]; 447 let s: [18]u64 = [0...]; 448 449 square9(t, a); 450 451 // Modular reduction: each high word in added/subtracted where 452 // necessary. 453 // 454 // The modulus is: 455 // p = 2^256 - 2^224 + 2^192 + 2^96 - 1 456 // Therefore: 457 // 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p 458 // 459 // For a word x at bit offset n (n >= 256), we have: 460 // x*2^n = x*2^(n-32) - x*2^(n-64) 461 // - x*2^(n - 160) + x*2^(n-256) mod p 462 // 463 // Thus, we can nullify the high word if we reinject it at some 464 // proper emplacements. 465 // 466 // We use 64-bit intermediate words to allow for carries to 467 // accumulate easily, before performing the final propagation. 468 for (let i = 0; i < 18; i += 1) { 469 s[i] = t[i]; 470 }; 471 472 for (let i = 17; i >= 9; i -= 1) { 473 let y = s[i]; 474 s[i - 1] += arshw(y, 2); 475 s[i - 2] += (y << 28) & 0x3FFFFFFF; 476 s[i - 2] -= arshw(y, 4); 477 s[i - 3] -= (y << 26) & 0x3FFFFFFF; 478 s[i - 5] -= arshw(y, 10); 479 s[i - 6] -= (y << 20) & 0x3FFFFFFF; 480 s[i - 8] += arshw(y, 16); 481 s[i - 9] += (y << 14) & 0x3FFFFFFF; 482 }; 483 484 // Carry propagation must be signed. Moreover, we may have overdone 485 // it a bit, and obtain a negative result. 486 // 487 // The loop above ran 9 times; each time, each word was augmented 488 // by at most one extra word (in absolute value). Thus, the top 489 // word must in fine fit in 39 bits, so the carry below will fit 490 // on 9 bits. 491 let cc: u64 = 0; 492 let x: u64 = 0; 493 for (let i = 0; i < 9; i += 1) { 494 x = s[i] + cc; 495 d[i] = x: u32 & 0x3FFFFFFF; 496 cc = arshw(x, 30); 497 }; 498 499 // All nine words fit on 30 bits, but there may be an extra 500 // carry for a few bits (at most 9), and that carry may be 501 // negative. Moreover, we want the result to fit on 257 bits. 502 // The two lines below ensure that the word in d[] has length 503 // 256 bits, and the (signed) carry (beyond 2^256) is in cc. The 504 // significant length of cc is less than 24 bits, so we will be 505 // able to switch to 32-bit operations. 506 cc = arshw(x, 16); 507 d[8] &= 0xFFFF; 508 509 // One extra round of reduction, for cc*2^256, which means 510 // adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative) 511 // value. If cc is negative, then it may happen (rarely, but 512 // not neglectibly so) that the result would be negative. In 513 // order to avoid that, if cc is negative, then we add the 514 // modulus once. Note that if cc is negative, then propagating 515 // that carry must yield a value lower than the modulus, so 516 // adding the modulus once will keep the final result under 517 // twice the modulus. 518 let z = cc: u32; 519 d[3] -= z << 6; 520 d[6] -= (z << 12) & 0x3FFFFFFF; 521 d[7] -= arsh(z, 18); 522 d[7] += (z << 14) & 0x3FFFFFFF; 523 d[8] += arsh(z, 16); 524 let c = z >> 31; 525 d[0] -= c; 526 d[3] += c << 6; 527 d[6] += c << 12; 528 d[7] -= c << 14; 529 d[8] += c << 16; 530 531 for (let i = 0z; i < 9; i += 1) { 532 let w = d[i] + z; 533 d[i] = w & 0x3FFFFFFF; 534 z = arsh(w, 30); 535 }; 536 }; 537 538 // Perform a "final reduction" in field F256 (field for curve P-256). 539 // The source value must be less than twice the modulus. If the value 540 // is not lower than the modulus, then the modulus is subtracted and 541 // this function returns 1; otherwise, it leaves it untouched and it 542 // returns 0. 543 fn reduce_final_f256(d: []u32) u32 = { 544 let t: [9]u32 = [0...]; 545 546 let cc: u32 = 0; 547 for (let i = 0; i < 9; i += 1) { 548 let w = d[i] - F256[i] - cc; 549 cc = w >> 31; 550 t[i] = w & 0x3FFFFFFF; 551 }; 552 cc ^= 1; 553 ccopyu32(cc, d, t); 554 return cc; 555 }; 556 557 // Jacobian coordinates for a point in P-256: affine coordinates (X,Y) 558 // are such that: 559 // X = x / z^2 560 // Y = y / z^3 561 // For the point at infinity, z = 0. 562 // Each point thus admits many possible representations. 563 // 564 // Coordinates are represented in arrays of 32-bit integers, each holding 565 // 30 bits of data. Values may also be slightly greater than the modulus, 566 // but they will always be lower than twice the modulus. 567 type p256_jacobian = struct { 568 x: [9]u32, 569 y: [9]u32, 570 z: [9]u32, 571 }; 572 573 // Convert a point to affine coordinates: 574 // - If the point is the point at infinity, then all three coordinates 575 // are set to 0. 576 // - Otherwise, the 'z' coordinate is set to 1, and the 'x' and 'y' 577 // coordinates are the 'X' and 'Y' affine coordinates. 578 // The coordinates are guaranteed to be lower than the modulus. 579 fn p256_to_affine(p: *p256_jacobian) void = { 580 let t1: [9]u32 = [0...]; 581 let t2: [9]u32 = [0...]; 582 583 // Invert z with a modular exponentiation: the modulus is 584 // p = 2^256 - 2^224 + 2^192 + 2^96 - 1, and the exponent is 585 // p-2. Exponent bit pattern (from high to low) is: 586 // - 32 bits of value 1 587 // - 31 bits of value 0 588 // - 1 bit of value 1 589 // - 96 bits of value 0 590 // - 94 bits of value 1 591 // - 1 bit of value 0 592 // - 1 bit of value 1 593 // Thus, we precompute z^(2^31-1) to speed things up. 594 // 595 // If z = 0 (point at infinity) then the modular exponentiation 596 // will yield 0, which leads to the expected result (all three 597 // coordinates set to 0). 598 599 // A simple square-and-multiply for z^(2^31-1). We could save about 600 // two dozen multiplications here with an addition chain, but 601 // this would require a bit more code, and extra stack buffers. 602 t1[..] = p.z[..]; 603 for (let i = 0; i < 30; i += 1) { 604 square_f256(t1, t1); 605 mul_f256(t1, t1, p.z); 606 }; 607 608 // Square-and-multiply. Apart from the squarings, we have a few 609 // multiplications to set bits to 1; we multiply by the original z 610 // for setting 1 bit, and by t1 for setting 31 bits. 611 t2[..] = p.z[..]; 612 for (let i = 1; i < 256; i += 1) { 613 square_f256(t2, t2); 614 switch (i) { 615 case 31, 190, 221, 252 => 616 mul_f256(t2, t2, t1); 617 case 63, 253, 255 => 618 mul_f256(t2, t2, p.z); 619 case => void; 620 }; 621 }; 622 623 // Now that we have 1/z, multiply x by 1/z^2 and y by 1/z^3. 624 mul_f256(t1, t2, t2); 625 mul_f256(p.x, t1, p.x); 626 mul_f256(t1, t1, t2); 627 mul_f256(p.y, t1, p.y); 628 reduce_final_f256(p.x); 629 reduce_final_f256(p.y); 630 631 // Multiply z by 1/z. If z = 0, then this will yield 0, otherwise 632 // this will set z to 1. 633 mul_f256(p.z, p.z, t2); 634 reduce_final_f256(p.z); 635 }; 636 637 // Double a point in P-256. This function works for all valid points, 638 // including the point at infinity. 639 fn p256_double(q: *p256_jacobian) void = { 640 // Doubling formulas are: 641 // 642 // s = 4*x*y^2 643 // m = 3*(x + z^2)*(x - z^2) 644 // x' = m^2 - 2*s 645 // y' = m*(s - x') - 8*y^4 646 // z' = 2*y*z 647 // 648 // These formulas work for all points, including points of order 2 649 // and points at infinity: 650 // - If y = 0 then z' = 0. But there is no such point in P-256 651 // anyway. 652 // - If z = 0 then z' = 0. 653 let t1: [9]u32 = [0...]; 654 let t2: [9]u32 = [0...]; 655 let t3: [9]u32 = [0...]; 656 let t4: [9]u32 = [0...]; 657 658 // Compute z^2 in t1. 659 square_f256(t1, q.z); 660 661 // Compute x-z^2 in t2 and x+z^2 in t1. 662 add_f256(t2, q.x, t1); 663 sub_f256(t1, q.x, t1); 664 665 // Compute 3*(x+z^2)*(x-z^2) in t1. 666 mul_f256(t3, t1, t2); 667 add_f256(t1, t3, t3); 668 add_f256(t1, t3, t1); 669 670 // Compute 4*x*y^2 (in t2) and 2*y^2 (in t3). 671 square_f256(t3, q.y); 672 add_f256(t3, t3, t3); 673 mul_f256(t2, q.x, t3); 674 add_f256(t2, t2, t2); 675 676 677 // Compute x' = m^2 - 2*s. 678 square_f256(q.x, t1); 679 sub_f256(q.x, q.x, t2); 680 sub_f256(q.x, q.x, t2); 681 682 // Compute z' = 2*y*z. 683 mul_f256(t4, q.y, q.z); 684 add_f256(q.z, t4, t4); 685 686 // Compute y' = m*(s - x') - 8*y^4. Note that we already have 687 // 2*y^2 in t3. 688 sub_f256(t2, t2, q.x); 689 mul_f256(q.y, t1, t2); 690 square_f256(t4, t3); 691 add_f256(t4, t4, t4); 692 sub_f256(q.y, q.y, t4); 693 }; 694 695 // Add point P2 to point P1. 696 // 697 // This function computes the wrong result in the following cases: 698 // 699 // - If P1 == 0 but P2 != 0 700 // - If P1 != 0 but P2 == 0 701 // - If P1 == P2 702 // 703 // In all three cases, P1 is set to the point at infinity. 704 // 705 // Returned value is 0 if one of the following occurs: 706 // 707 // - P1 and P2 have the same Y coordinate 708 // - P1 == 0 and P2 == 0 709 // - The Y coordinate of one of the points is 0 and the other point is 710 // the point at infinity. 711 // 712 // The third case cannot actually happen with valid points, since a point 713 // with Y == 0 is a point of order 2, and there is no point of order 2 on 714 // curve P-256. 715 // 716 // Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller 717 // can apply the following: 718 // 719 // - If the result is not the point at infinity, then it is correct. 720 // - Otherwise, if the returned value is 1, then this is a case of 721 // P1+P2 == 0, so the result is indeed the point at infinity. 722 // - Otherwise, P1 == P2, so a "double" operation should have been 723 // performed. 724 fn p256_add(p1: *p256_jacobian, p2: *p256_jacobian) u32 = { 725 // Addtions formulas are: 726 // 727 // u1 = x1 * z2^2 728 // u2 = x2 * z1^2 729 // s1 = y1 * z2^3 730 // s2 = y2 * z1^3 731 // h = u2 - u1 732 // r = s2 - s1 733 // x3 = r^2 - h^3 - 2 * u1 * h^2 734 // y3 = r * (u1 * h^2 - x3) - s1 * h^3 735 // z3 = h * z1 * z2 736 let t1: [9]u32 = [0...]; 737 let t2: [9]u32 = [0...]; 738 let t3: [9]u32 = [0...]; 739 let t4: [9]u32 = [0...]; 740 let t5: [9]u32 = [0...]; 741 let t6: [9]u32 = [0...]; 742 let t7: [9]u32 = [0...]; 743 let ret: u32 = 0; 744 745 // Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3). 746 square_f256(t3, p2.z); 747 mul_f256(t1, p1.x, t3); 748 mul_f256(t4, p2.z, t3); 749 mul_f256(t3, p1.y, t4); 750 751 // Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). 752 square_f256(t4, p1.z); 753 mul_f256(t2, p2.x, t4); 754 mul_f256(t5, p1.z, t4); 755 mul_f256(t4, p2.y, t5); 756 757 // Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4). 758 // We need to test whether r is zero, so we will do some extra 759 // reduce. 760 sub_f256(t2, t2, t1); 761 sub_f256(t4, t4, t3); 762 reduce_final_f256(t4); 763 ret = 0; 764 for (let i = 0; i < 9; i += 1) { 765 ret |= t4[i]; 766 }; 767 ret = (ret | -ret) >> 31; 768 769 // Compute u1*h^2 (in t6) and h^3 (in t5); 770 square_f256(t7, t2); 771 mul_f256(t6, t1, t7); 772 mul_f256(t5, t7, t2); 773 774 // Compute x3 = r^2 - h^3 - 2*u1*h^2. 775 square_f256(p1.x, t4); 776 sub_f256(p1.x, p1.x, t5); 777 sub_f256(p1.x, p1.x, t6); 778 sub_f256(p1.x, p1.x, t6); 779 780 // Compute y3 = r*(u1*h^2 - x3) - s1*h^3. 781 sub_f256(t6, t6, p1.x); 782 mul_f256(p1.y, t4, t6); 783 mul_f256(t1, t5, t3); 784 sub_f256(p1.y, p1.y, t1); 785 786 // Compute z3 = h*z1*z2. 787 mul_f256(t1, p1.z, p2.z); 788 mul_f256(p1.z, t1, t2); 789 790 return ret; 791 }; 792 793 // Add point P2 to point P1. This is a specialised function for the 794 // case when P2 is a non-zero point in affine coordinate. 795 // 796 // This function computes the wrong result in the following cases: 797 // 798 // - If P1 == 0 799 // - If P1 == P2 800 // 801 // In both cases, P1 is set to the point at infinity. 802 // 803 // Returned value is 0 if one of the following occurs: 804 // 805 // - P1 and P2 have the same Y coordinate 806 // - The Y coordinate of P2 is 0 and P1 is the point at infinity. 807 // 808 // The second case cannot actually happen with valid points, since a point 809 // with Y == 0 is a point of order 2, and there is no point of order 2 on 810 // curve P-256. 811 // 812 // Therefore, assuming that P1 != 0 on input, then the caller 813 // can apply the following: 814 // 815 // - If the result is not the point at infinity, then it is correct. 816 // - Otherwise, if the returned value is 1, then this is a case of 817 // P1+P2 == 0, so the result is indeed the point at infinity. 818 // - Otherwise, P1 == P2, so a "double" operation should have been 819 // performed. 820 fn p256_add_mixed(p1: *p256_jacobian, p2: *p256_jacobian) u32 = { 821 // Addtions formulas are: 822 // 823 // u1 = x1 824 // u2 = x2 * z1^2 825 // s1 = y1 826 // s2 = y2 * z1^3 827 // h = u2 - u1 828 // r = s2 - s1 829 // x3 = r^2 - h^3 - 2 * u1 * h^2 830 // y3 = r * (u1 * h^2 - x3) - s1 * h^3 831 // z3 = h * z1 832 let t1: [9]u32 = [0...]; 833 let t2: [9]u32 = [0...]; 834 let t3: [9]u32 = [0...]; 835 let t4: [9]u32 = [0...]; 836 let t5: [9]u32 = [0...]; 837 let t6: [9]u32 = [0...]; 838 let t7: [9]u32 = [0...]; 839 let ret: u32 = 0; 840 841 // Compute u1 = x1 (in t1) and s1 = y1 (in t3). 842 t1[..] = p1.x[..]; 843 t3[..] = p1.y[..]; 844 845 // Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). 846 square_f256(t4, p1.z); 847 mul_f256(t2, p2.x, t4); 848 mul_f256(t5, p1.z, t4); 849 mul_f256(t4, p2.y, t5); 850 851 // Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4). 852 // We need to test whether r is zero, so we will do some extra 853 // reduce. 854 sub_f256(t2, t2, t1); 855 sub_f256(t4, t4, t3); 856 reduce_final_f256(t4); 857 ret = 0; 858 for (let i = 0; i < 9; i += 1) { 859 ret |= t4[i]; 860 }; 861 ret = (ret | -ret) >> 31; 862 863 // Compute u1*h^2 (in t6) and h^3 (in t5); 864 square_f256(t7, t2); 865 mul_f256(t6, t1, t7); 866 mul_f256(t5, t7, t2); 867 868 // Compute x3 = r^2 - h^3 - 2*u1*h^2. 869 square_f256(p1.x, t4); 870 sub_f256(p1.x, p1.x, t5); 871 sub_f256(p1.x, p1.x, t6); 872 sub_f256(p1.x, p1.x, t6); 873 874 // Compute y3 = r*(u1*h^2 - x3) - s1*h^3. 875 sub_f256(t6, t6, p1.x); 876 mul_f256(p1.y, t4, t6); 877 mul_f256(t1, t5, t3); 878 sub_f256(p1.y, p1.y, t1); 879 880 // Compute z3 = h*z1*z2. 881 mul_f256(p1.z, p1.z, t2); 882 883 return ret; 884 }; 885 886 // Decode a P-256 point. This function does not support the point at 887 // infinity. Returned value is 0 if the point is invalid, 1 otherwise. 888 fn p256_decode(p: *p256_jacobian, src: const []u8) u32 = { 889 let tx: [9]u32 = [0...]; 890 let ty: [9]u32 = [0...]; 891 let t1: [9]u32 = [0...]; 892 let t2: [9]u32 = [0...]; 893 let bad: u32 = 0; 894 895 if (len(src) != 65) { 896 return 0; 897 }; 898 let buf = src; 899 900 // First byte must be 0x04 (uncompressed format). We could support 901 // "hybrid format" (first byte is 0x06 or 0x07, and encodes the 902 // least significant bit of the Y coordinate), but it is explicitly 903 // forbidden by RFC 5480 (section 2.2). 904 bad = nequ32(buf[0], 0x04); 905 906 // Decode the coordinates, and check that they are both lower 907 // than the modulus. 908 tx[8] = be8tole30(tx, buf[1..33]); 909 ty[8] = be8tole30(ty, buf[33..]); 910 bad |= reduce_final_f256(tx); 911 bad |= reduce_final_f256(ty); 912 913 // Check curve equation. 914 square_f256(t1, tx); 915 mul_f256(t1, tx, t1); 916 square_f256(t2, ty); 917 sub_f256(t1, t1, tx); 918 sub_f256(t1, t1, tx); 919 sub_f256(t1, t1, tx); 920 add_f256(t1, t1, P256_B); 921 sub_f256(t1, t1, t2); 922 reduce_final_f256(t1); 923 for (let i = 0; i < 9; i += 1) { 924 bad |= t1[i]; 925 }; 926 927 // Copy coordinates to the point structure. 928 p.x[..] = tx[..]; 929 p.y[..] = ty[..]; 930 p.z[..] = [0...]; 931 p.z[0] = 1; 932 return equ32(bad, 0); 933 }; 934 935 // Encode a point into a buffer. This function assumes that the point is 936 // valid, in affine coordinates, and not the point at infinity. 937 fn p256_encode(dest: []u8, p: *p256_jacobian) void = { 938 dest[0] = 0x04; 939 le30tobe8(dest[1..33], p.x); 940 le30tobe8(dest[33..], p.y); 941 }; 942 943 // Multiply a curve point by an integer. The integer is assumed to be 944 // lower than the curve order, and the base point must not be the point 945 // at infinity. 946 fn p256_mul(p: *p256_jacobian, x: []u8) void = { 947 // qz is a flag that is initially 1, and remains equal to 1 948 // as long as the point is the point at infinity. 949 // 950 // We use a 2-bit window to handle multiplier bits by pairs. 951 // The precomputed window really is the points P2 and P3. 952 let qz: u32 = 1; 953 let p2 = p256_jacobian { ... }; 954 let p3 = p256_jacobian { ... }; 955 let q = p256_jacobian { ... }; 956 let t = p256_jacobian { ... }; 957 let u = p256_jacobian { ... }; 958 let xpos: size = 0; 959 960 // Compute window values. 961 p2 = *p; 962 p256_double(&p2); 963 p3 = *p; 964 p256_add(&p3, &p2); 965 966 // We start with Q = 0. We process multiplier bits 2 by 2. 967 for (let i = len(x); i > 0; i -= 1) { 968 for (let k = 6i8; k >= 0; k -= 2) { 969 let bits: u32 = 0; 970 let bnz: u32 = 0; 971 972 p256_double(&q); 973 p256_double(&q); 974 t = *p; 975 u = q; 976 bits = (x[xpos] >> k: u8) & 3u32; 977 bnz = nequ32(bits, 0); 978 jaccopy(equ32(bits, 2), &t, &p2); 979 jaccopy(equ32(bits, 3), &t, &p3); 980 p256_add(&u, &t); 981 jaccopy(bnz & qz, &q, &t); 982 jaccopy(bnz & ~qz, &q, &u); 983 qz &= ~bnz; 984 }; 985 xpos += 1; 986 }; 987 *p = q; 988 }; 989 990 fn jaccopy(ctl: u32, dest: *p256_jacobian, src: *p256_jacobian) void = { 991 ccopyu32(ctl, dest.x, src.x); 992 ccopyu32(ctl, dest.y, src.y); 993 ccopyu32(ctl, dest.z, src.z); 994 }; 995 996 // Precomputed window: k*G points, where G is the curve generator, and k 997 // is an integer from 1 to 15 (inclusive). The X and Y coordinates of 998 // the point are encoded as 9 words of 30 bits each (little-endian 999 // order). 1000 const gwin: [15][18]u32 = [ 1001 [ 1002 0x1898c296, 0x1284e517, 0x1eb33a0f, 0x00df604b, 0x2440f277, 1003 0x339b958e, 0x04247f8b, 0x347cb84b, 0x00006b17, 0x37bf51f5, 1004 0x2ed901a0, 0x3315ecec, 0x338cd5da, 0x0f9e162b, 0x1fad29f0, 1005 0x27f9b8ee, 0x10b8bf86, 0x00004fe3, 1006 ], 1007 [ 1008 0x07669978, 0x182d23f1, 0x3f21b35a, 0x225a789d, 0x351ac3c0, 1009 0x08e00c12, 0x34f7e8a5, 0x1ec62340, 0x00007cf2, 0x227873d1, 1010 0x3812de74, 0x0e982299, 0x1f6b798f, 0x3430dbba, 0x366b1a7d, 1011 0x2d040293, 0x154436e3, 0x00000777, 1012 ], 1013 [ 1014 0x06e7fd6c, 0x2d05986f, 0x3ada985f, 0x31adc87b, 0x0bf165e6, 1015 0x1fbe5475, 0x30a44c8f, 0x3934698c, 0x00005ecb, 0x227d5032, 1016 0x29e6c49e, 0x04fb83d9, 0x0aac0d8e, 0x24a2ecd8, 0x2c1b3869, 1017 0x0ff7e374, 0x19031266, 0x00008734, 1018 ], 1019 [ 1020 0x2b030852, 0x024c0911, 0x05596ef5, 0x07f8b6de, 0x262bd003, 1021 0x3779967b, 0x08fbba02, 0x128d4cb4, 0x0000e253, 0x184ed8c6, 1022 0x310b08fc, 0x30ee0055, 0x3f25b0fc, 0x062d764e, 0x3fb97f6a, 1023 0x33cc719d, 0x15d69318, 0x0000e0f1, 1024 ], 1025 [ 1026 0x03d033ed, 0x05552837, 0x35be5242, 0x2320bf47, 0x268fdfef, 1027 0x13215821, 0x140d2d78, 0x02de9454, 0x00005159, 0x3da16da4, 1028 0x0742ed13, 0x0d80888d, 0x004bc035, 0x0a79260d, 0x06fcdafe, 1029 0x2727d8ae, 0x1f6a2412, 0x0000e0c1, 1030 ], 1031 [ 1032 0x3c2291a9, 0x1ac2aba4, 0x3b215b4c, 0x131d037a, 0x17dde302, 1033 0x0c90b2e2, 0x0602c92d, 0x05ca9da9, 0x0000b01a, 0x0fc77fe2, 1034 0x35f1214e, 0x07e16bdf, 0x003ddc07, 0x2703791c, 0x3038b7ee, 1035 0x3dad56fe, 0x041d0c8d, 0x0000e85c, 1036 ], 1037 [ 1038 0x3187b2a3, 0x0018a1c0, 0x00fef5b3, 0x3e7e2e2a, 0x01fb607e, 1039 0x2cc199f0, 0x37b4625b, 0x0edbe82f, 0x00008e53, 0x01f400b4, 1040 0x15786a1b, 0x3041b21c, 0x31cd8cf2, 0x35900053, 0x1a7e0e9b, 1041 0x318366d0, 0x076f780c, 0x000073eb, 1042 ], 1043 [ 1044 0x1b6fb393, 0x13767707, 0x3ce97dbb, 0x348e2603, 0x354cadc1, 1045 0x09d0b4ea, 0x1b053404, 0x1de76fba, 0x000062d9, 0x0f09957e, 1046 0x295029a8, 0x3e76a78d, 0x3b547dae, 0x27cee0a2, 0x0575dc45, 1047 0x1d8244ff, 0x332f647a, 0x0000ad5a, 1048 ], 1049 [ 1050 0x10949ee0, 0x1e7a292e, 0x06df8b3d, 0x02b2e30b, 0x31f8729e, 1051 0x24e35475, 0x30b71878, 0x35edbfb7, 0x0000ea68, 0x0dd048fa, 1052 0x21688929, 0x0de823fe, 0x1c53faa9, 0x0ea0c84d, 0x052a592a, 1053 0x1fce7870, 0x11325cb2, 0x00002a27, 1054 ], 1055 [ 1056 0x04c5723f, 0x30d81a50, 0x048306e4, 0x329b11c7, 0x223fb545, 1057 0x085347a8, 0x2993e591, 0x1b5aca8e, 0x0000cef6, 0x04af0773, 1058 0x28d2eea9, 0x2751eeec, 0x037b4a7f, 0x3b4c1059, 0x08f37674, 1059 0x2ae906e1, 0x18a88a6a, 0x00008786, 1060 ], 1061 [ 1062 0x34bc21d1, 0x0cce474d, 0x15048bf4, 0x1d0bb409, 0x021cda16, 1063 0x20de76c3, 0x34c59063, 0x04ede20e, 0x00003ed1, 0x282a3740, 1064 0x0be3bbf3, 0x29889dae, 0x03413697, 0x34c68a09, 0x210ebe93, 1065 0x0c8a224c, 0x0826b331, 0x00009099, 1066 ], 1067 [ 1068 0x0624e3c4, 0x140317ba, 0x2f82c99d, 0x260c0a2c, 0x25d55179, 1069 0x194dcc83, 0x3d95e462, 0x356f6a05, 0x0000741d, 0x0d4481d3, 1070 0x2657fc8b, 0x1ba5ca71, 0x3ae44b0d, 0x07b1548e, 0x0e0d5522, 1071 0x05fdc567, 0x2d1aa70e, 0x00000770, 1072 ], 1073 [ 1074 0x06072c01, 0x23857675, 0x1ead58a9, 0x0b8a12d9, 0x1ee2fc79, 1075 0x0177cb61, 0x0495a618, 0x20deb82b, 0x0000177c, 0x2fc7bfd8, 1076 0x310eef8b, 0x1fb4df39, 0x3b8530e8, 0x0f4e7226, 0x0246b6d0, 1077 0x2a558a24, 0x163353af, 0x000063bb, 1078 ], 1079 [ 1080 0x24d2920b, 0x1c249dcc, 0x2069c5e5, 0x09ab2f9e, 0x36df3cf1, 1081 0x1991fd0c, 0x062b97a7, 0x1e80070e, 0x000054e7, 0x20d0b375, 1082 0x2e9f20bd, 0x35090081, 0x1c7a9ddc, 0x22e7c371, 0x087e3016, 1083 0x03175421, 0x3c6eca7d, 0x0000f599, 1084 ], 1085 [ 1086 0x259b9d5f, 0x0d9a318f, 0x23a0ef16, 0x00ebe4b7, 0x088265ae, 1087 0x2cde2666, 0x2bae7adf, 0x1371a5c6, 0x0000f045, 0x0d034f36, 1088 0x1f967378, 0x1b5fa3f4, 0x0ec8739d, 0x1643e62a, 0x1653947e, 1089 0x22d1f4e6, 0x0fb8d64b, 0x0000b5b9, 1090 ], 1091 ]; 1092 1093 // Lookup one of the Gwin[] values, by index. This is constant-time. 1094 fn lookup_gwin(t: *p256_jacobian, idx: u32) void = { 1095 let xy: [18]u32 = [0...]; 1096 let k: u32 = 0; 1097 let u: size = 0; 1098 1099 for (let k = 0u32; k < 15; k += 1) { 1100 let m = -equ32(idx, k + 1); 1101 for (let u = 0z; u < 18; u += 1) { 1102 xy[u] |= m & gwin[k][u]; 1103 }; 1104 }; 1105 t.x[..] = xy[..9]; 1106 t.y[..] = xy[9..]; 1107 t.z[..] = [0...]; 1108 t.z[0] = 1; 1109 }; 1110 1111 // Multiply the generator by an integer. The integer is assumed non-zero 1112 // and lower than the curve order. 1113 fn p256_mulgen(p :*p256_jacobian, x: []u8) void = { 1114 // qz is a flag that is initially 1, and remains equal to 1 1115 // as long as the point is the point at infinity. 1116 // 1117 // We use a 4-bit window to handle multiplier bits by groups 1118 // of 4. The precomputed window is constant static data, with 1119 // points in affine coordinates; we use a constant-time lookup. 1120 let q = p256_jacobian { ... }; 1121 let qz: u32 = 1; 1122 let xpos: size = 0; 1123 1124 for (let i = len(x); i > 0; i -= 1) { 1125 let bx = x[xpos]: u32; 1126 xpos += 1; 1127 1128 for (let k = 0z; k < 2; k += 1) { 1129 let bits: u32 = 0; 1130 let bnz: u32 = 0; 1131 let t = p256_jacobian { ... }; 1132 let u = p256_jacobian { ... }; 1133 1134 p256_double(&q); 1135 p256_double(&q); 1136 p256_double(&q); 1137 p256_double(&q); 1138 bits = (bx >> 4) & 0x0f; 1139 bnz = nequ32(bits, 0); 1140 lookup_gwin(&t, bits); 1141 u = q; 1142 p256_add_mixed(&u, &t); 1143 jaccopy(bnz & qz, &q, &t); 1144 jaccopy(bnz & ~qz, &q, &u); 1145 qz &= ~bnz; 1146 bx <<= 4; 1147 }; 1148 }; 1149 *p = q; 1150 }; 1151 1152 const P256_G: [_]u8 = [ 1153 0x04, 0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, 0xf8, 0xbc, 0xe6, 1154 0xe5, 0x63, 0xa4, 0x40, 0xf2, 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 1155 0xa0, 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96, 0x4f, 0xe3, 0x42, 1156 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 1157 0x16, 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce, 0xcb, 0xb6, 0x40, 1158 0x68, 0x37, 0xbf, 0x51, 0xf5 1159 ]; 1160 1161 const P256_N: [_]u8 = [ 1162 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 1163 0xff, 0xff, 0xff, 0xff, 0xbc, 0xe6, 0xfa, 0xad, 0xa7, 0x17, 0x9e, 0x84, 1164 0xf3, 0xb9, 0xca, 0xc2, 0xfc, 0x63, 0x25, 0x51 1165 ]; 1166 1167 fn api256_mul(g: []u8, x: []u8) u32 = { 1168 if (len(g) != 65) { 1169 return 0; 1170 }; 1171 1172 let p = p256_jacobian { ... }; 1173 let r = p256_decode(&p, g); 1174 1175 p256_mul(&p, x); 1176 p256_to_affine(&p); 1177 p256_encode(g, &p); 1178 return r; 1179 }; 1180 1181 fn api256_mulgen(r: []u8, x: []u8) size = { 1182 let p = p256_jacobian { ... }; 1183 1184 p256_mulgen(&p, x); 1185 p256_to_affine(&p); 1186 p256_encode(r[..65], &p); 1187 return 65; 1188 }; 1189 1190 fn api256_muladd(a: []u8, b: []u8, x: []u8, y: []u8) u32 ={ 1191 let p = p256_jacobian { ... }; 1192 let q = p256_jacobian { ... }; 1193 1194 if (len(a) != 65) { 1195 return 0; 1196 }; 1197 1198 let r = p256_decode(&p, a); 1199 p256_mul(&p, x); 1200 if (len(b) == 0) { 1201 p256_mulgen(&q, y); 1202 } else { 1203 r &= p256_decode(&q, b); 1204 p256_mul(&q, y); 1205 }; 1206 1207 // The final addition may fail in case both points are equal. 1208 let t = p256_add(&p, &q); 1209 reduce_final_f256(p.z); 1210 let z: u32 = 0; 1211 for (let i = 0z; i < 9; i += 1) { 1212 z |= p.z[i]; 1213 }; 1214 z = equ32(z, 0); 1215 p256_double(&q); 1216 1217 // If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we 1218 // have the following: 1219 // 1220 // z = 0, t = 0 return P (normal addition) 1221 // z = 0, t = 1 return P (normal addition) 1222 // z = 1, t = 0 return Q (a 'double' case) 1223 // z = 1, t = 1 report an error (P+Q = 0) 1224 jaccopy(z & ~t, &p, &q); 1225 p256_to_affine(&p); 1226 p256_encode(a, &p); 1227 r &= ~(z & t); 1228 return r; 1229 }; 1230 1231 fn api256_order() const []u8 = P256_N; 1232 fn api256_generator() const []u8 = P256_G; 1233 1234 const _p256: curve = curve { 1235 pointsz = P256_POINTSZ, 1236 order = &api256_order, 1237 generator = &api256_generator, 1238 mul = &api256_mul, 1239 mulgen = &api256_mulgen, 1240 muladd = &api256_muladd, 1241 keygen = &mask_keygen, 1242 }; 1243 1244 // Size of a [[p256]] point in bytes. 1245 export def P256_POINTSZ = len(P256_G); 1246 1247 // Size of a [[p256]] scalar in bytes. 1248 export def P256_SCALARSZ = len(P256_N); 1249 1250 // A [[curve]] implementation of P-256, also known as secp256r1 or prime256v1. 1251 export const p256: *curve = &_p256;