hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 444720781794b0c23d350bbf9ebc4099d0f4f188
parent dd80eb36907f0c8f5d01145cf240f49fdd56f5e9
Author: Armin Preiml <apreiml@strohwolke.at>
Date:   Thu,  7 Mar 2024 14:50:05 +0100

crypto::ec: port the i31 p256 implementation from BearSSL

Signed-off-by: Armin Preiml <apreiml@strohwolke.at>

Diffstat:
Acrypto/ec/curves+test.ha | 453+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acrypto/ec/genkeys+test.ha | 34++++++++++++++++++++++++++++++++++
Acrypto/ec/p256.ha | 1252+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 1739 insertions(+), 0 deletions(-)

diff --git a/crypto/ec/curves+test.ha b/crypto/ec/curves+test.ha @@ -0,0 +1,453 @@ +// SPDX-License-Identifier: MPL-2.0 +// (c) Hare authors <https://harelang.org> + +use bytes; + + +// Testcases have been generated with help of pycryptdome. + +type multc = struct { + a: []u8, + b: []u8, + x: []u8, + y: []u8, + result: u32, + expected: []u8, +}; + +fn tmuladd(c: *curve, tcs: []multc) void = { + for (let i = 0z; i < len(tcs); i += 1) { + let t = tcs[i]; + assert(c.muladd(t.a, t.b, t.x, t.y) == t.result); + if (t.result == 1) { + assert(bytes::equal(t.a, t.expected)); + }; + }; +}; + +@test fn p256_mulgen() void = { + const c = p256; + // multiply by 1 + let m1: [P256_SCALARSZ]u8 = [0...]; + m1[len(m1) - 1] = 1; + + let g = alloc(P256_G); + defer free(g); + assert(c.mul(g, m1) == 1); + assert(bytes::equal(g, P256_G)); + + assert(c.mulgen(g, m1) == 65); + assert(bytes::equal(g, P256_G)); + + // multiply by order - 1 + let o = alloc(P256_N); + defer free(o); + o[len(o) - 1] -= 1; + + const expected: [_]u8 = [ + 0x04, 0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, 0xf8, + 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2, 0x77, 0x03, 0x7d, + 0x81, 0x2d, 0xeb, 0x33, 0xa0, 0xf4, 0xa1, 0x39, 0x45, 0xd8, + 0x98, 0xc2, 0x96, 0xb0, 0x1c, 0xbd, 0x1c, 0x01, 0xe5, 0x80, + 0x65, 0x71, 0x18, 0x14, 0xb5, 0x83, 0xf0, 0x61, 0xe9, 0xd4, + 0x31, 0xcc, 0xa9, 0x94, 0xce, 0xa1, 0x31, 0x34, 0x49, 0xbf, + 0x97, 0xc8, 0x40, 0xae, 0x0a, + ]; + + assert(c.mul(g, o) == 1); + assert(bytes::equal(g, expected)); + + let g = alloc(P256_G); + defer free(g); + assert(c.mulgen(g, o) == 65); + assert(bytes::equal(g, expected)); + let priv: [_]u8 = [ + 0xde, 0x5c, 0x88, 0x05, 0x42, 0xa0, 0x71, 0xe2, 0xf6, 0xfe, + 0xd0, 0xdc, 0x80, 0x07, 0x37, 0xc4, 0x35, 0xa6, 0x29, 0x48, + 0x85, 0x70, 0x4f, 0x54, 0x1c, 0x41, 0x89, 0xaf, 0xf6, 0xbc, + 0xb5, 0x19, + ]; + const expected: [_]u8 = [ + 0x04, 0x8a, 0x0d, 0x13, 0x84, 0x8e, 0x4f, 0xdb, 0x05, 0x83, + 0x8e, 0x76, 0x24, 0xf9, 0x8a, 0xb2, 0x83, 0x5a, 0x79, 0xb0, + 0x65, 0x63, 0x9b, 0x4c, 0x30, 0xcf, 0x69, 0x31, 0x46, 0xf7, + 0x10, 0x49, 0x13, 0x46, 0x0b, 0xb0, 0x4e, 0x3e, 0x44, 0x7f, + 0xad, 0x96, 0xfa, 0xfb, 0xc5, 0x78, 0xc0, 0xee, 0xc6, 0xc4, + 0x88, 0xa3, 0xe5, 0xc3, 0x10, 0x3c, 0x63, 0x24, 0xc1, 0x48, + 0x43, 0x37, 0x13, 0xdf, 0xc9, + ]; + + let g = alloc(P256_G); + assert(c.mul(g, priv) == 1); + assert(bytes::equal(g, expected)); + + let r: [65]u8 = [0...]; + assert(c.mulgen(r, priv) == 65); + assert(bytes::equal(r, expected)); +}; + +@test fn p256_muladd() void = { + let c = p256; + + let tcs: [_]multc = [ + multc { + a = [ + 0x04, 0xee, 0x19, 0x71, 0x69, 0xae, 0xbd, 0x94, + 0xc7, 0x8d, 0x4e, 0x7f, 0x05, 0x06, 0xb4, 0xdf, + 0x60, 0x43, 0x22, 0x2c, 0x50, 0x8d, 0x33, 0x8a, + 0x5a, 0xd0, 0x02, 0x60, 0x95, 0x5e, 0xda, 0x31, + 0x64, 0x4c, 0x7e, 0x63, 0xa9, 0xa0, 0xaf, 0x56, + 0xbb, 0x7c, 0x03, 0x84, 0x8a, 0x44, 0x30, 0xfd, + 0x39, 0x1d, 0xc0, 0x0f, 0x5c, 0x50, 0xcf, 0xda, + 0xf1, 0x99, 0x69, 0x8e, 0x77, 0x62, 0x56, 0x34, + 0xce, + ], + b = [], + x = [ + 0xef, 0xcb, 0x7a, 0x72, 0x9c, 0xb4, 0x6a, 0xe6, + 0xb4, 0x66, 0xa6, 0x8c, 0x1a, 0x18, 0x64, 0x5c, + 0xd8, 0xdc, 0x9d, 0xa2, 0x5b, 0x48, 0x45, 0xcd, + 0x68, 0x0c, 0x5f, 0x93, 0x1b, 0x74, 0x42, 0x0b, + ], + y = [ + 0x7b, 0x6b, 0x8f, 0x14, 0x8b, 0x7a, 0x5d, 0xbe, + 0xa5, 0xd0, 0x6b, 0x7a, 0x3c, 0xaa, 0x6f, 0x4b, + 0x2a, 0xeb, 0x52, 0x7e, 0x80, 0xdb, 0xc5, 0x95, + 0x73, 0x91, 0xab, 0x3d, 0x29, 0xbc, 0xbd, 0x74, + ], + result = 1, + expected = [ + 0x04, 0x85, 0x93, 0xaf, 0x32, 0x68, 0x1c, 0xa8, + 0xba, 0xf8, 0x64, 0x4e, 0xd3, 0xd8, 0xf3, 0x52, + 0x88, 0xdf, 0x3b, 0x92, 0x5b, 0xb4, 0xbb, 0xf6, + 0xd4, 0x2c, 0xce, 0x42, 0x8c, 0xa5, 0x9b, 0xd4, + 0xb0, 0x87, 0xac, 0xfe, 0x83, 0xcc, 0x11, 0xd1, + 0x98, 0x3e, 0x63, 0xc8, 0xba, 0xc1, 0x8e, 0x7e, + 0xf2, 0xd1, 0xa7, 0x78, 0x93, 0xa8, 0x71, 0x9e, + 0xee, 0xd2, 0xf9, 0xe3, 0xf3, 0x0a, 0xda, 0xf6, + 0x26, + ], + }, + multc { + a = [ + 0x04, 0xee, 0x19, 0x71, 0x69, 0xae, 0xbd, 0x94, + 0xc7, 0x8d, 0x4e, 0x7f, 0x05, 0x06, 0xb4, 0xdf, + 0x60, 0x43, 0x22, 0x2c, 0x50, 0x8d, 0x33, 0x8a, + 0x5a, 0xd0, 0x02, 0x60, 0x95, 0x5e, 0xda, 0x31, + 0x64, 0x4c, 0x7e, 0x63, 0xa9, 0xa0, 0xaf, 0x56, + 0xbb, 0x7c, 0x03, 0x84, 0x8a, 0x44, 0x30, 0xfd, + 0x39, 0x1d, 0xc0, 0x0f, 0x5c, 0x50, 0xcf, 0xda, + 0xf1, 0x99, 0x69, 0x8e, 0x77, 0x62, 0x56, 0x34, + 0xce, + ], + b = [ + 0x04, 0xbd, 0x52, 0x07, 0x83, 0x40, 0x88, 0x6a, + 0xa5, 0x24, 0xd8, 0x22, 0x13, 0x4f, 0xc3, 0xf3, + 0x03, 0xca, 0xe1, 0xd3, 0x5e, 0x01, 0x95, 0x82, + 0x5f, 0xa9, 0x95, 0x9f, 0xc3, 0xc4, 0x92, 0x8f, + 0xd2, 0x30, 0x18, 0x56, 0x29, 0x93, 0x74, 0x50, + 0xbd, 0xa6, 0x8d, 0x88, 0xf6, 0x03, 0xd6, 0x16, + 0xd9, 0x9d, 0x01, 0x82, 0xbe, 0x08, 0x13, 0xec, + 0x9f, 0xb4, 0xb1, 0x18, 0xbc, 0x14, 0x09, 0x31, + 0xad, + ], + x = [ + 0xef, 0xcb, 0x7a, 0x72, 0x9c, 0xb4, 0x6a, 0xe6, + 0xb4, 0x66, 0xa6, 0x8c, 0x1a, 0x18, 0x64, 0x5c, + 0xd8, 0xdc, 0x9d, 0xa2, 0x5b, 0x48, 0x45, 0xcd, + 0x68, 0x0c, 0x5f, 0x93, 0x1b, 0x74, 0x42, 0x0b, + ], + y = [ + 0x7b, 0x6b, 0x8f, 0x14, 0x8b, 0x7a, 0x5d, 0xbe, + 0xa5, 0xd0, 0x6b, 0x7a, 0x3c, 0xaa, 0x6f, 0x4b, + 0x2a, 0xeb, 0x52, 0x7e, 0x80, 0xdb, 0xc5, 0x95, + 0x73, 0x91, 0xab, 0x3d, 0x29, 0xbc, 0xbd, 0x74, + ], + result = 1, + expected = [ + 0x04, 0x7f, 0x66, 0x4b, 0x8c, 0x3e, 0x64, 0x5b, + 0xc1, 0x97, 0x60, 0xac, 0xa6, 0xc3, 0x0c, 0x17, + 0x80, 0xff, 0xd0, 0x95, 0xcd, 0x4d, 0x5b, 0xf1, + 0x35, 0x25, 0x53, 0x5a, 0xec, 0xff, 0xb1, 0xd9, + 0xdb, 0xdd, 0x96, 0xe3, 0xfb, 0x84, 0x68, 0x2b, + 0x53, 0x04, 0x4b, 0xcd, 0x7c, 0x4e, 0x10, 0xfa, + 0x87, 0x4c, 0x14, 0x43, 0xf0, 0x1b, 0x57, 0x39, + 0x02, 0x35, 0x44, 0xef, 0xa0, 0x38, 0xb5, 0x70, + 0xb8, + ], + }, + // invalid a + multc { + a = [ + 0x04, 0xee, 0x19, 0x71, 0x69, 0xae, 0xbd, 0x94, + 0xc7, 0x8d, 0x4e, 0x0f, 0x05, 0x06, 0xb4, 0xdf, + 0x60, 0x43, 0x22, 0x0c, 0x50, 0x8d, 0x33, 0x8a, + 0x5a, 0xd0, 0x02, 0x00, 0x95, 0x5e, 0xda, 0x31, + 0x64, 0x4c, 0x7e, 0x03, 0xa9, 0xa0, 0xaf, 0x56, + 0xbb, 0x7c, 0x03, 0x04, 0x8a, 0x44, 0x30, 0xfd, + 0x39, 0x1d, 0xc0, 0x0f, 0x5c, 0x50, 0xcf, 0xda, + 0xf1, 0x99, 0x69, 0x0e, 0x77, 0x62, 0x56, 0x34, + 0xce, + ], + b = [ + 0x04, 0xbd, 0x52, 0x07, 0x83, 0x40, 0x88, 0x6a, + 0xa5, 0x24, 0xd8, 0x22, 0x13, 0x4f, 0xc3, 0xf3, + 0x03, 0xca, 0xe1, 0xd3, 0x5e, 0x01, 0x95, 0x82, + 0x5f, 0xa9, 0x95, 0x9f, 0xc3, 0xc4, 0x92, 0x8f, + 0xd2, 0x30, 0x18, 0x56, 0x29, 0x93, 0x74, 0x50, + 0xbd, 0xa6, 0x8d, 0x88, 0xf6, 0x03, 0xd6, 0x16, + 0xd9, 0x9d, 0x01, 0x82, 0xbe, 0x08, 0x13, 0xec, + 0x9f, 0xb4, 0xb1, 0x18, 0xbc, 0x14, 0x09, 0x31, + 0xad, + ], + x = [ + 0xef, 0xcb, 0x7a, 0x72, 0x9c, 0xb4, 0x6a, 0xe6, + 0xb4, 0x66, 0xa6, 0x8c, 0x1a, 0x18, 0x64, 0x5c, + 0xd8, 0xdc, 0x9d, 0xa2, 0x5b, 0x48, 0x45, 0xcd, + 0x68, 0x0c, 0x5f, 0x93, 0x1b, 0x74, 0x42, 0x0b, + ], + y = [ + 0x7b, 0x6b, 0x8f, 0x14, 0x8b, 0x7a, 0x5d, 0xbe, + 0xa5, 0xd0, 0x6b, 0x7a, 0x3c, 0xaa, 0x6f, 0x4b, + 0x2a, 0xeb, 0x52, 0x7e, 0x80, 0xdb, 0xc5, 0x95, + 0x73, 0x91, 0xab, 0x3d, 0x29, 0xbc, 0xbd, 0x74, + ], + result = 0, + expected = [], + }, + // invalid b + multc { + a = [ + 0x04, 0xee, 0x19, 0x71, 0x69, 0xae, 0xbd, 0x94, + 0xc7, 0x8d, 0x4e, 0x7f, 0x05, 0x06, 0xb4, 0xdf, + 0x60, 0x43, 0x22, 0x2c, 0x50, 0x8d, 0x33, 0x8a, + 0x5a, 0xd0, 0x02, 0x60, 0x95, 0x5e, 0xda, 0x31, + 0x64, 0x4c, 0x7e, 0x63, 0xa9, 0xa0, 0xaf, 0x56, + 0xbb, 0x7c, 0x03, 0x84, 0x8a, 0x44, 0x30, 0xfd, + 0x39, 0x1d, 0xc0, 0x0f, 0x5c, 0x50, 0xcf, 0xda, + 0xf1, 0x99, 0x69, 0x8e, 0x77, 0x62, 0x56, 0x34, + 0xce, + ], + b = [ + 0x04, 0xbd, 0x52, 0x07, 0x83, 0x40, 0x88, 0x6a, + 0xf5, 0x24, 0xd8, 0x22, 0x13, 0x4f, 0xc3, 0xf3, + 0xf3, 0xca, 0xe1, 0xd3, 0x5e, 0x01, 0x95, 0x82, + 0x3f, 0xa9, 0x95, 0x9f, 0xc3, 0xc4, 0x92, 0x8f, + 0x52, 0x30, 0x18, 0x56, 0x29, 0x93, 0x74, 0x50, + 0x1d, 0xa6, 0x8d, 0x88, 0xf6, 0x03, 0xd6, 0x16, + 0xd9, 0x9d, 0x01, 0x82, 0xbe, 0x08, 0x13, 0xec, + 0x9f, 0xb4, 0xb1, 0x18, 0xbc, 0x14, 0x09, 0x31, + 0xad, + ], + x = [ + 0xef, 0xcb, 0x7a, 0x72, 0x9c, 0xb4, 0x6a, 0xe6, + 0xb4, 0x66, 0xa6, 0x8c, 0x1a, 0x18, 0x64, 0x5c, + 0xd8, 0xdc, 0x9d, 0xa2, 0x5b, 0x48, 0x45, 0xcd, + 0x68, 0x0c, 0x5f, 0x93, 0x1b, 0x74, 0x42, 0x0b, + ], + y = [ + 0x7b, 0x6b, 0x8f, 0x14, 0x8b, 0x7a, 0x5d, 0xbe, + 0xa5, 0xd0, 0x6b, 0x7a, 0x3c, 0xaa, 0x6f, 0x4b, + 0x2a, 0xeb, 0x52, 0x7e, 0x80, 0xdb, 0xc5, 0x95, + 0x73, 0x91, 0xab, 0x3d, 0x29, 0xbc, 0xbd, 0x74, + ], + result = 0, + expected = [], + }, + // invalid a and b + multc { + a = [ + 0x04, 0xee, 0x19, 0x71, 0x69, 0xae, 0xbd, 0x94, + 0xc7, 0x8d, 0x4e, 0x0f, 0x05, 0x06, 0xb4, 0xdf, + 0x60, 0x43, 0x22, 0x0c, 0x50, 0x8d, 0x33, 0x8a, + 0x5a, 0xd0, 0x02, 0x00, 0x95, 0x5e, 0xda, 0x31, + 0x64, 0x4c, 0x7e, 0x03, 0xa9, 0xa0, 0xaf, 0x56, + 0xbb, 0x7c, 0x03, 0x04, 0x8a, 0x44, 0x30, 0xfd, + 0x39, 0x1d, 0xc0, 0x0f, 0x5c, 0x50, 0xcf, 0xda, + 0xf1, 0x99, 0x69, 0x0e, 0x77, 0x62, 0x56, 0x34, + 0xce, + ], + b = [ + 0x04, 0xbd, 0x52, 0x07, 0x83, 0xf0, 0x88, 0x6a, + 0xa5, 0x24, 0xd8, 0x22, 0x13, 0xff, 0xc3, 0xf3, + 0x03, 0xca, 0xe1, 0xd3, 0x5e, 0xf1, 0x95, 0x82, + 0x5f, 0xa9, 0x95, 0x9f, 0xc3, 0xf4, 0x92, 0x8f, + 0xd2, 0x30, 0x18, 0x56, 0x29, 0xf3, 0x74, 0x50, + 0xbd, 0xa6, 0x8d, 0x88, 0xf6, 0xf3, 0xd6, 0x16, + 0xd9, 0x9d, 0x01, 0x82, 0xbe, 0xf8, 0x13, 0xec, + 0x9f, 0xb4, 0xb1, 0x18, 0xbc, 0x14, 0x09, 0x31, + 0xad, + ], + x = [ + 0xef, 0xcb, 0x7a, 0x72, 0x9c, 0xb4, 0x6a, 0xe6, + 0xb4, 0x66, 0xa6, 0x8c, 0x1a, 0x18, 0x64, 0x5c, + 0xd8, 0xdc, 0x9d, 0xa2, 0x5b, 0x48, 0x45, 0xcd, + 0x68, 0x0c, 0x5f, 0x93, 0x1b, 0x74, 0x42, 0x0b, + ], + y = [ + 0x7b, 0x6b, 0x8f, 0x14, 0x8b, 0x7a, 0x5d, 0xbe, + 0xa5, 0xd0, 0x6b, 0x7a, 0x3c, 0xaa, 0x6f, 0x4b, + 0x2a, 0xeb, 0x52, 0x7e, 0x80, 0xdb, 0xc5, 0x95, + 0x73, 0x91, 0xab, 0x3d, 0x29, 0xbc, 0xbd, 0x74, + ], + result = 0, + expected = [], + } + ]; + + tmuladd(p256, tcs); +}; + +@test fn p384_mulgen() void = { + let c = p384; + + // multiply by 1 + let m1: [P384_SCALARSZ]u8 = [0...]; + m1[len(m1) - 1] = 1; + + let g = alloc(P384_G); + defer free(g); + assert(c.mul(g, m1) == 1); + assert(bytes::equal(g, P384_G)); + + assert(c.mulgen(g, m1) == 97); + assert(bytes::equal(g, P384_G)); + + // multiply by order - 1 + let o = alloc(P384_N); + defer free(o); + o[len(o) - 1] -= 1; + + const expected: [_]u8 = [ + 0x04, 0xaa, 0x87, 0xca, 0x22, 0xbe, 0x8b, 0x05, 0x37, 0x8e, + 0xb1, 0xc7, 0x1e, 0xf3, 0x20, 0xad, 0x74, 0x6e, 0x1d, 0x3b, + 0x62, 0x8b, 0xa7, 0x9b, 0x98, 0x59, 0xf7, 0x41, 0xe0, 0x82, + 0x54, 0x2a, 0x38, 0x55, 0x02, 0xf2, 0x5d, 0xbf, 0x55, 0x29, + 0x6c, 0x3a, 0x54, 0x5e, 0x38, 0x72, 0x76, 0x0a, 0xb7, 0xc9, + 0xe8, 0x21, 0xb5, 0x69, 0xd9, 0xd3, 0x90, 0xa2, 0x61, 0x67, + 0x40, 0x6d, 0x6d, 0x23, 0xd6, 0x07, 0x0b, 0xe2, 0x42, 0xd7, + 0x65, 0xeb, 0x83, 0x16, 0x25, 0xce, 0xec, 0x4a, 0x0f, 0x47, + 0x3e, 0xf5, 0x9f, 0x4e, 0x30, 0xe2, 0x81, 0x7e, 0x62, 0x85, + 0xbc, 0xe2, 0x84, 0x6f, 0x15, 0xf1, 0xa0, + ]; + + assert(c.mul(g, o) == 1); + assert(bytes::equal(g, expected)); + + let g = alloc(P384_G); + defer free(g); + assert(c.mulgen(g, o) == 97); + assert(bytes::equal(g, expected)); + + // random example + let priv: [_]u8 = [ + 0xde, 0x5c, 0x88, 0x05, 0x42, 0xa0, 0x71, 0xe2, 0xf6, 0xfe, + 0xd0, 0xdc, 0x80, 0x07, 0x37, 0xc4, 0x35, 0xa6, 0x29, 0x48, + 0x85, 0x70, 0x4f, 0x54, 0x1c, 0x41, 0x89, 0xaf, 0xf6, 0xbc, + 0xb5, 0x19, 0x85, 0x70, 0x4f, 0x54, 0x1c, 0x41, 0x89, 0xaf, + 0xf6, 0xbc, 0xb5, 0x19, 0xb5, 0x19, 0xb5, 0x19, + ]; + const expected: [_]u8 = [ + 0x04, 0xb6, 0xb4, 0x77, 0x6e, 0x08, 0x86, 0x64, 0x55, 0x3a, + 0x64, 0xe0, 0xa3, 0xb0, 0x56, 0x39, 0x18, 0x4c, 0x5b, 0x15, + 0x3b, 0x19, 0xd0, 0x99, 0xc5, 0xd2, 0x7a, 0x4a, 0x23, 0xb9, + 0xf7, 0x37, 0x95, 0x4b, 0xe0, 0x97, 0x4c, 0x72, 0x16, 0x34, + 0x76, 0xf6, 0xce, 0x9e, 0xb7, 0x52, 0xc9, 0x33, 0xed, 0x26, + 0x5b, 0x7c, 0xb0, 0xd8, 0xe8, 0x8f, 0xc1, 0xb2, 0xd4, 0xbe, + 0xe3, 0x20, 0xc7, 0x82, 0xc7, 0x71, 0x3d, 0xaf, 0xad, 0xdb, + 0xe8, 0x56, 0xba, 0xad, 0x29, 0x76, 0x4c, 0x12, 0x24, 0xe9, + 0x59, 0xca, 0xef, 0x09, 0x48, 0x0d, 0x40, 0xb1, 0xce, 0x5f, + 0xcf, 0x6b, 0x62, 0xfd, 0xe5, 0x4b, 0xcc, + ]; + + let g = alloc(P384_G); + defer free(g); + assert(c.mul(g, priv) == 1); + assert(bytes::equal(g, expected)); + + let r: [97]u8 = [0...]; + assert(c.mulgen(r, priv) == 97); + assert(bytes::equal(r, expected)); +}; + +@test fn p521_mulgen() void = { + let c = p521; + + // multiply by 1 + let m1: [P521_SCALARSZ]u8 = [0...]; + m1[len(m1) - 1] = 1; + + let g = alloc(P521_G); + defer free(g); + assert(c.mul(g, m1) == 1); + assert(bytes::equal(g, P521_G)); + + assert(c.mulgen(g, m1) == 133); + assert(bytes::equal(g, P521_G)); + + // multiply by order - 1 + let o = alloc(P521_N); + defer free(o); + o[len(o) - 1] -= 1; + + const expected: [_]u8 = [ + 0x04, 0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, 0xe9, + 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95, 0xb4, 0x42, 0x9c, + 0x64, 0x81, 0x39, 0x05, 0x3f, 0xb5, 0x21, 0xf8, 0x28, 0xaf, + 0x60, 0x6b, 0x4d, 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, + 0xe7, 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff, 0xa8, + 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a, 0x42, 0x9b, 0xf9, + 0x7e, 0x7e, 0x31, 0xc2, 0xe5, 0xbd, 0x66, 0x00, 0xe7, 0xc6, + 0xd6, 0x95, 0x87, 0x65, 0xc4, 0x3f, 0xfb, 0xa3, 0x75, 0xa0, + 0x4b, 0xd3, 0x82, 0xe4, 0x26, 0x67, 0x0a, 0xbb, 0xb6, 0xa8, + 0x64, 0xbb, 0x97, 0xe8, 0x50, 0x42, 0xe8, 0xd8, 0xc1, 0x99, + 0xd3, 0x68, 0x11, 0x8d, 0x66, 0xa1, 0x0b, 0xd9, 0xbf, 0x3a, + 0xaf, 0x46, 0xfe, 0xc0, 0x52, 0xf8, 0x9e, 0xca, 0xc3, 0x8f, + 0x79, 0x5d, 0x8d, 0x3d, 0xbf, 0x77, 0x41, 0x6b, 0x89, 0x60, + 0x2e, 0x99, 0xaf, + ]; + + assert(c.mul(g, o) == 1); + assert(bytes::equal(g, expected)); + + let g = alloc(P521_G); + defer free(g); + assert(c.mulgen(g, o) == 133); + assert(bytes::equal(g, expected)); + + // random example + let priv: [_]u8 = [ + 0x01, 0x1f, 0xd5, 0x29, 0xff, 0x28, 0x24, 0x93, 0x43, 0x23, + 0x3c, 0x0f, 0xe4, 0x15, 0x6a, 0xea, 0x1c, 0x95, 0xe6, 0xf1, + 0x5e, 0x19, 0x30, 0xfe, 0xb4, 0xea, 0x0d, 0x24, 0xb3, 0x67, + 0xa0, 0xeb, 0xcf, 0xed, 0x5c, 0xcf, 0xcf, 0x9f, 0x9d, 0x48, + 0x04, 0x8b, 0xb1, 0x56, 0x53, 0xba, 0xa4, 0x86, 0x01, 0x29, + 0x61, 0x21, 0xca, 0xd7, 0x63, 0x84, 0x35, 0x45, 0xc3, 0x9b, + 0x55, 0xff, 0x9a, 0x4f, 0x27, 0x03, + ]; + const expected: [_]u8 = [ + 0x04, 0x00, 0x9f, 0x72, 0x15, 0x21, 0x0c, 0xdd, 0x22, 0x24, + 0x16, 0xbb, 0x18, 0xa6, 0x46, 0x21, 0xee, 0x4f, 0x73, 0x1e, + 0x9e, 0x03, 0xaf, 0x79, 0xc7, 0x1f, 0xf0, 0x33, 0x1c, 0xa6, + 0xa1, 0x13, 0x98, 0x95, 0x6e, 0x88, 0x44, 0x6b, 0xa4, 0xbd, + 0x4d, 0x49, 0x9e, 0xcb, 0x96, 0x0e, 0xdf, 0x67, 0x89, 0xca, + 0xd5, 0xc3, 0x9e, 0x99, 0xa8, 0xc9, 0x00, 0x4b, 0x2c, 0x49, + 0x57, 0xd7, 0xee, 0xf4, 0x6e, 0x07, 0xbb, 0x01, 0x35, 0xe2, + 0xa4, 0xd6, 0x60, 0x99, 0x2c, 0x62, 0xad, 0xab, 0x89, 0xb5, + 0xe7, 0xb6, 0x4b, 0x44, 0x89, 0xcf, 0xb0, 0x19, 0x29, 0xf8, + 0x54, 0x76, 0x07, 0x40, 0x3a, 0xee, 0x41, 0x0c, 0x64, 0xeb, + 0xda, 0x1e, 0x22, 0x4a, 0xee, 0xdb, 0x16, 0xa0, 0x5d, 0xe1, + 0x95, 0x2f, 0xe3, 0xbc, 0x53, 0xd3, 0xf4, 0x36, 0x76, 0x03, + 0x1f, 0xd8, 0xc6, 0xf4, 0x23, 0x4f, 0x45, 0x4e, 0x11, 0xeb, + 0xb2, 0xb1, 0x62, + ]; + + let g = alloc(P521_G); + defer free(g); + assert(c.mul(g, priv) == 1); + assert(bytes::equal(g, expected)); + + let r: [133]u8 = [0...]; + assert(c.mulgen(r, priv) == 133); + assert(bytes::equal(r, expected)); +}; diff --git a/crypto/ec/genkeys+test.ha b/crypto/ec/genkeys+test.ha @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: MPL-2.0 +// (c) Hare authors <https://harelang.org> + +use bytes; +use io; +use memio; + + +@test fn keygen_p256() void = edgecases(p256); + +fn edgecases(c: *curve) void = { + const scalarsz = len(c.order()); + let priv: [MAX_SCALARSZ]u8 = [0...]; + let priv = priv[..scalarsz]; + + let zero: [MAX_SCALARSZ]u8 = [0...]; + let zero = zero[..scalarsz]; + + let rnd = memio::fixed(zero); + assert(keygen(c, priv, &rnd) is io::error); + + let rnd = memio::fixed(c.order()); + assert(keygen(c, priv, &rnd) is io::error); + + let sub1: [MAX_SCALARSZ]u8 = [0...]; + let sub1 = sub1[..scalarsz]; + + sub1[..] = c.order()[..]; + sub1[len(sub1) - 1] -= 1; + + let rnd = memio::fixed(sub1); + assert(keygen(c, priv, &rnd): size == scalarsz); + assert(bytes::equal(sub1, priv)); +}; diff --git a/crypto/ec/p256.ha b/crypto/ec/p256.ha @@ -0,0 +1,1252 @@ +// SPDX-License-Identifier: MPL-2.0 +// (c) Hare authors <https://harelang.org> + +// Ported from BearSSL +// +// Copyright (c) 2017 Thomas Pornin <pornin@bolet.org> +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to +// the following conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +use bytes; +use crypto::math::*; + + +// Note from the BearSSL documentation: +// +// The ec_p256_m31 implementation supports P-256 with specialised code, +// including modular reduction routines that leverage the special format of the +// field modulus, and internal split of data as sequences of 30-bit words, which +// helps with carry propagation. ec_p256_m31 also includes fixed point +// optimisations, for the common case of multiplying the conventional generator +// point. These implementations are faster than the generic "i31" code, but with +// a larger code footprint. +// +// Convert an integer from unsigned big-endian encoding to a sequence of 30-bit +// words in little-endian order. The final "partial" word is returned. +fn be8tole30(dest: []u32, src: []u8) u32 = { + let acc: u32 = 0; + let acclen: u32 = 0; + let destpos = 0; + + for (let i = len(src); i > 0; i -= 1) { + let b = src[i - 1]: u32; + if (acclen < 22) { + acc |= b << acclen; + acclen += 8; + } else { + dest[destpos] = (acc | (b << acclen)) & 0x3FFFFFFF; + destpos += 1; + acc = b >> (30 - acclen); + acclen -= 22; + }; + }; + return acc; +}; + +// Convert an integer (30-bit words, little-endian) to unsigned +// big-endian encoding. The total encoding length is provided; all +// the destination bytes will be filled. +fn le30tobe8(dest: []u8, src: []u32) void = { + let acc: u32 = 0; + let acclen: u32 = 0; + let srcpos: size = 0; + + for (let i = len(dest); i > 0; i -= 1) { + if (acclen < 8) { + let w = src[srcpos]; + srcpos += 1; + + dest[i - 1] = (acc | (w << acclen)): u8; + acc = w >> (8 - acclen); + acclen += 22; + } else { + dest[i - 1] = acc: u8; + acc >>= 8; + acclen -= 8; + }; + }; +}; + +@test fn be8tole30() void = { + let be8: [6]u8 = [0x11, 0x22, 0xF3, 0x44, 0x55, 0x66]; + let le30result: [2]u32 = [0...]; + let be8result: [6]u8 = [0...]; + + le30result[1] = be8tole30(le30result, be8); + le30tobe8(be8result, le30result); + + assert(bytes::equal(be8, be8result)); +}; + +fn arsh(x: u32, n: u32) u32 = (*(&x: *i32) >> n: i32): u32; +fn arshw(x: u64, n: u32) u64 = (*(&x: *i64) >> n: i32): u64; + +@test fn arsh() void = assert(arsh(0x80000000u32, 2) == 0xe0000000); + +// Multiply two integers. Source integers are represented as arrays of +// nine 30-bit words, for values up to 2^270-1. Result is encoded over +// 18 words of 30 bits each. +fn mul9(d: []u32, a: []u32, b: []u32) void = { + // Maximum intermediate result is no more than + // 10376293531797946367, which fits in 64 bits. Reason: + // + // 10376293531797946367 = 9 * (2^30-1)^2 + 9663676406 + // 10376293531797946367 < 9663676407 * 2^30 + // + // Thus, adding together 9 products of 30-bit integers, with + // a carry of at most 9663676406, yields an integer that fits + // on 64 bits and generates a carry of at most 9663676406. + let t: [17]u64 = [0...]; + + t[0] = mulu32(a[0], b[0]); + t[1] = mulu32(a[0], b[1]) + + mulu32(a[1], b[0]); + t[2] = mulu32(a[0], b[2]) + + mulu32(a[1], b[1]) + + mulu32(a[2], b[0]); + t[3] = mulu32(a[0], b[3]) + + mulu32(a[1], b[2]) + + mulu32(a[2], b[1]) + + mulu32(a[3], b[0]); + t[4] = mulu32(a[0], b[4]) + + mulu32(a[1], b[3]) + + mulu32(a[2], b[2]) + + mulu32(a[3], b[1]) + + mulu32(a[4], b[0]); + t[5] = mulu32(a[0], b[5]) + + mulu32(a[1], b[4]) + + mulu32(a[2], b[3]) + + mulu32(a[3], b[2]) + + mulu32(a[4], b[1]) + + mulu32(a[5], b[0]); + t[6] = mulu32(a[0], b[6]) + + mulu32(a[1], b[5]) + + mulu32(a[2], b[4]) + + mulu32(a[3], b[3]) + + mulu32(a[4], b[2]) + + mulu32(a[5], b[1]) + + mulu32(a[6], b[0]); + t[7] = mulu32(a[0], b[7]) + + mulu32(a[1], b[6]) + + mulu32(a[2], b[5]) + + mulu32(a[3], b[4]) + + mulu32(a[4], b[3]) + + mulu32(a[5], b[2]) + + mulu32(a[6], b[1]) + + mulu32(a[7], b[0]); + t[8] = mulu32(a[0], b[8]) + + mulu32(a[1], b[7]) + + mulu32(a[2], b[6]) + + mulu32(a[3], b[5]) + + mulu32(a[4], b[4]) + + mulu32(a[5], b[3]) + + mulu32(a[6], b[2]) + + mulu32(a[7], b[1]) + + mulu32(a[8], b[0]); + t[9] = mulu32(a[1], b[8]) + + mulu32(a[2], b[7]) + + mulu32(a[3], b[6]) + + mulu32(a[4], b[5]) + + mulu32(a[5], b[4]) + + mulu32(a[6], b[3]) + + mulu32(a[7], b[2]) + + mulu32(a[8], b[1]); + t[10] = mulu32(a[2], b[8]) + + mulu32(a[3], b[7]) + + mulu32(a[4], b[6]) + + mulu32(a[5], b[5]) + + mulu32(a[6], b[4]) + + mulu32(a[7], b[3]) + + mulu32(a[8], b[2]); + t[11] = mulu32(a[3], b[8]) + + mulu32(a[4], b[7]) + + mulu32(a[5], b[6]) + + mulu32(a[6], b[5]) + + mulu32(a[7], b[4]) + + mulu32(a[8], b[3]); + t[12] = mulu32(a[4], b[8]) + + mulu32(a[5], b[7]) + + mulu32(a[6], b[6]) + + mulu32(a[7], b[5]) + + mulu32(a[8], b[4]); + t[13] = mulu32(a[5], b[8]) + + mulu32(a[6], b[7]) + + mulu32(a[7], b[6]) + + mulu32(a[8], b[5]); + t[14] = mulu32(a[6], b[8]) + + mulu32(a[7], b[7]) + + mulu32(a[8], b[6]); + t[15] = mulu32(a[7], b[8]) + + mulu32(a[8], b[7]); + t[16] = mulu32(a[8], b[8]); + + // Propagate carries. + let cc: u64 = 0; + for (let i = 0z; i < 17; i += 1) { + let w = t[i] + cc; + d[i] = w: u32 & 0x3FFFFFFF; + cc = w >> 30; + }; + d[17] = cc: u32; +}; + +// Square a 270-bit integer, represented as an array of nine 30-bit words. +// Result uses 18 words of 30 bits each. +fn square9(d: []u32, a: []u32) void = { + let t: [17]u64 = [0...]; + + t[0] = mulu32(a[0], a[0]); + t[1] = ((mulu32(a[0], a[1])) << 1); + t[2] = mulu32(a[1], a[1]) + + ((mulu32(a[0], a[2])) << 1); + t[3] = ((mulu32(a[0], a[3]) + + mulu32(a[1], a[2])) << 1); + t[4] = mulu32(a[2], a[2]) + + ((mulu32(a[0], a[4]) + + mulu32(a[1], a[3])) << 1); + t[5] = ((mulu32(a[0], a[5]) + + mulu32(a[1], a[4]) + + mulu32(a[2], a[3])) << 1); + t[6] = mulu32(a[3], a[3]) + + ((mulu32(a[0], a[6]) + + mulu32(a[1], a[5]) + + mulu32(a[2], a[4])) << 1); + t[7] = ((mulu32(a[0], a[7]) + + mulu32(a[1], a[6]) + + mulu32(a[2], a[5]) + + mulu32(a[3], a[4])) << 1); + t[8] = mulu32(a[4], a[4]) + + ((mulu32(a[0], a[8]) + + mulu32(a[1], a[7]) + + mulu32(a[2], a[6]) + + mulu32(a[3], a[5])) << 1); + t[9] = ((mulu32(a[1], a[8]) + + mulu32(a[2], a[7]) + + mulu32(a[3], a[6]) + + mulu32(a[4], a[5])) << 1); + t[10] = mulu32(a[5], a[5]) + + ((mulu32(a[2], a[8]) + + mulu32(a[3], a[7]) + + mulu32(a[4], a[6])) << 1); + t[11] = ((mulu32(a[3], a[8]) + + mulu32(a[4], a[7]) + + mulu32(a[5], a[6])) << 1); + t[12] = mulu32(a[6], a[6]) + + ((mulu32(a[4], a[8]) + + mulu32(a[5], a[7])) << 1); + t[13] = ((mulu32(a[5], a[8]) + + mulu32(a[6], a[7])) << 1); + t[14] = mulu32(a[7], a[7]) + + ((mulu32(a[6], a[8])) << 1); + t[15] = ((mulu32(a[7], a[8])) << 1); + t[16] = mulu32(a[8], a[8]); + + // Propagate carries. + let cc: u64 = 0; + for (let i = 0z; i < 17; i += 1) { + let w = t[i] + cc; + d[i] = w: u32 & 0x3FFFFFFF; + cc = w >> 30; + }; + d[17] = cc: u32; +}; + +// Base field modulus for P-256. +const F256: [_]u32 = [ + 0x3FFFFFFF, 0x3FFFFFFF, 0x3FFFFFFF, 0x0000003F, 0x00000000, 0x00000000, + 0x00001000, 0x3FFFC000, 0x0000FFFF +]; + +// The 'b' curve equation coefficient for P-256. +const P256_B: [_]u32 = [ + 0x27D2604B, 0x2F38F0F8, 0x053B0F63, 0x0741AC33, 0x1886BC65, 0x2EF555DA, + 0x293E7B3E, 0x0D762A8E, 0x00005AC6 +]; + +// Addition in the field. Source operands shall fit on 257 bits; output +// will be lower than twice the modulus. +fn add_f256(d: []u32, a: []u32, b: []u32) void = { + let w: u32 = 0; + + let cc: u32 = 0; + for (let i = 0z; i < 9; i += 1) { + w = a[i] + b[i] + cc; + d[i] = w & 0x3FFFFFFF; + cc = w >> 30; + }; + w >>= 16; + d[8] &= 0xFFFF; + d[3] -= w << 6; + d[6] -= w << 12; + d[7] += w << 14; + cc = w; + for (let i = 0z; i < 9; i += 1) { + w = d[i] + cc; + d[i] = w & 0x3FFFFFFF; + cc = arsh(w, 30); + }; +}; + +// Subtraction in the field. Source operands shall be smaller than twice +// the modulus; the result will fulfil the same property. +fn sub_f256(d: []u32, a: []u32, b: []u32) void = { + let w: u32 = 0; + let cc: u32 = 0; + + // We really compute a - b + 2*p to make sure that the result is + // positive. + w = a[0] - b[0] - 0x00002; + d[0] = w & 0x3FFFFFFF; + w = a[1] - b[1] + arsh(w, 30); + d[1] = w & 0x3FFFFFFF; + w = a[2] - b[2] + arsh(w, 30); + d[2] = w & 0x3FFFFFFF; + w = a[3] - b[3] + arsh(w, 30) + 0x00080; + d[3] = w & 0x3FFFFFFF; + w = a[4] - b[4] + arsh(w, 30); + d[4] = w & 0x3FFFFFFF; + w = a[5] - b[5] + arsh(w, 30); + d[5] = w & 0x3FFFFFFF; + w = a[6] - b[6] + arsh(w, 30) + 0x02000; + d[6] = w & 0x3FFFFFFF; + w = a[7] - b[7] + arsh(w, 30) - 0x08000; + d[7] = w & 0x3FFFFFFF; + w = a[8] - b[8] + arsh(w, 30) + 0x20000; + d[8] = w & 0xFFFF; + w >>= 16; + d[8] &= 0xFFFF; + d[3] -= w << 6; + d[6] -= w << 12; + d[7] += w << 14; + cc = w; + for (let i = 0z; i < 9; i += 1) { + w = d[i] + cc; + d[i] = w & 0x3FFFFFFF; + cc = arsh(w, 30); + }; +}; + +// Compute a multiplication in F256. Source operands shall be less than +// twice the modulus. +fn mul_f256(d: []u32, a: []u32, b: []u32) void = { + let t: [18]u32 = [0...]; + let s: [18]u64 = [0...]; + let x: u64 = 0; + + mul9(t, a, b); + + // Modular reduction: each high word in added/subtracted where + // necessary. + // + // The modulus is: + // p = 2^256 - 2^224 + 2^192 + 2^96 - 1 + // Therefore: + // 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p + // + // For a word x at bit offset n (n >= 256), we have: + // x*2^n = x*2^(n-32) - x*2^(n-64) + // - x*2^(n - 160) + x*2^(n-256) mod p + // + // Thus, we can nullify the high word if we reinject it at some + // proper emplacements. + // + // We use 64-bit intermediate words to allow for carries to + // accumulate easily, before performing the final propagation. + for (let i = 0; i < 18; i += 1) { + s[i] = t[i]; + }; + + for (let i = 17; i >= 9; i -= 1) { + let y = s[i]; + s[i - 1] += arshw(y, 2); + s[i - 2] += (y << 28) & 0x3FFFFFFF; + s[i - 2] -= arshw(y, 4); + s[i - 3] -= (y << 26) & 0x3FFFFFFF; + s[i - 5] -= arshw(y, 10); + s[i - 6] -= (y << 20) & 0x3FFFFFFF; + s[i - 8] += arshw(y, 16); + s[i - 9] += (y << 14) & 0x3FFFFFFF; + }; + + // Carry propagation must be signed. Moreover, we may have overdone + // it a bit, and obtain a negative result. + // + // The loop above ran 9 times; each time, each word was augmented + // by at most one extra word (in absolute value). Thus, the top + // word must in fine fit in 39 bits, so the carry below will fit + // on 9 bits. + let cc: u64 = 0; + for (let i = 0z; i < 9; i += 1) { + x = s[i] + cc; + d[i] = x: u32 & 0x3FFFFFFF; + cc = arshw(x, 30); + }; + + // All nine words fit on 30 bits, but there may be an extra + // carry for a few bits (at most 9), and that carry may be + // negative. Moreover, we want the result to fit on 257 bits. + // The two lines below ensure that the word in d[] has length + // 256 bits, and the (signed) carry (beyond 2^256) is in cc. The + // significant length of cc is less than 24 bits, so we will be + // able to switch to 32-bit operations. + cc = arshw(x, 16); + d[8] &= 0xFFFF; + + // One extra round of reduction, for cc*2^256, which means + // adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative) + // value. If cc is negative, then it may happen (rarely, but + // not neglectibly so) that the result would be negative. In + // order to avoid that, if cc is negative, then we add the + // modulus once. Note that if cc is negative, then propagating + // that carry must yield a value lower than the modulus, so + // adding the modulus once will keep the final result under + // twice the modulus. + let z = cc: u32; + d[3] -= z << 6; + d[6] -= (z << 12) & 0x3FFFFFFF; + d[7] -= arsh(z, 18); + d[7] += (z << 14) & 0x3FFFFFFF; + d[8] += arsh(z, 16); + let c = z >> 31; + d[0] -= c; + d[3] += c << 6; + d[6] += c << 12; + d[7] -= c << 14; + d[8] += c << 16; + + for (let i = 0z; i < 9; i += 1) { + let w = d[i] + z; + d[i] = w & 0x3FFFFFFF; + z = arsh(w, 30); + }; +}; + + +// Compute a square in F256. Source operand shall be less than +// twice the modulus. +fn square_f256(d: []u32, a: []u32) void = { + let t: [18]u32 = [0...]; + let s: [18]u64 = [0...]; + + square9(t, a); + + // Modular reduction: each high word in added/subtracted where + // necessary. + // + // The modulus is: + // p = 2^256 - 2^224 + 2^192 + 2^96 - 1 + // Therefore: + // 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p + // + // For a word x at bit offset n (n >= 256), we have: + // x*2^n = x*2^(n-32) - x*2^(n-64) + // - x*2^(n - 160) + x*2^(n-256) mod p + // + // Thus, we can nullify the high word if we reinject it at some + // proper emplacements. + // + // We use 64-bit intermediate words to allow for carries to + // accumulate easily, before performing the final propagation. + for (let i = 0; i < 18; i += 1) { + s[i] = t[i]; + }; + + for (let i = 17; i >= 9; i -= 1) { + let y = s[i]; + s[i - 1] += arshw(y, 2); + s[i - 2] += (y << 28) & 0x3FFFFFFF; + s[i - 2] -= arshw(y, 4); + s[i - 3] -= (y << 26) & 0x3FFFFFFF; + s[i - 5] -= arshw(y, 10); + s[i - 6] -= (y << 20) & 0x3FFFFFFF; + s[i - 8] += arshw(y, 16); + s[i - 9] += (y << 14) & 0x3FFFFFFF; + }; + + // Carry propagation must be signed. Moreover, we may have overdone + // it a bit, and obtain a negative result. + // + // The loop above ran 9 times; each time, each word was augmented + // by at most one extra word (in absolute value). Thus, the top + // word must in fine fit in 39 bits, so the carry below will fit + // on 9 bits. + let cc: u64 = 0; + let x: u64 = 0; + for (let i = 0; i < 9; i += 1) { + x = s[i] + cc; + d[i] = x: u32 & 0x3FFFFFFF; + cc = arshw(x, 30); + }; + + // All nine words fit on 30 bits, but there may be an extra + // carry for a few bits (at most 9), and that carry may be + // negative. Moreover, we want the result to fit on 257 bits. + // The two lines below ensure that the word in d[] has length + // 256 bits, and the (signed) carry (beyond 2^256) is in cc. The + // significant length of cc is less than 24 bits, so we will be + // able to switch to 32-bit operations. + cc = arshw(x, 16); + d[8] &= 0xFFFF; + + // One extra round of reduction, for cc*2^256, which means + // adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative) + // value. If cc is negative, then it may happen (rarely, but + // not neglectibly so) that the result would be negative. In + // order to avoid that, if cc is negative, then we add the + // modulus once. Note that if cc is negative, then propagating + // that carry must yield a value lower than the modulus, so + // adding the modulus once will keep the final result under + // twice the modulus. + let z = cc: u32; + d[3] -= z << 6; + d[6] -= (z << 12) & 0x3FFFFFFF; + d[7] -= arsh(z, 18); + d[7] += (z << 14) & 0x3FFFFFFF; + d[8] += arsh(z, 16); + let c = z >> 31; + d[0] -= c; + d[3] += c << 6; + d[6] += c << 12; + d[7] -= c << 14; + d[8] += c << 16; + + for (let i = 0z; i < 9; i += 1) { + let w = d[i] + z; + d[i] = w & 0x3FFFFFFF; + z = arsh(w, 30); + }; +}; + +// Perform a "final reduction" in field F256 (field for curve P-256). +// The source value must be less than twice the modulus. If the value +// is not lower than the modulus, then the modulus is subtracted and +// this function returns 1; otherwise, it leaves it untouched and it +// returns 0. +fn reduce_final_f256(d: []u32) u32 = { + let t: [9]u32 = [0...]; + + let cc: u32 = 0; + for (let i = 0; i < 9; i += 1) { + let w = d[i] - F256[i] - cc; + cc = w >> 31; + t[i] = w & 0x3FFFFFFF; + }; + cc ^= 1; + ccopyu32(cc, d, t); + return cc; +}; + +// Jacobian coordinates for a point in P-256: affine coordinates (X,Y) +// are such that: +// X = x / z^2 +// Y = y / z^3 +// For the point at infinity, z = 0. +// Each point thus admits many possible representations. +// +// Coordinates are represented in arrays of 32-bit integers, each holding +// 30 bits of data. Values may also be slightly greater than the modulus, +// but they will always be lower than twice the modulus. +type p256_jacobian = struct { + x: [9]u32, + y: [9]u32, + z: [9]u32, +}; + +// Convert a point to affine coordinates: +// - If the point is the point at infinity, then all three coordinates +// are set to 0. +// - Otherwise, the 'z' coordinate is set to 1, and the 'x' and 'y' +// coordinates are the 'X' and 'Y' affine coordinates. +// The coordinates are guaranteed to be lower than the modulus. +fn p256_to_affine(p: *p256_jacobian) void = { + let t1: [9]u32 = [0...]; + let t2: [9]u32 = [0...]; + + // Invert z with a modular exponentiation: the modulus is + // p = 2^256 - 2^224 + 2^192 + 2^96 - 1, and the exponent is + // p-2. Exponent bit pattern (from high to low) is: + // - 32 bits of value 1 + // - 31 bits of value 0 + // - 1 bit of value 1 + // - 96 bits of value 0 + // - 94 bits of value 1 + // - 1 bit of value 0 + // - 1 bit of value 1 + // Thus, we precompute z^(2^31-1) to speed things up. + // + // If z = 0 (point at infinity) then the modular exponentiation + // will yield 0, which leads to the expected result (all three + // coordinates set to 0). + + // A simple square-and-multiply for z^(2^31-1). We could save about + // two dozen multiplications here with an addition chain, but + // this would require a bit more code, and extra stack buffers. + t1[..] = p.z[..]; + for (let i = 0; i < 30; i += 1) { + square_f256(t1, t1); + mul_f256(t1, t1, p.z); + }; + + // Square-and-multiply. Apart from the squarings, we have a few + // multiplications to set bits to 1; we multiply by the original z + // for setting 1 bit, and by t1 for setting 31 bits. + t2[..] = p.z[..]; + for (let i = 1; i < 256; i += 1) { + square_f256(t2, t2); + switch (i) { + case 31, 190, 221, 252 => + mul_f256(t2, t2, t1); + case 63, 253, 255 => + mul_f256(t2, t2, p.z); + case => + yield; + }; + }; + + // Now that we have 1/z, multiply x by 1/z^2 and y by 1/z^3. + mul_f256(t1, t2, t2); + mul_f256(p.x, t1, p.x); + mul_f256(t1, t1, t2); + mul_f256(p.y, t1, p.y); + reduce_final_f256(p.x); + reduce_final_f256(p.y); + + // Multiply z by 1/z. If z = 0, then this will yield 0, otherwise + // this will set z to 1. + mul_f256(p.z, p.z, t2); + reduce_final_f256(p.z); +}; + +// Double a point in P-256. This function works for all valid points, +// including the point at infinity. +fn p256_double(q: *p256_jacobian) void = { + // Doubling formulas are: + // + // s = 4*x*y^2 + // m = 3*(x + z^2)*(x - z^2) + // x' = m^2 - 2*s + // y' = m*(s - x') - 8*y^4 + // z' = 2*y*z + // + // These formulas work for all points, including points of order 2 + // and points at infinity: + // - If y = 0 then z' = 0. But there is no such point in P-256 + // anyway. + // - If z = 0 then z' = 0. + let t1: [9]u32 = [0...]; + let t2: [9]u32 = [0...]; + let t3: [9]u32 = [0...]; + let t4: [9]u32 = [0...]; + + // Compute z^2 in t1. + square_f256(t1, q.z); + + // Compute x-z^2 in t2 and x+z^2 in t1. + add_f256(t2, q.x, t1); + sub_f256(t1, q.x, t1); + + // Compute 3*(x+z^2)*(x-z^2) in t1. + mul_f256(t3, t1, t2); + add_f256(t1, t3, t3); + add_f256(t1, t3, t1); + + // Compute 4*x*y^2 (in t2) and 2*y^2 (in t3). + square_f256(t3, q.y); + add_f256(t3, t3, t3); + mul_f256(t2, q.x, t3); + add_f256(t2, t2, t2); + + + // Compute x' = m^2 - 2*s. + square_f256(q.x, t1); + sub_f256(q.x, q.x, t2); + sub_f256(q.x, q.x, t2); + + // Compute z' = 2*y*z. + mul_f256(t4, q.y, q.z); + add_f256(q.z, t4, t4); + + // Compute y' = m*(s - x') - 8*y^4. Note that we already have + // 2*y^2 in t3. + sub_f256(t2, t2, q.x); + mul_f256(q.y, t1, t2); + square_f256(t4, t3); + add_f256(t4, t4, t4); + sub_f256(q.y, q.y, t4); +}; + +// Add point P2 to point P1. +// +// This function computes the wrong result in the following cases: +// +// - If P1 == 0 but P2 != 0 +// - If P1 != 0 but P2 == 0 +// - If P1 == P2 +// +// In all three cases, P1 is set to the point at infinity. +// +// Returned value is 0 if one of the following occurs: +// +// - P1 and P2 have the same Y coordinate +// - P1 == 0 and P2 == 0 +// - The Y coordinate of one of the points is 0 and the other point is +// the point at infinity. +// +// The third case cannot actually happen with valid points, since a point +// with Y == 0 is a point of order 2, and there is no point of order 2 on +// curve P-256. +// +// Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller +// can apply the following: +// +// - If the result is not the point at infinity, then it is correct. +// - Otherwise, if the returned value is 1, then this is a case of +// P1+P2 == 0, so the result is indeed the point at infinity. +// - Otherwise, P1 == P2, so a "double" operation should have been +// performed. +fn p256_add(p1: *p256_jacobian, p2: *p256_jacobian) u32 = { + // Addtions formulas are: + // + // u1 = x1 * z2^2 + // u2 = x2 * z1^2 + // s1 = y1 * z2^3 + // s2 = y2 * z1^3 + // h = u2 - u1 + // r = s2 - s1 + // x3 = r^2 - h^3 - 2 * u1 * h^2 + // y3 = r * (u1 * h^2 - x3) - s1 * h^3 + // z3 = h * z1 * z2 + let t1: [9]u32 = [0...]; + let t2: [9]u32 = [0...]; + let t3: [9]u32 = [0...]; + let t4: [9]u32 = [0...]; + let t5: [9]u32 = [0...]; + let t6: [9]u32 = [0...]; + let t7: [9]u32 = [0...]; + let ret: u32 = 0; + + // Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3). + square_f256(t3, p2.z); + mul_f256(t1, p1.x, t3); + mul_f256(t4, p2.z, t3); + mul_f256(t3, p1.y, t4); + + // Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). + square_f256(t4, p1.z); + mul_f256(t2, p2.x, t4); + mul_f256(t5, p1.z, t4); + mul_f256(t4, p2.y, t5); + + // Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4). + // We need to test whether r is zero, so we will do some extra + // reduce. + sub_f256(t2, t2, t1); + sub_f256(t4, t4, t3); + reduce_final_f256(t4); + ret = 0; + for (let i = 0; i < 9; i += 1) { + ret |= t4[i]; + }; + ret = (ret | -ret) >> 31; + + // Compute u1*h^2 (in t6) and h^3 (in t5); + square_f256(t7, t2); + mul_f256(t6, t1, t7); + mul_f256(t5, t7, t2); + + // Compute x3 = r^2 - h^3 - 2*u1*h^2. + square_f256(p1.x, t4); + sub_f256(p1.x, p1.x, t5); + sub_f256(p1.x, p1.x, t6); + sub_f256(p1.x, p1.x, t6); + + // Compute y3 = r*(u1*h^2 - x3) - s1*h^3. + sub_f256(t6, t6, p1.x); + mul_f256(p1.y, t4, t6); + mul_f256(t1, t5, t3); + sub_f256(p1.y, p1.y, t1); + + // Compute z3 = h*z1*z2. + mul_f256(t1, p1.z, p2.z); + mul_f256(p1.z, t1, t2); + + return ret; +}; + +// Add point P2 to point P1. This is a specialised function for the +// case when P2 is a non-zero point in affine coordinate. +// +// This function computes the wrong result in the following cases: +// +// - If P1 == 0 +// - If P1 == P2 +// +// In both cases, P1 is set to the point at infinity. +// +// Returned value is 0 if one of the following occurs: +// +// - P1 and P2 have the same Y coordinate +// - The Y coordinate of P2 is 0 and P1 is the point at infinity. +// +// The second case cannot actually happen with valid points, since a point +// with Y == 0 is a point of order 2, and there is no point of order 2 on +// curve P-256. +// +// Therefore, assuming that P1 != 0 on input, then the caller +// can apply the following: +// +// - If the result is not the point at infinity, then it is correct. +// - Otherwise, if the returned value is 1, then this is a case of +// P1+P2 == 0, so the result is indeed the point at infinity. +// - Otherwise, P1 == P2, so a "double" operation should have been +// performed. +fn p256_add_mixed(p1: *p256_jacobian, p2: *p256_jacobian) u32 = { + // Addtions formulas are: + // + // u1 = x1 + // u2 = x2 * z1^2 + // s1 = y1 + // s2 = y2 * z1^3 + // h = u2 - u1 + // r = s2 - s1 + // x3 = r^2 - h^3 - 2 * u1 * h^2 + // y3 = r * (u1 * h^2 - x3) - s1 * h^3 + // z3 = h * z1 + let t1: [9]u32 = [0...]; + let t2: [9]u32 = [0...]; + let t3: [9]u32 = [0...]; + let t4: [9]u32 = [0...]; + let t5: [9]u32 = [0...]; + let t6: [9]u32 = [0...]; + let t7: [9]u32 = [0...]; + let ret: u32 = 0; + + // Compute u1 = x1 (in t1) and s1 = y1 (in t3). + t1[..] = p1.x[..]; + t3[..] = p1.y[..]; + + // Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). + square_f256(t4, p1.z); + mul_f256(t2, p2.x, t4); + mul_f256(t5, p1.z, t4); + mul_f256(t4, p2.y, t5); + + // Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4). + // We need to test whether r is zero, so we will do some extra + // reduce. + sub_f256(t2, t2, t1); + sub_f256(t4, t4, t3); + reduce_final_f256(t4); + ret = 0; + for (let i = 0; i < 9; i += 1) { + ret |= t4[i]; + }; + ret = (ret | -ret) >> 31; + + // Compute u1*h^2 (in t6) and h^3 (in t5); + square_f256(t7, t2); + mul_f256(t6, t1, t7); + mul_f256(t5, t7, t2); + + // Compute x3 = r^2 - h^3 - 2*u1*h^2. + square_f256(p1.x, t4); + sub_f256(p1.x, p1.x, t5); + sub_f256(p1.x, p1.x, t6); + sub_f256(p1.x, p1.x, t6); + + // Compute y3 = r*(u1*h^2 - x3) - s1*h^3. + sub_f256(t6, t6, p1.x); + mul_f256(p1.y, t4, t6); + mul_f256(t1, t5, t3); + sub_f256(p1.y, p1.y, t1); + + // Compute z3 = h*z1*z2. + mul_f256(p1.z, p1.z, t2); + + return ret; +}; + +// Decode a P-256 point. This function does not support the point at +// infinity. Returned value is 0 if the point is invalid, 1 otherwise. +fn p256_decode(p: *p256_jacobian, src: const []u8) u32 = { + let tx: [9]u32 = [0...]; + let ty: [9]u32 = [0...]; + let t1: [9]u32 = [0...]; + let t2: [9]u32 = [0...]; + let bad: u32 = 0; + + if (len(src) != 65) { + return 0; + }; + let buf = src; + + // First byte must be 0x04 (uncompressed format). We could support + // "hybrid format" (first byte is 0x06 or 0x07, and encodes the + // least significant bit of the Y coordinate), but it is explicitly + // forbidden by RFC 5480 (section 2.2). + bad = nequ32(buf[0], 0x04); + + // Decode the coordinates, and check that they are both lower + // than the modulus. + tx[8] = be8tole30(tx, buf[1..33]); + ty[8] = be8tole30(ty, buf[33..]); + bad |= reduce_final_f256(tx); + bad |= reduce_final_f256(ty); + + // Check curve equation. + square_f256(t1, tx); + mul_f256(t1, tx, t1); + square_f256(t2, ty); + sub_f256(t1, t1, tx); + sub_f256(t1, t1, tx); + sub_f256(t1, t1, tx); + add_f256(t1, t1, P256_B); + sub_f256(t1, t1, t2); + reduce_final_f256(t1); + for (let i = 0; i < 9; i += 1) { + bad |= t1[i]; + }; + + // Copy coordinates to the point structure. + p.x[..] = tx[..]; + p.y[..] = ty[..]; + p.z[..] = [0...]; + p.z[0] = 1; + return equ32(bad, 0); +}; + +// Encode a point into a buffer. This function assumes that the point is +// valid, in affine coordinates, and not the point at infinity. +fn p256_encode(dest: []u8, p: *p256_jacobian) void = { + dest[0] = 0x04; + le30tobe8(dest[1..33], p.x); + le30tobe8(dest[33..], p.y); +}; + +// Multiply a curve point by an integer. The integer is assumed to be +// lower than the curve order, and the base point must not be the point +// at infinity. +fn p256_mul(p: *p256_jacobian, x: []u8) void = { + // qz is a flag that is initially 1, and remains equal to 1 + // as long as the point is the point at infinity. + // + // We use a 2-bit window to handle multiplier bits by pairs. + // The precomputed window really is the points P2 and P3. + let qz: u32 = 1; + let p2 = p256_jacobian { ... }; + let p3 = p256_jacobian { ... }; + let q = p256_jacobian { ... }; + let t = p256_jacobian { ... }; + let u = p256_jacobian { ... }; + let xpos: size = 0; + + // Compute window values. + p2 = *p; + p256_double(&p2); + p3 = *p; + p256_add(&p3, &p2); + + // We start with Q = 0. We process multiplier bits 2 by 2. + for (let i = len(x); i > 0; i -= 1) { + for (let k = 6i8; k >= 0; k -= 2) { + let bits: u32 = 0; + let bnz: u32 = 0; + + p256_double(&q); + p256_double(&q); + t = *p; + u = q; + bits = (x[xpos] >> k: u8) & 3u32; + bnz = nequ32(bits, 0); + jaccopy(equ32(bits, 2), &t, &p2); + jaccopy(equ32(bits, 3), &t, &p3); + p256_add(&u, &t); + jaccopy(bnz & qz, &q, &t); + jaccopy(bnz & ~qz, &q, &u); + qz &= ~bnz; + }; + xpos += 1; + }; + *p = q; +}; + +fn jaccopy(ctl: u32, dest: *p256_jacobian, src: *p256_jacobian) void = { + ccopyu32(ctl, dest.x, src.x); + ccopyu32(ctl, dest.y, src.y); + ccopyu32(ctl, dest.z, src.z); +}; + +// Precomputed window: k*G points, where G is the curve generator, and k +// is an integer from 1 to 15 (inclusive). The X and Y coordinates of +// the point are encoded as 9 words of 30 bits each (little-endian +// order). +const gwin: [15][18]u32 = [ + [ + 0x1898c296, 0x1284e517, 0x1eb33a0f, 0x00df604b, 0x2440f277, + 0x339b958e, 0x04247f8b, 0x347cb84b, 0x00006b17, 0x37bf51f5, + 0x2ed901a0, 0x3315ecec, 0x338cd5da, 0x0f9e162b, 0x1fad29f0, + 0x27f9b8ee, 0x10b8bf86, 0x00004fe3, + ], + [ + 0x07669978, 0x182d23f1, 0x3f21b35a, 0x225a789d, 0x351ac3c0, + 0x08e00c12, 0x34f7e8a5, 0x1ec62340, 0x00007cf2, 0x227873d1, + 0x3812de74, 0x0e982299, 0x1f6b798f, 0x3430dbba, 0x366b1a7d, + 0x2d040293, 0x154436e3, 0x00000777, + ], + [ + 0x06e7fd6c, 0x2d05986f, 0x3ada985f, 0x31adc87b, 0x0bf165e6, + 0x1fbe5475, 0x30a44c8f, 0x3934698c, 0x00005ecb, 0x227d5032, + 0x29e6c49e, 0x04fb83d9, 0x0aac0d8e, 0x24a2ecd8, 0x2c1b3869, + 0x0ff7e374, 0x19031266, 0x00008734, + ], + [ + 0x2b030852, 0x024c0911, 0x05596ef5, 0x07f8b6de, 0x262bd003, + 0x3779967b, 0x08fbba02, 0x128d4cb4, 0x0000e253, 0x184ed8c6, + 0x310b08fc, 0x30ee0055, 0x3f25b0fc, 0x062d764e, 0x3fb97f6a, + 0x33cc719d, 0x15d69318, 0x0000e0f1, + ], + [ + 0x03d033ed, 0x05552837, 0x35be5242, 0x2320bf47, 0x268fdfef, + 0x13215821, 0x140d2d78, 0x02de9454, 0x00005159, 0x3da16da4, + 0x0742ed13, 0x0d80888d, 0x004bc035, 0x0a79260d, 0x06fcdafe, + 0x2727d8ae, 0x1f6a2412, 0x0000e0c1, + ], + [ + 0x3c2291a9, 0x1ac2aba4, 0x3b215b4c, 0x131d037a, 0x17dde302, + 0x0c90b2e2, 0x0602c92d, 0x05ca9da9, 0x0000b01a, 0x0fc77fe2, + 0x35f1214e, 0x07e16bdf, 0x003ddc07, 0x2703791c, 0x3038b7ee, + 0x3dad56fe, 0x041d0c8d, 0x0000e85c, + ], + [ + 0x3187b2a3, 0x0018a1c0, 0x00fef5b3, 0x3e7e2e2a, 0x01fb607e, + 0x2cc199f0, 0x37b4625b, 0x0edbe82f, 0x00008e53, 0x01f400b4, + 0x15786a1b, 0x3041b21c, 0x31cd8cf2, 0x35900053, 0x1a7e0e9b, + 0x318366d0, 0x076f780c, 0x000073eb, + ], + [ + 0x1b6fb393, 0x13767707, 0x3ce97dbb, 0x348e2603, 0x354cadc1, + 0x09d0b4ea, 0x1b053404, 0x1de76fba, 0x000062d9, 0x0f09957e, + 0x295029a8, 0x3e76a78d, 0x3b547dae, 0x27cee0a2, 0x0575dc45, + 0x1d8244ff, 0x332f647a, 0x0000ad5a, + ], + [ + 0x10949ee0, 0x1e7a292e, 0x06df8b3d, 0x02b2e30b, 0x31f8729e, + 0x24e35475, 0x30b71878, 0x35edbfb7, 0x0000ea68, 0x0dd048fa, + 0x21688929, 0x0de823fe, 0x1c53faa9, 0x0ea0c84d, 0x052a592a, + 0x1fce7870, 0x11325cb2, 0x00002a27, + ], + [ + 0x04c5723f, 0x30d81a50, 0x048306e4, 0x329b11c7, 0x223fb545, + 0x085347a8, 0x2993e591, 0x1b5aca8e, 0x0000cef6, 0x04af0773, + 0x28d2eea9, 0x2751eeec, 0x037b4a7f, 0x3b4c1059, 0x08f37674, + 0x2ae906e1, 0x18a88a6a, 0x00008786, + ], + [ + 0x34bc21d1, 0x0cce474d, 0x15048bf4, 0x1d0bb409, 0x021cda16, + 0x20de76c3, 0x34c59063, 0x04ede20e, 0x00003ed1, 0x282a3740, + 0x0be3bbf3, 0x29889dae, 0x03413697, 0x34c68a09, 0x210ebe93, + 0x0c8a224c, 0x0826b331, 0x00009099, + ], + [ + 0x0624e3c4, 0x140317ba, 0x2f82c99d, 0x260c0a2c, 0x25d55179, + 0x194dcc83, 0x3d95e462, 0x356f6a05, 0x0000741d, 0x0d4481d3, + 0x2657fc8b, 0x1ba5ca71, 0x3ae44b0d, 0x07b1548e, 0x0e0d5522, + 0x05fdc567, 0x2d1aa70e, 0x00000770, + ], + [ + 0x06072c01, 0x23857675, 0x1ead58a9, 0x0b8a12d9, 0x1ee2fc79, + 0x0177cb61, 0x0495a618, 0x20deb82b, 0x0000177c, 0x2fc7bfd8, + 0x310eef8b, 0x1fb4df39, 0x3b8530e8, 0x0f4e7226, 0x0246b6d0, + 0x2a558a24, 0x163353af, 0x000063bb, + ], + [ + 0x24d2920b, 0x1c249dcc, 0x2069c5e5, 0x09ab2f9e, 0x36df3cf1, + 0x1991fd0c, 0x062b97a7, 0x1e80070e, 0x000054e7, 0x20d0b375, + 0x2e9f20bd, 0x35090081, 0x1c7a9ddc, 0x22e7c371, 0x087e3016, + 0x03175421, 0x3c6eca7d, 0x0000f599, + ], + [ + 0x259b9d5f, 0x0d9a318f, 0x23a0ef16, 0x00ebe4b7, 0x088265ae, + 0x2cde2666, 0x2bae7adf, 0x1371a5c6, 0x0000f045, 0x0d034f36, + 0x1f967378, 0x1b5fa3f4, 0x0ec8739d, 0x1643e62a, 0x1653947e, + 0x22d1f4e6, 0x0fb8d64b, 0x0000b5b9, + ], +]; + +// Lookup one of the Gwin[] values, by index. This is constant-time. +fn lookup_gwin(t: *p256_jacobian, idx: u32) void = { + let xy: [18]u32 = [0...]; + let k: u32 = 0; + let u: size = 0; + + for (let k = 0u32; k < 15; k += 1) { + let m = -equ32(idx, k + 1); + for (let u = 0z; u < 18; u += 1) { + xy[u] |= m & gwin[k][u]; + }; + }; + t.x[..] = xy[..9]; + t.y[..] = xy[9..]; + t.z[..] = [0...]; + t.z[0] = 1; +}; + +// Multiply the generator by an integer. The integer is assumed non-zero +// and lower than the curve order. +fn p256_mulgen(p :*p256_jacobian, x: []u8) void = { + // qz is a flag that is initially 1, and remains equal to 1 + // as long as the point is the point at infinity. + // + // We use a 4-bit window to handle multiplier bits by groups + // of 4. The precomputed window is constant static data, with + // points in affine coordinates; we use a constant-time lookup. + let q = p256_jacobian { ... }; + let qz: u32 = 1; + let xpos: size = 0; + + for (let i = len(x); i > 0; i -= 1) { + let bx = x[xpos]: u32; + xpos += 1; + + for (let k = 0z; k < 2; k += 1) { + let bits: u32 = 0; + let bnz: u32 = 0; + let t = p256_jacobian { ... }; + let u = p256_jacobian { ... }; + + p256_double(&q); + p256_double(&q); + p256_double(&q); + p256_double(&q); + bits = (bx >> 4) & 0x0f; + bnz = nequ32(bits, 0); + lookup_gwin(&t, bits); + u = q; + p256_add_mixed(&u, &t); + jaccopy(bnz & qz, &q, &t); + jaccopy(bnz & ~qz, &q, &u); + qz &= ~bnz; + bx <<= 4; + }; + }; + *p = q; +}; + +const P256_G: [_]u8 = [ + 0x04, 0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, 0xf8, 0xbc, 0xe6, + 0xe5, 0x63, 0xa4, 0x40, 0xf2, 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, + 0xa0, 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96, 0x4f, 0xe3, 0x42, + 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, + 0x16, 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce, 0xcb, 0xb6, 0x40, + 0x68, 0x37, 0xbf, 0x51, 0xf5 +]; + +const P256_N: [_]u8 = [ + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xbc, 0xe6, 0xfa, 0xad, 0xa7, 0x17, 0x9e, 0x84, + 0xf3, 0xb9, 0xca, 0xc2, 0xfc, 0x63, 0x25, 0x51 +]; + +fn api256_mul(g: []u8, x: []u8) u32 = { + if (len(g) != 65) { + return 0; + }; + + let p = p256_jacobian { ... }; + let r = p256_decode(&p, g); + + p256_mul(&p, x); + p256_to_affine(&p); + p256_encode(g, &p); + return r; +}; + +fn api256_mulgen(r: []u8, x: []u8) size = { + let p = p256_jacobian { ... }; + + p256_mulgen(&p, x); + p256_to_affine(&p); + p256_encode(r[..65], &p); + return 65; +}; + +fn api256_muladd(a: []u8, b: []u8, x: []u8, y: []u8) u32 ={ + let p = p256_jacobian { ... }; + let q = p256_jacobian { ... }; + + if (len(a) != 65) { + return 0; + }; + + let r = p256_decode(&p, a); + p256_mul(&p, x); + if (len(b) == 0) { + p256_mulgen(&q, y); + } else { + r &= p256_decode(&q, b); + p256_mul(&q, y); + }; + + // The final addition may fail in case both points are equal. + let t = p256_add(&p, &q); + reduce_final_f256(p.z); + let z: u32 = 0; + for (let i = 0z; i < 9; i += 1) { + z |= p.z[i]; + }; + z = equ32(z, 0); + p256_double(&q); + + // If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we + // have the following: + // + // z = 0, t = 0 return P (normal addition) + // z = 0, t = 1 return P (normal addition) + // z = 1, t = 0 return Q (a 'double' case) + // z = 1, t = 1 report an error (P+Q = 0) + jaccopy(z & ~t, &p, &q); + p256_to_affine(&p); + p256_encode(a, &p); + r &= ~(z & t); + return r; +}; + +fn api256_order() const []u8 = P256_N; +fn api256_generator() const []u8 = P256_G; + +const _p256: curve = curve { + pointsz = P256_POINTSZ, + order = &api256_order, + generator = &api256_generator, + mul = &api256_mul, + mulgen = &api256_mulgen, + muladd = &api256_muladd, + keygen = &mask_keygen, +}; + +// Size of a [[p256]] point in bytes. +export def P256_POINTSZ = len(P256_G); + +// Size of a [[p256]] scalar in bytes. +export def P256_SCALARSZ = len(P256_N); + +// A [[curve]] implementation of P-256, also known as secp256r1 or prime256v1. +export const p256: *curve = &_p256;