From 9ec8d3bc04328d1da821eb6187309b9e23fe8497 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Tue, 11 Jul 2023 17:30:48 +0800 Subject: [PATCH] sm9/bn256: add double & triple, fix gfpNeg issue, use Square as possible --- sm9/bn256/bn_pair.go | 28 +++++++-------- sm9/bn256/curve.go | 46 ++++++++++++------------ sm9/bn256/gfp12.go | 18 +++++----- sm9/bn256/gfp2.go | 41 ++++++++++----------- sm9/bn256/gfp2_test.go | 29 ++++++++++++++- sm9/bn256/gfp4.go | 6 ++++ sm9/bn256/gfp_amd64.s | 45 +++++++++++++++++++++++ sm9/bn256/gfp_arm64.s | 77 ++++++++++++++++++++++++++++++++++++++++ sm9/bn256/gfp_decl.go | 12 ++++++- sm9/bn256/gfp_generic.go | 9 +++++ sm9/bn256/gfp_test.go | 61 +++++++++++++++++++++++++++++++ sm9/bn256/twist.go | 22 ++++++------ 12 files changed, 313 insertions(+), 81 deletions(-) diff --git a/sm9/bn256/bn_pair.go b/sm9/bn256/bn_pair.go index 3a40cf77..ff9fd81e 100644 --- a/sm9/bn256/bn_pair.go +++ b/sm9/bn256/bn_pair.go @@ -12,8 +12,8 @@ func lineFunctionAdd(r, p, rOut *twistPoint, q *curvePoint, r2, a, b, c *gfP2) { H := (&gfP2{}).Sub(B, &r.x) // H = Xp * Zr^2 - Xr I := (&gfP2{}).SquareNC(H) // I = (Xp * Zr^2 - Xr)^2 = Xp^2*Zr^4 + Xr^2 - 2Xr*Xp*Zr^2 - E := (&gfP2{}).Add(I, I) // E = 2*(Xp * Zr^2 - Xr)^2 - E.Add(E, E) // E = 4*(Xp * Zr^2 - Xr)^2 + E := (&gfP2{}).Double(I) // E = 2*(Xp * Zr^2 - Xr)^2 + E.Double(E) // E = 4*(Xp * Zr^2 - Xr)^2 J := (&gfP2{}).MulNC(H, E) // J = 4*(Xp * Zr^2 - Xr)^3 @@ -29,7 +29,7 @@ func lineFunctionAdd(r, p, rOut *twistPoint, q *curvePoint, r2, a, b, c *gfP2) { t := (&gfP2{}).Sub(V, &rOut.x) // t = V - rOut.x t.Mul(t, L1) // t = L1*(V-rOut.x) t2 := (&gfP2{}).MulNC(&r.y, J) - t2.Add(t2, t2) // t2 = 2Yr * J + t2.Double(t2) // t2 = 2Yr * J rOut.y.Sub(t, t2) // rOut.y = L1*(V-rOut.x) - 2Yr*J rOut.t.SquareNC(&rOut.z) @@ -38,14 +38,14 @@ func lineFunctionAdd(r, p, rOut *twistPoint, q *curvePoint, r2, a, b, c *gfP2) { t.Add(&p.y, &rOut.z).Square(t).Sub(t, r2).Sub(t, &rOut.t) t2.Mul(L1, &p.x) - t2.Add(t2, t2) // t2 = 2 L1 * Xp - a.Sub(t2, t) // a = 2 L1 * Xp - 2 Yp * rOut.z = 2 L1 * Xp - (Yp + rOut.Z)^2 + Yp^2 + rOut.Z^2 + t2.Double(t2) // t2 = 2 L1 * Xp + a.Sub(t2, t) // a = 2 L1 * Xp - 2 Yp * rOut.z = 2 L1 * Xp - (Yp + rOut.Z)^2 + Yp^2 + rOut.Z^2 c.MulScalar(&rOut.z, &q.y) // c = rOut.z * Yq - c.Add(c, c) // c = 2 * rOut.z * Yq + c.Double(c) // c = 2 * rOut.z * Yq b.Neg(L1) // b= -L1 - b.MulScalar(b, &q.x).Add(b, b) // b = -2 * L1 * Xq + b.MulScalar(b, &q.x).Double(b) // b = -2 * L1 * Xq } func lineFunctionDouble(r, rOut *twistPoint, q *curvePoint, a, b, c *gfP2) { @@ -56,9 +56,9 @@ func lineFunctionDouble(r, rOut *twistPoint, q *curvePoint, a, b, c *gfP2) { C := (&gfP2{}).SquareNC(B) // C = Yr ^ 4 D := (&gfP2{}).Add(&r.x, B) - D.Square(D).Sub(D, A).Sub(D, C).Add(D, D) + D.Square(D).Sub(D, A).Sub(D, C).Double(D) - E := (&gfP2{}).Add(A, A) // + E := (&gfP2{}).Double(A) // E.Add(E, A) // E = 3 * Xr ^ 2 G := (&gfP2{}).SquareNC(E) // G = 9 * Xr^4 @@ -68,23 +68,23 @@ func lineFunctionDouble(r, rOut *twistPoint, q *curvePoint, a, b, c *gfP2) { rOut.z.Add(&r.y, &r.z).Square(&rOut.z).Sub(&rOut.z, B).Sub(&rOut.z, &r.t) // Z3 = (Yr + Zr)^2 - Yr^2 - Zr^2 = 2Yr*Zr rOut.y.Sub(D, &rOut.x).Mul(&rOut.y, E) - t := (&gfP2{}).Add(C, C) // t = 2 * r.y ^ 4 - t.Add(t, t).Add(t, t) // t = 8 * Yr ^ 4 + t := (&gfP2{}).Double(C) // t = 2 * r.y ^ 4 + t.Double(t).Double(t) // t = 8 * Yr ^ 4 rOut.y.Sub(&rOut.y, t) rOut.t.SquareNC(&rOut.z) - t.Mul(E, &r.t).Add(t, t) // t = 2(E * Tr) + t.Mul(E, &r.t).Double(t) // t = 2(E * Tr) b.Neg(t) // b = -2(E * Tr) b.MulScalar(b, &q.x) // b = -2(E * Tr * Xq) a.Add(&r.x, E) // a = Xr + E a.Square(a).Sub(a, A).Sub(a, G) // a = (Xr + E) ^ 2 - A - G - t.Add(B, B).Add(t, t) // t = 4B + t.Double(B).Double(t) // t = 4B a.Sub(a, t) // a = (Xr + E) ^ 2 - A - G - 4B c.Mul(&rOut.z, &r.t) // c = rOut.z * Tr - c.Add(c, c).MulScalar(c, &q.y) // c = 2 rOut.z * Tr * Yq + c.Double(c).MulScalar(c, &q.y) // c = 2 rOut.z * Tr * Yq } // (ret.z + ret.y*w + ret.x*w^2)* ((cv+a) + b*w^2) diff --git a/sm9/bn256/curve.go b/sm9/bn256/curve.go index b0acbfb8..a2d23288 100644 --- a/sm9/bn256/curve.go +++ b/sm9/bn256/curve.go @@ -38,7 +38,7 @@ func (c *curvePoint) Set(a *curvePoint) { func (c *curvePoint) polynomial(x *gfP) *gfP { x3 := &gfP{} - gfpMul(x3, x, x) + gfpSqr(x3, x, 1) gfpMul(x3, x3, x) gfpAdd(x3, x3, curveB) return x3 @@ -52,7 +52,7 @@ func (c *curvePoint) IsOnCurve() bool { } y2 := &gfP{} - gfpMul(y2, &c.y, &c.y) + gfpSqr(y2, &c.y, 1) x3 := c.polynomial(&c.x) @@ -98,8 +98,8 @@ func (c *curvePoint) Add(a, b *curvePoint) { // by [u1:s1:z1·z2] and [u2:s2:z1·z2] // where u1 = x1·z2², s1 = y1·z2³ and u1 = x2·z1², s2 = y2·z1³ z12, z22 := &gfP{}, &gfP{} - gfpMul(z12, &a.z, &a.z) - gfpMul(z22, &b.z, &b.z) + gfpSqr(z12, &a.z, 1) + gfpSqr(z22, &b.z, 1) u1, u2 := &gfP{}, &gfP{} gfpMul(u1, &a.x, z22) @@ -123,10 +123,10 @@ func (c *curvePoint) Add(a, b *curvePoint) { h := &gfP{} gfpSub(h, u2, u1) - gfpAdd(t, h, h) + gfpDouble(t, h) // i = 4h² i := &gfP{} - gfpMul(i, t, t) + gfpSqr(i, t, 1) // j = 4h³ j := &gfP{} gfpMul(j, h, i) @@ -138,15 +138,15 @@ func (c *curvePoint) Add(a, b *curvePoint) { return } r := &gfP{} - gfpAdd(r, t, t) + gfpDouble(r, t) v := &gfP{} gfpMul(v, u1, i) // t4 = 4(s2-s1)² t4, t6 := &gfP{}, &gfP{} - gfpMul(t4, r, r) - gfpAdd(t, v, v) + gfpSqr(t4, r, 1) + gfpDouble(t, v) gfpSub(t6, t4, j) gfpSub(&c.x, t6, t) @@ -156,13 +156,13 @@ func (c *curvePoint) Add(a, b *curvePoint) { // y = - 2·s1·j - (s2-s1)(2x - 2i·u1) = r(v-x) - 2·s1·j gfpSub(t, v, &c.x) // t7 gfpMul(t4, s1, j) // t8 - gfpAdd(t6, t4, t4) // t9 + gfpDouble(t6, t4) // t9 gfpMul(t4, r, t) // t10 gfpSub(&c.y, t4, t6) // Set z = 2(u2-u1)·z1·z2 = 2h·z1·z2 gfpAdd(t, &a.z, &b.z) // t11 - gfpMul(t4, t, t) // t12 + gfpSqr(t4, t, 1) // t12 gfpSub(t, t4, z12) // t13 gfpSub(t4, t, z22) // t14 gfpMul(&c.z, t4, h) @@ -171,31 +171,31 @@ func (c *curvePoint) Add(a, b *curvePoint) { func (c *curvePoint) Double(a *curvePoint) { // See http://hyperelliptic.org/EFD/g1p/auto-code/shortw/jacobian-0/doubling/dbl-2009-l.op3 A, B, C := &gfP{}, &gfP{}, &gfP{} - gfpMul(A, &a.x, &a.x) - gfpMul(B, &a.y, &a.y) - gfpMul(C, B, B) + gfpSqr(A, &a.x, 1) + gfpSqr(B, &a.y, 1) + gfpSqr(C, B, 1) t, t2 := &gfP{}, &gfP{} gfpAdd(t, &a.x, B) - gfpMul(t2, t, t) + gfpSqr(t2, t, 1) gfpSub(t, t2, A) gfpSub(t2, t, C) d, e, f := &gfP{}, &gfP{}, &gfP{} gfpAdd(d, t2, t2) - gfpAdd(t, A, A) + gfpDouble(t, A) gfpAdd(e, t, A) - gfpMul(f, e, e) + gfpSqr(f, e, 1) - gfpAdd(t, d, d) + gfpDouble(t, d) gfpSub(&c.x, f, t) gfpMul(&c.z, &a.y, &a.z) - gfpAdd(&c.z, &c.z, &c.z) + gfpDouble(&c.z, &c.z) - gfpAdd(t, C, C) - gfpAdd(t2, t, t) - gfpAdd(t, t2, t2) + gfpDouble(t, C) + gfpDouble(t2, t) + gfpDouble(t, t2) gfpSub(&c.y, d, &c.x) gfpMul(t2, e, &c.y) gfpSub(&c.y, t2, t) @@ -232,7 +232,7 @@ func (c *curvePoint) MakeAffine() { t, zInv2 := &gfP{}, &gfP{} gfpMul(t, &c.y, zInv) - gfpMul(zInv2, zInv, zInv) + gfpSqr(zInv2, zInv, 1) gfpMul(&c.x, &c.x, zInv2) gfpMul(&c.y, t, zInv2) diff --git a/sm9/bn256/gfp12.go b/sm9/bn256/gfp12.go index cf3765fe..cd51aa67 100644 --- a/sm9/bn256/gfp12.go +++ b/sm9/bn256/gfp12.go @@ -258,11 +258,11 @@ func (e *gfP12) SpecialSquares(a *gfP12, n int) *gfP12 { ty.Triple(v1) tz.Triple(v2) - v0.Add(&a.x, &a.x) // (f12, f01) + v0.Double(&a.x) // (f12, f01) v0.y.Neg(&v0.y) - v1.Add(&a.y, &a.y) // (f02, f10) + v1.Double(&a.y) // (f02, f10) v1.x.Neg(&v1.x) - v2.Add(&a.z, &a.z) // (f11, f00) + v2.Double(&a.z) // (f11, f00) v2.y.Neg(&v2.y) v0.Add(ty, v0) @@ -285,11 +285,11 @@ func (e *gfP12) SpecialSquares(a *gfP12, n int) *gfP12 { ty.Triple(v1) tz.Triple(v2) - v0.Add(&in.x, &in.x) // (f12, f01) + v0.Double(&in.x) // (f12, f01) v0.y.Neg(&v0.y) - v1.Add(&in.y, &in.y) // (f02, f10) + v1.Double(&in.y) // (f02, f10) v1.x.Neg(&v1.x) - v2.Add(&in.z, &in.z) // (f11, f00) + v2.Double(&in.z) // (f11, f00) v2.y.Neg(&v2.y) v0.Add(ty, v0) @@ -321,11 +321,11 @@ func (e *gfP12) SpecialSquareNC(a *gfP12) *gfP12 { ty.Triple(v1) tz.Triple(v2) - v0.Add(&a.x, &a.x) // (f12, f01) + v0.Double(&a.x) // (f12, f01) v0.y.Neg(&v0.y) - v1.Add(&a.y, &a.y) // (f02, f10) + v1.Double(&a.y) // (f02, f10) v1.x.Neg(&v1.x) - v2.Add(&a.z, &a.z) // (f11, f00) + v2.Double(&a.z) // (f11, f00) v2.y.Neg(&v2.y) v0.Add(ty, v0) diff --git a/sm9/bn256/gfp2.go b/sm9/bn256/gfp2.go index 90b3865b..0c46e5b0 100644 --- a/sm9/bn256/gfp2.go +++ b/sm9/bn256/gfp2.go @@ -75,13 +75,13 @@ func (e *gfP2) IsOne() bool { func (e *gfP2) Conjugate(a *gfP2) *gfP2 { e.y.Set(&a.y) - gfpSub(&e.x, zero, &a.x) + gfpNeg(&e.x, &a.x) return e } func (e *gfP2) Neg(a *gfP2) *gfP2 { - gfpSub(&e.x, zero, &a.x) - gfpSub(&e.y, zero, &a.y) + gfpNeg(&e.x, &a.x) + gfpNeg(&e.y, &a.y) return e } @@ -98,17 +98,14 @@ func (e *gfP2) Sub(a, b *gfP2) *gfP2 { } func (e *gfP2) Double(a *gfP2) *gfP2 { - gfpAdd(&e.x, &a.x, &a.x) - gfpAdd(&e.y, &a.y, &a.y) + gfpDouble(&e.x, &a.x) + gfpDouble(&e.y, &a.y) return e } func (e *gfP2) Triple(a *gfP2) *gfP2 { - gfpAdd(&e.x, &a.x, &a.x) - gfpAdd(&e.y, &a.y, &a.y) - - gfpAdd(&e.x, &e.x, &a.x) - gfpAdd(&e.y, &e.y, &a.y) + gfpTriple(&e.x, &a.x) + gfpTriple(&e.y, &a.y) return e } @@ -172,8 +169,8 @@ func (e *gfP2) MulUNC(a, b *gfP2) *gfP2 { gfpMul(ty, tx, ty) gfpSub(ty, ty, v0) gfpSub(ty, ty, v1) - gfpAdd(ty, ty, ty) - gfpSub(ty, zero, ty) + gfpDouble(ty, ty) + gfpNeg(ty, ty) gfpSub(tx, v0, v1) gfpSub(tx, tx, v1) @@ -187,8 +184,8 @@ func (e *gfP2) MulUNC(a, b *gfP2) *gfP2 { // c0 = -2a1 func (e *gfP2) MulU1(a *gfP2) *gfP2 { t := &gfP{} - gfpAdd(t, &a.x, &a.x) - gfpSub(t, zero, t) + gfpDouble(t, &a.x) + gfpNeg(t, t) gfpCopy(&e.x, &a.y) gfpCopy(&e.y, t) @@ -212,12 +209,12 @@ func (e *gfP2) SquareNC(a *gfP2) *gfP2 { ty := &e.y gfpAdd(ty, &a.x, &a.y) - gfpAdd(tx, &a.x, &a.x) + gfpDouble(tx, &a.x) gfpSub(tx, &a.y, tx) gfpMul(ty, tx, ty) gfpMul(tx, &a.x, &a.y) gfpAdd(ty, tx, ty) - gfpAdd(tx, tx, tx) + gfpDouble(tx, tx) return e } @@ -240,14 +237,14 @@ func (e *gfP2) SquareUNC(a *gfP2) *gfP2 { ty := &e.y gfpAdd(tx, &a.x, &a.y) - gfpAdd(ty, &a.x, &a.x) + gfpDouble(ty, &a.x) gfpSub(ty, &a.y, ty) gfpMul(tx, tx, ty) gfpMul(ty, &a.x, &a.y) gfpAdd(tx, tx, ty) - gfpAdd(ty, ty, ty) - gfpAdd(ty, ty, ty) - gfpSub(ty, zero, ty) + gfpDouble(ty, ty) + gfpDouble(ty, ty) + gfpNeg(ty, ty) return e } @@ -263,14 +260,14 @@ func (e *gfP2) Invert(a *gfP2) *gfP2 { // ftp://136.206.11.249/pub/crypto/pairings.pdf t1, t2, t3 := &gfP{}, &gfP{}, &gfP{} gfpSqr(t1, &a.x, 1) - gfpAdd(t3, t1, t1) + gfpDouble(t3, t1) gfpSqr(t2, &a.y, 1) gfpAdd(t3, t3, t2) inv := &gfP{} inv.Invert(t3) // inv = (2 * a.x ^ 2 + a.y ^ 2) ^ (-1) - gfpSub(t1, zero, &a.x) + gfpNeg(t1, &a.x) gfpMul(&e.x, t1, inv) // x = - a.x * inv gfpMul(&e.y, &a.y, inv) // y = a.y * inv diff --git a/sm9/bn256/gfp2_test.go b/sm9/bn256/gfp2_test.go index 92d17bb5..10ad6a6b 100644 --- a/sm9/bn256/gfp2_test.go +++ b/sm9/bn256/gfp2_test.go @@ -152,10 +152,11 @@ func BenchmarkGfP2MulU(b *testing.B) { *fromBigInt(bigFromHex("17509B092E845C1266BA0D262CBEE6ED0736A96FA347C8BD856DC76B84EBEB96")), *fromBigInt(bigFromHex("A7CF28D519BE3DA65F3170153D278FF247EFBA98A71A08116215BBA5C999A7C7")), } + + t := &gfP2{} b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { - t := &gfP2{} t.MulU(x, y) } } @@ -184,6 +185,32 @@ func BenchmarkGfP2SquareU(b *testing.B) { } } +func BenchmarkGfP2Neg(b *testing.B) { + x := &gfP2{ + *fromBigInt(bigFromHex("85AEF3D078640C98597B6027B441A01FF1DD2C190F5E93C454806C11D8806141")), + *fromBigInt(bigFromHex("3722755292130B08D2AAB97FD34EC120EE265948D19C17ABF9B7213BAF82D65B")), + } + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + gfpNeg(&x.x, &x.x) + gfpNeg(&x.y, &x.y) + } +} + +func BenchmarkGfP2Neg2(b *testing.B) { + x := &gfP2{ + *fromBigInt(bigFromHex("85AEF3D078640C98597B6027B441A01FF1DD2C190F5E93C454806C11D8806141")), + *fromBigInt(bigFromHex("3722755292130B08D2AAB97FD34EC120EE265948D19C17ABF9B7213BAF82D65B")), + } + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + gfpSub(&x.x, zero, &x.x) + gfpSub(&x.y, zero, &x.y) + } +} + /* func Test_gfP2QuadraticResidue(t *testing.T) { x := &gfP2{ diff --git a/sm9/bn256/gfp4.go b/sm9/bn256/gfp4.go index 4f4b828a..eb9bc0de 100644 --- a/sm9/bn256/gfp4.go +++ b/sm9/bn256/gfp4.go @@ -73,6 +73,12 @@ func (e *gfP4) Add(a, b *gfP4) *gfP4 { return e } +func (e *gfP4) Double(a *gfP4) *gfP4 { + e.x.Double(&a.x) + e.y.Double(&a.y) + return e +} + func (e *gfP4) Triple(a *gfP4) *gfP4 { e.x.Triple(&a.x) e.y.Triple(&a.y) diff --git a/sm9/bn256/gfp_amd64.s b/sm9/bn256/gfp_amd64.s index 5fdc2250..8acdc305 100644 --- a/sm9/bn256/gfp_amd64.s +++ b/sm9/bn256/gfp_amd64.s @@ -104,6 +104,51 @@ TEXT ·gfpAdd(SB),0,$0-24 storeBlock(R8,R9,R10,R11, 0(DI)) RET +TEXT ·gfpDouble(SB),0,$0-16 + MOVQ a+0(FP), DI + MOVQ b+8(FP), SI + + loadBlock(0(SI), R8,R9,R10,R11) + XORQ R12, R12 + + ADDQ R8, R8 + ADCQ R9, R9 + ADCQ R10, R10 + ADCQ R11, R11 + ADCQ $0, R12 + + gfpCarry(R8,R9,R10,R11, R13,R14,CX,AX,R12) + + storeBlock(R8,R9,R10,R11, 0(DI)) + RET + +TEXT ·gfpTriple(SB),0,$0-16 + MOVQ a+0(FP), DI + MOVQ b+8(FP), SI + + loadBlock(0(SI), R8,R9,R10,R11) + XORQ R12, R12 + + ADDQ R8, R8 + ADCQ R9, R9 + ADCQ R10, R10 + ADCQ R11, R11 + ADCQ $0, R12 + + gfpCarry(R8,R9,R10,R11, R13,R14,CX,AX,R12) + + XORQ R12, R12 + ADDQ 0(SI), R8 + ADCQ 8(SI), R9 + ADCQ 16(SI), R10 + ADCQ 24(SI), R11 + ADCQ $0, R12 + + gfpCarry(R8,R9,R10,R11, R13,R14,CX,AX,R12) + + storeBlock(R8,R9,R10,R11, 0(DI)) + RET + TEXT ·gfpSub(SB),0,$0-24 MOVQ a+8(FP), DI MOVQ b+16(FP), SI diff --git a/sm9/bn256/gfp_arm64.s b/sm9/bn256/gfp_arm64.s index dc7f23b4..96b8b25c 100644 --- a/sm9/bn256/gfp_arm64.s +++ b/sm9/bn256/gfp_arm64.s @@ -109,6 +109,83 @@ TEXT ·gfpAdd(SB),0,$0-24 storeBlock(R1,R2,R3,R4, 0(R0)) RET +TEXT ·gfpDouble(SB),0,$0-16 + MOVD a+8(FP), R0 + loadBlock(0(R0), R1,R2,R3,R4) + loadModulus(R9,R10,R11,R12) + MOVD ZR, R0 + + ADDS R1, R1 + ADCS R2, R2 + ADCS R3, R3 + ADCS R4, R4 + ADCS ZR, R0 + + SUBS R9, R1, R5 + SBCS R10, R2, R6 + SBCS R11, R3, R7 + SBCS R12, R4, R8 + SBCS ZR, R0, R0 + + CSEL CS, R5, R1, R1 + CSEL CS, R6, R2, R2 + CSEL CS, R7, R3, R3 + CSEL CS, R8, R4, R4 + + MOVD c+0(FP), R0 + storeBlock(R1,R2,R3,R4, 0(R0)) + RET + +TEXT ·gfpTriple(SB),0,$0-16 + MOVD a+8(FP), R0 + loadBlock(0(R0), R1,R2,R3,R4) + MOVD R1, R19 + MOVD R2, R20 + MOVD R3, R21 + MOVD R4, R22 + loadModulus(R9,R10,R11,R12) + MOVD ZR, R0 + + ADDS R1, R1 + ADCS R2, R2 + ADCS R3, R3 + ADCS R4, R4 + ADCS ZR, R0 + + SUBS R9, R1, R5 + SBCS R10, R2, R6 + SBCS R11, R3, R7 + SBCS R12, R4, R8 + SBCS ZR, R0, R0 + + CSEL CS, R5, R1, R1 + CSEL CS, R6, R2, R2 + CSEL CS, R7, R3, R3 + CSEL CS, R8, R4, R4 + + MOVD ZR, R0 + + ADDS R19, R1 + ADCS R20, R2 + ADCS R21, R3 + ADCS R22, R4 + ADCS ZR, R0 + + SUBS R9, R1, R5 + SBCS R10, R2, R6 + SBCS R11, R3, R7 + SBCS R12, R4, R8 + SBCS ZR, R0, R0 + + CSEL CS, R5, R1, R1 + CSEL CS, R6, R2, R2 + CSEL CS, R7, R3, R3 + CSEL CS, R8, R4, R4 + + MOVD c+0(FP), R0 + storeBlock(R1,R2,R3,R4, 0(R0)) + RET + TEXT ·gfpSub(SB),0,$0-24 MOVD a+8(FP), R0 loadBlock(0(R0), R1,R2,R3,R4) diff --git a/sm9/bn256/gfp_decl.go b/sm9/bn256/gfp_decl.go index 1e2ff8fe..589bd3f6 100644 --- a/sm9/bn256/gfp_decl.go +++ b/sm9/bn256/gfp_decl.go @@ -18,7 +18,7 @@ var supportADX = cpu.X86.HasADX && cpu.X86.HasBMI2 // Set c = p - a, if c == p, then c = 0 // It seems this function's performance is worse than gfpSub with zero. // -// go:noescape +//go:noescape func gfpNeg(c, a *gfP) // Set c = a + b, if c >= p, then c = c - p @@ -26,6 +26,16 @@ func gfpNeg(c, a *gfP) //go:noescape func gfpAdd(c, a, b *gfP) +// Set c = a + a +// +//go:noescape +func gfpDouble(c, a *gfP) + +// Set c = a + a + a +// +//go:noescape +func gfpTriple(c, a *gfP) + // Set c = a - b, if c is negative, then c = c + p // //go:noescape diff --git a/sm9/bn256/gfp_generic.go b/sm9/bn256/gfp_generic.go index f62ed09e..c6c2f8a3 100644 --- a/sm9/bn256/gfp_generic.go +++ b/sm9/bn256/gfp_generic.go @@ -42,6 +42,15 @@ func gfpAdd(c, a, b *gfP) { gfpCarry(c, carry) } +func gfpDouble(c, a *gfP) { + gfpAdd(c, a, a) +} + +func gfpTriple(c, a *gfP) { + gfpAdd(c, a, a) + gfpAdd(c, c, a) +} + func gfpSub(c, a, b *gfP) { t := &gfP{} diff --git a/sm9/bn256/gfp_test.go b/sm9/bn256/gfp_test.go index 5c9eb73f..016e2ab0 100644 --- a/sm9/bn256/gfp_test.go +++ b/sm9/bn256/gfp_test.go @@ -225,3 +225,64 @@ func BenchmarkGfPSqr(b *testing.B) { gfpSqr(ret, x, 1) } } + +func BenchmarkGfPTriple(b *testing.B) { + x := fromBigInt(bigFromHex("9093a2b979e6186f43a9b28d41ba644d533377f2ede8c66b19774bf4a9c7a596")) + b.ReportAllocs() + b.ResetTimer() + ret := &gfP{} + for i := 0; i < b.N; i++ { + gfpTriple(ret, x) + } +} + +func BenchmarkGfPTriple2(b *testing.B) { + x := fromBigInt(bigFromHex("9093a2b979e6186f43a9b28d41ba644d533377f2ede8c66b19774bf4a9c7a596")) + b.ReportAllocs() + b.ResetTimer() + ret := &gfP{} + for i := 0; i < b.N; i++ { + gfpAdd(ret, x, x) + gfpAdd(ret, ret, x) + } +} + +func BenchmarkGfPDouble(b *testing.B) { + x := fromBigInt(bigFromHex("9093a2b979e6186f43a9b28d41ba644d533377f2ede8c66b19774bf4a9c7a596")) + b.ReportAllocs() + b.ResetTimer() + ret := &gfP{} + for i := 0; i < b.N; i++ { + gfpDouble(ret, x) + } +} + +func BenchmarkGfPDouble2(b *testing.B) { + x := fromBigInt(bigFromHex("9093a2b979e6186f43a9b28d41ba644d533377f2ede8c66b19774bf4a9c7a596")) + b.ReportAllocs() + b.ResetTimer() + ret := &gfP{} + for i := 0; i < b.N; i++ { + gfpAdd(ret, x, x) + } +} + +func BenchmarkGfPNeg(b *testing.B) { + x := fromBigInt(bigFromHex("9093a2b979e6186f43a9b28d41ba644d533377f2ede8c66b19774bf4a9c7a596")) + b.ReportAllocs() + b.ResetTimer() + ret := &gfP{} + for i := 0; i < b.N; i++ { + gfpNeg(ret, x) + } +} + +func BenchmarkGfPNeg2(b *testing.B) { + x := fromBigInt(bigFromHex("9093a2b979e6186f43a9b28d41ba644d533377f2ede8c66b19774bf4a9c7a596")) + b.ReportAllocs() + b.ResetTimer() + ret := &gfP{} + for i := 0; i < b.N; i++ { + gfpSub(ret, zero, x) + } +} diff --git a/sm9/bn256/twist.go b/sm9/bn256/twist.go index 0c93f86e..1f760ade 100644 --- a/sm9/bn256/twist.go +++ b/sm9/bn256/twist.go @@ -114,7 +114,7 @@ func (c *twistPoint) Add(a, b *twistPoint) { h := (&gfP2{}).Sub(u2, u1) xEqual := h.IsZero() - t.Add(h, h) + t.Double(h) i := (&gfP2{}).SquareNC(t) j := (&gfP2{}).MulNC(h, i) @@ -124,18 +124,18 @@ func (c *twistPoint) Add(a, b *twistPoint) { c.Double(a) return } - r := (&gfP2{}).Add(t, t) + r := (&gfP2{}).Double(t) v := (&gfP2{}).MulNC(u1, i) t4 := (&gfP2{}).SquareNC(r) - t.Add(v, v) + t.Double(v) t6 := (&gfP2{}).Sub(t4, j) c.x.Sub(t6, t) t.Sub(v, &c.x) // t7 t4.Mul(s1, j) // t8 - t6.Add(t4, t4) // t9 + t6.Double(t4) // t9 t4.Mul(r, t) // t10 c.y.Sub(t4, t6) @@ -156,20 +156,20 @@ func (c *twistPoint) Double(a *twistPoint) { t2 := (&gfP2{}).SquareNC(t) t.Sub(t2, A) t2.Sub(t, C) - d := (&gfP2{}).Add(t2, t2) - t.Add(A, A) + d := (&gfP2{}).Double(t2) + t.Double(A) e := (&gfP2{}).Add(t, A) f := (&gfP2{}).SquareNC(e) - t.Add(d, d) + t.Double(d) c.x.Sub(f, t) c.z.Mul(&a.y, &a.z) - c.z.Add(&c.z, &c.z) + c.z.Double(&c.z) - t.Add(C, C) - t2.Add(t, t) - t.Add(t2, t2) + t.Double(C) + t2.Double(t) + t.Double(t2) c.y.Sub(d, &c.x) t2.Mul(e, &c.y) c.y.Sub(t2, t)