From 5190113a0bd4e30ae76d53176df05ecaa396af93 Mon Sep 17 00:00:00 2001
From: Tuomo Kriikkula
Date: Thu, 8 Aug 2024 22:56:18 +0300
Subject: [PATCH] Preliminary benchmarking drafts

---
Review notes (not part of the commit message):
- IsGt used to fall through to the B words even when A.A < B.A; this
  revision returns False in that case.
- Line breaks, indentation and array<...> element types were lost in
  transit and have been reconstructed. Verify the blank context lines and
  the element types against the target tree before applying.

 Classes/FCryptoBigInt.uc      |  64 ++++++++++++++++++++
 Classes/FCryptoMacros.uci     |   3 +
 Classes/FCryptoQWORD.uc       |  23 ++++++-
 Classes/FCryptoTestMutator.uc | 108 ++++++++++++++++++++++++++++++++++
 4 files changed, 197 insertions(+), 1 deletion(-)

diff --git a/Classes/FCryptoBigInt.uc b/Classes/FCryptoBigInt.uc
index 172af07..7fb53c5 100644
--- a/Classes/FCryptoBigInt.uc
+++ b/Classes/FCryptoBigInt.uc
@@ -548,6 +548,30 @@ static final function int BitLength(
     return (Twk << 4) + BIT_LENGTH(Tw);
 }
 
+static final function int BitLength_NonConst(
+    array<int> X,
+    int XLen
+)
+{
+    local int Tw;
+    local int Twk;
+    local int W;
+    local int C;
+
+    Tw = 0;
+    Twk = 0;
+
+    while (XLen-- > 0)
+    {
+        C = EQ(Tw, 0);
+        W = X[XLen];
+        Tw = MUX(C, W, Tw);
+        Twk = MUX(C, XLen, Twk);
+    }
+
+    return (Twk << 4) + BIT_LENGTH(Tw);
+}
+
 /*
  * Decode an integer from its big-endian unsigned representation. The
  * integer MUST be lower than m[]; the announced bit length written in
@@ -725,6 +749,46 @@ static final function Decode(
     X[0] = BitLength(XArr, V - 1);
 }
 
+static final function Decode_NonConst(
+    out array<int> X,
+    array<byte> Src,
+    int Len
+)
+{
+    local int V;
+    local int Acc;
+    local int AccLen;
+    local int B;
+    local array<int> XArr;
+
+    V = 1;
+    Acc = 0;
+    AccLen = 0;
+    while (Len-- > 0)
+    {
+        B = Src[Len];
+        // Acc = Acc | ((B << AccLen) & 0xFFFF); // @ALIGN-32-16.
+        Acc = Acc | (B << AccLen);
+        AccLen += 8;
+        if (AccLen >= 15)
+        {
+            X[V++] = Acc & 0x7FFF;
+            AccLen -= 15;
+            Acc = Acc >>> 15;
+        }
+    }
+    if (AccLen != 0)
+    {
+        X[V++] = Acc & 0xFFFF; // @ALIGN-32-16.
+    }
+
+    // X[0] = BitLength(X + 1, V - 1);
+    // TODO: is there a faster way of doing this in UScript?
+    XArr = X;
+    XArr.Remove(0, 1);
+    X[0] = BitLength_NonConst(XArr, V - 1);
+}
+
 /*
  * Decode an integer from its big-endian unsigned representation, and
  * reduce it modulo the provided modulus m[]. The announced bit length
diff --git a/Classes/FCryptoMacros.uci b/Classes/FCryptoMacros.uci
index e65d3a1..91deaca 100644
--- a/Classes/FCryptoMacros.uci
+++ b/Classes/FCryptoMacros.uci
@@ -1,6 +1,9 @@
 // General development debugging.
 `define FCDEBUG 1
 
+// Define benchmarking functions, data and run the test suite.
+`define FCBENCHMARK 1
+
 // Monty specific extra debugging.
 // `define FCDEBUG_MONTY 1
 
diff --git a/Classes/FCryptoQWORD.uc b/Classes/FCryptoQWORD.uc
index 763bb93..e02f34d 100644
--- a/Classes/FCryptoQWORD.uc
+++ b/Classes/FCryptoQWORD.uc
@@ -34,7 +34,28 @@ struct FCQWORD
 };
 
 // Return A > B.
-final static function bool IsGt(FCQWORD A, FCQWORD B)
+// NOTE(review): treats field A as the most significant word; confirm
+// against the FCQWORD declaration.
+final static function bool IsGt(const out FCQWORD A, const out FCQWORD B)
+{
+    if (IsGt_AsUInt32(A.A, B.A))
+    {
+        return True;
+    }
+
+    // The B words only decide the result when the A words are equal.
+    if (IsGt_AsUInt32(B.A, A.A))
+    {
+        return False;
+    }
+
+    return IsGt_AsUInt32(A.B, B.B);
+}
+
+// For benchmarking: by-value variant that keeps the pre-patch IsGt body.
+// NOTE(review): most of that body lies outside this hunk; it presumably
+// needs the same A-word check as IsGt above -- confirm.
+final static function bool IsGt_NonConst(FCQWORD A, FCQWORD B)
 {
     if (IsGt_AsUInt32(A.A, B.A))
     {
diff --git a/Classes/FCryptoTestMutator.uc b/Classes/FCryptoTestMutator.uc
index fe31fb8..211bef2 100644
--- a/Classes/FCryptoTestMutator.uc
+++ b/Classes/FCryptoTestMutator.uc
@@ -1060,11 +1060,23 @@ private final simulated function int TestMath()
     local int Remainder;
     local int HardCodedMontyFail;
     local int MontyDecodeResult;
+    local int BenchmarkRound;
 
     local string BigIntString;
     local int Dummy;
     local FCQWORD QW;
+    local FCQWORD QW1;
+    local FCQWORD QW2;
+    local FCQWORD QW3;
+    local FCQWORD QW4;
+    local FCQWORD QW5;
+    local FCQWORD QW6;
+    local FCQWORD QW7;
+    local FCQWORD QW8;
+    local FCQWORD QW9;
     local bool bQWCarry;
+    local float QWClock;
+    local int QWIdx;
 
     // TODO: Design for FCQWORD arithmetic.
     Dummy = 0xFFFFFFFF;
@@ -1109,6 +1121,70 @@ private final simulated function int TestMath()
     `fclog("0x7FFFFFFF > 0xFFFFFFFF :" @ IsGt(0x7FFFFFFF, 0xFFFFFFFF));
     `fclog("0xFFFFFFFF > 0x7FFFFFFF :" @ IsGt(0xFFFFFFFF, 0x7FFFFFFF));
 
+    QW1.A = 0x00000000;
+    QW1.B = 0xFFFFFFFF;
+    QW2.A = 0xFFFFFFFF;
+    QW2.B = 0xFFFFFFFF;
+    QW3.A = 0x00000000;
+    QW3.B = 0x00000000;
+    QW4.A = 0x00000000;
+    QW4.B = 0xFFFFFFFF;
+    QW5.A = 0x00000002;
+    QW5.B = 0xFFFFFFFF;
+    QW6.A = 0x00000000;
+    QW6.B = 0x7FFAFFFF;
+    QW7.A = 0x00000000;
+    QW7.B = 0x00000002;
+    QW8.A = 0x00000000;
+    QW8.B = 0x00000001;
+    QW9.A = 0x7FFFFFFF;
+    QW9.B = 0x7FFFFFFF;
+
+    Clock(QWClock);
+    for (QWIdx = 0; QWIdx < 1024; ++QWIdx)
+    {
+        class'FCryptoQWORD'.static.IsGt(QW1, QW1);
+        class'FCryptoQWORD'.static.IsGt(QW1, QW2);
+        class'FCryptoQWORD'.static.IsGt(QW2, QW3);
+        class'FCryptoQWORD'.static.IsGt(QW3, QW4);
+        class'FCryptoQWORD'.static.IsGt(QW4, QW5);
+        class'FCryptoQWORD'.static.IsGt(QW5, QW6);
+        class'FCryptoQWORD'.static.IsGt(QW7, QW8);
+        class'FCryptoQWORD'.static.IsGt(QW8, QW9);
+        class'FCryptoQWORD'.static.IsGt(QW9, QW1);
+        class'FCryptoQWORD'.static.IsGt(QW2, QW5);
+        class'FCryptoQWORD'.static.IsGt(QW7, QW9);
+        class'FCryptoQWORD'.static.IsGt(QW1, QW5);
+        class'FCryptoQWORD'.static.IsGt(QW1, QW4);
+        class'FCryptoQWORD'.static.IsGt(QW4, QW9);
+        class'FCryptoQWORD'.static.IsGt(QW4, QW8);
+    }
+    UnClock(QWClock);
+    `fclog("QWClock (reference)=" $ QWClock);
+
+    QWClock = 0;
+    Clock(QWClock);
+    for (QWIdx = 0; QWIdx < 1024; ++QWIdx)
+    {
+        class'FCryptoQWORD'.static.IsGt_NonConst(QW1, QW1);
+        class'FCryptoQWORD'.static.IsGt_NonConst(QW1, QW2);
+        class'FCryptoQWORD'.static.IsGt_NonConst(QW2, QW3);
+        class'FCryptoQWORD'.static.IsGt_NonConst(QW3, QW4);
+        class'FCryptoQWORD'.static.IsGt_NonConst(QW4, QW5);
+        class'FCryptoQWORD'.static.IsGt_NonConst(QW5, QW6);
+        class'FCryptoQWORD'.static.IsGt_NonConst(QW7, QW8);
+        class'FCryptoQWORD'.static.IsGt_NonConst(QW8, QW9);
+        class'FCryptoQWORD'.static.IsGt_NonConst(QW9, QW1);
+        class'FCryptoQWORD'.static.IsGt_NonConst(QW2, QW5);
+        class'FCryptoQWORD'.static.IsGt_NonConst(QW7, QW9);
+        class'FCryptoQWORD'.static.IsGt_NonConst(QW1, QW5);
+        class'FCryptoQWORD'.static.IsGt_NonConst(QW1, QW4);
+        class'FCryptoQWORD'.static.IsGt_NonConst(QW4, QW9);
+        class'FCryptoQWORD'.static.IsGt_NonConst(QW4, QW8);
+    }
+    UnClock(QWClock);
+    `fclog("QWClock (copy)=" $ QWClock);
+
     // BearSSL assumes all operands caller-allocated.
     // We'll do some bare minimum allocations here to avoid issues.
     // TODO: does UScript dynamic array allocation break CT guarantees?
@@ -1153,6 +1229,38 @@ private final simulated function int TestMath()
     );
     X.Length = 0;
 
+    // -----------------------------------------------------------------------
+    // -----------------------------------------------------------------------
+    // TODO: dedicated benchmarking test suite.
+    QWClock = 0;
+    Clock(QWClock);
+    for (BenchmarkRound = 0; BenchmarkRound < 10000; ++BenchmarkRound)
+    {
+        class'FCryptoBigInt'.static.Decode(
+            X,
+            Bytes_683384335291162482276352519,
+            Bytes_683384335291162482276352519.Length
+        );
+    }
+    UnClock(QWClock);
+    `fclog("QWClock (decode1)=" $ QWClock);
+
+    QWClock = 0;
+    Clock(QWClock);
+    for (BenchmarkRound = 0; BenchmarkRound < 10000; ++BenchmarkRound)
+    {
+        class'FCryptoBigInt'.static.Decode_NonConst(
+            X,
+            Bytes_683384335291162482276352519,
+            Bytes_683384335291162482276352519.Length
+        );
+    }
+    UnClock(QWClock);
+    `fclog("QWClock (decode2)=" $ QWClock);
+
+    // -----------------------------------------------------------------------
+    // -----------------------------------------------------------------------
+
     `fcdebug("check decode Bytes_683384335291162482276352519");
     class'FCryptoBigInt'.static.Decode(
         X,