-
Notifications
You must be signed in to change notification settings - Fork 147
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
357 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,281 @@ | ||
// Package k12 implements the KangarooTwelve XOF.
package k12 | ||
|
||
import ( | ||
"encoding/binary" | ||
|
||
"github.com/cloudflare/circl/internal/sha3" | ||
"github.com/cloudflare/circl/simd/keccakf1600" | ||
) | ||
|
||
// chunkSize is the number of bytes in one chunk: the first chunkSize bytes
// written go straight to the stalk, and every later chunk of this size is
// hashed separately.
const chunkSize = 8 * 1024
|
||
type State struct { | ||
initialTodo int | ||
stalk sha3.State | ||
|
||
context []byte | ||
|
||
buf []byte // chunkSize * lanes | ||
offset int // offset in buf | ||
lanes uint8 // number of TurboSHAKE128s to compute in parallel | ||
chunk uint // number of current chunk being absorbed | ||
} | ||
|
||
// Create a new instance of Kangaroo12 draft version -10 | ||
func NewDraft10(c []byte) State { | ||
return State{ | ||
initialTodo: chunkSize, | ||
stalk: sha3.NewTurboShake128(0x07), | ||
context: c, | ||
} | ||
} | ||
|
||
func (s *State) Reset() { | ||
s.initialTodo = chunkSize | ||
s.stalk.Reset() | ||
s.stalk.SwitchDS(0x07) | ||
s.buf = nil | ||
s.offset = 0 | ||
s.chunk = 0 | ||
} | ||
|
||
func Draft10Sum(hash []byte, msg []byte, c []byte) { | ||
s := NewDraft10(c) | ||
_, _ = s.Write(msg) | ||
_, _ = s.Read(hash) | ||
} | ||
|
||
func (s *State) Write(p []byte) (int, error) { | ||
written := len(p) | ||
|
||
// The first chunk is written directly to the stalk. | ||
if s.initialTodo > 0 { | ||
taken := s.initialTodo | ||
if len(p) < taken { | ||
taken = len(p) | ||
} | ||
headP := p[:taken] | ||
_, _ = s.stalk.Write(headP) | ||
s.initialTodo -= taken | ||
p = p[taken:] | ||
} | ||
|
||
if len(p) == 0 { | ||
return written, nil | ||
} | ||
|
||
// If this is the first bit of data written after the initial chunk, | ||
// we're out of the fast-path and allocate some buffers. | ||
if s.buf == nil { | ||
// TODO optimise the X1 case | ||
if keccakf1600.IsEnabledX4() { | ||
s.lanes = 4 | ||
} else { | ||
s.lanes = 2 | ||
} | ||
|
||
s.buf = make([]byte, int(s.lanes)*chunkSize) | ||
_, _ = s.stalk.Write([]byte{0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}) | ||
s.stalk.SwitchDS(0x06) | ||
s.chunk++ | ||
} | ||
|
||
// If we can't fill all our lanes or the buffer isn't empty, we write the | ||
// data to the buffer. | ||
if s.offset != 0 || len(p) < len(s.buf) { | ||
to := len(s.buf) - s.offset | ||
if len(p) < to { | ||
to = len(p) | ||
} | ||
p2 := p[:to] | ||
p = p[to:] | ||
copy(s.buf[s.offset:], p2) | ||
s.offset += to | ||
} | ||
|
||
// Absorb the buffer if we filled it | ||
if s.offset == len(s.buf) { | ||
s.writeX(s.buf) | ||
s.offset = 0 | ||
} | ||
|
||
// Note that at this point we may assume that s.offset = 0 if len(p) != 0 | ||
if len(p) != 0 && s.offset != 0 { | ||
panic("shouldn't happen") | ||
} | ||
|
||
// Absorb a bunch of chunks at the same time. | ||
if len(p) >= int(s.lanes)*chunkSize { | ||
p = s.writeX(p) | ||
} | ||
|
||
// Put the remainder in the buffer. | ||
if len(p) > 0 { | ||
copy(s.buf, p) | ||
s.offset = len(p) | ||
} | ||
|
||
return written, nil | ||
} | ||
|
||
// Absorb a multiple of a multiple of lanes * chunkSize. | ||
// Returns the remainder. | ||
func (s *State) writeX(p []byte) []byte { | ||
switch s.lanes { | ||
case 4: | ||
return s.writeX4(p) | ||
default: | ||
return s.writeX2(p) | ||
} | ||
} | ||
|
||
func (s *State) writeX4(p []byte) []byte { | ||
for len(p) >= 4*chunkSize { | ||
var x4 keccakf1600.StateX4 | ||
a := x4.Initialize(true) | ||
|
||
for offset := 0; offset < 48*168; offset += 168 { | ||
for l := 0; l < 4; l++ { | ||
for i := 0; i < 21; i++ { | ||
a[i*4+l] ^= binary.LittleEndian.Uint64( | ||
p[chunkSize*l+8*i+offset:], | ||
) | ||
} | ||
} | ||
|
||
x4.Permute() | ||
} | ||
|
||
for l := 0; l < 4; l++ { | ||
for i := 0; i < 16; i++ { | ||
a[i*4+l] ^= binary.LittleEndian.Uint64( | ||
p[chunkSize*l+8*i+48*168:], | ||
) | ||
} | ||
|
||
a[16*4+l] ^= 0x0b | ||
a[20*4+l] ^= 0x80 << 56 | ||
} | ||
|
||
x4.Permute() | ||
|
||
var buf [32 * 4]byte | ||
for i := 0; i < 4; i++ { | ||
for l := 0; l < 4; l++ { | ||
binary.LittleEndian.PutUint64(buf[32*l+8*i:], a[4*i+l]) | ||
} | ||
} | ||
|
||
_, _ = s.stalk.Write(buf[:]) | ||
p = p[chunkSize*4:] | ||
s.chunk += 4 | ||
} | ||
|
||
return p | ||
} | ||
|
||
func (s *State) writeX2(p []byte) []byte { | ||
// TODO On M2 Pro, 1/3 of the time is spent on this function | ||
// and LittleEndian.Uint64 excluding the actual permutation. | ||
// Rewriting in assembler might be worthwhile. | ||
for len(p) >= 2*chunkSize { | ||
var x2 keccakf1600.StateX2 | ||
a := x2.Initialize(true) | ||
|
||
for offset := 0; offset < 48*168; offset += 168 { | ||
for i := 0; i < 21; i++ { | ||
a[i*2] ^= binary.LittleEndian.Uint64( | ||
p[8*i+offset:], | ||
) | ||
a[i*2+1] ^= binary.LittleEndian.Uint64( | ||
p[chunkSize+8*i+offset:], | ||
) | ||
} | ||
|
||
x2.Permute() | ||
} | ||
|
||
for i := 0; i < 16; i++ { | ||
a[i*2] ^= binary.LittleEndian.Uint64( | ||
p[8*i+48*168:], | ||
) | ||
a[i*2+1] ^= binary.LittleEndian.Uint64( | ||
p[chunkSize+8*i+48*168:], | ||
) | ||
} | ||
|
||
a[16*2] ^= 0x0b | ||
a[16*2+1] ^= 0x0b | ||
a[20*2] ^= 0x80 << 56 | ||
a[20*2+1] ^= 0x80 << 56 | ||
|
||
x2.Permute() | ||
|
||
var buf [32 * 2]byte | ||
for i := 0; i < 4; i++ { | ||
binary.LittleEndian.PutUint64(buf[8*i:], a[2*i]) | ||
binary.LittleEndian.PutUint64(buf[32+8*i:], a[2*i+1]) | ||
} | ||
|
||
_, _ = s.stalk.Write(buf[:]) | ||
p = p[chunkSize*2:] | ||
s.chunk += 2 | ||
} | ||
|
||
return p | ||
} | ||
|
||
func (s *State) Read(p []byte) (int, error) { | ||
if s.stalk.Absorbing() { | ||
// Write context string C | ||
_, _ = s.Write(s.context) | ||
|
||
// Write length_encode( |C| ) | ||
var buf [9]byte | ||
binary.BigEndian.PutUint64(buf[:8], uint64(len(s.context))) | ||
|
||
// Find first non-zero digit in big endian encoding of context length | ||
i := 0 | ||
for buf[i] == 0 && i < 8 { | ||
i++ | ||
} | ||
|
||
buf[8] = byte(8 - i) // number of non-zero bytes in |C| | ||
_, _ = s.Write(buf[i:]) | ||
|
||
// Flush s.buf if not empty | ||
if s.buf != nil { | ||
remainingBuf := s.buf[:s.offset] | ||
for len(remainingBuf) > 0 { | ||
s.chunk++ | ||
var cv [32]byte | ||
h := sha3.NewTurboShake128(0x0B) | ||
to := chunkSize | ||
if len(remainingBuf) < to { | ||
s.chunk-- | ||
to = len(remainingBuf) | ||
} | ||
_, _ = h.Write(remainingBuf[:to]) | ||
_, _ = h.Read(cv[:]) | ||
_, _ = s.stalk.Write(cv[:]) | ||
remainingBuf = remainingBuf[to:] | ||
} | ||
|
||
// Write length_encode( chunk ) | ||
binary.BigEndian.PutUint64(buf[:8], uint64(s.chunk)) | ||
|
||
// Find first non-zero digit in big endian encoding of number of chunks | ||
i = 0 | ||
for buf[i] == 0 && i < 8 { | ||
i++ | ||
} | ||
|
||
buf[8] = byte(8 - i) // number of non-zero bytes in chunk | ||
_, _ = s.stalk.Write(buf[i:]) | ||
_, _ = s.stalk.Write([]byte{0xff, 0xff}) | ||
} | ||
} | ||
|
||
return s.stalk.Read(p) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
package k12 | ||
|
||
import ( | ||
"encoding/hex" | ||
"testing" | ||
) | ||
|
||
// ptn returns the n-byte test pattern in which byte i has the value
// i mod 0xfb, i.e. the sequence 0x00, 0x01, ..., 0xfa repeated as needed.
//
// See draft-irtf-cfrg-kangarootwelve-10 §4.
func ptn(n int) []byte {
	pattern := make([]byte, n)
	for idx := range pattern {
		pattern[idx] = byte(idx % 0xfb)
	}
	return pattern
}
|
||
func k12hex(t *testing.T, msg []byte, c []byte, l int, want string) { | ||
buf := make([]byte, l) | ||
Draft10Sum(buf, msg, c) | ||
got := hex.EncodeToString(buf) | ||
if want != got { | ||
t.Fatalf("%s != %s", want, got) | ||
} | ||
} | ||
|
||
func TestK12(t *testing.T) { | ||
k12hex(t, []byte{}, []byte{}, 31, "1ac2d450fc3b4205d19da7bfca1b37513c0803577ac7167f06fe2ce1f0ef39") | ||
i := 17 | ||
k12hex(t, ptn(i), []byte{}, 32, "6bf75fa2239198db4772e36478f8e19b0f371205f6a9a93a273f51df37122888") | ||
i *= 17 | ||
k12hex(t, ptn(i), []byte{}, 32, "0c315ebcdedbf61426de7dcf8fb725d1e74675d7f5327a5067f367b108ecb67c") | ||
i *= 17 | ||
k12hex(t, ptn(i), []byte{}, 32, "cb552e2ec77d9910701d578b457ddf772c12e322e4ee7fe417f92c758f0d59d0") | ||
i *= 17 | ||
k12hex(t, ptn(i), []byte{}, 32, "8701045e22205345ff4dda05555cbb5c3af1a771c2b89baef37db43d9998b9fe") | ||
i *= 17 | ||
k12hex(t, ptn(i), []byte{}, 32, "844d610933b1b9963cbdeb5ae3b6b05cc7cbd67ceedf883eb678a0a8e0371682") | ||
i *= 17 | ||
k12hex(t, ptn(i), []byte{}, 32, "3c390782a8a4e89fa6367f72feaaf13255c8d95878481d3cd8ce85f58e880af8") | ||
k12hex(t, []byte{}, ptn(1), 32, "fab658db63e94a246188bf7af69a133045f46ee984c56e3c3328caaf1aa1a583") | ||
k12hex(t, []byte{0xff}, ptn(41), 32, "d848c5068ced736f4462159b9867fd4c20b808acc3d5bc48e0b06ba0a3762ec4") | ||
k12hex(t, []byte{0xff, 0xff, 0xff}, ptn(41*41), 32, "c389e5009ae57120854c2e8c64670ac01358cf4c1baf89447a724234dc7ced74") | ||
k12hex(t, []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, ptn(41*41*41), 32, "75d2f86a2e644566726b4fbcfc5657b9dbcf070c7b0dca06450ab291d7443bcf") | ||
} | ||
|
||
func BenchmarkK12_100B(b *testing.B) { benchmarkK12(b, 100, 1) } | ||
func BenchmarkK12_10K(b *testing.B) { benchmarkK12(b, 10000, 1) } | ||
func BenchmarkK12_100K(b *testing.B) { benchmarkK12(b, 10000, 10) } | ||
func BenchmarkK12_1M(b *testing.B) { benchmarkK12(b, 10000, 100) } | ||
func BenchmarkK12_10M(b *testing.B) { benchmarkK12(b, 10000, 1000) } | ||
|
||
func benchmarkK12(b *testing.B, size, num int) { | ||
b.StopTimer() | ||
h := NewDraft10([]byte{}) | ||
data := make([]byte, size) | ||
d := make([]byte, 32) | ||
|
||
b.SetBytes(int64(size * num)) | ||
b.StartTimer() | ||
|
||
for i := 0; i < b.N; i++ { | ||
h.Reset() | ||
for j := 0; j < num; j++ { | ||
_, _ = h.Write(data) | ||
} | ||
_, _ = h.Read(d) | ||
} | ||
} |