Skip to content

Commit

Permalink
Add KangarooTwelve draft -10
Browse files Browse the repository at this point in the history
  • Loading branch information
bwesterb committed Apr 19, 2023
1 parent a3f1816 commit cf87fe1
Show file tree
Hide file tree
Showing 4 changed files with 357 additions and 0 deletions.
4 changes: 4 additions & 0 deletions internal/sha3/sha3.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,3 +194,7 @@ func (d *State) Sum(in []byte) []byte {
_, _ = dup.Read(hash)
return append(in, hash...)
}

// Absorbing reports whether d is still in its absorbing phase, that is,
// whether d.state equals spongeAbsorbing.
func (d *State) Absorbing() bool {
return d.state == spongeAbsorbing
}
4 changes: 4 additions & 0 deletions internal/sha3/shake.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,7 @@ func TurboShakeSum256(hash, data []byte, D byte) {
_, _ = h.Write(data)
_, _ = h.Read(hash)
}

// SwitchDS overwrites d.dsbyte with D. It only stores the new value; nothing
// else in d is touched.
// NOTE(review): presumably only meaningful while d is still absorbing —
// confirm against where dsbyte is consumed.
func (d *State) SwitchDS(D byte) {
d.dsbyte = D
}
281 changes: 281 additions & 0 deletions xof/k12/k12.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,281 @@
// Package k12 implements the KangarooTwelve XOF.
package k12

import (
"encoding/binary"

"github.com/cloudflare/circl/internal/sha3"
"github.com/cloudflare/circl/simd/keccakf1600"
)

const chunkSize = 8192

// State is an in-progress KangarooTwelve computation. Create one with
// NewDraft10, feed it with Write and extract output with Read.
type State struct {
// Number of bytes of the first chunk that still have to be written
// directly to stalk; starts at chunkSize.
initialTodo int
// Sponge that receives the first chunk verbatim and, for longer inputs,
// the 32-byte digests of all later chunks. Output is read from it.
stalk sha3.State

// Context string appended to the input when Read is first called.
context []byte

// Staging area for input beyond the first chunk. It stays nil until such
// input arrives, which is also how Read detects the multi-chunk case.
buf []byte // chunkSize * lanes
offset int // offset in buf
lanes uint8 // number of TurboSHAKE128s to compute in parallel
chunk uint // number of current chunk being absorbed
}

// NewDraft10 returns a fresh KangarooTwelve (draft version -10) state that
// uses c as its context string.
//
// The slice c is stored as-is, not copied; it is read when Read is first
// called on the returned state.
func NewDraft10(c []byte) State {
	var s State
	s.context = c
	s.initialTodo = chunkSize
	// 0x07 is the domain byte used as long as the input fits the first chunk.
	s.stalk = sha3.NewTurboShake128(0x07)
	return s
}

// Reset puts s back into the state it had right after NewDraft10 so a new
// message can be hashed. The context string is kept.
func (s *State) Reset() {
	// Rewind the stalk first, then restore the domain byte it starts with.
	s.stalk.Reset()
	s.stalk.SwitchDS(0x07)

	// Forget everything about chunks past the first one. Dropping buf also
	// re-arms the "first data after the initial chunk" path in Write.
	s.buf = nil
	s.offset = 0
	s.chunk = 0
	s.initialTodo = chunkSize
}

// Draft10Sum is the one-shot form of the XOF: it hashes msg with context
// string c and fills hash with len(hash) bytes of output.
func Draft10Sum(hash []byte, msg []byte, c []byte) {
	xof := NewDraft10(c)
	_, _ = xof.Write(msg)
	_, _ = xof.Read(hash)
}

// Write absorbs p into the state. It always returns len(p) and a nil error.
//
// The first chunkSize bytes ever written go straight into s.stalk. Input
// beyond that is processed in groups of s.lanes chunks: complete groups are
// handed to writeX (directly from p when possible, otherwise via s.buf) and
// any incomplete tail is kept in s.buf for a later Write or for Read.
func (s *State) Write(p []byte) (int, error) {
written := len(p)

// The first chunk is written directly to the stalk.
if s.initialTodo > 0 {
taken := s.initialTodo
if len(p) < taken {
taken = len(p)
}
headP := p[:taken]
_, _ = s.stalk.Write(headP)
s.initialTodo -= taken
p = p[taken:]
}

// Everything fitted into the first chunk; stay on the fast path.
if len(p) == 0 {
return written, nil
}

// If this is the first bit of data written after the initial chunk,
// we're out of the fast-path and allocate some buffers.
if s.buf == nil {
// TODO optimise the X1 case
if keccakf1600.IsEnabledX4() {
s.lanes = 4
} else {
s.lanes = 2
}

s.buf = make([]byte, int(s.lanes)*chunkSize)
// Close off the first chunk in the stalk with an 8-byte marker and move
// the stalk from domain byte 0x07 to 0x06.
_, _ = s.stalk.Write([]byte{0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00})
s.stalk.SwitchDS(0x06)
// Account for the first chunk.
s.chunk++
}

// If we can't fill all our lanes or the buffer isn't empty, we write the
// data to the buffer.
if s.offset != 0 || len(p) < len(s.buf) {
to := len(s.buf) - s.offset
if len(p) < to {
to = len(p)
}
p2 := p[:to]
p = p[to:]
copy(s.buf[s.offset:], p2)
s.offset += to
}

// Absorb the buffer if we filled it
if s.offset == len(s.buf) {
s.writeX(s.buf)
s.offset = 0
}

// Note that at this point we may assume that s.offset = 0 if len(p) != 0
if len(p) != 0 && s.offset != 0 {
panic("shouldn't happen")
}

// Absorb a bunch of chunks at the same time.
if len(p) >= int(s.lanes)*chunkSize {
p = s.writeX(p)
}

// Put the remainder in the buffer.
if len(p) > 0 {
copy(s.buf, p)
s.offset = len(p)
}

return written, nil
}

// writeX absorbs as many complete groups of s.lanes chunks
// (s.lanes * chunkSize bytes each) as p contains, using the four-way
// routine when s.lanes is 4 and the two-way routine otherwise.
// It returns the part of p that was not consumed.
func (s *State) writeX(p []byte) []byte {
	if s.lanes == 4 {
		return s.writeX4(p)
	}
	return s.writeX2(p)
}

// writeX4 consumes p four chunks at a time. For every group of four
// consecutive chunks it runs four interleaved Keccak states (one per chunk),
// takes the first 32 bytes of each resulting state and writes those four
// digests, in chunk order, to s.stalk. s.chunk is advanced by four per group.
//
// It returns the tail of p that is shorter than four chunks.
func (s *State) writeX4(p []byte) []byte {
	const (
		// Number of states processed side by side.
		lanes = 4
		// Bytes absorbed per permutation (21 64-bit words).
		rate = 168
		// Whole rate-sized blocks in one chunk; they cover tailOff bytes.
		fullBlocks = 48
		tailOff    = fullBlocks * rate
		// 64-bit words left in a chunk after the whole blocks.
		tailWords = 16
	)

	for len(p) >= lanes*chunkSize {
		var perm keccakf1600.StateX4
		st := perm.Initialize(true)

		// Word w of lane l lives at st[w*lanes+l].
		for blk := 0; blk < fullBlocks; blk++ {
			for lane := 0; lane < lanes; lane++ {
				src := p[lane*chunkSize+blk*rate:]
				for w := 0; w < rate/8; w++ {
					st[w*lanes+lane] ^= binary.LittleEndian.Uint64(src[8*w:])
				}
			}

			perm.Permute()
		}

		// Final, partial block of each chunk plus padding: 0x0b directly
		// after the data and 0x80 in the last byte of the rate.
		for lane := 0; lane < lanes; lane++ {
			src := p[lane*chunkSize+tailOff:]
			for w := 0; w < tailWords; w++ {
				st[w*lanes+lane] ^= binary.LittleEndian.Uint64(src[8*w:])
			}

			st[tailWords*lanes+lane] ^= 0x0b
			st[20*lanes+lane] ^= 0x80 << 56
		}

		perm.Permute()

		// De-interleave the first four words of every lane into one
		// contiguous 32-byte digest per chunk.
		var cvs [lanes * 32]byte
		for lane := 0; lane < lanes; lane++ {
			for w := 0; w < 4; w++ {
				binary.LittleEndian.PutUint64(cvs[32*lane+8*w:], st[lanes*w+lane])
			}
		}

		_, _ = s.stalk.Write(cvs[:])
		s.chunk += lanes
		p = p[lanes*chunkSize:]
	}

	return p
}

// writeX2 consumes p two chunks at a time. For every pair of consecutive
// chunks it runs two interleaved Keccak states (one per chunk), takes the
// first 32 bytes of each resulting state and writes the two digests, in
// chunk order, to s.stalk. s.chunk is advanced by two per pair.
//
// It returns the tail of p that is shorter than two chunks.
//
// TODO On M2 Pro, 1/3 of the time is spent on this function
// and LittleEndian.Uint64 excluding the actual permutation.
// Rewriting in assembler might be worthwhile.
func (s *State) writeX2(p []byte) []byte {
	const (
		// Bytes absorbed per permutation (21 64-bit words).
		rate = 168
		// Whole rate-sized blocks in one chunk; they cover tailOff bytes.
		fullBlocks = 48
		tailOff    = fullBlocks * rate
	)

	for len(p) >= 2*chunkSize {
		var perm keccakf1600.StateX2
		st := perm.Initialize(true)

		first := p[:chunkSize]
		second := p[chunkSize : 2*chunkSize]

		// Word w of the first chunk lives at st[2*w], of the second at
		// st[2*w+1].
		for off := 0; off < tailOff; off += rate {
			for w := 0; w < rate/8; w++ {
				st[2*w] ^= binary.LittleEndian.Uint64(first[off+8*w:])
				st[2*w+1] ^= binary.LittleEndian.Uint64(second[off+8*w:])
			}

			perm.Permute()
		}

		// Final, partial block: the remaining 16 words of each chunk.
		for w := 0; w < 16; w++ {
			st[2*w] ^= binary.LittleEndian.Uint64(first[tailOff+8*w:])
			st[2*w+1] ^= binary.LittleEndian.Uint64(second[tailOff+8*w:])
		}

		// Padding: 0x0b directly after the data and 0x80 in the last byte
		// of the rate, for both states.
		for lane := 0; lane < 2; lane++ {
			st[16*2+lane] ^= 0x0b
			st[20*2+lane] ^= 0x80 << 56
		}

		perm.Permute()

		// De-interleave the first four words of each state into two
		// contiguous 32-byte digests.
		var cvs [2 * 32]byte
		for w := 0; w < 4; w++ {
			binary.LittleEndian.PutUint64(cvs[8*w:], st[2*w])
			binary.LittleEndian.PutUint64(cvs[32+8*w:], st[2*w+1])
		}

		_, _ = s.stalk.Write(cvs[:])
		s.chunk += 2
		p = p[2*chunkSize:]
	}

	return p
}

// lengthEncode writes the KangarooTwelve length_encode of x into buf and
// returns the encoded bytes as a sub-slice of buf: the big-endian
// representation of x without leading zero bytes, followed by one byte that
// holds the number of bytes used. Zero encodes as the single byte 0x00.
func lengthEncode(buf *[9]byte, x uint64) []byte {
	binary.BigEndian.PutUint64(buf[:8], x)

	// Skip the leading zero bytes of the big-endian representation.
	i := 0
	for i < 8 && buf[i] == 0 {
		i++
	}

	buf[8] = byte(8 - i)
	return buf[i:]
}

// Read squeezes len(p) bytes of output into p and returns the result of the
// underlying stalk read.
//
// The first call finalizes the input: it appends the context string and its
// encoded length, turns whatever is still buffered into 32-byte chunk
// digests, and, when the input spanned more than one chunk, terminates the
// final node with length_encode(n-1) followed by 0xff 0xff, where n is the
// total number of chunks. Later calls continue the output stream.
func (s *State) Read(p []byte) (int, error) {
	if s.stalk.Absorbing() {
		var enc [9]byte

		// Append C || length_encode( |C| ) to the message.
		_, _ = s.Write(s.context)
		_, _ = s.Write(lengthEncode(&enc, uint64(len(s.context))))

		// s.buf is only allocated once input has spilled past the first
		// chunk, so a nil buffer means the single-chunk case and the stalk
		// already holds the complete input.
		if s.buf != nil {
			// Digest the chunks still sitting in the buffer. All but the
			// last are full; the last may be shorter.
			rest := s.buf[:s.offset]
			for len(rest) > 0 {
				to := chunkSize
				if len(rest) < to {
					to = len(rest)
				}

				var cv [32]byte
				h := sha3.NewTurboShake128(0x0B)
				_, _ = h.Write(rest[:to])
				_, _ = h.Read(cv[:])
				_, _ = s.stalk.Write(cv[:])

				rest = rest[to:]
				// Count every chunk, full or partial. Skipping the count
				// only for a partial chunk made the encoded value one too
				// large whenever the input ended exactly on a chunk
				// boundary.
				s.chunk++
			}

			// s.chunk now equals n, the total number of chunks including
			// the first one; the final node carries length_encode( n-1 ).
			_, _ = s.stalk.Write(lengthEncode(&enc, uint64(s.chunk-1)))
			_, _ = s.stalk.Write([]byte{0xff, 0xff})
		}
	}

	return s.stalk.Read(p)
}
68 changes: 68 additions & 0 deletions xof/k12/k12_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package k12

import (
"encoding/hex"
"testing"
)

// ptn returns the n-byte test pattern used by the KangarooTwelve test
// vectors: byte i has the value i mod 0xfb.
//
// See draft-irtf-cfrg-kangarootwelve-10 §4.
func ptn(n int) []byte {
	pattern := make([]byte, n)
	for idx := range pattern {
		pattern[idx] = byte(idx % 0xfb)
	}
	return pattern
}

// k12hex hashes msg with context string c, reads l bytes of output and fails
// the test immediately if their lowercase hex encoding differs from want.
func k12hex(t *testing.T, msg []byte, c []byte, l int, want string) {
	// Every vector goes through this function; mark it as a helper so a
	// failure is reported at the line of the vector that triggered it.
	t.Helper()

	buf := make([]byte, l)
	Draft10Sum(buf, msg, c)
	got := hex.EncodeToString(buf)
	if want != got {
		t.Fatalf("%s != %s", want, got)
	}
}

// TestK12 runs the fixed test vectors through Draft10Sum: the empty input
// with 31 bytes of output, messages ptn(17^k) for k = 1..6 with an empty
// context, and short 0xff messages combined with ptn(41^k) contexts.
func TestK12(t *testing.T) {
	k12hex(t, []byte{}, []byte{}, 31, "1ac2d450fc3b4205d19da7bfca1b37513c0803577ac7167f06fe2ce1f0ef39")

	// Message-only vectors. The message length is multiplied by 17 for each
	// entry, starting at 17.
	byPower := []string{
		"6bf75fa2239198db4772e36478f8e19b0f371205f6a9a93a273f51df37122888",
		"0c315ebcdedbf61426de7dcf8fb725d1e74675d7f5327a5067f367b108ecb67c",
		"cb552e2ec77d9910701d578b457ddf772c12e322e4ee7fe417f92c758f0d59d0",
		"8701045e22205345ff4dda05555cbb5c3af1a771c2b89baef37db43d9998b9fe",
		"844d610933b1b9963cbdeb5ae3b6b05cc7cbd67ceedf883eb678a0a8e0371682",
		"3c390782a8a4e89fa6367f72feaaf13255c8d95878481d3cd8ce85f58e880af8",
	}
	msgLen := 1
	for _, want := range byPower {
		msgLen *= 17
		k12hex(t, ptn(msgLen), []byte{}, 32, want)
	}

	// Vectors with a non-empty context string.
	withContext := []struct {
		msg  []byte
		ctx  []byte
		want string
	}{
		{[]byte{}, ptn(1), "fab658db63e94a246188bf7af69a133045f46ee984c56e3c3328caaf1aa1a583"},
		{[]byte{0xff}, ptn(41), "d848c5068ced736f4462159b9867fd4c20b808acc3d5bc48e0b06ba0a3762ec4"},
		{[]byte{0xff, 0xff, 0xff}, ptn(41 * 41), "c389e5009ae57120854c2e8c64670ac01358cf4c1baf89447a724234dc7ced74"},
		{[]byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, ptn(41 * 41 * 41), "75d2f86a2e644566726b4fbcfc5657b9dbcf070c7b0dca06450ab291d7443bcf"},
	}
	for _, v := range withContext {
		k12hex(t, v.msg, v.ctx, 32, v.want)
	}
}

// Benchmark entry points. Each one hashes size*num bytes per iteration, fed
// as num writes of size bytes (see benchmarkK12); the suffix names that total.
func BenchmarkK12_100B(b *testing.B) { benchmarkK12(b, 100, 1) }
func BenchmarkK12_10K(b *testing.B) { benchmarkK12(b, 10000, 1) }
func BenchmarkK12_100K(b *testing.B) { benchmarkK12(b, 10000, 10) }
func BenchmarkK12_1M(b *testing.B) { benchmarkK12(b, 10000, 100) }
func BenchmarkK12_10M(b *testing.B) { benchmarkK12(b, 10000, 1000) }

// benchmarkK12 measures hashing of size*num zero bytes, supplied as num
// consecutive writes of size bytes, followed by reading 32 bytes of output.
// One state with an empty context is reused and reset on every iteration;
// setup is kept out of the timed region.
func benchmarkK12(b *testing.B, size, num int) {
	b.StopTimer()
	xof := NewDraft10([]byte{})
	input := make([]byte, size)
	digest := make([]byte, 32)

	b.SetBytes(int64(size * num))
	b.StartTimer()

	for iter := 0; iter < b.N; iter++ {
		xof.Reset()
		for w := 0; w < num; w++ {
			_, _ = xof.Write(input)
		}
		_, _ = xof.Read(digest)
	}
}

0 comments on commit cf87fe1

Please sign in to comment.