-
Notifications
You must be signed in to change notification settings - Fork 2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This implementation utilizes the same registers found in the reference implementation, aiming to produce a minimal semantic diff between the Avo-generated output and the original hand-written assembly. To verify the Avo implementation, the reference and Avo-generated assembly files are fed to `go tool asm`, capturing the debug output into corresponding temp files. The debug output contains supplementary metadata (line numbers, instruction offsets, and source file references) that must be removed in order to obtain a semantic diff of the two files. This is accomplished via a small utility script written in awk. Commands used to verify Avo output: GOROOT=$(go env GOROOT) ASM_PATH="argon2/blamka_amd64.s" REFERENCE="b2d3a6a4b4d36521cd7f653879cf6981e7c5c340" go tool asm -o /dev/null -I "$GOROOT"/src/runtime -debug \ <(git cat-file -p "$REFERENCE:$ASM_PATH") \ > /tmp/reference.s go tool asm -o /dev/null -I "$GOROOT"/src/runtime -debug \ "$ASM_PATH" \ > /tmp/avo.s normalize(){ awk '{ $1=$2=$3=""; print substr($0,4) }' } diff <(normalize < /tmp/reference.s) <(normalize < /tmp/avo.s) Change-Id: I3567eb80ef80dff248225f17470122c0a4e6951e Reviewed-on: https://go-review.googlesource.com/c/crypto/+/600315 Reviewed-by: Filippo Valsorda <filippo@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Reviewed-by: Roland Shoemaker <roland@golang.org>
- Loading branch information
1 parent
bf5f14f
commit 38a0b5d
Showing
4 changed files
with
3,074 additions
and
212 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,287 @@ | ||
// Copyright 2024 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package main | ||
|
||
import ( | ||
. "github.com/mmcloughlin/avo/build" | ||
. "github.com/mmcloughlin/avo/operand" | ||
. "github.com/mmcloughlin/avo/reg" | ||
_ "golang.org/x/crypto/argon2" | ||
) | ||
|
||
//go:generate go run . -out ../blamka_amd64.s -pkg argon2 | ||
|
||
func main() { | ||
Package("golang.org/x/crypto/argon2") | ||
ConstraintExpr("amd64,gc,!purego") | ||
|
||
blamkaSSE4() | ||
mixBlocksSSE2() | ||
xorBlocksSSE2() | ||
Generate() | ||
} | ||
|
||
func blamkaSSE4() { | ||
Implement("blamkaSSE4") | ||
Attributes(NOSPLIT) | ||
AllocLocal(0) | ||
|
||
Load(Param("b"), RAX) | ||
|
||
c40 := c40_DATA() | ||
c48 := c48_DATA() | ||
|
||
MOVOU(c40, X10) | ||
MOVOU(c48, X11) | ||
|
||
BLAMKA_ROUND_0(AX, 0, X8, X9, X10, X11) | ||
BLAMKA_ROUND_0(AX, 16, X8, X9, X10, X11) | ||
BLAMKA_ROUND_0(AX, 32, X8, X9, X10, X11) | ||
BLAMKA_ROUND_0(AX, 48, X8, X9, X10, X11) | ||
BLAMKA_ROUND_0(AX, 64, X8, X9, X10, X11) | ||
BLAMKA_ROUND_0(AX, 80, X8, X9, X10, X11) | ||
BLAMKA_ROUND_0(AX, 96, X8, X9, X10, X11) | ||
BLAMKA_ROUND_0(AX, 112, X8, X9, X10, X11) | ||
|
||
BLAMKA_ROUND_1(AX, 0, X8, X9, X10, X11) | ||
BLAMKA_ROUND_1(AX, 2, X8, X9, X10, X11) | ||
BLAMKA_ROUND_1(AX, 4, X8, X9, X10, X11) | ||
BLAMKA_ROUND_1(AX, 6, X8, X9, X10, X11) | ||
BLAMKA_ROUND_1(AX, 8, X8, X9, X10, X11) | ||
BLAMKA_ROUND_1(AX, 10, X8, X9, X10, X11) | ||
BLAMKA_ROUND_1(AX, 12, X8, X9, X10, X11) | ||
BLAMKA_ROUND_1(AX, 14, X8, X9, X10, X11) | ||
RET() | ||
} | ||
|
||
func mixBlocksSSE2() { | ||
Implement("mixBlocksSSE2") | ||
Attributes(NOSPLIT) | ||
AllocLocal(0) | ||
|
||
Load(Param("out"), RDX) | ||
Load(Param("a"), RAX) | ||
Load(Param("b"), RBX) | ||
Load(Param("c"), RCX) | ||
MOVQ(U32(128), RDI) | ||
|
||
Label("loop") | ||
MOVOU(Mem{Base: AX}.Offset(0), X0) | ||
MOVOU(Mem{Base: BX}.Offset(0), X1) | ||
MOVOU(Mem{Base: CX}.Offset(0), X2) | ||
PXOR(X1, X0) | ||
PXOR(X2, X0) | ||
MOVOU(X0, Mem{Base: DX}.Offset(0)) | ||
ADDQ(Imm(16), RAX) | ||
ADDQ(Imm(16), RBX) | ||
ADDQ(Imm(16), RCX) | ||
ADDQ(Imm(16), RDX) | ||
SUBQ(Imm(2), RDI) | ||
JA(LabelRef("loop")) | ||
RET() | ||
} | ||
|
||
func xorBlocksSSE2() { | ||
Implement("xorBlocksSSE2") | ||
Attributes(NOSPLIT) | ||
AllocLocal(0) | ||
|
||
Load(Param("out"), RDX) | ||
Load(Param("a"), RAX) | ||
Load(Param("b"), RBX) | ||
Load(Param("c"), RCX) | ||
MOVQ(U32(128), RDI) | ||
|
||
Label("loop") | ||
MOVOU(Mem{Base: AX}.Offset(0), X0) | ||
MOVOU(Mem{Base: BX}.Offset(0), X1) | ||
MOVOU(Mem{Base: CX}.Offset(0), X2) | ||
MOVOU(Mem{Base: DX}.Offset(0), X3) | ||
PXOR(X1, X0) | ||
PXOR(X2, X0) | ||
PXOR(X3, X0) | ||
MOVOU(X0, Mem{Base: DX}.Offset(0)) | ||
ADDQ(Imm(16), RAX) | ||
ADDQ(Imm(16), RBX) | ||
ADDQ(Imm(16), RCX) | ||
ADDQ(Imm(16), RDX) | ||
SUBQ(Imm(2), RDI) | ||
JA(LabelRef("loop")) | ||
RET() | ||
} | ||
|
||
func SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2 VecPhysical) { | ||
MOVO(v4, t1) | ||
MOVO(v5, v4) | ||
MOVO(t1, v5) | ||
MOVO(v6, t1) | ||
PUNPCKLQDQ(v6, t2) | ||
PUNPCKHQDQ(v7, v6) | ||
PUNPCKHQDQ(t2, v6) | ||
PUNPCKLQDQ(v7, t2) | ||
MOVO(t1, v7) | ||
MOVO(v2, t1) | ||
PUNPCKHQDQ(t2, v7) | ||
PUNPCKLQDQ(v3, t2) | ||
PUNPCKHQDQ(t2, v2) | ||
PUNPCKLQDQ(t1, t2) | ||
PUNPCKHQDQ(t2, v3) | ||
} | ||
|
||
func SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2 VecPhysical) { | ||
MOVO(v4, t1) | ||
MOVO(v5, v4) | ||
MOVO(t1, v5) | ||
MOVO(v2, t1) | ||
PUNPCKLQDQ(v2, t2) | ||
PUNPCKHQDQ(v3, v2) | ||
PUNPCKHQDQ(t2, v2) | ||
PUNPCKLQDQ(v3, t2) | ||
MOVO(t1, v3) | ||
MOVO(v6, t1) | ||
PUNPCKHQDQ(t2, v3) | ||
PUNPCKLQDQ(v7, t2) | ||
PUNPCKHQDQ(t2, v6) | ||
PUNPCKLQDQ(t1, t2) | ||
PUNPCKHQDQ(t2, v7) | ||
} | ||
|
||
func HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, t0, c40, c48 VecPhysical) { | ||
MOVO(v0, t0) | ||
PMULULQ(v2, t0) | ||
PADDQ(v2, v0) | ||
PADDQ(t0, v0) | ||
PADDQ(t0, v0) | ||
PXOR(v0, v6) | ||
PSHUFD(Imm(0xB1), v6, v6) | ||
MOVO(v4, t0) | ||
PMULULQ(v6, t0) | ||
PADDQ(v6, v4) | ||
PADDQ(t0, v4) | ||
PADDQ(t0, v4) | ||
PXOR(v4, v2) | ||
PSHUFB(c40, v2) | ||
MOVO(v0, t0) | ||
PMULULQ(v2, t0) | ||
PADDQ(v2, v0) | ||
PADDQ(t0, v0) | ||
PADDQ(t0, v0) | ||
PXOR(v0, v6) | ||
PSHUFB(c48, v6) | ||
MOVO(v4, t0) | ||
PMULULQ(v6, t0) | ||
PADDQ(v6, v4) | ||
PADDQ(t0, v4) | ||
PADDQ(t0, v4) | ||
PXOR(v4, v2) | ||
MOVO(v2, t0) | ||
PADDQ(v2, t0) | ||
PSRLQ(Imm(63), v2) | ||
PXOR(t0, v2) | ||
MOVO(v1, t0) | ||
PMULULQ(v3, t0) | ||
PADDQ(v3, v1) | ||
PADDQ(t0, v1) | ||
PADDQ(t0, v1) | ||
PXOR(v1, v7) | ||
PSHUFD(Imm(0xB1), v7, v7) | ||
MOVO(v5, t0) | ||
PMULULQ(v7, t0) | ||
PADDQ(v7, v5) | ||
PADDQ(t0, v5) | ||
PADDQ(t0, v5) | ||
PXOR(v5, v3) | ||
PSHUFB(c40, v3) | ||
MOVO(v1, t0) | ||
PMULULQ(v3, t0) | ||
PADDQ(v3, v1) | ||
PADDQ(t0, v1) | ||
PADDQ(t0, v1) | ||
PXOR(v1, v7) | ||
PSHUFB(c48, v7) | ||
MOVO(v5, t0) | ||
PMULULQ(v7, t0) | ||
PADDQ(v7, v5) | ||
PADDQ(t0, v5) | ||
PADDQ(t0, v5) | ||
PXOR(v5, v3) | ||
MOVO(v3, t0) | ||
PADDQ(v3, t0) | ||
PSRLQ(Imm(63), v3) | ||
PXOR(t0, v3) | ||
} | ||
|
||
func LOAD_MSG_0(block GPPhysical, off int) { | ||
var registers = []VecPhysical{X0, X1, X2, X3, X4, X5, X6, X7} | ||
for i, r := range registers { | ||
MOVOU(Mem{Base: block}.Offset(8*(off+(i*2))), r) | ||
} | ||
} | ||
|
||
func STORE_MSG_0(block GPPhysical, off int) { | ||
var registers = []VecPhysical{X0, X1, X2, X3, X4, X5, X6, X7} | ||
for i, r := range registers { | ||
MOVOU(r, Mem{Base: block}.Offset(8*(off+(i*2)))) | ||
} | ||
} | ||
|
||
func LOAD_MSG_1(block GPPhysical, off int) { | ||
var registers = []VecPhysical{X0, X1, X2, X3, X4, X5, X6, X7} | ||
for i, r := range registers { | ||
MOVOU(Mem{Base: block}.Offset(8*off+i*16*8), r) | ||
} | ||
} | ||
|
||
func STORE_MSG_1(block GPPhysical, off int) { | ||
var registers = []VecPhysical{X0, X1, X2, X3, X4, X5, X6, X7} | ||
for i, r := range registers { | ||
MOVOU(r, Mem{Base: block}.Offset(8*off+i*16*8)) | ||
} | ||
} | ||
|
||
func BLAMKA_ROUND_0(block GPPhysical, off int, t0, t1, c40, c48 VecPhysical) { | ||
LOAD_MSG_0(block, off) | ||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48) | ||
SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1) | ||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48) | ||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1) | ||
STORE_MSG_0(block, off) | ||
} | ||
|
||
func BLAMKA_ROUND_1(block GPPhysical, off int, t0, t1, c40, c48 VecPhysical) { | ||
LOAD_MSG_1(block, off) | ||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48) | ||
SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1) | ||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48) | ||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1) | ||
STORE_MSG_1(block, off) | ||
} | ||
|
||
// ##------------------DATA SECTION-------------------## | ||
|
||
var c40_DATA_ptr, c48_DATA_ptr *Mem | ||
|
||
func c40_DATA() Mem { | ||
if c40_DATA_ptr != nil { | ||
return *c40_DATA_ptr | ||
} | ||
|
||
c40_DATA := GLOBL("·c40", NOPTR|RODATA) | ||
c40_DATA_ptr = &c40_DATA | ||
DATA(0x00, U64(0x0201000706050403)) | ||
DATA(0x08, U64(0x0a09080f0e0d0c0b)) | ||
return c40_DATA | ||
} | ||
func c48_DATA() Mem { | ||
if c48_DATA_ptr != nil { | ||
return *c48_DATA_ptr | ||
} | ||
|
||
c48_DATA := GLOBL("·c48", NOPTR|RODATA) | ||
c48_DATA_ptr = &c48_DATA | ||
DATA(0x00, U64(0x0100070605040302)) | ||
DATA(0x08, U64(0x09080f0e0d0c0b0a)) | ||
return c48_DATA | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
module argon2/_asm | ||
|
||
go 1.23 | ||
|
||
require ( | ||
github.com/mmcloughlin/avo v0.6.0 | ||
golang.org/x/crypto v0.26.0 | ||
) | ||
|
||
require ( | ||
golang.org/x/mod v0.20.0 // indirect | ||
golang.org/x/sync v0.8.0 // indirect | ||
golang.org/x/sys v0.24.0 // indirect | ||
golang.org/x/tools v0.24.0 // indirect | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
github.com/mmcloughlin/avo v0.6.0 h1:QH6FU8SKoTLaVs80GA8TJuLNkUYl4VokHKlPhVDg4YY= | ||
github.com/mmcloughlin/avo v0.6.0/go.mod h1:8CoAGaCSYXtCPR+8y18Y9aB/kxb8JSS6FRI7mSkvD+8= | ||
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= | ||
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= | ||
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= | ||
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= | ||
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= | ||
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= | ||
golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= | ||
golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= | ||
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= | ||
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= |
Oops, something went wrong.