Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

all: enables dynamic linking, removes R15 is clobbered #407

Merged
merged 9 commits into from
Feb 27, 2023
49 changes: 49 additions & 0 deletions .etc/all_imports.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
//go:build ignore
// +build ignore

// Generates a Go program with all the public imports of CIRCL. It is used to
// test compilation using static (buildmode=default) and dynamic linking
// (buildmode=plugin).
package main

import (
"flag"
"fmt"
"io/fs"
"os"
"strings"
)

func main() {
outputFileName := flag.String("out", "circl.go", "name of the output file.")
flag.Parse()

f, err := os.Create(*outputFileName)
if err != nil {
panic(err)
}
defer f.Close()

skipDirs := []string{".", "testdata", "internal", "templates"}
circl := "github.com/cloudflare/circl/"

fmt.Fprintln(f, "package main")
err = fs.WalkDir(os.DirFS("."), ".", func(path string, d fs.DirEntry, err error) error {
if err != nil {
panic(err)
}
if d.IsDir() {
for _, sd := range skipDirs {
if strings.Contains(path, sd) {
return nil
}
}
fmt.Fprintf(f, "import _ \"%v%v\"\n", circl, path)
}
return nil
})
if err != nil {
panic(err)
}
fmt.Fprintln(f, "func main() {}")
}
15 changes: 15 additions & 0 deletions .github/workflows/ci-actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,21 @@ jobs:
- name: Testing
run: |
docker run --rm -v `pwd`:`pwd` -w `pwd` ${{matrix.CFG[1]}}/golang:${{matrix.CFG[2]}} go test -v ./...
# Job that checks the library under two Go build modes by running the
# Makefile targets circl_static and circl_plugin. It depends on amd64_job
# (see `needs`) and uses Go 1.20 on an ubuntu-22.04 runner.
build_modes:
needs: [amd64_job]
runs-on: ubuntu-22.04
name: Testing Build Modes
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Setup Go
uses: actions/setup-go@v3
with:
go-version: '1.20'
- name: Build as Static
run: make circl_static
- name: Build as Plugin
run: make circl_plugin
coverage_amd64_job:
needs: [amd64_job]
if: github.event_name == 'push'
Expand Down
14 changes: 14 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,17 @@ bootstrap:

clean:
rm -rf $(GOPATH_BUILD)

# Build-mode checks: circl.go is a generated program that imports the CIRCL
# packages; it is built once with -buildmode=default and once with
# -buildmode=plugin. The generated source and both build outputs are declared
# intermediate files.
.INTERMEDIATE: circl.go circl_static.exe circl_plugin.so
# circl_static: build circl.go with the default build mode. The Go build and
# module caches are cleaned first.
circl_static: circl_static.exe
circl_static.exe: circl.go
go clean -cache -modcache
go build -buildmode=default -o $@ $^

# circl_plugin: build circl.go as a Go plugin. The Go build and module caches
# are cleaned first.
circl_plugin: circl_plugin.so
circl_plugin.so: circl.go
go clean -cache -modcache
go build -buildmode=plugin -o $@ $^

# circl.go: generated by running .etc/all_imports.go with -out set to the
# target name.
circl.go:
go run .etc/all_imports.go -out $@
2 changes: 2 additions & 0 deletions abe/cpabe/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Package cpabe provides Ciphertext-Policy Attribute-based Encryption algorithms.
package cpabe
2 changes: 2 additions & 0 deletions abe/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Package abe provides Attribute-based data encryption algorithms.
package abe
2 changes: 2 additions & 0 deletions cipher/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Package cipher provides data encryption algorithms.
package cipher
63 changes: 32 additions & 31 deletions dh/csidh/fp511_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ TEXT ·mulBmiAsm(SB),NOSPLIT,$8-24
XORQ R12, R12
XORQ R13, R13
XORQ R14, R14
XORQ R15, R15
XORQ CX, CX

MOVQ BP, 0(SP) // push: BP is Callee-save.
XORQ BP, BP
Expand All @@ -113,42 +113,43 @@ TEXT ·mulBmiAsm(SB),NOSPLIT,$8-24
#define MULS_MULX_512(idx, r0, r1, r2, r3, r4, r5, r6, r7, r8) \
\ // Reduction step
armfazh marked this conversation as resolved.
Show resolved Hide resolved
MOVQ ( 0)(SI), DX \
MULXQ ( 8*idx)(DI), DX, CX \
MULXQ ( 8*idx)(DI), DX, AX \
ADDQ r0, DX \
MULXQ ·pNegInv(SB), DX, CX \
MOVQ ·pNegInv(SB), AX \
MULXQ AX, DX, AX \
\
XORQ AX, AX \
MULXQ ·p+ 0(SB), AX, BX; ; ADOXQ AX, r0 \
MULXQ ·p+ 8(SB), AX, CX; ADCXQ BX, r1; ADOXQ AX, r1 \
MULXQ ·p+16(SB), AX, BX; ADCXQ CX, r2; ADOXQ AX, r2 \
MULXQ ·p+24(SB), AX, CX; ADCXQ BX, r3; ADOXQ AX, r3 \
MULXQ ·p+32(SB), AX, BX; ADCXQ CX, r4; ADOXQ AX, r4 \
MULXQ ·p+40(SB), AX, CX; ADCXQ BX, r5; ADOXQ AX, r5 \
MULXQ ·p+48(SB), AX, BX; ADCXQ CX, r6; ADOXQ AX, r6 \
MULXQ ·p+56(SB), AX, CX; ADCXQ BX, r7; ADOXQ AX, r7 \
MOVQ $0, AX ; ADCXQ CX, r8; ADOXQ AX, r8 \
XORQ AX, AX; \
MOVQ ·p+ 0(SB), AX; MULXQ AX, AX, BX; ADOXQ AX, r0; ADCXQ BX, r1 \
MOVQ ·p+ 8(SB), AX; MULXQ AX, AX, BX; ADOXQ AX, r1; ADCXQ BX, r2 \
MOVQ ·p+16(SB), AX; MULXQ AX, AX, BX; ADOXQ AX, r2; ADCXQ BX, r3 \
MOVQ ·p+24(SB), AX; MULXQ AX, AX, BX; ADOXQ AX, r3; ADCXQ BX, r4 \
MOVQ ·p+32(SB), AX; MULXQ AX, AX, BX; ADOXQ AX, r4; ADCXQ BX, r5 \
MOVQ ·p+40(SB), AX; MULXQ AX, AX, BX; ADOXQ AX, r5; ADCXQ BX, r6 \
MOVQ ·p+48(SB), AX; MULXQ AX, AX, BX; ADOXQ AX, r6; ADCXQ BX, r7 \
MOVQ ·p+56(SB), AX; MULXQ AX, AX, BX; ADOXQ AX, r7; ADCXQ BX, r8 \
MOVQ $0, AX; ;;;;;;;;;;;;;;;;;;;;;;; ADOXQ AX, r8; \
bwesterb marked this conversation as resolved.
Show resolved Hide resolved
\ // Multiplication step
MOVQ (8*idx)(DI), DX \
\
XORQ AX, AX \
MULXQ ( 0)(SI), AX, BX; ADOXQ AX, r0 \
MULXQ ( 8)(SI), AX, CX; ADCXQ BX, r1; ADOXQ AX, r1 \
MULXQ (16)(SI), AX, BX; ADCXQ CX, r2; ADOXQ AX, r2 \
MULXQ (24)(SI), AX, CX; ADCXQ BX, r3; ADOXQ AX, r3 \
MULXQ (32)(SI), AX, BX; ADCXQ CX, r4; ADOXQ AX, r4 \
MULXQ (40)(SI), AX, CX; ADCXQ BX, r5; ADOXQ AX, r5 \
MULXQ (48)(SI), AX, BX; ADCXQ CX, r6; ADOXQ AX, r6 \
MULXQ (56)(SI), AX, CX; ADCXQ BX, r7; ADOXQ AX, r7 \
MOVQ $0, AX ; ADCXQ CX, r8; ADOXQ AX, r8

MULS_MULX_512(0, R8, R9, R10, R11, R12, R13, R14, R15, BP)
MULS_MULX_512(1, R9, R10, R11, R12, R13, R14, R15, BP, R8)
MULS_MULX_512(2, R10, R11, R12, R13, R14, R15, BP, R8, R9)
MULS_MULX_512(3, R11, R12, R13, R14, R15, BP, R8, R9, R10)
MULS_MULX_512(4, R12, R13, R14, R15, BP, R8, R9, R10, R11)
MULS_MULX_512(5, R13, R14, R15, BP, R8, R9, R10, R11, R12)
MULS_MULX_512(6, R14, R15, BP, R8, R9, R10, R11, R12, R13)
MULS_MULX_512(7, R15, BP, R8, R9, R10, R11, R12, R13, R14)
MULXQ ( 0)(SI), AX, BX; ADOXQ AX, r0; ADCXQ BX, r1 \
MULXQ ( 8)(SI), AX, BX; ADOXQ AX, r1; ADCXQ BX, r2 \
MULXQ (16)(SI), AX, BX; ADOXQ AX, r2; ADCXQ BX, r3 \
MULXQ (24)(SI), AX, BX; ADOXQ AX, r3; ADCXQ BX, r4 \
MULXQ (32)(SI), AX, BX; ADOXQ AX, r4; ADCXQ BX, r5 \
MULXQ (40)(SI), AX, BX; ADOXQ AX, r5; ADCXQ BX, r6 \
MULXQ (48)(SI), AX, BX; ADOXQ AX, r6; ADCXQ BX, r7 \
MULXQ (56)(SI), AX, BX; ADOXQ AX, r7; ADCXQ BX, r8 \
MOVQ $0, AX ; ADOXQ AX, r8;

MULS_MULX_512(0, R8, R9, R10, R11, R12, R13, R14, CX, BP)
MULS_MULX_512(1, R9, R10, R11, R12, R13, R14, CX, BP, R8)
MULS_MULX_512(2, R10, R11, R12, R13, R14, CX, BP, R8, R9)
MULS_MULX_512(3, R11, R12, R13, R14, CX, BP, R8, R9, R10)
MULS_MULX_512(4, R12, R13, R14, CX, BP, R8, R9, R10, R11)
MULS_MULX_512(5, R13, R14, CX, BP, R8, R9, R10, R11, R12)
MULS_MULX_512(6, R14, CX, BP, R8, R9, R10, R11, R12, R13)
MULS_MULX_512(7, CX, BP, R8, R9, R10, R11, R12, R13, R14)
#undef MULS_MULX_512

MOVQ res+0(FP), DI
Expand Down
64 changes: 42 additions & 22 deletions dh/sidh/internal/p434/arith_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -29,27 +29,36 @@
// |-128-| x |--- 256 ---| = |------ 384 ------|
// Assuming the first digit multiplication was already performed.
#define MULX128x256(I1, M1, T1, T2, T3, T4, T5) \
MULXQ M1+ 8(SB), T4, T2 \
MOVQ M1+ 8(SB), AX \
MULXQ AX, T4, T2 \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change (and a few similar ones below) seems unnecessary.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's unnecessary, but it is included due to a bug in the compiler (see go/issues/58735). So this is a workaround.

In short, what is happening here is that MULXQ cannot reference the M1 location because it is a global variable.

Copy link
Member

@bwesterb bwesterb Feb 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, makes sense! A note may be nice.

XORQ AX, AX \
armfazh marked this conversation as resolved.
Show resolved Hide resolved
MULXQ M1+16(SB), T5, T3 \
MOVQ M1+16(SB), AX \
MULXQ AX, T5, T3 \
ADOXQ T4, T1 \ // T1: interm1
ADOXQ T5, T2 \ // T2: interm2
MULXQ M1+24(SB), T5, T4 \
MOVQ M1+24(SB), AX \
MULXQ AX, T5, T4 \
ADOXQ T5, T3 \ // T3: interm3
MOVL $0, AX \
ADOXQ AX, T4 \ // T4: interm4
\
XORQ AX, AX \
MOVQ I1, DX \
MULXQ M1+ 0(SB), T5, I1 \ // T0 <- C0
MOVQ M1+ 0(SB), AX \
MULXQ AX, T5, I1 \ // T0 <- C0
ADCXQ T5, T1 \
ADCXQ I1, T2 \ // T1 <- C1
MULXQ M1+ 8(SB), I1, T5 \
MOVQ M1+ 8(SB), AX \
MULXQ AX, I1, T5 \
ADCXQ T5, T3 \
ADOXQ I1, T2 \ // T2 <- C2
MULXQ M1+16(SB), I1, T5 \
MOVQ M1+16(SB), AX \
MULXQ AX, I1, T5 \
ADCXQ T5, T4 \
ADOXQ I1, T3 \ // T3 <- C3
MULXQ M1+24(SB), I1, T5 \
MOVQ M1+24(SB), AX \
MULXQ AX, I1, T5 \
MOVL $0, AX \
ADCXQ AX, T5 \
ADOXQ I1, T4 \ // T4 <- C4
ADOXQ AX, T5 // T5 <- C5
Expand All @@ -63,13 +72,17 @@
// |64| x |--- 256 ---| = |----- 320 ----|
// Assuming the first digit multiplication was already performed.
#define MULX64x256(M1, T1, T2, T3, T4, T5) \
MULXQ M1+ 8(SB), T4, T2 \
MOVQ M1+ 8(SB), AX \
MULXQ AX, T4, T2 \
XORQ AX, AX \
MULXQ M1+16(SB), T5, T3 \
MOVQ M1+16(SB), AX \
MULXQ AX, T5, T3 \
ADOXQ T4, T1 \ // T1 <- C1
ADOXQ T5, T2 \ // T2 <- C2
MULXQ M1+24(SB), T5, T4 \
MOVQ M1+24(SB), AX \
MULXQ AX, T5, T4 \
ADOXQ T5, T3 \ // T3 <- C3
MOVL $0, AX \
ADOXQ AX, T4 // T4 <- C4

// Performs schoolbook multiplication of two 192-bit numbers
Expand Down Expand Up @@ -284,7 +297,8 @@
#define REDC_MULX(P1, MUL01, MUL23, MUL45, MUL67) \
MOVQ 0x0(DI), DX \
MOVQ 0x8(DI), R14 \
MULXQ P1, R8, R9 \
MOVQ P1, AX \
MULXQ AX, R8, R9 \
MUL01 \
MOVQ 0x10(DI), DX \
MOVQ 0x48(DI), CX \
Expand All @@ -295,12 +309,14 @@
ADCQ 0x38(DI), R12 \
ADCQ 0x40(DI), R13 \
ADCQ $0, CX \
MULXQ P1, BX, BP \
MOVQ P1, AX \
MULXQ AX, BX, BP \
MOVQ R9, 0x0(SI) \
MOVQ R10, 0x8(SI) \
MOVQ R11, 0x10(SI) \
MOVQ R12, 0x18(SI) \
MOVQ R13, 0x20(SI) \
MOVQ CX, 0x28(SI) \
MOVQ 0x50(DI), R9 \
MOVQ 0x58(DI), R10 \
MOVQ 0x60(DI), R11 \
Expand All @@ -315,11 +331,14 @@
ADCQ 0x10(SI), BP \
ADCQ 0x18(SI), R12 \
ADCQ 0x20(SI), R13 \
ADCQ CX, R14 \
ADCQ 0x28(SI), R14 \
MOVQ R14, 0x18(SI) \
MOVQ CX, R14 \
MOVQ $0, CX \
ADCQ R9, R15 \
ADCQ R9, R14 \
ADCQ R10, CX \
MULXQ P1, R8, R9 \
MOVQ P1, AX \
MULXQ AX, R8, R9 \
MOVQ BP, 0x0(SI) \
MOVQ R12, 0x8(SI) \
MOVQ R13, 0x10(SI) \
Expand All @@ -329,22 +348,23 @@
MOVQ 0x0(SI), DX \
ADDQ 0x8(SI), R8 \
ADCQ 0x10(SI), R9 \
ADCQ R14, R10 \
ADCQ R15, BP \
ADCQ 0x18(SI), R10 \
ADCQ R14, BP \
ADCQ CX, R12 \
ADCQ R11, R13 \
ADCQ $0, DI \
MULXQ P1, R14, R15 \
MOVQ P1, AX \
MULXQ AX, R14, BX \
MOVQ R8, 0x0(SI) \
MOVQ R9, 0x8(SI) \
MUL67 \
ADDQ R10, R14 \
ADCQ BP, R15 \
ADCQ BP, BX \
ADCQ R12, R8 \
ADCQ R13, R9 \
ADCQ DI, R11 \
MOVQ R14, 0x10(SI) \
MOVQ R15, 0x18(SI) \
MOVQ BX, 0x18(SI) \
MOVQ R8, 0x20(SI) \
MOVQ R9, 0x28(SI) \
MOVQ R11, 0x30(SI)
Expand Down Expand Up @@ -1314,9 +1334,9 @@ TEXT ·rdcP434(SB),$0-16
// available on Broadwell micro-architectures and newer.
redc_bdw:
#define MULX01 MULX128x256(R14,·P434p1+(8*P434_P1_ZEROS),R9 ,R10,R11,R12,R13)
#define MULX23 MULX128x256(R8 ,·P434p1+(8*P434_P1_ZEROS),BP ,R12,R13,R14,R15)
#define MULX23 MULX128x256(R8 ,·P434p1+(8*P434_P1_ZEROS),BP ,R12,R13,R14,CX )
#define MULX45 MULX128x256(BX ,·P434p1+(8*P434_P1_ZEROS),R9 ,R10,BP ,R12,R13)
#define MULX67 MULX64x256 ( ·P434p1+(8*P434_P1_ZEROS),R15,R8 ,R9 ,R11,CX )
#define MULX67 MULX64x256 ( ·P434p1+(8*P434_P1_ZEROS),BX ,R8 ,R9 ,R11,CX )
REDC_MULX(·P434p1+(8*P434_P1_ZEROS)+0(SB), MULX01, MULX23, MULX45, MULX67)
#undef MULX01
#undef MULX23
Expand Down
Loading