Skip to content

Commit

Permalink
Add nopshufb tag (#256)
Browse files Browse the repository at this point in the history
Add `nopshufb` tag that should remove all code utilizing `PSHUFB` - and similar code on other platforms.

# Background

> None of the section below is legal advice. Seek your own legal counsel.
> As stated by the [LICENSE](LICENSE) the authors will not be held liable for any use of this library.
> Users are encouraged to independently verify they comply with all legal requirements. 

As can be seen in [recent news](https://www.datanami.com/2023/10/16/cloudera-hit-with-240-million-judgement-over-erasure-coding/) there have been lawsuits related to possible patents of aspects of erasure coding functionality.

As a possible mitigation it is possible to use the tag `nopshufb` when compiling any code which includes this package.
This will remove all inclusion and use of `PSHUFB` and equivalent on other platforms.

This is done by adding `-tags=nopshufb` to `go build` and similar commands that produce binary output.

The removed code may not be infringing and even after `-tags=nopshufb` there may still be infringing code left.
  • Loading branch information
klauspost authored Nov 15, 2023
1 parent b8dc407 commit e586036
Show file tree
Hide file tree
Showing 29 changed files with 35,503 additions and 215 deletions.
16 changes: 10 additions & 6 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
build:
strategy:
matrix:
go-version: [1.18.x, 1.19.x, 1.20.x]
go-version: [1.19.x, 1.20.x, 1.21.x]
os: [ubuntu-latest, macos-latest, windows-latest]
env:
CGO_ENABLED: 0
Expand All @@ -32,11 +32,15 @@ jobs:
run: go vet ./...

- name: Test
run: go test ./...
run: go test -timeout=15m ./...

- name: Test Noasm
run: go test -tags=noasm -short&&go test -short -no-avx512&&go test -short -no-avx512 -no-avx2&&go test -no-avx512 -no-avx2 -no-ssse3 -short

- name: Test Nopshufb
run: go test -tags=nopshufb -short&&go test -tags=nopshufb -short -no-avx512 -no-gfni&&go test -tags=nopshufb -short&&go test -tags=nopshufb -no-avx512 -no-avx2 -no-ssse3 -no-sse2 -short


- name: Test Race
env:
CGO_ENABLED: 1
Expand All @@ -50,7 +54,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v2
with:
go-version: 1.19.x
go-version: 1.21.x

- name: Checkout code
uses: actions/checkout@v2
Expand Down Expand Up @@ -102,21 +106,21 @@ jobs:
env:
GOOS: linux
GOARCH: arm64
run: go build .&&go build examples/simple-decoder.go&&go build examples/simple-encoder.go&&go build examples/stream-decoder.go&&go build examples/stream-encoder.go
run: go build .&&go build examples/simple-decoder.go&&go build examples/simple-encoder.go&&go build examples/stream-decoder.go&&go build examples/stream-encoder.go&&go build -tags=nopshufb .&&go build -tags=nopshufb examples/simple-decoder.go&&go build examples/simple-encoder.go&&go build -tags=nopshufb examples/stream-decoder.go&&go build examples/stream-encoder.go

- name: Build on PPC64LE
env:
GOOS: linux
GOARCH: ppc64le
run: go build .&&go build examples/simple-decoder.go&&go build examples/simple-encoder.go&&go build examples/stream-decoder.go&&go build examples/stream-encoder.go
run: go build .&&go build examples/simple-decoder.go&&go build examples/simple-encoder.go&&go build examples/stream-decoder.go&&go build examples/stream-encoder.go && go build -tags=nopshufb .&&go build -tags=nopshufb examples/simple-decoder.go&&go build examples/simple-encoder.go&&go build -tags=nopshufb examples/stream-decoder.go&&go build examples/stream-encoder.go

generate:
runs-on: ubuntu-latest
steps:
- name: Set up Go
uses: actions/setup-go@v2
with:
go-version: 1.19.x
go-version: 1.21.x

- name: Checkout code
uses: actions/checkout@v2
Expand Down
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,21 @@ BenchmarkGaloisXor128K-160 862.02 7905.00 9.17x
BenchmarkGaloisXor1M-160 784.60 6296.65 8.03x
```
# Legal
> None of the section below is legal advice. Seek your own legal counsel.
> As stated by the [LICENSE](LICENSE) the authors will not be held liable for any use of this library.
> Users are encouraged to independently verify they comply with all legal requirements.
As can be seen in [recent news](https://www.datanami.com/2023/10/16/cloudera-hit-with-240-million-judgement-over-erasure-coding/)
there have been lawsuits related to possible patents of aspects of erasure coding functionality.
As a possible mitigation it is possible to use the tag `nopshufb` when compiling any code which includes this package.
This will remove all inclusion and use of `PSHUFB` and equivalent on other platforms.
This is done by adding `-tags=nopshufb` to `go build` and similar commands that produce binary output.
The removed code may not be infringing and even after `-tags=nopshufb` there may still be infringing code left.
# Links
* [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/).
Expand Down
177 changes: 150 additions & 27 deletions _gen/gen.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
//go:build generate
// +build generate

// Copyright 2022+, Klaus Post. See LICENSE for details.

//go:generate go run -tags=generate,nopshufb . -out ../galois_gen_nopshufb_amd64.s -stubs ../galois_gen_nopshufb_amd64.go -pkg=reedsolomon
//go:generate go fmt ../galois_gen_switch_nopshufb_amd64.go
//go:generate go fmt ../galois_gen_nopshufb_amd64.go
//go:generate go run cleanup.go ../galois_gen_nopshufb_amd64.s

//go:generate go run -tags=generate . -out ../galois_gen_amd64.s -stubs ../galois_gen_amd64.go -pkg=reedsolomon
//go:generate go fmt ../galois_gen_switch_amd64.go
//go:generate go fmt ../galois_gen_amd64.go
Expand Down Expand Up @@ -41,6 +45,11 @@ func main() {
Constraint(buildtags.Not("appengine").ToConstraint())
Constraint(buildtags.Not("noasm").ToConstraint())
Constraint(buildtags.Not("nogen").ToConstraint())
if pshufb {
Constraint(buildtags.Not("nopshufb").ToConstraint())
} else {
Constraint(buildtags.Opt("nopshufb").ToConstraint())
}
Constraint(buildtags.Term("gc").ToConstraint())

TEXT("_dummy_", 0, "func()")
Expand All @@ -54,20 +63,31 @@ func main() {
Comment("#endif")
RET()

genXor()
const perLoopBits = 6
const perLoop = 1 << perLoopBits

for i := 1; i <= inputMax; i++ {
for j := 1; j <= outputMax; j++ {
genMulAvx2(fmt.Sprintf("mulAvxTwo_%dx%d", i, j), i, j, false)
genMulAvx2Sixty64(fmt.Sprintf("mulAvxTwo_%dx%d_64", i, j), i, j, false)
if pshufb {
genMulAvx2(fmt.Sprintf("mulAvxTwo_%dx%d", i, j), i, j, false)
genMulAvx2Sixty64(fmt.Sprintf("mulAvxTwo_%dx%d_64", i, j), i, j, false)
}
genMulAvx512GFNI(fmt.Sprintf("mulGFNI_%dx%d_64", i, j), i, j, false)
genMulAvx512GFNI(fmt.Sprintf("mulGFNI_%dx%d_64Xor", i, j), i, j, true)
genMulAvx2(fmt.Sprintf("mulAvxTwo_%dx%dXor", i, j), i, j, true)
genMulAvx2Sixty64(fmt.Sprintf("mulAvxTwo_%dx%d_64Xor", i, j), i, j, true)
if pshufb {
genMulAvx2(fmt.Sprintf("mulAvxTwo_%dx%dXor", i, j), i, j, true)
genMulAvx2Sixty64(fmt.Sprintf("mulAvxTwo_%dx%d_64Xor", i, j), i, j, true)
}
}
}
f, err := os.Create("../galois_gen_switch_amd64.go")
name := "../galois_gen_switch_amd64.go"
tag := "// +build !nopshufb\n"
if !pshufb {
name = "../galois_gen_switch_nopshufb_amd64.go"
tag = "// +build nopshufb\n"
}
f, err := os.Create(name)
if err != nil {
panic(err)
}
Expand All @@ -79,7 +99,8 @@ func main() {
// +build !appengine
// +build !noasm
// +build gc
// +build !nogen
// +build !nogen
` + tag + `
package reedsolomon
Expand All @@ -88,32 +109,38 @@ import (
)
`)

w.WriteString(fmt.Sprintf(`const (
avx2CodeGen = true
maxAvx2Inputs = %d
maxAvx2Outputs = %d
minAvx2Size = %d
avxSizeMask = maxInt - (minAvx2Size-1)
)`, inputMax, outputMax, perLoop))
w.WriteString(`

if !pshufb {
w.WriteString("\n\nfunc galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int { panic(`no pshufb`)}\n")
w.WriteString("func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int { panic(`no pshufb`)}\n")
}

if pshufb {
w.WriteString(`
func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
n := (stop-start) & avxSizeMask
`)

w.WriteString(`switch len(in) {
w.WriteString(`switch len(in) {
`)
for in, defs := range switchDefs[:] {
w.WriteString(fmt.Sprintf(" case %d:\n switch len(out) {\n", in+1))
for out, def := range defs[:] {
w.WriteString(fmt.Sprintf(" case %d:\n", out+1))
w.WriteString(def)
for in, defs := range switchDefs[:] {
w.WriteString(fmt.Sprintf(" case %d:\n switch len(out) {\n", in+1))
for out, def := range defs[:] {
w.WriteString(fmt.Sprintf(" case %d:\n", out+1))
w.WriteString(def)
}
w.WriteString("}\n")
}
w.WriteString("}\n")
}
w.WriteString(`}
w.WriteString(`}
panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
}
Expand All @@ -122,20 +149,21 @@ func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
`)

w.WriteString(`switch len(in) {
w.WriteString(`switch len(in) {
`)
for in, defs := range switchDefsX[:] {
w.WriteString(fmt.Sprintf(" case %d:\n switch len(out) {\n", in+1))
for out, def := range defs[:] {
w.WriteString(fmt.Sprintf(" case %d:\n", out+1))
w.WriteString(def)
for in, defs := range switchDefsX[:] {
w.WriteString(fmt.Sprintf(" case %d:\n switch len(out) {\n", in+1))
for out, def := range defs[:] {
w.WriteString(fmt.Sprintf(" case %d:\n", out+1))
w.WriteString(def)
}
w.WriteString("}\n")
}
w.WriteString("}\n")
}
w.WriteString(`}
w.WriteString(`}
panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
}
`)
}

w.WriteString(`
Expand Down Expand Up @@ -930,3 +958,98 @@ func genMulAvx512GFNI(name string, inputs int, outputs int, xor bool) {
Label(name + "_end")
RET()
}

func genXor() {
// SSE 2
{
Comment("sSE2XorSlice will XOR in with out and store in out.")
Comment("Processes 16 bytes/loop.")
TEXT("sSE2XorSlice", 0, fmt.Sprintf("func(in, out []byte)"))
Pragma("noescape")
src := Load(Param("in").Base(), GP64())
dst := Load(Param("out").Base(), GP64())
length := Load(Param("in").Len(), GP64())
SHRQ(U8(4), length)
srcX, dstX := XMM(), XMM()
JZ(LabelRef("end"))
Label("loop")
MOVOU(Mem{Base: src}, srcX)
MOVOU(Mem{Base: dst}, dstX)
PXOR(srcX, dstX)
MOVOU(dstX, Mem{Base: dst})
ADDQ(U8(16), src)
ADDQ(U8(16), dst)
DECQ(length)
JNZ(LabelRef("loop"))
Label("end")
RET()
}

// SSE2 64 bytes
{
Comment("sSE2XorSlice_64 will XOR in with out and store in out.")
Comment("Processes 64 bytes/loop.")
TEXT("sSE2XorSlice_64", 0, fmt.Sprintf("func(in, out []byte)"))
Pragma("noescape")
src := Load(Param("in").Base(), GP64())
dst := Load(Param("out").Base(), GP64())
length := Load(Param("in").Len(), GP64())
SHRQ(U8(6), length)
var srcX, dstX [4]reg.VecVirtual
for i := range srcX {
srcX[i], dstX[i] = XMM(), XMM()
}
JZ(LabelRef("end"))
Label("loop")
for i := range srcX {
MOVOU(Mem{Base: src, Disp: 16 * i}, srcX[i])
}
for i := range srcX {
MOVOU(Mem{Base: dst, Disp: 16 * i}, dstX[i])
}
for i := range srcX {
PXOR(srcX[i], dstX[i])
}
for i := range srcX {
MOVOU(dstX[i], Mem{Base: dst, Disp: 16 * i})
}
ADDQ(U8(64), src)
ADDQ(U8(64), dst)
DECQ(length)
JNZ(LabelRef("loop"))
Label("end")
RET()
}
//AVX 2
{
Comment("avx2XorSlice_64 will XOR in with out and store in out.")
Comment("Processes 64 bytes/loop.")
TEXT("avx2XorSlice_64", 0, fmt.Sprintf("func(in, out []byte)"))
Pragma("noescape")
src := Load(Param("in").Base(), GP64())
dst := Load(Param("out").Base(), GP64())
length := Load(Param("in").Len(), GP64())
SHRQ(U8(6), length)
srcX, dstX := YMM(), YMM()
srcX2, dstX2 := YMM(), YMM()
JZ(LabelRef("end"))

Label("loop")
VMOVDQU(Mem{Base: src}, srcX)
VMOVDQU(Mem{Base: src, Disp: 32}, srcX2)
VMOVDQU(Mem{Base: dst}, dstX)
VMOVDQU(Mem{Base: dst, Disp: 32}, dstX2)
VPXOR(srcX, dstX, dstX)
VPXOR(srcX2, dstX2, dstX2)
VMOVDQU(dstX, Mem{Base: dst})
VMOVDQU(dstX2, Mem{Base: dst, Disp: 32})
ADDQ(U8(64), src)
ADDQ(U8(64), dst)
DECQ(length)
JNZ(LabelRef("loop"))

Label("end")
VZEROUPPER()
RET()
}
}
15 changes: 9 additions & 6 deletions _gen/gf16.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func genGF16() {
var ctx gf16ctx
// Ported from static void IFFT_DIT2
// https://github.com/catid/leopard/blob/master/LeopardFF16.cpp#L629
{
if pshufb {
TEXT("ifftDIT2_avx2", attr.NOSPLIT, fmt.Sprintf("func(x, y []byte, table *[8*16]uint8)"))
Pragma("noescape")
tablePtr := Load(Param("table"), GP64())
Expand Down Expand Up @@ -120,7 +120,7 @@ func genGF16() {
VZEROUPPER()
RET()
}
{
if pshufb {
TEXT("fftDIT2_avx2", attr.NOSPLIT, fmt.Sprintf("func(x, y []byte, table *[8*16]uint8)"))
Pragma("noescape")
tablePtr := Load(Param("table"), GP64())
Expand Down Expand Up @@ -173,7 +173,7 @@ func genGF16() {
RET()
}

{
if pshufb {
TEXT("mulgf16_avx2", attr.NOSPLIT, fmt.Sprintf("func(x, y []byte, table *[8*16]uint8)"))
Pragma("noescape")
tablePtr := Load(Param("table"), GP64())
Expand Down Expand Up @@ -213,6 +213,9 @@ func genGF16() {
RET()
}
for _, avx512 := range []bool{true, false} {
if !pshufb {
continue
}
x := [8]int{}
for skipMask := range x[:] {
// AVX-512 only uses more registers for tables.
Expand Down Expand Up @@ -562,7 +565,7 @@ func genGF16() {

// SSSE3:
ctx.avx512 = false
{
if pshufb {
TEXT("ifftDIT2_ssse3", attr.NOSPLIT, fmt.Sprintf("func(x, y []byte, table *[8*16]uint8)"))
Pragma("noescape")
tablePtr := Load(Param("table"), GP64())
Expand Down Expand Up @@ -613,7 +616,7 @@ func genGF16() {

RET()
}
{
if pshufb {
TEXT("fftDIT2_ssse3", attr.NOSPLIT, fmt.Sprintf("func(x, y []byte, table *[8*16]uint8)"))
Pragma("noescape")
tablePtr := Load(Param("table"), GP64())
Expand Down Expand Up @@ -671,7 +674,7 @@ func genGF16() {

RET()
}
{
if pshufb {
TEXT("mulgf16_ssse3", attr.NOSPLIT, fmt.Sprintf("func(x, y []byte, table *[8*16]uint8)"))
Pragma("noescape")
tablePtr := Load(Param("table"), GP64())
Expand Down
Loading

0 comments on commit e586036

Please sign in to comment.