From 02f53915fdc56fbe471b9ec90989493163f2f942 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 30 Jan 2023 01:53:40 +0000 Subject: [PATCH] Bump github.com/containerd/stargz-snapshotter/estargz Bumps [github.com/containerd/stargz-snapshotter/estargz](https://github.com/containerd/stargz-snapshotter) from 0.13.0 to 0.14.1. - [Release notes](https://github.com/containerd/stargz-snapshotter/releases) - [Commits](https://github.com/containerd/stargz-snapshotter/compare/v0.13.0...v0.14.1) --- updated-dependencies: - dependency-name: github.com/containerd/stargz-snapshotter/estargz dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- go.mod | 4 +- go.sum | 8 +- .../klauspost/compress/.goreleaser.yml | 2 +- .../github.com/klauspost/compress/README.md | 17 +- .../klauspost/compress/fse/compress.go | 31 +- .../klauspost/compress/huff0/bitreader.go | 8 +- .../klauspost/compress/huff0/compress.go | 114 ++-- .../compress/huff0/decompress_amd64.s | 584 +++++++++--------- .../klauspost/compress/zstd/blockdec.go | 14 +- .../klauspost/compress/zstd/decodeheader.go | 9 +- .../klauspost/compress/zstd/decoder.go | 93 +-- .../compress/zstd/decoder_options.go | 26 +- .../klauspost/compress/zstd/dict.go | 8 +- .../klauspost/compress/zstd/enc_base.go | 26 +- .../klauspost/compress/zstd/enc_best.go | 63 +- .../klauspost/compress/zstd/enc_better.go | 12 +- .../klauspost/compress/zstd/enc_dfast.go | 16 +- .../klauspost/compress/zstd/enc_fast.go | 12 +- .../klauspost/compress/zstd/encoder.go | 35 ++ .../compress/zstd/encoder_options.go | 36 +- .../klauspost/compress/zstd/framedec.go | 47 +- .../compress/zstd/internal/xxhash/README.md | 49 +- .../compress/zstd/internal/xxhash/xxhash.go | 47 +- .../zstd/internal/xxhash/xxhash_amd64.s | 336 +++++----- .../zstd/internal/xxhash/xxhash_arm64.s | 140 +++-- .../zstd/internal/xxhash/xxhash_asm.go | 2 +- .../zstd/internal/xxhash/xxhash_other.go | 19 +- .../klauspost/compress/zstd/seqdec_amd64.s | 28 +- .../klauspost/compress/zstd/zstd.go | 31 +- vendor/modules.txt | 6 +- 30 files changed, 930 insertions(+), 893 deletions(-) diff --git a/go.mod b/go.mod index a881eb5a48..b2f5006013 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.18 require ( github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.0.0-20220517224237-e6f29200ae04 github.com/chrismellard/docker-credential-acr-env v0.0.0-20220327082430-c57b701bfc08 - github.com/containerd/stargz-snapshotter/estargz v0.13.0 + github.com/containerd/stargz-snapshotter/estargz v0.14.1 github.com/docker/docker v20.10.23+incompatible github.com/dprotaso/go-yit v0.0.0-20220510233725-9ba8df137936 github.com/go-training/helloworld v0.0.0-20200225145412-ba5f4379d78b @@ -81,7 +81,7 @@ require ( github.com/inconshreveable/mousetrap v1.0.1 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/josharian/intern v1.0.0 // indirect - github.com/klauspost/compress v1.15.12 // indirect + github.com/klauspost/compress v1.15.15 // indirect github.com/letsencrypt/boulder v0.0.0-20221109233200-85aa52084eaf // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/mailru/easyjson v0.7.7 // indirect diff --git a/go.sum b/go.sum index b859b43cdd..a7f6463bcf 100644 --- a/go.sum +++ b/go.sum @@ -136,8 +136,8 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod 
h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= -github.com/containerd/stargz-snapshotter/estargz v0.13.0 h1:fD7AwuVV+B40p0d9qVkH/Au1qhp8hn/HWJHIYjpEcfw= -github.com/containerd/stargz-snapshotter/estargz v0.13.0/go.mod h1:m+9VaGJGlhCnrcEUod8mYumTmRgblwd3rC5UCEh2Yp0= +github.com/containerd/stargz-snapshotter/estargz v0.14.1 h1:n9M2GDSWM96pyipFTA0DaU+zdtzi3Iwsnj/rIHr1yFM= +github.com/containerd/stargz-snapshotter/estargz v0.14.1/go.mod h1:uPtMw6ucGJYwImjhxk/oghZmfElF/841u86wReNggNk= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= @@ -352,8 +352,8 @@ github.com/karrick/godirwalk v1.10.3/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0Lh github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/klauspost/compress v1.15.12 h1:YClS/PImqYbn+UILDnqxQCZ3RehC9N318SU3kElDUEM= -github.com/klauspost/compress v1.15.12/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM= +github.com/klauspost/compress v1.15.15 h1:EF27CXIuDsYJ6mmvtBRlEuB2UVOqHG1tAXgZ7yIO+lw= +github.com/klauspost/compress v1.15.15/go.mod h1:ZcK2JAFqKOpnBlxcLsJzYfrS9X1akm9fHZNnD9+Vo/4= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= diff --git a/vendor/github.com/klauspost/compress/.goreleaser.yml b/vendor/github.com/klauspost/compress/.goreleaser.yml index 0af08e65e6..a2bf06e94f 100644 --- a/vendor/github.com/klauspost/compress/.goreleaser.yml +++ b/vendor/github.com/klauspost/compress/.goreleaser.yml @@ -3,7 +3,7 @@ before: hooks: - ./gen.sh - - go install mvdan.cc/garble@latest + - go install mvdan.cc/garble@v0.7.2 builds: - diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md index 9ec000ffaa..63f2cd5b25 100644 --- a/vendor/github.com/klauspost/compress/README.md +++ b/vendor/github.com/klauspost/compress/README.md @@ -9,7 +9,6 @@ This package provides various compression algorithms. * [huff0](https://github.com/klauspost/compress/tree/master/huff0) and [FSE](https://github.com/klauspost/compress/tree/master/fse) implementations for raw entropy encoding. * [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp) Provides client and server wrappers for handling gzipped requests efficiently. * [pgzip](https://github.com/klauspost/pgzip) is a separate package that provides a very fast parallel gzip implementation. -* [fuzz package](https://github.com/klauspost/compress-fuzz) for fuzz testing all compressors/decompressors here. 
[![Go Reference](https://pkg.go.dev/badge/klauspost/compress.svg)](https://pkg.go.dev/github.com/klauspost/compress?tab=subdirectories) [![Go](https://github.com/klauspost/compress/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/compress/actions/workflows/go.yml) @@ -17,6 +16,22 @@ This package provides various compression algorithms. # changelog +* Jan 3rd, 2023 (v1.15.14) + + * flate: Improve speed in big stateless blocks https://github.com/klauspost/compress/pull/718 + * zstd: Minor speed tweaks by @greatroar in https://github.com/klauspost/compress/pull/716 https://github.com/klauspost/compress/pull/720 + * export NoGzipResponseWriter for custom ResponseWriter wrappers by @harshavardhana in https://github.com/klauspost/compress/pull/722 + * s2: Add example for indexing and existing stream https://github.com/klauspost/compress/pull/723 + +* Dec 11, 2022 (v1.15.13) + * zstd: Add [MaxEncodedSize](https://pkg.go.dev/github.com/klauspost/compress@v1.15.13/zstd#Encoder.MaxEncodedSize) to encoder https://github.com/klauspost/compress/pull/691 + * zstd: Various tweaks and improvements https://github.com/klauspost/compress/pull/693 https://github.com/klauspost/compress/pull/695 https://github.com/klauspost/compress/pull/696 https://github.com/klauspost/compress/pull/701 https://github.com/klauspost/compress/pull/702 https://github.com/klauspost/compress/pull/703 https://github.com/klauspost/compress/pull/704 https://github.com/klauspost/compress/pull/705 https://github.com/klauspost/compress/pull/706 https://github.com/klauspost/compress/pull/707 https://github.com/klauspost/compress/pull/708 + +* Oct 26, 2022 (v1.15.12) + + * zstd: Tweak decoder allocs. https://github.com/klauspost/compress/pull/680 + * gzhttp: Always delete `HeaderNoCompression` https://github.com/klauspost/compress/pull/683 + * Sept 26, 2022 (v1.15.11) * flate: Improve level 1-3 compression https://github.com/klauspost/compress/pull/678 diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go index 6f341914c6..dac97e58a2 100644 --- a/vendor/github.com/klauspost/compress/fse/compress.go +++ b/vendor/github.com/klauspost/compress/fse/compress.go @@ -146,54 +146,51 @@ func (s *Scratch) compress(src []byte) error { c1.encodeZero(tt[src[ip-2]]) ip -= 2 } + src = src[:ip] // Main compression loop. switch { case !s.zeroBits && s.actualTableLog <= 8: // We can encode 4 symbols without requiring a flush. // We do not need to check if any output is 0 bits. - for ip >= 4 { + for ; len(src) >= 4; src = src[:len(src)-4] { s.bw.flush32() - v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] + v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] c2.encode(tt[v0]) c1.encode(tt[v1]) c2.encode(tt[v2]) c1.encode(tt[v3]) - ip -= 4 } case !s.zeroBits: // We do not need to check if any output is 0 bits. 
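Editorially worth noting about the FSE hunk above: the rewritten loop drops the manual `ip` cursor and instead shrinks `src` itself (`src = src[:ip]` once, then `src = src[:len(src)-4]` per iteration). Because every index is derived from `len(src)`, the compiler can prove each load is in range and elide per-element bounds checks. A minimal, hypothetical sketch of the same pattern (not the library's code):

```go
package main

import "fmt"

// sumTail consumes b four bytes at a time from the end, mirroring the
// shrinking-slice idiom in the patched FSE loop: all indices are derived
// from len(b), so the compiler can prove every load is in bounds.
func sumTail(b []byte) (sum int) {
	for ; len(b) >= 4; b = b[:len(b)-4] {
		v3, v2, v1, v0 := b[len(b)-4], b[len(b)-3], b[len(b)-2], b[len(b)-1]
		sum += int(v0) + int(v1) + int(v2) + int(v3)
	}
	for _, v := range b { // leftover 0-3 bytes
		sum += int(v)
	}
	return sum
}

func main() {
	fmt.Println(sumTail([]byte{1, 2, 3, 4, 5, 6, 7, 8, 9}))
}
```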
- for ip >= 4 { + for ; len(src) >= 4; src = src[:len(src)-4] { s.bw.flush32() - v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] + v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] c2.encode(tt[v0]) c1.encode(tt[v1]) s.bw.flush32() c2.encode(tt[v2]) c1.encode(tt[v3]) - ip -= 4 } case s.actualTableLog <= 8: // We can encode 4 symbols without requiring a flush - for ip >= 4 { + for ; len(src) >= 4; src = src[:len(src)-4] { s.bw.flush32() - v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] + v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] c2.encodeZero(tt[v0]) c1.encodeZero(tt[v1]) c2.encodeZero(tt[v2]) c1.encodeZero(tt[v3]) - ip -= 4 } default: - for ip >= 4 { + for ; len(src) >= 4; src = src[:len(src)-4] { s.bw.flush32() - v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] + v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] c2.encodeZero(tt[v0]) c1.encodeZero(tt[v1]) s.bw.flush32() c2.encodeZero(tt[v2]) c1.encodeZero(tt[v3]) - ip -= 4 } } @@ -459,15 +456,17 @@ func (s *Scratch) countSimple(in []byte) (max int) { for _, v := range in { s.count[v]++ } - m := uint32(0) + m, symlen := uint32(0), s.symbolLen for i, v := range s.count[:] { + if v == 0 { + continue + } if v > m { m = v } - if v > 0 { - s.symbolLen = uint16(i) + 1 - } + symlen = uint16(i) + 1 } + s.symbolLen = symlen return int(m) } diff --git a/vendor/github.com/klauspost/compress/huff0/bitreader.go b/vendor/github.com/klauspost/compress/huff0/bitreader.go index 504a7be9da..e36d9742f9 100644 --- a/vendor/github.com/klauspost/compress/huff0/bitreader.go +++ b/vendor/github.com/klauspost/compress/huff0/bitreader.go @@ -67,7 +67,6 @@ func (b *bitReaderBytes) fillFast() { // 2 bounds checks. v := b.in[b.off-4 : b.off] - v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value |= uint64(low) << (b.bitsRead - 32) b.bitsRead -= 32 @@ -88,8 +87,7 @@ func (b *bitReaderBytes) fill() { return } if b.off > 4 { - v := b.in[b.off-4:] - v = v[:4] + v := b.in[b.off-4 : b.off] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value |= uint64(low) << (b.bitsRead - 32) b.bitsRead -= 32 @@ -179,7 +177,6 @@ func (b *bitReaderShifted) fillFast() { // 2 bounds checks. 
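// Annotation: the single half-open slice expression below hands the
// compiler both bounds at once. The old form sliced b.in[b.off-4:] and
// then re-sliced v = v[:4] purely to fix the length at 4; with the
// two-operand slice that reslice is redundant, and the four byte loads
// v[0]..v[3] are provably in range.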
v := b.in[b.off-4 : b.off] - v = v[:4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value |= uint64(low) << ((b.bitsRead - 32) & 63) b.bitsRead -= 32 @@ -200,8 +197,7 @@ func (b *bitReaderShifted) fill() { return } if b.off > 4 { - v := b.in[b.off-4:] - v = v[:4] + v := b.in[b.off-4 : b.off] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value |= uint64(low) << ((b.bitsRead - 32) & 63) b.bitsRead -= 32 diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go index 4d14542fac..cdc94856f2 100644 --- a/vendor/github.com/klauspost/compress/huff0/compress.go +++ b/vendor/github.com/klauspost/compress/huff0/compress.go @@ -365,29 +365,29 @@ func (s *Scratch) countSimple(in []byte) (max int, reuse bool) { m := uint32(0) if len(s.prevTable) > 0 { for i, v := range s.count[:] { + if v == 0 { + continue + } if v > m { m = v } - if v > 0 { - s.symbolLen = uint16(i) + 1 - if i >= len(s.prevTable) { - reuse = false - } else { - if s.prevTable[i].nBits == 0 { - reuse = false - } - } + s.symbolLen = uint16(i) + 1 + if i >= len(s.prevTable) { + reuse = false + } else if s.prevTable[i].nBits == 0 { + reuse = false } } return int(m), reuse } for i, v := range s.count[:] { + if v == 0 { + continue + } if v > m { m = v } - if v > 0 { - s.symbolLen = uint16(i) + 1 - } + s.symbolLen = uint16(i) + 1 } return int(m), false } @@ -484,34 +484,35 @@ func (s *Scratch) buildCTable() error { // Different from reference implementation. huffNode0 := s.nodes[0 : huffNodesLen+1] - for huffNode[nonNullRank].count == 0 { + for huffNode[nonNullRank].count() == 0 { nonNullRank-- } lowS := int16(nonNullRank) nodeRoot := nodeNb + lowS - 1 lowN := nodeNb - huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count - huffNode[lowS].parent, huffNode[lowS-1].parent = uint16(nodeNb), uint16(nodeNb) + huffNode[nodeNb].setCount(huffNode[lowS].count() + huffNode[lowS-1].count()) + huffNode[lowS].setParent(nodeNb) + huffNode[lowS-1].setParent(nodeNb) nodeNb++ lowS -= 2 for n := nodeNb; n <= nodeRoot; n++ { - huffNode[n].count = 1 << 30 + huffNode[n].setCount(1 << 30) } // fake entry, strong barrier - huffNode0[0].count = 1 << 31 + huffNode0[0].setCount(1 << 31) // create parents for nodeNb <= nodeRoot { var n1, n2 int16 - if huffNode0[lowS+1].count < huffNode0[lowN+1].count { + if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() { n1 = lowS lowS-- } else { n1 = lowN lowN++ } - if huffNode0[lowS+1].count < huffNode0[lowN+1].count { + if huffNode0[lowS+1].count() < huffNode0[lowN+1].count() { n2 = lowS lowS-- } else { @@ -519,18 +520,19 @@ func (s *Scratch) buildCTable() error { lowN++ } - huffNode[nodeNb].count = huffNode0[n1+1].count + huffNode0[n2+1].count - huffNode0[n1+1].parent, huffNode0[n2+1].parent = uint16(nodeNb), uint16(nodeNb) + huffNode[nodeNb].setCount(huffNode0[n1+1].count() + huffNode0[n2+1].count()) + huffNode0[n1+1].setParent(nodeNb) + huffNode0[n2+1].setParent(nodeNb) nodeNb++ } // distribute weights (unlimited tree height) - huffNode[nodeRoot].nbBits = 0 + huffNode[nodeRoot].setNbBits(0) for n := nodeRoot - 1; n >= startNode; n-- { - huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1 + huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1) } for n := uint16(0); n <= nonNullRank; n++ { - huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1 + huffNode[n].setNbBits(huffNode[huffNode[n].parent()].nbBits() + 1) } 
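The `count()`/`setCount()`/`setNbBits()` accessors used above come from the `nodeElt` refactor later in this patch: the four fields are packed into a single `uint64` (count in bits 0-31, parent in 32-47, symbol in 48-55, nbBits in 56-63) so the compiler loads and stores whole nodes at once. A small stand-alone round-trip check of that layout, re-declaring a subset of the accessors locally for illustration:

```go
package main

import "fmt"

type nodeElt uint64

func makeNodeElt(count uint32, symbol byte) nodeElt {
	return nodeElt(count) | nodeElt(symbol)<<48
}

func (e *nodeElt) count() uint32      { return uint32(*e) }
func (e *nodeElt) symbol() byte       { return byte(*e >> 48) }
func (e *nodeElt) nbBits() uint8      { return uint8(*e >> 56) }
func (e *nodeElt) setNbBits(n uint8)  { *e = (*e)&0x00ffffffffffffff | nodeElt(n)<<56 }

func main() {
	n := makeNodeElt(123456, 'A')
	n.setNbBits(11)
	// Each field survives the packing unchanged.
	fmt.Println(n.count(), string(n.symbol()), n.nbBits()) // 123456 A 11
}
```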
s.actualTableLog = s.setMaxHeight(int(nonNullRank)) maxNbBits := s.actualTableLog @@ -542,7 +544,7 @@ func (s *Scratch) buildCTable() error { var nbPerRank [tableLogMax + 1]uint16 var valPerRank [16]uint16 for _, v := range huffNode[:nonNullRank+1] { - nbPerRank[v.nbBits]++ + nbPerRank[v.nbBits()]++ } // determine stating value per rank { @@ -557,7 +559,7 @@ func (s *Scratch) buildCTable() error { // push nbBits per symbol, symbol order for _, v := range huffNode[:nonNullRank+1] { - s.cTable[v.symbol].nBits = v.nbBits + s.cTable[v.symbol()].nBits = v.nbBits() } // assign value within rank, symbol order @@ -603,12 +605,12 @@ func (s *Scratch) huffSort() { pos := rank[r].current rank[r].current++ prev := nodes[(pos-1)&huffNodesMask] - for pos > rank[r].base && c > prev.count { + for pos > rank[r].base && c > prev.count() { nodes[pos&huffNodesMask] = prev pos-- prev = nodes[(pos-1)&huffNodesMask] } - nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)} + nodes[pos&huffNodesMask] = makeNodeElt(c, byte(n)) } } @@ -617,7 +619,7 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { huffNode := s.nodes[1 : huffNodesLen+1] //huffNode = huffNode[: huffNodesLen] - largestBits := huffNode[lastNonNull].nbBits + largestBits := huffNode[lastNonNull].nbBits() // early exit : no elt > maxNbBits if largestBits <= maxNbBits { @@ -627,14 +629,14 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { baseCost := int(1) << (largestBits - maxNbBits) n := uint32(lastNonNull) - for huffNode[n].nbBits > maxNbBits { - totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)) - huffNode[n].nbBits = maxNbBits + for huffNode[n].nbBits() > maxNbBits { + totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits())) + huffNode[n].setNbBits(maxNbBits) n-- } // n stops at huffNode[n].nbBits <= maxNbBits - for huffNode[n].nbBits == maxNbBits { + for huffNode[n].nbBits() == maxNbBits { n-- } // n end at index of smallest symbol using < maxNbBits @@ -655,10 +657,10 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { { currentNbBits := maxNbBits for pos := int(n); pos >= 0; pos-- { - if huffNode[pos].nbBits >= currentNbBits { + if huffNode[pos].nbBits() >= currentNbBits { continue } - currentNbBits = huffNode[pos].nbBits // < maxNbBits + currentNbBits = huffNode[pos].nbBits() // < maxNbBits rankLast[maxNbBits-currentNbBits] = uint32(pos) } } @@ -675,8 +677,8 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { if lowPos == noSymbol { break } - highTotal := huffNode[highPos].count - lowTotal := 2 * huffNode[lowPos].count + highTotal := huffNode[highPos].count() + lowTotal := 2 * huffNode[lowPos].count() if highTotal <= lowTotal { break } @@ -692,13 +694,14 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { // this rank is no longer empty rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease] } - huffNode[rankLast[nBitsToDecrease]].nbBits++ + huffNode[rankLast[nBitsToDecrease]].setNbBits(1 + + huffNode[rankLast[nBitsToDecrease]].nbBits()) if rankLast[nBitsToDecrease] == 0 { /* special case, reached largest symbol */ rankLast[nBitsToDecrease] = noSymbol } else { rankLast[nBitsToDecrease]-- - if huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease { + if huffNode[rankLast[nBitsToDecrease]].nbBits() != maxNbBits-nBitsToDecrease { rankLast[nBitsToDecrease] = noSymbol /* this rank is now empty */ } } @@ -706,15 +709,15 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { for totalCost < 0 { /* Sometimes, cost correction overshoot */ if rankLast[1] == 
noSymbol { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ - for huffNode[n].nbBits == maxNbBits { + for huffNode[n].nbBits() == maxNbBits { n-- } - huffNode[n+1].nbBits-- + huffNode[n+1].setNbBits(huffNode[n+1].nbBits() - 1) rankLast[1] = n + 1 totalCost++ continue } - huffNode[rankLast[1]+1].nbBits-- + huffNode[rankLast[1]+1].setNbBits(huffNode[rankLast[1]+1].nbBits() - 1) rankLast[1]++ totalCost++ } @@ -722,9 +725,26 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { return maxNbBits } -type nodeElt struct { - count uint32 - parent uint16 - symbol byte - nbBits uint8 +// A nodeElt is the fields +// +// count uint32 +// parent uint16 +// symbol byte +// nbBits uint8 +// +// in some order, all squashed into an integer so that the compiler +// always loads and stores entire nodeElts instead of separate fields. +type nodeElt uint64 + +func makeNodeElt(count uint32, symbol byte) nodeElt { + return nodeElt(count) | nodeElt(symbol)<<48 } + +func (e *nodeElt) count() uint32 { return uint32(*e) } +func (e *nodeElt) parent() uint16 { return uint16(*e >> 32) } +func (e *nodeElt) symbol() byte { return byte(*e >> 48) } +func (e *nodeElt) nbBits() uint8 { return uint8(*e >> 56) } + +func (e *nodeElt) setCount(c uint32) { *e = (*e)&0xffffffff00000000 | nodeElt(c) } +func (e *nodeElt) setParent(p int16) { *e = (*e)&0xffff0000ffffffff | nodeElt(uint16(p))<<32 } +func (e *nodeElt) setNbBits(n uint8) { *e = (*e)&0x00ffffffffffffff | nodeElt(n)<<56 } diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s index 8d2187a2ce..c4c7ab2d1f 100644 --- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s +++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s @@ -4,360 +4,349 @@ // func decompress4x_main_loop_amd64(ctx *decompress4xContext) TEXT ·decompress4x_main_loop_amd64(SB), $0-8 - XORQ DX, DX - // Preload values MOVQ ctx+0(FP), AX MOVBQZX 8(AX), DI - MOVQ 16(AX), SI - MOVQ 48(AX), BX - MOVQ 24(AX), R9 - MOVQ 32(AX), R10 - MOVQ (AX), R11 + MOVQ 16(AX), BX + MOVQ 48(AX), SI + MOVQ 24(AX), R8 + MOVQ 32(AX), R9 + MOVQ (AX), R10 // Main loop main_loop: - MOVQ SI, R8 - CMPQ R8, BX + XORL DX, DX + CMPQ BX, SI SETGE DL // br0.fillFast32() - MOVQ 32(R11), R12 - MOVBQZX 40(R11), R13 - CMPQ R13, $0x20 + MOVQ 32(R10), R11 + MOVBQZX 40(R10), R12 + CMPQ R12, $0x20 JBE skip_fill0 - MOVQ 24(R11), AX - SUBQ $0x20, R13 + MOVQ 24(R10), AX + SUBQ $0x20, R12 SUBQ $0x04, AX - MOVQ (R11), R14 + MOVQ (R10), R13 // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (AX)(R14*1), R14 - MOVQ R13, CX - SHLQ CL, R14 - MOVQ AX, 24(R11) - ORQ R14, R12 + MOVL (AX)(R13*1), R13 + MOVQ R12, CX + SHLQ CL, R13 + MOVQ AX, 24(R10) + ORQ R13, R11 - // exhausted = exhausted || (br0.off < 4) - CMPQ AX, $0x04 - SETLT AL - ORB AL, DL + // exhausted += (br0.off < 4) + CMPQ AX, $0x04 + ADCB $+0, DL skip_fill0: // val0 := br0.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v0 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br0.advance(uint8(v0.entry) MOVB CH, AL - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 // val1 := br0.peekTopBits(peekBits) MOVQ DI, CX - MOVQ R12, R14 - SHRQ CL, R14 + MOVQ R11, R13 + SHRQ CL, R13 // v1 := table[val1&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br0.advance(uint8(v1.entry)) MOVB CH, AH - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 // 
these two writes get coalesced // out[id * dstEvery + 0] = uint8(v0.entry >> 8) // out[id * dstEvery + 1] = uint8(v1.entry >> 8) - MOVW AX, (R8) + MOVW AX, (BX) // update the bitreader structure - MOVQ R12, 32(R11) - MOVB R13, 40(R11) - ADDQ R9, R8 + MOVQ R11, 32(R10) + MOVB R12, 40(R10) // br1.fillFast32() - MOVQ 80(R11), R12 - MOVBQZX 88(R11), R13 - CMPQ R13, $0x20 + MOVQ 80(R10), R11 + MOVBQZX 88(R10), R12 + CMPQ R12, $0x20 JBE skip_fill1 - MOVQ 72(R11), AX - SUBQ $0x20, R13 + MOVQ 72(R10), AX + SUBQ $0x20, R12 SUBQ $0x04, AX - MOVQ 48(R11), R14 + MOVQ 48(R10), R13 // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (AX)(R14*1), R14 - MOVQ R13, CX - SHLQ CL, R14 - MOVQ AX, 72(R11) - ORQ R14, R12 + MOVL (AX)(R13*1), R13 + MOVQ R12, CX + SHLQ CL, R13 + MOVQ AX, 72(R10) + ORQ R13, R11 - // exhausted = exhausted || (br1.off < 4) - CMPQ AX, $0x04 - SETLT AL - ORB AL, DL + // exhausted += (br1.off < 4) + CMPQ AX, $0x04 + ADCB $+0, DL skip_fill1: // val0 := br1.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v0 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br1.advance(uint8(v0.entry) MOVB CH, AL - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 // val1 := br1.peekTopBits(peekBits) MOVQ DI, CX - MOVQ R12, R14 - SHRQ CL, R14 + MOVQ R11, R13 + SHRQ CL, R13 // v1 := table[val1&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br1.advance(uint8(v1.entry)) MOVB CH, AH - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 // these two writes get coalesced // out[id * dstEvery + 0] = uint8(v0.entry >> 8) // out[id * dstEvery + 1] = uint8(v1.entry >> 8) - MOVW AX, (R8) + MOVW AX, (BX)(R8*1) // update the bitreader structure - MOVQ R12, 80(R11) - MOVB R13, 88(R11) - ADDQ R9, R8 + MOVQ R11, 80(R10) + MOVB R12, 88(R10) // br2.fillFast32() - MOVQ 128(R11), R12 - MOVBQZX 136(R11), R13 - CMPQ R13, $0x20 + MOVQ 128(R10), R11 + MOVBQZX 136(R10), R12 + CMPQ R12, $0x20 JBE skip_fill2 - MOVQ 120(R11), AX - SUBQ $0x20, R13 + MOVQ 120(R10), AX + SUBQ $0x20, R12 SUBQ $0x04, AX - MOVQ 96(R11), R14 + MOVQ 96(R10), R13 // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (AX)(R14*1), R14 - MOVQ R13, CX - SHLQ CL, R14 - MOVQ AX, 120(R11) - ORQ R14, R12 + MOVL (AX)(R13*1), R13 + MOVQ R12, CX + SHLQ CL, R13 + MOVQ AX, 120(R10) + ORQ R13, R11 - // exhausted = exhausted || (br2.off < 4) - CMPQ AX, $0x04 - SETLT AL - ORB AL, DL + // exhausted += (br2.off < 4) + CMPQ AX, $0x04 + ADCB $+0, DL skip_fill2: // val0 := br2.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v0 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br2.advance(uint8(v0.entry) MOVB CH, AL - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 // val1 := br2.peekTopBits(peekBits) MOVQ DI, CX - MOVQ R12, R14 - SHRQ CL, R14 + MOVQ R11, R13 + SHRQ CL, R13 // v1 := table[val1&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br2.advance(uint8(v1.entry)) MOVB CH, AH - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 // these two writes get coalesced // out[id * dstEvery + 0] = uint8(v0.entry >> 8) // out[id * dstEvery + 1] = uint8(v1.entry >> 8) - MOVW AX, (R8) + MOVW AX, (BX)(R8*2) // update the bitreader structure - MOVQ R12, 128(R11) - MOVB R13, 136(R11) - ADDQ R9, R8 + MOVQ R11, 128(R10) + MOVB R12, 136(R10) // br3.fillFast32() - MOVQ 176(R11), R12 - MOVBQZX 184(R11), R13 - CMPQ R13, $0x20 + MOVQ 176(R10), R11 + MOVBQZX 184(R10), R12 + CMPQ R12, $0x20 JBE skip_fill3 - MOVQ 
168(R11), AX - SUBQ $0x20, R13 + MOVQ 168(R10), AX + SUBQ $0x20, R12 SUBQ $0x04, AX - MOVQ 144(R11), R14 + MOVQ 144(R10), R13 // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (AX)(R14*1), R14 - MOVQ R13, CX - SHLQ CL, R14 - MOVQ AX, 168(R11) - ORQ R14, R12 + MOVL (AX)(R13*1), R13 + MOVQ R12, CX + SHLQ CL, R13 + MOVQ AX, 168(R10) + ORQ R13, R11 - // exhausted = exhausted || (br3.off < 4) - CMPQ AX, $0x04 - SETLT AL - ORB AL, DL + // exhausted += (br3.off < 4) + CMPQ AX, $0x04 + ADCB $+0, DL skip_fill3: // val0 := br3.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v0 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br3.advance(uint8(v0.entry) MOVB CH, AL - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 // val1 := br3.peekTopBits(peekBits) MOVQ DI, CX - MOVQ R12, R14 - SHRQ CL, R14 + MOVQ R11, R13 + SHRQ CL, R13 // v1 := table[val1&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br3.advance(uint8(v1.entry)) MOVB CH, AH - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 // these two writes get coalesced // out[id * dstEvery + 0] = uint8(v0.entry >> 8) // out[id * dstEvery + 1] = uint8(v1.entry >> 8) - MOVW AX, (R8) + LEAQ (R8)(R8*2), CX + MOVW AX, (BX)(CX*1) // update the bitreader structure - MOVQ R12, 176(R11) - MOVB R13, 184(R11) - ADDQ $0x02, SI + MOVQ R11, 176(R10) + MOVB R12, 184(R10) + ADDQ $0x02, BX TESTB DL, DL JZ main_loop MOVQ ctx+0(FP), AX - SUBQ 16(AX), SI - SHLQ $0x02, SI - MOVQ SI, 40(AX) + SUBQ 16(AX), BX + SHLQ $0x02, BX + MOVQ BX, 40(AX) RET // func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext) TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8 - XORQ DX, DX - // Preload values MOVQ ctx+0(FP), CX MOVBQZX 8(CX), DI MOVQ 16(CX), BX MOVQ 48(CX), SI - MOVQ 24(CX), R9 - MOVQ 32(CX), R10 - MOVQ (CX), R11 + MOVQ 24(CX), R8 + MOVQ 32(CX), R9 + MOVQ (CX), R10 // Main loop main_loop: - MOVQ BX, R8 - CMPQ R8, SI + XORL DX, DX + CMPQ BX, SI SETGE DL // br0.fillFast32() - MOVQ 32(R11), R12 - MOVBQZX 40(R11), R13 - CMPQ R13, $0x20 + MOVQ 32(R10), R11 + MOVBQZX 40(R10), R12 + CMPQ R12, $0x20 JBE skip_fill0 - MOVQ 24(R11), R14 - SUBQ $0x20, R13 - SUBQ $0x04, R14 - MOVQ (R11), R15 + MOVQ 24(R10), R13 + SUBQ $0x20, R12 + SUBQ $0x04, R13 + MOVQ (R10), R14 // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (R14)(R15*1), R15 - MOVQ R13, CX - SHLQ CL, R15 - MOVQ R14, 24(R11) - ORQ R15, R12 + MOVL (R13)(R14*1), R14 + MOVQ R12, CX + SHLQ CL, R14 + MOVQ R13, 24(R10) + ORQ R14, R11 - // exhausted = exhausted || (br0.off < 4) - CMPQ R14, $0x04 - SETLT AL - ORB AL, DL + // exhausted += (br0.off < 4) + CMPQ R13, $0x04 + ADCB $+0, DL skip_fill0: // val0 := br0.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v0 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br0.advance(uint8(v0.entry) MOVB CH, AL - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 // val1 := br0.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v1 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br0.advance(uint8(v1.entry) MOVB CH, AH - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 BSWAPL AX // val2 := br0.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v2 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br0.advance(uint8(v2.entry) MOVB CH, AH - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + 
ADDB CL, R12 // val3 := br0.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v3 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br0.advance(uint8(v3.entry) MOVB CH, AL - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 BSWAPL AX // these four writes get coalesced @@ -365,88 +354,86 @@ skip_fill0: // out[id * dstEvery + 1] = uint8(v1.entry >> 8) // out[id * dstEvery + 3] = uint8(v2.entry >> 8) // out[id * dstEvery + 4] = uint8(v3.entry >> 8) - MOVL AX, (R8) + MOVL AX, (BX) // update the bitreader structure - MOVQ R12, 32(R11) - MOVB R13, 40(R11) - ADDQ R9, R8 + MOVQ R11, 32(R10) + MOVB R12, 40(R10) // br1.fillFast32() - MOVQ 80(R11), R12 - MOVBQZX 88(R11), R13 - CMPQ R13, $0x20 + MOVQ 80(R10), R11 + MOVBQZX 88(R10), R12 + CMPQ R12, $0x20 JBE skip_fill1 - MOVQ 72(R11), R14 - SUBQ $0x20, R13 - SUBQ $0x04, R14 - MOVQ 48(R11), R15 + MOVQ 72(R10), R13 + SUBQ $0x20, R12 + SUBQ $0x04, R13 + MOVQ 48(R10), R14 // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (R14)(R15*1), R15 - MOVQ R13, CX - SHLQ CL, R15 - MOVQ R14, 72(R11) - ORQ R15, R12 + MOVL (R13)(R14*1), R14 + MOVQ R12, CX + SHLQ CL, R14 + MOVQ R13, 72(R10) + ORQ R14, R11 - // exhausted = exhausted || (br1.off < 4) - CMPQ R14, $0x04 - SETLT AL - ORB AL, DL + // exhausted += (br1.off < 4) + CMPQ R13, $0x04 + ADCB $+0, DL skip_fill1: // val0 := br1.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v0 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br1.advance(uint8(v0.entry) MOVB CH, AL - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 // val1 := br1.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v1 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br1.advance(uint8(v1.entry) MOVB CH, AH - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 BSWAPL AX // val2 := br1.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v2 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br1.advance(uint8(v2.entry) MOVB CH, AH - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 // val3 := br1.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v3 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br1.advance(uint8(v3.entry) MOVB CH, AL - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 BSWAPL AX // these four writes get coalesced @@ -454,88 +441,86 @@ skip_fill1: // out[id * dstEvery + 1] = uint8(v1.entry >> 8) // out[id * dstEvery + 3] = uint8(v2.entry >> 8) // out[id * dstEvery + 4] = uint8(v3.entry >> 8) - MOVL AX, (R8) + MOVL AX, (BX)(R8*1) // update the bitreader structure - MOVQ R12, 80(R11) - MOVB R13, 88(R11) - ADDQ R9, R8 + MOVQ R11, 80(R10) + MOVB R12, 88(R10) // br2.fillFast32() - MOVQ 128(R11), R12 - MOVBQZX 136(R11), R13 - CMPQ R13, $0x20 + MOVQ 128(R10), R11 + MOVBQZX 136(R10), R12 + CMPQ R12, $0x20 JBE skip_fill2 - MOVQ 120(R11), R14 - SUBQ $0x20, R13 - SUBQ $0x04, R14 - MOVQ 96(R11), R15 + MOVQ 120(R10), R13 + SUBQ $0x20, R12 + SUBQ $0x04, R13 + MOVQ 96(R10), R14 // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (R14)(R15*1), R15 - MOVQ R13, CX - SHLQ CL, R15 - MOVQ R14, 120(R11) - ORQ R15, R12 + MOVL (R13)(R14*1), R14 + MOVQ R12, CX + SHLQ CL, R14 + MOVQ R13, 120(R10) + ORQ R14, R11 - // exhausted = exhausted || (br2.off < 4) - CMPQ R14, $0x04 - SETLT AL - 
ORB AL, DL + // exhausted += (br2.off < 4) + CMPQ R13, $0x04 + ADCB $+0, DL skip_fill2: // val0 := br2.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v0 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br2.advance(uint8(v0.entry) MOVB CH, AL - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 // val1 := br2.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v1 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br2.advance(uint8(v1.entry) MOVB CH, AH - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 BSWAPL AX // val2 := br2.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v2 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br2.advance(uint8(v2.entry) MOVB CH, AH - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 // val3 := br2.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v3 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br2.advance(uint8(v3.entry) MOVB CH, AL - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 BSWAPL AX // these four writes get coalesced @@ -543,88 +528,86 @@ skip_fill2: // out[id * dstEvery + 1] = uint8(v1.entry >> 8) // out[id * dstEvery + 3] = uint8(v2.entry >> 8) // out[id * dstEvery + 4] = uint8(v3.entry >> 8) - MOVL AX, (R8) + MOVL AX, (BX)(R8*2) // update the bitreader structure - MOVQ R12, 128(R11) - MOVB R13, 136(R11) - ADDQ R9, R8 + MOVQ R11, 128(R10) + MOVB R12, 136(R10) // br3.fillFast32() - MOVQ 176(R11), R12 - MOVBQZX 184(R11), R13 - CMPQ R13, $0x20 + MOVQ 176(R10), R11 + MOVBQZX 184(R10), R12 + CMPQ R12, $0x20 JBE skip_fill3 - MOVQ 168(R11), R14 - SUBQ $0x20, R13 - SUBQ $0x04, R14 - MOVQ 144(R11), R15 + MOVQ 168(R10), R13 + SUBQ $0x20, R12 + SUBQ $0x04, R13 + MOVQ 144(R10), R14 // b.value |= uint64(low) << (b.bitsRead & 63) - MOVL (R14)(R15*1), R15 - MOVQ R13, CX - SHLQ CL, R15 - MOVQ R14, 168(R11) - ORQ R15, R12 + MOVL (R13)(R14*1), R14 + MOVQ R12, CX + SHLQ CL, R14 + MOVQ R13, 168(R10) + ORQ R14, R11 - // exhausted = exhausted || (br3.off < 4) - CMPQ R14, $0x04 - SETLT AL - ORB AL, DL + // exhausted += (br3.off < 4) + CMPQ R13, $0x04 + ADCB $+0, DL skip_fill3: // val0 := br3.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v0 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br3.advance(uint8(v0.entry) MOVB CH, AL - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 // val1 := br3.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v1 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br3.advance(uint8(v1.entry) MOVB CH, AH - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 BSWAPL AX // val2 := br3.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v2 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br3.advance(uint8(v2.entry) MOVB CH, AH - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 // val3 := br3.peekTopBits(peekBits) - MOVQ R12, R14 + MOVQ R11, R13 MOVQ DI, CX - SHRQ CL, R14 + SHRQ CL, R13 // v3 := table[val0&mask] - MOVW (R10)(R14*2), CX + MOVW (R9)(R13*2), CX // br3.advance(uint8(v3.entry) MOVB CH, AL - SHLQ CL, R12 - ADDB CL, R13 + SHLQ CL, R11 + ADDB CL, R12 BSWAPL AX // these four writes get coalesced 
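// Note on the exhausted-flag change repeated throughout these loops: the
// old SETLT AL / ORB AL, DL pair becomes a single ADCB $+0, DL after the
// CMPQ. CMPQ x, $0x04 leaves the carry flag set exactly when x < 4
// (unsigned borrow), and ADCB adds that carry into DL, so DL goes nonzero
// as soon as any bit reader runs low and TESTB DL, DL ends the main loop.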
@@ -632,11 +615,12 @@ skip_fill3: // out[id * dstEvery + 1] = uint8(v1.entry >> 8) // out[id * dstEvery + 3] = uint8(v2.entry >> 8) // out[id * dstEvery + 4] = uint8(v3.entry >> 8) - MOVL AX, (R8) + LEAQ (R8)(R8*2), CX + MOVL AX, (BX)(CX*1) // update the bitreader structure - MOVQ R12, 176(R11) - MOVB R13, 184(R11) + MOVQ R11, 176(R10) + MOVB R12, 184(R10) ADDQ $0x04, BX TESTB DL, DL JZ main_loop @@ -652,7 +636,7 @@ TEXT ·decompress1x_main_loop_amd64(SB), $0-8 MOVQ 16(CX), DX MOVQ 24(CX), BX CMPQ BX, $0x04 - JB error_max_decoded_size_exeeded + JB error_max_decoded_size_exceeded LEAQ (DX)(BX*1), BX MOVQ (CX), SI MOVQ (SI), R8 @@ -667,7 +651,7 @@ main_loop: // Check if we have room for 4 bytes in the output buffer LEAQ 4(DX), CX CMPQ CX, BX - JGE error_max_decoded_size_exeeded + JGE error_max_decoded_size_exceeded // Decode 4 values CMPQ R11, $0x20 @@ -744,7 +728,7 @@ loop_condition: RET // Report error -error_max_decoded_size_exeeded: +error_max_decoded_size_exceeded: MOVQ ctx+0(FP), AX MOVQ $-1, CX MOVQ CX, 40(AX) @@ -757,7 +741,7 @@ TEXT ·decompress1x_main_loop_bmi2(SB), $0-8 MOVQ 16(CX), DX MOVQ 24(CX), BX CMPQ BX, $0x04 - JB error_max_decoded_size_exeeded + JB error_max_decoded_size_exceeded LEAQ (DX)(BX*1), BX MOVQ (CX), SI MOVQ (SI), R8 @@ -772,7 +756,7 @@ main_loop: // Check if we have room for 4 bytes in the output buffer LEAQ 4(DX), CX CMPQ CX, BX - JGE error_max_decoded_size_exeeded + JGE error_max_decoded_size_exceeded // Decode 4 values CMPQ R11, $0x20 @@ -839,7 +823,7 @@ loop_condition: RET // Report error -error_max_decoded_size_exeeded: +error_max_decoded_size_exceeded: MOVQ ctx+0(FP), AX MOVQ $-1, CX MOVQ CX, 40(AX) diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index da814715da..2445bb4fe5 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -82,8 +82,9 @@ type blockDec struct { err error - // Check against this crc - checkCRC []byte + // Check against this crc, if hasCRC is true. + checkCRC uint32 + hasCRC bool // Frame to use for singlethreaded decoding. // Should not be used by the decoder itself since parent may be another frame. @@ -191,16 +192,14 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { } // Read block data. - if cap(b.dataStorage) < cSize { + if _, ok := br.(*byteBuf); !ok && cap(b.dataStorage) < cSize { + // byteBuf doesn't need a destination buffer. 
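// (When the source is an in-memory byteBuf, readBig below returns a
// sub-slice of the existing buffer instead of copying into dataStorage,
// which is why the allocation can be skipped for that case.)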
if b.lowMem || cSize > maxCompressedBlockSize { b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc) } else { b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc) } } - if cap(b.dst) <= maxSize { - b.dst = make([]byte, 0, maxSize+1) - } b.data, err = br.readBig(cSize, b.dataStorage) if err != nil { if debugDecoder { @@ -209,6 +208,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { } return err } + if cap(b.dst) <= maxSize { + b.dst = make([]byte, 0, maxSize+1) + } return nil } diff --git a/vendor/github.com/klauspost/compress/zstd/decodeheader.go b/vendor/github.com/klauspost/compress/zstd/decodeheader.go index 5022e71c83..f6a240970d 100644 --- a/vendor/github.com/klauspost/compress/zstd/decodeheader.go +++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go @@ -4,7 +4,6 @@ package zstd import ( - "bytes" "encoding/binary" "errors" "io" @@ -102,8 +101,8 @@ func (h *Header) Decode(in []byte) error { } h.HeaderSize += 4 b, in := in[:4], in[4:] - if !bytes.Equal(b, frameMagic) { - if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 { + if string(b) != frameMagic { + if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 { return ErrMagicMismatch } if len(in) < 4 { @@ -153,7 +152,7 @@ func (h *Header) Decode(in []byte) error { } b, in = in[:size], in[size:] h.HeaderSize += int(size) - switch size { + switch len(b) { case 1: h.DictionaryID = uint32(b[0]) case 2: @@ -183,7 +182,7 @@ func (h *Header) Decode(in []byte) error { } b, in = in[:fcsSize], in[fcsSize:] h.HeaderSize += int(fcsSize) - switch fcsSize { + switch len(b) { case 1: h.FrameContentSize = uint64(b[0]) case 2: diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index 74d645f7c3..7113e69ee3 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -5,7 +5,6 @@ package zstd import ( - "bytes" "context" "encoding/binary" "io" @@ -41,8 +40,7 @@ type Decoder struct { frame *frameDec // Custom dictionaries. - // Always uses copies. - dicts map[uint32]dict + dicts map[uint32]*dict // streamWg is the waitgroup for all streams streamWg sync.WaitGroup @@ -104,7 +102,7 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { } // Transfer option dicts. 
- d.dicts = make(map[uint32]dict, len(d.o.dicts)) + d.dicts = make(map[uint32]*dict, len(d.o.dicts)) for _, dc := range d.o.dicts { d.dicts[dc.id] = dc } @@ -342,15 +340,8 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { } return dst, err } - if frame.DictionaryID != nil { - dict, ok := d.dicts[*frame.DictionaryID] - if !ok { - return nil, ErrUnknownDictionary - } - if debugDecoder { - println("setting dict", frame.DictionaryID) - } - frame.history.setDict(&dict) + if err = d.setDict(frame); err != nil { + return nil, err } if frame.WindowSize > d.o.maxWindowSize { if debugDecoder { @@ -459,7 +450,11 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) { println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp) } - if !d.o.ignoreChecksum && len(next.b) > 0 { + if d.o.ignoreChecksum { + return true + } + + if len(next.b) > 0 { n, err := d.current.crc.Write(next.b) if err == nil { if n != len(next.b) { @@ -467,18 +462,16 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) { } } } - if next.err == nil && next.d != nil && len(next.d.checkCRC) != 0 { - got := d.current.crc.Sum64() - var tmp [4]byte - binary.LittleEndian.PutUint32(tmp[:], uint32(got)) - if !d.o.ignoreChecksum && !bytes.Equal(tmp[:], next.d.checkCRC) { + if next.err == nil && next.d != nil && next.d.hasCRC { + got := uint32(d.current.crc.Sum64()) + if got != next.d.checkCRC { if debugDecoder { - println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)") + printf("CRC Check Failed: %08x (got) != %08x (on stream)\n", got, next.d.checkCRC) } d.current.err = ErrCRCMismatch } else { if debugDecoder { - println("CRC ok", tmp[:]) + printf("CRC ok %08x\n", got) } } } @@ -494,18 +487,12 @@ func (d *Decoder) nextBlockSync() (ok bool) { if !d.syncStream.inFrame { d.frame.history.reset() d.current.err = d.frame.reset(&d.syncStream.br) + if d.current.err == nil { + d.current.err = d.setDict(d.frame) + } if d.current.err != nil { return false } - if d.frame.DictionaryID != nil { - dict, ok := d.dicts[*d.frame.DictionaryID] - if !ok { - d.current.err = ErrUnknownDictionary - return false - } else { - d.frame.history.setDict(&dict) - } - } if d.frame.WindowSize > d.o.maxDecodedSize || d.frame.WindowSize > d.o.maxWindowSize { d.current.err = ErrDecoderSizeExceeded return false @@ -864,13 +851,8 @@ decodeStream: if debugDecoder && err != nil { println("Frame decoder returned", err) } - if err == nil && frame.DictionaryID != nil { - dict, ok := d.dicts[*frame.DictionaryID] - if !ok { - err = ErrUnknownDictionary - } else { - frame.history.setDict(&dict) - } + if err == nil { + err = d.setDict(frame) } if err == nil && d.frame.WindowSize > d.o.maxWindowSize { if debugDecoder { @@ -918,18 +900,22 @@ decodeStream: println("next block returned error:", err) } dec.err = err - dec.checkCRC = nil + dec.hasCRC = false if dec.Last && frame.HasCheckSum && err == nil { crc, err := frame.rawInput.readSmall(4) - if err != nil { + if len(crc) < 4 { + if err == nil { + err = io.ErrUnexpectedEOF + + } println("CRC missing?", err) dec.err = err - } - var tmp [4]byte - copy(tmp[:], crc) - dec.checkCRC = tmp[:] - if debugDecoder { - println("found crc to check:", dec.checkCRC) + } else { + dec.checkCRC = binary.LittleEndian.Uint32(crc) + dec.hasCRC = true + if debugDecoder { + printf("found crc to check: %08x\n", dec.checkCRC) + } } } err = dec.err @@ -948,3 +934,20 @@ decodeStream: hist.reset() d.frame.history.b = frameHistCache } + +func (d *Decoder) setDict(frame *frameDec) (err error) { + 
dict, ok := d.dicts[frame.DictionaryID] + if ok { + if debugDecoder { + println("setting dict", frame.DictionaryID) + } + frame.history.setDict(dict) + } else if frame.DictionaryID != 0 { + // A zero or missing dictionary id is ambiguous: + // either dictionary zero, or no dictionary. In particular, + // zstd --patch-from uses this id for the source file, + // so only return an error if the dictionary id is not zero. + err = ErrUnknownDictionary + } + return err +} diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go index f42448e69c..07a90dd7af 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go @@ -6,6 +6,8 @@ package zstd import ( "errors" + "fmt" + "math/bits" "runtime" ) @@ -18,7 +20,7 @@ type decoderOptions struct { concurrent int maxDecodedSize uint64 maxWindowSize uint64 - dicts []dict + dicts []*dict ignoreChecksum bool limitToCap bool decodeBufsBelow int @@ -85,7 +87,13 @@ func WithDecoderMaxMemory(n uint64) DOption { } // WithDecoderDicts allows to register one or more dictionaries for the decoder. -// If several dictionaries with the same ID is provided the last one will be used. +// +// Each slice in dict must be in the [dictionary format] produced by +// "zstd --train" from the Zstandard reference implementation. +// +// If several dictionaries with the same ID are provided, the last one will be used. +// +// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format func WithDecoderDicts(dicts ...[]byte) DOption { return func(o *decoderOptions) error { for _, b := range dicts { @@ -93,12 +101,24 @@ func WithDecoderDicts(dicts ...[]byte) DOption { if err != nil { return err } - o.dicts = append(o.dicts, *d) + o.dicts = append(o.dicts, d) } return nil } } +// WithEncoderDictRaw registers a dictionary that may be used by the decoder. +// The slice content can be arbitrary data. +func WithDecoderDictRaw(id uint32, content []byte) DOption { + return func(o *decoderOptions) error { + if bits.UintSize > 32 && uint(len(content)) > dictMaxLength { + return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content)) + } + o.dicts = append(o.dicts, &dict{id: id, content: content, offsets: [3]int{1, 4, 8}}) + return nil + } +} + // WithDecoderMaxWindow allows to set a maximum window size for decodes. // This allows rejecting packets that will cause big memory usage. // The Decoder will likely allocate more memory based on the WithDecoderLowmem setting. diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go index a36ae83ef5..66a95c18ef 100644 --- a/vendor/github.com/klauspost/compress/zstd/dict.go +++ b/vendor/github.com/klauspost/compress/zstd/dict.go @@ -1,7 +1,6 @@ package zstd import ( - "bytes" "encoding/binary" "errors" "fmt" @@ -20,7 +19,10 @@ type dict struct { content []byte } -var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec} +const dictMagic = "\x37\xa4\x30\xec" + +// Maximum dictionary size for the reference implementation (1.5.3) is 2 GiB. +const dictMaxLength = 1 << 31 // ID returns the dictionary id or 0 if d is nil. 
func (d *dict) ID() uint32 { @@ -50,7 +52,7 @@ func loadDict(b []byte) (*dict, error) { ofDec: sequenceDec{fse: &fseDecoder{}}, mlDec: sequenceDec{fse: &fseDecoder{}}, } - if !bytes.Equal(b[:4], dictMagic[:]) { + if string(b[:4]) != dictMagic { return nil, ErrMagicMismatch } d.id = binary.LittleEndian.Uint32(b[4:8]) diff --git a/vendor/github.com/klauspost/compress/zstd/enc_base.go b/vendor/github.com/klauspost/compress/zstd/enc_base.go index 15ae8ee807..bfb2e146c3 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_base.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go @@ -16,6 +16,7 @@ type fastBase struct { cur int32 // maximum offset. Should be at least 2x block size. maxMatchOff int32 + bufferReset int32 hist []byte crc *xxhash.Digest tmp [8]byte @@ -56,8 +57,8 @@ func (e *fastBase) Block() *blockEnc { } func (e *fastBase) addBlock(src []byte) int32 { - if debugAsserts && e.cur > bufferReset { - panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset)) + if debugAsserts && e.cur > e.bufferReset { + panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, e.bufferReset)) } // check if we have space already if len(e.hist)+len(src) > cap(e.hist) { @@ -126,24 +127,7 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 { panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize)) } } - a := src[s:] - b := src[t:] - b = b[:len(a)] - end := int32((len(a) >> 3) << 3) - for i := int32(0); i < end; i += 8 { - if diff := load6432(a, i) ^ load6432(b, i); diff != 0 { - return i + int32(bits.TrailingZeros64(diff)>>3) - } - } - - a = a[end:] - b = b[end:] - for i := range a { - if a[i] != b[i] { - return int32(i) + end - } - } - return int32(len(a)) + end + return int32(matchLen(src[s:], src[t:])) } // Reset the encoding table. @@ -171,7 +155,7 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) { // We offset current position so everything will be out of reach. // If above reset line, history will be purged. - if e.cur < bufferReset { + if e.cur < e.bufferReset { e.cur += e.maxMatchOff + int32(len(e.hist)) } e.hist = e.hist[:0] diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go index dbbb88d92b..830f5ba74a 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_best.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go @@ -85,14 +85,10 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. 
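// bufferReset is now a per-encoder field, initialized to
// math.MaxInt32 - 2*windowSize in encoder_options.go later in this patch;
// resetting len(e.hist) bytes early ensures cur-relative offsets cannot
// overflow int32 even after this block's history is appended. Clearing the
// tables with a single array assignment (e.table = [...]prevEntry{})
// typically compiles down to one memclr rather than an element loop.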
- for e.cur >= bufferReset { + for e.cur >= e.bufferReset-int32(len(e.hist)) { if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = prevEntry{} - } - for i := range e.longTable[:] { - e.longTable[i] = prevEntry{} - } + e.table = [bestShortTableSize]prevEntry{} + e.longTable = [bestLongTableSize]prevEntry{} e.cur = e.maxMatchOff break } @@ -193,8 +189,8 @@ encodeLoop: panic("offset0 was 0") } - bestOf := func(a, b match) match { - if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 { + bestOf := func(a, b *match) *match { + if a.est-b.est+(a.s-b.s)*bitsPerByte>>10 < 0 { return a } return b @@ -220,22 +216,26 @@ encodeLoop: return m } - best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) - best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)) - best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1)) + m1 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1) + m2 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1) + m3 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1) + m4 := matchAt(candidateS.prev-e.cur, s, uint32(cv), -1) + best := bestOf(bestOf(&m1, &m2), bestOf(&m3, &m4)) if canRepeat && best.length < goodEnough { cv32 := uint32(cv >> 8) spp := s + 1 - best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1)) - best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2)) - best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3)) + m1 := matchAt(spp-offset1, spp, cv32, 1) + m2 := matchAt(spp-offset2, spp, cv32, 2) + m3 := matchAt(spp-offset3, spp, cv32, 3) + best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3)) if best.length > 0 { cv32 = uint32(cv >> 24) spp += 2 - best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1)) - best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2)) - best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3)) + m1 := matchAt(spp-offset1, spp, cv32, 1) + m2 := matchAt(spp-offset2, spp, cv32, 2) + m3 := matchAt(spp-offset3, spp, cv32, 3) + best = bestOf(bestOf(best, &m1), bestOf(&m2, &m3)) } } // Load next and check... @@ -262,26 +262,33 @@ encodeLoop: candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)] // Short at s+1 - best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)) + m1 := matchAt(candidateS.offset-e.cur, s, uint32(cv), -1) // Long at s+1, s+2 - best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)) - best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) - best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)) - best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)) + m2 := matchAt(candidateL.offset-e.cur, s, uint32(cv), -1) + m3 := matchAt(candidateL.prev-e.cur, s, uint32(cv), -1) + m4 := matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1) + m5 := matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1) + best = bestOf(bestOf(bestOf(best, &m1), &m2), bestOf(bestOf(&m3, &m4), &m5)) if false { // Short at s+3. // Too often worse... - best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)) + m := matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1) + best = bestOf(best, &m) } // See if we can find a better match by checking where the current best ends. // Use that offset to see if we can find a better full match. 
if sAt := best.s + best.length; sAt < sLimit { nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen) candidateEnd := e.longTable[nextHashL] - if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 { - bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1)) - if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 { - bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1)) + // Start check at a fixed offset to allow for a few mismatches. + // For this compression level 2 yields the best results. + const skipBeginning = 2 + if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 { + m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1) + bestEnd := bestOf(best, &m) + if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 { + m := matchAt(pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1) + bestEnd = bestOf(bestEnd, &m) } best = bestEnd } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go index d70e3fd3d3..8582f31a7c 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_better.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -62,14 +62,10 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. - for e.cur >= bufferReset { + for e.cur >= e.bufferReset-int32(len(e.hist)) { if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - for i := range e.longTable[:] { - e.longTable[i] = prevEntry{} - } + e.table = [betterShortTableSize]tableEntry{} + e.longTable = [betterLongTableSize]prevEntry{} e.cur = e.maxMatchOff break } @@ -587,7 +583,7 @@ func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. - for e.cur >= bufferReset { + for e.cur >= e.bufferReset-int32(len(e.hist)) { if len(e.hist) == 0 { for i := range e.table[:] { e.table[i] = tableEntry{} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index 1f4a9a2455..7d425109ad 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -44,14 +44,10 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. - for e.cur >= bufferReset { + for e.cur >= e.bufferReset-int32(len(e.hist)) { if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - for i := range e.longTable[:] { - e.longTable[i] = tableEntry{} - } + e.table = [dFastShortTableSize]tableEntry{} + e.longTable = [dFastLongTableSize]tableEntry{} e.cur = e.maxMatchOff break } @@ -388,7 +384,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. - if e.cur >= bufferReset { + if e.cur >= e.bufferReset { for i := range e.table[:] { e.table[i] = tableEntry{} } @@ -685,7 +681,7 @@ encodeLoop: } // We do not store history, so we must offset e.cur to avoid false matches for next user. - if e.cur < bufferReset { + if e.cur < e.bufferReset { e.cur += int32(len(src)) } } @@ -700,7 +696,7 @@ func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. 
- for e.cur >= bufferReset { + for e.cur >= e.bufferReset-int32(len(e.hist)) { if len(e.hist) == 0 { for i := range e.table[:] { e.table[i] = tableEntry{} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go index 202636db05..315b1a8f2f 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go @@ -43,7 +43,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) { ) // Protect against e.cur wraparound. - for e.cur >= bufferReset { + for e.cur >= e.bufferReset-int32(len(e.hist)) { if len(e.hist) == 0 { for i := range e.table[:] { e.table[i] = tableEntry{} @@ -310,7 +310,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { } // Protect against e.cur wraparound. - if e.cur >= bufferReset { + if e.cur >= e.bufferReset { for i := range e.table[:] { e.table[i] = tableEntry{} } @@ -538,7 +538,7 @@ encodeLoop: println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) } // We do not store history, so we must offset e.cur to avoid false matches for next user. - if e.cur < bufferReset { + if e.cur < e.bufferReset { e.cur += int32(len(src)) } } @@ -555,11 +555,9 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) { return } // Protect against e.cur wraparound. - for e.cur >= bufferReset { + for e.cur >= e.bufferReset-int32(len(e.hist)) { if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } + e.table = [tableSize]tableEntry{} e.cur = e.maxMatchOff break } diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index 7aaaedb23e..65c6c36dc1 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -8,6 +8,7 @@ import ( "crypto/rand" "fmt" "io" + "math" rdebug "runtime/debug" "sync" @@ -639,3 +640,37 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } return dst } + +// MaxEncodedSize returns the expected maximum +// size of an encoded block or stream. +func (e *Encoder) MaxEncodedSize(size int) int { + frameHeader := 4 + 2 // magic + frame header & window descriptor + if e.o.dict != nil { + frameHeader += 4 + } + // Frame content size: + if size < 256 { + frameHeader++ + } else if size < 65536+256 { + frameHeader += 2 + } else if size < math.MaxInt32 { + frameHeader += 4 + } else { + frameHeader += 8 + } + // Final crc + if e.o.crc { + frameHeader += 4 + } + + // Max overhead is 3 bytes/block. + // There cannot be 0 blocks. + blocks := (size + e.o.blockSize) / e.o.blockSize + + // Combine, add padding. 
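The wraparound guards above change from a package constant to a per-encoder `e.bufferReset` field and now subtract the retained history length. A rough sketch of the relationship, using the field value visible in the encoder_options.go hunk that follows (`math.MaxInt32 - int32(o.windowSize*2)`); the window size and offsets here are made-up figures:

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	windowSize := int32(8 << 20) // hypothetical 8 MiB window
	bufferReset := math.MaxInt32 - windowSize*2

	cur := bufferReset - 100   // pretend the offset has nearly wrapped
	hist := make([]byte, 4096) // history retained from earlier blocks

	// The patched guard: reset while cur plus the retained history could
	// reach bufferReset, instead of comparing cur alone.
	if cur >= bufferReset-int32(len(hist)) {
		fmt.Println("would reset tables and set cur back to maxMatchOff")
	}
}
```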
+ maxSz := frameHeader + 3*blocks + size + if e.o.pad > 1 { + maxSz += calcSkippableFrame(int64(maxSz), int64(e.o.pad)) + } + return maxSz +} diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go index a7c5e1aac4..8e15be2f7f 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -3,6 +3,8 @@ package zstd import ( "errors" "fmt" + "math" + "math/bits" "runtime" "strings" ) @@ -47,22 +49,22 @@ func (o encoderOptions) encoder() encoder { switch o.level { case SpeedFastest: if o.dict != nil { - return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} + return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}} } - return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} + return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}} case SpeedDefault: if o.dict != nil { - return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}} + return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}} } - return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} + return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}} case SpeedBetterCompression: if o.dict != nil { - return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}} + return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}} } - return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} + return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}} case SpeedBestCompression: - return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}} + return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}} } panic("unknown compression level") } @@ -304,7 +306,13 @@ func WithLowerEncoderMem(b bool) EOption { } // WithEncoderDict allows to register a dictionary that will be used for the encode. +// +// The slice dict must be in the [dictionary format] produced by +// "zstd --train" from the Zstandard reference implementation. +// // The encoder *may* choose to use no dictionary instead for certain payloads. 
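The new `Encoder.MaxEncodedSize` completes just above. One natural use is pre-sizing the destination passed to `EncodeAll` so it never reallocates; a short sketch against the public API:

```go
package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// A nil writer is fine when the encoder is only used via EncodeAll.
	enc, err := zstd.NewWriter(nil)
	if err != nil {
		panic(err)
	}
	defer enc.Close()

	src := make([]byte, 1<<20)
	// Capacity bound from the new API: frame header + 3 bytes per block
	// + payload (+ padding, if configured).
	dst := make([]byte, 0, enc.MaxEncodedSize(len(src)))
	dst = enc.EncodeAll(src, dst)
	fmt.Printf("compressed %d bytes into %d (cap %d)\n", len(src), len(dst), cap(dst))
}
```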
+// +// [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format func WithEncoderDict(dict []byte) EOption { return func(o *encoderOptions) error { d, err := loadDict(dict) @@ -315,3 +323,17 @@ func WithEncoderDict(dict []byte) EOption { return nil } } + +// WithEncoderDictRaw registers a dictionary that may be used by the encoder. +// +// The slice content may contain arbitrary data. It will be used as an initial +// history. +func WithEncoderDictRaw(id uint32, content []byte) EOption { + return func(o *encoderOptions) error { + if bits.UintSize > 32 && uint(len(content)) > dictMaxLength { + return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content)) + } + o.dict = &dict{id: id, content: content, offsets: [3]int{1, 4, 8}} + return nil + } +} diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index 2c0affcd85..d8e8a05bd7 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -5,7 +5,7 @@ package zstd import ( - "bytes" + "encoding/binary" "encoding/hex" "errors" "io" @@ -29,7 +29,7 @@ type frameDec struct { FrameContentSize uint64 - DictionaryID *uint32 + DictionaryID uint32 HasCheckSum bool SingleSegment bool } @@ -43,9 +43,9 @@ const ( MaxWindowSize = 1 << 29 ) -var ( - frameMagic = []byte{0x28, 0xb5, 0x2f, 0xfd} - skippableFrameMagic = []byte{0x2a, 0x4d, 0x18} +const ( + frameMagic = "\x28\xb5\x2f\xfd" + skippableFrameMagic = "\x2a\x4d\x18" ) func newFrameDec(o decoderOptions) *frameDec { @@ -89,9 +89,9 @@ func (d *frameDec) reset(br byteBuffer) error { copy(signature[1:], b) } - if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 { + if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 { if debugDecoder { - println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic)) + println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString([]byte(skippableFrameMagic))) } // Break if not skippable frame. break @@ -114,9 +114,9 @@ func (d *frameDec) reset(br byteBuffer) error { return err } } - if !bytes.Equal(signature[:], frameMagic) { + if string(signature[:]) != frameMagic { if debugDecoder { - println("Got magic numbers: ", signature, "want:", frameMagic) + println("Got magic numbers: ", signature, "want:", []byte(frameMagic)) } return ErrMagicMismatch } @@ -155,7 +155,7 @@ func (d *frameDec) reset(br byteBuffer) error { // Read Dictionary_ID // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id - d.DictionaryID = nil + d.DictionaryID = 0 if size := fhd & 3; size != 0 { if size == 3 { size = 4 @@ -167,7 +167,7 @@ func (d *frameDec) reset(br byteBuffer) error { return err } var id uint32 - switch size { + switch len(b) { case 1: id = uint32(b[0]) case 2: @@ -178,11 +178,7 @@ func (d *frameDec) reset(br byteBuffer) error { if debugDecoder { println("Dict size", size, "ID:", id) } - if id > 0 { - // ID 0 means "sorry, no dictionary anyway". 
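`WithEncoderDictRaw`, added above, registers arbitrary bytes as initial history rather than a structured dictionary produced by training. A hedged usage sketch; the id value and sample payload are illustrative only:

```go
package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Arbitrary shared prefix used as raw history; no training step needed.
	history := []byte("INFO request handled path=")

	enc, err := zstd.NewWriter(nil, zstd.WithEncoderDictRaw(1, history))
	if err != nil {
		panic(err)
	}
	defer enc.Close()

	msg := []byte("INFO request handled path=/healthz")
	out := enc.EncodeAll(msg, nil)
	fmt.Printf("%d -> %d bytes\n", len(msg), len(out))
}
```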
- // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format - d.DictionaryID = &id - } + d.DictionaryID = id } // Read Frame_Content_Size @@ -204,7 +200,7 @@ func (d *frameDec) reset(br byteBuffer) error { println("Reading Frame content", err) return err } - switch fcsSize { + switch len(b) { case 1: d.FrameContentSize = uint64(b[0]) case 2: @@ -305,7 +301,7 @@ func (d *frameDec) checkCRC() error { } // We can overwrite upper tmp now - want, err := d.rawInput.readSmall(4) + buf, err := d.rawInput.readSmall(4) if err != nil { println("CRC missing?", err) return err @@ -315,22 +311,17 @@ func (d *frameDec) checkCRC() error { return nil } - var tmp [4]byte - got := d.crc.Sum64() - // Flip to match file order. - tmp[0] = byte(got >> 0) - tmp[1] = byte(got >> 8) - tmp[2] = byte(got >> 16) - tmp[3] = byte(got >> 24) + want := binary.LittleEndian.Uint32(buf[:4]) + got := uint32(d.crc.Sum64()) - if !bytes.Equal(tmp[:], want) { + if got != want { if debugDecoder { - println("CRC Check Failed:", tmp[:], "!=", want) + printf("CRC check failed: got %08x, want %08x\n", got, want) } return ErrCRCMismatch } if debugDecoder { - println("CRC ok", tmp[:]) + printf("CRC ok %08x\n", got) } return nil } diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md index 69aa3bb587..777290d44c 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md @@ -2,12 +2,7 @@ VENDORED: Go to [github.com/cespare/xxhash](https://github.com/cespare/xxhash) for original package. - -[![GoDoc](https://godoc.org/github.com/cespare/xxhash?status.svg)](https://godoc.org/github.com/cespare/xxhash) -[![Build Status](https://travis-ci.org/cespare/xxhash.svg?branch=master)](https://travis-ci.org/cespare/xxhash) - -xxhash is a Go implementation of the 64-bit -[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a +xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a high-quality hashing algorithm that is much faster than anything in the Go standard library. @@ -28,31 +23,49 @@ func (*Digest) WriteString(string) (int, error) func (*Digest) Sum64() uint64 ``` -This implementation provides a fast pure-Go implementation and an even faster -assembly implementation for amd64. +The package is written with optimized pure Go and also contains even faster +assembly implementations for amd64 and arm64. If desired, the `purego` build tag +opts into using the Go code even on those architectures. + +[xxHash]: http://cyan4973.github.io/xxHash/ + +## Compatibility + +This package is in a module and the latest code is in version 2 of the module. +You need a version of Go with at least "minimal module compatibility" to use +github.com/cespare/xxhash/v2: + +* 1.9.7+ for Go 1.9 +* 1.10.3+ for Go 1.10 +* Go 1.11 or later + +I recommend using the latest release of Go. ## Benchmarks Here are some quick benchmarks comparing the pure-Go and assembly implementations of Sum64. 
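The framedec.go hunk above replaces manual byte flipping with `encoding/binary` and a plain `uint32` compare. A standalone sketch of the same check, using the upstream xxhash module since the vendored copy is internal: zstd's content checksum is the low four bytes of the frame's XXH64, stored little-endian.

```go
package main

import (
	"encoding/binary"
	"fmt"

	"github.com/cespare/xxhash/v2"
)

func main() {
	content := []byte("decompressed frame content")

	// What an encoder appends: low 32 bits of XXH64, little-endian.
	var trailer [4]byte
	binary.LittleEndian.PutUint32(trailer[:], uint32(xxhash.Sum64(content)))

	// The patched check: one load, one integer compare.
	want := binary.LittleEndian.Uint32(trailer[:])
	got := uint32(xxhash.Sum64(content))
	if got != want {
		fmt.Printf("CRC check failed: got %08x, want %08x\n", got, want)
		return
	}
	fmt.Printf("CRC ok %08x\n", got)
}
```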
-| input size | purego | asm | -| --- | --- | --- | -| 5 B | 979.66 MB/s | 1291.17 MB/s | -| 100 B | 7475.26 MB/s | 7973.40 MB/s | -| 4 KB | 17573.46 MB/s | 17602.65 MB/s | -| 10 MB | 17131.46 MB/s | 17142.16 MB/s | +| input size | purego | asm | +| ---------- | --------- | --------- | +| 4 B | 1.3 GB/s | 1.2 GB/s | +| 16 B | 2.9 GB/s | 3.5 GB/s | +| 100 B | 6.9 GB/s | 8.1 GB/s | +| 4 KB | 11.7 GB/s | 16.7 GB/s | +| 10 MB | 12.0 GB/s | 17.3 GB/s | -These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using -the following commands under Go 1.11.2: +These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C +CPU using the following commands under Go 1.19.2: ``` -$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes' -$ go test -benchtime 10s -bench '/xxhash,direct,bytes' +benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$') +benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$') ``` ## Projects using this package - [InfluxDB](https://github.com/influxdata/influxdb) - [Prometheus](https://github.com/prometheus/prometheus) +- [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) - [FreeCache](https://github.com/coocood/freecache) +- [FastCache](https://github.com/VictoriaMetrics/fastcache) diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go index 2c112a0ab1..fc40c82001 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go @@ -18,19 +18,11 @@ const ( prime5 uint64 = 2870177450012600261 ) -// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where -// possible in the Go code is worth a small (but measurable) performance boost -// by avoiding some MOVQs. Vars are needed for the asm and also are useful for -// convenience in the Go code in a few places where we need to intentionally -// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the -// result overflows a uint64). -var ( - prime1v = prime1 - prime2v = prime2 - prime3v = prime3 - prime4v = prime4 - prime5v = prime5 -) +// Store the primes in an array as well. +// +// The consts are used when possible in Go code to avoid MOVs but we need a +// contiguous array of the assembly code. +var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5} // Digest implements hash.Hash64. type Digest struct { @@ -52,10 +44,10 @@ func New() *Digest { // Reset clears the Digest's state so that it can be reused. func (d *Digest) Reset() { - d.v1 = prime1v + prime2 + d.v1 = primes[0] + prime2 d.v2 = prime2 d.v3 = 0 - d.v4 = -prime1v + d.v4 = -primes[0] d.total = 0 d.n = 0 } @@ -71,21 +63,23 @@ func (d *Digest) Write(b []byte) (n int, err error) { n = len(b) d.total += uint64(n) + memleft := d.mem[d.n&(len(d.mem)-1):] + if d.n+n < 32 { // This new data doesn't even fill the current block. - copy(d.mem[d.n:], b) + copy(memleft, b) d.n += n return } if d.n > 0 { // Finish off the partial block. 
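The Digest changes below rework `Write` and `Sum64` around slice-based indexing. For reference, the streaming API as used from the upstream module (the vendored copy under `internal/` is not importable):

```go
package main

import (
	"fmt"

	"github.com/cespare/xxhash/v2"
)

func main() {
	d := xxhash.New()
	// Write and WriteString never fail; the results can be ignored.
	d.Write([]byte("hello, "))
	d.WriteString("xxhash")
	fmt.Printf("streamed: %016x\n", d.Sum64())

	// The one-shot form gives the identical value.
	fmt.Printf("one-shot: %016x\n", xxhash.Sum64([]byte("hello, xxhash")))
}
```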
- copy(d.mem[d.n:], b) + c := copy(memleft, b) d.v1 = round(d.v1, u64(d.mem[0:8])) d.v2 = round(d.v2, u64(d.mem[8:16])) d.v3 = round(d.v3, u64(d.mem[16:24])) d.v4 = round(d.v4, u64(d.mem[24:32])) - b = b[32-d.n:] + b = b[c:] d.n = 0 } @@ -135,21 +129,20 @@ func (d *Digest) Sum64() uint64 { h += d.total - i, end := 0, d.n - for ; i+8 <= end; i += 8 { - k1 := round(0, u64(d.mem[i:i+8])) + b := d.mem[:d.n&(len(d.mem)-1)] + for ; len(b) >= 8; b = b[8:] { + k1 := round(0, u64(b[:8])) h ^= k1 h = rol27(h)*prime1 + prime4 } - if i+4 <= end { - h ^= uint64(u32(d.mem[i:i+4])) * prime1 + if len(b) >= 4 { + h ^= uint64(u32(b[:4])) * prime1 h = rol23(h)*prime2 + prime3 - i += 4 + b = b[4:] } - for i < end { - h ^= uint64(d.mem[i]) * prime5 + for ; len(b) > 0; b = b[1:] { + h ^= uint64(b[0]) * prime5 h = rol11(h) * prime1 - i++ } h ^= h >> 33 diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s index cea1785619..ddb63aa91b 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s @@ -1,3 +1,4 @@ +//go:build !appengine && gc && !purego && !noasm // +build !appengine // +build gc // +build !purego @@ -5,212 +6,205 @@ #include "textflag.h" -// Register allocation: -// AX h -// SI pointer to advance through b -// DX n -// BX loop end -// R8 v1, k1 -// R9 v2 -// R10 v3 -// R11 v4 -// R12 tmp -// R13 prime1v -// R14 prime2v -// DI prime4v - -// round reads from and advances the buffer pointer in SI. -// It assumes that R13 has prime1v and R14 has prime2v. -#define round(r) \ - MOVQ (SI), R12 \ - ADDQ $8, SI \ - IMULQ R14, R12 \ - ADDQ R12, r \ - ROLQ $31, r \ - IMULQ R13, r - -// mergeRound applies a merge round on the two registers acc and val. -// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v. -#define mergeRound(acc, val) \ - IMULQ R14, val \ - ROLQ $31, val \ - IMULQ R13, val \ - XORQ val, acc \ - IMULQ R13, acc \ - ADDQ DI, acc +// Registers: +#define h AX +#define d AX +#define p SI // pointer to advance through b +#define n DX +#define end BX // loop end +#define v1 R8 +#define v2 R9 +#define v3 R10 +#define v4 R11 +#define x R12 +#define prime1 R13 +#define prime2 R14 +#define prime4 DI + +#define round(acc, x) \ + IMULQ prime2, x \ + ADDQ x, acc \ + ROLQ $31, acc \ + IMULQ prime1, acc + +// round0 performs the operation x = round(0, x). +#define round0(x) \ + IMULQ prime2, x \ + ROLQ $31, x \ + IMULQ prime1, x + +// mergeRound applies a merge round on the two registers acc and x. +// It assumes that prime1, prime2, and prime4 have been loaded. +#define mergeRound(acc, x) \ + round0(x) \ + XORQ x, acc \ + IMULQ prime1, acc \ + ADDQ prime4, acc + +// blockLoop processes as many 32-byte blocks as possible, +// updating v1, v2, v3, and v4. It assumes that there is at least one block +// to process. +#define blockLoop() \ +loop: \ + MOVQ +0(p), x \ + round(v1, x) \ + MOVQ +8(p), x \ + round(v2, x) \ + MOVQ +16(p), x \ + round(v3, x) \ + MOVQ +24(p), x \ + round(v4, x) \ + ADDQ $32, p \ + CMPQ p, end \ + JLE loop // func Sum64(b []byte) uint64 -TEXT ·Sum64(SB), NOSPLIT, $0-32 +TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32 // Load fixed primes. - MOVQ ·prime1v(SB), R13 - MOVQ ·prime2v(SB), R14 - MOVQ ·prime4v(SB), DI + MOVQ ·primes+0(SB), prime1 + MOVQ ·primes+8(SB), prime2 + MOVQ ·primes+24(SB), prime4 // Load slice. 
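The amd64 rewrite above names registers with `#define` and expresses the core mixing step as `round`/`round0` macros. In Go form the same steps are as follows (prime values as listed in the arm64 data section further down; this mirrors, not replaces, the assembly):

```go
package main

import (
	"fmt"
	"math/bits"
)

const (
	prime1 uint64 = 11400714785074694791
	prime2 uint64 = 14029467366897019727
)

// round is what the asm macro does: acc = rol31(acc + x*prime2) * prime1.
func round(acc, x uint64) uint64 {
	acc += x * prime2
	acc = bits.RotateLeft64(acc, 31)
	return acc * prime1
}

// round0 is the x = round(0, x) special case the patch also macro-izes.
func round0(x uint64) uint64 {
	return bits.RotateLeft64(x*prime2, 31) * prime1
}

func main() {
	fmt.Println(round(1, 2), round0(2))
}
```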
- MOVQ b_base+0(FP), SI - MOVQ b_len+8(FP), DX - LEAQ (SI)(DX*1), BX + MOVQ b_base+0(FP), p + MOVQ b_len+8(FP), n + LEAQ (p)(n*1), end // The first loop limit will be len(b)-32. - SUBQ $32, BX + SUBQ $32, end // Check whether we have at least one block. - CMPQ DX, $32 + CMPQ n, $32 JLT noBlocks // Set up initial state (v1, v2, v3, v4). - MOVQ R13, R8 - ADDQ R14, R8 - MOVQ R14, R9 - XORQ R10, R10 - XORQ R11, R11 - SUBQ R13, R11 - - // Loop until SI > BX. -blockLoop: - round(R8) - round(R9) - round(R10) - round(R11) - - CMPQ SI, BX - JLE blockLoop - - MOVQ R8, AX - ROLQ $1, AX - MOVQ R9, R12 - ROLQ $7, R12 - ADDQ R12, AX - MOVQ R10, R12 - ROLQ $12, R12 - ADDQ R12, AX - MOVQ R11, R12 - ROLQ $18, R12 - ADDQ R12, AX - - mergeRound(AX, R8) - mergeRound(AX, R9) - mergeRound(AX, R10) - mergeRound(AX, R11) + MOVQ prime1, v1 + ADDQ prime2, v1 + MOVQ prime2, v2 + XORQ v3, v3 + XORQ v4, v4 + SUBQ prime1, v4 + + blockLoop() + + MOVQ v1, h + ROLQ $1, h + MOVQ v2, x + ROLQ $7, x + ADDQ x, h + MOVQ v3, x + ROLQ $12, x + ADDQ x, h + MOVQ v4, x + ROLQ $18, x + ADDQ x, h + + mergeRound(h, v1) + mergeRound(h, v2) + mergeRound(h, v3) + mergeRound(h, v4) JMP afterBlocks noBlocks: - MOVQ ·prime5v(SB), AX + MOVQ ·primes+32(SB), h afterBlocks: - ADDQ DX, AX - - // Right now BX has len(b)-32, and we want to loop until SI > len(b)-8. - ADDQ $24, BX - - CMPQ SI, BX - JG fourByte - -wordLoop: - // Calculate k1. - MOVQ (SI), R8 - ADDQ $8, SI - IMULQ R14, R8 - ROLQ $31, R8 - IMULQ R13, R8 - - XORQ R8, AX - ROLQ $27, AX - IMULQ R13, AX - ADDQ DI, AX - - CMPQ SI, BX - JLE wordLoop - -fourByte: - ADDQ $4, BX - CMPQ SI, BX - JG singles - - MOVL (SI), R8 - ADDQ $4, SI - IMULQ R13, R8 - XORQ R8, AX - - ROLQ $23, AX - IMULQ R14, AX - ADDQ ·prime3v(SB), AX - -singles: - ADDQ $4, BX - CMPQ SI, BX + ADDQ n, h + + ADDQ $24, end + CMPQ p, end + JG try4 + +loop8: + MOVQ (p), x + ADDQ $8, p + round0(x) + XORQ x, h + ROLQ $27, h + IMULQ prime1, h + ADDQ prime4, h + + CMPQ p, end + JLE loop8 + +try4: + ADDQ $4, end + CMPQ p, end + JG try1 + + MOVL (p), x + ADDQ $4, p + IMULQ prime1, x + XORQ x, h + + ROLQ $23, h + IMULQ prime2, h + ADDQ ·primes+16(SB), h + +try1: + ADDQ $4, end + CMPQ p, end JGE finalize -singlesLoop: - MOVBQZX (SI), R12 - ADDQ $1, SI - IMULQ ·prime5v(SB), R12 - XORQ R12, AX +loop1: + MOVBQZX (p), x + ADDQ $1, p + IMULQ ·primes+32(SB), x + XORQ x, h + ROLQ $11, h + IMULQ prime1, h - ROLQ $11, AX - IMULQ R13, AX - - CMPQ SI, BX - JL singlesLoop + CMPQ p, end + JL loop1 finalize: - MOVQ AX, R12 - SHRQ $33, R12 - XORQ R12, AX - IMULQ R14, AX - MOVQ AX, R12 - SHRQ $29, R12 - XORQ R12, AX - IMULQ ·prime3v(SB), AX - MOVQ AX, R12 - SHRQ $32, R12 - XORQ R12, AX - - MOVQ AX, ret+24(FP) + MOVQ h, x + SHRQ $33, x + XORQ x, h + IMULQ prime2, h + MOVQ h, x + SHRQ $29, x + XORQ x, h + IMULQ ·primes+16(SB), h + MOVQ h, x + SHRQ $32, x + XORQ x, h + + MOVQ h, ret+24(FP) RET -// writeBlocks uses the same registers as above except that it uses AX to store -// the d pointer. - // func writeBlocks(d *Digest, b []byte) int -TEXT ·writeBlocks(SB), NOSPLIT, $0-40 +TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 // Load fixed primes needed for round. - MOVQ ·prime1v(SB), R13 - MOVQ ·prime2v(SB), R14 + MOVQ ·primes+0(SB), prime1 + MOVQ ·primes+8(SB), prime2 // Load slice. - MOVQ b_base+8(FP), SI - MOVQ b_len+16(FP), DX - LEAQ (SI)(DX*1), BX - SUBQ $32, BX + MOVQ b_base+8(FP), p + MOVQ b_len+16(FP), n + LEAQ (p)(n*1), end + SUBQ $32, end // Load vN from d. 
- MOVQ d+0(FP), AX - MOVQ 0(AX), R8 // v1 - MOVQ 8(AX), R9 // v2 - MOVQ 16(AX), R10 // v3 - MOVQ 24(AX), R11 // v4 + MOVQ s+0(FP), d + MOVQ 0(d), v1 + MOVQ 8(d), v2 + MOVQ 16(d), v3 + MOVQ 24(d), v4 // We don't need to check the loop condition here; this function is // always called with at least one block of data to process. -blockLoop: - round(R8) - round(R9) - round(R10) - round(R11) - - CMPQ SI, BX - JLE blockLoop + blockLoop() // Copy vN back to d. - MOVQ R8, 0(AX) - MOVQ R9, 8(AX) - MOVQ R10, 16(AX) - MOVQ R11, 24(AX) - - // The number of bytes written is SI minus the old base pointer. - SUBQ b_base+8(FP), SI - MOVQ SI, ret+32(FP) + MOVQ v1, 0(d) + MOVQ v2, 8(d) + MOVQ v3, 16(d) + MOVQ v4, 24(d) + + // The number of bytes written is p minus the old base pointer. + SUBQ b_base+8(FP), p + MOVQ p, ret+32(FP) RET diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s index 4d64a17d69..17901e0804 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s @@ -1,13 +1,17 @@ -// +build gc,!purego,!noasm +//go:build !appengine && gc && !purego && !noasm +// +build !appengine +// +build gc +// +build !purego +// +build !noasm #include "textflag.h" -// Register allocation. +// Registers: #define digest R1 -#define h R2 // Return value. -#define p R3 // Input pointer. -#define len R4 -#define nblocks R5 // len / 32. +#define h R2 // return value +#define p R3 // input pointer +#define n R4 // input length +#define nblocks R5 // n / 32 #define prime1 R7 #define prime2 R8 #define prime3 R9 @@ -25,60 +29,52 @@ #define round(acc, x) \ MADD prime2, acc, x, acc \ ROR $64-31, acc \ - MUL prime1, acc \ + MUL prime1, acc -// x = round(0, x). +// round0 performs the operation x = round(0, x). #define round0(x) \ MUL prime2, x \ ROR $64-31, x \ - MUL prime1, x \ - -#define mergeRound(x) \ - round0(x) \ - EOR x, h \ - MADD h, prime4, prime1, h \ - -// Update v[1-4] with 32-byte blocks. Assumes len >= 32. -#define blocksLoop() \ - LSR $5, len, nblocks \ - PCALIGN $16 \ - loop: \ - LDP.P 32(p), (x1, x2) \ - round(v1, x1) \ - LDP -16(p), (x3, x4) \ - round(v2, x2) \ - SUB $1, nblocks \ - round(v3, x3) \ - round(v4, x4) \ - CBNZ nblocks, loop \ - -// The primes are repeated here to ensure that they're stored -// in a contiguous array, so we can load them with LDP. -DATA primes<> +0(SB)/8, $11400714785074694791 -DATA primes<> +8(SB)/8, $14029467366897019727 -DATA primes<>+16(SB)/8, $1609587929392839161 -DATA primes<>+24(SB)/8, $9650029242287828579 -DATA primes<>+32(SB)/8, $2870177450012600261 -GLOBL primes<>(SB), NOPTR+RODATA, $40 + MUL prime1, x + +#define mergeRound(acc, x) \ + round0(x) \ + EOR x, acc \ + MADD acc, prime4, prime1, acc + +// blockLoop processes as many 32-byte blocks as possible, +// updating v1, v2, v3, and v4. It assumes that n >= 32. 
+#define blockLoop() \ + LSR $5, n, nblocks \ + PCALIGN $16 \ + loop: \ + LDP.P 16(p), (x1, x2) \ + LDP.P 16(p), (x3, x4) \ + round(v1, x1) \ + round(v2, x2) \ + round(v3, x3) \ + round(v4, x4) \ + SUB $1, nblocks \ + CBNZ nblocks, loop // func Sum64(b []byte) uint64 -TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32 - LDP b_base+0(FP), (p, len) +TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32 + LDP b_base+0(FP), (p, n) - LDP primes<> +0(SB), (prime1, prime2) - LDP primes<>+16(SB), (prime3, prime4) - MOVD primes<>+32(SB), prime5 + LDP ·primes+0(SB), (prime1, prime2) + LDP ·primes+16(SB), (prime3, prime4) + MOVD ·primes+32(SB), prime5 - CMP $32, len - CSEL LO, prime5, ZR, h // if len < 32 { h = prime5 } else { h = 0 } - BLO afterLoop + CMP $32, n + CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 } + BLT afterLoop ADD prime1, prime2, v1 MOVD prime2, v2 MOVD $0, v3 NEG prime1, v4 - blocksLoop() + blockLoop() ROR $64-1, v1, x1 ROR $64-7, v2, x2 @@ -88,71 +84,75 @@ TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32 ADD x3, x4 ADD x2, x4, h - mergeRound(v1) - mergeRound(v2) - mergeRound(v3) - mergeRound(v4) + mergeRound(h, v1) + mergeRound(h, v2) + mergeRound(h, v3) + mergeRound(h, v4) afterLoop: - ADD len, h + ADD n, h - TBZ $4, len, try8 + TBZ $4, n, try8 LDP.P 16(p), (x1, x2) round0(x1) + + // NOTE: here and below, sequencing the EOR after the ROR (using a + // rotated register) is worth a small but measurable speedup for small + // inputs. ROR $64-27, h EOR x1 @> 64-27, h, h MADD h, prime4, prime1, h round0(x2) ROR $64-27, h - EOR x2 @> 64-27, h + EOR x2 @> 64-27, h, h MADD h, prime4, prime1, h try8: - TBZ $3, len, try4 + TBZ $3, n, try4 MOVD.P 8(p), x1 round0(x1) ROR $64-27, h - EOR x1 @> 64-27, h + EOR x1 @> 64-27, h, h MADD h, prime4, prime1, h try4: - TBZ $2, len, try2 + TBZ $2, n, try2 MOVWU.P 4(p), x2 MUL prime1, x2 ROR $64-23, h - EOR x2 @> 64-23, h + EOR x2 @> 64-23, h, h MADD h, prime3, prime2, h try2: - TBZ $1, len, try1 + TBZ $1, n, try1 MOVHU.P 2(p), x3 AND $255, x3, x1 LSR $8, x3, x2 MUL prime5, x1 ROR $64-11, h - EOR x1 @> 64-11, h + EOR x1 @> 64-11, h, h MUL prime1, h MUL prime5, x2 ROR $64-11, h - EOR x2 @> 64-11, h + EOR x2 @> 64-11, h, h MUL prime1, h try1: - TBZ $0, len, end + TBZ $0, n, finalize MOVBU (p), x4 MUL prime5, x4 ROR $64-11, h - EOR x4 @> 64-11, h + EOR x4 @> 64-11, h, h MUL prime1, h -end: +finalize: EOR h >> 33, h MUL prime2, h EOR h >> 29, h @@ -163,24 +163,22 @@ end: RET // func writeBlocks(d *Digest, b []byte) int -// -// Assumes len(b) >= 32. -TEXT ·writeBlocks(SB), NOFRAME+NOSPLIT, $0-40 - LDP primes<>(SB), (prime1, prime2) +TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 + LDP ·primes+0(SB), (prime1, prime2) // Load state. Assume v[1-4] are stored contiguously. MOVD d+0(FP), digest LDP 0(digest), (v1, v2) LDP 16(digest), (v3, v4) - LDP b_base+8(FP), (p, len) + LDP b_base+8(FP), (p, n) - blocksLoop() + blockLoop() // Store updated state. 
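`blockLoop` in both ports consumes 32-byte blocks into four accumulator lanes. A pure-Go rendering of that loop, matching the `v1..v4` update order shown in the xxhash_other.go hunk further down; `round` is as sketched earlier, and the helper names are illustrative:

```go
package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

// The two primes blockLoop needs, as listed in the patch's data section.
var primes = [...]uint64{11400714785074694791, 14029467366897019727}

// round is the per-lane mixing step (see the round macro above).
func round(acc, x uint64) uint64 {
	return bits.RotateLeft64(acc+x*primes[1], 31) * primes[0]
}

// writeBlocks consumes whole 32-byte blocks into the four lanes and
// reports how many bytes it used (the "BIC $31, n" of the arm64 epilogue).
func writeBlocks(v *[4]uint64, b []byte) int {
	n := len(b) &^ 31
	for len(b) >= 32 {
		v[0] = round(v[0], binary.LittleEndian.Uint64(b[0:8]))
		v[1] = round(v[1], binary.LittleEndian.Uint64(b[8:16]))
		v[2] = round(v[2], binary.LittleEndian.Uint64(b[16:24]))
		v[3] = round(v[3], binary.LittleEndian.Uint64(b[24:32]))
		b = b[32:]
	}
	return n
}

func main() {
	// Initial lane state, as set up before the loop in both ports.
	v := [4]uint64{primes[0] + primes[1], primes[1], 0, -primes[0]}
	fmt.Println(writeBlocks(&v, make([]byte, 100))) // 96
}
```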
STP (v1, v2), 0(digest) STP (v3, v4), 16(digest) - BIC $31, len - MOVD len, ret+32(FP) + BIC $31, n + MOVD n, ret+32(FP) RET diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go index 1a1fac9c26..d4221edf4f 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_asm.go @@ -13,4 +13,4 @@ package xxhash func Sum64(b []byte) uint64 //go:noescape -func writeBlocks(d *Digest, b []byte) int +func writeBlocks(s *Digest, b []byte) int diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go index 209cb4a999..0be16cefc7 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go @@ -15,10 +15,10 @@ func Sum64(b []byte) uint64 { var h uint64 if n >= 32 { - v1 := prime1v + prime2 + v1 := primes[0] + prime2 v2 := prime2 v3 := uint64(0) - v4 := -prime1v + v4 := -primes[0] for len(b) >= 32 { v1 = round(v1, u64(b[0:8:len(b)])) v2 = round(v2, u64(b[8:16:len(b)])) @@ -37,19 +37,18 @@ func Sum64(b []byte) uint64 { h += uint64(n) - i, end := 0, len(b) - for ; i+8 <= end; i += 8 { - k1 := round(0, u64(b[i:i+8:len(b)])) + for ; len(b) >= 8; b = b[8:] { + k1 := round(0, u64(b[:8])) h ^= k1 h = rol27(h)*prime1 + prime4 } - if i+4 <= end { - h ^= uint64(u32(b[i:i+4:len(b)])) * prime1 + if len(b) >= 4 { + h ^= uint64(u32(b[:4])) * prime1 h = rol23(h)*prime2 + prime3 - i += 4 + b = b[4:] } - for ; i < end; i++ { - h ^= uint64(b[i]) * prime5 + for ; len(b) > 0; b = b[1:] { + h ^= uint64(b[0]) * prime5 h = rol11(h) * prime1 } diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s index 52e5703c26..b94993a072 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s @@ -320,10 +320,6 @@ error_not_enough_literals: MOVQ $0x00000004, ret+24(FP) RET - // Return with not enough output space error - MOVQ $0x00000005, ret+24(FP) - RET - // func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: CMOV TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32 @@ -617,10 +613,6 @@ error_not_enough_literals: MOVQ $0x00000004, ret+24(FP) RET - // Return with not enough output space error - MOVQ $0x00000005, ret+24(FP) - RET - // func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: BMI, BMI2, CMOV TEXT ·sequenceDecs_decode_bmi2(SB), $8-32 @@ -897,10 +889,6 @@ error_not_enough_literals: MOVQ $0x00000004, ret+24(FP) RET - // Return with not enough output space error - MOVQ $0x00000005, ret+24(FP) - RET - // func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: BMI, BMI2, CMOV TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32 @@ -1152,10 +1140,6 @@ error_not_enough_literals: MOVQ $0x00000004, ret+24(FP) RET - // Return with not enough output space error - MOVQ $0x00000005, ret+24(FP) - RET - // func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool // Requires: SSE TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9 @@ -1389,8 +1373,7 @@ loop_finished: MOVQ ctx+0(FP), AX MOVQ DX, 24(AX) MOVQ DI, 104(AX) - MOVQ 80(AX), CX - SUBQ CX, SI + 
SUBQ 80(AX), SI MOVQ SI, 112(AX) RET @@ -1402,8 +1385,7 @@ error_match_off_too_big: MOVQ ctx+0(FP), AX MOVQ DX, 24(AX) MOVQ DI, 104(AX) - MOVQ 80(AX), CX - SUBQ CX, SI + SUBQ 80(AX), SI MOVQ SI, 112(AX) RET @@ -1747,8 +1729,7 @@ loop_finished: MOVQ ctx+0(FP), AX MOVQ DX, 24(AX) MOVQ DI, 104(AX) - MOVQ 80(AX), CX - SUBQ CX, SI + SUBQ 80(AX), SI MOVQ SI, 112(AX) RET @@ -1760,8 +1741,7 @@ error_match_off_too_big: MOVQ ctx+0(FP), AX MOVQ DX, 24(AX) MOVQ DI, 104(AX) - MOVQ 80(AX), CX - SUBQ CX, SI + SUBQ 80(AX), SI MOVQ SI, 112(AX) RET diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index 3eb3f1c826..5ffa82f5ac 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -36,9 +36,6 @@ const forcePreDef = false // zstdMinMatch is the minimum zstd match length. const zstdMinMatch = 3 -// Reset the buffer offset when reaching this. -const bufferReset = math.MaxInt32 - MaxWindowSize - // fcsUnknown is used for unknown frame content size. const fcsUnknown = math.MaxUint64 @@ -75,7 +72,6 @@ var ( ErrDecoderSizeExceeded = errors.New("decompressed size exceeds configured limit") // ErrUnknownDictionary is returned if the dictionary ID is unknown. - // For the time being dictionaries are not supported. ErrUnknownDictionary = errors.New("unknown dictionary") // ErrFrameSizeExceeded is returned if the stated frame size is exceeded. @@ -110,26 +106,25 @@ func printf(format string, a ...interface{}) { } } -// matchLen returns the maximum length. +// matchLen returns the maximum common prefix length of a and b. // a must be the shortest of the two. -// The function also returns whether all bytes matched. -func matchLen(a, b []byte) int { - b = b[:len(a)] - for i := 0; i < len(a)-7; i += 8 { - if diff := load64(a, i) ^ load64(b, i); diff != 0 { - return i + (bits.TrailingZeros64(diff) >> 3) +func matchLen(a, b []byte) (n int) { + for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] { + diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b) + if diff != 0 { + return n + bits.TrailingZeros64(diff)>>3 } + n += 8 } - checked := (len(a) >> 3) << 3 - a = a[checked:] - b = b[checked:] for i := range a { if a[i] != b[i] { - return i + checked + break } + n++ } - return len(a) + checked + return n + } func load3232(b []byte, i int32) uint32 { @@ -140,10 +135,6 @@ func load6432(b []byte, i int32) uint64 { return binary.LittleEndian.Uint64(b[i:]) } -func load64(b []byte, i int) uint64 { - return binary.LittleEndian.Uint64(b[i:]) -} - type byter interface { Bytes() []byte Len() int diff --git a/vendor/modules.txt b/vendor/modules.txt index 08cfa10d42..7ddddab978 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -153,8 +153,8 @@ github.com/awslabs/amazon-ecr-credential-helper/ecr-login/version github.com/chrismellard/docker-credential-acr-env/pkg/credhelper github.com/chrismellard/docker-credential-acr-env/pkg/registry github.com/chrismellard/docker-credential-acr-env/pkg/token -# github.com/containerd/stargz-snapshotter/estargz v0.13.0 -## explicit; go 1.16 +# github.com/containerd/stargz-snapshotter/estargz v0.14.1 +## explicit; go 1.19 github.com/containerd/stargz-snapshotter/estargz github.com/containerd/stargz-snapshotter/estargz/errorutil # github.com/cpuguy83/go-md2man/v2 v2.0.2 @@ -345,7 +345,7 @@ github.com/jmespath/go-jmespath # github.com/josharian/intern v1.0.0 ## explicit; go 1.5 github.com/josharian/intern -# github.com/klauspost/compress v1.15.12 +# 
github.com/klauspost/compress v1.15.15 ## explicit; go 1.17 github.com/klauspost/compress github.com/klauspost/compress/fse
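A closing note on the zstd.go hunk above: `matchLen` now walks both slices directly, using a 64-bit XOR plus a trailing-zero count to locate the first differing byte eight bytes at a time. A standalone sketch of the same approach:

```go
package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

// matchLen returns the length of the common prefix of a and b.
// Like the patched version, it assumes a is the shorter slice.
func matchLen(a, b []byte) (n int) {
	for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
		diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
		if diff != 0 {
			// TrailingZeros64>>3 converts the bit position of the first
			// difference into a byte count.
			return n + bits.TrailingZeros64(diff)>>3
		}
		n += 8
	}
	for i := range a {
		if a[i] != b[i] {
			break
		}
		n++
	}
	return n
}

func main() {
	fmt.Println(matchLen([]byte("zstandard"), []byte("zstandardized"))) // 9
}
```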