diff --git a/CHANGELOG.md b/CHANGELOG.md index 66c9a7ec4c3..d5007d860bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ * [ENHANCEMENT] Include additional detail when searching for traces [#916](https://github.com/grafana/tempo/pull/916) (@zalegrala) * [ENHANCEMENT] Add `gen index` and `gen bloom` commands to tempo-cli. [#903](https://github.com/grafana/tempo/pull/903) (@annanay25) * [ENHANCEMENT] Implement trace comparison in Vulture [#904](https://github.com/grafana/tempo/pull/904) (@zalegrala) +* [ENHANCEMENT] Improve zstd read throughput using zstd.Decoder [#948](https://github.com/grafana/tempo/pull/948) (@joe-elliott) * [ENHANCEMENT] Dedupe search records while replaying WAL [#940](https://github.com/grafana/tempo/pull/940) (@annanay25) * [ENHANCEMENT] Add status endpoint to list the available endpoints [#938](https://github.com/grafana/tempo/pull/938) (@zalegrala) * [ENHANCEMENT] Add search block headers [#943](https://github.com/grafana/tempo/pull/943) (@mdisibio) diff --git a/go.mod b/go.mod index c522678345f..9270a759392 100644 --- a/go.mod +++ b/go.mod @@ -33,7 +33,7 @@ require ( github.com/jaegertracing/jaeger v1.21.0 github.com/jedib0t/go-pretty/v6 v6.2.4 github.com/jsternberg/zap-logfmt v1.2.0 - github.com/klauspost/compress v1.13.1 + github.com/klauspost/compress v1.13.5 github.com/minio/minio-go/v7 v7.0.10 github.com/olekukonko/tablewriter v0.0.2 github.com/opentracing-contrib/go-grpc v0.0.0-20210225150812-73cb765af46e diff --git a/go.sum b/go.sum index 7fe2b5b9f57..8b92603334f 100644 --- a/go.sum +++ b/go.sum @@ -1261,8 +1261,9 @@ github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYs github.com/klauspost/compress v1.11.7/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.12/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= -github.com/klauspost/compress v1.13.1 h1:wXr2uRxZTJXHLly6qhJabee5JqIhTRoLBhDOA74hDEQ= github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= +github.com/klauspost/compress v1.13.5 h1:9O69jUPDcsT9fEm74W92rZL9FQY7rCdaXVneq+yyzl4= +github.com/klauspost/compress v1.13.5/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid v1.2.3/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid v1.3.1 h1:5JNjFYYQrZeKRJ0734q51WCEEn2huer72Dc7K+R/b6s= diff --git a/tempodb/encoding/v2/data_reader.go b/tempodb/encoding/v2/data_reader.go index 82851d81b5e..481110dbe00 100644 --- a/tempodb/encoding/v2/data_reader.go +++ b/tempodb/encoding/v2/data_reader.go @@ -9,6 +9,7 @@ import ( tempo_io "github.com/grafana/tempo/pkg/io" "github.com/grafana/tempo/tempodb/backend" "github.com/grafana/tempo/tempodb/encoding/common" + "github.com/klauspost/compress/zstd" ) type dataReader struct { @@ -16,6 +17,7 @@ type dataReader struct { pageBuffer []byte + encoding backend.Encoding pool ReaderPool compressedReader io.Reader } @@ -33,6 +35,7 @@ func NewDataReader(r backend.ContextReader, encoding backend.Encoding) (common.D } return &dataReader{ + encoding: encoding, contextReader: r, pool: pool, }, nil @@ -105,7 +108,14 @@ func (r *dataReader) Read(ctx context.Context, records []common.Record, pagesBuf return nil, nil, err } - pagesBuffer[i], err = tempo_io.ReadAllWithBuffer(reader, len(page), pagesBuffer[i]) + // TODO: leaky abstraction. can a real programmer fix this in the future? + // zstd decoder is ~10-20% faster then the streaming io.Reader interface so prefer that + decoder, ok := reader.(*zstd.Decoder) + if ok { + pagesBuffer[i], err = decoder.DecodeAll(page, pagesBuffer[i][:0]) + } else { + pagesBuffer[i], err = tempo_io.ReadAllWithBuffer(reader, len(page), pagesBuffer[i]) + } if err != nil { return nil, nil, err } @@ -138,7 +148,15 @@ func (r *dataReader) NextPage(buffer []byte) ([]byte, uint32, error) { return nil, 0, err } - buffer, err = tempo_io.ReadAllWithBuffer(compressedReader, len(page.data), buffer) + // TODO: leaky abstraction. can a real programmer fix this in the future? + // zstd decoder is ~10-20% faster then the streaming io.Reader interface so prefer that + decoder, ok := compressedReader.(*zstd.Decoder) + if ok { + buffer, err = decoder.DecodeAll(page.data, buffer[:0]) + } else { + buffer, err = tempo_io.ReadAllWithBuffer(compressedReader, len(page.data), buffer) + } + if err != nil { return nil, 0, err } @@ -148,10 +166,18 @@ func (r *dataReader) NextPage(buffer []byte) ([]byte, uint32, error) { func (r *dataReader) getCompressedReader(page []byte) (io.Reader, error) { var err error + var reader io.Reader + // we are going to use the stateless zstd decoding functionality. if you pass + // a non-nil reader to .GetReader() and then use .DecodeAll() the process hangs + // for unknown reasons. so don't do that. + if r.encoding != backend.EncZstd { + reader = bytes.NewReader(page) + } + if r.compressedReader == nil { - r.compressedReader, err = r.pool.GetReader(bytes.NewReader(page)) + r.compressedReader, err = r.pool.GetReader(reader) } else { - r.compressedReader, err = r.pool.ResetReader(bytes.NewReader(page), r.compressedReader) + r.compressedReader, err = r.pool.ResetReader(reader, r.compressedReader) } return r.compressedReader, err } diff --git a/vendor/github.com/klauspost/compress/.gitattributes b/vendor/github.com/klauspost/compress/.gitattributes new file mode 100644 index 00000000000..402433593c0 --- /dev/null +++ b/vendor/github.com/klauspost/compress/.gitattributes @@ -0,0 +1,2 @@ +* -text +*.bin -text -diff diff --git a/vendor/github.com/klauspost/compress/.gitignore b/vendor/github.com/klauspost/compress/.gitignore new file mode 100644 index 00000000000..b35f8449bf2 --- /dev/null +++ b/vendor/github.com/klauspost/compress/.gitignore @@ -0,0 +1,25 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof +/s2/cmd/_s2sx/sfx-exe diff --git a/vendor/github.com/klauspost/compress/.goreleaser.yml b/vendor/github.com/klauspost/compress/.goreleaser.yml new file mode 100644 index 00000000000..c9014ce1da2 --- /dev/null +++ b/vendor/github.com/klauspost/compress/.goreleaser.yml @@ -0,0 +1,137 @@ +# This is an example goreleaser.yaml file with some sane defaults. +# Make sure to check the documentation at http://goreleaser.com +before: + hooks: + - ./gen.sh + +builds: + - + id: "s2c" + binary: s2c + main: ./s2/cmd/s2c/main.go + flags: + - -trimpath + env: + - CGO_ENABLED=0 + goos: + - aix + - linux + - freebsd + - netbsd + - windows + - darwin + goarch: + - 386 + - amd64 + - arm + - arm64 + - ppc64 + - ppc64le + - mips64 + - mips64le + goarm: + - 7 + - + id: "s2d" + binary: s2d + main: ./s2/cmd/s2d/main.go + flags: + - -trimpath + env: + - CGO_ENABLED=0 + goos: + - aix + - linux + - freebsd + - netbsd + - windows + - darwin + goarch: + - 386 + - amd64 + - arm + - arm64 + - ppc64 + - ppc64le + - mips64 + - mips64le + goarm: + - 7 + - + id: "s2sx" + binary: s2sx + main: ./s2/cmd/_s2sx/main.go + flags: + - -modfile=s2sx.mod + - -trimpath + env: + - CGO_ENABLED=0 + goos: + - aix + - linux + - freebsd + - netbsd + - windows + - darwin + goarch: + - 386 + - amd64 + - arm + - arm64 + - ppc64 + - ppc64le + - mips64 + - mips64le + goarm: + - 7 + +archives: + - + id: s2-binaries + name_template: "s2-{{ .Os }}_{{ .Arch }}_{{ .Version }}" + replacements: + aix: AIX + darwin: OSX + linux: Linux + windows: Windows + 386: i386 + amd64: x86_64 + freebsd: FreeBSD + netbsd: NetBSD + format_overrides: + - goos: windows + format: zip + files: + - unpack/* + - s2/LICENSE + - s2/README.md +checksum: + name_template: 'checksums.txt' +snapshot: + name_template: "{{ .Tag }}-next" +changelog: + sort: asc + filters: + exclude: + - '^doc:' + - '^docs:' + - '^test:' + - '^tests:' + - '^Update\sREADME.md' + +nfpms: + - + file_name_template: "s2_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}" + vendor: Klaus Post + homepage: https://github.com/klauspost/compress + maintainer: Klaus Post + description: S2 Compression Tool + license: BSD 3-Clause + formats: + - deb + - rpm + replacements: + darwin: Darwin + linux: Linux + freebsd: FreeBSD + amd64: x86_64 diff --git a/vendor/github.com/klauspost/compress/LICENSE b/vendor/github.com/klauspost/compress/LICENSE index 1eb75ef68e4..6cd1e962761 100644 --- a/vendor/github.com/klauspost/compress/LICENSE +++ b/vendor/github.com/klauspost/compress/LICENSE @@ -26,3 +26,267 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +------------------ + +Files: gzhttp/* + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2016-2017 The New York Times Company + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +------------------ + +Files: s2/cmd/internal/readahead/* + +The MIT License (MIT) + +Copyright (c) 2015 Klaus Post + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +--------------------- +Files: snappy/* +Files: internal/snapref/* + +Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md new file mode 100644 index 00000000000..235dc7cc68a --- /dev/null +++ b/vendor/github.com/klauspost/compress/README.md @@ -0,0 +1,430 @@ +# compress + +This package provides various compression algorithms. + +* [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and decompression in pure Go. +* [S2](https://github.com/klauspost/compress/tree/master/s2#s2-compression) is a high performance replacement for Snappy. +* Optimized [deflate](https://godoc.org/github.com/klauspost/compress/flate) packages which can be used as a dropin replacement for [gzip](https://godoc.org/github.com/klauspost/compress/gzip), [zip](https://godoc.org/github.com/klauspost/compress/zip) and [zlib](https://godoc.org/github.com/klauspost/compress/zlib). +* [snappy](https://github.com/klauspost/compress/tree/master/snappy) is a drop-in replacement for `github.com/golang/snappy` offering better compression and concurrent streams. +* [huff0](https://github.com/klauspost/compress/tree/master/huff0) and [FSE](https://github.com/klauspost/compress/tree/master/fse) implementations for raw entropy encoding. +* [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp) Provides client and server wrappers for handling gzipped requests efficiently. +* [pgzip](https://github.com/klauspost/pgzip) is a separate package that provides a very fast parallel gzip implementation. +* [fuzz package](https://github.com/klauspost/compress-fuzz) for fuzz testing all compressors/decompressors here. + +[![Go Reference](https://pkg.go.dev/badge/klauspost/compress.svg)](https://pkg.go.dev/github.com/klauspost/compress?tab=subdirectories) +[![Go](https://github.com/klauspost/compress/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/compress/actions/workflows/go.yml) +[![Sourcegraph Badge](https://sourcegraph.com/github.com/klauspost/compress/-/badge.svg)](https://sourcegraph.com/github.com/klauspost/compress?badge) + +# changelog + +* Aug 12, 2021 (v1.13.4) + * Add [snappy replacement package](https://github.com/klauspost/compress/tree/master/snappy). + * zstd: Fix incorrect encoding in "best" mode [#415](https://github.com/klauspost/compress/pull/415) + +* Aug 3, 2021 (v1.13.3) + + * zstd: Improve Best compression [#404](https://github.com/klauspost/compress/pull/404) + * zstd: Fix WriteTo error forwarding [#411](https://github.com/klauspost/compress/pull/411) + * gzhttp: Return http.HandlerFunc instead of http.Handler. Unlikely breaking change. [#406](https://github.com/klauspost/compress/pull/406) + * s2sx: Fix max size error [#399](https://github.com/klauspost/compress/pull/399) + * zstd: Add optional stream content size on reset [#401](https://github.com/klauspost/compress/pull/401) + * zstd: use SpeedBestCompression for level >= 10 [#410](https://github.com/klauspost/compress/pull/410) + +* Jun 14, 2021 (v1.13.1) + + * s2: Add full Snappy output support [#396](https://github.com/klauspost/compress/pull/396) + * zstd: Add configurable [Decoder window](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithDecoderMaxWindow) size [#394](https://github.com/klauspost/compress/pull/394) + * gzhttp: Add header to skip compression [#389](https://github.com/klauspost/compress/pull/389) + * s2: Improve speed with bigger output margin [#395](https://github.com/klauspost/compress/pull/395) + +* Jun 3, 2021 (v1.13.0) + * Added [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp#gzip-handler) which allows wrapping HTTP servers and clients with GZIP compressors. + * zstd: Detect short invalid signatures [#382](https://github.com/klauspost/compress/pull/382) + * zstd: Spawn decoder goroutine only if needed. [#380](https://github.com/klauspost/compress/pull/380) + +* May 25, 2021 (v1.12.3) + * deflate: Better/faster Huffman encoding [#374](https://github.com/klauspost/compress/pull/374) + * deflate: Allocate less for history. [#375](https://github.com/klauspost/compress/pull/375) + * zstd: Forward read errors [#373](https://github.com/klauspost/compress/pull/373) + +* Apr 27, 2021 (v1.12.2) + * zstd: Improve better/best compression [#360](https://github.com/klauspost/compress/pull/360) [#364](https://github.com/klauspost/compress/pull/364) [#365](https://github.com/klauspost/compress/pull/365) + * zstd: Add helpers to compress/decompress zstd inside zip files [#363](https://github.com/klauspost/compress/pull/363) + * deflate: Improve level 5+6 compression [#367](https://github.com/klauspost/compress/pull/367) + * s2: Improve better/best compression [#358](https://github.com/klauspost/compress/pull/358) [#359](https://github.com/klauspost/compress/pull/358) + * s2: Load after checking src limit on amd64. [#362](https://github.com/klauspost/compress/pull/362) + * s2sx: Limit max executable size [#368](https://github.com/klauspost/compress/pull/368) + +* Apr 14, 2021 (v1.12.1) + * snappy package removed. Upstream added as dependency. + * s2: Better compression in "best" mode [#353](https://github.com/klauspost/compress/pull/353) + * s2sx: Add stdin input and detect pre-compressed from signature [#352](https://github.com/klauspost/compress/pull/352) + * s2c/s2d: Add http as possible input [#348](https://github.com/klauspost/compress/pull/348) + * s2c/s2d/s2sx: Always truncate when writing files [#352](https://github.com/klauspost/compress/pull/352) + * zstd: Reduce memory usage further when using [WithLowerEncoderMem](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithLowerEncoderMem) [#346](https://github.com/klauspost/compress/pull/346) + * s2: Fix potential problem with amd64 assembly and profilers [#349](https://github.com/klauspost/compress/pull/349) + +* Mar 26, 2021 (v1.11.13) + * zstd: Big speedup on small dictionary encodes [#344](https://github.com/klauspost/compress/pull/344) [#345](https://github.com/klauspost/compress/pull/345) + * zstd: Add [WithLowerEncoderMem](https://pkg.go.dev/github.com/klauspost/compress/zstd#WithLowerEncoderMem) encoder option [#336](https://github.com/klauspost/compress/pull/336) + * deflate: Improve entropy compression [#338](https://github.com/klauspost/compress/pull/338) + * s2: Clean up and minor performance improvement in best [#341](https://github.com/klauspost/compress/pull/341) + +* Mar 5, 2021 (v1.11.12) + * s2: Add `s2sx` binary that creates [self extracting archives](https://github.com/klauspost/compress/tree/master/s2#s2sx-self-extracting-archives). + * s2: Speed up decompression on non-assembly platforms [#328](https://github.com/klauspost/compress/pull/328) + +* Mar 1, 2021 (v1.11.9) + * s2: Add ARM64 decompression assembly. Around 2x output speed. [#324](https://github.com/klauspost/compress/pull/324) + * s2: Improve "better" speed and efficiency. [#325](https://github.com/klauspost/compress/pull/325) + * s2: Fix binaries. + +* Feb 25, 2021 (v1.11.8) + * s2: Fixed occational out-of-bounds write on amd64. Upgrade recommended. + * s2: Add AMD64 assembly for better mode. 25-50% faster. [#315](https://github.com/klauspost/compress/pull/315) + * s2: Less upfront decoder allocation. [#322](https://github.com/klauspost/compress/pull/322) + * zstd: Faster "compression" of incompressible data. [#314](https://github.com/klauspost/compress/pull/314) + * zip: Fix zip64 headers. [#313](https://github.com/klauspost/compress/pull/313) + +* Jan 14, 2021 (v1.11.7) + * Use Bytes() interface to get bytes across packages. [#309](https://github.com/klauspost/compress/pull/309) + * s2: Add 'best' compression option. [#310](https://github.com/klauspost/compress/pull/310) + * s2: Add ReaderMaxBlockSize, changes `s2.NewReader` signature to include varargs. [#311](https://github.com/klauspost/compress/pull/311) + * s2: Fix crash on small better buffers. [#308](https://github.com/klauspost/compress/pull/308) + * s2: Clean up decoder. [#312](https://github.com/klauspost/compress/pull/312) + +* Jan 7, 2021 (v1.11.6) + * zstd: Make decoder allocations smaller [#306](https://github.com/klauspost/compress/pull/306) + * zstd: Free Decoder resources when Reset is called with a nil io.Reader [#305](https://github.com/klauspost/compress/pull/305) + +* Dec 20, 2020 (v1.11.4) + * zstd: Add Best compression mode [#304](https://github.com/klauspost/compress/pull/304) + * Add header decoder [#299](https://github.com/klauspost/compress/pull/299) + * s2: Add uncompressed stream option [#297](https://github.com/klauspost/compress/pull/297) + * Simplify/speed up small blocks with known max size. [#300](https://github.com/klauspost/compress/pull/300) + * zstd: Always reset literal dict encoder [#303](https://github.com/klauspost/compress/pull/303) + +* Nov 15, 2020 (v1.11.3) + * inflate: 10-15% faster decompression [#293](https://github.com/klauspost/compress/pull/293) + * zstd: Tweak DecodeAll default allocation [#295](https://github.com/klauspost/compress/pull/295) + +* Oct 11, 2020 (v1.11.2) + * s2: Fix out of bounds read in "better" block compression [#291](https://github.com/klauspost/compress/pull/291) + +* Oct 1, 2020 (v1.11.1) + * zstd: Set allLitEntropy true in default configuration [#286](https://github.com/klauspost/compress/pull/286) + +* Sept 8, 2020 (v1.11.0) + * zstd: Add experimental compression [dictionaries](https://github.com/klauspost/compress/tree/master/zstd#dictionaries) [#281](https://github.com/klauspost/compress/pull/281) + * zstd: Fix mixed Write and ReadFrom calls [#282](https://github.com/klauspost/compress/pull/282) + * inflate/gz: Limit variable shifts, ~5% faster decompression [#274](https://github.com/klauspost/compress/pull/274) + +
+ See changes prior to v1.11.0 + +* July 8, 2020 (v1.10.11) + * zstd: Fix extra block when compressing with ReadFrom. [#278](https://github.com/klauspost/compress/pull/278) + * huff0: Also populate compression table when reading decoding table. [#275](https://github.com/klauspost/compress/pull/275) + +* June 23, 2020 (v1.10.10) + * zstd: Skip entropy compression in fastest mode when no matches. [#270](https://github.com/klauspost/compress/pull/270) + +* June 16, 2020 (v1.10.9): + * zstd: API change for specifying dictionaries. See [#268](https://github.com/klauspost/compress/pull/268) + * zip: update CreateHeaderRaw to handle zip64 fields. [#266](https://github.com/klauspost/compress/pull/266) + * Fuzzit tests removed. The service has been purchased and is no longer available. + +* June 5, 2020 (v1.10.8): + * 1.15x faster zstd block decompression. [#265](https://github.com/klauspost/compress/pull/265) + +* June 1, 2020 (v1.10.7): + * Added zstd decompression [dictionary support](https://github.com/klauspost/compress/tree/master/zstd#dictionaries) + * Increase zstd decompression speed up to 1.19x. [#259](https://github.com/klauspost/compress/pull/259) + * Remove internal reset call in zstd compression and reduce allocations. [#263](https://github.com/klauspost/compress/pull/263) + +* May 21, 2020: (v1.10.6) + * zstd: Reduce allocations while decoding. [#258](https://github.com/klauspost/compress/pull/258), [#252](https://github.com/klauspost/compress/pull/252) + * zstd: Stricter decompression checks. + +* April 12, 2020: (v1.10.5) + * s2-commands: Flush output when receiving SIGINT. [#239](https://github.com/klauspost/compress/pull/239) + +* Apr 8, 2020: (v1.10.4) + * zstd: Minor/special case optimizations. [#251](https://github.com/klauspost/compress/pull/251), [#250](https://github.com/klauspost/compress/pull/250), [#249](https://github.com/klauspost/compress/pull/249), [#247](https://github.com/klauspost/compress/pull/247) +* Mar 11, 2020: (v1.10.3) + * s2: Use S2 encoder in pure Go mode for Snappy output as well. [#245](https://github.com/klauspost/compress/pull/245) + * s2: Fix pure Go block encoder. [#244](https://github.com/klauspost/compress/pull/244) + * zstd: Added "better compression" mode. [#240](https://github.com/klauspost/compress/pull/240) + * zstd: Improve speed of fastest compression mode by 5-10% [#241](https://github.com/klauspost/compress/pull/241) + * zstd: Skip creating encoders when not needed. [#238](https://github.com/klauspost/compress/pull/238) + +* Feb 27, 2020: (v1.10.2) + * Close to 50% speedup in inflate (gzip/zip decompression). [#236](https://github.com/klauspost/compress/pull/236) [#234](https://github.com/klauspost/compress/pull/234) [#232](https://github.com/klauspost/compress/pull/232) + * Reduce deflate level 1-6 memory usage up to 59%. [#227](https://github.com/klauspost/compress/pull/227) + +* Feb 18, 2020: (v1.10.1) + * Fix zstd crash when resetting multiple times without sending data. [#226](https://github.com/klauspost/compress/pull/226) + * deflate: Fix dictionary use on level 1-6. [#224](https://github.com/klauspost/compress/pull/224) + * Remove deflate writer reference when closing. [#224](https://github.com/klauspost/compress/pull/224) + +* Feb 4, 2020: (v1.10.0) + * Add optional dictionary to [stateless deflate](https://pkg.go.dev/github.com/klauspost/compress/flate?tab=doc#StatelessDeflate). Breaking change, send `nil` for previous behaviour. [#216](https://github.com/klauspost/compress/pull/216) + * Fix buffer overflow on repeated small block deflate. [#218](https://github.com/klauspost/compress/pull/218) + * Allow copying content from an existing ZIP file without decompressing+compressing. [#214](https://github.com/klauspost/compress/pull/214) + * Added [S2](https://github.com/klauspost/compress/tree/master/s2#s2-compression) AMD64 assembler and various optimizations. Stream speed >10GB/s. [#186](https://github.com/klauspost/compress/pull/186) + +
+ +
+ See changes prior to v1.10.0 + +* Jan 20,2020 (v1.9.8) Optimize gzip/deflate with better size estimates and faster table generation. [#207](https://github.com/klauspost/compress/pull/207) by [luyu6056](https://github.com/luyu6056), [#206](https://github.com/klauspost/compress/pull/206). +* Jan 11, 2020: S2 Encode/Decode will use provided buffer if capacity is big enough. [#204](https://github.com/klauspost/compress/pull/204) +* Jan 5, 2020: (v1.9.7) Fix another zstd regression in v1.9.5 - v1.9.6 removed. +* Jan 4, 2020: (v1.9.6) Regression in v1.9.5 fixed causing corrupt zstd encodes in rare cases. +* Jan 4, 2020: Faster IO in [s2c + s2d commandline tools](https://github.com/klauspost/compress/tree/master/s2#commandline-tools) compression/decompression. [#192](https://github.com/klauspost/compress/pull/192) +* Dec 29, 2019: Removed v1.9.5 since fuzz tests showed a compatibility problem with the reference zstandard decoder. +* Dec 29, 2019: (v1.9.5) zstd: 10-20% faster block compression. [#199](https://github.com/klauspost/compress/pull/199) +* Dec 29, 2019: [zip](https://godoc.org/github.com/klauspost/compress/zip) package updated with latest Go features +* Dec 29, 2019: zstd: Single segment flag condintions tweaked. [#197](https://github.com/klauspost/compress/pull/197) +* Dec 18, 2019: s2: Faster compression when ReadFrom is used. [#198](https://github.com/klauspost/compress/pull/198) +* Dec 10, 2019: s2: Fix repeat length output when just above at 16MB limit. +* Dec 10, 2019: zstd: Add function to get decoder as io.ReadCloser. [#191](https://github.com/klauspost/compress/pull/191) +* Dec 3, 2019: (v1.9.4) S2: limit max repeat length. [#188](https://github.com/klauspost/compress/pull/188) +* Dec 3, 2019: Add [WithNoEntropyCompression](https://godoc.org/github.com/klauspost/compress/zstd#WithNoEntropyCompression) to zstd [#187](https://github.com/klauspost/compress/pull/187) +* Dec 3, 2019: Reduce memory use for tests. Check for leaked goroutines. +* Nov 28, 2019 (v1.9.3) Less allocations in stateless deflate. +* Nov 28, 2019: 5-20% Faster huff0 decode. Impacts zstd as well. [#184](https://github.com/klauspost/compress/pull/184) +* Nov 12, 2019 (v1.9.2) Added [Stateless Compression](#stateless-compression) for gzip/deflate. +* Nov 12, 2019: Fixed zstd decompression of large single blocks. [#180](https://github.com/klauspost/compress/pull/180) +* Nov 11, 2019: Set default [s2c](https://github.com/klauspost/compress/tree/master/s2#commandline-tools) block size to 4MB. +* Nov 11, 2019: Reduce inflate memory use by 1KB. +* Nov 10, 2019: Less allocations in deflate bit writer. +* Nov 10, 2019: Fix inconsistent error returned by zstd decoder. +* Oct 28, 2019 (v1.9.1) ztsd: Fix crash when compressing blocks. [#174](https://github.com/klauspost/compress/pull/174) +* Oct 24, 2019 (v1.9.0) zstd: Fix rare data corruption [#173](https://github.com/klauspost/compress/pull/173) +* Oct 24, 2019 zstd: Fix huff0 out of buffer write [#171](https://github.com/klauspost/compress/pull/171) and always return errors [#172](https://github.com/klauspost/compress/pull/172) +* Oct 10, 2019: Big deflate rewrite, 30-40% faster with better compression [#105](https://github.com/klauspost/compress/pull/105) + +
+ +
+ See changes prior to v1.9.0 + +* Oct 10, 2019: (v1.8.6) zstd: Allow partial reads to get flushed data. [#169](https://github.com/klauspost/compress/pull/169) +* Oct 3, 2019: Fix inconsistent results on broken zstd streams. +* Sep 25, 2019: Added `-rm` (remove source files) and `-q` (no output except errors) to `s2c` and `s2d` [commands](https://github.com/klauspost/compress/tree/master/s2#commandline-tools) +* Sep 16, 2019: (v1.8.4) Add `s2c` and `s2d` [commandline tools](https://github.com/klauspost/compress/tree/master/s2#commandline-tools). +* Sep 10, 2019: (v1.8.3) Fix s2 decoder [Skip](https://godoc.org/github.com/klauspost/compress/s2#Reader.Skip). +* Sep 7, 2019: zstd: Added [WithWindowSize](https://godoc.org/github.com/klauspost/compress/zstd#WithWindowSize), contributed by [ianwilkes](https://github.com/ianwilkes). +* Sep 5, 2019: (v1.8.2) Add [WithZeroFrames](https://godoc.org/github.com/klauspost/compress/zstd#WithZeroFrames) which adds full zero payload block encoding option. +* Sep 5, 2019: Lazy initialization of zstandard predefined en/decoder tables. +* Aug 26, 2019: (v1.8.1) S2: 1-2% compression increase in "better" compression mode. +* Aug 26, 2019: zstd: Check maximum size of Huffman 1X compressed literals while decoding. +* Aug 24, 2019: (v1.8.0) Added [S2 compression](https://github.com/klauspost/compress/tree/master/s2#s2-compression), a high performance replacement for Snappy. +* Aug 21, 2019: (v1.7.6) Fixed minor issues found by fuzzer. One could lead to zstd not decompressing. +* Aug 18, 2019: Add [fuzzit](https://fuzzit.dev/) continuous fuzzing. +* Aug 14, 2019: zstd: Skip incompressible data 2x faster. [#147](https://github.com/klauspost/compress/pull/147) +* Aug 4, 2019 (v1.7.5): Better literal compression. [#146](https://github.com/klauspost/compress/pull/146) +* Aug 4, 2019: Faster zstd compression. [#143](https://github.com/klauspost/compress/pull/143) [#144](https://github.com/klauspost/compress/pull/144) +* Aug 4, 2019: Faster zstd decompression. [#145](https://github.com/klauspost/compress/pull/145) [#143](https://github.com/klauspost/compress/pull/143) [#142](https://github.com/klauspost/compress/pull/142) +* July 15, 2019 (v1.7.4): Fix double EOF block in rare cases on zstd encoder. +* July 15, 2019 (v1.7.3): Minor speedup/compression increase in default zstd encoder. +* July 14, 2019: zstd decoder: Fix decompression error on multiple uses with mixed content. +* July 7, 2019 (v1.7.2): Snappy update, zstd decoder potential race fix. +* June 17, 2019: zstd decompression bugfix. +* June 17, 2019: fix 32 bit builds. +* June 17, 2019: Easier use in modules (less dependencies). +* June 9, 2019: New stronger "default" [zstd](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression mode. Matches zstd default compression ratio. +* June 5, 2019: 20-40% throughput in [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and better compression. +* June 5, 2019: deflate/gzip compression: Reduce memory usage of lower compression levels. +* June 2, 2019: Added [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression! +* May 25, 2019: deflate/gzip: 10% faster bit writer, mostly visible in lower levels. +* Apr 22, 2019: [zstd](https://github.com/klauspost/compress/tree/master/zstd#zstd) decompression added. +* Aug 1, 2018: Added [huff0 README](https://github.com/klauspost/compress/tree/master/huff0#huff0-entropy-compression). +* Jul 8, 2018: Added [Performance Update 2018](#performance-update-2018) below. +* Jun 23, 2018: Merged [Go 1.11 inflate optimizations](https://go-review.googlesource.com/c/go/+/102235). Go 1.9 is now required. Backwards compatible version tagged with [v1.3.0](https://github.com/klauspost/compress/releases/tag/v1.3.0). +* Apr 2, 2018: Added [huff0](https://godoc.org/github.com/klauspost/compress/huff0) en/decoder. Experimental for now, API may change. +* Mar 4, 2018: Added [FSE Entropy](https://godoc.org/github.com/klauspost/compress/fse) en/decoder. Experimental for now, API may change. +* Nov 3, 2017: Add compression [Estimate](https://godoc.org/github.com/klauspost/compress#Estimate) function. +* May 28, 2017: Reduce allocations when resetting decoder. +* Apr 02, 2017: Change back to official crc32, since changes were merged in Go 1.7. +* Jan 14, 2017: Reduce stack pressure due to array copies. See [Issue #18625](https://github.com/golang/go/issues/18625). +* Oct 25, 2016: Level 2-4 have been rewritten and now offers significantly better performance than before. +* Oct 20, 2016: Port zlib changes from Go 1.7 to fix zlib writer issue. Please update. +* Oct 16, 2016: Go 1.7 changes merged. Apples to apples this package is a few percent faster, but has a significantly better balance between speed and compression per level. +* Mar 24, 2016: Always attempt Huffman encoding on level 4-7. This improves base 64 encoded data compression. +* Mar 24, 2016: Small speedup for level 1-3. +* Feb 19, 2016: Faster bit writer, level -2 is 15% faster, level 1 is 4% faster. +* Feb 19, 2016: Handle small payloads faster in level 1-3. +* Feb 19, 2016: Added faster level 2 + 3 compression modes. +* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progresssion in terms of compression. New default level is 5. +* Feb 14, 2016: Snappy: Merge upstream changes. +* Feb 14, 2016: Snappy: Fix aggressive skipping. +* Feb 14, 2016: Snappy: Update benchmark. +* Feb 13, 2016: Deflate: Fixed assembler problem that could lead to sub-optimal compression. +* Feb 12, 2016: Snappy: Added AMD64 SSE 4.2 optimizations to matching, which makes easy to compress material run faster. Typical speedup is around 25%. +* Feb 9, 2016: Added Snappy package fork. This version is 5-7% faster, much more on hard to compress content. +* Jan 30, 2016: Optimize level 1 to 3 by not considering static dictionary or storing uncompressed. ~4-5% speedup. +* Jan 16, 2016: Optimization on deflate level 1,2,3 compression. +* Jan 8 2016: Merge [CL 18317](https://go-review.googlesource.com/#/c/18317): fix reading, writing of zip64 archives. +* Dec 8 2015: Make level 1 and -2 deterministic even if write size differs. +* Dec 8 2015: Split encoding functions, so hashing and matching can potentially be inlined. 1-3% faster on AMD64. 5% faster on other platforms. +* Dec 8 2015: Fixed rare [one byte out-of bounds read](https://github.com/klauspost/compress/issues/20). Please update! +* Nov 23 2015: Optimization on token writer. ~2-4% faster. Contributed by [@dsnet](https://github.com/dsnet). +* Nov 20 2015: Small optimization to bit writer on 64 bit systems. +* Nov 17 2015: Fixed out-of-bound errors if the underlying Writer returned an error. See [#15](https://github.com/klauspost/compress/issues/15). +* Nov 12 2015: Added [io.WriterTo](https://golang.org/pkg/io/#WriterTo) support to gzip/inflate. +* Nov 11 2015: Merged [CL 16669](https://go-review.googlesource.com/#/c/16669/4): archive/zip: enable overriding (de)compressors per file +* Oct 15 2015: Added skipping on uncompressible data. Random data speed up >5x. + +
+ +# deflate usage + +* [High Throughput Benchmark](http://blog.klauspost.com/go-gzipdeflate-benchmarks/). +* [Small Payload/Webserver Benchmarks](http://blog.klauspost.com/gzip-performance-for-go-webservers/). +* [Linear Time Compression](http://blog.klauspost.com/constant-time-gzipzip-compression/). +* [Re-balancing Deflate Compression Levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/) + +The packages are drop-in replacements for standard libraries. Simply replace the import path to use them: + +| old import | new import | Documentation +|--------------------|-----------------------------------------|--------------------| +| `compress/gzip` | `github.com/klauspost/compress/gzip` | [gzip](https://pkg.go.dev/github.com/klauspost/compress/gzip?tab=doc) +| `compress/zlib` | `github.com/klauspost/compress/zlib` | [zlib](https://pkg.go.dev/github.com/klauspost/compress/zlib?tab=doc) +| `archive/zip` | `github.com/klauspost/compress/zip` | [zip](https://pkg.go.dev/github.com/klauspost/compress/zip?tab=doc) +| `compress/flate` | `github.com/klauspost/compress/flate` | [flate](https://pkg.go.dev/github.com/klauspost/compress/flate?tab=doc) + +* Optimized [deflate](https://godoc.org/github.com/klauspost/compress/flate) packages which can be used as a dropin replacement for [gzip](https://godoc.org/github.com/klauspost/compress/gzip), [zip](https://godoc.org/github.com/klauspost/compress/zip) and [zlib](https://godoc.org/github.com/klauspost/compress/zlib). + +You may also be interested in [pgzip](https://github.com/klauspost/pgzip), which is a drop in replacement for gzip, which support multithreaded compression on big files and the optimized [crc32](https://github.com/klauspost/crc32) package used by these packages. + +The packages contains the same as the standard library, so you can use the godoc for that: [gzip](http://golang.org/pkg/compress/gzip/), [zip](http://golang.org/pkg/archive/zip/), [zlib](http://golang.org/pkg/compress/zlib/), [flate](http://golang.org/pkg/compress/flate/). + +Currently there is only minor speedup on decompression (mostly CRC32 calculation). + +Memory usage is typically 1MB for a Writer. stdlib is in the same range. +If you expect to have a lot of concurrently allocated Writers consider using +the stateless compress described below. + +# Stateless compression + +This package offers stateless compression as a special option for gzip/deflate. +It will do compression but without maintaining any state between Write calls. + +This means there will be no memory kept between Write calls, but compression and speed will be suboptimal. + +This is only relevant in cases where you expect to run many thousands of compressors concurrently, +but with very little activity. This is *not* intended for regular web servers serving individual requests. + +Because of this, the size of actual Write calls will affect output size. + +In gzip, specify level `-3` / `gzip.StatelessCompression` to enable. + +For direct deflate use, NewStatelessWriter and StatelessDeflate are available. See [documentation](https://godoc.org/github.com/klauspost/compress/flate#NewStatelessWriter) + +A `bufio.Writer` can of course be used to control write sizes. For example, to use a 4KB buffer: + +``` + // replace 'ioutil.Discard' with your output. + gzw, err := gzip.NewWriterLevel(ioutil.Discard, gzip.StatelessCompression) + if err != nil { + return err + } + defer gzw.Close() + + w := bufio.NewWriterSize(gzw, 4096) + defer w.Flush() + + // Write to 'w' +``` + +This will only use up to 4KB in memory when the writer is idle. + +Compression is almost always worse than the fastest compression level +and each write will allocate (a little) memory. + +# Performance Update 2018 + +It has been a while since we have been looking at the speed of this package compared to the standard library, so I thought I would re-do my tests and give some overall recommendations based on the current state. All benchmarks have been performed with Go 1.10 on my Desktop Intel(R) Core(TM) i7-2600 CPU @3.40GHz. Since I last ran the tests, I have gotten more RAM, which means tests with big files are no longer limited by my SSD. + +The raw results are in my [updated spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing). Due to cgo changes and upstream updates i could not get the cgo version of gzip to compile. Instead I included the [zstd](https://github.com/datadog/zstd) cgo implementation. If I get cgo gzip to work again, I might replace the results in the sheet. + +The columns to take note of are: *MB/s* - the throughput. *Reduction* - the data size reduction in percent of the original. *Rel Speed* relative speed compared to the standard library at the same level. *Smaller* - how many percent smaller is the compressed output compared to stdlib. Negative means the output was bigger. *Loss* means the loss (or gain) in compression as a percentage difference of the input. + +The `gzstd` (standard library gzip) and `gzkp` (this package gzip) only uses one CPU core. [`pgzip`](https://github.com/klauspost/pgzip), [`bgzf`](https://github.com/biogo/hts/tree/master/bgzf) uses all 4 cores. [`zstd`](https://github.com/DataDog/zstd) uses one core, and is a beast (but not Go, yet). + + +## Overall differences. + +There appears to be a roughly 5-10% speed advantage over the standard library when comparing at similar compression levels. + +The biggest difference you will see is the result of [re-balancing](https://blog.klauspost.com/rebalancing-deflate-compression-levels/) the compression levels. I wanted by library to give a smoother transition between the compression levels than the standard library. + +This package attempts to provide a more smooth transition, where "1" is taking a lot of shortcuts, "5" is the reasonable trade-off and "9" is the "give me the best compression", and the values in between gives something reasonable in between. The standard library has big differences in levels 1-4, but levels 5-9 having no significant gains - often spending a lot more time than can be justified by the achieved compression. + +There are links to all the test data in the [spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing) in the top left field on each tab. + +## Web Content + +This test set aims to emulate typical use in a web server. The test-set is 4GB data in 53k files, and is a mixture of (mostly) HTML, JS, CSS. + +Since level 1 and 9 are close to being the same code, they are quite close. But looking at the levels in-between the differences are quite big. + +Looking at level 6, this package is 88% faster, but will output about 6% more data. For a web server, this means you can serve 88% more data, but have to pay for 6% more bandwidth. You can draw your own conclusions on what would be the most expensive for your case. + +## Object files + +This test is for typical data files stored on a server. In this case it is a collection of Go precompiled objects. They are very compressible. + +The picture is similar to the web content, but with small differences since this is very compressible. Levels 2-3 offer good speed, but is sacrificing quite a bit of compression. + +The standard library seems suboptimal on level 3 and 4 - offering both worse compression and speed than level 6 & 7 of this package respectively. + +## Highly Compressible File + +This is a JSON file with very high redundancy. The reduction starts at 95% on level 1, so in real life terms we are dealing with something like a highly redundant stream of data, etc. + +It is definitely visible that we are dealing with specialized content here, so the results are very scattered. This package does not do very well at levels 1-4, but picks up significantly at level 5 and levels 7 and 8 offering great speed for the achieved compression. + +So if you know you content is extremely compressible you might want to go slightly higher than the defaults. The standard library has a huge gap between levels 3 and 4 in terms of speed (2.75x slowdown), so it offers little "middle ground". + +## Medium-High Compressible + +This is a pretty common test corpus: [enwik9](http://mattmahoney.net/dc/textdata.html). It contains the first 10^9 bytes of the English Wikipedia dump on Mar. 3, 2006. This is a very good test of typical text based compression and more data heavy streams. + +We see a similar picture here as in "Web Content". On equal levels some compression is sacrificed for more speed. Level 5 seems to be the best trade-off between speed and size, beating stdlib level 3 in both. + +## Medium Compressible + +I will combine two test sets, one [10GB file set](http://mattmahoney.net/dc/10gb.html) and a VM disk image (~8GB). Both contain different data types and represent a typical backup scenario. + +The most notable thing is how quickly the standard library drops to very low compression speeds around level 5-6 without any big gains in compression. Since this type of data is fairly common, this does not seem like good behavior. + + +## Un-compressible Content + +This is mainly a test of how good the algorithms are at detecting un-compressible input. The standard library only offers this feature with very conservative settings at level 1. Obviously there is no reason for the algorithms to try to compress input that cannot be compressed. The only downside is that it might skip some compressible data on false detections. + + +## Huffman only compression + +This compression library adds a special compression level, named `HuffmanOnly`, which allows near linear time compression. This is done by completely disabling matching of previous data, and only reduce the number of bits to represent each character. + +This means that often used characters, like 'e' and ' ' (space) in text use the fewest bits to represent, and rare characters like '¤' takes more bits to represent. For more information see [wikipedia](https://en.wikipedia.org/wiki/Huffman_coding) or this nice [video](https://youtu.be/ZdooBTdW5bM). + +Since this type of compression has much less variance, the compression speed is mostly unaffected by the input data, and is usually more than *180MB/s* for a single core. + +The downside is that the compression ratio is usually considerably worse than even the fastest conventional compression. The compression ratio can never be better than 8:1 (12.5%). + +The linear time compression can be used as a "better than nothing" mode, where you cannot risk the encoder to slow down on some content. For comparison, the size of the "Twain" text is *233460 bytes* (+29% vs. level 1) and encode speed is 144MB/s (4.5x level 1). So in this case you trade a 30% size increase for a 4 times speedup. + +For more information see my blog post on [Fast Linear Time Compression](http://blog.klauspost.com/constant-time-gzipzip-compression/). + +This is implemented on Go 1.7 as "Huffman Only" mode, though not exposed for gzip. + + +# license + +This code is licensed under the same conditions as the original Go code. See LICENSE file. diff --git a/vendor/github.com/klauspost/compress/compressible.go b/vendor/github.com/klauspost/compress/compressible.go new file mode 100644 index 00000000000..ea5a692d513 --- /dev/null +++ b/vendor/github.com/klauspost/compress/compressible.go @@ -0,0 +1,85 @@ +package compress + +import "math" + +// Estimate returns a normalized compressibility estimate of block b. +// Values close to zero are likely uncompressible. +// Values above 0.1 are likely to be compressible. +// Values above 0.5 are very compressible. +// Very small lengths will return 0. +func Estimate(b []byte) float64 { + if len(b) < 16 { + return 0 + } + + // Correctly predicted order 1 + hits := 0 + lastMatch := false + var o1 [256]byte + var hist [256]int + c1 := byte(0) + for _, c := range b { + if c == o1[c1] { + // We only count a hit if there was two correct predictions in a row. + if lastMatch { + hits++ + } + lastMatch = true + } else { + lastMatch = false + } + o1[c1] = c + c1 = c + hist[c]++ + } + + // Use x^0.6 to give better spread + prediction := math.Pow(float64(hits)/float64(len(b)), 0.6) + + // Calculate histogram distribution + variance := float64(0) + avg := float64(len(b)) / 256 + + for _, v := range hist { + Δ := float64(v) - avg + variance += Δ * Δ + } + + stddev := math.Sqrt(float64(variance)) / float64(len(b)) + exp := math.Sqrt(1 / float64(len(b))) + + // Subtract expected stddev + stddev -= exp + if stddev < 0 { + stddev = 0 + } + stddev *= 1 + exp + + // Use x^0.4 to give better spread + entropy := math.Pow(stddev, 0.4) + + // 50/50 weight between prediction and histogram distribution + return math.Pow((prediction+entropy)/2, 0.9) +} + +// ShannonEntropyBits returns the number of bits minimum required to represent +// an entropy encoding of the input bytes. +// https://en.wiktionary.org/wiki/Shannon_entropy +func ShannonEntropyBits(b []byte) int { + if len(b) == 0 { + return 0 + } + var hist [256]int + for _, c := range b { + hist[c]++ + } + shannon := float64(0) + invTotal := 1.0 / float64(len(b)) + for _, v := range hist[:] { + if v > 0 { + n := float64(v) + shannon += math.Ceil(-math.Log2(n*invTotal) * n) + } + } + return int(math.Ceil(shannon)) +} diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go index 16bc51408e0..d1edb356c4b 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate.go +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -9,10 +9,10 @@ package flate import ( "bufio" + "compress/flate" "fmt" "io" "math/bits" - "strconv" "sync" ) @@ -41,11 +41,7 @@ var fixedOnce sync.Once var fixedHuffmanDecoder huffmanDecoder // A CorruptInputError reports the presence of corrupt input at a given offset. -type CorruptInputError int64 - -func (e CorruptInputError) Error() string { - return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) -} +type CorruptInputError = flate.CorruptInputError // An InternalError reports an error in the flate code itself. type InternalError string @@ -55,26 +51,12 @@ func (e InternalError) Error() string { return "flate: internal error: " + strin // A ReadError reports an error encountered while reading input. // // Deprecated: No longer returned. -type ReadError struct { - Offset int64 // byte offset where error occurred - Err error // error returned by underlying Read -} - -func (e *ReadError) Error() string { - return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() -} +type ReadError = flate.ReadError // A WriteError reports an error encountered while writing output. // // Deprecated: No longer returned. -type WriteError struct { - Offset int64 // byte offset where error occurred - Err error // error returned by underlying Write -} - -func (e *WriteError) Error() string { - return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() -} +type WriteError = flate.WriteError // Resetter resets a ReadCloser returned by NewReader or NewReaderDict to // to switch to a new underlying Reader. This permits reusing a ReadCloser diff --git a/vendor/github.com/klauspost/compress/flate/regmask_other.go b/vendor/github.com/klauspost/compress/flate/regmask_other.go index f477a5d6e5a..1b7a2cbd793 100644 --- a/vendor/github.com/klauspost/compress/flate/regmask_other.go +++ b/vendor/github.com/klauspost/compress/flate/regmask_other.go @@ -1,4 +1,5 @@ -//+build !amd64 +//go:build !amd64 +// +build !amd64 package flate diff --git a/vendor/github.com/klauspost/compress/gen.sh b/vendor/github.com/klauspost/compress/gen.sh new file mode 100644 index 00000000000..aff942205f1 --- /dev/null +++ b/vendor/github.com/klauspost/compress/gen.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +cd s2/cmd/_s2sx/ || exit 1 +go generate . diff --git a/vendor/github.com/klauspost/compress/gzip/gunzip.go b/vendor/github.com/klauspost/compress/gzip/gunzip.go index 21e768b3604..4d7018913e2 100644 --- a/vendor/github.com/klauspost/compress/gzip/gunzip.go +++ b/vendor/github.com/klauspost/compress/gzip/gunzip.go @@ -8,8 +8,8 @@ package gzip import ( "bufio" + "compress/gzip" "encoding/binary" - "errors" "hash/crc32" "io" "time" @@ -30,9 +30,9 @@ const ( var ( // ErrChecksum is returned when reading GZIP data that has an invalid checksum. - ErrChecksum = errors.New("gzip: invalid checksum") + ErrChecksum = gzip.ErrChecksum // ErrHeader is returned when reading GZIP data that has an invalid header. - ErrHeader = errors.New("gzip: invalid header") + ErrHeader = gzip.ErrHeader ) var le = binary.LittleEndian diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go index 0823c928cec..8323dc05389 100644 --- a/vendor/github.com/klauspost/compress/huff0/compress.go +++ b/vendor/github.com/klauspost/compress/huff0/compress.go @@ -161,6 +161,70 @@ func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error) return s.Out, false, nil } +// EstimateSizes will estimate the data sizes +func EstimateSizes(in []byte, s *Scratch) (tableSz, dataSz, reuseSz int, err error) { + s, err = s.prepare(in) + if err != nil { + return 0, 0, 0, err + } + + // Create histogram, if none was provided. + tableSz, dataSz, reuseSz = -1, -1, -1 + maxCount := s.maxCount + var canReuse = false + if maxCount == 0 { + maxCount, canReuse = s.countSimple(in) + } else { + canReuse = s.canUseTable(s.prevTable) + } + + // We want the output size to be less than this: + wantSize := len(in) + if s.WantLogLess > 0 { + wantSize -= wantSize >> s.WantLogLess + } + + // Reset for next run. + s.clearCount = true + s.maxCount = 0 + if maxCount >= len(in) { + if maxCount > len(in) { + return 0, 0, 0, fmt.Errorf("maxCount (%d) > length (%d)", maxCount, len(in)) + } + if len(in) == 1 { + return 0, 0, 0, ErrIncompressible + } + // One symbol, use RLE + return 0, 0, 0, ErrUseRLE + } + if maxCount == 1 || maxCount < (len(in)>>7) { + // Each symbol present maximum once or too well distributed. + return 0, 0, 0, ErrIncompressible + } + + // Calculate new table. + err = s.buildCTable() + if err != nil { + return 0, 0, 0, err + } + + if false && !s.canUseTable(s.cTable) { + panic("invalid table generated") + } + + tableSz, err = s.cTable.estTableSize(s) + if err != nil { + return 0, 0, 0, err + } + if canReuse { + reuseSz = s.prevTable.estimateSize(s.count[:s.symbolLen]) + } + dataSz = s.cTable.estimateSize(s.count[:s.symbolLen]) + + // Restore + return tableSz, dataSz, reuseSz, nil +} + func (s *Scratch) compress1X(src []byte) ([]byte, error) { return s.compress1xDo(s.Out, src) } diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go index 7ec2022b650..3ee00ecb470 100644 --- a/vendor/github.com/klauspost/compress/huff0/huff0.go +++ b/vendor/github.com/klauspost/compress/huff0/huff0.go @@ -245,6 +245,68 @@ func (c cTable) write(s *Scratch) error { return nil } +func (c cTable) estTableSize(s *Scratch) (sz int, err error) { + var ( + // precomputed conversion table + bitsToWeight [tableLogMax + 1]byte + huffLog = s.actualTableLog + // last weight is not saved. + maxSymbolValue = uint8(s.symbolLen - 1) + huffWeight = s.huffWeight[:256] + ) + const ( + maxFSETableLog = 6 + ) + // convert to weight + bitsToWeight[0] = 0 + for n := uint8(1); n < huffLog+1; n++ { + bitsToWeight[n] = huffLog + 1 - n + } + + // Acquire histogram for FSE. + hist := s.fse.Histogram() + hist = hist[:256] + for i := range hist[:16] { + hist[i] = 0 + } + for n := uint8(0); n < maxSymbolValue; n++ { + v := bitsToWeight[c[n].nBits] & 15 + huffWeight[n] = v + hist[v]++ + } + + // FSE compress if feasible. + if maxSymbolValue >= 2 { + huffMaxCnt := uint32(0) + huffMax := uint8(0) + for i, v := range hist[:16] { + if v == 0 { + continue + } + huffMax = byte(i) + if v > huffMaxCnt { + huffMaxCnt = v + } + } + s.fse.HistogramFinished(huffMax, int(huffMaxCnt)) + s.fse.TableLog = maxFSETableLog + b, err := fse.Compress(huffWeight[:maxSymbolValue], s.fse) + if err == nil && len(b) < int(s.symbolLen>>1) { + sz += 1 + len(b) + return sz, nil + } + // Unable to compress (RLE/uncompressible) + } + // write raw values as 4-bits (max : 15) + if maxSymbolValue > (256 - 128) { + // should not happen : likely means source cannot be compressed + return 0, ErrIncompressible + } + // special case, pack weights 4 bits/weight. + sz += 1 + int(maxSymbolValue/2) + return sz, nil +} + // estimateSize returns the estimated size in bytes of the input represented in the // histogram supplied. func (c cTable) estimateSize(hist []uint32) int { diff --git a/vendor/github.com/klauspost/compress/internal/snapref/LICENSE b/vendor/github.com/klauspost/compress/internal/snapref/LICENSE new file mode 100644 index 00000000000..6050c10f4c8 --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/snapref/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/klauspost/compress/internal/snapref/decode.go b/vendor/github.com/klauspost/compress/internal/snapref/decode.go new file mode 100644 index 00000000000..40796a49d65 --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/snapref/decode.go @@ -0,0 +1,264 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snapref + +import ( + "encoding/binary" + "errors" + "io" +) + +var ( + // ErrCorrupt reports that the input is invalid. + ErrCorrupt = errors.New("snappy: corrupt input") + // ErrTooLarge reports that the uncompressed length is too large. + ErrTooLarge = errors.New("snappy: decoded block is too large") + // ErrUnsupported reports that the input isn't supported. + ErrUnsupported = errors.New("snappy: unsupported input") + + errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length") +) + +// DecodedLen returns the length of the decoded block. +func DecodedLen(src []byte) (int, error) { + v, _, err := decodedLen(src) + return v, err +} + +// decodedLen returns the length of the decoded block and the number of bytes +// that the length header occupied. +func decodedLen(src []byte) (blockLen, headerLen int, err error) { + v, n := binary.Uvarint(src) + if n <= 0 || v > 0xffffffff { + return 0, 0, ErrCorrupt + } + + const wordSize = 32 << (^uint(0) >> 32 & 1) + if wordSize == 32 && v > 0x7fffffff { + return 0, 0, ErrTooLarge + } + return int(v), n, nil +} + +const ( + decodeErrCodeCorrupt = 1 + decodeErrCodeUnsupportedLiteralLength = 2 +) + +// Decode returns the decoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire decoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +// +// Decode handles the Snappy block format, not the Snappy stream format. +func Decode(dst, src []byte) ([]byte, error) { + dLen, s, err := decodedLen(src) + if err != nil { + return nil, err + } + if dLen <= len(dst) { + dst = dst[:dLen] + } else { + dst = make([]byte, dLen) + } + switch decode(dst, src[s:]) { + case 0: + return dst, nil + case decodeErrCodeUnsupportedLiteralLength: + return nil, errUnsupportedLiteralLength + } + return nil, ErrCorrupt +} + +// NewReader returns a new Reader that decompresses from r, using the framing +// format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +func NewReader(r io.Reader) *Reader { + return &Reader{ + r: r, + decoded: make([]byte, maxBlockSize), + buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize), + } +} + +// Reader is an io.Reader that can read Snappy-compressed bytes. +// +// Reader handles the Snappy stream format, not the Snappy block format. +type Reader struct { + r io.Reader + err error + decoded []byte + buf []byte + // decoded[i:j] contains decoded bytes that have not yet been passed on. + i, j int + readHeader bool +} + +// Reset discards any buffered data, resets all state, and switches the Snappy +// reader to read from r. This permits reusing a Reader rather than allocating +// a new one. +func (r *Reader) Reset(reader io.Reader) { + r.r = reader + r.err = nil + r.i = 0 + r.j = 0 + r.readHeader = false +} + +func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { + if _, r.err = io.ReadFull(r.r, p); r.err != nil { + if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { + r.err = ErrCorrupt + } + return false + } + return true +} + +func (r *Reader) fill() error { + for r.i >= r.j { + if !r.readFull(r.buf[:4], true) { + return r.err + } + chunkType := r.buf[0] + if !r.readHeader { + if chunkType != chunkTypeStreamIdentifier { + r.err = ErrCorrupt + return r.err + } + r.readHeader = true + } + chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 + if chunkLen > len(r.buf) { + r.err = ErrUnsupported + return r.err + } + + // The chunk types are specified at + // https://github.com/google/snappy/blob/master/framing_format.txt + switch chunkType { + case chunkTypeCompressedData: + // Section 4.2. Compressed data (chunk type 0x00). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return r.err + } + buf := r.buf[:chunkLen] + if !r.readFull(buf, false) { + return r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + buf = buf[checksumSize:] + + n, err := DecodedLen(buf) + if err != nil { + r.err = err + return r.err + } + if n > len(r.decoded) { + r.err = ErrCorrupt + return r.err + } + if _, err := Decode(r.decoded, buf); err != nil { + r.err = err + return r.err + } + if crc(r.decoded[:n]) != checksum { + r.err = ErrCorrupt + return r.err + } + r.i, r.j = 0, n + continue + + case chunkTypeUncompressedData: + // Section 4.3. Uncompressed data (chunk type 0x01). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return r.err + } + buf := r.buf[:checksumSize] + if !r.readFull(buf, false) { + return r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + // Read directly into r.decoded instead of via r.buf. + n := chunkLen - checksumSize + if n > len(r.decoded) { + r.err = ErrCorrupt + return r.err + } + if !r.readFull(r.decoded[:n], false) { + return r.err + } + if crc(r.decoded[:n]) != checksum { + r.err = ErrCorrupt + return r.err + } + r.i, r.j = 0, n + continue + + case chunkTypeStreamIdentifier: + // Section 4.1. Stream identifier (chunk type 0xff). + if chunkLen != len(magicBody) { + r.err = ErrCorrupt + return r.err + } + if !r.readFull(r.buf[:len(magicBody)], false) { + return r.err + } + for i := 0; i < len(magicBody); i++ { + if r.buf[i] != magicBody[i] { + r.err = ErrCorrupt + return r.err + } + } + continue + } + + if chunkType <= 0x7f { + // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). + r.err = ErrUnsupported + return r.err + } + // Section 4.4 Padding (chunk type 0xfe). + // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). + if !r.readFull(r.buf[:chunkLen], false) { + return r.err + } + } + + return nil +} + +// Read satisfies the io.Reader interface. +func (r *Reader) Read(p []byte) (int, error) { + if r.err != nil { + return 0, r.err + } + + if err := r.fill(); err != nil { + return 0, err + } + + n := copy(p, r.decoded[r.i:r.j]) + r.i += n + return n, nil +} + +// ReadByte satisfies the io.ByteReader interface. +func (r *Reader) ReadByte() (byte, error) { + if r.err != nil { + return 0, r.err + } + + if err := r.fill(); err != nil { + return 0, err + } + + c := r.decoded[r.i] + r.i++ + return c, nil +} diff --git a/vendor/github.com/klauspost/compress/internal/snapref/decode_other.go b/vendor/github.com/klauspost/compress/internal/snapref/decode_other.go new file mode 100644 index 00000000000..77395a6b8b9 --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/snapref/decode_other.go @@ -0,0 +1,113 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snapref + +// decode writes the decoding of src to dst. It assumes that the varint-encoded +// length of the decompressed bytes has already been read, and that len(dst) +// equals that length. +// +// It returns 0 on success or a decodeErrCodeXxx error code on failure. +func decode(dst, src []byte) int { + var d, s, offset, length int + for s < len(src) { + switch src[s] & 0x03 { + case tagLiteral: + x := uint32(src[s] >> 2) + switch { + case x < 60: + s++ + case x == 60: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-1]) + case x == 61: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-2]) | uint32(src[s-1])<<8 + case x == 62: + s += 4 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + case x == 63: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + } + length = int(x) + 1 + if length <= 0 { + return decodeErrCodeUnsupportedLiteralLength + } + if length > len(dst)-d || length > len(src)-s { + return decodeErrCodeCorrupt + } + copy(dst[d:], src[s:s+length]) + d += length + s += length + continue + + case tagCopy1: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 4 + int(src[s-2])>>2&0x7 + offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + + case tagCopy2: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 1 + int(src[s-3])>>2 + offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) + + case tagCopy4: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 1 + int(src[s-5])>>2 + offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) + } + + if offset <= 0 || d < offset || length > len(dst)-d { + return decodeErrCodeCorrupt + } + // Copy from an earlier sub-slice of dst to a later sub-slice. + // If no overlap, use the built-in copy: + if offset >= length { + copy(dst[d:d+length], dst[d-offset:]) + d += length + continue + } + + // Unlike the built-in copy function, this byte-by-byte copy always runs + // forwards, even if the slices overlap. Conceptually, this is: + // + // d += forwardCopy(dst[d:d+length], dst[d-offset:]) + // + // We align the slices into a and b and show the compiler they are the same size. + // This allows the loop to run without bounds checks. + a := dst[d : d+length] + b := dst[d-offset:] + b = b[:len(a)] + for i := range a { + a[i] = b[i] + } + d += length + } + if d != len(dst) { + return decodeErrCodeCorrupt + } + return 0 +} diff --git a/vendor/github.com/klauspost/compress/internal/snapref/encode.go b/vendor/github.com/klauspost/compress/internal/snapref/encode.go new file mode 100644 index 00000000000..13c6040a5de --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/snapref/encode.go @@ -0,0 +1,289 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snapref + +import ( + "encoding/binary" + "errors" + "io" +) + +// Encode returns the encoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire encoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +// +// Encode handles the Snappy block format, not the Snappy stream format. +func Encode(dst, src []byte) []byte { + if n := MaxEncodedLen(len(src)); n < 0 { + panic(ErrTooLarge) + } else if len(dst) < n { + dst = make([]byte, n) + } + + // The block starts with the varint-encoded length of the decompressed bytes. + d := binary.PutUvarint(dst, uint64(len(src))) + + for len(src) > 0 { + p := src + src = nil + if len(p) > maxBlockSize { + p, src = p[:maxBlockSize], p[maxBlockSize:] + } + if len(p) < minNonLiteralBlockSize { + d += emitLiteral(dst[d:], p) + } else { + d += encodeBlock(dst[d:], p) + } + } + return dst[:d] +} + +// inputMargin is the minimum number of extra input bytes to keep, inside +// encodeBlock's inner loop. On some architectures, this margin lets us +// implement a fast path for emitLiteral, where the copy of short (<= 16 byte) +// literals can be implemented as a single load to and store from a 16-byte +// register. That literal's actual length can be as short as 1 byte, so this +// can copy up to 15 bytes too much, but that's OK as subsequent iterations of +// the encoding loop will fix up the copy overrun, and this inputMargin ensures +// that we don't overrun the dst and src buffers. +const inputMargin = 16 - 1 + +// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that +// could be encoded with a copy tag. This is the minimum with respect to the +// algorithm used by encodeBlock, not a minimum enforced by the file format. +// +// The encoded output must start with at least a 1 byte literal, as there are +// no previous bytes to copy. A minimal (1 byte) copy after that, generated +// from an emitCopy call in encodeBlock's main loop, would require at least +// another inputMargin bytes, for the reason above: we want any emitLiteral +// calls inside encodeBlock's main loop to use the fast path if possible, which +// requires being able to overrun by inputMargin bytes. Thus, +// minNonLiteralBlockSize equals 1 + 1 + inputMargin. +// +// The C++ code doesn't use this exact threshold, but it could, as discussed at +// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion +// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an +// optimization. It should not affect the encoded form. This is tested by +// TestSameEncodingAsCppShortCopies. +const minNonLiteralBlockSize = 1 + 1 + inputMargin + +// MaxEncodedLen returns the maximum length of a snappy block, given its +// uncompressed length. +// +// It will return a negative value if srcLen is too large to encode. +func MaxEncodedLen(srcLen int) int { + n := uint64(srcLen) + if n > 0xffffffff { + return -1 + } + // Compressed data can be defined as: + // compressed := item* literal* + // item := literal* copy + // + // The trailing literal sequence has a space blowup of at most 62/60 + // since a literal of length 60 needs one tag byte + one extra byte + // for length information. + // + // Item blowup is trickier to measure. Suppose the "copy" op copies + // 4 bytes of data. Because of a special check in the encoding code, + // we produce a 4-byte copy only if the offset is < 65536. Therefore + // the copy op takes 3 bytes to encode, and this type of item leads + // to at most the 62/60 blowup for representing literals. + // + // Suppose the "copy" op copies 5 bytes of data. If the offset is big + // enough, it will take 5 bytes to encode the copy op. Therefore the + // worst case here is a one-byte literal followed by a five-byte copy. + // That is, 6 bytes of input turn into 7 bytes of "compressed" data. + // + // This last factor dominates the blowup, so the final estimate is: + n = 32 + n + n/6 + if n > 0xffffffff { + return -1 + } + return int(n) +} + +var errClosed = errors.New("snappy: Writer is closed") + +// NewWriter returns a new Writer that compresses to w. +// +// The Writer returned does not buffer writes. There is no need to Flush or +// Close such a Writer. +// +// Deprecated: the Writer returned is not suitable for many small writes, only +// for few large writes. Use NewBufferedWriter instead, which is efficient +// regardless of the frequency and shape of the writes, and remember to Close +// that Writer when done. +func NewWriter(w io.Writer) *Writer { + return &Writer{ + w: w, + obuf: make([]byte, obufLen), + } +} + +// NewBufferedWriter returns a new Writer that compresses to w, using the +// framing format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +// +// The Writer returned buffers writes. Users must call Close to guarantee all +// data has been forwarded to the underlying io.Writer. They may also call +// Flush zero or more times before calling Close. +func NewBufferedWriter(w io.Writer) *Writer { + return &Writer{ + w: w, + ibuf: make([]byte, 0, maxBlockSize), + obuf: make([]byte, obufLen), + } +} + +// Writer is an io.Writer that can write Snappy-compressed bytes. +// +// Writer handles the Snappy stream format, not the Snappy block format. +type Writer struct { + w io.Writer + err error + + // ibuf is a buffer for the incoming (uncompressed) bytes. + // + // Its use is optional. For backwards compatibility, Writers created by the + // NewWriter function have ibuf == nil, do not buffer incoming bytes, and + // therefore do not need to be Flush'ed or Close'd. + ibuf []byte + + // obuf is a buffer for the outgoing (compressed) bytes. + obuf []byte + + // wroteStreamHeader is whether we have written the stream header. + wroteStreamHeader bool +} + +// Reset discards the writer's state and switches the Snappy writer to write to +// w. This permits reusing a Writer rather than allocating a new one. +func (w *Writer) Reset(writer io.Writer) { + w.w = writer + w.err = nil + if w.ibuf != nil { + w.ibuf = w.ibuf[:0] + } + w.wroteStreamHeader = false +} + +// Write satisfies the io.Writer interface. +func (w *Writer) Write(p []byte) (nRet int, errRet error) { + if w.ibuf == nil { + // Do not buffer incoming bytes. This does not perform or compress well + // if the caller of Writer.Write writes many small slices. This + // behavior is therefore deprecated, but still supported for backwards + // compatibility with code that doesn't explicitly Flush or Close. + return w.write(p) + } + + // The remainder of this method is based on bufio.Writer.Write from the + // standard library. + + for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil { + var n int + if len(w.ibuf) == 0 { + // Large write, empty buffer. + // Write directly from p to avoid copy. + n, _ = w.write(p) + } else { + n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) + w.ibuf = w.ibuf[:len(w.ibuf)+n] + w.Flush() + } + nRet += n + p = p[n:] + } + if w.err != nil { + return nRet, w.err + } + n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) + w.ibuf = w.ibuf[:len(w.ibuf)+n] + nRet += n + return nRet, nil +} + +func (w *Writer) write(p []byte) (nRet int, errRet error) { + if w.err != nil { + return 0, w.err + } + for len(p) > 0 { + obufStart := len(magicChunk) + if !w.wroteStreamHeader { + w.wroteStreamHeader = true + copy(w.obuf, magicChunk) + obufStart = 0 + } + + var uncompressed []byte + if len(p) > maxBlockSize { + uncompressed, p = p[:maxBlockSize], p[maxBlockSize:] + } else { + uncompressed, p = p, nil + } + checksum := crc(uncompressed) + + // Compress the buffer, discarding the result if the improvement + // isn't at least 12.5%. + compressed := Encode(w.obuf[obufHeaderLen:], uncompressed) + chunkType := uint8(chunkTypeCompressedData) + chunkLen := 4 + len(compressed) + obufEnd := obufHeaderLen + len(compressed) + if len(compressed) >= len(uncompressed)-len(uncompressed)/8 { + chunkType = chunkTypeUncompressedData + chunkLen = 4 + len(uncompressed) + obufEnd = obufHeaderLen + } + + // Fill in the per-chunk header that comes before the body. + w.obuf[len(magicChunk)+0] = chunkType + w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0) + w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8) + w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16) + w.obuf[len(magicChunk)+4] = uint8(checksum >> 0) + w.obuf[len(magicChunk)+5] = uint8(checksum >> 8) + w.obuf[len(magicChunk)+6] = uint8(checksum >> 16) + w.obuf[len(magicChunk)+7] = uint8(checksum >> 24) + + if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil { + w.err = err + return nRet, err + } + if chunkType == chunkTypeUncompressedData { + if _, err := w.w.Write(uncompressed); err != nil { + w.err = err + return nRet, err + } + } + nRet += len(uncompressed) + } + return nRet, nil +} + +// Flush flushes the Writer to its underlying io.Writer. +func (w *Writer) Flush() error { + if w.err != nil { + return w.err + } + if len(w.ibuf) == 0 { + return nil + } + w.write(w.ibuf) + w.ibuf = w.ibuf[:0] + return w.err +} + +// Close calls Flush and then closes the Writer. +func (w *Writer) Close() error { + w.Flush() + ret := w.err + if w.err == nil { + w.err = errClosed + } + return ret +} diff --git a/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go b/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go new file mode 100644 index 00000000000..511bba65db8 --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go @@ -0,0 +1,236 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snapref + +func load32(b []byte, i int) uint32 { + b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load64(b []byte, i int) uint64 { + b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +// emitLiteral writes a literal chunk and returns the number of bytes written. +// +// It assumes that: +// dst is long enough to hold the encoded bytes +// 1 <= len(lit) && len(lit) <= 65536 +func emitLiteral(dst, lit []byte) int { + i, n := 0, uint(len(lit)-1) + switch { + case n < 60: + dst[0] = uint8(n)<<2 | tagLiteral + i = 1 + case n < 1<<8: + dst[0] = 60<<2 | tagLiteral + dst[1] = uint8(n) + i = 2 + default: + dst[0] = 61<<2 | tagLiteral + dst[1] = uint8(n) + dst[2] = uint8(n >> 8) + i = 3 + } + return i + copy(dst[i:], lit) +} + +// emitCopy writes a copy chunk and returns the number of bytes written. +// +// It assumes that: +// dst is long enough to hold the encoded bytes +// 1 <= offset && offset <= 65535 +// 4 <= length && length <= 65535 +func emitCopy(dst []byte, offset, length int) int { + i := 0 + // The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The + // threshold for this loop is a little higher (at 68 = 64 + 4), and the + // length emitted down below is is a little lower (at 60 = 64 - 4), because + // it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed + // by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as + // a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as + // 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a + // tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an + // encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1. + for length >= 68 { + // Emit a length 64 copy, encoded as 3 bytes. + dst[i+0] = 63<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + i += 3 + length -= 64 + } + if length > 64 { + // Emit a length 60 copy, encoded as 3 bytes. + dst[i+0] = 59<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + i += 3 + length -= 60 + } + if length >= 12 || offset >= 2048 { + // Emit the remaining copy, encoded as 3 bytes. + dst[i+0] = uint8(length-1)<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + return i + 3 + } + // Emit the remaining copy, encoded as 2 bytes. + dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 + dst[i+1] = uint8(offset) + return i + 2 +} + +// extendMatch returns the largest k such that k <= len(src) and that +// src[i:i+k-j] and src[j:k] have the same contents. +// +// It assumes that: +// 0 <= i && i < j && j <= len(src) +func extendMatch(src []byte, i, j int) int { + for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 { + } + return j +} + +func hash(u, shift uint32) uint32 { + return (u * 0x1e35a7bd) >> shift +} + +// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It +// assumes that the varint-encoded length of the decompressed bytes has already +// been written. +// +// It also assumes that: +// len(dst) >= MaxEncodedLen(len(src)) && +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +func encodeBlock(dst, src []byte) (d int) { + // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive. + // The table element type is uint16, as s < sLimit and sLimit < len(src) + // and len(src) <= maxBlockSize and maxBlockSize == 65536. + const ( + maxTableSize = 1 << 14 + // tableMask is redundant, but helps the compiler eliminate bounds + // checks. + tableMask = maxTableSize - 1 + ) + shift := uint32(32 - 8) + for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + shift-- + } + // In Go, all array elements are zero-initialized, so there is no advantage + // to a smaller tableSize per se. However, it matches the C++ algorithm, + // and in the asm versions of this code, we can get away with zeroing only + // the first tableSize elements. + var table [maxTableSize]uint16 + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := len(src) - inputMargin + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form must start with a literal, as there are no previous + // bytes to copy, so we start looking for hash matches at s == 1. + s := 1 + nextHash := hash(load32(src, s), shift) + + for { + // Copied from the C++ snappy implementation: + // + // Heuristic match skipping: If 32 bytes are scanned with no matches + // found, start looking only at every other byte. If 32 more bytes are + // scanned (or skipped), look at every third byte, etc.. When a match + // is found, immediately go back to looking at every byte. This is a + // small loss (~5% performance, ~0.1% density) for compressible data + // due to more bookkeeping, but for non-compressible data (such as + // JPEG) it's a huge win since the compressor quickly "realizes" the + // data is incompressible and doesn't bother looking for matches + // everywhere. + // + // The "skip" variable keeps track of how many bytes there are since + // the last match; dividing it by 32 (ie. right-shifting by five) gives + // the number of bytes to move ahead for each iteration. + skip := 32 + + nextS := s + candidate := 0 + for { + s = nextS + bytesBetweenHashLookups := skip >> 5 + nextS = s + bytesBetweenHashLookups + skip += bytesBetweenHashLookups + if nextS > sLimit { + goto emitRemainder + } + candidate = int(table[nextHash&tableMask]) + table[nextHash&tableMask] = uint16(s) + nextHash = hash(load32(src, nextS), shift) + if load32(src, s) == load32(src, candidate) { + break + } + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + d += emitLiteral(dst[d:], src[nextEmit:s]) + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + base := s + + // Extend the 4-byte match as long as possible. + // + // This is an inlined version of: + // s = extendMatch(src, candidate+4, s+4) + s += 4 + for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 { + } + + d += emitCopy(dst[d:], base-candidate, s-base) + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load64(src, s-1) + prevHash := hash(uint32(x>>0), shift) + table[prevHash&tableMask] = uint16(s - 1) + currHash := hash(uint32(x>>8), shift) + candidate = int(table[currHash&tableMask]) + table[currHash&tableMask] = uint16(s) + if uint32(x>>8) != load32(src, candidate) { + nextHash = hash(uint32(x>>16), shift) + s++ + break + } + } + } + +emitRemainder: + if nextEmit < len(src) { + d += emitLiteral(dst[d:], src[nextEmit:]) + } + return d +} diff --git a/vendor/github.com/klauspost/compress/internal/snapref/snappy.go b/vendor/github.com/klauspost/compress/internal/snapref/snappy.go new file mode 100644 index 00000000000..34d01f4aa63 --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/snapref/snappy.go @@ -0,0 +1,98 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package snapref implements the Snappy compression format. It aims for very +// high speeds and reasonable compression. +// +// There are actually two Snappy formats: block and stream. They are related, +// but different: trying to decompress block-compressed data as a Snappy stream +// will fail, and vice versa. The block format is the Decode and Encode +// functions and the stream format is the Reader and Writer types. +// +// The block format, the more common case, is used when the complete size (the +// number of bytes) of the original data is known upfront, at the time +// compression starts. The stream format, also known as the framing format, is +// for when that isn't always true. +// +// The canonical, C++ implementation is at https://github.com/google/snappy and +// it only implements the block format. +package snapref + +import ( + "hash/crc32" +) + +/* +Each encoded block begins with the varint-encoded length of the decoded data, +followed by a sequence of chunks. Chunks begin and end on byte boundaries. The +first byte of each chunk is broken into its 2 least and 6 most significant bits +called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag. +Zero means a literal tag. All other values mean a copy tag. + +For literal tags: + - If m < 60, the next 1 + m bytes are literal bytes. + - Otherwise, let n be the little-endian unsigned integer denoted by the next + m - 59 bytes. The next 1 + n bytes after that are literal bytes. + +For copy tags, length bytes are copied from offset bytes ago, in the style of +Lempel-Ziv compression algorithms. In particular: + - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12). + The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10 + of the offset. The next byte is bits 0-7 of the offset. + - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65). + The length is 1 + m. The offset is the little-endian unsigned integer + denoted by the next 2 bytes. + - For l == 3, this tag is a legacy format that is no longer issued by most + encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in + [1, 65). The length is 1 + m. The offset is the little-endian unsigned + integer denoted by the next 4 bytes. +*/ +const ( + tagLiteral = 0x00 + tagCopy1 = 0x01 + tagCopy2 = 0x02 + tagCopy4 = 0x03 +) + +const ( + checksumSize = 4 + chunkHeaderSize = 4 + magicChunk = "\xff\x06\x00\x00" + magicBody + magicBody = "sNaPpY" + + // maxBlockSize is the maximum size of the input to encodeBlock. It is not + // part of the wire format per se, but some parts of the encoder assume + // that an offset fits into a uint16. + // + // Also, for the framing format (Writer type instead of Encode function), + // https://github.com/google/snappy/blob/master/framing_format.txt says + // that "the uncompressed data in a chunk must be no longer than 65536 + // bytes". + maxBlockSize = 65536 + + // maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is + // hard coded to be a const instead of a variable, so that obufLen can also + // be a const. Their equivalence is confirmed by + // TestMaxEncodedLenOfMaxBlockSize. + maxEncodedLenOfMaxBlockSize = 76490 + + obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize + obufLen = obufHeaderLen + maxEncodedLenOfMaxBlockSize +) + +const ( + chunkTypeCompressedData = 0x00 + chunkTypeUncompressedData = 0x01 + chunkTypePadding = 0xfe + chunkTypeStreamIdentifier = 0xff +) + +var crcTable = crc32.MakeTable(crc32.Castagnoli) + +// crc implements the checksum specified in section 3 of +// https://github.com/google/snappy/blob/master/framing_format.txt +func crc(b []byte) uint32 { + c := crc32.Update(0, crcTable, b) + return uint32(c>>15|c<<17) + 0xa282ead8 +} diff --git a/vendor/github.com/klauspost/compress/s2sx.mod b/vendor/github.com/klauspost/compress/s2sx.mod new file mode 100644 index 00000000000..2263853fcad --- /dev/null +++ b/vendor/github.com/klauspost/compress/s2sx.mod @@ -0,0 +1,4 @@ +module github.com/klauspost/compress + +go 1.16 + diff --git a/vendor/github.com/klauspost/compress/s2sx.sum b/vendor/github.com/klauspost/compress/s2sx.sum new file mode 100644 index 00000000000..e69de29bb2d diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md index 787813fa9e2..c8f0f16fc1e 100644 --- a/vendor/github.com/klauspost/compress/zstd/README.md +++ b/vendor/github.com/klauspost/compress/zstd/README.md @@ -152,7 +152,7 @@ file out level insize outsize millis mb/s silesia.tar zskp 1 211947520 73101992 643 313.87 silesia.tar zskp 2 211947520 67504318 969 208.38 silesia.tar zskp 3 211947520 64595893 2007 100.68 -silesia.tar zskp 4 211947520 60995370 7691 26.28 +silesia.tar zskp 4 211947520 60995370 8825 22.90 cgo zstd: silesia.tar zstd 1 211947520 73605392 543 371.56 @@ -162,7 +162,7 @@ silesia.tar zstd 9 211947520 60212393 5063 39.92 gzip, stdlib/this package: silesia.tar gzstd 1 211947520 80007735 1654 122.21 -silesia.tar gzkp 1 211947520 80369488 1168 173.06 +silesia.tar gzkp 1 211947520 80136201 1152 175.45 GOB stream of binary data. Highly compressible. https://files.klauspost.com/compress/gob-stream.7z @@ -171,13 +171,15 @@ file out level insize outsize millis mb/s gob-stream zskp 1 1911399616 235022249 3088 590.30 gob-stream zskp 2 1911399616 205669791 3786 481.34 gob-stream zskp 3 1911399616 175034659 9636 189.17 -gob-stream zskp 4 1911399616 167273881 29337 62.13 +gob-stream zskp 4 1911399616 165609838 50369 36.19 + gob-stream zstd 1 1911399616 249810424 2637 691.26 gob-stream zstd 3 1911399616 208192146 3490 522.31 gob-stream zstd 6 1911399616 193632038 6687 272.56 gob-stream zstd 9 1911399616 177620386 16175 112.70 + gob-stream gzstd 1 1911399616 357382641 10251 177.82 -gob-stream gzkp 1 1911399616 362156523 5695 320.08 +gob-stream gzkp 1 1911399616 359753026 5438 335.20 The test data for the Large Text Compression Benchmark is the first 10^9 bytes of the English Wikipedia dump on Mar. 3, 2006. @@ -187,11 +189,13 @@ file out level insize outsize millis mb/s enwik9 zskp 1 1000000000 343848582 3609 264.18 enwik9 zskp 2 1000000000 317276632 5746 165.97 enwik9 zskp 3 1000000000 292243069 12162 78.41 -enwik9 zskp 4 1000000000 275241169 36430 26.18 +enwik9 zskp 4 1000000000 262183768 82837 11.51 + enwik9 zstd 1 1000000000 358072021 3110 306.65 enwik9 zstd 3 1000000000 313734672 4784 199.35 enwik9 zstd 6 1000000000 295138875 10290 92.68 enwik9 zstd 9 1000000000 278348700 28549 33.40 + enwik9 gzstd 1 1000000000 382578136 9604 99.30 enwik9 gzkp 1 1000000000 383825945 6544 145.73 @@ -202,13 +206,15 @@ file out level insize outsize millis mb/s github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40 github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96 github-june-2days-2019.json zskp 3 6273951764 524340691 34043 175.75 -github-june-2days-2019.json zskp 4 6273951764 503314661 93811 63.78 +github-june-2days-2019.json zskp 4 6273951764 470320075 170190 35.16 + github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00 github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57 github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18 github-june-2days-2019.json zstd 9 6273951764 601974523 52413 114.16 + github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79 -github-june-2days-2019.json gzkp 1 6273951764 1128755542 19236 311.03 +github-june-2days-2019.json gzkp 1 6273951764 1125417694 21788 274.61 VM Image, Linux mint with a few installed applications: https://files.klauspost.com/compress/rawstudio-mint14.7z @@ -217,13 +223,15 @@ file out level insize outsize millis mb/s rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84 rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07 rawstudio-mint14.tar zskp 3 8558382592 3158085214 77675 105.08 -rawstudio-mint14.tar zskp 4 8558382592 3020370044 404956 20.16 +rawstudio-mint14.tar zskp 4 8558382592 2965110639 857750 9.52 + rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27 rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92 rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77 rawstudio-mint14.tar zstd 9 8558382592 3160778861 140946 57.91 + rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40 -rawstudio-mint14.tar gzkp 1 8558382592 3970463184 41749 195.49 +rawstudio-mint14.tar gzkp 1 8558382592 3962605659 45113 180.92 CSV data: https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst @@ -232,13 +240,15 @@ file out level insize outsize millis mb/s nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35 nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44 nyc-taxi-data-10M.csv zskp 3 3325605752 530289687 25239 125.66 -nyc-taxi-data-10M.csv zskp 4 3325605752 490907191 65939 48.10 +nyc-taxi-data-10M.csv zskp 4 3325605752 476268884 135958 23.33 + nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18 nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07 nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27 nyc-taxi-data-10M.csv zstd 9 3325605752 517554797 64565 49.12 + nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83 -nyc-taxi-data-10M.csv gzkp 1 3325605752 924718719 16388 193.53 +nyc-taxi-data-10M.csv gzkp 1 3325605752 922257165 16780 189.00 ``` ## Decompressor diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index 4d984c3b263..f430f58b572 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -260,9 +260,10 @@ func (d *Decoder) WriteTo(w io.Writer) (int64, error) { if len(d.current.b) > 0 { n2, err2 := w.Write(d.current.b) n += int64(n2) - if err2 != nil && d.current.err == nil { + if err2 != nil && (d.current.err == nil || d.current.err == io.EOF) { d.current.err = err2 - break + } else if n2 != len(d.current.b) { + d.current.err = io.ErrShortWrite } } if d.current.err != nil { diff --git a/vendor/github.com/klauspost/compress/zstd/enc_base.go b/vendor/github.com/klauspost/compress/zstd/enc_base.go index 60f29864864..295cd602a42 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_base.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go @@ -38,8 +38,8 @@ func (e *fastBase) AppendCRC(dst []byte) []byte { // WindowSize returns the window size of the encoder, // or a window size small enough to contain the input size, if > 0. -func (e *fastBase) WindowSize(size int) int32 { - if size > 0 && size < int(e.maxMatchOff) { +func (e *fastBase) WindowSize(size int64) int32 { + if size > 0 && size < int64(e.maxMatchOff) { b := int32(1) << uint(bits.Len(uint(size))) // Keep minimum window. if b < 1024 { diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go index b7d4b90047f..96028ecd836 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_best.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go @@ -5,22 +5,61 @@ package zstd import ( + "bytes" "fmt" - "math/bits" + + "github.com/klauspost/compress" ) const ( - bestLongTableBits = 20 // Bits used in the long match table + bestLongTableBits = 22 // Bits used in the long match table bestLongTableSize = 1 << bestLongTableBits // Size of the table + bestLongLen = 8 // Bytes used for table hash // Note: Increasing the short table bits or making the hash shorter // can actually lead to compression degradation since it will 'steal' more from the // long match table and match offsets are quite big. // This greatly depends on the type of input. - bestShortTableBits = 16 // Bits used in the short match table + bestShortTableBits = 18 // Bits used in the short match table bestShortTableSize = 1 << bestShortTableBits // Size of the table + bestShortLen = 4 // Bytes used for table hash + ) +type match struct { + offset int32 + s int32 + length int32 + rep int32 + est int32 +} + +const highScore = 25000 + +// estBits will estimate output bits from predefined tables. +func (m *match) estBits(bitsPerByte int32) { + mlc := mlCode(uint32(m.length - zstdMinMatch)) + var ofc uint8 + if m.rep < 0 { + ofc = ofCode(uint32(m.s-m.offset) + 3) + } else { + ofc = ofCode(uint32(m.rep)) + } + // Cost, excluding + ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc] + + // Add cost of match encoding... + m.est = int32(ofTT.outBits + mlTT.outBits) + m.est += int32(ofTT.deltaNbBits>>16 + mlTT.deltaNbBits>>16) + // Subtract savings compared to literal encoding... + m.est -= (m.length * bitsPerByte) >> 10 + if m.est > 0 { + // Unlikely gain.. + m.length = 0 + m.est = highScore + } +} + // bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches. // The long match table contains the previous entry with the same hash, // effectively making it a "chain" of length 2. @@ -109,6 +148,14 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) { return } + // Use this to estimate literal cost. + // Scaled by 10 bits. + bitsPerByte := int32((compress.ShannonEntropyBits(src) * 1024) / len(src)) + // Huffman can never go < 1 bit/byte + if bitsPerByte < 1024 { + bitsPerByte = 1024 + } + // Override src src = e.hist sLimit := int32(len(src)) - inputMargin @@ -145,51 +192,49 @@ encodeLoop: panic("offset0 was 0") } - type match struct { - offset int32 - s int32 - length int32 - rep int32 - } - matchAt := func(offset int32, s int32, first uint32, rep int32) match { - if s-offset >= e.maxMatchOff || load3232(src, offset) != first { - return match{offset: offset, s: s} - } - return match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep} - } - bestOf := func(a, b match) match { - aScore := b.s - a.s + a.length - bScore := a.s - b.s + b.length - if a.rep < 0 { - aScore = aScore - int32(bits.Len32(uint32(a.offset)))/8 - } - if b.rep < 0 { - bScore = bScore - int32(bits.Len32(uint32(b.offset)))/8 - } - if aScore >= bScore { + if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 { return a } return b } const goodEnough = 100 - nextHashL := hash8(cv, bestLongTableBits) - nextHashS := hash4x64(cv, bestShortTableBits) + nextHashL := hashLen(cv, bestLongTableBits, bestLongLen) + nextHashS := hashLen(cv, bestShortTableBits, bestShortLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] + matchAt := func(offset int32, s int32, first uint32, rep int32) match { + if s-offset >= e.maxMatchOff || load3232(src, offset) != first { + return match{s: s, est: highScore} + } + if debugAsserts { + if !bytes.Equal(src[s:s+4], src[offset:offset+4]) { + panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first)) + } + } + m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep} + m.estBits(bitsPerByte) + return m + } + best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)) best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1)) + if canRepeat && best.length < goodEnough { - best = bestOf(best, matchAt(s-offset1+1, s+1, uint32(cv>>8), 1)) - best = bestOf(best, matchAt(s-offset2+1, s+1, uint32(cv>>8), 2)) - best = bestOf(best, matchAt(s-offset3+1, s+1, uint32(cv>>8), 3)) + cv32 := uint32(cv >> 8) + spp := s + 1 + best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1)) + best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2)) + best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3)) if best.length > 0 { - best = bestOf(best, matchAt(s-offset1+3, s+3, uint32(cv>>24), 1)) - best = bestOf(best, matchAt(s-offset2+3, s+3, uint32(cv>>24), 2)) - best = bestOf(best, matchAt(s-offset3+3, s+3, uint32(cv>>24), 3)) + cv32 = uint32(cv >> 24) + spp += 2 + best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1)) + best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2)) + best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3)) } } // Load next and check... @@ -209,22 +254,28 @@ encodeLoop: } s++ - candidateS = e.table[hash4x64(cv>>8, bestShortTableBits)] + candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)] cv = load6432(src, s) cv2 := load6432(src, s+1) - candidateL = e.longTable[hash8(cv, bestLongTableBits)] - candidateL2 := e.longTable[hash8(cv2, bestLongTableBits)] + candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)] + candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)] + // Short at s+1 best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1)) + // Long at s+1, s+2 best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1)) best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1)) best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1)) best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1)) - + if false { + // Short at s+3. + // Too often worse... + best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)) + } // See if we can find a better match by checking where the current best ends. // Use that offset to see if we can find a better full match. if sAt := best.s + best.length; sAt < sLimit { - nextHashL := hash8(load6432(src, sAt), bestLongTableBits) + nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen) candidateEnd := e.longTable[nextHashL] if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 { bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1)) @@ -236,6 +287,12 @@ encodeLoop: } } + if debugAsserts { + if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) { + panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length])) + } + } + // We have a match, we can store the forward value if best.rep > 0 { s = best.s @@ -284,8 +341,8 @@ encodeLoop: off := index0 + e.cur for index0 < s-1 { cv0 := load6432(src, index0) - h0 := hash8(cv0, bestLongTableBits) - h1 := hash4x64(cv0, bestShortTableBits) + h0 := hashLen(cv0, bestLongTableBits, bestLongLen) + h1 := hashLen(cv0, bestShortTableBits, bestShortLen) e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset} off++ @@ -311,7 +368,7 @@ encodeLoop: panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) } - if debugAsserts && canRepeat && int(offset1) > len(src) { + if debugAsserts && int(offset1) > len(src) { panic("invalid offset") } @@ -352,8 +409,8 @@ encodeLoop: // every entry for index0 < s-1 { cv0 := load6432(src, index0) - h0 := hash8(cv0, bestLongTableBits) - h1 := hash4x64(cv0, bestShortTableBits) + h0 := hashLen(cv0, bestLongTableBits, bestLongLen) + h1 := hashLen(cv0, bestShortTableBits, bestShortLen) off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset} @@ -374,8 +431,8 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hash4x64(cv, bestShortTableBits) - nextHashL := hash8(cv, bestLongTableBits) + nextHashS := hashLen(cv, bestShortTableBits, bestShortLen) + nextHashL := hashLen(cv, bestLongTableBits, bestLongLen) // We have at least 4 byte match. // No need to check backwards. We come straight from a match @@ -425,7 +482,7 @@ func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { e.Encode(blk, src) } -// ResetDict will reset and set a dictionary if not nil +// Reset will reset and set a dictionary if not nil func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) { e.resetBase(d, singleBlock) if d == nil { @@ -441,10 +498,10 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) { const hashLog = bestShortTableBits cv := load6432(d.content, i-e.maxMatchOff) - nextHash := hash4x64(cv, hashLog) // 0 -> 4 - nextHash1 := hash4x64(cv>>8, hashLog) // 1 -> 5 - nextHash2 := hash4x64(cv>>16, hashLog) // 2 -> 6 - nextHash3 := hash4x64(cv>>24, hashLog) // 3 -> 7 + nextHash := hashLen(cv, hashLog, bestShortLen) // 0 -> 4 + nextHash1 := hashLen(cv>>8, hashLog, bestShortLen) // 1 -> 5 + nextHash2 := hashLen(cv>>16, hashLog, bestShortLen) // 2 -> 6 + nextHash3 := hashLen(cv>>24, hashLog, bestShortLen) // 3 -> 7 e.dictTable[nextHash] = prevEntry{ prev: e.dictTable[nextHash].offset, offset: i, @@ -472,7 +529,7 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) { } if len(d.content) >= 8 { cv := load6432(d.content, 0) - h := hash8(cv, bestLongTableBits) + h := hashLen(cv, bestLongTableBits, bestLongLen) e.dictLongTable[h] = prevEntry{ offset: e.maxMatchOff, prev: e.dictLongTable[h].offset, @@ -482,7 +539,7 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) { off := 8 // First to read for i := e.maxMatchOff + 1; i < end; i++ { cv = cv>>8 | (uint64(d.content[off]) << 56) - h := hash8(cv, bestLongTableBits) + h := hashLen(cv, bestLongTableBits, bestLongLen) e.dictLongTable[h] = prevEntry{ offset: i, prev: e.dictLongTable[h].offset, diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go index eab7b5083e4..602c05ee0c4 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_better.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -9,6 +9,7 @@ import "fmt" const ( betterLongTableBits = 19 // Bits used in the long match table betterLongTableSize = 1 << betterLongTableBits // Size of the table + betterLongLen = 8 // Bytes used for table hash // Note: Increasing the short table bits or making the hash shorter // can actually lead to compression degradation since it will 'steal' more from the @@ -16,6 +17,7 @@ const ( // This greatly depends on the type of input. betterShortTableBits = 13 // Bits used in the short match table betterShortTableSize = 1 << betterShortTableBits // Size of the table + betterShortLen = 5 // Bytes used for table hash betterLongTableShardCnt = 1 << (betterLongTableBits - dictShardBits) // Number of shards in the table betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard @@ -154,8 +156,8 @@ encodeLoop: panic("offset0 was 0") } - nextHashS := hash5(cv, betterShortTableBits) - nextHashL := hash8(cv, betterLongTableBits) + nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) + nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -214,10 +216,10 @@ encodeLoop: for index0 < s-1 { cv0 := load6432(src, index0) cv1 := cv0 >> 8 - h0 := hash8(cv0, betterLongTableBits) + h0 := hashLen(cv0, betterLongTableBits, betterLongLen) off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)} index0 += 2 } cv = load6432(src, s) @@ -275,10 +277,10 @@ encodeLoop: for index0 < s-1 { cv0 := load6432(src, index0) cv1 := cv0 >> 8 - h0 := hash8(cv0, betterLongTableBits) + h0 := hashLen(cv0, betterLongTableBits, betterLongLen) off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)} index0 += 2 } cv = load6432(src, s) @@ -353,7 +355,7 @@ encodeLoop: // See if we can find a long match at s+1 const checkAt = 1 cv := load6432(src, s+checkAt) - nextHashL = hash8(cv, betterLongTableBits) + nextHashL = hashLen(cv, betterLongTableBits, betterLongLen) candidateL = e.longTable[nextHashL] coffsetL = candidateL.offset - e.cur @@ -413,8 +415,8 @@ encodeLoop: } // Try to find a better match by searching for a long match at the end of the current best match - if true && s+matched < sLimit { - nextHashL := hash8(load6432(src, s+matched), betterLongTableBits) + if s+matched < sLimit { + nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen) cv := load3232(src, s) candidateL := e.longTable[nextHashL] coffsetL := candidateL.offset - e.cur - matched @@ -495,10 +497,10 @@ encodeLoop: for index0 < s-1 { cv0 := load6432(src, index0) cv1 := cv0 >> 8 - h0 := hash8(cv0, betterLongTableBits) + h0 := hashLen(cv0, betterLongTableBits, betterLongLen) off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} - e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)} index0 += 2 } @@ -516,8 +518,8 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hash5(cv, betterShortTableBits) - nextHashL := hash8(cv, betterLongTableBits) + nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) + nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) // We have at least 4 byte match. // No need to check backwards. We come straight from a match @@ -672,8 +674,8 @@ encodeLoop: panic("offset0 was 0") } - nextHashS := hash5(cv, betterShortTableBits) - nextHashL := hash8(cv, betterLongTableBits) + nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) + nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -734,11 +736,11 @@ encodeLoop: for index0 < s-1 { cv0 := load6432(src, index0) cv1 := cv0 >> 8 - h0 := hash8(cv0, betterLongTableBits) + h0 := hashLen(cv0, betterLongTableBits, betterLongLen) off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} e.markLongShardDirty(h0) - h1 := hash5(cv1, betterShortTableBits) + h1 := hashLen(cv1, betterShortTableBits, betterShortLen) e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} e.markShortShardDirty(h1) index0 += 2 @@ -798,11 +800,11 @@ encodeLoop: for index0 < s-1 { cv0 := load6432(src, index0) cv1 := cv0 >> 8 - h0 := hash8(cv0, betterLongTableBits) + h0 := hashLen(cv0, betterLongTableBits, betterLongLen) off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} e.markLongShardDirty(h0) - h1 := hash5(cv1, betterShortTableBits) + h1 := hashLen(cv1, betterShortTableBits, betterShortLen) e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} e.markShortShardDirty(h1) index0 += 2 @@ -879,7 +881,7 @@ encodeLoop: // See if we can find a long match at s+1 const checkAt = 1 cv := load6432(src, s+checkAt) - nextHashL = hash8(cv, betterLongTableBits) + nextHashL = hashLen(cv, betterLongTableBits, betterLongLen) candidateL = e.longTable[nextHashL] coffsetL = candidateL.offset - e.cur @@ -940,7 +942,7 @@ encodeLoop: } // Try to find a better match by searching for a long match at the end of the current best match if s+matched < sLimit { - nextHashL := hash8(load6432(src, s+matched), betterLongTableBits) + nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen) cv := load3232(src, s) candidateL := e.longTable[nextHashL] coffsetL := candidateL.offset - e.cur - matched @@ -1021,11 +1023,11 @@ encodeLoop: for index0 < s-1 { cv0 := load6432(src, index0) cv1 := cv0 >> 8 - h0 := hash8(cv0, betterLongTableBits) + h0 := hashLen(cv0, betterLongTableBits, betterLongLen) off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} e.markLongShardDirty(h0) - h1 := hash5(cv1, betterShortTableBits) + h1 := hashLen(cv1, betterShortTableBits, betterShortLen) e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} e.markShortShardDirty(h1) index0 += 2 @@ -1045,8 +1047,8 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hash5(cv, betterShortTableBits) - nextHashL := hash8(cv, betterLongTableBits) + nextHashS := hashLen(cv, betterShortTableBits, betterShortLen) + nextHashL := hashLen(cv, betterLongTableBits, betterLongLen) // We have at least 4 byte match. // No need to check backwards. We come straight from a match @@ -1113,10 +1115,10 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) { const hashLog = betterShortTableBits cv := load6432(d.content, i-e.maxMatchOff) - nextHash := hash5(cv, hashLog) // 0 -> 4 - nextHash1 := hash5(cv>>8, hashLog) // 1 -> 5 - nextHash2 := hash5(cv>>16, hashLog) // 2 -> 6 - nextHash3 := hash5(cv>>24, hashLog) // 3 -> 7 + nextHash := hashLen(cv, hashLog, betterShortLen) // 0 -> 4 + nextHash1 := hashLen(cv>>8, hashLog, betterShortLen) // 1 -> 5 + nextHash2 := hashLen(cv>>16, hashLog, betterShortLen) // 2 -> 6 + nextHash3 := hashLen(cv>>24, hashLog, betterShortLen) // 3 -> 7 e.dictTable[nextHash] = tableEntry{ val: uint32(cv), offset: i, @@ -1145,7 +1147,7 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) { } if len(d.content) >= 8 { cv := load6432(d.content, 0) - h := hash8(cv, betterLongTableBits) + h := hashLen(cv, betterLongTableBits, betterLongLen) e.dictLongTable[h] = prevEntry{ offset: e.maxMatchOff, prev: e.dictLongTable[h].offset, @@ -1155,7 +1157,7 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) { off := 8 // First to read for i := e.maxMatchOff + 1; i < end; i++ { cv = cv>>8 | (uint64(d.content[off]) << 56) - h := hash8(cv, betterLongTableBits) + h := hashLen(cv, betterLongTableBits, betterLongLen) e.dictLongTable[h] = prevEntry{ offset: i, prev: e.dictLongTable[h].offset, diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index 96b21b90e81..d6b3104240b 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -10,6 +10,7 @@ const ( dFastLongTableBits = 17 // Bits used in the long match table dFastLongTableSize = 1 << dFastLongTableBits // Size of the table dFastLongTableMask = dFastLongTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. + dFastLongLen = 8 // Bytes used for table hash dLongTableShardCnt = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table dLongTableShardSize = dFastLongTableSize / tableShardCnt // Size of an individual shard @@ -17,6 +18,8 @@ const ( dFastShortTableBits = tableBits // Bits used in the short match table dFastShortTableSize = 1 << dFastShortTableBits // Size of the table dFastShortTableMask = dFastShortTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. + dFastShortLen = 5 // Bytes used for table hash + ) type doubleFastEncoder struct { @@ -124,8 +127,8 @@ encodeLoop: panic("offset0 was 0") } - nextHashS := hash5(cv, dFastShortTableBits) - nextHashL := hash8(cv, dFastLongTableBits) + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) + nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -208,7 +211,7 @@ encodeLoop: // See if we can find a long match at s+1 const checkAt = 1 cv := load6432(src, s+checkAt) - nextHashL = hash8(cv, dFastLongTableBits) + nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen) candidateL = e.longTable[nextHashL] coffsetL = s - (candidateL.offset - e.cur) + checkAt @@ -304,16 +307,16 @@ encodeLoop: cv1 := load6432(src, index1) te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} - e.longTable[hash8(cv0, dFastLongTableBits)] = te0 - e.longTable[hash8(cv1, dFastLongTableBits)] = te1 + e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0 + e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1 cv0 >>= 8 cv1 >>= 8 te0.offset++ te1.offset++ te0.val = uint32(cv0) te1.val = uint32(cv1) - e.table[hash5(cv0, dFastShortTableBits)] = te0 - e.table[hash5(cv1, dFastShortTableBits)] = te1 + e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0 + e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1 cv = load6432(src, s) @@ -330,8 +333,8 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hash5(cv, dFastShortTableBits) - nextHashL := hash8(cv, dFastLongTableBits) + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) + nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) // We have at least 4 byte match. // No need to check backwards. We come straight from a match @@ -436,8 +439,8 @@ encodeLoop: var t int32 for { - nextHashS := hash5(cv, dFastShortTableBits) - nextHashL := hash8(cv, dFastLongTableBits) + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) + nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -521,7 +524,7 @@ encodeLoop: // See if we can find a long match at s+1 const checkAt = 1 cv := load6432(src, s+checkAt) - nextHashL = hash8(cv, dFastLongTableBits) + nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen) candidateL = e.longTable[nextHashL] coffsetL = s - (candidateL.offset - e.cur) + checkAt @@ -614,16 +617,16 @@ encodeLoop: cv1 := load6432(src, index1) te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} - e.longTable[hash8(cv0, dFastLongTableBits)] = te0 - e.longTable[hash8(cv1, dFastLongTableBits)] = te1 + e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0 + e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1 cv0 >>= 8 cv1 >>= 8 te0.offset++ te1.offset++ te0.val = uint32(cv0) te1.val = uint32(cv1) - e.table[hash5(cv0, dFastShortTableBits)] = te0 - e.table[hash5(cv1, dFastShortTableBits)] = te1 + e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0 + e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1 cv = load6432(src, s) @@ -640,8 +643,8 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hash5(cv1>>8, dFastShortTableBits) - nextHashL := hash8(cv, dFastLongTableBits) + nextHashS := hashLen(cv1>>8, dFastShortTableBits, dFastShortLen) + nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) // We have at least 4 byte match. // No need to check backwards. We come straight from a match @@ -782,8 +785,8 @@ encodeLoop: panic("offset0 was 0") } - nextHashS := hash5(cv, dFastShortTableBits) - nextHashL := hash8(cv, dFastLongTableBits) + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) + nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) candidateL := e.longTable[nextHashL] candidateS := e.table[nextHashS] @@ -868,7 +871,7 @@ encodeLoop: // See if we can find a long match at s+1 const checkAt = 1 cv := load6432(src, s+checkAt) - nextHashL = hash8(cv, dFastLongTableBits) + nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen) candidateL = e.longTable[nextHashL] coffsetL = s - (candidateL.offset - e.cur) + checkAt @@ -965,8 +968,8 @@ encodeLoop: cv1 := load6432(src, index1) te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} - longHash1 := hash8(cv0, dFastLongTableBits) - longHash2 := hash8(cv0, dFastLongTableBits) + longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen) + longHash2 := hashLen(cv0, dFastLongTableBits, dFastLongLen) e.longTable[longHash1] = te0 e.longTable[longHash2] = te1 e.markLongShardDirty(longHash1) @@ -977,8 +980,8 @@ encodeLoop: te1.offset++ te0.val = uint32(cv0) te1.val = uint32(cv1) - hashVal1 := hash5(cv0, dFastShortTableBits) - hashVal2 := hash5(cv1, dFastShortTableBits) + hashVal1 := hashLen(cv0, dFastShortTableBits, dFastShortLen) + hashVal2 := hashLen(cv1, dFastShortTableBits, dFastShortLen) e.table[hashVal1] = te0 e.markShardDirty(hashVal1) e.table[hashVal2] = te1 @@ -999,8 +1002,8 @@ encodeLoop: } // Store this, since we have it. - nextHashS := hash5(cv, dFastShortTableBits) - nextHashL := hash8(cv, dFastLongTableBits) + nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen) + nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen) // We have at least 4 byte match. // No need to check backwards. We come straight from a match @@ -1071,14 +1074,14 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) { } if len(d.content) >= 8 { cv := load6432(d.content, 0) - e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{ + e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{ val: uint32(cv), offset: e.maxMatchOff, } end := int32(len(d.content)) - 8 + e.maxMatchOff for i := e.maxMatchOff + 1; i < end; i++ { cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56) - e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{ + e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{ val: uint32(cv), offset: i, } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go index 2246d286dc6..f2502629bc5 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go @@ -11,12 +11,13 @@ import ( ) const ( - tableBits = 15 // Bits used in the table - tableSize = 1 << tableBits // Size of the table - tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table - tableShardSize = tableSize / tableShardCnt // Size of an individual shard - tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. - maxMatchLength = 131074 + tableBits = 15 // Bits used in the table + tableSize = 1 << tableBits // Size of the table + tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table + tableShardSize = tableSize / tableShardCnt // Size of an individual shard + tableFastHashLen = 6 + tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. + maxMatchLength = 131074 ) type tableEntry struct { @@ -122,8 +123,8 @@ encodeLoop: panic("offset0 was 0") } - nextHash := hash6(cv, hashLog) - nextHash2 := hash6(cv>>8, hashLog) + nextHash := hashLen(cv, hashLog, tableFastHashLen) + nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen) candidate := e.table[nextHash] candidate2 := e.table[nextHash2] repIndex := s - offset1 + 2 @@ -301,7 +302,7 @@ encodeLoop: } // Store this, since we have it. - nextHash := hash6(cv, hashLog) + nextHash := hashLen(cv, hashLog, tableFastHashLen) e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} seq.matchLen = uint32(l) - zstdMinMatch seq.litLen = 0 @@ -405,8 +406,8 @@ encodeLoop: // By not using them for the first 3 matches for { - nextHash := hash6(cv, hashLog) - nextHash2 := hash6(cv>>8, hashLog) + nextHash := hashLen(cv, hashLog, tableFastHashLen) + nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen) candidate := e.table[nextHash] candidate2 := e.table[nextHash2] repIndex := s - offset1 + 2 @@ -589,7 +590,7 @@ encodeLoop: } // Store this, since we have it. - nextHash := hash6(cv, hashLog) + nextHash := hashLen(cv, hashLog, tableFastHashLen) e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} seq.matchLen = uint32(l) - zstdMinMatch seq.litLen = 0 @@ -715,8 +716,8 @@ encodeLoop: panic("offset0 was 0") } - nextHash := hash6(cv, hashLog) - nextHash2 := hash6(cv>>8, hashLog) + nextHash := hashLen(cv, hashLog, tableFastHashLen) + nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen) candidate := e.table[nextHash] candidate2 := e.table[nextHash2] repIndex := s - offset1 + 2 @@ -896,7 +897,7 @@ encodeLoop: } // Store this, since we have it. - nextHash := hash6(cv, hashLog) + nextHash := hashLen(cv, hashLog, tableFastHashLen) e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} e.markShardDirty(nextHash) seq.matchLen = uint32(l) - zstdMinMatch @@ -957,9 +958,9 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) { const hashLog = tableBits cv := load6432(d.content, i-e.maxMatchOff) - nextHash := hash6(cv, hashLog) // 0 -> 5 - nextHash1 := hash6(cv>>8, hashLog) // 1 -> 6 - nextHash2 := hash6(cv>>16, hashLog) // 2 -> 7 + nextHash := hashLen(cv, hashLog, tableFastHashLen) // 0 -> 5 + nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 6 + nextHash2 := hashLen(cv>>16, hashLog, tableFastHashLen) // 2 -> 7 e.dictTable[nextHash] = tableEntry{ val: uint32(cv), offset: i, diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index ea85548fc9a..e6e315969b0 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -33,7 +33,7 @@ type encoder interface { Block() *blockEnc CRC() *xxhash.Digest AppendCRC([]byte) []byte - WindowSize(size int) int32 + WindowSize(size int64) int32 UseBlock(*blockEnc) Reset(d *dict, singleBlock bool) } @@ -48,6 +48,8 @@ type encoderState struct { err error writeErr error nWritten int64 + nInput int64 + frameContentSize int64 headerWritten bool eofWritten bool fullFrameWritten bool @@ -120,7 +122,21 @@ func (e *Encoder) Reset(w io.Writer) { s.w = w s.err = nil s.nWritten = 0 + s.nInput = 0 s.writeErr = nil + s.frameContentSize = 0 +} + +// ResetContentSize will reset and set a content size for the next stream. +// If the bytes written does not match the size given an error will be returned +// when calling Close(). +// This is removed when Reset is called. +// Sizes <= 0 results in no content size set. +func (e *Encoder) ResetContentSize(w io.Writer, size int64) { + e.Reset(w) + if size >= 0 { + e.state.frameContentSize = size + } } // Write data to the encoder. @@ -190,6 +206,7 @@ func (e *Encoder) nextBlock(final bool) error { return s.err } s.nWritten += int64(n2) + s.nInput += int64(len(s.filling)) s.current = s.current[:0] s.filling = s.filling[:0] s.headerWritten = true @@ -200,8 +217,8 @@ func (e *Encoder) nextBlock(final bool) error { var tmp [maxHeaderSize]byte fh := frameHeader{ - ContentSize: 0, - WindowSize: uint32(s.encoder.WindowSize(0)), + ContentSize: uint64(s.frameContentSize), + WindowSize: uint32(s.encoder.WindowSize(s.frameContentSize)), SingleSegment: false, Checksum: e.o.crc, DictID: e.o.dict.ID(), @@ -243,6 +260,7 @@ func (e *Encoder) nextBlock(final bool) error { // Move blocks forward. s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current + s.nInput += int64(len(s.current)) s.wg.Add(1) go func(src []byte) { if debugEncoder { @@ -394,6 +412,11 @@ func (e *Encoder) Close() error { if err != nil { return err } + if s.frameContentSize > 0 { + if s.nInput != s.frameContentSize { + return fmt.Errorf("frame content size %d given, but %d bytes was written", s.frameContentSize, s.nInput) + } + } if e.state.fullFrameWritten { return s.err } @@ -470,7 +493,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } fh := frameHeader{ ContentSize: uint64(len(src)), - WindowSize: uint32(enc.WindowSize(len(src))), + WindowSize: uint32(enc.WindowSize(int64(len(src)))), SingleSegment: single, Checksum: e.o.crc, DictID: e.o.dict.ID(), diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go index 16d4ab63c19..7d29e1d689e 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -189,7 +189,7 @@ func EncoderLevelFromZstd(level int) EncoderLevel { case level >= 6 && level < 10: return SpeedBetterCompression case level >= 10: - return SpeedBetterCompression + return SpeedBestCompression } return SpeedDefault } diff --git a/vendor/github.com/klauspost/compress/zstd/hash.go b/vendor/github.com/klauspost/compress/zstd/hash.go index 4a752067fc9..cf33f29a1b4 100644 --- a/vendor/github.com/klauspost/compress/zstd/hash.go +++ b/vendor/github.com/klauspost/compress/zstd/hash.go @@ -13,24 +13,24 @@ const ( prime8bytes = 0xcf1bbcdcb7a56463 ) -// hashLen returns a hash of the lowest l bytes of u for a size size of h bytes. -// l must be >=4 and <=8. Any other value will return hash for 4 bytes. -// h should always be <32. -// Preferably h and l should be a constant. -// FIXME: This does NOT get resolved, if 'mls' is constant, -// so this cannot be used. -func hashLen(u uint64, hashLog, mls uint8) uint32 { +// hashLen returns a hash of the lowest mls bytes of with length output bits. +// mls must be >=3 and <=8. Any other value will return hash for 4 bytes. +// length should always be < 32. +// Preferably length and mls should be a constant for inlining. +func hashLen(u uint64, length, mls uint8) uint32 { switch mls { + case 3: + return (uint32(u<<8) * prime3bytes) >> (32 - length) case 5: - return hash5(u, hashLog) + return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length)) case 6: - return hash6(u, hashLog) + return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length)) case 7: - return hash7(u, hashLog) + return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length)) case 8: - return hash8(u, hashLog) + return uint32((u * prime8bytes) >> (64 - length)) default: - return hash4x64(u, hashLog) + return (uint32(u) * prime4bytes) >> (32 - length) } } @@ -39,39 +39,3 @@ func hashLen(u uint64, hashLog, mls uint8) uint32 { func hash3(u uint32, h uint8) uint32 { return ((u << (32 - 24)) * prime3bytes) >> ((32 - h) & 31) } - -// hash4 returns the hash of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <32. -func hash4(u uint32, h uint8) uint32 { - return (u * prime4bytes) >> ((32 - h) & 31) -} - -// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <32. -func hash4x64(u uint64, h uint8) uint32 { - return (uint32(u) * prime4bytes) >> ((32 - h) & 31) -} - -// hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <64. -func hash5(u uint64, h uint8) uint32 { - return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63)) -} - -// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <64. -func hash6(u uint64, h uint8) uint32 { - return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63)) -} - -// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <64. -func hash7(u uint64, h uint8) uint32 { - return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63)) -} - -// hash8 returns the hash of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <64. -func hash8(u uint64, h uint8) uint32 { - return uint32((u * prime8bytes) >> ((64 - h) & 63)) -} diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go index 35318d7c46c..3ddbd5c0b0a 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go @@ -1,6 +1,5 @@ -// +build !appengine -// +build gc -// +build !purego +//go:build !appengine && gc && !purego +// +build !appengine,gc,!purego package xxhash diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go index 4a5a821603e..1f52f296e71 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go @@ -1,3 +1,4 @@ +//go:build !amd64 || appengine || !gc || purego // +build !amd64 appengine !gc purego package xxhash diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go index 0372b1714a7..9e1baad73be 100644 --- a/vendor/github.com/klauspost/compress/zstd/snappy.go +++ b/vendor/github.com/klauspost/compress/zstd/snappy.go @@ -10,8 +10,8 @@ import ( "hash/crc32" "io" - "github.com/golang/snappy" "github.com/klauspost/compress/huff0" + snappy "github.com/klauspost/compress/internal/snapref" ) const ( diff --git a/vendor/github.com/klauspost/compress/zstd/zip.go b/vendor/github.com/klauspost/compress/zstd/zip.go index 9325b928ae2..967f29b3120 100644 --- a/vendor/github.com/klauspost/compress/zstd/zip.go +++ b/vendor/github.com/klauspost/compress/zstd/zip.go @@ -64,8 +64,9 @@ func (r *pooledZipReader) Close() error { } type pooledZipWriter struct { - mu sync.Mutex // guards Close and Read - enc *Encoder + mu sync.Mutex // guards Close and Read + enc *Encoder + pool *sync.Pool } func (w *pooledZipWriter) Write(p []byte) (n int, err error) { @@ -83,7 +84,7 @@ func (w *pooledZipWriter) Close() error { var err error if w.enc != nil { err = w.enc.Close() - zipReaderPool.Put(w.enc) + w.pool.Put(w.enc) w.enc = nil } return err @@ -104,7 +105,7 @@ func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) { return nil, err } } - return &pooledZipWriter{enc: enc}, nil + return &pooledZipWriter{enc: enc, pool: &pool}, nil } } diff --git a/vendor/modules.txt b/vendor/modules.txt index 5e661c2b6b7..6052a57ebcb 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -751,12 +751,14 @@ github.com/jsternberg/zap-logfmt # github.com/julienschmidt/httprouter v1.3.0 ## explicit; go 1.7 github.com/julienschmidt/httprouter -# github.com/klauspost/compress v1.13.1 -## explicit; go 1.13 +# github.com/klauspost/compress v1.13.5 +## explicit; go 1.15 +github.com/klauspost/compress github.com/klauspost/compress/flate github.com/klauspost/compress/fse github.com/klauspost/compress/gzip github.com/klauspost/compress/huff0 +github.com/klauspost/compress/internal/snapref github.com/klauspost/compress/zstd github.com/klauspost/compress/zstd/internal/xxhash # github.com/klauspost/cpuid v1.3.1