diff --git a/README.md b/README.md
index 15a8244b82..b612a1d7ed 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,8 @@ It offers slightly better compression at lower compression settings, and up to 3
 [![Sourcegraph Badge](https://sourcegraph.com/github.com/klauspost/compress/-/badge.svg)](https://sourcegraph.com/github.com/klauspost/compress?badge)
 
 # changelog
-* Aug4, 2019 (v1.7.5): Better literal compression. [#146](https://github.com/klauspost/compress/pull/146)
+* Aug 14, 2019: zstd: Skip incompressible data 2x faster. [#147](https://github.com/klauspost/compress/pull/147)
+* Aug 4, 2019 (v1.7.5): Better literal compression. [#146](https://github.com/klauspost/compress/pull/146)
 * Aug 4, 2019: Faster zstd compression. [#143](https://github.com/klauspost/compress/pull/143) [#144](https://github.com/klauspost/compress/pull/144)
 * Aug 4, 2019: Faster zstd decompression. [#145](https://github.com/klauspost/compress/pull/145) [#143](https://github.com/klauspost/compress/pull/143) [#142](https://github.com/klauspost/compress/pull/142)
 * July 15, 2019 (v1.7.4): Fix double EOF block in rare cases on zstd encoder.
diff --git a/zstd/encoder.go b/zstd/encoder.go
index f1dfef800a..b7011be29a 100644
--- a/zstd/encoder.go
+++ b/zstd/encoder.go
@@ -257,7 +257,12 @@ func (e *Encoder) nextBlock(final bool) error {
 			}
 			s.wWg.Done()
 		}()
-		err := blk.encode()
+		err := errIncompressible
+		// If we got the exact same number of literals as input,
+		// assume the literals cannot be compressed.
+		if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
+			err = blk.encode()
+		}
 		switch err {
 		case errIncompressible:
 			if debug {
@@ -444,7 +449,13 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 		if len(src) == 0 {
 			blk.last = true
 		}
-		err := blk.encode()
+		err := errIncompressible
+		// If we got the exact same number of literals as input,
+		// assume the literals cannot be compressed.
+		if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize {
+			err = blk.encode()
+		}
+
 		switch err {
 		case errIncompressible:
 			if debug {
diff --git a/zstd/encoder_test.go b/zstd/encoder_test.go
index 8f823f45ec..f1c06b1af0 100644
--- a/zstd/encoder_test.go
+++ b/zstd/encoder_test.go
@@ -723,3 +723,103 @@ func BenchmarkEncoder_EncodeAllPi(b *testing.B) {
 		}
 	}
 }
+
+func BenchmarkRandomEncodeAllFastest(b *testing.B) {
+	rng := rand.New(rand.NewSource(1))
+	data := make([]byte, 10<<20)
+	for i := range data {
+		data[i] = uint8(rng.Intn(256))
+	}
+	enc, _ := NewWriter(nil, WithEncoderLevel(SpeedFastest), WithEncoderConcurrency(1))
+	dst := enc.EncodeAll(data, nil)
+	wantSize := len(dst)
+	b.ResetTimer()
+	b.ReportAllocs()
+	b.SetBytes(int64(len(data)))
+	for i := 0; i < b.N; i++ {
+		dst := enc.EncodeAll(data, dst[:0])
+		if len(dst) != wantSize {
+			b.Fatal(len(dst), "!=", wantSize)
+		}
+	}
+}
+
+func BenchmarkRandomEncodeAllDefault(b *testing.B) {
+	rng := rand.New(rand.NewSource(1))
+	data := make([]byte, 10<<20)
+	for i := range data {
+		data[i] = uint8(rng.Intn(256))
+	}
+	enc, _ := NewWriter(nil, WithEncoderLevel(SpeedDefault), WithEncoderConcurrency(1))
+	dst := enc.EncodeAll(data, nil)
+	wantSize := len(dst)
+	b.ResetTimer()
+	b.ReportAllocs()
+	b.SetBytes(int64(len(data)))
+	for i := 0; i < b.N; i++ {
+		dst := enc.EncodeAll(data, dst[:0])
+		if len(dst) != wantSize {
+			b.Fatal(len(dst), "!=", wantSize)
+		}
+	}
+}
+
+func BenchmarkRandomEncoderFastest(b *testing.B) {
+	rng := rand.New(rand.NewSource(1))
+	data := make([]byte, 10<<20)
+	for i := range data {
+		data[i] = uint8(rng.Intn(256))
+	}
+	wantSize := int64(len(data))
+	enc, _ := NewWriter(ioutil.Discard, WithEncoderLevel(SpeedFastest))
+	n, err := io.Copy(enc, bytes.NewBuffer(data))
+	if err != nil {
+		b.Fatal(err)
+	}
+	if n != wantSize {
+		b.Fatal(n, "!=", wantSize)
+	}
+	b.ResetTimer()
+	b.ReportAllocs()
+	b.SetBytes(wantSize)
+	for i := 0; i < b.N; i++ {
+		enc.Reset(ioutil.Discard)
+		n, err := io.Copy(enc, bytes.NewBuffer(data))
+		if err != nil {
+			b.Fatal(err)
+		}
+		if n != wantSize {
+			b.Fatal(n, "!=", wantSize)
+		}
+	}
+}
+
+func BenchmarkRandomEncoderDefault(b *testing.B) {
+	rng := rand.New(rand.NewSource(1))
+	data := make([]byte, 10<<20)
+	for i := range data {
+		data[i] = uint8(rng.Intn(256))
+	}
+	wantSize := int64(len(data))
+	enc, _ := NewWriter(ioutil.Discard, WithEncoderLevel(SpeedDefault))
+	n, err := io.Copy(enc, bytes.NewBuffer(data))
+	if err != nil {
+		b.Fatal(err)
+	}
+	if n != wantSize {
+		b.Fatal(n, "!=", wantSize)
+	}
+	b.ResetTimer()
+	b.ReportAllocs()
+	b.SetBytes(wantSize)
+	for i := 0; i < b.N; i++ {
+		enc.Reset(ioutil.Discard)
+		n, err := io.Copy(enc, bytes.NewBuffer(data))
+		if err != nil {
+			b.Fatal(err)
+		}
+		if n != wantSize {
+			b.Fatal(n, "!=", wantSize)
+		}
+	}
+}
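
Not part of the patch above: a minimal standalone sketch of the case this change targets. When a block's literals are exactly as long as the input (no matches found) and the input fills a whole block, the encoder now skips the entropy-coding attempt and writes a raw block directly; random input was already being stored as raw blocks, so the output stays roughly the same and only the detection cost drops. The sketch uses the package's public API (NewWriter, EncodeAll, WithEncoderLevel, SpeedFastest as used in the benchmarks above; NewReader/DecodeAll assumed from the package docs for the round-trip check).

```go
package main

import (
	"bytes"
	"fmt"
	"math/rand"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// 1 MiB of pseudo-random bytes: effectively incompressible, so every
	// full block ends up as pure literals and hits the new fast path.
	rng := rand.New(rand.NewSource(1))
	data := make([]byte, 1<<20)
	for i := range data {
		data[i] = uint8(rng.Intn(256))
	}

	// nil writer: the encoder is only used through EncodeAll.
	enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest))
	if err != nil {
		panic(err)
	}
	dst := enc.EncodeAll(data, nil)
	fmt.Printf("in %d bytes, out %d bytes\n", len(data), len(dst))

	// Round-trip to confirm the raw-block output is still a valid frame.
	dec, err := zstd.NewReader(nil)
	if err != nil {
		panic(err)
	}
	defer dec.Close()
	got, err := dec.DecodeAll(dst, nil)
	if err != nil || !bytes.Equal(got, data) {
		panic("round trip mismatch")
	}
}
```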