zstd: Skip random data 2x faster (#147)
If we did not get a single match in a full block, assume that the data cannot be entropy compressed.

It is very likely that the data is random, and in the rare case it is not, we can live with it.
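
The check is just a pair of length comparisons. A minimal sketch of the heuristic, assuming the quantities available after the match search (the standalone helper `looksIncompressible` is illustrative only; in the diff below the check is inlined):
```go
// looksIncompressible reports whether entropy coding should be skipped.
// If the match search over a full block found nothing, every input byte
// is still an unmatched literal, so the block is almost certainly random.
// Illustrative helper; the real check is inlined in nextBlock/EncodeAll below.
func looksIncompressible(srcLen, literalLen, blockSize int) bool {
	return srcLen == literalLen && srcLen == blockSize
}
```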

Before:
```
BenchmarkRandomEncodeAllFastest-12           100          10475190 ns/op        1001.01 MB/s           0 B/op          0 allocs/op
BenchmarkRandomEncodeAllDefault-12           100          11075653 ns/op         946.74 MB/s           0 B/op          0 allocs/op
BenchmarkRandomEncoderFastest-12             200           8126950 ns/op        1290.25 MB/s         139 B/op          2 allocs/op
BenchmarkRandomEncoderDefault-12             200           8059989 ns/op        1300.96 MB/s          90 B/op          2 allocs/op
```

After:
```
BenchmarkRandomEncodeAllFastest-12           300           4425967 ns/op        2369.15 MB/s           0 B/op          0 allocs/op
BenchmarkRandomEncodeAllDefault-12           300           5108791 ns/op        2052.49 MB/s           0 B/op          0 allocs/op
BenchmarkRandomEncoderFastest-12             300           5085627 ns/op        2061.84 MB/s          89 B/op          2 allocs/op
BenchmarkRandomEncoderDefault-12             300           5947031 ns/op        1763.19 MB/s          65 B/op          2 allocs/op
```
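
For callers, the API and the compressed output are unchanged; a block that fails the check is stored raw, so random input grows only by the small frame and block headers. A minimal standalone sketch of the public API on random input (import path assumed from the upstream repository; the printed overhead is illustrative):
```go
package main

import (
	"fmt"
	"math/rand"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Fill a buffer with pseudo-random (incompressible) bytes.
	rng := rand.New(rand.NewSource(1))
	data := make([]byte, 1<<20)
	for i := range data {
		data[i] = uint8(rng.Intn(256))
	}
	enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest))
	if err != nil {
		panic(err)
	}
	dst := enc.EncodeAll(data, nil)
	// Random data cannot shrink; expect output only slightly larger
	// than the input, stored as raw blocks.
	fmt.Printf("in=%d out=%d overhead=%d bytes\n", len(data), len(dst), len(dst)-len(data))
}
```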
klauspost committed Aug 14, 2019
1 parent 730c27a commit f82c96c
Showing 3 changed files with 115 additions and 3 deletions.
README.md: 3 changes (2 additions & 1 deletion)
@@ -13,7 +13,8 @@ It offers slightly better compression at lower compression settings, and up to 3
 [![Sourcegraph Badge](https://sourcegraph.com/github.com/klauspost/compress/-/badge.svg)](https://sourcegraph.com/github.com/klauspost/compress?badge)

 # changelog
-* Aug4, 2019 (v1.7.5): Better literal compression. [#146](https://github.com/klauspost/compress/pull/146)
+* Aug 14, 2019: zstd: Skip incompressible data 2x faster. [#147](https://github.com/klauspost/compress/pull/147)
+* Aug 4, 2019 (v1.7.5): Better literal compression. [#146](https://github.com/klauspost/compress/pull/146)
 * Aug 4, 2019: Faster zstd compression. [#143](https://github.com/klauspost/compress/pull/143) [#144](https://github.com/klauspost/compress/pull/144)
 * Aug 4, 2019: Faster zstd decompression. [#145](https://github.com/klauspost/compress/pull/145) [#143](https://github.com/klauspost/compress/pull/143) [#142](https://github.com/klauspost/compress/pull/142)
 * July 15, 2019 (v1.7.4): Fix double EOF block in rare cases on zstd encoder.
zstd/encoder.go: 15 changes (13 additions & 2 deletions)
@@ -257,7 +257,12 @@ func (e *Encoder) nextBlock(final bool) error {
 			}
 			s.wWg.Done()
 		}()
-		err := blk.encode()
+		err := errIncompressible
+		// If we got the exact same number of literals as input,
+		// assume the literals cannot be compressed.
+		if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
+			err = blk.encode()
+		}
 		switch err {
 		case errIncompressible:
 			if debug {
@@ -444,7 +449,13 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 		if len(src) == 0 {
 			blk.last = true
 		}
-		err := blk.encode()
+		err := errIncompressible
+		// If we got the exact same number of literals as input,
+		// assume the literals cannot be compressed.
+		if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize {
+			err = blk.encode()
+		}
+
 		switch err {
 		case errIncompressible:
 			if debug {
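Note the `e.o.blockSize` guard in both hunks: the shortcut fires only when a full-sized block produced zero matches. A short final block is never skipped on this evidence alone, so small inputs still get a normal encode attempt.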
zstd/encoder_test.go: 100 changes (100 additions & 0 deletions)
@@ -723,3 +723,103 @@ func BenchmarkEncoder_EncodeAllPi(b *testing.B) {
		}
	}
}

func BenchmarkRandomEncodeAllFastest(b *testing.B) {
	rng := rand.New(rand.NewSource(1))
	data := make([]byte, 10<<20)
	for i := range data {
		data[i] = uint8(rng.Intn(256))
	}
	enc, _ := NewWriter(nil, WithEncoderLevel(SpeedFastest), WithEncoderConcurrency(1))
	dst := enc.EncodeAll(data, nil)
	wantSize := len(dst)
	b.ResetTimer()
	b.ReportAllocs()
	b.SetBytes(int64(len(data)))
	for i := 0; i < b.N; i++ {
		dst := enc.EncodeAll(data, dst[:0])
		if len(dst) != wantSize {
			b.Fatal(len(dst), "!=", wantSize)
		}
	}
}

func BenchmarkRandomEncodeAllDefault(b *testing.B) {
	rng := rand.New(rand.NewSource(1))
	data := make([]byte, 10<<20)
	for i := range data {
		data[i] = uint8(rng.Intn(256))
	}
	enc, _ := NewWriter(nil, WithEncoderLevel(SpeedDefault), WithEncoderConcurrency(1))
	dst := enc.EncodeAll(data, nil)
	wantSize := len(dst)
	b.ResetTimer()
	b.ReportAllocs()
	b.SetBytes(int64(len(data)))
	for i := 0; i < b.N; i++ {
		dst := enc.EncodeAll(data, dst[:0])
		if len(dst) != wantSize {
			b.Fatal(len(dst), "!=", wantSize)
		}
	}
}

func BenchmarkRandomEncoderFastest(b *testing.B) {
	rng := rand.New(rand.NewSource(1))
	data := make([]byte, 10<<20)
	for i := range data {
		data[i] = uint8(rng.Intn(256))
	}
	wantSize := int64(len(data))
	enc, _ := NewWriter(ioutil.Discard, WithEncoderLevel(SpeedFastest))
	n, err := io.Copy(enc, bytes.NewBuffer(data))
	if err != nil {
		b.Fatal(err)
	}
	if n != wantSize {
		b.Fatal(n, "!=", wantSize)
	}
	b.ResetTimer()
	b.ReportAllocs()
	b.SetBytes(wantSize)
	for i := 0; i < b.N; i++ {
		enc.Reset(ioutil.Discard)
		n, err := io.Copy(enc, bytes.NewBuffer(data))
		if err != nil {
			b.Fatal(err)
		}
		if n != wantSize {
			b.Fatal(n, "!=", wantSize)
		}
	}
}

func BenchmarkRandomEncoderDefault(b *testing.B) {
	rng := rand.New(rand.NewSource(1))
	data := make([]byte, 10<<20)
	for i := range data {
		data[i] = uint8(rng.Intn(256))
	}
	wantSize := int64(len(data))
	enc, _ := NewWriter(ioutil.Discard, WithEncoderLevel(SpeedDefault))
	n, err := io.Copy(enc, bytes.NewBuffer(data))
	if err != nil {
		b.Fatal(err)
	}
	if n != wantSize {
		b.Fatal(n, "!=", wantSize)
	}
	b.ResetTimer()
	b.ReportAllocs()
	b.SetBytes(wantSize)
	for i := 0; i < b.N; i++ {
		enc.Reset(ioutil.Discard)
		n, err := io.Copy(enc, bytes.NewBuffer(data))
		if err != nil {
			b.Fatal(err)
		}
		if n != wantSize {
			b.Fatal(n, "!=", wantSize)
		}
	}
}
