Skip to content

Commit

Permalink
zstd: Use single stream literal encoding (#146)
Browse files Browse the repository at this point in the history
Use single stream literal encoding for small blocks.

Saves a few bytes per block and tiny bit faster.
  • Loading branch information
klauspost committed Aug 4, 2019
1 parent 763de0b commit 606f373
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 13 deletions.
60 changes: 60 additions & 0 deletions huff0/compress_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -519,3 +519,63 @@ func BenchmarkCompress4XReusePrefer(b *testing.B) {
})
}
}

func BenchmarkCompress1XSizes(b *testing.B) {
test := testfiles[0]
sizes := []int{1e2, 2e2, 5e2, 1e3, 5e3, 1e4, 5e4}
for _, size := range sizes {
b.Run(test.name+"-"+fmt.Sprint(size), func(b *testing.B) {
var s Scratch
s.Reuse = ReusePolicyNone
buf0, err := test.fn()
if err != nil {
b.Fatal(err)
}
buf0 = buf0[:size]
_, re, err := Compress1X(buf0, &s)
if err != test.err1X {
b.Fatal("unexpected error:", err)
}
//b.Log("Size:", len(o))
b.ResetTimer()
b.ReportAllocs()
b.SetBytes(int64(len(buf0)))
for i := 0; i < b.N; i++ {
_, re, _ = Compress1X(buf0, &s)
if re {
b.Fatal("reused")
}
}
})
}
}

func BenchmarkCompress4XSizes(b *testing.B) {
test := testfiles[0]
sizes := []int{1e2, 2e2, 5e2, 1e3, 5e3, 1e4, 5e4}
for _, size := range sizes {
b.Run(test.name+"-"+fmt.Sprint(size), func(b *testing.B) {
var s Scratch
s.Reuse = ReusePolicyNone
buf0, err := test.fn()
if err != nil {
b.Fatal(err)
}
buf0 = buf0[:size]
_, re, err := Compress4X(buf0, &s)
if err != test.err1X {
b.Fatal("unexpected error:", err)
}
//b.Log("Size:", len(o))
b.ResetTimer()
b.ReportAllocs()
b.SetBytes(int64(len(buf0)))
for i := 0; i < b.N; i++ {
_, re, _ = Compress4X(buf0, &s)
if re {
b.Fatal("reused")
}
}
})
}
}
60 changes: 47 additions & 13 deletions zstd/blockenc.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,14 +155,17 @@ func (h *literalsHeader) setSize(regenLen int) {
}

// setSizes will set the size of a compressed literals section and the input length.
func (h *literalsHeader) setSizes(compLen, inLen int) {
func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen))
// Only retain 2 bits
const mask = 3
lh := uint64(*h & mask)
switch {
case compBits <= 10 && inBits <= 10:
lh |= (1 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
if !single {
lh |= 1 << 2
}
lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
if debug {
const mmask = (1 << 24) - 1
n := (lh >> 4) & mmask
Expand All @@ -175,8 +178,14 @@ func (h *literalsHeader) setSizes(compLen, inLen int) {
}
case compBits <= 14 && inBits <= 14:
lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60)
if single {
panic("single stream used with more than 10 bits length.")
}
case compBits <= 18 && inBits <= 18:
lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60)
if single {
panic("single stream used with more than 10 bits length.")
}
default:
panic("internal error: block too big")
}
Expand Down Expand Up @@ -307,12 +316,30 @@ func (b *blockEnc) encodeLits() error {
return nil
}

// TODO: Switch to 1X when less than x bytes.
out, reUsed, err := huff0.Compress4X(b.literals, b.litEnc)
// Bail out of compression is too little.
if len(out) > (len(b.literals) - len(b.literals)>>4) {
var (
out []byte
reUsed, single bool
err error
)
if len(b.literals) >= 1024 {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
if len(out) > len(b.literals)-len(b.literals)>>4 {
// Bail out of compression is too little.
err = huff0.ErrIncompressible
}
} else if len(b.literals) > 32 {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
if len(out) > len(b.literals)-len(b.literals)>>4 {
// Bail out of compression is too little.
err = huff0.ErrIncompressible
}
} else {
err = huff0.ErrIncompressible
}

switch err {
case huff0.ErrIncompressible:
if debug {
Expand Down Expand Up @@ -351,7 +378,7 @@ func (b *blockEnc) encodeLits() error {
lh.setType(literalsBlockCompressed)
}
// Set sizes
lh.setSizes(len(out), len(b.literals))
lh.setSizes(len(out), len(b.literals), single)
bh.setSize(uint32(len(out) + lh.size() + 1))

// Write block headers.
Expand Down Expand Up @@ -381,16 +408,23 @@ func (b *blockEnc) encode() error {
b.output = bh.appendTo(b.output)

var (
out []byte
reUsed bool
err error
out []byte
reUsed, single bool
err error
)
if len(b.literals) > 32 {
// TODO: Switch to 1X on small blocks.
if len(b.literals) >= 1024 {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
if len(out) > len(b.literals)-len(b.literals)>>4 {
err = huff0.ErrIncompressible
}
} else if len(b.literals) > 32 {
// Use 1 stream
single = true
out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
if len(out) > len(b.literals)-len(b.literals)>>4 {
err = huff0.ErrIncompressible
}
} else {
err = huff0.ErrIncompressible
}
Expand Down Expand Up @@ -435,7 +469,7 @@ func (b *blockEnc) encode() error {
}
}
}
lh.setSizes(len(out), len(b.literals))
lh.setSizes(len(out), len(b.literals), single)
if debug {
printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
println("Adding literal header:", lh)
Expand Down

0 comments on commit 606f373

Please sign in to comment.