Skip to content

Commit

Permalink
Optimize single block encodes (#199)
Browse files Browse the repository at this point in the history
* Optimize single block encodes
* Blocks without history cannot have invalid repeat codes.
* Remove bounds check.
* Don't rank empty part.
* Big speedup on huff0 encoding.
  • Loading branch information
klauspost authored Dec 28, 2019
1 parent 5c5a6c1 commit bf8e070
Show file tree
Hide file tree
Showing 7 changed files with 709 additions and 54 deletions.
13 changes: 12 additions & 1 deletion huff0/bitwriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,25 @@ func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
b.nBits += bits
}

// encSymbol will add up to 16 bits from the code table entry for symbol.
// It will not check if there is space for them, so the caller must ensure
// that it has flushed recently.
func (b *bitWriter) encSymbol(ct cTable, symbol byte) {
	enc := ct[symbol]
	b.bitContainer |= uint64(enc.val) << (b.nBits & 63)
	b.nBits += enc.nBits
}

// encTwoSymbols will add up to 32 bits, encoding two symbols with a single
// shift-and-or into the bit container. It will not check if there is space
// for them, so the caller must ensure that it has flushed recently.
func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) {
	first := ct[av]
	second := ct[bv]
	// Pack the second code directly above the first, then shift the
	// combined pair into position in one operation.
	pair := uint64(first.val) | uint64(second.val)<<(first.nBits&63)
	b.bitContainer |= pair << (b.nBits & 63)
	b.nBits += first.nBits + second.nBits
}

// addBits16ZeroNC will add up to 16 bits.
// It will not check if there is space for them,
// so the caller must ensure that it has flushed recently.
Expand Down
36 changes: 17 additions & 19 deletions huff0/compress.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,28 +163,23 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
for i := len(src) & 3; i > 0; i-- {
bw.encSymbol(cTable, src[n+i-1])
}
n -= 4
if s.actualTableLog <= 8 {
n -= 4
for ; n >= 0; n -= 4 {
tmp := src[n : n+4]
// tmp should be len 4
bw.flush32()
bw.encSymbol(cTable, tmp[3])
bw.encSymbol(cTable, tmp[2])
bw.encSymbol(cTable, tmp[1])
bw.encSymbol(cTable, tmp[0])
bw.encTwoSymbols(cTable, tmp[3], tmp[2])
bw.encTwoSymbols(cTable, tmp[1], tmp[0])
}
} else {
n -= 4
for ; n >= 0; n -= 4 {
tmp := src[n : n+4]
// tmp should be len 4
bw.flush32()
bw.encSymbol(cTable, tmp[3])
bw.encSymbol(cTable, tmp[2])
bw.encTwoSymbols(cTable, tmp[3], tmp[2])
bw.flush32()
bw.encSymbol(cTable, tmp[1])
bw.encSymbol(cTable, tmp[0])
bw.encTwoSymbols(cTable, tmp[1], tmp[0])
}
}
err := bw.close()
Expand Down Expand Up @@ -439,7 +434,7 @@ func (s *Scratch) buildCTable() error {
return fmt.Errorf("internal error: maxNbBits (%d) > tableLogMax (%d)", maxNbBits, tableLogMax)
}
var nbPerRank [tableLogMax + 1]uint16
var valPerRank [tableLogMax + 1]uint16
var valPerRank [16]uint16
for _, v := range huffNode[:nonNullRank+1] {
nbPerRank[v.nbBits]++
}
Expand All @@ -455,16 +450,17 @@ func (s *Scratch) buildCTable() error {
}

// push nbBits per symbol, symbol order
// TODO: changed `s.symbolLen` -> `nonNullRank+1` (micro-opt)
for _, v := range huffNode[:nonNullRank+1] {
s.cTable[v.symbol].nBits = v.nbBits
}

// assign value within rank, symbol order
for n, val := range s.cTable[:s.symbolLen] {
v := valPerRank[val.nBits]
s.cTable[n].val = v
valPerRank[val.nBits] = v + 1
t := s.cTable[:s.symbolLen]
for n, val := range t {
nbits := val.nBits & 15
v := valPerRank[nbits]
t[n].val = v
valPerRank[nbits] = v + 1
}

return nil
Expand All @@ -488,10 +484,12 @@ func (s *Scratch) huffSort() {
r := highBit32(v+1) & 31
rank[r].base++
}
for n := 30; n > 0; n-- {
// maxBitLength is log2(BlockSizeMax) + 1
const maxBitLength = 18 + 1
for n := maxBitLength; n > 0; n-- {
rank[n-1].base += rank[n].base
}
for n := range rank[:] {
for n := range rank[:maxBitLength] {
rank[n].current = rank[n].base
}
for n, c := range s.count[:s.symbolLen] {
Expand All @@ -510,7 +508,7 @@ func (s *Scratch) huffSort() {
}

func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
maxNbBits := s.TableLog
maxNbBits := s.actualTableLog
huffNode := s.nodes[1 : huffNodesLen+1]
//huffNode = huffNode[: huffNodesLen]

Expand Down
25 changes: 21 additions & 4 deletions zstd/blockenc.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,20 @@ func (b *blockEnc) encodeRaw(a []byte) {
}
}

// encodeRawTo will write a raw block header followed by src to dst
// and return the updated slice. Unlike encodeRaw it does not touch
// b.output; the caller supplies and receives the destination buffer.
func (b *blockEnc) encodeRawTo(dst, src []byte) []byte {
	var bh blockHeader
	bh.setLast(b.last)
	bh.setSize(uint32(len(src)))
	bh.setType(blockTypeRaw)
	dst = bh.appendTo(dst)
	dst = append(dst, src...)
	if debug {
		println("Adding RAW block, length", len(src))
	}
	return dst
}

// encodeLits can be used if the block is only litLen.
func (b *blockEnc) encodeLits(raw bool) error {
var bh blockHeader
Expand Down Expand Up @@ -437,7 +451,7 @@ func fuzzFseEncoder(data []byte) int {
return 1
}

// encode will encode the block and put the output in b.output.
// encode will encode the block and append the output in b.output.
func (b *blockEnc) encode(raw bool) error {
if len(b.sequences) == 0 {
return b.encodeLits(raw)
Expand All @@ -451,6 +465,8 @@ func (b *blockEnc) encode(raw bool) error {
var lh literalsHeader
bh.setLast(b.last)
bh.setType(blockTypeCompressed)
// Store offset of the block header. Needed when we know the size.
bhOffset := len(b.output)
b.output = bh.appendTo(b.output)

var (
Expand All @@ -468,6 +484,7 @@ func (b *blockEnc) encode(raw bool) error {
} else {
err = huff0.ErrIncompressible
}

switch err {
case huff0.ErrIncompressible:
lh.setType(literalsBlockRaw)
Expand Down Expand Up @@ -735,18 +752,18 @@ func (b *blockEnc) encode(raw bool) error {
}
b.output = wr.out

if len(b.output)-3 >= b.size {
if len(b.output)-3-bhOffset >= b.size {
// Maybe even add a bigger margin.
b.litEnc.Reuse = huff0.ReusePolicyNone
return errIncompressible
}

// Size is output minus block header.
bh.setSize(uint32(len(b.output)) - 3)
bh.setSize(uint32(len(b.output)-bhOffset) - 3)
if debug {
println("Rewriting block header", bh)
}
_ = bh.appendTo(b.output[:0])
_ = bh.appendTo(b.output[bhOffset:bhOffset])
b.coders.setPrev(llEnc, mlEnc, ofEnc)
return nil
}
Expand Down
Loading

0 comments on commit bf8e070

Please sign in to comment.