Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize single block encodes #199

Merged
merged 5 commits into from
Dec 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion huff0/bitwriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,25 @@ func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
b.nBits += bits
}

// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
// encSymbol will add up to 16 bits. value may not contain more set bits than indicated.
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
func (b *bitWriter) encSymbol(ct cTable, symbol byte) {
enc := ct[symbol]
b.bitContainer |= uint64(enc.val) << (b.nBits & 63)
b.nBits += enc.nBits
}

// encTwoSymbols will add up to 32 bits. value may not contain more set bits than indicated.
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) {
encA := ct[av]
encB := ct[bv]
sh := b.nBits & 63
combined := uint64(encA.val) | (uint64(encB.val) << (encA.nBits & 63))
b.bitContainer |= combined << sh
b.nBits += encA.nBits + encB.nBits
}

// addBits16ZeroNC will add up to 16 bits.
// It will not check if there is space for them,
// so the caller must ensure that it has flushed recently.
Expand Down
36 changes: 17 additions & 19 deletions huff0/compress.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,28 +163,23 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
for i := len(src) & 3; i > 0; i-- {
bw.encSymbol(cTable, src[n+i-1])
}
n -= 4
if s.actualTableLog <= 8 {
n -= 4
for ; n >= 0; n -= 4 {
tmp := src[n : n+4]
// tmp should be len 4
bw.flush32()
bw.encSymbol(cTable, tmp[3])
bw.encSymbol(cTable, tmp[2])
bw.encSymbol(cTable, tmp[1])
bw.encSymbol(cTable, tmp[0])
bw.encTwoSymbols(cTable, tmp[3], tmp[2])
bw.encTwoSymbols(cTable, tmp[1], tmp[0])
}
} else {
n -= 4
for ; n >= 0; n -= 4 {
tmp := src[n : n+4]
// tmp should be len 4
bw.flush32()
bw.encSymbol(cTable, tmp[3])
bw.encSymbol(cTable, tmp[2])
bw.encTwoSymbols(cTable, tmp[3], tmp[2])
bw.flush32()
bw.encSymbol(cTable, tmp[1])
bw.encSymbol(cTable, tmp[0])
bw.encTwoSymbols(cTable, tmp[1], tmp[0])
}
}
err := bw.close()
Expand Down Expand Up @@ -439,7 +434,7 @@ func (s *Scratch) buildCTable() error {
return fmt.Errorf("internal error: maxNbBits (%d) > tableLogMax (%d)", maxNbBits, tableLogMax)
}
var nbPerRank [tableLogMax + 1]uint16
var valPerRank [tableLogMax + 1]uint16
var valPerRank [16]uint16
for _, v := range huffNode[:nonNullRank+1] {
nbPerRank[v.nbBits]++
}
Expand All @@ -455,16 +450,17 @@ func (s *Scratch) buildCTable() error {
}

// push nbBits per symbol, symbol order
// TODO: changed `s.symbolLen` -> `nonNullRank+1` (micro-opt)
for _, v := range huffNode[:nonNullRank+1] {
s.cTable[v.symbol].nBits = v.nbBits
}

// assign value within rank, symbol order
for n, val := range s.cTable[:s.symbolLen] {
v := valPerRank[val.nBits]
s.cTable[n].val = v
valPerRank[val.nBits] = v + 1
t := s.cTable[:s.symbolLen]
for n, val := range t {
nbits := val.nBits & 15
v := valPerRank[nbits]
t[n].val = v
valPerRank[nbits] = v + 1
}

return nil
Expand All @@ -488,10 +484,12 @@ func (s *Scratch) huffSort() {
r := highBit32(v+1) & 31
rank[r].base++
}
for n := 30; n > 0; n-- {
// maxBitLength is log2(BlockSizeMax) + 1
const maxBitLength = 18 + 1
for n := maxBitLength; n > 0; n-- {
rank[n-1].base += rank[n].base
}
for n := range rank[:] {
for n := range rank[:maxBitLength] {
rank[n].current = rank[n].base
}
for n, c := range s.count[:s.symbolLen] {
Expand All @@ -510,7 +508,7 @@ func (s *Scratch) huffSort() {
}

func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
maxNbBits := s.TableLog
maxNbBits := s.actualTableLog
huffNode := s.nodes[1 : huffNodesLen+1]
//huffNode = huffNode[: huffNodesLen]

Expand Down
25 changes: 21 additions & 4 deletions zstd/blockenc.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,20 @@ func (b *blockEnc) encodeRaw(a []byte) {
}
}

// encodeRaw can be used to set the output to a raw representation of supplied bytes.
func (b *blockEnc) encodeRawTo(dst, src []byte) []byte {
var bh blockHeader
bh.setLast(b.last)
bh.setSize(uint32(len(src)))
bh.setType(blockTypeRaw)
dst = bh.appendTo(dst)
dst = append(dst, src...)
if debug {
println("Adding RAW block, length", len(src))
}
return dst
}

// encodeLits can be used if the block is only litLen.
func (b *blockEnc) encodeLits(raw bool) error {
var bh blockHeader
Expand Down Expand Up @@ -437,7 +451,7 @@ func fuzzFseEncoder(data []byte) int {
return 1
}

// encode will encode the block and put the output in b.output.
// encode will encode the block and append the output in b.output.
func (b *blockEnc) encode(raw bool) error {
if len(b.sequences) == 0 {
return b.encodeLits(raw)
Expand All @@ -451,6 +465,8 @@ func (b *blockEnc) encode(raw bool) error {
var lh literalsHeader
bh.setLast(b.last)
bh.setType(blockTypeCompressed)
// Store offset of the block header. Needed when we know the size.
bhOffset := len(b.output)
b.output = bh.appendTo(b.output)

var (
Expand All @@ -468,6 +484,7 @@ func (b *blockEnc) encode(raw bool) error {
} else {
err = huff0.ErrIncompressible
}

switch err {
case huff0.ErrIncompressible:
lh.setType(literalsBlockRaw)
Expand Down Expand Up @@ -735,18 +752,18 @@ func (b *blockEnc) encode(raw bool) error {
}
b.output = wr.out

if len(b.output)-3 >= b.size {
if len(b.output)-3-bhOffset >= b.size {
// Maybe even add a bigger margin.
b.litEnc.Reuse = huff0.ReusePolicyNone
return errIncompressible
}

// Size is output minus block header.
bh.setSize(uint32(len(b.output)) - 3)
bh.setSize(uint32(len(b.output)-bhOffset) - 3)
if debug {
println("Rewriting block header", bh)
}
_ = bh.appendTo(b.output[:0])
_ = bh.appendTo(b.output[bhOffset:bhOffset])
b.coders.setPrev(llEnc, mlEnc, ofEnc)
return nil
}
Expand Down
Loading