Skip to content

Commit

Permalink
flate: Improve entropy compression (#461)
Browse files Browse the repository at this point in the history
Even though they are rarely a benefit, check the size of the predefined tables.

Minor tweaks, the biggest being a larger number of tokens, and second, disabling filling.
  • Loading branch information
klauspost authored Dec 28, 2021
1 parent e2d8c6d commit dff5f6b
Show file tree
Hide file tree
Showing 24 changed files with 143 additions and 54 deletions.
32 changes: 21 additions & 11 deletions flate/deflate.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@ const (
maxMatchLength = 258 // The longest match for the compressor
minOffsetSize = 1 // The shortest offset that makes any sense

// The maximum number of tokens we put into a single flat block, just too
// stop things from getting too large.
maxFlateBlockTokens = 1 << 14
// The maximum number of tokens we will encode at the time.
// Smaller sizes usually create less optimal blocks.
// Bigger can make context switching slow.
// We use this for levels 7-9, so we make it big.
maxFlateBlockTokens = 1 << 15
maxStoreBlockSize = 65535
hashBits = 17 // After 17 performance degrades
hashSize = 1 << hashBits
Expand Down Expand Up @@ -74,7 +76,7 @@ var levels = []compressionLevel{
{0, 0, 0, 0, 0, 6},
// Levels 7-9 use increasingly more lazy matching
// and increasingly stringent conditions for "good enough".
{8, 12, 24, 24, skipNever, 7},
{6, 10, 12, 16, skipNever, 7},
{10, 24, 32, 64, skipNever, 8},
{32, 258, 258, 1024, skipNever, 9},
}
Expand Down Expand Up @@ -175,7 +177,8 @@ func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error {
window = d.window[d.blockStart:index]
}
d.blockStart = index
d.w.writeBlock(tok, eof, window)
//d.w.writeBlock(tok, eof, window)
d.w.writeBlockDynamic(tok, eof, window, d.sync)
return d.w.err
}
return nil
Expand Down Expand Up @@ -301,7 +304,7 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead, bpb int) (lengt
if wEnd == win[i+length] {
n := matchLen(win[i:i+minMatchLook], wPos)
if n > length {
newGain := n*bpb - bits.Len32(uint32(pos-i)) - 1
newGain := n*bpb - bits.Len32(uint32(pos-i))
if newGain > cGain {
length = n
offset = pos - i
Expand Down Expand Up @@ -541,20 +544,27 @@ func (d *compressor) deflateLazy() {

// If we have a long run of no matches, skip additional bytes
// Resets when s.ii overflows after 64KB.
if s.ii > uint16(d.nice) {
n := int(s.ii >> 5)
if n := int(s.ii) - d.chain; n > 0 {
n = 1 + int(n>>6)
for j := 0; j < n; j++ {
if s.index >= d.windowEnd-1 {
break
}

d.tokens.AddLiteral(d.window[s.index-1])
if d.tokens.n == maxFlateBlockTokens {
if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
return
}
d.tokens.Reset()
}
// Index...
if s.index < s.maxInsertIndex {
h := hash4(d.window[s.index:])
ch := s.hashHead[h]
s.chainHead = int(ch)
s.hashPrev[s.index&windowMask] = ch
s.hashHead[h] = uint32(s.index + s.hashOffset)
}
s.index++
}
// Flush last byte
Expand Down Expand Up @@ -697,13 +707,13 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
level = 5
fallthrough
case level >= 1 && level <= 6:
d.w.logNewTablePenalty = 8
d.w.logNewTablePenalty = 7
d.fast = newFastEnc(level)
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeFast
case 7 <= level && level <= 9:
d.w.logNewTablePenalty = 10
d.w.logNewTablePenalty = 8
d.state = &advancedState{}
d.compressionLevel = levels[level]
d.initDeflate()
Expand Down
2 changes: 1 addition & 1 deletion flate/flate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ func TestRegressions(t *testing.T) {
t.Error(err)
}
if !bytes.Equal(data1, data2) {
fmt.Printf("want:%x\ngot: %x\n", data1, data2)
//fmt.Printf("want:%x\ngot: %x\n", data1, data2)
t.Error("not equal")
}
})
Expand Down
144 changes: 107 additions & 37 deletions flate/huffman_bit_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,37 +155,33 @@ func (w *huffmanBitWriter) reset(writer io.Writer) {
w.lastHuffMan = false
}

func (w *huffmanBitWriter) canReuse(t *tokens) (offsets, lits bool) {
offsets, lits = true, true
func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) {
a := t.offHist[:offsetCodeCount]
b := w.offsetFreq[:len(a)]
for i := range a {
if b[i] == 0 && a[i] != 0 {
offsets = false
break
b := w.offsetEncoding.codes
b = b[:len(a)]
for i, v := range a {
if v != 0 && b[i].len == 0 {
return false
}
}

a = t.extraHist[:literalCount-256]
b = w.literalFreq[256:literalCount]
b = w.literalEncoding.codes[256:literalCount]
b = b[:len(a)]
for i := range a {
if b[i] == 0 && a[i] != 0 {
lits = false
break
for i, v := range a {
if v != 0 && b[i].len == 0 {
return false
}
}
if lits {
a = t.litHist[:]
b = w.literalFreq[:len(a)]
for i := range a {
if b[i] == 0 && a[i] != 0 {
lits = false
break
}

a = t.litHist[:256]
b = w.literalEncoding.codes[:len(a)]
for i, v := range a {
if v != 0 && b[i].len == 0 {
return false
}
}
return
return true
}

func (w *huffmanBitWriter) flush() {
Expand Down Expand Up @@ -566,7 +562,7 @@ func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) {
w.lastHeader = 0
}
numLiterals, numOffsets := w.indexTokens(tokens, false)
w.generate(tokens)
w.generate()
var extraBits int
storedSize, storable := w.storedSize(input)
if storable {
Expand Down Expand Up @@ -595,7 +591,7 @@ func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) {
}

// Stored bytes?
if storable && storedSize < size {
if storable && storedSize <= size {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
Expand Down Expand Up @@ -634,22 +630,39 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
w.lastHeader = 0
w.lastHuffMan = false
}
if !sync {
tokens.Fill()

// fillReuse enables filling of empty values.
// This will make encodings always reusable without testing.
// However, this does not appear to benefit on most cases.
const fillReuse = false

// Check if we can reuse...
if !fillReuse && w.lastHeader > 0 && !w.canReuse(tokens) {
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
}

numLiterals, numOffsets := w.indexTokens(tokens, !sync)
extraBits := 0
ssize, storable := w.storedSize(input)

const usePrefs = true
if storable || w.lastHeader > 0 {
extraBits = w.extraBitSize()
}

var size int

// Check if we should reuse.
if w.lastHeader > 0 {
// Estimate size for using a new table.
// Use the previous header size as the best estimate.
newSize := w.lastHeader + tokens.EstimatedBits()
newSize += newSize >> w.logNewTablePenalty
newSize += int(w.literalEncoding.codes[endBlockMarker].len) + newSize>>w.logNewTablePenalty

// The estimated size is calculated as an optimal table.
// We add a penalty to make it more realistic and re-use a bit more.
reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + w.extraBitSize()
reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + extraBits

// Check if a new table is better.
if newSize < reuseSize {
Expand All @@ -660,35 +673,79 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
} else {
size = reuseSize
}

if preSize := w.fixedSize(extraBits) + 7; usePrefs && preSize < size {
// Check if we get a reasonable size decrease.
if storable && ssize <= size {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
w.writeFixedHeader(eof)
if !sync {
tokens.AddEOB()
}
w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes)
return
}
// Check if we get a reasonable size decrease.
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
if storable && ssize <= size {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
w.lastHeader = 0
return
}
}

// We want a new block/table
if w.lastHeader == 0 {
w.generate(tokens)
if fillReuse && !sync {
w.fillTokens()
numLiterals, numOffsets = maxNumLit, maxNumDist
} else {
w.literalFreq[endBlockMarker] = 1
}

w.generate()
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
w.codegenEncoding.generate(w.codegenFreq[:], 7)

var numCodegens int
size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, w.extraBitSize())
// Store bytes, if we don't get a reasonable improvement.
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) {
if fillReuse && !sync {
// Reindex for accurate size...
w.indexTokens(tokens, true)
}
size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits)

// Store predefined, if we don't get a reasonable improvement.
if preSize := w.fixedSize(extraBits); usePrefs && preSize <= size {
// Store bytes, if we don't get an improvement.
if storable && ssize <= preSize {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
w.writeFixedHeader(eof)
if !sync {
tokens.AddEOB()
}
w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes)
return
}

if storable && ssize <= size {
// Store bytes, if we don't get an improvement.
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
w.lastHeader = 0
return
}

// Write Huffman table.
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
w.lastHeader, _ = w.headerSize()
if !sync {
w.lastHeader, _ = w.headerSize()
}
w.lastHuffMan = false
}

Expand All @@ -699,6 +756,19 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes)
}

// fillTokens gives every literal and offset symbol a frequency of at
// least 1, so the generated encodings assign a code to every symbol in
// both alphabets and can therefore always be reused for later blocks.
func (w *huffmanBitWriter) fillTokens() {
	lits := w.literalFreq[:literalCount]
	for i := range lits {
		if lits[i] == 0 {
			lits[i] = 1
		}
	}
	offs := w.offsetFreq[:offsetCodeCount]
	for i := range offs {
		if offs[i] == 0 {
			offs[i] = 1
		}
	}
}

// indexTokens indexes a slice of tokens, and updates
// literalFreq and offsetFreq, and generates literalEncoding
// and offsetEncoding.
Expand Down Expand Up @@ -733,7 +803,7 @@ func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, num
return
}

func (w *huffmanBitWriter) generate(t *tokens) {
// generate builds the literal and offset Huffman encodings from the
// current frequency tables. The limit of 15 is the maximum code length
// passed to each encoder.
func (w *huffmanBitWriter) generate() {
	const maxCodeLen = 15
	w.literalEncoding.generate(w.literalFreq[:literalCount], maxCodeLen)
	w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], maxCodeLen)
}
Expand Down Expand Up @@ -867,7 +937,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
offsetComb := offsetCombined[offsetCode]
if offsetComb > 1<<16 {
//w.writeBits(extraOffset, extraOffsetBits)
bits |= uint64(offset&matchOffsetOnlyMask-(offsetComb&0xffff)) << (nbits & 63)
bits |= uint64(offset-(offsetComb&0xffff)) << (nbits & 63)
nbits += uint16(offsetComb >> 16)
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
Expand Down
9 changes: 9 additions & 0 deletions flate/inflate.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,11 +328,17 @@ func (f *decompressor) nextBlock() {
switch typ {
case 0:
f.dataBlock()
if debugDecode {
fmt.Println("stored block")
}
case 1:
// compressed, fixed Huffman tables
f.hl = &fixedHuffmanDecoder
f.hd = nil
f.huffmanBlockDecoder()()
if debugDecode {
fmt.Println("predefinied huffman block")
}
case 2:
// compressed, dynamic Huffman tables
if f.err = f.readHuffman(); f.err != nil {
Expand All @@ -341,6 +347,9 @@ func (f *decompressor) nextBlock() {
f.hl = &f.h1
f.hd = &f.h2
f.huffmanBlockDecoder()()
if debugDecode {
fmt.Println("dynamic huffman block")
}
default:
// 3 is reserved.
if debugDecode {
Expand Down
Binary file modified flate/testdata/huffman-null-max.dyn.expect
Binary file not shown.
Binary file modified flate/testdata/huffman-null-max.dyn.expect-noinput
Binary file not shown.
Binary file modified flate/testdata/huffman-pi.dyn.expect
Binary file not shown.
Binary file modified flate/testdata/huffman-pi.dyn.expect-noinput
Binary file not shown.
Binary file modified flate/testdata/huffman-rand-1k.dyn.expect-noinput
Binary file not shown.
Binary file modified flate/testdata/huffman-rand-limit.dyn.expect
Binary file not shown.
Binary file modified flate/testdata/huffman-rand-limit.dyn.expect-noinput
Binary file not shown.
Binary file modified flate/testdata/huffman-rand-limit.sync.expect
Binary file not shown.
Binary file modified flate/testdata/huffman-rand-limit.sync.expect-noinput
Binary file not shown.
Binary file modified flate/testdata/huffman-shifts.dyn.expect
Binary file not shown.
Binary file modified flate/testdata/huffman-shifts.dyn.expect-noinput
Binary file not shown.
Binary file modified flate/testdata/huffman-text-shift.dyn.expect
Binary file not shown.
3 changes: 1 addition & 2 deletions flate/testdata/huffman-text-shift.dyn.expect-noinput
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
�`�@�����5R|@ו1C�ᚄ4ϒ��|ʂ���������������.��zgEN�L������E#2¬EQ<��D��8.IDHÂ�D�@. E^������� @"�Ҡ ����`�M
KS4��*�n%P�1n��AA�`�OS^.���a�JUx�x�2�s��4�%yW��X+&F�$I�&�)�Igd<l9 ��7�TC���Y�mE�+T"�d��e�!�eˇ���1闍Ș�+�<
��J�0���!�<(l�P*( �:�s�覂�Д4aI�%|SV��xO&�U>�7�C�qM�u��d29ߨ�x�s�޷�$�� Qi^�t�wU�͊�;���C�•���C�Zf��ȥ6�J���v�Զ� Xdp�j�(����]��^v8:K��dH�@�>.��3�SAJ��.3�{�;��5F吒oJ�Y6ϯ�˛��������l�_���?�����8�d
Expand Down
Binary file modified flate/testdata/huffman-text.dyn.expect
Binary file not shown.
7 changes: 4 additions & 3 deletions flate/testdata/huffman-text.dyn.expect-noinput
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
�`�J�|�ஏb���F��=M/MX�+�K������������ˊ�;��޹���`�.�&;$
���A A �:��F8T� h� ͍�˘�P� �"PI&@�� lG p`7�Td�x���D�GA^k�, � �OA�U�!���AV�J��QV�2,��ށ���j(,;]X�`��
��*xqF_��2>n^��A��Um�� �Œ���2>�T��� g�O�� ���U��+�����d��5ʕ�d��6_�i�2�
���J�0���r=�`K��2Aasē)�H�����Iɟb�]�k��y�{h�0E{6�6�[��c��db�;��"%�#u����["�llB
%*�
�&��H�S�v����a���h�9��'B62CI���C��G6�����t���g�R]��K�m�!Č*�ꚺx5[��g�QF�ء��?>�)�
7���풳�^� w;�$�d��2E�^��/έ{�-���x�6S�.9���
Expand Down
Binary file modified flate/testdata/huffman-zero.dyn.expect
Binary file not shown.
Binary file modified flate/testdata/huffman-zero.dyn.expect-noinput
Binary file not shown.
Binary file modified flate/testdata/huffman-zero.sync.expect
Binary file not shown.
Binary file modified flate/testdata/huffman-zero.sync.expect-noinput
Binary file not shown.
Binary file modified flate/testdata/null-long-match.dyn.expect-noinput
Binary file not shown.

0 comments on commit dff5f6b

Please sign in to comment.