diff --git a/flate/deflate.go b/flate/deflate.go index 18ec6bdd4e..b27f5a93bc 100644 --- a/flate/deflate.go +++ b/flate/deflate.go @@ -41,9 +41,11 @@ const ( maxMatchLength = 258 // The longest match for the compressor minOffsetSize = 1 // The shortest offset that makes any sense - // The maximum number of tokens we put into a single flat block, just too - // stop things from getting too large. - maxFlateBlockTokens = 1 << 14 + // The maximum number of tokens we will encode at the time. + // Smaller sizes usually creates less optimal blocks. + // Bigger can make context switching slow. + // We use this for levels 7-9, so we make it big. + maxFlateBlockTokens = 1 << 15 maxStoreBlockSize = 65535 hashBits = 17 // After 17 performance degrades hashSize = 1 << hashBits @@ -74,7 +76,7 @@ var levels = []compressionLevel{ {0, 0, 0, 0, 0, 6}, // Levels 7-9 use increasingly more lazy matching // and increasingly stringent conditions for "good enough". - {8, 12, 24, 24, skipNever, 7}, + {6, 10, 12, 16, skipNever, 7}, {10, 24, 32, 64, skipNever, 8}, {32, 258, 258, 1024, skipNever, 9}, } @@ -175,7 +177,8 @@ func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error { window = d.window[d.blockStart:index] } d.blockStart = index - d.w.writeBlock(tok, eof, window) + //d.w.writeBlock(tok, eof, window) + d.w.writeBlockDynamic(tok, eof, window, d.sync) return d.w.err } return nil @@ -301,7 +304,7 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead, bpb int) (lengt if wEnd == win[i+length] { n := matchLen(win[i:i+minMatchLook], wPos) if n > length { - newGain := n*bpb - bits.Len32(uint32(pos-i)) - 1 + newGain := n*bpb - bits.Len32(uint32(pos-i)) if newGain > cGain { length = n offset = pos - i @@ -541,13 +544,12 @@ func (d *compressor) deflateLazy() { // If we have a long run of no matches, skip additional bytes // Resets when s.ii overflows after 64KB. - if s.ii > uint16(d.nice) { - n := int(s.ii >> 5) + if n := int(s.ii) - d.chain; n > 0 { + n = 1 + int(n>>6) for j := 0; j < n; j++ { if s.index >= d.windowEnd-1 { break } - d.tokens.AddLiteral(d.window[s.index-1]) if d.tokens.n == maxFlateBlockTokens { if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { @@ -555,6 +557,14 @@ func (d *compressor) deflateLazy() { } d.tokens.Reset() } + // Index... + if s.index < s.maxInsertIndex { + h := hash4(d.window[s.index:]) + ch := s.hashHead[h] + s.chainHead = int(ch) + s.hashPrev[s.index&windowMask] = ch + s.hashHead[h] = uint32(s.index + s.hashOffset) + } s.index++ } // Flush last byte @@ -697,13 +707,13 @@ func (d *compressor) init(w io.Writer, level int) (err error) { level = 5 fallthrough case level >= 1 && level <= 6: - d.w.logNewTablePenalty = 8 + d.w.logNewTablePenalty = 7 d.fast = newFastEnc(level) d.window = make([]byte, maxStoreBlockSize) d.fill = (*compressor).fillBlock d.step = (*compressor).storeFast case 7 <= level && level <= 9: - d.w.logNewTablePenalty = 10 + d.w.logNewTablePenalty = 8 d.state = &advancedState{} d.compressionLevel = levels[level] d.initDeflate() diff --git a/flate/flate_test.go b/flate/flate_test.go index 648d9796b8..182909ab0e 100644 --- a/flate/flate_test.go +++ b/flate/flate_test.go @@ -163,7 +163,7 @@ func TestRegressions(t *testing.T) { t.Error(err) } if !bytes.Equal(data1, data2) { - fmt.Printf("want:%x\ngot: %x\n", data1, data2) + //fmt.Printf("want:%x\ngot: %x\n", data1, data2) t.Error("not equal") } }) diff --git a/flate/huffman_bit_writer.go b/flate/huffman_bit_writer.go index fda85c2571..fb1701eecc 100644 --- a/flate/huffman_bit_writer.go +++ b/flate/huffman_bit_writer.go @@ -155,37 +155,33 @@ func (w *huffmanBitWriter) reset(writer io.Writer) { w.lastHuffMan = false } -func (w *huffmanBitWriter) canReuse(t *tokens) (offsets, lits bool) { - offsets, lits = true, true +func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) { a := t.offHist[:offsetCodeCount] - b := w.offsetFreq[:len(a)] - for i := range a { - if b[i] == 0 && a[i] != 0 { - offsets = false - break + b := w.offsetEncoding.codes + b = b[:len(a)] + for i, v := range a { + if v != 0 && b[i].len == 0 { + return false } } a = t.extraHist[:literalCount-256] - b = w.literalFreq[256:literalCount] + b = w.literalEncoding.codes[256:literalCount] b = b[:len(a)] - for i := range a { - if b[i] == 0 && a[i] != 0 { - lits = false - break + for i, v := range a { + if v != 0 && b[i].len == 0 { + return false } } - if lits { - a = t.litHist[:] - b = w.literalFreq[:len(a)] - for i := range a { - if b[i] == 0 && a[i] != 0 { - lits = false - break - } + + a = t.litHist[:256] + b = w.literalEncoding.codes[:len(a)] + for i, v := range a { + if v != 0 && b[i].len == 0 { + return false } } - return + return true } func (w *huffmanBitWriter) flush() { @@ -566,7 +562,7 @@ func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) { w.lastHeader = 0 } numLiterals, numOffsets := w.indexTokens(tokens, false) - w.generate(tokens) + w.generate() var extraBits int storedSize, storable := w.storedSize(input) if storable { @@ -595,7 +591,7 @@ func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) { } // Stored bytes? - if storable && storedSize < size { + if storable && storedSize <= size { w.writeStoredHeader(len(input), eof) w.writeBytes(input) return @@ -634,22 +630,39 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b w.lastHeader = 0 w.lastHuffMan = false } - if !sync { - tokens.Fill() + + // fillReuse enables filling of empty values. + // This will make encodings always reusable without testing. + // However, this does not appear to benefit on most cases. + const fillReuse = false + + // Check if we can reuse... + if !fillReuse && w.lastHeader > 0 && !w.canReuse(tokens) { + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 } + numLiterals, numOffsets := w.indexTokens(tokens, !sync) + extraBits := 0 + ssize, storable := w.storedSize(input) + + const usePrefs = true + if storable || w.lastHeader > 0 { + extraBits = w.extraBitSize() + } var size int + // Check if we should reuse. if w.lastHeader > 0 { // Estimate size for using a new table. // Use the previous header size as the best estimate. newSize := w.lastHeader + tokens.EstimatedBits() - newSize += newSize >> w.logNewTablePenalty + newSize += int(w.literalEncoding.codes[endBlockMarker].len) + newSize>>w.logNewTablePenalty // The estimated size is calculated as an optimal table. // We add a penalty to make it more realistic and re-use a bit more. - reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + w.extraBitSize() + reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + extraBits // Check if a new table is better. if newSize < reuseSize { @@ -660,35 +673,79 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b } else { size = reuseSize } + + if preSize := w.fixedSize(extraBits) + 7; usePrefs && preSize < size { + // Check if we get a reasonable size decrease. + if storable && ssize <= size { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + w.writeFixedHeader(eof) + if !sync { + tokens.AddEOB() + } + w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes) + return + } // Check if we get a reasonable size decrease. - if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) { + if storable && ssize <= size { w.writeStoredHeader(len(input), eof) w.writeBytes(input) - w.lastHeader = 0 return } } // We want a new block/table if w.lastHeader == 0 { - w.generate(tokens) + if fillReuse && !sync { + w.fillTokens() + numLiterals, numOffsets = maxNumLit, maxNumDist + } else { + w.literalFreq[endBlockMarker] = 1 + } + + w.generate() // Generate codegen and codegenFrequencies, which indicates how to encode // the literalEncoding and the offsetEncoding. w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) w.codegenEncoding.generate(w.codegenFreq[:], 7) + var numCodegens int - size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, w.extraBitSize()) - // Store bytes, if we don't get a reasonable improvement. - if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) { + if fillReuse && !sync { + // Reindex for accurate size... + w.indexTokens(tokens, true) + } + size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits) + + // Store predefined, if we don't get a reasonable improvement. + if preSize := w.fixedSize(extraBits); usePrefs && preSize <= size { + // Store bytes, if we don't get an improvement. + if storable && ssize <= preSize { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + w.writeFixedHeader(eof) + if !sync { + tokens.AddEOB() + } + w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes) + return + } + + if storable && ssize <= size { + // Store bytes, if we don't get an improvement. w.writeStoredHeader(len(input), eof) w.writeBytes(input) - w.lastHeader = 0 return } // Write Huffman table. w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) - w.lastHeader, _ = w.headerSize() + if !sync { + w.lastHeader, _ = w.headerSize() + } w.lastHuffMan = false } @@ -699,6 +756,19 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes) } +func (w *huffmanBitWriter) fillTokens() { + for i, v := range w.literalFreq[:literalCount] { + if v == 0 { + w.literalFreq[i] = 1 + } + } + for i, v := range w.offsetFreq[:offsetCodeCount] { + if v == 0 { + w.offsetFreq[i] = 1 + } + } +} + // indexTokens indexes a slice of tokens, and updates // literalFreq and offsetFreq, and generates literalEncoding // and offsetEncoding. @@ -733,7 +803,7 @@ func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, num return } -func (w *huffmanBitWriter) generate(t *tokens) { +func (w *huffmanBitWriter) generate() { w.literalEncoding.generate(w.literalFreq[:literalCount], 15) w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15) } @@ -867,7 +937,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) offsetComb := offsetCombined[offsetCode] if offsetComb > 1<<16 { //w.writeBits(extraOffset, extraOffsetBits) - bits |= uint64(offset&matchOffsetOnlyMask-(offsetComb&0xffff)) << (nbits & 63) + bits |= uint64(offset-(offsetComb&0xffff)) << (nbits & 63) nbits += uint16(offsetComb >> 16) if nbits >= 48 { binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) diff --git a/flate/inflate.go b/flate/inflate.go index d1edb356c4..d5f62f6a2c 100644 --- a/flate/inflate.go +++ b/flate/inflate.go @@ -328,11 +328,17 @@ func (f *decompressor) nextBlock() { switch typ { case 0: f.dataBlock() + if debugDecode { + fmt.Println("stored block") + } case 1: // compressed, fixed Huffman tables f.hl = &fixedHuffmanDecoder f.hd = nil f.huffmanBlockDecoder()() + if debugDecode { + fmt.Println("predefinied huffman block") + } case 2: // compressed, dynamic Huffman tables if f.err = f.readHuffman(); f.err != nil { @@ -341,6 +347,9 @@ func (f *decompressor) nextBlock() { f.hl = &f.h1 f.hd = &f.h2 f.huffmanBlockDecoder()() + if debugDecode { + fmt.Println("dynamic huffman block") + } default: // 3 is reserved. if debugDecode { diff --git a/flate/testdata/huffman-null-max.dyn.expect b/flate/testdata/huffman-null-max.dyn.expect index 0a3c71ceb3..f4e27a8146 100644 Binary files a/flate/testdata/huffman-null-max.dyn.expect and b/flate/testdata/huffman-null-max.dyn.expect differ diff --git a/flate/testdata/huffman-null-max.dyn.expect-noinput b/flate/testdata/huffman-null-max.dyn.expect-noinput index 0a3c71ceb3..f4e27a8146 100644 Binary files a/flate/testdata/huffman-null-max.dyn.expect-noinput and b/flate/testdata/huffman-null-max.dyn.expect-noinput differ diff --git a/flate/testdata/huffman-pi.dyn.expect b/flate/testdata/huffman-pi.dyn.expect index 11756feafb..66c76ceb23 100644 Binary files a/flate/testdata/huffman-pi.dyn.expect and b/flate/testdata/huffman-pi.dyn.expect differ diff --git a/flate/testdata/huffman-pi.dyn.expect-noinput b/flate/testdata/huffman-pi.dyn.expect-noinput index 11756feafb..66c76ceb23 100644 Binary files a/flate/testdata/huffman-pi.dyn.expect-noinput and b/flate/testdata/huffman-pi.dyn.expect-noinput differ diff --git a/flate/testdata/huffman-rand-1k.dyn.expect-noinput b/flate/testdata/huffman-rand-1k.dyn.expect-noinput index 5162399686..e45583ee31 100644 Binary files a/flate/testdata/huffman-rand-1k.dyn.expect-noinput and b/flate/testdata/huffman-rand-1k.dyn.expect-noinput differ diff --git a/flate/testdata/huffman-rand-limit.dyn.expect b/flate/testdata/huffman-rand-limit.dyn.expect index 57e59322e9..881e59c9ab 100644 Binary files a/flate/testdata/huffman-rand-limit.dyn.expect and b/flate/testdata/huffman-rand-limit.dyn.expect differ diff --git a/flate/testdata/huffman-rand-limit.dyn.expect-noinput b/flate/testdata/huffman-rand-limit.dyn.expect-noinput index 008b9afee9..881e59c9ab 100644 Binary files a/flate/testdata/huffman-rand-limit.dyn.expect-noinput and b/flate/testdata/huffman-rand-limit.dyn.expect-noinput differ diff --git a/flate/testdata/huffman-rand-limit.sync.expect b/flate/testdata/huffman-rand-limit.sync.expect index 2d6527934e..881e59c9ab 100644 Binary files a/flate/testdata/huffman-rand-limit.sync.expect and b/flate/testdata/huffman-rand-limit.sync.expect differ diff --git a/flate/testdata/huffman-rand-limit.sync.expect-noinput b/flate/testdata/huffman-rand-limit.sync.expect-noinput index 2d6527934e..881e59c9ab 100644 Binary files a/flate/testdata/huffman-rand-limit.sync.expect-noinput and b/flate/testdata/huffman-rand-limit.sync.expect-noinput differ diff --git a/flate/testdata/huffman-shifts.dyn.expect b/flate/testdata/huffman-shifts.dyn.expect index 2f4fd17add..9ad731f3cf 100644 Binary files a/flate/testdata/huffman-shifts.dyn.expect and b/flate/testdata/huffman-shifts.dyn.expect differ diff --git a/flate/testdata/huffman-shifts.dyn.expect-noinput b/flate/testdata/huffman-shifts.dyn.expect-noinput index 2f4fd17add..9ad731f3cf 100644 Binary files a/flate/testdata/huffman-shifts.dyn.expect-noinput and b/flate/testdata/huffman-shifts.dyn.expect-noinput differ diff --git a/flate/testdata/huffman-text-shift.dyn.expect b/flate/testdata/huffman-text-shift.dyn.expect index 3a4dcc4cab..486bdf6f69 100644 Binary files a/flate/testdata/huffman-text-shift.dyn.expect and b/flate/testdata/huffman-text-shift.dyn.expect differ diff --git a/flate/testdata/huffman-text-shift.dyn.expect-noinput b/flate/testdata/huffman-text-shift.dyn.expect-noinput index 29788aa0a8..486bdf6f69 100644 --- a/flate/testdata/huffman-text-shift.dyn.expect-noinput +++ b/flate/testdata/huffman-text-shift.dyn.expect-noinput @@ -1,2 +1 @@ -`@5R|@ו1Cᚄ4ϒ|ʂ.zgENL E#2¬EQ<D8.IDHÂD@. E^ @"Ҡ `M -KS4*n%P1nAA`OS^.aJUxx2s4%yWX+&F$I&)I gd<l9 7TCYmE+T"de!eˇ1闍Ș+< \ No newline at end of file +J0!<(lP*( :s覂Д4aI%|SVxO&U>7CqMud29ߨxs޷$ Qi^t wU͊;C•CZfȥ6JvԶ Xdpj (]^v8:KdH@>.3SAJ.3{;5F吒oJY6ϯ˛l_?8d \ No newline at end of file diff --git a/flate/testdata/huffman-text.dyn.expect b/flate/testdata/huffman-text.dyn.expect index 1fb84b322c..b9cc20d0eb 100644 Binary files a/flate/testdata/huffman-text.dyn.expect and b/flate/testdata/huffman-text.dyn.expect differ diff --git a/flate/testdata/huffman-text.dyn.expect-noinput b/flate/testdata/huffman-text.dyn.expect-noinput index 6ef6dd44dd..b9cc20d0eb 100644 --- a/flate/testdata/huffman-text.dyn.expect-noinput +++ b/flate/testdata/huffman-text.dyn.expect-noinput @@ -1,3 +1,4 @@ -`J|ஏbF=M/MX+Kˊ;޹`.&;$ -A A :F8T h ͍˘P "PI&@ lG p`7TdxDGA^k, OAU!AVJQV2,ށj(,;]X` -*xqF_2>n^AUm Œ2>T gO U+d5ʕd6_i2 \ No newline at end of file +J0r=`K2Aasē)HIɟb]ky{h0E{66[cdb;"%#u["llB +%* +&HSvah9'B62CICG6tg R]Km!Č*ꚺx5[gQFء?>) +7풳^ w;$d2E^/έ{-x6S.9 \ No newline at end of file diff --git a/flate/testdata/huffman-zero.dyn.expect b/flate/testdata/huffman-zero.dyn.expect index 230433ca0c..dbe401c54c 100644 Binary files a/flate/testdata/huffman-zero.dyn.expect and b/flate/testdata/huffman-zero.dyn.expect differ diff --git a/flate/testdata/huffman-zero.dyn.expect-noinput b/flate/testdata/huffman-zero.dyn.expect-noinput index cefc1d3f66..dbe401c54c 100644 Binary files a/flate/testdata/huffman-zero.dyn.expect-noinput and b/flate/testdata/huffman-zero.dyn.expect-noinput differ diff --git a/flate/testdata/huffman-zero.sync.expect b/flate/testdata/huffman-zero.sync.expect index 830348a79a..dbe401c54c 100644 Binary files a/flate/testdata/huffman-zero.sync.expect and b/flate/testdata/huffman-zero.sync.expect differ diff --git a/flate/testdata/huffman-zero.sync.expect-noinput b/flate/testdata/huffman-zero.sync.expect-noinput index 830348a79a..dbe401c54c 100644 Binary files a/flate/testdata/huffman-zero.sync.expect-noinput and b/flate/testdata/huffman-zero.sync.expect-noinput differ diff --git a/flate/testdata/null-long-match.dyn.expect-noinput b/flate/testdata/null-long-match.dyn.expect-noinput index 14167a3344..62d55e6b83 100644 Binary files a/flate/testdata/null-long-match.dyn.expect-noinput and b/flate/testdata/null-long-match.dyn.expect-noinput differ