diff --git a/flate/deflate.go b/flate/deflate.go index 25dbe3e15f..3f428d8b52 100644 --- a/flate/deflate.go +++ b/flate/deflate.go @@ -645,15 +645,15 @@ func (d *compressor) init(w io.Writer, level int) (err error) { d.fill = (*compressor).fillBlock d.step = (*compressor).store case level == ConstantCompression: - d.w.logNewTablePenalty = 4 - d.window = make([]byte, maxStoreBlockSize) + d.w.logNewTablePenalty = 8 + d.window = make([]byte, 32<<10) d.fill = (*compressor).fillBlock d.step = (*compressor).storeHuff case level == DefaultCompression: level = 5 fallthrough case level >= 1 && level <= 6: - d.w.logNewTablePenalty = 6 + d.w.logNewTablePenalty = 8 d.fast = newFastEnc(level) d.window = make([]byte, maxStoreBlockSize) d.fill = (*compressor).fillBlock diff --git a/flate/fast_encoder.go b/flate/fast_encoder.go index 4a73e1bdd3..f2477abfcb 100644 --- a/flate/fast_encoder.go +++ b/flate/fast_encoder.go @@ -6,6 +6,7 @@ package flate import ( + "encoding/binary" "fmt" "math/bits" ) @@ -65,26 +66,15 @@ func load32(b []byte, i int) uint32 { } func load64(b []byte, i int) uint64 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:8] - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 + return binary.LittleEndian.Uint64(b[i:]) } func load3232(b []byte, i int32) uint32 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:4] - return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 + return binary.LittleEndian.Uint32(b[i:]) } func load6432(b []byte, i int32) uint64 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:8] - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 + return binary.LittleEndian.Uint64(b[i:]) } func hash(u uint32) uint32 { @@ -225,9 +215,9 @@ func (e *fastGen) Reset() { func matchLen(a, b []byte) int { b = b[:len(a)] var checked int - if len(a) > 4 { + if len(a) >= 4 { // Try 4 bytes first - if diff := load32(a, 0) ^ load32(b, 0); diff != 0 { + if diff := binary.LittleEndian.Uint32(a) ^ binary.LittleEndian.Uint32(b); diff != 0 { return bits.TrailingZeros32(diff) >> 3 } // Switch to 8 byte matching. @@ -236,7 +226,7 @@ func matchLen(a, b []byte) int { b = b[4:] for len(a) >= 8 { b = b[:len(a)] - if diff := load64(a, 0) ^ load64(b, 0); diff != 0 { + if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 { return checked + (bits.TrailingZeros64(diff) >> 3) } checked += 8 diff --git a/flate/huffman_bit_writer.go b/flate/huffman_bit_writer.go index 208d66711d..db54be1398 100644 --- a/flate/huffman_bit_writer.go +++ b/flate/huffman_bit_writer.go @@ -5,6 +5,7 @@ package flate import ( + "encoding/binary" "io" ) @@ -206,7 +207,7 @@ func (w *huffmanBitWriter) write(b []byte) { } func (w *huffmanBitWriter) writeBits(b int32, nb uint16) { - w.bits |= uint64(b) << (w.nbits & reg16SizeMask64) + w.bits |= uint64(b) << w.nbits w.nbits += nb if w.nbits >= 48 { w.writeOutBits() @@ -420,13 +421,11 @@ func (w *huffmanBitWriter) writeOutBits() { w.bits >>= 48 w.nbits -= 48 n := w.nbytes - w.bytes[n] = byte(bits) - w.bytes[n+1] = byte(bits >> 8) - w.bytes[n+2] = byte(bits >> 16) - w.bytes[n+3] = byte(bits >> 24) - w.bytes[n+4] = byte(bits >> 32) - w.bytes[n+5] = byte(bits >> 40) + + // We over-write, but faster... + binary.LittleEndian.PutUint64(w.bytes[n:], bits) n += 6 + if n >= bufferFlushSize { if w.err != nil { n = 0 @@ -435,6 +434,7 @@ func (w *huffmanBitWriter) writeOutBits() { w.write(w.bytes[:n]) n = 0 } + w.nbytes = n } @@ -759,7 +759,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) } else { // inlined c := lengths[lengthCode&31] - w.bits |= uint64(c.code) << (w.nbits & reg16SizeMask64) + w.bits |= uint64(c.code) << w.nbits w.nbits += c.len if w.nbits >= 48 { w.writeOutBits() @@ -779,7 +779,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) } else { // inlined c := offs[offsetCode&31] - w.bits |= uint64(c.code) << (w.nbits & reg16SizeMask64) + w.bits |= uint64(c.code) << w.nbits w.nbits += c.len if w.nbits >= 48 { w.writeOutBits() @@ -830,8 +830,8 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { // Assume header is around 70 bytes: // https://stackoverflow.com/a/25454430 const guessHeaderSizeBits = 70 * 8 - estBits, estExtra := histogramSize(input, w.literalFreq[:], !eof && !sync) - estBits += w.lastHeader + 15 + estBits := histogramSize(input, w.literalFreq[:], !eof && !sync) + estBits += w.lastHeader + len(input)/32 if w.lastHeader == 0 { estBits += guessHeaderSizeBits } @@ -845,9 +845,9 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { return } + reuseSize := 0 if w.lastHeader > 0 { - reuseSize := w.literalEncoding.bitLength(w.literalFreq[:256]) - estBits += estExtra + reuseSize = w.literalEncoding.bitLength(w.literalFreq[:256]) if estBits < reuseSize { // We owe an EOB @@ -859,6 +859,10 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { const numLiterals = endBlockMarker + 1 const numOffsets = 1 if w.lastHeader == 0 { + if !eof && !sync { + // Generate a slightly suboptimal tree that can be used for all. + fillHist(w.literalFreq[:numLiterals]) + } w.literalFreq[endBlockMarker] = 1 w.literalEncoding.generate(w.literalFreq[:numLiterals], 15) @@ -878,19 +882,14 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { for _, t := range input { // Bitwriting inlined, ~30% speedup c := encoding[t] - w.bits |= uint64(c.code) << ((w.nbits) & reg16SizeMask64) + w.bits |= uint64(c.code) << w.nbits w.nbits += c.len if w.nbits >= 48 { bits := w.bits w.bits >>= 48 w.nbits -= 48 n := w.nbytes - w.bytes[n] = byte(bits) - w.bytes[n+1] = byte(bits >> 8) - w.bytes[n+2] = byte(bits >> 16) - w.bytes[n+3] = byte(bits >> 24) - w.bytes[n+4] = byte(bits >> 32) - w.bytes[n+5] = byte(bits >> 40) + binary.LittleEndian.PutUint64(w.bytes[n:], bits) n += 6 if n >= bufferFlushSize { if w.err != nil { diff --git a/flate/huffman_code.go b/flate/huffman_code.go index 4c39a30187..0d3445a1cc 100644 --- a/flate/huffman_code.go +++ b/flate/huffman_code.go @@ -122,6 +122,16 @@ func (h *huffmanEncoder) bitLength(freq []uint16) int { return total } +func (h *huffmanEncoder) bitLengthRaw(b []byte) int { + var total int + for _, f := range b { + if f != 0 { + total += int(h.codes[f].len) + } + } + return total +} + // Return the number of literals assigned to each bit size in the Huffman encoding // // This method is only called when list.length >= 3 @@ -327,37 +337,40 @@ func atLeastOne(v float32) float32 { return v } +// Unassigned values are assigned '1' in the histogram. +func fillHist(b []uint16) { + for i, v := range b { + if v == 0 { + b[i] = 1 + } + } +} + // histogramSize accumulates a histogram of b in h. // An estimated size in bits is returned. -// Unassigned values are assigned '1' in the histogram. // len(h) must be >= 256, and h's elements must be all zeroes. -func histogramSize(b []byte, h []uint16, fill bool) (int, int) { +func histogramSize(b []byte, h []uint16, fill bool) (bits int) { h = h[:256] for _, t := range b { h[t]++ } - invTotal := 1.0 / float32(len(b)) - shannon := float32(0.0) - var extra float32 + total := len(b) if fill { - oneBits := atLeastOne(-mFastLog2(invTotal)) - for i, v := range h[:] { - if v > 0 { - n := float32(v) - shannon += atLeastOne(-mFastLog2(n*invTotal)) * n - } else { - h[i] = 1 - extra += oneBits + for _, v := range h { + if v == 0 { + total++ } } - } else { - for _, v := range h[:] { - if v > 0 { - n := float32(v) - shannon += atLeastOne(-mFastLog2(n*invTotal)) * n - } + } + + invTotal := 1.0 / float32(total) + shannon := float32(0.0) + for _, v := range h { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n } } - return int(shannon + 0.99), int(extra + 0.99) + return int(shannon + 0.99) } diff --git a/flate/testdata/huffman-rand-limit.golden b/flate/testdata/huffman-rand-limit.golden index 7ef6745049..57e59322e9 100644 Binary files a/flate/testdata/huffman-rand-limit.golden and b/flate/testdata/huffman-rand-limit.golden differ diff --git a/flate/testdata/huffman-text-shift.golden b/flate/testdata/huffman-text-shift.golden index 80531ad983..3a4dcc4cab 100644 Binary files a/flate/testdata/huffman-text-shift.golden and b/flate/testdata/huffman-text-shift.golden differ