From 9886fdcbbdf3e303746ae94b6ab5f6f0a9154c49 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 7 Sep 2021 14:18:59 +0200 Subject: [PATCH 1/3] huff0: 4X Decompression experiment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Significant degradations, abandoned. ``` λ benchcmp before.txt after.txt benchmark old ns/op new ns/op delta BenchmarkDecompress4XNoTable/digits-32 166962 183522 +9.92% BenchmarkDecompress4XNoTable/gettysburg-32 2747 2783 +1.31% BenchmarkDecompress4XNoTable/twain-32 579833 581783 +0.34% BenchmarkDecompress4XNoTable/low-ent.10k-32 56815 65143 +14.66% BenchmarkDecompress4XNoTable/superlow-ent-10k-32 15217 17399 +14.34% BenchmarkDecompress4XNoTable/case1-32 223 215 -3.63% BenchmarkDecompress4XNoTable/case2-32 178 173 -3.03% BenchmarkDecompress4XNoTable/case3-32 186 178 -4.10% BenchmarkDecompress4XNoTable/pngdata.001-32 78199 79470 +1.63% BenchmarkDecompress4XNoTable/normcount2-32 295 280 -5.22% BenchmarkDecompress4XTable/digits-32 169105 186653 +10.38% BenchmarkDecompress4XTable/gettysburg-32 4113 4243 +3.16% BenchmarkDecompress4XTable/twain-32 580482 589091 +1.48% BenchmarkDecompress4XTable/low-ent.10k-32 57488 66318 +15.36% BenchmarkDecompress4XTable/superlow-ent-10k-32 15801 18111 +14.62% BenchmarkDecompress4XTable/case1-32 2060 2062 +0.10% BenchmarkDecompress4XTable/case2-32 2004 2015 +0.55% BenchmarkDecompress4XTable/case3-32 2026 2040 +0.69% BenchmarkDecompress4XTable/pngdata.001-32 81603 83582 +2.43% BenchmarkDecompress4XTable/normcount2-32 1426 1401 -1.75% ``` --- huff0/decompress.go | 179 +++++++++++++++++++++++++------------------- 1 file changed, 101 insertions(+), 78 deletions(-) diff --git a/huff0/decompress.go b/huff0/decompress.go index 9b7cc8e97b..540272e9a4 100644 --- a/huff0/decompress.go +++ b/huff0/decompress.go @@ -20,9 +20,8 @@ type dEntrySingle struct { // double-symbols decoding type dEntryDouble struct { - seq uint16 + seq [2]byte nBits uint8 - len uint8 } // Uses special code for all tables that are < 8 bits. @@ -914,7 +913,7 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { out := dst dstEvery := (dstSize + 3) / 4 - shift := (8 - d.actualTableLog) & 7 + shift := (56 + (8 - d.actualTableLog)) & 63 const tlSize = 1 << 8 single := d.dt.single[:tlSize] @@ -938,37 +937,41 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { br[stream].fillFast() br[stream2].fillFast() - v := single[br[stream].peekByteFast()>>shift].entry + v := single[uint8(br[stream].value>>shift)].entry + v2 := single[uint8(br[stream2].value>>shift)].entry buf[off+bufoff*stream] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 := single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry + v2 = single[uint8(br[stream2].value>>shift)].entry buf[off+bufoff*stream+1] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry + v2 = single[uint8(br[stream2].value>>shift)].entry buf[off+bufoff*stream+2] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry + v2 = single[uint8(br[stream2].value>>shift)].entry buf[off+bufoff*stream+3] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 } { @@ -977,37 +980,41 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { br[stream].fillFast() br[stream2].fillFast() - v := single[br[stream].peekByteFast()>>shift].entry + v := single[uint8(br[stream].value>>shift)].entry + v2 := single[uint8(br[stream2].value>>shift)].entry buf[off+bufoff*stream] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 := single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry + v2 = single[uint8(br[stream2].value>>shift)].entry buf[off+bufoff*stream+1] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry + v2 = single[uint8(br[stream2].value>>shift)].entry buf[off+bufoff*stream+2] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry + v2 = single[uint8(br[stream2].value>>shift)].entry buf[off+bufoff*stream+3] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 } off += 4 @@ -1073,7 +1080,7 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { } // Read value and increment offset. - v := single[br.peekByteFast()>>shift].entry + v := single[uint8(br.value>>shift)].entry nBits := uint8(v) br.advance(nBits) bitsLeft -= int(nBits) @@ -1121,7 +1128,7 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { out := dst dstEvery := (dstSize + 3) / 4 - const shift = 0 + const shift = 56 const tlSize = 1 << 8 const tlMask = tlSize - 1 single := d.dt.single[:tlSize] @@ -1145,37 +1152,45 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { br[stream].fillFast() br[stream2].fillFast() - v := single[br[stream].peekByteFast()>>shift].entry + v := single[uint8(br[stream].value>>shift)].entry buf[off+bufoff*stream] = uint8(v >> 8) - br[stream].advance(uint8(v)) + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 - v2 := single[br[stream2].peekByteFast()>>shift].entry + v2 := single[uint8(br[stream2].value>>shift)].entry buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry buf[off+bufoff*stream+1] = uint8(v >> 8) - br[stream].advance(uint8(v)) + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 - v2 = single[br[stream2].peekByteFast()>>shift].entry + v2 = single[uint8(br[stream2].value>>shift)].entry buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry buf[off+bufoff*stream+2] = uint8(v >> 8) - br[stream].advance(uint8(v)) + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 - v2 = single[br[stream2].peekByteFast()>>shift].entry + v2 = single[uint8(br[stream2].value>>shift)].entry buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry buf[off+bufoff*stream+3] = uint8(v >> 8) - br[stream].advance(uint8(v)) + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 - v2 = single[br[stream2].peekByteFast()>>shift].entry + v2 = single[uint8(br[stream2].value>>shift)].entry buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 } { @@ -1184,37 +1199,45 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { br[stream].fillFast() br[stream2].fillFast() - v := single[br[stream].peekByteFast()>>shift].entry + v := single[uint8(br[stream].value>>shift)].entry buf[off+bufoff*stream] = uint8(v >> 8) - br[stream].advance(uint8(v)) + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 - v2 := single[br[stream2].peekByteFast()>>shift].entry + v2 := single[uint8(br[stream2].value>>shift)].entry buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry buf[off+bufoff*stream+1] = uint8(v >> 8) - br[stream].advance(uint8(v)) + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 - v2 = single[br[stream2].peekByteFast()>>shift].entry + v2 = single[uint8(br[stream2].value>>shift)].entry buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry buf[off+bufoff*stream+2] = uint8(v >> 8) - br[stream].advance(uint8(v)) + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 - v2 = single[br[stream2].peekByteFast()>>shift].entry + v2 = single[uint8(br[stream2].value>>shift)].entry buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry buf[off+bufoff*stream+3] = uint8(v >> 8) - br[stream].advance(uint8(v)) + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 - v2 = single[br[stream2].peekByteFast()>>shift].entry + v2 = single[uint8(br[stream2].value>>shift)].entry buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 } off += 4 @@ -1280,7 +1303,7 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { } // Read value and increment offset. - v := single[br.peekByteFast()>>shift].entry + v := single[br.peekByteFast()].entry nBits := uint8(v) br.advance(nBits) bitsLeft -= int(nBits) From 316f32aa15ca0ef09d97401487b7b05aeeed7d7e Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Thu, 9 Sep 2021 10:48:42 +0200 Subject: [PATCH 2/3] noop change --- huff0/decompress.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/huff0/decompress.go b/huff0/decompress.go index 540272e9a4..ef51c85033 100644 --- a/huff0/decompress.go +++ b/huff0/decompress.go @@ -20,8 +20,9 @@ type dEntrySingle struct { // double-symbols decoding type dEntryDouble struct { - seq [2]byte + seq [4]byte nBits uint8 + len uint8 } // Uses special code for all tables that are < 8 bits. From 1f1d5b2f2de9e1a19838ab7969d02c634626722a Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Sat, 8 Jan 2022 18:47:10 +0100 Subject: [PATCH 3/3] Make it actually faster. --- huff0/decompress.go | 172 ++++++++++++++++----------------- huff0/decompress_test.go | 202 ++++++++++++++++++++++++--------------- 2 files changed, 207 insertions(+), 167 deletions(-) diff --git a/huff0/decompress.go b/huff0/decompress.go index ef51c85033..f3db4a3b55 100644 --- a/huff0/decompress.go +++ b/huff0/decompress.go @@ -935,87 +935,91 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { // Interleave 2 decodes. const stream = 0 const stream2 = 1 - br[stream].fillFast() - br[stream2].fillFast() - - v := single[uint8(br[stream].value>>shift)].entry - v2 := single[uint8(br[stream2].value>>shift)].entry + br1 := &br[stream] + br2 := &br[stream2] + br1.fillFast() + br2.fillFast() + + v := single[uint8(br1.value>>shift)].entry + v2 := single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 buf[off+bufoff*stream] = uint8(v >> 8) buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream].bitsRead += uint8(v) - br[stream].value <<= v & 63 - br[stream2].bitsRead += uint8(v2) - br[stream2].value <<= v2 & 63 - v = single[uint8(br[stream].value>>shift)].entry - v2 = single[uint8(br[stream2].value>>shift)].entry + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 buf[off+bufoff*stream+1] = uint8(v >> 8) buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream].bitsRead += uint8(v) - br[stream].value <<= v & 63 - br[stream2].bitsRead += uint8(v2) - br[stream2].value <<= v2 & 63 - v = single[uint8(br[stream].value>>shift)].entry - v2 = single[uint8(br[stream2].value>>shift)].entry + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 buf[off+bufoff*stream+2] = uint8(v >> 8) buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream].bitsRead += uint8(v) - br[stream].value <<= v & 63 - br[stream2].bitsRead += uint8(v2) - br[stream2].value <<= v2 & 63 - v = single[uint8(br[stream].value>>shift)].entry - v2 = single[uint8(br[stream2].value>>shift)].entry - buf[off+bufoff*stream+3] = uint8(v >> 8) + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream].bitsRead += uint8(v) - br[stream].value <<= v & 63 - br[stream2].bitsRead += uint8(v2) - br[stream2].value <<= v2 & 63 + buf[off+bufoff*stream+3] = uint8(v >> 8) } { const stream = 2 const stream2 = 3 - br[stream].fillFast() - br[stream2].fillFast() - - v := single[uint8(br[stream].value>>shift)].entry - v2 := single[uint8(br[stream2].value>>shift)].entry + br1 := &br[stream] + br2 := &br[stream2] + br1.fillFast() + br2.fillFast() + + v := single[uint8(br1.value>>shift)].entry + v2 := single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 buf[off+bufoff*stream] = uint8(v >> 8) buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream].bitsRead += uint8(v) - br[stream].value <<= v & 63 - br[stream2].bitsRead += uint8(v2) - br[stream2].value <<= v2 & 63 - v = single[uint8(br[stream].value>>shift)].entry - v2 = single[uint8(br[stream2].value>>shift)].entry + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 buf[off+bufoff*stream+1] = uint8(v >> 8) buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream].bitsRead += uint8(v) - br[stream].value <<= v & 63 - br[stream2].bitsRead += uint8(v2) - br[stream2].value <<= v2 & 63 - v = single[uint8(br[stream].value>>shift)].entry - v2 = single[uint8(br[stream2].value>>shift)].entry + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 buf[off+bufoff*stream+2] = uint8(v >> 8) buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream].bitsRead += uint8(v) - br[stream].value <<= v & 63 - br[stream2].bitsRead += uint8(v2) - br[stream2].value <<= v2 & 63 - v = single[uint8(br[stream].value>>shift)].entry - v2 = single[uint8(br[stream2].value>>shift)].entry - buf[off+bufoff*stream+3] = uint8(v >> 8) + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream].bitsRead += uint8(v) - br[stream].value <<= v & 63 - br[stream2].bitsRead += uint8(v2) - br[stream2].value <<= v2 & 63 + buf[off+bufoff*stream+3] = uint8(v >> 8) } off += 4 @@ -1154,44 +1158,40 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { br[stream2].fillFast() v := single[uint8(br[stream].value>>shift)].entry - buf[off+bufoff*stream] = uint8(v >> 8) + v2 := single[uint8(br[stream2].value>>shift)].entry br[stream].bitsRead += uint8(v) br[stream].value <<= v & 63 - - v2 := single[uint8(br[stream2].value>>shift)].entry - buf[off+bufoff*stream2] = uint8(v2 >> 8) br[stream2].bitsRead += uint8(v2) br[stream2].value <<= v2 & 63 + buf[off+bufoff*stream] = uint8(v >> 8) + buf[off+bufoff*stream2] = uint8(v2 >> 8) v = single[uint8(br[stream].value>>shift)].entry - buf[off+bufoff*stream+1] = uint8(v >> 8) + v2 = single[uint8(br[stream2].value>>shift)].entry br[stream].bitsRead += uint8(v) br[stream].value <<= v & 63 - - v2 = single[uint8(br[stream2].value>>shift)].entry - buf[off+bufoff*stream2+1] = uint8(v2 >> 8) br[stream2].bitsRead += uint8(v2) br[stream2].value <<= v2 & 63 + buf[off+bufoff*stream+1] = uint8(v >> 8) + buf[off+bufoff*stream2+1] = uint8(v2 >> 8) v = single[uint8(br[stream].value>>shift)].entry - buf[off+bufoff*stream+2] = uint8(v >> 8) + v2 = single[uint8(br[stream2].value>>shift)].entry br[stream].bitsRead += uint8(v) br[stream].value <<= v & 63 - - v2 = single[uint8(br[stream2].value>>shift)].entry - buf[off+bufoff*stream2+2] = uint8(v2 >> 8) br[stream2].bitsRead += uint8(v2) br[stream2].value <<= v2 & 63 + buf[off+bufoff*stream+2] = uint8(v >> 8) + buf[off+bufoff*stream2+2] = uint8(v2 >> 8) v = single[uint8(br[stream].value>>shift)].entry - buf[off+bufoff*stream+3] = uint8(v >> 8) + v2 = single[uint8(br[stream2].value>>shift)].entry br[stream].bitsRead += uint8(v) br[stream].value <<= v & 63 - - v2 = single[uint8(br[stream2].value>>shift)].entry - buf[off+bufoff*stream2+3] = uint8(v2 >> 8) br[stream2].bitsRead += uint8(v2) br[stream2].value <<= v2 & 63 + buf[off+bufoff*stream+3] = uint8(v >> 8) + buf[off+bufoff*stream2+3] = uint8(v2 >> 8) } { @@ -1201,44 +1201,40 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { br[stream2].fillFast() v := single[uint8(br[stream].value>>shift)].entry - buf[off+bufoff*stream] = uint8(v >> 8) + v2 := single[uint8(br[stream2].value>>shift)].entry br[stream].bitsRead += uint8(v) br[stream].value <<= v & 63 - - v2 := single[uint8(br[stream2].value>>shift)].entry - buf[off+bufoff*stream2] = uint8(v2 >> 8) br[stream2].bitsRead += uint8(v2) br[stream2].value <<= v2 & 63 + buf[off+bufoff*stream] = uint8(v >> 8) + buf[off+bufoff*stream2] = uint8(v2 >> 8) v = single[uint8(br[stream].value>>shift)].entry - buf[off+bufoff*stream+1] = uint8(v >> 8) + v2 = single[uint8(br[stream2].value>>shift)].entry br[stream].bitsRead += uint8(v) br[stream].value <<= v & 63 - - v2 = single[uint8(br[stream2].value>>shift)].entry - buf[off+bufoff*stream2+1] = uint8(v2 >> 8) br[stream2].bitsRead += uint8(v2) br[stream2].value <<= v2 & 63 + buf[off+bufoff*stream+1] = uint8(v >> 8) + buf[off+bufoff*stream2+1] = uint8(v2 >> 8) v = single[uint8(br[stream].value>>shift)].entry - buf[off+bufoff*stream+2] = uint8(v >> 8) + v2 = single[uint8(br[stream2].value>>shift)].entry br[stream].bitsRead += uint8(v) br[stream].value <<= v & 63 - - v2 = single[uint8(br[stream2].value>>shift)].entry - buf[off+bufoff*stream2+2] = uint8(v2 >> 8) br[stream2].bitsRead += uint8(v2) br[stream2].value <<= v2 & 63 + buf[off+bufoff*stream+2] = uint8(v >> 8) + buf[off+bufoff*stream2+2] = uint8(v2 >> 8) v = single[uint8(br[stream].value>>shift)].entry - buf[off+bufoff*stream+3] = uint8(v >> 8) + v2 = single[uint8(br[stream2].value>>shift)].entry br[stream].bitsRead += uint8(v) br[stream].value <<= v & 63 - - v2 = single[uint8(br[stream2].value>>shift)].entry - buf[off+bufoff*stream2+3] = uint8(v2 >> 8) br[stream2].bitsRead += uint8(v2) br[stream2].value <<= v2 & 63 + buf[off+bufoff*stream+3] = uint8(v >> 8) + buf[off+bufoff*stream2+3] = uint8(v2 >> 8) } off += 4 diff --git a/huff0/decompress_test.go b/huff0/decompress_test.go index b95d8be0c1..a42358200b 100644 --- a/huff0/decompress_test.go +++ b/huff0/decompress_test.go @@ -2,6 +2,7 @@ package huff0 import ( "bytes" + "fmt" "testing" ) @@ -98,88 +99,94 @@ func TestDecompress1X(t *testing.T) { func TestDecompress4X(t *testing.T) { for _, test := range testfiles { t.Run(test.name, func(t *testing.T) { - var s = &Scratch{} - buf0, err := test.fn() - if err != nil { - t.Fatal(err) - } - if len(buf0) > BlockSizeMax { - buf0 = buf0[:BlockSizeMax] - } - b, re, err := Compress4X(buf0, s) - if err != test.err4X { - t.Errorf("want error %v (%T), got %v (%T)", test.err1X, test.err1X, err, err) - } - if err != nil { - t.Log(test.name, err.Error()) - return - } - if b == nil { - t.Error("got no output") - return - } - if len(s.OutTable) == 0 { - t.Error("got no table definition") - } - if re { - t.Error("claimed to have re-used.") - } - if len(s.OutData) == 0 { - t.Error("got no data output") - } + for _, tl := range []uint8{0, 5, 6, 7, 8, 9, 10, 11} { + t.Run(fmt.Sprintf("tablelog-%d", tl), func(t *testing.T) { + var s = &Scratch{} + s.TableLog = tl + buf0, err := test.fn() + if err != nil { + t.Fatal(err) + } + if len(buf0) > BlockSizeMax { + buf0 = buf0[:BlockSizeMax] + } + b, re, err := Compress4X(buf0, s) + if err != test.err4X { + t.Errorf("want error %v (%T), got %v (%T)", test.err1X, test.err1X, err, err) + } + if err != nil { + t.Log(test.name, err.Error()) + return + } + if b == nil { + t.Error("got no output") + return + } + if len(s.OutTable) == 0 { + t.Error("got no table definition") + } + if re { + t.Error("claimed to have re-used.") + } + if len(s.OutData) == 0 { + t.Error("got no data output") + } - wantRemain := len(s.OutData) - t.Logf("%s: %d -> %d bytes (%.2f:1) %t (table: %d bytes)", test.name, len(buf0), len(b), float64(len(buf0))/float64(len(b)), re, len(s.OutTable)) + wantRemain := len(s.OutData) + t.Logf("%s: %d -> %d bytes (%.2f:1) %t (table: %d bytes)", test.name, len(buf0), len(b), float64(len(buf0))/float64(len(b)), re, len(s.OutTable)) - s.Out = nil - var remain []byte - s, remain, err = ReadTable(b, s) - if err != nil { - t.Error(err) - return - } - var buf bytes.Buffer - if s.matches(s.prevTable, &buf); buf.Len() > 0 { - t.Error(buf.String()) - } - if len(remain) != wantRemain { - t.Fatalf("remain mismatch, want %d, got %d bytes", wantRemain, len(remain)) - } - t.Logf("remain: %d bytes, ok", len(remain)) - dc, err := s.Decompress4X(remain, len(buf0)) - if err != nil { - t.Error(err) - return - } - if len(buf0) != len(dc) { - t.Errorf(test.name+"decompressed, want size: %d, got %d", len(buf0), len(dc)) - if len(buf0) > len(dc) { - buf0 = buf0[:len(dc)] - } else { - dc = dc[:len(buf0)] - } - if !bytes.Equal(buf0, dc) { - if len(dc) > 1024 { - t.Log(string(dc[:1024])) - t.Errorf(test.name+"decompressed, got delta: \n(in)\t%02x !=\n(out)\t%02x\n", buf0[:1024], dc[:1024]) - } else { - t.Log(string(dc)) - t.Errorf(test.name+"decompressed, got delta: (in) %v != (out) %v\n", buf0, dc) + s.Out = nil + var remain []byte + s, remain, err = ReadTable(b, s) + if err != nil { + t.Error(err) + return } - } - return - } - if !bytes.Equal(buf0, dc) { - if len(buf0) > 1024 { - t.Log(string(dc[:1024])) - } else { - t.Log(string(dc)) - } - //t.Errorf(test.name+": decompressed, got delta: \n%s") - t.Errorf(test.name + ": decompressed, got delta") - } - if !t.Failed() { - t.Log("... roundtrip ok!") + var buf bytes.Buffer + if s.matches(s.prevTable, &buf); buf.Len() > 0 { + t.Error(buf.String()) + } + if len(remain) != wantRemain { + t.Fatalf("remain mismatch, want %d, got %d bytes", wantRemain, len(remain)) + } + t.Logf("remain: %d bytes, ok", len(remain)) + dc, err := s.Decompress4X(remain, len(buf0)) + if err != nil { + t.Error(err) + return + } + if len(buf0) != len(dc) { + t.Errorf(test.name+"decompressed, want size: %d, got %d", len(buf0), len(dc)) + if len(buf0) > len(dc) { + buf0 = buf0[:len(dc)] + } else { + dc = dc[:len(buf0)] + } + if !bytes.Equal(buf0, dc) { + if len(dc) > 1024 { + t.Log(string(dc[:1024])) + t.Errorf(test.name+"decompressed, got delta: \n(in)\t%02x !=\n(out)\t%02x\n", buf0[:1024], dc[:1024]) + } else { + t.Log(string(dc)) + t.Errorf(test.name+"decompressed, got delta: (in) %v != (out) %v\n", buf0, dc) + } + } + return + } + if !bytes.Equal(buf0, dc) { + if len(buf0) > 1024 { + t.Log(string(dc[:1024])) + } else { + t.Log(string(dc)) + } + //t.Errorf(test.name+": decompressed, got delta: \n%s") + t.Errorf(test.name + ": decompressed, got delta") + } + if !t.Failed() { + t.Log("... roundtrip ok!") + } + + }) } }) } @@ -479,6 +486,43 @@ func BenchmarkDecompress4XNoTable(b *testing.B) { } } +func BenchmarkDecompress4XNoTableTableLog8(b *testing.B) { + for _, tt := range testfiles[:1] { + test := tt + if test.err4X != nil { + continue + } + b.Run(test.name, func(b *testing.B) { + var s = &Scratch{} + s.Reuse = ReusePolicyNone + buf0, err := test.fn() + if err != nil { + b.Fatal(err) + } + if len(buf0) > BlockSizeMax { + buf0 = buf0[:BlockSizeMax] + } + s.TableLog = 8 + compressed, _, err := Compress4X(buf0, s) + if err != test.err1X { + b.Fatal("unexpected error:", err) + } + s.Out = nil + s, remain, _ := ReadTable(compressed, s) + s.Decompress4X(remain, len(buf0)) + b.ResetTimer() + b.ReportAllocs() + b.SetBytes(int64(len(buf0))) + for i := 0; i < b.N; i++ { + _, err = s.Decompress4X(remain, len(buf0)) + if err != nil { + b.Fatal(err) + } + } + }) + } +} + func BenchmarkDecompress4XTable(b *testing.B) { for _, tt := range testfiles { test := tt