diff --git a/huff0/decompress.go b/huff0/decompress.go index 9b7cc8e97b..f3db4a3b55 100644 --- a/huff0/decompress.go +++ b/huff0/decompress.go @@ -20,7 +20,7 @@ type dEntrySingle struct { // double-symbols decoding type dEntryDouble struct { - seq uint16 + seq [4]byte nBits uint8 len uint8 } @@ -914,7 +914,7 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { out := dst dstEvery := (dstSize + 3) / 4 - shift := (8 - d.actualTableLog) & 7 + shift := (56 + (8 - d.actualTableLog)) & 63 const tlSize = 1 << 8 single := d.dt.single[:tlSize] @@ -935,79 +935,91 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { // Interleave 2 decodes. const stream = 0 const stream2 = 1 - br[stream].fillFast() - br[stream2].fillFast() - - v := single[br[stream].peekByteFast()>>shift].entry + br1 := &br[stream] + br2 := &br[stream2] + br1.fillFast() + br2.fillFast() + + v := single[uint8(br1.value>>shift)].entry + v2 := single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 buf[off+bufoff*stream] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 := single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 buf[off+bufoff*stream+1] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 
buf[off+bufoff*stream+2] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+3] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + buf[off+bufoff*stream+3] = uint8(v >> 8) } { const stream = 2 const stream2 = 3 - br[stream].fillFast() - br[stream2].fillFast() - - v := single[br[stream].peekByteFast()>>shift].entry + br1 := &br[stream] + br2 := &br[stream2] + br1.fillFast() + br2.fillFast() + + v := single[uint8(br1.value>>shift)].entry + v2 := single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 buf[off+bufoff*stream] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 := single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 buf[off+bufoff*stream+1] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value 
<<= v2 & 63 buf[off+bufoff*stream+2] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - - v = single[br[stream].peekByteFast()>>shift].entry - buf[off+bufoff*stream+3] = uint8(v >> 8) - br[stream].advance(uint8(v)) - v2 = single[br[stream2].peekByteFast()>>shift].entry + v = single[uint8(br1.value>>shift)].entry + v2 = single[uint8(br2.value>>shift)].entry + br1.bitsRead += uint8(v) + br1.value <<= v & 63 + br2.bitsRead += uint8(v2) + br2.value <<= v2 & 63 buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) + buf[off+bufoff*stream+3] = uint8(v >> 8) } off += 4 @@ -1073,7 +1085,7 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) { } // Read value and increment offset. - v := single[br.peekByteFast()>>shift].entry + v := single[uint8(br.value>>shift)].entry nBits := uint8(v) br.advance(nBits) bitsLeft -= int(nBits) @@ -1121,7 +1133,7 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { out := dst dstEvery := (dstSize + 3) / 4 - const shift = 0 + const shift = 56 const tlSize = 1 << 8 const tlMask = tlSize - 1 single := d.dt.single[:tlSize] @@ -1145,37 +1157,41 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { br[stream].fillFast() br[stream2].fillFast() - v := single[br[stream].peekByteFast()>>shift].entry + v := single[uint8(br[stream].value>>shift)].entry + v2 := single[uint8(br[stream2].value>>shift)].entry + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 buf[off+bufoff*stream] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 := single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - v = single[br[stream].peekByteFast()>>shift].entry + v = 
single[uint8(br[stream].value>>shift)].entry + v2 = single[uint8(br[stream2].value>>shift)].entry + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 buf[off+bufoff*stream+1] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry + v2 = single[uint8(br[stream2].value>>shift)].entry + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 buf[off+bufoff*stream+2] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry + v2 = single[uint8(br[stream2].value>>shift)].entry + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 buf[off+bufoff*stream+3] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) } { @@ -1184,37 +1200,41 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { br[stream].fillFast() br[stream2].fillFast() - v := single[br[stream].peekByteFast()>>shift].entry + v := single[uint8(br[stream].value>>shift)].entry + v2 := single[uint8(br[stream2].value>>shift)].entry + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 buf[off+bufoff*stream] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 := single[br[stream2].peekByteFast()>>shift].entry 
buf[off+bufoff*stream2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry + v2 = single[uint8(br[stream2].value>>shift)].entry + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 buf[off+bufoff*stream+1] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2+1] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry + v2 = single[uint8(br[stream2].value>>shift)].entry + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 buf[off+bufoff*stream+2] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2+2] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) - v = single[br[stream].peekByteFast()>>shift].entry + v = single[uint8(br[stream].value>>shift)].entry + v2 = single[uint8(br[stream2].value>>shift)].entry + br[stream].bitsRead += uint8(v) + br[stream].value <<= v & 63 + br[stream2].bitsRead += uint8(v2) + br[stream2].value <<= v2 & 63 buf[off+bufoff*stream+3] = uint8(v >> 8) - br[stream].advance(uint8(v)) - - v2 = single[br[stream2].peekByteFast()>>shift].entry buf[off+bufoff*stream2+3] = uint8(v2 >> 8) - br[stream2].advance(uint8(v2)) } off += 4 @@ -1280,7 +1300,7 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) { } // Read value and increment offset. 
-			v := single[br.peekByteFast()>>shift].entry
+			v := single[br.peekByteFast()].entry
 			nBits := uint8(v)
 			br.advance(nBits)
 			bitsLeft -= int(nBits)
diff --git a/huff0/decompress_test.go b/huff0/decompress_test.go
index b95d8be0c1..a42358200b 100644
--- a/huff0/decompress_test.go
+++ b/huff0/decompress_test.go
@@ -2,6 +2,7 @@ package huff0
 
 import (
 	"bytes"
+	"fmt"
 	"testing"
 )
 
@@ -98,88 +99,94 @@ func TestDecompress1X(t *testing.T) {
 func TestDecompress4X(t *testing.T) {
 	for _, test := range testfiles {
 		t.Run(test.name, func(t *testing.T) {
-			var s = &Scratch{}
-			buf0, err := test.fn()
-			if err != nil {
-				t.Fatal(err)
-			}
-			if len(buf0) > BlockSizeMax {
-				buf0 = buf0[:BlockSizeMax]
-			}
-			b, re, err := Compress4X(buf0, s)
-			if err != test.err4X {
-				t.Errorf("want error %v (%T), got %v (%T)", test.err1X, test.err1X, err, err)
-			}
-			if err != nil {
-				t.Log(test.name, err.Error())
-				return
-			}
-			if b == nil {
-				t.Error("got no output")
-				return
-			}
-			if len(s.OutTable) == 0 {
-				t.Error("got no table definition")
-			}
-			if re {
-				t.Error("claimed to have re-used.")
-			}
-			if len(s.OutData) == 0 {
-				t.Error("got no data output")
-			}
+			for _, tl := range []uint8{0, 5, 6, 7, 8, 9, 10, 11} {
+				t.Run(fmt.Sprintf("tablelog-%d", tl), func(t *testing.T) {
+					var s = &Scratch{}
+					s.TableLog = tl
+					buf0, err := test.fn()
+					if err != nil {
+						t.Fatal(err)
+					}
+					if len(buf0) > BlockSizeMax {
+						buf0 = buf0[:BlockSizeMax]
+					}
+					b, re, err := Compress4X(buf0, s)
+					if err != test.err4X {
+						t.Errorf("want error %v (%T), got %v (%T)", test.err4X, test.err4X, err, err)
+					}
+					if err != nil {
+						t.Log(test.name, err.Error())
+						return
+					}
+					if b == nil {
+						t.Error("got no output")
+						return
+					}
+					if len(s.OutTable) == 0 {
+						t.Error("got no table definition")
+					}
+					if re {
+						t.Error("claimed to have re-used.")
+					}
+					if len(s.OutData) == 0 {
+						t.Error("got no data output")
+					}
 
-			wantRemain := len(s.OutData)
-			t.Logf("%s: %d -> %d bytes (%.2f:1) %t (table: %d bytes)", test.name, len(buf0), len(b), float64(len(buf0))/float64(len(b)), re, len(s.OutTable))
+					wantRemain := len(s.OutData)
+					t.Logf("%s: %d -> %d bytes (%.2f:1) %t (table: %d bytes)", test.name, len(buf0), len(b), float64(len(buf0))/float64(len(b)), re, len(s.OutTable))
 
-			s.Out = nil
-			var remain []byte
-			s, remain, err = ReadTable(b, s)
-			if err != nil {
-				t.Error(err)
-				return
-			}
-			var buf bytes.Buffer
-			if s.matches(s.prevTable, &buf); buf.Len() > 0 {
-				t.Error(buf.String())
-			}
-			if len(remain) != wantRemain {
-				t.Fatalf("remain mismatch, want %d, got %d bytes", wantRemain, len(remain))
-			}
-			t.Logf("remain: %d bytes, ok", len(remain))
-			dc, err := s.Decompress4X(remain, len(buf0))
-			if err != nil {
-				t.Error(err)
-				return
-			}
-			if len(buf0) != len(dc) {
-				t.Errorf(test.name+"decompressed, want size: %d, got %d", len(buf0), len(dc))
-				if len(buf0) > len(dc) {
-					buf0 = buf0[:len(dc)]
-				} else {
-					dc = dc[:len(buf0)]
-				}
-				if !bytes.Equal(buf0, dc) {
-					if len(dc) > 1024 {
-						t.Log(string(dc[:1024]))
-						t.Errorf(test.name+"decompressed, got delta: \n(in)\t%02x !=\n(out)\t%02x\n", buf0[:1024], dc[:1024])
-					} else {
-						t.Log(string(dc))
-						t.Errorf(test.name+"decompressed, got delta: (in) %v != (out) %v\n", buf0, dc)
+					s.Out = nil
+					var remain []byte
+					s, remain, err = ReadTable(b, s)
+					if err != nil {
+						t.Error(err)
+						return
 					}
-				}
-				return
-			}
-			if !bytes.Equal(buf0, dc) {
-				if len(buf0) > 1024 {
-					t.Log(string(dc[:1024]))
-				} else {
-					t.Log(string(dc))
-				}
-				//t.Errorf(test.name+": decompressed, got delta: \n%s")
-				t.Errorf(test.name + ": decompressed, got delta")
-			}
-			if !t.Failed() {
-				t.Log("... roundtrip ok!")
+					var buf bytes.Buffer
+					if s.matches(s.prevTable, &buf); buf.Len() > 0 {
+						t.Error(buf.String())
+					}
+					if len(remain) != wantRemain {
+						t.Fatalf("remain mismatch, want %d, got %d bytes", wantRemain, len(remain))
+					}
+					t.Logf("remain: %d bytes, ok", len(remain))
+					dc, err := s.Decompress4X(remain, len(buf0))
+					if err != nil {
+						t.Error(err)
+						return
+					}
+					if len(buf0) != len(dc) {
+						t.Errorf(test.name+"decompressed, want size: %d, got %d", len(buf0), len(dc))
+						if len(buf0) > len(dc) {
+							buf0 = buf0[:len(dc)]
+						} else {
+							dc = dc[:len(buf0)]
+						}
+						if !bytes.Equal(buf0, dc) {
+							if len(dc) > 1024 {
+								t.Log(string(dc[:1024]))
+								t.Errorf(test.name+"decompressed, got delta: \n(in)\t%02x !=\n(out)\t%02x\n", buf0[:1024], dc[:1024])
+							} else {
+								t.Log(string(dc))
+								t.Errorf(test.name+"decompressed, got delta: (in) %v != (out) %v\n", buf0, dc)
+							}
+						}
+						return
+					}
+					if !bytes.Equal(buf0, dc) {
+						if len(buf0) > 1024 {
+							t.Log(string(dc[:1024]))
+						} else {
+							t.Log(string(dc))
+						}
+						//t.Errorf(test.name+": decompressed, got delta: \n%s")
+						t.Errorf(test.name + ": decompressed, got delta")
+					}
+					if !t.Failed() {
+						t.Log("... roundtrip ok!")
+					}
+
+				})
 			}
 		})
 	}
@@ -479,6 +486,49 @@ func BenchmarkDecompress4XNoTable(b *testing.B) {
 	}
 }
 
+// BenchmarkDecompress4XNoTableTableLog8 measures Decompress4X on the first test
+// file, compressed with TableLog set to 8. The table is read and one untimed
+// Decompress4X call is made before the timer is reset.
+func BenchmarkDecompress4XNoTableTableLog8(b *testing.B) {
+	for _, tt := range testfiles[:1] {
+		test := tt
+		if test.err4X != nil {
+			continue
+		}
+		b.Run(test.name, func(b *testing.B) {
+			var s = &Scratch{}
+			s.Reuse = ReusePolicyNone
+			buf0, err := test.fn()
+			if err != nil {
+				b.Fatal(err)
+			}
+			if len(buf0) > BlockSizeMax {
+				buf0 = buf0[:BlockSizeMax]
+			}
+			s.TableLog = 8
+			compressed, _, err := Compress4X(buf0, s)
+			if err != test.err4X {
+				b.Fatal("unexpected error:", err)
+			}
+			s.Out = nil
+			s, remain, err := ReadTable(compressed, s)
+			if err != nil {
+				b.Fatal(err)
+			}
+			s.Decompress4X(remain, len(buf0))
+			b.ResetTimer()
+			b.ReportAllocs()
+			b.SetBytes(int64(len(buf0)))
+			for i := 0; i < b.N; i++ {
+				_, err = s.Decompress4X(remain, len(buf0))
+				if err != nil {
+					b.Fatal(err)
+				}
+			}
+		})
+	}
+}
+
 func BenchmarkDecompress4XTable(b *testing.B) {
 	for _, tt := range testfiles {
 		test := tt