-
Notifications
You must be signed in to change notification settings - Fork 314
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement huff0 entropy encoder (#89)
* Implement huff0 entropy encoder.
- Loading branch information
Showing
266 changed files
with
2,688 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ go: | |
- 1.7.x | ||
- 1.8.x | ||
- 1.9.x | ||
- 1.10.x | ||
- master | ||
|
||
install: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
/huff0-fuzz.zip |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
// Copyright 2018 Klaus Post. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. | ||
|
||
package huff0 | ||
|
||
import ( | ||
"errors" | ||
"io" | ||
) | ||
|
||
// bitReader consumes a bitstream back to front.
// The highest set bit of the final input byte marks where the stream
// begins and is used to align the input.
type bitReader struct {
	in       []byte // input being decoded
	off      uint   // read cursor; the next byte consumed is in[off-1]
	value    uint64 // bit container holding the most recently loaded input
	bitsRead uint8  // number of bits of value already consumed
}
|
||
// init initializes and resets the bit reader. | ||
func (b *bitReader) init(in []byte) error { | ||
if len(in) < 1 { | ||
return errors.New("corrupt stream: too short") | ||
} | ||
b.in = in | ||
b.off = uint(len(in)) | ||
// The highest bit of the last byte indicates where to start | ||
v := in[len(in)-1] | ||
if v == 0 { | ||
return errors.New("corrupt stream, did not find end of stream") | ||
} | ||
b.bitsRead = 64 | ||
b.value = 0 | ||
b.fill() | ||
b.fill() | ||
b.bitsRead += 8 - uint8(highBit32(uint32(v))) | ||
return nil | ||
} | ||
|
||
// getBits will return n bits. n can be 0. | ||
func (b *bitReader) getBits(n uint8) uint16 { | ||
if n == 0 || b.bitsRead >= 64 { | ||
return 0 | ||
} | ||
return b.getBitsFast(n) | ||
} | ||
|
||
// getBitsFast requires that at least one bit is requested every time. | ||
// There are no checks if the buffer is filled. | ||
func (b *bitReader) getBitsFast(n uint8) uint16 { | ||
const regMask = 64 - 1 | ||
v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) | ||
b.bitsRead += n | ||
return v | ||
} | ||
|
||
// peekBitsFast requires that at least one bit is requested every time. | ||
// There are no checks if the buffer is filled. | ||
func (b *bitReader) peekBitsFast(n uint8) uint16 { | ||
const regMask = 64 - 1 | ||
v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) | ||
return v | ||
} | ||
|
||
// fillFast() will make sure at least 32 bits are available. | ||
// There must be at least 4 bytes available. | ||
func (b *bitReader) fillFast() { | ||
if b.bitsRead < 32 { | ||
return | ||
} | ||
// Do single re-slice to avoid bounds checks. | ||
v := b.in[b.off-4 : b.off] | ||
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||
b.value = (b.value << 32) | uint64(low) | ||
b.bitsRead -= 32 | ||
b.off -= 4 | ||
} | ||
|
||
// fill() will make sure at least 32 bits are available. | ||
func (b *bitReader) fill() { | ||
if b.bitsRead < 32 { | ||
return | ||
} | ||
if b.off > 4 { | ||
v := b.in[b.off-4 : b.off] | ||
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) | ||
b.value = (b.value << 32) | uint64(low) | ||
b.bitsRead -= 32 | ||
b.off -= 4 | ||
return | ||
} | ||
for b.off > 0 { | ||
b.value = (b.value << 8) | uint64(b.in[b.off-1]) | ||
b.bitsRead -= 8 | ||
b.off-- | ||
} | ||
} | ||
|
||
// finished returns true if all bits have been read from the bit stream. | ||
func (b *bitReader) finished() bool { | ||
return b.off == 0 && b.bitsRead >= 64 | ||
} | ||
|
||
// close the bitstream and returns an error if out-of-buffer reads occurred. | ||
func (b *bitReader) close() error { | ||
// Release reference. | ||
b.in = nil | ||
if b.bitsRead > 64 { | ||
return io.ErrUnexpectedEOF | ||
} | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
// Copyright 2018 Klaus Post. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. | ||
|
||
package huff0 | ||
|
||
import "fmt" | ||
|
||
// bitWriter accumulates bits and emits them as bytes.
// The first bit written becomes the least significant bit of the first
// output byte.
type bitWriter struct {
	bitContainer uint64 // pending bits not yet moved to out
	nBits        uint8  // number of pending bits held in bitContainer
	out          []byte // bytes emitted so far
}
|
||
// bitMask16 maps a bit count to a mask keeping that many low bits of a
// uint16. Counts of 16 and above saturate to the full 16-bit mask.
// The table has 32 entries so it can be indexed with count&31 without a
// bounds check; every padding entry is spelled out so that all
// out-of-range counts behave the same way instead of some masking to zero.
var bitMask16 = [32]uint16{
	0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F,
	0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF,
	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
}
|
||
// addBits16NC will add up to 16 bits. | ||
// It will not check if there is space for them, | ||
// so the caller must ensure that it has flushed recently. | ||
func (b *bitWriter) addBits16NC(value uint16, bits uint8) { | ||
b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63) | ||
b.nBits += bits | ||
} | ||
|
||
// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated. | ||
// It will not check if there is space for them, so the caller must ensure that it has flushed recently. | ||
func (b *bitWriter) addBits16Clean(value uint16, bits uint8) { | ||
b.bitContainer |= uint64(value) << (b.nBits & 63) | ||
b.nBits += bits | ||
} | ||
|
||
// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated. | ||
// It will not check if there is space for them, so the caller must ensure that it has flushed recently. | ||
func (b *bitWriter) encSymbol(ct cTable, symbol byte) { | ||
enc := ct[symbol] | ||
b.bitContainer |= uint64(enc.val) << (b.nBits & 63) | ||
b.nBits += enc.nBits | ||
} | ||
|
||
// addBits16ZeroNC will add up to 16 bits. | ||
// It will not check if there is space for them, | ||
// so the caller must ensure that it has flushed recently. | ||
// This is fastest if bits can be zero. | ||
func (b *bitWriter) addBits16ZeroNC(value uint16, bits uint8) { | ||
if bits == 0 { | ||
return | ||
} | ||
value <<= (16 - bits) & 15 | ||
value >>= (16 - bits) & 15 | ||
b.bitContainer |= uint64(value) << (b.nBits & 63) | ||
b.nBits += bits | ||
} | ||
|
||
// flush will flush all pending full bytes. | ||
// There will be at least 56 bits available for writing when this has been called. | ||
// Using flush32 is faster, but leaves less space for writing. | ||
func (b *bitWriter) flush() { | ||
v := b.nBits >> 3 | ||
switch v { | ||
case 0: | ||
case 1: | ||
b.out = append(b.out, | ||
byte(b.bitContainer), | ||
) | ||
case 2: | ||
b.out = append(b.out, | ||
byte(b.bitContainer), | ||
byte(b.bitContainer>>8), | ||
) | ||
case 3: | ||
b.out = append(b.out, | ||
byte(b.bitContainer), | ||
byte(b.bitContainer>>8), | ||
byte(b.bitContainer>>16), | ||
) | ||
case 4: | ||
b.out = append(b.out, | ||
byte(b.bitContainer), | ||
byte(b.bitContainer>>8), | ||
byte(b.bitContainer>>16), | ||
byte(b.bitContainer>>24), | ||
) | ||
case 5: | ||
b.out = append(b.out, | ||
byte(b.bitContainer), | ||
byte(b.bitContainer>>8), | ||
byte(b.bitContainer>>16), | ||
byte(b.bitContainer>>24), | ||
byte(b.bitContainer>>32), | ||
) | ||
case 6: | ||
b.out = append(b.out, | ||
byte(b.bitContainer), | ||
byte(b.bitContainer>>8), | ||
byte(b.bitContainer>>16), | ||
byte(b.bitContainer>>24), | ||
byte(b.bitContainer>>32), | ||
byte(b.bitContainer>>40), | ||
) | ||
case 7: | ||
b.out = append(b.out, | ||
byte(b.bitContainer), | ||
byte(b.bitContainer>>8), | ||
byte(b.bitContainer>>16), | ||
byte(b.bitContainer>>24), | ||
byte(b.bitContainer>>32), | ||
byte(b.bitContainer>>40), | ||
byte(b.bitContainer>>48), | ||
) | ||
case 8: | ||
b.out = append(b.out, | ||
byte(b.bitContainer), | ||
byte(b.bitContainer>>8), | ||
byte(b.bitContainer>>16), | ||
byte(b.bitContainer>>24), | ||
byte(b.bitContainer>>32), | ||
byte(b.bitContainer>>40), | ||
byte(b.bitContainer>>48), | ||
byte(b.bitContainer>>56), | ||
) | ||
default: | ||
panic(fmt.Errorf("bits (%d) > 64", b.nBits)) | ||
} | ||
b.bitContainer >>= v << 3 | ||
b.nBits &= 7 | ||
} | ||
|
||
// flush32 will flush out, so there are at least 32 bits available for writing. | ||
func (b *bitWriter) flush32() { | ||
if b.nBits < 32 { | ||
return | ||
} | ||
b.out = append(b.out, | ||
byte(b.bitContainer), | ||
byte(b.bitContainer>>8), | ||
byte(b.bitContainer>>16), | ||
byte(b.bitContainer>>24)) | ||
b.nBits -= 32 | ||
b.bitContainer >>= 32 | ||
} | ||
|
||
// flushAlign will flush remaining full bytes and align to next byte boundary. | ||
func (b *bitWriter) flushAlign() { | ||
nbBytes := (b.nBits + 7) >> 3 | ||
for i := uint8(0); i < nbBytes; i++ { | ||
b.out = append(b.out, byte(b.bitContainer>>(i*8))) | ||
} | ||
b.nBits = 0 | ||
b.bitContainer = 0 | ||
} | ||
|
||
// close will write the alignment bit and write the final byte(s) | ||
// to the output. | ||
func (b *bitWriter) close() error { | ||
// End mark | ||
b.addBits16Clean(1, 1) | ||
// flush until next byte. | ||
b.flushAlign() | ||
return nil | ||
} | ||
|
||
// reset and continue writing by appending to out. | ||
func (b *bitWriter) reset(out []byte) { | ||
b.bitContainer = 0 | ||
b.nBits = 0 | ||
b.out = out | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
// Copyright 2018 Klaus Post. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. | ||
|
||
package huff0 | ||
|
||
// byteReader reads little-endian values out of a byte slice.
// The read position only moves when the caller advances it, and reads
// are not range-checked by the reader itself.
type byteReader struct {
	b   []byte // underlying input
	off int    // current read position within b
}
|
||
// init will initialize the reader and set the input. | ||
func (b *byteReader) init(in []byte) { | ||
b.b = in | ||
b.off = 0 | ||
} | ||
|
||
// advance the stream b n bytes. | ||
func (b *byteReader) advance(n uint) { | ||
b.off += int(n) | ||
} | ||
|
||
// Int32 returns a little endian int32 starting at current offset. | ||
func (b byteReader) Int32() int32 { | ||
v3 := int32(b.b[b.off+3]) | ||
v2 := int32(b.b[b.off+2]) | ||
v1 := int32(b.b[b.off+1]) | ||
v0 := int32(b.b[b.off]) | ||
return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0 | ||
} | ||
|
||
// Uint32 returns a little endian uint32 starting at current offset. | ||
func (b byteReader) Uint32() uint32 { | ||
v3 := uint32(b.b[b.off+3]) | ||
v2 := uint32(b.b[b.off+2]) | ||
v1 := uint32(b.b[b.off+1]) | ||
v0 := uint32(b.b[b.off]) | ||
return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0 | ||
} | ||
|
||
// unread returns the unread portion of the input. | ||
func (b byteReader) unread() []byte { | ||
return b.b[b.off:] | ||
} | ||
|
||
// remain will return the number of bytes remaining. | ||
func (b byteReader) remain() int { | ||
return len(b.b) - b.off | ||
} |
Oops, something went wrong.