diff --git a/snappy/xerial/LICENSE b/snappy/xerial/LICENSE new file mode 100644 index 0000000000..a97223897a --- /dev/null +++ b/snappy/xerial/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2016 Evan Huus +Copyright (c) 2023 Klaus Post + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/snappy/xerial/README.md b/snappy/xerial/README.md new file mode 100644 index 0000000000..08d4332897 --- /dev/null +++ b/snappy/xerial/README.md @@ -0,0 +1,50 @@ +# go-xerial-snappy + +Xerial-compatible Snappy framing support for golang. + +Packages using Xerial for snappy encoding use a framing format incompatible with +basically everything else in existence. + +Apps that use this format include Apache Kafka (see +https://github.com/dpkp/kafka-python/issues/126#issuecomment-35478921 for +details). + +# Fork + +Forked from [github.com/eapache/go-xerial-snappy](https://github.com/eapache/go-xerial-snappy). 
+ +Changes: + +* Uses [S2](https://github.com/klauspost/compress/tree/master/s2#snappy-compatibility) for better/faster compression and decompression. +* Fixes 0-length roundtrips. +* Adds `DecodeCapped`, which allows decompression with capped output size. +* `DecodeInto` will decode directly into destination if there is space enough. +* `Encode` will now encode directly into 'dst' if it has space enough. +* Fixes short snappy buffers returning `ErrMalformed`. +* Renames `EncodeStream` to `Encode`. +* Adds `EncodeBetter` for better than default compression at ~half the speed. + + +Comparison (before/after): + +``` +BenchmarkSnappyStreamEncode-32 959010 1170 ns/op 875.15 MB/s 1280 B/op 1 allocs/op +BenchmarkSnappyStreamEncode-32 1000000 1107 ns/op 925.04 MB/s 0 B/op 0 allocs/op +--> Output size: 913 -> 856 bytes + +BenchmarkSnappyStreamEncodeBetter-32 477739 2506 ns/op 408.62 MB/s 0 B/op 0 allocs/op +--> Output size: 835 bytes + +BenchmarkSnappyStreamEncodeMassive-32 100 10596963 ns/op 966.31 MB/s 40977 B/op 1 allocs/op +BenchmarkSnappyStreamEncodeMassive-32 100 10220236 ns/op 1001.93 MB/s 0 B/op 0 allocs/op +--> Output size: 2365547 -> 2256991 bytes + +BenchmarkSnappyStreamEncodeBetterMassive-32 69 16983314 ns/op 602.94 MB/s 0 B/op 0 allocs/op +--> Output size: 2011997 bytes + +BenchmarkSnappyStreamDecodeInto-32 1887378 639.5 ns/op 1673.19 MB/s 1088 B/op 3 allocs/op +BenchmarkSnappyStreamDecodeInto-32 2707915 436.2 ns/op 2452.99 MB/s 0 B/op 0 allocs/op + +BenchmarkSnappyStreamDecodeIntoMassive-32 267 4559594 ns/op 2245.81 MB/s 71120 B/op 1 allocs/op +BenchmarkSnappyStreamDecodeIntoMassive-32 282 4285844 ns/op 2389.26 MB/s 0 B/op 0 allocs/op +``` \ No newline at end of file diff --git a/snappy/xerial/fuzz_test.go b/snappy/xerial/fuzz_test.go new file mode 100644 index 0000000000..b03e5bb455 --- /dev/null +++ b/snappy/xerial/fuzz_test.go @@ -0,0 +1,64 @@ +package xerial + +import ( + "bytes" + "testing" + + "github.com/klauspost/compress/internal/fuzz" + 
"github.com/klauspost/compress/s2" +) + +func FuzzDecode(f *testing.F) { + fuzz.AddFromZip(f, "testdata/FuzzDecoder.zip", fuzz.TypeGoFuzz, false) + const limit = 1 << 20 + dst := make([]byte, 0, limit) + f.Fuzz(func(t *testing.T, data []byte) { + got, _ := DecodeCapped(dst[:0], data) + if len(got) > cap(dst) { + t.Fatalf("cap exceeded: %d > %d", len(got), cap(dst)) + } + }) +} + +func FuzzEncode(f *testing.F) { + fuzz.AddFromZip(f, "../../s2/testdata/enc_regressions.zip", fuzz.TypeRaw, false) + fuzz.AddFromZip(f, "../../s2/testdata/fuzz/block-corpus-raw.zip", fuzz.TypeRaw, testing.Short()) + fuzz.AddFromZip(f, "../../s2/testdata/fuzz/block-corpus-enc.zip", fuzz.TypeGoFuzz, testing.Short()) + + f.Fuzz(func(t *testing.T, data []byte) { + t.Run("standard", func(t *testing.T) { + encoded := Encode(make([]byte, 0, len(data)/2), data) + decoded, err := Decode(encoded) + if err != nil { + t.Errorf("input: %+v, encoded: %+v", data, encoded) + t.Fatal(err) + } + if !bytes.Equal(decoded, data) { + t.Fatal("mismatch") + } + + }) + t.Run("better", func(t *testing.T) { + encoded := EncodeBetter(make([]byte, 0, len(data)/2), data) + decoded, err := Decode(encoded) + if err != nil { + t.Errorf("input: %+v, encoded: %+v", data, encoded) + t.Fatal(err) + } + if !bytes.Equal(decoded, data) { + t.Fatal("mismatch") + } + }) + t.Run("snappy", func(t *testing.T) { + encoded := s2.EncodeSnappy(make([]byte, 0, len(data)/2), data) + decoded, err := Decode(encoded) + if err != nil { + t.Errorf("input: %+v, encoded: %+v", data, encoded) + t.Fatal(err) + } + if !bytes.Equal(decoded, data) { + t.Fatal("mismatch") + } + }) + }) +} diff --git a/snappy/xerial/testdata/FuzzDecoder.zip b/snappy/xerial/testdata/FuzzDecoder.zip new file mode 100644 index 0000000000..e5eeec822a Binary files /dev/null and b/snappy/xerial/testdata/FuzzDecoder.zip differ diff --git a/snappy/xerial/xerial.go b/snappy/xerial/xerial.go new file mode 100644 index 0000000000..a4a8fb638d --- /dev/null +++ 
b/snappy/xerial/xerial.go @@ -0,0 +1,262 @@ +package xerial + +import ( + "bytes" + "encoding/binary" + "errors" + + "github.com/klauspost/compress/s2" +) + +var ( + xerialHeader = []byte{130, 83, 78, 65, 80, 80, 89, 0} + + // This is xerial version 1 and minimally compatible with version 1 + xerialVersionInfo = []byte{0, 0, 0, 1, 0, 0, 0, 1} + + // ErrMalformed is returned by the decoder when the xerial framing + // is malformed + ErrMalformed = errors.New("malformed xerial framing") +) + +// Encode *appends* to the specified 'dst' the compressed +// 'src' in xerial framing format. If 'dst' does not have enough +// capacity, then a new slice will be allocated. If 'dst' has +// non-zero length, then if *must* have been built using this function. +func Encode(dst, src []byte) []byte { + if len(dst) == 0 { + dst = append(dst, xerialHeader...) + dst = append(dst, xerialVersionInfo...) + } + + // Snappy encode in blocks of maximum 32KB + var ( + max = len(src) + blockSize = 32 * 1024 + pos = 0 + chunk []byte + ) + + for pos < max { + newPos := min(pos+blockSize, max) + // Find maximum length we need + needLen := s2.MaxEncodedLen(newPos-pos) + 4 + if cap(dst)-len(dst) >= needLen { + // Encode directly into dst + dstStart := len(dst) + 4 // Start offset in dst + dstSizePos := dst[len(dst):dstStart] // Reserve space for compressed size + dstEnd := len(dst) + needLen // End offset in dst + // Compress into dst and get actual size. + actual := s2.EncodeSnappy(dst[dstStart:dstEnd], src[pos:newPos]) + // Update dst size + dst = dst[:dstStart+len(actual)] + // Store compressed size + binary.BigEndian.PutUint32(dstSizePos, uint32(len(actual))) + } else { + chunk = s2.EncodeSnappy(chunk[:cap(chunk)], src[pos:newPos]) + origLen := len(dst) + // First encode the compressed size (big-endian) + // Put* panics if the buffer is too small, so pad 4 bytes first + dst = append(dst, dst[0:4]...) 
+ binary.BigEndian.PutUint32(dst[origLen:], uint32(len(chunk))) + // And now the compressed data + dst = append(dst, chunk...) + } + pos = newPos + } + return dst +} + +// EncodeBetter *appends* to the specified 'dst' the compressed +// 'src' in xerial framing format. If 'dst' does not have enough +// capacity, then a new slice will be allocated. If 'dst' has +// non-zero length, then if *must* have been built using this function. +func EncodeBetter(dst, src []byte) []byte { + if len(dst) == 0 { + dst = append(dst, xerialHeader...) + dst = append(dst, xerialVersionInfo...) + } + + // Snappy encode in blocks of maximum 32KB + var ( + max = len(src) + blockSize = 32 * 1024 + pos = 0 + chunk []byte + ) + + for pos < max { + newPos := min(pos+blockSize, max) + // Find maximum length we need + needLen := s2.MaxEncodedLen(newPos-pos) + 4 + if cap(dst)-len(dst) >= needLen { + // Encode directly into dst + dstStart := len(dst) + 4 // Start offset in dst + dstSizePos := dst[len(dst):dstStart] // Reserve space for compressed size + dstEnd := len(dst) + needLen // End offset in dst + // Compress into dst and get actual size. + actual := s2.EncodeSnappyBetter(dst[dstStart:dstEnd], src[pos:newPos]) + // Update dst size + dst = dst[:dstStart+len(actual)] + // Store compressed size + binary.BigEndian.PutUint32(dstSizePos, uint32(len(actual))) + } else { + chunk = s2.EncodeSnappyBetter(chunk[:cap(chunk)], src[pos:newPos]) + origLen := len(dst) + // First encode the compressed size (big-endian) + // Put* panics if the buffer is too small, so pad 4 bytes first + dst = append(dst, dst[0:4]...) + binary.BigEndian.PutUint32(dst[origLen:], uint32(len(chunk))) + // And now the compressed data + dst = append(dst, chunk...) + } + pos = newPos + } + return dst +} + +func min(x, y int) int { + if x < y { + return x + } + return y +} + +const ( + sizeOffset = 16 + sizeBytes = 4 +) + +// Decode decodes snappy data whether it is traditional unframed +// or includes the xerial framing format. 
+func Decode(src []byte) ([]byte, error) { + return DecodeInto(nil, src) +} + +// DecodeInto decodes snappy data whether it is traditional unframed +// or includes the xerial framing format into the specified `dst`. +// It is assumed that the entirety of `dst` including all capacity is available +// for use by this function. If `dst` is nil *or* insufficiently large to hold +// the decoded `src`, new space will be allocated. +// To never allocate bigger destination, use DecodeCapped. +func DecodeInto(dst, src []byte) ([]byte, error) { + var max = len(src) + + if max < len(xerialHeader) || !bytes.Equal(src[:8], xerialHeader) { + dst, err := s2.Decode(dst[:cap(dst)], src) + if err != nil { + return dst, ErrMalformed + } + return dst, nil + } + if max == sizeOffset { + return []byte{}, nil + } + if max < sizeOffset+sizeBytes { + return nil, ErrMalformed + } + if len(dst) > 0 { + dst = dst[:0] + } + var ( + pos = sizeOffset + chunk []byte + ) + + for pos+sizeBytes <= max { + size := int(binary.BigEndian.Uint32(src[pos : pos+sizeBytes])) + pos += sizeBytes + + nextPos := pos + size + // On architectures where int is 32-bytes wide size + pos could + // overflow so we need to check the low bound as well as the + // high + if nextPos < pos || nextPos > max { + return nil, ErrMalformed + } + nextLen, err := s2.DecodedLen(src[pos:nextPos]) + if err != nil { + return nil, err + } + if cap(dst)-len(dst) >= nextLen { + // Decode directly into dst + dstStart := len(dst) + dstEnd := dstStart + nextLen + _, err = s2.Decode(dst[dstStart:dstEnd], src[pos:nextPos]) + if err != nil { + return nil, err + } + dst = dst[:dstEnd] + } else { + chunk, err = s2.Decode(chunk[:cap(chunk)], src[pos:nextPos]) + if err != nil { + return nil, err + } + dst = append(dst, chunk...) 
+ } + pos = nextPos + } + return dst, nil +} + +var ErrDstTooSmall = errors.New("destination buffer too small") + +// DecodeCapped decodes snappy data whether it is traditional unframed +// or includes the xerial framing format into the specified `dst`. +// It is assumed that the entirety of `dst` including all capacity is available +// for use by this function. If `dst` is nil *or* insufficiently large to hold +// the decoded `src`, ErrDstTooSmall is returned. +func DecodeCapped(dst, src []byte) ([]byte, error) { + var max = len(src) + if dst == nil { + return nil, ErrDstTooSmall + } + if max < len(xerialHeader) || !bytes.Equal(src[:8], xerialHeader) { + l, err := s2.DecodedLen(src) + if err != nil { + return nil, ErrMalformed + } + if l > cap(dst) { + return nil, ErrDstTooSmall + } + return s2.Decode(dst[:cap(dst)], src) + } + dst = dst[:0] + if max == sizeOffset { + return dst, nil + } + if max < sizeOffset+sizeBytes { + return nil, ErrMalformed + } + pos := sizeOffset + + for pos+sizeBytes <= max { + size := int(binary.BigEndian.Uint32(src[pos : pos+sizeBytes])) + pos += sizeBytes + + nextPos := pos + size + // On architectures where int is 32-bytes wide size + pos could + // overflow so we need to check the low bound as well as the + // high + if nextPos < pos || nextPos > max { + return nil, ErrMalformed + } + nextLen, err := s2.DecodedLen(src[pos:nextPos]) + if err != nil { + return nil, err + } + if cap(dst)-len(dst) < nextLen { + return nil, ErrDstTooSmall + } + // Decode directly into dst + dstStart := len(dst) + dstEnd := dstStart + nextLen + _, err = s2.Decode(dst[dstStart:dstEnd], src[pos:nextPos]) + if err != nil { + return nil, err + } + dst = dst[:dstEnd] + pos = nextPos + } + return dst, nil +} diff --git a/snappy/xerial/xerial_test.go b/snappy/xerial/xerial_test.go new file mode 100644 index 0000000000..acfca868a4 --- /dev/null +++ b/snappy/xerial/xerial_test.go @@ -0,0 +1,276 @@ +package xerial + +import ( + "bytes" + "math/rand" + "testing" + + 
"github.com/klauspost/compress/s2" +) + +const largeString = `Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur? At vero eos et accusamus et iusto odio dignissimos ducimus qui blanditiis praesentium voluptatum deleniti atque corrupti quos dolores et quas molestias except` + +var snappyStreamTestCases = map[string][]byte{ + "PLAINDATA": {130, 83, 78, 65, 80, 80, 89, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 11, 9, 32, 80, 76, 65, 73, 78, 68, 65, 84, 65}, + `{"a":"UtaitILHMDAAAAfU","b":"日本"}`: {130, 83, 78, 65, 80, 80, 89, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 39, 37, 144, 123, 34, 97, 34, 58, 34, 85, 116, 97, 105, 116, 73, 76, 72, 77, 68, 65, 65, 65, 65, 102, 85, 34, 44, 34, 98, 34, 58, 34, 230, 151, 165, 230, 156, 172, 34, 125}, + largeString: {130, 83, 78, 65, 80, 80, 89, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 3, 89, 128, 8, 240, 90, 83, 101, 100, 32, 117, 116, 32, 112, 101, 114, 115, 112, 105, 99, 105, 97, 116, 105, 115, 32, 117, 110, 100, 101, 32, 111, 109, 110, 105, 115, 32, 105, 115, 116, 101, 32, 110, 97, 116, 117, 115, 32, 101, 114, 114, 111, 114, 32, 115, 105, 116, 32, 118, 111, 108, 117, 112, 116, 97, 116, 101, 109, 32, 97, 99, 99, 117, 115, 97, 110, 116, 105, 117, 109, 
32, 100, 111, 108, 111, 114, 101, 109, 113, 117, 101, 32, 108, 97, 117, 100, 97, 5, 22, 240, 60, 44, 32, 116, 111, 116, 97, 109, 32, 114, 101, 109, 32, 97, 112, 101, 114, 105, 97, 109, 44, 32, 101, 97, 113, 117, 101, 32, 105, 112, 115, 97, 32, 113, 117, 97, 101, 32, 97, 98, 32, 105, 108, 108, 111, 32, 105, 110, 118, 101, 110, 116, 111, 114, 101, 32, 118, 101, 114, 105, 116, 97, 1, 141, 4, 101, 116, 1, 36, 88, 115, 105, 32, 97, 114, 99, 104, 105, 116, 101, 99, 116, 111, 32, 98, 101, 97, 116, 97, 101, 32, 118, 105, 1, 6, 120, 100, 105, 99, 116, 97, 32, 115, 117, 110, 116, 32, 101, 120, 112, 108, 105, 99, 97, 98, 111, 46, 32, 78, 101, 109, 111, 32, 101, 110, 105, 109, 5, 103, 0, 109, 46, 180, 0, 12, 113, 117, 105, 97, 17, 16, 0, 115, 5, 209, 72, 97, 115, 112, 101, 114, 110, 97, 116, 117, 114, 32, 97, 117, 116, 32, 111, 100, 105, 116, 5, 9, 36, 102, 117, 103, 105, 116, 44, 32, 115, 101, 100, 9, 53, 32, 99, 111, 110, 115, 101, 113, 117, 117, 110, 1, 42, 20, 109, 97, 103, 110, 105, 32, 9, 245, 16, 115, 32, 101, 111, 115, 1, 36, 28, 32, 114, 97, 116, 105, 111, 110, 101, 17, 96, 33, 36, 1, 51, 36, 105, 32, 110, 101, 115, 99, 105, 117, 110, 116, 1, 155, 1, 254, 16, 112, 111, 114, 114, 111, 1, 51, 36, 115, 113, 117, 97, 109, 32, 101, 115, 116, 44, 1, 14, 13, 81, 5, 183, 4, 117, 109, 1, 18, 0, 97, 9, 19, 4, 32, 115, 1, 149, 12, 109, 101, 116, 44, 9, 135, 76, 99, 116, 101, 116, 117, 114, 44, 32, 97, 100, 105, 112, 105, 115, 99, 105, 32, 118, 101, 108, 50, 173, 0, 24, 110, 111, 110, 32, 110, 117, 109, 9, 94, 84, 105, 117, 115, 32, 109, 111, 100, 105, 32, 116, 101, 109, 112, 111, 114, 97, 32, 105, 110, 99, 105, 100, 33, 52, 20, 117, 116, 32, 108, 97, 98, 33, 116, 4, 101, 116, 9, 106, 0, 101, 5, 219, 20, 97, 109, 32, 97, 108, 105, 5, 62, 33, 164, 8, 114, 97, 116, 29, 212, 12, 46, 32, 85, 116, 41, 94, 52, 97, 100, 32, 109, 105, 110, 105, 109, 97, 32, 118, 101, 110, 105, 33, 221, 72, 113, 117, 105, 115, 32, 110, 111, 115, 116, 114, 117, 109, 32, 101, 120, 101, 114, 99, 105, 33, 
// makeMassive returns input repeated numCopies times back to back.
// A non-positive numCopies or an empty input yields the empty string.
func makeMassive(input string, numCopies int) string {
	if numCopies <= 0 || input == "" {
		return ""
	}
	outBuff := make([]byte, len(input)*numCopies)

	// Place each copy at its own offset; copying to outBuff[len(outBuff):]
	// would target an empty slice and leave the buffer zero-filled.
	for off := 0; off < len(outBuff); off += len(input) {
		copy(outBuff[off:], input)
	}

	return string(outBuff)
}
Decode(dst) + if err != nil { + t.Error(err) + } + if src != string(dec) { + t.Errorf("Expected decode to match encode orig = %s, decoded = %s", src, string(dec)) + } + } +} + +func TestSnappyLargeStringEncodeStream(t *testing.T) { + massiveString := makeMassive(largeString, 10000) + dst := Encode(nil, []byte(massiveString)) + dec, err := Decode(dst) + if err != nil { + t.Error(err) + } + if massiveString != string(dec) { + t.Errorf("Decoded string didn't match original input (not printing due to size)") + } +} + +func TestSnappyDecodeStreams(t *testing.T) { + for exp, src := range snappyStreamTestCases { + dst, err := Decode(src) + if err != nil { + t.Error("Encoding error: ", err) + } else if !bytes.Equal(dst, []byte(exp)) { + t.Errorf("Expected %s to be generated from [%d]byte, but was %s", exp, len(src), string(dst)) + } + } +} + +func TestSnappyDecodeMalformedTruncatedHeader(t *testing.T) { + // Truncated headers should not cause a panic. + for i := 0; i < len(xerialHeader); i++ { + buf := make([]byte, i) + copy(buf, xerialHeader[:i]) + if _, err := Decode(buf); err != ErrMalformed { + t.Errorf("expected ErrMalformed got %v", err) + } + } +} + +func TestSnappyDecodeMalformedTruncatedSize(t *testing.T) { + // Inputs with valid Xerial header but truncated "size" field + sizes := []int{sizeOffset + 1, sizeOffset + 2, sizeOffset + 3} + for _, size := range sizes { + buf := make([]byte, size) + copy(buf, xerialHeader) + if _, err := Decode(buf); err != ErrMalformed { + t.Errorf("expected ErrMalformed got %v", err) + } + } +} + +func TestSnappyDecodeMalformedBNoData(t *testing.T) { + // No data after the size field + buf := make([]byte, 20) + copy(buf, xerialHeader) + // indicate that there's one byte of data to be read + buf[len(buf)-1] = 1 + if _, err := Decode(buf); err != ErrMalformed { + t.Errorf("expected ErrMalformed got %v", err) + } +} + +func TestSnappyMasterDecodeFailed(t *testing.T) { + buf := make([]byte, 21) + copy(buf, xerialHeader) + // indicate that 
there's one byte of data to be read + buf[len(buf)-2] = 1 + // A payload which will not decode + buf[len(buf)-1] = 1 + if _, err := Decode(buf); err == ErrMalformed || err == nil { + t.Errorf("unexpected err: %v", err) + } +} + +func BenchmarkSnappyStreamDecode(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + + for n := 0; n < b.N; n++ { + bytes := 0 + for _, test := range snappyStreamTestCases { + dst, err := Decode(test) + if err != nil { + b.Error("Decoding error: ", err) + } + bytes += len(dst) + } + b.SetBytes(int64(bytes)) + } +} + +func BenchmarkSnappyStreamDecodeInto(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + + var ( + dst = make([]byte, 1024, 1024) + err error + ) + + for n := 0; n < b.N; n++ { + bytes := 0 + for _, test := range snappyStreamTestCases { + dst, err = DecodeInto(dst, test) + if err != nil { + b.Error("Decoding error: ", err) + } + bytes += len(dst) + } + b.SetBytes(int64(bytes)) + } +} +func BenchmarkSnappyStreamDecodeMassive(b *testing.B) { + massiveString := makeMassive(largeString, 10000) + enc := Encode(nil, []byte(massiveString)) + + b.ReportAllocs() + b.ResetTimer() + b.SetBytes(int64(len(massiveString))) + + for n := 0; n < b.N; n++ { + _, err := Decode(enc) + if err != nil { + b.Error("Decoding error: ", err) + } + } +} + +func BenchmarkSnappyStreamDecodeIntoMassive(b *testing.B) { + massiveString := makeMassive(largeString, 10000) + enc := Encode(nil, []byte(massiveString)) + + var ( + dst = make([]byte, 1024, len(massiveString)) + err error + ) + + b.ReportAllocs() + b.ResetTimer() + b.SetBytes(int64(len(massiveString))) + + for n := 0; n < b.N; n++ { + dst, err = DecodeInto(dst, enc) + if err != nil { + b.Error("Decoding error: ", err) + } + } +} + +func BenchmarkSnappyStreamEncode(b *testing.B) { + test := []byte(largeString) + + var ( + dst = make([]byte, 0, 20+s2.MaxEncodedLen(len(test))) + err error + ) + + b.ReportAllocs() + b.ResetTimer() + b.SetBytes(int64(len(test))) + + for n := 0; n < b.N; n++ { + dst = 
Encode(dst[:0], test) + if err != nil { + b.Error("Encoding error: ", err) + } + } +} + +func BenchmarkSnappyStreamEncodeBetter(b *testing.B) { + test := []byte(largeString) + + var ( + dst = make([]byte, 0, 20+s2.MaxEncodedLen(len(test))) + err error + ) + + b.ReportAllocs() + b.ResetTimer() + b.SetBytes(int64(len(test))) + + for n := 0; n < b.N; n++ { + dst = EncodeBetter(dst[:0], test) + if err != nil { + b.Error("Encoding error: ", err) + } + } +} + +func BenchmarkSnappyStreamEncodeMassive(b *testing.B) { + massiveString := []byte(makeMassive(largeString, 10000)) + + // Inject some randomness, so it isn't just all copies. + rng := rand.New(rand.NewSource(0)) + for i := 0; i < len(massiveString)/10; i++ { + massiveString[rng.Intn(len(massiveString))]++ + } + var ( + dst = make([]byte, 0, s2.MaxEncodedLen(len(massiveString))) + err error + ) + + b.ReportAllocs() + b.ResetTimer() + b.SetBytes(int64(len(massiveString))) + + for n := 0; n < b.N; n++ { + dst = Encode(dst[:0], massiveString) + if err != nil { + b.Error("Encoding error: ", err) + } + } +} + +func BenchmarkSnappyStreamEncodeBetterMassive(b *testing.B) { + massiveString := []byte(makeMassive(largeString, 10000)) + + // Inject some randomness, so it isn't just all copies. + rng := rand.New(rand.NewSource(0)) + for i := 0; i < len(massiveString)/10; i++ { + massiveString[rng.Intn(len(massiveString))]++ + } + var ( + dst = make([]byte, 0, s2.MaxEncodedLen(len(massiveString))) + err error + ) + + b.ReportAllocs() + b.ResetTimer() + b.SetBytes(int64(len(massiveString))) + + for n := 0; n < b.N; n++ { + dst = EncodeBetter(dst[:0], massiveString) + if err != nil { + b.Error("Encoding error: ", err) + } + } +}