From 2e5a9733247dfd3bdac454736635714341e2a369 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Fri, 10 Mar 2023 11:56:57 -0800 Subject: [PATCH] zstd: Check FSE init values (#772) * zstd: Check FSE init values If `br.init(s.br.unread())` fails, it may decode bogus data if previous block returned without reading everything from the bit reader. This is used to feed the huff0 table for literal decoding. Return error correctly. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=56870 Add parsing of OSS reported input. * Don't use file (yet) Fail on error nilness mismatch * Revert useless file change --- flate/fuzz_test.go | 6 ++-- fse/decompress.go | 4 ++- fse/fse_test.go | 2 +- internal/fuzz/helpers.go | 35 +++++++++++++++--- s2/dict_test.go | 6 ++-- s2/fuzz_test.go | 6 ++-- s2/lz4convert_test.go | 4 +-- zip/fuzz_test.go | 4 +-- zstd/blockdec.go | 4 +++ zstd/fuzz_test.go | 44 ++++++++++++----------- zstd/testdata/fuzz/decode-corpus-raw.zip | Bin 4952786 -> 4953274 bytes zstd/testdata/fuzz/decode-oss.zip | Bin 0 -> 516 bytes 12 files changed, 76 insertions(+), 39 deletions(-) create mode 100644 zstd/testdata/fuzz/decode-oss.zip diff --git a/flate/fuzz_test.go b/flate/fuzz_test.go index fdd41cfb93..527bad25d1 100644 --- a/flate/fuzz_test.go +++ b/flate/fuzz_test.go @@ -26,9 +26,9 @@ func TestMain(m *testing.M) { } func FuzzEncoding(f *testing.F) { - fuzz.AddFromZip(f, "testdata/regression.zip", true, false) - fuzz.AddFromZip(f, "testdata/fuzz/encode-raw-corpus.zip", true, testing.Short()) - fuzz.AddFromZip(f, "testdata/fuzz/FuzzEncoding.zip", false, testing.Short()) + fuzz.AddFromZip(f, "testdata/regression.zip", fuzz.TypeRaw, false) + fuzz.AddFromZip(f, "testdata/fuzz/encode-raw-corpus.zip", fuzz.TypeRaw, testing.Short()) + fuzz.AddFromZip(f, "testdata/fuzz/FuzzEncoding.zip", fuzz.TypeGoFuzz, testing.Short()) startFuzz := *fuzzStartF endFuzz := *fuzzEndF diff --git a/fse/decompress.go b/fse/decompress.go index 926f5f1535..cc05d0f7ea 100644 --- a/fse/decompress.go +++ b/fse/decompress.go @@ -260,7 +260,9 @@ func (s *Scratch) buildDtable() error { // If the buffer is over-read an error is returned. func (s *Scratch) decompress() error { br := &s.bits - br.init(s.br.unread()) + if err := br.init(s.br.unread()); err != nil { + return err + } var s1, s2 decoder // Initialize and decode first state and symbol. diff --git a/fse/fse_test.go b/fse/fse_test.go index a924874d9b..5290a55043 100644 --- a/fse/fse_test.go +++ b/fse/fse_test.go @@ -62,7 +62,7 @@ var decTestfiles = []struct { {name: "crash4", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash4.bin") }, err: "symbolLen (1) too small"}, {name: "crash5", fn: func() ([]byte, error) { return os.ReadFile("../testdata/crash5.bin") }, err: "symbolLen (1) too small"}, {name: "crash6", fn: func() ([]byte, error) { return os.ReadFile("../testdata/dec-crash6.bin") }, err: "newState (32768) outside table size (32768)"}, - {name: "something", fn: func() ([]byte, error) { return os.ReadFile("../testdata/fse-artifact3.bin") }, err: "output size (1048576) > DecompressLimit (1048576)"}, + {name: "something", fn: func() ([]byte, error) { return os.ReadFile("../testdata/fse-artifact3.bin") }, err: "corrupt stream, did not find end of stream"}, } func TestCompress(t *testing.T) { diff --git a/internal/fuzz/helpers.go b/internal/fuzz/helpers.go index 3efadad39a..d2f406a6fe 100644 --- a/internal/fuzz/helpers.go +++ b/internal/fuzz/helpers.go @@ -6,6 +6,7 @@ package fuzz import ( "archive/zip" "bytes" + "encoding/binary" "fmt" "go/ast" "go/parser" @@ -16,8 +17,20 @@ import ( "testing" ) +type InputType uint8 + +const ( + // TypeRaw indicates that files are raw bytes. + TypeRaw InputType = iota + // TypeGoFuzz indicates files are from Go Fuzzer. + TypeGoFuzz + // TypeOSSFuzz indicates that files are from OSS fuzzer with size before data. + TypeOSSFuzz +) + // AddFromZip will read the supplied zip and add all as corpus for f. -func AddFromZip(f *testing.F, filename string, raw, short bool) { +// Byte slices only. +func AddFromZip(f *testing.F, filename string, t InputType, short bool) { file, err := os.Open(filename) if err != nil { f.Fatal(err) @@ -44,11 +57,25 @@ func AddFromZip(f *testing.F, filename string, raw, short bool) { f.Fatal(err) } rc.Close() - raw := raw + t := t + if t == TypeOSSFuzz { + t = TypeRaw // Fallback + if len(b) >= 4 { + sz := binary.BigEndian.Uint32(b) + if sz == uint32(len(b))-4 { + f.Add(b[4:]) + continue + } + } + } + if bytes.HasPrefix(b, []byte("go test fuzz")) { - raw = false + t = TypeGoFuzz + } else { + t = TypeRaw } - if raw { + + if t == TypeRaw { f.Add(b) continue } diff --git a/s2/dict_test.go b/s2/dict_test.go index c0bab2d87e..6643f67169 100644 --- a/s2/dict_test.go +++ b/s2/dict_test.go @@ -404,9 +404,9 @@ func TestDictSize(t *testing.T) { } func FuzzDictBlocks(f *testing.F) { - fuzz.AddFromZip(f, "testdata/enc_regressions.zip", true, false) - fuzz.AddFromZip(f, "testdata/fuzz/block-corpus-raw.zip", true, testing.Short()) - fuzz.AddFromZip(f, "testdata/fuzz/block-corpus-enc.zip", false, testing.Short()) + fuzz.AddFromZip(f, "testdata/enc_regressions.zip", fuzz.TypeRaw, false) + fuzz.AddFromZip(f, "testdata/fuzz/block-corpus-raw.zip", fuzz.TypeRaw, testing.Short()) + fuzz.AddFromZip(f, "testdata/fuzz/block-corpus-enc.zip", fuzz.TypeGoFuzz, testing.Short()) // Fuzzing tweaks: const ( diff --git a/s2/fuzz_test.go b/s2/fuzz_test.go index 830d3656d6..1cc6c853bc 100644 --- a/s2/fuzz_test.go +++ b/s2/fuzz_test.go @@ -13,9 +13,9 @@ import ( ) func FuzzEncodingBlocks(f *testing.F) { - fuzz.AddFromZip(f, "testdata/enc_regressions.zip", true, false) - fuzz.AddFromZip(f, "testdata/fuzz/block-corpus-raw.zip", true, testing.Short()) - fuzz.AddFromZip(f, "testdata/fuzz/block-corpus-enc.zip", false, testing.Short()) + fuzz.AddFromZip(f, "testdata/enc_regressions.zip", fuzz.TypeRaw, false) + fuzz.AddFromZip(f, "testdata/fuzz/block-corpus-raw.zip", fuzz.TypeRaw, testing.Short()) + fuzz.AddFromZip(f, "testdata/fuzz/block-corpus-enc.zip", fuzz.TypeGoFuzz, testing.Short()) // Fuzzing tweaks: const ( diff --git a/s2/lz4convert_test.go b/s2/lz4convert_test.go index 82ee5bda55..bdc1c11b2b 100644 --- a/s2/lz4convert_test.go +++ b/s2/lz4convert_test.go @@ -352,8 +352,8 @@ func BenchmarkCompressBlockReference(b *testing.B) { } func FuzzLZ4Block(f *testing.F) { - fuzz.AddFromZip(f, "testdata/fuzz/lz4-convert-corpus-raw.zip", true, false) - fuzz.AddFromZip(f, "testdata/fuzz/FuzzLZ4Block.zip", false, false) + fuzz.AddFromZip(f, "testdata/fuzz/lz4-convert-corpus-raw.zip", fuzz.TypeRaw, false) + fuzz.AddFromZip(f, "testdata/fuzz/FuzzLZ4Block.zip", fuzz.TypeGoFuzz, false) // Fuzzing tweaks: const ( // Max input size: diff --git a/zip/fuzz_test.go b/zip/fuzz_test.go index ef7b7af249..3517d55e63 100644 --- a/zip/fuzz_test.go +++ b/zip/fuzz_test.go @@ -30,8 +30,8 @@ func FuzzReader(f *testing.F) { } f.Add(b) } - fuzz.AddFromZip(f, "testdata/FuzzReader-raw.zip", true, testing.Short()) - fuzz.AddFromZip(f, "testdata/FuzzReader-enc.zip", false, testing.Short()) + fuzz.AddFromZip(f, "testdata/FuzzReader-raw.zip", fuzz.TypeRaw, testing.Short()) + fuzz.AddFromZip(f, "testdata/FuzzReader-enc.zip", fuzz.TypeGoFuzz, testing.Short()) f.Fuzz(func(t *testing.T, b []byte) { r, err := NewReader(bytes.NewReader(b), int64(len(b))) diff --git a/zstd/blockdec.go b/zstd/blockdec.go index 2445bb4fe5..5f272d87f6 100644 --- a/zstd/blockdec.go +++ b/zstd/blockdec.go @@ -9,6 +9,7 @@ import ( "encoding/binary" "errors" "fmt" + "hash/crc32" "io" "os" "path/filepath" @@ -442,6 +443,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err } } var err error + if debugDecoder { + println("huff table input:", len(literals), "CRC:", crc32.ChecksumIEEE(literals)) + } huff, literals, err = huff0.ReadTable(literals, huff) if err != nil { println("reading huffman table:", err) diff --git a/zstd/fuzz_test.go b/zstd/fuzz_test.go index 0c8212cc5d..9f3c2a8810 100644 --- a/zstd/fuzz_test.go +++ b/zstd/fuzz_test.go @@ -15,18 +15,9 @@ import ( ) func FuzzDecodeAll(f *testing.F) { - fuzz.AddFromZip(f, "testdata/fuzz/decode-corpus-raw.zip", true, testing.Short()) - fuzz.AddFromZip(f, "testdata/fuzz/decode-corpus-encoded.zip", false, testing.Short()) - decLow, err := NewReader(nil, WithDecoderLowmem(true), WithDecoderConcurrency(2), WithDecoderMaxMemory(20<<20), WithDecoderMaxWindow(1<<20), IgnoreChecksum(true)) - if err != nil { - f.Fatal(err) - } - defer decLow.Close() - decHi, err := NewReader(nil, WithDecoderLowmem(false), WithDecoderConcurrency(2), WithDecoderMaxMemory(20<<20), WithDecoderMaxWindow(1<<20), IgnoreChecksum(true)) - if err != nil { - f.Fatal(err) - } - defer decHi.Close() + fuzz.AddFromZip(f, "testdata/decode-regression.zip", fuzz.TypeRaw, false) + fuzz.AddFromZip(f, "testdata/fuzz/decode-corpus-raw.zip", fuzz.TypeRaw, testing.Short()) + fuzz.AddFromZip(f, "testdata/fuzz/decode-corpus-encoded.zip", fuzz.TypeGoFuzz, testing.Short()) f.Fuzz(func(t *testing.T, b []byte) { // Just test if we crash... @@ -36,10 +27,23 @@ func FuzzDecodeAll(f *testing.F) { t.Fatal(r) } }() - b1, err1 := decLow.DecodeAll(b, nil) - b2, err2 := decHi.DecodeAll(b, nil) + + decLow, err := NewReader(nil, WithDecoderLowmem(true), WithDecoderConcurrency(2), WithDecoderMaxMemory(20<<20), WithDecoderMaxWindow(1<<20), IgnoreChecksum(true)) + if err != nil { + f.Fatal(err) + } + defer decLow.Close() + decHi, err := NewReader(nil, WithDecoderLowmem(false), WithDecoderConcurrency(2), WithDecoderMaxMemory(20<<20), WithDecoderMaxWindow(1<<20), IgnoreChecksum(true)) + if err != nil { + f.Fatal(err) + } + defer decHi.Close() + b1, err1 := decLow.DecodeAll(b, make([]byte, 0, len(b))) + b2, err2 := decHi.DecodeAll(b, make([]byte, 0, len(b))) if err1 != err2 { - t.Log(err1, err2) + if (err1 == nil) != (err2 == nil) { + t.Errorf("err low: %v, hi: %v", err1, err2) + } } if err1 != nil { b1, b2 = b1[:0], b2[:0] @@ -60,8 +64,8 @@ func FuzzDecAllNoBMI2(f *testing.F) { } func FuzzDecoder(f *testing.F) { - fuzz.AddFromZip(f, "testdata/fuzz/decode-corpus-raw.zip", true, testing.Short()) - fuzz.AddFromZip(f, "testdata/fuzz/decode-corpus-encoded.zip", false, testing.Short()) + fuzz.AddFromZip(f, "testdata/fuzz/decode-corpus-raw.zip", fuzz.TypeRaw, testing.Short()) + fuzz.AddFromZip(f, "testdata/fuzz/decode-corpus-encoded.zip", fuzz.TypeGoFuzz, testing.Short()) brLow := newBytesReader(nil) brHi := newBytesReader(nil) @@ -112,9 +116,9 @@ func FuzzNoBMI2Dec(f *testing.F) { } func FuzzEncoding(f *testing.F) { - fuzz.AddFromZip(f, "testdata/fuzz/encode-corpus-raw.zip", true, testing.Short()) - fuzz.AddFromZip(f, "testdata/comp-crashers.zip", true, false) - fuzz.AddFromZip(f, "testdata/fuzz/encode-corpus-encoded.zip", false, testing.Short()) + fuzz.AddFromZip(f, "testdata/fuzz/encode-corpus-raw.zip", fuzz.TypeRaw, testing.Short()) + fuzz.AddFromZip(f, "testdata/comp-crashers.zip", fuzz.TypeRaw, false) + fuzz.AddFromZip(f, "testdata/fuzz/encode-corpus-encoded.zip", fuzz.TypeGoFuzz, testing.Short()) // Fuzzing tweaks: const ( // Test a subset of encoders. diff --git a/zstd/testdata/fuzz/decode-corpus-raw.zip b/zstd/testdata/fuzz/decode-corpus-raw.zip index 8314d3f63bb45643f270b2fedcb6daf25a71666a..190061ab3d74cb6a336c685bce18362a1156e875 100644 GIT binary patch delta 698 zcmbu5y-OQ$9EX3oc;`!et5-m-lLmbkR z585uSuEA2UTAVCGp__vaF1k4Ne-Kw8{e}$Mp(yx+XZa1^=izhuZIAo)eUJMD3piL& z03HGgQG{ZYz=l$kp&S*cL=_&O8g|s677y_Vk5PvwsK-+@K!k)wG@%(SXhj>^;XnsE z(S>fv=)p56aH1D3sBohX8a(i#9|L%fLA=0AyxP|<#k94+&Nx=Y-hP~)n9iQp z;lEo2`Sn;V-y-*E3T1c|)vZ#Es!9%LM=oa4Y}_=>WAn@JldO<_=q9GPYe?QVuTnag2d~C3o&#OPg^)U`~P+nPER7}>iw1shyGRN2Z~=W flr!aB%hH`(x9$}V@j~vVPxOh~i4t~2cNzN&YJk@8 delta 208 zcmWN^NlwCW7(nq-pbUzjR0Re91xG++9z>89ltFI830%B{ldxdJiiCtKbjM8$d*b(! z-{S37Ykj|5>%Vx=@#4deL4Y73!klnQgfpVVIOl>mCYM}sO@bsh+>#>A9SVyKS?+n@ zktd#cAxEA9uM~NsM41XzYSd}aq(z%|I(*Qj$0vR4jdM_oqveQ!7>Y0PEkmzvQCewLm2OFDaY=GwajI@^ zW?p7)W>sp6t{YI?B{kVGC&w?}$=B0J*Ua3&!r0i-z{J$V*x1;_L_xz)GdUq4;Np!{ z|HB&;D=VuT+k_Phn3x43v<-mv1;8DW7Mu~b-F+>mF3