From 0f2816a1fe3a7bab575744ee0ee5e9ef305f8707 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 23 Jan 2022 22:08:20 -0800 Subject: [PATCH] refactored fuzzer tests for sequence compression api; add explicit delimiter mode to libfuzzer test --- lib/compress/zstd_compress.c | 6 +- tests/fuzz/sequence_compression_api.c | 104 ++++++++++++++++---------- tests/fuzzer.c | 68 ++++++++--------- 3 files changed, 102 insertions(+), 76 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 9ed15f03b06..621f9b95023 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -5995,10 +5995,10 @@ static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) return sequenceCopier; } -/* Discover the size of next by searching for the block delimiter. - * Note that a block delimiter must exist in this mode, +/* Discover the size of next block by searching for the delimiter. + * Note that a block delimiter **must** exist in this mode, * otherwise it's an input error. 
- * The value retrieved will be later compared to ensure it remains within bounds */ + * The block size retrieved will be later compared to ensure it remains within bounds */ static size_t blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos) { diff --git a/tests/fuzz/sequence_compression_api.c b/tests/fuzz/sequence_compression_api.c index a2959e1aca6..4d6fd1247d4 100644 --- a/tests/fuzz/sequence_compression_api.c +++ b/tests/fuzz/sequence_compression_api.c @@ -26,8 +26,8 @@ #include "zstd_helpers.h" #include "fuzz_data_producer.h" -static ZSTD_CCtx *cctx = NULL; -static ZSTD_DCtx *dctx = NULL; +static ZSTD_CCtx* cctx = NULL; +static ZSTD_DCtx* dctx = NULL; static void* literalsBuffer = NULL; static void* generatedSrc = NULL; static ZSTD_Sequence* generatedSequences = NULL; @@ -55,7 +55,7 @@ static uint32_t FUZZ_RDG_rand(uint32_t* src) /* Make a pseudorandom string - this simple function exists to avoid * taking a dependency on datagen.h to have RDG_genBuffer(). 
*/ -static char *generatePseudoRandomString(char *str, size_t size) { +static char* generatePseudoRandomString(char* str, size_t size) { const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_"; uint32_t seed = 0; if (size) { @@ -69,7 +69,9 @@ static char *generatePseudoRandomString(char *str, size_t size) { /* Returns size of source buffer */ static size_t decodeSequences(void* dst, size_t nbSequences, - size_t literalsSize, const void* dict, size_t dictSize) { + size_t literalsSize, + const void* dict, size_t dictSize) +{ const uint8_t* litPtr = literalsBuffer; const uint8_t* const litBegin = literalsBuffer; const uint8_t* const litEnd = litBegin + literalsSize; @@ -87,7 +89,7 @@ static size_t decodeSequences(void* dst, size_t nbSequences, if (litPtr + generatedSequences[i].litLength > litEnd) { litPtr = litBegin; } - ZSTD_memcpy(op, litPtr, generatedSequences[i].litLength); + memcpy(op, litPtr, generatedSequences[i].litLength); bytesWritten += generatedSequences[i].litLength; op += generatedSequences[i].litLength; litPtr += generatedSequences[i].litLength; @@ -109,7 +111,7 @@ static size_t decodeSequences(void* dst, size_t nbSequences, } } for (; j < matchLength; ++j) { - op[j] = op[j-(int)generatedSequences[i].offset]; + op[j] = op[j - generatedSequences[i].offset]; } op += j; FUZZ_ASSERT(generatedSequences[i].matchLength == j + k); @@ -120,53 +122,53 @@ static size_t decodeSequences(void* dst, size_t nbSequences, FUZZ_ASSERT(litPtr <= litEnd); lastLLSize = (uint32_t)(litEnd - litPtr); if (lastLLSize <= oend - op) { - ZSTD_memcpy(op, litPtr, lastLLSize); + memcpy(op, litPtr, lastLLSize); generatedSrcBufferSize += lastLLSize; } return generatedSrcBufferSize; } /* Returns nb sequences generated - * TODO: Add repcode fuzzing once we support repcode match splits + * TODO: support generation for ZSTD_sf_explicitBlockDelimiters mode */ static size_t generateRandomSequences(FUZZ_dataProducer_t* producer, size_t literalsSizeLimit, size_t 
dictSize, - size_t windowLog) { + size_t windowLog, ZSTD_sequenceFormat_e mode) +{ + const uint32_t repCode = 0; /* not used by sequence ingestion api */ + const uint32_t windowSize = 1 << windowLog; uint32_t bytesGenerated = 0; uint32_t nbSeqGenerated = 0; - uint32_t litLength; - uint32_t matchLength; - uint32_t matchBound; - uint32_t offset; - uint32_t offsetBound; - uint32_t repCode = 0; uint32_t isFirstSequence = 1; - uint32_t windowSize = 1 << windowLog; + uint32_t blockSize = 0; + const uint32_t blockSizeMax = MIN(128 << 10, 1 << windowLog); - while (nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ + while ( nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ && bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE && !FUZZ_dataProducer_empty(producer)) { - matchBound = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE; - litLength = isFirstSequence && dictSize == 0 ? FUZZ_dataProducer_uint32Range(producer, 1, literalsSizeLimit) - : FUZZ_dataProducer_uint32Range(producer, 0, literalsSizeLimit); + uint32_t matchLength; + uint32_t matchBound = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE; + uint32_t offset; + uint32_t offsetBound; + const uint32_t minLitLength = (isFirstSequence && (dictSize == 0)); + const uint32_t litLength = FUZZ_dataProducer_uint32Range(producer, minLitLength, (uint32_t)literalsSizeLimit); bytesGenerated += litLength; if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) { break; } - offsetBound = bytesGenerated > windowSize ? windowSize : bytesGenerated + dictSize; + offsetBound = (bytesGenerated > windowSize) ? windowSize : bytesGenerated + (uint32_t)dictSize; offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound); if (dictSize > 0 && bytesGenerated <= windowSize) { /* Prevent match length from being such that it would be associated with an offset too large * from the decoder's perspective. If not possible (match would be too small), * then reduce the offset if necessary. 
*/ - size_t bytesToReachWindowSize = windowSize - bytesGenerated; + const size_t bytesToReachWindowSize = windowSize - bytesGenerated; if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) { - uint32_t newOffsetBound = offsetBound > windowSize ? windowSize : offsetBound; + const uint32_t newOffsetBound = offsetBound > windowSize ? windowSize : offsetBound; offset = FUZZ_dataProducer_uint32Range(producer, 1, newOffsetBound); } else { - matchBound = bytesToReachWindowSize > ZSTD_FUZZ_MATCHLENGTH_MAXSIZE ? - ZSTD_FUZZ_MATCHLENGTH_MAXSIZE : bytesToReachWindowSize; + matchBound = MIN(ZSTD_FUZZ_MATCHLENGTH_MAXSIZE, (uint32_t)bytesToReachWindowSize); } } matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, matchBound); @@ -174,20 +176,39 @@ static size_t generateRandomSequences(FUZZ_dataProducer_t* producer, if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) { break; } - ZSTD_Sequence seq = {offset, litLength, matchLength, repCode}; - generatedSequences[nbSeqGenerated++] = seq; - isFirstSequence = 0; - } + { ZSTD_Sequence seq = {offset, litLength, matchLength, repCode}; + const uint32_t lastLits = FUZZ_dataProducer_uint32Range(producer, 0, litLength+1); + #define SPLITPROB 6000 + #define SPLITMARK 5234 + const int split = (FUZZ_dataProducer_uint32Range(producer, 0, SPLITPROB) == SPLITMARK); + if (mode == ZSTD_sf_explicitBlockDelimiters) { + const size_t seqSize = seq.litLength + seq.matchLength; + if (blockSize + seqSize > blockSizeMax) { /* reaching limit : must end block now */ + const ZSTD_Sequence endBlock = {0, 0, 0, 0}; + generatedSequences[nbSeqGenerated++] = endBlock; + } + if (split) { + const ZSTD_Sequence endBlock = {lastLits, 0, 0, 0}; + seq.litLength -= lastLits; + generatedSequences[nbSeqGenerated++] = endBlock; + } + } + generatedSequences[nbSeqGenerated++] = seq; + isFirstSequence = 0; + } } return nbSeqGenerated; } +/* TODO: fuzz ZSTD_sf_explicitBlockDelimiters mode + */ static size_t roundTripTest(void *result, size_t resultCapacity, 
void *compressed, size_t compressedCapacity, size_t srcSize, const void *dict, size_t dictSize, size_t generatedSequencesSize, - size_t wLog, unsigned cLevel, unsigned hasDict) + int wLog, int cLevel, unsigned hasDict, + ZSTD_sequenceFormat_e mode) { size_t cSize; size_t dSize; @@ -200,8 +221,7 @@ static size_t roundTripTest(void *result, size_t resultCapacity, ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog); ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN); ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1); - /* TODO: Add block delim mode fuzzing */ - ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, mode); if (hasDict) { FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary(cctx, dict, dictSize)); FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary(dctx, dict, dictSize)); @@ -231,15 +251,18 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) size_t cBufSize; size_t generatedSrcSize; size_t nbSequences; - void* dictBuffer; + void* dictBuffer = NULL; size_t dictSize = 0; unsigned hasDict; unsigned wLog; int cLevel; + ZSTD_sequenceFormat_e mode; - FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size); + FUZZ_dataProducer_t* const producer = FUZZ_dataProducer_create(src, size); + FUZZ_ASSERT(producer); if (literalsBuffer == NULL) { literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE); + FUZZ_ASSERT(literalsBuffer); literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE); } @@ -247,11 +270,13 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) if (hasDict) { dictSize = FUZZ_dataProducer_uint32Range(producer, 1, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE); dictBuffer = FUZZ_malloc(dictSize); + FUZZ_ASSERT(dictBuffer); dictBuffer = generatePseudoRandomString(dictBuffer, dictSize); } /* Generate window log first so we dont generate offsets too large */ wLog = FUZZ_dataProducer_uint32Range(producer, 
ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX_32); cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22); + mode = (ZSTD_sequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1); if (!generatedSequences) { generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ); @@ -259,7 +284,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) if (!generatedSrc) { generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE); } - nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog); + nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, mode); generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize); cBufSize = ZSTD_compressBound(generatedSrcSize); cBuf = FUZZ_malloc(cBufSize); @@ -276,14 +301,15 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size) FUZZ_ASSERT(dctx); } - size_t const result = roundTripTest(rBuf, rBufSize, + { const size_t result = roundTripTest(rBuf, rBufSize, cBuf, cBufSize, generatedSrcSize, dictBuffer, dictSize, nbSequences, - wLog, cLevel, hasDict); - FUZZ_ZASSERT(result); - FUZZ_ASSERT_MSG(result == generatedSrcSize, "Incorrect regenerated size"); + (int)wLog, cLevel, hasDict, mode); + FUZZ_ZASSERT(result); + FUZZ_ASSERT_MSG(result == generatedSrcSize, "Incorrect regenerated size"); + } FUZZ_ASSERT_MSG(!FUZZ_memcmp(generatedSrc, rBuf, generatedSrcSize), "Corruption!"); free(rBuf); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index d168d657611..e09c9dda95e 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -3114,18 +3114,17 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "test%3i : ZSTD_getSequences followed by ZSTD_compressSequences : ", testNb++); { - size_t srcSize = 500 KB; - BYTE* src = (BYTE*)CNBuffer; - BYTE* dst = (BYTE*)compressedBuffer; - size_t dstSize = ZSTD_compressBound(srcSize); - size_t decompressSize = srcSize; - 
char* decompressBuffer = (char*)malloc(decompressSize); + const size_t srcSize = 500 KB; + const BYTE* const src = (BYTE*)CNBuffer; + BYTE* const dst = (BYTE*)compressedBuffer; + const size_t dstCapacity = ZSTD_compressBound(srcSize); + const size_t decompressSize = srcSize; + char* const decompressBuffer = (char*)malloc(decompressSize); size_t compressedSize; - size_t dSize; - ZSTD_CCtx* cctx = ZSTD_createCCtx(); - ZSTD_Sequence* seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence)); - size_t seqsSize; + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(srcSize * sizeof(ZSTD_Sequence)); + size_t nbSeqs; if (seqs == NULL) goto _output_error; assert(cctx != NULL); @@ -3133,36 +3132,37 @@ static int basicUnitTests(U32 const seed, double compressibility) /* Populate src with random data */ RDG_genBuffer(CNBuffer, srcSize, compressibility, 0., seed); - /* Test with block delimiters roundtrip */ - seqsSize = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize); + /* Roundtrip Test with block delimiters generated by ZSTD_generateSequences() */ + nbSeqs = ZSTD_generateSequences(cctx, seqs, srcSize, src, srcSize); ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters); - compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize); + compressedSize = ZSTD_compressSequences(cctx, dst, dstCapacity, seqs, nbSeqs, src, srcSize); if (ZSTD_isError(compressedSize)) { DISPLAY("Error in sequence compression with block delims\n"); goto _output_error; } - dSize = ZSTD_decompress(decompressBuffer, decompressSize, dst, compressedSize); - if (ZSTD_isError(dSize)) { - DISPLAY("Error in sequence compression roundtrip with block delims\n"); - goto _output_error; - } + { size_t const dSize = ZSTD_decompress(decompressBuffer, decompressSize, dst, compressedSize); + if (ZSTD_isError(dSize)) { + DISPLAY("Error in 
sequence compression roundtrip with block delims\n"); + goto _output_error; + } } assert(!memcmp(decompressBuffer, src, srcSize)); - /* Test with no block delimiters roundtrip */ - seqsSize = ZSTD_mergeBlockDelimiters(seqs, seqsSize); - ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); - ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters); - compressedSize = ZSTD_compressSequences(cctx, dst, dstSize, seqs, seqsSize, src, srcSize); + /* Roundtrip Test with no block delimiters */ + { size_t const nbSeqsAfterMerge = ZSTD_mergeBlockDelimiters(seqs, nbSeqs); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters); + compressedSize = ZSTD_compressSequences(cctx, dst, dstCapacity, seqs, nbSeqsAfterMerge, src, srcSize); + } if (ZSTD_isError(compressedSize)) { DISPLAY("Error in sequence compression with no block delims\n"); goto _output_error; } - dSize = ZSTD_decompress(decompressBuffer, decompressSize, dst, compressedSize); - if (ZSTD_isError(dSize)) { - DISPLAY("Error in sequence compression roundtrip with no block delims\n"); - goto _output_error; - } + { size_t const dSize = ZSTD_decompress(decompressBuffer, decompressSize, dst, compressedSize); + if (ZSTD_isError(dSize)) { + DISPLAY("Error in sequence compression roundtrip with no block delims\n"); + goto _output_error; + } } assert(!memcmp(decompressBuffer, src, srcSize)); ZSTD_freeCCtx(cctx); @@ -3969,9 +3969,9 @@ static int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, U32 const DISPLAYLEVEL(5, "fuzzer t%u: Bufferless streaming compression test \n", testNb); { U32 const testLog = FUZ_rand(&lseed) % maxSrcLog; U32 const dictLog = FUZ_rand(&lseed) % maxSrcLog; - int const cLevel = (FUZ_rand(&lseed) % - (ZSTD_maxCLevel() - - (MAX(testLog, dictLog) / cLevelLimiter))) + + int const cLevel = (int)(FUZ_rand(&lseed) % + ((U32)ZSTD_maxCLevel() - + (MAX(testLog, dictLog) / 
(U32)cLevelLimiter))) + 1; maxTestSize = FUZ_rLogLength(&lseed, testLog); if (maxTestSize >= dstBufferSize) maxTestSize = dstBufferSize-1; @@ -4067,7 +4067,7 @@ static int fuzzerTests(U32 seed, unsigned nbTests, unsigned startTest, U32 const free(cBuffer); free(dstBuffer); free(mirrorBuffer); - return result; + return (int)result; _output_error: result = 1; @@ -4104,7 +4104,7 @@ static unsigned readU32FromChar(const char** stringPtr) { unsigned result = 0; while ((**stringPtr >='0') && (**stringPtr <='9')) - result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; + result *= 10, result += (unsigned)(**stringPtr - '0'), (*stringPtr)++ ; if ((**stringPtr=='K') || (**stringPtr=='M')) { result <<= 10; if (**stringPtr=='M') result <<= 10; @@ -4246,7 +4246,7 @@ int main(int argc, const char** argv) } } if (!result) - result = fuzzerTests(seed, nbTests, testNb, maxDuration, ((double)proba) / 100, bigTests); + result = fuzzerTests(seed, (unsigned)nbTests, (unsigned)testNb, maxDuration, ((double)proba) / 100, bigTests); if (mainPause) { int unused; DISPLAY("Press Enter \n");