Skip to content

Commit

Permalink
Fix corruption that rarely occurs in 32-bit mode with wlog=25
Browse files Browse the repository at this point in the history
Fix an off-by-one error in the compressor that emits corrupt blocks if:

* Zstd is compiled in 32-bit mode
* The windowLog == 25 exactly
* An offset of 2^25-3, 2^25-2, 2^25-1, or 2^25 is emitted
* The bitstream had 7 bits leftover before writing the offset

This bug has been present since before v1.0, but wasn't able to easily
be triggered, since until somewhat recently zstd wasn't able to find
matches that were within 128KB of the window size.

Add a test case, and fix 2 bugs in `ZSTD_compressSequences()`:
* The `ZSTD_isRLE()` check was incorrect. It wouldn't produce
  corruption, but it could waste CPU and not emit RLE even if the block
  was RLE
* One windowSize was `1 << windowLog`, not `1u << windowLog`

Thanks to @tansy for finding the issue, and giving us a reproducer!

Fixes Issue #3350.
  • Loading branch information
terrelln committed Dec 15, 2022
1 parent 6e3667a commit 7a70eae
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 5 deletions.
6 changes: 3 additions & 3 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -2624,7 +2624,7 @@ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
void* entropyWorkspace, size_t entropyWkspSize,
const int bmi2)
{
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
const int longOffsets = cctxParams->cParams.windowLog >= STREAM_ACCUMULATOR_MIN;
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
unsigned* count = (unsigned*)entropyWorkspace;
FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
Expand Down Expand Up @@ -5897,7 +5897,7 @@ static size_t
ZSTD_validateSequence(U32 offCode, U32 matchLength,
size_t posInSrc, U32 windowLog, size_t dictSize)
{
U32 const windowSize = 1 << windowLog;
U32 const windowSize = 1u << windowLog;
/* posInSrc represents the amount of data the decoder would decode up to this point.
* As long as the amount of data decoded is less than or equal to window size, offsets may be
* larger than the total length of output decoded in order to reference the dict, even larger than
Expand Down Expand Up @@ -6256,7 +6256,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,

if (!cctx->isFirstBlock &&
ZSTD_maybeRLE(&cctx->seqStore) &&
ZSTD_isRLE((BYTE const*)src, srcSize)) {
ZSTD_isRLE(ip, blockSize)) {
/* We don't want to emit our first block as a RLE even if it qualifies because
* doing so will cause the decoder (cli only) to throw a "should consume all input error."
* This is only an issue for zstd <= v1.4.3
Expand Down
60 changes: 58 additions & 2 deletions tests/zstreamtest.c
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ static U32 badParameters(ZSTD_CCtx* zc, ZSTD_parameters const savedParams)
return 0;
}

static int basicUnitTests(U32 seed, double compressibility)
static int basicUnitTests(U32 seed, double compressibility, int bigTests)
{
size_t const CNBufferSize = COMPRESSIBLE_NOISE_LENGTH;
void* CNBuffer = malloc(CNBufferSize);
Expand Down Expand Up @@ -1565,6 +1565,62 @@ static int basicUnitTests(U32 seed, double compressibility)
CHECK(!ZSTD_isError(ZSTD_CCtx_setParameter(zc, ZSTD_c_srcSizeHint, -1)), "Out of range doesn't error");
DISPLAYLEVEL(3, "OK \n");

DISPLAYLEVEL(3, "test%3i : Test offset == windowSize : ", testNb++);
{
int windowLog;
int const kMaxWindowLog = bigTests ? 29 : 26;
size_t const kNbSequences = 10000;
size_t const kMaxSrcSize = (1u << kMaxWindowLog) + 10 * kNbSequences;
char* src = calloc(kMaxSrcSize, 1);
ZSTD_Sequence* sequences = malloc(sizeof(ZSTD_Sequence) * kNbSequences);
for (windowLog = ZSTD_WINDOWLOG_MIN; windowLog <= kMaxWindowLog; ++windowLog) {
size_t const srcSize = ((size_t)1 << windowLog) + 10 * (kNbSequences - 1);

sequences[0].offset = 32;
sequences[0].litLength = 32;
sequences[0].matchLength = (1u << windowLog) - 32;
sequences[0].rep = 0;
{
size_t i;
for (i = 1; i < kNbSequences; ++i) {
sequences[i].offset = (1u << windowLog) - (FUZ_rand(&seed) % 8);
sequences[i].litLength = FUZ_rand(&seed) & 7;
sequences[i].matchLength = 10 - sequences[i].litLength;
sequences[i].rep = 0;
}
}

CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_checksumFlag, 1));
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_minMatch, 3));
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_validateSequences, 1));
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_windowLog, windowLog));
assert(srcSize <= kMaxSrcSize);
cSize = ZSTD_compressSequences(zc, compressedBuffer, compressedBufferSize, sequences, kNbSequences, src, srcSize);
CHECK_Z(cSize);
CHECK_Z(ZSTD_DCtx_reset(zd, ZSTD_reset_session_and_parameters));
CHECK_Z(ZSTD_DCtx_setParameter(zd, ZSTD_d_windowLogMax, windowLog))
{
ZSTD_inBuffer in = {compressedBuffer, cSize, 0};
size_t decompressedBytes = 0;
for (;;) {
ZSTD_outBuffer out = {decodedBuffer, decodedBufferSize, 0};
size_t const ret = ZSTD_decompressStream(zd, &out, &in);
CHECK_Z(ret);
CHECK(decompressedBytes + out.pos > srcSize, "Output too large");
CHECK(memcmp(out.dst, src + decompressedBytes, out.pos), "Corrupted");
decompressedBytes += out.pos;
if (ret == 0) {
break;
}
}
CHECK(decompressedBytes != srcSize, "Output wrong size");
}
}
free(src);
}
DISPLAYLEVEL(3, "OK \n");

/* Overlen overwriting window data bug */
DISPLAYLEVEL(3, "test%3i : wildcopy doesn't overwrite potential match data : ", testNb++);
{ /* This test has a window size of 1024 bytes and consists of 3 blocks:
Expand Down Expand Up @@ -2677,7 +2733,7 @@ int main(int argc, const char** argv)
if (nbTests<=0) nbTests=1;

if (testNb==0) {
result = basicUnitTests(0, ((double)proba) / 100); /* constant seed for predictability */
result = basicUnitTests(0, ((double)proba) / 100, bigTests); /* constant seed for predictability */
}

if (!result) {
Expand Down

0 comments on commit 7a70eae

Please sign in to comment.