Skip to content

Commit

Permalink
fix sequence compression API in Explicit Delimiter mode
Browse files Browse the repository at this point in the history
  • Loading branch information
Cyan4973 committed Jan 25, 2022
1 parent cc7d23b commit 87dcd33
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 19 deletions.
2 changes: 1 addition & 1 deletion lib/common/error_private.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(version_unsupported): return "Version not supported";
case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
case PREFIX(corruption_detected): return "Corrupted block detected";
case PREFIX(corruption_detected): return "Input corruption detected";
case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
case PREFIX(parameter_unsupported): return "Unsupported parameter";
case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
Expand Down
73 changes: 64 additions & 9 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -5752,9 +5752,9 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx,
}

typedef struct {
U32 idx; /* Index in array of ZSTD_Sequence */
U32 posInSequence; /* Position within sequence at idx */
size_t posInSrc; /* Number of bytes given by sequences provided so far */
U32 idx; /* Index in array of ZSTD_Sequence */
U32 posInSequence; /* Position within sequence at idx */
size_t posInSrc; /* Number of bytes given by sequences provided so far */
} ZSTD_sequencePosition;

/* ZSTD_validateSequence() :
Expand Down Expand Up @@ -5809,6 +5809,8 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
repcodes_t updatedRepcodes;
U32 dictSize;

DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreExplicitBlockDelim (blockSize = %zu)", blockSize);

if (cctx->cdict) {
dictSize = (U32)cctx->cdict->dictContentSize;
} else if (cctx->prefixDict.dict) {
Expand Down Expand Up @@ -5995,6 +5997,56 @@ static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
return sequenceCopier;
}

/* blockSize_explicitDelimiter() :
 * Discover the size of the next block by scanning forward from seqPos.idx
 * until a block delimiter (a sequence with offset == 0) is found.
 * The returned size is the sum of litLength + matchLength of every sequence
 * scanned, delimiter included (the delimiter carries the last literals).
 *
 * A block delimiter must exist in this mode, otherwise it's an input error.
 * A delimiter must also have matchLength == 0.
 * Sequence lengths are caller-provided, hence untrusted :
 * they are widened to size_t before being summed, and any wrap-around is rejected,
 * so that an oversized sequence can't masquerade as a small block.
 *
 * @return : size of the next block, or an error code (corruption_detected).
 * The value retrieved will be later compared to ensure it remains within bounds */
static size_t
blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos)
{
    int end = 0;
    size_t blockSize = 0;
    size_t spos = seqPos.idx;
    assert(spos <= inSeqsSize);
    while (spos < inSeqsSize) {
        /* Widen first : adding the two fields directly would be performed
         * in the (narrower) type of the fields and could silently wrap. */
        size_t const litLength = inSeqs[spos].litLength;
        size_t const matchLength = inSeqs[spos].matchLength;
        size_t const seqLength = litLength + matchLength;
        end = (inSeqs[spos].offset == 0);
        /* Still possible to wrap when size_t is no wider than the fields */
        if ((seqLength < litLength) || (blockSize + seqLength < blockSize)) {
            RETURN_ERROR(corruption_detected, "sequence lengths overflow while measuring block size");
        }
        blockSize += seqLength;
        if (end) {
            if (matchLength != 0) {
                RETURN_ERROR(corruption_detected, "delimiter format error : both matchlength and offset must be == 0");
            }
            break;
        }
        spos++;
    }
    if (!end) {
        RETURN_ERROR(corruption_detected, "Reached end of sequences without finding a block delimiter");
    }
    return blockSize;
}

/* blockSize_noDelimiter() :
 * Without explicit delimiters, the block size is more of a "target" :
 * take a full block of @blockSize bytes while more than that remains,
 * otherwise take whatever is left (this is then the last block).
 * @return : min(blockSize, remaining) */
static size_t blockSize_noDelimiter(size_t blockSize, size_t remaining)
{
    if (remaining > blockSize) {
        /* not the last block : emit a full-sized one */
        return blockSize;
    }
    return remaining;
}

/* determine_blockSize() :
 * Select the size of the next block to compress, depending on @mode.
 * - ZSTD_sf_noBlockDelimiters : delegate to blockSize_noDelimiter().
 * - otherwise : measure the block via blockSize_explicitDelimiter(),
 *   then check the result against both @blockSize and @remaining.
 * @return : size of the next block, or an error code :
 *           any error forwarded from blockSize_explicitDelimiter(),
 *           corruption_detected if the sequences describe a block larger than @blockSize,
 *           srcSize_wrong if they describe more bytes than @remaining. */
static size_t determine_blockSize(ZSTD_sequenceFormat_e mode,
                                  size_t blockSize, size_t remaining,
                                  const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos)
{
    size_t delimitedSize;

    DEBUGLOG(6, "determine_blockSize : remainingSize = %zu", remaining);

    if (mode == ZSTD_sf_noBlockDelimiters) {
        return blockSize_noDelimiter(blockSize, remaining);
    }

    /* explicit delimiters : block boundaries are dictated by the sequences */
    delimitedSize = blockSize_explicitDelimiter(inSeqs, inSeqsSize, seqPos);
    FORWARD_IF_ERROR(delimitedSize, "Error while determining block size with explicit delimiters");

    if (delimitedSize > blockSize) {
        RETURN_ERROR(corruption_detected, "sequences incorrectly define a too large block");
    }
    if (delimitedSize > remaining) {
        RETURN_ERROR(srcSize_wrong, "sequences define a frame longer than source");
    }
    return delimitedSize;
}

/* Compress, block-by-block, all of the sequences given.
*
* Returns the cumulative size of all compressed blocks (including their headers),
Expand All @@ -6007,9 +6059,6 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
const void* src, size_t srcSize)
{
size_t cSize = 0;
U32 lastBlock;
size_t blockSize;
size_t compressedSeqsSize;
size_t remaining = srcSize;
ZSTD_sequencePosition seqPos = {0, 0, 0};

Expand All @@ -6029,10 +6078,15 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
}

while (remaining) {
size_t compressedSeqsSize;
size_t cBlockSize;
size_t additionalByteAdjustment;
lastBlock = remaining <= cctx->blockSize;
blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters,
cctx->blockSize, remaining,
inSeqs, inSeqsSize, seqPos);
U32 const lastBlock = (blockSize == remaining);
assert(blockSize <= remaining);
FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size");
ZSTD_resetSeqStore(&cctx->seqStore);
DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize);

Expand Down Expand Up @@ -6113,7 +6167,8 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
return cSize;
}

size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
size_t ZSTD_compressSequences(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
const void* src, size_t srcSize)
{
Expand Down
21 changes: 12 additions & 9 deletions lib/zstd.h
Original file line number Diff line number Diff line change
Expand Up @@ -1375,23 +1375,25 @@ typedef enum {
} ZSTD_sequenceFormat_e;

/*! ZSTD_generateSequences() :
* Generate sequences using ZSTD_compress2, given a source buffer.
* Generate sequences using ZSTD_compress2(), given a source buffer.
*
* Each block will end with a dummy sequence
* with offset == 0, matchLength == 0, and litLength == length of last literals.
* litLength may be == 0, and if so, then the sequence (of: 0 ml: 0 ll: 0)
* simply acts as a block delimiter.
*
* zc can be used to insert custom compression params.
* This function invokes ZSTD_compress2
* @zc can be used to insert custom compression params.
* This function invokes ZSTD_compress2().
*
* The output of this function can be fed into ZSTD_compressSequences() with CCtx
* setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
* @return : number of sequences generated
*/

ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
size_t outSeqsSize, const void* src, size_t srcSize);
ZSTDLIB_STATIC_API size_t
ZSTD_generateSequences( ZSTD_CCtx* zc,
ZSTD_Sequence* outSeqs, size_t outSeqsSize,
const void* src, size_t srcSize);

/*! ZSTD_mergeBlockDelimiters() :
* Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
Expand Down Expand Up @@ -1432,11 +1434,12 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, si
* Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
* Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
* and cannot emit an RLE block that disagrees with the repcode history
* @return : final compressed size or a ZSTD error.
* @return : final compressed size, or a ZSTD error code.
*/
ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
const void* src, size_t srcSize);
ZSTDLIB_STATIC_API size_t
ZSTD_compressSequences( ZSTD_CCtx* cctx, void* dst, size_t dstSize,
const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
const void* src, size_t srcSize);


/*! ZSTD_writeSkippableFrame() :
Expand Down

0 comments on commit 87dcd33

Please sign in to comment.