From 542209a87075cf7d66f5763e1e5d97b1b138a34a Mon Sep 17 00:00:00 2001 From: udayanbapat <104396882+udayanbapat@users.noreply.github.com> Date: Tue, 26 Apr 2022 10:15:23 -0700 Subject: [PATCH 1/4] Initial commit to address 3090. Added support to decompress empty block --- lib/common/zstd_internal.h | 2 +- lib/compress/zstd_compress.c | 12 +++++++++--- lib/decompress/zstd_decompress_block.c | 5 +++-- tests/golden-decompression/empty-block.zst | Bin 0 -> 11 bytes tests/playTests.sh | 5 ++++- 5 files changed, 17 insertions(+), 7 deletions(-) create mode 100644 tests/golden-decompression/empty-block.zst diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 8e2b84a2365..3babf250019 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -93,7 +93,7 @@ typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; #define ZSTD_FRAMECHECKSUMSIZE 4 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ -#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index e43bbec44fc..3ba38d46849 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2817,7 +2817,9 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) assert(srcSize <= ZSTD_BLOCKSIZE_MAX); /* Assert that we have correctly flushed the ctx params into the ms's copy */ ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); - if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding + * additional 1. 
We need to revisit and change this logic to be more consistent */ + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); } else { @@ -3954,7 +3956,9 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, ZSTD_matchState_t* const ms = &cctx->blockState.matchState; U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); - RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, + /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding + * additional 1. We need to revisit and change this logic to be more consistent */ + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE + 1, dstSize_tooSmall, "not enough space to store compressed block"); if (remaining < blockSize) blockSize = remaining; @@ -6151,7 +6155,9 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, blockSize -= additionalByteAdjustment; /* If blocks are too small, emit as a nocompress block */ - if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding + * additional 1. 
We need to revisit and change this logic to be more consistent */ + if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); DEBUGLOG(5, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize); diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 7c046dab506..d9da2f1868d 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -135,7 +135,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, ZSTD_FALLTHROUGH; case set_compressed: - RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); + RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3"); { size_t lhSize, litSize, litCSize; U32 singleStream=0; U32 const lhlCode = (istart[0] >> 2) & 3; @@ -280,12 +280,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, break; case 1: lhSize = 2; + RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3"); litSize = MEM_readLE16(istart) >> 4; break; case 3: lhSize = 3; + RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4"); litSize = MEM_readLE24(istart) >> 4; - RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); break; } RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); diff --git a/tests/golden-decompression/empty-block.zst b/tests/golden-decompression/empty-block.zst new file mode 100644 index 0000000000000000000000000000000000000000..2a3782aff0d1a6bab62dcdc1af29b448322a84bd GIT binary patch literal 11 QcmdPcs{dDkL6iXq028eOKL7v# literal 0 HcmV?d00001 diff --git a/tests/playTests.sh b/tests/playTests.sh index 
873c3c672cd..50aef5df5a3 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -418,8 +418,11 @@ println "\n===> decompression only tests " dd bs=1048576 count=1 if=/dev/zero of=tmp zstd -d -o tmp1 "$TESTDIR/golden-decompression/rle-first-block.zst" $DIFF -s tmp1 tmp -rm -f tmp* +touch tmp_empty +zstd -d -o tmp2 "$TESTDIR/golden-decompression/empty-block.zst" +$DIFF -s tmp2 tmp_empty +rm -f tmp* println "\n===> compress multiple files" println hello > tmp1 From 077b11048b9085b6fe89278611197342a48d6a99 Mon Sep 17 00:00:00 2001 From: udayanbapat <104396882+udayanbapat@users.noreply.github.com> Date: Wed, 27 Apr 2022 09:06:27 -0700 Subject: [PATCH 2/4] Update zstd_decompress_block.c Addressed review comments for the case of 'set_basic' --- lib/decompress/zstd_decompress_block.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index d9da2f1868d..b23ddb977bc 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -234,10 +234,12 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, break; case 1: lhSize = 2; + RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3"); litSize = MEM_readLE16(istart) >> 4; break; case 3: lhSize = 3; + RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4"); litSize = MEM_readLE24(istart) >> 4; break; } @@ -285,7 +287,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, break; case 3: lhSize = 3; - RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4"); + RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4"); litSize = MEM_readLE24(istart) >> 4; break; } From c85c5ad2a57758b108e5d697ea9f4a27fff43fc4 Mon Sep 17 00:00:00 2001 From: udayanbapat 
<104396882+udayanbapat@users.noreply.github.com> Date: Wed, 18 May 2022 13:53:58 -0700 Subject: [PATCH 3/4] Update lib/decompress/zstd_decompress_block.c Co-authored-by: Nick Terrell --- lib/decompress/zstd_decompress_block.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index b23ddb977bc..d790595b4a6 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -234,7 +234,6 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, break; case 1: lhSize = 2; - RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3"); litSize = MEM_readLE16(istart) >> 4; break; case 3: From 9dd0a2559c9dc3d8b7a3df9411c56348571660bd Mon Sep 17 00:00:00 2001 From: udayanbapat <104396882+udayanbapat@users.noreply.github.com> Date: Wed, 18 May 2022 13:54:06 -0700 Subject: [PATCH 4/4] Update lib/decompress/zstd_decompress_block.c Co-authored-by: Nick Terrell --- lib/decompress/zstd_decompress_block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index d790595b4a6..5daa1353773 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -238,7 +238,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, break; case 3: lhSize = 3; - RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4"); + RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3"); litSize = MEM_readLE24(istart) >> 4; break; }