Commit

Merge pull request #4178 from facebook/split_fromBorders
Add fastest block-splitter variant
Cyan4973 authored Oct 28, 2024
2 parents 7fb5347 + 4e1a879 commit 5bae43b
Showing 6 changed files with 229 additions and 177 deletions.
10 changes: 10 additions & 0 deletions lib/compress/hist.c
@@ -26,6 +26,16 @@ unsigned HIST_isError(size_t code) { return ERR_isError(code); }
/*-**************************************************************
* Histogram functions
****************************************************************/
+void HIST_add(unsigned* count, const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* const end = ip + srcSize;
+
+    while (ip<end) {
+        count[*ip++]++;
+    }
+}
+
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
{
7 changes: 7 additions & 0 deletions lib/compress/hist.h
@@ -73,3 +73,10 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
*/
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize);

+/*! HIST_add() :
+ *  Lowest level: just add nb of occurrences of characters from @src into @count.
+ *  @count is not reset. @count array is presumed large enough (i.e. 1 KB).
+ *  This function does not need any additional stack memory.
+ */
+void HIST_add(unsigned* count, const void* src, size_t srcSize);
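
For context, a small standalone sketch (not part of this commit) of the accumulation behavior documented above: the local hist_add() mirrors HIST_add() so the example builds without the zstd sources, and the 1 KB sizing corresponds to 256 unsigned counters.

#include <stdio.h>
#include <string.h>

/* mirrors HIST_add(): counts accumulate, the function never resets them */
static void hist_add(unsigned* count, const void* src, size_t srcSize)
{
    const unsigned char* ip = (const unsigned char*)src;
    const unsigned char* const end = ip + srcSize;
    while (ip < end) count[*ip++]++;
}

int main(void)
{
    unsigned count[256];              /* "large enough (i.e. 1 KB)" = 256 * sizeof(unsigned) */
    memset(count, 0, sizeof(count));  /* resetting is the caller's job */
    hist_add(count, "abracadabra", 11);
    hist_add(count, "abc", 3);        /* second call adds on top of the first */
    printf("a:%u b:%u c:%u d:%u r:%u\n",
           count['a'], count['b'], count['c'], count['d'], count['r']);
    /* expected output: a:6 b:3 c:2 d:1 r:2 */
    return 0;
}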
22 changes: 6 additions & 16 deletions lib/compress/zstd_compress.c
@@ -4493,32 +4493,22 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,

static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t srcSize, size_t blockSizeMax, ZSTD_strategy strat, S64 savings)
{
+    /* split level based on compression strategy, from `fast` to `btultra2` */
+    static const int splitLevels[] = { 0, 0, 1, 2, 2, 3, 3, 4, 4, 4 };
    /* note: conservatively only split full blocks (128 KB) currently.
     * While it's possible to go lower, let's keep it simple for a first implementation.
     * Besides, benefits of splitting are reduced when blocks are already small.
     */
    if (srcSize < 128 KB || blockSizeMax < 128 KB)
        return MIN(srcSize, blockSizeMax);
    /* do not split incompressible data though:
-     * ensure a 3 bytes per full block overhead limit.
-     * Note: as a consequence, the first full block skips the splitting detector.
+     * require verified savings to allow pre-splitting.
+     * Note: as a consequence, the first full block is not split.
     */
    if (savings < 3) return 128 KB;
    /* dynamic splitting has a cpu cost for analysis,
-     * due to that cost it's only used for higher levels */
-    if (strat >= ZSTD_btopt)
-        return ZSTD_splitBlock(src, blockSizeMax, 3, cctx->tmpWorkspace, cctx->tmpWkspSize);
-    if (strat >= ZSTD_lazy2)
-        return ZSTD_splitBlock(src, blockSizeMax, 2, cctx->tmpWorkspace, cctx->tmpWkspSize);
-    if (strat >= ZSTD_greedy)
-        return ZSTD_splitBlock(src, blockSizeMax, 1, cctx->tmpWorkspace, cctx->tmpWkspSize);
-    if (strat >= ZSTD_dfast)
-        return ZSTD_splitBlock(src, blockSizeMax, 0, cctx->tmpWorkspace, cctx->tmpWkspSize);
-    /* blind split strategy
-     * heuristic value, tested as being "generally better".
-     * no cpu cost, but can over-split homegeneous data.
-     */
-    return 92 KB;
+     * select a variant among multiple gradual speed/accuracy tradeoffs */
+    return ZSTD_splitBlock(src, blockSizeMax, splitLevels[strat], cctx->tmpWorkspace, cctx->tmpWkspSize);
}

/*! ZSTD_compress_frameChunk() :
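For reference, a small standalone illustration (not part of this commit) of what the splitLevels[] table amounts to, assuming the public ZSTD_strategy values from zstd.h (ZSTD_fast=1 through ZSTD_btultra2=9): level 0 selects the new fromBorders heuristic for the fastest strategy, while levels 1 to 4 map to the byChunks variant at accuracies 0 to 3, matching the per-strategy choices of the removed if-chain.

#include <stdio.h>

int main(void)
{
    /* public ZSTD_strategy values from zstd.h: ZSTD_fast=1 ... ZSTD_btultra2=9 */
    static const char* const strategies[] = { "(unused)",
        "ZSTD_fast", "ZSTD_dfast", "ZSTD_greedy", "ZSTD_lazy", "ZSTD_lazy2",
        "ZSTD_btlazy2", "ZSTD_btopt", "ZSTD_btultra", "ZSTD_btultra2" };
    /* same table as in ZSTD_optimalBlockSize() above */
    static const int splitLevels[] = { 0, 0, 1, 2, 2, 3, 3, 4, 4, 4 };
    int s;
    for (s = 1; s <= 9; s++) {
        if (splitLevels[s] == 0)
            printf("%-13s -> split level 0 (fromBorders)\n", strategies[s]);
        else
            printf("%-13s -> split level %d (byChunks, accuracy %d)\n",
                   strategies[s], splitLevels[s], splitLevels[s] - 1);
    }
    return 0;
}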
53 changes: 48 additions & 5 deletions lib/compress/zstd_preSplit.c
@@ -12,6 +12,7 @@
#include "../common/mem.h" /* S64 */
#include "../common/zstd_deps.h" /* ZSTD_memset */
#include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */
+#include "hist.h" /* HIST_add */
#include "zstd_preSplit.h"


@@ -77,10 +78,10 @@ typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize)

#define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate

#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize)                                 \
    static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
    {                                                                              \
        recordFingerprint_generic(fp, src, srcSize, _rate, _hSize);                \
    }

ZSTD_GEN_RECORD_FINGERPRINT(1, 10)
@@ -185,10 +186,52 @@ static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
(void)flushEvents; (void)removeEvents;
}

+/* ZSTD_splitBlock_fromBorders(): very fast strategy :
+ * compare fingerprint from beginning and end of the block,
+ * derive from their difference if it's preferable to split in the middle,
+ * repeat the process a second time, for finer grained decision.
+ * 3 times did not bring improvements, so I stopped at 2.
+ * Benefits are good enough for a cheap heuristic.
+ * More accurate splitting saves more, but speed impact is also more perceptible.
+ * For better accuracy, use the more elaborate variant *_byChunks.
+ */
+static size_t ZSTD_splitBlock_fromBorders(const void* blockStart, size_t blockSize,
+                        void* workspace, size_t wkspSize)
+{
+#define SEGMENT_SIZE 512
+    FPStats* const fpstats = (FPStats*)workspace;
+    Fingerprint* middleEvents = (Fingerprint*)(void*)((char*)workspace + 512 * sizeof(unsigned));
+    assert(blockSize == (128 << 10));
+    assert(workspace != NULL);
+    assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
+    ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
+    assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
+
+    initStats(fpstats);
+    HIST_add(fpstats->pastEvents.events, blockStart, SEGMENT_SIZE);
+    HIST_add(fpstats->newEvents.events, (const char*)blockStart + blockSize - SEGMENT_SIZE, SEGMENT_SIZE);
+    fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents = SEGMENT_SIZE;
+    if (!compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, 0, 8))
+        return blockSize;
+
+    HIST_add(middleEvents->events, (const char*)blockStart + blockSize/2 - SEGMENT_SIZE/2, SEGMENT_SIZE);
+    middleEvents->nbEvents = SEGMENT_SIZE;
+    {   U64 const distFromBegin = fpDistance(&fpstats->pastEvents, middleEvents, 8);
+        U64 const distFromEnd = fpDistance(&fpstats->newEvents, middleEvents, 8);
+        U64 const minDistance = SEGMENT_SIZE * SEGMENT_SIZE / 3;
+        if (abs64((S64)distFromBegin - (S64)distFromEnd) < minDistance)
+            return 64 KB;
+        return (distFromBegin > distFromEnd) ? 32 KB : 96 KB;
+    }
+}

size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
int level,
void* workspace, size_t wkspSize)
{
-    assert(0<=level && level<=3);
-    return ZSTD_splitBlock_byChunks(blockStart, blockSize, level, workspace, wkspSize);
+    assert(0<=level && level<=4);
+    if (level == 0)
+        return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize);
+    /* level >= 1 */
+    return ZSTD_splitBlock_byChunks(blockStart, blockSize, level-1, workspace, wkspSize);
}
16 changes: 9 additions & 7 deletions lib/compress/zstd_preSplit.h
@@ -19,14 +19,16 @@ extern "C" {

#define ZSTD_SLIPBLOCK_WORKSPACESIZE 8208

-/* @level must be a value between 0 and 3.
- * higher levels spend more energy to find block boundaries
- * @workspace must be aligned on 8-bytes boundaries
+/* ZSTD_splitBlock():
+ * @level must be a value between 0 and 4.
+ * higher levels spend more energy to detect block boundaries.
+ * @workspace must be aligned for size_t.
 * @wkspSize must be at least >= ZSTD_SLIPBLOCK_WORKSPACESIZE
- * note2:
- * for the time being, this function only accepts full 128 KB blocks,
- * therefore @blockSizeMax must be == 128 KB.
- * This could be extended to smaller sizes in the future.
+ * note:
+ * For the time being, this function only accepts full 128 KB blocks.
+ * Therefore, @blockSize must be == 128 KB.
+ * While this could be extended to smaller sizes in the future,
+ * it is not yet clear if this would be useful. TBD.
*/
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
int level,

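To tie the pieces together, a hedged caller sketch (not part of this commit), assuming the zstd source tree since zstd_preSplit.h is an internal header rather than a public API; malloc() is used for the workspace because it returns memory suitably aligned for size_t.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "zstd_preSplit.h"   /* ZSTD_splitBlock, ZSTD_SLIPBLOCK_WORKSPACESIZE */

int main(void)
{
    size_t const blockSize = 128 * 1024;   /* only full 128 KB blocks are accepted for now */
    char* const block = (char*)malloc(blockSize);
    void* const workspace = malloc(ZSTD_SLIPBLOCK_WORKSPACESIZE);  /* >= 8208 bytes */
    if (block == NULL || workspace == NULL) return 1;

    /* two visibly different halves, so a split in the middle is plausible */
    memset(block, 0, blockSize / 2);
    memset(block + blockSize / 2, 'A', blockSize / 2);

    {   /* level 0 is the new fromBorders variant; levels 1-4 use byChunks */
        size_t const firstBlockSize = ZSTD_splitBlock(block, blockSize, 0,
                                                      workspace, ZSTD_SLIPBLOCK_WORKSPACESIZE);
        printf("suggested size of first sub-block: %zu bytes\n", firstBlockSize);
    }

    free(workspace);
    free(block);
    return 0;
}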