Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added faster block splitter variants for levels 3-7 #4176

Merged
merged 8 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -4507,9 +4507,13 @@ static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t src
/* dynamic splitting has a cpu cost for analysis,
* due to that cost it's only used for higher levels */
if (strat >= ZSTD_btopt)
return ZSTD_splitBlock(src, srcSize, blockSizeMax, split_lvl2, cctx->tmpWorkspace, cctx->tmpWkspSize);
return ZSTD_splitBlock(src, blockSizeMax, 3, cctx->tmpWorkspace, cctx->tmpWkspSize);
if (strat >= ZSTD_lazy2)
return ZSTD_splitBlock(src, srcSize, blockSizeMax, split_lvl1, cctx->tmpWorkspace, cctx->tmpWkspSize);
return ZSTD_splitBlock(src, blockSizeMax, 2, cctx->tmpWorkspace, cctx->tmpWkspSize);
if (strat >= ZSTD_greedy)
return ZSTD_splitBlock(src, blockSizeMax, 1, cctx->tmpWorkspace, cctx->tmpWkspSize);
if (strat >= ZSTD_dfast)
return ZSTD_splitBlock(src, blockSizeMax, 0, cctx->tmpWorkspace, cctx->tmpWkspSize);
/* blind split strategy
* heuristic value, tested as being "generally better".
* no cpu cost, but can over-split homegeneous data.
Expand Down
99 changes: 56 additions & 43 deletions lib/compress/zstd_preSplit.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,20 @@
#define THRESHOLD_PENALTY 3

#define HASHLENGTH 2
#define HASHLOG 10
#define HASHTABLESIZE (1 << HASHLOG)
#define HASHLOG_MAX 10
#define HASHTABLESIZE (1 << HASHLOG_MAX)
#define HASHMASK (HASHTABLESIZE - 1)
#define KNUTH 0x9e3779b9

static unsigned hash2(const void *p)
/* for hashLog > 8, hash 2 bytes.
* for hashLog == 8, just take the byte, no hashing.
* The speed of this method relies on compile-time constant propagation */
FORCE_INLINE_TEMPLATE unsigned hash2(const void *p, unsigned hashLog)
{
return (U32)(MEM_read16(p)) * KNUTH >> (32 - HASHLOG);
assert(hashLog >= 8);
if (hashLog == 8) return (U32)((const BYTE*)p)[0];
assert(hashLog <= HASHLOG_MAX);
return (U32)(MEM_read16(p)) * KNUTH >> (32 - hashLog);
}


Expand All @@ -46,45 +52,51 @@ static void initStats(FPStats* fpstats)
ZSTD_memset(fpstats, 0, sizeof(FPStats));
}

FORCE_INLINE_TEMPLATE void addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate)
FORCE_INLINE_TEMPLATE void
addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)
{
const char* p = (const char*)src;
size_t limit = srcSize - HASHLENGTH + 1;
size_t n;
assert(srcSize >= HASHLENGTH);
for (n = 0; n < limit; n+=samplingRate) {
fp->events[hash2(p+n)]++;
fp->events[hash2(p+n, hashLog)]++;
}
fp->nbEvents += limit/samplingRate;
}

#define ADDEVENTS_RATE(_rate) ZSTD_addEvents_##_rate
FORCE_INLINE_TEMPLATE void
recordFingerprint_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)
{
ZSTD_memset(fp, 0, sizeof(unsigned) * ((size_t)1 << hashLog));
fp->nbEvents = 0;
addEvents_generic(fp, src, srcSize, samplingRate, hashLog);
}

typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize);

#define ZSTD_GEN_ADDEVENTS_SAMPLE(_rate) \
static void ADDEVENTS_RATE(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
#define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate

#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize) \
static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
{ \
addEvents_generic(fp, src, srcSize, _rate); \
recordFingerprint_generic(fp, src, srcSize, _rate, _hSize); \
}

ZSTD_GEN_ADDEVENTS_SAMPLE(1)
ZSTD_GEN_ADDEVENTS_SAMPLE(5)

ZSTD_GEN_RECORD_FINGERPRINT(1, 10)
ZSTD_GEN_RECORD_FINGERPRINT(5, 10)
ZSTD_GEN_RECORD_FINGERPRINT(11, 9)
ZSTD_GEN_RECORD_FINGERPRINT(43, 8)

typedef void (*addEvents_f)(Fingerprint* fp, const void* src, size_t srcSize);

static void recordFingerprint(Fingerprint* fp, const void* src, size_t s, addEvents_f addEvents)
{
ZSTD_memset(fp, 0, sizeof(*fp));
addEvents(fp, src, s);
}

static U64 abs64(S64 s64) { return (U64)((s64 < 0) ? -s64 : s64); }

static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2)
static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2, unsigned hashLog)
{
U64 distance = 0;
size_t n;
for (n = 0; n < HASHTABLESIZE; n++) {
assert(hashLog <= HASHLOG_MAX);
for (n = 0; n < ((size_t)1 << hashLog); n++) {
distance +=
abs64((S64)fp1->events[n] * (S64)fp2->nbEvents - (S64)fp2->events[n] * (S64)fp1->nbEvents);
}
Expand All @@ -96,12 +108,13 @@ static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2)
*/
static int compareFingerprints(const Fingerprint* ref,
const Fingerprint* newfp,
int penalty)
int penalty,
unsigned hashLog)
{
assert(ref->nbEvents > 0);
assert(newfp->nbEvents > 0);
{ U64 p50 = (U64)ref->nbEvents * (U64)newfp->nbEvents;
U64 deviation = fpDistance(ref, newfp);
U64 deviation = fpDistance(ref, newfp, hashLog);
U64 threshold = p50 * (U64)(THRESHOLD_BASE + penalty) / THRESHOLD_PENALTY_RATE;
return deviation >= threshold;
}
Expand Down Expand Up @@ -137,45 +150,45 @@ static void removeEvents(Fingerprint* acc, const Fingerprint* slice)
}

#define CHUNKSIZE (8 << 10)
/* Note: technically, we use CHUNKSIZE, so that's 8 KB */
static size_t ZSTD_splitBlock_byChunks(const void* src, size_t srcSize,
size_t blockSizeMax, addEvents_f f,
static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
int level,
void* workspace, size_t wkspSize)
{
static const RecordEvents_f records_fs[] = {
FP_RECORD(43), FP_RECORD(11), FP_RECORD(5), FP_RECORD(1)
};
static const unsigned hashParams[] = { 8, 9, 10, 10 };
const RecordEvents_f record_f = (assert(0<=level && level<=3), records_fs[level]);
FPStats* const fpstats = (FPStats*)workspace;
const char* p = (const char*)src;
const char* p = (const char*)blockStart;
int penalty = THRESHOLD_PENALTY;
size_t pos = 0;
if (srcSize <= blockSizeMax) return srcSize;
assert(blockSizeMax == (128 << 10));
assert(blockSize == (128 << 10));
assert(workspace != NULL);
assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;

initStats(fpstats);
recordFingerprint(&fpstats->pastEvents, p, CHUNKSIZE, f);
for (pos = CHUNKSIZE; pos <= blockSizeMax - CHUNKSIZE; pos += CHUNKSIZE) {
recordFingerprint(&fpstats->newEvents, p + pos, CHUNKSIZE, f);
if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty)) {
record_f(&fpstats->pastEvents, p, CHUNKSIZE);
for (pos = CHUNKSIZE; pos <= blockSize - CHUNKSIZE; pos += CHUNKSIZE) {
record_f(&fpstats->newEvents, p + pos, CHUNKSIZE);
if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[level])) {
return pos;
} else {
mergeEvents(&fpstats->pastEvents, &fpstats->newEvents);
if (penalty > 0) penalty--;
}
}
assert(pos == blockSizeMax);
return blockSizeMax;
assert(pos == blockSize);
return blockSize;
(void)flushEvents; (void)removeEvents;
}

size_t ZSTD_splitBlock(const void* src, size_t srcSize,
size_t blockSizeMax, ZSTD_SplitBlock_strategy_e splitStrat,
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
int level,
void* workspace, size_t wkspSize)
{
if (splitStrat == split_lvl2)
return ZSTD_splitBlock_byChunks(src, srcSize, blockSizeMax, ADDEVENTS_RATE(1), workspace, wkspSize);

assert(splitStrat == split_lvl1);
return ZSTD_splitBlock_byChunks(src, srcSize, blockSizeMax, ADDEVENTS_RATE(5), workspace, wkspSize);
assert(0<=level && level<=3);
return ZSTD_splitBlock_byChunks(blockStart, blockSize, level, workspace, wkspSize);
}
9 changes: 4 additions & 5 deletions lib/compress/zstd_preSplit.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,19 @@
extern "C" {
#endif

typedef enum { split_lvl1, split_lvl2 } ZSTD_SplitBlock_strategy_e;

#define ZSTD_SLIPBLOCK_WORKSPACESIZE 8208

/* note:
/* @level must be a value between 0 and 3.
* higher levels spend more energy to find block boundaries
* @workspace must be aligned on 8-bytes boundaries
* @wkspSize must be at least >= ZSTD_SLIPBLOCK_WORKSPACESIZE
* note2:
* for the time being, this function only accepts full 128 KB blocks,
* therefore @blockSizeMax must be == 128 KB.
* This could be extended to smaller sizes in the future.
*/
size_t ZSTD_splitBlock(const void* src, size_t srcSize,
size_t blockSizeMax, ZSTD_SplitBlock_strategy_e splitStrat,
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
int level,
void* workspace, size_t wkspSize);

#if defined (__cplusplus)
Expand Down
Loading