Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite XXH3_update #805

Merged
merged 2 commits into from
Feb 28, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 65 additions & 73 deletions xxhash.h
Original file line number Diff line number Diff line change
Expand Up @@ -5431,31 +5431,57 @@ XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOE
return XXH_OK;
}

/* Note : when XXH3_consumeStripes() is invoked,
* the caller must guarantee that at least one more byte remains to be consumed from input,
* so that the function can blindly consume all stripes using the "normal" secret segment */
XXH_FORCE_INLINE void
/*!
* @internal
* @brief Processes a large input for XXH3_update() and XXH3_digest_long().
*
* Unlike XXH3_hashLong_internal_loop(), this can start partway through a block and process data that crosses block boundaries.
*
* @param acc Pointer to the 8 accumulator lanes
* @param nbStripesSoFarPtr In/out pointer to the number of stripes already consumed in the current block
* @param nbStripesPerBlock Number of stripes in a block
* @param input Input pointer
* @param nbStripes Number of stripes to process
* @param secret Secret pointer
* @param secretLimit Offset of the last block in @p secret
* @param f_acc Pointer to an XXH3_accumulate implementation
* @param f_scramble Pointer to an XXH3_scrambleAcc implementation
* @return Pointer past the end of @p input after processing
*/
XXH_FORCE_INLINE const xxh_u8 *
easyaspi314 marked this conversation as resolved.
Show resolved Hide resolved
XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
XXH3_f_accumulate f_acc,
XXH3_f_scrambleAcc f_scramble)
{
XXH_ASSERT(nbStripes <= nbStripesPerBlock); /* can handle max 1 scramble per invocation */
XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);
if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) {
/* need a scrambling operation */
size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr;
size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock;
f_acc(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock);
f_scramble(acc, secret + secretLimit);
f_acc(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock);
*nbStripesSoFarPtr = nbStripesAfterBlock;
} else {
f_acc(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes);
const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
/* Process full blocks */
if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
/* Process the initial partial block... */
size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;

do {
/* Accumulate and scramble */
f_acc(acc, input, initialSecret, nbStripesThisIter);
f_scramble(acc, secret + secretLimit);
input += nbStripesThisIter * XXH_STRIPE_LEN;
nbStripes -= nbStripesThisIter;
/* Then continue the loop with the full block size */
nbStripesThisIter = nbStripesPerBlock;
initialSecret = secret;
} while (nbStripes > nbStripesPerBlock);
*nbStripesSoFarPtr = 0;
}
/* Process a partial block */
if (nbStripes > 0) {
f_acc(acc, input, initialSecret, nbStripes);
input += nbStripes * XXH_STRIPE_LEN;
*nbStripesSoFarPtr += nbStripes;
}
/* Return end pointer */
return input;
}

#ifndef XXH3_STREAM_USE_STACK
Expand Down Expand Up @@ -5485,15 +5511,16 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
* when operating accumulators directly into state.
* Operating into stack space seems to enable proper optimization.
* clang, on the other hand, doesn't seem to need this trick */
XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc));
XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];
XXH_memcpy(acc, state->acc, sizeof(acc));
#else
xxh_u64* XXH_RESTRICT const acc = state->acc;
#endif
state->totalLen += len;
XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);

/* small input : just fill in tmp buffer */
if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {
if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
XXH_memcpy(state->buffer + state->bufferedSize, input, len);
state->bufferedSize += (XXH32_hash_t)len;
return XXH_OK;
Expand All @@ -5519,53 +5546,16 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
state->bufferedSize = 0;
}
XXH_ASSERT(input < bEnd);

/* large input to consume : ingest per full block */
if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) {
if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar);
/* join to current block's end */
{ size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar;
XXH_ASSERT(nbStripesToEnd <= nbStripes);
f_acc(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd);
f_scramble(acc, secret + state->secretLimit);
state->nbStripesSoFar = 0;
input += nbStripesToEnd * XXH_STRIPE_LEN;
nbStripes -= nbStripesToEnd;
}
/* consume per entire blocks */
while(nbStripes >= state->nbStripesPerBlock) {
f_acc(acc, input, secret, state->nbStripesPerBlock);
f_scramble(acc, secret + state->secretLimit);
input += state->nbStripesPerBlock * XXH_STRIPE_LEN;
nbStripes -= state->nbStripesPerBlock;
}
/* consume last partial block */
f_acc(acc, input, secret, nbStripes);
input += nbStripes * XXH_STRIPE_LEN;
XXH_ASSERT(input < bEnd); /* at least some bytes left */
state->nbStripesSoFar = nbStripes;
/* buffer predecessor of last partial stripe */
XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN);
} else {
/* content to consume <= block size */
/* Consume input by a multiple of internal buffer size */
if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
do {
XXH3_consumeStripes(acc,
input = XXH3_consumeStripes(acc,
&state->nbStripesSoFar, state->nbStripesPerBlock,
input, XXH3_INTERNALBUFFER_STRIPES,
secret, state->secretLimit,
f_acc, f_scramble);
input += XXH3_INTERNALBUFFER_SIZE;
} while (input<limit);
/* buffer predecessor of last partial stripe */
XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
}
}
input, nbStripes,
secret, state->secretLimit,
f_acc, f_scramble);
XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);

}
/* Some remaining input (always) : buffer it */
XXH_ASSERT(input < bEnd);
XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
Expand All @@ -5574,7 +5564,7 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
state->bufferedSize = (XXH32_hash_t)(bEnd-input);
#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
/* save stack accumulators into state */
memcpy(state->acc, acc, sizeof(acc));
XXH_memcpy(state->acc, acc, sizeof(acc));
#endif
}

Expand All @@ -5595,33 +5585,36 @@ XXH3_digest_long (XXH64_hash_t* acc,
const XXH3_state_t* state,
const unsigned char* secret)
{
xxh_u8 lastStripe[XXH_STRIPE_LEN];
const xxh_u8* lastStripePtr;

/*
* Digest on a local copy. This way, the state remains unaltered, and it can
* continue ingesting more input afterwards.
*/
XXH_memcpy(acc, state->acc, sizeof(state->acc));
if (state->bufferedSize >= XXH_STRIPE_LEN) {
/* Consume remaining stripes then point to remaining data in buffer */
size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
size_t nbStripesSoFar = state->nbStripesSoFar;
XXH3_consumeStripes(acc,
&nbStripesSoFar, state->nbStripesPerBlock,
state->buffer, nbStripes,
secret, state->secretLimit,
XXH3_accumulate, XXH3_scrambleAcc);
/* last stripe */
XXH3_accumulate_512(acc,
state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
secret + state->secretLimit - XXH_SECRET_LASTACC_START);
lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
} else { /* bufferedSize < XXH_STRIPE_LEN */
xxh_u8 lastStripe[XXH_STRIPE_LEN];
/* Copy to temp buffer */
size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */
XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
XXH3_accumulate_512(acc,
lastStripe,
secret + state->secretLimit - XXH_SECRET_LASTACC_START);
lastStripePtr = lastStripe;
}
/* Last stripe */
XXH3_accumulate_512(acc,
lastStripePtr,
secret + state->secretLimit - XXH_SECRET_LASTACC_START);
}

/*! @ingroup XXH3_family */
Expand Down Expand Up @@ -6116,8 +6109,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NO
XXH_PUBLIC_API XXH_errorcode
XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
{
return XXH3_update(state, (const xxh_u8*)input, len,
XXH3_accumulate, XXH3_scrambleAcc);
return XXH3_64bits_update(state, input, len);
}

/*! @ingroup XXH3_family */
Expand Down