Skip to content

Commit

Permalink
Merge pull request #805 from easyaspi314/xxh3_update_rewrite
Browse files Browse the repository at this point in the history
Rewrite XXH3_update
  • Loading branch information
Cyan4973 authored Feb 28, 2023
2 parents 3f5c75c + 86c3c05 commit 2b328a1
Showing 1 changed file with 65 additions and 73 deletions.
138 changes: 65 additions & 73 deletions xxhash.h
Original file line number Diff line number Diff line change
Expand Up @@ -5431,31 +5431,57 @@ XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOE
return XXH_OK;
}

/* Note : when XXH3_consumeStripes() is invoked,
* there must be a guarantee that at least one more byte must be consumed from input
* so that the function can blindly consume all stripes using the "normal" secret segment */
XXH_FORCE_INLINE void
/*!
* @internal
* @brief Processes a large input for XXH3_update() and XXH3_digest_long().
*
* Unlike XXH3_hashLong_internal_loop(), this can process data that overlaps a block.
*
* @param acc Pointer to the 8 accumulator lanes
* @param nbStripesSoFarPtr In/out pointer to the number of leftover stripes in the block*
* @param nbStripesPerBlock Number of stripes in a block
* @param input Input pointer
* @param nbStripes Number of stripes to process
* @param secret Secret pointer
* @param secretLimit Offset of the last block in @p secret
* @param f_acc Pointer to an XXH3_accumulate implementation
* @param f_scramble Pointer to an XXH3_scrambleAcc implementation
* @return Pointer past the end of @p input after processing
*/
XXH_FORCE_INLINE const xxh_u8 *
XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
XXH3_f_accumulate f_acc,
XXH3_f_scrambleAcc f_scramble)
{
XXH_ASSERT(nbStripes <= nbStripesPerBlock); /* can handle max 1 scramble per invocation */
XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);
if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) {
/* need a scrambling operation */
size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr;
size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock;
f_acc(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock);
f_scramble(acc, secret + secretLimit);
f_acc(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock);
*nbStripesSoFarPtr = nbStripesAfterBlock;
} else {
f_acc(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes);
const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
/* Process full blocks */
if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
/* Process the initial partial block... */
size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;

do {
/* Accumulate and scramble */
f_acc(acc, input, initialSecret, nbStripesThisIter);
f_scramble(acc, secret + secretLimit);
input += nbStripesThisIter * XXH_STRIPE_LEN;
nbStripes -= nbStripesThisIter;
/* Then continue the loop with the full block size */
nbStripesThisIter = nbStripesPerBlock;
initialSecret = secret;
} while (nbStripes > nbStripesPerBlock);
*nbStripesSoFarPtr = 0;
}
/* Process a partial block */
if (nbStripes > 0) {
f_acc(acc, input, initialSecret, nbStripes);
input += nbStripes * XXH_STRIPE_LEN;
*nbStripesSoFarPtr += nbStripes;
}
/* Return end pointer */
return input;
}

#ifndef XXH3_STREAM_USE_STACK
Expand Down Expand Up @@ -5485,15 +5511,16 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
* when operating accumulators directly into state.
* Operating into stack space seems to enable proper optimization.
* clang, on the other hand, doesn't seem to need this trick */
XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc));
XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];
XXH_memcpy(acc, state->acc, sizeof(acc));
#else
xxh_u64* XXH_RESTRICT const acc = state->acc;
#endif
state->totalLen += len;
XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);

/* small input : just fill in tmp buffer */
if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {
if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
XXH_memcpy(state->buffer + state->bufferedSize, input, len);
state->bufferedSize += (XXH32_hash_t)len;
return XXH_OK;
Expand All @@ -5519,53 +5546,16 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
state->bufferedSize = 0;
}
XXH_ASSERT(input < bEnd);

/* large input to consume : ingest per full block */
if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) {
if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar);
/* join to current block's end */
{ size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar;
XXH_ASSERT(nbStripesToEnd <= nbStripes);
f_acc(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd);
f_scramble(acc, secret + state->secretLimit);
state->nbStripesSoFar = 0;
input += nbStripesToEnd * XXH_STRIPE_LEN;
nbStripes -= nbStripesToEnd;
}
/* consume per entire blocks */
while(nbStripes >= state->nbStripesPerBlock) {
f_acc(acc, input, secret, state->nbStripesPerBlock);
f_scramble(acc, secret + state->secretLimit);
input += state->nbStripesPerBlock * XXH_STRIPE_LEN;
nbStripes -= state->nbStripesPerBlock;
}
/* consume last partial block */
f_acc(acc, input, secret, nbStripes);
input += nbStripes * XXH_STRIPE_LEN;
XXH_ASSERT(input < bEnd); /* at least some bytes left */
state->nbStripesSoFar = nbStripes;
/* buffer predecessor of last partial stripe */
XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN);
} else {
/* content to consume <= block size */
/* Consume input by a multiple of internal buffer size */
if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
do {
XXH3_consumeStripes(acc,
input = XXH3_consumeStripes(acc,
&state->nbStripesSoFar, state->nbStripesPerBlock,
input, XXH3_INTERNALBUFFER_STRIPES,
secret, state->secretLimit,
f_acc, f_scramble);
input += XXH3_INTERNALBUFFER_SIZE;
} while (input<limit);
/* buffer predecessor of last partial stripe */
XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
}
}
input, nbStripes,
secret, state->secretLimit,
f_acc, f_scramble);
XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);

}
/* Some remaining input (always) : buffer it */
XXH_ASSERT(input < bEnd);
XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
Expand All @@ -5574,7 +5564,7 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
state->bufferedSize = (XXH32_hash_t)(bEnd-input);
#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
/* save stack accumulators into state */
memcpy(state->acc, acc, sizeof(acc));
XXH_memcpy(state->acc, acc, sizeof(acc));
#endif
}

Expand All @@ -5595,33 +5585,36 @@ XXH3_digest_long (XXH64_hash_t* acc,
const XXH3_state_t* state,
const unsigned char* secret)
{
xxh_u8 lastStripe[XXH_STRIPE_LEN];
const xxh_u8* lastStripePtr;

/*
* Digest on a local copy. This way, the state remains unaltered, and it can
* continue ingesting more input afterwards.
*/
XXH_memcpy(acc, state->acc, sizeof(state->acc));
if (state->bufferedSize >= XXH_STRIPE_LEN) {
/* Consume remaining stripes then point to remaining data in buffer */
size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
size_t nbStripesSoFar = state->nbStripesSoFar;
XXH3_consumeStripes(acc,
&nbStripesSoFar, state->nbStripesPerBlock,
state->buffer, nbStripes,
secret, state->secretLimit,
XXH3_accumulate, XXH3_scrambleAcc);
/* last stripe */
XXH3_accumulate_512(acc,
state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
secret + state->secretLimit - XXH_SECRET_LASTACC_START);
lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
} else { /* bufferedSize < XXH_STRIPE_LEN */
xxh_u8 lastStripe[XXH_STRIPE_LEN];
/* Copy to temp buffer */
size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */
XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
XXH3_accumulate_512(acc,
lastStripe,
secret + state->secretLimit - XXH_SECRET_LASTACC_START);
lastStripePtr = lastStripe;
}
/* Last stripe */
XXH3_accumulate_512(acc,
lastStripePtr,
secret + state->secretLimit - XXH_SECRET_LASTACC_START);
}

/*! @ingroup XXH3_family */
Expand Down Expand Up @@ -6116,8 +6109,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NO
XXH_PUBLIC_API XXH_errorcode
XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
{
return XXH3_update(state, (const xxh_u8*)input, len,
XXH3_accumulate, XXH3_scrambleAcc);
return XXH3_64bits_update(state, input, len);
}

/*! @ingroup XXH3_family */
Expand Down

0 comments on commit 2b328a1

Please sign in to comment.