Skip to content
This repository has been archived by the owner on Apr 22, 2024. It is now read-only.

Commit

Permalink
Make CRC64 faster by putting more operations into the pipeline earlier
Browse files Browse the repository at this point in the history
* 53-73% faster on Xeon 2670 v0 @ 2.6ghz
* 2-2.5x faster on Core i3 8130U @ 2.2 ghz
* 1.6-2.46 bytes/cycle on i3 8130U
* likely >2x faster than crcspeed on newer CPUs with more resources than a 2012-era Xeon 2670
* crc64 combine function runs in <50 nanoseconds typical with vector + cache optimizations
  (~8 *microseconds* without vector optimizations, ~80 *microseconds without cache,
  the combination is extra effective)
* redis-benchmark gets --crc <buffer size> testing option
* still single-threaded
  • Loading branch information
josiahcarlson committed Mar 31, 2023
1 parent 1f76bb1 commit 55642fe
Show file tree
Hide file tree
Showing 7 changed files with 843 additions and 31 deletions.
6 changes: 3 additions & 3 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -345,11 +345,11 @@ endif

REDIS_SERVER_NAME=redis-server$(PROG_SUFFIX)
REDIS_SENTINEL_NAME=redis-sentinel$(PROG_SUFFIX)
REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o
REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o $(COMMANDS_FILENAME).o strl.o connection.o unix.o logreqres.o
REDIS_CLI_NAME=redis-cli$(PROG_SUFFIX)
REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o redisassert.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o
REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o redisassert.o crccombine.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o
REDIS_BENCHMARK_NAME=redis-benchmark$(PROG_SUFFIX)
REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o redisassert.o release.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o
REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o redisassert.o release.o crccombine.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o
REDIS_CHECK_RDB_NAME=redis-check-rdb$(PROG_SUFFIX)
REDIS_CHECK_AOF_NAME=redis-check-aof$(PROG_SUFFIX)
ALL_SOURCES=$(sort $(patsubst %.o,%.c,$(REDIS_SERVER_OBJ) $(REDIS_CLI_OBJ) $(REDIS_BENCHMARK_OBJ)))
Expand Down
34 changes: 26 additions & 8 deletions src/crc64.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,32 @@ static uint64_t crc64_table[8][256] = {{0}};
* \return The reflected data.
*****************************************************************************/
static inline uint_fast64_t crc_reflect(uint_fast64_t data, size_t data_len) {
uint_fast64_t ret = data & 0x01;

for (size_t i = 1; i < data_len; i++) {
data >>= 1;
ret = (ret << 1) | (data & 0x01);
}

return ret;
// only ever called for data_len == 64;

// Borrowed from bit twiddling hacks, original in the public domain.
// https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
// Extended to 64 bits, and added byteswap for final 3 steps.
// 16-30x 64-bit operations, no comparisons (16 for native byteswap, 30 for pure C)
// should be ~9-16x faster than before.

// swap odd and even bits
data = ((data >> 1) & 0x5555555555555555ULL) | ((data & 0x5555555555555555ULL) << 1);
// swap consecutive pairs
data = ((data >> 2) & 0x3333333333333333ULL) | ((data & 0x3333333333333333ULL) << 2);
// swap nibbles ...
data = ((data >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((data & 0x0F0F0F0F0F0F0F0FULL) << 4);
#if defined(__GNUC__) || defined(__clang__)
data = __builtin_bswap64(data);
#else
// swap bytes
data = ((data >> 8) & 0x00FF00FF00FF00FFULL) | ((data & 0x00FF00FF00FF00FFULL) << 8);
// swap 2-byte long pairs
data = ( data >> 16 & 0xFFFF0000FFFFULL) | ((data & 0xFFFF0000FFFFULL) << 16);
// swap 4-byte quads
data = ( data >> 32 & 0xFFFFFFFFULL) | ((data & 0xFFFFFFFFULL) << 32);
#endif
// adjust for non-64-bit reversals
return data >> (64 - data_len);
}

/**
Expand Down
Loading

0 comments on commit 55642fe

Please sign in to comment.