From b283c6b508792892d76b9a4911e3086e37518748 Mon Sep 17 00:00:00 2001 From: Madelyn Olson Date: Tue, 30 Apr 2024 14:57:21 -0700 Subject: [PATCH 1/9] Initial PR outlining the governance for the project (#345) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initial PR to add a governance doc outlining permissions for the main Valkey project, as well as defining responsibilities for sub-projects. --------- Signed-off-by: Madelyn Olson Co-authored-by: Viktor Söderqvist Co-authored-by: Ping Xie Co-authored-by: zhaozhao.zz Co-authored-by: hwware Co-authored-by: binyan Date: Wed, 1 May 2024 08:41:37 +0900 Subject: [PATCH 2/9] Modify mem_freed variable in evict.c and Update debug.c (#376) Initialize the mem_freed variable at its declaration. This prevents the variable from being read before it is assigned. Signed-off-by: NAM UK KIM --- src/debug.c | 2 +- src/evict.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/debug.c b/src/debug.c index d07159cf0f..33d145b28d 100644 --- a/src/debug.c +++ b/src/debug.c @@ -2625,7 +2625,7 @@ static size_t get_ready_to_signal_threads_tids(int sig_num, pid_t tids[TIDS_MAX_ if(tids_count == TIDS_MAX_SIZE) break; } - /* Swap the last tid with the the current thread id */ + /* Swap the last tid with the current thread id */ if(current_thread_index != -1) { pid_t last_tid = tids[tids_count - 1]; diff --git a/src/evict.c b/src/evict.c index 7b4937303c..fcac92dfc8 100644 --- a/src/evict.c +++ b/src/evict.c @@ -545,7 +545,7 @@ int performEvictions(void) { int keys_freed = 0; size_t mem_reported, mem_tofree; - long long mem_freed; /* May be negative */ + long long mem_freed = 0; /* May become negative */ mstime_t latency, eviction_latency; long long delta; int slaves = listLength(server.slaves); @@ -563,8 +563,6 @@ int performEvictions(void) { unsigned long eviction_time_limit_us = evictionTimeLimitUs(); - mem_freed = 0; - latencyStartMonitor(latency); monotime evictionTimer; From 44f273d13bb506550c2473ac41ca00f5ca95b156 Mon Sep 17 00:00:00 2001 From: Lipeng Zhu Date: Wed, 1 May 2024 09:02:22 +0800 Subject: [PATCH 3/9] Delete unused declaration (#400) Delete unused declaration `void *dictEntryMetadata(dictEntry *de);` in dict.h. --------- Signed-off-by: Lipeng Zhu --- src/dict.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dict.h b/src/dict.h index 8aaacd41e3..1f4cb317ee 100644 --- a/src/dict.h +++ b/src/dict.h @@ -210,7 +210,6 @@ void dictSetDoubleVal(dictEntry *de, double val); int64_t dictIncrSignedIntegerVal(dictEntry *de, int64_t val); uint64_t dictIncrUnsignedIntegerVal(dictEntry *de, uint64_t val); double dictIncrDoubleVal(dictEntry *de, double val); -void *dictEntryMetadata(dictEntry *de); void *dictGetKey(const dictEntry *de); void *dictGetVal(const dictEntry *de); int64_t dictGetSignedIntegerVal(const dictEntry *de); From 89f72bc3ae1ae6e61c5306b318b83ee30d527baf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= Date: Wed, 1 May 2024 03:26:59 +0200 Subject: [PATCH 4/9] Don't include config.h from serverassert.h (#404) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Serverassert is a drop-in replacement of assert. We use it even in code copied from other sources. To make these files usable outside of Valkey, it should be enough to replace the `serverassert.h` include with `<assert.h>`. Therefore, this file shouldn't have any dependencies on the rest of the Valkey code.
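For illustration (a hypothetical sketch, not part of this patch; the VALKEY_BUILD guard is an invented name), a source file shared with another project could select its assert like this:

    #ifdef VALKEY_BUILD
    #include "serverassert.h"  /* assert() that reports via _serverAssert() and panics */
    #else
    #include <assert.h>        /* plain libc assert outside of Valkey */
    #endif

Every assert(...) in the file then works unchanged in both builds.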
--------- Signed-off-by: Viktor Söderqvist --- src/serverassert.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/serverassert.h b/src/serverassert.h index d8c3dd4255..08f97e5741 100644 --- a/src/serverassert.h +++ b/src/serverassert.h @@ -38,7 +38,20 @@ #ifndef VALKEY_ASSERT_H #define VALKEY_ASSERT_H -#include "config.h" +/* This file shouldn't have any dependencies on any other Valkey code. */ + +#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) +#define valkey_unreachable __builtin_unreachable +#else +#include <stdlib.h> +#define valkey_unreachable abort +#endif + +#if __GNUC__ >= 3 +#define likely(x) __builtin_expect(!!(x), 1) +#else +#define likely(x) (x) +#endif #define assert(_e) (likely((_e))?(void)0 : (_serverAssert(#_e,__FILE__,__LINE__),valkey_unreachable())) #define panic(...) _serverPanic(__FILE__,__LINE__,__VA_ARGS__),valkey_unreachable() From f4e10eee06cd978452a17e69d684e8afc8eeae3f Mon Sep 17 00:00:00 2001 From: Josiah Carlson Date: Tue, 30 Apr 2024 19:32:01 -0700 Subject: [PATCH 5/9] CRC64 perf improvements from Redis patches (#350) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve the performance of crc64 for large batches by processing a large number of bytes in parallel and combining the results. ## Performance * 53-73% faster on Xeon 2670 v0 @ 2.6 GHz * 2-2.5x faster on Core i3 8130U @ 2.2 GHz * 1.6-2.46 bytes/cycle on i3 8130U * likely >2x faster than crcspeed on newer CPUs with more resources than a 2012-era Xeon 2670 * crc64 combine function runs in <50 nanoseconds typical with vector + cache optimizations (~8 *microseconds* without vector optimizations, ~80 *microseconds* without cache; the combination is extra effective) * still single-threaded * valkey-server test crc64 --help (requires `make distclean && make SERVER_TEST=yes`)
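As a sketch of what "combining" buys us (illustrative only; `buf`, `len` and the split point `n` are hypothetical names, and the constant is the reversed Jones polynomial used throughout this patch):

    /* The CRC-64 of a whole buffer equals the combine of its halves' CRCs: */
    uint64_t a = crc64(0, buf, n);             /* first n bytes   */
    uint64_t b = crc64(0, buf + n, len - n);   /* remaining bytes */
    uint64_t whole = crc64_combine(a, b, len - n,
                                   UINT64_C(0x95ac9329ac4bc9b5), 64);
    /* whole should equal crc64(0, buf, len) */

This identity is what lets crcspeed hash 2-3 segments of a large buffer independently and then staple the partial CRCs back together.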
--------- Signed-off-by: Josiah Carlson Signed-off-by: Madelyn Olson Co-authored-by: Viktor Söderqvist Co-authored-by: Madelyn Olson --- src/Makefile | 9 +- src/crc64.c | 264 ++++++++++++++++++++++++++++++++++++++++++----- src/crccombine.c | 253 +++++++++++++++++++++++++++++++++++++++++++++ src/crccombine.h | 10 ++ src/crcspeed.c | 168 ++++++++++++++++++++++++++---- src/crcspeed.h | 2 + 6 files changed, 657 insertions(+), 49 deletions(-) create mode 100644 src/crccombine.c create mode 100644 src/crccombine.h diff --git a/src/Makefile b/src/Makefile index b73d509fe5..833349839f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -131,6 +131,9 @@ ifdef REDIS_LDFLAGS endif FINAL_CFLAGS=$(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) $(SERVER_CFLAGS) +ifeq ($(SERVER_TEST),yes) + FINAL_CFLAGS +=-DSERVER_TEST=1 +endif FINAL_LDFLAGS=$(LDFLAGS) $(OPT) $(SERVER_LDFLAGS) $(DEBUG) FINAL_LIBS=-lm DEBUG=-g -ggdb @@ -382,11 +385,11 @@ endif ENGINE_NAME=valkey SERVER_NAME=$(ENGINE_NAME)-server$(PROG_SUFFIX) ENGINE_SENTINEL_NAME=$(ENGINE_NAME)-sentinel$(PROG_SUFFIX) -ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o +ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o ENGINE_CLI_NAME=$(ENGINE_NAME)-cli$(PROG_SUFFIX) -ENGINE_CLI_OBJ=anet.o adlist.o dict.o valkey-cli.o zmalloc.o release.o ae.o serverassert.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o +ENGINE_CLI_OBJ=anet.o adlist.o dict.o valkey-cli.o zmalloc.o release.o ae.o serverassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o ENGINE_BENCHMARK_NAME=$(ENGINE_NAME)-benchmark$(PROG_SUFFIX) -ENGINE_BENCHMARK_OBJ=ae.o anet.o valkey-benchmark.o adlist.o dict.o zmalloc.o serverassert.o release.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o +ENGINE_BENCHMARK_OBJ=ae.o anet.o valkey-benchmark.o adlist.o dict.o zmalloc.o serverassert.o release.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o ENGINE_CHECK_RDB_NAME=$(ENGINE_NAME)-check-rdb$(PROG_SUFFIX) ENGINE_CHECK_AOF_NAME=$(ENGINE_NAME)-check-aof$(PROG_SUFFIX) ALL_SOURCES=$(sort $(patsubst %.o,%.c,$(ENGINE_SERVER_OBJ) $(ENGINE_CLI_OBJ) $(ENGINE_BENCHMARK_OBJ))) diff --git a/src/crc64.c b/src/crc64.c index 0f71eea780..9d4e98ee70 100644 --- a/src/crc64.c +++ b/src/crc64.c @@ -28,6 +28,7 @@ #include "crc64.h" #include "crcspeed.h" +#include "serverassert.h" static uint64_t crc64_table[8][256] = {{0}}; #define POLY UINT64_C(0xad93d23594c935a9) @@ -67,14 +68,33 @@ static uint64_t crc64_table[8][256] = {{0}}; * \return The reflected data. *****************************************************************************/ static inline uint_fast64_t crc_reflect(uint_fast64_t data, size_t data_len) { - uint_fast64_t ret = data & 0x01; + /* only ever called for data_len == 64 in this codebase + * + * Borrowed from bit twiddling hacks, original in the public domain. + * https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel + * Extended to 64 bits, and added byteswap for final 3 steps.
+ * 16-30x 64-bit operations, no comparisons (16 for native byteswap, 30 for pure C) + */ - for (size_t i = 1; i < data_len; i++) { - data >>= 1; - ret = (ret << 1) | (data & 0x01); - } - - return ret; + assert(data_len <= 64); + /* swap odd and even bits */ + data = ((data >> 1) & 0x5555555555555555ULL) | ((data & 0x5555555555555555ULL) << 1); + /* swap consecutive pairs */ + data = ((data >> 2) & 0x3333333333333333ULL) | ((data & 0x3333333333333333ULL) << 2); + /* swap nibbles ... */ + data = ((data >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((data & 0x0F0F0F0F0F0F0F0FULL) << 4); +#if defined(__GNUC__) || defined(__clang__) + data = __builtin_bswap64(data); +#else + /* swap bytes */ + data = ((data >> 8) & 0x00FF00FF00FF00FFULL) | ((data & 0x00FF00FF00FF00FFULL) << 8); + /* swap 2-byte long pairs */ + data = ( data >> 16 & 0xFFFF0000FFFFULL) | ((data & 0xFFFF0000FFFFULL) << 16); + /* swap 4-byte quads */ + data = ( data >> 32 & 0xFFFFFFFFULL) | ((data & 0xFFFFFFFFULL) << 32); +#endif + /* adjust for non-64-bit reversals */ + return data >> (64 - data_len); } /** @@ -126,29 +146,221 @@ uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l) { #ifdef SERVER_TEST #include <stdio.h> +static void genBenchmarkRandomData(char *data, int count); +static int bench_crc64(unsigned char *data, uint64_t size, long long passes, uint64_t check, char *name, int csv); +static void bench_combine(char *label, uint64_t size, uint64_t expect, int csv); +long long _ustime(void); + +#include <inttypes.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <unistd.h> + +#include "zmalloc.h" +#include "crccombine.h" + +long long _ustime(void) { + struct timeval tv; + long long ust; + + gettimeofday(&tv, NULL); + ust = ((long long)tv.tv_sec)*1000000; + ust += tv.tv_usec; + return ust; } + +static int bench_crc64(unsigned char *data, uint64_t size, long long passes, uint64_t check, char *name, int csv) { + uint64_t min = size, hash; + long long original_start = _ustime(), original_end; + for (long long i=passes; i > 0; i--) { + hash = crc64(0, data, size); + } + original_end = _ustime(); + min = (original_end - original_start) * 1000 / passes; + /* approximate nanoseconds without nstime */ + if (csv) { + printf("%s,%" PRIu64 ",%" PRIu64 ",%d\n", + name, size, (1000 * size) / min, hash == check); + } else { + printf("test size=%" PRIu64 " algorithm=%s %" PRIu64 " M/sec matches=%d\n", + size, name, (1000 * size) / min, hash == check); + } + return hash != check; } + +const uint64_t BENCH_RPOLY = UINT64_C(0x95ac9329ac4bc9b5); + +static void bench_combine(char *label, uint64_t size, uint64_t expect, int csv) { + uint64_t min = size, start = expect, thash = expect ^ (expect >> 17); + long long original_start = _ustime(), original_end; + for (int i=0; i < 1000; i++) { + crc64_combine(thash, start, size, BENCH_RPOLY, 64); + } + original_end = _ustime(); + /* ran 1000 times, want ns per, counted us per 1000 ...
*/ + min = original_end - original_start; + if (csv) { + printf("%s,%" PRIu64 ",%" PRIu64 "\n", label, size, min); + } else { + printf("%s size=%" PRIu64 " in %" PRIu64 " nsec\n", label, size, min); + } +} + +static void genBenchmarkRandomData(char *data, int count) { + static uint32_t state = 1234; + int i = 0; + + while (count--) { + state = (state*1103515245+12345); + data[i++] = '0'+((state>>16)&63); + } +} + #define UNUSED(x) (void)(x) int crc64Test(int argc, char *argv[], int flags) { - UNUSED(argc); - UNUSED(argv); UNUSED(flags); - crc64_init(); - printf("[calcula]: e9c6d914c4b8d9ca == %016" PRIx64 "\n", - (uint64_t)_crc64(0, "123456789", 9)); - printf("[64speed]: e9c6d914c4b8d9ca == %016" PRIx64 "\n", - (uint64_t)crc64(0, (unsigned char*)"123456789", 9)); - char li[] = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed " - "do eiusmod tempor incididunt ut labore et dolore magna " - "aliqua. Ut enim ad minim veniam, quis nostrud exercitation " - "ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis " - "aute irure dolor in reprehenderit in voluptate velit esse " - "cillum dolore eu fugiat nulla pariatur. Excepteur sint " - "occaecat cupidatat non proident, sunt in culpa qui officia " - "deserunt mollit anim id est laborum."; - printf("[calcula]: c7794709e69683b3 == %016" PRIx64 "\n", - (uint64_t)_crc64(0, li, sizeof(li))); - printf("[64speed]: c7794709e69683b3 == %016" PRIx64 "\n", - (uint64_t)crc64(0, (unsigned char*)li, sizeof(li))); + + uint64_t crc64_test_size = 0; + int i, lastarg, csv = 0, loop = 0, combine = 0; +again: + for (i = 3; i < argc; i++) { + lastarg = (i == (argc-1)); + if (!strcmp(argv[i],"--help")) { + goto usage; + } else if (!strcmp(argv[i],"--csv")) { + csv = 1; + } else if (!strcmp(argv[i],"-l")) { + loop = 1; + } else if (!strcmp(argv[i],"--crc")) { + if (lastarg) goto invalid; + crc64_test_size = atoll(argv[++i]); + } else if (!strcmp(argv[i],"--combine")) { + combine = 1; + } else { +invalid: + printf("Invalid option \"%s\" or option argument missing\n\n",argv[i]); +usage: + printf( +"Usage: crc64 [OPTIONS]\n\n" +" --csv Output in CSV format\n" +" -l Loop. Run the tests forever\n" +" --crc Benchmark crc64 faster options, using a buffer this big, and quit when done.\n" +" --combine Benchmark crc64 combine value ranges and timings.\n" + ); + return 1; + } + } + + if (crc64_test_size == 0 && combine == 0) { + crc64_init(); + printf("[calcula]: e9c6d914c4b8d9ca == %016" PRIx64 "\n", + (uint64_t)_crc64(0, "123456789", 9)); + printf("[64speed]: e9c6d914c4b8d9ca == %016" PRIx64 "\n", + (uint64_t)crc64(0, (unsigned char*)"123456789", 9)); + char li[] = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed " + "do eiusmod tempor incididunt ut labore et dolore magna " + "aliqua. Ut enim ad minim veniam, quis nostrud exercitation " + "ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis " + "aute irure dolor in reprehenderit in voluptate velit esse " + "cillum dolore eu fugiat nulla pariatur. 
Excepteur sint " + "occaecat cupidatat non proident, sunt in culpa qui officia " + "deserunt mollit anim id est laborum."; + printf("[calcula]: c7794709e69683b3 == %016" PRIx64 "\n", + (uint64_t)_crc64(0, li, sizeof(li))); + printf("[64speed]: c7794709e69683b3 == %016" PRIx64 "\n", + (uint64_t)crc64(0, (unsigned char*)li, sizeof(li))); + return 0; + + } + + int init_this_loop = 1; + long long init_start, init_end; + + do { + unsigned char* data = NULL; + uint64_t passes = 0; + if (crc64_test_size) { + data = zmalloc(crc64_test_size); + genBenchmarkRandomData((char*)data, crc64_test_size); + /* We want to hash about 1 gig of data in total, looped, to get a good + * idea of our performance. + */ + passes = (UINT64_C(0x100000000) / crc64_test_size); + passes = passes >= 2 ? passes : 2; + passes = passes <= 1000 ? passes : 1000; + } + + crc64_init(); + /* warm up the cache */ + set_crc64_cutoffs(crc64_test_size+1, crc64_test_size+1); + uint64_t expect = crc64(0, data, crc64_test_size); + + if (!combine && crc64_test_size) { + if (csv && init_this_loop) printf("algorithm,buffer,performance,crc64_matches\n"); + + /* get the single-character version for single-byte Redis behavior */ + set_crc64_cutoffs(0, crc64_test_size+1); + if (bench_crc64(data, crc64_test_size, passes, expect, "crc_1byte", csv)) return 1; + + set_crc64_cutoffs(crc64_test_size+1, crc64_test_size+1); + /* run with 8-byte "single" path, crcfaster */ + if (bench_crc64(data, crc64_test_size, passes, expect, "crcspeed", csv)) return 1; + + /* run with dual 8-byte paths */ + set_crc64_cutoffs(1, crc64_test_size+1); + if (bench_crc64(data, crc64_test_size, passes, expect, "crcdual", csv)) return 1; + + /* run with tri 8-byte paths */ + set_crc64_cutoffs(1, 1); + if (bench_crc64(data, crc64_test_size, passes, expect, "crctri", csv)) return 1; + + /* Be free memory region, be free. 
*/ + zfree(data); + data = NULL; + } + + uint64_t INIT_SIZE = UINT64_C(0xffffffffffffffff); + if (combine) { + if (init_this_loop) { + init_start = _ustime(); + crc64_combine( + UINT64_C(0xdeadbeefdeadbeef), + UINT64_C(0xfeebdaedfeebdaed), + INIT_SIZE, + BENCH_RPOLY, 64); + init_end = _ustime(); + + init_end -= init_start; + init_end *= 1000; + if (csv) { + printf("operation,size,nanoseconds\n"); + printf("init_64,%" PRIu64 ",%" PRIu64 "\n", INIT_SIZE, (uint64_t)init_end); + } else { + printf("init_64 size=%" PRIu64 " in %" PRIu64 " nsec\n", INIT_SIZE, (uint64_t)init_end); + } + /* use the hash itself as the size (unpredictable) */ + bench_combine("hash_as_size_combine", crc64_test_size, expect, csv); + + /* let's do something big (predictable, so fast) */ + bench_combine("largest_combine", INIT_SIZE, expect, csv); + } + bench_combine("combine", crc64_test_size, expect, csv); + } + init_this_loop = 0; + /* step down by ~1.641 for a range of test sizes */ + crc64_test_size -= (crc64_test_size >> 2) + (crc64_test_size >> 3) + (crc64_test_size >> 6); + } while (crc64_test_size > 3); + if (loop) goto again; return 0; } # endif + + +#ifdef SERVER_TEST_MAIN +int main(int argc, char *argv[]) { + return crc64Test(argc, argv, 0); +} #endif diff --git a/src/crccombine.c b/src/crccombine.c new file mode 100644 index 0000000000..4d9a18c65b --- /dev/null +++ b/src/crccombine.c @@ -0,0 +1,253 @@ +#include <inttypes.h> +#include <stdint.h> +#include <string.h> +#if defined(__i386__) || defined(__X86_64__) +#include <immintrin.h> +#endif +#include "crccombine.h" + +/* Copyright (C) 2013 Mark Adler + * Copyright (C) 2019-2024 Josiah Carlson + * Portions originally from: crc64.c Version 1.4 16 Dec 2013 Mark Adler + * Modifications by Josiah Carlson + * - Added implementation variations with sample timings for gf_matrix_times*() + * - Most folks would be best using gf2_matrix_times_vec or + * gf2_matrix_times_vec2, unless some processor does AVX2 fast. + * - This is the implementation of the MERGE_CRC macro defined in + * crcspeed.c (which calls crc_combine()), and is a specialization of the + * generic crc_combine() (and related from the 2013 edition of Mark Adler's + * crc64.c)) for the sake of clarity and performance. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler + madler@alumni.caltech.edu +*/ + +#define STATIC_ASSERT(VVV) do {int test = 1 / (VVV);test++;} while (0) + +#if !((defined(__i386__) || defined(__X86_64__))) + +/* This cuts 40% of the time vs bit-by-bit. */ + +uint64_t gf2_matrix_times_switch(uint64_t *mat, uint64_t vec) { + /* + * Without using any vector math, this handles 4 bits at a time, + * and saves 40+% of the time compared to the bit-by-bit version. Use if you + * have no vector compile option available to you.
With cache, we see: + * E5-2670 ~1-2us to extend ~1 meg 64 bit hash + */ + uint64_t sum; + + sum = 0; + while (vec) { + /* reversing the case order is ~10% slower on Xeon E5-2670 */ + switch (vec & 15) { + case 15: + sum ^= *mat ^ *(mat+1) ^ *(mat+2) ^ *(mat+3); + break; + case 14: + sum ^= *(mat+1) ^ *(mat+2) ^ *(mat+3); + break; + case 13: + sum ^= *mat ^ *(mat+2) ^ *(mat+3); + break; + case 12: + sum ^= *(mat+2) ^ *(mat+3); + break; + case 11: + sum ^= *mat ^ *(mat+1) ^ *(mat+3); + break; + case 10: + sum ^= *(mat+1) ^ *(mat+3); + break; + case 9: + sum ^= *mat ^ *(mat+3); + break; + case 8: + sum ^= *(mat+3); + break; + case 7: + sum ^= *mat ^ *(mat+1) ^ *(mat+2); + break; + case 6: + sum ^= *(mat+1) ^ *(mat+2); + break; + case 5: + sum ^= *mat ^ *(mat+2); + break; + case 4: + sum ^= *(mat+2); + break; + case 3: + sum ^= *mat ^ *(mat+1); + break; + case 2: + sum ^= *(mat+1); + break; + case 1: + sum ^= *mat; + break; + default: + break; + } + vec >>= 4; + mat += 4; + } + return sum; +} + +#define CRC_MULTIPLY gf2_matrix_times_switch + +#else + +/* + Warning: here there be dragons involving vector math, and macros to save us + from repeating the same information over and over. +*/ + +uint64_t gf2_matrix_times_vec2(uint64_t *mat, uint64_t vec) { + /* + * Uses xmm registers on x86, works basically everywhere fast, doing + * cycles of movqda, mov, shr, pand, and, pxor, at least on gcc 8. + * Is 9-11x faster than original. + * E5-2670 ~29us to extend ~1 meg 64 bit hash + * i3-8130U ~22us to extend ~1 meg 64 bit hash + */ + v2uq sum = {0, 0}, + *mv2 = (v2uq*)mat; + /* this table allows us to eliminate conditions during gf2_matrix_times_vec2() */ + static v2uq masks2[4] = { + {0,0}, + {-1,0}, + {0,-1}, + {-1,-1}, + }; + + /* Almost as beautiful as gf2_matrix_times_vec, but only half as many + * bits per step, so we need 2 per chunk4 operation. Faster in my tests. */ + +#define DO_CHUNK4() \ + sum ^= (*mv2++) & masks2[vec & 3]; \ + vec >>= 2; \ + sum ^= (*mv2++) & masks2[vec & 3]; \ + vec >>= 2 + +#define DO_CHUNK16() \ + DO_CHUNK4(); \ + DO_CHUNK4(); \ + DO_CHUNK4(); \ + DO_CHUNK4() + + DO_CHUNK16(); + DO_CHUNK16(); + DO_CHUNK16(); + DO_CHUNK16(); + + STATIC_ASSERT(sizeof(uint64_t) == 8); + STATIC_ASSERT(sizeof(long long unsigned int) == 8); + return sum[0] ^ sum[1]; +} + +#undef DO_CHUNK16 +#undef DO_CHUNK4 + +#define CRC_MULTIPLY gf2_matrix_times_vec2 +#endif + +static void gf2_matrix_square(uint64_t *square, uint64_t *mat, uint8_t dim) { + unsigned n; + + for (n = 0; n < dim; n++) + square[n] = CRC_MULTIPLY(mat, mat[n]); +} + +/* Turns out our Redis / Jones CRC cycles at this point, so we can support + * more than 64 bits of extension if we want. Trivially. */ +static uint64_t combine_cache[64][64]; + +/* Mark Adler has some amazing updates to crc.c in his crcany repository. I + * like static caches, and not worrying about finding cycles generally. We are + * okay to spend the 32k of memory here, leaving the algorithm unchanged from + * as it was a decade ago, and be happy that it costs <200 microseconds to + * init, and that subsequent calls to the combine function take under 100 + * nanoseconds. We also note that the crcany/crc.c code applies to any CRC, and + * we are currently targeting one: Jones CRC64. 
+ */ + +void init_combine_cache(uint64_t poly, uint8_t dim) { + unsigned n, cache_num = 0; + combine_cache[1][0] = poly; + int prev = 1; + uint64_t row = 1; + for (n = 1; n < dim; n++) + { + combine_cache[1][n] = row; + row <<= 1; + } + + gf2_matrix_square(combine_cache[0], combine_cache[1], dim); + gf2_matrix_square(combine_cache[1], combine_cache[0], dim); + + /* do/while to overwrite the first two layers, they are not used, but are + * re-generated in the last two layers for the Redis polynomial */ + do { + gf2_matrix_square(combine_cache[cache_num], combine_cache[cache_num + prev], dim); + prev = -1; + } while (++cache_num < 64); } + +/* Return the CRC-64 of two sequential blocks, where crc1 is the CRC-64 of the + * first block, crc2 is the CRC-64 of the second block, and len2 is the length + * of the second block. + * + * If you want reflections on your CRCs; do them outside before / after. + * WARNING: if you enable USE_STATIC_COMBINE_CACHE to make this fast, you MUST + * ALWAYS USE THE SAME POLYNOMIAL, otherwise you will get the wrong results. + * You MAY bzero() the even/odd static arrays, which will induce a re-cache on + * next call as a work-around, but ... maybe just parameterize the cached + * models at that point like Mark Adler does in modern crcany/crc.c . + */ + +uint64_t crc64_combine(uint64_t crc1, uint64_t crc2, uintmax_t len2, uint64_t poly, uint8_t dim) { + /* degenerate case */ + if (len2 == 0) + return crc1; + + unsigned cache_num = 0; + if (combine_cache[0][0] == 0) { + init_combine_cache(poly, dim); + } + + /* apply len2 zeros to crc1 (first square will put the operator for one + zero byte, eight zero bits, in even) */ + do + { + /* apply zeros operator for this bit of len2 */ + if (len2 & 1) + crc1 = CRC_MULTIPLY(combine_cache[cache_num], crc1); + len2 >>= 1; + cache_num = (cache_num + 1) & 63; + /* if no more bits set, then done */ + } while (len2 != 0); + + /* return combined crc */ + crc1 ^= crc2; + return crc1; } + +#undef CRC_MULTIPLY diff --git a/src/crccombine.h b/src/crccombine.h new file mode 100644 index 0000000000..8da7c5fe6a --- /dev/null +++ b/src/crccombine.h @@ -0,0 +1,10 @@ + +#include <stdint.h> + + +/* mask types */ +typedef unsigned long long v2uq __attribute__ ((vector_size (16))); + +uint64_t gf2_matrix_times_vec2(uint64_t *mat, uint64_t vec); +void init_combine_cache(uint64_t poly, uint8_t dim); +uint64_t crc64_combine(uint64_t crc1, uint64_t crc2, uintmax_t len2, uint64_t poly, uint8_t dim); diff --git a/src/crcspeed.c b/src/crcspeed.c index 9682d8e0be..c7073cba2f 100644 --- a/src/crcspeed.c +++ b/src/crcspeed.c @@ -1,11 +1,21 @@ /* * Copyright (C) 2013 Mark Adler + * Copyright (C) 2019-2024 Josiah Carlson * Originally by: crc64.c Version 1.4 16 Dec 2013 Mark Adler * Modifications by Matt Stancliff : * - removed CRC64-specific behavior * - added generation of lookup tables by parameters * - removed inversion of CRC input/result * - removed automatic initialization in favor of explicit initialization + * Modifications by Josiah Carlson + * - Added case/vector/AVX/+ versions of crc combine function; see crccombine.c + * - added optional static cache + * - Modified to use 1 thread to: + * - Partition large crc blobs into 2-3 segments + * - Process the 2-3 segments in parallel + * - Merge the resulting crcs + * -> Resulting in 10-90% performance boost for data > 1 meg + * - macro-ized to reduce copy/pasta This software is provided 'as-is', without any express or implied warranty.
In no event will the author be held liable for any damages @@ -28,6 +38,10 @@ */ #include "crcspeed.h" +#include "crccombine.h" + +#define CRC64_LEN_MASK UINT64_C(0x7ffffffffffffff8) +#define CRC64_REVERSED_POLY UINT64_C(0x95ac9329ac4bc9b5) /* Fill in a CRC constants table. */ void crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) { @@ -39,7 +53,7 @@ void crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) { table[0][n] = crcfn(0, &v, 1); } - /* generate nested CRC table for future slice-by-8 lookup */ + /* generate nested CRC table for future slice-by-8/16/24+ lookup */ for (int n = 0; n < 256; n++) { crc = table[0][n]; for (int k = 1; k < 8; k++) { @@ -47,6 +61,10 @@ void crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) { table[k][n] = crc; } } +#if USE_STATIC_COMBINE_CACHE + /* initialize combine cache for CRC stapling for slice-by 16/24+ */ + init_combine_cache(CRC64_REVERSED_POLY, 64); +#endif } void crcspeed16little_init(crcfn16 crcfn, uint16_t table[8][256]) { @@ -104,45 +122,151 @@ void crcspeed16big_init(crcfn16 fn, uint16_t big_table[8][256]) { } } +/* Note: doing all of our crc/next modifications *before* the crc table + * references is an absolute speedup on all CPUs tested. So... keep these + * macros separate. + */ + +#define DO_8_1(crc, next) \ + crc ^= *(uint64_t *)next; \ + next += 8 + +#define DO_8_2(crc) \ + crc = little_table[7][(uint8_t)crc] ^ \ + little_table[6][(uint8_t)(crc >> 8)] ^ \ + little_table[5][(uint8_t)(crc >> 16)] ^ \ + little_table[4][(uint8_t)(crc >> 24)] ^ \ + little_table[3][(uint8_t)(crc >> 32)] ^ \ + little_table[2][(uint8_t)(crc >> 40)] ^ \ + little_table[1][(uint8_t)(crc >> 48)] ^ \ + little_table[0][crc >> 56] + +#define CRC64_SPLIT(div) \ + olen = len; \ + next2 = next1 + ((len / div) & CRC64_LEN_MASK); \ + len = (next2 - next1) + +#define MERGE_CRC(crcn) \ + crc1 = crc64_combine(crc1, crcn, next2 - next1, CRC64_REVERSED_POLY, 64) + +#define MERGE_END(last, DIV) \ + len = olen - ((next2 - next1) * DIV); \ + next1 = last + +/* Variables so we can change for benchmarking; these seem to be fairly + * reasonable for Intel CPUs made since 2010. Please adjust as necessary if + * or when your CPU has more load / execute units. We've written benchmark code + * to help you tune your platform, see crc64Test. */ +#if defined(__i386__) || defined(__X86_64__) +static size_t CRC64_TRI_CUTOFF = (2*1024); +static size_t CRC64_DUAL_CUTOFF = (128); +#else +static size_t CRC64_TRI_CUTOFF = (16*1024); +static size_t CRC64_DUAL_CUTOFF = (1024); +#endif + + +void set_crc64_cutoffs(size_t dual_cutoff, size_t tri_cutoff) { + CRC64_DUAL_CUTOFF = dual_cutoff; + CRC64_TRI_CUTOFF = tri_cutoff; +} + /* Calculate a non-inverted CRC multiple bytes at a time on a little-endian * architecture. If you need inverted CRC, invert *before* calling and invert * *after* calling. 
- * 64 bit crc = process 8 bytes at once; + * 64 bit crc = process 8/16/24 bytes at once; */ -uint64_t crcspeed64little(uint64_t little_table[8][256], uint64_t crc, +uint64_t crcspeed64little(uint64_t little_table[8][256], uint64_t crc1, void *buf, size_t len) { - unsigned char *next = buf; + unsigned char *next1 = buf; + + if (CRC64_DUAL_CUTOFF < 1) { + goto final; + } /* process individual bytes until we reach an 8-byte aligned pointer */ - while (len && ((uintptr_t)next & 7) != 0) { - crc = little_table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8); + while (len && ((uintptr_t)next1 & 7) != 0) { + crc1 = little_table[0][(crc1 ^ *next1++) & 0xff] ^ (crc1 >> 8); len--; } - /* fast middle processing, 8 bytes (aligned!) per loop */ + if (len > CRC64_TRI_CUTOFF) { + /* 24 bytes per loop, doing 3 parallel 8 byte chunks at a time */ + unsigned char *next2, *next3; + uint64_t olen, crc2=0, crc3=0; + CRC64_SPLIT(3); + /* len is now the length of the first segment, the 3rd segment possibly + * having extra bytes to clean up at the end + */ + next3 = next2 + len; + while (len >= 8) { + len -= 8; + DO_8_1(crc1, next1); + DO_8_1(crc2, next2); + DO_8_1(crc3, next3); + DO_8_2(crc1); + DO_8_2(crc2); + DO_8_2(crc3); + } + + /* merge the 3 crcs */ + MERGE_CRC(crc2); + MERGE_CRC(crc3); + MERGE_END(next3, 3); + } else if (len > CRC64_DUAL_CUTOFF) { + /* 16 bytes per loop, doing 2 parallel 8 byte chunks at a time */ + unsigned char *next2; + uint64_t olen, crc2=0; + CRC64_SPLIT(2); + /* len is now the length of the first segment, the 2nd segment possibly + * having extra bytes to clean up at the end + */ + while (len >= 8) { + len -= 8; + DO_8_1(crc1, next1); + DO_8_1(crc2, next2); + DO_8_2(crc1); + DO_8_2(crc2); + } + + /* merge the 2 crcs */ + MERGE_CRC(crc2); + MERGE_END(next2, 2); + } + /* We fall through here to handle our <8 byte remainders, plus any lengths + * below the dual cutoff, with the original single-stream loop */ while (len >= 8) { - crc ^= *(uint64_t *)next; - crc = little_table[7][crc & 0xff] ^ - little_table[6][(crc >> 8) & 0xff] ^ - little_table[5][(crc >> 16) & 0xff] ^ - little_table[4][(crc >> 24) & 0xff] ^ - little_table[3][(crc >> 32) & 0xff] ^ - little_table[2][(crc >> 40) & 0xff] ^ - little_table[1][(crc >> 48) & 0xff] ^ - little_table[0][crc >> 56]; - next += 8; len -= 8; + DO_8_1(crc1, next1); + DO_8_2(crc1); } - +final: /* process remaining bytes (can't be larger than 8) */ while (len) { - crc = little_table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8); + crc1 = little_table[0][(crc1 ^ *next1++) & 0xff] ^ (crc1 >> 8); len--; } - return crc; + return crc1; } +/* clean up our namespace */ +#undef DO_8_1 +#undef DO_8_2 +#undef CRC64_SPLIT +#undef MERGE_CRC +#undef MERGE_END +#undef CRC64_REVERSED_POLY +#undef CRC64_LEN_MASK + + +/* note: similar perf advantages can be had for long strings in crc16 using all + * of the same optimizations as above; though this is unnecessary. crc16 is + * normally used to shard keys; not hash / verify data, so is used on shorter + * data that doesn't warrant such changes. */ + uint16_t crcspeed16little(uint16_t little_table[8][256], uint16_t crc, void *buf, size_t len) { unsigned char *next = buf; @@ -190,6 +314,10 @@ uint64_t crcspeed64big(uint64_t big_table[8][256], uint64_t crc, void *buf, len--; } + /* note: alignment + 2/3-way processing can probably be handled here nearly + the same as above, using our updated DO_8_2 macro. Not included in these + changes because, like other authors, I don't have big-endian hardware to test with.
*/ + while (len >= 8) { crc ^= *(uint64_t *)next; crc = big_table[0][crc & 0xff] ^ diff --git a/src/crcspeed.h b/src/crcspeed.h index d7ee95ebb5..c29f236bc0 100644 --- a/src/crcspeed.h +++ b/src/crcspeed.h @@ -34,6 +34,8 @@ typedef uint64_t (*crcfn64)(uint64_t, const void *, const uint64_t); typedef uint16_t (*crcfn16)(uint16_t, const void *, const uint64_t); +void set_crc64_cutoffs(size_t dual_cutoff, size_t tri_cutoff); + /* CRC-64 */ void crcspeed64little_init(crcfn64 fn, uint64_t table[8][256]); void crcspeed64big_init(crcfn64 fn, uint64_t table[8][256]); From 8abeb79f52cbf02d16d546f48e16592173593132 Mon Sep 17 00:00:00 2001 From: Shivshankar Date: Wed, 1 May 2024 12:15:19 -0400 Subject: [PATCH 6/9] Rename redis in aof logs and proc title redis-aof-rewrite to valkey-aof-rewrite (#393) Renamed redis to valkey/server in aof.c server logs. The AOF rewrite child process title is set to "redis-aof-rewrite" if Valkey was started from a redis-server symlink, otherwise to "valkey-aof-rewrite". This is a breaking change since the logs are changed. Part of #207. --------- Signed-off-by: Shivshankar-Reddy --- src/aof.c | 18 +++++++++++------- src/cluster_legacy.c | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/aof.c b/src/aof.c index cb3d83c0cf..00914d1e65 100644 --- a/src/aof.c +++ b/src/aof.c @@ -996,7 +996,7 @@ int startAppendOnly(void) { if (rewriteAppendOnlyFileBackground() == C_ERR) { server.aof_state = AOF_OFF; - serverLog(LL_WARNING,"Redis needs to enable the AOF but can't trigger a background AOF rewrite operation. Check the above logs for more info about the error."); + serverLog(LL_WARNING,"The server needs to enable the AOF but can't trigger a background AOF rewrite operation. Check the above logs for more info about the error."); return C_ERR; } } @@ -1121,7 +1121,7 @@ void flushAppendOnlyFile(int force) { /* Otherwise fall through, and go write since we can't wait * over two seconds. */ server.aof_delayed_fsync++; - serverLog(LL_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis."); + serverLog(LL_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). Writing the AOF buffer without waiting for fsync to complete, this may slow down the server."); } } /* We want to perform a single write. This should be guaranteed atomic @@ -1183,7 +1183,7 @@ void flushAppendOnlyFile(int force) { if (ftruncate(server.aof_fd, server.aof_last_incr_size) == -1) { if (can_log) { serverLog(LL_WARNING, "Could not remove short write " - "from the append-only file. Redis may refuse " + "from the append-only file. The server may refuse " "to load the AOF the next time it starts. " "ftruncate: %s", strerror(errno)); } @@ -1224,7 +1224,7 @@ void flushAppendOnlyFile(int force) { * OK state and log the event. */ if (server.aof_last_write_status == C_ERR) { serverLog(LL_NOTICE, - "AOF write error looks solved, Redis can write again."); + "AOF write error looks solved. The server can write again."); server.aof_last_write_status = C_OK; } } @@ -1634,14 +1634,14 @@ int loadSingleAppendOnlyFile(char *filename) { } } serverLog(LL_WARNING, "Unexpected end of file reading the append only file %s. You can: " - "1) Make a backup of your AOF file, then use ./redis-check-aof --fix <filename>. 
" "2) Alternatively you can set the 'aof-load-truncated' configuration option to yes and restart the server.", filename); ret = AOF_FAILED; goto cleanup; fmterr: /* Format error. */ serverLog(LL_WARNING, "Bad file format reading the append only file %s: " - "make a backup of your AOF file, then use ./redis-check-aof --fix ", filename); + "make a backup of your AOF file, then use ./valkey-check-aof --fix ", filename); ret = AOF_FAILED; /* fall through to cleanup. */ @@ -2471,7 +2471,11 @@ int rewriteAppendOnlyFileBackground(void) { char tmpfile[256]; /* Child */ - serverSetProcTitle("redis-aof-rewrite"); + if (strstr(server.exec_argv[0],"redis-server") != NULL) { + serverSetProcTitle("redis-aof-rewrite"); + } else { + serverSetProcTitle("valkey-aof-rewrite"); + } serverSetCpuAffinity(server.aof_rewrite_cpulist); snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); if (rewriteAppendOnlyFile(tmpfile) == C_OK) { diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 3e7e40c8a2..6754a0b8e0 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -808,7 +808,7 @@ int clusterLockConfig(char *filename) { * we need save `fd` to `cluster_config_file_lock_fd`, so that in serverFork(), * it will be closed in the child process. * If it is not closed, when the main process is killed -9, but the child process - * (redis-aof-rewrite) is still alive, the fd(lock) will still be held by the + * (valkey-aof-rewrite) is still alive, the fd(lock) will still be held by the * child process, and the main process will fail to get lock, means fail to start. */ server.cluster_config_file_lock_fd = fd; #else From 68ca258b0f1bb01ae720a903b35f22102581da20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rolandas=20=C5=A0imkus?= Date: Thu, 2 May 2024 15:53:37 +0300 Subject: [PATCH 7/9] Changed links and naming to valkey instead of redis (#389) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a minor change where only naming and links now points properly to valkey. Fixes #388 --------- Signed-off-by: Rolandas Šimkus Signed-off-by: simkusr Signed-off-by: simkusr Signed-off-by: Viktor Söderqvist Co-authored-by: simkusr Co-authored-by: Viktor Söderqvist --- 00-RELEASENOTES | 6 +++--- MANIFESTO | 4 ++-- deps/README.md | 24 ++++++++++++------------ deps/linenoise/README.markdown | 2 +- src/commands/README.md | 2 +- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/00-RELEASENOTES b/00-RELEASENOTES index 0a5a8e4dd6..58f7f8dd18 100644 --- a/00-RELEASENOTES +++ b/00-RELEASENOTES @@ -1,5 +1,5 @@ Hello! This file is just a placeholder, since this is the "unstable" branch -of Redis, the place where all the development happens. +of Valkey, the place where all the development happens. There is no release notes for this branch, it gets forked into another branch every time there is a partial feature freeze in order to eventually create @@ -9,8 +9,8 @@ Usually "unstable" is stable enough for you to use it in development environment however you should never use it in production environments. It is possible to download the latest stable release here: - https://download.redis.io/redis-stable.tar.gz + https://valkey.io/download/ -More information is available at https://redis.io +More information is available at https://valkey.io Happy hacking! 
diff --git a/MANIFESTO b/MANIFESTO index 3727894624..01f2f2f33b 100644 --- a/MANIFESTO +++ b/MANIFESTO @@ -1,5 +1,5 @@ -[Note: this is the Redis manifesto, for general information about - installing and running Redis read the README file instead.] +[Note: This was the manifesto of Redis. It does not represent the ideals of Valkey, but is + kept in remembrance for the ideals that Salvatore had for the project.] Redis Manifesto =============== diff --git a/deps/README.md b/deps/README.md index 8da051baa7..8a04f04b00 100644 --- a/deps/README.md +++ b/deps/README.md @@ -1,9 +1,9 @@ -This directory contains all Redis dependencies, except for the libc that +This directory contains all Valkey dependencies, except for the libc that should be provided by the operating system. * **Jemalloc** is our memory allocator, used as replacement for libc malloc on Linux by default. It has good performances and excellent fragmentation behavior. This component is upgraded from time to time. * **hiredis** is the official C client library for Redis. It is used by redis-cli, redis-benchmark and Redis Sentinel. It is part of the Redis official ecosystem but is developed externally from the Redis repository, so we just upgrade it as needed. -* **linenoise** is a readline replacement. It is developed by the same authors of Redis but is managed as a separated project and updated as needed. +* **linenoise** is a readline replacement. It is developed by the same authors of Valkey but is managed as a separated project and updated as needed. * **lua** is Lua 5.1 with minor changes for security and additional libraries. * **hdr_histogram** Used for per-command latency tracking histograms. @@ -13,10 +13,10 @@ How to upgrade the above dependencies Jemalloc --- -Jemalloc is modified with changes that allow us to implement the Redis -active defragmentation logic. However this feature of Redis is not mandatory -and Redis is able to understand if the Jemalloc version it is compiled -against supports such Redis-specific modifications. So in theory, if you +Jemalloc is modified with changes that allow us to implement the Valkey +active defragmentation logic. However this feature of Valkey is not mandatory +and Valkey is able to understand if the Jemalloc version it is compiled +against supports such Valkey-specific modifications. So in theory, if you are not interested in the active defragmentation, you can replace Jemalloc just following these steps: @@ -28,7 +28,7 @@ just following these steps: Jemalloc configuration script is broken and will not work nested in another git repository. -However note that we change Jemalloc settings via the `configure` script of Jemalloc using the `--with-lg-quantum` option, setting it to the value of 3 instead of 4. This provides us with more size classes that better suit the Redis data structures, in order to gain memory efficiency. +However note that we change Jemalloc settings via the `configure` script of Jemalloc using the `--with-lg-quantum` option, setting it to the value of 3 instead of 4. This provides us with more size classes that better suit the Valkey data structures, in order to gain memory efficiency. If you want to upgrade Jemalloc while also providing support for active defragmentation, in addition to the above steps you need to perform @@ -38,7 +38,7 @@ the following additional steps: to add `#define JEMALLOC_FRAG_HINT`. 6. Implement the function `je_get_defrag_hint()` inside `src/jemalloc.c`. 
You can see how it is implemented in the current Jemalloc source tree shipped - with Redis, and rewrite it according to the new Jemalloc internals, if they + with Valkey, and rewrite it according to the new Jemalloc internals, if they changed, otherwise you could just copy the old implementation if you are upgrading just to a similar version of Jemalloc. @@ -61,7 +61,7 @@ cd deps/jemalloc Hiredis --- -Hiredis is used by Sentinel, `redis-cli` and `redis-benchmark`. Like Redis, uses the SDS string library, but not necessarily the same version. In order to avoid conflicts, this version has all SDS identifiers prefixed by `hi`. +Hiredis is used by Sentinel, `valkey-cli` and `valkey-benchmark`. Like Valkey, it uses the SDS string library, but not necessarily the same version. In order to avoid conflicts, this version has all SDS identifiers prefixed by `hi`. 1. `git subtree pull --prefix deps/hiredis https://github.com/redis/hiredis.git --squash`
This should hopefully merge the local changes into the new version. @@ -71,7 +71,7 @@ Linenoise --- Linenoise is rarely upgraded as needed. The upgrade process is trivial since -Redis uses a non modified version of linenoise, so to upgrade just do the +Valkey uses a non modified version of linenoise, so to upgrade just do the following: 1. Remove the linenoise directory. @@ -81,11 +81,11 @@ Lua --- We use Lua 5.1 and no upgrade is planned currently, since we don't want to break -Lua scripts for new Lua features: in the context of Redis Lua scripts the +Lua scripts for new Lua features: in the context of Valkey Lua scripts the capabilities of 5.1 are usually more than enough, the release is rock solid, and we definitely don't want to break old scripts. -So upgrading of Lua is up to the Redis project maintainers and should be a +So upgrading of Lua is up to the Valkey project maintainers and should be a manual procedure performed by taking a diff between the different versions. Currently we have at least the following differences between official Lua 5.1 diff --git a/deps/linenoise/README.markdown b/deps/linenoise/README.markdown index b3752da162..fdfc5c666c 100644 --- a/deps/linenoise/README.markdown +++ b/deps/linenoise/README.markdown @@ -17,7 +17,7 @@ Line editing with some support for history is a really important feature for com So what usually happens is either: * Large programs with configure scripts disabling line editing if readline is not present in the system, or not supporting it at all since readline is GPL licensed and libedit (the BSD clone) is not as known and available as readline is (Real world example of this problem: Tclsh). - * Smaller programs not using a configure script not supporting line editing at all (A problem we had with Redis-cli for instance). + * Smaller programs not using a configure script not supporting line editing at all (A problem we had with Valkey-cli for instance). The result is a pollution of binaries without line editing support. diff --git a/src/commands/README.md b/src/commands/README.md index 0a4d730f1b..d4c27b2448 100644 --- a/src/commands/README.md +++ b/src/commands/README.md @@ -2,7 +2,7 @@ This directory contains JSON files, one for each command. Each JSON contains all the information about the command itself, but these JSON files are not to be used directly! Any third party who needs access to command information must get it from `COMMAND INFO` and `COMMAND DOCS`. -The output can be extracted in a JSON format by using `redis-cli --json`, in the same manner as in `utils/generate-commands-json.py`. +The output can be extracted in a JSON format by using `valkey-cli --json`, in the same manner as in `utils/generate-commands-json.py`. The JSON files are used to generate commands.def within this repo and JSON files for documentation, and despite looking similar to the output of `COMMAND` there are some fields and flags that are implicitly populated, and that's the From d1de34930af0fa371dc703b9d8d2852ebd0b8f2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= Date: Thu, 2 May 2024 17:02:38 +0200 Subject: [PATCH 8/9] Document the commands JSON files (#403) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These JSON files were originally not intended to be used directly, since they contain internals and some fields like "acl_categories" that are not the final ACL categories. (Valkey will apply some implicit rules to compute the final ACL categories.)
However, people see JSON files and use them directly anyway. So it's better to document them. In a later PR, we can get rid of all implicit ACL categories and instead populate them explicitly in the JSON files. Then, we'll add a validation (e.g. in generate-command-code.py) that the implied categories are set. --------- Signed-off-by: Viktor Söderqvist Co-authored-by: Binbin --- src/commands/README.md | 222 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 214 insertions(+), 8 deletions(-) diff --git a/src/commands/README.md b/src/commands/README.md index d4c27b2448..27bcef50a9 100644 --- a/src/commands/README.md +++ b/src/commands/README.md @@ -1,13 +1,219 @@ +Command JSON files +================== + This directory contains JSON files, one for each command. -Each JSON contains all the information about the command itself, but these JSON files are not to be used directly! -Any third party who needs access to command information must get it from `COMMAND INFO` and `COMMAND DOCS`. -The output can be extracted in a JSON format by using `valkey-cli --json`, in the same manner as in `utils/generate-commands-json.py`. +Each JSON file contains all the information about the command itself. It is the +"single source of truth" (SSOT) for the command's metadata. + +These JSON files were originally not intended to be used directly, since they +contain internals and some fields like "acl_categories" that are not the final +ACL categories. (Valkey will apply some implicit rules to compute the final ACL +categories.) However, people see JSON files and use them directly anyway. + +Any third party who needs access to command information was originally supposed +to get it from `COMMAND INFO` and `COMMAND DOCS`. These commands can be combined +into a JSON file by the script `utils/generate-commands-json.py`. Confusingly +enough, this JSON file has a slightly different format! + +Structure +--------- + +Each JSON file contains an object with a single key. The key is the command name +in uppercase, e.g. "HSCAN" (hscan.json). The value is a JSON object with the +following keys. To be safe, assume all of them are optional. An abridged example +follows the list. + +* `"summary"`: a string with a short description of the command. One sentence. +* `"complexity"`: a string like `"O(1)"` or longer, like `"O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. N is the number of elements inside the collection."`. +* `"group"`: a string used for categorization in documentation. One of these: + * `"bitmap"` + * `"cluster"` + * `"connection"` + * `"generic"` + * `"geo"` + * `"hash"` + * `"hyperloglog"` + * `"list"` + * `"pubsub"` + * `"scripting"` + * `"sentinel"` + * `"server"` + * `"set"` + * `"sorted_set"` + * `"stream"` + * `"string"` + * `"transactions"` +* `"since"`: a string with a version number, like "7.0.0". It's the version + (Redis OSS or Valkey) where the command was introduced. +* `"arity"`: The number of arguments, including the command name itself. A + negative number means "at least", e.g. -3 means at least 3. +* `"container"`: Only present for subcommands. See below. +* `"history"`: An array of changes, each change represented by a 2-element array + of the form `[VERSION, DESCRIPTION]`. Omit if empty. Don't add an empty array. +* `"function"`: The name of the C function in Valkey's source code implementing + the command. (Don't use it for anything else.) +* `"command_flags"`: An array of flags represented as strings.
Command flags: + * `"ADMIN"` + * `"ALLOW_BUSY"` + * `"ASKING"` + * `"BLOCKING"` + * `"DENYOOM"` + * `"FAST"` + * `"LOADING"` + * `"MAY_REPLICATE"` + * `"NO_ASYNC_LOADING"` + * `"NO_AUTH"` + * `"NO_MANDATORY_KEYS"` + * `"NO_MULTI"` + * `"NOSCRIPT"` + * `"ONLY_SENTINEL"` + * `"PROTECTED"` + * `"PUBSUB"` + * `"READONLY"` + * `"SENTINEL"` + * `"SKIP_MONITOR"` + * `"SKIP_SLOWLOG"` + * `"STALE"` + * `"TOUCHES_ARBITRARY_KEYS"` + * `"WRITE"` +* `"acl_categories"`: A list of ACL categories in uppercase. Note that the + effective ACL categories include "implicit ACL categories" explained below. + * `"ADMIN"` + * `"BITMAP"` + * `"CONNECTION"` + * `"DANGEROUS"` + * `"GEO"` + * `"HASH"` + * `"HYPERLOGLOG"` + * `"KEYSPACE"` + * `"LIST"` + * `"SCRIPTING"` + * `"SET"` + * `"SORTEDSET"` + * `"STREAM"` + * `"STRING"` + * `"TRANSACTION"` +* `"command_tips"`: Optional. A list of one or more of these strings: + * `"NONDETERMINISTIC_OUTPUT"` + * `"NONDETERMINISTIC_OUTPUT_ORDER"` + * `"REQUEST_POLICY:ALL_NODES"` + * `"REQUEST_POLICY:ALL_SHARDS"` + * `"REQUEST_POLICY:MULTI_SHARD"` + * `"REQUEST_POLICY:SPECIAL"` + * `"RESPONSE_POLICY:AGG_LOGICAL_AND"` + * `"RESPONSE_POLICY:AGG_MIN"` + * `"RESPONSE_POLICY:AGG_SUM"` + * `"RESPONSE_POLICY:ALL_SUCCEEDED"` + * `"RESPONSE_POLICY:ONE_SUCCEEDED"` + * `"RESPONSE_POLICY:SPECIAL"` +* `"key_specs"`: An array of key specifications. See below. +* `"reply_schema"`: A [JSON Schema](https://json-schema.org/) that describes the + reply of the command. This isn't complete. For example, since JSON Schema can't + distinguish arrays from sets, commands returning a set are declared to return + an array. +* `"arguments"`: An array of arguments. Each argument is an object with the following keys: + * `"name"`: A string identifying the argument. It's unique among the arguments. + * `"type"`: The type of the argument. + * `"block"`: A group of arguments. The elements are in the key `"arguments"`. + * `"double"`: A number, not necessarily an integer. + * `"integer"`: An integer. + * `"key"`: A string representing a key in the database. + * `"oneof"`: One of a list of alternatives. The alternatives are in the key `"arguments"`. + * `"pattern"`: A string representing a glob-style pattern. + * `"pure-token"`: A fixed string. The string is in the key `"token"`. + * `"string"`: A string. + * `"unix-time"`: An integer representing a unix time in either seconds or milliseconds. + * `"arguments"`: A list with the same structure as its parent. Present if type is "block" or "oneof". + * `"display"`: ("entries-read", "key" or "pattern") + * `"key_spec_index"`: An index into the `"key_specs"` array. Only if `"type"` is `"key"`. + * `"multiple":` true if the argument can be repeated multiple times. Omitted means false. + * `"multiple_token"`: Unclear meaning. Maybe meaningless. + * `"optional":` True if the argument is optional. Omitted means false. + * `"since"`: Version (string) when the argument was introduced. + * `"token"`: A string indicating a fixed string value. This is always present + if type is "pure-token". If type is anything else, then `"token"` indicates + the argument is preceded by an extra (fixed string) argument.
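+
+For example, an abridged `get.json` looks roughly like this (an illustrative
+sketch; the real file also contains `"reply_schema"` and other fields):
+
+    {
+        "GET": {
+            "summary": "Returns the string value of a key.",
+            "complexity": "O(1)",
+            "group": "string",
+            "since": "1.0.0",
+            "arity": 2,
+            "function": "getCommand",
+            "command_flags": ["READONLY", "FAST"],
+            "acl_categories": ["STRING"],
+            "key_specs": [
+                {
+                    "flags": ["RO", "ACCESS"],
+                    "begin_search": {"index": {"pos": 1}},
+                    "find_keys": {"range": {"lastkey": 0, "step": 1, "limit": 0}}
+                }
+            ],
+            "arguments": [
+                {"name": "key", "type": "key", "key_spec_index": 0}
+            ]
+        }
+    }
+
+Here `"key_spec_index": 0` ties the `key` argument to the first key spec; the
+`"begin_search"` and `"find_keys"` forms are explained under "Key specs" below.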
+
+Implicit ACL categories
+-----------------------
+
+The ACL categories specified as `"acl_categories"` are not the ones actually used.
+The effective ACL categories are affected also by command flags.
+
+The logic for this can be found in the function `setImplicitACLCategories()` in
+`server.c`. Here are the rules (unless they have changed since this
+documentation was written):
+
+* Command flag WRITE implies ACL category WRITE.
+* Command flag READONLY and not ACL category SCRIPTING implies ACL category READ.
+  "Exclude scripting commands from the RO category."
+* Command flag ADMIN implies ACL categories ADMIN and DANGEROUS.
+* Command flag PUBSUB implies ACL category PUBSUB.
+* Command flag FAST implies ACL category FAST.
+* Command flag BLOCKING implies ACL category BLOCKING.
+* Not ACL category FAST implies ACL category SLOW. "If it's not fast, it's slow."
+
+There's an issue about explicitly listing all categories, removing this
+discrepancy: https://github.com/valkey-io/valkey/issues/417
+
+Key specs
+---------
+
+Key specifications are specified in the array `"key_specs"` key of a command.
+
+Each element in this array is an object with the following keys:
+
+* `"flags"`: An array of strings indicating what kind of access the command does on the key.
+  * `"ACCESS"`
+  * `"DELETE"`
+  * `"INCOMPLETE"`
+  * `"INSERT"`
+  * `"NOT_KEY"`
+  * `"OW"`
+  * `"RM"`
+  * `"RO"`
+  * `"RW"`
+  * `"UPDATE"`
+  * `"VARIABLE_FLAGS"`
+* `"begin_search"`: How to find the first key used by this key spec. It's an
+  object with only one key. The key determines the method for finding the first
+  key. Here are the possible forms of the `"begin_search"` object:
+  * `{"index": {"pos": N}}`: The first key is at position N in the command line,
+    where 0 is the command name.
+  * `{"keyword": KEYWORD, "startfrom": N}`: The first key is found by searching
+    for an argument with the exact value KEYWORD starting from index N in the
+    command line. The first key is the argument after the keyword.
+  * `{"unknown": null}`: Finding the keys of this command is too complicated to
+    explain.
+* `"find_keys"`: How to find the remaining keys of this key spec (see the
+  example after this list). It's an object in one of these forms:
+  * `{"range": {"lastkey": LAST, "step": STEP, "limit": LIMIT}}`: A range of keys.
+    * LAST: If LAST is positive, it's the index of the last key relative to the
+      first key. If LAST is negative, then -1 is the end of the whole command
+      line, -2 is the penultimate argument of the command, and so on.
+    * STEP: The number of arguments to skip to find the next one. Typically 1.
+    * LIMIT: If LAST is -1, we use the limit to stop the search by a factor. 0
+      and 1 mean no limit. 2 means half of the remaining arguments, 3 means a
+      third, and so on.
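+
+For example, `MSET key value [key value ...]` can be declared with a single key
+spec (roughly as in `mset.json`; an abridged sketch): the first key is at index
+1, keys repeat every 2 arguments, and the last key is found at the end of the
+command line:
+
+    "key_specs": [
+        {
+            "flags": ["OW", "UPDATE"],
+            "begin_search": {"index": {"pos": 1}},
+            "find_keys": {"range": {"lastkey": -1, "step": 2, "limit": 0}}
+        }
+    ]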
+
+Commands with subcommands
+-------------------------
+
+Commands with subcommands are special. Examples of commands with subcommands are
+`CLUSTER` and `ACL`. Their first argument is a subcommand which determines the
+syntax of the rest of the command, which is stored in its own JSON file.
+
+For example `CLUSTER INFO` is stored in a file called `cluster-info.json`. The
+toplevel key is called `"INFO"`. Within the body, there's a key called
+`"container"` with the value `"CLUSTER"`. The file `cluster.json` exists but it
+doesn't have an `"arguments"` key. -The JSON files are used to generate commands.def within this repo and JSON files for documentation, and -despite looking similar to the output of `COMMAND` there are some fields and flags that are implicitly populated, and that's the -reason one shouldn't rely on the raw files. +Appendix +-------- -The `reply_schema` section is a standard JSON Schema (see https://json-schema.org/) that describes the reply of each command. -It is designed to someday be used to auto-generate code in client libraries, but is not yet mature and is not exposed externally. +How to list all the `group`, `command_flags` and `acl_categories`, etc. used in all these files: + cat *.json | jq '.[].group' | grep -F '"' | sed 's/^ *//;s/, *$//;s/^/ * `/;s/$/`/' | sort | uniq + cat *.json | jq '.[].command_flags' | grep -F '"' | sed 's/^ *//;s/, *$//;s/^/ * `/;s/$/`/' | sort | uniq + cat *.json | jq '.[].acl_categories' | grep -F '"' | sed 's/^ *//;s/, *$//;s/^/ * `/;s/$/`/' | sort | uniq + cat *.json | jq '.[].arguments[]?.type' | grep -F '"' | sed 's/^ *//;s/, *$//;s/^/ * `/;s/$/`/' | sort | uniq From 443d80f1686377ad42cbf92d98ecc6d240325ee1 Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Fri, 3 May 2024 00:36:07 +0900 Subject: [PATCH 9/9] Fix typo in comment in quicklist.h (#416) Signed-off-by: Ikko Eltociear Ashimine --- src/quicklist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/quicklist.h b/src/quicklist.h index 4658ba8271..0d26dff10b 100644 --- a/src/quicklist.h +++ b/src/quicklist.h @@ -68,7 +68,7 @@ typedef struct quicklistLZF { char compressed[]; } quicklistLZF; -/* Bookmarks are padded with realloc at the end of of the quicklist struct. +/* Bookmarks are padded with realloc at the end of the quicklist struct. * They should only be used for very big lists if thousands of nodes were the * excess memory usage is negligible, and there's a real need to iterate on them * in portions.