[hashset feature] Scan shortcut fix #1217

Open · wants to merge 2 commits into base: hashset
160 changes: 95 additions & 65 deletions src/hashset.c
@@ -438,48 +438,61 @@ static inline int cursorIsLessThan(size_t a, size_t b) {
return rev(a) < rev(b);
}

-/* Rehashes one bucket. */
+/* Rehashes buckets, following potential probe chains. This means that between
+ * rehash steps, any element that hashes to an already-rehashed bucket has been
+ * rehashed, even if it was moved to a different bucket due to open addressing.
+ * Knowing this allows scan to skip already-rehashed buckets. */
static void rehashStep(hashset *s) {
assert(hashsetIsRehashing(s));
size_t idx = s->rehash_idx;
-    bucket *b = &s->tables[0][idx];
-    int pos;
-    for (pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) {
-        if (!isPositionFilled(b, pos)) continue; /* empty */
-        void *element = b->elements[pos];
-        uint8_t h2 = b->hashes[pos];
-        /* Insert into table 1. */
-        uint64_t hash;
-        /* When shrinking, it's possible to avoid computing the hash. We can
-         * just use idx as the hash, but only if we know that probing didn't
-         * push this element away from its primary bucket, so only if the
-         * bucket before the current one hasn't ever been full. */
-        if (s->bucket_exp[1] <= s->bucket_exp[0] &&
-            !s->tables[0][prevCursor(idx, expToMask(s->bucket_exp[0]))].everfull) {
-            hash = idx;
-        } else {
-            hash = hashElement(s, element);
-        }
-        int pos_in_dst_bucket;
-        bucket *dst = findBucketForInsert(s, hash, &pos_in_dst_bucket, NULL);
-        dst->elements[pos_in_dst_bucket] = element;
-        dst->hashes[pos_in_dst_bucket] = h2;
-        dst->presence |= (1 << pos_in_dst_bucket);
-        if (!dst->everfull && bucketIsFull(dst)) {
-            dst->everfull = 1;
-            s->everfulls[1]++;
-        }
-        s->used[0]--;
-        s->used[1]++;
-    }
-    /* Mark the source bucket as empty. */
-    b->presence = 0;
-    /* Bucket done. Advance to the next bucket in probing order. We rehash in
-     * this order to be able to skip already rehashed buckets in scan. */
-    s->rehash_idx = nextCursor(s->rehash_idx, expToMask(s->bucket_exp[0]));
-    if (s->rehash_idx == 0) {
-        rehashingCompleted(s);
-    }
+    uint8_t prev_bucket_everfull = s->tables[0][prevCursor(idx, expToMask(s->bucket_exp[0]))].everfull;
Contributor:

Computing this requires a memory access that's not useful in the growing case. Growing is normally the most performance sensitive situation.

My guess is that it's cheaper to check it only when we need it...

    if (s->bucket_exp[1] <= s->bucket_exp[0] && ...here...)

On the other hand, the previous bucket has most likely been rehashed just before, so it's in the L1 cache already. Anyway, I'm not sure if one is better than the other.

Author:

Hmm, good point. I realized we only actually need to check when we're first starting a rehash - in all other cases we know that we ended rehashStep with a bucket without everfull set :)
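As a sketch of the reviewer's suggestion (illustration only, not the committed code; the names are the ones used in this diff): default the flag so that the growing path never reads the previous bucket, and pay the memory access only when shrinking, where the flag actually matters.

    uint8_t prev_bucket_everfull = 1; /* growing case: flag unused, hashElement() is taken anyway */
    if (s->bucket_exp[1] <= s->bucket_exp[0]) {
        /* Shrinking: the flag decides whether idx can be reused as the hash. */
        prev_bucket_everfull =
            s->tables[0][prevCursor(idx, expToMask(s->bucket_exp[0]))].everfull;
    }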

+    while (1) {
+        bucket *b = &s->tables[0][idx];
+        if (b->presence) {
+            for (int pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) {
+                if (!isPositionFilled(b, pos)) continue; /* empty */
+                void *element = b->elements[pos];
+                uint8_t h2 = b->hashes[pos];
+                /* Insert into table 1. */
+                uint64_t hash;
+                /* When shrinking, it's possible to avoid computing the hash. We can
+                 * just use idx as the hash, but only if we know that probing didn't
+                 * push this element away from its primary bucket, so only if the
+                 * bucket before the current one hasn't ever been full. */
+                if (s->bucket_exp[1] <= s->bucket_exp[0] && !prev_bucket_everfull) {
+                    hash = idx;
+                } else {
+                    hash = hashElement(s, element);
+                }
+                int pos_in_dst_bucket;
+                bucket *dst = findBucketForInsert(s, hash, &pos_in_dst_bucket, NULL);
+                dst->elements[pos_in_dst_bucket] = element;
+                dst->hashes[pos_in_dst_bucket] = h2;
+                dst->presence |= (1 << pos_in_dst_bucket);
+                if (!dst->everfull && bucketIsFull(dst)) {
+                    dst->everfull = 1;
+                    s->everfulls[1]++;
+                }
+                s->used[0]--;
+                s->used[1]++;
+            }
+        }
+        /* Mark the source bucket as empty. */
+        b->presence = 0;
+        /* Bucket done. Advance to the next bucket in probing order. We rehash in
+         * this order to be able to skip already rehashed buckets in scan. */
+        idx = nextCursor(idx, expToMask(s->bucket_exp[0]));
+        if (idx == 0) {
+            rehashingCompleted(s);
+            return;
+        }
+
+        /* Exit the loop if not in a probe sequence. */
+        if (!b->everfull) break;
+        prev_bucket_everfull = 1;
+    }
+
+    s->rehash_idx = idx;
}

/* Called internally on lookup and other reads to the table. */
@@ -1260,10 +1273,13 @@ size_t hashsetScan(hashset *s, size_t cursor, hashsetScanFunction fn, void *priv
* indicate that the full scan is completed. */
int cursor_passed_zero = 0;

-    /* Mask the start cursor to the bigger of the tables, so we can detect if we
+    /* Mask the start cursor to the smaller of the tables, so we can detect if we
     * come back to the start cursor and break the loop. It can happen if enough
     * tombstones (in both tables while rehashing) make us continue scanning. */
-    cursor = cursor & (expToMask(s->bucket_exp[0]) | expToMask(s->bucket_exp[1]));
+    cursor &= expToMask(s->bucket_exp[0]);
+    if (hashsetIsRehashing(s)) {
+        cursor &= expToMask(s->bucket_exp[1]);
+    }
Comment on lines -1263 to +1282
Contributor:

With this change, I guess there's a small increased risk of returning duplicates.

Time 1:

  • Table size: 16 buckets.
  • Scan order: 0 -> 8 -> 4 -> 12 -> 2 -> 10 -> 6 -> 14 -> 1 -> ... -> 15.
  • SCAN 0 returned new cursor 8.

Time 2:

  • Table sizes 16 and 2. Shrinking started. Scan order: 0 -> 1.
  • User continues previous scan with SCAN 8.
  • Old implementation: SCAN 8 returns elements from small table bucket 0 (masked with small table mask) and large table bucket 8 (masked with large table mask). Large table bucket 0 can safely be skipped.
  • New implementation: SCAN 8 returns elements from small table bucket 0 (masked with small table mask) and large table buckets 0 and 8 (expansion of cursor masked with small table mask).
  • Both implementations: Return new cursor 1.

Author:

Hmm, I see! Yes, that makes sense. I think it'll be fixed if I don't modify cursor here and instead apply the masks to start_cursor.

Contributor:

Yes, maybe. But it doesn't matter much, especially if we want to change probing to chaining anyway... Let's not spend too much effort on this.
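The scan orders quoted above come from the reverse-binary cursor increment used by hashsetScan. The following self-contained sketch reimplements rev() and nextCursor() in the same style as src/hashset.c (an illustration of the technique, not the source itself) and prints the visiting order for a 16-bucket table:

    #include <stdio.h>
    #include <stdint.h>

    /* Reverse the bits of a 64-bit word. */
    static uint64_t rev(uint64_t v) {
        uint64_t s = 64, mask = ~(uint64_t)0;
        while ((s >>= 1) > 0) {
            mask ^= (mask << s);
            v = ((v >> s) & mask) | ((v << s) & ~mask);
        }
        return v;
    }

    /* Advance the cursor by incrementing its bit-reversed form within the mask. */
    static uint64_t nextCursor(uint64_t cursor, uint64_t mask) {
        cursor |= ~mask;             /* set the bits not covered by the table mask */
        return rev(rev(cursor) + 1); /* increment the reversed cursor */
    }

    int main(void) {
        uint64_t cursor = 0;
        do {
            printf("%llu ", (unsigned long long)cursor);
            cursor = nextCursor(cursor, 15); /* mask for 16 buckets */
        } while (cursor != 0);
        printf("\n"); /* prints: 0 8 4 12 2 10 6 14 1 9 5 13 3 11 7 15 */
        return 0;
    }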

size_t start_cursor = cursor;
do {
in_probe_sequence = 0; /* Set to 1 if an ever-full bucket is scanned. */
Expand Down Expand Up @@ -1297,9 +1313,9 @@ size_t hashsetScan(hashset *s, size_t cursor, hashsetScanFunction fn, void *priv
size_t mask_small = expToMask(s->bucket_exp[table_small]);
size_t mask_large = expToMask(s->bucket_exp[table_large]);

-        /* Emit elements in the smaller table, if this bucket hasn't already
-         * been rehashed. */
-        if (table_small == 0 && !cursorIsLessThan(cursor, s->rehash_idx)) {
+        /* Emit elements in the smaller table if it's the new table or if
+         * this bucket hasn't been rehashed yet. */
+        if (table_small == 1 || !cursorIsLessThan(cursor, s->rehash_idx)) {
bucket *b = &s->tables[table_small][cursor & mask_small];
if (b->presence) {
for (int pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) {
@@ -1312,26 +1328,32 @@ size_t hashsetScan(hashset *s, size_t cursor, hashsetScanFunction fn, void *priv
in_probe_sequence |= b->everfull;
}

-        /* Iterate over indices in larger table that are the expansion of
-         * the index pointed to by the cursor in the smaller table. */
-        do {
-            /* Emit elements in bigger table. */
-            bucket *b = &s->tables[table_large][cursor & mask_large];
-            if (b->presence) {
-                for (int pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) {
-                    if (isPositionFilled(b, pos)) {
-                        void *emit = emit_ref ? &b->elements[pos] : b->elements[pos];
-                        fn(privdata, emit);
-                    }
-                }
-            }
-            in_probe_sequence |= b->everfull;
-
-            /* Increment the reverse cursor not covered by the smaller mask.*/
-            cursor = nextCursor(cursor, mask_large);
-
-            /* Continue while bits covered by mask difference is non-zero */
-        } while (cursor & (mask_small ^ mask_large) && cursor != start_cursor);
+        /* Emit elements in the larger table if it's the new table or if
+         * these buckets haven't been rehashed yet. */
+        if (table_large == 1 || !cursorIsLessThan(cursor, s->rehash_idx)) {
+            /* Iterate over indices in larger table that are the expansion
+             * of the index pointed to by the cursor in the smaller table. */
+            size_t large_cursor = cursor;
+            do {
+                /* Emit elements in bigger table. */
+                bucket *b = &s->tables[table_large][large_cursor & mask_large];
+                if (b->presence) {
+                    for (int pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) {
+                        if (isPositionFilled(b, pos)) {
+                            void *emit = emit_ref ? &b->elements[pos] : b->elements[pos];
+                            fn(privdata, emit);
+                        }
+                    }
+                }
+                in_probe_sequence |= b->everfull;
+
+                /* Increment the reverse cursor not covered by the smaller mask.*/
+                large_cursor = nextCursor(large_cursor, mask_large);
+
+                /* Continue while bits covered by mask difference is non-zero */
+            } while (large_cursor & (mask_small ^ mask_large));
+        }
+        cursor = nextCursor(cursor, mask_small);
    }
if (cursor == 0) {
cursor_passed_zero = 1;
@@ -1651,24 +1673,32 @@ void hashsetDump(hashset *s) {
}

void hashsetHistogram(hashset *s) {
-    for (int table = 0; table <= 1; table++) {
-        for (size_t idx = 0; idx < numBuckets(s->bucket_exp[table]); idx++) {
-            bucket *b = &s->tables[table][idx];
+    for (int table = 0; table <= 1 && s->bucket_exp[table] >= 0; table++) {
+        size_t mask = expToMask(s->bucket_exp[table]);
+        size_t cursor = 0;
+        do {
+            bucket *b = &s->tables[table][cursor];
            char c = b->presence == 0 && b->everfull ? 'X' : '0' + __builtin_popcount(b->presence);
            printf("%c", c);
-        }
+            cursor = nextCursor(cursor, mask);
+        } while (cursor != 0);
if (table == 0) printf(" ");
}
printf("\n");
}

void hashsetProbeMap(hashset *s) {
-    for (int table = 0; table <= 1; table++) {
-        for (size_t idx = 0; idx < numBuckets(s->bucket_exp[table]); idx++) {
-            bucket *b = &s->tables[table][idx];
+    for (int table = 0; table <= 1 && s->bucket_exp[table] >= 0; table++) {
+        size_t mask = expToMask(s->bucket_exp[table]);
+        size_t cursor = 0;
+        do {
+            bucket *b = &s->tables[table][cursor];
            char c = b->everfull ? 'X' : 'o';
            printf("%c", c);
-        }
+            cursor = nextCursor(cursor, mask);
+        } while (cursor != 0);
if (table == 0) printf(" ");
}
printf("\n");
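The skip condition in the scan hunks above, !cursorIsLessThan(cursor, s->rehash_idx), compares bucket positions in this same scan order rather than numerically, by comparing bit-reversed cursors. A small self-contained check (rev() as in the sketch above; consistent with cursorIsLessThan() in src/hashset.c, but an illustration rather than the source itself):

    #include <stdio.h>
    #include <stdint.h>

    /* Reverse the bits of a 64-bit word. */
    static uint64_t rev(uint64_t v) {
        uint64_t s = 64, mask = ~(uint64_t)0;
        while ((s >>= 1) > 0) {
            mask ^= (mask << s);
            v = ((v >> s) & mask) | ((v << s) & ~mask);
        }
        return v;
    }

    /* "Less than" in scan order: compare the bit-reversed cursors. */
    static int cursorIsLessThan(uint64_t a, uint64_t b) {
        return rev(a) < rev(b);
    }

    int main(void) {
        /* Scan order for 16 buckets is 0, 8, 4, 12, 2, 10, 6, 14, 1, ..., 15. */
        printf("%d\n", cursorIsLessThan(8, 4));  /* 1: 8 is scanned before 4 */
        printf("%d\n", cursorIsLessThan(12, 2)); /* 1: 12 is scanned before 2 */
        printf("%d\n", cursorIsLessThan(1, 14)); /* 0: 1 is scanned after 14 */
        return 0;
    }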
3 changes: 2 additions & 1 deletion src/unit/test_files.h
@@ -27,6 +27,7 @@ int test_instant_rehashing(int argc, char **argv, int flags);
int test_probing_chain_length(int argc, char **argv, int flags);
int test_two_phase_insert_and_pop(int argc, char **argv, int flags);
int test_scan(int argc, char **argv, int flags);
+int test_scan_with_rehashed_long_chain(int argc, char **argv, int flags);
int test_iterator(int argc, char **argv, int flags);
int test_safe_iterator(int argc, char **argv, int flags);
int test_random_element(int argc, char **argv, int flags);
@@ -165,7 +166,7 @@ unitTest __test_crc64_c[] = {{"test_crc64", test_crc64}, {NULL, NULL}};
unitTest __test_crc64combine_c[] = {{"test_crc64combine", test_crc64combine}, {NULL, NULL}};
unitTest __test_dict_c[] = {{"test_dictCreate", test_dictCreate}, {"test_dictAdd16Keys", test_dictAdd16Keys}, {"test_dictDisableResize", test_dictDisableResize}, {"test_dictAddOneKeyTriggerResize", test_dictAddOneKeyTriggerResize}, {"test_dictDeleteKeys", test_dictDeleteKeys}, {"test_dictDeleteOneKeyTriggerResize", test_dictDeleteOneKeyTriggerResize}, {"test_dictEmptyDirAdd128Keys", test_dictEmptyDirAdd128Keys}, {"test_dictDisableResizeReduceTo3", test_dictDisableResizeReduceTo3}, {"test_dictDeleteOneKeyTriggerResizeAgain", test_dictDeleteOneKeyTriggerResizeAgain}, {"test_dictBenchmark", test_dictBenchmark}, {NULL, NULL}};
unitTest __test_endianconv_c[] = {{"test_endianconv", test_endianconv}, {NULL, NULL}};
-unitTest __test_hashset_c[] = {{"test_cursor", test_cursor}, {"test_set_hash_function_seed", test_set_hash_function_seed}, {"test_add_find_delete", test_add_find_delete}, {"test_add_find_delete_avoid_resize", test_add_find_delete_avoid_resize}, {"test_instant_rehashing", test_instant_rehashing}, {"test_probing_chain_length", test_probing_chain_length}, {"test_two_phase_insert_and_pop", test_two_phase_insert_and_pop}, {"test_scan", test_scan}, {"test_iterator", test_iterator}, {"test_safe_iterator", test_safe_iterator}, {"test_random_element", test_random_element}, {"test_full_probe", test_full_probe}, {NULL, NULL}};
+unitTest __test_hashset_c[] = {{"test_cursor", test_cursor}, {"test_set_hash_function_seed", test_set_hash_function_seed}, {"test_add_find_delete", test_add_find_delete}, {"test_add_find_delete_avoid_resize", test_add_find_delete_avoid_resize}, {"test_instant_rehashing", test_instant_rehashing}, {"test_probing_chain_length", test_probing_chain_length}, {"test_two_phase_insert_and_pop", test_two_phase_insert_and_pop}, {"test_scan", test_scan}, {"test_scan_with_rehashed_long_chain", test_scan_with_rehashed_long_chain}, {"test_iterator", test_iterator}, {"test_safe_iterator", test_safe_iterator}, {"test_random_element", test_random_element}, {"test_full_probe", test_full_probe}, {NULL, NULL}};
unitTest __test_intset_c[] = {{"test_intsetValueEncodings", test_intsetValueEncodings}, {"test_intsetBasicAdding", test_intsetBasicAdding}, {"test_intsetLargeNumberRandomAdd", test_intsetLargeNumberRandomAdd}, {"test_intsetUpgradeFromint16Toint32", test_intsetUpgradeFromint16Toint32}, {"test_intsetUpgradeFromint16Toint64", test_intsetUpgradeFromint16Toint64}, {"test_intsetUpgradeFromint32Toint64", test_intsetUpgradeFromint32Toint64}, {"test_intsetStressLookups", test_intsetStressLookups}, {"test_intsetStressAddDelete", test_intsetStressAddDelete}, {NULL, NULL}};
unitTest __test_kvstore_c[] = {{"test_kvstoreAdd16Keys", test_kvstoreAdd16Keys}, {"test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict}, {NULL, NULL}};
unitTest __test_listpack_c[] = {{"test_listpackCreateIntList", test_listpackCreateIntList}, {"test_listpackCreateList", test_listpackCreateList}, {"test_listpackLpPrepend", test_listpackLpPrepend}, {"test_listpackLpPrependInteger", test_listpackLpPrependInteger}, {"test_listpackGetELementAtIndex", test_listpackGetELementAtIndex}, {"test_listpackPop", test_listpackPop}, {"test_listpackGetELementAtIndex2", test_listpackGetELementAtIndex2}, {"test_listpackIterate0toEnd", test_listpackIterate0toEnd}, {"test_listpackIterate1toEnd", test_listpackIterate1toEnd}, {"test_listpackIterate2toEnd", test_listpackIterate2toEnd}, {"test_listpackIterateBackToFront", test_listpackIterateBackToFront}, {"test_listpackIterateBackToFrontWithDelete", test_listpackIterateBackToFrontWithDelete}, {"test_listpackDeleteWhenNumIsMinusOne", test_listpackDeleteWhenNumIsMinusOne}, {"test_listpackDeleteWithNegativeIndex", test_listpackDeleteWithNegativeIndex}, {"test_listpackDeleteInclusiveRange0_0", test_listpackDeleteInclusiveRange0_0}, {"test_listpackDeleteInclusiveRange0_1", test_listpackDeleteInclusiveRange0_1}, {"test_listpackDeleteInclusiveRange1_2", test_listpackDeleteInclusiveRange1_2}, {"test_listpackDeleteWitStartIndexOutOfRange", test_listpackDeleteWitStartIndexOutOfRange}, {"test_listpackDeleteWitNumOverflow", test_listpackDeleteWitNumOverflow}, {"test_listpackBatchDelete", test_listpackBatchDelete}, {"test_listpackDeleteFooWhileIterating", test_listpackDeleteFooWhileIterating}, {"test_listpackReplaceWithSameSize", test_listpackReplaceWithSameSize}, {"test_listpackReplaceWithDifferentSize", test_listpackReplaceWithDifferentSize}, {"test_listpackRegressionGt255Bytes", test_listpackRegressionGt255Bytes}, {"test_listpackCreateLongListAndCheckIndices", test_listpackCreateLongListAndCheckIndices}, {"test_listpackCompareStrsWithLpEntries", test_listpackCompareStrsWithLpEntries}, {"test_listpackLpMergeEmptyLps", test_listpackLpMergeEmptyLps}, {"test_listpackLpMergeLp1Larger", test_listpackLpMergeLp1Larger}, {"test_listpackLpMergeLp2Larger", test_listpackLpMergeLp2Larger}, {"test_listpackLpNextRandom", test_listpackLpNextRandom}, {"test_listpackLpNextRandomCC", test_listpackLpNextRandomCC}, {"test_listpackRandomPairWithOneElement", test_listpackRandomPairWithOneElement}, {"test_listpackRandomPairWithManyElements", test_listpackRandomPairWithManyElements}, {"test_listpackRandomPairsWithOneElement", test_listpackRandomPairsWithOneElement}, {"test_listpackRandomPairsWithManyElements", test_listpackRandomPairsWithManyElements}, {"test_listpackRandomPairsUniqueWithOneElement", test_listpackRandomPairsUniqueWithOneElement}, {"test_listpackRandomPairsUniqueWithManyElements", test_listpackRandomPairsUniqueWithManyElements}, {"test_listpackPushVariousEncodings", test_listpackPushVariousEncodings}, {"test_listpackLpFind", test_listpackLpFind}, {"test_listpackLpValidateIntegrity", test_listpackLpValidateIntegrity}, {"test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN", test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN}, {"test_listpackStressWithRandom", test_listpackStressWithRandom}, {"test_listpackSTressWithVariableSize", test_listpackSTressWithVariableSize}, {"test_listpackBenchmarkInit", test_listpackBenchmarkInit}, {"test_listpackBenchmarkLpAppend", test_listpackBenchmarkLpAppend}, {"test_listpackBenchmarkLpFindString", test_listpackBenchmarkLpFindString}, {"test_listpackBenchmarkLpFindNumber", test_listpackBenchmarkLpFindNumber}, {"test_listpackBenchmarkLpSeek", test_listpackBenchmarkLpSeek}, 
{"test_listpackBenchmarkLpValidateIntegrity", test_listpackBenchmarkLpValidateIntegrity}, {"test_listpackBenchmarkLpCompareWithString", test_listpackBenchmarkLpCompareWithString}, {"test_listpackBenchmarkLpCompareWithNumber", test_listpackBenchmarkLpCompareWithNumber}, {"test_listpackBenchmarkFree", test_listpackBenchmarkFree}, {NULL, NULL}};
Expand Down