From 3879c1b18d27904d8a0d471748bd6e915475f580 Mon Sep 17 00:00:00 2001 From: Rain Valentine Date: Wed, 23 Oct 2024 22:56:24 +0000 Subject: [PATCH 1/2] fix rehashing shortcut used in hashsetScan Signed-off-by: Rain Valentine --- src/hashset.c | 160 ++++++++++++++++++++++++---------------- src/unit/test_files.h | 3 +- src/unit/test_hashset.c | 150 +++++++++++++++++++++++++++++++++++++ 3 files changed, 247 insertions(+), 66 deletions(-) diff --git a/src/hashset.c b/src/hashset.c index 707604b30f..732bb61368 100644 --- a/src/hashset.c +++ b/src/hashset.c @@ -438,48 +438,61 @@ static inline int cursorIsLessThan(size_t a, size_t b) { return rev(a) < rev(b); } -/* Rehashes one bucket. */ +/* Rehashes buckets, following potential probe chains. This means that between + * rehash steps, any element that hashes to an already-rehashed bucket has been + * rehashed, even if it was moved to a different bucket due to open addressing. + * Knowing this allows scan to skip already-rehashed buckets. */ static void rehashStep(hashset *s) { assert(hashsetIsRehashing(s)); size_t idx = s->rehash_idx; - bucket *b = &s->tables[0][idx]; - int pos; - for (pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) { - if (!isPositionFilled(b, pos)) continue; /* empty */ - void *element = b->elements[pos]; - uint8_t h2 = b->hashes[pos]; - /* Insert into table 1. */ - uint64_t hash; - /* When shrinking, it's possible to avoid computing the hash. We can - * just use idx has the hash, but only if we know that probing didn't - * push this element away from its primary bucket, so only if the - * bucket before the current one hasn't ever been full. */ - if (s->bucket_exp[1] <= s->bucket_exp[0] && - !s->tables[0][prevCursor(idx, expToMask(s->bucket_exp[0]))].everfull) { - hash = idx; - } else { - hash = hashElement(s, element); + uint8_t prev_bucket_everfull = s->tables[0][prevCursor(idx, expToMask(s->bucket_exp[0]))].everfull; + while (1) { + bucket *b = &s->tables[0][idx]; + if (b->presence) { + for (int pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) { + if (!isPositionFilled(b, pos)) continue; /* empty */ + void *element = b->elements[pos]; + uint8_t h2 = b->hashes[pos]; + /* Insert into table 1. */ + uint64_t hash; + /* When shrinking, it's possible to avoid computing the hash. We can + * just use idx has the hash, but only if we know that probing didn't + * push this element away from its primary bucket, so only if the + * bucket before the current one hasn't ever been full. */ + if (s->bucket_exp[1] <= s->bucket_exp[0] && !prev_bucket_everfull) { + hash = idx; + } else { + hash = hashElement(s, element); + } + int pos_in_dst_bucket; + bucket *dst = findBucketForInsert(s, hash, &pos_in_dst_bucket, NULL); + dst->elements[pos_in_dst_bucket] = element; + dst->hashes[pos_in_dst_bucket] = h2; + dst->presence |= (1 << pos_in_dst_bucket); + if (!dst->everfull && bucketIsFull(dst)) { + dst->everfull = 1; + s->everfulls[1]++; + } + s->used[0]--; + s->used[1]++; + } } - int pos_in_dst_bucket; - bucket *dst = findBucketForInsert(s, hash, &pos_in_dst_bucket, NULL); - dst->elements[pos_in_dst_bucket] = element; - dst->hashes[pos_in_dst_bucket] = h2; - dst->presence |= (1 << pos_in_dst_bucket); - if (!dst->everfull && bucketIsFull(dst)) { - dst->everfull = 1; - s->everfulls[1]++; + /* Mark the source bucket as empty. */ + b->presence = 0; + /* Bucket done. Advance to the next bucket in probing order. We rehash in + * this order to be able to skip already rehashed buckets in scan. */ + idx = nextCursor(idx, expToMask(s->bucket_exp[0])); + if (idx == 0) { + rehashingCompleted(s); + return; } - s->used[0]--; - s->used[1]++; - } - /* Mark the source bucket as empty. */ - b->presence = 0; - /* Bucket done. Advance to the next bucket in probing order. We rehash in - * this order to be able to skip already rehashed buckets in scan. */ - s->rehash_idx = nextCursor(s->rehash_idx, expToMask(s->bucket_exp[0])); - if (s->rehash_idx == 0) { - rehashingCompleted(s); + + /* exit loop if not in probe sequence */ + if (!b->everfull) break; + prev_bucket_everfull = 1; } + + s->rehash_idx = idx; } /* Called internally on lookup and other reads to the table. */ @@ -1260,10 +1273,13 @@ size_t hashsetScan(hashset *s, size_t cursor, hashsetScanFunction fn, void *priv * indicate that the full scan is completed. */ int cursor_passed_zero = 0; - /* Mask the start cursor to the bigger of the tables, so we can detect if we + /* Mask the start cursor to the smaller of the tables, so we can detect if we * come back to the start cursor and break the loop. It can happen if enough * tombstones (in both tables while rehashing) make us continue scanning. */ - cursor = cursor & (expToMask(s->bucket_exp[0]) | expToMask(s->bucket_exp[1])); + cursor &= expToMask(s->bucket_exp[0]); + if (hashsetIsRehashing(s)) { + cursor &= expToMask(s->bucket_exp[1]); + } size_t start_cursor = cursor; do { in_probe_sequence = 0; /* Set to 1 if an ever-full bucket is scanned. */ @@ -1297,9 +1313,9 @@ size_t hashsetScan(hashset *s, size_t cursor, hashsetScanFunction fn, void *priv size_t mask_small = expToMask(s->bucket_exp[table_small]); size_t mask_large = expToMask(s->bucket_exp[table_large]); - /* Emit elements in the smaller table, if this bucket hasn't already - * been rehashed. */ - if (table_small == 0 && !cursorIsLessThan(cursor, s->rehash_idx)) { + /* Emit elements in the smaller table if it's the new table or if + * this bucket hasn't been rehashed yet. */ + if (table_small == 1 || !cursorIsLessThan(cursor, s->rehash_idx)) { bucket *b = &s->tables[table_small][cursor & mask_small]; if (b->presence) { for (int pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) { @@ -1312,26 +1328,32 @@ size_t hashsetScan(hashset *s, size_t cursor, hashsetScanFunction fn, void *priv in_probe_sequence |= b->everfull; } - /* Iterate over indices in larger table that are the expansion of - * the index pointed to by the cursor in the smaller table. */ - do { - /* Emit elements in bigger table. */ - bucket *b = &s->tables[table_large][cursor & mask_large]; - if (b->presence) { - for (int pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) { - if (isPositionFilled(b, pos)) { - void *emit = emit_ref ? &b->elements[pos] : b->elements[pos]; - fn(privdata, emit); + /* Emit elements in the larger table if it's the new table or if + * these buckets haven't been rehashed yet. */ + if (table_large == 1 || !cursorIsLessThan(cursor, s->rehash_idx)) { + /* Iterate over indices in larger table that are the expansion + * of the index pointed to by the cursor in the smaller table. */ + size_t large_cursor = cursor; + do { + /* Emit elements in bigger table. */ + bucket *b = &s->tables[table_large][large_cursor & mask_large]; + if (b->presence) { + for (int pos = 0; pos < ELEMENTS_PER_BUCKET; pos++) { + if (isPositionFilled(b, pos)) { + void *emit = emit_ref ? &b->elements[pos] : b->elements[pos]; + fn(privdata, emit); + } } } - } - in_probe_sequence |= b->everfull; + in_probe_sequence |= b->everfull; - /* Increment the reverse cursor not covered by the smaller mask.*/ - cursor = nextCursor(cursor, mask_large); + /* Increment the reverse cursor not covered by the smaller mask.*/ + large_cursor = nextCursor(large_cursor, mask_large); - /* Continue while bits covered by mask difference is non-zero */ - } while (cursor & (mask_small ^ mask_large) && cursor != start_cursor); + /* Continue while bits covered by mask difference is non-zero */ + } while (large_cursor & (mask_small ^ mask_large)); + } + cursor = nextCursor(cursor, mask_small); } if (cursor == 0) { cursor_passed_zero = 1; @@ -1651,24 +1673,32 @@ void hashsetDump(hashset *s) { } void hashsetHistogram(hashset *s) { - for (int table = 0; table <= 1; table++) { - for (size_t idx = 0; idx < numBuckets(s->bucket_exp[table]); idx++) { - bucket *b = &s->tables[table][idx]; + for (int table = 0; table <= 1 && s->bucket_exp[table] >= 0; table++) { + size_t mask = expToMask(s->bucket_exp[table]); + size_t cursor = 0; + do { + bucket *b = &s->tables[table][cursor]; char c = b->presence == 0 && b->everfull ? 'X' : '0' + __builtin_popcount(b->presence); printf("%c", c); - } + + cursor = nextCursor(cursor, mask); + } while (cursor != 0); if (table == 0) printf(" "); } printf("\n"); } void hashsetProbeMap(hashset *s) { - for (int table = 0; table <= 1; table++) { - for (size_t idx = 0; idx < numBuckets(s->bucket_exp[table]); idx++) { - bucket *b = &s->tables[table][idx]; + for (int table = 0; table <= 1 && s->bucket_exp[table] >= 0; table++) { + size_t mask = expToMask(s->bucket_exp[table]); + size_t cursor = 0; + do { + bucket *b = &s->tables[table][cursor]; char c = b->everfull ? 'X' : 'o'; printf("%c", c); - } + + cursor = nextCursor(cursor, mask); + } while (cursor != 0); if (table == 0) printf(" "); } printf("\n"); diff --git a/src/unit/test_files.h b/src/unit/test_files.h index 3089be028d..40db748708 100644 --- a/src/unit/test_files.h +++ b/src/unit/test_files.h @@ -27,6 +27,7 @@ int test_instant_rehashing(int argc, char **argv, int flags); int test_probing_chain_length(int argc, char **argv, int flags); int test_two_phase_insert_and_pop(int argc, char **argv, int flags); int test_scan(int argc, char **argv, int flags); +int test_scan_with_rehashed_long_chain(int argc, char **argv, int flags); int test_iterator(int argc, char **argv, int flags); int test_safe_iterator(int argc, char **argv, int flags); int test_random_element(int argc, char **argv, int flags); @@ -165,7 +166,7 @@ unitTest __test_crc64_c[] = {{"test_crc64", test_crc64}, {NULL, NULL}}; unitTest __test_crc64combine_c[] = {{"test_crc64combine", test_crc64combine}, {NULL, NULL}}; unitTest __test_dict_c[] = {{"test_dictCreate", test_dictCreate}, {"test_dictAdd16Keys", test_dictAdd16Keys}, {"test_dictDisableResize", test_dictDisableResize}, {"test_dictAddOneKeyTriggerResize", test_dictAddOneKeyTriggerResize}, {"test_dictDeleteKeys", test_dictDeleteKeys}, {"test_dictDeleteOneKeyTriggerResize", test_dictDeleteOneKeyTriggerResize}, {"test_dictEmptyDirAdd128Keys", test_dictEmptyDirAdd128Keys}, {"test_dictDisableResizeReduceTo3", test_dictDisableResizeReduceTo3}, {"test_dictDeleteOneKeyTriggerResizeAgain", test_dictDeleteOneKeyTriggerResizeAgain}, {"test_dictBenchmark", test_dictBenchmark}, {NULL, NULL}}; unitTest __test_endianconv_c[] = {{"test_endianconv", test_endianconv}, {NULL, NULL}}; -unitTest __test_hashset_c[] = {{"test_cursor", test_cursor}, {"test_set_hash_function_seed", test_set_hash_function_seed}, {"test_add_find_delete", test_add_find_delete}, {"test_add_find_delete_avoid_resize", test_add_find_delete_avoid_resize}, {"test_instant_rehashing", test_instant_rehashing}, {"test_probing_chain_length", test_probing_chain_length}, {"test_two_phase_insert_and_pop", test_two_phase_insert_and_pop}, {"test_scan", test_scan}, {"test_iterator", test_iterator}, {"test_safe_iterator", test_safe_iterator}, {"test_random_element", test_random_element}, {"test_full_probe", test_full_probe}, {NULL, NULL}}; +unitTest __test_hashset_c[] = {{"test_cursor", test_cursor}, {"test_set_hash_function_seed", test_set_hash_function_seed}, {"test_add_find_delete", test_add_find_delete}, {"test_add_find_delete_avoid_resize", test_add_find_delete_avoid_resize}, {"test_instant_rehashing", test_instant_rehashing}, {"test_probing_chain_length", test_probing_chain_length}, {"test_two_phase_insert_and_pop", test_two_phase_insert_and_pop}, {"test_scan", test_scan}, {"test_scan_with_rehashed_long_chain", test_scan_with_rehashed_long_chain}, {"test_iterator", test_iterator}, {"test_safe_iterator", test_safe_iterator}, {"test_random_element", test_random_element}, {"test_full_probe", test_full_probe}, {NULL, NULL}}; unitTest __test_intset_c[] = {{"test_intsetValueEncodings", test_intsetValueEncodings}, {"test_intsetBasicAdding", test_intsetBasicAdding}, {"test_intsetLargeNumberRandomAdd", test_intsetLargeNumberRandomAdd}, {"test_intsetUpgradeFromint16Toint32", test_intsetUpgradeFromint16Toint32}, {"test_intsetUpgradeFromint16Toint64", test_intsetUpgradeFromint16Toint64}, {"test_intsetUpgradeFromint32Toint64", test_intsetUpgradeFromint32Toint64}, {"test_intsetStressLookups", test_intsetStressLookups}, {"test_intsetStressAddDelete", test_intsetStressAddDelete}, {NULL, NULL}}; unitTest __test_kvstore_c[] = {{"test_kvstoreAdd16Keys", test_kvstoreAdd16Keys}, {"test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict}, {NULL, NULL}}; unitTest __test_listpack_c[] = {{"test_listpackCreateIntList", test_listpackCreateIntList}, {"test_listpackCreateList", test_listpackCreateList}, {"test_listpackLpPrepend", test_listpackLpPrepend}, {"test_listpackLpPrependInteger", test_listpackLpPrependInteger}, {"test_listpackGetELementAtIndex", test_listpackGetELementAtIndex}, {"test_listpackPop", test_listpackPop}, {"test_listpackGetELementAtIndex2", test_listpackGetELementAtIndex2}, {"test_listpackIterate0toEnd", test_listpackIterate0toEnd}, {"test_listpackIterate1toEnd", test_listpackIterate1toEnd}, {"test_listpackIterate2toEnd", test_listpackIterate2toEnd}, {"test_listpackIterateBackToFront", test_listpackIterateBackToFront}, {"test_listpackIterateBackToFrontWithDelete", test_listpackIterateBackToFrontWithDelete}, {"test_listpackDeleteWhenNumIsMinusOne", test_listpackDeleteWhenNumIsMinusOne}, {"test_listpackDeleteWithNegativeIndex", test_listpackDeleteWithNegativeIndex}, {"test_listpackDeleteInclusiveRange0_0", test_listpackDeleteInclusiveRange0_0}, {"test_listpackDeleteInclusiveRange0_1", test_listpackDeleteInclusiveRange0_1}, {"test_listpackDeleteInclusiveRange1_2", test_listpackDeleteInclusiveRange1_2}, {"test_listpackDeleteWitStartIndexOutOfRange", test_listpackDeleteWitStartIndexOutOfRange}, {"test_listpackDeleteWitNumOverflow", test_listpackDeleteWitNumOverflow}, {"test_listpackBatchDelete", test_listpackBatchDelete}, {"test_listpackDeleteFooWhileIterating", test_listpackDeleteFooWhileIterating}, {"test_listpackReplaceWithSameSize", test_listpackReplaceWithSameSize}, {"test_listpackReplaceWithDifferentSize", test_listpackReplaceWithDifferentSize}, {"test_listpackRegressionGt255Bytes", test_listpackRegressionGt255Bytes}, {"test_listpackCreateLongListAndCheckIndices", test_listpackCreateLongListAndCheckIndices}, {"test_listpackCompareStrsWithLpEntries", test_listpackCompareStrsWithLpEntries}, {"test_listpackLpMergeEmptyLps", test_listpackLpMergeEmptyLps}, {"test_listpackLpMergeLp1Larger", test_listpackLpMergeLp1Larger}, {"test_listpackLpMergeLp2Larger", test_listpackLpMergeLp2Larger}, {"test_listpackLpNextRandom", test_listpackLpNextRandom}, {"test_listpackLpNextRandomCC", test_listpackLpNextRandomCC}, {"test_listpackRandomPairWithOneElement", test_listpackRandomPairWithOneElement}, {"test_listpackRandomPairWithManyElements", test_listpackRandomPairWithManyElements}, {"test_listpackRandomPairsWithOneElement", test_listpackRandomPairsWithOneElement}, {"test_listpackRandomPairsWithManyElements", test_listpackRandomPairsWithManyElements}, {"test_listpackRandomPairsUniqueWithOneElement", test_listpackRandomPairsUniqueWithOneElement}, {"test_listpackRandomPairsUniqueWithManyElements", test_listpackRandomPairsUniqueWithManyElements}, {"test_listpackPushVariousEncodings", test_listpackPushVariousEncodings}, {"test_listpackLpFind", test_listpackLpFind}, {"test_listpackLpValidateIntegrity", test_listpackLpValidateIntegrity}, {"test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN", test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN}, {"test_listpackStressWithRandom", test_listpackStressWithRandom}, {"test_listpackSTressWithVariableSize", test_listpackSTressWithVariableSize}, {"test_listpackBenchmarkInit", test_listpackBenchmarkInit}, {"test_listpackBenchmarkLpAppend", test_listpackBenchmarkLpAppend}, {"test_listpackBenchmarkLpFindString", test_listpackBenchmarkLpFindString}, {"test_listpackBenchmarkLpFindNumber", test_listpackBenchmarkLpFindNumber}, {"test_listpackBenchmarkLpSeek", test_listpackBenchmarkLpSeek}, {"test_listpackBenchmarkLpValidateIntegrity", test_listpackBenchmarkLpValidateIntegrity}, {"test_listpackBenchmarkLpCompareWithString", test_listpackBenchmarkLpCompareWithString}, {"test_listpackBenchmarkLpCompareWithNumber", test_listpackBenchmarkLpCompareWithNumber}, {"test_listpackBenchmarkFree", test_listpackBenchmarkFree}, {NULL, NULL}}; diff --git a/src/unit/test_hashset.c b/src/unit/test_hashset.c index c27d6b3edd..ae6de803e8 100644 --- a/src/unit/test_hashset.c +++ b/src/unit/test_hashset.c @@ -372,6 +372,156 @@ int test_scan(int argc, char **argv, int flags) { return 0; } +typedef struct { + uint64_t value; + uint64_t hash; +} mock_hash_element; + +static mock_hash_element* mock_hash_element_create(uint64_t value, uint64_t hash) { + mock_hash_element *element = malloc(sizeof(mock_hash_element)); + element->value = value; + element->hash = hash; + return element; +} + +static uint64_t mock_hash_element_get_hash(const void *element) { + if (element == NULL) return 0UL; + mock_hash_element *mock = (mock_hash_element *) element; + return (mock->hash != 0) ? mock->hash : mock->value; +} + +typedef struct scan_data_list_t { + struct scan_data_list_t *next; + size_t cursor; + uint64_t chain_hash; + uint8_t element_seen[]; +} scan_data_list; + +static void scan_data_list_scan_function(void *privdata, void *element) { + scan_data_list* scan_data = (scan_data_list*) privdata; + mock_hash_element* mock_element = (mock_hash_element*) element; + + if (mock_element->hash != scan_data->chain_hash) return; + if (mock_element->value == -1UL) return; + scan_data->element_seen[mock_element->value]++; +} + +int test_scan_with_rehashed_long_chain(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + // creates hashset with elements displaced due to open addressing, such that on a rehash they will move "backwards" + + const size_t num_random_elements = 128; + const size_t num_chained_elements = 40; + + const size_t buckets_to_displace = 6; + const size_t bucket_size = 7; // for 64 bit + const size_t num_chain_base_elements = buckets_to_displace * bucket_size; + + hashsetType type = { + .hashFunction = mock_hash_element_get_hash, + .elementDestructor = freekeyval, + }; + + /* Seed, to make sure each round is different. */ + randomSeed(); + + hashset *s = hashsetCreate(&type); + hashsetExpand(s, num_random_elements + num_chained_elements + num_chain_base_elements); + // add background random elements (helps to keep fill between limits, prevent unwanted rehash) + for (size_t i = 0; i < num_random_elements; i++) { + hashsetAdd(s, mock_hash_element_create((uint64_t) genrand64_int64(), 0)); + } + size_t initial_buckets = hashsetBuckets(s); + + // create long chain + randomSeed(); + uint64_t chain_hash = (uint64_t) genrand64_int64(); + // make base of chain + mock_hash_element* chain_base[num_chain_base_elements]; + if (chain_hash == 0) chain_hash++; + for (size_t i = 0; i < num_chain_base_elements; i++) { + chain_base[i] = mock_hash_element_create(-1, chain_hash); + hashsetAdd(s, chain_base[i]); + } + // add elements to end of chain + for (size_t i = 0; i < num_chained_elements; i++) { + hashsetAdd(s, mock_hash_element_create(i, chain_hash)); + } + // delete base of chain. This means that elements remaining on the end of the chain should move "backwards" on a rehash. + for (size_t i = 0; i < num_chain_base_elements; i++) { + hashsetDelete(s, chain_base[i]); + } + + printf("Bucket fill: "); + hashsetHistogram(s); + printf("probe map : "); + hashsetProbeMap(s); + + // unexpected rehash would invalidate the test + assert(hashsetBuckets(s) == initial_buckets); + + // create a "train" of scans such that there is a scan paused at every cursor position where hashsetScan() pauses + // rehash in step with + scan_data_list* scan_list = NULL; + int any_scan_finished = 0; + while (1) { + // insert new scan start of list + scan_data_list* new = (scan_data_list*) calloc(1, sizeof(scan_data_list) + num_chained_elements); + new->next = scan_list; + new->chain_hash = chain_hash; + new->cursor = hashsetScan(s, 0, scan_data_list_scan_function, new, 0); + scan_list = new; + + // step forward all scans + for (scan_data_list* current = scan_list; current != NULL; current = current->next) { + if (current->cursor == 0) { + any_scan_finished = 1; + } else { + current->cursor = hashsetScan(s, current->cursor, scan_data_list_scan_function, current, 0); + } + } + + if (!hashsetIsRehashing(s)) { + if (any_scan_finished) { + // begin a rehash by adding elements + while (!hashsetIsRehashing(s)) { + hashsetAdd(s, mock_hash_element_create((uint64_t) genrand64_int64(), 0)); + } + } + } else { + // continue rehash by one step + hashsetFind(s, NULL, NULL); + + // stop when rehashing finishes + if (!hashsetIsRehashing(s)) { + break; + } + } + } + + // ensure that scans didn't miss any of the probe chained elements + for (scan_data_list* current = scan_list; current != NULL; current = current->next) { + // finish scan if needed + while (current->cursor != 0) { + current->cursor = hashsetScan(s, current->cursor, scan_data_list_scan_function, current, 0); + } + + for (size_t i = 0; i < num_chained_elements; i++) { + TEST_ASSERT(current->element_seen[i] > 0); + } + } + + for (scan_data_list* current = scan_list; current != NULL;) { + scan_data_list* next = current->next; + free(current); + current = next; + } + hashsetRelease(s); + return 0; +} + int test_iterator(int argc, char **argv, int flags) { UNUSED(argc); UNUSED(argv); From 9bf0c39e51eddfb125897241af87678c667755e9 Mon Sep 17 00:00:00 2001 From: Rain Valentine Date: Wed, 23 Oct 2024 23:15:14 +0000 Subject: [PATCH 2/2] forgot clang format again Signed-off-by: Rain Valentine --- src/hashset.c | 14 ++++----- src/unit/test_hashset.c | 68 ++++++++++++++++++++++------------------- 2 files changed, 43 insertions(+), 39 deletions(-) diff --git a/src/hashset.c b/src/hashset.c index 732bb61368..7b9b1396df 100644 --- a/src/hashset.c +++ b/src/hashset.c @@ -456,9 +456,9 @@ static void rehashStep(hashset *s) { /* Insert into table 1. */ uint64_t hash; /* When shrinking, it's possible to avoid computing the hash. We can - * just use idx has the hash, but only if we know that probing didn't - * push this element away from its primary bucket, so only if the - * bucket before the current one hasn't ever been full. */ + * just use idx has the hash, but only if we know that probing didn't + * push this element away from its primary bucket, so only if the + * bucket before the current one hasn't ever been full. */ if (s->bucket_exp[1] <= s->bucket_exp[0] && !prev_bucket_everfull) { hash = idx; } else { @@ -480,7 +480,7 @@ static void rehashStep(hashset *s) { /* Mark the source bucket as empty. */ b->presence = 0; /* Bucket done. Advance to the next bucket in probing order. We rehash in - * this order to be able to skip already rehashed buckets in scan. */ + * this order to be able to skip already rehashed buckets in scan. */ idx = nextCursor(idx, expToMask(s->bucket_exp[0])); if (idx == 0) { rehashingCompleted(s); @@ -1331,7 +1331,7 @@ size_t hashsetScan(hashset *s, size_t cursor, hashsetScanFunction fn, void *priv /* Emit elements in the larger table if it's the new table or if * these buckets haven't been rehashed yet. */ if (table_large == 1 || !cursorIsLessThan(cursor, s->rehash_idx)) { - /* Iterate over indices in larger table that are the expansion + /* Iterate over indices in larger table that are the expansion * of the index pointed to by the cursor in the smaller table. */ size_t large_cursor = cursor; do { @@ -1680,7 +1680,7 @@ void hashsetHistogram(hashset *s) { bucket *b = &s->tables[table][cursor]; char c = b->presence == 0 && b->everfull ? 'X' : '0' + __builtin_popcount(b->presence); printf("%c", c); - + cursor = nextCursor(cursor, mask); } while (cursor != 0); if (table == 0) printf(" "); @@ -1696,7 +1696,7 @@ void hashsetProbeMap(hashset *s) { bucket *b = &s->tables[table][cursor]; char c = b->everfull ? 'X' : 'o'; printf("%c", c); - + cursor = nextCursor(cursor, mask); } while (cursor != 0); if (table == 0) printf(" "); diff --git a/src/unit/test_hashset.c b/src/unit/test_hashset.c index ae6de803e8..cb1f6da1aa 100644 --- a/src/unit/test_hashset.c +++ b/src/unit/test_hashset.c @@ -377,7 +377,7 @@ typedef struct { uint64_t hash; } mock_hash_element; -static mock_hash_element* mock_hash_element_create(uint64_t value, uint64_t hash) { +static mock_hash_element *mock_hash_element_create(uint64_t value, uint64_t hash) { mock_hash_element *element = malloc(sizeof(mock_hash_element)); element->value = value; element->hash = hash; @@ -386,7 +386,7 @@ static mock_hash_element* mock_hash_element_create(uint64_t value, uint64_t hash static uint64_t mock_hash_element_get_hash(const void *element) { if (element == NULL) return 0UL; - mock_hash_element *mock = (mock_hash_element *) element; + mock_hash_element *mock = (mock_hash_element *)element; return (mock->hash != 0) ? mock->hash : mock->value; } @@ -398,8 +398,8 @@ typedef struct scan_data_list_t { } scan_data_list; static void scan_data_list_scan_function(void *privdata, void *element) { - scan_data_list* scan_data = (scan_data_list*) privdata; - mock_hash_element* mock_element = (mock_hash_element*) element; + scan_data_list *scan_data = (scan_data_list *)privdata; + mock_hash_element *mock_element = (mock_hash_element *)element; if (mock_element->hash != scan_data->chain_hash) return; if (mock_element->value == -1UL) return; @@ -410,7 +410,8 @@ int test_scan_with_rehashed_long_chain(int argc, char **argv, int flags) { UNUSED(argc); UNUSED(argv); UNUSED(flags); - // creates hashset with elements displaced due to open addressing, such that on a rehash they will move "backwards" + /* creates hashset with elements displaced due to open addressing, such that + * on a rehash they will move "backwards" */ const size_t num_random_elements = 128; const size_t num_chained_elements = 40; @@ -429,53 +430,56 @@ int test_scan_with_rehashed_long_chain(int argc, char **argv, int flags) { hashset *s = hashsetCreate(&type); hashsetExpand(s, num_random_elements + num_chained_elements + num_chain_base_elements); - // add background random elements (helps to keep fill between limits, prevent unwanted rehash) + /* add background random elements (helps to keep fill between limits, + * prevent unwanted rehash) */ for (size_t i = 0; i < num_random_elements; i++) { - hashsetAdd(s, mock_hash_element_create((uint64_t) genrand64_int64(), 0)); + hashsetAdd(s, mock_hash_element_create((uint64_t)genrand64_int64(), 0)); } size_t initial_buckets = hashsetBuckets(s); - // create long chain + /* create long chain */ randomSeed(); - uint64_t chain_hash = (uint64_t) genrand64_int64(); - // make base of chain - mock_hash_element* chain_base[num_chain_base_elements]; + uint64_t chain_hash = (uint64_t)genrand64_int64(); + /* make base of chain */ + mock_hash_element *chain_base[num_chain_base_elements]; if (chain_hash == 0) chain_hash++; for (size_t i = 0; i < num_chain_base_elements; i++) { chain_base[i] = mock_hash_element_create(-1, chain_hash); hashsetAdd(s, chain_base[i]); } - // add elements to end of chain + /* add elements to end of chain */ for (size_t i = 0; i < num_chained_elements; i++) { hashsetAdd(s, mock_hash_element_create(i, chain_hash)); } - // delete base of chain. This means that elements remaining on the end of the chain should move "backwards" on a rehash. + /* delete base of chain. This means that elements remaining on the end of + * the chain should move "backwards" on a rehash. */ for (size_t i = 0; i < num_chain_base_elements; i++) { hashsetDelete(s, chain_base[i]); } - + printf("Bucket fill: "); hashsetHistogram(s); printf("probe map : "); hashsetProbeMap(s); - // unexpected rehash would invalidate the test + /* unexpected rehash would invalidate the test */ assert(hashsetBuckets(s) == initial_buckets); - // create a "train" of scans such that there is a scan paused at every cursor position where hashsetScan() pauses - // rehash in step with - scan_data_list* scan_list = NULL; + /* create a "train" of scans such that there is a scan paused at every + * cursor position where hashsetScan() pauses while incremental rehash is in + * progress */ + scan_data_list *scan_list = NULL; int any_scan_finished = 0; while (1) { - // insert new scan start of list - scan_data_list* new = (scan_data_list*) calloc(1, sizeof(scan_data_list) + num_chained_elements); + /* insert new scan start of list */ + scan_data_list *new = (scan_data_list *)calloc(1, sizeof(scan_data_list) + num_chained_elements); new->next = scan_list; new->chain_hash = chain_hash; new->cursor = hashsetScan(s, 0, scan_data_list_scan_function, new, 0); scan_list = new; - // step forward all scans - for (scan_data_list* current = scan_list; current != NULL; current = current->next) { + /* step forward all scans */ + for (scan_data_list *current = scan_list; current != NULL; current = current->next) { if (current->cursor == 0) { any_scan_finished = 1; } else { @@ -485,25 +489,25 @@ int test_scan_with_rehashed_long_chain(int argc, char **argv, int flags) { if (!hashsetIsRehashing(s)) { if (any_scan_finished) { - // begin a rehash by adding elements + /* begin a rehash by adding elements */ while (!hashsetIsRehashing(s)) { - hashsetAdd(s, mock_hash_element_create((uint64_t) genrand64_int64(), 0)); + hashsetAdd(s, mock_hash_element_create((uint64_t)genrand64_int64(), 0)); } } } else { - // continue rehash by one step + /* continue rehash by one step */ hashsetFind(s, NULL, NULL); - // stop when rehashing finishes + /* stop when rehashing finishes */ if (!hashsetIsRehashing(s)) { break; } } } - // ensure that scans didn't miss any of the probe chained elements - for (scan_data_list* current = scan_list; current != NULL; current = current->next) { - // finish scan if needed + /* ensure that scans didn't miss any of the probe chained elements */ + for (scan_data_list *current = scan_list; current != NULL; current = current->next) { + /* finish scan if needed */ while (current->cursor != 0) { current->cursor = hashsetScan(s, current->cursor, scan_data_list_scan_function, current, 0); } @@ -512,9 +516,9 @@ int test_scan_with_rehashed_long_chain(int argc, char **argv, int flags) { TEST_ASSERT(current->element_seen[i] > 0); } } - - for (scan_data_list* current = scan_list; current != NULL;) { - scan_data_list* next = current->next; + + for (scan_data_list *current = scan_list; current != NULL;) { + scan_data_list *next = current->next; free(current); current = next; }