From 6d60f78c8f5939353a45f482c43f3d77c1e7a01c Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 17 Apr 2018 15:49:18 -0400 Subject: [PATCH] IdDict: support deletion, and support `nothing` used as a key previously, if we deleted one key or added `nothing` as a key, we might lose some of the other entries too (until rehash) fix #26833 (cherry-pick PR #26839, 5e57c214f872083ccacafa0f753e794ec654a21a) --- src/table.c | 60 ++++++++++++++++++++++++++++++++-------------------- test/dict.jl | 24 +++++++++++++++++++++ 2 files changed, 61 insertions(+), 23 deletions(-) diff --git a/src/table.c b/src/table.c index fc136403ad93f..8b99241da0ac7 100644 --- a/src/table.c +++ b/src/table.c @@ -8,7 +8,7 @@ #define keyhash(k) jl_object_id(k) #define h2index(hv,sz) (size_t)(((hv) & ((sz)-1))*2) -static void **jl_table_lookup_bp(jl_array_t **pa, void *key); +static int jl_table_assign_bp(jl_array_t **pa, void *key, void *val); JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz) { @@ -23,7 +23,7 @@ JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz) JL_GC_PUSH1(&newa); for(i=0; i < sz; i+=2) { if (ol[i+1] != NULL) { - (*jl_table_lookup_bp(&newa, ol[i])) = ol[i+1]; + jl_table_assign_bp(&newa, ol[i], ol[i + 1]); jl_gc_wb(newa, ol[i+1]); // it is however necessary here because allocation // can (and will) occur in a recursive call inside table_lookup_bp @@ -37,12 +37,12 @@ JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz) return newa; } -static void **jl_table_lookup_bp(jl_array_t **pa, void *key) +static int jl_table_assign_bp(jl_array_t **pa, void *key, void *val) { // pa points to a **rooted** gc frame slot uint_t hv; jl_array_t *a = *pa; - size_t orig, index, iter; + size_t orig, index, iter, empty_slot; size_t newsz, sz = hash_size(a); assert(sz >= 1); size_t maxprobe = max_probe(sz); @@ -54,16 +54,25 @@ static void **jl_table_lookup_bp(jl_array_t **pa, void *key) index = h2index(hv,sz); sz *= 2; orig = index; + empty_slot = -1; do { - if (tab[index+1] == NULL) { - tab[index] = key; - jl_gc_wb(a, key); - return &tab[index+1]; + if (tab[index] == NULL) { + if (empty_slot == -1) + empty_slot = index; + break; + } + if (jl_egal((jl_value_t *)key, (jl_value_t *)tab[index])) { + if (tab[index + 1] != NULL) { + tab[index + 1] = val; + jl_gc_wb(a, val); + return 0; + } + // `nothing` is our sentinel value for deletion, so need to keep searching if it's also our search key + assert(key == jl_nothing); + if (empty_slot == -1) + empty_slot = index; } - - if (jl_egal((jl_value_t*)key, (jl_value_t*)tab[index])) - return &tab[index+1]; index = (index+2) & (sz-1); iter++; @@ -71,6 +80,14 @@ static void **jl_table_lookup_bp(jl_array_t **pa, void *key) break; } while (index != orig); + if (empty_slot != -1) { + tab[empty_slot] = key; + jl_gc_wb(a, key); + tab[empty_slot + 1] = val; + jl_gc_wb(a, val); + return 1; + } + /* table full */ /* quadruple size, rehash, retry the insert */ /* it's important to grow the table really fast; otherwise we waste */ @@ -90,12 +107,9 @@ static void **jl_table_lookup_bp(jl_array_t **pa, void *key) maxprobe = max_probe(sz); goto retry_bp; - - return NULL; } /* returns bp if key is in hash, otherwise NULL */ -/* if return is non-NULL and *bp == NULL then key was deleted */ static void **jl_table_peek_bp(jl_array_t *a, void *key) { size_t sz = hash_size(a); @@ -111,8 +125,12 @@ static void **jl_table_peek_bp(jl_array_t *a, void *key) do { if (tab[index] == NULL) return NULL; - if (jl_egal((jl_value_t*)key, (jl_value_t*)tab[index])) - return &tab[index+1]; + if (jl_egal((jl_value_t*)key, (jl_value_t*)tab[index])) { + if (tab[index+1] != NULL) + return &tab[index+1]; + // `nothing` is our sentinel value for deletion, so need to keep searching if it's also our search key + assert(key == jl_nothing); + } index = (index+2) & (sz-1); iter++; @@ -128,9 +146,7 @@ jl_array_t *jl_eqtable_put(jl_array_t *h, void *key, void *val) { JL_GC_PUSH1(&h); // &h may be assigned to in jl_idtable_rehash so it need to be rooted - void **bp = jl_table_lookup_bp(&h, key); - *bp = val; - jl_gc_wb(h, val); + jl_table_assign_bp(&h, key, val); JL_GC_POP(); return h; } @@ -139,16 +155,14 @@ JL_DLLEXPORT jl_value_t *jl_eqtable_get(jl_array_t *h, void *key, jl_value_t *deflt) { void **bp = jl_table_peek_bp(h, key); - if (bp == NULL || *bp == NULL) - return deflt; - return (jl_value_t*)*bp; + return (bp == NULL) ? deflt : (jl_value_t *)*bp; } JL_DLLEXPORT jl_value_t *jl_eqtable_pop(jl_array_t *h, void *key, jl_value_t *deflt) { void **bp = jl_table_peek_bp(h, key); - if (bp == NULL || *bp == NULL) + if (bp == NULL) return deflt; jl_value_t *val = (jl_value_t*)*bp; *(bp-1) = jl_nothing; // clear the key diff --git a/test/dict.jl b/test/dict.jl index 1ac24f0bb33b4..9f900b82f49c9 100644 --- a/test/dict.jl +++ b/test/dict.jl @@ -413,6 +413,30 @@ let d = @inferred ObjectIdDict(Pair(1,1), Pair(2,2), Pair(3,3)) @test eltype(d) == Pair{Any,Any} end +@testset "issue #26833, deletion from IdDict" begin + d = ObjectIdDict() + i = 1 + # generate many hash collisions + while length(d) < 32 # expected to occur at i <≈ 2^16 * 2^5 + if object_id(i) % UInt16 == 0x1111 + push!(d, i => true) + end + i += 1 + end + k = collect(keys(d)) + @test haskey(d, k[1]) + delete!(d, k[1]) + @test length(d) == 31 + @test !haskey(d, k[1]) + @test haskey(d, k[end]) + push!(d, k[end] => false) + @test length(d) == 31 + @test haskey(d, k[end]) + @test !pop!(d, k[end]) + @test !haskey(d, k[end]) + @test length(d) == 30 +end + # Issue #7944 let d = Dict{Int,Int}() get!(d, 0) do