Skip to content

Commit

Permalink
Merge pull request #15176 from JuliaLang/spj/maxprobe
Browse files Browse the repository at this point in the history
fix issue with #15077
  • Loading branch information
JeffBezanson committed Mar 2, 2016
2 parents a26b792 + 58aa57c commit b61f660
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 17 deletions.
40 changes: 27 additions & 13 deletions base/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,10 @@ SerializationState(io::IO) = SerializationState{typeof(io)}(io)

# dict

# Tuning constants for Dict probing. These can be changed, to trade off better
# performance for space.
# `ht_keyindex2` computes its probe limit as `max(maxallowedprobe, sz>>maxprobeshift)`
# for a table with `sz` slots; when no free slot is found within that limit it
# rehashes the table to a larger size.
# `maxallowedprobe` is the lower bound of that limit.
const global maxallowedprobe = 16
# `maxprobeshift` is the right-shift applied to the table size to scale the limit.
const global maxprobeshift = 6

type Dict{K,V} <: Associative{K,V}
slots::Array{UInt8,1}
keys::Array{K,1}
Expand All @@ -408,10 +412,11 @@ type Dict{K,V} <: Associative{K,V}
count::Int
dirty::Bool
idxfloor::Int # an index <= the indexes of all used slots
maxprobe::Int

function Dict()
n = 16
new(zeros(UInt8,n), Array(K,n), Array(V,n), 0, 0, false, 1)
new(zeros(UInt8,n), Array(K,n), Array(V,n), 0, 0, false, 1, 0)
end
function Dict(kv)
h = Dict{K,V}()
Expand All @@ -434,7 +439,8 @@ type Dict{K,V} <: Associative{K,V}
rehash!(d)
end
@assert d.ndel == 0
new(copy(d.slots), copy(d.keys), copy(d.vals), 0, d.count, d.dirty, d.idxfloor)
new(copy(d.slots), copy(d.keys), copy(d.vals), 0, d.count, d.dirty, d.idxfloor,
d.maxprobe)
end
end
Dict() = Dict{Any,Any}()
Expand Down Expand Up @@ -523,7 +529,7 @@ function rehash!{K,V}(h::Dict{K,V}, newsz = length(h.keys))
vals = Array(V, newsz)
count0 = h.count
count = 0
maxprobe = max(16, newsz>>6)
maxprobe = h.maxprobe

for i = 1:sz
if olds[i] == 0x1
Expand All @@ -533,11 +539,8 @@ function rehash!{K,V}(h::Dict{K,V}, newsz = length(h.keys))
while slots[index] != 0
index = (index & (newsz-1)) + 1
end
if index - index0 > maxprobe
# rare condition: new table size causes more grouping of keys than before
# see issue #15077
return rehash!(h, newsz*2)
end
probe = (index - index0) & (newsz-1)
probe > maxprobe && (maxprobe = probe)
slots[index] = 0x1
keys[index] = k
vals[index] = v
Expand All @@ -555,6 +558,7 @@ function rehash!{K,V}(h::Dict{K,V}, newsz = length(h.keys))
h.vals = vals
h.count = count
h.ndel = 0
h.maxprobe = maxprobe

return h
end
Expand Down Expand Up @@ -590,7 +594,7 @@ end
function ht_keyindex{K,V}(h::Dict{K,V}, key)
sz = length(h.keys)
iter = 0
maxprobe = max(16, sz>>6)
maxprobe = h.maxprobe
index = hashindex(key, sz)
keys = h.keys

Expand All @@ -603,10 +607,9 @@ function ht_keyindex{K,V}(h::Dict{K,V}, key)
end

index = (index & (sz-1)) + 1
iter+=1
iter += 1
iter > maxprobe && break
end

return -1
end

Expand All @@ -616,7 +619,7 @@ end
function ht_keyindex2{K,V}(h::Dict{K,V}, key)
sz = length(h.keys)
iter = 0
maxprobe = max(16, sz>>6)
maxprobe = h.maxprobe
index = hashindex(key, sz)
avail = 0
keys = h.keys
Expand All @@ -638,12 +641,23 @@ function ht_keyindex2{K,V}(h::Dict{K,V}, key)
end

index = (index & (sz-1)) + 1
iter+=1
iter += 1
iter > maxprobe && break
end

avail < 0 && return avail

maxallowed = max(maxallowedprobe, sz>>maxprobeshift)
# Check if key is not present, may need to keep searching to find slot
while iter < maxallowed
if !isslotfilled(h,index)
h.maxprobe = iter
return -index
end
index = (index & (sz-1)) + 1
iter += 1
end

rehash!(h, h.count > 64000 ? sz*2 : sz*4)

return ht_keyindex2(h, key)
Expand Down
68 changes: 64 additions & 4 deletions test/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,20 @@ let d = Dict(zip(1:1000,1:1000)), f = (k,v) -> iseven(k)
end

# issue #15077

# String wrapper used by the issue #15077 tests. Its `hash` method adds the
# current `hashoffset[]` to the hash of the wrapped string, so the tests can
# shift the hash values of a fixed set of keys.
immutable MyString <: AbstractString
str::ASCIIString
end
import Base.==

# Offset added by the test hash functions. It is kept in a one-element array so
# the tests can reassign it with `hashoffset[] = ...` while the binding stays const.
const global hashoffset = [UInt(190)]

# Hash of the wrapped string, shifted by the current offset.
function Base.hash(s::MyString)
    return hash(s.str) + hashoffset[]
end

# Indexing and iteration are delegated to the wrapped string.
function Base.endof(s::MyString)
    return endof(s.str)
end

function Base.next(s::MyString, v::Int)
    return next(s.str, v)
end

# Both equality predicates compare the wrapped strings.
function Base.isequal(a::MyString, b::MyString)
    return isequal(a.str, b.str)
end

function ==(a::MyString, b::MyString)
    return a.str == b.str
end

let badKeys = ASCIIString["FINO_emv5.0","FINO_ema0.1","RATE_ema1.0","NIBPM_ema1.0",
"SAO2_emv5.0","O2FLOW_ema5.0","preop_Neuro/Psych_","gender_",
"FIO2_ema0.1","PEAK_ema5.0","preop_Reproductive_denies","O2FLOW_ema0.1",
Expand All @@ -461,9 +475,55 @@ let badKeys = ASCIIString["FINO_emv5.0","FINO_ema0.1","RATE_ema1.0","NIBPM_ema1.
"RESPRATE_ema0.1","preop_Functional Status_<2","preop_Renal_symptoms",
"ECGRATE_ema5.0","FIO2_emv5.0","RESPRATE_emv5.0","7wu3ty0a4fs","BVO",
"4UrCWXUsaT"]
d = Dict{AbstractString,Float64}()
for k in badKeys
d[k] = 1
d = Dict{AbstractString,Int}()
for i = 1:length(badKeys)
d[badKeys[i]] = i
end
# Check all keys for missing values
for i = 1:length(badKeys)
@test d[badKeys[i]] == i
end

# Walk through all possible hash values (mod size of hash table)
for offset = 0:1023
d2 = Dict{MyString,Int}()
hashoffset[] = offset
for i = 1:length(badKeys)
d2[MyString(badKeys[i])] = i
end
# Check all keys for missing values
for i = 1:length(badKeys)
@test d2[MyString(badKeys[i])] == i
end
end
end

# Integer wrapper used by the Dict hashing tests. Its `hash` method returns the
# wrapped value plus the current `hashoffset[]`, so the tests can shift the hash
# values of a fixed set of keys.
immutable MyInt <: Integer
val::UInt
end

# The hash is the wrapped value itself, shifted by the current offset.
function Base.hash(v::MyInt)
    return v.val + hashoffset[]
end

# Indexing and iteration are delegated to the wrapped value.
function Base.endof(v::MyInt)
    return endof(v.val)
end

function Base.next(v::MyInt, i::Int)
    return next(v.val, i)
end

# Both equality predicates compare the wrapped values.
function Base.isequal(a::MyInt, b::MyInt)
    return isequal(a.val, b.val)
end

function ==(a::MyInt, b::MyInt)
    return a.val == b.val
end

# Dict insertion/lookup test with integer keys wrapped in `MyInt`, whose hash is
# the key value plus `hashoffset[]`. For every offset, each key is inserted and
# then looked up again to check that no key goes missing.
let badKeys = UInt16[0xb800,0xa501,0xcdff,0x6303,0xe40a,0xcf0e,0xf3df,0xae99,0x9913,0x741c,
                     0xd01f,0xc822,0x9723,0xb7a0,0xea25,0x7423,0x6029,0x202a,0x822b,0x492c,
                     0xd02c,0x862d,0x8f34,0xe529,0xf938,0x4f39,0xd03a,0x473b,0x1e3b,0x1d3a,
                     0xcc39,0x7339,0xcf40,0x8740,0x813d,0xe640,0xc443,0x6344,0x3744,0x2c3d,
                     0x8c48,0xdf49,0x5743]

    # Walk through all possible hash values (mod size of hash table)
    for offset = 0:1023
        d2 = Dict{MyInt, Int}()
        hashoffset[] = offset
        for (i, k) in enumerate(badKeys)
            d2[MyInt(k)] = i
        end
        # Check all keys for missing values
        for (i, k) in enumerate(badKeys)
            @test d2[MyInt(k)] == i
        end
    end
    # The former trailing check `@test d["NIBPD_emv5.0"] == 1` was dropped: `d` is
    # local to a different `let` block and is not defined in this one.
end

0 comments on commit b61f660

Please sign in to comment.