Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance of Dict{K,V} (~5%) by storing elements in pairs::Vector{Pair{K,V}} #44332

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -364,3 +364,43 @@ end
end

# END 1.9 deprecations

# BEGIN 1.10 deprecations
# Vector-like wrapper around a `Dict` that is handed out by the deprecated
# `dict.keys` property. Element `i` is the key half of the wrapped dict's `pairs[i]`
# entry, so the wrapper has as many elements as `dict.pairs`.
# NOTE(review): the type name is misspelled ("Depricated"); it is kept as-is because
# the accessor methods and `getproperty` refer to it by this name.
struct DepricatedKeyDictAccessor{K,V} <: AbstractVector{K}
# The wrapped dictionary; reads and writes go straight to its `pairs` field.
dict::Dict{K,V}
end

# Vector-like wrapper around a `Dict` that is handed out by the deprecated
# `dict.vals` property. Element `i` is the value half of the wrapped dict's `pairs[i]`
# entry, so the wrapper has as many elements as `dict.pairs`.
# NOTE(review): the type name is misspelled ("Depricated"); it is kept as-is because
# the accessor methods and `getproperty` refer to it by this name.
struct DepricatedValueDictAccessor{K,V} <: AbstractVector{V}
# The wrapped dictionary; reads and writes go straight to its `pairs` field.
dict::Dict{K,V}
end

# Read element `i` of the deprecated key view: the `first` half of `dict.pairs[i]`.
function getindex(a::DepricatedKeyDictAccessor, i::Integer)
    entry = a.dict.pairs[i]
    return entry.first
end

# Read element `i` of the deprecated value view: the `second` half of `dict.pairs[i]`.
function getindex(a::DepricatedValueDictAccessor, i::Integer)
    entry = a.dict.pairs[i]
    return entry.second
end

# Overwrite the key stored at slot `i` through the deprecated key view.
# The existing entry is read first, then replaced by a new `Pair{K,V}` that carries
# `value` as the key and the previously stored value. Returns the accessor `a`.
function setindex!(a::DepricatedKeyDictAccessor{K,V}, value, i::Integer) where {K,V}
    store = a.dict.pairs
    previous = store[i]
    store[i] = Pair{K,V}(value, previous.second)
    return a
end

# Overwrite the value stored at slot `i` through the deprecated value view.
# The existing entry is read first, then replaced by a new `Pair{K,V}` that keeps
# the previously stored key and carries `value`. Returns the accessor `a`.
function setindex!(a::DepricatedValueDictAccessor{K,V}, value, i::Integer) where {K,V}
    store = a.dict.pairs
    previous = store[i]
    store[i] = Pair{K,V}(previous.first, value)
    return a
end

# Both deprecated views are exactly as long as the dict's backing `pairs` vector,
# so its one-dimensional size is reported directly.
function size(a::DepricatedKeyDictAccessor)
    return size(a.dict.pairs)
end

function size(a::DepricatedValueDictAccessor)
    return size(a.dict.pairs)
end

# Property access shim for `Dict`.
# `d.keys` and `d.vals` emit a forced deprecation warning and return a vector-like
# accessor over `d`; every other name falls through to the real field via `getfield`.
function getproperty(d::Dict, s::Symbol)
    if s === :keys
        depwarn("For Dict, please use dict.pairs[i].first instead of dict.keys[i].", :getproperty, force=true)
        return DepricatedKeyDictAccessor(d)
    end
    if s === :vals
        depwarn("For Dict, please use dict.pairs[i].second instead of dict.vals[i].", :getproperty, force=true)
        return DepricatedValueDictAccessor(d)
    end
    return getfield(d, s)
end

# END 1.10 deprecations
105 changes: 44 additions & 61 deletions base/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@ Dict{String, Int64} with 2 entries:
mutable struct Dict{K,V} <: AbstractDict{K,V}
# Metadata: empty => 0x00, removed => 0x7f, full => 0b1[7 most significant hash bits]
slots::Vector{UInt8}
keys::Array{K,1}
vals::Array{V,1}
pairs::Vector{Pair{K,V}} # stored pairs (key::K => value::V)
ndel::Int
count::Int
age::UInt
Expand All @@ -67,14 +66,13 @@ mutable struct Dict{K,V} <: AbstractDict{K,V}

function Dict{K,V}() where V where K
n = 16
new(zeros(UInt8,n), Vector{K}(undef, n), Vector{V}(undef, n), 0, 0, 0, n, 0)
new(zeros(UInt8,n), Vector{Pair{K,V}}(undef, n), 0, 0, 0, n, 0)
end
function Dict{K,V}(d::Dict{K,V}) where V where K
new(copy(d.slots), copy(d.keys), copy(d.vals), d.ndel, d.count, d.age,
d.idxfloor, d.maxprobe)
new(copy(d.slots), copy(d.pairs), d.ndel, d.count, d.age, d.idxfloor, d.maxprobe)
end
function Dict{K, V}(slots, keys, vals, ndel, count, age, idxfloor, maxprobe) where {K, V}
new(slots, keys, vals, ndel, count, age, idxfloor, maxprobe)
function Dict{K, V}(slots, pairs, ndel, count, age, idxfloor, maxprobe) where {K, V}
new(slots, pairs, ndel, count, age, idxfloor, maxprobe)
end
end
function Dict{K,V}(kv) where V where K
Expand Down Expand Up @@ -163,53 +161,46 @@ end
@propagate_inbounds isslotfilled(h::Dict, i::Int) = (h.slots[i] & 0x80) != 0
@propagate_inbounds isslotmissing(h::Dict, i::Int) = h.slots[i] == 0x7f

@constprop :none function rehash!(h::Dict{K,V}, newsz = length(h.keys)) where V where K
@constprop :none function rehash!(h::Dict{K,V}, newsz = length(h.pairs)) where V where K
olds = h.slots
oldk = h.keys
oldv = h.vals
oldp = h.pairs
sz = length(olds)
newsz = _tablesz(newsz)
h.age += 1
h.idxfloor = 1
if h.count == 0
resize!(h.slots, newsz)
fill!(h.slots, 0x0)
resize!(h.keys, newsz)
resize!(h.vals, newsz)
resize!(h.pairs, newsz)
h.ndel = 0
return h
end

slots = zeros(UInt8,newsz)
keys = Vector{K}(undef, newsz)
vals = Vector{V}(undef, newsz)
pairs = Vector{Pair{K,V}}(undef, newsz)
age0 = h.age
count = 0
maxprobe = 0

for i = 1:sz
@inbounds if (olds[i] & 0x80) != 0
k = oldk[i]
v = oldv[i]
index, sh = hashindex(k, newsz)
index, sh = hashindex(oldp[i].first, newsz)
index0 = index
while slots[index] != 0
index = (index & (newsz-1)) + 1
end
probe = (index - index0) & (newsz-1)
probe > maxprobe && (maxprobe = probe)
slots[index] = olds[i]
keys[index] = k
vals[index] = v
pairs[index] = oldp[i]
count += 1
end
end

@assert h.age == age0 "Multiple concurrent writes to Dict detected!"
h.age += 1
h.slots = slots
h.keys = keys
h.vals = vals
h.pairs = pairs
h.count = count
h.ndel = 0
h.maxprobe = maxprobe
Expand Down Expand Up @@ -246,10 +237,8 @@ Dict{String, Int64}()
function empty!(h::Dict{K,V}) where V where K
fill!(h.slots, 0x0)
sz = length(h.slots)
empty!(h.keys)
empty!(h.vals)
resize!(h.keys, sz)
resize!(h.vals, sz)
empty!(h.pairs)
resize!(h.pairs, sz)
h.ndel = 0
h.count = 0
h.age += 1
Expand All @@ -260,16 +249,16 @@ end
# get the index where a key is stored, or -1 if not present
function ht_keyindex(h::Dict{K,V}, key) where V where K
isempty(h) && return -1
sz = length(h.keys)
sz = length(h.pairs)
iter = 0
maxprobe = h.maxprobe
index, sh = hashindex(key, sz)
keys = h.keys
pairs = h.pairs

@inbounds while true
isslotempty(h,index) && return -1
if h.slots[index] == sh
k = keys[index]
k = pairs[index].first
if (key === k || isequal(key, k))
return index
end
Expand All @@ -287,12 +276,12 @@ end
# sh::UInt8 - short hash (7 highest hash bits)
# This version is for use by setindex! and get!
function ht_keyindex2_shorthash!(h::Dict{K,V}, key) where V where K
sz = length(h.keys)
sz = length(h.pairs)
iter = 0
maxprobe = h.maxprobe
index, sh = hashindex(key, sz)
avail = 0
keys = h.keys
pairs = h.pairs

@inbounds while true
if isslotempty(h,index)
Expand All @@ -306,7 +295,7 @@ function ht_keyindex2_shorthash!(h::Dict{K,V}, key) where V where K
avail = -index
end
elseif h.slots[index] == sh
k = keys[index]
k = pairs[index].first
if key === k || isequal(key, k)
return index, sh
end
Expand Down Expand Up @@ -338,17 +327,16 @@ end
# Only for better backward compatibility. It can be removed in the future.
ht_keyindex2!(h::Dict, key) = ht_keyindex2_shorthash!(h, key)[1]

@propagate_inbounds function _setindex!(h::Dict, v, key, index, sh = _shorthash7(hash(key)))
@propagate_inbounds function _setindex!(h::Dict{K,V}, v, key, index, sh = _shorthash7(hash(key))) where V where K
h.slots[index] = sh
h.keys[index] = key
h.vals[index] = v
h.pairs[index] = Pair{K,V}(key, v)
h.count += 1
h.age += 1
if index < h.idxfloor
h.idxfloor = index
end

sz = length(h.keys)
sz = length(h.pairs)
# Rehash now if necessary
if h.ndel >= ((3*sz)>>2) || h.count*3 > sz*2
# > 3/4 deleted or > 2/3 full
Expand All @@ -371,8 +359,7 @@ function setindex!(h::Dict{K,V}, v0, key::K) where V where K

if index > 0
h.age += 1
@inbounds h.keys[index] = key
@inbounds h.vals[index] = v
@inbounds h.pairs[index] = Pair{K,V}(key, v)
else
@inbounds _setindex!(h, v, key, -index, sh)
end
Expand All @@ -386,8 +373,7 @@ function setindex!(h::Dict{K,Any}, v, key::K) where K

if index > 0
h.age += 1
@inbounds h.keys[index] = key
@inbounds h.vals[index] = v
@inbounds h.pairs[index] = Pair{K,Any}(key, v)
else
@inbounds _setindex!(h, v, key, -index, sh)
end
Expand Down Expand Up @@ -462,7 +448,7 @@ end
function get!(default::Callable, h::Dict{K,V}, key::K) where V where K
index, sh = ht_keyindex2_shorthash!(h, key)

index > 0 && return h.vals[index]
index > 0 && return h.pairs[index].second

age0 = h.age
v = convert(V, default())
Expand All @@ -471,8 +457,7 @@ function get!(default::Callable, h::Dict{K,V}, key::K) where V where K
end
if index > 0
h.age += 1
@inbounds h.keys[index] = key
@inbounds h.vals[index] = v
@inbounds h.pairs[index] = Pair{K,V}(key, v)
else
@inbounds _setindex!(h, v, key, -index, sh)
end
Expand All @@ -481,7 +466,7 @@ end

function getindex(h::Dict{K,V}, key) where V where K
index = ht_keyindex(h, key)
@inbounds return (index < 0) ? throw(KeyError(key)) : h.vals[index]::V
@inbounds return (index < 0) ? throw(KeyError(key)) : h.pairs[index].second::V
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder why all these type annotations here were added in the first place. They probably don't hurt, but I also don't see why they'd be needed.

end

"""
Expand All @@ -508,7 +493,7 @@ get(collection, key, default)

function get(h::Dict{K,V}, key, default) where V where K
# Locate the slot for `key`; a negative index means the key is not present,
# in which case `default` is returned instead of a stored value.
index = ht_keyindex(h, key)
# NOTE(review): two return lines appear back to back. The first reads the value from
# `h.vals`, the second from `h.pairs` — presumably the removed and added lines of a
# diff hunk whose markers were lost; confirm which one is meant to be live.
@inbounds return (index < 0) ? default : h.vals[index]::V
@inbounds return (index < 0) ? default : h.pairs[index].second::V
end

"""
Expand All @@ -530,7 +515,7 @@ get(::Function, collection, key)

function get(default::Callable, h::Dict{K,V}, key) where V where K
# Locate the slot for `key`; a negative index means the key is not present,
# in which case `default()` is called and its result returned.
index = ht_keyindex(h, key)
# NOTE(review): two return lines appear back to back. The first reads the value from
# `h.vals`, the second from `h.pairs` — presumably the removed and added lines of a
# diff hunk whose markers were lost; confirm which one is meant to be live.
@inbounds return (index < 0) ? default() : h.vals[index]::V
@inbounds return (index < 0) ? default() : h.pairs[index].second::V
end

"""
Expand Down Expand Up @@ -576,11 +561,11 @@ julia> getkey(D, 'd', 'a')
"""
function getkey(h::Dict{K,V}, key, default) where V where K
# Locate the slot for `key`; a negative index means the key is not present,
# in which case `default` is returned instead of the stored key.
index = ht_keyindex(h, key)
# NOTE(review): two return lines appear back to back. The first reads the key from
# `h.keys`, the second from `h.pairs` — presumably the removed and added lines of a
# diff hunk whose markers were lost; confirm which one is meant to be live.
@inbounds return (index<0) ? default : h.keys[index]::K
@inbounds return (index<0) ? default : h.pairs[index].first::K
end

# Remove the entry stored at slot `index` and return the value it held.
# The value is captured before `_delete!` runs on that slot.
function _pop!(h::Dict, index)
# NOTE(review): `val` is assigned twice, once from `h.vals` and once from `h.pairs` —
# presumably the removed and added lines of a diff hunk whose markers were lost;
# confirm which one is meant to be live.
@inbounds val = h.vals[index]
@inbounds val = h.pairs[index].second
_delete!(h, index)
return val
end
Expand Down Expand Up @@ -622,16 +607,14 @@ end
# Remove one entry from `h` and return it as a key => value pair.
# Throws `ArgumentError` when the dict is empty.
function pop!(h::Dict)
isempty(h) && throw(ArgumentError("dict must be non-empty"))
# Index of the entry to remove, as chosen by `skip_deleted_floor!`.
idx = skip_deleted_floor!(h)
# NOTE(review): both the separate `key`/`val` reads with the `key => val` result and
# the single `pair` read with the `pair` result appear here — presumably the removed
# and added lines of a diff hunk whose markers were lost; confirm which set is live.
@inbounds key = h.keys[idx]
@inbounds val = h.vals[idx]
@inbounds pair = h.pairs[idx]
_delete!(h, idx)
key => val
pair
end

function _delete!(h::Dict{K,V}, index) where {K,V}
@inbounds h.slots[index] = 0x7f
@inbounds _unsetindex!(h.keys, index)
@inbounds _unsetindex!(h.vals, index)
@inbounds _unsetindex!(h.pairs, index)
h.ndel += 1
h.count -= 1
h.age += 1
Expand Down Expand Up @@ -686,7 +669,7 @@ function skip_deleted_floor!(h::Dict)
idx
end

@propagate_inbounds _iterate(t::Dict{K,V}, i) where {K,V} = i == 0 ? nothing : (Pair{K,V}(t.keys[i],t.vals[i]), i == typemax(Int) ? 0 : i+1)
@propagate_inbounds _iterate(t::Dict{K,V}, i) where {K,V} = i == 0 ? nothing : (t.pairs[i], i == typemax(Int) ? 0 : i+1)
@propagate_inbounds function iterate(t::Dict)
_iterate(t, skip_deleted(t, t.idxfloor))
end
Expand All @@ -699,14 +682,14 @@ length(t::Dict) = t.count
i == 0 && return nothing
i = skip_deleted(v.dict, i)
i == 0 && return nothing
vals = T <: KeySet ? v.dict.keys : v.dict.vals
(@inbounds vals[i], i == typemax(Int) ? 0 : i+1)
p = @inbounds v.dict.pairs[i]
return p[T <: KeySet ? 1 : 2], i == typemax(Int) ? 0 : i+1
end

function filter!(pred, h::Dict{K,V}) where {K,V}
h.count == 0 && return h
@inbounds for i=1:length(h.slots)
if ((h.slots[i] & 0x80) != 0) && !pred(Pair{K,V}(h.keys[i], h.vals[i]))
if ((h.slots[i] & 0x80) != 0) && !pred(h.pairs[i])
_delete!(h, i)
end
end
Expand All @@ -719,13 +702,13 @@ function reduce(::typeof(merge), items::Vector{<:Dict})
return reduce(merge!, items; init=Dict{K,V}())
end

function map!(f, iter::ValueIterator{<:Dict})
function map!(f, iter::ValueIterator{<:Dict{K, V}}) where {K, V}
dict = iter.dict
vals = dict.vals
pairs = dict.pairs
# @inbounds is here so that it gets propagated to isslotfilled
@inbounds for i = dict.idxfloor:lastindex(vals)
@inbounds for i = dict.idxfloor:lastindex(pairs)
if isslotfilled(dict, i)
vals[i] = f(vals[i])
pairs[i] = Pair{K,V}(pairs[i].first, f(pairs[i].second))
end
end
return iter
Expand All @@ -736,7 +719,7 @@ function mergewith!(combine, d1::Dict{K, V}, d2::AbstractDict) where {K, V}
for (k, v) in d2
i, sh = ht_keyindex2_shorthash!(d1, k)
if i > 0
d1.vals[i] = combine(d1.vals[i], v)
d1.pairs[i] = Pair{K,V}(d1.pairs[i].first, combine(d1.pairs[i].second, v))
else
if !isequal(k, convert(K, k))
throw(ArgumentError("$(limitrepr(k)) is not a valid key for type $K"))
Expand Down
Loading