From 063180ab37c0aa05e8722a36c26eaac37e056cb9 Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Thu, 4 May 2023 10:28:14 -0700 Subject: [PATCH] Define a method for `hash(::Type, ::UInt)` Currently, `hash(::Type, ::UInt)` uses `objectid`, which can have some odd behavior for types: in particular, subsequent identical type-valued variable definitions can have `objectid`s which differ from the first such definition. This has some bizarre downstream effects when e.g. using types as the values of a `Set` or the keys of a `Dict`. See issue 49620 for examples. There is an internal `type_hash` C function used for caching types but isn't exposed to Julia, as Jameson pointed out in the linked issue. This commit exposes it as `jl_type_hash` which is then used via `ccall` to define a method `hash(::Type, ::UInt)`. This method then fixes #49620. Note, however, that this does not affect the differing `objectid`s for otherwise identical types. --- base/hashing.jl | 1 + src/jltypes.c | 8 ++++++++ src/julia.h | 1 + test/hashing.jl | 7 +++++++ 4 files changed, 17 insertions(+) diff --git a/base/hashing.jl b/base/hashing.jl index 1aadd8b7e46a9..5dbae09123bd6 100644 --- a/base/hashing.jl +++ b/base/hashing.jl @@ -29,6 +29,7 @@ See also: [`objectid`](@ref), [`Dict`](@ref), [`Set`](@ref). """ hash(x::Any) = hash(x, zero(UInt)) hash(w::WeakRef, h::UInt) = hash(w.value, h) +hash(T::Type, h::UInt) = hash_uint(3h - ccall(:jl_type_hash, UInt, (Any,), T)) ## hashing general objects ## diff --git a/src/jltypes.c b/src/jltypes.c index 902e1e557f7e0..85255f9247439 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -1575,6 +1575,14 @@ static unsigned type_hash(jl_value_t *kj, int *failed) JL_NOTSAFEPOINT } } +JL_DLLEXPORT uintptr_t jl_type_hash(jl_value_t *v) JL_NOTSAFEPOINT +{ + // Exported wrapper that lets Julia call the static `type_hash` via `ccall`. NOTE: the value + // of `failed` is purposefully ignored: other internal code needs it, the Julia side does not. 
+ int failed = 0; + return type_hash(v, &failed); +} + static unsigned typekey_hash(jl_typename_t *tn, jl_value_t **key, size_t n, int nofail) JL_NOTSAFEPOINT { if (tn == jl_type_typename && key[0] == jl_bottom_type) diff --git a/src/julia.h b/src/julia.h index 0a542dd8b6bcb..89fea54fc428f 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1403,6 +1403,7 @@ JL_DLLEXPORT int jl_egal__bits(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_v JL_DLLEXPORT int jl_egal__special(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT; JL_DLLEXPORT int jl_egal__unboxed(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT; JL_DLLEXPORT uintptr_t jl_object_id(jl_value_t *v) JL_NOTSAFEPOINT; +JL_DLLEXPORT uintptr_t jl_type_hash(jl_value_t *v) JL_NOTSAFEPOINT; STATIC_INLINE int jl_egal__unboxed_(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT { diff --git a/test/hashing.jl b/test/hashing.jl index b672c3de817c6..943109924f280 100644 --- a/test/hashing.jl +++ b/test/hashing.jl @@ -295,3 +295,10 @@ if Sys.WORD_SIZE >= 64 objectid(s) end end + +# Issue #49620 +let t1 = Tuple{AbstractVector,AbstractVector{<:Integer},UnitRange{<:Integer}}, + t2 = Tuple{AbstractVector,AbstractVector{<:Integer},UnitRange{<:Integer}} + @test hash(t1) == hash(t2) + @test length(Set{Type}([t1, t2])) == 1 +end