Skip to content

Commit

Permalink
Implement faster thread local rng for the scheduler.
Browse files Browse the repository at this point in the history
  • Loading branch information
gbaraldi committed Aug 16, 2024
1 parent 5230d27 commit 6aa9a35
Showing 1 changed file with 50 additions and 1 deletion.
51 changes: 50 additions & 1 deletion base/partr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,56 @@ const heaps = [Vector{taskheap}(undef, 0), Vector{taskheap}(undef, 0)]
const heaps_lock = [SpinLock(), SpinLock()]


cong(max::UInt32) = iszero(max) ? UInt32(0) : ccall(:jl_rand_ptls, UInt32, (UInt32,), max) + UInt32(1)
"""
cong(max::UInt32)
Return a random UInt32 in the range `1:max` except if max is 0, in that case return 0.
"""
cong(max::UInt32) = iszero(max) ? UInt32(0) : jl_rand_ptls(max) + UInt32(1) #TODO: make sure users don't use 0 and remove this check

"""
jl_rand_ptls(max::UInt32)
Return a random UInt32 in the range `0:max-1` using the thread-local RNG
state. Max must be greater than 0.
"""
Base.@assume_effects :removable :inaccessiblememonly :notaskstate function jl_rand_ptls(max::UInt32)
# Are these effects correct? We are technically lying to the compiler
# Though these are the same lies we tell to say that an unexcaped allocation has no effects
ptls = Base.unsafe_convert(Ptr{UInt64}, Core.getptls())
rngseed = Base.unsafe_load(ptls, 2) # TODO: What's the best way to do this for 32bit.
val, seed = rand_uniform_max_int32(max, rngseed)
Base.unsafe_store!(ptls, seed, 2)
return val % UInt32
end

# This implementation is based on OpenSSLs implementation of rand_uniform
# https://github.com/openssl/openssl/blob/1d2cbd9b5a126189d5e9bc78a3bdb9709427d02b/crypto/rand/rand_uniform.c#L13-L99
# Comments are vendored from their implementation as well.
# For the original developer check the PR to swift https://github.com/apple/swift/pull/39143.

# Essentially it boils down to incrementally generating a fixed point
# number on the interval [0, 1) and multiplying this number by the upper
# range limit. Once it is certain what the fractional part contributes to
# the integral part of the product, the algorithm has produced a definitive
# result.
"""
rand_uniform_max_int32(max::UInt32, seed::UInt64)
Return a random UInt32 in the range `0:max-1` using the given seed.
Max must be greater than 0.
"""
Base.@assume_effects :total function rand_uniform_max_int32(max::UInt32, seed::UInt64)
if max == UInt32(1)
return UInt32(0), seed
end
# We are generating a fixed point number on the interval [0, 1).
# Multiplying this by the range gives us a number on [0, upper).
# The high word of the multiplication result represents the integral part
# This is not completely unbiased as it's missing the fractional part of the original implementation but it's good enough for our purposes
seed = UInt64(69069) * seed + UInt64(362437)
prod = (UInt64(max)) * (seed % UInt32) # 64 bit product
i = prod >> 32 % UInt32 # integral part
return i % UInt32, seed
end



function multiq_sift_up(heap::taskheap, idx::Int32)
Expand Down

0 comments on commit 6aa9a35

Please sign in to comment.