Skip to content

Commit

Permalink
Switch to pre-fetching model for thread-safety (#382)
Browse files Browse the repository at this point in the history
* Switch to pre-fetching model for thread-safety

* Fix oversight with TZData.compile

* Reload cache on compile

* Add cache override for testing

* Remove unnecessary cache reload

* Fix CI specific tests

* Remove thread-safety tests
  • Loading branch information
omus authored May 24, 2023
1 parent 804864c commit 03af16a
Show file tree
Hide file tree
Showing 11 changed files with 116 additions and 160 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,6 @@ jobs:
TZDATA_VERSION: 2016j # Matches tzdata version used in tests
- uses: julia-actions/julia-buildpkg@latest
- uses: julia-actions/julia-runtest@latest
env:
JULIA_NUM_THREADS: "8" # Can probably use "auto" here instead
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v2
with:
Expand Down
5 changes: 3 additions & 2 deletions src/TimeZones.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ function __init__()
# Write out our compiled tzdata representations into a scratchspace
_COMPILED_DIR[] = _compiled_dir(tzdata_version())

# Initialize the thread-local TimeZone cache (issue #342)
_reset_tz_cache()
# Load the pre-computed TZData into memory. Skip pre-fetching the first time
# TimeZones.jl is loaded by `deps/build.jl` as we have yet to compile the tzdata.
isdir(_COMPILED_DIR[]) && _reload_cache(_COMPILED_DIR[])

# Base extension needs to happen everytime the module is loaded (issue #24)
Dates.CONVERSION_SPECIFIERS['z'] = TimeZone
Expand Down
7 changes: 4 additions & 3 deletions src/build.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ using TimeZones.TZData: DEFAULT_TZDATA_VERSION, tzdata_version
Builds the TimeZones package with the specified tzdata `version` and `regions`. The
`version` is typically a 4-digit year followed by a lowercase ASCII letter
(e.g. "$DEFAULT_TZDATA_VERSION"). The `force` flag is used to re-download tzdata archives.
!!! warning
This function is *not* thread-safe and meant primarily for experimentation.
"""
function build(version::AbstractString=tzdata_version(); force::Bool=false)
built = TimeZones.TZData.build(version, returned=:namedtuple)
Expand All @@ -16,9 +19,7 @@ function build(version::AbstractString=tzdata_version(); force::Bool=false)

# Set the compiled directory to the new location
_COMPILED_DIR[] = built.compiled_dir

# Reset cached information
_reset_tz_cache()
_reload_cache(_COMPILED_DIR[])

@info "Successfully built TimeZones"
end
69 changes: 29 additions & 40 deletions src/types/timezone.jl
Original file line number Diff line number Diff line change
@@ -1,27 +1,29 @@
# Thread-local TimeZone cache, which caches time zones _per thread_, allowing thread-safe
# caching. Note that this means the cache will grow in size, and may store redundant objects
# accross multiple threads, but this extra space usage allows for fast, lock-free access
# to the cache, while still being thread-safe.
const THREAD_TZ_CACHES = Vector{Dict{String,Tuple{TimeZone,Class}}}()

# Based upon the thread-safe Global RNG implementation in the Random stdlib:
# https://github.com/JuliaLang/julia/blob/e4fcdf5b04fd9751ce48b0afc700330475b42443/stdlib/Random/src/RNGs.jl#L369-L385
@inline _tz_cache() = _tz_cache(Threads.threadid())
@noinline function _tz_cache(tid::Int)
0 < tid <= length(THREAD_TZ_CACHES) || _tz_cache_length_assert()
if @inbounds isassigned(THREAD_TZ_CACHES, tid)
@inbounds cache = THREAD_TZ_CACHES[tid]
else
cache = eltype(THREAD_TZ_CACHES)()
@inbounds THREAD_TZ_CACHES[tid] = cache
# Retains the compiled tzdata in memory. Read-only access is thread-safe and any changes
# to this structure can result in inconsistent behaviour.
const _TZ_CACHE = Dict{String,Tuple{TimeZone,Class}}()

function _reload_cache(compiled_dir)
empty!(_TZ_CACHE)
check = Tuple{String,String}[(compiled_dir, "")]

for (dir, partial) in check
for filename in readdir(dir)
startswith(filename, ".") && continue

path = joinpath(dir, filename)
name = isempty(partial) ? filename : join([partial, filename], "/")

if isdir(path)
push!(check, (path, name))
else
_TZ_CACHE[name] = open(TZJFile.read, path, "r")(name)
end
end
end
return cache
end
@noinline _tz_cache_length_assert() = @assert false "0 < tid <= length(THREAD_TZ_CACHES)"

function _reset_tz_cache()
# ensures that we didn't save a bad object
resize!(empty!(THREAD_TZ_CACHES), Threads.nthreads())
!isempty(_TZ_CACHE) || error("Cache remains empty after loading")

return nothing
end

"""
Expand Down Expand Up @@ -65,14 +67,8 @@ US/Pacific (UTC-8/UTC-7)
TimeZone(::AbstractString, ::Class)

function TimeZone(str::AbstractString, mask::Class=Class(:DEFAULT))
# Note: If the class `mask` does not match the time zone we'll still load the
# information into the cache to ensure the result is consistent.
tz, class = get!(_tz_cache(), str) do
tz_path = joinpath(_COMPILED_DIR[], split(str, "/")...)

if isfile(tz_path)
open(TZJFile.read, tz_path, "r")(str)
elseif occursin(FIXED_TIME_ZONE_REGEX, str)
tz, class = get(_TZ_CACHE, str) do
if occursin(FIXED_TIME_ZONE_REGEX, str)
FixedTimeZone(str), Class(:FIXED)
elseif !isdir(_COMPILED_DIR[]) || isempty(readdir(_COMPILED_DIR[]))
# Note: Julia 1.0 supresses the build logs which can hide issues in time zone
Expand Down Expand Up @@ -121,16 +117,9 @@ function istimezone(str::AbstractString, mask::Class=Class(:DEFAULT))
return true
end

# Perform more expensive checks against pre-compiled time zones
tz, class = get(_tz_cache(), str) do
tz_path = joinpath(_COMPILED_DIR[], split(str, "/")...)

if isfile(tz_path)
# Cache the data since we're already performing the deserialization
_tz_cache()[str] = open(TZJFile.read, tz_path, "r")(str)
else
nothing, Class(:NONE)
end
# Checks against pre-compiled time zones
tz, class = get(_TZ_CACHE, str) do
nothing, Class(:NONE)
end

return tz !== nothing && mask & class != Class(:NONE)
Expand Down
20 changes: 9 additions & 11 deletions src/tzdata/compile.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
using Dates
using Dates: parse_components

using ...TimeZones: _tz_cache, _tz_source_dir, _compiled_dir
using ...TimeZones: _tz_source_dir, _compiled_dir
using ...TimeZones: TimeZones, TimeZone, FixedTimeZone, VariableTimeZone, Transition, Class
using ...TimeZones: rename
using ..TZData: TimeOffset, ZERO, MIN_GMT_OFFSET, MAX_GMT_OFFSET, MIN_SAVE, MAX_SAVE,
Expand Down Expand Up @@ -691,16 +691,7 @@ end

function compile(tz_source::TZSource, dest_dir::AbstractString; kwargs...)
results = compile(tz_source; kwargs...)

isdir(dest_dir) || error("Destination directory doesn't exist")
# When we recompile the TimeZones from a new source, we clear all the existing cached
# TimeZone objects, so that newly constructed objects pick up the newly compiled rules.
# Since we use thread-local caches, we spawn a task on _each thread_ to clear that
# thread's local cache.
Threads.@threads :static for i in 1:Threads.nthreads()
@assert Threads.threadid() === i "TimeZones.TZData.compile() must be called from the main, top-level Task."
empty!(_tz_cache())
end

for (tz, class) in results
parts = split(TimeZones.name(tz), '/')
Expand All @@ -723,5 +714,12 @@ function compile(
dest_dir::AbstractString=_compiled_dir(tzdata_version());
kwargs...
)
compile(TZSource(readdir(tz_source_dir; join=true)), dest_dir; kwargs...)
results = compile(TZSource(readdir(tz_source_dir; join=true)), dest_dir; kwargs...)

# TimeZones 1.0 has supported automatic flushing of the cache when calling `compile`
# (e.g. `compile(max_year=2200)`). We'll keep this behaviour to ensure we are not
# breaking our API but the low-level `compile` function should ideally be cache unaware.
TimeZones._reload_cache(dest_dir)

return results
end
9 changes: 7 additions & 2 deletions test/ci.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,13 @@ using TimeZones: TZData

# Note: Using `TZData.compile(max_year=2200)` will end up updating the compiled data for
# `tzdata_version()` instead of what we last built using `TZDATA_VERSION`.
tz_source = TZData.TZSource(joinpath.(tz_source_dir, ["europe", "africa"]))
TZData.compile(tz_source, compiled_dir, max_year=2200)
TZData.compile(tz_source_dir, compiled_dir, max_year=2200)

# TODO: In the future the `TZData.compile` function won't reload the cache. We'll need
# revise the above line to be something like:
# tz_source = TZData.TZSource(joinpath.(tz_source_dir, ["europe", "africa"]))
# TZData.compile(tz_source, compiled_dir, max_year=2200)
# TimeZones._reload_cache(compiled_dir)

new_warsaw = TimeZone("Europe/Warsaw")

Expand Down
35 changes: 30 additions & 5 deletions test/helpers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,35 @@ end
# Used in tests as a shorter form of: `sprint(show, ..., context=:compact => true)`
show_compact = (io, args...) -> show(IOContext(io, :compact => true), args...)

# Takes the tuple from `compile` and adds the result into TimeZones cache. Typically should
# not be used and only should be required if the test tzdata version and built tzdata
# version do not match.
function cache_tz((tz, class)::Tuple{TimeZone, TimeZones.Class})
TimeZones._tz_cache()[TimeZones.name(tz)] = (tz, class)
# Modified the internal TimeZones cache. Should only be used as part of testing and only is
# needed when the data between the test tzdata version and the built tzdata versions differ.

function add!(cache::Dict, t::Tuple{TimeZone,TimeZones.Class})
tz, class = t
name = TimeZones.name(tz)
push!(cache, name => t)
return tz
end

function add!(cache::Dict, tz::VariableTimeZone)
# Not all `VariableTimeZone`s are the STANDARD class. However, for testing purposes
# the class doesn't need to be precise.
class = TimeZones.Class(:STANDARD)
return add!(cache, (tz, class))
end

function add!(cache::Dict, tz::FixedTimeZone)
class = TimeZones.Class(:FIXED)
return add!(cache, (tz, class))
end

function with_tz_cache(f, cache::Dict{String,Tuple{TimeZone,TimeZones.Class}})
old_cache = deepcopy(TimeZones._TZ_CACHE)
copy!(TimeZones._TZ_CACHE, cache)

try
return f()
finally
copy!(TimeZones._TZ_CACHE, old_cache)
end
end
64 changes: 34 additions & 30 deletions test/io.jl
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
using TimeZones.TZData: parse_components
using TimeZones: Transition

cache = Dict{String,Tuple{TimeZone,TimeZones.Class}}()

dt = DateTime(1942,12,25,1,23,45)
custom_dt = DateTime(1800,1,1)

utc = FixedTimeZone("UTC")
gmt = FixedTimeZone("GMT", 0)
gmt = add!(cache, FixedTimeZone("GMT", 0))
foo = FixedTimeZone("FOO", 0)
null = FixedTimeZone("", 10800)
fixed = FixedTimeZone("UTC+01:00")
est = FixedTimeZone("EST", -18000)
warsaw = first(compile("Europe/Warsaw", tzdata["europe"]))
apia = cache_tz(compile("Pacific/Apia", tzdata["australasia"]))
honolulu = cache_tz(compile("Pacific/Honolulu", tzdata["northamerica"])) # Uses cutoff
ulyanovsk = first(compile("Europe/Ulyanovsk", tzdata["europe"])) # No named abbreviations
new_york = first(compile("America/New_York", tzdata["northamerica"])) # Underscore in name
est = add!(cache, FixedTimeZone("EST", -18000))
warsaw = add!(cache, compile("Europe/Warsaw", tzdata["europe"]))
apia = add!(cache, compile("Pacific/Apia", tzdata["australasia"]))
honolulu = add!(cache, compile("Pacific/Honolulu", tzdata["northamerica"])) # Uses cutoff
ulyanovsk = add!(cache, compile("Europe/Ulyanovsk", tzdata["europe"])) # No named abbreviations
new_york = add!(cache, compile("America/New_York", tzdata["northamerica"])) # Underscore in name
custom = VariableTimeZone("Test/Custom", [Transition(custom_dt, utc)]) # Non-cached variable time zone

@test sprint(print, utc) == "UTC"
Expand All @@ -29,29 +31,31 @@ custom = VariableTimeZone("Test/Custom", [Transition(custom_dt, utc)]) # Non-ca
@test sprint(print, ulyanovsk) == "Europe/Ulyanovsk"
@test sprint(print, custom) == "Test/Custom"

@test sprint(show_compact, utc) == "tz\"UTC\""
@test sprint(show_compact, gmt) == "tz\"GMT\""
@test sprint(show_compact, foo) == "FixedTimeZone(\"FOO\", 0)"
@test sprint(show_compact, null) == "FixedTimeZone(\"\", 10800)"
@test sprint(show_compact, fixed) == "tz\"UTC+01:00\""
@test sprint(show_compact, est) == "tz\"EST\""
@test sprint(show_compact, warsaw) == "tz\"Europe/Warsaw\""
@test sprint(show_compact, apia) == "tz\"Pacific/Apia\""
@test sprint(show_compact, honolulu) == "tz\"Pacific/Honolulu\""
@test sprint(show_compact, ulyanovsk) == "tz\"Europe/Ulyanovsk\""
@test sprint(show_compact, custom) == "VariableTimeZone(\"Test/Custom\", ...)"

@test sprint(show, utc) == "tz\"UTC\""
@test sprint(show, gmt) == "tz\"GMT\""
@test sprint(show, foo) == "FixedTimeZone(\"FOO\", 0)"
@test sprint(show, null) == "FixedTimeZone(\"\", 10800)"
@test sprint(show, fixed) == "tz\"UTC+01:00\""
@test sprint(show, est) == "tz\"EST\""
@test sprint(show, warsaw) == "tz\"Europe/Warsaw\""
@test sprint(show, apia) == "tz\"Pacific/Apia\""
@test sprint(show, honolulu) == "tz\"Pacific/Honolulu\""
@test sprint(show, ulyanovsk) == "tz\"Europe/Ulyanovsk\""
@test sprint(show, custom) == "VariableTimeZone(\"Test/Custom\", Transition[Transition($(repr(custom_dt)), tz\"UTC\")], nothing)"
with_tz_cache(cache) do
@test sprint(show_compact, utc) == "tz\"UTC\""
@test sprint(show_compact, gmt) == "tz\"GMT\""
@test sprint(show_compact, foo) == "FixedTimeZone(\"FOO\", 0)"
@test sprint(show_compact, null) == "FixedTimeZone(\"\", 10800)"
@test sprint(show_compact, fixed) == "tz\"UTC+01:00\""
@test sprint(show_compact, est) == "tz\"EST\""
@test sprint(show_compact, warsaw) == "tz\"Europe/Warsaw\""
@test sprint(show_compact, apia) == "tz\"Pacific/Apia\""
@test sprint(show_compact, honolulu) == "tz\"Pacific/Honolulu\""
@test sprint(show_compact, ulyanovsk) == "tz\"Europe/Ulyanovsk\""
@test sprint(show_compact, custom) == "VariableTimeZone(\"Test/Custom\", ...)"

@test sprint(show, utc) == "tz\"UTC\""
@test sprint(show, gmt) == "tz\"GMT\""
@test sprint(show, foo) == "FixedTimeZone(\"FOO\", 0)"
@test sprint(show, null) == "FixedTimeZone(\"\", 10800)"
@test sprint(show, fixed) == "tz\"UTC+01:00\""
@test sprint(show, est) == "tz\"EST\""
@test sprint(show, warsaw) == "tz\"Europe/Warsaw\""
@test sprint(show, apia) == "tz\"Pacific/Apia\""
@test sprint(show, honolulu) == "tz\"Pacific/Honolulu\""
@test sprint(show, ulyanovsk) == "tz\"Europe/Ulyanovsk\""
@test sprint(show, custom) == "VariableTimeZone(\"Test/Custom\", Transition[Transition($(repr(custom_dt)), tz\"UTC\")], nothing)"
end

@test sprint(show, MIME("text/plain"), utc) == "UTC"
@test sprint(show, MIME("text/plain"), gmt) == "GMT"
Expand Down
9 changes: 0 additions & 9 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,6 @@ include("helpers.jl")
include("parse.jl")
include("plotting.jl")

is_apple_silicon = first(Sys.cpu_info()).model == "Apple M1"
if is_apple_silicon && Threads.nthreads() == 8
@info "Thread safety tests can hang on Apple Silicon when using 8-threads. Use 7-threads instead"
elseif Threads.nthreads() == 1
@info "Skipping thread safety check as Julia was started with a single thread"
else
include("thread-safety.jl")
end

# Note: Run the build tests last to ensure that re-compiling the time zones files
# doesn't interfere with other tests.
if lowercase(get(ENV, "CI", "false")) == "true"
Expand Down
53 changes: 0 additions & 53 deletions test/thread-safety.jl

This file was deleted.

3 changes: 0 additions & 3 deletions test/types/timezone.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
using TimeZones: Class

@testset "istimezone" begin
# Invalidate the cache to ensure that `istimezone` works for non-loaded time zones.
TimeZones._reset_tz_cache()

@test istimezone("Europe/Warsaw")
@test istimezone("UTC+02")
@test !istimezone("Europe/Camelot")
Expand Down

0 comments on commit 03af16a

Please sign in to comment.