Skip to content

Commit

Permalink
Make precompile files relocatable (#49866)
Browse files Browse the repository at this point in the history
String replacement with `@depot` when serializing out happens with any
paths that are located inside a `DEPOT_PATH` (first match wins). If no
match, then we emit the absolute file path as before. Right now we only
emit one token `@depot`.

String replacement of `@depot` when loading happens now on a `.ji` file
basis and only if all the listed include dependencies can be resolved to
files located in one and the same depot on `DEPOT_PATH` (again, first
match wins). If we can't resolve, then the cache is invalidated with
`stale_cachefile`.
  • Loading branch information
fatteneder authored Oct 20, 2023
1 parent d432821 commit f2df1b4
Show file tree
Hide file tree
Showing 18 changed files with 360 additions and 91 deletions.
200 changes: 140 additions & 60 deletions base/loading.jl

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions base/sysimg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@ let
print_time(stdlib, tt)
end
for dep in Base._require_dependencies
dep[3] == 0.0 && continue
push!(Base._included_files, dep[1:2])
mod, path, fsize, mtime = dep[1], dep[2], dep[3], dep[5]
(fsize == 0 || mtime == 0.0) && continue
push!(Base._included_files, (mod, path))
end
empty!(Base._require_dependencies)
Base._track_dependencies[] = false
Expand Down
10 changes: 6 additions & 4 deletions doc/src/manual/modules.md
Original file line number Diff line number Diff line change
Expand Up @@ -447,10 +447,12 @@ recompiled upon `using` or `import`. Dependencies are modules it
imports, the Julia build, files it includes, or explicit dependencies declared by [`include_dependency(path)`](@ref)
in the module file(s).

For file dependencies, a change is determined by examining whether the modification time (`mtime`)
of each file loaded by `include` or added explicitly by `include_dependency` is unchanged, or equal
to the modification time truncated to the nearest second (to accommodate systems that can't copy
mtime with sub-second accuracy). It also takes into account whether the path to the file chosen
For file dependencies loaded by `include`, a change is detected when the
file size (`fsize`) or the content (condensed into a hash) differs from the recorded value.
For file dependencies loaded by `include_dependency` a change is determined by examining whether the modification time (`mtime`)
is unchanged, or equal to the modification time truncated to the nearest second
(to accommodate systems that can't copy mtime with sub-second accuracy).
It also takes into account whether the path to the file chosen
by the search logic in `require` matches the path that had created the precompile file. It also takes
into account the set of dependencies already loaded into the current process and won't recompile those
modules, even if their files change or disappear, in order to avoid creating incompatibilities between
Expand Down
34 changes: 26 additions & 8 deletions src/precompile.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,26 +36,43 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) {
// char*: src text
// At the end we write int32(0) as a terminal sentinel.
size_t len = jl_array_len(udeps);
static jl_value_t *replace_depot_func = NULL;
if (!replace_depot_func)
replace_depot_func = jl_get_global(jl_base_module, jl_symbol("replace_depot_path"));
ios_t srctext;
jl_value_t *deptuple = NULL;
JL_GC_PUSH2(&deptuple, &udeps);
for (size_t i = 0; i < len; i++) {
jl_value_t *deptuple = jl_array_ptr_ref(udeps, i);
deptuple = jl_array_ptr_ref(udeps, i);
jl_value_t *depmod = jl_fieldref(deptuple, 0); // module
// Dependencies declared with `include_dependency` are excluded
// because these may not be Julia code (and could be huge)
if (depmod != (jl_value_t*)jl_main_module) {
jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath
const char *depstr = jl_string_data(dep);
if (!depstr[0])
jl_value_t *abspath = jl_fieldref(deptuple, 1); // file abspath
const char *abspathstr = jl_string_data(abspath);
if (!abspathstr[0])
continue;
ios_t *srctp = ios_file(&srctext, depstr, 1, 0, 0, 0);
ios_t *srctp = ios_file(&srctext, abspathstr, 1, 0, 0, 0);
if (!srctp) {
jl_printf(JL_STDERR, "WARNING: could not cache source text for \"%s\".\n",
jl_string_data(dep));
abspathstr);
continue;
}
size_t slen = jl_string_len(dep);

jl_value_t **replace_depot_args;
JL_GC_PUSHARGS(replace_depot_args, 2);
replace_depot_args[0] = replace_depot_func;
replace_depot_args[1] = abspath;
jl_task_t *ct = jl_current_task;
size_t last_age = ct->world_age;
ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 2);
ct->world_age = last_age;
JL_GC_POP();

size_t slen = jl_string_len(depalias);
write_int32(f, slen);
ios_write(f, depstr, slen);
ios_write(f, jl_string_data(depalias), slen);
posfile = ios_pos(f);
write_uint64(f, 0); // placeholder for length of this file in bytes
uint64_t filelen = (uint64_t) ios_copyall(f, &srctext);
Expand All @@ -65,6 +82,7 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) {
ios_seek_end(f);
}
}
JL_GC_POP();
}
write_int32(f, 0); // mark the end of the source text
}
Expand Down
5 changes: 3 additions & 2 deletions src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -2691,8 +2691,9 @@ static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_a
write_uint8(f, jl_cache_flags());
// write description of contents (name, uuid, buildid)
write_worklist_for_header(f, worklist);
// Determine unique (module, abspath, mtime) dependencies for the files defining modules in the worklist
// (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header.
// Determine unique (module, abspath, fsize, hash, mtime) dependencies for the files defining modules in the worklist
// (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header
// (abspath will be converted to a relocatable @depot path before writing, cf. Base.replace_depot_path).
// Also write Preferences.
// last word of the dependency list is the end of the data / start of the srctextpos
*srctextpos = write_dependency_list(f, worklist, udeps); // srctextpos: position of srctext entry in header index (update later)
Expand Down
26 changes: 22 additions & 4 deletions src/staticdata_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -706,18 +706,36 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t
jl_array_t *udeps = (*udepsp = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL);
ct->world_age = last_age;

static jl_value_t *replace_depot_func = NULL;
if (!replace_depot_func)
replace_depot_func = jl_get_global(jl_base_module, jl_symbol("replace_depot_path"));

// write a placeholder for total size so that we can quickly seek past all of the
// dependencies if we don't need them
initial_pos = ios_pos(s);
write_uint64(s, 0);
size_t i, l = udeps ? jl_array_len(udeps) : 0;
for (i = 0; i < l; i++) {
jl_value_t *deptuple = jl_array_ptr_ref(udeps, i);
jl_value_t *dep = jl_fieldref(deptuple, 1); // file abspath
size_t slen = jl_string_len(dep);
jl_value_t *abspath = jl_fieldref(deptuple, 1);

jl_value_t **replace_depot_args;
JL_GC_PUSHARGS(replace_depot_args, 2);
replace_depot_args[0] = replace_depot_func;
replace_depot_args[1] = abspath;
ct = jl_current_task;
size_t last_age = ct->world_age;
ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 2);
ct->world_age = last_age;
JL_GC_POP();

size_t slen = jl_string_len(depalias);
write_int32(s, slen);
ios_write(s, jl_string_data(dep), slen);
write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 2))); // mtime
ios_write(s, jl_string_data(depalias), slen);
write_uint64(s, jl_unbox_uint64(jl_fieldref(deptuple, 2))); // fsize
write_uint32(s, jl_unbox_uint32(jl_fieldref(deptuple, 3))); // hash
write_float64(s, jl_unbox_float64(jl_fieldref(deptuple, 4))); // mtime
jl_module_t *depmod = (jl_module_t*)jl_fieldref(deptuple, 0); // evaluating module
jl_module_t *depmod_top = depmod;
while (depmod_top->parent != jl_main_module && depmod_top->parent != depmod_top)
Expand Down
2 changes: 2 additions & 0 deletions test/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
/ccalltest
/ccalltest.s
/libccalltest.*
/relocatedepot
/RelocationTestPkg2/src/foo.txt
26 changes: 24 additions & 2 deletions test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ unexport JULIA_BINDIR :=
TESTGROUPS = unicode strings compiler
TESTS = all default stdlib $(TESTGROUPS) \
$(patsubst $(STDLIBDIR)/%/,%,$(dir $(wildcard $(STDLIBDIR)/*/.))) \
$(filter-out runtests testdefs, \
$(filter-out runtests testdefs relocatedepot, \
$(patsubst $(SRCDIR)/%.jl,%,$(wildcard $(SRCDIR)/*.jl))) \
$(foreach group,$(TESTGROUPS), \
$(patsubst $(SRCDIR)/%.jl,%,$(wildcard $(SRCDIR)/$(group)/*.jl)))
Expand All @@ -34,6 +34,28 @@ $(addprefix revise-, $(TESTS)): revise-% :
@cd $(SRCDIR) && \
$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*)

# Two-pass depot relocation test.
# Pass 1 runs `runtests.jl relocatedepot` from the source directory after deleting any
# previous relocatedepot directory. The recipe then recreates that directory, copies
# $(build_datarootdir)/julia and the two RelocationTestPkg directories into it, and
# pass 2 reruns the same test with RELOCATEDEPOT defined (empty) and JULIA_DEPOT_PATH
# pointing at the copied julia directory. Both passes run with JULIA_DEBUG=loading.
relocatedepot:
@rm -rf $(SRCDIR)/relocatedepot
@cd $(SRCDIR) && \
$(call PRINT_JULIA, $(call spawn,JULIA_DEBUG=loading $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@)
@mkdir $(SRCDIR)/relocatedepot
@cp -R $(build_datarootdir)/julia $(SRCDIR)/relocatedepot
@cp -R $(SRCDIR)/RelocationTestPkg1 $(SRCDIR)/relocatedepot
@cp -R $(SRCDIR)/RelocationTestPkg2 $(SRCDIR)/relocatedepot
@cd $(SRCDIR) && \
$(call PRINT_JULIA, $(call spawn,JULIA_DEBUG=loading RELOCATEDEPOT="" JULIA_DEPOT_PATH=$(SRCDIR)/relocatedepot/julia $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@)

# Same two-pass sequence as the `relocatedepot` target, but both invocations of
# runtests.jl are given `--revise` and the test name is taken from the pattern stem ($*).
revise-relocatedepot: revise-% :
@rm -rf $(SRCDIR)/relocatedepot
@cd $(SRCDIR) && \
$(call PRINT_JULIA, $(call spawn,JULIA_DEBUG=loading $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*)
@mkdir $(SRCDIR)/relocatedepot
@cp -R $(build_datarootdir)/julia $(SRCDIR)/relocatedepot
@cp -R $(SRCDIR)/RelocationTestPkg1 $(SRCDIR)/relocatedepot
@cp -R $(SRCDIR)/RelocationTestPkg2 $(SRCDIR)/relocatedepot
@cd $(SRCDIR) && \
$(call PRINT_JULIA, $(call spawn,JULIA_DEBUG=loading RELOCATEDEPOT="" JULIA_DEPOT_PATH=$(SRCDIR)/relocatedepot/julia $(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*)

embedding:
@$(MAKE) -C $(SRCDIR)/$@ check $(EMBEDDING_ARGS)

Expand All @@ -47,4 +69,4 @@ clean:
@$(MAKE) -C embedding $@ $(EMBEDDING_ARGS)
@$(MAKE) -C gcext $@ $(GCEXT_ARGS)

.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) embedding gcext clangsa clean
.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) relocatedepot revise-relocatedepot embedding gcext clangsa clean
4 changes: 4 additions & 0 deletions test/RelocationTestPkg1/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
name = "RelocationTestPkg1"
uuid = "854e1adb-5a97-46bf-a391-1cfe05ac726d"
authors = ["flo "]
version = "0.1.0"
5 changes: 5 additions & 0 deletions test/RelocationTestPkg1/src/RelocationTestPkg1.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Fixture package that the depot relocation tests precompile and load.
module RelocationTestPkg1

# Print a fixed greeting to standard output.
function greet()
    return print("Hello World!")
end

end # module RelocationTestPkg1
Empty file.
4 changes: 4 additions & 0 deletions test/RelocationTestPkg2/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
name = "RelocationTestPkg2"
uuid = "8d933983-b090-4b0b-a37e-c34793f459d1"
authors = ["flo "]
version = "0.1.0"
6 changes: 6 additions & 0 deletions test/RelocationTestPkg2/src/RelocationTestPkg2.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Fixture package for the depot relocation tests; unlike RelocationTestPkg1 it also
# declares a non-source file dependency.
module RelocationTestPkg2

# Register `foo.txt` as an explicit dependency of this module.
include_dependency("foo.txt")

# Print a fixed greeting to standard output.
function greet()
    return print("Hello World!")
end

end # module RelocationTestPkg2
Empty file.
2 changes: 1 addition & 1 deletion test/choosetests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const TESTNAMES = [
"some", "meta", "stacktraces", "docs", "gc",
"misc", "threads", "stress", "binaryplatforms", "atexit",
"enums", "cmdlineargs", "int", "interpreter",
"checked", "bitset", "floatfuncs", "precompile",
"checked", "bitset", "floatfuncs", "precompile", "relocatedepot",
"boundscheck", "error", "ambiguous", "cartesian", "osutils",
"channels", "iostream", "secretbuffer", "specificity",
"reinterpretarray", "syntax", "corelogging", "missing", "asyncmap",
Expand Down
16 changes: 8 additions & 8 deletions test/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -381,8 +381,8 @@ precompile_test_harness(false) do dir
@test string(Base.Docs.doc(Foo.Bar.bar)) == "bar function\n"
@test string(Base.Docs.doc(Foo.Bar)) == "Bar module\n"

modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile)
discard_module = mod_fl_mt -> (mod_fl_mt.filename, mod_fl_mt.mtime)
modules, (deps, _, requires), required_modules, _... = Base.parse_cache_header(cachefile)
discard_module = mod_fl_mt -> mod_fl_mt.filename
@test modules == [ Base.PkgId(Foo) => Base.module_build_id(Foo) % UInt64 ]
@test map(x -> x.filename, deps) == [ Foo_file, joinpath(dir, "foo.jl"), joinpath(dir, "bar.jl") ]
@test requires == [ Base.PkgId(Foo) => Base.PkgId(string(FooBase_module)),
Expand Down Expand Up @@ -422,7 +422,7 @@ precompile_test_harness(false) do dir
@test Dict(modules) == modules_ok

@test discard_module.(deps) == deps1
modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile; srcfiles_only=true)
modules, (_, deps, requires), required_modules, _... = Base.parse_cache_header(cachefile)
@test map(x -> x.filename, deps) == [Foo_file]

@test current_task()(0x01, 0x4000, 0x30031234) == 2
Expand Down Expand Up @@ -485,7 +485,7 @@ precompile_test_harness(false) do dir
""")
Nest = Base.require(Main, Nest_module)
cachefile = joinpath(cachedir, "$Nest_module.ji")
modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile)
modules, (deps, _, requires), required_modules, _... = Base.parse_cache_header(cachefile)
@test last(deps).modpath == ["NestInner"]

UsesB_module = :UsesB4b3a94a1a081a8cb
Expand All @@ -507,7 +507,7 @@ precompile_test_harness(false) do dir
""")
UsesB = Base.require(Main, UsesB_module)
cachefile = joinpath(cachedir, "$UsesB_module.ji")
modules, (deps, requires), required_modules, _... = Base.parse_cache_header(cachefile)
modules, (deps, _, requires), required_modules, _... = Base.parse_cache_header(cachefile)
id1, id2 = only(requires)
@test Base.pkgorigins[id1].cachepath == cachefile
@test Base.pkgorigins[id2].cachepath == joinpath(cachedir, "$B_module.ji")
Expand Down Expand Up @@ -584,12 +584,12 @@ precompile_test_harness(false) do dir
fb_uuid = Base.module_build_id(FooBar)
sleep(2); touch(FooBar_file)
insert!(DEPOT_PATH, 1, dir2)
@test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) === true
@test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc
@eval using FooBar1
@test !isfile(joinpath(cachedir2, "FooBar.ji"))
@test !isfile(joinpath(cachedir, "FooBar1.ji"))
@test isfile(joinpath(cachedir2, "FooBar1.ji"))
@test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) === true
@test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Tsc
@test Base.stale_cachefile(FooBar1_file, joinpath(cachedir2, "FooBar1.ji")) isa Tsc
@test fb_uuid == Base.module_build_id(FooBar)
fb_uuid1 = Base.module_build_id(FooBar1)
Expand Down Expand Up @@ -1712,7 +1712,7 @@ precompile_test_harness("PkgCacheInspector") do load_path
try
# isvalid_cache_header returns checksum id or zero
Base.isvalid_cache_header(io) == 0 && throw(ArgumentError("Invalid header in cache file $cachefile."))
depmodnames = Base.parse_cache_header(io)[3]
depmodnames = Base.parse_cache_header(io, cachefile)[3]
Base.isvalid_file_crc(io) || throw(ArgumentError("Invalid checksum in cache file $cachefile."))
finally
close(io)
Expand Down
104 changes: 104 additions & 0 deletions test/relocatedepot.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
using Test
using Logging


include("testenv.jl")


"""
    test_harness(fn)

Call `fn()` and return its result. The contents of the global `LOAD_PATH` and
`DEPOT_PATH` are snapshotted beforehand and copied back afterwards, also when `fn`
throws, so modifications made by `fn` do not leak into later tests.
"""
function test_harness(@nospecialize(fn))
    saved_load_path = copy(LOAD_PATH)
    saved_depot_path = copy(DEPOT_PATH)
    try
        return fn()
    finally
        # restore in place: both are global vectors that other code holds on to
        copy!(LOAD_PATH, saved_load_path)
        copy!(DEPOT_PATH, saved_depot_path)
    end
end


if !test_relocated_depot

# First pass (depot not relocated): delete any cache files that already exist for the
# package, then load it from the test directory so that it gets precompiled.
@testset "precompile RelocationTestPkg1" begin
    test_harness() do
        here = @__DIR__
        push!(LOAD_PATH, here)
        push!(DEPOT_PATH, here)
        pkg = Base.identify_package("RelocationTestPkg1")
        # remove stale cache files one by one; `force=true` ignores missing files
        foreach(Base.find_all_in_cache_path(pkg)) do cachefile
            rm(cachefile, force=true)
        end
        @test Base.isprecompiled(pkg) == false
        Base.require(pkg) # precompile
        @test Base.isprecompiled(pkg, ignore_loaded=true) == true
    end
end

# First pass (depot not relocated) for the package that calls `include_dependency`:
# delete existing cache files, make sure the tracked `src/foo.txt` exists, then load the
# package so that it gets precompiled.
@testset "precompile RelocationTestPkg2 (contains include_dependency)" begin
    pkgname = "RelocationTestPkg2"
    test_harness() do
        push!(LOAD_PATH, @__DIR__)
        push!(DEPOT_PATH, @__DIR__)
        pkg = Base.identify_package(pkgname)
        cachefiles = Base.find_all_in_cache_path(pkg)
        rm.(cachefiles, force=true)
        @test Base.isprecompiled(pkg) == false
        # create (or update the mtime of) the file named in the package's
        # `include_dependency("foo.txt")` before compiling
        touch(joinpath(@__DIR__, pkgname, "src", "foo.txt"))
        Base.require(pkg) # precompile
        @test Base.isprecompiled(pkg, ignore_loaded=true) == true
    end
end

else

# must come before any of the load tests, because they will recompile and generate new cache files
@testset "attempt loading precompiled pkgs when depot is missing" begin
    test_harness() do
        # LOAD_PATH is reduced to the relocated directory; unlike the load tests,
        # DEPOT_PATH is not extended with it here
        empty!(LOAD_PATH)
        push!(LOAD_PATH, joinpath(@__DIR__, "relocatedepot"))
        for pkgname in ("RelocationTestPkg1", "RelocationTestPkg2")
            pkg = Base.identify_package(pkgname)
            # `only` asserts that exactly one cache file is found for the package
            cachefile = only(Base.find_all_in_cache_path(pkg))
            @info cachefile
            expected_msg = """
                Failed to determine depot from srctext files in cache file $cachefile.
                - Make sure you have adjusted DEPOT_PATH in case you relocated depots."""
            @test_throws ArgumentError(expected_msg) Base.isprecompiled(pkg)
        end
    end
end

# With the relocated directory on both LOAD_PATH and DEPOT_PATH, a stdlib package is
# expected to be reported as precompiled.
@testset "load stdlib from test/relocatedepot" begin
    test_harness() do
        relocated = joinpath(@__DIR__, "relocatedepot")
        push!(LOAD_PATH, relocated)
        push!(DEPOT_PATH, relocated)
        # stdlib should be already precompiled
        pkg = Base.identify_package("DelimitedFiles")
        @test Base.isprecompiled(pkg) == true
    end
end

# Second pass for RelocationTestPkg1: the cache produced before relocation is expected to
# be accepted both before and after the package is loaded from the relocated directory.
@testset "load RelocationTestPkg1 from test/relocatedepot" begin
    test_harness() do
        relocated = joinpath(@__DIR__, "relocatedepot")
        push!(LOAD_PATH, relocated)
        push!(DEPOT_PATH, relocated)
        pkg = Base.identify_package("RelocationTestPkg1")
        @test Base.isprecompiled(pkg) == true
        Base.require(pkg) # re-precompile
        @test Base.isprecompiled(pkg) == true
    end
end

# Second pass for RelocationTestPkg2: its cache is expected to be rejected at first and to
# be valid again once the package has been loaded from the relocated directory.
@testset "load RelocationTestPkg2 (contains include_dependency) from test/relocatedepot" begin
    test_harness() do
        relocated = joinpath(@__DIR__, "relocatedepot")
        push!(LOAD_PATH, relocated)
        push!(DEPOT_PATH, relocated)
        pkg = Base.identify_package("RelocationTestPkg2")
        @test Base.isprecompiled(pkg) == false # moving depot changes mtime of include_dependency
        Base.require(pkg) # re-precompile
        @test Base.isprecompiled(pkg) == true
    end
end

end
2 changes: 2 additions & 0 deletions test/testenv.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ if !@isdefined(testenv_defined)
const rr_exename = ``
end

const test_relocated_depot = haskey(ENV, "RELOCATEDEPOT")

function addprocs_with_testenv(X; rr_allowed=true, kwargs...)
exename = rr_allowed ? `$rr_exename $test_exename` : test_exename
if X isa Integer
Expand Down

0 comments on commit f2df1b4

Please sign in to comment.