From 3a9345c0b0157b6664ed5c28a9ff255cbc7357b0 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 17 Jul 2023 14:54:17 -0400 Subject: [PATCH] precompile: ensure globals are not accidentally created where disallowed (#50541) Usually this is caught by use of `eval`, but we should try to move away from that broad rule to specific functions such as this one, such that eventually we can remove that rule from `eval`. Fix #50538 --- src/module.c | 80 ++++++++++++++++++++++++++++++++-------------- src/staticdata.c | 6 ++++ src/toplevel.c | 24 ++++---------- test/precompile.jl | 31 ++++++++++++++++++ test/staged.jl | 2 ++ 5 files changed, 102 insertions(+), 41 deletions(-) diff --git a/src/module.c b/src/module.c index 89c4c6cdb674e..59bd308d99a41 100644 --- a/src/module.c +++ b/src/module.c @@ -190,16 +190,44 @@ static jl_binding_t *new_binding(jl_module_t *mod, jl_sym_t *name) return b; } +extern jl_mutex_t jl_modules_mutex; + +static void check_safe_newbinding(jl_module_t *m, jl_sym_t *var) +{ + if (jl_current_task->ptls->in_pure_callback) + jl_errorf("new globals cannot be created in a generated function"); + if (jl_options.incremental && jl_generating_output()) { + JL_LOCK(&jl_modules_mutex); + int open = ptrhash_has(&jl_current_modules, (void*)m); + if (!open && jl_module_init_order != NULL) { + size_t i, l = jl_array_len(jl_module_init_order); + for (i = 0; i < l; i++) { + if (m == (jl_module_t*)jl_array_ptr_ref(jl_module_init_order, i)) { + open = 1; + break; + } + } + } + JL_UNLOCK(&jl_modules_mutex); + if (!open) { + jl_errorf("Creating a new global in closed module `%s` (`%s`) breaks incremental compilation " + "because the side effects will not be permanent.", + jl_symbol_name(m->name), jl_symbol_name(var)); + } + } +} + static jl_module_t *jl_binding_dbgmodule(jl_binding_t *b, jl_module_t *m, jl_sym_t *var) JL_GLOBALLY_ROOTED; // get binding for assignment JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) { jl_binding_t *b = jl_get_module_binding(m, var, 1); - - if (b) { - jl_binding_t *b2 = NULL; - if (!jl_atomic_cmpswap(&b->owner, &b2, b) && b2 != b) { + jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner); + if (b2 != b) { + if (b2 == NULL) + check_safe_newbinding(m, var); + if (b2 != NULL || (!jl_atomic_cmpswap(&b->owner, &b2, b) && b2 != b)) { jl_module_t *from = jl_binding_dbgmodule(b, m, var); if (from == m) jl_errorf("cannot assign a value to imported variable %s.%s", @@ -209,7 +237,6 @@ JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_symbol_name(from->name), jl_symbol_name(var), jl_symbol_name(m->name)); } } - return b; } @@ -223,29 +250,31 @@ JL_DLLEXPORT jl_module_t *jl_get_module_of_binding(jl_module_t *m, jl_sym_t *var } // get binding for adding a method -// like jl_get_binding_wr, but has different error paths +// like jl_get_binding_wr, but has different error paths and messages JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m, jl_sym_t *var) { jl_binding_t *b = jl_get_module_binding(m, var, 1); - - jl_binding_t *b2 = NULL; - if (!jl_atomic_cmpswap(&b->owner, &b2, b) && b2 != b) { - jl_value_t *f = jl_atomic_load_relaxed(&b2->value); - jl_module_t *from = jl_binding_dbgmodule(b, m, var); - if (f == NULL) { - // we must have implicitly imported this with using, so call jl_binding_dbgmodule to try to get the name of the module we got this from - jl_errorf("invalid method definition in %s: exported function %s.%s does not exist", - jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var)); - } - // TODO: we might want to require explicitly importing types to add constructors - // or we might want to drop this error entirely - if (!b->imported && !(b2->constp && jl_is_type(f) && strcmp(jl_symbol_name(var), "=>") != 0)) { - jl_errorf("invalid method definition in %s: function %s.%s must be explicitly imported to be extended", - jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var)); + jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner); + if (b2 != b) { + if (b2 == NULL) + check_safe_newbinding(m, var); + if (b2 != NULL || (!jl_atomic_cmpswap(&b->owner, &b2, b) && b2 != b)) { + jl_value_t *f = jl_atomic_load_relaxed(&b2->value); + jl_module_t *from = jl_binding_dbgmodule(b, m, var); + if (f == NULL) { + // we must have implicitly imported this with using, so call jl_binding_dbgmodule to try to get the name of the module we got this from + jl_errorf("invalid method definition in %s: exported function %s.%s does not exist", + jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var)); + } + // TODO: we might want to require explicitly importing types to add constructors + // or we might want to drop this error entirely + if (!b->imported && !(b2->constp && jl_is_type(f) && strcmp(jl_symbol_name(var), "=>") != 0)) { + jl_errorf("invalid method definition in %s: function %s.%s must be explicitly imported to be extended", + jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var)); + } + return b2; } - return b2; } - return b; } @@ -761,7 +790,10 @@ JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *va JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT) { // this function is mostly only used during initialization, so the data races here are not too important to us - jl_binding_t *bp = jl_get_binding_wr(m, var); + jl_binding_t *bp = jl_get_module_binding(m, var, 1); + jl_binding_t *b2 = NULL; + if (!jl_atomic_cmpswap(&bp->owner, &b2, bp) && b2 != bp) + jl_errorf("invalid redefinition of constant %s", jl_symbol_name(var)); if (jl_atomic_load_relaxed(&bp->value) == NULL) { jl_value_t *old_ty = NULL; jl_atomic_cmpswap_relaxed(&bp->ty, &old_ty, (jl_value_t*)jl_any_type); diff --git a/src/staticdata.c b/src/staticdata.c index df080bc68c88f..c05422fd10969 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -1234,6 +1234,12 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED jl_binding_t *b = (jl_binding_t*)v; if (b->globalref == NULL || jl_object_in_image((jl_value_t*)b->globalref->mod)) jl_error("Binding cannot be serialized"); // no way (currently) to recover its identity + // Assign type Any to any owned bindings that don't have a type. + // We don't want these accidentally managing to diverge later in different compilation units. + if (jl_atomic_load_relaxed(&b->owner) == b) { + jl_value_t *old_ty = NULL; + jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type); + } } } diff --git a/src/toplevel.c b/src/toplevel.c index 51ff93488426f..46ae3f3cf0314 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -944,8 +944,10 @@ JL_DLLEXPORT jl_value_t *jl_toplevel_eval(jl_module_t *m, jl_value_t *v) } // Check module `m` is open for `eval/include`, or throw an error. -static void jl_check_open_for(jl_module_t *m, const char* funcname) +JL_DLLEXPORT void jl_check_top_level_effect(jl_module_t *m, char *fname) { + if (jl_current_task->ptls->in_pure_callback) + jl_errorf("%s cannot be used in a generated function", fname); if (jl_options.incremental && jl_generating_output()) { if (m != jl_main_module) { // TODO: this was grand-fathered in JL_LOCK(&jl_modules_mutex); @@ -965,25 +967,15 @@ static void jl_check_open_for(jl_module_t *m, const char* funcname) jl_errorf("Evaluation into the closed module `%s` breaks incremental compilation " "because the side effects will not be permanent. " "This is likely due to some other module mutating `%s` with `%s` during " - "precompilation - don't do this.", name, name, funcname); + "precompilation - don't do this.", name, name, fname); } } } } -JL_DLLEXPORT void jl_check_top_level_effect(jl_module_t *m, char *fname) -{ - if (jl_current_task->ptls->in_pure_callback) - jl_errorf("%s cannot be used in a generated function", fname); - jl_check_open_for(m, fname); -} - JL_DLLEXPORT jl_value_t *jl_toplevel_eval_in(jl_module_t *m, jl_value_t *ex) { - jl_task_t *ct = jl_current_task; - if (ct->ptls->in_pure_callback) - jl_error("eval cannot be used in a generated function"); - jl_check_open_for(m, "eval"); + jl_check_top_level_effect(m, "eval"); jl_value_t *v = NULL; int last_lineno = jl_lineno; const char *last_filename = jl_filename; @@ -1029,10 +1021,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text, if (!jl_is_string(text) || !jl_is_string(filename)) { jl_errorf("Expected `String`s for `text` and `filename`"); } - jl_task_t *ct = jl_current_task; - if (ct->ptls->in_pure_callback) - jl_error("cannot use include inside a generated function"); - jl_check_open_for(module, "include"); + jl_check_top_level_effect(module, "include"); jl_value_t *result = jl_nothing; jl_value_t *ast = NULL; @@ -1045,6 +1034,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text, jl_errorf("jl_parse_all() must generate a top level expression"); } + jl_task_t *ct = jl_current_task; int last_lineno = jl_lineno; const char *last_filename = jl_filename; size_t last_age = ct->world_age; diff --git a/test/precompile.jl b/test/precompile.jl index 62d862c384040..d76a5a9a16f85 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -1771,6 +1771,37 @@ precompile_test_harness("Issue #48391") do load_path @test_throws ErrorException isless(x, x) end +precompile_test_harness("Issue #50538") do load_path + write(joinpath(load_path, "I50538.jl"), + """ + module I50538 + const newglobal = try + Base.newglobal = false + catch ex + ex isa ErrorException || rethrow() + ex + end + const newtype = try + Core.set_binding_type!(Base, :newglobal) + catch ex + ex isa ErrorException || rethrow() + ex + end + global undefglobal + end + """) + ji, ofile = Base.compilecache(Base.PkgId("I50538")) + @eval using I50538 + @test I50538.newglobal.msg == "Creating a new global in closed module `Base` (`newglobal`) breaks incremental compilation because the side effects will not be permanent." + @test I50538.newtype.msg == "Creating a new global in closed module `Base` (`newglobal`) breaks incremental compilation because the side effects will not be permanent." + @test_throws(ErrorException("cannot set type for global I50538.undefglobal. It already has a value or is already set to a different type."), + Core.set_binding_type!(I50538, :undefglobal, Int)) + Core.set_binding_type!(I50538, :undefglobal, Any) + @test Core.get_binding_type(I50538, :undefglobal) === Any + @test !isdefined(I50538, :undefglobal) +end + + empty!(Base.DEPOT_PATH) append!(Base.DEPOT_PATH, original_depot_path) empty!(Base.LOAD_PATH) diff --git a/test/staged.jl b/test/staged.jl index df351d8d47b96..5204f5f6ca777 100644 --- a/test/staged.jl +++ b/test/staged.jl @@ -308,6 +308,8 @@ end @generated function f33243() :(global x33243 = 2) end +@test_throws ErrorException f33243() +global x33243 @test f33243() === 2 @test x33243 === 2