From 572f274e73d44d31bb3a92062bb4e0ff0ae37a82 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Fri, 5 Jan 2024 03:31:54 +0000 Subject: [PATCH] staticdata: handle cycles in datatypes Handle any sort of cycle encountered in the datatype super fields by always deferring that field until later and setting a deferred mechanism for updating the field only after the supertype is ready. Fix #52660 --- src/staticdata.c | 147 +++++++++++++++++++++++------------------ src/staticdata_utils.c | 13 ++-- test/precompile.jl | 6 +- 3 files changed, 95 insertions(+), 71 deletions(-) diff --git a/src/staticdata.c b/src/staticdata.c index 8244c5a4373ce..d907bc9e45c79 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -314,7 +314,6 @@ static arraylist_t deser_sym; static htable_t external_objects; static htable_t serialization_order; // to break cycles, mark all objects that are serialized -static htable_t unique_ready; // as we serialize types, we need to know if all reachable objects are also already serialized. This tracks whether `immediate` has been set for all of them. static htable_t nullptrs; // FIFO queue for objects to be serialized. Anything requiring fixup upon deserialization // must be "toplevel" in this queue. For types, parameters and field types must appear @@ -485,6 +484,7 @@ typedef struct { arraylist_t relocs_list; // a list of (location, target) pairs, see description at top arraylist_t gctags_list; // " arraylist_t uniquing_types; // a list of locations that reference types that must be de-duplicated + arraylist_t uniquing_super; // a list of datatypes, used in super fields, that need to be marked in uniquing_types once they are reached, for handling unique-ing of them on deserialization arraylist_t uniquing_objs; // a list of locations that reference non-types that must be de-duplicated arraylist_t fixup_types; // a list of locations of types requiring (re)caching arraylist_t fixup_objs; // a list of locations of objects requiring (re)caching @@ -757,14 +757,13 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ { jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); jl_queue_for_serialization_(s, (jl_value_t*)t, 1, immediate); + const jl_datatype_layout_t *layout = t->layout; if (!recursive) goto done_fields; if (s->incremental && jl_is_datatype(v) && immediate) { jl_datatype_t *dt = (jl_datatype_t*)v; - // ensure super is queued (though possibly not yet handled, since it may have cycles) - jl_queue_for_serialization_(s, (jl_value_t*)dt->super, 1, 1); // ensure all type parameters are recached jl_queue_for_serialization_(s, (jl_value_t*)dt->parameters, 1, 1); if (jl_is_datatype_singleton(dt) && needs_uniquing(dt->instance)) { @@ -773,7 +772,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ // (it may get serialized from elsewhere though) record_field_change(&dt->instance, jl_nothing); } - immediate = 0; // do not handle remaining fields immediately (just field types remains) + goto done_fields; // for now } if (s->incremental && jl_is_method_instance(v)) { jl_method_instance_t *mi = (jl_method_instance_t*)v; @@ -829,11 +828,9 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ } } - if (immediate) // must be things that can be recursively handled, and valid as type parameters assert(jl_is_immutable(t) || jl_is_typevar(v) || jl_is_symbol(v) || jl_is_svec(v)); - const jl_datatype_layout_t *layout = t->layout; if (layout->npointers == 0) { // bitstypes do not require recursion } @@ -896,22 +893,35 @@ done_fields: ; // We've encountered an item we need to cache void **bp = ptrhash_bp(&serialization_order, v); - assert(*bp != (void*)(uintptr_t)-1); - if (s->incremental) { - void **bp2 = ptrhash_bp(&unique_ready, v); - if (*bp2 == HT_NOTFOUND) - assert(*bp == (void*)(uintptr_t)-2); - else if (*bp != (void*)(uintptr_t)-2) - return; - } - else { - assert(*bp == (void*)(uintptr_t)-2); - } + assert(*bp == (void*)(uintptr_t)-2); arraylist_push(&serialization_queue, (void*) v); size_t idx = serialization_queue.len - 1; assert(serialization_queue.len < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many items to serialize"); - *bp = (void*)((char*)HT_NOTFOUND + 1 + idx); + + // DataType is very unusual, in that some of the fields need to be pre-order, and some + // (notably super) must not be (even if `jl_queue_for_serialization_` would otherwise + // try to promote itself to be immediate) + if (s->incremental && jl_is_datatype(v) && immediate && recursive) { + jl_datatype_t *dt = (jl_datatype_t*)v; + void **bp = ptrhash_bp(&serialization_order, (void*)dt->super); + if (*bp != (void*)-2) { + // if super is already on the stack of things to handle when this returns, do + // not try to handle it now + jl_queue_for_serialization_(s, (jl_value_t*)dt->super, 1, immediate); + } + immediate = 0; + char *data = (char*)jl_data_ptr(v); + size_t i, np = layout->npointers; + for (i = 0; i < np; i++) { + uint32_t ptr = jl_ptr_offset(t, i); + if (ptr * sizeof(jl_value_t*) == offsetof(jl_datatype_t, super)) + continue; // skip the super field, since it might not be quite validly ordered + int mutabl = 1; + jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], mutabl); + jl_queue_for_serialization_(s, fld, 1, immediate); + } + } } static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED @@ -930,28 +940,19 @@ static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, i } void **bp = ptrhash_bp(&serialization_order, v); - if (*bp == HT_NOTFOUND) { - *bp = (void*)(uintptr_t)(immediate ? -2 : -1); - } - else { - if (!s->incremental || !immediate || !recursive) - return; - void **bp2 = ptrhash_bp(&unique_ready, v); - if (*bp2 == HT_NOTFOUND) - *bp2 = v; // now is unique_ready - else { - assert(*bp != (void*)(uintptr_t)-1); - return; // already was unique_ready - } - assert(*bp != (void*)(uintptr_t)-2); // should be unique_ready then - if (*bp == (void*)(uintptr_t)-1) - *bp = (void*)(uintptr_t)-2; // now immediate - } + assert(!immediate || *bp != (void*)(uintptr_t)-2); + if (*bp == HT_NOTFOUND) + *bp = (void*)(uintptr_t)-1; // now enqueued + else if (!s->incremental || !immediate || !recursive || *bp != (void*)(uintptr_t)-1) + return; - if (immediate) + if (immediate) { + *bp = (void*)(uintptr_t)-2; // now immediate jl_insert_into_serialization_queue(s, v, recursive, immediate); - else + } + else { arraylist_push(&object_worklist, (void*)v); + } } // Do a pre-order traversal of the to-serialize worklist, in the identical order @@ -1101,8 +1102,10 @@ static void record_uniquing(jl_serializer_state *s, jl_value_t *fld, uintptr_t o if (s->incremental && jl_needs_serialization(s, fld) && needs_uniquing(fld)) { if (jl_is_datatype(fld) || jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(fld))) arraylist_push(&s->uniquing_types, (void*)(uintptr_t)offset); - else + else if (jl_is_method_instance(fld)) arraylist_push(&s->uniquing_objs, (void*)(uintptr_t)offset); + else + assert(0 && "unknown object type with needs_uniquing set"); } } @@ -1301,7 +1304,15 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED write_pointerfield(s, (jl_value_t*)mi->sparam_vals); continue; } - else if (!jl_is_datatype(v)) { + else if (jl_is_datatype(v)) { + for (size_t i = 0; i < s->uniquing_super.len; i++) { + if (s->uniquing_super.items[i] == (void*)v) { + s->uniquing_super.items[i] = arraylist_pop(&s->uniquing_super); + arraylist_push(&s->uniquing_types, (void*)(uintptr_t)(reloc_offset|3)); + } + } + } + else { assert(jl_is_datatype_singleton(t) && "unreachable"); } } @@ -1698,6 +1709,9 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED ios_write(s->const_data, (char*)&dyn, sizeof(jl_fielddescdyn_t)); } } + void *superidx = ptrhash_get(&serialization_order, dt->super); + if (s->incremental && superidx != HT_NOTFOUND && (char*)superidx - 1 - (char*)HT_NOTFOUND > item && needs_uniquing((jl_value_t*)dt->super)) + arraylist_push(&s->uniquing_super, dt->super); } else if (jl_is_typename(v)) { assert(f == s->s); @@ -1741,6 +1755,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED } } } + assert(s->uniquing_super.len == 0); } // In deserialization, create Symbols and set up the @@ -2562,7 +2577,6 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, ptrhash_put(&fptr_to_id, (void*)(uintptr_t)id_to_fptrs[i], (void*)(i + 2)); } htable_new(&serialization_order, 25000); - htable_new(&unique_ready, 0); htable_new(&nullptrs, 0); arraylist_new(&object_worklist, 0); arraylist_new(&serialization_queue, 0); @@ -2587,6 +2601,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, arraylist_new(&s.relocs_list, 0); arraylist_new(&s.gctags_list, 0); arraylist_new(&s.uniquing_types, 0); + arraylist_new(&s.uniquing_super, 0); arraylist_new(&s.uniquing_objs, 0); arraylist_new(&s.fixup_types, 0); arraylist_new(&s.fixup_objs, 0); @@ -2838,6 +2853,11 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, arraylist_free(&object_worklist); arraylist_free(&serialization_queue); arraylist_free(&layout_table); + arraylist_free(&s.uniquing_types); + arraylist_free(&s.uniquing_super); + arraylist_free(&s.uniquing_objs); + arraylist_free(&s.fixup_types); + arraylist_free(&s.fixup_objs); arraylist_free(&s.ccallable_list); arraylist_free(&s.memowner_list); arraylist_free(&s.memref_list); @@ -2849,7 +2869,6 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, if (worklist) htable_free(&external_objects); htable_free(&serialization_order); - htable_free(&unique_ready); htable_free(&nullptrs); htable_free(&symbol_table); htable_free(&fptr_to_id); @@ -3220,31 +3239,43 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl uintptr_t item = (uintptr_t)s.uniquing_types.items[i]; // check whether we are operating on the typetag // (needing to ignore GC bits) or a regular field - int tag = (item & 1) == 1; - // check whether this is a gvar index - int gvar = (item & 2) == 2; + // and check whether this is a gvar index + int tag = (item & 3); item &= ~(uintptr_t)3; uintptr_t *pfld; jl_value_t **obj, *newobj; - if (gvar) { + if (tag == 3) { + obj = (jl_value_t**)(image_base + item); + pfld = NULL; + for (size_t i = 0; i < delay_list.len; i += 2) { + if (obj == (jl_value_t **)delay_list.items[i + 0]) { + pfld = (uintptr_t*)delay_list.items[i + 1]; + delay_list.items[i + 1] = arraylist_pop(&delay_list); + delay_list.items[i + 0] = arraylist_pop(&delay_list); + break; + } + } + assert(pfld); + } + else if (tag == 2) { if (image->gvars_base == NULL) continue; item >>= 2; assert(item < s.gvar_record->size / sizeof(reloc_t)); pfld = sysimg_gvars(image->gvars_base, image->gvars_offsets, item); obj = *(jl_value_t***)pfld; - assert(tag == 0); } else { pfld = (uintptr_t*)(image_base + item); - if (tag) + if (tag == 1) obj = (jl_value_t**)jl_typeof(jl_valueof(pfld)); else obj = *(jl_value_t***)pfld; if ((char*)obj > (char*)pfld) { + // this must be the super field assert(tag == 0); - arraylist_push(&delay_list, pfld); arraylist_push(&delay_list, obj); + arraylist_push(&delay_list, pfld); ptrhash_put(&new_dt_objs, (void*)obj, obj); // mark obj as invalid *pfld = (uintptr_t)NULL; continue; @@ -3294,25 +3325,14 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl assert(newobj && newobj != jl_nothing); arraylist_push(&cleanup_list, (void*)obj); } - if (tag) + if (tag == 1) *pfld = (uintptr_t)newobj | GC_OLD | GC_IN_IMAGE; else *pfld = (uintptr_t)newobj; assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg)); assert(jl_typetagis(obj, otyp)); } - // A few fields (reached via super) might be self-recursive. This is rare, but handle them now. - // They cannot be instances though, since the type must fully exist before the singleton field can be allocated - for (size_t i = 0; i < delay_list.len; ) { - uintptr_t *pfld = (uintptr_t*)delay_list.items[i++]; - jl_value_t **obj = (jl_value_t **)delay_list.items[i++]; - assert(jl_is_datatype(obj)); - jl_datatype_t *dt = (jl_datatype_t*)obj[0]; - assert(jl_is_datatype(dt)); - jl_value_t *newobj = (jl_value_t*)dt; - *pfld = (uintptr_t)newobj; - assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg)); - } + assert(delay_list.len == 0); arraylist_free(&delay_list); // now that all the fields of dt are assigned and unique, copy them into // their final newdt memory location: this ensures we do not accidentally @@ -3360,11 +3380,12 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl for (size_t i = 0; i < s.uniquing_objs.len; i++) { uintptr_t item = (uintptr_t)s.uniquing_objs.items[i]; // check whether this is a gvar index - int gvar = (item & 2) == 2; + int tag = (item & 3); + assert(tag == 0 || tag == 2); item &= ~(uintptr_t)3; uintptr_t *pfld; jl_value_t **obj, *newobj; - if (gvar) { + if (tag == 2) { if (image->gvars_base == NULL) continue; item >>= 2; diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index 35233cf0795f0..9fded902df0bd 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -45,16 +45,15 @@ int must_be_new_dt(jl_value_t *t, htable_t *news, char *image_base, size_t sizeo jl_datatype_t *dt = (jl_datatype_t*)t; assert(jl_object_in_image((jl_value_t*)dt->name) && "type_in_worklist mistake?"); jl_datatype_t *super = dt->super; - // check if super is news, since then we must be new also - // (it is also possible that super is indeterminate now, wait for `t` - // to be resolved, then will be determined later and fixed up by the - // delay_list, for this and any other references to it). - while (super != jl_any_type) { - assert(super); + // fast-path: check if super is in news, since then we must be new also + // (it is also possible that super is indeterminate or NULL right now, + // waiting for `t` to be resolved, then will be determined later as + // soon as possible afterwards). + while (super != NULL && super != jl_any_type) { if (ptrhash_has(news, (void*)super)) return 1; if (!(image_base < (char*)super && (char*)super <= image_base + sizeof_sysimg)) - break; // fast-path for rejection of super + break; // the rest must all be non-new // otherwise super might be something that was not cached even though a later supertype might be // for example while handling `Type{Mask{4, U} where U}`, if we have `Mask{4, U} <: AbstractSIMDVector{4}` super = super->super; diff --git a/test/precompile.jl b/test/precompile.jl index e655dd03b9798..a717d543bc0ce 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -115,6 +115,8 @@ precompile_test_harness(false) do dir d = den(a) return h end + abstract type AbstractAlgebraMap{A} end + struct GAPGroupHomomorphism{A, B} <: AbstractAlgebraMap{GAPGroupHomomorphism{B, A}} end end """) write(Foo2_file, @@ -130,7 +132,7 @@ precompile_test_harness(false) do dir write(Foo_file, """ module $Foo_module - import $FooBase_module, $FooBase_module.typeA + import $FooBase_module, $FooBase_module.typeA, $FooBase_module.GAPGroupHomomorphism import $Foo2_module: $Foo2_module, override, overridenc import $FooBase_module.hash import Test @@ -213,6 +215,8 @@ precompile_test_harness(false) do dir Base.convert(::Type{Some{Value18343}}, ::Value18343{Some}) = 2 Base.convert(::Type{Ref}, ::Value18343{T}) where {T} = 3 + const GAPType1 = GAPGroupHomomorphism{Nothing, Nothing} + const GAPType2 = GAPGroupHomomorphism{1, 2} # issue #28297 mutable struct Result