Skip to content

Commit

Permalink
Encode link_id in tagged linkage
Browse files Browse the repository at this point in the history
On 64-bit, we have enough space to encode (1) the tag, (2) the
`depmods` index, and (3) the offset all in a single 64-bit pointer
field. This means we don't need the external `link_id` arrays,
which reduces the size of many pkgimages by ~5%.

On 32-bit, we don't have enough bits to implement this strategy.
However, most linkages seem to be against the sysimage, and so
by giving that a separate tag we can achieve similar compression
because the `link_id` lists will be much shorter.
  • Loading branch information
timholy authored and vtjnash committed Feb 14, 2023
1 parent 6976bac commit 20be3c6
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 25 deletions.
117 changes: 92 additions & 25 deletions src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,12 @@ typedef struct {
arraylist_t fixup_types; // a list of locations of types requiring (re)caching
arraylist_t fixup_objs; // a list of locations of objects requiring (re)caching
arraylist_t ccallable_list; // @ccallable entry points to install
#ifdef _P64
// On 64-bit, we can encode the "`depmods` index" in the reftag, so all we need is a mapping between
// the buildid_idx & depmods_idx:
jl_array_t *buildid_depmods_idxs;
// On 64bit, all the `link_ids_*` arrays below will be NULL.
#endif
// record of build_ids for all external linkages, in order of serialization for the current sysimg/pkgimg
// conceptually, the base pointer for the jth externally-linked item is determined from
// i = findfirst(==(link_ids[j]), jl_build_ids)
Expand Down Expand Up @@ -386,7 +392,8 @@ enum RefTags {
SymbolRef, // symbols
FunctionRef, // generic functions
BuiltinFunctionRef, // builtin functions
ExternalLinkage // items defined externally (used when serializing packages)
SysimageLinkage, // reference to the sysimage (from pkgimage)
ExternalLinkage // reference to some other pkgimage
};

// calling conventions for internal entry points.
Expand All @@ -404,9 +411,11 @@ typedef enum {

#ifdef _P64
#define RELOC_TAG_OFFSET 61
#define DEPS_IDX_OFFSET 40 // only on 64-bit can we encode the dependency-index as part of the tagged reloc
#else
// this supports up to 8 RefTags, 512MB of pointer data, and 4/2 (64/32-bit) GB of constant data.
#define RELOC_TAG_OFFSET 29
#define DEPS_IDX_OFFSET RELOC_TAG_OFFSET
#endif

#if RELOC_TAG_OFFSET <= 32
Expand Down Expand Up @@ -758,7 +767,7 @@ done_fields: ;
}
arraylist_push(&serialization_queue, (void*) v);
size_t idx = serialization_queue.len - 1;
assert(serialization_queue.len < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many items to serialize");
assert(serialization_queue.len < ((uintptr_t)1 << DEPS_IDX_OFFSET) && "too many items to serialize");

*bp = (void*)((char*)HT_NOTFOUND + 1 + idx);
}
Expand Down Expand Up @@ -863,29 +872,40 @@ static void write_pointer(ios_t *s) JL_NOTSAFEPOINT
static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) {
size_t i = external_blob_index(v);
if (i < n_linkage_blobs()) {
// We found the sysimg/pkg that this item links against
// Compute the relocation code
size_t offset = (uintptr_t)v - (uintptr_t)jl_linkage_blobs.items[2*i];
offset /= sizeof(void*);
assert(offset < ((uintptr_t)1 << DEPS_IDX_OFFSET) && "offset to external image too large");

if (i == 0)
return ((uintptr_t)SysimageLinkage << RELOC_TAG_OFFSET) + offset; // sysimage
assert(link_ids && jl_is_array(link_ids));
#ifdef _P64
uint64_t *link_id_data = (uint64_t*)jl_array_data(link_ids);
return ((uintptr_t)ExternalLinkage << RELOC_TAG_OFFSET)
+ (((uintptr_t)link_id_data[i]) << DEPS_IDX_OFFSET) + offset; // on 64-bit, link_ids stores the mapping from build_id_idx -> deps_idx
#else
// On 32bit, we store the image key in `link_ids`
assert(jl_build_ids && jl_is_array(jl_build_ids));
uint64_t *build_id_data = (uint64_t*)jl_array_data(jl_build_ids);
// We found the sysimg/pkg that this item links against
// Store the image key in `link_ids`
jl_array_grow_end(link_ids, 1);
uint64_t *link_id_data = (uint64_t*)jl_array_data(link_ids);
uint64_t *link_id_data = (uint64_t*)jl_array_data(link_ids); // wait until after the `grow`
link_id_data[jl_array_len(link_ids)-1] = build_id_data[i];
// Compute the relocation code
size_t offset = (uintptr_t)v - (uintptr_t)jl_linkage_blobs.items[2*i];
offset /= sizeof(void*);
assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to external image too large");
// jl_printf(JL_STDOUT, "External link %ld against blob %d with key %ld at position 0x%lx with offset 0x%lx to \n", jl_array_len(link_ids), i, build_id_data[i>>1], ios_pos(s->s), offset);
// jl_(v);
return ((uintptr_t)ExternalLinkage << RELOC_TAG_OFFSET) + offset;
#endif
}
return 0;
}

// Return the integer `id` for `v`. Generically this is looked up in `serialization_order`,
// but symbols, small integers, and a couple of special items (`nothing` and the root Task)
// have special handling.
#ifdef _P64
#define backref_id(s, v, link_ids) _backref_id(s, (jl_value_t*)(v), s->buildid_depmods_idxs)
#else
#define backref_id(s, v, link_ids) _backref_id(s, (jl_value_t*)(v), link_ids)
#endif
static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) JL_NOTSAFEPOINT
{
assert(v != NULL && "cannot get backref to NULL object");
Expand All @@ -898,7 +918,7 @@ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t *
write_uint32(s->symbols, l);
ios_write(s->symbols, jl_symbol_name((jl_sym_t*)v), l + 1);
size_t offset = ++nsym_tag;
assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many symbols");
assert(offset < ((uintptr_t)1 << DEPS_IDX_OFFSET) && "too many symbols");
idx = (void*)((char*)HT_NOTFOUND + ((uintptr_t)SymbolRef << RELOC_TAG_OFFSET) + offset);
*pidx = idx;
}
Expand Down Expand Up @@ -1143,7 +1163,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
// write data and relocations
newa->data = NULL; // relocation offset
data /= sizeof(void*);
assert(data < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to constant data too large");
assert(data < ((uintptr_t)1 << DEPS_IDX_OFFSET) && "offset to constant data too large");
arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location
arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + data)); // relocation target
if (jl_is_cpointer_type(et)) {
Expand Down Expand Up @@ -1246,7 +1266,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), 8);
write_padding(s->const_data, data - ios_pos(s->const_data));
data /= sizeof(void*);
assert(data < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to constant data too large");
assert(data < ((uintptr_t)1 << DEPS_IDX_OFFSET) && "offset to constant data too large");
arraylist_push(&s->relocs_list, (void*)(reloc_offset + 8)); // relocation location
arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + data)); // relocation target
void *pdata = jl_unbox_voidpointer(jl_get_nth_field(v, 2));
Expand Down Expand Up @@ -1533,6 +1553,8 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset)
case FunctionRef:
assert(offset < JL_API_MAX && "unknown function pointer id");
break;
case SysimageLinkage:
break;
case ExternalLinkage:
break;
default:
Expand Down Expand Up @@ -1599,15 +1621,25 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas
//default:
assert("corrupt relocation item id");
}
case SysimageLinkage:
assert(jl_linkage_blobs.len > 0);
return (uintptr_t)jl_linkage_blobs.items[0] + offset*sizeof(void*);
case ExternalLinkage:
assert(link_ids);
assert(link_index);
assert(jl_build_ids);
uint64_t *link_id_data = (uint64_t*)jl_array_data(link_ids);
uint64_t *build_id_data = (uint64_t*)jl_array_data(jl_build_ids);
#ifdef _P64
size_t depsidx = offset >> DEPS_IDX_OFFSET;
offset &= ((size_t)1 << DEPS_IDX_OFFSET) - 1;
assert(depsidx < jl_array_len(link_ids)); // on 64-bit, here link_ids is a build_id lookup by depmod index
uint64_t build_id = link_id_data[depsidx];
#else
assert(link_index);
assert(0 <= *link_index && *link_index < jl_array_len(link_ids));
uint64_t build_id = link_id_data[*link_index];
*link_index += 1;
#endif
size_t i = 0, nids = jl_array_len(jl_build_ids);
while (i < nids) {
if (build_id == build_id_data[i])
Expand Down Expand Up @@ -1799,8 +1831,12 @@ static jl_value_t *jl_delayed_reloc(jl_serializer_state *s, uintptr_t offset) JL
uintptr_t base = (uintptr_t)&s->s->buf[0];
size_t size = s->s->size;
int link_index = 0;
#ifdef _P64
jl_value_t *ret = (jl_value_t*)get_item_for_reloc(s, base, size, offset, s->buildid_depmods_idxs, &link_index);
#else
jl_value_t *ret = (jl_value_t*)get_item_for_reloc(s, base, size, offset, s->link_ids_relocs, &link_index);
assert(link_index < jl_array_len(s->link_ids_relocs));
#endif
return ret;
}

Expand Down Expand Up @@ -1890,13 +1926,24 @@ static void jl_update_all_gvars(jl_serializer_state *s, jl_image_t *image, uint3
reloc_t *gvars = (reloc_t*)&s->gvar_record->buf[0];
int gvar_link_index = 0;
int external_fns_link_index = 0;
jl_array_t *link_ids = NULL;
for (i = 0; i < l; i++) {
uintptr_t offset = gvars[i];
uintptr_t v = 0;
if (i < external_fns_begin) {
v = get_item_for_reloc(s, base, size, offset, s->link_ids_gvars, &gvar_link_index);
#ifdef _P64
link_ids = s->buildid_depmods_idxs;
#else
link_ids = s->link_ids_gvars;
#endif
v = get_item_for_reloc(s, base, size, offset, link_ids, &gvar_link_index);
} else {
v = get_item_for_reloc(s, base, size, offset, s->link_ids_external_fnvars, &external_fns_link_index);
#ifdef _P64
link_ids = s->buildid_depmods_idxs;
#else
link_ids = s->link_ids_external_fnvars;
#endif
v = get_item_for_reloc(s, base, size, offset, link_ids, &external_fns_link_index);
}
uintptr_t *gv = sysimg_gvars(image->gvars_base, image->gvars_offsets, i);
*gv = v;
Expand Down Expand Up @@ -2217,7 +2264,7 @@ static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *new
}

// In addition to the system image (where `worklist = NULL`), this can also save incremental images with external linkage
static void jl_save_system_image_to_stream(ios_t *f,
static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *udeps,
jl_array_t *worklist, jl_array_t *extext_methods,
jl_array_t *new_specializations, jl_array_t *method_roots_list,
jl_array_t *ext_targets, jl_array_t *edges) JL_GC_DISABLED
Expand Down Expand Up @@ -2265,10 +2312,15 @@ static void jl_save_system_image_to_stream(ios_t *f,
arraylist_new(&s.fixup_types, 0);
arraylist_new(&s.fixup_objs, 0);
arraylist_new(&s.ccallable_list, 0);
#ifdef _P64
s.buildid_depmods_idxs = image_to_depmodidx(jl_build_ids, udeps);
s.link_ids_relocs = s.link_ids_gctags = s.link_ids_gvars = s.link_ids_external_fnvars = NULL;
#else
s.link_ids_relocs = jl_alloc_array_1d(jl_array_uint64_type, 0);
s.link_ids_gctags = jl_alloc_array_1d(jl_array_uint64_type, 0);
s.link_ids_gvars = jl_alloc_array_1d(jl_array_uint64_type, 0);
s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_uint64_type, 0);
#endif
htable_new(&s.callers_with_edges, 0);
jl_value_t **const*const tags = get_tags(); // worklist == NULL ? get_tags() : NULL;

Expand Down Expand Up @@ -2367,21 +2419,21 @@ static void jl_save_system_image_to_stream(ios_t *f,
jl_write_relocations(&s);
}

if (sysimg.size > ((uintptr_t)1 << RELOC_TAG_OFFSET)) {
if (sysimg.size > ((uintptr_t)1 << DEPS_IDX_OFFSET)) {
jl_printf(
JL_STDERR,
"ERROR: system image too large: sysimg.size is %jd but the limit is %" PRIxPTR "\n",
(intmax_t)sysimg.size,
((uintptr_t)1 << RELOC_TAG_OFFSET)
((uintptr_t)1 << DEPS_IDX_OFFSET)
);
jl_exit(1);
}
if (const_data.size / sizeof(void*) > ((uintptr_t)1 << RELOC_TAG_OFFSET)) {
if (const_data.size / sizeof(void*) > ((uintptr_t)1 << DEPS_IDX_OFFSET)) {
jl_printf(
JL_STDERR,
"ERROR: system image too large: const_data.size is %jd but the limit is %" PRIxPTR "\n",
(intmax_t)const_data.size,
((uintptr_t)1 << RELOC_TAG_OFFSET)*sizeof(void*)
((uintptr_t)1 << DEPS_IDX_OFFSET)*sizeof(void*)
);
jl_exit(1);
}
Expand Down Expand Up @@ -2457,6 +2509,7 @@ static void jl_save_system_image_to_stream(ios_t *f,
jl_write_value(&s, ext_targets);
jl_write_value(&s, edges);
}
#ifndef _P64
write_uint32(f, jl_array_len(s.link_ids_gctags));
ios_write(f, (char*)jl_array_data(s.link_ids_gctags), jl_array_len(s.link_ids_gctags)*sizeof(uint64_t));
write_uint32(f, jl_array_len(s.link_ids_relocs));
Expand All @@ -2465,6 +2518,7 @@ static void jl_save_system_image_to_stream(ios_t *f,
ios_write(f, (char*)jl_array_data(s.link_ids_gvars), jl_array_len(s.link_ids_gvars)*sizeof(uint64_t));
write_uint32(f, jl_array_len(s.link_ids_external_fnvars));
ios_write(f, (char*)jl_array_data(s.link_ids_external_fnvars), jl_array_len(s.link_ids_external_fnvars)*sizeof(uint64_t));
#endif
write_uint32(f, external_fns_begin);
jl_write_arraylist(s.s, &s.ccallable_list);
}
Expand Down Expand Up @@ -2591,7 +2645,7 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
}
if (_native_data != NULL)
native_functions = *_native_data;
jl_save_system_image_to_stream(ff, worklist, extext_methods, new_specializations, method_roots_list, ext_targets, edges);
jl_save_system_image_to_stream(ff, *udeps, worklist, extext_methods, new_specializations, method_roots_list, ext_targets, edges);
if (_native_data != NULL)
native_functions = NULL;
// make sure we don't run any Julia code concurrently before this point
Expand Down Expand Up @@ -2764,6 +2818,9 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
offset_ext_targets = jl_read_offset(&s);
offset_edges = jl_read_offset(&s);
}
#ifdef _P64
s.buildid_depmods_idxs = depmod_to_imageidx(depmods);
#else
size_t nlinks_gctags = read_uint32(f);
if (nlinks_gctags > 0) {
s.link_ids_gctags = jl_alloc_array_1d(jl_array_uint64_type, nlinks_gctags);
Expand All @@ -2784,6 +2841,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
s.link_ids_external_fnvars = jl_alloc_array_1d(jl_array_uint64_type, nlinks_external_fnvars);
ios_read(f, (char*)jl_array_data(s.link_ids_external_fnvars), nlinks_external_fnvars * sizeof(uint64_t));
}
#endif
uint32_t external_fns_begin = read_uint32(f);
jl_read_arraylist(s.s, ccallable_list ? ccallable_list : &s.ccallable_list);
if (s.incremental) {
Expand All @@ -2810,10 +2868,19 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
*base = image_base;

s.s = &sysimg;
jl_read_reloclist(&s, s.link_ids_gctags, GC_OLD); // gctags
#ifdef _P64
jl_array_t *link_ids = NULL;
#else
jl_array_t *link_ids = s.link_ids_gctags;
#endif
jl_read_reloclist(&s, link_ids, GC_OLD); // gctags
size_t sizeof_tags = ios_pos(&relocs);
(void)sizeof_tags;
jl_read_reloclist(&s, s.link_ids_relocs, 0); // general relocs
#ifdef _P64
#else
link_ids = s.link_ids_relocs;
#endif
jl_read_reloclist(&s, link_ids, 0); // general relocs
// s.link_ids_gvars will be processed in `jl_update_all_gvars`
// s.link_ids_external_fns will be processed in `jl_update_all_gvars`
jl_update_all_gvars(&s, image, external_fns_begin); // gvars relocs
Expand Down
53 changes: 53 additions & 0 deletions src/staticdata_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -1231,3 +1231,56 @@ JL_DLLEXPORT uint64_t jl_read_verify_header(ios_t *s, uint8_t *pkgimage, int64_t
}
return checksum;
}

// Returns `depmodidxs` where `udeps[depmodidxs[i]]` corresponds to `build_ids[i]`
static jl_array_t *image_to_depmodidx(jl_array_t *build_ids, jl_array_t *udeps)
{
if (!udeps || !build_ids)
return NULL;
size_t j = 0, lbids = jl_array_len(build_ids), ldeps = jl_array_len(udeps);
uint64_t *bids = (uint64_t*)jl_array_data(build_ids);
jl_array_t *depmodidxs = jl_alloc_array_1d(jl_array_int32_type, lbids);
int32_t *dmidxs = (int32_t*)jl_array_data(depmodidxs);
for (size_t i = 0; i < lbids; i++) {
dmidxs[i] = -1;
uint64_t bid = bids[i];
j = 0; // sad that this is of O(M*N)
while (j < ldeps) {
jl_value_t *deptuple = jl_array_ptr_ref(udeps, j);
jl_module_t *depmod = (jl_module_t*)jl_fieldref(deptuple, 0); // evaluating module
jl_module_t *depmod_top = depmod;
while (depmod_top->parent != jl_main_module && depmod_top->parent != depmod_top)
depmod_top = depmod_top->parent;
if (depmod_top == jl_base_module) {
dmidxs[i] = 0;
break;
}
if (depmod_top->build_id.lo == bid) {
dmidxs[i] = j;
break;
}
j++;
}
assert(dmidxs[i] >= 0);
}
return depmodidxs;
}

// Returns `imageidxs` where `imageidxs[i]` is the blob corresponding to `depmods[i]`
static jl_array_t *depmod_to_imageidx(jl_array_t *depmods)
{
if (!depmods)
return NULL;
size_t ldeps = jl_array_len(depmods);
jl_array_t *imageidxs = jl_alloc_array_1d(jl_array_int32_type, ldeps);
int32_t *imgidxs = (int32_t*)jl_array_data(imageidxs);
for (size_t i = 0; i < ldeps; i++) {
imgidxs[i] = -1;
jl_value_t *depmod = jl_array_ptr_ref(depmods, i);
assert(jl_is_module(depmod));
size_t j = external_blob_index(depmod);
assert(j < 1<<31);
imgidxs[i] = (int32_t)j;
}
return imageidxs;
}

0 comments on commit 20be3c6

Please sign in to comment.