From 958da95647da5f9a824f99cb56846eed22a4d098 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 7 Aug 2023 17:48:25 -0400 Subject: [PATCH] print feature flags used for matching pkgimage (#50172) ``` julia> @ccall jl_dump_host_cpu()::Cvoid CPU: znver2 Features: sse3, pclmul, ssse3, fma, cx16, sse4.1, sse4.2, movbe, popcnt, aes, xsave, avx, f16c, rdrnd, fsgsbase, bmi, avx2, bmi2, rdseed, adx, clflushopt, clwb, sha, rdpid, sahf, lzcnt, sse4a, prfchw, mwaitx, xsaveopt, xsavec, xsaves, clzero, wbnoinvd julia> target = only(Base.current_image_targets()) znver2; flags=0; features_en=(sse3, pclmul, ssse3, fma, cx16, sse4.1, sse4.2, movbe, popcnt, aes, xsave, avx, f16c, fsgsbase, bmi, avx2, bmi2, adx, clflushopt, clwb, sha, rdpid, sahf, lzcnt, sse4a, prfchw, mwaitx, xsavec, xsaves, clzero, wbnoinvd) ``` Co-authored-by: Prem Chintalapudi Co-authored-by: Jameson Nash --- base/loading.jl | 99 +++++++++++++++++++++++++++++++++++--- src/processor.cpp | 79 ++++++++++++++++++++++++------ src/processor.h | 11 ++++- src/processor_arm.cpp | 21 +++++--- src/processor_fallback.cpp | 17 +++++-- src/processor_x86.cpp | 20 +++++--- 6 files changed, 206 insertions(+), 41 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 6ab2a1cd53010..b7fe40bf01ccb 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -2841,11 +2841,9 @@ get_compiletime_preferences(m::Module) = get_compiletime_preferences(PkgId(m).uu get_compiletime_preferences(::Nothing) = String[] function check_clone_targets(clone_targets) - try - ccall(:jl_check_pkgimage_clones, Cvoid, (Ptr{Cchar},), clone_targets) - return true - catch - return false + rejection_reason = ccall(:jl_check_pkgimage_clones, Any, (Ptr{Cchar},), clone_targets) + if rejection_reason !== nothing + return rejection_reason end end @@ -2877,6 +2875,88 @@ function show(io::IO, cf::CacheFlags) print(io, ", opt_level = ", cf.opt_level) end +struct ImageTarget + name::String + flags::Int32 + ext_features::String + features_en::Vector{UInt8} + features_dis::Vector{UInt8} +end + +function parse_image_target(io::IO) + flags = read(io, Int32) + nfeature = read(io, Int32) + feature_en = read(io, 4*nfeature) + feature_dis = read(io, 4*nfeature) + name_len = read(io, Int32) + name = String(read(io, name_len)) + ext_features_len = read(io, Int32) + ext_features = String(read(io, ext_features_len)) + ImageTarget(name, flags, ext_features, feature_en, feature_dis) +end + +function parse_image_targets(targets::Vector{UInt8}) + io = IOBuffer(targets) + ntargets = read(io, Int32) + targets = Vector{ImageTarget}(undef, ntargets) + for i in 1:ntargets + targets[i] = parse_image_target(io) + end + return targets +end + +function current_image_targets() + targets = @ccall jl_reflect_clone_targets()::Vector{UInt8} + return parse_image_targets(targets) +end + +struct FeatureName + name::Cstring + bit::UInt32 # bit index into a `uint32_t` array; + llvmver::UInt32 # 0 if it is available on the oldest LLVM version we support +end + +function feature_names() + fnames = Ref{Ptr{FeatureName}}() + nf = Ref{Csize_t}() + @ccall jl_reflect_feature_names(fnames::Ptr{Ptr{FeatureName}}, nf::Ptr{Csize_t})::Cvoid + if fnames[] == C_NULL + @assert nf[] == 0 + return Vector{FeatureName}(undef, 0) + end + Base.unsafe_wrap(Array, fnames[], nf[], own=false) +end + +function test_feature(features::Vector{UInt8}, feat::FeatureName) + bitidx = feat.bit + u8idx = div(bitidx, 8) + 1 + bit = bitidx % 8 + return (features[u8idx] & (1 << bit)) != 0 +end + +function show(io::IO, it::ImageTarget) + print(io, it.name) + if !isempty(it.ext_features) + print(io, ",", it.ext_features) + end + print(io, "; flags=", it.flags) + print(io, "; features_en=(") + first = true + for feat in feature_names() + if test_feature(it.features_en, feat) + name = Base.unsafe_string(feat.name) + if first + first = false + print(io, name) + else + print(io, ", ", name) + end + end + end + print(io, ")") + # Is feature_dis useful? +end + # Set by FileWatching.__init__() global mkpidlock_hook global trymkpidlock_hook @@ -2914,7 +2994,6 @@ function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=300) f() end end - # returns true if it "cachefile.ji" is stale relative to "modpath.jl" and build_id for modkey # otherwise returns the list of dependencies to also check @constprop :none function stale_cachefile(modpath::String, cachefile::String; ignore_loaded::Bool = false) @@ -2948,8 +3027,12 @@ end @debug "Rejecting cache file $cachefile for $modkey since it would require usage of pkgimage" return true end - if !check_clone_targets(clone_targets) - @debug "Rejecting cache file $cachefile for $modkey since pkgimage can't be loaded on this target" + rejection_reasons = check_clone_targets(clone_targets) + if !isnothing(rejection_reasons) + @debug("Rejecting cache file $cachefile for $modkey:", + Reasons=rejection_reasons, + var"Image Targets"=parse_image_targets(clone_targets), + var"Current Targets"=current_image_targets()) return true end if !isfile(ocachefile) diff --git a/src/processor.cpp b/src/processor.cpp index 24a434af91ad3..9a602ba836f89 100644 --- a/src/processor.cpp +++ b/src/processor.cpp @@ -107,13 +107,13 @@ static inline bool test_nbit(const T1 &bits, T2 _bitidx) } template -static inline void unset_bits(T &bits) +static inline void unset_bits(T &bits) JL_NOTSAFEPOINT { (void)bits; } template -static inline void unset_bits(T &bits, T1 _bitidx, Rest... rest) +static inline void unset_bits(T &bits, T1 _bitidx, Rest... rest) JL_NOTSAFEPOINT { auto bitidx = static_cast(_bitidx); auto u32idx = bitidx / 32; @@ -142,7 +142,7 @@ static inline void set_bit(T &bits, T1 _bitidx, bool val) template struct FeatureList { uint32_t eles[n]; - uint32_t &operator[](size_t pos) + uint32_t &operator[](size_t pos) JL_NOTSAFEPOINT { return eles[pos]; } @@ -297,12 +297,6 @@ static inline void append_ext_features(std::vector &features, * Target specific type/constant definitions, always enable. */ -struct FeatureName { - const char *name; - uint32_t bit; // bit index into a `uint32_t` array; - uint32_t llvmver; // 0 if it is available on the oldest LLVM version we support -}; - template struct CPUSpec { const char *name; @@ -636,7 +630,13 @@ static inline jl_image_t parse_sysimg(void *hdl, F &&callback) jl_dlsym(hdl, "jl_image_pointers", (void**)&pointers, 1); const void *ids = pointers->target_data; - uint32_t target_idx = callback(ids); + jl_value_t* rejection_reason = nullptr; + JL_GC_PUSH1(&rejection_reason); + uint32_t target_idx = callback(ids, &rejection_reason); + if (target_idx == (uint32_t)-1) { + jl_throw(jl_new_struct(jl_errorexception_type, rejection_reason)); + } + JL_GC_POP(); if (pointers->header->version != 1) { jl_error("Image file is not compatible with this version of Julia"); @@ -855,17 +855,20 @@ struct SysimgMatch { // Find the best match in the sysimg. // Select the best one based on the largest vector register and largest compatible feature set. template -static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_vector_size) +static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_vector_size, jl_value_t **rejection_reason) { SysimgMatch match; bool match_name = false; int feature_size = 0; + std::vector rejection_reasons; + rejection_reasons.reserve(sysimg.size()); for (uint32_t i = 0; i < sysimg.size(); i++) { auto &imgt = sysimg[i]; if (!(imgt.en.features & target.dis.features).empty()) { // Check sysimg enabled features against runtime disabled features // This is valid (and all what we can do) // even if one or both of the targets are unknown. + rejection_reasons.push_back("Rejecting this target due to use of runtime-disabled features\n"); continue; } if (imgt.name == target.name) { @@ -876,25 +879,44 @@ static inline SysimgMatch match_sysimg_targets(S &&sysimg, T &&target, F &&max_v } } else if (match_name) { + rejection_reasons.push_back("Rejecting this target since another target has a cpu name match\n"); continue; } int new_vsz = max_vector_size(imgt.en.features); - if (match.vreg_size > new_vsz) + if (match.vreg_size > new_vsz) { + rejection_reasons.push_back("Rejecting this target since another target has a larger vector register size\n"); continue; + } int new_feature_size = imgt.en.features.nbits(); if (match.vreg_size < new_vsz) { match.best_idx = i; match.vreg_size = new_vsz; feature_size = new_feature_size; + rejection_reasons.push_back("Updating best match to this target due to larger vector register size\n"); continue; } - if (new_feature_size < feature_size) + if (new_feature_size < feature_size) { + rejection_reasons.push_back("Rejecting this target since another target has a larger feature set\n"); continue; + } match.best_idx = i; feature_size = new_feature_size; + rejection_reasons.push_back("Updating best match to this target\n"); + } + if (match.best_idx == (uint32_t)-1) { + // Construct a nice error message for debugging purposes + std::string error_msg = "Unable to find compatible target in cached code image.\n"; + for (size_t i = 0; i < rejection_reasons.size(); i++) { + error_msg += "Target "; + error_msg += std::to_string(i); + error_msg += " ("; + error_msg += sysimg[i].name; + error_msg += "): "; + error_msg += rejection_reasons[i]; + } + if (rejection_reason) + *rejection_reason = jl_pchar_to_string(error_msg.data(), error_msg.size()); } - if (match.best_idx == (uint32_t)-1) - jl_error("Unable to find compatible target in system image."); return match; } @@ -946,3 +968,30 @@ static inline void dump_cpu_spec(uint32_t cpu, const FeatureList &features, #include "processor_fallback.cpp" #endif + +extern "C" JL_DLLEXPORT jl_value_t* jl_reflect_clone_targets() { + auto specs = jl_get_llvm_clone_targets(); + const uint32_t base_flags = 0; + std::vector data; + auto push_i32 = [&] (uint32_t v) { + uint8_t buff[4]; + memcpy(buff, &v, 4); + data.insert(data.end(), buff, buff + 4); + }; + push_i32(specs.size()); + for (uint32_t i = 0; i < specs.size(); i++) { + push_i32(base_flags | (specs[i].flags & JL_TARGET_UNKNOWN_NAME)); + auto &specdata = specs[i].data; + data.insert(data.end(), specdata.begin(), specdata.end()); + } + + jl_value_t *arr = (jl_value_t*)jl_alloc_array_1d(jl_array_uint8_type, data.size()); + uint8_t *out = (uint8_t*)jl_array_data(arr); + memcpy(out, data.data(), data.size()); + return arr; +} + +extern "C" JL_DLLEXPORT void jl_reflect_feature_names(const FeatureName **fnames, size_t *nf) { + *fnames = feature_names; + *nf = nfeature_names; +} diff --git a/src/processor.h b/src/processor.h index 2255cf4c10daa..74610cbe64b28 100644 --- a/src/processor.h +++ b/src/processor.h @@ -226,7 +226,7 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_features(void); // Dump the name and feature set of the host CPU // For debugging only JL_DLLEXPORT void jl_dump_host_cpu(void); -JL_DLLEXPORT void jl_check_pkgimage_clones(char* data); +JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char* data); JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero); JL_DLLEXPORT int32_t jl_get_zero_subnormals(void); @@ -274,6 +274,15 @@ struct jl_target_spec_t { extern "C" JL_DLLEXPORT std::vector jl_get_llvm_clone_targets(void) JL_NOTSAFEPOINT; std::string jl_get_cpu_name_llvm(void) JL_NOTSAFEPOINT; std::string jl_get_cpu_features_llvm(void) JL_NOTSAFEPOINT; + +struct FeatureName { + const char *name; + uint32_t bit; // bit index into a `uint32_t` array; + uint32_t llvmver; // 0 if it is available on the oldest LLVM version we support +}; + +extern "C" JL_DLLEXPORT jl_value_t* jl_reflect_clone_targets(); +extern "C" JL_DLLEXPORT void jl_reflect_feature_names(const FeatureName **feature_names, size_t *nfeatures); #endif #endif diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index 0a8090a8a6d9c..f47d1509f4975 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -1561,7 +1561,7 @@ static int max_vector_size(const FeatureList &features) #endif } -static uint32_t sysimg_init_cb(const void *id) +static uint32_t sysimg_init_cb(const void *id, jl_value_t **rejection_reason) { // First see what target is requested for the JIT. auto &cmdline = get_cmdline_targets(); @@ -1573,7 +1573,9 @@ static uint32_t sysimg_init_cb(const void *id) t.name = nname; } } - auto match = match_sysimg_targets(sysimg, target, max_vector_size); + auto match = match_sysimg_targets(sysimg, target, max_vector_size, rejection_reason); + if (match.best_idx == -1) + return match.best_idx; // Now we've decided on which sysimg version to use. // Make sure the JIT target is compatible with it and save the JIT target. if (match.vreg_size != max_vector_size(target.en.features) && @@ -1586,7 +1588,7 @@ static uint32_t sysimg_init_cb(const void *id) return match.best_idx; } -static uint32_t pkgimg_init_cb(const void *id) +static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason JL_REQUIRE_ROOTED_SLOT) { TargetData target = jit_targets.front(); auto pkgimg = deserialize_target_data((const uint8_t*)id); @@ -1595,8 +1597,7 @@ static uint32_t pkgimg_init_cb(const void *id) t.name = nname; } } - auto match = match_sysimg_targets(pkgimg, target, max_vector_size); - + auto match = match_sysimg_targets(pkgimg, target, max_vector_size, rejection_reason); return match.best_idx; } @@ -1823,9 +1824,15 @@ jl_image_t jl_init_processor_pkgimg(void *hdl) return parse_sysimg(hdl, pkgimg_init_cb); } -JL_DLLEXPORT void jl_check_pkgimage_clones(char *data) +JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data) { - pkgimg_init_cb(data); + jl_value_t *rejection_reason = NULL; + JL_GC_PUSH1(&rejection_reason); + uint32_t match_idx = pkgimg_init_cb(data, &rejection_reason); + JL_GC_POP(); + if (match_idx == (uint32_t)-1) + return rejection_reason; + return jl_nothing; } std::pair> jl_get_llvm_target(bool imaging, uint32_t &flags) diff --git a/src/processor_fallback.cpp b/src/processor_fallback.cpp index d50edc8e9b621..08a4274f44ac8 100644 --- a/src/processor_fallback.cpp +++ b/src/processor_fallback.cpp @@ -2,6 +2,9 @@ // Fallback processor detection and dispatch +static constexpr FeatureName *feature_names = nullptr; +static constexpr uint32_t nfeature_names = 0; + namespace Fallback { static inline const std::string &host_cpu_name() @@ -33,7 +36,7 @@ static TargetData<1> arg_target_data(const TargetData<1> &arg, bool require_host return res; } -static uint32_t sysimg_init_cb(const void *id) +static uint32_t sysimg_init_cb(const void *id, jl_value_t **rejection_reason) { // First see what target is requested for the JIT. auto &cmdline = get_cmdline_targets(); @@ -51,7 +54,7 @@ static uint32_t sysimg_init_cb(const void *id) return best_idx; } -static uint32_t pkgimg_init_cb(const void *id) +static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason) { TargetData<1> target = jit_targets.front(); // Find the last name match or use the default one. @@ -175,9 +178,15 @@ JL_DLLEXPORT void jl_dump_host_cpu(void) jl_safe_printf("Features: %s\n", jl_get_cpu_features_llvm().c_str()); } -JL_DLLEXPORT void jl_check_pkgimage_clones(char *data) +JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data) { - pkgimg_init_cb(data); + jl_value_t *rejection_reason = NULL; + JL_GC_PUSH1(&rejection_reason); + uint32_t match_idx = pkgimg_init_cb(data, &rejection_reason); + JL_GC_POP(); + if (match_idx == (uint32_t)-1) + return rejection_reason; + return jl_nothing; } extern "C" int jl_test_cpu_feature(jl_cpu_feature_t) diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp index b9e7d8c0f0daf..73e0992bcf37c 100644 --- a/src/processor_x86.cpp +++ b/src/processor_x86.cpp @@ -840,7 +840,7 @@ static int max_vector_size(const FeatureList &features) return 16; } -static uint32_t sysimg_init_cb(const void *id) +static uint32_t sysimg_init_cb(const void *id, jl_value_t** rejection_reason) { // First see what target is requested for the JIT. auto &cmdline = get_cmdline_targets(); @@ -868,7 +868,9 @@ static uint32_t sysimg_init_cb(const void *id) "virtualized environment. Please read " "https://docs.julialang.org/en/v1/devdocs/sysimg/ for more."); } - auto match = match_sysimg_targets(sysimg, target, max_vector_size); + auto match = match_sysimg_targets(sysimg, target, max_vector_size, rejection_reason); + if (match.best_idx == (uint32_t)-1) + return match.best_idx; // Now we've decided on which sysimg version to use. // Make sure the JIT target is compatible with it and save the JIT target. if (match.vreg_size != max_vector_size(target.en.features) && @@ -884,7 +886,7 @@ static uint32_t sysimg_init_cb(const void *id) return match.best_idx; } -static uint32_t pkgimg_init_cb(const void *id) +static uint32_t pkgimg_init_cb(const void *id, jl_value_t **rejection_reason) { TargetData target = jit_targets.front(); auto pkgimg = deserialize_target_data((const uint8_t*)id); @@ -893,7 +895,7 @@ static uint32_t pkgimg_init_cb(const void *id) t.name = nname; } } - auto match = match_sysimg_targets(pkgimg, target, max_vector_size); + auto match = match_sysimg_targets(pkgimg, target, max_vector_size, rejection_reason); return match.best_idx; } @@ -1032,9 +1034,15 @@ JL_DLLEXPORT void jl_dump_host_cpu(void) cpus, ncpu_names); } -JL_DLLEXPORT void jl_check_pkgimage_clones(char *data) +JL_DLLEXPORT jl_value_t* jl_check_pkgimage_clones(char *data) { - pkgimg_init_cb(data); + jl_value_t *rejection_reason = NULL; + JL_GC_PUSH1(&rejection_reason); + uint32_t match_idx = pkgimg_init_cb(data, &rejection_reason); + JL_GC_POP(); + if (match_idx == (uint32_t)-1) + return rejection_reason; + return jl_nothing; } JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void)