From 2a2cfdc0ff9ec5ebca14ae49ba4a377d14ed7ea4 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 25 Aug 2020 07:57:07 -0500 Subject: [PATCH] Add a generic code-inferrability test Closes #36393 --- base/reflection.jl | 15 ++ test/choosetests.jl | 2 +- test/inference_qa.jl | 216 ++++++++++++++++++++++++ test/runtests.jl | 4 + test/testhelpers/methodanalysis.jl | 260 +++++++++++++++++++++++++++++ 5 files changed, 496 insertions(+), 1 deletion(-) create mode 100644 test/inference_qa.jl create mode 100644 test/testhelpers/methodanalysis.jl diff --git a/base/reflection.jl b/base/reflection.jl index 1352776326936..bd959a853e6c7 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -753,6 +753,13 @@ function signature_type(@nospecialize(f), @nospecialize(args)) end end +function call_type(@nospecialize(tt)) + ft = tt.parameters[1] + argt = Tuple{tt.parameters[2:end]...} + name = Symbol(String(ft.name.name)[2:end]) # strip off leading '#' + return (getfield(ft.name.module, name), argt) +end + """ code_lowered(f, types; generated=true, debuginfo=:default) @@ -929,6 +936,14 @@ function visit(f, d::Core.TypeMapEntry) end nothing end +function visit(f, d::SimpleVector) + for i = 1:length(d) + if isassigned(d, i) + f(d[i]) + end + end + nothing +end function length(mt::Core.MethodTable) n = 0 diff --git a/test/choosetests.jl b/test/choosetests.jl index 914b199a7a852..73fbeb80bb8dd 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -55,7 +55,7 @@ function choosetests(choices = []) "boundscheck", "error", "ambiguous", "cartesian", "osutils", "channels", "iostream", "secretbuffer", "specificity", "reinterpretarray", "syntax", "corelogging", "missing", "asyncmap", - "smallarrayshrink" + "smallarrayshrink", "inference_qa" ] tests = [] diff --git a/test/inference_qa.jl b/test/inference_qa.jl new file mode 100644 index 0000000000000..158e47e6d9657 --- /dev/null +++ b/test/inference_qa.jl @@ -0,0 +1,216 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# The aim of these tests is to enforce a minimum quality of inferrability of Julia's +# Base and specifically its precompiled methods. Passing these tests does not +# indicate that Julia has no inference problems, but they are designed to catch some +# inadvertent problems. While `@inferred` only tests the return type, the tests +# in this file are designed to check the overall inference quality of method *internals*. + +# If you fail tests here, you can usually fix problems using `@code_warntype` or Cthulhu.jl's `@descend`. + +using Test + +isdefined(@__MODULE__, :is_atrisk_type) || include("testhelpers/methodanalysis.jl") + +## Test the testing tools + +@testset "is_atrisk_type" begin + @test is_atrisk_type(Tuple{typeof(==),Any,Any}) + @test is_atrisk_type(Tuple{typeof(==),Symbol,Any}) + @test is_atrisk_type(Tuple{typeof(==),Any,Symbol}) + @test !is_atrisk_type(Tuple{typeof(==),Symbol,Symbol}) + @test !is_atrisk_type(Tuple{typeof(convert),Type{Any},Any}) + @test !is_atrisk_type(Tuple{typeof(convert),Type{AbstractString},AbstractString}) + @test !is_atrisk_type(Tuple{typeof(convert),Type{AbstractString},String}) + @test is_atrisk_type(Tuple{typeof(convert),Type{String},AbstractString}) + @test !is_atrisk_type(Tuple{typeof(convert),Type{Union{Int,Float32}},Int}) + @test !is_atrisk_type(Tuple{typeof(convert),Type{Union{Int,Float32}},Int32}) + @test is_atrisk_type(Tuple{typeof(convert),Type{Union{Int,Float32}},Integer}) + @test !is_atrisk_type(Tuple{typeof(convert),Type{T} where T<:Union{Int,Float32},Int}) + @test !is_atrisk_type(Tuple{typeof(map),Function,Vector{Any}}) + @test !is_atrisk_type(Tuple{typeof(getindex),Dict{Union{String,Int},Any},Union{String,Int}}) + @test is_atrisk_type(Tuple{typeof(getindex),Dict{Union{String,Int},Any},Any}) + @test !is_atrisk_type(Tuple{Type{BoundsError},Any,Any}) + @test is_atrisk_type(Tuple{typeof(sin),Any}) + @test !is_atrisk_type(Tuple{typeof(length),Tuple{Any,Any}}) + @test is_atrisk_type(Tuple{typeof(setindex!),Vector{Int},Any,Int}) + @test !is_atrisk_type(Tuple{typeof(setindex!),Vector{Any},Any,Int}) + @test is_atrisk_type(Tuple{typeof(push!),Vector{Int},Any}) + @test !is_atrisk_type(Tuple{typeof(push!),Vector{Any},Any}) + @test !is_atrisk_type(Tuple{typeof(push!),Vector{T} where T,Any}) +end + +## Prepwork + +# Count # of backedges for MethodInstances with abstract types + +function get_atrisk_methodinstances() + mivulnerabilities = Pair{MethodInstance,Int}[] + for mi in methodinstances() + if !fromcc(mi.def.module) + if is_atrisk_type(mi.specTypes) + push!(mivulnerabilities, mi => length(all_backedges(mi))) + end + end + end + return sort!(mivulnerabilities; by=last) +end +mivulnerabilities = get_atrisk_methodinstances() + +# Get all the things that depends on these at-risk methodinstances +const atrisk_methodinstances = Set{MethodInstance}() +for (mi, c) in mivulnerabilities + isdefined(mi, :backedges) && all_backedges!(atrisk_methodinstances, mi) + push!(atrisk_methodinstances, mi) +end + +# Split into exported & private functions +mivulnerabilities_exported = Pair{MethodInstance,Int}[] +mivulnerabilities_private = similar(mivulnerabilities_exported) +for (mi, c) in mivulnerabilities + if isexported(mi) + push!(mivulnerabilities_exported, mi=>c) + else + push!(mivulnerabilities_private, mi=>c) + end +end + +## Tests + +@testset "Base.require vulnerabilities" begin + # Invalidating the code that loads packages leads to major slowdowns, especially if it happens repeatedly + # in a dependent chain of package loads. Ideally, we'd make this code-path "bulletproof". + for m in methods(Base.require) + @test isempty(remove_unlikely_methodinstances(atrisk_triggers(m, first.(mivulnerabilities_exported)))) + + # It's far less important to protect against invalidation of private functions, + # since generally packages should not extend these functions. Nevertheless it wouldn't + # be a bad thing. + @test_broken isempty(remove_unlikely_methodinstances(atrisk_triggers(m, first.(mivulnerabilities_private)))) + end +end + +# If you fail these tests, it may or may not be your fault---you may just be the +# one that pushed one of these tests over the edge. Check `badcounts` and `meancounts` +# before and after your changes; if the change is quite small, it's unlikely that your changes +# are the root cause. In such cases, failures here should be investigated or reported, +# but if inference problems can be fixed elsewhere you may not have to change your PR. +# Conversely, if your changes *do* increase these numbers substantially, your changes have likely +# triggered widespread inference problems---you should fix them before merging your PR. +# +# Never increase the thresholds here without public discussion. Indeed, the goal is to +# shrink them as much as possible. +@testset "Aggregate at-risk methodinstances" begin + # Test overall number of atrisk MethodInstances and their average number of backedges + badexp = Set(remove_unlikely_methodinstances(first.(mivulnerabilities_exported))) + badcounts = filter(pr->pr.first ∈ badexp, mivulnerabilities_exported) + @test length(badcounts) < 1250 # 1000 + if length(badcounts) < 800 + @info "There are now only $(length(badcounts)) at-risk specializations of exported methods, consider dropping the threshold" + end + meancounts = sum(last.(badcounts))/length(badcounts) + @test meancounts < 33 # 32 + if meancounts < 24 + @info "The mean number of at-risk backedges is now only $meancounts, consider dropping the threshold" + end +end + +@testset "Specific return types" begin + # All the is* functions + # Not all of the broken cases have been checked carefully; it's possible some of these should return + # `Union{Bool,Missing}` or something. + @test function_returns(isabspath, Bool) + @test function_returns(isabstracttype, Bool) + @test_broken function_returns(isapprox, Bool) + @test_broken function_returns(isascii, Bool) + # @test function_returns(isassigned, Bool) + @test function_returns(isbits, Bool) + @test function_returns(isbitstype, Bool) + @test function_returns(isblockdev, Bool) + @test function_returns(ischardev, Bool) + @test function_returns(iscntrl, Bool) + @test function_returns(isconcretetype, Bool) + @test function_returns(isconst, Bool) + @test function_returns(isdefined, Bool) + @test function_returns(isdigit, Bool) + @test function_returns(isdir, Bool) + @test function_returns(isdirpath, Bool) + @test_broken function_returns(isdisjoint, Bool) + @test function_returns(isdispatchtuple, Bool) + @test_broken function_returns(isempty, Bool) + @test_broken function_returns(isequal, Bool; minargs=2) + @test_broken function_returns(iseven, Bool) + @test function_returns(isexported, Bool) + @test function_returns(isfifo, Bool) + @test function_returns(isfile, Bool) + @test_broken function_returns(isfinite, Bool) + @test_broken function_returns(isinf, Bool) + @test_broken function_returns(isinteger, Bool) + @test function_returns(isinteractive, Bool) + @test_broken function_returns(isless, Bool) + @test function_returns(isletter, Bool) + @test function_returns(islink, Bool) + @test function_returns(islocked, Bool) + @test function_returns(islowercase, Bool) + @test_broken function_returns(ismarked, Bool) + @test function_returns(ismissing, Bool) + @test function_returns(ismount, Bool) + @test function_returns(ismutable, Bool) + @test function_returns(isnan, Bool) + @test function_returns(isnothing, Bool) + @test function_returns(isnumeric, Bool) + @test_broken function_returns(isodd, Bool) + @test_broken function_returns(isone, Bool) + @test_broken function_returns(isopen, Bool) + @test function_returns(ispath, Bool) + @test_broken function_returns(isperm, Bool) + @test_broken function_returns(ispow2, Bool) + @test function_returns(isprimitivetype, Bool) + @test function_returns(isprint, Bool) + @test function_returns(ispunct, Bool) + @test_broken function_returns(isreadable, Bool) + @test_broken function_returns(isreadonly, Bool) + @test_broken function_returns(isready, Bool) + @test_broken function_returns(isreal, Bool) + @test function_returns(issetequal, Bool) + @test function_returns(issetgid, Bool) + @test function_returns(issetuid, Bool) + @test function_returns(issocket, Bool) + @test_broken function_returns(issorted, Bool) + @test function_returns(isspace, Bool) + @test function_returns(issticky, Bool) + @test function_returns(isstructtype, Bool) + @test_broken function_returns(issubnormal, Bool) + @test_broken function_returns(issubset, Bool) + @test function_returns(istaskdone, Bool) + @test function_returns(istaskfailed, Bool) + @test function_returns(istaskstarted, Bool) + @test_broken function_returns(istextmime, Bool) + @test function_returns(isuppercase, Bool) + @test_broken function_returns(isvalid, Bool) + @test_broken function_returns(iswritable, Bool) + @test function_returns(isxdigit, Bool) + @test_broken function_returns(iszero, Bool) + + @test function_returns(eof, Bool) +end + +@testset "Never inferred" begin + # Check that we never infer certain methodinstances, + # It would be great to broaden this beyond Real, but this is a good start. + # If you fail these tests, it means an internal operation forced + # the compiler to infer one of these methods for a problematic combination of types. + function subtype_real(@nospecialize T) + while isa(T, TypeVar) + T = T.ub + end + return T<:Real + end + for f in (==, isequal, <, <=) + for mi in methodinstances(f) + if any(subtype_real, Base.unwrap_unionall(mi.specTypes).parameters) + @test !is_atrisk_type(mi.specTypes) + end + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 205d15767dc3e..2b595eb98597c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -77,6 +77,10 @@ linalg_tests = tests[linalg_test_ids] deleteat!(tests, linalg_test_ids) prepend!(tests, linalg_tests) +# do inference_qa at the beginning (in a fresh session) to avoid trouble from specializations +# introduced by running other tests +filtertests!(tests, "inference_qa") + import LinearAlgebra cd(@__DIR__) do n = 1 diff --git a/test/testhelpers/methodanalysis.jl b/test/testhelpers/methodanalysis.jl new file mode 100644 index 0000000000000..6ce14cbd42852 --- /dev/null +++ b/test/testhelpers/methodanalysis.jl @@ -0,0 +1,260 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +using Base: Callable, IdSet +using Core: MethodInstance + +""" + is_atrisk_type(tt) + +Given a Tuple-type signature (e.g., `Tuple{typeof(sum),Vector{Int}}`), determine whether this signature +is "at risk" for invalidation. Essentially it returns `true` if one or more arguments are of abstract type, +although there are prominent exceptions: + +- Constructor calls with arbitrary argument types +- `convert(X, x)` where `isa(x, X)` +- `setindex!` and `push!` methods where the valtype is a subtype of the eltype (for AbstractDicts, likewise for the keytype) +- `getindex`, `length`, `isempty`, and `iterate` on any tuple + +All of these are "allowed," meaning that they return `false`. +Moreover, some specific non-concrete argument types---like `Union`s of concrete types and `Function`--- +do not trigger a return of `true`, although other at-risk argument types can lead to an overall `true` return +for the signature. +""" +function is_atrisk_type(@nospecialize(typ)) + # signatures like `convert(Vector, a)`, `foo(::Vararg{Synbol,N}) where N` do not seem to pose a problem + isa(typ, TypeVar) && return false + # isbits parameters are not a problem + isa(typ, Type) || return false + if isa(typ, UnionAll) + typ = Base.unwrap_unionall(typ) + end + # Exclude signatures with Union{} + typ === Union{} && return false + isa(typ, Union) && return is_atrisk_type(typ.a) | is_atrisk_type(typ.b) + # Type{T}: signatures like `convert(::Type{AbstractString}, ::String)` are not problematic + typ <: Type && return false + if typ <: Tuple && length(typ.parameters) >= 1 + p1 = typ.parameters[1] + # Constructor calls are not themselves a problem (any `convert`s they trigger might be, but those are covered) + isa(p1, Type) && p1 <: Type && return false + # convert(::Type{T}, ::S) where S<:T is not problematic + if p1 === typeof(Base.convert) || p1 === typeof(Core.convert) + p2, p3 = typ.parameters[2], typ.parameters[3] + if isa(p2, Type) + p2 = Base.unwrap_unionall(p2) + if isa(p2, DataType) && length(p2.parameters) === 1 + T = p2.parameters[1] + if isa(T, TypeVar) + T = T.ub + end + isa(p3, Type) && isa(T, Type) && p3 <: T && return false + end + end + # `getindex`, `length`, etc are OK for various Tuple{T1,T2,...} + elseif p1 === typeof(Base.getindex) || + p1 === typeof(Base.length) || + p1 === typeof(Base.isempty) || + p1 === typeof(Base.iterate) || p1 === typeof(Core.iterate) + p2 = typ.parameters[2] + if isa(p2, Type) + p2 = Base.unwrap_unionall(p2) + p2 <: Tuple && return false + end + # show(io::IO, x) is OK as long as typeof(x) is safe + elseif p1 === typeof(Base.show) || p1 === typeof(Base.print) || p1 === typeof(Base.println) + # is_atrisk_type(typ.parameters[2]) && return true + for i = 3:length(typ.parameters) + is_atrisk_type(typ.parameters[i]) && return true + end + return false + # setindex!(a, x, idx), push!(a, x), and similar are safe if typeof(x) <: eltype(a) + elseif (p1 === typeof(Base.setindex!) || p1 === typeof(Base.push!) || p1 === typeof(Base.pushfirst!) || + p1 === typeof(Base.setproperty!)) && length(typ.parameters) >= 3 + p2, p3 = typ.parameters[2], typ.parameters[3] + (isa(p2, TypeVar) || isa(p3, TypeVar)) && return true + if p2 <: AbstractDict && length(typ.parameters) >= 4 + p4 = typ.parameters[4] + isa(p4, TypeVar) && return true + p3 <: safe_valtype(p2) && p4 <: safe_keytype(p2) && return false + p2 <: IdDict && return false # these are annotated @nospecialize + elseif p2 <: Base.RefValue && length(typ.parameters) >= 4 + p4 = typ.parameters[4] + isa(p4, TypeVar) && return true + p4 <: eltype(p2) && return false + else + p3 <: eltype(p2) && return false + end + # likewise for `get!` + elseif p1 === typeof(Base.get!) && length(typ.parameters) >= 4 + p3, p4 = typ.parameters[2], typ.parameters[4] + (isa(p3, TypeVar) || isa(p4, TypeVar)) && return true + if p3 <: AbstractDict + p4 <: safe_valtype(p3) && return false + p3 <: IdDict && return false # these are annotated @nospecialize + end + # cull some @nospecialize methods + elseif p1 === typeof(Base.which) || p1 === typeof(Base.hasmethod) || p1 === typeof(Base.rethrow) + return false + end + end + # Standard DataTypes + isconcretetype(typ) && return false + # ::Function args are excluded + typ === Function && return false + !isempty(typ.parameters) && (any(is_atrisk_type, typ.parameters) || return false) + return true +end + +safe_valtype(::Type{<:AbstractDict{K,V}}) where {K,V} = @isdefined(V) ? V : Union{} +safe_valtype(::Type) = Union{} +safe_keytype(::Type{<:AbstractDict{K,V}}) where {K,V} = @isdefined(K) ? K : Union{} +safe_keytype(::Type) = Union{} + +# Get the name of a method as written in the code. This strips keyword-method mangling. +function codename(sym::Symbol) + symstr = String(sym) + # Body methods + m = match(r"^#(.*?)#\d+$", symstr) + m !== nothing && return Symbol(only(m.captures)) + # kw methods + m = match(r"^(.*?)##kw$", symstr) + m !== nothing && return Symbol(only(m.captures)) + return sym +end + +isexported(mi::MethodInstance) = isdefined(Main, codename(mi.def.name)) +getfunc(mi::MethodInstance) = getfunc(mi.def) +getfunc(m::Method) = getfield(m.module, m.name) +nmethods(mi::MethodInstance) = length(methods(getfunc(mi))) + +# Test whether a module is Core.Compiler or inside it +# (Methods there are protected from invalidation by other means) +function fromcc(mod::Module) + fn = fullname(mod) + return length(fn) >= 2 && fn[1] === :Core && fn[2] === :Compiler +end + +function atrisk_method(m::Method, atrisk_backedges) + for mi in methodinstances(m) + mi ∈ atrisk_backedges && return true + end + return false +end + +function atrisk_triggers(m::Method, atrisk_instances) + triggers = Set{MethodInstance}() + for mi in atrisk_instances + if atrisk_method(m, all_backedges(mi)) + push!(triggers, mi) + end + end + return triggers +end + +# This removes MethodInstances that no one in their right mind should ever invalidate by specialization. +function remove_unlikely_methodinstances(list) + out = MethodInstance[] + for mi in list + mi = mi::MethodInstance # must have MethodInstance elements + # All `continue` statements below omit the MethodInstance + name = codename(mi.def.name) + name ∈ (:invokelatest, :unwrap_unionall, :rewrap_unionall) && continue + # Vararg methods for printing etc + if name ∈ (:print, :println, :sprint, :string, :error) + nargs = length(mi.specTypes.parameters) # includes the #self argument + mparams = (Base.unwrap_unionall(mi.def.sig)::DataType).parameters + (nargs > length(mparams) || mparams[nargs] <: Vararg) && continue + end + # No one should ever specialize on notify or schedule's `val` argument + name === :notify && !is_atrisk_type(mi.specTypes.parameters[2]) && + !any(is_atrisk_type, mi.specTypes.parameters[4:end]) && continue + name === :schedule && !any(is_atrisk_type, mi.specTypes.parameters[2:end-1]) && continue + # Add more removal-filters here + + # We've decided to keep it + push!(out, mi) + end + return out +end + +# Check for inference quality in specific functions. +# This is valid only for functions that should always return a particular type for any valid call of their methods. +function function_returns(@nospecialize(f), @nospecialize(typ); allow_missing_for_missing=true, minargs=0) + for m in methods(f) + sig = Base.unwrap_unionall(m.sig) + for rt in Base.return_types(Base.call_type(Base.unwrap_unionall(m.sig))...) + rt <: typ && continue + if allow_missing_for_missing && any(T->T===Missing, sig.parameters[2:end]) && rt === Missing + continue + end + length(sig.parameters) - 1 < minargs && continue + return false + end + end + return true +end + +function methodinstances() + visited = IdSet{Any}() + mis = MethodInstance[] + for mod in Base.loaded_modules_array() + methodinstances!(mis, mod, visited) + end + return mis +end + +function methodinstances(@nospecialize parent) + visited = IdSet{Any}() + mis = MethodInstance[] + return methodinstances!(mis, parent, visited) +end + + +function methodinstances!(mis, mod::Module, visited) + mod ∈ visited && return mis + push!(visited, mod) + for nm in names(mod; all=true) + if isdefined(mod, nm) + obj = getfield(mod, nm) + if isa(obj, Module) + methodinstances!(mis, obj, visited) + elseif isa(obj, Callable) + methodinstances!(mis, obj, visited) + end + end + end + return mis +end + +function methodinstances!(mis, @nospecialize(f::Callable), visited) + f ∈ visited && return nothing + f === Vararg && return nothing # methods(Varargs) errors due to Type{Vararg} + push!(visited, f) + for m in methods(f) + methodinstances!(mis, m, visited) + end + return mis +end + +function methodinstances!(mis, m::Method, visited) + m ∈ visited && return nothing + m === Vararg && return nothing # methods(Varargs) errors due to Type{Vararg} + push!(visited, m) + Base.visit(m.specializations) do mi + push!(mis, mi) + end + return mis +end + +all_backedges(mi::MethodInstance) = all_backedges!(Set{MethodInstance}(), mi) + +function all_backedges!(backedges, mi) + push!(backedges, mi) + if isdefined(mi, :backedges) + for be in mi.backedges + be ∈ backedges && continue + all_backedges!(backedges, be) + end + end + return backedges +end