Skip to content

Commit

Permalink
optimizer: allow EA-powered finalizer inlining
Browse files Browse the repository at this point in the history
E.g. this allows `finalizer` inlining in the following case:
```julia
mutable struct ForeignBuffer{T}
    const ptr::Ptr{T}
end
const foreign_buffer_finalized = Ref(false)
function foreign_alloc(::Type{T}, length) where T
    ptr = Libc.malloc(sizeof(T) * length)
    ptr = Base.unsafe_convert(Ptr{T}, ptr)
    obj = ForeignBuffer{T}(ptr)
    return finalizer(obj) do obj
        Base.@assume_effects :notaskstate :nothrow
        foreign_buffer_finalized[] = true
        Libc.free(obj.ptr)
    end
end
function f_EA_finalizer(N::Int)
    workspace = foreign_alloc(Float64, N)
    GC.@preserve workspace begin
        (;ptr) = workspace
        Base.@assume_effects :nothrow @noinline println(devnull, "ptr = ", ptr)
    end
end
```
```julia
julia> @code_typed f_EA_finalizer(42)
CodeInfo(
1 ── %1  = Base.mul_int(8, N)::Int64
│    %2  = Core.lshr_int(%1, 63)::Int64
│    %3  = Core.trunc_int(Core.UInt8, %2)::UInt8
│    %4  = Core.eq_int(%3, 0x01)::Bool
└───       goto #3 if not %4
2 ──       invoke Core.throw_inexacterror(:convert::Symbol, UInt64::Type, %1::Int64)::Union{}
└───       unreachable
3 ──       goto #4
4 ── %9  = Core.bitcast(Core.UInt64, %1)::UInt64
└───       goto #5
5 ──       goto #6
6 ──       goto #7
7 ──       goto #8
8 ── %14 = $(Expr(:foreigncall, :(:malloc), Ptr{Nothing}, svec(UInt64), 0, :(:ccall), :(%9), :(%9)))::Ptr{Nothing}
└───       goto #9
9 ── %16 = Base.bitcast(Ptr{Float64}, %14)::Ptr{Float64}
│    %17 = %new(ForeignBuffer{Float64}, %16)::ForeignBuffer{Float64}
└───       goto #10
10 ─ %19 = $(Expr(:gc_preserve_begin, :(%17)))
│    %20 = Base.getfield(%17, :ptr)::Ptr{Float64}
│          invoke Main.println(Main.devnull::Base.DevNull, "ptr = "::String, %20::Ptr{Float64})::Nothing
│          $(Expr(:gc_preserve_end, :(%19)))
│    %23 = Main.foreign_buffer_finalized::Base.RefValue{Bool}
│          Base.setfield!(%23, :x, true)::Bool
│    %25 = Base.getfield(%17, :ptr)::Ptr{Float64}
│    %26 = Base.bitcast(Ptr{Nothing}, %25)::Ptr{Nothing}
│          $(Expr(:foreigncall, :(:free), Nothing, svec(Ptr{Nothing}), 0, :(:ccall), :(%26), :(%25)))::Nothing
└───       return nothing
) => Nothing
```

However, this is still a WIP. Before merging, I want to improve EA's
precision a bit and at least fix the test case that is currently marked as
`broken`. I also need to check its impact on compiler performance.

Additionally, I believe this feature is not yet practical.
In particular, there is still significant room for improvement in the
following areas:
- EA's interprocedural capabilities: currently EA is performed ad-hoc
  for limited frames because of latency reasons, which significantly
  reduces its precision in the presence of interprocedural calls.
- Relaxing the `:nothrow` check for finalizer inlining: the current
  algorithm requires `:nothrow`-ness on all paths from the allocation of
  the mutable struct to its last use, which is not practical for
  real-world cases. Even when `:nothrow` cannot be guaranteed, auxiliary
  optimizations such as inserting a `finalize` call after the last use
  might still be possible.
  • Loading branch information
aviatesk committed Oct 1, 2024
1 parent 06e7b9d commit 370c805
Show file tree
Hide file tree
Showing 8 changed files with 127 additions and 42 deletions.
2 changes: 1 addition & 1 deletion base/compiler/optimize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ function refine_effects!(interp::AbstractInterpreter, sv::PostOptAnalysisState)
if !is_effect_free(sv.result.ipo_effects) && sv.all_effect_free && !isempty(sv.ea_analysis_pending)
ir = sv.ir
nargs = let def = sv.result.linfo.def; isa(def, Method) ? Int(def.nargs) : 0; end
estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), GetNativeEscapeCache(interp))
estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), get_escape_cache(interp))
argescapes = EscapeAnalysis.ArgEscapeCache(estate)
stack_analysis_result!(sv.result, argescapes)
validate_mutable_arg_escapes!(estate, sv)
Expand Down
51 changes: 34 additions & 17 deletions base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import ._TOP_MOD: ==, getindex, setindex!
using Core: MethodMatch, SimpleVector, ifelse, sizeof
using Core.IR
using ._TOP_MOD: # Base definitions
@__MODULE__, @assert, @eval, @goto, @inbounds, @inline, @label, @noinline,
@__MODULE__, @assert, @eval, @goto, @inbounds, @inline, @label, @noinline, @show,
@nospecialize, @specialize, BitSet, Callable, Csize_t, IdDict, IdSet, UnitRange, Vector,
copy, delete!, empty!, enumerate, error, first, get, get!, haskey, in, isassigned,
isempty, ismutabletype, keys, last, length, max, min, missing, pop!, push!, pushfirst!,
Expand Down Expand Up @@ -657,11 +657,13 @@ function analyze_escapes(ir::IRCode, nargs::Int, 𝕃ₒ::AbstractLattice, get_e
# `escape_exception!` conservatively propagates `AllEscape` anyway,
# and so escape information imposed on `:the_exception` isn't computed
continue
elseif head === :gc_preserve_begin
# GC preserve is handled by `escape_gc_preserve!`
elseif head === :gc_preserve_end
escape_gc_preserve!(astate, pc, stmt.args)
elseif head === :static_parameter || # this exists statically, not interested in its escape
head === :copyast || # XXX can this account for some escapes?
head === :isdefined || # just returns `Bool`, nothing accounts for any escapes
head === :gc_preserve_begin || # `GC.@preserve` expressions themselves won't be used anywhere
head === :gc_preserve_end # `GC.@preserve` expressions themselves won't be used anywhere
head === :copyast || # XXX escape something?
head === :isdefined # just returns `Bool`, nothing accounts for any escapes
continue
else
add_conservative_changes!(astate, pc, stmt.args)
Expand Down Expand Up @@ -1062,17 +1064,21 @@ end
function escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any})
mi = first(args)::MethodInstance
first_idx, last_idx = 2, length(args)
add_liveness_changes!(astate, pc, args, first_idx, last_idx)
# TODO inspect `astate.ir.stmts[pc][:info]` and use const-prop'ed `InferenceResult` if available
cache = astate.get_escape_cache(mi)
ret = SSAValue(pc)
if cache isa Bool
if cache
for (i, argidx) in enumerate(first_idx:last_idx)
add_alias_change!(astate, ret, args[argidx])
end
return nothing # guaranteed to have no escape
else
return add_conservative_changes!(astate, pc, args, 2)
end
end
cache = cache::ArgEscapeCache
ret = SSAValue(pc)
retinfo = astate.estate[ret] # escape information imposed on the call statement
method = mi.def::Method
nargs = Int(method.nargs)
Expand Down Expand Up @@ -1160,6 +1166,17 @@ function escape_foreigncall!(astate::AnalysisState, pc::Int, args::Vector{Any})
end
end

function escape_gc_preserve!(astate::AnalysisState, pc::Int, args::Vector{Any})
@assert length(args) == 1 "invalid :gc_preserve_end"
val = args[1]
@assert val isa SSAValue "invalid :gc_preserve_end"
beginstmt = astate.ir[val][:stmt]
@assert isexpr(beginstmt, :gc_preserve_begin) "invalid :gc_preserve_end"
beginargs = beginstmt.args
# COMBAK we might need to add liveness for all statements from `:gc_preserve_begin` to `:gc_preserve_end`
add_liveness_changes!(astate, pc, beginargs)
end

normalize(@nospecialize x) = isa(x, QuoteNode) ? x.value : x

function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any})
Expand All @@ -1185,20 +1202,12 @@ function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any})
if result === missing
# if this call hasn't been handled by any of pre-defined handlers, escape it conservatively
add_conservative_changes!(astate, pc, args)
return
elseif result === true
add_liveness_changes!(astate, pc, args, 2)
return # ThrownEscape is already checked
elseif is_nothrow(astate.ir, pc)
add_liveness_changes!(astate, pc, args, 2)
else
# we escape statements with the `ThrownEscape` property using the effect-freeness
# computed by `stmt_effect_flags` invoked within inlining
# TODO throwness ≠ "effect-free-ness"
if is_nothrow(astate.ir, pc)
add_liveness_changes!(astate, pc, args, 2)
else
add_fallback_changes!(astate, pc, args, 2)
end
return
add_fallback_changes!(astate, pc, args, 2)
end
end

Expand Down Expand Up @@ -1526,4 +1535,12 @@ function escape_array_copy!(astate::AnalysisState, pc::Int, args::Vector{Any})
add_liveness_changes!(astate, pc, args, 6)
end

function escape_builtin!(::typeof(Core.finalizer), astate::AnalysisState, pc::Int, args::Vector{Any})
if length(args) 3
obj = args[3]
add_liveness_change!(astate, obj, pc) # TODO setup a proper FinalizerEscape?
end
return false
end

end # baremodule EscapeAnalysis
79 changes: 57 additions & 22 deletions base/compiler/ssair/passes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1300,7 +1300,13 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
# Inlining performs legality checks on the finalizer to determine
# whether or not we may inline it. If so, it appends extra arguments
# at the end of the intrinsic. Detect that here.
length(stmt.args) == 5 || continue
if length(stmt.args) == 4 && stmt.args[4] === nothing
# constant case
elseif length(stmt.args) == 5 && stmt.args[4] isa Bool && stmt.args[5] isa MethodInstance
# inlining case
else
continue
end
end
is_finalizer = true
elseif isexpr(stmt, :foreigncall)
Expand Down Expand Up @@ -1685,7 +1691,32 @@ end
function sroa_mutables!(ir::IRCode, defuses::IdDict{Int,Tuple{SPCSet,SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing,InliningState})
𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp)
lazypostdomtree = LazyPostDomtree(ir)
function find_finalizer_idx(defuse::SSADefUse)
finalizer_idx = nothing
for use in defuse.uses
if use.kind === :finalizer
# For now: Only allow one finalizer per allocation
finalizer_idx !== nothing && return false
finalizer_idx = use.idx
end
end
if finalizer_idx === nothing
return true
elseif inlining === nothing
return false
end
return finalizer_idx
end
for (defidx, (intermediaries, defuse)) in defuses
# Find the type for this allocation
defexpr = ir[SSAValue(defidx)][:stmt]
isexpr(defexpr, :new) || continue
typ = unwrap_unionall(ir.stmts[defidx][:type])
# Could still end up here if we tried to setfield! on an immutable, which would
# error at runtime, but is not illegal to have in the IR.
typ = widenconst(typ)
ismutabletype(typ) || continue
typ = typ::DataType
# Check if there are any uses we did not account for. If so, the variable
# escapes and we cannot eliminate the allocation. This works, because we're guaranteed
# not to include any intermediaries that have dead uses. As a result, missing uses will only ever
Expand All @@ -1696,29 +1727,33 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int,Tuple{SPCSet,SSADefUse}}
nuses += used_ssas[iidx]
end
nuses_total = used_ssas[defidx] + nuses - length(intermediaries)
nleaves == nuses_total || continue
# Find the type for this allocation
defexpr = ir[SSAValue(defidx)][:stmt]
isexpr(defexpr, :new) || continue
typ = unwrap_unionall(ir.stmts[defidx][:type])
# Could still end up here if we tried to setfield! on an immutable, which would
# error at runtime, but is not illegal to have in the IR.
typ = widenconst(typ)
ismutabletype(typ) || continue
typ = typ::DataType
# First check for any finalizer calls
finalizer_idx = nothing
for use in defuse.uses
if use.kind === :finalizer
# For now: Only allow one finalizer per allocation
finalizer_idx !== nothing && @goto skip
finalizer_idx = use.idx
if nleaves nuses_total
finalizer_idx = find_finalizer_idx(defuse)
if finalizer_idx isa Int
nargs = length(ir.argtypes) # TODO
estate = EscapeAnalysis.analyze_escapes(ir, nargs, 𝕃ₒ, get_escape_cache(inlining.interp))
einfo = estate[SSAValue(defidx)]
if EscapeAnalysis.has_no_escape(einfo)
already = BitSet(use.idx for use in defuse.uses)
for idx = einfo.Liveness
if idx already
push!(defuse.uses, SSAUse(:EALiveness, idx))
end
end
try_resolve_finalizer!(ir, defidx, finalizer_idx, defuse, inlining::InliningState,
lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info])
end
end
end
if finalizer_idx !== nothing && inlining !== nothing
try_resolve_finalizer!(ir, defidx, finalizer_idx, defuse, inlining,
lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info])
continue
else
finalizer_idx = find_finalizer_idx(defuse)
if finalizer_idx isa Int
try_resolve_finalizer!(ir, defidx, finalizer_idx, defuse, inlining::InliningState,
lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info])
continue
elseif !finalizer_idx
continue
end
end
# Partition defuses by field
fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)]
Expand Down
2 changes: 2 additions & 0 deletions base/compiler/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,8 @@ typeinf_lattice(::AbstractInterpreter) = InferenceLattice(BaseInferenceLattice.i
ipo_lattice(::AbstractInterpreter) = InferenceLattice(IPOResultLattice.instance)
optimizer_lattice(::AbstractInterpreter) = SimpleInferenceLattice.instance

get_escape_cache(interp::AbstractInterpreter) = GetNativeEscapeCache(interp)

abstract type CallInfo end

@nospecialize
Expand Down
1 change: 1 addition & 0 deletions test/compiler/EscapeAnalysis/EAUtils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ CC.OptimizationParams(interp::EscapeAnalyzer) = interp.opt_params
CC.get_inference_world(interp::EscapeAnalyzer) = interp.world
CC.get_inference_cache(interp::EscapeAnalyzer) = interp.inf_cache
CC.cache_owner(::EscapeAnalyzer) = EAToken()
CC.get_escape_cache(interp::EscapeAnalyzer) = GetEscapeCache(interp)

function CC.ipo_dataflow_analysis!(interp::EscapeAnalyzer, ir::IRCode, caller::InferenceResult)
# run EA on all frames that have been optimized
Expand Down
2 changes: 1 addition & 1 deletion test/compiler/codegen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ if opt_level > 0
@test occursin("ret $Iptr %\"x::$(Int)\"", load_dummy_ref_ir)
end

# Issue 22770
# Issue JuliaLang/julia#22770
let was_gced = false
@noinline make_tuple(x) = tuple(x)
@noinline use(x) = ccall(:jl_breakpoint, Cvoid, ())
Expand Down
2 changes: 1 addition & 1 deletion test/compiler/effects.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1135,7 +1135,7 @@ end

# These need EA
@test Core.Compiler.is_effect_free(Base.infer_effects(set_ref_with_unused_arg_1, (Base.RefValue{Int},)))
@test Core.Compiler.is_effect_free(Base.infer_effects(set_ref_with_unused_arg_2, (Base.RefValue{Int},)))
@test_broken Core.Compiler.is_effect_free(Base.infer_effects(set_ref_with_unused_arg_2, (Base.RefValue{Int},)))
@test Core.Compiler.is_effect_free_if_inaccessiblememonly(Base.infer_effects(set_arg_ref!, (Base.RefValue{Int},)))
@test_broken Core.Compiler.is_effect_free(Base.infer_effects(set_arr_with_unused_arg_1, (Vector{Int},)))
@test_broken Core.Compiler.is_effect_free(Base.infer_effects(set_arr_with_unused_arg_2, (Vector{Int},)))
Expand Down
30 changes: 30 additions & 0 deletions test/compiler/inline.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2223,3 +2223,33 @@ let src = code_typed1(bar_split_error, Tuple{})
@test count(iscall((src, foo_split)), src.code) == 0
@test count(iscall((src, Core.throw_methoderror)), src.code) > 0
end

# finalizer inlining with EA
mutable struct ForeignBuffer{T}
const ptr::Ptr{T}
end
const foreign_buffer_finalized = Ref(false)
function foreign_alloc(::Type{T}, length) where T
ptr = Libc.malloc(sizeof(T) * length)
ptr = Base.unsafe_convert(Ptr{T}, ptr)
obj = ForeignBuffer{T}(ptr)
return finalizer(obj) do obj
Base.@assume_effects :notaskstate :nothrow
foreign_buffer_finalized[] = true
Libc.free(obj.ptr)
end
end
function f_EA_finalizer(N::Int)
workspace = foreign_alloc(Float64, N)
GC.@preserve workspace begin
(;ptr) = workspace
Base.@assume_effects :nothrow @noinline println(devnull, "ptr = ", ptr)
end
end
let src = code_typed1(f_EA_finalizer, (Int,))
@test count(iscall((src, Core.finalizer)), src.code) == 0
end
let;Base.Experimental.@force_compile
f_EA_finalizer(42000)
@test foreign_buffer_finalized[]
end

0 comments on commit 370c805

Please sign in to comment.