Skip to content

Commit

Permalink
invert linetable representation
Browse files Browse the repository at this point in the history
Previously, our linetables (similar to LLVM's) represented line
information as a linked list running from the callee via inlined_at up to
the original information. This required many copies of this information
to be created. Instead, we can take advantage of the fact that the child's
line table necessarily exists to flip this chain of information and
store, for each IR instruction, a table entry of the form:
`(current line number, (index into edges, index into edges statements))`
plus a table of all edges, plus a table with the original line numbers
from the parser, plus the file name. This is all packed into the struct:

    struct DebugInfo
        def::Union{Method,MethodInstance,Symbol}
        linetable::Union{Nothing,DebugInfo}
        edges::SimpleVector{DebugInfo}
        codelocs::String
    end

Each field is described in doc/src/devdocs/ast.md; see stacktraces.jl or
compiler/ssair/show.jl for how to decode and interpret this information.

For the sysimage, this saves several megabytes (about 113 MB -> 110 MB)
and about 5% of the stdlib pkgimages (294 MB -> 279 MB).

It also now happens to have the full type information for the inlined
functions. Now if you create an `IRShow.DILineInfoPrinter` with
`showtypes=true`, it can print that information when printing IR.
  • Loading branch information
vtjnash committed Feb 23, 2024
1 parent 2ebb896 commit 39e02f3
Show file tree
Hide file tree
Showing 56 changed files with 1,458 additions and 884 deletions.
45 changes: 30 additions & 15 deletions base/boot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
# }
#end

# struct GenericMemoryRef{kind::Symbol, T, AS::AddrSpace}
#struct GenericMemoryRef{kind::Symbol, T, AS::AddrSpace}
# mem::GenericMemory{kind, T, AS}
# data::Ptr{Cvoid} # make this GenericPtr{addrspace, Cvoid}
#end
Expand Down Expand Up @@ -125,12 +125,13 @@
# file::Union{Symbol,Nothing}
#end

#struct LineInfoNode
# module::Module
# method::Any (Union{Symbol, Method, MethodInstance})
# file::Symbol
# line::Int32
# inlined_at::Int32
#struct LegacyLineInfoNode end # only used internally during lowering

#struct DebugInfo
# def::Any # (Union{Symbol, Method, MethodInstance})
# linetable::Any # (Union{Nothing,DebugInfo})
# edges::SimpleVector # Vector{DebugInfo}
# codelocs::String # compressed Vector{UInt8}
#end

#struct GotoNode
Expand Down Expand Up @@ -296,6 +297,9 @@ TypeVar(@nospecialize(n), @nospecialize(ub)) = _typevar(n::Symbol, Union{}, ub)
TypeVar(@nospecialize(n), @nospecialize(lb), @nospecialize(ub)) = _typevar(n::Symbol, lb, ub)
UnionAll(@nospecialize(v), @nospecialize(t)) = ccall(:jl_type_unionall, Any, (Any, Any), v::TypeVar, t)

const Memory{T} = GenericMemory{:not_atomic, T, CPU}
const MemoryRef{T} = GenericMemoryRef{:not_atomic, T, CPU}

# simple convert for use by constructors of types in Core
# note that there is no actual conversion defined here,
# so the methods and ccall's in Core aren't permitted to use convert
Expand Down Expand Up @@ -466,8 +470,10 @@ eval(Core, quote
isa(f, String) && (f = Symbol(f))
return $(Expr(:new, :LineNumberNode, :l, :f))
end
LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line::Int32, inlined_at::Int32) =
$(Expr(:new, :LineInfoNode, :mod, :method, :file, :line, :inlined_at))
DebugInfo(def::Union{Method,MethodInstance,Symbol}, linetable::Union{Nothing,DebugInfo}, edges::SimpleVector, codelocs::String) =
$(Expr(:new, :DebugInfo, :def, :linetable, :edges, :codelocs))
DebugInfo(def::Union{Method,MethodInstance,Symbol}) =
$(Expr(:new, :DebugInfo, :def, nothing, Core.svec(), ""))
SlotNumber(n::Int) = $(Expr(:new, :SlotNumber, :n))
PhiNode(edges::Array{Int32, 1}, values::Array{Any, 1}) = $(Expr(:new, :PhiNode, :edges, :values))
PiNode(@nospecialize(val), @nospecialize(typ)) = $(Expr(:new, :PiNode, :val, :typ))
Expand All @@ -482,16 +488,25 @@ eval(Core, quote
MethodMatch(@nospecialize(spec_types), sparams::SimpleVector, method::Method, fully_covers::Bool) = $(Expr(:new, :MethodMatch, :spec_types, :sparams, :method, :fully_covers))
end)

# Legacy line-information record. It is retained only so that
# Serializer.deserialize can still reconstruct IR that was written with the
# old linetable representation.
struct LineInfoNode # legacy support for aiding Serializer.deserialize of old IR
mod::Module
# deliberately left untyped (i.e. `Any`)
method
file::Symbol
line::Int32
# NOTE(review): presumably an index into the old linetable identifying the
# inlining call site — confirm against the old representation
inlined_at::Int32
# Inner constructor: stores the five fields as given, in declaration order;
# `method` is marked @nospecialize.
LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line::Int32, inlined_at::Int32) = new(mod, method, file, line, inlined_at)
end


function CodeInstance(
mi::MethodInstance, owner, @nospecialize(rettype), @nospecialize(exctype), @nospecialize(inferred_const),
@nospecialize(inferred), const_flags::Int32, min_world::UInt, max_world::UInt,
ipo_effects::UInt32, effects::UInt32, @nospecialize(analysis_results),
relocatability::UInt8)
relocatability::UInt8, edges::DebugInfo)
return ccall(:jl_new_codeinst, Ref{CodeInstance},
(Any, Any, Any, Any, Any, Any, Int32, UInt, UInt, UInt32, UInt32, Any, UInt8),
(Any, Any, Any, Any, Any, Any, Int32, UInt, UInt, UInt32, UInt32, Any, UInt8, Any),
mi, owner, rettype, exctype, inferred_const, inferred, const_flags, min_world, max_world,
ipo_effects, effects, analysis_results,
relocatability)
ipo_effects, effects, analysis_results, relocatability, edges)
end
GlobalRef(m::Module, s::Symbol) = ccall(:jl_module_globalref, Ref{GlobalRef}, (Any, Any), m, s)
Module(name::Symbol=:anonymous, std_imports::Bool=true, default_names::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool, Bool), name, std_imports, default_names)
Expand Down Expand Up @@ -629,12 +644,12 @@ module IR

export CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode,
NewvarNode, SSAValue, SlotNumber, Argument,
PiNode, PhiNode, PhiCNode, UpsilonNode, LineInfoNode,
PiNode, PhiNode, PhiCNode, UpsilonNode, DebugInfo,
Const, PartialStruct, InterConditional, EnterNode

using Core: CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode,
NewvarNode, SSAValue, SlotNumber, Argument,
PiNode, PhiNode, PhiCNode, UpsilonNode, LineInfoNode,
PiNode, PhiNode, PhiCNode, UpsilonNode, DebugInfo,
Const, PartialStruct, InterConditional, EnterNode

end # module IR
Expand Down
25 changes: 10 additions & 15 deletions base/compiler/inferencestate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -478,21 +478,16 @@ function should_insert_coverage(mod::Module, src::CodeInfo)
coverage_enabled(mod) && return true
JLOptions().code_coverage == 3 || return false
# path-specific coverage mode: if any line falls in a tracked file enable coverage for all
linetable = src.linetable
if isa(linetable, Vector{Any})
for line in linetable
line = line::LineInfoNode
if is_file_tracked(line.file)
return true
end
end
elseif isa(linetable, Vector{LineInfoNode})
for line in linetable
if is_file_tracked(line.file)
return true
end
end
end
return should_insert_coverage(src.debuginfo)
end
# Decide whether coverage should be inserted for a given definition.
# A Symbol is passed straight to `is_file_tracked`; a Method is judged by its
# `file` field and a MethodInstance by its `def` field; a Module never
# requests coverage through this path.
should_insert_coverage(file::Symbol) = is_file_tracked(file)
should_insert_coverage(method::Method) = should_insert_coverage(method.file)
should_insert_coverage(mi::MethodInstance) = should_insert_coverage(mi.def)
should_insert_coverage(mod::Module) = false

# A DebugInfo requests coverage when either its nested `linetable` (itself a
# DebugInfo, checked recursively) or its own `def` does. The `linetable` is
# consulted first; `edges` are not inspected here.
function should_insert_coverage(info::DebugInfo)
    inner = info.linetable
    if inner !== nothing && should_insert_coverage(inner)
        return true
    end
    if should_insert_coverage(info.def)
        return true
    end
    return false
end

Expand Down
92 changes: 73 additions & 19 deletions base/compiler/optimize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -987,22 +987,79 @@ function run_passes_ipo_safe(
if is_asserts()
@timeit "verify 3" begin
verify_ir(ir, true, false, optimizer_lattice(sv.inlining.interp))
verify_linetable(ir.linetable)
verify_linetable(ir.debuginfo, length(ir.stmts))
end
end
@label __done__ # used by @pass
return ir
end

function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
linetable = ci.linetable
if !isa(linetable, Vector{LineInfoNode})
linetable = collect(LineInfoNode, linetable::Vector{Any})::Vector{LineInfoNode}
# Normalize the tail of the statement stream so that it ends in a terminator.
#
# First, trailing `nothing` statements are dropped (they are not handled well
# downstream, which expects the last instruction to be a terminator). All
# per-statement arrays are truncated together; `debuginfo.codelocs` holds three
# entries per statement, hence the `3n` length.
#
# Second, if the final statement is still not a GotoIfNot, GotoNode or
# ReturnNode (this can happen for an implicit return on a dead branch), an
# unreachable `ReturnNode()` is appended and the last basic block of `cfg` is
# extended to cover it.
#
# Mutates `code`, `ssavaluetypes`, `ssaflags`, `debuginfo.codelocs`, `info` and
# `cfg` in place and returns `nothing`.
function strip_trailing_junk!(code::Vector{Any}, ssavaluetypes::Vector{Any}, ssaflags::Vector,
                              debuginfo::DebugInfoStream, cfg::CFG, info::Vector{CallInfo})
    codelocs = debuginfo.codelocs

    # Scan backwards for the last statement that is not `nothing`.
    nkeep = length(code)
    while nkeep >= 1 && code[nkeep] === nothing
        nkeep -= 1
    end
    # If every statement is `nothing` (or there are none), nothing is truncated.
    if nkeep >= 1
        resize!(code, nkeep)
        resize!(ssavaluetypes, nkeep)
        resize!(codelocs, 3nkeep)
        resize!(info, nkeep)
        resize!(ssaflags, nkeep)
    end

    # Already ends in a terminator: nothing more to do.
    lastinst = code[end]
    if isa(lastinst, GotoIfNot) || isa(lastinst, GotoNode) || isa(lastinst, ReturnNode)
        return nothing
    end

    # Append an unreachable return, with an empty (all-zero) code location.
    push!(code, ReturnNode())
    push!(ssavaluetypes, Union{})
    push!(codelocs, 0, 0, 0)
    push!(info, NoCallInfo())
    push!(ssaflags, IR_FLAG_NOTHROW)

    # Grow the final basic block by one statement so the CFG includes the
    # appended terminator.
    lastblock = cfg.blocks[end]
    stmts = lastblock.stmts
    cfg.blocks[end] = BasicBlock(lastblock, StmtRange(first(stmts), last(stmts) + 1))
    if length(cfg.index) == length(cfg.blocks)
        cfg.index[end] += 1
    end
    return nothing
end

# Report whether the line information that `di` records for statement index
# `codeloc` differs from what it records for statement index `prevloc`.
# Returns a Bool.
#
# `getdebugidx` is defined elsewhere; its result is indexed here at positions
# 1, 2 and 3.
# NOTE(review): presumably (line, index into `di.edges`, statement index within
# that edge) — confirm against `getdebugidx`.
#
# Each iteration of the loop compares one level: the first component (by
# recursing into `di.linetable` when one exists and the component is nonzero,
# otherwise by direct comparison), then the edge index. If both match and there
# is an edge, the loop descends into that edge's DebugInfo and repeats with the
# third components as the new `codeloc`/`prevloc`.
function changed_lineinfo(di::DebugInfo, codeloc::Int, prevloc::Int)
while true
next = getdebugidx(di, codeloc)
next[1] < 0 && return false # invalid info
next[1] == 0 && next[2] == 0 && return false # no new info
prevloc <= 0 && return true # no old info
prev = getdebugidx(di, prevloc)
next === prev && return false # exactly identical
prev[1] < 0 && return true # previous invalid info, now valid
edge = next[2]
edge === prev[2] || return true # change to this edge
linetable = di.linetable
# check for change to line number here
if linetable === nothing || next[1] == 0
next[1] == prev[1] || return true
else
changed_lineinfo(linetable, next[1], prev[1]) && return true
end
# check for change to edge here
edge == 0 && return false # no edge here
# same edge on both sides: continue the comparison inside that edge
di = di.edges[Int(edge)]::DebugInfo
codeloc = Int(next[3])
prevloc = Int(prev[3])
end
end

function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
# Update control-flow to reflect any unreachable branches.
ssavaluetypes = ci.ssavaluetypes::Vector{Any}
code = copy_exprargs(ci.code)
ci.code = code = copy_exprargs(ci.code)
di = DebugInfoStream(sv.linfo, ci.debuginfo, length(code))
codelocs = di.codelocs
ssaflags = ci.ssaflags
for i = 1:length(code)
expr = code[i]
if !(i in sv.unreachable)
Expand All @@ -1018,11 +1075,11 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
((block + 1) != destblock) && cfg_delete_edge!(sv.cfg, block, destblock)
expr = Expr(:call, Core.typeassert, expr.cond, Bool)
elseif i + 1 in sv.unreachable
@assert has_flag(ci.ssaflags[i], IR_FLAG_NOTHROW)
@assert has_flag(ssaflags[i], IR_FLAG_NOTHROW)
cfg_delete_edge!(sv.cfg, block, block + 1)
expr = GotoNode(expr.dest)
elseif expr.dest in sv.unreachable
@assert has_flag(ci.ssaflags[i], IR_FLAG_NOTHROW)
@assert has_flag(ssaflags[i], IR_FLAG_NOTHROW)
cfg_delete_edge!(sv.cfg, block, block_for_inst(sv.cfg, expr.dest))
expr = nothing
end
Expand All @@ -1049,20 +1106,17 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
# Go through and add an unreachable node after every
# Union{} call. Then reindex labels.
stmtinfo = sv.stmt_info
codelocs = ci.codelocs
ssaflags = ci.ssaflags
meta = Expr[]
idx = 1
oldidx = 1
nstmts = length(code)
ssachangemap = labelchangemap = blockchangemap = nothing
prevloc = zero(eltype(ci.codelocs))
prevloc = 0
while idx <= length(code)
codeloc = codelocs[idx]
if sv.insert_coverage && codeloc != prevloc && codeloc != 0
if sv.insert_coverage && changed_lineinfo(ci.debuginfo, oldidx, prevloc)
# insert a side-effect instruction before the current instruction in the same basic block
insert!(code, idx, Expr(:code_coverage_effect))
insert!(codelocs, idx, codeloc)
splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]))
insert!(ssavaluetypes, idx, Nothing)
insert!(stmtinfo, idx, NoCallInfo())
insert!(ssaflags, idx, IR_FLAG_NULL)
Expand All @@ -1081,7 +1135,7 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
end
blockchangemap[block_for_inst(sv.cfg, oldidx)] += 1
idx += 1
prevloc = codeloc
prevloc = oldidx
end
if ssavaluetypes[idx] === Union{} && !(oldidx in sv.unreachable) && !isa(code[idx], PhiNode)
# We should have converted any must-throw terminators to an equivalent w/o control-flow edges
Expand All @@ -1103,7 +1157,7 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
# terminator with an explicit `unreachable` marker.
if block_end > idx
code[block_end] = ReturnNode()
codelocs[block_end] = codelocs[idx]
codelocs[3block_end-2], codelocs[3block_end-1], codelocs[3block_end-0] = (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0])
ssavaluetypes[block_end] = Union{}
stmtinfo[block_end] = NoCallInfo()
ssaflags[block_end] = IR_FLAG_NOTHROW
Expand All @@ -1118,7 +1172,7 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
idx += block_end - idx
else
insert!(code, idx + 1, ReturnNode())
insert!(codelocs, idx + 1, codelocs[idx])
splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]))
insert!(ssavaluetypes, idx + 1, Union{})
insert!(stmtinfo, idx + 1, NoCallInfo())
insert!(ssaflags, idx + 1, IR_FLAG_NOTHROW)
Expand Down Expand Up @@ -1155,14 +1209,14 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
for i = 1:length(code)
code[i] = process_meta!(meta, code[i])
end
strip_trailing_junk!(ci, sv.cfg, code, stmtinfo)
strip_trailing_junk!(code, ssavaluetypes, ssaflags, di, sv.cfg, stmtinfo)
types = Any[]
stmts = InstructionStream(code, types, stmtinfo, codelocs, ssaflags)
# NOTE this `argtypes` contains types of slots yet: it will be modified to contain the
# types of call arguments only once `slot2reg` converts this `IRCode` to the SSA form
# and eliminates slots (see below)
argtypes = sv.slottypes
return IRCode(stmts, sv.cfg, linetable, argtypes, meta, sv.sptypes)
return IRCode(stmts, sv.cfg, di, argtypes, meta, sv.sptypes)
end

function process_meta!(meta::Vector{Expr}, @nospecialize stmt)
Expand Down
Loading

0 comments on commit 39e02f3

Please sign in to comment.