some updates and fixes to inlining cost

- Instead of always inlining functions marked at-inline, increase the cost threshold 20x - Don't inline functions inferred not to return - statement_cost no longer needs to look at nested Exprs in general - Fix cost of `:copyast`
JuliaLang · Jul 7, 2018 · 9277d3a · 9277d3a · chriselrod · Jul 13, 2018
1 parent 30bf7ac
commit 9277d3a
Show file tree

Hide file tree

Showing 10 changed files with 91 additions and 70 deletions.
diff --git a/base/array.jl b/base/array.jl
@@ -1666,7 +1666,9 @@ julia> findnext(isodd, A, CartesianIndex(1, 1))
 CartesianIndex(1, 1)
 ```
 """
-function findnext(testf::Function, A, start)
+@inline findnext(testf::Function, A, start) = findnext_internal(testf, A, start)
+
+function findnext_internal(testf::Function, A, start)
     l = last(keys(A))
     i = start
     while i <= l
@@ -1854,7 +1856,9 @@ julia> findprev(isodd, A, CartesianIndex(1, 2))
 CartesianIndex(2, 1)
 ```
 """
-function findprev(testf::Function, A, start)
+@inline findprev(testf::Function, A, start) = findprev_internal(testf, A, start)
+
+function findprev_internal(testf::Function, A, start)
     i = start
     while i >= first(keys(A))
         testf(A[i]) && return i

diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
@@ -139,8 +139,9 @@ function abstract_call_method_with_const_args(@nospecialize(f), argtypes::Vector
     code === nothing && return Any
     code = code::MethodInstance
     # decide if it's likely to be worthwhile
-    cache_inlineable = false
-    if isdefined(code, :inferred)
+    declared_inline = isdefined(method, :source) && ccall(:jl_ast_flag_inlineable, Bool, (Any,), method.source)
+    cache_inlineable = declared_inline
+    if isdefined(code, :inferred) && !cache_inlineable
         cache_inf = code.inferred
         if !(cache_inf === nothing)
             cache_src_inferred = ccall(:jl_ast_flag_inferred, Bool, (Any,), cache_inf)

diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
@@ -212,14 +212,21 @@ function optimize(opt::OptimizationState, @nospecialize(result))
         else
             force_noinline = true
         end
+        if !opt.src.inlineable && result === Union{}
+            force_noinline = true
+        end
     end
     if force_noinline
         opt.src.inlineable = false
-    elseif !opt.src.inlineable && isa(def, Method)
+    elseif isa(def, Method)
         bonus = 0
         if result ⊑ Tuple && !isbitstype(widenconst(result))
             bonus = opt.params.inline_tupleret_bonus
         end
+        if opt.src.inlineable
+            # For functions declared @inline, increase the cost threshold 20x
+            bonus += opt.params.inline_cost_threshold*19
+        end
         opt.src.inlineable = isinlineable(def, opt, bonus)
     end
     nothing
@@ -270,86 +277,87 @@ isknowntype(@nospecialize T) = (T == Union{}) || isconcretetype(T)
 
 function statement_cost(ex::Expr, line::Int, src::CodeInfo, spvals::SimpleVector, slottypes::Vector{Any}, params::Params)
     head = ex.head
-    if is_meta_expr_head(head) || head == :copyast # not sure if copyast is right
+    if is_meta_expr_head(head)
         return 0
-    end
-    argcost = 0
-    for a in ex.args
-        if a isa Expr
-            argcost = plus_saturate(argcost, statement_cost(a, -1, src, spvals, slottypes, params))
-        end
-    end
-    if head == :return || head == :(=)
-        return argcost
-    end
-    extyp = line == -1 ? Any : src.ssavaluetypes[line]
-    if head == :call
+    elseif head === :call
         ftyp = argextype(ex.args[1], src, spvals, slottypes)
-        if isa(ftyp, Type)
-            return argcost
+        f = singleton_type(ftyp)
+        if isa(f, IntrinsicFunction)
+            iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
+            if !isassigned(T_IFUNC_COST, iidx)
+                # unknown/unhandled intrinsic
+                return params.inline_nonleaf_penalty
+            end
+            return T_IFUNC_COST[iidx]
         end
-        if isa(ftyp, Const)
-            f = (ftyp::Const).val
-            if isa(f, IntrinsicFunction)
-                iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
-                if !isassigned(T_IFUNC_COST, iidx)
-                    # unknown/unhandled intrinsic
-                    return plus_saturate(argcost, params.inline_nonleaf_penalty)
-                end
-                return plus_saturate(argcost, T_IFUNC_COST[iidx])
+        if isa(f, Builtin)
+            # The efficiency of operations like a[i] and s.b
+            # depend strongly on whether the result can be
+            # inferred, so check the type of ex
+            if f === Main.Core.getfield || f === Main.Core.tuple
+                # we might like to penalize non-inferrability, but
+                # tuple iteration/destructuring makes that impossible
+                # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty)
+                return 0
+            elseif f === Main.Core.arrayref && length(ex.args) >= 3
+                atyp = argextype(ex.args[3], src, spvals, slottypes)
+                return isknowntype(atyp) ? 4 : params.inline_nonleaf_penalty
             end
-            if isa(f, Builtin)
-                # The efficiency of operations like a[i] and s.b
-                # depend strongly on whether the result can be
-                # inferred, so check the type of ex
-                if f == Main.Core.getfield || f == Main.Core.tuple
-                    # we might like to penalize non-inferrability, but
-                    # tuple iteration/destructuring makes that
-                    # impossible
-                    # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty)
-                    return argcost
-                elseif f == Main.Core.arrayref && length(ex.args) >= 3
-                    atyp = argextype(ex.args[3], src, spvals, slottypes)
-                    return plus_saturate(argcost, isknowntype(atyp) ? 4 : params.inline_nonleaf_penalty)
-                end
-                fidx = findfirst(x->x===f, T_FFUNC_KEY)
-                if fidx === nothing
-                    # unknown/unhandled builtin or anonymous function
-                    # Use the generic cost of a direct function call
-                    return plus_saturate(argcost, 20)
-                end
-                return plus_saturate(argcost, T_FFUNC_COST[fidx])
+            fidx = findfirst(x->x===f, T_FFUNC_KEY)
+            if fidx === nothing
+                # unknown/unhandled builtin or anonymous function
+                # Use the generic cost of a direct function call
+                return 20
             end
+            return T_FFUNC_COST[fidx]
         end
-        return plus_saturate(argcost, params.inline_nonleaf_penalty)
-    elseif head == :foreigncall || head == :invoke
+        return params.inline_nonleaf_penalty
+    elseif head === :foreigncall || head === :invoke
         # Calls whose "return type" is Union{} do not actually return:
         # they are errors. Since these are not part of the typical
         # run-time of the function, we omit them from
         # consideration. This way, non-inlined error branches do not
         # prevent inlining.
-        return extyp == Union{} ? 0 : plus_saturate(20, argcost)
-    elseif head == :llvmcall
-        return plus_saturate(10, argcost) # a wild guess at typical cost
-    elseif head == :enter
+        extyp = line == -1 ? Any : src.ssavaluetypes[line]
+        return extyp === Union{} ? 0 : 20
+    elseif head === :return
+        a = ex.args[1]
+        if a isa Expr
+            return statement_cost(a, -1, src, spvals, slottypes, params)
+        end
+        return 0
+    elseif head === :(=)
+        if ex.args[1] isa GlobalRef
+            cost = 20
+        else
+            cost = 0
+        end
+        a = ex.args[2]
+        if a isa Expr
+            cost = plus_saturate(cost, statement_cost(a, -1, src, spvals, slottypes, params))
+        end
+        return cost
+    elseif head === :copyast
+        return 100
+    elseif head === :enter
         # try/catch is a couple function calls,
         # but don't inline functions with try/catch
         # since these aren't usually performance-sensitive functions,
         # and llvm is more likely to miscompile them when these functions get large
         return typemax(Int)
-    elseif head == :gotoifnot
+    elseif head === :gotoifnot
         target = ex.args[2]::Int
         # loops are generally always expensive
         # but assume that forward jumps are already counted for from
         # summing the cost of the not-taken branch
-        return target < line ? plus_saturate(40, argcost) : argcost
+        return target < line ? 40 : 0
     end
-    return argcost
+    return 0
 end
 
 function inline_worthy(body::Array{Any,1}, src::CodeInfo, spvals::SimpleVector, slottypes::Vector{Any},
                        params::Params, cost_threshold::Integer=params.inline_cost_threshold)
-    bodycost = 0
+    bodycost::Int = 0
     for line = 1:length(body)
         stmt = body[line]
         if stmt isa Expr
@@ -363,9 +371,9 @@ function inline_worthy(body::Array{Any,1}, src::CodeInfo, spvals::SimpleVector,
             continue
         end
         bodycost = plus_saturate(bodycost, thiscost)
-        bodycost == typemax(Int) && return false
+        bodycost > cost_threshold && return false
     end
-    return bodycost <= cost_threshold
+    return true
 end
 
 function is_known_call(e::Expr, @nospecialize(func), src, spvals::SimpleVector, slottypes::Vector{Any} = empty_slottypes)

diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl
@@ -585,6 +585,8 @@ end
 function singleton_type(@nospecialize(ft))
     if isa(ft, Const)
         return ft.val
+    elseif ft isa DataType && isdefined(ft, :instance)
+        return ft.instance
     end
     return nothing
 end

diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
@@ -188,7 +188,9 @@ add_tfunc(checked_smul_int, 2, 2, chk_tfunc, 10)
 add_tfunc(checked_umul_int, 2, 2, chk_tfunc, 10)
     ## other, misc intrinsics ##
 add_tfunc(Core.Intrinsics.llvmcall, 3, INT_INF,
-          (@nospecialize(fptr), @nospecialize(rt), @nospecialize(at), a...) -> instanceof_tfunc(rt)[1], 10)
+          # TODO: Lower this inlining cost. We currently need to prevent inlining llvmcall
+          # to avoid issues with its IR.
+          (@nospecialize(fptr), @nospecialize(rt), @nospecialize(at), a...) -> instanceof_tfunc(rt)[1], 1000)
 cglobal_tfunc(@nospecialize(fptr)) = Ptr{Cvoid}
 cglobal_tfunc(@nospecialize(fptr), @nospecialize(t)) = (isType(t) ? Ptr{t.parameters[1]} : Ptr)
 cglobal_tfunc(@nospecialize(fptr), t::Const) = (isa(t.val, Type) ? Ptr{t.val} : Ptr)

diff --git a/base/compiler/validation.jl b/base/compiler/validation.jl
@@ -211,7 +211,7 @@ is_valid_lvalue(@nospecialize(x)) = isa(x, Slot) || isa(x, GlobalRef)
 
 function is_valid_argument(@nospecialize(x))
     if isa(x, Slot) || isa(x, SSAValue) || isa(x, GlobalRef) || isa(x, QuoteNode) ||
-        (isa(x,Expr) && (x.head in (:static_parameter, :boundscheck, :copyast))) ||
+        (isa(x,Expr) && (x.head in (:static_parameter, :boundscheck))) ||
         isa(x, Number) || isa(x, AbstractString) || isa(x, AbstractChar) || isa(x, Tuple) ||
         isa(x, Type) || isa(x, Core.Box) || isa(x, Module) || x === nothing
         return true
@@ -223,7 +223,7 @@ end
 
 function is_valid_rvalue(@nospecialize(x))
     is_valid_argument(x) && return true
-    if isa(x, Expr) && x.head in (:new, :the_exception, :isdefined, :call, :invoke, :foreigncall, :cfunction, :gc_preserve_begin)
+    if isa(x, Expr) && x.head in (:new, :the_exception, :isdefined, :call, :invoke, :foreigncall, :cfunction, :gc_preserve_begin, :copyast)
         return true
     end
     return false

diff --git a/base/deprecated.jl b/base/deprecated.jl
@@ -735,8 +735,9 @@ end
 @deprecate findprev(A, v, i::Integer) something(findprev(isequal(v), A, i), 0)
 @deprecate findlast(A, v)             something(findlast(isequal(v), A), 0)
 # to fix ambiguities introduced by deprecations
-findnext(pred::Function, A, i::Integer) = invoke(findnext, Tuple{Function, Any, Any}, pred, A, i)
-findprev(pred::Function, A, i::Integer) = invoke(findprev, Tuple{Function, Any, Any}, pred, A, i)
+# TODO: also remove find*_internal in array.jl
+findnext(pred::Function, A, i::Integer) = findnext_internal(pred, A, i)
+findprev(pred::Function, A, i::Integer) = findprev_internal(pred, A, i)
 # also remove deprecation warnings in find* functions in array.jl, sparse/sparsematrix.jl,
 # and sparse/sparsevector.jl.
 

diff --git a/base/strings/string.jl b/base/strings/string.jl
@@ -80,6 +80,7 @@ pointer(s::String) = unsafe_convert(Ptr{UInt8}, s)
 pointer(s::String, i::Integer) = pointer(s)+(i-1)
 
 ncodeunits(s::String) = Core.sizeof(s)
+sizeof(s::String) = Core.sizeof(s)
 codeunit(s::String) = UInt8
 
 @inline function codeunit(s::String, i::Integer)

diff --git a/test/compiler/compiler.jl b/test/compiler/compiler.jl
@@ -860,7 +860,7 @@ function break_21369()
         local fr
         while true
             fr = Base.StackTraces.lookup(bt[i])[end]
-            if !fr.from_c
+            if !fr.from_c && fr.func !== :error
                 break
             end
             i += 1

diff --git a/test/show.jl b/test/show.jl
@@ -1269,10 +1269,12 @@ function compute_annotations(f, types)
     join((strip(string(a, " "^(max_loc_method-length(a)), b)) for (a, b) in zip(la, lb)), '\n')
 end
 
-g_line() = leaf() # Deliberately not implemented to end up as a leaf after inlining
+@noinline leaffunc() = print()
+
+@inline g_line() = leaffunc()
 
 # Test that separate instances of the same function do not get merged
-function f_line()
+@inline function f_line()
    g_line()
    g_line()
    g_line()