package analysis, enter analysis from method signature (#101)

* wip: package analysis, enter analysis from method signature * fix report uniqify logic when printing * configurable * test
aviatesk · Apr 5, 2021 · cabe156 · cabe156
1 parent e881971
commit cabe156
Show file tree

Hide file tree

Showing 14 changed files with 278 additions and 83 deletions.
diff --git a/.JET.toml b/.JET.toml
@@ -1 +1,2 @@
+analyze_from_definitions = true
 concretization_patterns = ["EGAL_TYPES = x_", "_JET_CONFIGURATIONS = x_"]
diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl
@@ -128,9 +128,13 @@ SUITE["invalidation"] = @jetbenchmarkable (@analyze_call println(QuoteNode(nothi
     end
 end
 SUITE["self profiling"] = @jetbenchmarkable(
-    analyze_call(JET.virtual_process!,
-                 (AbstractString, AbstractString, Module, JET.JETInterpreter, JET.ToplevelConfig,),
-                 ),
+    analyze_call(JET.virtual_process, (AbstractString,
+                                       AbstractString,
+                                       Module,
+                                       JET.JETInterpreter,
+                                       JET.ToplevelConfig,
+                                       Module,
+                                       )),
     setup = begin
         using JET
         @analyze_call identity(nothing)

diff --git a/docs/src/internals.md b/docs/src/internals.md
@@ -23,7 +23,7 @@ JET.AbstractGlobal
 ## Top-level Analysis
 
 ```@docs
-JET.virtual_process!
+JET.virtual_process
 JET.ConcreteInterpreter
 JET.partially_interpret!
 ```

diff --git a/docs/src/usages.md b/docs/src/usages.md
@@ -11,22 +11,22 @@
 
 JET can analyze your "top-level" code.
 This means you can just give your Julia file or code to JET and get error reports.
-[`report_and_watch_file`](@ref), [`report_file`](@ref) and [`report_text`](@ref) are the main entry points for that.
+[`report_file`](@ref), [`report_and_watch_file`](@ref) and [`report_text`](@ref) are the main entry points for that.
 
 JET will analyze your code "half-statically" – JET will selectively interpret "top-level definitions" (like a function definition)
 and try to simulate Julia's top-level code execution, while it tries to avoid executing any other parts of code like function calls,
 but analyzes them using [abstract interpretation](https://en.wikipedia.org/wiki/Abstract_interpretation) (this is the part where JET "statically" analyzes your code).
-If you're interested in how JET selects "top-level definitions", please see [`JET.virtual_process!`](@ref).
+If you're interested in how JET selects "top-level definitions", please see [`JET.virtual_process`](@ref).
 
 !!! warning
     Because JET will actually interpret "top-level definitions" in your code, it certainly _runs_ your code.
     So we should note that JET can cause some side effects from your code; for example JET will try to expand all the
     macros used in your code, and so the side effects involved with macro expansions will also happen in JET's analysis process.
 
 ```@docs
-report_text
 report_file
 report_and_watch_file
+report_text
 ```
 
 

diff --git a/src/JET.jl b/src/JET.jl
@@ -134,7 +134,8 @@ import JuliaInterpreter:
     maybe_evaluate_builtin,
     collect_args,
     is_return,
-    is_quotenode_egal
+    is_quotenode_egal,
+    @lookup
 
 import MacroTools: @capture
 
@@ -449,6 +450,16 @@ This function will look for `$CONFIG_FILE_NAME` configuration file in the direct
 When found, the configurations specified in the file will overwrite the given `jetconfigs`.
 See [Configuration File](@ref) for more details.
 
+!!! tip
+    When you want to analyze your package but no file that uses it is available, the
+      `analyze_from_definitions` option can be useful (see [`ToplevelConfig`](@ref)'s `analyze_from_definitions` option). \\
+    For example, JET can analyze JET itself like below:
+    ```julia
+    # from the root directory of JET.jl
+    julia> report_file("src/JET.jl";
+                       analyze_from_definitions = true)
+    ```
+
 !!! note
     This function will enable the toplevel logger by default with the default logging level
     (see [Logging Configurations](@ref) for more details).
@@ -585,13 +596,13 @@ function analyze_text(text::AbstractString,
                       jetconfigs...)
     interp = JETInterpreter(; jetconfigs...)
     config = ToplevelConfig(; jetconfigs...)
-    return virtual_process!(text,
-                            filename,
-                            actualmod,
-                            interp,
-                            config,
-                            virtualmod,
-                            )
+    return virtual_process(text,
+                           filename,
+                           actualmod,
+                           interp,
+                           config,
+                           virtualmod,
+                           )
 end
 
 function analyze_toplevel!(interp::JETInterpreter, src::CodeInfo)

diff --git a/src/abstractinterpretation.jl b/src/abstractinterpretation.jl
@@ -77,7 +77,7 @@ end
 
 An overload for `abstract_call_gf_by_type(interp::JETInterpreter, ...)`, which keeps
   inference on non-concrete call sites in a toplevel frame created by
-  [`virtual_process!`](@ref).
+  [`virtual_process`](@ref).
 """
 function CC.bail_out_toplevel_call(interp::JETInterpreter, @nospecialize(sig), sv)
     return isa(sv.linfo.def, Module) && !isdispatchtuple(sig) && !istoplevel(interp, sv)

diff --git a/src/abstractinterpreterinterface.jl b/src/abstractinterpreterinterface.jl
@@ -298,10 +298,10 @@ end
                                          analysis_params = nothing,
                                          inf_params      = nothing,
                                          opt_params      = nothing,
-                                         concretized     = BitVector(),
-                                         toplevelmod     = __toplevelmod__,
-                                         toplevelmods    = Set{Module}(),
-                                         global_slots    = Dict{Int,Symbol}(),
+                                         concretized     = _CONCRETIZED,
+                                         toplevelmod     = _TOPLEVELMOD,
+                                         toplevelmods    = _TOPLEVELMODS,
+                                         global_slots    = _GLOBAL_SLOTS,
                                          logger          = nothing,
                                          depth           = 0,
                                          jetconfigs...)
@@ -328,6 +328,10 @@ end
 
 # dummies for non-toplevel analysis
 module __toplevelmod__ end
+const _CONCRETIZED  = BitVector()
+const _TOPLEVELMOD  = __toplevelmod__
+const _TOPLEVELMODS = Set{Module}()
+const _GLOBAL_SLOTS = Dict{Int,Symbol}()
 
 # constructor for sequential toplevel JET analysis
 function JETInterpreter(interp::JETInterpreter, concretized, toplevelmod)

diff --git a/src/legacy/abstractinterpretation b/src/legacy/abstractinterpretation
@@ -6,7 +6,7 @@
 
 the aims of this overload are:
 1. report `NoMethodErrorReport` on empty method signature matching
-2. keep inference on non-concrete call sites in a toplevel frame created by [`virtual_process!`](@ref)
+2. keep inference on non-concrete call sites in a toplevel frame created by [`virtual_process`](@ref)
 3. don't bail out even after the current return type grows up to `Any`, and collect as many
    error points as possible; of course this slows down inference, but hopefully it
    stays at a "practical" speed (because the number of matching methods is limited beforehand)

diff --git a/src/print.jl b/src/print.jl
@@ -253,8 +253,9 @@ function print_reports(io::IO,
                        jetconfigs...)
     config = PrintConfig(; jetconfigs...)
 
-    # XXX the same hack is already imposed in `_typeinf`, so we may not need this
-    reports = unique(get_identity_key, reports)
+    # here we more aggressively uniqify reports, ignoring the difference between different `MethodInstance`s
+    # as far as the report location and its signature are the same
+    reports = unique(print_identity_key, reports)
 
     if isempty(reports)
         if config.print_inference_success
@@ -283,6 +284,24 @@ function print_reports(io::IO,
     return true
 end
 
+@withmixedhash struct VirtualFrameNoLinfo  # printing-time stand-in for `VirtualFrame` holding only `file`, `line` and `sig`
+    file::Symbol  # taken from `VirtualFrame.file`
+    line::Int  # taken from `VirtualFrame.line`
+    sig::Vector{Any}  # taken from `VirtualFrame.sig`
+    # linfo::MethodInstance  (left out on purpose: `print_reports` ignores differences between `MethodInstance`s)
+end
+VirtualFrameNoLinfo(vf::VirtualFrame) = VirtualFrameNoLinfo(vf.file, vf.line, vf.sig)  # copies only these three fields of `vf`
+
+@withmixedhash struct PrintIdentityKey  # deduplication key built by `print_identity_key` for `unique` in `print_reports`
+    T::Type{<:InferenceErrorReport}  # type of the report
+    sig::Vector{Any}  # the report's `sig`
+    # entry_frame::VirtualFrame  (currently not part of the key)
+    error_frame::VirtualFrameNoLinfo  # last frame of the report's `st`, converted to `VirtualFrameNoLinfo`
+end
+
+print_identity_key(report::T) where {T<:InferenceErrorReport} =
+    PrintIdentityKey(T, report.sig, #=VirtualFrameNoLinfo(first(report.st)),=# VirtualFrameNoLinfo(last(report.st)))
+
 # traverse abstract call stack, print frames
 function print_report(io, report::InferenceErrorReport, config, wrote_linfos, depth = 1)
     if length(report.st) == depth # error here

diff --git a/src/reports.jl b/src/reports.jl
@@ -130,16 +130,6 @@ function restore_cached_report(cache::InferenceErrorReportCache)
     return T(st, cache.msg, cache.sig, cache.spec_args)::InferenceErrorReport
 end
 
-@withmixedhash struct IdentityKey
-    T::Type{<:InferenceErrorReport}
-    sig::Vector{Any}
-    # entry_frame::VirtualFrame
-    error_frame::VirtualFrame
-end
-
-get_identity_key(report::T) where {T<:InferenceErrorReport} =
-    IdentityKey(T, report.sig, #=first(report.st),=# last(report.st))
-
 macro reportdef(ex, kwargs...)
     T = esc(first(ex.args))
     args = map(ex.args) do x

diff --git a/src/typeinfer.jl b/src/typeinfer.jl
@@ -122,7 +122,7 @@ function CC._typeinf(interp::JETInterpreter, frame::InferenceState)
 
     # XXX this is a dirty fix for performance problem, we need more "proper" fix
     # https://github.com/aviatesk/JET.jl/issues/75
-    unique!(get_identity_key, reports)
+    unique!(report_identity_key, reports)
 
     reports_after = Set(reports)
 
@@ -250,6 +250,16 @@ end
 
 is_unreachable(@nospecialize(x)) = isa(x, ReturnNode) && !isdefined(x, :val)
 
+@withmixedhash struct ReportIdentityKey  # deduplication key built by `report_identity_key` for `unique!` in `CC._typeinf`
+    T::Type{<:InferenceErrorReport}  # type of the report
+    sig::Vector{Any}  # the report's `sig`
+    # entry_frame::VirtualFrame  (currently not part of the key)
+    error_frame::VirtualFrame  # last frame of the report's `st`, kept as a full `VirtualFrame`
+end
+
+report_identity_key(report::T) where {T<:InferenceErrorReport} =
+    ReportIdentityKey(T, report.sig, #=first(report.st),=# last(report.st))
+
 # basically same as `is_throw_call`, but also toplevel module handling added
 function is_throw_call_expr(interp::JETInterpreter, frame::InferenceState, @nospecialize(e))
     if isa(e, Expr)
Original file line number	Diff line number	Diff line change
		@@ -1 +1,2 @@
		analyze_from_definitions = true
		concretization_patterns = ["EGAL_TYPES = x_", "_JET_CONFIGURATIONS = x_"]