Remove legacy pass manager pipelines. (#605)

JuliaGPU · Jul 18, 2024 · c9391bf · c9391bf
1 parent 4f63a1f
commit c9391bf
Showing 1 changed file with 1 addition and 283 deletions.
diff --git a/src/optim.jl b/src/optim.jl
@@ -1,15 +1,6 @@
 # LLVM IR optimization
 
-function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module; opt_level=2)
-    optimize_newpm!(job, mod; opt_level)
-    # TODO: clean up
-    return
-end
-
-
-## new pm
-
-function optimize_newpm!(@nospecialize(job::CompilerJob), mod::LLVM.Module; opt_level)
+function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module; opt_level=1)
     tm = llvm_machine(job.config.target)
 
     global current_job
@@ -292,279 +283,6 @@ function buildCleanupPipeline(mpm, @nospecialize(job::CompilerJob), opt_level)
 end
 
 
-## legacy pm
-
-function optimize_legacypm!(@nospecialize(job::CompilerJob), mod::LLVM.Module; opt_level)
-    triple = llvm_triple(job.config.target)
-    tm = llvm_machine(job.config.target)
-
-    global current_job
-    current_job = job
-
-    @dispose pm=ModulePassManager() begin
-        addTargetPasses!(pm, tm, triple)
-        addOptimizationPasses!(pm, opt_level)
-        run!(pm, mod)
-    end
-
-    # NOTE: we need to use multiple distinct pass managers to force pass ordering;
-    #       intrinsics should never get lowered before Julia has optimized them.
-    # XXX: why doesn't the barrier noop pass work here?
-
-    # lower intrinsics
-    @dispose pm=ModulePassManager() begin
-        addTargetPasses!(pm, tm, triple)
-
-        if !uses_julia_runtime(job)
-            lower_gc_frame!(pm)
-        end
-
-        if job.config.kernel
-            # GC lowering is the last pass that may introduce calls to the runtime library,
-            # and thus additional uses of the kernel state intrinsic.
-            # TODO: now that all kernel state-related passes are being run here, merge some?
-            add_kernel_state!(pm)
-            lower_kernel_state!(pm)
-            cleanup_kernel_state!(pm)
-        end
-
-        if !uses_julia_runtime(job)
-            # remove dead uses of ptls
-            aggressive_dce!(pm)
-            lower_ptls!(pm)
-        end
-
-        if uses_julia_runtime(job)
-            lower_exc_handlers!(pm)
-        end
-        # the Julia GC lowering pass also has some clean-up that is required
-        late_lower_gc_frame!(pm)
-        if uses_julia_runtime(job)
-            final_lower_gc!(pm)
-        end
-
-        remove_ni!(pm)
-        remove_julia_addrspaces!(pm)
-
-        if uses_julia_runtime(job)
-            # We need these two passes and the instcombine below
-            # after GC lowering to let LLVM do some constant propagation on the tags.
-            # and remove some unnecessary write barrier checks.
-            gvn!(pm)
-            sccp!(pm)
-            # Remove dead use of ptls
-            dce!(pm)
-            LLVM.Interop.lower_ptls!(pm, dump_native(job))
-            instruction_combining!(pm)
-            # Clean up write barrier and ptls lowering
-            cfgsimplification!(pm)
-        end
-
-        # Julia's operand bundles confuse the inliner, so repeat here now they are gone.
-        # FIXME: we should fix the inliner so that inlined code gets optimized early-on
-        always_inliner!(pm)
-
-        # some of Julia's optimization passes happen _after_ lowering intrinsics
-        combine_mul_add!(pm)
-        div_rem_pairs!(pm)
-
-        run!(pm, mod)
-    end
-
-    # target-specific optimizations
-    optimize_module!(job, mod)
-
-    # we compile a module containing the entire call graph,
-    # so perform some interprocedural optimizations.
-    #
-    # for some reason, these passes need to be distinct from the regular optimization chain,
-    # or certain values (such as the constant arrays used to populate llvm.compiler.used as
-    # part of the LateLowerGCFrame pass) aren't collected properly.
-    #
-    # these might not always be safe, as Julia's IR metadata isn't designed for IPO.
-    @dispose pm=ModulePassManager() begin
-        addTargetPasses!(pm, tm, triple)
-
-        # simplify function calls that don't use the returned value
-        dead_arg_elimination!(pm)
-
-        run!(pm, mod)
-    end
-
-    return
-end
-
-function addTargetPasses!(pm, tm, triple)
-    add_library_info!(pm, triple)
-    add_transform_info!(pm, tm)
-end
-
-# Based on Julia's optimization pipeline, minus the SLP and loop vectorizers.
-function addOptimizationPasses!(pm, opt_level)
-    # compare with using Julia's optimization pipeline directly:
-    #ccall(:jl_add_optimization_passes, Cvoid,
-    #      (LLVM.API.LLVMPassManagerRef, Cint, Cint),
-    #      pm, opt_level, #=lower_intrinsics=# 0)
-    #return
-
-    # NOTE: LLVM 12 disabled the hoisting of common instruction
-    #       before loop vectorization (https://reviews.llvm.org/D84108).
-    #
-    #       This is re-enabled with calls to cfg_simplify here,
-    #       to merge allocations and sometimes eliminate them,
-    #       since AllocOpt does not handle PhiNodes.
-    #       Enable this instruction hoisting because of this and Union benchmarks.
-
-    constant_merge!(pm)
-
-    if opt_level < 2
-        cpu_features!(pm)
-        if opt_level == 1
-            instruction_simplify!(pm)
-        end
-        if LLVM.version() >= v"12"
-            cfgsimplification!(pm; hoist_common_insts=true)
-        else
-            cfgsimplification!(pm)
-        end
-        if opt_level == 1
-            scalar_repl_aggregates!(pm)
-            instruction_combining!(pm)
-            early_cse!(pm)
-            # maybe add GVN?
-            # also try GVNHoist and GVNSink
-        end
-        mem_cpy_opt!(pm)
-        always_inliner!(pm) # Respect always_inline
-        lower_simdloop!(pm) # Annotate loop marked with "loopinfo" as LLVM parallel loop
-        return
-    end
-
-    propagate_julia_addrsp!(pm)
-    scoped_no_alias_aa!(pm)
-    type_based_alias_analysis!(pm)
-    if opt_level >= 3
-        basic_alias_analysis!(pm)
-    end
-    if LLVM.version() >= v"12"
-        cfgsimplification!(pm; hoist_common_insts=true)
-    else
-        cfgsimplification!(pm)
-    end
-    dce!(pm)
-    scalar_repl_aggregates!(pm)
-
-    #mem_cpy_opt!(pm)
-
-    always_inliner!(pm) # Respect always_inline
-
-    # Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard
-    # time merging the `alloca` for the unboxed data and the `alloca` created by
-    # the `alloc_opt` pass.
-
-    alloc_opt!(pm)
-    # consider AggressiveInstCombinePass at optlevel > 2
-    instruction_combining!(pm)
-    if LLVM.version() >= v"12"
-        cfgsimplification!(pm; hoist_common_insts=true)
-    else
-        cfgsimplification!(pm)
-    end
-    cpu_features!(pm)
-    scalar_repl_aggregates!(pm)
-    # SROA can duplicate PHI nodes which can block LowerSIMD
-    instruction_combining!(pm)
-    jump_threading!(pm)
-    correlated_value_propagation!(pm)
-
-    reassociate!(pm)
-
-    early_cse!(pm)
-
-    # Load forwarding above can expose allocations that aren't actually used
-    # remove those before optimizing loops.
-    alloc_opt!(pm)
-    loop_rotate!(pm)
-    # moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1)
-
-    # LoopRotate strips metadata from terminator, so run LowerSIMD afterwards
-    lower_simdloop!(pm) # Annotate loop marked with "loopinfo" as LLVM parallel loop
-    licm!(pm)
-    julia_licm!(pm)
-    if LLVM.version() >= v"15"
-        simple_loop_unswitch_legacy!(pm)
-    else
-        # XXX: simple loop unswitch is available on older versions of LLVM too,
-        #      but using this pass instead of the old one breaks Metal.jl.
-        loop_unswitch!(pm)
-    end
-    licm!(pm)
-    julia_licm!(pm)
-    inductive_range_check_elimination!(pm)
-    # Subsequent passes not stripping metadata from terminator
-    instruction_simplify!(pm)
-    loop_idiom!(pm)
-    ind_var_simplify!(pm)
-    loop_deletion!(pm)
-    loop_unroll!(pm) # TODO: in Julia createSimpleLoopUnroll
-
-    # Run our own SROA on heap objects before LLVM's
-    alloc_opt!(pm)
-    # Re-run SROA after loop-unrolling (useful for small loops that operate
-    # over the structure of an aggregate)
-    scalar_repl_aggregates!(pm)
-    # might not be necessary:
-    instruction_simplify!(pm)
-
-    gvn!(pm)
-    mem_cpy_opt!(pm)
-    sccp!(pm)
-
-    # These next two passes must come before IRCE to eliminate the bounds check in #43308
-    correlated_value_propagation!(pm)
-    dce!(pm)
-
-    inductive_range_check_elimination!(pm)  # Must come between the two GVN passes
-
-    # Run instcombine after redundancy elimination to exploit opportunities
-    # opened up by them.
-    # This needs to be InstCombine instead of InstSimplify to allow
-    # loops over Union-typed arrays to vectorize.
-    instruction_combining!(pm)
-    jump_threading!(pm)
-    if opt_level >= 3
-        gvn!(pm)    # Must come after JumpThreading and before LoopVectorize
-    end
-    dead_store_elimination!(pm)
-
-    # More dead allocation (store) deletion before loop optimization
-    # consider removing this:
-    alloc_opt!(pm)
-    # see if all of the constant folding has exposed more loops
-    # to simplification and deletion
-    # this helps significantly with cleaning up iteration
-    cfgsimplification!(pm)  # See note above, don't hoist instructions before LV
-    loop_deletion!(pm)
-    instruction_combining!(pm)
-    loop_vectorize!(pm)
-    loop_load_elimination!(pm)
-    # Cleanup after LV pass
-    instruction_combining!(pm)
-    if LLVM.version() >= v"12"
-        cfgsimplification!(pm; # Aggressive CFG simplification
-            forward_switch_cond_to_phi=true,
-            convert_switch_to_lookup_table=true,
-            need_canonical_loop=true,
-            hoist_common_insts=true,
-            #sink_common_insts=true # FIXME: Causes assertion in llvm-late-lowering
-        )
-    else
-        cfgsimplification!(pm)
-    end
-
-    aggressive_dce!(pm)
-end
-
 
 ## custom passes