From 8b8ad0bdad4d50c4f21a3ac82922f6c78fd559e8 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 24 Jul 2023 15:45:15 -0400 Subject: [PATCH 01/14] parse to LoopID --- src/codegen.cpp | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 119b9fcafa2a4..a840775cf8d31 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1609,6 +1609,7 @@ class jl_codectx_t { jl_codegen_params_t &emission_context; llvm::MapVector call_targets; Function *f = NULL; + MDNode* LoopID = NULL; // local var info. globals are not in here. std::vector slots; std::map phic_slots; @@ -5773,16 +5774,22 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_ } else if (head == jl_loopinfo_sym) { // parse Expr(:loopinfo, "julia.simdloop", ("llvm.loop.vectorize.width", 4)) + // to LLVM LoopID SmallVector MDs; + + // Reserve first location for self reference to the LoopID metadata node. + TempMDTuple TempNode = MDNode::getTemporary(ctx.builder.getContext(), None); + MDs.push_back(TempNode.get()); + for (int i = 0, ie = nargs; i < ie; ++i) { Metadata *MD = to_md_tree(args[i], ctx.builder.getContext()); if (MD) MDs.push_back(MD); } - MDNode* MD = MDNode::get(ctx.builder.getContext(), MDs); - CallInst *I = ctx.builder.CreateCall(prepare_call(jl_loopinfo_marker_func)); - I->setMetadata("julia.loopinfo", MD); + ctx.LoopID = MDNode::getDistinct(ctx.builder.getContext(), MDs); + // Replace the temporary node with a self-reference. 
+ ctx.LoopID->replaceOperandWith(0, ctx.LoopID); return jl_cgval_t(); } else if (head == jl_leave_sym || head == jl_coverageeffect_sym @@ -8399,7 +8406,11 @@ static jl_llvm_functions_t if (jl_is_gotonode(stmt)) { int lname = jl_gotonode_label(stmt); come_from_bb[cursor+1] = ctx.builder.GetInsertBlock(); - ctx.builder.CreateBr(BB[lname]); + auto br = ctx.builder.CreateBr(BB[lname]); + if (ctx.LoopID) { + br->setMetadata(LLVMContext::MD_loop, ctx.LoopID); + ctx.LoopID = NULL; + } find_next_stmt(lname - 1); continue; } @@ -8421,6 +8432,11 @@ static jl_llvm_functions_t ctx.builder.CreateBr(ifnot); else ctx.builder.CreateCondBr(isfalse, ifnot, ifso); + + if (ctx.LoopID) { + ctx.LoopID = NULL; + jl_error("LoopInfo found for gotoifnot branch"); + } find_next_stmt(cursor + 1); continue; } From 3ad712fa061a54aa03d9c89a644d825692d59d11 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 24 Jul 2023 17:30:28 -0400 Subject: [PATCH 02/14] Turn llvm-simdloop into a loop pass --- src/jl_exported_funcs.inc | 2 +- src/llvm-julia-passes.inc | 2 +- src/llvm-simdloop.cpp | 231 +++++++++++++++----------------------- src/passes.h | 10 +- src/pipeline.cpp | 2 +- 5 files changed, 97 insertions(+), 150 deletions(-) diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 7d54d13d699d0..704235f3b21eb 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -582,7 +582,6 @@ YY(LLVMExtraAddCPUFeaturesPass) \ YY(LLVMExtraMPMAddCPUFeaturesPass) \ YY(LLVMExtraMPMAddRemoveNIPass) \ - YY(LLVMExtraMPMAddLowerSIMDLoopPass) \ YY(LLVMExtraMPMAddMultiVersioningPass) \ YY(LLVMExtraMPMAddRemoveJuliaAddrspacesPass) \ YY(LLVMExtraMPMAddRemoveAddrspacesPass) \ @@ -596,6 +595,7 @@ YY(LLVMExtraFPMAddGCInvariantVerifierPass) \ YY(LLVMExtraFPMAddFinalLowerGCPass) \ YY(LLVMExtraLPMAddJuliaLICMPass) \ + YY(LLVMExtraLPMAddLowerSIMDLoopPass) \ YY(JLJITGetLLVMOrcExecutionSession) \ YY(JLJITGetJuliaOJIT) \ YY(JLJITGetExternalJITDylib) \ diff --git a/src/llvm-julia-passes.inc 
b/src/llvm-julia-passes.inc index 198ab0e868538..bd89c01c6fdfe 100644 --- a/src/llvm-julia-passes.inc +++ b/src/llvm-julia-passes.inc @@ -2,7 +2,6 @@ #ifdef MODULE_PASS MODULE_PASS("CPUFeatures", CPUFeaturesPass, CPUFeaturesPass()) MODULE_PASS("RemoveNI", RemoveNIPass, RemoveNIPass()) -MODULE_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass()) MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass, MultiVersioningPass()) MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass, RemoveJuliaAddrspacesPass()) MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass, RemoveAddrspacesPass()) @@ -24,4 +23,5 @@ FUNCTION_PASS("FinalLowerGC", FinalLowerGCPass, FinalLowerGCPass()) //Loop passes #ifdef LOOP_PASS LOOP_PASS("JuliaLICM", JuliaLICMPass, JuliaLICMPass()) +LOOP_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass()) #endif diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index a2ac6fcfbf7a0..d7c999d104964 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -73,7 +74,7 @@ static unsigned getReduceOpcode(Instruction *J, Instruction *operand) JL_NOTSAFE /// If Phi is part of a reduction cycle of FAdd, FSub, FMul or FDiv, /// mark the ops as permitting reassociation/commuting. /// As of LLVM 4.0, FDiv is not handled by the loop vectorizer -static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT +static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop &L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT { typedef SmallVector chainVector; chainVector chain; @@ -84,7 +85,7 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRe // Find the user of instruction I that is within loop L. 
for (User *UI : I->users()) { /*}*/ Instruction *U = cast(UI); - if (L->contains(U)) { + if (L.contains(U)) { if (J) { LLVM_DEBUG(dbgs() << "LSL: not a reduction var because op has two internal uses: " << *I << "\n"); REMARK([&]() { @@ -157,122 +158,86 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRe MaxChainLength.updateMax(length); } -static bool markLoopInfo(Module &M, Function *marker, function_ref GetLI) JL_NOTSAFEPOINT +static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT { - bool Changed = false; - std::vector ToDelete; - for (User *U : marker->users()) { - ++TotalMarkedLoops; - Instruction *I = cast(U); - ToDelete.push_back(I); - - BasicBlock *B = I->getParent(); - OptimizationRemarkEmitter ORE(B->getParent()); - LoopInfo &LI = GetLI(*B->getParent()); - Loop *L = LI.getLoopFor(B); - if (!L) { - I->removeFromParent(); - continue; - } + MDNode *LoopID = L.getLoopID(); + if (!LoopID) + return false; + bool simd = false; + bool ivdep = false; + + BasicBlock *Lh = L.getHeader(); + LLVM_DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n"); + + if (LoopID->getNumOperands() <= 1) { + LLVM_DEBUG(dbgs() << "LSL: Returning early due to few operands" << *LoopID << "\n"); + return false; + } + MDNode *MDs = dyn_cast(LoopID->getOperand(1)); - LLVM_DEBUG(dbgs() << "LSL: loopinfo marker found\n"); - bool simd = false; - bool ivdep = false; - SmallVector MDs; - - BasicBlock *Lh = L->getHeader(); - LLVM_DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n"); - - // Reserve first location for self reference to the LoopID metadata node. 
- TempMDTuple TempNode = MDNode::getTemporary(Lh->getContext(), None); - MDs.push_back(TempNode.get()); - - // Walk `julia.loopinfo` metadata and filter out `julia.simdloop` and `julia.ivdep` - if (I->hasMetadataOtherThanDebugLoc()) { - MDNode *JLMD= I->getMetadata("julia.loopinfo"); - if (JLMD) { - LLVM_DEBUG(dbgs() << "LSL: has julia.loopinfo metadata with " << JLMD->getNumOperands() <<" operands\n"); - for (unsigned i = 0, ie = JLMD->getNumOperands(); i < ie; ++i) { - Metadata *Op = JLMD->getOperand(i); - const MDString *S = dyn_cast(Op); - if (S) { - LLVM_DEBUG(dbgs() << "LSL: found " << S->getString() << "\n"); - if (S->getString().startswith("julia")) { - if (S->getString().equals("julia.simdloop")) - simd = true; - if (S->getString().equals("julia.ivdep")) - ivdep = true; - continue; - } - } - MDs.push_back(Op); - } + if (!MDs) { + LLVM_DEBUG(dbgs() << "LSL: Returning early due to no Metadata attached" << *LoopID << "\n"); + return false; + } + + for (unsigned i = 0, ie = MDs->getNumOperands(); i < ie; ++i) { + Metadata *Op = MDs->getOperand(i); + const MDString *S = dyn_cast(Op); + if (S) { + LLVM_DEBUG(dbgs() << "LSL: found " << S->getString() << "\n"); + if (S->getString().startswith("julia")) { + if (S->getString().equals("julia.simdloop")) + simd = true; + if (S->getString().equals("julia.ivdep")) + ivdep = true; + continue; } } + } - LLVM_DEBUG(dbgs() << "LSL: simd: " << simd << " ivdep: " << ivdep << "\n"); + LLVM_DEBUG(dbgs() << "LSL: simd: " << simd << " ivdep: " << ivdep << "\n"); + if (!simd && !ivdep) + return false; - REMARK([=]() { - return OptimizationRemarkAnalysis(DEBUG_TYPE, "Loop SIMD Flags", I->getDebugLoc(), B) - << "Loop marked for SIMD vectorization with flags { \"simd\": " << (simd ? "true" : "false") << ", \"ivdep\": " << (ivdep ? "true" : "false") << " }"; - }); + // TODO: Can we drop `julia.simdloop` and `julia.ivdep`? 
- MDNode *n = L->getLoopID(); - if (n) { - // Loop already has a LoopID so copy over Metadata - // original loop id is operand 0 - for (unsigned i = 1, ie = n->getNumOperands(); i < ie; ++i) { - Metadata *Op = n->getOperand(i); - MDs.push_back(Op); - } - } - MDNode *LoopID = MDNode::getDistinct(Lh->getContext(), MDs); - // Replace the temporary node with a self-reference. - LoopID->replaceOperandWith(0, LoopID); - L->setLoopID(LoopID); - assert(L->getLoopID()); + // REMARK([=]() { + // return OptimizationRemarkAnalysis(DEBUG_TYPE, "Loop SIMD Flags", L.getLoopLatch()->getTerminator()->getDebugLoc(), L) + // << "Loop marked for SIMD vectorization with flags { \"simd\": " << (simd ? "true" : "false") << ", \"ivdep\": " << (ivdep ? "true" : "false") << " }"; + // }); + // If ivdep is true we assume that there is no memory dependency between loop iterations + // This is a fairly strong assumption and does often not hold true for generic code. + if (ivdep) { + ++IVDepLoops; MDNode *m = MDNode::get(Lh->getContext(), ArrayRef(LoopID)); - - // If ivdep is true we assume that there is no memory dependency between loop iterations - // This is a fairly strong assumption and does often not hold true for generic code. - if (ivdep) { - ++IVDepLoops; - // Mark memory references so that Loop::isAnnotatedParallel will return true for this loop. - for (BasicBlock *BB : L->blocks()) { - for (Instruction &I : *BB) { - if (I.mayReadOrWriteMemory()) { - ++IVDepInstructions; - I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m); - } - } + // Mark memory references so that Loop::isAnnotatedParallel will return true for this loop. 
+ for (BasicBlock *BB : L.blocks()) { + for (Instruction &I : *BB) { + if (I.mayReadOrWriteMemory()) { + ++IVDepInstructions; + I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m); + } } - assert(L->isAnnotatedParallel()); } + assert(L.isAnnotatedParallel()); + } - if (simd) { - ++SimdLoops; - // Mark floating-point reductions as okay to reassociate/commute. - for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) { - if (PHINode *Phi = dyn_cast(I)) - enableUnsafeAlgebraIfReduction(Phi, L, ORE); - else - break; - } + if (simd) { + ++SimdLoops; + // Mark floating-point reductions as okay to reassociate/commute. + for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) { + if (PHINode *Phi = dyn_cast(I)) + enableUnsafeAlgebraIfReduction(Phi, L, ORE); + else + break; } - - I->removeFromParent(); - - Changed = true; } - for (Instruction *I : ToDelete) - I->deleteValue(); - marker->eraseFromParent(); #ifdef JL_VERIFY_PASSES assert(!verifyLLVMIR(M)); #endif - return Changed; + return true; } } // end anonymous namespace @@ -283,23 +248,19 @@ static bool markLoopInfo(Module &M, Function *marker, function_ref(M).getManager(); +PreservedAnalyses LowerSIMDLoopPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) - auto GetLI = [&FAM](Function &F) -> LoopInfo & { - return FAM.getResult(F); - }; - - if (markLoopInfo(M, loopinfo_marker, GetLI)) { - auto preserved = PreservedAnalyses::allInSet(); - preserved.preserve(); +{ + OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); + if (processLoop(L, ORE)) { +#ifdef JL_DEBUG_BUILD + if (AR.MSSA) + AR.MSSA->verifyMemorySSA(); +#endif + auto preserved = getLoopPassPreservedAnalyses(); + preserved.preserveSet(); + preserved.preserve(); return preserved; } @@ -307,37 +268,23 @@ PreservedAnalyses LowerSIMDLoopPass::run(Module &M, ModuleAnalysisManager &AM) } namespace { -class LowerSIMDLoopLegacy : public ModulePass { - //LowerSIMDLoop Impl; +class 
LowerSIMDLoopLegacy : public LoopPass { public: - static char ID; + static char ID; - LowerSIMDLoopLegacy() : ModulePass(ID) { - } - - bool runOnModule(Module &M) override { - bool Changed = false; - - Function *loopinfo_marker = M.getFunction("julia.loopinfo_marker"); - - auto GetLI = [this](Function &F) JL_NOTSAFEPOINT -> LoopInfo & { - return getAnalysis(F).getLoopInfo(); - }; - - if (loopinfo_marker) - Changed |= markLoopInfo(M, loopinfo_marker, GetLI); + LowerSIMDLoopLegacy() : LoopPass(ID) { + } - return Changed; - } + bool runOnLoop(Loop *L, LPPassManager &LPM) override + { + OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); + return processLoop(*L, ORE); + } - void getAnalysisUsage(AnalysisUsage &AU) const override - { - ModulePass::getAnalysisUsage(AU); - AU.addRequired(); - AU.addPreserved(); - AU.setPreservesCFG(); - } + void getAnalysisUsage(AnalysisUsage &AU) const override { + getLoopAnalysisUsage(AU); + } }; } // end anonymous namespace diff --git a/src/passes.h b/src/passes.h index 1f4aa2e4bb89a..9c3b0421670b5 100644 --- a/src/passes.h +++ b/src/passes.h @@ -62,11 +62,6 @@ struct RemoveNIPass : PassInfoMixin { static bool isRequired() { return true; } }; -struct LowerSIMDLoopPass : PassInfoMixin { - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; - static bool isRequired() { return true; } -}; - struct MultiVersioningPass : PassInfoMixin { bool external_use; MultiVersioningPass(bool external_use = false) : external_use(external_use) {} @@ -103,6 +98,11 @@ struct JuliaLICMPass : PassInfoMixin { LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT; }; +struct LowerSIMDLoopPass : PassInfoMixin { + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT; +}; + #define MODULE_MARKER_PASS(NAME) \ struct NAME##MarkerPass : PassInfoMixin { \ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT { return 
PreservedAnalyses::all(); } \ diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 6830252997460..3ed40f018f23f 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -402,6 +402,7 @@ static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB FPM.addPass(BeforeLoopOptimizationMarkerPass()); { LoopPassManager LPM; + LPM.addPass(LowerSIMDLoopPass()); if (O.getSpeedupLevel() >= 2) { LPM.addPass(LoopRotatePass()); } @@ -562,7 +563,6 @@ static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationL buildEarlySimplificationPipeline(MPM, PB, O, options); MPM.addPass(AlwaysInlinerPass()); buildEarlyOptimizerPipeline(MPM, PB, O, options); - MPM.addPass(LowerSIMDLoopPass()); { FunctionPassManager FPM; buildLoopOptimizerPipeline(FPM, PB, O, options); From e1ecb2f2d5bb771e3d98a574abda86ab71f6aadb Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 24 Jul 2023 17:35:06 -0400 Subject: [PATCH 03/14] remove loopinfo_marker func --- src/codegen.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index a840775cf8d31..ecd0c33c375d4 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -992,14 +992,7 @@ static const auto jl_typeof_func = new JuliaFunction<>{ Attributes(C, {Attribute::NonNull}), None); }, }; -static const auto jl_loopinfo_marker_func = new JuliaFunction<>{ - "julia.loopinfo_marker", - [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), false); }, - [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::ReadOnly, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}), - AttributeSet(), - None); }, -}; + static const auto jl_write_barrier_func = new JuliaFunction<>{ "julia.write_barrier", [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), @@ -9179,7 +9172,6 @@ static void init_jit_functions(void) add_named_global(jl_object_id__func, &jl_object_id_); add_named_global(jl_alloc_obj_func, (void*)NULL); 
add_named_global(jl_newbits_func, (void*)jl_new_bits); - add_named_global(jl_loopinfo_marker_func, (void*)NULL); add_named_global(jl_typeof_func, (void*)NULL); add_named_global(jl_write_barrier_func, (void*)NULL); add_named_global(jldlsym_func, &jl_load_and_lookup); From 1c21def2cf509ac20e4b1527c9660a52c281ef2a Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 25 Jul 2023 09:59:46 -0400 Subject: [PATCH 04/14] fix some of the llvmpasses test --- test/llvmpasses/loopinfo.jl | 26 +++++++++++--------------- test/llvmpasses/parsing.ll | 2 +- test/llvmpasses/simdloop.ll | 30 ++++++++++++------------------ 3 files changed, 24 insertions(+), 34 deletions(-) diff --git a/test/llvmpasses/loopinfo.jl b/test/llvmpasses/loopinfo.jl index b9b388c73d0c5..0d94a6e14e56a 100644 --- a/test/llvmpasses/loopinfo.jl +++ b/test/llvmpasses/loopinfo.jl @@ -3,7 +3,7 @@ # RUN: julia --startup-file=no %s %t && llvm-link -S %t/* -o %t/module.ll # RUN: cat %t/module.ll | FileCheck %s # RUN: cat %t/module.ll | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S - | FileCheck %s -check-prefix=LOWER -# RUN: cat %t/module.ll | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S - | FileCheck %s -check-prefix=LOWER +# RUN: cat %t/module.ll | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S - | FileCheck %s -check-prefix=LOWER # RUN: julia --startup-file=no %s %t -O && llvm-link -S %t/* -o %t/module.ll # RUN: cat %t/module.ll | FileCheck %s -check-prefix=FINAL @@ -27,10 +27,9 @@ function simdf(X) acc = zero(eltype(X)) @simd for x in X acc += x -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO:![0-9]+]] +# CHECK: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]] # LOWER-NOT: llvm.mem.parallel_loop_access # LOWER: fadd reassoc contract double -# LOWER-NOT: call void @julia.loopinfo_marker() # LOWER: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]] # FINAL: fadd reassoc 
contract <{{(vscale x )?}}{{[0-9]+}} x double> end @@ -43,9 +42,8 @@ function simdf2(X) acc = zero(eltype(X)) @simd ivdep for x in X acc += x -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO2:![0-9]+]] +# CHECK: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]] # LOWER: llvm.mem.parallel_loop_access -# LOWER-NOT: call void @julia.loopinfo_marker() # LOWER: fadd reassoc contract double # LOWER: br {{.*}}, !llvm.loop [[LOOPID2:![0-9]+]] end @@ -61,8 +59,7 @@ end for i in 1:N iteration(i) $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.count"), 3))) -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO3:![0-9]+]] -# LOWER-NOT: call void @julia.loopinfo_marker() +# CHECK: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]] # LOWER: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]] # FINAL: call {{(swiftcc )?}}void @j_iteration # FINAL: call {{(swiftcc )?}}void @j_iteration @@ -87,8 +84,7 @@ end iteration(i) end $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.full"),))) -# CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO4:![0-9]+]] -# LOWER-NOT: call void @julia.loopinfo_marker() +# CHECK: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]] # LOWER: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]] # FINAL: call {{(swiftcc )?}}void @j_iteration # FINAL: call {{(swiftcc )?}}void @j_iteration @@ -119,14 +115,14 @@ end end ## Check all the MD nodes -# CHECK: [[LOOPINFO]] = !{!"julia.simdloop"} -# CHECK: [[LOOPINFO2]] = !{!"julia.simdloop", !"julia.ivdep"} -# CHECK: [[LOOPINFO3]] = !{[[LOOPUNROLL:![0-9]+]]} +# CHECK: [[LOOPID]] = distinct !{[[LOOPID]], !"julia.simdloop"} +# CHECK: [[LOOPID2]] = distinct !{[[LOOPID2]], !"julia.simdloop", !"julia.ivdep"} +# CHECK: [[LOOPID3]] = distinct !{[[LOOPID3]], [[LOOPUNROLL:![0-9]+]]} # CHECK: [[LOOPUNROLL]] = !{!"llvm.loop.unroll.count", i64 3} -# CHECK: [[LOOPINFO4]] = !{[[LOOPUNROLL2:![0-9]+]]} +# CHECK: [[LOOPID4]] = distinct !{[[LOOPID4]], [[LOOPUNROLL2:![0-9]+]]} # CHECK: [[LOOPUNROLL2]] = 
!{!"llvm.loop.unroll.full"} -# LOWER: [[LOOPID]] = distinct !{[[LOOPID]]} -# LOWER: [[LOOPID2]] = distinct !{[[LOOPID2]]} +# LOWER: [[LOOPID]] = distinct !{[[LOOPID]], !"julia.simdloop"} +# LOWER: [[LOOPID2]] = distinct !{[[LOOPID2]], !"julia.simdloop", !"julia.ivdep"} # LOWER: [[LOOPID3]] = distinct !{[[LOOPID3]], [[LOOPUNROLL:![0-9]+]]} # LOWER: [[LOOPUNROLL]] = !{!"llvm.loop.unroll.count", i64 3} # LOWER: [[LOOPID4]] = distinct !{[[LOOPID4]], [[LOOPUNROLL2:![0-9]+]]} diff --git a/test/llvmpasses/parsing.ll b/test/llvmpasses/parsing.ll index 26f0b6b63fbf2..6a5909ff5fd40 100644 --- a/test/llvmpasses/parsing.ll +++ b/test/llvmpasses/parsing.ll @@ -1,6 +1,6 @@ ; COM: NewPM-only test, tests for ability to parse Julia passes -; RUN: opt --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,LowerSIMDLoop,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,CombineMulAdd,LateLowerGCFrame,FinalLowerGC,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(JuliaLICM),GCInvariantVerifier,GCInvariantVerifier),LowerPTLSPass,LowerPTLSPass,JuliaMultiVersioning,JuliaMultiVersioning)' -S %s -o /dev/null +; RUN: opt --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,CombineMulAdd,LateLowerGCFrame,FinalLowerGC,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(LowerSIMDLoop,JuliaLICM),GCInvariantVerifier,GCInvariantVerifier),LowerPTLSPass,LowerPTLSPass,JuliaMultiVersioning,JuliaMultiVersioning)' -S %s -o /dev/null define void @test() { ret void diff --git a/test/llvmpasses/simdloop.ll b/test/llvmpasses/simdloop.ll index 929fbeea2c3f5..96f06ae68a8b3 100644 --- a/test/llvmpasses/simdloop.ll +++ b/test/llvmpasses/simdloop.ll @@ -1,12 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license ; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S %s | FileCheck %s ; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s - -declare void @julia.loopinfo_marker() +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='loop(LowerSIMDLoop)' -S %s | FileCheck %s ; CHECK-LABEL: @simd_test( define void @simd_test(double *%a, double *%b) { @@ -22,9 +20,8 @@ loop: %cval = fadd double %aval, %bval store double %cval, double *%bptr %nexti = add i64 %i, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !3 %done = icmp sgt i64 %nexti, 500 - br i1 %done, label %loopdone, label %loop + br i1 %done, label %loopdone, label %loop, !llvm.loop !2 loopdone: ret void } @@ -42,9 +39,8 @@ loop: %nextv = fsub double %v, %aval ; CHECK: fsub reassoc contract double %v, %aval %nexti = add i64 %i, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !3 %done = icmp sgt i64 %nexti, 500 - br i1 %done, label %loopdone, label %loop + br i1 %done, label %loopdone, label %loop, !llvm.loop !2 loopdone: ret double %nextv } @@ -61,9 +57,8 @@ loop: %nextv = fsub double %v, %aval ; CHECK: fsub reassoc contract double %v, %aval %nexti = add i64 %i, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !2 %done = icmp sgt i64 %nexti, 500 - br i1 %done, label %loopdone, label %loop + br i1 %done, label %loopdone, label %loop, !llvm.loop !0 loopdone: ret double %nextv } @@ -82,20 
+77,19 @@ for.body: ; preds = %for.body, %entry %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv store i32 %add, i32* %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - call void @julia.loopinfo_marker(), !julia.loopinfo !4 %exitcond = icmp eq i64 %indvars.iv.next, 48 ; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]] - br i1 %exitcond, label %for.end, label %for.body + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 for.end: ; preds = %for.body %1 = load i32, i32* %a, align 4 ret i32 %1 } -!1 = !{} -!2 = !{!"julia.simdloop"} +!0 = distinct !{!0, !1} +!1 = !{!"julia.simdloop"} +!2 = distinct !{!2, !3} !3 = !{!"julia.simdloop", !"julia.ivdep"} -!4 = !{!"julia.simdloop", !"julia.ivdep", !5} -!5 = !{!"llvm.loop.vectorize.disable", i1 0} -; CHECK: [[LOOP]] = distinct !{[[LOOP]], [[LOOP_DISABLE:![0-9]+]]} -; CHECK-NEXT: [[LOOP_DISABLE]] = !{!"llvm.loop.vectorize.disable", i1 false} +!4 = distinct !{!4, !5} +!5 = !{!"julia.simdloop", !"julia.ivdep", !6} +!6 = !{!"llvm.loop.vectorize.disable", i1 0} From 2d93513e204c769c6d057fda837d161117f7de3d Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 25 Jul 2023 10:02:14 -0400 Subject: [PATCH 05/14] allow loopID on condbr --- src/codegen.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index ecd0c33c375d4..912deba1fd961 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -8421,14 +8421,15 @@ static jl_llvm_functions_t workstack.push_back(lname - 1); BasicBlock *ifnot = BB[lname]; BasicBlock *ifso = BB[cursor+2]; + Instruction *br; if (ifnot == ifso) - ctx.builder.CreateBr(ifnot); + br = ctx.builder.CreateBr(ifnot); else - ctx.builder.CreateCondBr(isfalse, ifnot, ifso); + br = ctx.builder.CreateCondBr(isfalse, ifnot, ifso); if (ctx.LoopID) { + br->setMetadata(LLVMContext::MD_loop, ctx.LoopID); ctx.LoopID = NULL; - jl_error("LoopInfo found for gotoifnot branch"); } find_next_stmt(cursor + 1); continue; From 
e60b7794b491db7117ed0b730219a1e91ac77a70 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 25 Jul 2023 14:33:59 -0400 Subject: [PATCH 06/14] Revert "fixup! Turn llvm-simdloop into a loop pass" This reverts commit 0cfa740a5f224ab0de070e0d31c11d7f6ed16669. --- src/llvm-simdloop.cpp | 15 ++------------- test/llvmpasses/simdloop.ll | 17 +++++++---------- 2 files changed, 9 insertions(+), 23 deletions(-) diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index d7c999d104964..13b00370c480b 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -169,19 +169,8 @@ static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT BasicBlock *Lh = L.getHeader(); LLVM_DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n"); - if (LoopID->getNumOperands() <= 1) { - LLVM_DEBUG(dbgs() << "LSL: Returning early due to few operands" << *LoopID << "\n"); - return false; - } - MDNode *MDs = dyn_cast(LoopID->getOperand(1)); - - if (!MDs) { - LLVM_DEBUG(dbgs() << "LSL: Returning early due to no Metadata attached" << *LoopID << "\n"); - return false; - } - - for (unsigned i = 0, ie = MDs->getNumOperands(); i < ie; ++i) { - Metadata *Op = MDs->getOperand(i); + for (unsigned i = 0, ie = LoopID->getNumOperands(); i < ie; ++i) { + Metadata *Op = LoopID->getOperand(i); const MDString *S = dyn_cast(Op); if (S) { LLVM_DEBUG(dbgs() << "LSL: found " << S->getString() << "\n"); diff --git a/test/llvmpasses/simdloop.ll b/test/llvmpasses/simdloop.ll index 96f06ae68a8b3..ad3e125893068 100644 --- a/test/llvmpasses/simdloop.ll +++ b/test/llvmpasses/simdloop.ll @@ -21,7 +21,7 @@ loop: store double %cval, double *%bptr %nexti = add i64 %i, 1 %done = icmp sgt i64 %nexti, 500 - br i1 %done, label %loopdone, label %loop, !llvm.loop !2 + br i1 %done, label %loopdone, label %loop, !llvm.loop !1 loopdone: ret void } @@ -40,7 +40,7 @@ loop: ; CHECK: fsub reassoc contract double %v, %aval %nexti = add i64 %i, 1 %done = icmp sgt i64 %nexti, 500 - br i1 %done, label 
%loopdone, label %loop, !llvm.loop !2 + br i1 %done, label %loopdone, label %loop, !llvm.loop !1 loopdone: ret double %nextv } @@ -79,17 +79,14 @@ for.body: ; preds = %for.body, %entry %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 48 ; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]] - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2 for.end: ; preds = %for.body %1 = load i32, i32* %a, align 4 ret i32 %1 } -!0 = distinct !{!0, !1} -!1 = !{!"julia.simdloop"} -!2 = distinct !{!2, !3} -!3 = !{!"julia.simdloop", !"julia.ivdep"} -!4 = distinct !{!4, !5} -!5 = !{!"julia.simdloop", !"julia.ivdep", !6} -!6 = !{!"llvm.loop.vectorize.disable", i1 0} +!0 = distinct !{!0, !"julia.simdloop"} +!1 = distinct !{!1, !"julia.simdloop", !"julia.ivdep"} +!2 = distinct !{!2, !"julia.simdloop", !"julia.ivdep", !3} +!3 = !{!"llvm.loop.vectorize.disable", i1 0} From 2838d7bc64481ddc63a80d385d1306a1e76e51a3 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 7 Aug 2023 18:29:05 -0400 Subject: [PATCH 07/14] attach loop id only to backwards branch --- src/codegen.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 912deba1fd961..3c0f76709e771 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -8125,6 +8125,7 @@ static jl_llvm_functions_t std::map BB; std::map come_from_bb; int cursor = 0; + int current_label = 0; auto find_next_stmt = [&] (int seq_next) { // new style ir is always in dominance order, but frontend IR might not be // `seq_next` is the next statement we want to emit @@ -8141,6 +8142,7 @@ static jl_llvm_functions_t workstack.pop_back(); auto nextbb = BB.find(item + 1); if (nextbb == BB.end()) { + // Not a BB cursor = item; return; } @@ -8151,8 +8153,10 @@ static jl_llvm_functions_t seq_next = -1; // if this BB is non-empty, we've visited it before so skip it if 
(!nextbb->second->getTerminator()) { + // New BB ctx.builder.SetInsertPoint(nextbb->second); cursor = item; + current_label = item; return; } } @@ -8400,7 +8404,8 @@ static jl_llvm_functions_t int lname = jl_gotonode_label(stmt); come_from_bb[cursor+1] = ctx.builder.GetInsertBlock(); auto br = ctx.builder.CreateBr(BB[lname]); - if (ctx.LoopID) { + // Check if backwards branch + if (ctx.LoopID && lname <= current_label) { br->setMetadata(LLVMContext::MD_loop, ctx.LoopID); ctx.LoopID = NULL; } @@ -8427,7 +8432,8 @@ static jl_llvm_functions_t else br = ctx.builder.CreateCondBr(isfalse, ifnot, ifso); - if (ctx.LoopID) { + // Check if backwards branch + if (ctx.LoopID && lname <= current_label) { br->setMetadata(LLVMContext::MD_loop, ctx.LoopID); ctx.LoopID = NULL; } From e1576e576a63eae81d618b9209b8ac1daac1d62a Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 9 Aug 2023 18:58:51 -0400 Subject: [PATCH 08/14] fix JL_VERIFY_PASSES --- src/llvm-simdloop.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index 13b00370c480b..cc33a69d66e33 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -224,7 +224,7 @@ static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT } #ifdef JL_VERIFY_PASSES - assert(!verifyLLVMIR(M)); + assert(!verifyLLVMIR(*L)); #endif return true; } From 694042c2165bee6c38aa6e84cc097e460ad470dc Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 9 Aug 2023 19:15:22 -0400 Subject: [PATCH 09/14] add simdloop-memoryssa test --- test/llvmpasses/julia-simdloop-memoryssa.ll | 55 +++++++++++++++++++ .../{simdloop.ll => julia-simdloop.ll} | 0 2 files changed, 55 insertions(+) create mode 100644 test/llvmpasses/julia-simdloop-memoryssa.ll rename test/llvmpasses/{simdloop.ll => julia-simdloop.ll} (100%) diff --git a/test/llvmpasses/julia-simdloop-memoryssa.ll b/test/llvmpasses/julia-simdloop-memoryssa.ll new file mode 100644 index 
0000000000000..0c1c4ac021996 --- /dev/null +++ b/test/llvmpasses/julia-simdloop-memoryssa.ll @@ -0,0 +1,55 @@ +; COM: NewPM-only test, tests that memoryssa is preserved correctly + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(LowerSIMDLoop),print)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(LowerSIMDLoop),print)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK + +; CHECK-LABEL: MemorySSA for function: simd_test +; CHECK-LABEL: @simd_test( +define void @simd_test(double *%a, double *%b) { +; CHECK: top: +top: + br label %loop +; CHECK: loop: +loop: +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({top,liveOnEntry},{loop,[[MSSA_USE:[0-9]+]]}) + %i = phi i64 [0, %top], [%nexti, %loop] + %aptr = getelementptr double, double *%a, i64 %i + %bptr = getelementptr double, double *%b, i64 %i +; CHECK: MemoryUse([[MPHI]]) MayAlias +; CHECK: llvm.mem.parallel_loop_access + %aval = load double, double *%aptr +; CHECK: MemoryUse([[MPHI]]) MayAlias + %bval = load double, double *%aptr + %cval = fadd double %aval, %bval +; CHECK: [[MSSA_USE]] = MemoryDef([[MPHI]]) + store double %cval, double *%bptr + %nexti = add i64 %i, 1 + %done = icmp sgt i64 %nexti, 500 + br i1 %done, label %loopdone, label %loop, !llvm.loop !1 +loopdone: + ret void +} + +; CHECK-LABEL: MemorySSA for function: simd_test_sub2 +; CHECK-LABEL: @simd_test_sub2( +define double @simd_test_sub2(double *%a) { +top: + br label %loop +loop: + %i = phi i64 [0, %top], [%nexti, %loop] + %v = phi double [0.000000e+00, %top], [%nextv, %loop] + %aptr = getelementptr double, double *%a, i64 %i +; CHECK: MemoryUse(liveOnEntry) MayAlias + %aval = load double, double *%aptr + %nextv = fsub double %v, %aval +; CHECK: fsub reassoc contract double %v, %aval + %nexti = add i64 %i, 1 + %done = icmp sgt i64 
%nexti, 500 + br i1 %done, label %loopdone, label %loop, !llvm.loop !0 +loopdone: + ret double %nextv +} + +!0 = distinct !{!0, !"julia.simdloop"} +!1 = distinct !{!1, !"julia.simdloop", !"julia.ivdep"} \ No newline at end of file diff --git a/test/llvmpasses/simdloop.ll b/test/llvmpasses/julia-simdloop.ll similarity index 100% rename from test/llvmpasses/simdloop.ll rename to test/llvmpasses/julia-simdloop.ll From 7f2e87087b5f640d237d356a43246b26138f6d4b Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 9 Aug 2023 19:17:04 -0400 Subject: [PATCH 10/14] update docs --- doc/src/devdocs/llvm-passes.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/src/devdocs/llvm-passes.md b/doc/src/devdocs/llvm-passes.md index eec8b07c2701e..d4d5b92421277 100644 --- a/doc/src/devdocs/llvm-passes.md +++ b/doc/src/devdocs/llvm-passes.md @@ -58,9 +58,9 @@ This pass removes the non-integral address spaces from the module's datalayout s * Filename: `llvm-simdloop.cpp` * Class Name: `LowerSIMDLoopPass` -* Opt Name: `module(LowerSIMDLoop)` +* Opt Name: `loop(LowerSIMDLoop)` -This pass acts as the main driver of the `@simd` annotation. Codegen inserts a call to a marker intrinsic (`julia.simdloop`), which this pass uses to identify loops that were originally marked with `@simd`. Then, this pass looks for a chain of floating point operations that form a reduce and adds the `contract` and `reassoc` fast math flags to allow reassociation (and thus vectorization). This pass does not preserve either loop information nor inference correctness, so it may violate Julia semantics in surprising ways. If the loop was annotated with `ivdep` as well, then the pass marks the loop as having no loop-carried dependencies (the resulting behavior is undefined if the user annotation was incorrect or gets applied to the wrong loop). +This pass acts as the main driver of the `@simd` annotation. 
Codegen attaches `!llvm.loop` metadata (the loop ID) to the back branch of a loop, which this pass uses to identify loops that were originally marked with `@simd`. Then, this pass looks for a chain of floating point operations that form a reduction and adds the `contract` and `reassoc` fast math flags to allow reassociation (and thus vectorization). This pass preserves neither loop information nor inference correctness, so it may violate Julia semantics in surprising ways. If the loop was annotated with `ivdep` as well, then the pass marks the loop as having no loop-carried dependencies (the resulting behavior is undefined if the user annotation was incorrect or gets applied to the wrong loop). ### LowerPTLS From 09ec9eb5e6011705ca4099e7beb16f128e5bd1b7 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 9 Aug 2023 19:27:46 -0400 Subject: [PATCH 11/14] add remark back --- src/llvm-simdloop.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index cc33a69d66e33..a14f35782c22d 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -190,10 +190,10 @@ static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT // TODO: Can we drop `julia.simdloop` and `julia.ivdep`? - // REMARK([=]() { - // return OptimizationRemarkAnalysis(DEBUG_TYPE, "Loop SIMD Flags", L.getLoopLatch()->getTerminator()->getDebugLoc(), L) - // << "Loop marked for SIMD vectorization with flags { \"simd\": " << (simd ? "true" : "false") << ", \"ivdep\": " << (ivdep ? "true" : "false") << " }"; - // }); + REMARK([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "Loop SIMD Flags", L.getStartLoc(), L.getHeader()) + << "Loop marked for SIMD vectorization with flags { \"simd\": " << (simd ? "true" : "false") << ", \"ivdep\": " << (ivdep ? 
"true" : "false") << " }"; + }); // If ivdep is true we assume that there is no memory dependency between loop iterations // This is a fairly strong assumption and does often not hold true for generic code. From 78604ecab3fd32ddbbc34c879099ef3ddf4b9c65 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 9 Aug 2023 22:10:16 -0400 Subject: [PATCH 12/14] handle ScalarEvolution --- src/llvm-simdloop.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index a14f35782c22d..6c91c45494fa7 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -74,7 +74,7 @@ static unsigned getReduceOpcode(Instruction *J, Instruction *operand) JL_NOTSAFE /// If Phi is part of a reduction cycle of FAdd, FSub, FMul or FDiv, /// mark the ops as permitting reassociation/commuting. /// As of LLVM 4.0, FDiv is not handled by the loop vectorizer -static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop &L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT +static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution *SE) JL_NOTSAFEPOINT { typedef SmallVector chainVector; chainVector chain; @@ -152,13 +152,15 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop &L, OptimizationRe }); (*K)->setHasAllowReassoc(true); (*K)->setHasAllowContract(true); + if (SE) + SE->forgetValue(*K); ++length; } ReductionChainLength += length; MaxChainLength.updateMax(length); } -static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT +static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution *SE) JL_NOTSAFEPOINT { MDNode *LoopID = L.getLoopID(); if (!LoopID) @@ -217,14 +219,17 @@ static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE) JL_NOTSAFEPOINT // Mark floating-point reductions as okay to reassociate/commute. 
for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) { if (PHINode *Phi = dyn_cast(I)) - enableUnsafeAlgebraIfReduction(Phi, L, ORE); + enableUnsafeAlgebraIfReduction(Phi, L, ORE, SE); else break; } + + if (SE) + SE->forgetLoopDispositions(&L); } #ifdef JL_VERIFY_PASSES - assert(!verifyLLVMIR(*L)); + assert(!verifyLLVMIR(L)); #endif return true; } @@ -242,7 +247,7 @@ PreservedAnalyses LowerSIMDLoopPass::run(Loop &L, LoopAnalysisManager &AM, { OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); - if (processLoop(L, ORE)) { + if (processLoop(L, ORE, &AR.SE)) { #ifdef JL_DEBUG_BUILD if (AR.MSSA) AR.MSSA->verifyMemorySSA(); @@ -268,7 +273,7 @@ class LowerSIMDLoopLegacy : public LoopPass { bool runOnLoop(Loop *L, LPPassManager &LPM) override { OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); - return processLoop(*L, ORE); + return processLoop(*L, ORE, nullptr); } void getAnalysisUsage(AnalysisUsage &AU) const override { From 86a848e5ed4ac4af8814b9c33c7089bcb45ca170 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Thu, 10 Aug 2023 10:05:19 -0400 Subject: [PATCH 13/14] drop julia. 
in llvm-simdloop --- src/llvm-simdloop.cpp | 12 ++++++++++-- test/llvmpasses/loopinfo.jl | 4 ++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index 6c91c45494fa7..6f57c82b3ab74 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -171,7 +171,10 @@ static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution BasicBlock *Lh = L.getHeader(); LLVM_DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n"); - for (unsigned i = 0, ie = LoopID->getNumOperands(); i < ie; ++i) { + SmallVector MDs(1); + // First Operand is self-reference + // Drop `julia.` prefixes + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { Metadata *Op = LoopID->getOperand(i); const MDString *S = dyn_cast(Op); if (S) { @@ -184,13 +187,18 @@ static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution continue; } } + MDs.push_back(Op); } LLVM_DEBUG(dbgs() << "LSL: simd: " << simd << " ivdep: " << ivdep << "\n"); if (!simd && !ivdep) return false; - // TODO: Can we drop `julia.simdloop` and `julia.ivdep`? 
+ LLVMContext &Context = L.getHeader()->getContext(); + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself + NewLoopID->replaceOperandWith(0, NewLoopID); + L.setLoopID(NewLoopID); REMARK([&]() { return OptimizationRemarkAnalysis(DEBUG_TYPE, "Loop SIMD Flags", L.getStartLoc(), L.getHeader()) diff --git a/test/llvmpasses/loopinfo.jl b/test/llvmpasses/loopinfo.jl index 0d94a6e14e56a..559793b70b27f 100644 --- a/test/llvmpasses/loopinfo.jl +++ b/test/llvmpasses/loopinfo.jl @@ -121,8 +121,8 @@ end # CHECK: [[LOOPUNROLL]] = !{!"llvm.loop.unroll.count", i64 3} # CHECK: [[LOOPID4]] = distinct !{[[LOOPID4]], [[LOOPUNROLL2:![0-9]+]]} # CHECK: [[LOOPUNROLL2]] = !{!"llvm.loop.unroll.full"} -# LOWER: [[LOOPID]] = distinct !{[[LOOPID]], !"julia.simdloop"} -# LOWER: [[LOOPID2]] = distinct !{[[LOOPID2]], !"julia.simdloop", !"julia.ivdep"} +# LOWER: [[LOOPID]] = distinct !{[[LOOPID]]} +# LOWER: [[LOOPID2]] = distinct !{[[LOOPID2]]} # LOWER: [[LOOPID3]] = distinct !{[[LOOPID3]], [[LOOPUNROLL:![0-9]+]]} # LOWER: [[LOOPUNROLL]] = !{!"llvm.loop.unroll.count", i64 3} # LOWER: [[LOOPID4]] = distinct !{[[LOOPID4]], [[LOOPUNROLL2:![0-9]+]]} From d384889b364720218b68eb49bcf21a26cea5900e Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Thu, 10 Aug 2023 16:28:37 -0400 Subject: [PATCH 14/14] fixup! drop julia. 
in llvm-simdloop --- src/llvm-simdloop.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index 6f57c82b3ab74..f6a1e3a501a98 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -195,10 +195,10 @@ static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution return false; LLVMContext &Context = L.getHeader()->getContext(); - MDNode *NewLoopID = MDNode::get(Context, MDs); + LoopID = MDNode::get(Context, MDs); // Set operand 0 to refer to the loop id itself - NewLoopID->replaceOperandWith(0, NewLoopID); - L.setLoopID(NewLoopID); + LoopID->replaceOperandWith(0, LoopID); + L.setLoopID(LoopID); REMARK([&]() { return OptimizationRemarkAnalysis(DEBUG_TYPE, "Loop SIMD Flags", L.getStartLoc(), L.getHeader())