Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Expr(:loopinfo) codegen #50663

Merged
merged 14 commits into from
Aug 14, 2023
4 changes: 2 additions & 2 deletions doc/src/devdocs/llvm-passes.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ This pass removes the non-integral address spaces from the module's datalayout s

* Filename: `llvm-simdloop.cpp`
* Class Name: `LowerSIMDLoopPass`
* Opt Name: `module(LowerSIMDLoop)`
* Opt Name: `loop(LowerSIMDLoop)`

This pass acts as the main driver of the `@simd` annotation. Codegen inserts a call to a marker intrinsic (`julia.simdloop`), which this pass uses to identify loops that were originally marked with `@simd`. Then, this pass looks for a chain of floating point operations that form a reduce and adds the `contract` and `reassoc` fast math flags to allow reassociation (and thus vectorization). This pass does not preserve either loop information nor inference correctness, so it may violate Julia semantics in surprising ways. If the loop was annotated with `ivdep` as well, then the pass marks the loop as having no loop-carried dependencies (the resulting behavior is undefined if the user annotation was incorrect or gets applied to the wrong loop).
This pass acts as the main driver of the `@simd` annotation. Codegen attaches `!llvm.loop` metadata (the loop ID) to the back branch of a loop, which this pass uses to identify loops that were originally marked with `@simd`. Then, this pass looks for a chain of floating point operations that form a reduction and adds the `contract` and `reassoc` fast math flags to allow reassociation (and thus vectorization). This pass preserves neither loop information nor inference correctness, so it may violate Julia semantics in surprising ways. If the loop was annotated with `ivdep` as well, then the pass marks the loop as having no loop-carried dependencies (the resulting behavior is undefined if the user annotation was incorrect or gets applied to the wrong loop).

### LowerPTLS

Expand Down
45 changes: 30 additions & 15 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -992,14 +992,7 @@ static const auto jl_typeof_func = new JuliaFunction<>{
Attributes(C, {Attribute::NonNull}),
None); },
};
static const auto jl_loopinfo_marker_func = new JuliaFunction<>{
vchuravy marked this conversation as resolved.
Show resolved Hide resolved
"julia.loopinfo_marker",
[](LLVMContext &C) { return FunctionType::get(getVoidTy(C), false); },
[](LLVMContext &C) { return AttributeList::get(C,
Attributes(C, {Attribute::ReadOnly, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}),
AttributeSet(),
None); },
};

static const auto jl_write_barrier_func = new JuliaFunction<>{
"julia.write_barrier",
[](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
Expand Down Expand Up @@ -1609,6 +1602,7 @@ class jl_codectx_t {
jl_codegen_params_t &emission_context;
llvm::MapVector<jl_code_instance_t*, jl_codegen_call_target_t> call_targets;
Function *f = NULL;
MDNode* LoopID = NULL;
// local var info. globals are not in here.
std::vector<jl_varinfo_t> slots;
std::map<int, jl_varinfo_t> phic_slots;
Expand Down Expand Up @@ -5773,16 +5767,22 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
}
else if (head == jl_loopinfo_sym) {
// parse Expr(:loopinfo, "julia.simdloop", ("llvm.loop.vectorize.width", 4))
// to LLVM LoopID
SmallVector<Metadata *, 8> MDs;

// Reserve first location for self reference to the LoopID metadata node.
TempMDTuple TempNode = MDNode::getTemporary(ctx.builder.getContext(), None);
MDs.push_back(TempNode.get());

for (int i = 0, ie = nargs; i < ie; ++i) {
Metadata *MD = to_md_tree(args[i], ctx.builder.getContext());
if (MD)
MDs.push_back(MD);
}

MDNode* MD = MDNode::get(ctx.builder.getContext(), MDs);
CallInst *I = ctx.builder.CreateCall(prepare_call(jl_loopinfo_marker_func));
I->setMetadata("julia.loopinfo", MD);
ctx.LoopID = MDNode::getDistinct(ctx.builder.getContext(), MDs);
// Replace the temporary node with a self-reference.
ctx.LoopID->replaceOperandWith(0, ctx.LoopID);
return jl_cgval_t();
}
else if (head == jl_leave_sym || head == jl_coverageeffect_sym
Expand Down Expand Up @@ -8125,6 +8125,7 @@ static jl_llvm_functions_t
std::map<int, BasicBlock*> BB;
std::map<size_t, BasicBlock*> come_from_bb;
int cursor = 0;
int current_label = 0;
auto find_next_stmt = [&] (int seq_next) {
// new style ir is always in dominance order, but frontend IR might not be
// `seq_next` is the next statement we want to emit
Expand All @@ -8141,6 +8142,7 @@ static jl_llvm_functions_t
workstack.pop_back();
auto nextbb = BB.find(item + 1);
if (nextbb == BB.end()) {
// Not a BB
cursor = item;
return;
}
Expand All @@ -8151,8 +8153,10 @@ static jl_llvm_functions_t
seq_next = -1;
// if this BB is non-empty, we've visited it before so skip it
if (!nextbb->second->getTerminator()) {
// New BB
ctx.builder.SetInsertPoint(nextbb->second);
cursor = item;
current_label = item;
return;
}
}
Expand Down Expand Up @@ -8399,7 +8403,12 @@ static jl_llvm_functions_t
if (jl_is_gotonode(stmt)) {
int lname = jl_gotonode_label(stmt);
come_from_bb[cursor+1] = ctx.builder.GetInsertBlock();
ctx.builder.CreateBr(BB[lname]);
auto br = ctx.builder.CreateBr(BB[lname]);
// Check if backwards branch
if (ctx.LoopID && lname <= current_label) {
br->setMetadata(LLVMContext::MD_loop, ctx.LoopID);
ctx.LoopID = NULL;
}
find_next_stmt(lname - 1);
continue;
}
Expand All @@ -8417,10 +8426,17 @@ static jl_llvm_functions_t
workstack.push_back(lname - 1);
BasicBlock *ifnot = BB[lname];
BasicBlock *ifso = BB[cursor+2];
Instruction *br;
if (ifnot == ifso)
ctx.builder.CreateBr(ifnot);
br = ctx.builder.CreateBr(ifnot);
else
ctx.builder.CreateCondBr(isfalse, ifnot, ifso);
br = ctx.builder.CreateCondBr(isfalse, ifnot, ifso);

// Check if backwards branch
if (ctx.LoopID && lname <= current_label) {
br->setMetadata(LLVMContext::MD_loop, ctx.LoopID);
ctx.LoopID = NULL;
}
find_next_stmt(cursor + 1);
continue;
}
Expand Down Expand Up @@ -9163,7 +9179,6 @@ static void init_jit_functions(void)
add_named_global(jl_object_id__func, &jl_object_id_);
add_named_global(jl_alloc_obj_func, (void*)NULL);
add_named_global(jl_newbits_func, (void*)jl_new_bits);
add_named_global(jl_loopinfo_marker_func, (void*)NULL);
add_named_global(jl_typeof_func, (void*)NULL);
add_named_global(jl_write_barrier_func, (void*)NULL);
add_named_global(jldlsym_func, &jl_load_and_lookup);
Expand Down
2 changes: 1 addition & 1 deletion src/jl_exported_funcs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,6 @@
YY(LLVMExtraAddCPUFeaturesPass) \
YY(LLVMExtraMPMAddCPUFeaturesPass) \
YY(LLVMExtraMPMAddRemoveNIPass) \
YY(LLVMExtraMPMAddLowerSIMDLoopPass) \
YY(LLVMExtraMPMAddMultiVersioningPass) \
YY(LLVMExtraMPMAddRemoveJuliaAddrspacesPass) \
YY(LLVMExtraMPMAddRemoveAddrspacesPass) \
Expand All @@ -596,6 +595,7 @@
YY(LLVMExtraFPMAddGCInvariantVerifierPass) \
YY(LLVMExtraFPMAddFinalLowerGCPass) \
YY(LLVMExtraLPMAddJuliaLICMPass) \
YY(LLVMExtraLPMAddLowerSIMDLoopPass) \
YY(JLJITGetLLVMOrcExecutionSession) \
YY(JLJITGetJuliaOJIT) \
YY(JLJITGetExternalJITDylib) \
Expand Down
2 changes: 1 addition & 1 deletion src/llvm-julia-passes.inc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#ifdef MODULE_PASS
MODULE_PASS("CPUFeatures", CPUFeaturesPass, CPUFeaturesPass())
MODULE_PASS("RemoveNI", RemoveNIPass, RemoveNIPass())
MODULE_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass())
MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass, MultiVersioningPass())
MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass, RemoveJuliaAddrspacesPass())
MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass, RemoveAddrspacesPass())
Expand All @@ -24,4 +23,5 @@ FUNCTION_PASS("FinalLowerGC", FinalLowerGCPass, FinalLowerGCPass())
//Loop passes
#ifdef LOOP_PASS
LOOP_PASS("JuliaLICM", JuliaLICMPass, JuliaLICMPass())
LOOP_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass())
#endif
Loading