Skip to content

Commit

Permalink
Fix Expr(:loopinfo) codegen (#50663)
Browse files Browse the repository at this point in the history
We used a loop-marker intrinsic because the LoopID used to
be dropped by optimization passes (this no longer seems to be the case).

#50660 is an example of a miscompilation in which a loop of length 1
was optimized by simplifycfg to the point where the loop-marker
ended up attached to the wrong back-edge.

This PR drops the loop-marker and uses the LoopID metadata node
directly.
  • Loading branch information
vchuravy authored Aug 14, 2023
1 parent 91093fe commit 90494c2
Show file tree
Hide file tree
Showing 11 changed files with 210 additions and 204 deletions.
4 changes: 2 additions & 2 deletions doc/src/devdocs/llvm-passes.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ This pass removes the non-integral address spaces from the module's datalayout s

* Filename: `llvm-simdloop.cpp`
* Class Name: `LowerSIMDLoopPass`
* Opt Name: `module(LowerSIMDLoop)`
* Opt Name: `loop(LowerSIMDLoop)`

This pass acts as the main driver of the `@simd` annotation. Codegen inserts a call to a marker intrinsic (`julia.simdloop`), which this pass uses to identify loops that were originally marked with `@simd`. Then, this pass looks for a chain of floating point operations that form a reduce and adds the `contract` and `reassoc` fast math flags to allow reassociation (and thus vectorization). This pass does not preserve either loop information nor inference correctness, so it may violate Julia semantics in surprising ways. If the loop was annotated with `ivdep` as well, then the pass marks the loop as having no loop-carried dependencies (the resulting behavior is undefined if the user annotation was incorrect or gets applied to the wrong loop).
This pass acts as the main driver of the `@simd` annotation. Codegen attaches `!llvm.loop` metadata (the LoopID node) to the back branch of a loop, which this pass uses to identify loops that were originally marked with `@simd`. Then, this pass looks for a chain of floating point operations that form a reduction and adds the `contract` and `reassoc` fast math flags to allow reassociation (and thus vectorization). This pass preserves neither loop information nor inference correctness, so it may violate Julia semantics in surprising ways. If the loop was annotated with `ivdep` as well, then the pass marks the loop as having no loop-carried dependencies (the resulting behavior is undefined if the user annotation was incorrect or gets applied to the wrong loop).

### LowerPTLS

Expand Down
45 changes: 30 additions & 15 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -992,14 +992,7 @@ static const auto jl_typeof_func = new JuliaFunction<>{
Attributes(C, {Attribute::NonNull}),
None); },
};
// Declaration of the `julia.loopinfo_marker` marker function.
// The first lambda builds its type: no parameters, void return, not variadic.
// The second lambda builds its attribute list: ReadOnly, NoRecurse and
// InaccessibleMemOnly as the first (function-level) attribute set, an empty
// AttributeSet() in the return-attribute position, and `None` for the
// per-parameter attributes.
static const auto jl_loopinfo_marker_func = new JuliaFunction<>{
"julia.loopinfo_marker",
[](LLVMContext &C) { return FunctionType::get(getVoidTy(C), false); },
[](LLVMContext &C) { return AttributeList::get(C,
Attributes(C, {Attribute::ReadOnly, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}),
AttributeSet(),
None); },
};

static const auto jl_write_barrier_func = new JuliaFunction<>{
"julia.write_barrier",
[](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
Expand Down Expand Up @@ -1609,6 +1602,7 @@ class jl_codectx_t {
jl_codegen_params_t &emission_context;
llvm::MapVector<jl_code_instance_t*, jl_codegen_call_target_t> call_targets;
Function *f = NULL;
MDNode* LoopID = NULL;
// local var info. globals are not in here.
std::vector<jl_varinfo_t> slots;
std::map<int, jl_varinfo_t> phic_slots;
Expand Down Expand Up @@ -5773,16 +5767,22 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
}
else if (head == jl_loopinfo_sym) {
// parse Expr(:loopinfo, "julia.simdloop", ("llvm.loop.vectorize.width", 4))
// to LLVM LoopID
SmallVector<Metadata *, 8> MDs;

// Reserve first location for self reference to the LoopID metadata node.
TempMDTuple TempNode = MDNode::getTemporary(ctx.builder.getContext(), None);
MDs.push_back(TempNode.get());

for (int i = 0, ie = nargs; i < ie; ++i) {
Metadata *MD = to_md_tree(args[i], ctx.builder.getContext());
if (MD)
MDs.push_back(MD);
}

MDNode* MD = MDNode::get(ctx.builder.getContext(), MDs);
CallInst *I = ctx.builder.CreateCall(prepare_call(jl_loopinfo_marker_func));
I->setMetadata("julia.loopinfo", MD);
ctx.LoopID = MDNode::getDistinct(ctx.builder.getContext(), MDs);
// Replace the temporary node with a self-reference.
ctx.LoopID->replaceOperandWith(0, ctx.LoopID);
return jl_cgval_t();
}
else if (head == jl_leave_sym || head == jl_coverageeffect_sym
Expand Down Expand Up @@ -8125,6 +8125,7 @@ static jl_llvm_functions_t
std::map<int, BasicBlock*> BB;
std::map<size_t, BasicBlock*> come_from_bb;
int cursor = 0;
int current_label = 0;
auto find_next_stmt = [&] (int seq_next) {
// new style ir is always in dominance order, but frontend IR might not be
// `seq_next` is the next statement we want to emit
Expand All @@ -8141,6 +8142,7 @@ static jl_llvm_functions_t
workstack.pop_back();
auto nextbb = BB.find(item + 1);
if (nextbb == BB.end()) {
// Not a BB
cursor = item;
return;
}
Expand All @@ -8151,8 +8153,10 @@ static jl_llvm_functions_t
seq_next = -1;
// if this BB is non-empty, we've visited it before so skip it
if (!nextbb->second->getTerminator()) {
// New BB
ctx.builder.SetInsertPoint(nextbb->second);
cursor = item;
current_label = item;
return;
}
}
Expand Down Expand Up @@ -8399,7 +8403,12 @@ static jl_llvm_functions_t
if (jl_is_gotonode(stmt)) {
int lname = jl_gotonode_label(stmt);
come_from_bb[cursor+1] = ctx.builder.GetInsertBlock();
ctx.builder.CreateBr(BB[lname]);
auto br = ctx.builder.CreateBr(BB[lname]);
// Check if backwards branch
if (ctx.LoopID && lname <= current_label) {
br->setMetadata(LLVMContext::MD_loop, ctx.LoopID);
ctx.LoopID = NULL;
}
find_next_stmt(lname - 1);
continue;
}
Expand All @@ -8417,10 +8426,17 @@ static jl_llvm_functions_t
workstack.push_back(lname - 1);
BasicBlock *ifnot = BB[lname];
BasicBlock *ifso = BB[cursor+2];
Instruction *br;
if (ifnot == ifso)
ctx.builder.CreateBr(ifnot);
br = ctx.builder.CreateBr(ifnot);
else
ctx.builder.CreateCondBr(isfalse, ifnot, ifso);
br = ctx.builder.CreateCondBr(isfalse, ifnot, ifso);

// Check if backwards branch
if (ctx.LoopID && lname <= current_label) {
br->setMetadata(LLVMContext::MD_loop, ctx.LoopID);
ctx.LoopID = NULL;
}
find_next_stmt(cursor + 1);
continue;
}
Expand Down Expand Up @@ -9163,7 +9179,6 @@ static void init_jit_functions(void)
add_named_global(jl_object_id__func, &jl_object_id_);
add_named_global(jl_alloc_obj_func, (void*)NULL);
add_named_global(jl_newbits_func, (void*)jl_new_bits);
add_named_global(jl_loopinfo_marker_func, (void*)NULL);
add_named_global(jl_typeof_func, (void*)NULL);
add_named_global(jl_write_barrier_func, (void*)NULL);
add_named_global(jldlsym_func, &jl_load_and_lookup);
Expand Down
2 changes: 1 addition & 1 deletion src/jl_exported_funcs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,6 @@
YY(LLVMExtraAddCPUFeaturesPass) \
YY(LLVMExtraMPMAddCPUFeaturesPass) \
YY(LLVMExtraMPMAddRemoveNIPass) \
YY(LLVMExtraMPMAddLowerSIMDLoopPass) \
YY(LLVMExtraMPMAddMultiVersioningPass) \
YY(LLVMExtraMPMAddRemoveJuliaAddrspacesPass) \
YY(LLVMExtraMPMAddRemoveAddrspacesPass) \
Expand All @@ -596,6 +595,7 @@
YY(LLVMExtraFPMAddGCInvariantVerifierPass) \
YY(LLVMExtraFPMAddFinalLowerGCPass) \
YY(LLVMExtraLPMAddJuliaLICMPass) \
YY(LLVMExtraLPMAddLowerSIMDLoopPass) \
YY(JLJITGetLLVMOrcExecutionSession) \
YY(JLJITGetJuliaOJIT) \
YY(JLJITGetExternalJITDylib) \
Expand Down
2 changes: 1 addition & 1 deletion src/llvm-julia-passes.inc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#ifdef MODULE_PASS
MODULE_PASS("CPUFeatures", CPUFeaturesPass, CPUFeaturesPass())
MODULE_PASS("RemoveNI", RemoveNIPass, RemoveNIPass())
MODULE_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass())
MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass, MultiVersioningPass())
MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass, RemoveJuliaAddrspacesPass())
MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass, RemoveAddrspacesPass())
Expand All @@ -24,4 +23,5 @@ FUNCTION_PASS("FinalLowerGC", FinalLowerGCPass, FinalLowerGCPass())
//Loop passes
#ifdef LOOP_PASS
LOOP_PASS("JuliaLICM", JuliaLICMPass, JuliaLICMPass())
LOOP_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass())
#endif
Loading

4 comments on commit 90494c2

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily package evaluation, I will reply here when finished:

@nanosoldier runtests(isdaily = true)

@vtjnash
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here.

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The package evaluation job you requested has completed - possible new issues were detected.
The full report is available.

Please sign in to comment.