Skip to content

Commit

Permalink
[coro][pgo] Do not insert counters in the suspend block (llvm#71262)
Browse files Browse the repository at this point in the history
If we did, we couldn't lower symmetric transfer resumes to tail calls.

We can instrument the other 2 edges instead, as long as they also don't
point to the same basic block.
  • Loading branch information
mtrofin authored Nov 15, 2023
1 parent 8ea8dd9 commit ffd337b
Show file tree
Hide file tree
Showing 25 changed files with 1,013 additions and 10 deletions.
35 changes: 35 additions & 0 deletions llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
Expand Down Expand Up @@ -82,6 +84,38 @@ template <class Edge, class BBInfo> class CFGMST {
return true;
}

void handleCoroSuspendEdge(Edge *E) {
// We must not add instrumentation to the BB representing the
// "suspend" path, else CoroSplit won't be able to lower
// llvm.coro.suspend to a tail call. We do want profiling info for
// the other branches (resume/destroy). So we do 2 things:
// 1. we prefer instrumenting those other edges by setting the weight
// of the "suspend" edge to max, and
// 2. we mark the edge as "Removed" to guarantee it is not considered
// for instrumentation. That could technically happen:
// (from test/Transforms/Coroutines/coro-split-musttail.ll)
//
// %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
// switch i8 %suspend, label %exit [
// i8 0, label %await.ready
// i8 1, label %exit
// ]
const BasicBlock *EdgeTarget = E->DestBB;
if (!EdgeTarget)
return;
assert(E->SrcBB);
const Function *F = EdgeTarget->getParent();
if (!F->isPresplitCoroutine())
return;

const Instruction *TI = E->SrcBB->getTerminator();
if (auto *SWInst = dyn_cast<SwitchInst>(TI))
if (auto *Intrinsic = dyn_cast<IntrinsicInst>(SWInst->getCondition()))
if (Intrinsic->getIntrinsicID() == Intrinsic::coro_suspend &&
SWInst->getDefaultDest() == EdgeTarget)
E->Removed = true;
}

// Traverse the CFG using a stack. Find all the edges and assign the weight.
// Edges with large weight will be put into MST first so they are less likely
// to be instrumented.
Expand Down Expand Up @@ -133,6 +167,7 @@ template <class Edge, class BBInfo> class CFGMST {
Weight++;
auto *E = &addEdge(&BB, TargetBB, Weight);
E->IsCritical = Critical;
handleCoroSuspendEdge(E);
LLVM_DEBUG(dbgs() << " Edge: from " << BB.getName() << " to "
<< TargetBB->getName() << " w=" << Weight << "\n");

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
; Tests that instrumentation doesn't interfere with lowering (coro-split).
; It should convert coro.resume followed by a suspend to a musttail call.

; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

define void @f() #0 {
entry:
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%alloc = call ptr @malloc(i64 16) #3
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)

%save = call token @llvm.coro.save(ptr null)
%addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
call fastcc void %addr1(ptr null)

%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
switch i8 %suspend, label %exit [
i8 0, label %await.ready
i8 1, label %exit
]
await.ready:
%save2 = call token @llvm.coro.save(ptr null)
%addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
call fastcc void %addr2(ptr null)

%suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
switch i8 %suspend2, label %exit [
i8 0, label %exit
i8 1, label %exit
]
exit:
call i1 @llvm.coro.end(ptr null, i1 false, token none)
ret void
}

; Verify that in the initial function resume is not marked with musttail.
; CHECK-LABEL: @f(
; CHECK: %[[addr1:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
; CHECK-NOT: musttail call fastcc void %[[addr1]](ptr null)

; Verify that in the resume part resume call is marked with musttail.
; CHECK-LABEL: @f.resume(
; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
; CHECK: call void @llvm.instrprof
; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr null)
; CHECK-NEXT: ret void

declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
declare i1 @llvm.coro.alloc(token) #2
declare i64 @llvm.coro.size.i64() #3
declare ptr @llvm.coro.begin(token, ptr writeonly) #2
declare token @llvm.coro.save(ptr) #2
declare ptr @llvm.coro.frame() #3
declare i8 @llvm.coro.suspend(token, i1) #2
declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
declare i1 @llvm.coro.end(ptr, i1, token) #2
declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
declare ptr @malloc(i64)

attributes #0 = { presplitcoroutine }
attributes #1 = { argmemonly nounwind readonly }
attributes #2 = { nounwind }
attributes #3 = { nounwind readnone }
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
; Tests that instrumentation doesn't interfere with lowering (coro-split).
; It should convert coro.resume followed by a suspend to a musttail call.

; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

define void @f() #0 {
entry:
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%alloc = call ptr @malloc(i64 16) #3
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)

%save = call token @llvm.coro.save(ptr null)
%addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
call fastcc void %addr1(ptr null)

%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
switch i8 %suspend, label %exit [
i8 0, label %await.suspend
i8 1, label %exit
]
await.suspend:
%save2 = call token @llvm.coro.save(ptr null)
%br0 = call i8 @switch_result()
switch i8 %br0, label %unreach [
i8 0, label %await.resume3
i8 1, label %await.resume1
i8 2, label %await.resume2
]
await.resume1:
%hdl = call ptr @g()
%addr2 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)
call fastcc void %addr2(ptr %hdl)
br label %final.suspend
await.resume2:
%hdl2 = call ptr @h()
%addr3 = call ptr @llvm.coro.subfn.addr(ptr %hdl2, i8 0)
call fastcc void %addr3(ptr %hdl2)
br label %final.suspend
await.resume3:
%addr4 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
call fastcc void %addr4(ptr null)
br label %final.suspend
final.suspend:
%suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
switch i8 %suspend2, label %exit [
i8 0, label %pre.exit
i8 1, label %exit
]
pre.exit:
br label %exit
exit:
call i1 @llvm.coro.end(ptr null, i1 false, token none)
ret void
unreach:
unreachable
}

; Verify that in the initial function resume is not marked with musttail.
; CHECK-LABEL: @f(
; CHECK: %[[addr1:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
; CHECK-NOT: musttail call fastcc void %[[addr1]](ptr null)

; Verify that in the resume part resume call is marked with musttail.
; CHECK-LABEL: @f.resume(
; CHECK: %[[hdl:.+]] = call ptr @g()
; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
; CHECK: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
; CHECK-NEXT: ret void
; CHECK: %[[hdl2:.+]] = call ptr @h()
; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
; CHECK: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
; CHECK-NEXT: ret void
; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
; CHECK: musttail call fastcc void %[[addr4]](ptr null)
; CHECK-NEXT: ret void



declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
declare i1 @llvm.coro.alloc(token) #2
declare i64 @llvm.coro.size.i64() #3
declare ptr @llvm.coro.begin(token, ptr writeonly) #2
declare token @llvm.coro.save(ptr) #2
declare ptr @llvm.coro.frame() #3
declare i8 @llvm.coro.suspend(token, i1) #2
declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
declare i1 @llvm.coro.end(ptr, i1, token) #2
declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
declare ptr @malloc(i64)
declare i8 @switch_result()
declare ptr @g()
declare ptr @h()

attributes #0 = { presplitcoroutine }
attributes #1 = { argmemonly nounwind readonly }
attributes #2 = { nounwind }
attributes #3 = { nounwind readnone }
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
; Tests that instrumentation doesn't interfere with lowering (coro-split).
; It should convert coro.resume followed by a suspend to a musttail call.

; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

target triple = "wasm64-unknown-unknown"

define void @f() #0 {
entry:
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%alloc = call ptr @malloc(i64 16) #3
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)

%save = call token @llvm.coro.save(ptr null)
%addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
call fastcc void %addr1(ptr null)

%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
switch i8 %suspend, label %exit [
i8 0, label %await.ready
i8 1, label %exit
]
await.ready:
%save2 = call token @llvm.coro.save(ptr null)
%addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
call fastcc void %addr2(ptr null)

%suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
switch i8 %suspend2, label %exit [
i8 0, label %exit
i8 1, label %exit
]
exit:
call i1 @llvm.coro.end(ptr null, i1 false, token none)
ret void
}

; CHECK: musttail call

declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
declare i1 @llvm.coro.alloc(token) #2
declare i64 @llvm.coro.size.i64() #3
declare ptr @llvm.coro.begin(token, ptr writeonly) #2
declare token @llvm.coro.save(ptr) #2
declare ptr @llvm.coro.frame() #3
declare i8 @llvm.coro.suspend(token, i1) #2
declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
declare i1 @llvm.coro.end(ptr, i1, token) #2
declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
declare ptr @malloc(i64)

attributes #0 = { presplitcoroutine "target-features"="+tail-call" }
attributes #1 = { argmemonly nounwind readonly }
attributes #2 = { nounwind }
attributes #3 = { nounwind readnone }
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
; Tests that instrumentation doesn't interfere with lowering (coro-split).
; It should convert coro.resume followed by a suspend to a musttail call.

; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

target triple = "wasm32-unknown-unknown"

define void @f() #0 {
entry:
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%alloc = call ptr @malloc(i64 16) #3
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)

%save = call token @llvm.coro.save(ptr null)
%addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
call fastcc void %addr1(ptr null)

%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
switch i8 %suspend, label %exit [
i8 0, label %await.ready
i8 1, label %exit
]
await.ready:
%save2 = call token @llvm.coro.save(ptr null)
%addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
call fastcc void %addr2(ptr null)

%suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
switch i8 %suspend2, label %exit [
i8 0, label %exit
i8 1, label %exit
]
exit:
call i1 @llvm.coro.end(ptr null, i1 false, token none)
ret void
}

; CHECK: musttail call

declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
declare i1 @llvm.coro.alloc(token) #2
declare i64 @llvm.coro.size.i64() #3
declare ptr @llvm.coro.begin(token, ptr writeonly) #2
declare token @llvm.coro.save(ptr) #2
declare ptr @llvm.coro.frame() #3
declare i8 @llvm.coro.suspend(token, i1) #2
declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
declare i1 @llvm.coro.end(ptr, i1, token) #2
declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
declare ptr @malloc(i64)

attributes #0 = { presplitcoroutine "target-features"="+tail-call" }
attributes #1 = { argmemonly nounwind readonly }
attributes #2 = { nounwind }
attributes #3 = { nounwind readnone }
Loading

0 comments on commit ffd337b

Please sign in to comment.