diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 15a3d74666ca21..295479b45fa95b 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -126,6 +126,9 @@ class PGOHash { BinaryOperatorNE, // The preceding values are available since PGO_HASH_V2. + // Cilk statements. These values are also available with PGO_HASH_V1. + CilkForStmt, + // Keep this last. It's for the static assert that follows. LastHashType }; @@ -267,6 +270,7 @@ struct MapRegionCounters : public RecursiveASTVisitor { DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt) DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt) DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt) + DEFINE_NESTABLE_TRAVERSAL(CilkForStmt) /// Get version \p HashVersion of the PGO hash for \p S. PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) { @@ -327,6 +331,8 @@ struct MapRegionCounters : public RecursiveASTVisitor { } break; } + case Stmt::CilkForStmtClass: + return PGOHash::CilkForStmt; } if (HashVersion >= PGO_HASH_V2) { @@ -744,6 +750,53 @@ struct ComputeRegionCounts : public ConstStmtVisitor { setCount(ParentCount + RHSCount - CurrentCount); RecordNextStmtCount = true; } + + void VisitCilkForStmt(const CilkForStmt *S) { + RecordStmtCount(S); + if (S->getInit()) + Visit(S->getInit()); + if (S->getLimitStmt()) + Visit(S->getLimitStmt()); + if (S->getBeginStmt()) + Visit(S->getBeginStmt()); + if (S->getEndStmt()) + Visit(S->getEndStmt()); + if (S->getLoopVarDecl()) + Visit(S->getLoopVarDecl()); + + uint64_t ParentCount = CurrentCount; + + BreakContinueStack.push_back(BreakContinue()); + // Visit the body region first. (This is basically the same as a while + // loop; see further comments in VisitWhileStmt.) 
+ uint64_t BodyCount = setCount(PGO.getRegionCount(S)); + CountMap[S->getBody()] = BodyCount; + Visit(S->getBody()); + uint64_t BackedgeCount = CurrentCount; + BreakContinue BC = BreakContinueStack.pop_back_val(); + + // The increment is essentially part of the body but it needs to include + // the count for all the continue statements. + if (S->getInc()) { + uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount); + CountMap[S->getInc()] = IncCount; + Visit(S->getInc()); + } + + // ...then propagate counts through the initial and loop conditions. + uint64_t CondCount = + setCount(ParentCount + BackedgeCount + BC.ContinueCount); + if (S->getInitCond()) { + CountMap[S->getInitCond()] = ParentCount; + Visit(S->getInitCond()); + } + if (S->getCond()) { + CountMap[S->getCond()] = CondCount; + Visit(S->getCond()); + } + setCount(BC.BreakCount + CondCount - BodyCount); + RecordNextStmtCount = true; + } }; } // end anonymous namespace diff --git a/clang/test/Cilk/cilkfor-pgo.cpp b/clang/test/Cilk/cilkfor-pgo.cpp new file mode 100644 index 00000000000000..d60dc18de1a5dc --- /dev/null +++ b/clang/test/Cilk/cilkfor-pgo.cpp @@ -0,0 +1,24 @@ +// Check that -fprofile-instrument=clang with -fprofile-update=atomic emits +// an atomic counter update inside the body of a _Cilk_for loop. +// +// Credit to Brian Wheatman for the original source of this test. 
+// +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fprofile-instrument=clang -fprofile-update=atomic %s -S -emit-llvm -fopencilk -ftapir=none -o - 2>&1 | FileCheck %s +// expected-no-diagnostics + +int main() { + int sum = 0; + _Cilk_for(int i = 0; i < 1000000; i++) { sum += i; } + + return sum; +} + +// CHECK: @__profc_main = {{.*}}global [2 x i64] zeroinitializer, section "__llvm_prf_cnts" + +// CHECK-LABEL: define {{.*}}i32 @main() + +// CHECK: detach within %{{.+}}, label %[[PFOR_BODY:.+]], label %[[PFOR_INC:.+]] + +// CHECK: [[PFOR_BODY]]: +// CHECK: atomicrmw add i64* getelementptr inbounds ([2 x i64], [2 x i64]* @__profc_main, i64 0, i64 1), i64 1 monotonic +// CHECK: reattach within %{{.+}}, label %[[PFOR_INC]]