Skip to content

Commit

Permalink
Reland "[LICM] Hoist LOAD without sinking the STORE"
Browse files Browse the repository at this point in the history
When doing load/store promotion within LICM, if we
cannot prove that it is safe to sink the store we won't
hoist the load, even though we can prove the load could
be dereferenced and moved outside the loop. This patch
implements the load promotion by moving it in the loop
preheader by inserting proper PHI in the loop. The store
is kept as is in the loop. By doing this, we avoid doing
the load from a memory location in each iteration.

Please consider this small example:

loop {
  var = *ptr;
  if (var) break;
  *ptr= var + 1;
}
After this patch, it will be:

var0 = *ptr;
loop {
  var1 = phi (var0, var2);
  if (var1) break;
  var2 = var1 + 1;
  *ptr = var2;
}
This addresses some problems from [0].

[0] https://bugs.llvm.org/show_bug.cgi?id=51193

Differential revision: https://reviews.llvm.org/D113289
  • Loading branch information
Djordje Todorovic authored and Djordje Todorovic committed Dec 2, 2021
1 parent 47616c8 commit 2cdc6f2
Show file tree
Hide file tree
Showing 9 changed files with 65 additions and 26 deletions.
4 changes: 4 additions & 0 deletions llvm/include/llvm/Transforms/Utils/SSAUpdater.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,10 @@ class LoadAndStorePromoter {

/// Called to update debug info associated with the instruction.
virtual void updateDebugInfo(Instruction *I) const {}

/// Return false if a sub-class wants to keep one of the loads/stores
/// after the SSA construction.
virtual bool shouldDelete(Instruction *I) const { return true; }
};

} // end namespace llvm
Expand Down
41 changes: 32 additions & 9 deletions llvm/lib/Transforms/Scalar/LICM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1860,6 +1860,7 @@ class LoopPromoter : public LoadAndStorePromoter {
bool UnorderedAtomic;
AAMDNodes AATags;
ICFLoopSafetyInfo &SafetyInfo;
bool CanInsertStoresInExitBlocks;

// We're about to add a use of V in a loop exit block. Insert an LCSSA phi
// (if legal) if doing so would add an out-of-loop use to an instruction
Expand All @@ -1886,12 +1887,13 @@ class LoopPromoter : public LoadAndStorePromoter {
SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl,
Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
ICFLoopSafetyInfo &SafetyInfo)
ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
SafetyInfo(SafetyInfo) {}
SafetyInfo(SafetyInfo),
CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {}

bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction *> &) const override {
Expand All @@ -1903,7 +1905,7 @@ class LoopPromoter : public LoadAndStorePromoter {
return PointerMustAliases.count(Ptr);
}

void doExtraRewritesBeforeFinalDeletion() override {
void insertStoresInLoopExitBlocks() {
// Insert stores after in the loop exit blocks. Each exit block gets a
// store of the live-out values that feed them. Since we've already told
// the SSA updater about the defs in the loop and the preheader
Expand Down Expand Up @@ -1937,10 +1939,21 @@ class LoopPromoter : public LoadAndStorePromoter {
}
}

void doExtraRewritesBeforeFinalDeletion() override {
if (CanInsertStoresInExitBlocks)
insertStoresInLoopExitBlocks();
}

void instructionDeleted(Instruction *I) const override {
SafetyInfo.removeInstruction(I);
MSSAU->removeMemoryAccess(I);
}

bool shouldDelete(Instruction *I) const override {
if (isa<StoreInst>(I))
return CanInsertStoresInExitBlocks;
return true;
}
};

bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
Expand Down Expand Up @@ -2039,6 +2052,7 @@ bool llvm::promoteLoopAccessesToScalars(

bool DereferenceableInPH = false;
bool SafeToInsertStore = false;
bool FoundLoadToPromote = false;

SmallVector<Instruction *, 64> LoopUses;

Expand Down Expand Up @@ -2086,6 +2100,7 @@ bool llvm::promoteLoopAccessesToScalars(

SawUnorderedAtomic |= Load->isAtomic();
SawNotAtomic |= !Load->isAtomic();
FoundLoadToPromote = true;

Align InstAlignment = Load->getAlign();

Expand Down Expand Up @@ -2197,13 +2212,20 @@ bool llvm::promoteLoopAccessesToScalars(
}
}

// If we've still failed to prove we can sink the store, give up.
if (!SafeToInsertStore)
// If we've still failed to prove we can sink the store, hoist the load
// only, if possible.
if (!SafeToInsertStore && !FoundLoadToPromote)
// If we cannot hoist the load either, give up.
return false;

// Otherwise, this is safe to promote, lets do it!
LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
<< '\n');
// Lets do the promotion!
if (SafeToInsertStore)
LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
<< '\n');
else
LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
<< '\n');

ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
LoopUses[0])
Expand All @@ -2222,7 +2244,8 @@ bool llvm::promoteLoopAccessesToScalars(
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
Alignment, SawUnorderedAtomic, AATags, *SafetyInfo,
SafeToInsertStore);

// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Utils/SSAUpdater.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,9 @@ void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) {
// Now that everything is rewritten, delete the old instructions from the
// function. They should all be dead now.
for (Instruction *User : Insts) {
if (!shouldDelete(User))
continue;

// If this is a load that still has uses, then the load must have been added
// as a live value in the SSAUpdate data structure for a block (e.g. because
// the loaded value was stored later). In this case, we need to recursively
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ target triple = "x86_64-unknown-linux-gnu"
; RUN: opt -O2 -S < %s | FileCheck %s

; CHECK-LABEL: main
; CHECK: if.end
; CHECK: store
; CHECK: memset
; CHECK: if.then
; CHECK: store
; CHECK: memset
; CHECK: if.end
; CHECK: store
; CHECK: store

@d = common global i32 0, align 4
@b = common global i32 0, align 4
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/Transforms/LICM/hoist-load-without-store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ define dso_local void @f(i32* nocapture %ptr, i32 %n) {
; CHECK-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[N:%.*]]
; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_LR_PH:%.*]], label [[CLEANUP1:%.*]]
; CHECK: for.body.lr.ph:
; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, i32* [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[PTR_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ 1, [[IF_END:%.*]] ]
; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END]] ]
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[FOR_BODY_CLEANUP1_CRIT_EDGE:%.*]]
; CHECK: if.end:
Expand Down
8 changes: 5 additions & 3 deletions llvm/test/Transforms/LICM/promote-capture.ll
Original file line number Diff line number Diff line change
Expand Up @@ -111,17 +111,19 @@ define void @test_captured_before_loop(i32 %len) {
; CHECK-NEXT: [[COUNT:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 0, i32* [[COUNT]], align 4
; CHECK-NEXT: call void @capture(i32* [[COUNT]])
; CHECK-NEXT: [[COUNT_PROMOTED:%.*]] = load i32, i32* [[COUNT]], align 4
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: [[C_INC2:%.*]] = phi i32 [ [[COUNT_PROMOTED]], [[ENTRY:%.*]] ], [ [[C_INC1:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[I_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT: [[COND:%.*]] = call i1 @cond(i32 [[I]])
; CHECK-NEXT: br i1 [[COND]], label [[IF:%.*]], label [[LATCH]]
; CHECK: if:
; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[COUNT]], align 4
; CHECK-NEXT: [[C_INC:%.*]] = add i32 [[C]], 1
; CHECK-NEXT: [[C_INC:%.*]] = add i32 [[C_INC2]], 1
; CHECK-NEXT: store i32 [[C_INC]], i32* [[COUNT]], align 4
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[C_INC1]] = phi i32 [ [[C_INC]], [[IF]] ], [ [[C_INC2]], [[LOOP]] ]
; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[LEN:%.*]]
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
Expand Down
8 changes: 5 additions & 3 deletions llvm/test/Transforms/LICM/scalar-promote-memmodel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,21 @@ define void @bar(i32 %n, i32 %b) nounwind uwtable ssp {
; CHECK-LABEL: @bar(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: [[G_PROMOTED:%.*]] = load i32, i32* @g, align 4
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC5:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[INC2:%.*]] = phi i32 [ [[G_PROMOTED]], [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC5:%.*]], [[FOR_INC]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N:%.*]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[INC2]], 1
; CHECK-NEXT: store i32 [[INC]], i32* @g, align 4
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC1]] = phi i32 [ [[INC]], [[IF_THEN]] ], [ [[INC2]], [[FOR_BODY]] ]
; CHECK-NEXT: [[INC5]] = add nsw i32 [[I_0]], 1
; CHECK-NEXT: br label [[FOR_COND]]
; CHECK: for.end:
Expand Down
8 changes: 5 additions & 3 deletions llvm/test/Transforms/LICM/scalar-promote-opaque-ptrs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -314,17 +314,19 @@ define i32 @test7bad() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @capture(ptr [[LOCAL]])
; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, ptr [[LOCAL]], align 4
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[ELSE:%.*]] ]
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[LOCAL]], align 4
; CHECK-NEXT: [[X2:%.*]] = call i32 @opaque(i32 [[X]])
; CHECK-NEXT: [[X22:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X21:%.*]], [[ELSE:%.*]] ]
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ]
; CHECK-NEXT: [[X2:%.*]] = call i32 @opaque(i32 [[X22]])
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X2]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]]
; CHECK: if:
; CHECK-NEXT: store i32 [[X2]], ptr [[LOCAL]], align 4
; CHECK-NEXT: br label [[ELSE]]
; CHECK: else:
; CHECK-NEXT: [[X21]] = phi i32 [ [[X2]], [[IF]] ], [ [[X22]], [[LOOP]] ]
; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
Expand Down
8 changes: 5 additions & 3 deletions llvm/test/Transforms/LICM/scalar-promote.ll
Original file line number Diff line number Diff line change
Expand Up @@ -315,17 +315,19 @@ define i32 @test7bad() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @capture(i32* [[LOCAL]])
; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, i32* [[LOCAL]], align 4
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[ELSE:%.*]] ]
; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[LOCAL]], align 4
; CHECK-NEXT: [[X2:%.*]] = call i32 @opaque(i32 [[X]])
; CHECK-NEXT: [[X22:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X21:%.*]], [[ELSE:%.*]] ]
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ]
; CHECK-NEXT: [[X2:%.*]] = call i32 @opaque(i32 [[X22]])
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X2]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]]
; CHECK: if:
; CHECK-NEXT: store i32 [[X2]], i32* [[LOCAL]], align 4
; CHECK-NEXT: br label [[ELSE]]
; CHECK: else:
; CHECK-NEXT: [[X21]] = phi i32 [ [[X2]], [[IF]] ], [ [[X22]], [[LOOP]] ]
; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0
; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
Expand Down

0 comments on commit 2cdc6f2

Please sign in to comment.