Skip to content

Commit

Permalink
[GlobalISel] Fix store merging incorrectly classifying an unknown ind…
Browse files Browse the repository at this point in the history
…ex expr as 0. (#90375)

During analysis, we incorrectly leave the offset part of an address info
struct
as zero, when in actual fact we failed to decompose it into base +
offset.
This results in incorrectly assuming that the address is adjacent to
another store
addr. To fix this we wrap the offset in an optional<> so we can
distinguish between
real zero and unknown.

Fixes issue #90242

(cherry picked from commit 19f4d68)
  • Loading branch information
aemerson authored and tstellar committed May 1, 2024
1 parent a7b8b89 commit ece9d35
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 34 deletions.
20 changes: 16 additions & 4 deletions llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,23 @@ struct LegalityQuery;
class MachineRegisterInfo;
namespace GISelAddressing {
/// Helper struct to store a base, index and offset that forms an address
struct BaseIndexOffset {
class BaseIndexOffset {
private:
Register BaseReg;
Register IndexReg;
int64_t Offset = 0;
bool IsIndexSignExt = false;
std::optional<int64_t> Offset;

public:
BaseIndexOffset() = default;
Register getBase() { return BaseReg; }
Register getBase() const { return BaseReg; }
Register getIndex() { return IndexReg; }
Register getIndex() const { return IndexReg; }
void setBase(Register NewBase) { BaseReg = NewBase; }
void setIndex(Register NewIndex) { IndexReg = NewIndex; }
void setOffset(std::optional<int64_t> NewOff) { Offset = NewOff; }
bool hasValidOffset() const { return Offset.has_value(); }
int64_t getOffset() const { return *Offset; }
};

/// Returns a BaseIndexOffset which describes the pointer in \p Ptr.
Expand Down Expand Up @@ -89,7 +101,7 @@ class LoadStoreOpt : public MachineFunctionPass {
// order stores are writing to incremeneting consecutive addresses. So when
// we walk the block in reverse order, the next eligible store must write to
// an offset one store width lower than CurrentLowestOffset.
uint64_t CurrentLowestOffset;
int64_t CurrentLowestOffset;
SmallVector<GStore *> Stores;
// A vector of MachineInstr/unsigned pairs to denote potential aliases that
// need to be checked before the candidate is considered safe to merge. The
Expand Down
48 changes: 28 additions & 20 deletions llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,21 +84,20 @@ BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
MachineRegisterInfo &MRI) {
BaseIndexOffset Info;
Register PtrAddRHS;
if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
Info.BaseReg = Ptr;
Info.IndexReg = Register();
Info.IsIndexSignExt = false;
Register BaseReg;
if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(BaseReg), m_Reg(PtrAddRHS)))) {
Info.setBase(Ptr);
Info.setOffset(0);
return Info;
}

Info.setBase(BaseReg);
auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
if (RHSCst)
Info.Offset = RHSCst->Value.getSExtValue();
Info.setOffset(RHSCst->Value.getSExtValue());

// Just recognize a simple case for now. In future we'll need to match
// indexing patterns for base + index + constant.
Info.IndexReg = PtrAddRHS;
Info.IsIndexSignExt = false;
Info.setIndex(PtrAddRHS);
return Info;
}

Expand All @@ -114,15 +113,16 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);

if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
if (!BasePtr0.getBase().isValid() || !BasePtr1.getBase().isValid())
return false;

int64_t Size1 = LdSt1->getMemSize();
int64_t Size2 = LdSt2->getMemSize();

int64_t PtrDiff;
if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
if (BasePtr0.getBase() == BasePtr1.getBase() && BasePtr0.hasValidOffset() &&
BasePtr1.hasValidOffset()) {
PtrDiff = BasePtr1.getOffset() - BasePtr0.getOffset();
// If the size of memory access is unknown, do not use it to do analysis.
// One example of unknown size memory access is to load/store scalable
// vector objects on the stack.
Expand Down Expand Up @@ -151,8 +151,8 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
// able to calculate their relative offset if at least one arises
// from an alloca. However, these allocas cannot overlap and we
// can infer there is no alias.
auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
auto *Base0Def = getDefIgnoringCopies(BasePtr0.getBase(), MRI);
auto *Base1Def = getDefIgnoringCopies(BasePtr1.getBase(), MRI);
if (!Base0Def || !Base1Def)
return false; // Couldn't tell anything.

Expand Down Expand Up @@ -520,16 +520,20 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,

Register StoreAddr = StoreMI.getPointerReg();
auto BIO = getPointerInfo(StoreAddr, *MRI);
Register StoreBase = BIO.BaseReg;
uint64_t StoreOffCst = BIO.Offset;
Register StoreBase = BIO.getBase();
if (C.Stores.empty()) {
C.BasePtr = StoreBase;
if (!BIO.hasValidOffset()) {
C.CurrentLowestOffset = 0;
} else {
C.CurrentLowestOffset = BIO.getOffset();
}
// This is the first store of the candidate.
// If the offset can't possibly allow for a lower addressed store with the
// same base, don't bother adding it.
if (StoreOffCst < ValueTy.getSizeInBytes())
if (BIO.hasValidOffset() &&
BIO.getOffset() < static_cast<int64_t>(ValueTy.getSizeInBytes()))
return false;
C.BasePtr = StoreBase;
C.CurrentLowestOffset = StoreOffCst;
C.Stores.emplace_back(&StoreMI);
LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
<< StoreMI);
Expand All @@ -549,8 +553,12 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
// writes to the next lowest adjacent address.
if (C.BasePtr != StoreBase)
return false;
if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
static_cast<uint64_t>(StoreOffCst))
// If we don't have a valid offset, we can't guarantee to be an adjacent
// offset.
if (!BIO.hasValidOffset())
return false;
if ((C.CurrentLowestOffset -
static_cast<int64_t>(ValueTy.getSizeInBytes())) != BIO.getOffset())
return false;

// This writes to an adjacent address. Allow it.
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
Original file line number Diff line number Diff line change
Expand Up @@ -323,3 +323,22 @@ define i32 @test_alias_3xs16(ptr %ptr, ptr %ptr2, ptr %ptr3, ptr noalias %safe_p
store i32 14, ptr %addr4
ret i32 %safeld
}

@G = external global [10 x i32]

define void @invalid_zero_offset_no_merge(i64 %0) {
; CHECK-LABEL: invalid_zero_offset_no_merge:
; CHECK: ; %bb.0:
; CHECK-NEXT: Lloh0:
; CHECK-NEXT: adrp x8, _G@GOTPAGE
; CHECK-NEXT: Lloh1:
; CHECK-NEXT: ldr x8, [x8, _G@GOTPAGEOFF]
; CHECK-NEXT: str wzr, [x8, x0, lsl #2]
; CHECK-NEXT: str wzr, [x8, #4]
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
%2 = getelementptr [10 x i32], ptr @G, i64 0, i64 %0
store i32 0, ptr %2, align 4
store i32 0, ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1), align 4
ret void
}
62 changes: 52 additions & 10 deletions llvm/test/CodeGen/AArch64/GlobalISel/store-merging.mir
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,13 @@
ret void
}

@G = external global [10 x i32]
define void @invalid_zero_offset_no_merge(i64 %0) {
%2 = getelementptr [10 x i32], ptr @G, i64 0, i64 %0
store i32 0, ptr %2, align 4
store i32 0, ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1), align 4
ret void
}
...
---
name: test_simple_2xs8
Expand Down Expand Up @@ -582,13 +589,11 @@ liveins:
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
# The store to ptr2 prevents merging into a single store.
# We can still merge the stores into addr1 and addr2.
body: |
bb.1 (%ir-block.0):
liveins: $x0, $x1
; The store to ptr2 prevents merging into a single store.
; We can still merge the stores into addr1 and addr2.
; CHECK-LABEL: name: test_alias_4xs16
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
Expand Down Expand Up @@ -639,10 +644,10 @@ liveins:
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
# Here store of 5 and 9 can be merged, others have aliasing barriers.
body: |
bb.1 (%ir-block.0):
liveins: $x0, $x1, $x2
; Here store of 5 and 9 can be merged, others have aliasing barriers.
; CHECK-LABEL: name: test_alias2_4xs16
; CHECK: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
Expand Down Expand Up @@ -698,12 +703,11 @@ liveins:
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
# No merging can be done here.
body: |
bb.1 (%ir-block.0):
liveins: $x0, $x1, $x2, $x3
; No merging can be done here.
; CHECK-LABEL: name: test_alias3_4xs16
; CHECK: liveins: $x0, $x1, $x2, $x3
; CHECK-NEXT: {{ $}}
Expand Down Expand Up @@ -767,12 +771,10 @@ stack:
- { id: 0, name: a1, size: 24, alignment: 4 }
- { id: 1, name: a2, size: 4, alignment: 4 }
machineFunctionInfo: {}
# Can merge because the load is from a different alloca and can't alias.
body: |
bb.1 (%ir-block.0):
liveins: $x0
; Can merge because the load is from a different alloca and can't alias.
; CHECK-LABEL: name: test_alias_allocas_2xs32
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
Expand Down Expand Up @@ -826,3 +828,43 @@ body: |
RET_ReallyLR
...
---
name: invalid_zero_offset_no_merge
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
bb.1 (%ir-block.1):
liveins: $x0
; CHECK-LABEL: name: invalid_zero_offset_no_merge
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64)
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @G
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[SHL]](s64)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.2)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[GV]], [[C2]](s64)
; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into `ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1)`)
; CHECK-NEXT: RET_ReallyLR
%0:_(s64) = COPY $x0
%9:_(s64) = G_CONSTANT i64 2
%3:_(s64) = G_SHL %0, %9(s64)
%1:_(p0) = G_GLOBAL_VALUE @G
%4:_(p0) = G_PTR_ADD %1, %3(s64)
%6:_(s32) = G_CONSTANT i32 0
G_STORE %6(s32), %4(p0) :: (store (s32) into %ir.2)
%8:_(s64) = G_CONSTANT i64 4
%7:_(p0) = nuw G_PTR_ADD %1, %8(s64)
G_STORE %6(s32), %7(p0) :: (store (s32) into `ptr getelementptr inbounds ([10 x i32], ptr @G, i64 0, i64 1)`)
RET_ReallyLR
...

0 comments on commit ece9d35

Please sign in to comment.