[GlobalIsel] Import GEP flags #93850

Merged: 6 commits, Jun 14, 2024
12 changes: 6 additions & 6 deletions llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -24,6 +24,10 @@ namespace llvm {

/// A base class for all GenericMachineInstrs.
class GenericMachineInstr : public MachineInstr {
constexpr static unsigned PoisonFlags = NoUWrap | NoSWrap | NoUSWrap |
IsExact | Disjoint | NonNeg |
FmNoNans | FmNoInfs;

public:
GenericMachineInstr() = delete;

@@ -35,14 +39,10 @@ class GenericMachineInstr : public MachineInstr {
return isPreISelGenericOpcode(MI->getOpcode());
}

bool hasPoisonGeneratingFlags() const {
return getFlags() & (NoUWrap | NoSWrap | IsExact | Disjoint | NonNeg |
FmNoNans | FmNoInfs);
}
bool hasPoisonGeneratingFlags() const { return getFlags() & PoisonFlags; }

void dropPoisonGeneratingFlags() {
clearFlags(NoUWrap | NoSWrap | IsExact | Disjoint | NonNeg | FmNoNans |
FmNoInfs);
clearFlags(PoisonFlags);
assert(!hasPoisonGeneratingFlags());
}
};
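The hunk above folds the two previously hand-maintained flag lists into a single `PoisonFlags` mask and adds the new `NoUSWrap` bit to it. A minimal sketch of how a GlobalISel transform would lean on these helpers; the wrapper function is hypothetical and only illustrates the API, it is not part of this patch:

```cpp
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"

using namespace llvm;

// Hypothetical helper: before speculating a generic instruction into a
// context where its operands may be poison, drop every poison-generating
// flag. Because PoisonFlags now includes NoUSWrap, the nusw bit imported
// from a GEP is cleared here as well.
static void stripPoisonFlagsBeforeSpeculation(GenericMachineInstr &MI) {
  if (MI.hasPoisonGeneratingFlags())
    MI.dropPoisonGeneratingFlags();
}
```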
2 changes: 2 additions & 0 deletions llvm/include/llvm/CodeGen/MachineInstr.h
@@ -117,6 +117,8 @@ class MachineInstr
NoConvergent = 1 << 17, // Call does not require convergence guarantees.
NonNeg = 1 << 18, // The operand is non-negative.
Disjoint = 1 << 19, // Each bit is zero in at least one of the inputs.
NoUSWrap = 1 << 20, // The instruction comes from a GEP with
// no unsigned signed wrap (nusw).
};

private:
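`NoUSWrap` takes the next free MIFlag bit after `Disjoint`. A short, hedged sketch of how later passes can test the flag on a translated `G_PTR_ADD`; the helper name is invented for illustration:

```cpp
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Hypothetical query: true if this generic ptradd was translated from a
// getelementptr that carried the IR-level nusw flag.
static bool isNoUSWrapPtrAdd(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_PTR_ADD &&
         MI.getFlag(MachineInstr::NoUSWrap);
}
```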
6 changes: 2 additions & 4 deletions llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1583,10 +1583,8 @@ bool IRTranslator::translateGetElementPtr(const User &U,
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);

uint32_t Flags = 0;
if (isa<Instruction>(U)) {
const Instruction &I = cast<Instruction>(U);
Flags = MachineInstr::copyFlagsFromInstruction(I);
}
if (const Instruction *I = dyn_cast<Instruction>(&U))
Flags = MachineInstr::copyFlagsFromInstruction(*I);

// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
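The change above is a cleanup of the cast: a constant-expression GEP is a `User` but not an `Instruction`, so it contributes no flags, while an instruction GEP gets its `nusw`/`nuw` bits via `copyFlagsFromInstruction`. Restated as a standalone helper for clarity (the function name is invented; in the translator the resulting flags are later handed to the `buildPtrAdd` call discussed in the review thread below):

```cpp
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Mirror of the snippet above: only real GEP instructions carry wrap
// flags over to the generic MIR; constant-expression GEPs yield 0.
static uint32_t flagsForGEPUser(const User &U) {
  uint32_t Flags = 0;
  if (const Instruction *I = dyn_cast<Instruction>(&U))
    Flags = MachineInstr::copyFlagsFromInstruction(*I);
  return Flags;
}
```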
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -212,6 +212,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("reassoc", MIToken::kw_reassoc)
.Case("nuw", MIToken::kw_nuw)
.Case("nsw", MIToken::kw_nsw)
.Case("nusw", MIToken::kw_nusw)
.Case("exact", MIToken::kw_exact)
.Case("nneg", MIToken::kw_nneg)
.Case("disjoint", MIToken::kw_disjoint)
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -69,6 +69,7 @@ struct MIToken {
kw_contract,
kw_afn,
kw_reassoc,
kw_nusw,
kw_nuw,
kw_nsw,
kw_exact,
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/MIRPrinter.cpp
@@ -813,6 +813,8 @@ void MIPrinter::print(const MachineInstr &MI) {
OS << "nneg ";
if (MI.getFlag(MachineInstr::Disjoint))
OS << "disjoint ";
if (MI.getFlag(MachineInstr::NoUSWrap))
OS << "nusw ";

OS << TII->getName(MI.getOpcode());
if (I < E)
5 changes: 5 additions & 0 deletions llvm/lib/CodeGen/MachineInstr.cpp
@@ -576,6 +576,11 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
MIFlags |= MachineInstr::MIFlag::NoSWrap;
if (TI->hasNoUnsignedWrap())
MIFlags |= MachineInstr::MIFlag::NoUWrap;
} else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
if (GEP->hasNoUnsignedSignedWrap())
MIFlags |= MachineInstr::MIFlag::NoUSWrap;
if (GEP->hasNoUnsignedWrap())
MIFlags |= MachineInstr::MIFlag::NoUWrap;
}

// Copy the nonneg flag.
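The new `else if` arm maps the IR GEP wrap flags onto the corresponding MI flag bits (`nusw` to `NoUSWrap`, `nuw` to `NoUWrap`). A small check stating the intended mapping, assuming a `GetElementPtrInst` parsed from something like `getelementptr nusw nuw i32, ptr %p, i64 %i`:

```cpp
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// If the GEP carries nusw/nuw, the copied mask must carry the matching
// MIFlag bits. Returns true when the mapping holds for this GEP.
static bool gepWrapFlagsCopied(const GetElementPtrInst &GEP) {
  uint32_t Flags = MachineInstr::copyFlagsFromInstruction(GEP);
  bool USWOk =
      !GEP.hasNoUnsignedSignedWrap() || (Flags & MachineInstr::NoUSWrap);
  bool UWOk = !GEP.hasNoUnsignedWrap() || (Flags & MachineInstr::NoUWrap);
  return USWOk && UWOk;
}
```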
@@ -20,8 +20,8 @@ define i32 @cse_gep(ptr %ptr, i32 %idx) {
; O0-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
; O0-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64)
; O0-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; O0-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
; O0-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2)
; O0-NEXT: %11:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
; O0-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %11(p0) :: (load (s32) from %ir.gep2)
; O0-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
; O0-NEXT: $w0 = COPY [[ADD]](s32)
; O0-NEXT: RET_ReallyLR implicit $w0
Expand All @@ -39,8 +39,8 @@ define i32 @cse_gep(ptr %ptr, i32 %idx) {
; O3-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
; O3-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
; O3-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; O3-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
; O3-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.gep2)
; O3-NEXT: %9:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
; O3-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %9(p0) :: (load (s32) from %ir.gep2)
; O3-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
; O3-NEXT: $w0 = COPY [[ADD]](s32)
; O3-NEXT: RET_ReallyLR implicit $w0
@@ -795,8 +795,8 @@ define void @jt_multiple_jump_tables(ptr %arg, i32 %arg1, ptr %arg2) {
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[PHI]], [[C111]]
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[MUL]](s64)
; CHECK-NEXT: [[C112:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[PTR_ADD]], [[C112]](s64)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD1]](p0) :: (load (p0) from %ir.tmp59)
; CHECK-NEXT: %120:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD]], [[C112]](s64)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD %120(p0) :: (load (p0) from %ir.tmp59)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: $x0 = COPY [[COPY]](p0)
; CHECK-NEXT: $x1 = COPY [[LOAD]](p0)
122 changes: 122 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-gep-flags.ll
@@ -0,0 +1,122 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -O0 -mtriple=aarch64-linux-gnu -global-isel -stop-after=irtranslator %s -o - | FileCheck %s

define i32 @gep_nusw_nuw(ptr %ptr, i32 %idx) {
; CHECK-LABEL: name: gep_nusw_nuw
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK-NEXT: %11:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %11(p0) :: (load (s32) from %ir.gep2)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
; CHECK-NEXT: $w0 = COPY [[ADD]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%sidx = sext i32 %idx to i64
%gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0
%v1 = load i32, ptr %gep1
%gep2 = getelementptr nusw nuw [4 x i32], ptr %ptr, i64 %sidx, i64 1
%v2 = load i32, ptr %gep2
%res = add i32 %v1, %v2
ret i32 %res
}

define i32 @gep_nuw(ptr %ptr, i32 %idx) {
; CHECK-LABEL: name: gep_nuw
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
; CHECK-NEXT: $w0 = COPY [[ADD]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%sidx = sext i32 %idx to i64
%gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0
%v1 = load i32, ptr %gep1
%gep2 = getelementptr nuw [4 x i32], ptr %ptr, i64 %sidx, i64 1
%v2 = load i32, ptr %gep2
%res = add i32 %v1, %v2
ret i32 %res
}

define i32 @gep_nusw(ptr %ptr, i32 %idx) {
; CHECK-LABEL: name: gep_nusw
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK-NEXT: %11:_(p0) = nusw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %11(p0) :: (load (s32) from %ir.gep2)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
; CHECK-NEXT: $w0 = COPY [[ADD]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%sidx = sext i32 %idx to i64
%gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0
%v1 = load i32, ptr %gep1
%gep2 = getelementptr nusw [4 x i32], ptr %ptr, i64 %sidx, i64 1
%v2 = load i32, ptr %gep2
%res = add i32 %v1, %v2
ret i32 %res
}

define i32 @gep_none(ptr %ptr, i32 %idx) {
; CHECK-LABEL: name: gep_none
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2)
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
; CHECK-NEXT: $w0 = COPY [[ADD]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%sidx = sext i32 %idx to i64
%gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0
%v1 = load i32, ptr %gep1
%gep2 = getelementptr [4 x i32], ptr %ptr, i64 %sidx, i64 1
%v2 = load i32, ptr %gep2
%res = add i32 %v1, %v2
ret i32 %res
}
Contributor: Test a vector case?

Author: The GEP is a vector.

Author: I would prefer not to touch this code: (1) it is copy-and-paste, and (2) there are odd rules for attaching flags to ptradd.

Contributor: None of these GEPs are vectors.

Author: My concern is with:

MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0),

In one corner case the IRTranslator attaches flags to ptradds. I am afraid that there are many GEPs that do not hit this line.
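For context, the call quoted above comes from `translateGetElementPtr`; the concern is that only GEPs that reach this particular `buildPtrAdd` get `Flags` attached, while GEPs lowered along other paths do not. A hedged, standalone rendering of that call site (assumed shape, relying on the optional flags parameter of `MachineIRBuilder::buildPtrAdd`; not an exact quote of the translator):

```cpp
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

using namespace llvm;

// Hypothetical standalone rendering of the quoted call site: the final
// constant offset becomes a single G_PTR_ADD that carries the imported
// GEP flags. Any GEP lowered along a different path never reaches this
// call and therefore never receives the flags.
static void emitFinalPtrAdd(MachineIRBuilder &MIRBuilder, Register DstReg,
                            Register BaseReg, LLT OffsetTy, int64_t Offset,
                            uint32_t Flags) {
  auto OffsetMIB = MIRBuilder.buildConstant(OffsetTy, Offset);
  MIRBuilder.buildPtrAdd(DstReg, BaseReg, OffsetMIB.getReg(0), Flags);
}
```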

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/arm64-this-return.ll
@@ -148,7 +148,7 @@ define ptr @E_ctor_base(ptr %this, i32 %x) {
; GISEL-MIR: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; GISEL-MIR: [[COPY2:%[0-9]+]]:_(p0) = COPY [[COPY]](p0)
; GISEL-MIR: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GISEL-MIR: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[C]](s64)
; GISEL-MIR: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C]](s64)
; GISEL-MIR: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; GISEL-MIR: $x0 = COPY [[PTR_ADD]](p0)
; GISEL-MIR: $w1 = COPY [[COPY1]](s32)
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
@@ -952,9 +952,9 @@ define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %ar
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (volatile load (s32) from `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw G_PTR_ADD [[COPY]], [[C]](s32)
; CHECK-NEXT: %5:_(p5) = nuw nusw G_PTR_ADD [[COPY]], [[C]](s32)
; CHECK-NEXT: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.arg0, addrspace 5)
; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5)
; CHECK-NEXT: G_STORE [[LOAD1]](s32), %5(p5) :: (store (s32) into %ir.gep1, addrspace 5)
; CHECK-NEXT: SI_RETURN
%val0 = load volatile i8, ptr addrspace(1) undef
%val1 = load volatile i32, ptr addrspace(1) undef
@@ -2914,16 +2914,16 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa
; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset1, align 16, addrspace 4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[INT]], [[C]](s64)
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset, align 8, addrspace 4)
; GCN-NEXT: %17:_(p4) = nuw nusw G_PTR_ADD [[INT]], [[C]](s64)
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %17(p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset, align 8, addrspace 4)
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -2947,7 +2947,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
@@ -24,16 +24,16 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1.out.val
; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
; GCN-NEXT: %17:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
; GCN-NEXT: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.in.val, addrspace 5)
; GCN-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.in.gep1, addrspace 5)
; GCN-NEXT: G_STORE [[C1]](s32), %17(p5) :: (store (s32) into %ir.in.gep1, addrspace 5)
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
; GCN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -50,15 +50,15 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
; GCN-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
; GCN-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; GCN-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5)
; GCN-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5)
; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX1]](p5)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
@@ -67,9 +67,9 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
; GCN-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = nuw G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32)
; GCN-NEXT: %45:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32)
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (s8) from %ir.out.val, addrspace 5)
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (dereferenceable load (s32) from %ir.out.gep1, addrspace 5)
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %45(p5) :: (dereferenceable load (s32) from %ir.out.gep1, addrspace 5)
; GCN-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: G_STORE [[LOAD1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
; GCN-NEXT: S_ENDPGM 0