Skip to content

Commit

Permalink
[AMDGPU] Update SIInsertHardClauses for GFX11
Browse files Browse the repository at this point in the history
Changes for GFX11:
- Clauses may not mix instructions of different types, and there are
  more types. For example image instructions with and without a sampler
  are now different types.
- The max size of a clause is explicitly documented as 63 instructions.
  Previously it was implicitly assumed to be 64. This is such a tiny
  difference that it does not seem worth making it conditional on the
  subtarget.
- It can be beneficial to clause stores as well as loads.

Differential Revision: https://reviews.llvm.org/D127391
  • Loading branch information
jayfoad committed Jun 9, 2022
1 parent 87b4677 commit ffe86e3
Show file tree
Hide file tree
Showing 2 changed files with 299 additions and 17 deletions.
76 changes: 64 additions & 12 deletions llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,39 @@ using namespace llvm;

namespace {

// Upper bound on the number of instructions placed in one hard clause.
// The s_clause encoding has room for a length of 64, but the hardware
// documentation (at least for GFX11) gives 63 as the largest permitted value,
// so the smaller limit is applied regardless of subtarget.
constexpr unsigned MaxInstructionsInClause = 63U;

enum HardClauseType {
// For GFX10:

// Texture, buffer, global or scratch memory instructions.
HARDCLAUSE_VMEM,
// Flat (not global or scratch) memory instructions.
HARDCLAUSE_FLAT,

// For GFX11:

// Texture memory instructions.
HARDCLAUSE_MIMG_LOAD,
HARDCLAUSE_MIMG_STORE,
HARDCLAUSE_MIMG_ATOMIC,
HARDCLAUSE_MIMG_SAMPLE,
// Buffer, global or scratch memory instructions.
HARDCLAUSE_VMEM_LOAD,
HARDCLAUSE_VMEM_STORE,
HARDCLAUSE_VMEM_ATOMIC,
// Flat (not global or scratch) memory instructions.
HARDCLAUSE_FLAT_LOAD,
HARDCLAUSE_FLAT_STORE,
HARDCLAUSE_FLAT_ATOMIC,
// BVH instructions.
HARDCLAUSE_BVH,

// Common:

// Instructions that access LDS.
HARDCLAUSE_LDS,
// Scalar memory instructions.
Expand Down Expand Up @@ -79,19 +107,43 @@ class SIInsertHardClauses : public MachineFunctionPass {
}

HardClauseType getHardClauseType(const MachineInstr &MI) {

// On current architectures we only get a benefit from clausing loads.
if (MI.mayLoad()) {
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
if (ST->hasNSAClauseBug()) {
if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
if (ST->hasNSAClauseBug()) {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
return HARDCLAUSE_ILLEGAL;
}
return HARDCLAUSE_VMEM;
}
if (SIInstrInfo::isFLAT(MI))
return HARDCLAUSE_FLAT;
} else {
assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
if (SIInstrInfo::isMIMG(MI)) {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
return HARDCLAUSE_ILLEGAL;
const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
if (BaseInfo->BVH)
return HARDCLAUSE_BVH;
if (BaseInfo->Sampler)
return HARDCLAUSE_MIMG_SAMPLE;
return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC
: HARDCLAUSE_MIMG_LOAD
: HARDCLAUSE_MIMG_STORE;
}
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
: HARDCLAUSE_VMEM_LOAD
: HARDCLAUSE_VMEM_STORE;
}
if (SIInstrInfo::isFLAT(MI)) {
return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC
: HARDCLAUSE_FLAT_LOAD
: HARDCLAUSE_FLAT_STORE;
}
return HARDCLAUSE_VMEM;
}
if (SIInstrInfo::isFLAT(MI))
return HARDCLAUSE_FLAT;
// TODO: LDS
if (SIInstrInfo::isSMRD(MI))
return HARDCLAUSE_SMEM;
Expand Down Expand Up @@ -130,7 +182,7 @@ class SIInsertHardClauses : public MachineFunctionPass {
bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
if (CI.First == CI.Last)
return false;
assert(CI.Length <= 64 && "Hard clause is too long!");
assert(CI.Length <= MaxInstructionsInClause && "Hard clause is too long!");

auto &MBB = *CI.First->getParent();
auto ClauseMI =
Expand Down Expand Up @@ -171,7 +223,7 @@ class SIInsertHardClauses : public MachineFunctionPass {
}
}

if (CI.Length == 64 ||
if (CI.Length == MaxInstructionsInClause ||
(CI.Length && Type != HARDCLAUSE_INTERNAL &&
Type != HARDCLAUSE_IGNORE &&
(Type != CI.Type ||
Expand Down
Loading

0 comments on commit ffe86e3

Please sign in to comment.