Skip to content

Commit

Permalink
AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager (#102806)
Browse files Browse the repository at this point in the history
  • Loading branch information
arsenm authored Aug 12, 2024
1 parent afe019c commit 05b75e0
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 47 deletions.
16 changes: 13 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ FunctionPass *createSIPostRABundlerPass();
FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPULateCodeGenPreparePass();
FunctionPass *createAMDGPULateCodeGenPrepareLegacyPass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
FunctionPass *createAMDGPURewriteOutArgumentsPass();
ModulePass *
Expand Down Expand Up @@ -282,6 +282,16 @@ class AMDGPUCodeGenPreparePass
PreservedAnalyses run(Function &, FunctionAnalysisManager &);
};

/// New pass manager wrapper for the AMDGPU late codegen-prepare function pass.
///
/// The pass keeps a reference to the GCN target machine it was constructed
/// with; the referenced target machine must therefore outlive the pass object.
class AMDGPULateCodeGenPreparePass
    : public PassInfoMixin<AMDGPULateCodeGenPreparePass> {
private:
  /// Target machine supplied at construction time (held by reference).
  const GCNTargetMachine &TM;

public:
  /// Construct the pass for the given GCN target machine.
  AMDGPULateCodeGenPreparePass(const GCNTargetMachine &TM) : TM(TM) {}

  /// Run the pass on a single function and report the preserved analyses.
  PreservedAnalyses run(Function &, FunctionAnalysisManager &);
};

class AMDGPULowerKernelArgumentsPass
: public PassInfoMixin<AMDGPULowerKernelArgumentsPass> {
private:
Expand Down Expand Up @@ -352,8 +362,8 @@ extern char &AMDGPUCodeGenPrepareID;
void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
extern char &AMDGPURemoveIncompatibleFunctionsID;

void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
extern char &AMDGPULateCodeGenPrepareID;
void initializeAMDGPULateCodeGenPrepareLegacyPass(PassRegistry &);
extern char &AMDGPULateCodeGenPrepareLegacyID;

FunctionPass *createAMDGPURewriteUndefForPHILegacyPass();
void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &);
Expand Down
110 changes: 68 additions & 42 deletions llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,35 +42,21 @@ static cl::opt<bool>
namespace {

class AMDGPULateCodeGenPrepare
: public FunctionPass,
public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
: public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
Module *Mod = nullptr;
const DataLayout *DL = nullptr;
const GCNSubtarget &ST;

AssumptionCache *AC = nullptr;
UniformityInfo *UA = nullptr;

SmallVector<WeakTrackingVH, 8> DeadInsts;

public:
static char ID;

AMDGPULateCodeGenPrepare() : FunctionPass(ID) {}

StringRef getPassName() const override {
return "AMDGPU IR late optimizations";
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetPassConfig>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<UniformityInfoWrapperPass>();
AU.setPreservesAll();
}

bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;

AMDGPULateCodeGenPrepare(Module &M, const GCNSubtarget &ST,
AssumptionCache *AC, UniformityInfo *UA)
: Mod(&M), DL(&M.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
bool run(Function &F);
bool visitInstruction(Instruction &) { return false; }

// Check if the specified value is at least DWORD aligned.
Expand Down Expand Up @@ -148,23 +134,7 @@ class LiveRegOptimizer {

} // end anonymous namespace

bool AMDGPULateCodeGenPrepare::doInitialization(Module &M) {
Mod = &M;
DL = &Mod->getDataLayout();
return false;
}

bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) {
if (skipFunction(F))
return false;

const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
const TargetMachine &TM = TPC.getTM<TargetMachine>();
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();

bool AMDGPULateCodeGenPrepare::run(Function &F) {
// "Optimize" the virtual regs that cross basic block boundaries. When
// building the SelectionDAG, vectors of illegal types that cross basic blocks
// will be scalarized and widened, with each scalar living in its
Expand Down Expand Up @@ -505,16 +475,72 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
return true;
}

INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepare, DEBUG_TYPE,
/// New pass manager entry point for the late AMDGPU IR codegen-prepare
/// transform.
///
/// Looks up the GCN subtarget for \p F from the stored target machine, obtains
/// the assumption cache and uniformity info for \p F from \p FAM, and runs the
/// shared AMDGPULateCodeGenPrepare implementation on \p F.
///
/// \returns PreservedAnalyses::all() when the implementation reports that
/// nothing changed; otherwise a set in which only the CFG analyses are marked
/// as preserved.
PreservedAnalyses
AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

  AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(F);
  UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(F);

  AMDGPULateCodeGenPrepare Impl(*F.getParent(), ST, &AC, &UI);

  // An untouched function must not invalidate any cached analysis results;
  // returning none() here would force needless recomputation downstream.
  if (!Impl.run(F))
    return PreservedAnalyses::all();

  // The IR was modified: drop everything except the CFG analyses set.
  PreservedAnalyses PA = PreservedAnalyses::none();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

// Legacy pass manager wrapper for the AMDGPU late codegen-prepare transform.
// The class only declares the pass plumbing (ID, name, analysis usage);
// runOnFunction is defined out of line.
class AMDGPULateCodeGenPrepareLegacy : public FunctionPass {
public:
// Pass identification; the address of this member is what identifies the
// pass, and it is defined out of line.
static char ID;

AMDGPULateCodeGenPrepareLegacy() : FunctionPass(ID) {}

// Human-readable pass name.
StringRef getPassName() const override {
return "AMDGPU IR late optimizations";
}

// Declares the analyses runOnFunction queries: the target pass config, the
// assumption cache tracker and the uniformity info wrapper.
// NOTE(review): setPreservesAll() is declared even though runOnFunction
// returns a "changed" flag from the implementation - confirm this is
// intentional.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetPassConfig>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<UniformityInfoWrapperPass>();
AU.setPreservesAll();
}

// Runs the transform on one function; returns whether the IR was changed.
bool runOnFunction(Function &F) override;
};

/// Legacy pass manager entry point.
///
/// Bails out early when skipFunction() says \p F should be skipped. Otherwise
/// gathers the subtarget (via TargetPassConfig), the assumption cache and the
/// uniformity info from the legacy analysis wrappers and hands them to the
/// shared AMDGPULateCodeGenPrepare implementation.
///
/// \returns the implementation's "changed" flag, or false when skipped.
bool AMDGPULateCodeGenPrepareLegacy::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  // Resolve the per-function GCN subtarget through the pass config's target
  // machine.
  const auto &PassConfig = getAnalysis<TargetPassConfig>();
  const GCNSubtarget &Subtarget =
      PassConfig.getTM<TargetMachine>().getSubtarget<GCNSubtarget>(F);

  AssumptionCache &Assumptions =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  UniformityInfo &Uniformity =
      getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();

  return AMDGPULateCodeGenPrepare(*F.getParent(), Subtarget, &Assumptions,
                                  &Uniformity)
      .run(F);
}

INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
"AMDGPU IR late optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPULateCodeGenPrepare, DEBUG_TYPE,
INITIALIZE_PASS_END(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
"AMDGPU IR late optimizations", false, false)

char AMDGPULateCodeGenPrepare::ID = 0;
char AMDGPULateCodeGenPrepareLegacy::ID = 0;

FunctionPass *llvm::createAMDGPULateCodeGenPreparePass() {
return new AMDGPULateCodeGenPrepare();
FunctionPass *llvm::createAMDGPULateCodeGenPrepareLegacyPass() {
return new AMDGPULateCodeGenPrepareLegacy();
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ FUNCTION_PASS("amdgpu-annotate-uniform", AMDGPUAnnotateUniformValuesPass())
FUNCTION_PASS("amdgpu-codegenprepare", AMDGPUCodeGenPreparePass(*this))
FUNCTION_PASS("amdgpu-image-intrinsic-opt",
AMDGPUImageIntrinsicOptimizerPass(*this))
FUNCTION_PASS("amdgpu-late-codegenprepare",
AMDGPULateCodeGenPreparePass(
*static_cast<const GCNTargetMachine *>(this)))
FUNCTION_PASS("amdgpu-lower-kernel-arguments",
AMDGPULowerKernelArgumentsPass(*this))
FUNCTION_PASS("amdgpu-lower-kernel-attributes",
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUPromoteAllocaPass(*PR);
initializeAMDGPUPromoteAllocaToVectorPass(*PR);
initializeAMDGPUCodeGenPreparePass(*PR);
initializeAMDGPULateCodeGenPreparePass(*PR);
initializeAMDGPULateCodeGenPrepareLegacyPass(*PR);
initializeAMDGPURemoveIncompatibleFunctionsPass(*PR);
initializeAMDGPULowerModuleLDSLegacyPass(*PR);
initializeAMDGPULowerBufferFatPointersPass(*PR);
Expand Down Expand Up @@ -1227,7 +1227,7 @@ bool GCNPassConfig::addPreISel() {
addPass(createSinkingPass());

if (TM->getOptLevel() > CodeGenOptLevel::None)
addPass(createAMDGPULateCodeGenPreparePass());
addPass(createAMDGPULateCodeGenPrepareLegacyPass());

// Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
// regions formed by them.
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-late-codegenprepare %s | FileCheck %s -check-prefix=GFX9
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-late-codegenprepare %s | FileCheck %s -check-prefix=GFX12
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-late-codegenprepare %s | FileCheck %s -check-prefix=GFX9

; Make sure we don't crash when trying to create a bitcast between
; address spaces
Expand Down

0 comments on commit 05b75e0

Please sign in to comment.