Skip to content

Commit

Permalink
AMDGPU/NewPM: Fill out addPreISelPasses
Browse files Browse the repository at this point in the history
This specific callback should now be at parity with the old
pass manager version. There are still some missing IR passes
before this point.

Also, I don't understand the need for the RequireAnalysisPass at the
end. Shouldn't SelectionDAG just be using the uncached getResult?
  • Loading branch information
arsenm committed Aug 12, 2024
1 parent f86da4c commit e8bf752
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 6 deletions.
55 changes: 53 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,17 @@
#include "AMDGPUCodeGenPassBuilder.h"
#include "AMDGPU.h"
#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUUnifyDivergentExitNodes.h"
#include "SIFixSGPRCopies.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
#include "llvm/Transforms/Scalar/Sink.h"
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/UnifyLoopExits.h"

using namespace llvm;

Expand All @@ -28,8 +36,51 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
}

// Registers, through the supplied addPass callback, the IR passes that are
// scheduled just ahead of instruction selection. The sequence of addPass calls
// below is order-sensitive.
void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const {
  const CodeGenOptLevel OptLevel = TM.getOptLevel();

  // CFG structurization is scheduled here only when neither the
  // late-structurize option nor the disable-structurizer option is set.
  const bool StructurizeHere =
      !(AMDGPUTargetMachine::EnableLateStructurizeCFG ||
        AMDGPUTargetMachine::DisableStructurizer);

  // Both cleanups are skipped when optimizations are disabled.
  if (OptLevel > CodeGenOptLevel::None) {
    addPass(FlattenCFGPass());
    addPass(SinkingPass());
  }

  addPass(AMDGPULateCodeGenPreparePass(TM));

  // Divergent exit nodes form multi-exit regions that StructurizeCFG won't
  // recognize, so merge them beforehand.
  addPass(AMDGPUUnifyDivergentExitNodesPass());

  if (StructurizeHere) {
    if (AMDGPUTargetMachine::EnableStructurizerWorkarounds) {
      addPass(FixIrreduciblePass());
      addPass(UnifyLoopExitsPass());
    }

    addPass(StructurizeCFGPass(/*SkipUniformRegions=*/false));
  }

  addPass(AMDGPUAnnotateUniformValuesPass());

  if (StructurizeHere) {
    addPass(SIAnnotateControlFlowPass(TM));

    // TODO: Schedule this immediately after StructurizeCFG so that an extra
    // divergence analysis run is avoided. That first requires
    // SIAnnotateControlFlow to stop making control flow modifications.
    addPass(AMDGPURewriteUndefForPHIPass());
  }

  addPass(LCSSAPass());

  if (OptLevel > CodeGenOptLevel::Less)
    addPass(AMDGPUPerfHintAnalysisPass(TM));

  // FIXME: Why is this not requested as a required analysis by
  // AMDGPUISelDAGToDAG, and why does it not live in addInstSelector?
  addPass(RequireAnalysisPass<UniformityInfoAnalysis, Function>());
}

Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,10 +338,11 @@ static cl::opt<bool> EnableScalarIRPasses(
cl::init(true),
cl::Hidden);

static cl::opt<bool> EnableStructurizerWorkarounds(
static cl::opt<bool, true> EnableStructurizerWorkarounds(
"amdgpu-enable-structurizer-workarounds",
cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
cl::Hidden);
cl::desc("Enable workarounds for the StructurizeCFG pass"),
cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds),
cl::init(true), cl::Hidden);

static cl::opt<bool, true> EnableLowerModuleLDS(
"amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
Expand Down Expand Up @@ -611,6 +612,7 @@ bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
bool AMDGPUTargetMachine::EnableFunctionCalls = false;
bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;
bool AMDGPUTargetMachine::DisableStructurizer = false;
bool AMDGPUTargetMachine::EnableStructurizerWorkarounds = true;

AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
static bool EnableFunctionCalls;
static bool EnableLowerModuleLDS;
static bool DisableStructurizer;
static bool EnableStructurizerWorkarounds;

AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=amdgpu-isel -stop-after=amdgpu-isel -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=amdgpu-isel -enable-new-pm | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=amdgpu-isel -stop-after=amdgpu-isel -enable-new-pm -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK

; This used to fail with an infinite cycle in SelectionDAG (combine) due to a missing insert_subvector.
;
Expand Down

0 comments on commit e8bf752

Please sign in to comment.