From e8bf752a73a31777b5ee503a150ca80b3530c88e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 11 Aug 2024 11:55:00 +0400 Subject: [PATCH] AMDGPU/NewPM: Fill out addPreISelPasses This specific callback should now be at parity with the old pass manager version. There are still some missing IR passes before this point. Also I don't understand the need for the RequireAnalysisPass at the end. SelectionDAG should just be using the uncached getResult? --- .../AMDGPU/AMDGPUCodeGenPassBuilder.cpp | 55 ++++++++++++++++++- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 8 ++- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 1 + .../CodeGen/AMDGPU/bug-v4f64-subvector.ll | 2 +- 4 files changed, 60 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp index fb3d3259171acae..36f44a20d955320 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp @@ -9,9 +9,17 @@ #include "AMDGPUCodeGenPassBuilder.h" #include "AMDGPU.h" #include "AMDGPUISelDAGToDAG.h" +#include "AMDGPUPerfHintAnalysis.h" #include "AMDGPUTargetMachine.h" +#include "AMDGPUUnifyDivergentExitNodes.h" #include "SIFixSGPRCopies.h" #include "llvm/Analysis/UniformityAnalysis.h" +#include "llvm/Transforms/Scalar/FlattenCFG.h" +#include "llvm/Transforms/Scalar/Sink.h" +#include "llvm/Transforms/Scalar/StructurizeCFG.h" +#include "llvm/Transforms/Utils/FixIrreducible.h" +#include "llvm/Transforms/Utils/LCSSA.h" +#include "llvm/Transforms/Utils/UnifyLoopExits.h" using namespace llvm; @@ -28,8 +36,51 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder( } void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const { - // TODO: Add passes pre instruction selection. - // Test only, convert to real IR passes in future. 
+ const bool LateCFGStructurize = AMDGPUTargetMachine::EnableLateStructurizeCFG; + const bool DisableStructurizer = AMDGPUTargetMachine::DisableStructurizer; + const bool EnableStructurizerWorkarounds = + AMDGPUTargetMachine::EnableStructurizerWorkarounds; + + if (TM.getOptLevel() > CodeGenOptLevel::None) + addPass(FlattenCFGPass()); + + if (TM.getOptLevel() > CodeGenOptLevel::None) + addPass(SinkingPass()); + + addPass(AMDGPULateCodeGenPreparePass(TM)); + + // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit + // regions formed by them. + + addPass(AMDGPUUnifyDivergentExitNodesPass()); + + if (!LateCFGStructurize && !DisableStructurizer) { + if (EnableStructurizerWorkarounds) { + addPass(FixIrreduciblePass()); + addPass(UnifyLoopExitsPass()); + } + + addPass(StructurizeCFGPass(/*SkipUniformRegions=*/false)); + } + + addPass(AMDGPUAnnotateUniformValuesPass()); + + if (!LateCFGStructurize && !DisableStructurizer) { + addPass(SIAnnotateControlFlowPass(TM)); + + // TODO: Move this right after structurizeCFG to avoid extra divergence + // analysis. This depends on stopping SIAnnotateControlFlow from making + // control flow modifications. + addPass(AMDGPURewriteUndefForPHIPass()); + } + + addPass(LCSSAPass()); + + if (TM.getOptLevel() > CodeGenOptLevel::Less) + addPass(AMDGPUPerfHintAnalysisPass(TM)); + + // FIXME: Why isn't this queried as required from AMDGPUISelDAGToDAG, and why + // isn't this in addInstSelector? 
addPass(RequireAnalysisPass<UniformityInfoAnalysis, Function>()); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 0523fee5bcf9f44..5929dadf93bcbe5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -338,10 +338,11 @@ static cl::opt<bool> EnableScalarIRPasses( cl::init(true), cl::Hidden); -static cl::opt<bool> EnableStructurizerWorkarounds( +static cl::opt<bool, true> EnableStructurizerWorkarounds( "amdgpu-enable-structurizer-workarounds", - cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), - cl::Hidden); + cl::desc("Enable workarounds for the StructurizeCFG pass"), + cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds), + cl::init(true), cl::Hidden); static cl::opt<bool, true> EnableLowerModuleLDS( "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), @@ -611,6 +612,7 @@ bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false; bool AMDGPUTargetMachine::EnableFunctionCalls = false; bool AMDGPUTargetMachine::EnableLowerModuleLDS = true; bool AMDGPUTargetMachine::DisableStructurizer = false; +bool AMDGPUTargetMachine::EnableStructurizerWorkarounds = true; AMDGPUTargetMachine::~AMDGPUTargetMachine() = default; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 6bb8788cc73b0c8..4d39ad2b4150521 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -38,6 +38,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { static bool EnableFunctionCalls; static bool EnableLowerModuleLDS; static bool DisableStructurizer; + static bool EnableStructurizerWorkarounds; AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, diff --git a/llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll b/llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll index 2c7072b8c93b11b..2acd2355965a59b 100644 --- 
a/llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll +++ b/llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=amdgpu-isel -stop-after=amdgpu-isel -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK -; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=amdgpu-isel -enable-new-pm | FileCheck %s --check-prefixes=CHECK +; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=amdgpu-isel -stop-after=amdgpu-isel -enable-new-pm -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK ; This caused failure in infinite cycle in Selection DAG (combine) due to missing insert_subvector. ;