From 99d8995de1cd1171fcc4dd285ea70463449ca2f6 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 7 Aug 2024 09:22:48 -0400 Subject: [PATCH] Revert "Reapply "[Attributor][AMDGPU] Enable AAIndirectCallInfo for AMDAttributor (#100952)"" This reverts commit 7a68449a82ab1c1ab005caa72c1d986ca5deca36. https://lab.llvm.org/buildbot/#/builders/123/builds/3205 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 9 ++- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 16 +---- llvm/lib/Transforms/IPO/Attributor.cpp | 2 +- .../Transforms/IPO/AttributorAttributes.cpp | 3 +- .../CodeGen/AMDGPU/direct-indirect-call.ll | 4 +- .../AMDGPU/duplicate-attribute-indirect.ll | 4 +- .../CodeGen/AMDGPU/simple-indirect-call-2.ll | 66 ------------------- .../CodeGen/AMDGPU/simple-indirect-call.ll | 4 +- 8 files changed, 13 insertions(+), 95 deletions(-) delete mode 100644 llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 718cf704cbdf1a2..ad3c6426efd2fe2 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1448,7 +1448,7 @@ struct AttributorConfig { /// Callback function to determine if an indirect call targets should be made /// direct call targets (with an if-cascade). std::function + Function &AssummedCallee)> IndirectCalleeSpecializationCallback = nullptr; /// Helper to update an underlying call graph and to delete functions. @@ -1718,11 +1718,10 @@ struct Attributor { /// Return true if we should specialize the call site \b CB for the potential /// callee \p Fn. bool shouldSpecializeCallSiteForCallee(const AbstractAttribute &AA, - CallBase &CB, Function &Callee, - unsigned NumAssumedCallees) { + CallBase &CB, Function &Callee) { return Configuration.IndirectCalleeSpecializationCallback - ? Configuration.IndirectCalleeSpecializationCallback( - *this, AA, CB, Callee, NumAssumedCallees) + ? Configuration.IndirectCalleeSpecializationCallback(*this, AA, + CB, Callee) : true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 29d493db926d921..39c52140dfbd23a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -14,7 +14,6 @@ #include "GCNSubtarget.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/Analysis/CycleAnalysis.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" @@ -1039,25 +1038,12 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) { &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID, - &AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID, - &AAInstanceInfo::ID}); + &AAUnderlyingObjects::ID, &AAAddressSpace::ID}); AttributorConfig AC(CGUpdater); AC.Allowed = &Allowed; AC.IsModulePass = true; AC.DefaultInitializeLiveInternals = false; - AC.IndirectCalleeSpecializationCallback = - [&TM](Attributor &A, const AbstractAttribute &AA, CallBase &CB, - Function &Callee, unsigned NumAssumedCallees) { - if (AMDGPU::isEntryFunctionCC(Callee.getCallingConv())) - return false; - // Singleton functions can be specialized. - if (NumAssumedCallees == 1) - return true; - // Otherwise specialize uniform values. - const auto &TTI = TM.getTargetTransformInfo(*CB.getCaller()); - return TTI.isAlwaysUniform(CB.getCalledOperand()); - }; AC.IPOAmendableCB = [](const Function &F) { return F.getCallingConv() == CallingConv::AMDGPU_KERNEL; }; diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 38b61b6a88357c3..910c0aeacc42e0d 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -3836,7 +3836,7 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache, if (MaxSpecializationPerCB.getNumOccurrences()) { AC.IndirectCalleeSpecializationCallback = [&](Attributor &, const AbstractAttribute &AA, CallBase &CB, - Function &Callee, unsigned) { + Function &Callee) { if (MaxSpecializationPerCB == 0) return false; auto &Set = IndirectCalleeTrackingMap[&CB]; diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 94948e57cf1f941..db5e94806e9a160 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12357,8 +12357,7 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo { SmallVector SkippedAssumedCallees; SmallVector> NewCalls; for (Function *NewCallee : AssumedCallees) { - if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee, - AssumedCallees.size())) { + if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee)) { SkippedAssumedCallees.push_back(NewCallee); SpecializedForAllCallees = false; continue; diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll index aa182b720c60420..386f9cd3f9ce739 100644 --- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll @@ -15,7 +15,7 @@ define internal void @direct() { ; CHECK-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) ; CHECK-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 ; CHECK-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 -; CHECK-NEXT: call void @indirect() +; CHECK-NEXT: call void [[FP]]() ; CHECK-NEXT: ret void ; %fptr = alloca ptr, addrspace(5) @@ -36,5 +36,5 @@ define amdgpu_kernel void @test_direct_indirect_call() { } ;. ; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll index 848019c8729251a..05558c555c581e3 100644 --- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll +++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll @@ -27,7 +27,7 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 { ; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) ; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 ; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 -; ATTRIBUTOR_GCN-NEXT: call void @indirect() +; ATTRIBUTOR_GCN-NEXT: call void [[FP]]() ; ATTRIBUTOR_GCN-NEXT: ret void ; %fptr = alloca ptr, addrspace(5) @@ -43,5 +43,5 @@ attributes #0 = { "amdgpu-no-dispatch-id" } ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" } ;. ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll deleted file mode 100644 index 9c3457e87dbf3fd..000000000000000 --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll +++ /dev/null @@ -1,66 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck --check-prefixes=CHECK,OW %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -attributor-assume-closed-world=1 %s | FileCheck --check-prefixes=CHECK,CW %s - -target datalayout = "A5" - -@G = global i32 0, align 4 - -;. -; CHECK: @G = global i32 0, align 4 -;. -define void @bar() { -; CHECK-LABEL: define {{[^@]+}}@bar -; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 1, ptr @G, align 4 -; CHECK-NEXT: ret void -; -entry: - store i32 1, ptr @G, align 4 - ret void -} - -define ptr @helper() { -; CHECK-LABEL: define {{[^@]+}}@helper -; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: ret ptr @bar -; -entry: - ret ptr @bar -} - -define amdgpu_kernel void @foo(ptr noundef %fp) { -; OW-LABEL: define {{[^@]+}}@foo -; OW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] { -; OW-NEXT: entry: -; OW-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; OW-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8 -; OW-NEXT: call void [[FP]]() -; OW-NEXT: ret void -; -; CW-LABEL: define {{[^@]+}}@foo -; CW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] { -; CW-NEXT: entry: -; CW-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -; CW-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8 -; CW-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(5) [[FP_ADDR]], align 8 -; CW-NEXT: call void @bar() -; CW-NEXT: ret void -; -entry: - %fp.addr = alloca ptr, addrspace(5) - store ptr %fp, ptr addrspace(5) %fp.addr - %load = load ptr, ptr addrspace(5) %fp.addr - call void %load() - ret void -} - -;. -; OW: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } -;. -; CW: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CW: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -;. diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll index cca7b49996ff3b2..3a6b0485d241746 100644 --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -36,7 +36,7 @@ define amdgpu_kernel void @test_simple_indirect_call() { ; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) ; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 ; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 -; ATTRIBUTOR_GCN-NEXT: call void @indirect() +; ATTRIBUTOR_GCN-NEXT: call void [[FP]]() ; ATTRIBUTOR_GCN-NEXT: ret void ; ; GFX9-LABEL: test_simple_indirect_call: @@ -81,7 +81,7 @@ define amdgpu_kernel void @test_simple_indirect_call() { ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" } ;. ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ;. ; AKF_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} ;.