Skip to content

Commit

Permalink
[Attributor][AMD] Enable AAIndirectCallInfo for AMDAttributorPass
Browse files Browse the repository at this point in the history
  • Loading branch information
shiltian committed Jul 31, 2024
1 parent 0a01e8f commit aa74047
Show file tree
Hide file tree
Showing 9 changed files with 104 additions and 17 deletions.
9 changes: 5 additions & 4 deletions llvm/include/llvm/Transforms/IPO/Attributor.h
Original file line number Diff line number Diff line change
Expand Up @@ -1448,7 +1448,7 @@ struct AttributorConfig {
/// Callback function to determine if indirect call targets should be made
/// direct call targets (with an if-cascade).
std::function<bool(Attributor &A, const AbstractAttribute &AA, CallBase &CB,
Function &AssummedCallee)>
Function &AssummedCallee, unsigned NumAssummedCallee)>
IndirectCalleeSpecializationCallback = nullptr;

/// Helper to update an underlying call graph and to delete functions.
Expand Down Expand Up @@ -1718,10 +1718,11 @@ struct Attributor {
/// Return true if we should specialize the call site \p CB for the potential
/// callee \p Callee.
bool shouldSpecializeCallSiteForCallee(const AbstractAttribute &AA,
CallBase &CB, Function &Callee) {
CallBase &CB, Function &Callee,
unsigned NumAssummedCallee) {
return Configuration.IndirectCalleeSpecializationCallback
? Configuration.IndirectCalleeSpecializationCallback(*this, AA,
CB, Callee)
? Configuration.IndirectCalleeSpecializationCallback(
*this, AA, CB, Callee, NumAssummedCallee)
: true;
}

Expand Down
15 changes: 14 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
Expand Down Expand Up @@ -1038,12 +1039,24 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
&AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
&AAPointerInfo::ID, &AAPotentialConstantValues::ID,
&AAUnderlyingObjects::ID});
&AAUnderlyingObjects::ID, &AAIndirectCallInfo::ID, &AAInstanceInfo::ID});

AttributorConfig AC(CGUpdater);
AC.Allowed = &Allowed;
AC.IsModulePass = true;
AC.DefaultInitializeLiveInternals = false;
AC.IndirectCalleeSpecializationCallback =
[&TM](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
Function &Callee, unsigned NumAssummedCallee) {
if (AMDGPU::isEntryFunctionCC(Callee.getCallingConv()))
return false;
// Singleton functions can be specialized.
if (NumAssummedCallee == 1)
return true;
// Otherwise specialize uniform values.
const auto &TTI = TM.getTargetTransformInfo(*CB.getCaller());
return TTI.isAlwaysUniform(CB.getCalledOperand());
};
AC.IPOAmendableCB = [](const Function &F) {
return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
};
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/IPO/Attributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3836,7 +3836,7 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
if (MaxSpecializationPerCB.getNumOccurrences()) {
AC.IndirectCalleeSpecializationCallback =
[&](Attributor &, const AbstractAttribute &AA, CallBase &CB,
Function &Callee) {
Function &Callee, unsigned) {
if (MaxSpecializationPerCB == 0)
return false;
auto &Set = IndirectCalleeTrackingMap[&CB];
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/IPO/AttributorAttributes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12347,7 +12347,8 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
SmallVector<Function *, 8> SkippedAssumedCallees;
SmallVector<std::pair<CallInst *, Instruction *>> NewCalls;
for (Function *NewCallee : AssumedCallees) {
if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee)) {
if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee,
AssumedCallees.size())) {
SkippedAssumedCallees.push_back(NewCallee);
SpecializedForAllCallees = false;
continue;
Expand Down
4 changes: 3 additions & 1 deletion llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
; CHECK-NEXT: call void [[FPTR]]()
; CHECK-NEXT: call void [[FPTR]](), !callees [[META0:![0-9]+]]
; CHECK-NEXT: ret void
;
%fptr = select i1 %cond, ptr @empty, ptr @also_empty
Expand All @@ -253,3 +253,5 @@ attributes #0 = { "amdgpu-no-agpr" }
; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" }
;.
; CHECK: [[META0]] = !{ptr @also_empty, ptr @empty}
;.
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ define internal void @direct() {
; CHECK-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
; CHECK-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
; CHECK-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
; CHECK-NEXT: call void [[FP]]()
; CHECK-NEXT: call void @indirect()
; CHECK-NEXT: ret void
;
%fptr = alloca ptr, addrspace(5)
Expand All @@ -36,5 +36,5 @@ define amdgpu_kernel void @test_direct_indirect_call() {
}
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
;.
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 {
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
; ATTRIBUTOR_GCN-NEXT: call void @indirect()
; ATTRIBUTOR_GCN-NEXT: ret void
;
%fptr = alloca ptr, addrspace(5)
Expand All @@ -43,5 +43,5 @@ attributes #0 = { "amdgpu-no-dispatch-id" }
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
66 changes: 66 additions & 0 deletions llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck --check-prefixes=CHECK,OW %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -attributor-assume-closed-world=1 %s | FileCheck --check-prefixes=CHECK,CW %s

target datalayout = "A5"

@G = global i32 0, align 4

;.
; CHECK: @G = global i32 0, align 4
;.
define void @bar() {
; CHECK-LABEL: define {{[^@]+}}@bar
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: store i32 1, ptr @G, align 4
; CHECK-NEXT: ret void
;
entry:
store i32 1, ptr @G, align 4
ret void
}

define ptr @helper() {
; CHECK-LABEL: define {{[^@]+}}@helper
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret ptr @bar
;
entry:
ret ptr @bar
}

define amdgpu_kernel void @foo(ptr noundef %fp) {
; OW-LABEL: define {{[^@]+}}@foo
; OW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
; OW-NEXT: entry:
; OW-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; OW-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
; OW-NEXT: call void [[FP]]()
; OW-NEXT: ret void
;
; CW-LABEL: define {{[^@]+}}@foo
; CW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
; CW-NEXT: entry:
; CW-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; CW-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
; CW-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(5) [[FP_ADDR]], align 8
; CW-NEXT: call void @bar()
; CW-NEXT: ret void
;
entry:
%fp.addr = alloca ptr, addrspace(5)
store ptr %fp, ptr addrspace(5) %fp.addr
%load = load ptr, ptr addrspace(5) %fp.addr
call void %load()
ret void
}

;.
; OW: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
; CW: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CW: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
14 changes: 9 additions & 5 deletions llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ define amdgpu_kernel void @test_simple_indirect_call() {
; ATTRIBUTOR_GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr [[FPTR_CAST]], align 8
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
; ATTRIBUTOR_GCN-NEXT: call void @indirect()
; ATTRIBUTOR_GCN-NEXT: ret void
;
; GFX9-LABEL: test_simple_indirect_call:
Expand Down Expand Up @@ -75,12 +75,16 @@ define amdgpu_kernel void @test_simple_indirect_call() {
ret void
}


!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
;.
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
; AKF_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
;.
; ATTRIBUTOR_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
;.

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}

0 comments on commit aa74047

Please sign in to comment.