Skip to content

Commit

Permalink
[amdgpu] Add amdgpu_kernel calling conv attribute to clang
Browse files Browse the repository at this point in the history
Allows emitting define amdgpu_kernel void @func() IR from C or C++.

This replaces the current workflow which is to write a stub in opencl that
calls an external C function implemented in C++ combined through llvm-link.

Calling the resulting function still requires a manual implementation of the
ABI from the host side. The primary application is for more rapid debugging
of the amdgpu backend by permuting a C or C++ test file instead of manually
updating an IR file.

Implementation closely follows D54425. Non-amd reviewers from there.

Reviewed By: yaxunl

Differential Revision: https://reviews.llvm.org/D125970
  • Loading branch information
JonChesterfield committed May 20, 2022
1 parent 5537b22 commit 83c431f
Show file tree
Hide file tree
Showing 13 changed files with 115 additions and 0 deletions.
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1857,6 +1857,11 @@ def AMDGPUNumVGPR : InheritableAttr {
let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
}

def AMDGPUKernelCall : DeclOrTypeAttr {
let Spellings = [Clang<"amdgpu_kernel">];
let Documentation = [Undocumented];
}

def BPFPreserveAccessIndex : InheritableAttr,
TargetSpecificAttr<TargetBPF> {
let Spellings = [Clang<"preserve_access_index">];
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Basic/Specifiers.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ namespace clang {
CC_PreserveAll, // __attribute__((preserve_all))
CC_AArch64VectorCall, // __attribute__((aarch64_vector_pcs))
CC_AArch64SVEPCS, // __attribute__((aarch64_sve_pcs))
CC_AMDGPUKernelCall, // __attribute__((amdgpu_kernel))
};

/// Checks whether the given calling convention supports variadic
Expand Down
1 change: 1 addition & 0 deletions clang/lib/AST/ItaniumMangle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3150,6 +3150,7 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) {
case CC_AAPCS_VFP:
case CC_AArch64VectorCall:
case CC_AArch64SVEPCS:
case CC_AMDGPUKernelCall:
case CC_IntelOclBicc:
case CC_SpirFunction:
case CC_OpenCLKernel:
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/AST/Type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3186,6 +3186,7 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) {
case CC_AAPCS_VFP: return "aapcs-vfp";
case CC_AArch64VectorCall: return "aarch64_vector_pcs";
case CC_AArch64SVEPCS: return "aarch64_sve_pcs";
case CC_AMDGPUKernelCall: return "amdgpu_kernel";
case CC_IntelOclBicc: return "intel_ocl_bicc";
case CC_SpirFunction: return "spir_function";
case CC_OpenCLKernel: return "opencl_kernel";
Expand Down Expand Up @@ -3622,6 +3623,7 @@ bool AttributedType::isCallingConv() const {
case attr::VectorCall:
case attr::AArch64VectorPcs:
case attr::AArch64SVEPcs:
case attr::AMDGPUKernelCall:
case attr::Pascal:
case attr::MSABI:
case attr::SysVABI:
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/AST/TypePrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,9 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info,
case CC_AArch64SVEPCS:
OS << "__attribute__((aarch64_sve_pcs))";
break;
case CC_AMDGPUKernelCall:
OS << "__attribute__((amdgpu_kernel))";
break;
case CC_IntelOclBicc:
OS << " __attribute__((intel_ocl_bicc))";
break;
Expand Down Expand Up @@ -1754,6 +1757,7 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
}
case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break;
case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break;
case attr::AMDGPUKernelCall: OS << "amdgpu_kernel"; break;
case attr::IntelOclBicc: OS << "inteloclbicc"; break;
case attr::PreserveMost:
OS << "preserve_most";
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Targets/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
return CCCR_Warning;
case CC_C:
case CC_OpenCLKernel:
case CC_AMDGPUKernelCall:
return CCCR_OK;
}
}
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall;
case CC_AArch64VectorCall: return llvm::CallingConv::AArch64_VectorCall;
case CC_AArch64SVEPCS: return llvm::CallingConv::AArch64_SVE_VectorCall;
case CC_AMDGPUKernelCall: return llvm::CallingConv::AMDGPU_KERNEL;
case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC;
case CC_OpenCLKernel: return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv();
case CC_PreserveMost: return llvm::CallingConv::PreserveMost;
Expand Down Expand Up @@ -232,6 +233,9 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D,
if (D->hasAttr<AArch64SVEPcsAttr>())
return CC_AArch64SVEPCS;

if (D->hasAttr<AMDGPUKernelCallAttr>())
return CC_AMDGPUKernelCall;

if (D->hasAttr<IntelOclBiccAttr>())
return CC_IntelOclBicc;

Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGDebugInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1335,6 +1335,7 @@ static unsigned getDwarfCC(CallingConv CC) {
case CC_SpirFunction:
return llvm::dwarf::DW_CC_LLVM_SpirFunction;
case CC_OpenCLKernel:
case CC_AMDGPUKernelCall:
return llvm::dwarf::DW_CC_LLVM_OpenCLKernel;
case CC_Swift:
return llvm::dwarf::DW_CC_LLVM_Swift;
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5011,6 +5011,9 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
case ParsedAttr::AT_AArch64SVEPcs:
D->addAttr(::new (S.Context) AArch64SVEPcsAttr(S.Context, AL));
return;
case ParsedAttr::AT_AMDGPUKernelCall:
D->addAttr(::new (S.Context) AMDGPUKernelCallAttr(S.Context, AL));
return;
case ParsedAttr::AT_IntelOclBicc:
D->addAttr(::new (S.Context) IntelOclBiccAttr(S.Context, AL));
return;
Expand Down Expand Up @@ -5171,6 +5174,9 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
case ParsedAttr::AT_AArch64SVEPcs:
CC = CC_AArch64SVEPCS;
break;
case ParsedAttr::AT_AMDGPUKernelCall:
CC = CC_AMDGPUKernelCall;
break;
case ParsedAttr::AT_RegCall:
CC = CC_X86RegCall;
break;
Expand Down Expand Up @@ -8784,6 +8790,7 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case ParsedAttr::AT_PreserveAll:
case ParsedAttr::AT_AArch64VectorPcs:
case ParsedAttr::AT_AArch64SVEPcs:
case ParsedAttr::AT_AMDGPUKernelCall:
handleCallConvAttr(S, D, AL);
break;
case ParsedAttr::AT_Suppress:
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Sema/SemaType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
case ParsedAttr::AT_VectorCall: \
case ParsedAttr::AT_AArch64VectorPcs: \
case ParsedAttr::AT_AArch64SVEPcs: \
case ParsedAttr::AT_AMDGPUKernelCall: \
case ParsedAttr::AT_MSABI: \
case ParsedAttr::AT_SysVABI: \
case ParsedAttr::AT_Pcs: \
Expand Down Expand Up @@ -7482,6 +7483,8 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) {
return createSimpleAttr<AArch64VectorPcsAttr>(Ctx, Attr);
case ParsedAttr::AT_AArch64SVEPcs:
return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr);
case ParsedAttr::AT_AMDGPUKernelCall:
return createSimpleAttr<AMDGPUKernelCallAttr>(Ctx, Attr);
case ParsedAttr::AT_Pcs: {
// The attribute may have had a fixit applied where we treated an
// identifier as a string literal. The contents of the string are valid,
Expand Down
83 changes: 83 additions & 0 deletions clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// REQUIRES: amdgpu-registered-target

// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s

// Derived from CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu by deleting references to HOST
// The original test passes the result through opt O2, but that seems to introduce invalid
// addrspace casts which are not being fixed as part of the present change.

// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(i32* {{.*}} %x)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__attribute__((amdgpu_kernel)) void kernel1(int *x) {
x[0]++;
}

// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel2Ri(i32* {{.*}} nonnull align 4 dereferenceable(4) %x)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__attribute__((amdgpu_kernel)) void kernel2(int &x) {
x++;
}

// CHECK-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)*{{.*}} %x, i32 addrspace(1)*{{.*}} %y)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__attribute__((amdgpu_kernel)) void kernel3(__attribute__((address_space(2))) int *x,
__attribute__((address_space(1))) int *y) {
y[0] = x[0];
}

// COMMON-LABEL: define{{.*}} void @_Z4funcPi(i32*{{.*}} %x)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__attribute__((amdgpu_kernel)) void func(int *x) {
x[0]++;
}

struct S {
int *x;
float *y;
};
// `by-val` struct is passed by-indirect-alias (a mix of by-ref and indirect
// by-val). However, the enhanced address inferring pass should be able to
// assume they are global pointers.
//

// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel41S(%struct.S addrspace(4)*{{.*}} byref(%struct.S) align 8 %0)
__attribute__((amdgpu_kernel)) void kernel4(struct S s) {
s.x[0]++;
s.y[0] += 1.f;
}

// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel5P1S(%struct.S* {{.*}} %s)
__attribute__((amdgpu_kernel)) void kernel5(struct S *s) {
s->x[0]++;
s->y[0] += 1.f;
}

struct T {
float *x[2];
};
// `by-val` array is passed by-indirect-alias (a mix of by-ref and indirect
// by-val). However, the enhanced address inferring pass should be able to
// assume they are global pointers.
//
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel61T(%struct.T addrspace(4)*{{.*}} byref(%struct.T) align 8 %0)
__attribute__((amdgpu_kernel)) void kernel6(struct T t) {
t.x[0][0] += 1.f;
t.x[1][0] += 2.f;
}

// Check that coerced pointers retain the noalias attribute when qualified with __restrict.
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel7Pi(i32* noalias{{.*}} %x)
__attribute__((amdgpu_kernel)) void kernel7(int *__restrict x) {
x[0]++;
}

// Single element struct.
struct SS {
float *x;
};
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel82SS(float* %a.coerce)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__attribute__((amdgpu_kernel)) void kernel8(struct SS a) {
*a.x += 3.f;
}

2 changes: 2 additions & 0 deletions clang/test/Sema/callingconv.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ int __attribute__((pcs("foo"))) pcs7(void); // expected-error {{invalid PCS type
int __attribute__((aarch64_vector_pcs)) aavpcs(void); // expected-warning {{'aarch64_vector_pcs' calling convention is not supported for this target}}
int __attribute__((aarch64_sve_pcs)) aasvepcs(void); // expected-warning {{'aarch64_sve_pcs' calling convention is not supported for this target}}

int __attribute__((amdgpu_kernel)) amdgpu_kernel(void); // expected-warning {{'amdgpu_kernel' calling convention is not supported for this target}}

// PR6361
void ctest3();
void __attribute__((cdecl)) ctest3() {}
Expand Down
1 change: 1 addition & 0 deletions clang/tools/libclang/CXType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,7 @@ CXCallingConv clang_getFunctionTypeCallingConv(CXType X) {
TCALLINGCONV(PreserveMost);
TCALLINGCONV(PreserveAll);
case CC_SpirFunction: return CXCallingConv_Unexposed;
case CC_AMDGPUKernelCall: return CXCallingConv_Unexposed;
case CC_OpenCLKernel: return CXCallingConv_Unexposed;
break;
}
Expand Down

0 comments on commit 83c431f

Please sign in to comment.