Skip to content

Commit

Permalink
[SYCL] Generate the offload kernel Part 2 (llvm#46)
Browse files Browse the repository at this point in the history
This contains bug fixes and applies review comments.
It also contains changes accidentally removed in Tom's
fork. This commit should be squashed with commit
de5cea5.
  • Loading branch information
elizabethandrews committed Sep 19, 2024
1 parent 90ceed1 commit 6715380
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 54 deletions.
13 changes: 12 additions & 1 deletion clang/lib/AST/ASTContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11994,8 +11994,19 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) {

// SYCL kernel entry point functions are used to generate and emit
// the offload kernel.
if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelEntryPointAttr>())
if (LangOpts.SYCLIsDevice) {
if (D->hasAttr<SYCLKernelEntryPointAttr>())
return true;
// FIXME: In SYCL device compilation, the only functions that
// must be emitted are the SYCL kernel entry points, functions
// called from the SYCL kernel, and functions declared with
// SYCL_EXTERNAL. However, some existing tests fail if the set
// of emitted functions is limited to these. Once support is
// implemented for SYCL_EXTERNAL, this check should be modified
// to return false. The tests should be modified to include
// SYCL_EXTERNAL.
// return false;
}

// Constructors and destructors are required.
if (FD->hasAttr<ConstructorAttr>() || FD->hasAttr<DestructorAttr>())
Expand Down
37 changes: 23 additions & 14 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3168,6 +3168,24 @@ void CodeGenModule::EmitDeferred() {
CurDeclsToEmit.swap(DeferredDeclsToEmit);

for (GlobalDecl &D : CurDeclsToEmit) {
// If the Decl corresponds to a SYCL kernel entry point function, generate
// and emit the corresponding SYCL kernel caller function, i.e., the
// offload kernel. The generation of the offload kernel needs to happen
// first in this loop, in order to avoid generating IR for the SYCL kernel
// entry point function.
if (const auto *FD = D.getDecl()->getAsFunction()) {
if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelEntryPointAttr>() &&
FD->isDefined()) {
// Generate and emit the offload kernel
EmitSYCLKernelCaller(FD, getContext());
// The offload kernel invokes the operator method of the SYCL kernel
// object i.e. the SYCL kernel function is invoked. Emit this function.
EmitDeferred();
// Do not emit the SYCL kernel entry point function.
continue;
}
}

// We should call GetAddrOfGlobal with IsForDefinition set to true in order
// to get GlobalValue with exactly the type we need, not something that
// might have been created for another decl with the same mangled name but
Expand Down Expand Up @@ -3197,20 +3215,8 @@ void CodeGenModule::EmitDeferred() {
if (LangOpts.OpenMP && OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(D))
continue;

// If the Decl corresponds to a SYCL kernel entry point function, generate
// and emit the corresponding SYCL kernel caller function, i.e the
// offload kernel. Otherwise, emit the definition and move on to the next
// one.
const FunctionDecl *FD = nullptr;
if (LangOpts.SYCLIsDevice &&
(FD = D.getDecl()->getAsFunction()) != nullptr &&
FD->hasAttr<SYCLKernelEntryPointAttr>() &&
FD->isDefined()) {
// Generate and emit the offload kernel
EmitSYCLKernelCaller(FD, getContext());
} else {
EmitGlobalDefinition(D, GV);
}
// Otherwise, emit the definition and move on to the next one.
EmitGlobalDefinition(D, GV);

// If we found out that we need to emit more decls, do that recursively.
// This has the advantage that the decls are emitted in a DFS and related
Expand Down Expand Up @@ -3515,6 +3521,9 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) {
// Defer until all versions have been semantically checked.
if (FD->hasAttr<TargetVersionAttr>() && !FD->isMultiVersion())
return false;
// Defer emission of SYCL kernel entry point functions.
if (LangOpts.SYCLIsDevice && FD->hasAttr<SYCLKernelEntryPointAttr>())
return false;
}
if (const auto *VD = dyn_cast<VarDecl>(Global)) {
if (Context.getInlineVariableDefinitionKind(VD) ==
Expand Down
13 changes: 8 additions & 5 deletions clang/lib/CodeGen/CodeGenSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,15 @@ void CodeGenModule::EmitSYCLKernelCaller(const FunctionDecl *KernelEntryPointFn,
llvm::FunctionType *FnTy = getTypes().GetFunctionType(FnInfo);

// Retrieve the generated name for the SYCL kernel caller function
const auto *SKEPAttr = KernelEntryPointFn->getAttr<SYCLKernelEntryPointAttr>();
assert(SKEPAttr && "Missing sycl_kernel_entry_point attribute");
CanQualType KernelNameType = Ctx.getCanonicalType(SKEPAttr->getKernelName());
const SYCLKernelInfo &SKI = Ctx.SYCLKernels.at(KernelNameType);
const auto *KernelEntryPointAttr =
KernelEntryPointFn->getAttr<SYCLKernelEntryPointAttr>();
assert(KernelEntryPointAttr && "Missing sycl_kernel_entry_point attribute");
CanQualType KernelNameType =
Ctx.getCanonicalType(KernelEntryPointAttr->getKernelName());
const SYCLKernelInfo *KernelInfo = Ctx.findSYCLKernelInfo(KernelNameType);
assert(KernelInfo && "Type does not correspond to a kernel name");
auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalVariable::ExternalLinkage,
SKI.GetKernelName(), &getModule());
KernelInfo->GetKernelName(), &getModule());

// Emit the SYCL kernel caller function
CodeGenFunction CGF(*this);
Expand Down
81 changes: 47 additions & 34 deletions clang/test/CodeGenSYCL/kernel-caller-generation.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -triple spir64 %s -o - | FileCheck %s
// RUN: %clang_cc1 -fsycl-is-device -emit-llvm -triple spir64 %s -o - | FileCheck --check-prefix=CHECK-DEVICE %s
// RUN: %clang_cc1 -fsycl-is-host -emit-llvm -triple x86_64 %s -o - | FileCheck --check-prefix=CHECK-HOST %s

// Test the generation of the SYCL kernel caller function. The SYCL kernel
// caller function should not be generated during host compilation.

template <typename name, typename Func>
__attribute__((sycl_kernel_entry_point(name))) void kernel_single_task(const Func kernelFunc) {
Expand All @@ -9,9 +13,10 @@ struct single_purpose_kernel_name;
// Callable type passed by value to single_purpose_kernel_task, which invokes
// its call operator. Only the declaration of operator() is provided here.
struct single_purpose_kernel {
void operator()() const;
};

__attribute__((sycl_kernel_entry_point(single_purpose_kernel_name)))
void single_purpose_kernel_task(single_purpose_kernel k) {
k();
void single_purpose_kernel_task(single_purpose_kernel kernelFunc) {
kernelFunc();
}

int main() {
Expand All @@ -20,40 +25,48 @@ int main() {
[=]() {
(void) capture;
});
single_purpose_kernel obj;
single_purpose_kernel_task(obj);
}

// CHECK-HOST-NOT: __sycl_kernel_caller

// IR for compiler generated SYCL kernel caller function :
// The arguments of the SYCL kernel caller function correspond to either the SYCL Kernel
// Object, or decomposed fields of the SYCL kernel object if special SYCL types are
// captured (not yet supported). In the latter case, the SYCL kernel object is reconstituted
// in the body of the SYCL caller function. The body of the SYCL kernel caller function
// then invokes the SYCL kernel i.e. the operator method of the SYCL kernel object.
// IR for compiler generated SYCL kernel caller function corresponding to
// single_purpose_kernel_name:

// CHECK: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
// FIXME: Pointer Alignment mismatch with syclos must be clarified and confirmed.
// CHECK: define dso_local spir_kernel void @_Z20__sycl_kernel_callerIZ4mainE11test_kernelEvv(ptr noundef byval(%class.anon) align 4 %kernelFunc) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: %kernelFunc.ascast = addrspacecast ptr %kernelFunc to ptr addrspace(4)
// CHECK-NEXT: call spir_func void @_ZZ4mainENKUlvE_clEv
// CHECK-SAME: (ptr addrspace(4) noundef align 4 dereferenceable_or_null(4) %kernelFunc.ascast) #[[ATTR1:[0-9]+]]
// CHECK-NEXT: ret void
// CHECK-NEXT:}
// CHECK-DEVICE: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
// CHECK-DEVICE: define dso_local spir_kernel void @_Z20__sycl_kernel_callerI26single_purpose_kernel_nameEvv
// CHECK-DEVICE-SAME: (ptr noundef byval(%struct.single_purpose_kernel) align 1 %kernelFunc) #[[ATTR0:[0-9]+]] {
// CHECK-DEVICE-NEXT: entry:
// CHECK-DEVICE-NEXT: %kernelFunc.ascast = addrspacecast ptr %kernelFunc to ptr addrspace(4)
// CHECK-DEVICE-NEXT: call spir_func void @_ZNK21single_purpose_kernelclEv
// CHECK-DEVICE-SAME: (ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %kernelFunc.ascast) #[[ATTR1:[0-9]+]]
// CHECK-DEVICE-NEXT: ret void
// CHECK-DEVICE-NEXT:}

// IR for compiler generated SYCL kernel caller function corresponding to
// test_kernel:

// CHECK-DEVICE: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
// CHECK-DEVICE: define dso_local spir_kernel void @_Z20__sycl_kernel_callerIZ4mainE11test_kernelEvv(ptr noundef byval(%class.anon) align 4 %kernelFunc) #[[ATTR0]] {
// CHECK-DEVICE-NEXT: entry:
// CHECK-DEVICE-NEXT: %kernelFunc.ascast = addrspacecast ptr %kernelFunc to ptr addrspace(4)
// CHECK-DEVICE-NEXT: call spir_func void @_ZZ4mainENKUlvE_clEv
// CHECK-DEVICE-SAME: (ptr addrspace(4) noundef align 4 dereferenceable_or_null(4) %kernelFunc.ascast) #[[ATTR1]]
// CHECK-DEVICE-NEXT: ret void
// CHECK-DEVICE-NEXT:}

// IR for operator method of kernel object:
// CHECK: define internal spir_func void @_ZZ4mainENKUlvE_clEv
// FIXME: Pointer Alignment mismatch with syclos must be clarified and confirmed.
// FIXME: !srcloc metadata present in syclos (with incorrect value?). Why is this not present in llvm.org ?
// CHECK-SAME: (ptr addrspace(4) noundef align 4 dereferenceable_or_null(4) %this) #[[ATTR0]] align 2 {
// CHECK-NEXT: entry:
// CHECK-NEXT: %this.addr = alloca ptr addrspace(4), align 8
// CHECK-NEXT: %this.addr.ascast = addrspacecast ptr %this.addr to ptr addrspace(4)
// CHECK-NEXT: store ptr addrspace(4) %this, ptr addrspace(4) %this.addr.ascast, align 8
// CHECK-NEXT: %this1 = load ptr addrspace(4), ptr addrspace(4) %this.addr.ascast, align 8
// CHECK-NEXT: %[[CAPTURE:.+]] = getelementptr inbounds %class.anon, ptr addrspace(4) %this1, i32 0, i32 0
// CHECK-NEXT: ret void
// CHECK-NEXT:}

// FIXME: Additional function attribute "sycl-optlevel"="0" generated in syclos vs llvm.org by an LLVM pass.
// CHECK: #[[ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK: #[[ATTR1]] = { convergent nounwind }
// CHECK-DEVICE: define internal spir_func void @_ZZ4mainENKUlvE_clEv
// CHECK-DEVICE-SAME: (ptr addrspace(4) noundef align 4 dereferenceable_or_null(4) %this) #[[ATTR0]] align 2 {
// CHECK-DEVICE-NEXT: entry:
// CHECK-DEVICE-NEXT: %this.addr = alloca ptr addrspace(4), align 8
// CHECK-DEVICE-NEXT: %this.addr.ascast = addrspacecast ptr %this.addr to ptr addrspace(4)
// CHECK-DEVICE-NEXT: store ptr addrspace(4) %this, ptr addrspace(4) %this.addr.ascast, align 8
// CHECK-DEVICE-NEXT: %this1 = load ptr addrspace(4), ptr addrspace(4) %this.addr.ascast, align 8
// CHECK-DEVICE-NEXT: %[[CAPTURE:.+]] = getelementptr inbounds %class.anon, ptr addrspace(4) %this1, i32 0, i32 0
// CHECK-DEVICE-NEXT: ret void
// CHECK-DEVICE-NEXT:}

// CHECK-DEVICE: #[[ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK-DEVICE: #[[ATTR1]] = { convergent nounwind }

0 comments on commit 6715380

Please sign in to comment.