Skip to content

Commit

Permalink
[SYCL][NVPTX] Do not decompose SYCL functor unless necessary (#14434)
Browse files Browse the repository at this point in the history
CUDA backend can support passing pointer in the generic address space.
The patch prevent the decomposition of the SYCL functor if there is no
special types in it.

---------

Signed-off-by: Victor Lomuller <victor@codeplay.com>
  • Loading branch information
Naghasan authored Jul 18, 2024
1 parent ac98c33 commit 7a9d3b1
Show file tree
Hide file tree
Showing 10 changed files with 246 additions and 78 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL
LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions")
LANGOPT(SYCLExplicitSIMD , 1, 0, "SYCL compilation with explicit SIMD extension")
LANGOPT(EnableDAEInSpirKernels , 1, 0, "Enable Dead Argument Elimination in SPIR kernels")
LANGOPT(SYCLDecomposeStruct, 1, 1, "Force top level decomposition of SYCL functor")
LANGOPT(
SYCLValueFitInMaxInt, 1, 1,
"SYCL compiler assumes value fits within MAX_INT for member function of "
Expand Down
7 changes: 7 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -4028,6 +4028,13 @@ defm sycl_instrument_device_code
BothFlags<[], [ClangOption, CLOption, DXCOption, CC1Option], " Instrumentation and Tracing "
"Technology (ITT) instrumentation intrinsics calls "
"(experimental)">>;
defm sycl_decompose_functor
: BoolFOption<"sycl-decompose-functor",
LangOpts<"SYCLDecomposeStruct">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption], "Do">,
NegFlag<SetFalse, [], [ClangOption], "Do not">,
BothFlags<[], [ClangOption, CLOption, DXCOption, CC1Option],
" decompose SYCL functor if possible (experimental, CUDA only)">>;
def flink_huge_device_code : Flag<["-"], "flink-huge-device-code">,
Group<Link_Group>, HelpText<"Generate and use a custom linker script for huge"
" device code sections">;
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5447,6 +5447,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-fsycl-allow-func-ptr");
}

Args.AddLastArg(CmdArgs, options::OPT_fsycl_decompose_functor,
options::OPT_fno_sycl_decompose_functor);

// Forward -fsycl-instrument-device-code option to cc1. This option will
// only be used for SPIR/SPIR-V based targets.
if (Triple.isSPIROrSPIRV())
Expand Down
192 changes: 148 additions & 44 deletions clang/lib/Sema/SemaSYCL.cpp

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions clang/test/CodeGenSYCL/kernel-handler.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT

// This test checks IR generated when kernel_handler argument
Expand All @@ -23,7 +23,8 @@ void test(int val) {
}

// ALL: define dso_local{{ spir_kernel | }}void @{{.*}}test_kernel_handler{{[^(]*}}
// ALL-SAME: (i32 noundef %_arg_a, ptr addrspace(1) noundef align 1 %_arg__specialization_constants_buffer)
// NONATIVESUPPORT-SAME: (ptr noundef byval(%class.anon) align 4 %_arg__sycl_functor, ptr addrspace(1) noundef align 1 %_arg__specialization_constants_buffer)
// NATIVESUPPORT-SAME: (i32 noundef %_arg_a, ptr addrspace(1) noundef align 1 %_arg__specialization_constants_buffer)
// ALL: %kh = alloca %"class.sycl::_V1::kernel_handler", align 1

// NONATIVESUPPORT: %[[KH:[0-9]+]] = load ptr addrspace(1), ptr %_arg__specialization_constants_buffer.addr, align 8
Expand Down
10 changes: 5 additions & 5 deletions clang/test/CodeGenSYCL/nvvm-annotations.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST

// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST
// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST

// Tests that certain SYCL kernel parameters are annotated with "grid_constant" for supported microarchitectures.

Expand All @@ -18,7 +18,7 @@ int main() {
} s;

q.submit([&](handler &h) {
// CHECK: define{{.*}} void @[[FUNC1:.*kernel_grid_const_params]](ptr noundef byval(%struct.S) align 4 %_arg_s)
// CHECK: define{{.*}} void @[[FUNC1:.*kernel_grid_const_params]](ptr noundef byval(%class.anon) align 4 %_arg__sycl_functor)
h.single_task<class kernel_grid_const_params>([=]() { (void) s;});
});

Expand Down
10 changes: 10 additions & 0 deletions clang/test/Driver/sycl-offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -542,3 +542,13 @@
// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK: --dependent-lib=sycl{{[0-9]*}}-previewd
// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}.lib
// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}-preview.lib

/// ###########################################################################

/// Check -fsycl-decompose-functor behaviors from source
// RUN: %clang -### -fsycl-decompose-functor -target x86_64-unknown-linux-gnu -fsycl -o %t.out %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-DECOMP %s
// RUN: %clang -### -fno-sycl-decompose-functor -target x86_64-unknown-linux-gnu -fsycl -o %t.out %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-NODECOMP %s
// CHK-DECOMP: -fsycl-decompose-functor
// CHK-NODECOMP: -fno-sycl-decompose-functor
6 changes: 3 additions & 3 deletions clang/test/SemaSYCL/kernel-arg-opt-report.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,10 +348,10 @@ int main() {
// SPIR-NEXT: String: 'Arg '
// SPIR-NEXT: Argument: '13'
// SPIR-NEXT: String: ':'
// SPIR-NEXT: String: ''
// SPIR-NEXT: String: A
// SPIR-NEXT: String: 'Compiler generated argument for decomposed struct/class,'
// SPIR-NEXT: String: KernelFunctor
// SPIR-NEXT: String: ' ('
// SPIR-NEXT: String: ''
// SPIR-NEXT: String: 'Field:A, '
// SPIR-NEXT: String: 'Type:'
// SPIR-NEXT: String: int
// SPIR-NEXT: String: ', '
Expand Down
34 changes: 10 additions & 24 deletions clang/test/SemaSYCL/kernel-handler.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NONATIVESUPPORT
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fno-sycl-decompose-functor -triple nvptx64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NONATIVESUPPORT
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NATIVESUPPORT

// This test checks that the compiler handles kernel_handler type (for
Expand Down Expand Up @@ -28,19 +28,12 @@ int main() {
}

// Check test_kernel_handler parameters
// NONATIVESUPPORT: FunctionDecl {{.*}}test_kernel_handler{{.*}} 'void (int, __global char *)'
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg_a 'int'
// NONATIVESUPPORT: FunctionDecl {{.*}}test_kernel_handler{{.*}} 'void ((lambda at {{.*}}kernel-handler.cpp{{.*}}), __global char *)'
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__sycl_functor '(lambda at {{.*}}'
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__specialization_constants_buffer '__global char *'

// Check declaration and initialization of kernel object local clone
// NONATIVESUPPORT-NEXT: CompoundStmt
// NONATIVESUPPORT-NEXT: DeclStmt
// NONATIVESUPPORT-NEXT: VarDecl {{.*}} cinit
// NONATIVESUPPORT-NEXT: InitListExpr
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_a' 'int'

// Check declaration and initialization of kernel handler local clone using default constructor
// NONATIVESUPPORT-NEXT: CompoundStmt
// NONATIVESUPPORT-NEXT: DeclStmt
// NONATIVESUPPORT-NEXT: VarDecl {{.*}} callinit
// NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}}'sycl::kernel_handler' 'void () noexcept'
Expand All @@ -58,26 +51,19 @@ int main() {
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'void (sycl::kernel_handler) const' lvalue CXXMethod {{.*}} 'operator()' 'void (sycl::kernel_handler) const'
// Kernel body with clones
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'const (lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue Var {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})'
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue ParmVar {{.*}} '_arg__sycl_functor' '(lambda at {{.*}}kernel-handler.cpp{{.*}})'
// NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}} 'sycl::kernel_handler' 'void (const kernel_handler &) noexcept'
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'const kernel_handler':'const sycl::kernel_handler' lvalue
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'kernel_handler':'sycl::kernel_handler' lvalue Var {{.*}} 'kh' 'kernel_handler':'sycl::kernel_handler'

// Check test_pfwg_kernel_handler parameters
// NONATIVESUPPORT: FunctionDecl {{.*}}test_pfwg_kernel_handler{{.*}} 'void (int, __global char *)'
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg_a 'int'
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__specialization_constants_buffer '__global char *'

// Check declaration and initialization of kernel object local clone
// NONATIVESUPPORT-NEXT: CompoundStmt
// NONATIVESUPPORT-NEXT: DeclStmt
// NONATIVESUPPORT-NEXT: VarDecl {{.*}} cinit
// NONATIVESUPPORT-NEXT: InitListExpr
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_a' 'int'
// NONATIVESUPPORT: FunctionDecl {{.*}}test_pfwg_kernel_handler{{.*}} 'void ((lambda at {{.*}}kernel-handler.cpp{{.*}}), __global char *)'
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__sycl_functor '(lambda at {{.*}}kernel-handler.cpp{{.*}})'
// NONATIVESUPPORT-NEXT: SYCLScopeAttr {{.*}} Implicit WorkGroup
// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__specialization_constants_buffer '__global char *'

// Check declaration and initialization of kernel handler local clone using default constructor
// NONATIVESUPPORT-NEXT: CompoundStmt
// NONATIVESUPPORT-NEXT: DeclStmt
// NONATIVESUPPORT-NEXT: VarDecl {{.*}} callinit
// NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}}'sycl::kernel_handler' 'void () noexcept'
Expand All @@ -96,7 +82,7 @@ int main() {

// Kernel body with clones
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'const (lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue Var {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})'
// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue ParmVar {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})'
// NONATIVESUPPORT-NEXT: CXXTemporaryObjectExpr {{.*}} 'group<1>':'sycl::group<>' 'void () noexcept' zeroing
// NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}}'kernel_handler':'sycl::kernel_handler' 'void (const kernel_handler &) noexcept'
// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}}'const sycl::kernel_handler' lvalue
Expand Down
56 changes: 56 additions & 0 deletions clang/test/SemaSYCL/no-decomp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// RUN: %clang_cc1 -fsycl-is-device -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=DECOMP
// RUN: %clang_cc1 -fsycl-is-device -fno-sycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=NODECOMP
// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=DECOMP

#include "Inputs/sycl.hpp"

class with_acc {
public:
int *d;
sycl::accessor<char, 1, sycl::access::mode::read> AccField;
};

class wrapping_acc {
public:
with_acc acc;
void operator()() const {
}
};

class pointer_wrap {
public:
int *d;
void operator()() const {
}
};

class empty {
public:
void operator()() const {
}
};

int main() {
sycl::queue q;

q.submit([&](sycl::handler &cgh) {
wrapping_acc acc;
cgh.single_task(acc);
});
// ALL: FunctionDecl {{.*}} _ZTS12wrapping_acc 'void (__wrapper_class, __global char *, sycl::range<1>, sycl::range<1>, sycl::id<1>)'

q.submit([&](sycl::handler &cgh) {
pointer_wrap ptr;
cgh.single_task(ptr);
});
// NODECOMP: FunctionDecl {{.*}} _ZTS12pointer_wrap 'void (pointer_wrap)'
// DECOMP: FunctionDecl {{.*}} _ZTS12pointer_wrap 'void (__global int *)'

q.submit([&](sycl::handler &cgh) {
empty e;
cgh.single_task(e);
});
// ALL: FunctionDecl {{.*}} _ZTS5empty 'void ()'

return 0;
}

0 comments on commit 7a9d3b1

Please sign in to comment.