From 7a9d3b1e9483b69baa0b8c6f1097016efd52854c Mon Sep 17 00:00:00 2001 From: Victor Lomuller Date: Thu, 18 Jul 2024 10:06:29 +0100 Subject: [PATCH] [SYCL][NVPTX] Do not decompose SYCL functor unless necessary (#14434) CUDA backend can support passing pointer in the generic address space. The patch prevent the decomposition of the SYCL functor if there is no special types in it. --------- Signed-off-by: Victor Lomuller --- clang/include/clang/Basic/LangOptions.def | 1 + clang/include/clang/Driver/Options.td | 7 + clang/lib/Driver/ToolChains/Clang.cpp | 3 + clang/lib/Sema/SemaSYCL.cpp | 192 ++++++++++++++---- clang/test/CodeGenSYCL/kernel-handler.cpp | 5 +- clang/test/CodeGenSYCL/nvvm-annotations.cpp | 10 +- clang/test/Driver/sycl-offload.c | 10 + clang/test/SemaSYCL/kernel-arg-opt-report.cpp | 6 +- clang/test/SemaSYCL/kernel-handler.cpp | 34 +--- clang/test/SemaSYCL/no-decomp.cpp | 56 +++++ 10 files changed, 246 insertions(+), 78 deletions(-) create mode 100644 clang/test/SemaSYCL/no-decomp.cpp diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index c77c918529d7..84dad8c7f947 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -304,6 +304,7 @@ ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions") LANGOPT(SYCLExplicitSIMD , 1, 0, "SYCL compilation with explicit SIMD extension") LANGOPT(EnableDAEInSpirKernels , 1, 0, "Enable Dead Argument Elimination in SPIR kernels") +LANGOPT(SYCLDecomposeStruct, 1, 1, "Force top level decomposition of SYCL functor") LANGOPT( SYCLValueFitInMaxInt, 1, 1, "SYCL compiler assumes value fits within MAX_INT for member function of " diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index b18ea5e88130..caa30cf7995f 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4028,6 +4028,13 @@ defm sycl_instrument_device_code BothFlags<[], [ClangOption, CLOption, DXCOption, CC1Option], " Instrumentation and Tracing " "Technology (ITT) instrumentation intrinsics calls " "(experimental)">>; +defm sycl_decompose_functor + : BoolFOption<"sycl-decompose-functor", + LangOpts<"SYCLDecomposeStruct">, DefaultTrue, + PosFlag, + NegFlag, + BothFlags<[], [ClangOption, CLOption, DXCOption, CC1Option], + " decompose SYCL functor if possible (experimental, CUDA only)">>; def flink_huge_device_code : Flag<["-"], "flink-huge-device-code">, Group, HelpText<"Generate and use a custom linker script for huge" " device code sections">; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index acb6b59e21eb..dc32278c33f5 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5447,6 +5447,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fsycl-allow-func-ptr"); } + Args.AddLastArg(CmdArgs, options::OPT_fsycl_decompose_functor, + options::OPT_fno_sycl_decompose_functor); + // Forward -fsycl-instrument-device-code option to cc1. This option will // only be used for SPIR/SPIR-V based targets. if (Triple.isSPIROrSPIRV()) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 95a1f9e0b3c1..98cbe7cd5f68 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -314,6 +314,13 @@ ExprResult SemaSYCL::BuildSYCLBuiltinBaseTypeExpr(SourceLocation Loc, SYCLBuiltinBaseTypeExpr(Loc, SourceTy, Idx, BaseTy); } +/// Returns true if the target requires a new type. +/// This happens if a pointer to generic cannot be passed +static bool targetRequiresNewType(ASTContext &Context) { + llvm::Triple T = Context.getTargetInfo().getTriple(); + return !T.isNVPTX(); +} + // This information is from Section 4.13 of the SYCL spec // https://www.khronos.org/registry/SYCL/specs/sycl-1.2.1.pdf // This function returns false if the math lib function @@ -1467,6 +1474,27 @@ class KernelObjVisitor { public: KernelObjVisitor(SemaSYCL &S) : SemaSYCLRef(S) {} + static bool useTopLevelKernelObj(const CXXRecordDecl *KernelObj) { + // If the kernel is empty, "decompose" it so we don't generate arguments. + if (KernelObj->isEmpty()) + return false; + // FIXME: Workaround to not change large number of tests + // this is covered by the test below. + if (targetRequiresNewType(KernelObj->getASTContext())) + return false; + if (KernelObj->hasAttr() || + KernelObj->hasAttr()) + return false; + return true; + } + + template + void visitTopLevelRecord(const CXXRecordDecl *Owner, QualType RecordTy, + HandlerTys &...Handlers) { + (void)std::initializer_list{ + (Handlers.handleTopLevelStruct(Owner, RecordTy), 0)...}; + } + template void VisitRecordBases(const CXXRecordDecl *KernelFunctor, HandlerTys &... Handlers) { @@ -1485,6 +1513,20 @@ class KernelObjVisitor { void visitArray(const CXXRecordDecl *Owner, FieldDecl *Field, QualType ArrayTy, HandlerTys &...Handlers); + // A visitor for Kernel object to functions as defined in + // SyclKernelFieldHandler by iterating over fields and bases + // if they require decomposition or new type. + template + void VisitKernelRecord(const CXXRecordDecl *KernelObj, + QualType KernelFunctorTy, HandlerTys &...Handlers) { + if (!useTopLevelKernelObj(KernelObj)) { + VisitRecordBases(KernelObj, Handlers...); + VisitRecordFields(KernelObj, Handlers...); + } else { + visitTopLevelRecord(KernelObj, KernelFunctorTy, Handlers...); + } + } + // A visitor function that dispatches to functions as defined in // SyclKernelFieldHandler by iterating over a free function parameter list. template @@ -1537,6 +1579,13 @@ class SyclKernelFieldHandlerBase { virtual bool handleOtherType(FieldDecl *, QualType) { return true; } virtual bool handleOtherType(ParmVarDecl *, QualType) { return true; } + // Handle the SYCL kernel as a whole. This applies only when the target can + // support pointer to the generic address space as arguments and the functor + // doesn't have any SYCL special types. + virtual bool handleTopLevelStruct(const CXXRecordDecl *, QualType) { + return true; + } + // Handle a simple struct that doesn't need to be decomposed, only called on // handlers with VisitInsideSimpleContainers as false. Replaces // handleStructType, enterStruct, leaveStruct, and visiting of sub-elements. @@ -2098,10 +2147,9 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler { static constexpr const bool VisitNthArrayElement = false; SyclKernelDecompMarker(SemaSYCL &S) : SyclKernelFieldHandler(S) { - // In order to prevent checking this over and over, just add a dummy-base - // entry. - CollectionStack.push_back(true); - PointerStack.push_back(true); + // Base entry. + CollectionStack.push_back(false); + PointerStack.push_back(false); } bool handleSyclSpecialType(const CXXRecordDecl *, const CXXBaseSpecifier &, @@ -2121,7 +2169,7 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler { } bool handlePointerType(FieldDecl *, QualType) final { - PointerStack.back() = true; + PointerStack.back() = targetRequiresNewType(SemaSYCLRef.getASTContext()); return true; } @@ -2131,6 +2179,26 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler { return true; } + // Add Top level information to ease checks for processor. + bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { + CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); + assert(RD && "should not be null."); + if (CollectionStack.pop_back_val() || + SemaSYCLRef.getLangOpts().SYCLDecomposeStruct) { + if (!RD->hasAttr()) + RD->addAttr(SYCLRequiresDecompositionAttr::CreateImplicit( + SemaSYCLRef.getASTContext())); + PointerStack.pop_back(); + } else if (PointerStack.pop_back_val()) { + if (!RD->hasAttr()) + RD->addAttr(SYCLGenerateNewTypeAttr::CreateImplicit( + SemaSYCLRef.getASTContext())); + } + assert(CollectionStack.size() == 0); + assert(PointerStack.size() == 0); + return true; + } + bool enterStruct(const CXXRecordDecl *, FieldDecl *, QualType) final { CollectionStack.push_back(false); PointerStack.push_back(false); @@ -2898,6 +2966,12 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { return true; } + bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { + StringRef Name = "_arg__sycl_functor"; + addParam(Name, Ty); + return true; + } + bool handleNonDecompStruct(const CXXRecordDecl *RD, FieldDecl *FD, QualType Ty) final { // This is a field which should not be decomposed. @@ -3130,6 +3204,11 @@ class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler { return true; } + bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { + addParam(Ty); + return true; + } + bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final { addParam(Ty); @@ -3291,6 +3370,12 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { return true; } + bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { + addParam(DC.getParamVarDeclsForCurrentField()[0]->getType(), + "SYCL Functor"); + return true; + } + using SyclKernelFieldHandler::handleNonDecompStruct; bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final { @@ -3331,6 +3416,7 @@ static bool isESIMDKernelType(CXXMethodDecl *CallOperator) { } class SyclKernelBodyCreator : public SyclKernelFieldHandler { + bool UseTopLevelKernelObj; SyclKernelDeclCreator &DeclCreator; llvm::SmallVector BodyStmts; llvm::SmallVector CollectionInitExprs; @@ -3342,7 +3428,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { // nextElement. llvm::SmallVector, 8> ArrayInfos; VarDecl *KernelObjClone; - InitializedEntity VarEntity; + std::optional VarEntity; llvm::SmallVector MemberExprBases; llvm::SmallVector ArrayParamBases; FunctionDecl *KernelCallerFunc; @@ -3373,10 +3459,12 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { // Push the Kernel function scope to ensure the scope isn't empty SemaSYCLRef.SemaRef.PushFunctionScope(); - // Initialize kernel object local clone - assert(CollectionInitExprs.size() == 1 && - "Should have been popped down to just the first one"); - KernelObjClone->setInit(CollectionInitExprs.back()); + if (!UseTopLevelKernelObj) { + // Initialize kernel object local clone + assert(CollectionInitExprs.size() == 1 && + "Should have been popped down to just the first one"); + KernelObjClone->setInit(CollectionInitExprs.back()); + } // Replace references to the kernel object in kernel body, to use the // compiler generated local clone @@ -3500,7 +3588,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { return InitializedEntity::InitializeElement(SemaSYCLRef.getASTContext(), ArrayInfos.back().second, ArrayInfos.back().first); - return InitializedEntity::InitializeMember(FD, &VarEntity); + return InitializedEntity::InitializeMember(FD, &VarEntity.value()); } void addFieldInit(FieldDecl *FD, QualType Ty, MultiExprArg ParamRef) { @@ -3530,7 +3618,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { InitializationKind InitKind) { InitializedEntity Entity = InitializedEntity::InitializeBase( SemaSYCLRef.getASTContext(), &BS, /*IsInheritedVirtualBase*/ false, - &VarEntity); + &VarEntity.value()); InitializationSequence InitSeq(SemaSYCLRef.SemaRef, Entity, InitKind, std::nullopt); ExprResult Init = @@ -3545,7 +3633,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { InitializationKind InitKind, MultiExprArg Args) { InitializedEntity Entity = InitializedEntity::InitializeBase( SemaSYCLRef.getASTContext(), &BS, /*IsInheritedVirtualBase*/ false, - &VarEntity); + &VarEntity.value()); InitializationSequence InitSeq(SemaSYCLRef.SemaRef, Entity, InitKind, Args); ExprResult Init = InitSeq.Perform(SemaSYCLRef.SemaRef, Entity, InitKind, Args); @@ -3561,7 +3649,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { InitializedEntity Entity = InitializedEntity::InitializeBase( SemaSYCLRef.getASTContext(), &BS, /*IsInheritedVirtualBase*/ false, - &VarEntity); + &VarEntity.value()); Expr *ParamRef = createParamReferenceExpr(); InitializationSequence InitSeq(SemaSYCLRef.SemaRef, Entity, InitKind, @@ -3819,8 +3907,8 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { Expr *ArrayRef = createSimpleArrayParamReferenceExpr(FieldTy); InitializationKind InitKind = InitializationKind::CreateDirect({}, {}, {}); - InitializedEntity Entity = - InitializedEntity::InitializeMember(FD, &VarEntity, /*Implicit*/ true); + InitializedEntity Entity = InitializedEntity::InitializeMember( + FD, &VarEntity.value(), /*Implicit*/ true); addFieldInit(FD, FieldTy, ArrayRef, InitKind, Entity); } @@ -3885,27 +3973,33 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { const CXXRecordDecl *KernelObj, FunctionDecl *KernelCallerFunc, bool IsSIMDKernel, CXXMethodDecl *CallOperator) - : SyclKernelFieldHandler(S), DeclCreator(DC), - KernelObjClone(createKernelObjClone(S.getASTContext(), - DC.getKernelDecl(), KernelObj)), - VarEntity(InitializedEntity::InitializeVariable(KernelObjClone)), - KernelCallerFunc(KernelCallerFunc), + : SyclKernelFieldHandler(S), + UseTopLevelKernelObj(KernelObjVisitor::useTopLevelKernelObj(KernelObj)), + DeclCreator(DC), + KernelObjClone(UseTopLevelKernelObj + ? nullptr + : createKernelObjClone(S.getASTContext(), + DC.getKernelDecl(), + KernelObj)), + VarEntity(), KernelCallerFunc(KernelCallerFunc), KernelCallerSrcLoc(KernelCallerFunc->getLocation()), IsESIMD(IsSIMDKernel), CallOperator(CallOperator) { - CollectionInitExprs.push_back(createInitListExpr(KernelObj)); - annotateHierarchicalParallelismAPICalls(); - - Stmt *DS = new (S.getASTContext()) DeclStmt( - DeclGroupRef(KernelObjClone), KernelCallerSrcLoc, KernelCallerSrcLoc); - BodyStmts.push_back(DS); - DeclRefExpr *KernelObjCloneRef = DeclRefExpr::Create( - S.getASTContext(), NestedNameSpecifierLoc(), KernelCallerSrcLoc, - KernelObjClone, false, DeclarationNameInfo(), - QualType(KernelObj->getTypeForDecl(), 0), VK_LValue); - MemberExprBases.push_back(KernelObjCloneRef); + if (!UseTopLevelKernelObj) { + VarEntity.emplace(InitializedEntity::InitializeVariable(KernelObjClone)); + Stmt *DS = new (S.getASTContext()) DeclStmt( + DeclGroupRef(KernelObjClone), KernelCallerSrcLoc, KernelCallerSrcLoc); + BodyStmts.push_back(DS); + CollectionInitExprs.push_back(createInitListExpr(KernelObj)); + DeclRefExpr *KernelObjCloneRef = DeclRefExpr::Create( + S.getASTContext(), NestedNameSpecifierLoc(), KernelCallerSrcLoc, + KernelObjClone, false, DeclarationNameInfo(), + QualType(KernelObj->getTypeForDecl(), 0), VK_LValue); + MemberExprBases.push_back(KernelObjCloneRef); + } } ~SyclKernelBodyCreator() { + annotateHierarchicalParallelismAPICalls(); CompoundStmt *KernelBody = createKernelBody(); DeclCreator.setBody(KernelBody); } @@ -3934,6 +4028,13 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { return true; } + bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { + // As the functor is passed as a whole, use the param as the vardecl + // otherwise used as the clone. + KernelObjClone = DeclCreator.getParamVarDeclsForCurrentField()[0]; + return true; + } + bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final { CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); @@ -4488,6 +4589,11 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { return true; } + bool handleTopLevelStruct(const CXXRecordDecl *, QualType Ty) final { + addParam(Ty, SYCLIntegrationHeader::kind_std_layout, /*Offset=*/0); + return true; + } + bool handleNonDecompStruct(const CXXRecordDecl *, FieldDecl *FD, QualType Ty) final { addParam(FD, Ty, SYCLIntegrationHeader::kind_std_layout); @@ -4855,6 +4961,8 @@ void SemaSYCL::CheckSYCLKernelCall(FunctionDecl *KernelFunc, Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker, DecompMarker); Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker, DecompMarker); + Visitor.visitTopLevelRecord(KernelObj, GetSYCLKernelObjectType(KernelFunc), + FieldChecker, UnionChecker, DecompMarker); DiagnosingSYCLKernel = false; // Set the kernel function as invalid, if any of the checkers fail validation. @@ -4969,8 +5077,8 @@ void SemaSYCL::SetSYCLKernelNames() { void SemaSYCL::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC) { // The first argument to the KernelCallerFunc is the lambda object. - const CXXRecordDecl *KernelObj = - GetSYCLKernelObjectType(KernelCallerFunc)->getAsCXXRecordDecl(); + QualType KernelObjTy = GetSYCLKernelObjectType(KernelCallerFunc); + const CXXRecordDecl *KernelObj = KernelObjTy->getAsCXXRecordDecl(); assert(KernelObj && "invalid kernel caller"); // Do not visit invalid kernel object. @@ -5025,17 +5133,13 @@ void SemaSYCL::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, // Visit handlers to generate information for optimization record only if // optimization record is saved. if (!getLangOpts().OptRecordFile.empty()) { - Visitor.VisitRecordBases(KernelObj, argsSizeChecker, esimdKernel, - kernel_decl, kernel_body, int_header, int_footer, - opt_report); - Visitor.VisitRecordFields(KernelObj, argsSizeChecker, esimdKernel, - kernel_decl, kernel_body, int_header, int_footer, - opt_report); + Visitor.VisitKernelRecord(KernelObj, KernelObjTy, argsSizeChecker, + esimdKernel, kernel_decl, kernel_body, int_header, + int_footer, opt_report); } else { - Visitor.VisitRecordBases(KernelObj, argsSizeChecker, esimdKernel, - kernel_decl, kernel_body, int_header, int_footer); - Visitor.VisitRecordFields(KernelObj, argsSizeChecker, esimdKernel, - kernel_decl, kernel_body, int_header, int_footer); + Visitor.VisitKernelRecord(KernelObj, KernelObjTy, argsSizeChecker, + esimdKernel, kernel_decl, kernel_body, int_header, + int_footer); } if (ParmVarDecl *KernelHandlerArg = diff --git a/clang/test/CodeGenSYCL/kernel-handler.cpp b/clang/test/CodeGenSYCL/kernel-handler.cpp index efa27788fd57..00e799521a10 100644 --- a/clang/test/CodeGenSYCL/kernel-handler.cpp +++ b/clang/test/CodeGenSYCL/kernel-handler.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT // RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT // This test checks IR generated when kernel_handler argument @@ -23,7 +23,8 @@ void test(int val) { } // ALL: define dso_local{{ spir_kernel | }}void @{{.*}}test_kernel_handler{{[^(]*}} -// ALL-SAME: (i32 noundef %_arg_a, ptr addrspace(1) noundef align 1 %_arg__specialization_constants_buffer) +// NONATIVESUPPORT-SAME: (ptr noundef byval(%class.anon) align 4 %_arg__sycl_functor, ptr addrspace(1) noundef align 1 %_arg__specialization_constants_buffer) +// NATIVESUPPORT-SAME: (i32 noundef %_arg_a, ptr addrspace(1) noundef align 1 %_arg__specialization_constants_buffer) // ALL: %kh = alloca %"class.sycl::_V1::kernel_handler", align 1 // NONATIVESUPPORT: %[[KH:[0-9]+]] = load ptr addrspace(1), ptr %_arg__specialization_constants_buffer.addr, align 8 diff --git a/clang/test/CodeGenSYCL/nvvm-annotations.cpp b/clang/test/CodeGenSYCL/nvvm-annotations.cpp index 858648d901fb..d5376fa26db5 100644 --- a/clang/test/CodeGenSYCL/nvvm-annotations.cpp +++ b/clang/test/CodeGenSYCL/nvvm-annotations.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_70 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,GRIDCONST -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fno-sycl-decompose-functor -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -target-cpu sm_60 -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NOGRIDCONST // Tests that certain SYCL kernel parameters are annotated with "grid_constant" for supported microarchitectures. @@ -18,7 +18,7 @@ int main() { } s; q.submit([&](handler &h) { - // CHECK: define{{.*}} void @[[FUNC1:.*kernel_grid_const_params]](ptr noundef byval(%struct.S) align 4 %_arg_s) + // CHECK: define{{.*}} void @[[FUNC1:.*kernel_grid_const_params]](ptr noundef byval(%class.anon) align 4 %_arg__sycl_functor) h.single_task([=]() { (void) s;}); }); diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c index 34985f6111e1..2ac97885ab1c 100644 --- a/clang/test/Driver/sycl-offload.c +++ b/clang/test/Driver/sycl-offload.c @@ -542,3 +542,13 @@ // FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK: --dependent-lib=sycl{{[0-9]*}}-previewd // FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}.lib // FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}-preview.lib + +/// ########################################################################### + +/// Check -fsycl-decompose-functor behaviors from source +// RUN: %clang -### -fsycl-decompose-functor -target x86_64-unknown-linux-gnu -fsycl -o %t.out %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-DECOMP %s +// RUN: %clang -### -fno-sycl-decompose-functor -target x86_64-unknown-linux-gnu -fsycl -o %t.out %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-NODECOMP %s +// CHK-DECOMP: -fsycl-decompose-functor +// CHK-NODECOMP: -fno-sycl-decompose-functor diff --git a/clang/test/SemaSYCL/kernel-arg-opt-report.cpp b/clang/test/SemaSYCL/kernel-arg-opt-report.cpp index 95f2106d7265..92f19374818e 100644 --- a/clang/test/SemaSYCL/kernel-arg-opt-report.cpp +++ b/clang/test/SemaSYCL/kernel-arg-opt-report.cpp @@ -348,10 +348,10 @@ int main() { // SPIR-NEXT: String: 'Arg ' // SPIR-NEXT: Argument: '13' // SPIR-NEXT: String: ':' -// SPIR-NEXT: String: '' -// SPIR-NEXT: String: A +// SPIR-NEXT: String: 'Compiler generated argument for decomposed struct/class,' +// SPIR-NEXT: String: KernelFunctor // SPIR-NEXT: String: ' (' -// SPIR-NEXT: String: '' +// SPIR-NEXT: String: 'Field:A, ' // SPIR-NEXT: String: 'Type:' // SPIR-NEXT: String: int // SPIR-NEXT: String: ', ' diff --git a/clang/test/SemaSYCL/kernel-handler.cpp b/clang/test/SemaSYCL/kernel-handler.cpp index 4df4a8d17bc7..ec9644a3bec2 100644 --- a/clang/test/SemaSYCL/kernel-handler.cpp +++ b/clang/test/SemaSYCL/kernel-handler.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NONATIVESUPPORT +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fno-sycl-decompose-functor -triple nvptx64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NONATIVESUPPORT // RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -ast-dump %s | FileCheck %s --check-prefix=NATIVESUPPORT // This test checks that the compiler handles kernel_handler type (for @@ -28,19 +28,12 @@ int main() { } // Check test_kernel_handler parameters -// NONATIVESUPPORT: FunctionDecl {{.*}}test_kernel_handler{{.*}} 'void (int, __global char *)' -// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg_a 'int' +// NONATIVESUPPORT: FunctionDecl {{.*}}test_kernel_handler{{.*}} 'void ((lambda at {{.*}}kernel-handler.cpp{{.*}}), __global char *)' +// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__sycl_functor '(lambda at {{.*}}' // NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__specialization_constants_buffer '__global char *' -// Check declaration and initialization of kernel object local clone -// NONATIVESUPPORT-NEXT: CompoundStmt -// NONATIVESUPPORT-NEXT: DeclStmt -// NONATIVESUPPORT-NEXT: VarDecl {{.*}} cinit -// NONATIVESUPPORT-NEXT: InitListExpr -// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'int' -// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_a' 'int' - // Check declaration and initialization of kernel handler local clone using default constructor +// NONATIVESUPPORT-NEXT: CompoundStmt // NONATIVESUPPORT-NEXT: DeclStmt // NONATIVESUPPORT-NEXT: VarDecl {{.*}} callinit // NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}}'sycl::kernel_handler' 'void () noexcept' @@ -58,26 +51,19 @@ int main() { // NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'void (sycl::kernel_handler) const' lvalue CXXMethod {{.*}} 'operator()' 'void (sycl::kernel_handler) const' // Kernel body with clones // NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'const (lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue -// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue Var {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' +// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue ParmVar {{.*}} '_arg__sycl_functor' '(lambda at {{.*}}kernel-handler.cpp{{.*}})' // NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}} 'sycl::kernel_handler' 'void (const kernel_handler &) noexcept' // NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'const kernel_handler':'const sycl::kernel_handler' lvalue // NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'kernel_handler':'sycl::kernel_handler' lvalue Var {{.*}} 'kh' 'kernel_handler':'sycl::kernel_handler' // Check test_pfwg_kernel_handler parameters -// NONATIVESUPPORT: FunctionDecl {{.*}}test_pfwg_kernel_handler{{.*}} 'void (int, __global char *)' -// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg_a 'int' -// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__specialization_constants_buffer '__global char *' - -// Check declaration and initialization of kernel object local clone -// NONATIVESUPPORT-NEXT: CompoundStmt -// NONATIVESUPPORT-NEXT: DeclStmt -// NONATIVESUPPORT-NEXT: VarDecl {{.*}} cinit -// NONATIVESUPPORT-NEXT: InitListExpr -// NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'int' -// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_a' 'int' +// NONATIVESUPPORT: FunctionDecl {{.*}}test_pfwg_kernel_handler{{.*}} 'void ((lambda at {{.*}}kernel-handler.cpp{{.*}}), __global char *)' +// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__sycl_functor '(lambda at {{.*}}kernel-handler.cpp{{.*}})' // NONATIVESUPPORT-NEXT: SYCLScopeAttr {{.*}} Implicit WorkGroup +// NONATIVESUPPORT-NEXT: ParmVarDecl {{.*}} used _arg__specialization_constants_buffer '__global char *' // Check declaration and initialization of kernel handler local clone using default constructor +// NONATIVESUPPORT-NEXT: CompoundStmt // NONATIVESUPPORT-NEXT: DeclStmt // NONATIVESUPPORT-NEXT: VarDecl {{.*}} callinit // NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}}'sycl::kernel_handler' 'void () noexcept' @@ -96,7 +82,7 @@ int main() { // Kernel body with clones // NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}} 'const (lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue -// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue Var {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' +// NONATIVESUPPORT-NEXT: DeclRefExpr {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' lvalue ParmVar {{.*}} '(lambda at {{.*}}kernel-handler.cpp{{.*}})' // NONATIVESUPPORT-NEXT: CXXTemporaryObjectExpr {{.*}} 'group<1>':'sycl::group<>' 'void () noexcept' zeroing // NONATIVESUPPORT-NEXT: CXXConstructExpr {{.*}}'kernel_handler':'sycl::kernel_handler' 'void (const kernel_handler &) noexcept' // NONATIVESUPPORT-NEXT: ImplicitCastExpr {{.*}}'const sycl::kernel_handler' lvalue diff --git a/clang/test/SemaSYCL/no-decomp.cpp b/clang/test/SemaSYCL/no-decomp.cpp new file mode 100644 index 000000000000..5f61574935c8 --- /dev/null +++ b/clang/test/SemaSYCL/no-decomp.cpp @@ -0,0 +1,56 @@ +// RUN: %clang_cc1 -fsycl-is-device -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=DECOMP +// RUN: %clang_cc1 -fsycl-is-device -fno-sycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=NODECOMP +// RUN: %clang_cc1 -fsycl-is-device -fsycl-decompose-functor -triple nvptx64-nvidia-cuda -ast-dump %s | FileCheck %s -check-prefix=ALL -check-prefix=DECOMP + +#include "Inputs/sycl.hpp" + +class with_acc { +public: + int *d; + sycl::accessor AccField; +}; + +class wrapping_acc { +public: + with_acc acc; + void operator()() const { + } +}; + +class pointer_wrap { +public: + int *d; + void operator()() const { + } +}; + +class empty { +public: + void operator()() const { + } +}; + +int main() { + sycl::queue q; + + q.submit([&](sycl::handler &cgh) { + wrapping_acc acc; + cgh.single_task(acc); + }); + // ALL: FunctionDecl {{.*}} _ZTS12wrapping_acc 'void (__wrapper_class, __global char *, sycl::range<1>, sycl::range<1>, sycl::id<1>)' + + q.submit([&](sycl::handler &cgh) { + pointer_wrap ptr; + cgh.single_task(ptr); + }); + // NODECOMP: FunctionDecl {{.*}} _ZTS12pointer_wrap 'void (pointer_wrap)' + // DECOMP: FunctionDecl {{.*}} _ZTS12pointer_wrap 'void (__global int *)' + + q.submit([&](sycl::handler &cgh) { + empty e; + cgh.single_task(e); + }); + // ALL: FunctionDecl {{.*}} _ZTS5empty 'void ()' + + return 0; +}