diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 4c85e000ec88..a43b97b01afa 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -55,12 +55,8 @@ class Util { } }; -static CXXRecordDecl *getKernelCallerLambdaArg(FunctionDecl *FD) { - auto FirstArg = (*FD->param_begin()); - if (FirstArg) - if (FirstArg->getType()->getAsCXXRecordDecl()->isLambda()) - return FirstArg->getType()->getAsCXXRecordDecl(); - return nullptr; +static CXXRecordDecl *getKernelObjectType(FunctionDecl *Caller) { + return (*Caller->param_begin())->getType()->getAsCXXRecordDecl(); } class MarkDeviceFunction : public RecursiveASTVisitor { @@ -240,198 +236,177 @@ CreateSYCLKernelFunction(ASTContext &Context, StringRef Name, static CompoundStmt * CreateSYCLKernelBody(Sema &S, FunctionDecl *KernelCallerFunc, DeclContext *DC) { - llvm::SmallVector BodyStmts; - - // TODO: case when kernel is functor - // TODO: possible refactoring when functor case will be completed - CXXRecordDecl *LC = getKernelCallerLambdaArg(KernelCallerFunc); - if (LC) { - // Create Lambda object - auto LambdaVD = VarDecl::Create( - S.Context, DC, SourceLocation(), SourceLocation(), LC->getIdentifier(), - QualType(LC->getTypeForDecl(), 0), LC->getLambdaTypeInfo(), SC_None); - - Stmt *DS = new (S.Context) - DeclStmt(DeclGroupRef(LambdaVD), SourceLocation(), SourceLocation()); - BodyStmts.push_back(DS); - auto LambdaDRE = DeclRefExpr::Create( - S.Context, NestedNameSpecifierLoc(), SourceLocation(), LambdaVD, false, - DeclarationNameInfo(), QualType(LC->getTypeForDecl(), 0), VK_LValue); - - auto TargetFunc = dyn_cast(DC); - auto TargetFuncParam = - TargetFunc->param_begin(); // Iterator to ParamVarDecl (VarDecl) - if (TargetFuncParam) { - for (auto Field : LC->fields()) { - auto getExprForPointer = [](Sema &S, const QualType ¶mTy, - DeclRefExpr *DRE) { - // C++ address space attribute != OpenCL address space attribute - Expr *qualifiersCast = ImplicitCastExpr::Create( - S.Context, paramTy, CK_NoOp, DRE, nullptr, VK_LValue); - Expr *Res = - ImplicitCastExpr::Create(S.Context, paramTy, CK_LValueToRValue, - qualifiersCast, nullptr, VK_RValue); - return Res; - }; - auto getExprForRange = [](Sema &S, const QualType ¶mTy, + CXXRecordDecl *LC = getKernelObjectType(KernelCallerFunc); + assert(LC && "Kernel object must be available"); + TypeSourceInfo *TSInfo = LC->isLambda() ? LC->getLambdaTypeInfo() : nullptr; + // Create a local kernel object (lambda or functor) assembled from the + // incoming formal parameters + auto KernelObjClone = VarDecl::Create( + S.Context, DC, SourceLocation(), SourceLocation(), LC->getIdentifier(), + QualType(LC->getTypeForDecl(), 0), TSInfo, SC_None); + Stmt *DS = new (S.Context) + DeclStmt(DeclGroupRef(KernelObjClone), SourceLocation(), SourceLocation()); + BodyStmts.push_back(DS); + auto CloneRef = DeclRefExpr::Create( + S.Context, NestedNameSpecifierLoc(), SourceLocation(), KernelObjClone, false, + DeclarationNameInfo(), QualType(LC->getTypeForDecl(), 0), VK_LValue); + auto TargetFunc = dyn_cast(DC); + auto TargetFuncParam = + TargetFunc->param_begin(); // Iterator to ParamVarDecl (VarDecl) + if (TargetFuncParam) { + for (auto Field : LC->fields()) { + auto getExprForPointer = [](Sema &S, const QualType ¶mTy, DeclRefExpr *DRE) { - Expr *Res = ImplicitCastExpr::Create(S.Context, paramTy, CK_NoOp, DRE, - nullptr, VK_RValue); - return Res; - }; - - QualType ParamType = (*TargetFuncParam)->getOriginalType(); - auto DRE = - DeclRefExpr::Create(S.Context, NestedNameSpecifierLoc(), - SourceLocation(), *TargetFuncParam, false, - DeclarationNameInfo(), ParamType, VK_LValue); - - QualType FieldType = Field->getType(); - CXXRecordDecl *CRD = FieldType->getAsCXXRecordDecl(); - if (CRD && Util::isSyclAccessorType(FieldType)) { - DeclAccessPair FieldDAP = DeclAccessPair::make(Field, AS_none); - // lambda.accessor - auto AccessorME = MemberExpr::Create( - S.Context, LambdaDRE, false, SourceLocation(), - NestedNameSpecifierLoc(), SourceLocation(), Field, FieldDAP, - DeclarationNameInfo(Field->getDeclName(), SourceLocation()), - nullptr, Field->getType(), VK_LValue, OK_Ordinary); - bool PointerOfAccesorWasSet = false; - for (auto Method : CRD->methods()) { - llvm::SmallVector ParamStmts; - if (Method->getNameInfo().getName().getAsString() == - "__set_pointer") { - DeclAccessPair MethodDAP = DeclAccessPair::make(Method, AS_none); - // lambda.accessor.__set_pointer - auto ME = MemberExpr::Create( - S.Context, AccessorME, false, SourceLocation(), - NestedNameSpecifierLoc(), SourceLocation(), Method, MethodDAP, - Method->getNameInfo(), nullptr, Method->getType(), VK_LValue, - OK_Ordinary); - - // Not referenced -> not emitted - S.MarkFunctionReferenced(SourceLocation(), Method, true); - - QualType ResultTy = Method->getReturnType(); - ExprValueKind VK = Expr::getValueKindForType(ResultTy); - ResultTy = ResultTy.getNonLValueExprType(S.Context); - - // __set_pointer needs one parameter - QualType paramTy = (*(Method->param_begin()))->getOriginalType(); - - Expr *Res = getExprForPointer(S, paramTy, DRE); - - // kernel_parameter - ParamStmts.push_back(Res); - // lambda.accessor.__set_pointer(kernel_parameter) - CXXMemberCallExpr *Call = CXXMemberCallExpr::Create( - S.Context, ME, ParamStmts, ResultTy, VK, SourceLocation()); - BodyStmts.push_back(Call); - PointerOfAccesorWasSet = true; - } + // C++ address space attribute != OpenCL address space attribute + Expr *qualifiersCast = ImplicitCastExpr::Create( + S.Context, paramTy, CK_NoOp, DRE, nullptr, VK_LValue); + Expr *Res = + ImplicitCastExpr::Create(S.Context, paramTy, CK_LValueToRValue, + qualifiersCast, nullptr, VK_RValue); + return Res; + }; + auto getExprForRange = [](Sema &S, const QualType ¶mTy, + DeclRefExpr *DRE) { + Expr *Res = ImplicitCastExpr::Create(S.Context, paramTy, CK_NoOp, DRE, + nullptr, VK_RValue); + return Res; + }; + QualType ParamType = (*TargetFuncParam)->getOriginalType(); + auto DRE = + DeclRefExpr::Create(S.Context, NestedNameSpecifierLoc(), + SourceLocation(), *TargetFuncParam, false, + DeclarationNameInfo(), ParamType, VK_LValue); + QualType FieldType = Field->getType(); + CXXRecordDecl *CRD = FieldType->getAsCXXRecordDecl(); + if (CRD && Util::isSyclAccessorType(FieldType)) { + DeclAccessPair FieldDAP = DeclAccessPair::make(Field, AS_none); + // kernel_obj.accessor + auto AccessorME = MemberExpr::Create( + S.Context, CloneRef, false, SourceLocation(), + NestedNameSpecifierLoc(), SourceLocation(), Field, FieldDAP, + DeclarationNameInfo(Field->getDeclName(), SourceLocation()), + nullptr, Field->getType(), VK_LValue, OK_Ordinary); + bool PointerOfAccesorWasSet = false; + for (auto Method : CRD->methods()) { + llvm::SmallVector ParamStmts; + if (Method->getNameInfo().getName().getAsString() == + "__set_pointer") { + DeclAccessPair MethodDAP = DeclAccessPair::make(Method, AS_none); + // kernel_obj.accessor.__set_pointer + auto ME = MemberExpr::Create( + S.Context, AccessorME, false, SourceLocation(), + NestedNameSpecifierLoc(), SourceLocation(), Method, MethodDAP, + Method->getNameInfo(), nullptr, Method->getType(), VK_LValue, + OK_Ordinary); + // Not referenced -> not emitted + S.MarkFunctionReferenced(SourceLocation(), Method, true); + QualType ResultTy = Method->getReturnType(); + ExprValueKind VK = Expr::getValueKindForType(ResultTy); + ResultTy = ResultTy.getNonLValueExprType(S.Context); + // __set_pointer needs one parameter + QualType paramTy = (*(Method->param_begin()))->getOriginalType(); + Expr *Res = getExprForPointer(S, paramTy, DRE); + // kernel_parameter + ParamStmts.push_back(Res); + // kernel_obj.accessor.__set_pointer(kernel_parameter) + CXXMemberCallExpr *Call = CXXMemberCallExpr::Create( + S.Context, ME, ParamStmts, ResultTy, VK, SourceLocation()); + BodyStmts.push_back(Call); + PointerOfAccesorWasSet = true; } - if (PointerOfAccesorWasSet) { - TargetFuncParam++; - - ParamType = (*TargetFuncParam)->getOriginalType(); - DRE = DeclRefExpr::Create(S.Context, NestedNameSpecifierLoc(), - SourceLocation(), *TargetFuncParam, false, - DeclarationNameInfo(), ParamType, - VK_LValue); - - FieldType = Field->getType(); - CRD = FieldType->getAsCXXRecordDecl(); - if (CRD) { - FieldDAP = DeclAccessPair::make(Field, AS_none); - // lambda.accessor - AccessorME = MemberExpr::Create( - S.Context, LambdaDRE, false, SourceLocation(), - NestedNameSpecifierLoc(), SourceLocation(), Field, FieldDAP, - DeclarationNameInfo(Field->getDeclName(), SourceLocation()), - nullptr, Field->getType(), VK_LValue, OK_Ordinary); - - for (auto Method : CRD->methods()) { - llvm::SmallVector ParamStmts; - if (Method->getNameInfo().getName().getAsString() == - "__set_range") { - // lambda.accessor.__set_range - DeclAccessPair MethodDAP = - DeclAccessPair::make(Method, AS_none); - auto ME = MemberExpr::Create( - S.Context, AccessorME, false, SourceLocation(), - NestedNameSpecifierLoc(), SourceLocation(), Method, - MethodDAP, Method->getNameInfo(), nullptr, - Method->getType(), VK_LValue, OK_Ordinary); - - // Not referenced -> not emitted - S.MarkFunctionReferenced(SourceLocation(), Method, true); - - QualType ResultTy = Method->getReturnType(); - ExprValueKind VK = Expr::getValueKindForType(ResultTy); - ResultTy = ResultTy.getNonLValueExprType(S.Context); - - // __set_range needs one parameter - QualType paramTy = - (*(Method->param_begin()))->getOriginalType(); - - Expr *Res = getExprForRange(S, paramTy, DRE); - - // kernel_parameter - ParamStmts.push_back(Res); - // lambda.accessor.__set_range(kernel_parameter) - CXXMemberCallExpr *Call = CXXMemberCallExpr::Create( - S.Context, ME, ParamStmts, ResultTy, VK, - SourceLocation()); - BodyStmts.push_back(Call); - } + } + if (PointerOfAccesorWasSet) { + TargetFuncParam++; + ParamType = (*TargetFuncParam)->getOriginalType(); + DRE = DeclRefExpr::Create(S.Context, NestedNameSpecifierLoc(), + SourceLocation(), *TargetFuncParam, false, + DeclarationNameInfo(), ParamType, + VK_LValue); + FieldType = Field->getType(); + CRD = FieldType->getAsCXXRecordDecl(); + if (CRD) { + FieldDAP = DeclAccessPair::make(Field, AS_none); + // lambda.accessor + AccessorME = MemberExpr::Create( + S.Context, CloneRef, false, SourceLocation(), + NestedNameSpecifierLoc(), SourceLocation(), Field, FieldDAP, + DeclarationNameInfo(Field->getDeclName(), SourceLocation()), + nullptr, Field->getType(), VK_LValue, OK_Ordinary); + for (auto Method : CRD->methods()) { + llvm::SmallVector ParamStmts; + if (Method->getNameInfo().getName().getAsString() == + "__set_range") { + // lambda.accessor.__set_range + DeclAccessPair MethodDAP = + DeclAccessPair::make(Method, AS_none); + auto ME = MemberExpr::Create( + S.Context, AccessorME, false, SourceLocation(), + NestedNameSpecifierLoc(), SourceLocation(), Method, + MethodDAP, Method->getNameInfo(), nullptr, + Method->getType(), VK_LValue, OK_Ordinary); + // Not referenced -> not emitted + S.MarkFunctionReferenced(SourceLocation(), Method, true); + QualType ResultTy = Method->getReturnType(); + ExprValueKind VK = Expr::getValueKindForType(ResultTy); + ResultTy = ResultTy.getNonLValueExprType(S.Context); + // __set_range needs one parameter + QualType paramTy = + (*(Method->param_begin()))->getOriginalType(); + Expr *Res = getExprForRange(S, paramTy, DRE); + // kernel_parameter + ParamStmts.push_back(Res); + // lambda.accessor.__set_range(kernel_parameter) + CXXMemberCallExpr *Call = CXXMemberCallExpr::Create( + S.Context, ME, ParamStmts, ResultTy, VK, + SourceLocation()); + BodyStmts.push_back(Call); } - } else { - llvm_unreachable( - "unsupported accessor and without initialized range"); } + } else { + llvm_unreachable( + "unsupported accessor and without initialized range"); } - } else if (CRD || FieldType->isBuiltinType()) { - // If field have built-in or a structure/class type just initialize - // this field with corresponding kernel argument using '=' binary - // operator. The structure/class type must be copy assignable - this - // holds because SYCL kernel lambdas capture arguments by copy. - DeclAccessPair FieldDAP = DeclAccessPair::make(Field, AS_none); - auto Lhs = MemberExpr::Create( - S.Context, LambdaDRE, false, SourceLocation(), - NestedNameSpecifierLoc(), SourceLocation(), Field, FieldDAP, - DeclarationNameInfo(Field->getDeclName(), SourceLocation()), - nullptr, Field->getType(), VK_LValue, OK_Ordinary); - auto Rhs = ImplicitCastExpr::Create( - S.Context, ParamType, CK_LValueToRValue, DRE, nullptr, VK_RValue); - // lambda.field = kernel_parameter - Expr *Res = new (S.Context) - BinaryOperator(Lhs, Rhs, BO_Assign, FieldType, VK_LValue, - OK_Ordinary, SourceLocation(), FPOptions()); - BodyStmts.push_back(Res); } - TargetFuncParam++; + } else if (CRD || FieldType->isBuiltinType()) { + // If field have built-in or a structure/class type just initialize + // this field with corresponding kernel argument using '=' binary + // operator. The structure/class type must be copy assignable - this + // holds because SYCL kernel lambdas capture arguments by copy. + DeclAccessPair FieldDAP = DeclAccessPair::make(Field, AS_none); + auto Lhs = MemberExpr::Create( + S.Context, CloneRef, false, SourceLocation(), + NestedNameSpecifierLoc(), SourceLocation(), Field, FieldDAP, + DeclarationNameInfo(Field->getDeclName(), SourceLocation()), + nullptr, Field->getType(), VK_LValue, OK_Ordinary); + auto Rhs = ImplicitCastExpr::Create( + S.Context, ParamType, CK_LValueToRValue, DRE, nullptr, VK_RValue); + // lambda.field = kernel_parameter + Expr *Res = new (S.Context) + BinaryOperator(Lhs, Rhs, BO_Assign, FieldType, VK_LValue, + OK_Ordinary, SourceLocation(), FPOptions()); + BodyStmts.push_back(Res); } + TargetFuncParam++; } - - // In function from headers lambda is function parameter, we need - // to replace all refs to this lambda with our vardecl. - // I used TreeTransform here, but I'm not sure that it is good solution - // Also I used map and I'm not sure about it too. - // TODO SYCL review the above design concerns - Stmt *FunctionBody = KernelCallerFunc->getBody(); - DeclMap DMap; - ParmVarDecl *LambdaParam = *(KernelCallerFunc->param_begin()); - // DeclRefExpr with valid source location but with decl which is not marked - // as used is invalid. - LambdaVD->setIsUsed(); - DMap[LambdaParam] = LambdaVD; - // Without PushFunctionScope I had segfault. Maybe we also need to do pop. - S.PushFunctionScope(); - KernelBodyTransform KBT(DMap, S); - Stmt *NewBody = KBT.TransformStmt(FunctionBody).get(); - BodyStmts.push_back(NewBody); } + // In function from headers lambda is function parameter, we need + // to replace all refs to this lambda with our vardecl. + // I used TreeTransform here, but I'm not sure that it is good solution + // Also I used map and I'm not sure about it too. + // TODO SYCL review the above design concerns + Stmt *FunctionBody = KernelCallerFunc->getBody(); + DeclMap DMap; + ParmVarDecl *KernelObjParam = *(KernelCallerFunc->param_begin()); + // DeclRefExpr with valid source location but with decl which is not marked + // as used is invalid. + KernelObjClone->setIsUsed(); + DMap[KernelObjParam] = KernelObjClone; + // Without PushFunctionScope I had segfault. Maybe we also need to do pop. + S.PushFunctionScope(); + KernelBodyTransform KBT(DMap, S); + Stmt *NewBody = KBT.TransformStmt(FunctionBody).get(); + BodyStmts.push_back(NewBody); return CompoundStmt::Create(S.Context, BodyStmts, SourceLocation(), SourceLocation()); } @@ -447,27 +422,30 @@ enum VisitorContext { post_visit, }; -/// Implements visitor design pattern for lambda captures. +/// Implements visitor design pattern for kernel object fields. /// -/// Iterates over captured parameters of given lambda and invokes given -/// visitor functions at appropriate context providing information of interest. -/// \param Lambda the kernel lambda object +/// Iterates over fields of given kernel object (can be lambda or functor) and +/// invokes given visitor functions at appropriate context providing information +/// of interest. +/// \param KernelObj the kernel object /// \param Vis a tuple of visitor functions, each corresponds to and is /// invoked at a specific context. @see VisitorContext. /// template -static void visitKernelLambdaCaptures(const CXXRecordDecl *Lambda, +static void visitKernelObjFields(const CXXRecordDecl *KernelObj, VisitorTupleTy &Vis) { - const LambdaCapture *Cpt = Lambda->captures_begin(); - RecordDecl::field_iterator Fld = Lambda->field_begin(); - const LambdaCapture *CptEnd = Lambda->captures_end(); - const RecordDecl::field_iterator FldEnd = Lambda->field_end(); - - for (; (Cpt != CptEnd) && (Fld != FldEnd); Cpt++, Fld++) { + const LambdaCapture *Cpt = KernelObj->captures_begin(); + RecordDecl::field_iterator Fld = KernelObj->field_begin(); +#ifndef NDEBUG + const LambdaCapture *CptEnd = KernelObj->captures_end(); +#endif // NDEBUG + const RecordDecl::field_iterator FldEnd = KernelObj->field_end(); + bool IsLambda = KernelObj->isLambda(); + for (unsigned Cnt = 0; Fld != FldEnd; Fld++, ++Cnt) { + assert(!Cpt || (Cpt != CptEnd) && "too few captures"); // pre-visit context - unsigned Cnt = static_cast(std::distance(Cpt, CptEnd)); - VarDecl *V = Cpt->getCapturedVar(); - QualType ArgTy = V->getType(); + VarDecl *V = IsLambda ? Cpt->getCapturedVar() : nullptr; + QualType ArgTy = Fld->getType(); auto F1 = std::get(Vis); F1(Cnt, V, *Fld); FieldDecl *AccessorRangeField = nullptr; @@ -498,7 +476,6 @@ static void visitKernelLambdaCaptures(const CXXRecordDecl *Lambda, assert(AccessorImplRecord && "accessor __impl must be of a record type"); AccessorRangeField = getFieldByName(AccessorImplRecord, "Range"); assert(AccessorRangeField && "no Range found in __impl of accessor"); - // First accessor template parameter - data type QualType PointeeType = TemplateDecl->getTemplateArgs()[0].getAsType(); // Fourth parameter - access target @@ -512,7 +489,7 @@ static void visitKernelLambdaCaptures(const CXXRecordDecl *Lambda, F(Cnt, V, *Fld); } else if (ArgTy->isStructureOrClassType()) { if (!ArgTy->isStandardLayoutType()) - Lambda->getASTContext().getDiagnostics().Report(V->getLocation(), + KernelObj->getASTContext().getDiagnostics().Report(V->getLocation(), diag::err_sycl_non_std_layout_type); // structure or class typed parameter - the same handling as a scalar auto F = std::get(Vis); @@ -543,7 +520,7 @@ static void visitKernelLambdaCaptures(const CXXRecordDecl *Lambda, "captures inconsistent with fields"); } -static void BuildArgTys(ASTContext &Context, CXXRecordDecl *Lambda, +static void BuildArgTys(ASTContext &Context, CXXRecordDecl *KernelObjType, llvm::SmallVector &NewArgDecls, llvm::SmallVector &ArgTys) { QualType ActualArgType; // serves to transfer info between visitor lambdas @@ -553,8 +530,8 @@ static void BuildArgTys(ASTContext &Context, CXXRecordDecl *Lambda, // pre_visit_class_field [&](int, VarDecl *, FieldDecl *, FieldDecl *) {}, // visit_accessor - [&](int CaptureN, target AccTrg, QualType PointeeType, - DeclaratorDecl *CapturedVar, FieldDecl *CapturedVal) { + [&](int FieldN, target AccTrg, QualType PointeeType, + DeclaratorDecl *CapturedVar, FieldDecl *Field) { Qualifiers Quals = PointeeType.getQualifiers(); // TODO: Support all access targets switch (AccTrg) { @@ -578,23 +555,22 @@ static void BuildArgTys(ASTContext &Context, CXXRecordDecl *Lambda, Context.getQualifiedType(PointerType.getUnqualifiedType(), Quals); }, // visit_std_layout - [&](int CaptureN, VarDecl *CapturedVar, FieldDecl *CapturedVal) { - ActualArgType = CapturedVal->getType(); + [&](int FieldN, VarDecl *CapturedVar, FieldDecl *Field) { + ActualArgType = Field->getType(); }, // visit_stream - [&](int CaptureN, VarDecl *CapturedVar, FieldDecl *CapturedVal) { + [&](int FieldN, VarDecl *CapturedVar, FieldDecl *Field) { llvm_unreachable("streams not supported yet"); }, // post_visit - [&](int CaptureN, VarDecl *CapturedVar, FieldDecl *CapturedVal) { + [&](int FieldN, VarDecl *CapturedVar, FieldDecl *Field) { DeclContext *DC = Context.getTranslationUnitDecl(); - IdentifierInfo *VarName = 0; SmallString<8> Str; llvm::raw_svector_ostream OS(Str); IdentifierInfo *Identifier = (CapturedVar != nullptr) ? CapturedVar->getIdentifier() - : CapturedVal->getIdentifier(); + : Field->getIdentifier(); OS << "_arg_" << Identifier->getName(); VarName = &Context.Idents.get(OS.str()); @@ -605,18 +581,19 @@ static void BuildArgTys(ASTContext &Context, CXXRecordDecl *Lambda, ArgTys.push_back(ActualArgType); NewArgDecls.push_back(NewVarDecl); }); - visitKernelLambdaCaptures(Lambda, Vis); + visitKernelObjFields(KernelObjType, Vis); } /// Adds necessary data describing given kernel to the integration header. -/// \param H the integration header object -/// \param Name kernel name -/// \param NameType user-specified type representing kernel name -/// \param Lambda kernel lambda object +/// \param H the integration header object +/// \param Name kernel name +/// \param NameType type representing kernel name (first template argument of +/// single_task, parallel_for, etc) +/// \param KernelObj kernel object static void populateIntHeader(SYCLIntegrationHeader &H, const StringRef Name, - QualType NameType, CXXRecordDecl *Lambda) { - ASTContext &Ctx = Lambda->getASTContext(); - const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(Lambda); + QualType NameType, CXXRecordDecl *KernelObj) { + ASTContext &Ctx = KernelObj->getASTContext(); + const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(KernelObj); KernelParamKind Knd = SYCLIntegrationHeader::kind_last; H.startKernel(Name, NameType); unsigned Offset = 0; @@ -624,18 +601,18 @@ static void populateIntHeader(SYCLIntegrationHeader &H, const StringRef Name, auto Vis = std::make_tuple( // pre_visit - [&](int CaptureN, VarDecl *CapturedVar, FieldDecl *CapturedVal) { + [&](int FieldN, VarDecl *CapturedVar, FieldDecl *Field) { // Set offset in bytes Offset = static_cast( - Layout.getFieldOffset(CapturedVal->getFieldIndex())) / + Layout.getFieldOffset(Field->getFieldIndex())) / 8; }, // pre_visit_class_field - [&](int CaptureN, VarDecl *CapturedVar, FieldDecl *CapturedVal, + [&](int FieldN, VarDecl *CapturedVar, FieldDecl *Field, FieldDecl *MemberVal) { // Set offset of parent in bytes Offset = static_cast( - Layout.getFieldOffset(CapturedVal->getFieldIndex())) / + Layout.getFieldOffset(Field->getFieldIndex())) / 8; const RecordDecl *parent = MemberVal->getParent(); ASTContext &CtxMember = parent->getASTContext(); @@ -647,29 +624,29 @@ static void populateIntHeader(SYCLIntegrationHeader &H, const StringRef Name, 8; }, // visit_accessor - [&](int CaptureN, target AccTrg, QualType PointeeType, - DeclaratorDecl *CapturedVar, FieldDecl *CapturedVal) { + [&](int FieldN, target AccTrg, QualType PointeeType, + DeclaratorDecl *CapturedVar, FieldDecl *Field) { Knd = SYCLIntegrationHeader::kind_accessor; Info = static_cast(AccTrg); }, // visit_std_layout - [&](int CaptureN, VarDecl *CapturedVar, FieldDecl *CapturedVal) { + [&](int FieldN, VarDecl *CapturedVar, FieldDecl *Field) { // TODO this code (when used to handle a structure-typed scalar) relies // on the host and device structure layouts and sizes to be the same. // Need SYCL spec clarification on passing structures as parameters. Knd = SYCLIntegrationHeader::kind_std_layout; Info = static_cast( - Ctx.getTypeSizeInChars(CapturedVal->getType()).getQuantity()); + Ctx.getTypeSizeInChars(Field->getType()).getQuantity()); }, // visit_stream - [&](int CaptureN, VarDecl *CapturedVar, FieldDecl *CapturedVal) { + [&](int FieldN, VarDecl *CapturedVar, FieldDecl *Field) { llvm_unreachable("streams not supported yet"); }, // post_visit - [&](int CaptureN, VarDecl *CapturedVar, FieldDecl *CapturedVal) { + [&](int FieldN, VarDecl *CapturedVar, FieldDecl *Field) { H.addParamDesc(Knd, Info, Offset); }); - visitKernelLambdaCaptures(Lambda, Vis); + visitKernelObjFields(KernelObj, Vis); } // Removes all "(anonymous namespace)::" substrings from given string @@ -709,35 +686,31 @@ static std::string constructKernelName(QualType KernelNameType) { void Sema::ConstructSYCLKernel(FunctionDecl *KernelCallerFunc) { // TODO: Case when kernel is functor - CXXRecordDecl *LE = getKernelCallerLambdaArg(KernelCallerFunc); - if (LE) { - llvm::SmallVector ArgTys; - llvm::SmallVector NewArgDecls; - BuildArgTys(getASTContext(), LE, NewArgDecls, ArgTys); - - // Get Name for our kernel. - const TemplateArgumentList *TemplateArgs = - KernelCallerFunc->getTemplateSpecializationArgs(); - QualType KernelNameType = TypeName::getFullyQualifiedType( - TemplateArgs->get(0).getAsType(), - getASTContext(), - true); - std::string Name = constructKernelName(KernelNameType); - populateIntHeader(getSyclIntegrationHeader(), Name, KernelNameType, LE); - - FunctionDecl *SYCLKernel = - CreateSYCLKernelFunction(getASTContext(), Name, ArgTys, NewArgDecls); - - CompoundStmt *SYCLKernelBody = - CreateSYCLKernelBody(*this, KernelCallerFunc, SYCLKernel); - SYCLKernel->setBody(SYCLKernelBody); - - AddSyclKernel(SYCLKernel); - - // Let's mark all called functions with SYCL Device attribute. - MarkDeviceFunction Marker(*this); - Marker.TraverseStmt(SYCLKernelBody); - } + CXXRecordDecl *LE = getKernelObjectType(KernelCallerFunc); + assert(LE && "invalid kernel caller"); + llvm::SmallVector ArgTys; + llvm::SmallVector NewArgDecls; + BuildArgTys(getASTContext(), LE, NewArgDecls, ArgTys); + // Get Name for our kernel. + const TemplateArgumentList *TemplateArgs = + KernelCallerFunc->getTemplateSpecializationArgs(); + // The first teamplate argument always describes the kernel name - whether it + // is lambda or functor. + QualType KernelNameType = TypeName::getFullyQualifiedType( + TemplateArgs->get(0).getAsType(), + getASTContext(), + true); + std::string Name = constructKernelName(KernelNameType); + populateIntHeader(getSyclIntegrationHeader(), Name, KernelNameType, LE); + FunctionDecl *SYCLKernel = + CreateSYCLKernelFunction(getASTContext(), Name, ArgTys, NewArgDecls); + CompoundStmt *SYCLKernelBody = + CreateSYCLKernelBody(*this, KernelCallerFunc, SYCLKernel); + SYCLKernel->setBody(SYCLKernelBody); + AddSyclKernel(SYCLKernel); + // Let's mark all called functions with SYCL Device attribute. + MarkDeviceFunction Marker(*this); + Marker.TraverseStmt(SYCLKernelBody); } // ----------------------------------------------------------------------------- diff --git a/clang/test/CodeGenSYCL/Inputs/CL/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/CL/sycl.hpp new file mode 100644 index 000000000000..6773727397f7 --- /dev/null +++ b/clang/test/CodeGenSYCL/Inputs/CL/sycl.hpp @@ -0,0 +1,281 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#ifndef __SYCL_DEVICE_ONLY__ +#define __global +#endif + +#define ATTR_SYCL_KERNEL __attribute__((sycl_kernel)) + +// Dummy runtime classes to model SYCL API. +namespace cl { +namespace sycl { + +template < class T, class Alloc = std::allocator > +using vector_class = std::vector; + +using string_class = std::string; + +template +using function_class = std::function; + +using mutex_class = std::mutex; + +template +using unique_ptr_class = std::unique_ptr; + +template +using shared_ptr_class = std::shared_ptr; + +template +using weak_ptr_class = std::weak_ptr; + +template +using hash_class = std::hash; + +using exception_ptr_class = std::exception_ptr; + +template +using buffer_allocator = std::allocator; + +namespace access { + +enum class target { + global_buffer = 2014, + constant_buffer, + local, + image, + host_buffer, + host_image, + image_array +}; + +enum class mode { + read = 1024, + write, + read_write, + discard_write, + discard_read_write, + atomic +}; + +enum class placeholder { + false_t, + true_t +}; + +enum class address_space : int { + private_space = 0, + global_space, + constant_space, + local_space +}; +} // namespace access + +namespace property { + +enum prop_type { + use_host_ptr = 0, + use_mutex, + context_bound, + enable_profiling, + base_prop +}; + +struct property_base { + virtual prop_type type() const = 0; +}; +} // namespace property + +namespace detail { +template struct allProperties : std::true_type {}; +template +struct allProperties + : std::conditional< + std::is_base_of::value, + allProperties, std::false_type>::type {}; +} // namespace detail + +class property_list { +public: + template ::value>::type> + property_list(propertyTN... props) {} + + template bool has_property() const { return true; } + + template propertyT get_property() const { + return propertyT{}; + } + + bool operator==(const property_list &rhs) const { return false; } + + bool operator!=(const property_list &rhs) const { return false; } +}; + +template +struct id { +}; + +template +struct range { + template range(T...args) {} // fake constructor +}; + +template +struct nd_range { +}; + +template +struct _ImplT { + range Range; +}; + +template + class accessor { + + public: + + void __set_pointer(__global dataT *Ptr) { } + void __set_range(range Range) { + __impl.Range = Range; + } + void use(void) const {} + template void use(T...args) { } + template void use(T...args) const { } + _ImplT __impl; + +}; + +class kernel {}; +class context {}; +class device {}; +class event{}; + +class queue { +public: + template event submit(T cgf) { return event{}; } + + void wait() {} + void wait_and_throw() {} + void throw_asynchronous() {} +}; + +class handler { +public: + template + ATTR_SYCL_KERNEL + void parallel_for(range numWorkItems, KernelType kernelFunc) {} + + template + ATTR_SYCL_KERNEL + void parallel_for(nd_range executionRange, + KernelType kernelFunc) {} + + ATTR_SYCL_KERNEL + void single_task(kernel syclKernel) {} + + template + ATTR_SYCL_KERNEL + void parallel_for(range numWorkItems, kernel syclKernel) {} + + template + ATTR_SYCL_KERNEL + void parallel_for(nd_range ndRange, kernel syclKernel) {} + + template + ATTR_SYCL_KERNEL + void single_task(kernel syclKernel, KernelType kernelFunc) {} + + template + ATTR_SYCL_KERNEL + void single_task(KernelType kernelFunc) {} + + template + ATTR_SYCL_KERNEL + void parallel_for(range numWorkItems, kernel syclKernel, + KernelType kernelFunc) {} + + template + ATTR_SYCL_KERNEL + void parallel_for(range numWorkItems, KernelType kernelFunc) {} + + template + ATTR_SYCL_KERNEL + void parallel_for(nd_range ndRange, kernel syclKernel, + KernelType kernelFunc) {} +}; + +template > +class buffer { +public: + using value_type = T; + using reference = value_type &; + using const_reference = const value_type &; + using allocator_type = AllocatorT; + + template buffer(ParamTypes...args) {} // fake constructor + + buffer(const range &bufferRange, + const property_list &propList = {}) {} + + buffer(T *hostData, const range &bufferRange, + const property_list &propList = {}) {} + + buffer(const T *hostData, const range &bufferRange, + const property_list &propList = {}) {} + + buffer(const shared_ptr_class &hostData, + const range &bufferRange, + const property_list &propList = {}) {} + + buffer(const buffer &rhs) = default; + + buffer(buffer &&rhs) = default; + + buffer &operator=(const buffer &rhs) = default; + + buffer &operator=(buffer &&rhs) = default; + + ~buffer() = default; + + range get_range() const { return range{}; } + + size_t get_count() const { return 0; } + + size_t get_size() const { return 0; } + + template + accessor + get_access(handler &commandGroupHandler) { + return accessor{}; + } + + template + accessor + get_access() { + accessor{}; + } + + template + void set_final_data(Destination finalData = nullptr) {} +}; + +} // namespace sycl +} // namespace cl + + diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp new file mode 100644 index 000000000000..9acf8e514aae --- /dev/null +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -0,0 +1,280 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#ifndef __SYCL_DEVICE_ONLY__ +#define __global +#endif + +#define ATTR_SYCL_KERNEL __attribute__((sycl_kernel)) + +// Dummy runtime classes to model SYCL API. +namespace cl { +namespace sycl { + +template < class T, class Alloc = std::allocator > +using vector_class = std::vector; + +using string_class = std::string; + +template +using function_class = std::function; + +using mutex_class = std::mutex; + +template +using unique_ptr_class = std::unique_ptr; + +template +using shared_ptr_class = std::shared_ptr; + +template +using weak_ptr_class = std::weak_ptr; + +template +using hash_class = std::hash; + +using exception_ptr_class = std::exception_ptr; + +template +using buffer_allocator = std::allocator; + +namespace access { + +enum class target { + global_buffer = 2014, + constant_buffer, + local, + image, + host_buffer, + host_image, + image_array +}; + +enum class mode { + read = 1024, + write, + read_write, + discard_write, + discard_read_write, + atomic +}; + +enum class placeholder { + false_t, + true_t +}; + +enum class address_space : int { + private_space = 0, + global_space, + constant_space, + local_space +}; +} // namespace access + +namespace property { + +enum prop_type { + use_host_ptr = 0, + use_mutex, + context_bound, + enable_profiling, + base_prop +}; + +struct property_base { + virtual prop_type type() const = 0; +}; +} // namespace property + +namespace detail { +template struct allProperties : std::true_type {}; +template +struct allProperties + : std::conditional< + std::is_base_of::value, + allProperties, std::false_type>::type {}; +} // namespace detail + +class property_list { +public: + template ::value>::type> + property_list(propertyTN... props) {} + + template bool has_property() const { return true; } + + template propertyT get_property() const { + return propertyT{}; + } + + bool operator==(const property_list &rhs) const { return false; } + + bool operator!=(const property_list &rhs) const { return false; } +}; + +template +struct id { +}; + +template +struct range { + template range(T...args) {} // fake constructor +}; + +template +struct nd_range { +}; + +template +struct _ImplT { + range Range; +}; + +template + class accessor { + + public: + + void __set_pointer(__global dataT *Ptr) { } + void __set_range(range Range) { + __impl.Range = Range; + } + void use(void) const {} + template void use(T...args) { } + template void use(T...args) const { } + _ImplT __impl; + +}; + +class kernel {}; +class context {}; +class device {}; +class event{}; + +class queue { +public: + template event submit(T cgf) { return event{}; } + + void wait() {} + void wait_and_throw() {} + void throw_asynchronous() {} +}; + +class handler { +public: + template + ATTR_SYCL_KERNEL + void parallel_for(range numWorkItems, KernelType kernelFunc) {} + + template + ATTR_SYCL_KERNEL + void parallel_for(nd_range executionRange, + KernelType kernelFunc) {} + + ATTR_SYCL_KERNEL + void single_task(kernel syclKernel) {} + + template + ATTR_SYCL_KERNEL + void parallel_for(range numWorkItems, kernel syclKernel) {} + + template + ATTR_SYCL_KERNEL + void parallel_for(nd_range ndRange, kernel syclKernel) {} + + template + ATTR_SYCL_KERNEL + void single_task(kernel syclKernel, KernelType kernelFunc) {} + + template + ATTR_SYCL_KERNEL + void single_task(KernelType kernelFunc) {} + + template + ATTR_SYCL_KERNEL + void parallel_for(range numWorkItems, kernel syclKernel, + KernelType kernelFunc) {} + + template + ATTR_SYCL_KERNEL + void parallel_for(range numWorkItems, KernelType kernelFunc) {} + + template + ATTR_SYCL_KERNEL + void parallel_for(nd_range ndRange, kernel syclKernel, + KernelType kernelFunc) {} +}; + +template > +class buffer { +public: + using value_type = T; + using reference = value_type &; + using const_reference = const value_type &; + using allocator_type = AllocatorT; + + template buffer(ParamTypes...args) {} // fake constructor + + buffer(const range &bufferRange, + const property_list &propList = {}) {} + + buffer(T *hostData, const range &bufferRange, + const property_list &propList = {}) {} + + buffer(const T *hostData, const range &bufferRange, + const property_list &propList = {}) {} + + buffer(const shared_ptr_class &hostData, + const range &bufferRange, + const property_list &propList = {}) {} + + buffer(const buffer &rhs) = default; + + buffer(buffer &&rhs) = default; + + buffer &operator=(const buffer &rhs) = default; + + buffer &operator=(buffer &&rhs) = default; + + ~buffer() = default; + + range get_range() const { return range{}; } + + size_t get_count() const { return 0; } + + size_t get_size() const { return 0; } + + template + accessor + get_access(handler &commandGroupHandler) { + return accessor{}; + } + + template + accessor + get_access() { + accessor{}; + } + + template + void set_final_data(Destination finalData = nullptr) {} +}; + +} // namespace sycl +} // namespace cl + diff --git a/clang/test/CodeGenSYCL/kernel_functor.cpp b/clang/test/CodeGenSYCL/kernel_functor.cpp new file mode 100644 index 000000000000..fd84e51e998c --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel_functor.cpp @@ -0,0 +1,191 @@ +// RUN: %clang -I %S/Inputs -std=c++11 --sycl -Xclang -fsycl-int-header=%t.h %s -c -o kernel.spv +// RUN: %clang -I %S/Inputs -std=c++11 -include %t.h -g %s -o %t.out -lstdc++ +// RUN: %t.out | FileCheck %s + +// Note: short version 'clang -fsycl x.cpp' is not possible as it will require +// linking with sycl.so, which we don't want to do in codegen tests. + +// CHECK: Functor1 +// CHECK: ::ns::Functor2 +// CHECK: TmplFunctor +// CHECK: TmplConstFunctor + +// Checks that functors are supported as SYCL kernels. + +#include +#include + +constexpr auto sycl_read_write = cl::sycl::access::mode::read_write; +constexpr auto sycl_global_buffer = cl::sycl::access::target::global_buffer; + +// Case 1: +// - functor class is defined in an anonymous namespace +// - the '()' operator: +// * does not have parameters (to be used in 'single_task'). +// * has no 'const' qualifier +namespace { + class Functor1 { + public: + Functor1(int X_, cl::sycl::accessor &Acc_) : + X(X_), Acc(Acc_) + {} + + void operator()() { + Acc.use(X); + } + + private: + int X; + cl::sycl::accessor Acc; + }; +} + +// Case 2: +// - functor class is defined in a namespace +// - the '()' operator: +// * does not have parameters (to be used in 'single_task'). +// * has the 'const' qualifier +namespace ns { + class Functor2 { + public: + Functor2(int X_, cl::sycl::accessor &Acc_) : + X(X_), Acc(Acc_) + {} + + void operator()() const { + Acc.use(X); + } + + private: + int X; + cl::sycl::accessor Acc; + }; +} + +// Case 3: +// - functor class is templated and defined in the translation unit scope +// - the '()' operator: +// * has a parameter of type cl::sycl::id<1> (to be used in 'parallel_for'). +// * has no 'const' qualifier +template class TmplFunctor { +public: + TmplFunctor(T X_, cl::sycl::accessor &Acc_) : + X(X_), Acc(Acc_) + {} + + void operator()(cl::sycl::id<1> id) { + Acc.use(id, X); + } + +private: + T X; + cl::sycl::accessor Acc; +}; + +// Case 4: +// - functor class is templated and defined in the translation unit scope +// - the '()' operator: +// * has a parameter of type cl::sycl::id<1> (to be used in 'parallel_for'). +// * has the 'const' qualifier +template class TmplConstFunctor { +public: + TmplConstFunctor(T X_, cl::sycl::accessor &Acc_) : + X(X_), Acc(Acc_) + {} + + void operator()(cl::sycl::id<1> id) const { + Acc.use(id, X); + } + +private: + T X; + cl::sycl::accessor Acc; +}; + +// Exercise non-templated functors in 'single_task'. +int foo(int X) { + int A[] = { 10 }; + { + cl::sycl::queue Q; + cl::sycl::buffer Buf(A, 1); + + Q.submit([&](cl::sycl::handler& cgh) { + auto Acc = Buf.get_access(cgh); + Functor1 F(X, Acc); + + cgh.single_task(F); + }); + Q.submit([&](cl::sycl::handler& cgh) { + auto Acc = Buf.get_access(cgh); + ns::Functor2 F(X, Acc); + + cgh.single_task(F); + }); + Q.submit([&](cl::sycl::handler& cgh) { + auto Acc = Buf.get_access(cgh); + ns::Functor2 F(X, Acc); + + cgh.single_task(F); + }); + } + return A[0]; +} + +#define ARR_LEN(x) sizeof(x)/sizeof(x[0]) + +// Exercise templated functors in 'parallel_for'. +template T bar(T X) { + T A[] = { (T)10, (T)10 }; + { + cl::sycl::queue Q; + cl::sycl::buffer Buf(A, ARR_LEN(A)); + + Q.submit([&](cl::sycl::handler& cgh) { + auto Acc = Buf.template get_access(cgh); + TmplFunctor F(X, Acc); + + cgh.parallel_for(cl::sycl::range<1>(ARR_LEN(A)), F); + }); + // Spice with lambdas to make sure functors and lambdas work together. + Q.submit([&](cl::sycl::handler& cgh) { + auto Acc = Buf.template get_access(cgh); + cgh.parallel_for( + cl::sycl::range<1>(ARR_LEN(A)), + [=](cl::sycl::id<1> id) { Acc.use(id, X); }); + }); + Q.submit([&](cl::sycl::handler& cgh) { + auto Acc = Buf.template get_access(cgh); + TmplConstFunctor F(X, Acc); + + cgh.parallel_for(cl::sycl::range<1>(ARR_LEN(A)), F); + }); + } + T res = (T)0; + + for (int i = 0; i < ARR_LEN(A); i++) { + res += A[i]; + } + return res; +} + +#ifndef __SYCL_DEVICE_ONLY__ +using namespace cl::sycl::detail; +#endif // __SYCL_DEVICE_ONLY__ + +int main() { + const int Res1 = foo(10); + const int Res2 = bar(10); + const int Gold1 = 40; + const int Gold2 = 80; + +#ifndef __SYCL_DEVICE_ONLY__ + std::cout << "-- RUNNING THE TEST --\n"; + std::cout << KernelInfo::getName() << "\n"; + std::cout << KernelInfo::getName() << "\n"; + std::cout << KernelInfo>::getName() << "\n"; + std::cout << KernelInfo>::getName() << "\n"; +#endif //__SYCL_DEVICE_ONLY__ + + return 0; +} + diff --git a/clang/test/CodeGenSYCL/struct_kernel_param.cpp b/clang/test/CodeGenSYCL/struct_kernel_param.cpp new file mode 100644 index 000000000000..ebec499a9232 --- /dev/null +++ b/clang/test/CodeGenSYCL/struct_kernel_param.cpp @@ -0,0 +1,44 @@ +// RUN: %clang --sycl -Xclang -fsycl-int-header=%t.h %s -c -o %T/kernel.spv +// RUN: FileCheck -input-file=%t.h %s + +// CHECK: const kernel_param_desc_t kernel_signatures[] = { +// CHECK-NEXT: //--- MyKernel +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 2014, 0 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 8, 16 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 24, 32 }, +// CHECK-EMPTY: +// CHECK-NEXT:}; + + +// This test checks if compiler accepts structures as kernel parameters. + +#include "sycl.hpp" + +using namespace cl::sycl; + +struct MyNestedStruct { + int FldArr[1]; + float FldFloat; +}; + +struct MyStruct { + int FldInt; + MyNestedStruct FldStruct; + int FldArr[3]; +}; + +MyStruct GlobS; + +bool test0() { + MyStruct S = GlobS; + MyStruct S0 = { 0 }; + { + buffer Buf(&S0, range<1>(1)); + queue myQueue; + myQueue.submit([&](handler &cgh) { + auto B = Buf.get_access(cgh); + cgh.single_task([=] { B[0] = S; }); + }); + } +} + diff --git a/clang/test/SemaSYCL/non-std-layout-param.cpp b/clang/test/SemaSYCL/non-std-layout-param.cpp new file mode 100644 index 000000000000..c3681e06b60b --- /dev/null +++ b/clang/test/SemaSYCL/non-std-layout-param.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -fsycl-is-device -verify -fsyntax-only -std=c++11 %s + +// This test checks if compiler reports compilation error on an attempt to pass +// non-standard layout struct object as SYCL kernel parameter. + +struct Base { + int X; +}; + +// This struct has non-standard layout, because both C (the most derived class) +// and Base have non-static data members. +struct C : public Base { + int Y; +}; + +template +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { + kernelFunc(); +} + + +void test() { + // expected-error@+1 2{{kernel parameter has non-standard layout class/struct type}} + C C0; + C0.Y=0; + kernel_single_task([&] { C0.Y++; }); +} +