Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][CUDA] Initial CUDA backend support #1091

Merged
merged 24 commits into from
Feb 24, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
12f4abe
[SYCL][LIBCLC] Additional libclc builtins to support SYCL work
Feb 6, 2020
979b448
[SYCL] CMake and lit support for SYCL CUDA backend
Feb 6, 2020
b937adf
[SYCL][CUDA] Add SYCL CUDA support to clang driver
Feb 18, 2020
4f2e019
[SYCL] Local Accessor Support for CUDA
Feb 7, 2020
b63f78a
[SYCL][CUDA] Change __spirv_BuiltIn.. to functions
Feb 7, 2020
fc60859
[SYCL][CUDA] Initial Implementation of the CUDA backend
Feb 24, 2020
17c8ccf
[SYCL] Update libclc install rules
Feb 3, 2020
680f890
[SYCL][CUDA] Inline cl namespace to simplify SYCL API usage
fwyzard Feb 3, 2020
5e71823
Added missing flags for device-side builtins
Ruyk Feb 10, 2020
b01ff28
[SYCL][CUDA] Removing unnecessary tool from the tree
Ruyk Feb 10, 2020
abee4f9
[SYCL][PI] Fix kernel group info parameter conversion
Feb 12, 2020
cfd1266
[SYCL] Changed CUDA unit tests to call through plugin
Feb 18, 2020
61a206b
[SYCL] Have default_selector consider SYCL_BE
Feb 14, 2020
c2168af
[SYCL] Select GlobalPlugin based on SYCL_BE
Feb 17, 2020
c7e2846
[SYCL] Improve default device selection checks
Feb 17, 2020
23b179e
[SYCL] Formatting update for device_selector.cpp
Feb 18, 2020
52736fd
[SYCL][CUDA] Refactor __SYCL_INLINE macro
fwyzard Feb 13, 2020
62afe84
[SYCL][CUDA] Code style and cleanup to CUDA support
Feb 21, 2020
5f5e017
[SYCL] Pass SYCL_BE=PI_OPENCL in check-sycl
Feb 20, 2020
54678ab
[SYCL][CUDA] Remove PI_CUDA specific details from clang
Feb 20, 2020
fb4521e
[SYCL][CUDA] Disable linear_id/opencl-interop.cpp for cuda
Feb 20, 2020
ab9f4be
[SYCL][CUDA] Further fixes to CUDA device selection
Feb 20, 2020
cdab838
[SYCL] Enable asserts in all buildbot builds
Feb 21, 2020
5b1ff35
[SYCL][CUDA] Minor test and build configuration
Feb 24, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
58 changes: 39 additions & 19 deletions buildbot/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,49 @@ def do_configure(args):
sycl_dir = os.path.join(args.src_dir, "sycl")
spirv_dir = os.path.join(args.src_dir, "llvm-spirv")
ocl_header_dir = os.path.join(args.obj_dir, "OpenCL-Headers")
icd_loader_lib = ''
icd_loader_lib = os.path.join(args.obj_dir, "OpenCL-ICD-Loader", "build")
llvm_targets_to_build = 'X86'
llvm_enable_projects = 'clang;llvm-spirv;sycl;opencl-aot'
libclc_targets_to_build = ''
sycl_build_pi_cuda = 'OFF'
llvm_enable_assertions = 'OFF'
bader marked this conversation as resolved.
Show resolved Hide resolved

if platform.system() == 'Linux':
icd_loader_lib = os.path.join(args.obj_dir, "OpenCL-ICD-Loader", "build", "libOpenCL.so")
icd_loader_lib = os.path.join(icd_loader_lib, "libOpenCL.so")
else:
icd_loader_lib = os.path.join(args.obj_dir, "OpenCL-ICD-Loader", "build", "OpenCL.lib")
icd_loader_lib = os.path.join(icd_loader_lib, "OpenCL.lib")

if args.cuda:
llvm_targets_to_build += ';NVPTX'
llvm_enable_projects += ';libclc'
libclc_targets_to_build = 'nvptx64--;nvptx64--nvidiacl'
sycl_build_pi_cuda = 'ON'

if args.assertions:
llvm_enable_assertions = 'ON'

install_dir = os.path.join(args.obj_dir, "install")

cmake_cmd = ["cmake",
"-G", "Ninja",
"-DCMAKE_BUILD_TYPE={}".format(args.build_type),
"-DLLVM_EXTERNAL_PROJECTS=sycl;llvm-spirv;opencl-aot",
"-DLLVM_EXTERNAL_SYCL_SOURCE_DIR={}".format(sycl_dir),
"-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR={}".format(spirv_dir),
"-DLLVM_ENABLE_PROJECTS=clang;sycl;llvm-spirv;opencl-aot",
"-DOpenCL_INCLUDE_DIR={}".format(ocl_header_dir),
"-DOpenCL_LIBRARY={}".format(icd_loader_lib),
"-DLLVM_BUILD_TOOLS=ON",
"-DSYCL_ENABLE_WERROR=ON",
"-DLLVM_ENABLE_ASSERTIONS=ON",
"-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
"-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests.
llvm_dir]
cmake_cmd = [
"cmake",
"-G", "Ninja",
"-DCMAKE_BUILD_TYPE={}".format(args.build_type),
"-DLLVM_ENABLE_ASSERTIONS={}".format(llvm_enable_assertions),
"-DLLVM_TARGETS_TO_BUILD={}".format(llvm_targets_to_build),
"-DLLVM_EXTERNAL_PROJECTS=sycl;llvm-spirv;opencl-aot",
"-DLLVM_EXTERNAL_SYCL_SOURCE_DIR={}".format(sycl_dir),
"-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR={}".format(spirv_dir),
"-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects),
"-DLIBCLC_TARGETS_TO_BUILD={}".format(libclc_targets_to_build),
"-DOpenCL_INCLUDE_DIR={}".format(ocl_header_dir),
"-DOpenCL_LIBRARY={}".format(icd_loader_lib),
"-DSYCL_BUILD_PI_CUDA={}".format(sycl_build_pi_cuda),
"-DLLVM_BUILD_TOOLS=ON",
"-DSYCL_ENABLE_WERROR=ON",
"-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
"-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests.
llvm_dir
]

print(cmake_cmd)

Expand Down Expand Up @@ -63,6 +82,8 @@ def main():
parser.add_argument("-o", "--obj-dir", metavar="OBJ_DIR", required=True, help="build directory")
parser.add_argument("-t", "--build-type",
metavar="BUILD_TYPE", required=True, help="build type, debug or release")
parser.add_argument("--cuda", action='store_true', help="switch from OpenCL to CUDA")
parser.add_argument("--assertions", action='store_true', help="build with assertions")

args = parser.parse_args()

Expand All @@ -74,4 +95,3 @@ def main():
ret = main()
exit_code = 0 if ret else 1
sys.exit(exit_code)

4 changes: 4 additions & 0 deletions clang/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,10 @@ endif()
include(CheckIncludeFile)
check_include_file(sys/resource.h CLANG_HAVE_RLIMITS)

if(SYCL_BUILD_PI_CUDA)
set(SYCL_HAVE_PI_CUDA 1)
endif()

set(CLANG_RESOURCE_DIR "" CACHE STRING
"Relative directory from the Clang binary to its resource files.")

Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ def warn_drv_unknown_cuda_version: Warning<
"Unknown CUDA version %0. Assuming the latest supported version %1">,
InGroup<CudaUnknownVersion>;
def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">;
def err_drv_no_sycl_libspirv : Error<
"cannot find `libspirv-nvptx64--nvidiacl.bc`. Provide path to libspirv library via "
"-fsycl-libspirv-path, or pass -fno-sycl-libspirv to build without linking with libspirv.">;
def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">;
def err_drv_invalid_thread_model_for_target : Error<
"invalid thread model '%0' in '%1' for this target">;
Expand Down
2 changes: 1 addition & 1 deletion clang/include/clang/Basic/DiagnosticIDs.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ namespace clang {
// Size of each of the diagnostic categories.
enum {
DIAG_SIZE_COMMON = 300,
DIAG_SIZE_DRIVER = 250, // 200 -> 250 for SYCL related diagnostics
DIAG_SIZE_DRIVER = 210,
DIAG_SIZE_FRONTEND = 150,
DIAG_SIZE_SERIALIZATION = 120,
DIAG_SIZE_LEX = 400,
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Config/config.h.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@
#cmakedefine01 CLANG_ENABLE_OBJC_REWRITER
#cmakedefine01 CLANG_ENABLE_STATIC_ANALYZER

/* Define if we have SYCL PI CUDA support */
#cmakedefine SYCL_HAVE_PI_CUDA ${SYCL_HAVE_PI_CUDA}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
#cmakedefine SYCL_HAVE_PI_CUDA ${SYCL_HAVE_PI_CUDA}
#cmakedefine01 SYCL_HAVE_PI_CUDA

According to the docs it should do the same

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need this define? Can we have "SYCL PI CUDA support" unconditionally?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we were to have PI CUDA support unconditionally, the CUDA toolchain would always be required for compilation. We decided to make it optional to allow people who only use the OpenCL plugin to compile the project without a CUDA toolchain on their system.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we were to have PI CUDA support unconditionally, the CUDA toolchain would always be required for compilation. We decided to make it optional to allow people who only use the OpenCL plugin to compile the project without a CUDA toolchain on their system.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to my understanding, we need the CUDA toolchain only to build the CUDA plugin.
Could you clarify why we should require the CUDA toolchain to build the driver?
https://llvm.org/docs/CompileCudaWithLLVM.html - doesn't seem to require some custom driver.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are correct, we only need the CUDA toolchain for building the plugin, but we limit the valid SYCL triples in the clang driver based on whether PI CUDA support is available or not.

https://github.com/intel/llvm/pull/1091/files#diff-beaf25b0cdf8830dd4ea165404b00671R618

/// Decide whether \p T may be used as a SYCL device triple.
///
/// \param T the triple to validate.
/// \returns true if the triple is accepted, false otherwise.
static bool isValidSYCLTriple(llvm::Triple T) {
#ifdef SYCL_HAVE_PI_CUDA
  // When SYCL_HAVE_PI_CUDA is defined, NVPTX triples are accepted outright.
  if (T.isNVPTX())
    return true;
#endif
  // Every remaining accepted triple must be a SPIR one.
  if (!T.isSPIR())
    return false;
  // A triple carrying an explicit sub-arch needs no further checking.
  if (T.getSubArch() != llvm::Triple::NoSubArch)
    return true;
  // Without a sub-arch, the arch name has to be spelled exactly "spir" or
  // "spir64" to match the parsed arch; any other spelling is an invalid
  // SubArch for AOT.
  const StringRef ArchName(T.getArchName());
  const bool MisspelledSpir =
      T.getArch() == llvm::Triple::spir && !ArchName.equals("spir");
  const bool MisspelledSpir64 =
      T.getArch() == llvm::Triple::spir64 && !ArchName.equals("spir64");
  return !(MisspelledSpir || MisspelledSpir64);
}

We can remove this limitation though and always allow nvptx triples for compilation, regardless of whether the CUDA plugin is available.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 for removing.


/* Spawn a new process clang.exe for the CC1 tool invocation, when necessary */
#cmakedefine01 CLANG_SPAWN_CC1

Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1872,6 +1872,9 @@ def fsycl_help_EQ : Joined<["-"], "fsycl-help=">,
def fsycl_help : Flag<["-"], "fsycl-help">, Alias<fsycl_help_EQ>,
Flags<[DriverOption, CoreOption]>, AliasArgs<["all"]>, HelpText<"Emit help information "
"from all of the offline compilation tools">;
def fsycl_libspirv_path_EQ : Joined<["-"], "fsycl-libspirv-path=">,
Flags<[CC1Option, CoreOption]>, HelpText<"Path to libspirv library">;
def fno_sycl_libspirv : Flag<["-"], "fno-sycl-libspirv">, HelpText<"Disable check for libspirv">;
def fsyntax_only : Flag<["-"], "fsyntax-only">,
Flags<[DriverOption,CoreOption,CC1Option]>, Group<Action_Group>;
def ftabstop_EQ : Joined<["-"], "ftabstop=">, Group<f_Group>;
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Basic/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
.Default(32);
}

TLSSupported = false;
// FIXME: Needed for compiling SYCL to PTX.
TLSSupported = Triple.getEnvironment() == llvm::Triple::SYCLDevice;
VLASupported = false;
AddrSpaceMap = &NVPTXAddrSpaceMap;
UseAddrSpaceMapMangling = true;
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Basic/Targets/NVPTX.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
Opts.support("cl_khr_global_int32_extended_atomics");
Opts.support("cl_khr_local_int32_base_atomics");
Opts.support("cl_khr_local_int32_extended_atomics");
// PTX actually supports 64-bit operations even if the Nvidia OpenCL
// runtime does not report support for them.
// This is required for libclc to compile 64-bit atomic functions.
// FIXME: maybe we should have a way to control this?
Opts.support("cl_khr_int64_base_atomics");
Opts.support("cl_khr_int64_extended_atomics");
}

/// \returns If a target requires an address within a target specific address
Expand Down
3 changes: 0 additions & 3 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -842,9 +842,6 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
PerFunctionPasses.add(
createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));

if (LangOpts.SYCLIsDevice)
PerFunctionPasses.add(createSYCLLowerWGScopePass());

CreatePasses(PerModulePasses, PerFunctionPasses);

legacy::PassManager CodeGenPasses;
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,12 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
return *FI;

unsigned CC = ClangCallConvToLLVMCallConv(info.getCC());
// This is required so SYCL kernels are successfully processed by tools from CUDA. Kernels
// with a `spir_kernel` calling convention are ignored otherwise.
if (CC == llvm::CallingConv::SPIR_KERNEL && CGM.getTriple().isNVPTX() &&
getContext().getLangOpts().SYCLIsDevice) {
CC = llvm::CallingConv::C;
}

// Construct the function info. We co-allocate the ArgInfos.
FI = CGFunctionInfo::create(CC, instanceMethod, chainCall, info,
Expand Down
13 changes: 13 additions & 0 deletions clang/lib/CodeGen/CodeGenAction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "CodeGenModule.h"
#include "CoverageMappingGen.h"
#include "MacroPPCallbacks.h"
#include "SYCLLowerIR/LowerWGScope.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclCXX.h"
Expand All @@ -33,6 +34,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LLVMRemarkStreamer.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
Expand Down Expand Up @@ -326,6 +328,17 @@ namespace clang {
CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone)
Ctx.setDiagnosticsHotnessRequested(true);

// The parallel_for_work_group legalization pass can emit calls to
// builtin functions. Definitions of those builtins can be provided in
// LinkModule. We force the pass to legalize the code before the link
// happens.
if (LangOpts.SYCLIsDevice) {
PrettyStackTraceString CrashInfo("Pre-linking SYCL passes");
legacy::PassManager PreLinkingSyclPasses;
PreLinkingSyclPasses.add(createSYCLLowerWGScopePass());
PreLinkingSyclPasses.run(*getModule());
}

// Link each LinkModule into our module.
if (LinkInModules())
return;
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,8 @@ void CodeGenModule::createSYCLRuntime() {
switch (getTriple().getArch()) {
case llvm::Triple::spir:
case llvm::Triple::spir64:
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
SYCLRuntime.reset(new CGSYCLRuntime(*this));
break;
default:
Expand Down
Loading