From 60f889aa6e1aefaf2b919864b33c8b2e0c231467 Mon Sep 17 00:00:00 2001 From: Gary Miguel Date: Thu, 29 Jun 2023 17:49:57 +0000 Subject: [PATCH] Include versioned SO files in cuda libraries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prior to this PR, I believe the way things are working is the following: * `repositories.local_cuda()` creates a bazel repository which is a symlink to some absolute path, default `/usr/local/cuda`. * A `cuda_library` depends on `@local_cuda//:cuda_runtime`. * In its sandbox, bazel creates a directory which only contains the declared dependencies and passes it to the linker with `-L`. In this case, that means it includes `libcudart.so`, but NOT `libcudart.so.12` (replace 12 with whatever version is being used). You can see this in the linker params with a line like: `-Lbazel-out/k8-fastbuild/bin/_solib_k8/_U@local_Ucuda_S_S_Ccudart_Uso___Ucuda_Slib64`, and ```sh ➜ ls bazel-out/k8-fastbuild/bin/_solib_k8/_U@local_Ucuda_S_S_Ccudart_Uso___Ucuda_Slib64 libcudart.so@ ``` * Similarly the linker specifies the `rpath` to include a directory inside `runfiles`, which also contains just libcudart.so: `-rpath $ORIGIN/main.runfiles/rules_cuda_examples/_solib_k8/_U@local_Ucuda_S_S_Ccudart_Uso___Ucuda_Slib64`. * The linker opens `libcudart.so`, which is a symlink (possibly to another symlink). The final non-link target file has SONAME set to a versioned name like `libcudart.so.12`. So the binary is linked against `libcudart.so.12`. * At runtime (including `bazel test`), the dynamic linker `ldd` will look in the runfiles for `libcudart.so.12` and fail to find it. Then it will look in system search paths. On my machine it will look in `/lib/x86_64-linux-gnu`, amongst other paths. * If there is a libcudart.so.12 in any of the system paths, then it works. Else, it fails. The situation that triggered the failure for me was: 1. Multiple versions of CUDA installed, 11 and 12. 2. 
`/lib/x86_64-linux-gnu/libcudart.so` is a symlink to `/lib/x86_64-linux-gnu/libcudart.so.11`. 3. In my WORKSPACE.bazel I have: `local_cuda(name = "local_cuda", toolkit_path = "/usr/local/cuda-12")` In this case the binary gets linked against `libcudart.so.12` but the `rpath` does NOT contain `/usr/local/cuda-12`, and the system's default lib paths also don't contain it, so ldd fails to find it at runtime. So I think this proposed change will not actually make things any worse as to your concern: > This is not something reasonable path outside of bazel environment The only way this works at runtime before this PR is if `ldd` searched the system library search paths. It will continue to do that in the case that the `libcudart.so.12` is missing from runfiles (I tested this by `rm`'ing the .so file from the runfiles and confirmed that `ldd` then finds the one that it used to find before this PR). So I'm pretty sure this will not cause any regressions at all in things that currently work, but it will fix things that currently do not. Fixes: https://github.com/bazel-contrib/rules_cuda/issues/113 --- cuda/defs.bzl | 4 ++- cuda/private/os_helpers.bzl | 26 +++++++++++++++++++ cuda/runtime/BUILD.local_cuda | 49 +++++++++++++---------------------- 3 files changed, 47 insertions(+), 32 deletions(-) diff --git a/cuda/defs.bzl b/cuda/defs.bzl index ba1aa4ab..70459824 100644 --- a/cuda/defs.bzl +++ b/cuda/defs.bzl @@ -3,7 +3,7 @@ Core rules for building CUDA projects. 
""" load("//cuda/private:providers.bzl", _CudaArchsInfo = "CudaArchsInfo", _cuda_archs = "cuda_archs") -load("//cuda/private:os_helpers.bzl", _if_linux = "if_linux", _if_windows = "if_windows") +load("//cuda/private:os_helpers.bzl", _cc_import_versioned_sos = "cc_import_versioned_sos", _if_linux = "if_linux", _if_windows = "if_windows") load("//cuda/private:rules/cuda_objects.bzl", _cuda_objects = "cuda_objects") load("//cuda/private:rules/cuda_library.bzl", _cuda_library = "cuda_library") load("//cuda/private:rules/cuda_toolkit.bzl", _cuda_toolkit = "cuda_toolkit") @@ -33,3 +33,5 @@ cuda_library = _cuda_library if_linux = _if_linux if_windows = _if_windows + +cc_import_versioned_sos = _cc_import_versioned_sos diff --git a/cuda/private/os_helpers.bzl b/cuda/private/os_helpers.bzl index f9068a3e..50bcf73e 100644 --- a/cuda/private/os_helpers.bzl +++ b/cuda/private/os_helpers.bzl @@ -1,3 +1,5 @@ +load("@bazel_skylib//lib:paths.bzl", "paths") + def if_linux(if_true, if_false = []): return select({ "@platforms//os:linux": if_true, @@ -9,3 +11,27 @@ def if_windows(if_true, if_false = []): "@platforms//os:windows": if_true, "//conditions:default": if_false, }) + +def cc_import_versioned_sos(name, shared_library): + """Creates a cc_library that depends on all versioned .so files with the given prefix. + + If <shared_library> is path/to/foo.so, and it is a symlink to foo.so.<version>, + this should be used instead of cc_import. + The versioned files are typically needed at runtime, but not at build time. + + Args: + name: Name of the cc_library. + shared_library: Prefix of the versioned .so files. 
+ """ + so_paths = native.glob([shared_library + "*"]) + + [native.cc_import( + name = paths.basename(p), + shared_library = p, + target_compatible_with = ["@platforms//os:linux"], + ) for p in so_paths] + + native.cc_library( + name = name, + deps = [":%s" % paths.basename(p) for p in so_paths], + ) diff --git a/cuda/runtime/BUILD.local_cuda b/cuda/runtime/BUILD.local_cuda index a775673f..6f5cae4b 100644 --- a/cuda/runtime/BUILD.local_cuda +++ b/cuda/runtime/BUILD.local_cuda @@ -1,5 +1,4 @@ -load("@rules_cuda//cuda:defs.bzl", "if_linux", "if_windows") -load(":defs.bzl", "if_local_cuda") +load("@rules_cuda//cuda:defs.bzl", "cc_import_versioned_sos", "if_linux", "if_windows") package( default_visibility = ["//visibility:public"], @@ -8,9 +7,9 @@ package( filegroup( name = "compiler_deps", srcs = [ + "cuda/version.txt", ":_cuda_header_files", ] + glob([ - "cuda/version.txt", "cuda/bin/**", "cuda/lib64/**", "cuda/nvvm/**", @@ -39,10 +38,9 @@ cc_library( ]), ) -cc_import( +cc_import_versioned_sos( name = "cudart_so", shared_library = "cuda/lib64/libcudart.so", - target_compatible_with = ["@platforms//os:linux"], ) cc_library( @@ -136,16 +134,14 @@ cc_library( ]), ) -cc_import( +cc_import_versioned_sos( name = "cublas_so", shared_library = "cuda/lib64/libcublas.so", - target_compatible_with = ["@platforms//os:linux"], ) -cc_import( +cc_import_versioned_sos( name = "cublasLt_so", shared_library = "cuda/lib64/libcublasLt.so", - target_compatible_with = ["@platforms//os:linux"], ) cc_import( @@ -193,10 +189,9 @@ cc_library( ) # CUPTI -cc_import( +cc_import_versioned_sos( name = "cupti_so", shared_library = "cuda/lib64/libcupti.so", - target_compatible_with = ["@platforms//os:linux"], ) cc_import( @@ -301,10 +296,9 @@ cc_library( ) # curand -cc_import( +cc_import_versioned_sos( name = "curand_so", shared_library = "cuda/lib64/libcurand.so", - target_compatible_with = ["@platforms//os:linux"], ) cc_import( @@ -351,19 +345,17 @@ cc_library( "cuda/include", ], visibility = 
["//visibility:public"], - deps = [] + - if_linux([ - ":nvptxcompiler_so" + deps = [] + if_linux([ + ":nvptxcompiler_so", ]) + if_windows([ - ":nvptxcompiler_lib" - ]) + ":nvptxcompiler_lib", + ]), ) # cufft -cc_import( +cc_import_versioned_sos( name = "cufft_so", shared_library = "cuda/lib64/libcufft.so", - target_compatible_with = ["@platforms//os:linux"], ) cc_import( @@ -373,10 +365,9 @@ cc_import( target_compatible_with = ["@platforms//os:windows"], ) -cc_import( +cc_import_versioned_sos( name = "cufftw_so", shared_library = "cuda/lib64/libcufftw.so", - target_compatible_with = ["@platforms//os:linux"], ) cc_import( @@ -392,7 +383,7 @@ cc_library( ":cuda_headers", ] + if_linux([ ":cufft_so", - ":cufftw_so" + ":cufftw_so", ]) + if_windows([ ":cufft_lib", ":cufftw_lib", @@ -400,10 +391,9 @@ cc_library( ) # cusolver -cc_import( +cc_import_versioned_sos( name = "cusolver_so", shared_library = "cuda/lib64/libcusolver.so", - target_compatible_with = ["@platforms//os:linux"], ) cc_import( @@ -425,10 +415,9 @@ cc_library( ) # cusparse -cc_import( +cc_import_versioned_sos( name = "cusparse_so", shared_library = "cuda/lib64/libcusparse.so", - target_compatible_with = ["@platforms//os:linux"], ) cc_import( @@ -450,10 +439,9 @@ cc_library( ) # nvtx -cc_import( +cc_import_versioned_sos( name = "nvtx_so", shared_library = "cuda/lib64/libnvToolsExt.so", - target_compatible_with = ["@platforms//os:linux"], ) cc_import( @@ -505,10 +493,9 @@ _NPP_LIBS = { } [ - cc_import( + cc_import_versioned_sos( name = name + "_so", shared_library = "cuda/lib64/lib{}.so".format(name), - target_compatible_with = ["@platforms//os:linux"], ) for name in _NPP_LIBS.keys() ]