From 3d2ab00d9dc279223f290cbbef07de36714087b9 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Thu, 5 Dec 2024 10:06:00 +0100 Subject: [PATCH] cufile version (#565) Fixes https://github.com/rapidsai/kvikio/issues/566 To clean up and make cufile capability detection more robust, we now use `cuFileGetVersion()`. Should fix the nightly failure: https://github.com/rapidsai/kvikio/actions/runs/12133477083/job/33877440755 NB: `cuFileGetVersion()` first became available in cufile v1.8 (CTK v12.3) thus the stream and batch API detection will return false for versions older than v1.8. I think this is acceptable for robustness. Authors: - Mads R. B. Kristensen (https://github.com/madsbk) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Vyas Ramasubramani (https://github.com/vyasr) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/kvikio/pull/565 --- cpp/CMakeLists.txt | 17 +++- cpp/examples/basic_io.cpp | 2 +- cpp/include/kvikio/file_handle.hpp | 29 +++--- cpp/include/kvikio/remote_handle.hpp | 2 +- cpp/include/kvikio/shim/cufile.hpp | 99 +++++++++++++------- cpp/include/kvikio/shim/cufile_h_wrapper.hpp | 7 +- python/kvikio/kvikio/_lib/cufile_driver.pyx | 5 + python/kvikio/kvikio/benchmarks/utils.py | 6 ++ python/kvikio/kvikio/cufile_driver.py | 21 +++++ python/kvikio/tests/test_cufile_driver.py | 6 ++ 10 files changed, 140 insertions(+), 54 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 9c1450518b..a0639c5382 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -80,7 +80,7 @@ if(KvikIO_CUDA_SUPPORT) else() set(cuFile_FOUND 1) - # Check batch and stream API support (cuFile_BATCH_API_FOUND and cuFile_STREAM_API_FOUND) + # Check API support try_compile( cuFile_BATCH_API_FOUND SOURCE_FROM_CONTENT batch.cpp @@ -109,6 +109,20 @@ if(KvikIO_CUDA_SUPPORT) OUTPUT_VARIABLE stream_output ) message(STATUS "Found cuFile Stream API: ${cuFile_STREAM_API_FOUND}") + try_compile( + cuFile_VERSION_API_FOUND 
SOURCE_FROM_CONTENT + version.cpp + [[#include <cufile.h> + int main() { + int version; + cuFileGetVersion(&version); + return 0; + } + ]] + LINK_LIBRARIES CUDA::cuFile rt ${CMAKE_DL_LIBS} + OUTPUT_VARIABLE version_output + ) + message(STATUS "Found cuFile Version API: ${cuFile_VERSION_API_FOUND}") endif() endif() @@ -154,6 +168,7 @@ target_compile_definitions( $<$<BOOL:${cuFile_FOUND}>:KVIKIO_CUFILE_FOUND> $<$<BOOL:${cuFile_BATCH_API_FOUND}>:KVIKIO_CUFILE_BATCH_API_FOUND> $<$<BOOL:${cuFile_STREAM_API_FOUND}>:KVIKIO_CUFILE_STREAM_API_FOUND> + $<$<BOOL:${cuFile_VERSION_API_FOUND}>:KVIKIO_CUFILE_VERSION_API_FOUND> ) set_target_properties( diff --git a/cpp/examples/basic_io.cpp b/cpp/examples/basic_io.cpp index 4d04391404..39bfc315cd 100644 --- a/cpp/examples/basic_io.cpp +++ b/cpp/examples/basic_io.cpp @@ -181,7 +181,7 @@ int main() cout << "Parallel POSIX read (" << kvikio::defaults::thread_pool_nthreads() << " threads): " << read << endl; } - if (kvikio::is_batch_and_stream_available() && !kvikio::defaults::is_compat_mode_preferred()) { + if (kvikio::is_batch_api_available() && !kvikio::defaults::is_compat_mode_preferred()) { std::cout << std::endl; Timer timer; // Here we use the batch API to read "/tmp/test-file" into `b_dev` by diff --git a/cpp/include/kvikio/file_handle.hpp b/cpp/include/kvikio/file_handle.hpp index abc6660de6..4880bd4f20 100644 --- a/cpp/include/kvikio/file_handle.hpp +++ b/cpp/include/kvikio/file_handle.hpp @@ -62,23 +62,20 @@ class FileHandle { */ bool is_compat_mode_preferred_for_async(CompatMode requested_compat_mode) { - if (!defaults::is_compat_mode_preferred(requested_compat_mode)) { - if (!is_batch_and_stream_available()) { - if (requested_compat_mode == CompatMode::AUTO) { return true; } - throw std::runtime_error("Missing cuFile batch or stream library symbol."); - } - - // When checking for availability, we also check if cuFile's config file exist. This is - // because even when the stream API is available, it doesn't work if no config file exist. 
- if (config_path().empty()) { - if (requested_compat_mode == CompatMode::AUTO) { return true; } - throw std::runtime_error("Missing cuFile configuration file."); - } - - return false; + if (defaults::is_compat_mode_preferred(requested_compat_mode)) { return true; } + + if (!is_stream_api_available()) { + if (requested_compat_mode == CompatMode::AUTO) { return true; } + throw std::runtime_error("Missing the cuFile stream api."); } - return true; + // When checking for availability, we also check if cuFile's config file exists. This is + // because even when the stream API is available, it doesn't work if no config file exists. + if (config_path().empty()) { + if (requested_compat_mode == CompatMode::AUTO) { return true; } + throw std::runtime_error("Missing cuFile configuration file."); + } + return false; } public: @@ -670,7 +667,7 @@ class FileHandle { */ [[nodiscard]] bool is_compat_mode_preferred_for_async() const noexcept { - static bool is_extra_symbol_available = is_batch_and_stream_available(); + static bool is_extra_symbol_available = is_stream_api_available(); static bool is_config_path_empty = config_path().empty(); return is_compat_mode_preferred() || !is_extra_symbol_available || is_config_path_empty; } diff --git a/cpp/include/kvikio/remote_handle.hpp b/cpp/include/kvikio/remote_handle.hpp index 8ac2798f31..e1b152b23c 100644 --- a/cpp/include/kvikio/remote_handle.hpp +++ b/cpp/include/kvikio/remote_handle.hpp @@ -41,7 +41,7 @@ namespace detail { * @note Is not thread-safe. */ class BounceBufferH2D { - CUstream _stream; // The CUDA steam to use. + CUstream _stream; // The CUDA stream to use. CUdeviceptr _dev; // The output device buffer. AllocRetain::Alloc _host_buffer; // The host buffer to bounce data on. std::ptrdiff_t _dev_offset{0}; // Number of bytes written to `_dev`. 
diff --git a/cpp/include/kvikio/shim/cufile.hpp b/cpp/include/kvikio/shim/cufile.hpp index 7f12c29c3d..5194d45e74 100644 --- a/cpp/include/kvikio/shim/cufile.hpp +++ b/cpp/include/kvikio/shim/cufile.hpp @@ -57,8 +57,11 @@ class cuFileAPI { decltype(cuFileDriverOpen)* DriverOpen{nullptr}; decltype(cuFileDriverClose)* DriverClose{nullptr}; + // Don't call `GetVersion` directly, use `cuFileAPI::instance().version`. + decltype(cuFileGetVersion)* GetVersion{nullptr}; + public: - bool stream_available = false; + int version{0}; private: #ifdef KVIKIO_CUFILE_FOUND @@ -88,33 +91,39 @@ class cuFileAPI { get_symbol(DriverSetMaxCacheSize, lib, KVIKIO_STRINGIFY(cuFileDriverSetMaxCacheSize)); get_symbol(DriverSetMaxPinnedMemSize, lib, KVIKIO_STRINGIFY(cuFileDriverSetMaxPinnedMemSize)); -#ifdef KVIKIO_CUFILE_BATCH_API_FOUND - get_symbol(BatchIOSetUp, lib, KVIKIO_STRINGIFY(cuFileBatchIOSetUp)); - get_symbol(BatchIOSubmit, lib, KVIKIO_STRINGIFY(cuFileBatchIOSubmit)); - get_symbol(BatchIOGetStatus, lib, KVIKIO_STRINGIFY(cuFileBatchIOGetStatus)); - get_symbol(BatchIOCancel, lib, KVIKIO_STRINGIFY(cuFileBatchIOCancel)); - get_symbol(BatchIODestroy, lib, KVIKIO_STRINGIFY(cuFileBatchIODestroy)); -#endif - -#ifdef KVIKIO_CUFILE_STREAM_API_FOUND - get_symbol(ReadAsync, lib, KVIKIO_STRINGIFY(cuFileReadAsync)); - get_symbol(WriteAsync, lib, KVIKIO_STRINGIFY(cuFileWriteAsync)); - get_symbol(StreamRegister, lib, KVIKIO_STRINGIFY(cuFileStreamRegister)); - get_symbol(StreamDeregister, lib, KVIKIO_STRINGIFY(cuFileStreamDeregister)); +#ifdef KVIKIO_CUFILE_VERSION_API_FOUND try { - void* s{}; - get_symbol(s, lib, "cuFileReadAsync"); - stream_available = true; - } catch (const std::runtime_error&) { + get_symbol(GetVersion, lib, KVIKIO_STRINGIFY(cuFileGetVersion)); + int ver; + CUfileError_t const error = GetVersion(&ver); + if (error.err == CU_FILE_SUCCESS) { version = ver; } + } catch (std::runtime_error const&) { } #endif + // Some symbols were introduced in later versions, so version guards are 
required. + // Note: `version` is 0 for cuFile versions prior to v1.8 because `cuFileGetVersion` + // did not exist. As a result, the batch and stream APIs are not loaded in versions + // 1.6 and 1.7, respectively, even though they are available. This trade-off is made + // for improved robustness. + if (version >= 1060) { + get_symbol(BatchIOSetUp, lib, KVIKIO_STRINGIFY(cuFileBatchIOSetUp)); + get_symbol(BatchIOSubmit, lib, KVIKIO_STRINGIFY(cuFileBatchIOSubmit)); + get_symbol(BatchIOGetStatus, lib, KVIKIO_STRINGIFY(cuFileBatchIOGetStatus)); + get_symbol(BatchIOCancel, lib, KVIKIO_STRINGIFY(cuFileBatchIOCancel)); + get_symbol(BatchIODestroy, lib, KVIKIO_STRINGIFY(cuFileBatchIODestroy)); + } + if (version >= 1070) { + get_symbol(ReadAsync, lib, KVIKIO_STRINGIFY(cuFileReadAsync)); + get_symbol(WriteAsync, lib, KVIKIO_STRINGIFY(cuFileWriteAsync)); + get_symbol(StreamRegister, lib, KVIKIO_STRINGIFY(cuFileStreamRegister)); + get_symbol(StreamDeregister, lib, KVIKIO_STRINGIFY(cuFileStreamDeregister)); + } + // cuFile is supposed to open and close the driver automatically but // because of a bug in cuFile v1.4 (CUDA v11.8) it sometimes segfaults: // . - // We use the stream API as a version indicator of cuFile since it was introduced - // in cuFile v1.7 (CUDA v12.2). - if (!stream_available) { driver_open(); } + if (version < 1050) { driver_open(); } } // Notice, we have to close the driver at program exit (if we opened it) even though we are @@ -124,7 +133,7 @@ class cuFileAPI { // [1] ~cuFileAPI() { - if (!stream_available) { driver_close(); } + if (version < 1050) { driver_close(); } } #else cuFileAPI() { throw std::runtime_error("KvikIO not compiled with cuFile.h"); } @@ -205,25 +214,49 @@ inline bool is_cufile_available() } /** - * @brief Check if cuFile's batch and stream API is available + * @brief Get cufile version (or zero if older than v1.8). 
* - * Technically, the batch API is available in CUDA 12.1 but since there is no good - * way to check CUDA version using the driver API, we check for the existing of the - * `cuFileReadAsync` symbol, which is defined in CUDA 12.2+. + * The version is returned as (1000*major + 10*minor). E.g., cufile v1.8.0 would + * be represented by 1080. * - * @return The boolean answer + * Notice, this is not the version of the CUDA toolkit. cufile is part of the + * toolkit but follows its own version scheme. + * + * @return The version (1000*major + 10*minor) or zero if older than 1080. */ -#if defined(KVIKIO_CUFILE_STREAM_API_FOUND) && defined(KVIKIO_CUFILE_STREAM_API_FOUND) -inline bool is_batch_and_stream_available() noexcept +#ifdef KVIKIO_CUFILE_FOUND +inline int cufile_version() { try { - return is_cufile_available() && cuFileAPI::instance().stream_available; - } catch (const std::runtime_error&) { - return false; + return cuFileAPI::instance().version; + } catch (std::runtime_error const&) { + return 0; } } #else -constexpr bool is_batch_and_stream_available() { return false; } +constexpr int cufile_version() { return 0; } #endif +/** + * @brief Check if cuFile's batch API is available. + * + * Since `cuFileGetVersion()` first became available in cufile v1.8 (CTK v12.3), + * this function returns false for versions older than v1.8 even though the batch + * API became available in v1.6. + * + * @return The boolean answer + */ +inline bool is_batch_api_available() noexcept { return cufile_version() >= 1060; } + +/** + * @brief Check if cuFile's stream (async) API is available. + * + * Since `cuFileGetVersion()` first became available in cufile v1.8 (CTK v12.3), + * this function returns false for versions older than v1.8 even though the stream + * API became available in v1.7. 
+ * + * @return The boolean answer + */ +inline bool is_stream_api_available() noexcept { return cufile_version() >= 1070; } + } // namespace kvikio diff --git a/cpp/include/kvikio/shim/cufile_h_wrapper.hpp b/cpp/include/kvikio/shim/cufile_h_wrapper.hpp index 33c3fee9a2..1c13d2d8a1 100644 --- a/cpp/include/kvikio/shim/cufile_h_wrapper.hpp +++ b/cpp/include/kvikio/shim/cufile_h_wrapper.hpp @@ -75,7 +75,7 @@ CUfileError_t cuFileDriverSetMaxPinnedMemSize(...); #endif -// If the Batch API isn't defined, we define some of the data types here. +// If some cufile APIs aren't defined, we define some of the data types here. // Notice, this doesn't need to be ABI compatible with the cufile definitions and // the lack of definitions is not a problem because the linker will never look for // these symbols because the "real" function calls are made through the shim instance. @@ -105,10 +105,13 @@ CUfileError_t cuFileBatchIOCancel(...); CUfileError_t cuFileBatchIODestroy(...); #endif -// If the Stream API isn't defined, we define some of the data types here. 
#ifndef KVIKIO_CUFILE_STREAM_API_FOUND CUfileError_t cuFileReadAsync(...); CUfileError_t cuFileWriteAsync(...); CUfileError_t cuFileStreamRegister(...); CUfileError_t cuFileStreamDeregister(...); #endif + +#ifndef KVIKIO_CUFILE_VERSION_API_FOUND +CUfileError_t cuFileGetVersion(...); +#endif diff --git a/python/kvikio/kvikio/_lib/cufile_driver.pyx b/python/kvikio/kvikio/_lib/cufile_driver.pyx index 29302a0104..0488eb3b20 100644 --- a/python/kvikio/kvikio/_lib/cufile_driver.pyx +++ b/python/kvikio/kvikio/_lib/cufile_driver.pyx @@ -9,10 +9,15 @@ from libcpp cimport bool cdef extern from "" nogil: + cdef int cpp_libcufile_version "kvikio::cufile_version"() except + cdef void cpp_driver_open "kvikio::cuFileAPI::instance().driver_open"() except + cdef void cpp_driver_close "kvikio::cuFileAPI::instance().driver_close"() except + +def libcufile_version() -> int: + return cpp_libcufile_version() + + def driver_open(): cpp_driver_open() diff --git a/python/kvikio/kvikio/benchmarks/utils.py b/python/kvikio/kvikio/benchmarks/utils.py index 23c7731f24..fa25c361a4 100644 --- a/python/kvikio/kvikio/benchmarks/utils.py +++ b/python/kvikio/kvikio/benchmarks/utils.py @@ -27,6 +27,7 @@ def drop_vm_cache() -> None: def pprint_sys_info() -> None: """Pretty print system information""" + version = kvikio.cufile_driver.libcufile_version() props = kvikio.cufile_driver.DriverProperties() try: import pynvml @@ -41,6 +42,10 @@ def pprint_sys_info() -> None: gpu_name = f"{pynvml.nvmlDeviceGetName(dev)} (dev #0)" mem_total = format_bytes(pynvml.nvmlDeviceGetMemoryInfo(dev).total) bar1_total = format_bytes(pynvml.nvmlDeviceGetBAR1MemoryInfo(dev).bar1Total) + if version == (0, 0): + libcufile_version = "unknown (earlier than cuFile 1.8)" + else: + libcufile_version = f"{version[0]}.{version[1]}" gds_version = "N/A (Compatibility Mode)" if props.is_gds_available: gds_version = f"v{props.major_version}.{props.minor_version}" @@ -61,6 +66,7 @@ def pprint_sys_info() -> None: print(f"GPU | 
{gpu_name}") print(f"GPU Memory Total | {mem_total}") print(f"BAR1 Memory Total | {bar1_total}") + print(f"libcufile version | {libcufile_version}") print(f"GDS driver | {gds_version}") print(f"GDS config.json | {gds_config_json_path}") diff --git a/python/kvikio/kvikio/cufile_driver.py b/python/kvikio/kvikio/cufile_driver.py index e78242a514..fb32be347a 100644 --- a/python/kvikio/kvikio/cufile_driver.py +++ b/python/kvikio/kvikio/cufile_driver.py @@ -2,6 +2,7 @@ # See file LICENSE for terms. import atexit +from typing import Tuple from kvikio._lib import cufile_driver # type: ignore @@ -10,6 +11,26 @@ DriverProperties = cufile_driver.DriverProperties +def libcufile_version() -> Tuple[int, int]: + """Get the libcufile version. + + Returns (0, 0) for cuFile versions prior to v1.8. + + Notes + ----- + This is not the version of the CUDA toolkit. cufile is part of the + toolkit but follows its own version scheme. + + Returns + ------- + The version as a tuple (MAJOR, MINOR). + """ + v = cufile_driver.libcufile_version() + # Convert the integer version like 1080 to (1, 8). + major, minor = divmod(v, 1000) + return (major, minor // 10) + + def driver_open() -> None: """Open the cuFile driver diff --git a/python/kvikio/tests/test_cufile_driver.py b/python/kvikio/tests/test_cufile_driver.py index 0a64bf0952..a1dc3a6454 100644 --- a/python/kvikio/tests/test_cufile_driver.py +++ b/python/kvikio/tests/test_cufile_driver.py @@ -6,6 +6,12 @@ import kvikio.cufile_driver +def test_version(): + major, minor = kvikio.cufile_driver.libcufile_version() + assert major >= 0 + assert minor >= 0 + + @pytest.mark.cufile def test_open_and_close(): kvikio.cufile_driver.driver_open()