From 3e2f78236a950aaf33214eddfc5531ab3a8427f3 Mon Sep 17 00:00:00 2001 From: TysonRayJones Date: Sun, 9 Jun 2024 12:27:07 +1000 Subject: [PATCH] added hardware querents but some GPU utilities are TODO --- quest/src/comm/communication.cpp | 105 ++++++++++++++++++++- quest/src/comm/communication.hpp | 20 ++++ quest/src/core/errors.cpp | 24 ++++- quest/src/core/errors.hpp | 16 ++++ quest/src/core/validation.cpp | 1 + quest/src/cpu/omp_subroutines.cpp | 52 +++++++++- quest/src/cpu/omp_subroutines.hpp | 12 +++ quest/src/gpu/config.cpp | 151 +++++++++++++++++++++++++++++- quest/src/gpu/config.hpp | 18 ++++ 9 files changed, 394 insertions(+), 5 deletions(-) diff --git a/quest/src/comm/communication.cpp b/quest/src/comm/communication.cpp index d71759a7..d1f61a5d 100644 --- a/quest/src/comm/communication.cpp +++ b/quest/src/comm/communication.cpp @@ -6,12 +6,31 @@ #include "quest/include/modes.h" #include "quest/include/types.h" +#include "quest/src/core/errors.hpp" + #if ENABLE_DISTRIBUTION #include #endif +/* + * WARN ABOUT CUDA-AWARENESS + */ + +#if ENABLE_DISTRIBUTION && ENABLE_GPU_ACCELERATION + #include + + #ifndef MPIX_CUDA_AWARE_SUPPORT + #warning "Could not ascertain whether MPI is CUDA-aware, so we will assume it is not. This means inter-GPU communication will be slowly routed through the CPU/RAM." + #elif !MPIX_CUDA_AWARE_SUPPORT + #warning "MPI compiler is not CUDA-aware, so inter-GPU communication will be slowly routed through the CPU/RAM" + #endif + +#endif + + + /* * MPI COMPLEX TYPE FLAG */ @@ -33,4 +52,88 @@ #define MPI_QCOMP MPI_C_LONG_DOUBLE_COMPLEX #endif -#endif \ No newline at end of file +#endif + + + +/* + * MPI ENVIRONMENT MANAGEMENT + * all of which is safely callable in non-distributed mode + */ + + +bool comm_isMpiCompiled() { + return (bool) ENABLE_DISTRIBUTION; +} + + +bool comm_isMpiGpuAware() { + + // TODO: these checks may be OpenMPI specific, so that + // non-OpenMPI MPI compilers are always dismissed as + // not being CUDA-aware. Check e.g. MPICH method! + + // definitely not GPU-aware if compiler declares it is not + #if defined(MPIX_CUDA_AWARE_SUPPORT) && ! MPIX_CUDA_AWARE_SUPPORT + return false; + #endif + + // check CUDA-awareness at run-time if we know it's principally supported + #if defined(MPIX_CUDA_AWARE_SUPPORT) + return (bool) MPIX_Query_cuda_support(); + #endif + + // if we can't ascertain CUDA-awareness, just assume no to avoid seg-fault + return false; +} + + +void comm_init() { +#if ENABLE_DISTRIBUTION + int isInit; + MPI_Initialized(&isInit); + + // gracefully handle re-initialisation + if (isInit) + error_commAlreadyInit(); + + MPI_Init(NULL, NULL); +#endif +} + + +void comm_end() { +#if ENABLE_DISTRIBUTION + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); +#endif +} + + +int comm_getRank() { +#if ENABLE_DISTRIBUTION + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + return rank; +#else + return 0; +#endif +} + + +int comm_getNumNodes() { +#if ENABLE_DISTRIBUTION + int numNodes; + MPI_Comm_size(MPI_COMM_WORLD, &numNodes); + return numNodes; +#else + return 1; +#endif +} + + +void comm_synch() { +#if ENABLE_DISTRIBUTION + MPI_Barrier(MPI_COMM_WORLD); +#endif +} \ No newline at end of file diff --git a/quest/src/comm/communication.hpp b/quest/src/comm/communication.hpp index 7564999e..2568400b 100644 --- a/quest/src/comm/communication.hpp +++ b/quest/src/comm/communication.hpp @@ -8,4 +8,24 @@ +/* + * MPI ENVIRONMENT MANAGEMENT + */ + +bool comm_isMpiCompiled(); + +bool comm_isMpiGpuAware(); + +void comm_init(); + +void comm_end(); + +int comm_getRank(); + +int comm_getNumNodes(); + +void comm_synch(); + + + #endif // COMMUNICATION_HPP \ No newline at end of file diff --git a/quest/src/core/errors.cpp b/quest/src/core/errors.cpp index 06b4e1f7..611666a1 100644 --- a/quest/src/core/errors.cpp +++ b/quest/src/core/errors.cpp @@ -49,4 +49,26 @@ void error_validationMessageVarNotSubstituted(std::string msg, std::string var) void error_validationMessageContainedUnsubstitutedVars(std::string msg) { raiseInternalError("User input validation failed and an error string was prepared. However, the message contained unexpected (and potentially ill-formed) unsubstituted variables. The message was:\n" + msg + "\n"); -} \ No newline at end of file +} + + + +/* + * COMMUNICATION ERRORS + */ + +void error_commAlreadyInit() { + + raiseInternalError("The MPI communication environment was attemptedly re-initialised, despite the QuEST environment already existing."); +} + + + +/* + * GPU ERRORS + */ + +void error_gpuQueriedButGpuNotCompiled() { + + raiseInternalError("A function attempted to query GPU properties but QuEST was not compiled with GPU acceleration enabled."); +} diff --git a/quest/src/core/errors.hpp b/quest/src/core/errors.hpp index 91327976..29151807 100644 --- a/quest/src/core/errors.hpp +++ b/quest/src/core/errors.hpp @@ -24,4 +24,20 @@ void error_validationMessageContainedUnsubstitutedVars(std::string msg); +/* + * COMMUNICATION ERRORS + */ + +void error_commAlreadyInit(); + + + +/* + * GPU ERRORS + */ + +void error_gpuQueriedButGpuNotCompiled(); + + + #endif // ERRORS_HPP \ No newline at end of file diff --git a/quest/src/core/validation.cpp b/quest/src/core/validation.cpp index bdfc73f3..5fcea0a2 100644 --- a/quest/src/core/validation.cpp +++ b/quest/src/core/validation.cpp @@ -4,6 +4,7 @@ */ #include "quest/src/core/errors.hpp" +#include "quest/src/comm/communication.hpp" #include #include diff --git a/quest/src/cpu/omp_subroutines.cpp b/quest/src/cpu/omp_subroutines.cpp index 05484da2..35395958 100644 --- a/quest/src/cpu/omp_subroutines.cpp +++ b/quest/src/cpu/omp_subroutines.cpp @@ -6,12 +6,60 @@ #include "quest/include/modes.h" #include "quest/include/types.h" +#include "quest/src/core/errors.hpp" + + +#if ENABLE_MULTITHREADING && !defined(_OPENMP) + #error "Attempted to compile in multithreaded mode without enabling OpenMP." +#endif + + #if ENABLE_MULTITHREADING #include #endif -// inform OpenMP how to reduce qcomp instances (except on MSVC compilers) + +/* + * ENABLE OPENMP REDUCTION OF qcomp (except on MSVC compilers) + */ + #if defined(ENABLE_MULTITHREADING) && !defined(_MSC_VER) #pragma omp declare reduction(+ : qcomp : omp_out += omp_in ) initializer( omp_priv = omp_orig ) -#endif \ No newline at end of file +#endif + + + +/* + * OPENMP CONFIG + */ + + +bool cpu_isOpenmpCompiled() { + return (bool) ENABLE_MULTITHREADING; +} + + +int cpu_getCurrentNumThreads() { +#if ENABLE_MULTITHREADING + int n = -1; + + #pragma omp parallel shared(n) + n = omp_get_num_threads(); + + return n; +#else + error_cpuThreadsQueriedButEnvNotMultithreaded(); + return -1; +#endif +} + + +int cpu_getNumOpenmpProcessors() { +#if ENABLE_MULTITHREADING + return omp_get_num_procs(); +#else + error_cpuThreadsQueriedButEnvNotMultithreaded(); + return -1; +#endif +} diff --git a/quest/src/cpu/omp_subroutines.hpp b/quest/src/cpu/omp_subroutines.hpp index 9aa5520f..fa27bf74 100644 --- a/quest/src/cpu/omp_subroutines.hpp +++ b/quest/src/cpu/omp_subroutines.hpp @@ -7,4 +7,16 @@ +/* + * OPENMP CONFIG + */ + +bool cpu_isOpenmpCompiled(); + +int cpu_getCurrentNumThreads(); + +int cpu_getNumOpenmpProcessors(); + + + #endif // OMP_SUBROUTINES_HPP \ No newline at end of file diff --git a/quest/src/gpu/config.cpp b/quest/src/gpu/config.cpp index 388005ed..88b877ec 100644 --- a/quest/src/gpu/config.cpp +++ b/quest/src/gpu/config.cpp @@ -1,3 +1,152 @@ /** @file * Utility functions for querying GPU hardware, used by gpu.cpp. - */ \ No newline at end of file + */ + +#include "quest/include/modes.h" + +#include "quest/src/core/errors.hpp" +#include "quest/src/comm/communication.hpp" + + +#if ENABLE_GPU_ACCELERATION && ! (defined(__NVCC__) || defined(__HIPCC__)) + #error \ + "Attempted to compile config.cpp in GPU-accelerated mode with a non-GPU compiler. "\ + "Please compile this file with a CUDA (nvcc) or ROCm (hipcc) compiler." +#endif + + +#if ENABLE_GPU_ACCELERATION + #include + #include +#endif + + + +bool gpu_isGpuCompiled() { + return (bool) ENABLE_GPU_ACCELERATION; +} + + +bool gpu_isGpuAvailable() { +#if ENABLE_GPU_ACCELERATION + + // DEBUG: cudaGetDeviceProperties is (for some reason) being auto-suffixed with _v2 + // in Cuda 12, which is the only sm=90 compatible version we can use. But then the + // function is not found in -lcuda and -lcudart, WTF + + // ask CUDA for the number of available "devices" + int numDevices; + cudaError_t successCode = cudaGetDeviceCount(&numDevices); + + // if the query failed, we can't use any devices anyway, so we abort + if (successCode != cudaSuccess) + return false; + + // so for each reported device... + for (int deviceInd=0; deviceInd < numDevices; deviceInd++) { + + // query its properties + struct cudaDeviceProp props; + successCode = cudaGetDeviceProperties(&props, deviceInd); + + // if the query failed, device is anyway unusable + if (successCode != cudaSuccess) + continue; + + // if the device is a real GPU, it's 'major' compute capability is != 9999 (meaning emulation) + if (props.major != 9999) + return true; + } + + // no non-emulation devices were found + return false; + +#else + error_gpuQueriedButGpuNotCompiled(); + return false; +#endif +} + + +bool gpu_isDirectGpuCommPossible() { +#if ENABLE_GPU_ACCELERATION + + if (!comm_isMpiGpuAware()) + return false; + + if (!gpu_isGpuAvailable()) + return false; + + // TODO: + // and are GPUs compatible? + // (the above might need to call a GPU-compiled func) + + return true; + +#else + error_gpuQueriedButGpuNotCompiled(); + return false; +#endif +} + + +int gpu_getNumberOfLocalGpus() { +#if ENABLE_GPU_ACCELERATION + + // TODO: this will over-report, since it may include virtual devices! + // see gpu_isGpuAvailable() + + int num; + cudaGetDeviceCount(&num); + return num; + +#else + error_gpuQueriedButGpuNotCompiled(); + return -1; +#endif +} + + +void gpu_bindLocalGPUsToNodes(int rank) { +#if ENABLE_GPU_ACCELERATION + + int numLocalGpus = gpu_getNumberOfLocalGpus(); + int localGpuInd = rank % numLocalGpus; + cudaSetDevice(localGpuInd); + +#else + error_gpuQueriedButGpuNotCompiled(); +#endif +} + + +size_t gpu_getCurrentAvailableMemoryInBytes() { +#if ENABLE_GPU_ACCELERATION + + // note that in distributed settings, all GPUs + // are being simultaneously queried, and it is + // possible their values differ per-node + + size_t free, total; + cudaMemGetInfo(&free, &total); + return free; + +#else + error_gpuQueriedButGpuNotCompiled(); + return 0; +#endif +} + + +size_t gpu_getTotalMemoryInBytes() { +#if ENABLE_GPU_ACCELERATION + + size_t free, total; + cudaMemGetInfo(&free, &total); + return total; + +#else + error_gpuQueriedButGpuNotCompiled(); + return 0; +#endif +} \ No newline at end of file diff --git a/quest/src/gpu/config.hpp b/quest/src/gpu/config.hpp index 5675da3f..1fcf934b 100644 --- a/quest/src/gpu/config.hpp +++ b/quest/src/gpu/config.hpp @@ -5,6 +5,24 @@ #ifndef CONFIG_HPP #define CONFIG_HPP +#include + + + +bool gpu_isGpuCompiled(); + +bool gpu_isGpuAvailable(); + +bool gpu_isDirectGpuCommPossible(); + +int gpu_getNumberOfLocalGpus(); + +void gpu_bindLocalGPUsToNodes(int rank); + +size_t gpu_getCurrentAvailableMemoryInBytes(); + +size_t gpu_getTotalMemoryInBytes(); + #endif // CONFIG_HPP \ No newline at end of file