From 4122a6ed4d6cd88305112c4e34624f3cb6549c3f Mon Sep 17 00:00:00 2001
From: TysonRayJones
Date: Sun, 9 Jun 2024 10:59:13 +1000
Subject: [PATCH] added scalar types

- `qreal`
- `qindex`
- `qcomp`

The latter is a C/C++ agnostic complex scalar compatible with OpenMP, MPI, Thrust, cuQuantum, and given a CUDA-compatible wrapper

Co-Authored-By: Ali Rezaei
---
 quest/include/precision.h         | 84 ++++++++++++++++++++++++++++++
 quest/include/types.h             | 86 +++++++++++++++++++++++++++++++
 quest/src/comm/communication.cpp  | 34 +++++++++++-
 quest/src/cpu/omp_subroutines.cpp | 15 +++++-
 quest/src/gpu/kernels.hpp         | 42 +++++++++++++++
 5 files changed, 259 insertions(+), 2 deletions(-)

diff --git a/quest/include/precision.h b/quest/include/precision.h
index b353e2ac..0c754f50 100644
--- a/quest/include/precision.h
+++ b/quest/include/precision.h
@@ -5,6 +5,90 @@
 #ifndef PRECISION_H
 #define PRECISION_H
 
+#include "quest/include/modes.h"
+
+
+
+/*
+ * STATE-INDEXING TYPE
+ */
+
+// can be (for example) int, long, long long, unsigned, long unsigned, long long unsigned
+#define INDEX_TYPE long long unsigned
+
+
+
+/*
+ * RE-CONFIGURABLE FLOATING-POINT PRECISION
+ */
+
+// assume double precision as default
+#ifndef FLOAT_PRECISION
+    #define FLOAT_PRECISION 2
+#endif
+
+// validate precision is 1 (float), 2 (double) or 4 (long double)
+#if ! (FLOAT_PRECISION == 1 || FLOAT_PRECISION == 2 || FLOAT_PRECISION == 4)
+    #error "FLOAT_PRECISION must be 1 (float), 2 (double) or 4 (long double)"
+#endif
+
+// infer floating-point type from precision
+#if FLOAT_PRECISION == 1
+    #define FLOAT_TYPE float
+#elif FLOAT_PRECISION == 2
+    #define FLOAT_TYPE double
+#elif FLOAT_PRECISION == 4
+    #define FLOAT_TYPE long double
+#endif
+
+
+
+/*
+ * CHECK PRECISION TYPES ARE COMPATIBLE WITH DEPLOYMENT
+ */
+
+#if ENABLE_GPU_ACCELERATION && (FLOAT_PRECISION == 4)
+    #error "A quad floating-point precision (FLOAT_PRECISION=4, i.e. long double) is not supported by GPU deployment"
+#endif
+
+// Windows MSVC OpenMP doesn't permit operator overloading of the qcomp type,
+// as is necessary when performing multithreaded reductions of amplitudes.
+// We could support MSVC by separately reducing the real and imaginary components,
+// but Bill Gates would have to wrestle me into submission.
+#if ENABLE_MULTITHREADING && defined(_MSC_VER)
+    #error "Cannot use multi-threading on Windows"
+#endif
+
+
+
+/*
+ * MACROS FOR PRINTING MULTI-WORD MACROS
+ */
+
+#define GET_STR_INTERNAL(x) #x
+#define GET_STR(x) GET_STR_INTERNAL(x)
+
+
+
+/*
+ * RE-CONFIGURABLE VALIDATION PRECISION
+ */
+
+#ifndef VALIDATION_EPSILON
+
+    #if (FLOAT_PRECISION == 1)
+        #define VALIDATION_EPSILON 1E-5
+
+    #elif (FLOAT_PRECISION == 2)
+        #define VALIDATION_EPSILON 1E-13
+
+    #elif (FLOAT_PRECISION == 4)
+        #define VALIDATION_EPSILON 1E-14
+
+    #endif
+
+#endif
+
 
 
 #endif // PRECISION_H
\ No newline at end of file
diff --git a/quest/include/types.h b/quest/include/types.h
index 835ab149..46125504 100644
--- a/quest/include/types.h
+++ b/quest/include/types.h
@@ -5,6 +5,92 @@
 #ifndef TYPES_H
 #define TYPES_H
 
+#include "quest/include/modes.h"
+#include "quest/include/precision.h"
+
+
+
+/*
+ * REAL TYPE ALIASES
+ */
+
+typedef FLOAT_TYPE qreal;
+typedef INDEX_TYPE qindex;
+
+
+
+/*
+ * COMPLEX TYPE ALIAS
+ */
+
+// when C++ parses this header during backend or C++ user-code compilation...
+#ifdef __cplusplus
+
+    // resolve qcomp as the standard C++ complex type
+    #include <complex>
+    typedef std::complex<FLOAT_TYPE> qcomp;
+
+// when C parses this header, during compilation of C user code...
+#else
+
+    // pretend that the API's qcomp is the C complex type
+    #include <complex.h>
+
+    // which is either MSVC's custom C complex...
+    #ifdef _MSC_VER
+
+        #if (FLOAT_PRECISION == 1)
+            typedef _Fcomplex qcomp;
+
+        #elif (FLOAT_PRECISION == 2)
+            typedef _Dcomplex qcomp;
+
+        #elif (FLOAT_PRECISION == 4)
+            typedef _Lcomplex qcomp;
+
+        #endif
+
+    // or that used by GNU & Clang
+    #else
+        typedef FLOAT_TYPE _Complex qcomp;
+
+    #endif
+
+#endif
+
+
+
+/*
+ * COMPLEX TYPE OVERLOADS
+ */
+
+#ifdef __cplusplus
+
+    // enable C++ literals (requires C++14)
+    using namespace std::complex_literals;
+
+    // qcomp() C++ instantiation is already enabled
+
+#else
+
+    #ifdef _MSC_VER
+
+        // MSVC C literals are literally impossible
+
+        // enable qcomp() C instantiation (a compound literal, so that it is an expression like the non-MSVC macro below)
+        #define qcomp(re,im) (qcomp) {(re), (im)}
+
+    #else
+
+        // C literals are already enabled (requires C99)
+
+        // enable qcomp() C instantiation
+        #define qcomp(re,im) ( (qreal) (re) + I*((qreal) (im)) )
+
+    #endif
+
+#endif
+
 
 
 #endif // TYPES_H
\ No newline at end of file
diff --git a/quest/src/comm/communication.cpp b/quest/src/comm/communication.cpp
index 273a4f56..d71759a7 100644
--- a/quest/src/comm/communication.cpp
+++ b/quest/src/comm/communication.cpp
@@ -1,4 +1,36 @@
 /** @file
  * Functions for communicating and exchanging amplitudes between compute
  * nodes, when running in distributed mode, using the C MPI standard.
- */
\ No newline at end of file
+ */
+
+#include "quest/include/modes.h"
+#include "quest/include/types.h"
+
+#if ENABLE_DISTRIBUTION
+    #include <mpi.h>
+#endif
+
+
+
+/*
+ * MPI COMPLEX TYPE FLAG
+ */
+
+#if ENABLE_DISTRIBUTION
+
+    #if (FLOAT_PRECISION == 1)
+        #define MPI_QCOMP MPI_CXX_FLOAT_COMPLEX
+
+    #elif (FLOAT_PRECISION == 2)
+        #define MPI_QCOMP MPI_CXX_DOUBLE_COMPLEX
+
+    // sometimes 'MPI_CXX_LONG_DOUBLE_COMPLEX' isn't defined
+    #elif (FLOAT_PRECISION == 4) && defined(MPI_CXX_LONG_DOUBLE_COMPLEX)
+        #define MPI_QCOMP MPI_CXX_LONG_DOUBLE_COMPLEX
+
+    // in that case, fall back to the C type (identical memory layout)
+    #else
+        #define MPI_QCOMP MPI_C_LONG_DOUBLE_COMPLEX
+    #endif
+
+#endif
\ No newline at end of file
diff --git a/quest/src/cpu/omp_subroutines.cpp b/quest/src/cpu/omp_subroutines.cpp
index 1063d48a..05484da2 100644
--- a/quest/src/cpu/omp_subroutines.cpp
+++ b/quest/src/cpu/omp_subroutines.cpp
@@ -1,4 +1,17 @@
 /** @file
  * CPU OpenMP-accelerated definitions of the subroutines called by
  * accelerator.cpp.
- */
\ No newline at end of file
+ */
+
+#include "quest/include/modes.h"
+#include "quest/include/types.h"
+
+#if ENABLE_MULTITHREADING
+    #include <omp.h>
+#endif
+
+
+// inform OpenMP how to reduce qcomp instances (except on MSVC compilers); thread-private copies start at zero, the additive identity
+#if ENABLE_MULTITHREADING && !defined(_MSC_VER)
+    #pragma omp declare reduction(+ : qcomp : omp_out += omp_in ) initializer( omp_priv = qcomp(0,0) )
+#endif
\ No newline at end of file
diff --git a/quest/src/gpu/kernels.hpp b/quest/src/gpu/kernels.hpp
index db1c6380..738fd707 100644
--- a/quest/src/gpu/kernels.hpp
+++ b/quest/src/gpu/kernels.hpp
@@ -7,6 +7,48 @@
 #ifndef KERNELS_HPP
 #define KERNELS_HPP
 
+#include "quest/include/modes.h"
+#include "quest/include/types.h"
+
+#include <cuComplex.h>
+
+
+
+/*
+ * CREATE CUDA-COMPATIBLE QCOMP ALIAS
+ */
+
+#if (FLOAT_PRECISION == 1)
+    typedef cuFloatComplex cu_qcomp;
+
+#elif (FLOAT_PRECISION == 2)
+    typedef cuDoubleComplex cu_qcomp;
+
+#else
+    #error "Build bug; precision.h should have prevented non-float non-double qcomp precision on GPU."
+
+#endif
+
+
+
+/*
+ * CREATE CUDA-COMPATIBLE QCOMP OVERLOADS
+ */
+
+__host__ __device__ inline cu_qcomp operator + (const cu_qcomp& a, const cu_qcomp& b) {
+    cu_qcomp res;
+    res.x = a.x + b.x;
+    res.y = a.y + b.y;
+    return res;
+}
+
+__host__ __device__ inline cu_qcomp operator * (const cu_qcomp& a, const cu_qcomp& b) {
+    cu_qcomp res;
+    res.x = a.x * b.x - a.y * b.y;
+    res.y = a.x * b.y + a.y * b.x;
+    return res;
+}
+
 
 
 #endif // KERNELS_HPP
\ No newline at end of file