From 4122a6ed4d6cd88305112c4e34624f3cb6549c3f Mon Sep 17 00:00:00 2001
From: TysonRayJones <tyson.jones.input@gmail.com>
Date: Sun, 9 Jun 2024 10:59:13 +1000
Subject: [PATCH] added scalar types

- `qreal`
- `qindex`
- `qcomp`

The latter is a C/C++ agnostic complex scalar, compatible with OpenMP, MPI, Thrust and cuQuantum, and is given a CUDA-compatible wrapper

Co-Authored-By: Ali Rezaei <Ali.Rezaei@ed.ac.uk>
---
 quest/include/precision.h         | 84 ++++++++++++++++++++++++++++++
 quest/include/types.h             | 86 +++++++++++++++++++++++++++++++
 quest/src/comm/communication.cpp  | 34 +++++++++++-
 quest/src/cpu/omp_subroutines.cpp | 15 +++++-
 quest/src/gpu/kernels.hpp         | 42 +++++++++++++++
 5 files changed, 259 insertions(+), 2 deletions(-)
diff --git a/quest/include/precision.h b/quest/include/precision.h
index b353e2ac..0c754f50 100644
--- a/quest/include/precision.h
+++ b/quest/include/precision.h
@@ -5,6 +5,90 @@
 #ifndef PRECISION_H
 #define PRECISION_H
 
+#include "quest/include/modes.h"
+
+
+
+/*
+ * STATE-INDEXING TYPE
+ */
+
+// can be (for example) int, long, long long, unsigned, long unsigned, long long unsigned
+#define INDEX_TYPE long long unsigned
+
+
+
+/*
+ * RE-CONFIGURABLE FLOATING-POINT PRECISION
+ */
+
+// assume double precision as default
+#ifndef FLOAT_PRECISION
+    #define FLOAT_PRECISION 2
+#endif
+
+// validate precision is 1 (float), 2 (double) or 4 (long double)
+#if ! (FLOAT_PRECISION == 1 || FLOAT_PRECISION == 2 || FLOAT_PRECISION == 4)
+    #error "FLOAT_PRECISION must be 1 (float), 2 (double) or 4 (long double)"
+#endif 
+
+// infer floating-point type from precision
+#if FLOAT_PRECISION == 1
+    #define FLOAT_TYPE float
+#elif FLOAT_PRECISION == 2
+    #define FLOAT_TYPE double
+#elif FLOAT_PRECISION == 4
+    #define FLOAT_TYPE long double
+#endif
+
+
+
+/*
+ * CHECK PRECISION TYPES ARE COMPATIBLE WITH DEPLOYMENT
+ */
+
+#if ENABLE_GPU_ACCELERATION && (FLOAT_PRECISION == 4)
+    #error "A quad floating-point precision (FLOAT_PRECISION=4, i.e. long double) is not supported by GPU deployment"
+#endif
+
+// Windows MSVC OpenMP doesn't permit operator overloading of the qcomp type,
+// as is necessary when performing multithreaded reductions of amplitudes.
+// We could support MSVC by separately reducing the real and imaginary components,
+// but Bill Gates would have to wrestle me into submission.
+#if ENABLE_MULTITHREADING && defined(_MSC_VER)
+    #error "Cannot use multi-threading on Windows"
+#endif
+
+
+
+/*
+ * MACROS FOR PRINTING MULTI-WORD MACROS
+ */
+
+#define GET_STR_INTERNAL(x) #x
+#define GET_STR(x) GET_STR_INTERNAL(x)
+
+
+
+/*
+ * RE-CONFIGURABLE VALIDATION PRECISION
+ */
+
+#ifndef VALIDATION_EPSILON
+
+    #if (FLOAT_PRECISION == 1)
+        #define VALIDATION_EPSILON 1E-5
+
+    #elif (FLOAT_PRECISION == 2)
+        #define VALIDATION_EPSILON 1E-13
+
+    #elif (FLOAT_PRECISION == 4)
+        #define VALIDATION_EPSILON 1E-14
+
+    #endif
+
+#endif
+
 
 
 #endif // PRECISION_H
\ No newline at end of file
diff --git a/quest/include/types.h b/quest/include/types.h
index 835ab149..46125504 100644
--- a/quest/include/types.h
+++ b/quest/include/types.h
@@ -5,6 +5,92 @@
 #ifndef TYPES_H
 #define TYPES_H
 
+#include "quest/include/modes.h"
+#include "quest/include/precision.h"
+
+
+
+/*
+ * REAL TYPE ALIASES
+ */
+
+typedef FLOAT_TYPE qreal;
+typedef INDEX_TYPE qindex;
+
+
+
+/*
+ * COMPLEX TYPE ALIAS
+ */
+
+// when C++ parses this header during backend or C++ user-code compilation...
+#ifdef __cplusplus
+
+    // resolve qcomp as the standard C++ complex type
+    #include <complex>
+    typedef std::complex<FLOAT_TYPE> qcomp;
+
+// when C parses this header, during compilation of C user code...
+#else
+
+    // pretend that the API's qcomp is the C complex type
+    #include <complex.h>
+
+    // which is either MSVC's custom C complex...
+    #ifdef _MSC_VER
+
+        #if (FLOAT_PRECISION == 1)
+            typedef _Fcomplex qcomp;
+
+        #elif (FLOAT_PRECISION == 2)
+            typedef _Dcomplex qcomp;
+
+        #elif (FLOAT_PRECISION == 4)
+            typedef _Lcomplex qcomp;
+
+        #endif
+
+    // or that used by GNU & Clang
+    #else
+        typedef FLOAT_TYPE _Complex qcomp;
+
+    #endif
+
+#endif
+
+
+
+/*
+ * COMPLEX TYPE OVERLOADS
+ */
+
+#ifdef __cplusplus
+
+    // enable C++ literals (requires C++14)
+    using namespace std::complex_literals;
+
+    // qcomp() C++ instantiation is already enabled
+
+#else
+
+    #ifdef _MSC_VER
+
+        // MSVC C literals are literally impossible
+
+        // enable qcomp(re,im) C instantiation via a compound literal, usable as 'qcomp x = qcomp(1,2);'
+        #define qcomp(re,im) ( (qcomp) {(re), (im)} )
+
+    #else
+
+        // C literals are already enabled (requires C99)
+
+        // enable qcomp() C instantiation
+        #define qcomp(re,im) ( (qreal) (re) + I*((qreal) (im)) )
+
+    #endif
+
+#endif
+
 
 
 #endif // TYPES_H
\ No newline at end of file
diff --git a/quest/src/comm/communication.cpp b/quest/src/comm/communication.cpp
index 273a4f56..d71759a7 100644
--- a/quest/src/comm/communication.cpp
+++ b/quest/src/comm/communication.cpp
@@ -1,4 +1,36 @@
 /** @file
  * Functions for communicating and exchanging amplitudes between compute
  * nodes, when running in distributed mode, using the C MPI standard.
- */
\ No newline at end of file
+ */
+
+#include "quest/include/modes.h"
+#include "quest/include/types.h"
+
+#if ENABLE_DISTRIBUTION
+    #include <mpi.h>
+#endif
+
+
+
+/*
+ * MPI COMPLEX TYPE FLAG
+ */
+
+#if ENABLE_DISTRIBUTION
+
+    #if (FLOAT_PRECISION == 1)
+        #define MPI_QCOMP MPI_CXX_FLOAT_COMPLEX
+
+    #elif (FLOAT_PRECISION == 2)
+        #define MPI_QCOMP MPI_CXX_DOUBLE_COMPLEX
+
+    // sometimes 'MPI_CXX_LONG_DOUBLE_COMPLEX' isn't defined
+    #elif (FLOAT_PRECISION == 4) && defined(MPI_CXX_LONG_DOUBLE_COMPLEX)
+        #define MPI_QCOMP MPI_CXX_LONG_DOUBLE_COMPLEX
+
+    // in that case, fall back to the C type (identical memory layout)
+    #else
+        #define MPI_QCOMP MPI_C_LONG_DOUBLE_COMPLEX
+    #endif
+
+#endif
\ No newline at end of file
diff --git a/quest/src/cpu/omp_subroutines.cpp b/quest/src/cpu/omp_subroutines.cpp
index 1063d48a..05484da2 100644
--- a/quest/src/cpu/omp_subroutines.cpp
+++ b/quest/src/cpu/omp_subroutines.cpp
@@ -1,4 +1,17 @@
 /** @file
  * CPU OpenMP-accelerated definitions of the subroutines called by
  * accelerator.cpp. 
- */
\ No newline at end of file
+ */
+
+#include "quest/include/modes.h"
+#include "quest/include/types.h"
+
+#if ENABLE_MULTITHREADING
+    #include <omp.h>
+#endif
+
+
+// inform OpenMP how to sum qcomp instances (except on MSVC); private copies start at zero, the additive identity
+#if ENABLE_MULTITHREADING && !defined(_MSC_VER)
+     #pragma omp declare reduction(+ : qcomp : omp_out += omp_in ) initializer( omp_priv = qcomp(0,0) )
+#endif
\ No newline at end of file
diff --git a/quest/src/gpu/kernels.hpp b/quest/src/gpu/kernels.hpp
index db1c6380..738fd707 100644
--- a/quest/src/gpu/kernels.hpp
+++ b/quest/src/gpu/kernels.hpp
@@ -7,6 +7,48 @@
 #ifndef KERNELS_HPP
 #define KERNELS_HPP
 
+#include "quest/include/modes.h"
+#include "quest/include/types.h"
+
+#include <cuComplex.h>
+
+
+
+/*
+ * CREATE CUDA-COMPATIBLE QCOMP ALIAS
+ */
+
+#if (FLOAT_PRECISION == 1)
+    typedef cuFloatComplex cu_qcomp;
+
+#elif (FLOAT_PRECISION == 2)
+    typedef cuDoubleComplex cu_qcomp;
+
+#else
+    #error "Build bug; precision.h should have prevented non-float non-double qcomp precision on GPU."
+
+#endif
+
+
+
+/*
+ * CREATE CUDA-COMPATIBLE QCOMP OVERLOADS
+ */
+
+__host__ __device__ inline cu_qcomp operator + (const cu_qcomp& a, const cu_qcomp& b) {
+    cu_qcomp sum = a;   // start from a copy of the left operand
+    sum.x += b.x;       // accumulate the right operand's .x component
+    sum.y += b.y;       // accumulate the right operand's .y component
+    return sum;
+}
+
+__host__ __device__ inline cu_qcomp operator * (const cu_qcomp& a, const cu_qcomp& b) {
+    // expand (a.x + i a.y)(b.x + i b.y), treating .x as the real part and .y as the imaginary part
+    const auto re = a.x * b.x - a.y * b.y;
+    const auto im = a.x * b.y + a.y * b.x;
+    return cu_qcomp{re, im};
+}
+
 
 
 #endif // KERNELS_HPP
\ No newline at end of file