diff --git a/quest/src/comm/communication.cpp b/quest/src/comm/communication.cpp index d71759a7..d1f61a5d 100644 --- a/quest/src/comm/communication.cpp +++ b/quest/src/comm/communication.cpp @@ -6,12 +6,31 @@ #include "quest/include/modes.h" #include "quest/include/types.h" +#include "quest/src/core/errors.hpp" + #if ENABLE_DISTRIBUTION #include <mpi.h> #endif +/* + * WARN ABOUT CUDA-AWARENESS + */ + +#if ENABLE_DISTRIBUTION && ENABLE_GPU_ACCELERATION + #include <mpi-ext.h> + + #ifndef MPIX_CUDA_AWARE_SUPPORT + #warning "Could not ascertain whether MPI is CUDA-aware, so we will assume it is not. This means inter-GPU communication will be slowly routed through the CPU/RAM." + #elif !MPIX_CUDA_AWARE_SUPPORT + #warning "MPI compiler is not CUDA-aware, so inter-GPU communication will be slowly routed through the CPU/RAM" + #endif + +#endif + + + /* * MPI COMPLEX TYPE FLAG */ @@ -33,4 +52,88 @@ #define MPI_QCOMP MPI_C_LONG_DOUBLE_COMPLEX #endif -#endif \ No newline at end of file +#endif + + + +/* + * MPI ENVIRONMENT MANAGEMENT + * all of which is safely callable in non-distributed mode + */ + + +bool comm_isMpiCompiled() { + return (bool) ENABLE_DISTRIBUTION; +} + + +bool comm_isMpiGpuAware() { + + // TODO: these checks may be OpenMPI specific, so that + // non-OpenMPI MPI compilers are always dismissed as + // not being CUDA-aware. Check e.g. MPICH method! + + // definitely not GPU-aware if compiler declares it is not + #if defined(MPIX_CUDA_AWARE_SUPPORT) && ! 
MPIX_CUDA_AWARE_SUPPORT + return false; + #endif + + // check CUDA-awareness at run-time if we know it's principally supported + #if defined(MPIX_CUDA_AWARE_SUPPORT) + return (bool) MPIX_Query_cuda_support(); + #endif + + // if we can't ascertain CUDA-awareness, just assume no to avoid seg-fault + return false; +} + + +void comm_init() { +#if ENABLE_DISTRIBUTION + int isInit; + MPI_Initialized(&isInit); + + // gracefully handle re-initialisation + if (isInit) + error_commAlreadyInit(); + + MPI_Init(NULL, NULL); +#endif +} + + +void comm_end() { +#if ENABLE_DISTRIBUTION + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); +#endif +} + + +int comm_getRank() { +#if ENABLE_DISTRIBUTION + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + return rank; +#else + return 0; +#endif +} + + +int comm_getNumNodes() { +#if ENABLE_DISTRIBUTION + int numNodes; + MPI_Comm_size(MPI_COMM_WORLD, &numNodes); + return numNodes; +#else + return 1; +#endif +} + + +void comm_synch() { +#if ENABLE_DISTRIBUTION + MPI_Barrier(MPI_COMM_WORLD); +#endif +} \ No newline at end of file diff --git a/quest/src/comm/communication.hpp b/quest/src/comm/communication.hpp index 7564999e..2568400b 100644 --- a/quest/src/comm/communication.hpp +++ b/quest/src/comm/communication.hpp @@ -8,4 +8,24 @@ +/* + * MPI ENVIRONMENT MANAGEMENT + */ + +bool comm_isMpiCompiled(); + +bool comm_isMpiGpuAware(); + +void comm_init(); + +void comm_end(); + +int comm_getRank(); + +int comm_getNumNodes(); + +void comm_synch(); + + + #endif // COMMUNICATION_HPP \ No newline at end of file diff --git a/quest/src/core/errors.cpp b/quest/src/core/errors.cpp index 06b4e1f7..611666a1 100644 --- a/quest/src/core/errors.cpp +++ b/quest/src/core/errors.cpp @@ -49,4 +49,26 @@ void error_validationMessageVarNotSubstituted(std::string msg, std::string var) void error_validationMessageContainedUnsubstitutedVars(std::string msg) { raiseInternalError("User input validation failed and an error string was prepared. 
However, the message contained unexpected (and potentially ill-formed) unsubstituted variables. The message was:\n" + msg + "\n"); -} \ No newline at end of file +} + + + +/* + * COMMUNICATION ERRORS + */ + +void error_commAlreadyInit() { + + raiseInternalError("The MPI communication environment was attemptedly re-initialised, despite the QuEST environment already existing."); +} + + + +/* + * GPU ERRORS + */ + +void error_gpuQueriedButGpuNotCompiled() { + + raiseInternalError("A function attempted to query GPU properties but QuEST was not compiled with GPU acceleration enabled."); +} diff --git a/quest/src/core/errors.hpp b/quest/src/core/errors.hpp index 91327976..29151807 100644 --- a/quest/src/core/errors.hpp +++ b/quest/src/core/errors.hpp @@ -24,4 +24,20 @@ void error_validationMessageContainedUnsubstitutedVars(std::string msg); +/* + * COMMUNICATION ERRORS + */ + +void error_commAlreadyInit(); + + + +/* + * GPU ERRORS + */ + +void error_gpuQueriedButGpuNotCompiled(); + + + #endif // ERRORS_HPP \ No newline at end of file diff --git a/quest/src/core/validation.cpp b/quest/src/core/validation.cpp index bdfc73f3..5fcea0a2 100644 --- a/quest/src/core/validation.cpp +++ b/quest/src/core/validation.cpp @@ -4,6 +4,7 @@ */ #include "quest/src/core/errors.hpp" +#include "quest/src/comm/communication.hpp" #include #include diff --git a/quest/src/cpu/omp_subroutines.cpp b/quest/src/cpu/omp_subroutines.cpp index 05484da2..35395958 100644 --- a/quest/src/cpu/omp_subroutines.cpp +++ b/quest/src/cpu/omp_subroutines.cpp @@ -6,12 +6,60 @@ #include "quest/include/modes.h" #include "quest/include/types.h" +#include "quest/src/core/errors.hpp" + + +#if ENABLE_MULTITHREADING && !defined(_OPENMP) + #error "Attempted to compile in multithreaded mode without enabling OpenMP." 
+#endif + + #if ENABLE_MULTITHREADING #include <omp.h> #endif - -// inform OpenMP how to reduce qcomp instances (except on MSVC compilers) + +/* + * ENABLE OPENMP REDUCTION OF qcomp (except on MSVC compilers) + */ + #if defined(ENABLE_MULTITHREADING) && !defined(_MSC_VER) #pragma omp declare reduction(+ : qcomp : omp_out += omp_in ) initializer( omp_priv = omp_orig ) -#endif \ No newline at end of file +#endif + + + +/* + * OPENMP CONFIG + */ + + +bool cpu_isOpenmpCompiled() { + return (bool) ENABLE_MULTITHREADING; +} + + +int cpu_getCurrentNumThreads() { +#if ENABLE_MULTITHREADING + int n = -1; + + #pragma omp parallel shared(n) + n = omp_get_num_threads(); + + return n; +#else + error_cpuThreadsQueriedButEnvNotMultithreaded(); + return -1; +#endif +} + + +int cpu_getNumOpenmpProcessors() { +#if ENABLE_MULTITHREADING + return omp_get_num_procs(); +#else + error_cpuThreadsQueriedButEnvNotMultithreaded(); + return -1; +#endif +} diff --git a/quest/src/cpu/omp_subroutines.hpp b/quest/src/cpu/omp_subroutines.hpp index 9aa5520f..fa27bf74 100644 --- a/quest/src/cpu/omp_subroutines.hpp +++ b/quest/src/cpu/omp_subroutines.hpp @@ -7,4 +7,16 @@ +/* + * OPENMP CONFIG + */ + +bool cpu_isOpenmpCompiled(); + +int cpu_getCurrentNumThreads(); + +int cpu_getNumOpenmpProcessors(); + + + #endif // OMP_SUBROUTINES_HPP \ No newline at end of file diff --git a/quest/src/gpu/config.cpp b/quest/src/gpu/config.cpp index 388005ed..88b877ec 100644 --- a/quest/src/gpu/config.cpp +++ b/quest/src/gpu/config.cpp @@ -1,3 +1,152 @@ /** @file * Utility functions for querying GPU hardware, used by gpu.cpp. - */ \ No newline at end of file + */ + +#include "quest/include/modes.h" + +#include "quest/src/core/errors.hpp" +#include "quest/src/comm/communication.hpp" + + +#if ENABLE_GPU_ACCELERATION && ! (defined(__NVCC__) || defined(__HIPCC__)) + #error \ + "Attempted to compile config.cpp in GPU-accelerated mode with a non-GPU compiler. 
"\ "Please compile this file with a CUDA (nvcc) or ROCm (hipcc) compiler." +#endif + + +#if ENABLE_GPU_ACCELERATION + #include <cuda.h> + #include <cuda_runtime.h> +#endif + + + +bool gpu_isGpuCompiled() { + return (bool) ENABLE_GPU_ACCELERATION; +} + + +bool gpu_isGpuAvailable() { +#if ENABLE_GPU_ACCELERATION + + // DEBUG: cudaGetDeviceProperties is (for some reason) being auto-suffixed with _v2 + // in Cuda 12, which is the only sm=90 compatible version we can use. But then the + // function is not found in -lcuda and -lcudart, WTF + + // ask CUDA for the number of available "devices" + int numDevices; + cudaError_t successCode = cudaGetDeviceCount(&numDevices); + + // if the query failed, we can't use any devices anyway, so we abort + if (successCode != cudaSuccess) + return false; + + // so for each reported device... + for (int deviceInd=0; deviceInd < numDevices; deviceInd++) { + + // query its properties + struct cudaDeviceProp props; + successCode = cudaGetDeviceProperties(&props, deviceInd); + + // if the query failed, device is anyway unusable + if (successCode != cudaSuccess) + continue; + + // if the device is a real GPU, its 'major' compute capability is != 9999 (meaning emulation) + if (props.major != 9999) + return true; + } + + // no non-emulation devices were found + return false; + +#else + error_gpuQueriedButGpuNotCompiled(); + return false; +#endif +} + + +bool gpu_isDirectGpuCommPossible() { +#if ENABLE_GPU_ACCELERATION + + if (!comm_isMpiGpuAware()) + return false; + + if (!gpu_isGpuAvailable()) + return false; + + // TODO: + // and are GPUs compatible? + // (the above might need to call a GPU-compiled func) + + return true; + +#else + error_gpuQueriedButGpuNotCompiled(); + return false; +#endif +} + + +int gpu_getNumberOfLocalGpus() { +#if ENABLE_GPU_ACCELERATION + + // TODO: this will over-report, since it may include virtual devices! 
+ // see gpu_isGpuAvailable() + + int num; + cudaGetDeviceCount(&num); + return num; + +#else + error_gpuQueriedButGpuNotCompiled(); + return -1; +#endif +} + + +void gpu_bindLocalGPUsToNodes(int rank) { +#if ENABLE_GPU_ACCELERATION + + int numLocalGpus = gpu_getNumberOfLocalGpus(); + int localGpuInd = rank % numLocalGpus; + cudaSetDevice(localGpuInd); + +#else + error_gpuQueriedButGpuNotCompiled(); +#endif +} + + +size_t gpu_getCurrentAvailableMemoryInBytes() { +#if ENABLE_GPU_ACCELERATION + + // note that in distributed settings, all GPUs + // are being simultaneously queried, and it is + // possible their values differ per-node + + size_t free, total; + cudaMemGetInfo(&free, &total); + return free; + +#else + error_gpuQueriedButGpuNotCompiled(); + return 0; +#endif +} + + +size_t gpu_getTotalMemoryInBytes() { +#if ENABLE_GPU_ACCELERATION + + size_t free, total; + cudaMemGetInfo(&free, &total); + return total; + +#else + error_gpuQueriedButGpuNotCompiled(); + return 0; +#endif +} \ No newline at end of file diff --git a/quest/src/gpu/config.hpp b/quest/src/gpu/config.hpp index 5675da3f..1fcf934b 100644 --- a/quest/src/gpu/config.hpp +++ b/quest/src/gpu/config.hpp @@ -5,6 +5,24 @@ #ifndef CONFIG_HPP #define CONFIG_HPP +#include <cstddef> + + + +bool gpu_isGpuCompiled(); + +bool gpu_isGpuAvailable(); + +bool gpu_isDirectGpuCommPossible(); + +int gpu_getNumberOfLocalGpus(); + +void gpu_bindLocalGPUsToNodes(int rank); + +size_t gpu_getCurrentAvailableMemoryInBytes(); + +size_t gpu_getTotalMemoryInBytes(); + #endif // CONFIG_HPP \ No newline at end of file