MGHPhysicsResearch · ferdymercury · Dec 7, 2022 · Dec 7, 2022 · Dec 7, 2022 · Dec 7, 2022
diff --git a/.gitignore b/.gitignore
@@ -1,34 +1,2 @@
-# Prerequisites
-*.d
-
-# Compiled Object files
-*.slo
-*.lo
-*.o
-*.obj
-
-# Precompiled Headers
-*.gch
-*.pch
-
-# Compiled Dynamic libraries
-*.so
-*.dylib
-*.dll
-
-# Fortran module files
-*.mod
-*.smod
-
-# Compiled Static libraries
-*.lai
-*.la
-*.a
-*.lib
-
-# Executables
-*.exe
-*.out
-*.app
-
-.clang-format
+build/
+CMakeLists.txt.user*
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,76 @@
+# find_package(CUDAToolkit) needs CMake >= 3.17 and CMAKE_CUDA_ARCHITECTURES
+# needs CMake >= 3.18, so 3.18 is the lowest version that can configure this project.
+cmake_minimum_required(VERSION 3.18)
+project(moqui)
+
+find_package(GDCM REQUIRED)
+find_package(ZLIB REQUIRED)
+
+if(APPLE)
+    find_library(COREFOUNDATION_LIBRARY CoreFoundation)
+endif()
+
+### Set release mode as default build type (single-configuration generators only)
+set(default_build_type "Release")
+if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
+  message(STATUS "Setting build type to '${default_build_type}' as none was specified.")
+  set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE STRING "Choose the type of build." FORCE)
+  # Set the possible values of build type for cmake-gui
+  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release")# "MinSizeRel" "RelWithDebInfo"
+endif()
+
+# Define __PHYSICS_DEBUG__ for the Debug configuration. The generator expression
+# also covers multi-configuration generators, which leave CMAKE_BUILD_TYPE empty.
+add_compile_definitions($<$<CONFIG:Debug>:__PHYSICS_DEBUG__>)
+
+# GPU build by default; configure with -DUSE_CUDA=OFF to take the C++-only branch below.
+option(USE_CUDA "Compile with nvcc for the GPU (OFF: compile with the C++ compiler only)." ON)
+
+# The extension of the main code should be cpp to compile it using g++
+# for CPU version and using nvcc for GPU version.
+# It will not be compiled using g++ if the extension is cu
+if(USE_CUDA)
+    # Default compute capability; override with -DCMAKE_CUDA_ARCHITECTURES=<arch>.
+    # This must precede enable_language(CUDA): under policy CMP0104 that call
+    # initializes CMAKE_CUDA_ARCHITECTURES to the compiler default when it is
+    # still unset, after which this default would no longer take effect.
+    set(CMAKE_CUDA_ARCHITECTURES 75 CACHE STRING "CUDA architectures")
+
+    find_package(CUDAToolkit REQUIRED)
+    enable_language(CUDA)
+    if(NOT DEFINED CMAKE_CUDA_STANDARD)
+        set(CMAKE_CUDA_STANDARD 11)
+        set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+    endif()
+    message(STATUS "Compile using ${CMAKE_CUDA_COMPILER} ${CMAKE_CUDA_COMPILER_VERSION}")
+
+    # -w silences all nvcc warnings; --use_fast_math selects the fast math intrinsics.
+    string(APPEND CMAKE_CUDA_FLAGS " -w --use_fast_math")
+    if(20 IN_LIST CMAKE_CUDA_ARCHITECTURES)
+        if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS_EQUAL 9.0.0)
+            string(APPEND CMAKE_CUDA_FLAGS " -Wno-deprecated-gpu-targets") # silence warnings
+        endif()
+    endif()
+
+    ### Debug configuration: device debugging (-G) and host compiler warnings
+    # CMAKE_CUDA_FLAGS_DEBUG is only applied to Debug builds, so no build-type check is needed.
+    # Remember setting in QtCreator cuda-gdb as debugger in the Compiler kit
+    #TODO, if you want profiling at high speed, use -lineinfo instead of -G
+    #See also --source-in-ptx https://gitlab.kitware.com/cmake/cmake/-/issues/19017
+    set(CMAKE_CUDA_FLAGS_DEBUG "-G -g -Xcompiler -Wall ${CMAKE_CUDA_FLAGS_DEBUG}")#-rdynamic
+else()
+    enable_language(CXX)
+    if(NOT DEFINED CMAKE_CXX_STANDARD)
+        set(CMAKE_CXX_STANDARD 11)
+        set(CMAKE_CXX_STANDARD_REQUIRED ON)
+    endif()
+    message(STATUS "Compile using ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_VERSION}")
+endif()
+
+add_subdirectory(moqui)
+
+option(ENABLE_TESTS "Enables end-to-end integration tests using CTest framework." ON)
+if(ENABLE_TESTS)
+    enable_testing()
+    add_subdirectory(tests)
+endif()
diff --git a/README.md b/README.md
@@ -1,35 +1,48 @@
 MOnte carlo code for QUIck proton dose calculation (moqui)
-=======
+==========================================================
 
 <img src="images/moqui_logo.jpg">
 
 ### Installation
 #### Requirements
-* GDCM > 2
-  * Please refer to GDCM v2 [installation guide](https://sourceforge.net/projects/gdcm/)
-  * You can also install GDCM v3 using package manager
-* CUDA
-- The code has been tested with GDCM v2 and CUDA v10.2 on Ubuntu 20.04
-- The code has been tested with GDCM v3 and CUDA v11.8 on Ubuntu 22.04
+- GDCM > 2
+  - Please refer to GDCM [installation guide](https://sourceforge.net/projects/gdcm/)
+  - You can also install GDCM v3 using package manager (`libgdcm-dev` in Ubuntu 22)
+    - If you get spurious warnings in CMake, and they annoy you, consider installing (Ubuntu 22): `libgdcm-tools libvtkgdcm-cil libvtkgdcm-dev libvtkgdcm-java python3-vtkgdcm`
+- CUDA
+- The code has been tested with
+  - GDCM v2 and CUDA v10.2 on Ubuntu 20.04
+  - GDCM v3 and CUDA v11.8 on Ubuntu 22.04
+  - GDCM v3 and CUDA v8.0 on Ubuntu 22.04
+  - GDCM v3 and CUDA v12.6 on Alma Linux 9
+- ZLIB
+- Python3 (for phantom example)
 
 #### Obtaining the code
 ```bash
-$ git clone https://github.com/mghro/moquimc.git
+$ git clone https://github.com/ferdymercury/moquimc.git
 ```
 
 #### Compile the phantom case
 ```bash
-$ cd moquimc/tests/mc/phantom
-$ cmake .
+$ cd moquimc
+$ mkdir build
+$ cd build
+$ cmake ..
 $ make
 ```
-- You may need to modify the CUDA configuration on the CMakeList.txt
-- The default is to use CUDA compute capability 7.5
+- You can specify a custom CUDA path in the cmake command, for example: `-DCUDAToolkit_ROOT=/opt/cuda-8.0 -DCMAKE_CUDA_COMPILER=/opt/cuda-8.0/bin/nvcc`. The default is the nvcc found within the PATH environment variable.
+- You can specify a custom CUDA compute capability via `-DCMAKE_CUDA_ARCHITECTURES=20`. The default is to use CUDA compute capability 7.5
 
 #### Running the phantom example
 ```bash
-$ python create_phantom.py # create water phantom
-$ ./phantom_env --lxyz 100 100 350 --pxyz 0.0 0.0 -175 --nxyz 200 200 350 --spot_energy 200.0 0.0 --spot_position 0 0 0.5 --spot_size 30.0 30.0 --histories 100000 --phantom_path ./water_phantom.raw --output_prefix ./ --gpu_id 0 > ./log.out
+$ python3 ../tests/mc/phantom/create_phantom.py # create water phantom in /tmp/, you need to install numpy
+$ ./tests/mc/phantom/phantom_env --lxyz 100 100 350 --pxyz 0.0 0.0 -175 --nxyz 200 200 350 --spot_energy 200.0 0.0 --spot_position 0 0 0.5 --spot_size 30.0 30.0 --histories 100000 --phantom_path /tmp/water_phantom.raw --output_prefix ./ --gpu_id 0 > ./log.out
+```
+
+Or simply:
+```bash
+$ ctest -V -R phantom_env
 ```
 
 #### Update Dec/26/2023
@@ -51,16 +64,42 @@ $ ./phantom_env --lxyz 100 100 350 --pxyz 0.0 0.0 -175 --nxyz 200 200 350 --spot
 - You can find the TOPAS extensions and example parameter file under treatment_machines/TOPAS
 - These are updated version of the HU extension in TOPAS (https://github.com/topasmc/extensions/tree/master/HU)
 
+
 ### Authors
-Hoyeon Lee (leehoy12345@gmail.com)  
-Jungwook Shin  
-Joost M. Verburg  
-Mislav Bobić  
-Brian Winey  
-Jan Schuemann  
-Harald Paganetti  
+- Hoyeon Lee (leehoy12345@gmail.com)
+- Jungwook Shin
+- Joost M. Verburg
+- Mislav Bobić
+- Brian Winey
+- Jan Schuemann
+- Harald Paganetti
+
+
+### Notes
+For an old Tesla C2070 GPU, you might need steps such as:
+- Install patched nvidia-390 driver on Ubuntu 22: https://launchpad.net/%7Edtl131/+archive/ubuntu/nvidiaexp
+- Install gcc5 and cuda8: https://askubuntu.com/questions/1442001/cuda-8-and-gcc-5-on-ubuntu-22-04-for-tesla-c2070
+- Error with strncpy: https://stackoverflow.com/questions/76531467/nvcc-cuda8-gcc-5-3-no-longer-compiles-with-o1-on-ubuntu-22-04
+- Error with float128: https://askubuntu.com/questions/1442001/cuda-8-and-gcc-5-on-ubuntu-22-04-for-tesla-c2070
+- `cmake ../ -DCUDAToolkit_ROOT=/opt/cuda-8.0 -DCMAKE_CUDA_COMPILER=/opt/cuda-8.0/bin/nvcc -DCMAKE_C_COMPILER=/opt/gcc5/gcc -DCMAKE_CXX_COMPILER=/opt/gcc5/g++ -DCMAKE_CUDA_ARCHITECTURES=20`
+- This might also be needed depending on the platform or CMake version: `export PATH=/opt/gcc5:$PATH`
+- Need to fine-tune QtCreator adding a new custom compiler /opt/cuda-8.0/bin/nvcc and edit .config/clangd/config.yaml file with
+```
+CompileFlags:
+  Add:
+    [
+      '--cuda-path="/opt/cuda-8.0/"',
+      --cuda-gpu-arch=sm_20,
+      '-L"/opt/cuda-8.0/lib64/"',
+      -lcudart,
+    ]
+```
+- See https://github.com/clangd/clangd/issues/858 and https://github.com/clangd/clangd/issues/1815
+
+For an Ampere GPU NVIDIA A40:
+- `cmake .. -DCUDAToolkit_ROOT=/usr/local/cuda-12.6 -DCMAKE_CUDA_ARCHITECTURES=86 -DCMAKE_CUDA_COMPILER=/usr/local/cuda-12.6/bin/nvcc`
 
 ### Acknowledgements
-This work is supported by NIH/NCI R01 234210 "Fast Individualized Delivery Adaptation in Proton Therapy"   
+This work is supported by NIH/NCI R01 234210 "Fast Individualized Delivery Adaptation in Proton Therapy"
 
 
diff --git a/moqui/CMakeLists.txt b/moqui/CMakeLists.txt
@@ -0,0 +1,26 @@
+# Header-only moqui library: consumers get the include path and the GDCM libraries.
+add_library(moqui INTERFACE)
+target_include_directories(moqui INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/..")
+
+# Attach the headers to the target (presumably so that IDEs list them).
+file(GLOB header_files CONFIGURE_DEPENDS
+        "*/*.hpp"
+        "*/*/*.hpp"
+)
+# INTERFACE libraries only accept PRIVATE sources since CMake 3.19; older
+# versions abort with an error, so skip this purely informational step there.
+if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.19)
+    target_sources(moqui PRIVATE ${header_files})
+endif()
+
+target_link_libraries(moqui INTERFACE
+    gdcmCommon
+    gdcmDSED
+    gdcmMEXD
+    gdcmjpeg12
+    gdcmjpeg8
+    gdcmDICT
+    gdcmIOD
+    gdcmMSFF
+    gdcmjpeg16
+)
diff --git a/moqui/base/environments/mqi_phantom_env.hpp b/moqui/base/environments/mqi_phantom_env.hpp
@@ -88,11 +88,10 @@ class phantom_env : public x_environment<R>
             spot_position[2] = 0.0;
         }
 
-        ///< Beam position
+        ///< Beam spatial spread
         if (cli["--spot_size"].size() >= 1) {
             spot_size[0] = std::stof(cli["--spot_size"][0]);
             spot_size[1] = std::stof(cli["--spot_size"][1]);
-
         } else {
             spot_size[0] = 0.0;
             spot_size[1] = 0.0;
@@ -153,12 +152,12 @@ class phantom_env : public x_environment<R>
             this->output_path = cli["--output_prefix"][0];
             printf("%s\n", this->output_path.c_str());
         } else {
-            throw std::runtime_error("output_path is required.");
+            throw std::runtime_error("output_prefix is required.");
         }
 
         if (cli["--phantom_path"].size() >= 1) {
             this->phantom_path = cli["--phantom_path"][0];
-            printf("phantom path: %s\n", this->output_path.c_str());
+            printf("phantom path: %s\n", this->phantom_path.c_str());
         } else {
             throw std::runtime_error("phantom_path is required.");
         }
@@ -207,7 +206,7 @@ class phantom_env : public x_environment<R>
         mqi::coordinate_transform<R> p_coord(spot_angles, { 0, 0, 0 });   //angles, isocenter
         /// Source direction is -Z
         mqi::vec3<R> dir(0, 0, -1);
-        ///< Beamlet phse-space distribution
+        ///< Beamlet phase-space distribution
         std::array<R, 6> beamlet_mean = {
             spot_position[0], spot_position[1], spot_position[2], dir.x, dir.y, dir.z
         };
@@ -268,6 +267,10 @@ class phantom_env : public x_environment<R>
         mqi::patient_material_t<R> patient_material;
         int16_t*                   ph = new int16_t[nxyz.x * nxyz.y * nxyz.z];
         std::ifstream              ph_fid(this->phantom_path, std::ios::in | std::ios::binary);
+        if(!ph_fid.good())
+        {
+            throw std::runtime_error("phantom file does not exist");
+        }
         ph_fid.read((char*) (&ph[0]), nxyz.x * nxyz.y * nxyz.z * sizeof(ph[0]));
         ph_fid.close();
         for (int i = 0; i < nxyz.x * nxyz.y * nxyz.z; i++) {
@@ -372,7 +375,7 @@ class phantom_env : public x_environment<R>
         }
 
         int32_t* d_transport_seed;
-        gpu_err_chk(cudaMalloc(&d_transport_seed, sizeof(int32_t) * (h0-h1)));
+        gpu_err_chk(cudaMalloc(&d_transport_seed, sizeof(int32_t) * (h1-h0)));
         gpu_err_chk(cudaMemcpy(d_transport_seed,
                                transport_seed,
                                sizeof(int32_t) * h1,

diff --git a/moqui/base/environments/mqi_tps_env.hpp b/moqui/base/environments/mqi_tps_env.hpp
@@ -4,20 +4,11 @@
 #include <moqui/base/environments/mqi_xenvironment.hpp>
 #include <moqui/base/scorers/mqi_scorer_energy_deposit.hpp>
 
-#include "gdcmAttribute.h"
-#include "gdcmDataElement.h"
+
 #include "gdcmDataSet.h"
-#include "gdcmDict.h"
-#include "gdcmDicts.h"
-#include "gdcmGlobal.h"
-#include "gdcmIPPSorter.h"
-#include "gdcmImage.h"
 #include "gdcmReader.h"
 #include "gdcmScanner.h"
-#include "gdcmSorter.h"
-#include "gdcmStringFilter.h"
 #include "gdcmTag.h"
-#include "gdcmTesting.h"
 #include <cassert>
 #include <chrono>
 #include <ctime>

diff --git a/moqui/base/mqi_common.hpp b/moqui/base/mqi_common.hpp
@@ -97,4 +97,25 @@ const float max_step_global = 1.0;
 
 }   // namespace mqi
 
+#if defined(__CUDACC__)
+#include <cuda.h>
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
+/**
+ * \brief Double-precision atomicAdd built on a 64-bit atomicCAS loop, compiled only when __CUDA_ARCH__ < 600; returns the previous value.
+ * \see https://stackoverflow.com/questions/39274472/error-function-atomicadddouble-double-has-already-been-defined
+ */
+static __inline__ __device__ double atomicAdd(double *address, double val) {
+    unsigned long long int* const slot = reinterpret_cast<unsigned long long int*>(address);
+    unsigned long long int observed = *slot;
+    if (val == 0.0) return __longlong_as_double(observed);   // adding zero: skip the CAS loop
+    unsigned long long int expected;
+    do {   // retry while the bits seen by atomicCAS differ from the ones the sum was based on
+        expected = observed;
+        observed = atomicCAS(slot, expected, __double_as_longlong(val + __longlong_as_double(expected)));
+    } while (expected != observed);
+    return __longlong_as_double(observed);
+}
+#endif
+#endif
+
 #endif
diff --git a/moqui/base/mqi_coordinate_transform.hpp b/moqui/base/mqi_coordinate_transform.hpp
@@ -34,7 +34,7 @@ class coordinate_transform
     vec3<T> translation;
 
     ///< rotation
-    std::array<T, 4> angles = { 0.0, 0.0, 0.0, 0.0 };
+    std::array<T, 4> angles { 0.0, 0.0, 0.0, 0.0 };
 
     /// rotation matrices
     mat3x3<T> collimator;        ///< Rotation due to collimator angle
@@ -72,8 +72,7 @@ class coordinate_transform
 
     /// A copy constructor
     CUDA_HOST_DEVICE
-    coordinate_transform(const coordinate_transform<T>& ref) {
-        angles          = ref.angles;
+    coordinate_transform(const coordinate_transform<T>& ref) : angles(ref.angles) {
         collimator      = ref.collimator;
         gantry          = ref.gantry;
         patient_support = ref.patient_support;
@@ -84,7 +83,7 @@ class coordinate_transform
 
     /// Destructor
     CUDA_HOST_DEVICE
-    coordinate_transform() {
+    coordinate_transform() : angles() {
         ;
     }