Merge branch 'development' into generalize_cell_assignment

AMReX-Codes · Aug 23, 2023 · 15091f1 · 15091f1
2 parents 39eb44a + d4d0e90
commit 15091f1
Show file tree

Hide file tree

Showing 18 changed files with 404 additions and 70 deletions.
diff --git a/Docs/sphinx_documentation/source/GPU.rst b/Docs/sphinx_documentation/source/GPU.rst
@@ -187,6 +187,43 @@ can run it and that will generate results like:
    [The  Pinned Arena] space (MB): 8
    AMReX (19.06-404-g0455b168b69c-dirty) finalized
 
+SYCL configuration variables
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When building with ``USE_SYCL=TRUE``, one can set the following makefile
+variables to configure the build
+
+.. raw:: latex
+
+   \begin{center}
+
+.. _tab:gnumakesyclvar:
+
+.. table:: AMReX SYCL-specific GNU Make build options
+
+   +------------------------------+-------------------------------------------------+-------------+-----------------+
+   | Variable Name                | Description                                     | Default     | Possible values |
+   +==============================+=================================================+=============+=================+
+   | SYCL_AOT                     | Enable SYCL ahead-of-time compilation           | FALSE       | TRUE, FALSE     |
+   +------------------------------+-------------------------------------------------+-------------+-----------------+
+   | SYCL_AOT_GRF_MODE            | Specify AOT register file mode                  | Default     | Default, Large, |
+   |                              |                                                 |             | AutoLarge       |
+   +------------------------------+-------------------------------------------------+-------------+-----------------+
+   | AMREX_INTEL_ARCH             | Specify target if AOT is enabled                | None        | pvc, etc.       |
+   +------------------------------+-------------------------------------------------+-------------+-----------------+
+   | SYCL_SPLIT_KERNEL            | Enable SYCL kernel splitting                    | FALSE       | TRUE, FALSE     |
+   +------------------------------+-------------------------------------------------+-------------+-----------------+
+   | USE_ONEDPL                   | Enable SYCL's oneDPL algorithms                 | NO          | YES, NO         |
+   +------------------------------+-------------------------------------------------+-------------+-----------------+
+   | SYCL_SUB_GROUP_SIZE          | Specify subgroup size                           | 32          | 64, 32, 16      |
+   +------------------------------+-------------------------------------------------+-------------+-----------------+
+   | SYCL_MAX_PARALLEL_LINK_JOBS  | Number of parallel jobs in device link          | 1           | 1, 2, 3, etc.   |
+   +------------------------------+-------------------------------------------------+-------------+-----------------+
+.. raw:: latex
+
+   \end{center}
+
+
 Building with CMake
 -------------------
 

diff --git a/Src/AmrCore/AMReX_Interp_2D_C.H b/Src/AmrCore/AMReX_Interp_2D_C.H
@@ -164,6 +164,36 @@ facediv_int (int ci, int cj, int /*ck*/, int nf,
 
 }
 
+template<typename T>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
+face_linear_interp_x (int i, int j, int /*k*/, int n, Array4<T> const& fine,
+                      Array4<T const> const& crse, IntVect const& ratio) noexcept
+{
+    const int ii = amrex::coarsen(i,ratio[0]);
+    const int jj = amrex::coarsen(j,ratio[1]);
+    if (i-ii*ratio[0] == 0) {
+        fine(i,j,0,n) = crse(ii,jj,0,n);
+    } else {
+        Real const w = static_cast<Real>(i-ii*ratio[0]) * (Real(1.)/Real(ratio[0]));
+        fine(i,j,0,n) = (Real(1.)-w) * crse(ii,jj,0,n) + w * crse(ii+1,jj,0,n);
+    }
+}
+
+template<typename T>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
+face_linear_interp_y (int i, int j, int /*k*/, int n, Array4<T> const& fine,
+                      Array4<T const> const& crse, IntVect const& ratio) noexcept
+{
+    const int ii = amrex::coarsen(i,ratio[0]);
+    const int jj = amrex::coarsen(j,ratio[1]);
+    if (j-jj*ratio[1] == 0) {
+        fine(i,j,0,n) = crse(ii,jj,0,n);
+    } else {
+        Real const w = static_cast<Real>(j-jj*ratio[1]) * (Real(1.)/Real(ratio[1]));
+        fine(i,j,0,n) = (Real(1.)-w) * crse(ii,jj,0,n) + w * crse(ii,jj+1,0,n);
+    }
+}
+
 template <typename T>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 void ccprotect_2d (int ic, int jc, int /*kc*/, int nvar,

diff --git a/Src/AmrCore/AMReX_Interp_3D_C.H b/Src/AmrCore/AMReX_Interp_3D_C.H
@@ -332,6 +332,54 @@ facediv_int (int ci, int cj, int ck, int nf,
                                   +                dz3/(8*dy*zspxs)*(v000+v021-v001-v020);
 }
 
+template<typename T>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
+face_linear_interp_x (int i, int j, int k, int n, Array4<T> const& fine,
+                      Array4<T const> const& crse, IntVect const& ratio) noexcept
+{
+    const int ii = amrex::coarsen(i,ratio[0]);
+    const int jj = amrex::coarsen(j,ratio[1]);
+    const int kk = amrex::coarsen(k,ratio[2]);
+    if (i-ii*ratio[0] == 0) {
+        fine(i,j,k,n) = crse(ii,jj,kk,n);
+    } else {
+        Real const w = static_cast<Real>(i-ii*ratio[0]) * (Real(1.)/Real(ratio[0]));
+        fine(i,j,k,n) = (Real(1.)-w) * crse(ii,jj,kk,n) + w * crse(ii+1,jj,kk,n);
+    }
+}
+
+template<typename T>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
+face_linear_interp_y (int i, int j, int k, int n, Array4<T> const& fine,
+                      Array4<T const> const& crse, IntVect const& ratio) noexcept
+{
+    const int ii = amrex::coarsen(i,ratio[0]);
+    const int jj = amrex::coarsen(j,ratio[1]);
+    const int kk = amrex::coarsen(k,ratio[2]);
+    if (j-jj*ratio[1] == 0) {
+        fine(i,j,k,n) = crse(ii,jj,kk,n);
+    } else {
+        Real const w = static_cast<Real>(j-jj*ratio[1]) * (Real(1.)/Real(ratio[1]));
+        fine(i,j,k,n) = (Real(1.)-w) * crse(ii,jj,kk,n) + w * crse(ii,jj+1,kk,n);
+    }
+}
+
+template<typename T>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
+face_linear_interp_z (int i, int j, int k, int n, Array4<T> const& fine,
+                      Array4<T const> const& crse, IntVect const& ratio) noexcept
+{
+    const int ii = amrex::coarsen(i,ratio[0]);
+    const int jj = amrex::coarsen(j,ratio[1]);
+    const int kk = amrex::coarsen(k,ratio[2]);
+    if (k-kk*ratio[2] == 0) {
+        fine(i,j,k,n) = crse(ii,jj,kk,n);
+    } else {
+        Real const w = static_cast<Real>(k-kk*ratio[2]) * (Real(1.)/Real(ratio[2]));
+        fine(i,j,k,n) = (Real(1.)-w) * crse(ii,jj,kk,n) + w * crse(ii,jj,kk+1,n);
+    }
+}
+
 template <typename T>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 void ccprotect_3d (int ic, int jc, int kc, int nvar,

diff --git a/Src/AmrCore/AMReX_Interpolater.cpp b/Src/AmrCore/AMReX_Interpolater.cpp
@@ -134,10 +134,10 @@ FaceLinear::interp (const FArrayBox&  crse,
                     int               ncomp,
                     const Box&        fine_region,
                     const IntVect&    ratio,
-                    const Geometry&   crse_geom ,
-                    const Geometry&   fine_geom ,
-                    Vector<BCRec> const& bcr,
-                    int               actual_comp,
+                    const Geometry& /* crse_geom */,
+                    const Geometry& /* fine_geom */,
+                    Vector<BCRec> const& /*bcr*/,
+                    int              /* actual_comp*/,
                     int               /*actual_state*/,
                     RunOn             runon)
 {
@@ -146,9 +146,36 @@ FaceLinear::interp (const FArrayBox&  crse,
     //
     BL_PROFILE("FaceLinear::interp()");
 
-    // pass unallocated IArrayBox for solve_mask, so all fine values get filled.
-    interp_face(crse, crse_comp, fine, fine_comp, ncomp, fine_region,
-                ratio, IArrayBox(), crse_geom, fine_geom, bcr, actual_comp, runon);
+    AMREX_ASSERT(AMREX_D_TERM(fine_region.type(0),+fine_region.type(1),+fine_region.type(2)) == 1);
+
+    Array4<Real> const& fine_arr = fine.array(fine_comp);
+    Array4<Real const> const& crse_arr = crse.const_array(crse_comp);
+
+    if (fine_region.type(0) == IndexType::NODE)
+    {
+        AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
+        {
+            face_linear_interp_x(i,j,k,n,fine_arr,crse_arr,ratio);
+        });
+    }
+#if (AMREX_SPACEDIM >= 2)
+    else if (fine_region.type(1) == IndexType::NODE)
+    {
+        AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
+        {
+            face_linear_interp_y(i,j,k,n,fine_arr,crse_arr,ratio);
+        });
+    }
+#if (AMREX_SPACEDIM == 3)
+    else
+    {
+        AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
+        {
+            face_linear_interp_z(i,j,k,n,fine_arr,crse_arr,ratio);
+        });
+    }
+#endif
+#endif
 }
 
 void

diff --git a/Src/Base/AMReX_BLBackTrace.cpp b/Src/Base/AMReX_BLBackTrace.cpp
@@ -239,10 +239,12 @@ BLBackTrace::print_backtrace_info (FILE* f)
             std::fprintf(f, "%2d: %s\n", i, strings[i]);
 
 #if !defined(AMREX_USE_OMP) || !defined(__INTEL_COMPILER)
+            const bool stack_ptr_not_null = (bt_buffer[i] != nullptr);
+
             std::string addr2line_result;
             bool try_addr2line = false;
             if (amrex::system::call_addr2line && have_eu_addr2line) {
-                if (bt_buffer[i] != nullptr) {
+                if (stack_ptr_not_null) {
                     char print_buff[32];
                     std::snprintf(print_buff,sizeof(print_buff),"%p",bt_buffer[i]);
                     const std::string full_cmd = eu_cmd + " " + print_buff;
@@ -280,7 +282,7 @@ BLBackTrace::print_backtrace_info (FILE* f)
                             addr2line_result.clear();
                         }
                     }
-                    if (addr2line_result.empty()) {
+                    if (addr2line_result.empty() && stack_ptr_not_null) {
                         char print_buff[32];
                         std::snprintf(print_buff,sizeof(print_buff),"%p",bt_buffer[i]);
                         std::string full_cmd = cmd;
@@ -307,27 +309,30 @@ BLBackTrace::print_backtrace_info (FILE* f)
 
     for (int i = 0; i < nentries; ++i) {
         Dl_info info;
-        if (dladdr(bt_buffer[i], &info))
+        if (bt_buffer[i] != nullptr)
         {
-            std::string line;
-            if (amrex::system::call_addr2line && have_atos) {
-                char print_buff[32];
-                std::snprintf(print_buff,sizeof(print_buff),"%p",bt_buffer[i]);
-                const std::string full_cmd = cmd + " " + print_buff;
-                line = run_command(full_cmd);
-            }
-            if (line.empty()) {
-                int status;
-                char * demangled_name = abi::__cxa_demangle(info.dli_sname, nullptr, 0, &status);
-                if (status == 0) {
-                    line += demangled_name;
-                } else {
-                    line += info.dli_fname;
+            if (dladdr(bt_buffer[i], &info))
+            {
+                std::string line;
+                if (amrex::system::call_addr2line && have_atos) {
+                    char print_buff[32];
+                    std::snprintf(print_buff,sizeof(print_buff),"%p",bt_buffer[i]);
+                    const std::string full_cmd = cmd + " " + print_buff;
+                    line = run_command(full_cmd);
+                }
+                if (line.empty()) {
+                    int status;
+                    char * demangled_name = abi::__cxa_demangle(info.dli_sname, nullptr, 0, &status);
+                    if (status == 0) {
+                        line += demangled_name;
+                    } else {
+                        line += info.dli_fname;
+                    }
+                    line += '\n';
+                    std::free(demangled_name);
                 }
-                line += '\n';
-                std::free(demangled_name);
+                std::fprintf(f, "%2d: %s\n", i, line.c_str());
             }
-            std::fprintf(f, "%2d: %s\n", i, line.c_str());
         }
     }
 

diff --git a/Src/Base/AMReX_GpuAsyncArray.H b/Src/Base/AMReX_GpuAsyncArray.H
@@ -86,26 +86,19 @@ public:
                                                         amrex_asyncarray_delete, p));
 #endif
 #elif defined(AMREX_USE_SYCL)
-#ifdef AMREX_USE_CODEPLAY_HOST_TASK
                 auto* pd = d_data;
                 auto* ph = h_data;
                 auto& q = *(Gpu::gpuStream().queue);
                 try {
                     q.submit([&] (sycl::handler& h) {
-                        h.codeplay_host_task([=] () {
+                        h.host_task([=] () {
                             The_Arena()->free(pd);
                             The_Pinned_Arena()->free(ph);
                         });
                     });
                 } catch (sycl::exception const& ex) {
                     amrex::Abort(std::string("host_task: ")+ex.what()+"!!!!!");
                 }
-#else
-                // xxxxx SYCL todo
-                Gpu::streamSynchronize();
-                The_Arena()->free(d_data);
-                The_Pinned_Arena()->free(h_data);
-#endif
 #endif
             }
         }

diff --git a/Src/Base/AMReX_GpuElixir.cpp b/Src/Base/AMReX_GpuElixir.cpp
@@ -48,27 +48,19 @@ Elixir::clear () noexcept
                                                     amrex_elixir_delete, (void*)p));
 #endif
 #elif defined(AMREX_USE_SYCL)
-#ifdef AMREX_USE_CODEPLAY_HOST_TASK
-            auto lpa = std::move(m_pa);
-            auto& q = *(Gpu::gpuStream().queue);
-            try {
-                q.submit([&] (sycl::handler& h) {
-                    h.codeplay_host_task([=] () {
-                        for (auto const& pa : lpa) {
-                            pa.second->free(pa.first);
-                        }
-                    });
+        auto lpa = std::move(m_pa);
+        auto& q = *(Gpu::gpuStream().queue);
+        try {
+            q.submit([&] (sycl::handler& h) {
+                h.host_task([=] () {
+                    for (auto const& pa : lpa) {
+                        pa.second->free(pa.first);
+                    }
                 });
-            } catch (sycl::exception const& ex) {
-                amrex::Abort(std::string("host_task: ")+ex.what()+"!!!!!");
-            }
-#else
-            // xxxxx SYCL todo
-            Gpu::streamSynchronize();
-            for (auto const& pa : m_pa) {
-                pa.second->free(pa.first);
-            }
-#endif
+            });
+        } catch (sycl::exception const& ex) {
+            amrex::Abort(std::string("host_task: ")+ex.what()+"!!!!!");
+        }
 #endif
         }
     }

diff --git a/Src/Base/AMReX_GpuError.H b/Src/Base/AMReX_GpuError.H
@@ -79,6 +79,11 @@ namespace Gpu {
                            + ": " + cudaGetErrorString(amrex_i_err)); \
         amrex::Abort(errStr); \
     }}
+
+#define AMREX_CURAND_SAFE_CALL(x) do { if((x)!=CURAND_STATUS_SUCCESS) { \
+    std::string errStr(std::string("CURAND error in file ") + __FILE__  \
+                       + " line " + std::to_string(__LINE__));          \
+    amrex::Abort(errStr); }} while(0)
 #endif
 
 #ifdef AMREX_USE_HIP
@@ -90,6 +95,11 @@ namespace Gpu {
                            + " " + hipGetErrorString(amrex_i_err)); \
         amrex::Abort(errStr); \
     }}
+
+#define AMREX_HIPRAND_SAFE_CALL(x) do { if((x)!=HIPRAND_STATUS_SUCCESS) { \
+    std::string errStr(std::string("HIPRAND error in file ") + __FILE__  \
+                       + " line " + std::to_string(__LINE__));          \
+    amrex::Abort(errStr); }} while(0)
 #endif
 
 #define AMREX_GPU_ERROR_CHECK() amrex::Gpu::ErrorCheck(__FILE__, __LINE__)

diff --git a/Src/Base/AMReX_MultiFabUtil.H b/Src/Base/AMReX_MultiFabUtil.H
@@ -361,6 +361,31 @@ namespace amrex
     void FourthOrderInterpFromFineToCoarse (MultiFab& cmf, int scomp, int ncomp,
                                             MultiFab const& fmf,
                                             IntVect const& ratio);
+
+    /**
+     * \brief Fill MultiFab with random numbers from uniform distribution
+     *
+     * The uniform distribution range is [0.0, 1.0) for CPU and SYCL, it's
+     * (0,1] for CUDA and HIP. All cells including ghost cells are filled.
+     *
+     * \param mf    MultiFab
+     * \param scomp starting component
+     * \param ncomp number of component
+     */
+    void FillRandom (MultiFab& mf, int scomp, int ncomp);
+
+    /**
+     * \brief Fill MultiFab with random numbers from nornmal distribution
+     *
+     * All cells including ghost cells are filled.
+     *
+     * \param mf     MultiFab
+     * \param scomp  starting component
+     * \param ncomp  number of component
+     * \param mean   mean of normal distribution
+     * \param stddev standard deviation of normal distribution
+     */
+    void FillRandomNormal (MultiFab& mf, int scomp, int ncomp, Real mean, Real stddev);
 }
 
 namespace amrex {