Skip to content

Commit

Permalink
Merge branch 'development' into generalize_cell_assignment
Browse files Browse the repository at this point in the history
  • Loading branch information
asalmgren authored Aug 23, 2023
2 parents 39eb44a + d4d0e90 commit 15091f1
Show file tree
Hide file tree
Showing 18 changed files with 404 additions and 70 deletions.
37 changes: 37 additions & 0 deletions Docs/sphinx_documentation/source/GPU.rst
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,43 @@ can run it and that will generate results like:
[The Pinned Arena] space (MB): 8
AMReX (19.06-404-g0455b168b69c-dirty) finalized

SYCL configuration variables
^^^^^^^^^^^^^^^^^^^^^^^^^^^^

When building with ``USE_SYCL=TRUE``, one can set the following makefile
variables to configure the build

.. raw:: latex

\begin{center}

.. _tab:gnumakesyclvar:

.. table:: AMReX SYCL-specific GNU Make build options

+------------------------------+-------------------------------------------------+-------------+-----------------+
| Variable Name | Description | Default | Possible values |
+==============================+=================================================+=============+=================+
| SYCL_AOT | Enable SYCL ahead-of-time compilation | FALSE | TRUE, FALSE |
+------------------------------+-------------------------------------------------+-------------+-----------------+
| SYCL_AOT_GRF_MODE | Specify AOT register file mode | Default | Default, Large, |
| | | | AutoLarge |
+------------------------------+-------------------------------------------------+-------------+-----------------+
| AMREX_INTEL_ARCH | Specify target if AOT is enabled | None | pvc, etc. |
+------------------------------+-------------------------------------------------+-------------+-----------------+
| SYCL_SPLIT_KERNEL | Enable SYCL kernel splitting | FALSE | TRUE, FALSE |
+------------------------------+-------------------------------------------------+-------------+-----------------+
| USE_ONEDPL | Enable SYCL's oneDPL algorithms | NO | YES, NO |
+------------------------------+-------------------------------------------------+-------------+-----------------+
| SYCL_SUB_GROUP_SIZE | Specify subgroup size | 32 | 64, 32, 16 |
+------------------------------+-------------------------------------------------+-------------+-----------------+
| SYCL_MAX_PARALLEL_LINK_JOBS | Number of parallel jobs in device link | 1 | 1, 2, 3, etc. |
+------------------------------+-------------------------------------------------+-------------+-----------------+
.. raw:: latex

\end{center}


Building with CMake
-------------------

Expand Down
30 changes: 30 additions & 0 deletions Src/AmrCore/AMReX_Interp_2D_C.H
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,36 @@ facediv_int (int ci, int cj, int /*ck*/, int nf,

}

template<typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
face_linear_interp_x (int i, int j, int /*k*/, int n, Array4<T> const& fine,
Array4<T const> const& crse, IntVect const& ratio) noexcept
{
const int ii = amrex::coarsen(i,ratio[0]);
const int jj = amrex::coarsen(j,ratio[1]);
if (i-ii*ratio[0] == 0) {
fine(i,j,0,n) = crse(ii,jj,0,n);
} else {
Real const w = static_cast<Real>(i-ii*ratio[0]) * (Real(1.)/Real(ratio[0]));
fine(i,j,0,n) = (Real(1.)-w) * crse(ii,jj,0,n) + w * crse(ii+1,jj,0,n);
}
}

template<typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
face_linear_interp_y (int i, int j, int /*k*/, int n, Array4<T> const& fine,
Array4<T const> const& crse, IntVect const& ratio) noexcept
{
const int ii = amrex::coarsen(i,ratio[0]);
const int jj = amrex::coarsen(j,ratio[1]);
if (j-jj*ratio[1] == 0) {
fine(i,j,0,n) = crse(ii,jj,0,n);
} else {
Real const w = static_cast<Real>(j-jj*ratio[1]) * (Real(1.)/Real(ratio[1]));
fine(i,j,0,n) = (Real(1.)-w) * crse(ii,jj,0,n) + w * crse(ii,jj+1,0,n);
}
}

template <typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void ccprotect_2d (int ic, int jc, int /*kc*/, int nvar,
Expand Down
48 changes: 48 additions & 0 deletions Src/AmrCore/AMReX_Interp_3D_C.H
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,54 @@ facediv_int (int ci, int cj, int ck, int nf,
+ dz3/(8*dy*zspxs)*(v000+v021-v001-v020);
}

template<typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
face_linear_interp_x (int i, int j, int k, int n, Array4<T> const& fine,
Array4<T const> const& crse, IntVect const& ratio) noexcept
{
const int ii = amrex::coarsen(i,ratio[0]);
const int jj = amrex::coarsen(j,ratio[1]);
const int kk = amrex::coarsen(k,ratio[2]);
if (i-ii*ratio[0] == 0) {
fine(i,j,k,n) = crse(ii,jj,kk,n);
} else {
Real const w = static_cast<Real>(i-ii*ratio[0]) * (Real(1.)/Real(ratio[0]));
fine(i,j,k,n) = (Real(1.)-w) * crse(ii,jj,kk,n) + w * crse(ii+1,jj,kk,n);
}
}

template<typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
face_linear_interp_y (int i, int j, int k, int n, Array4<T> const& fine,
Array4<T const> const& crse, IntVect const& ratio) noexcept
{
const int ii = amrex::coarsen(i,ratio[0]);
const int jj = amrex::coarsen(j,ratio[1]);
const int kk = amrex::coarsen(k,ratio[2]);
if (j-jj*ratio[1] == 0) {
fine(i,j,k,n) = crse(ii,jj,kk,n);
} else {
Real const w = static_cast<Real>(j-jj*ratio[1]) * (Real(1.)/Real(ratio[1]));
fine(i,j,k,n) = (Real(1.)-w) * crse(ii,jj,kk,n) + w * crse(ii,jj+1,kk,n);
}
}

template<typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void
face_linear_interp_z (int i, int j, int k, int n, Array4<T> const& fine,
Array4<T const> const& crse, IntVect const& ratio) noexcept
{
const int ii = amrex::coarsen(i,ratio[0]);
const int jj = amrex::coarsen(j,ratio[1]);
const int kk = amrex::coarsen(k,ratio[2]);
if (k-kk*ratio[2] == 0) {
fine(i,j,k,n) = crse(ii,jj,kk,n);
} else {
Real const w = static_cast<Real>(k-kk*ratio[2]) * (Real(1.)/Real(ratio[2]));
fine(i,j,k,n) = (Real(1.)-w) * crse(ii,jj,kk,n) + w * crse(ii,jj,kk+1,n);
}
}

template <typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void ccprotect_3d (int ic, int jc, int kc, int nvar,
Expand Down
41 changes: 34 additions & 7 deletions Src/AmrCore/AMReX_Interpolater.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,10 @@ FaceLinear::interp (const FArrayBox& crse,
int ncomp,
const Box& fine_region,
const IntVect& ratio,
const Geometry& crse_geom ,
const Geometry& fine_geom ,
Vector<BCRec> const& bcr,
int actual_comp,
const Geometry& /* crse_geom */,
const Geometry& /* fine_geom */,
Vector<BCRec> const& /*bcr*/,
int /* actual_comp*/,
int /*actual_state*/,
RunOn runon)
{
Expand All @@ -146,9 +146,36 @@ FaceLinear::interp (const FArrayBox& crse,
//
BL_PROFILE("FaceLinear::interp()");

// pass unallocated IArrayBox for solve_mask, so all fine values get filled.
interp_face(crse, crse_comp, fine, fine_comp, ncomp, fine_region,
ratio, IArrayBox(), crse_geom, fine_geom, bcr, actual_comp, runon);
AMREX_ASSERT(AMREX_D_TERM(fine_region.type(0),+fine_region.type(1),+fine_region.type(2)) == 1);

Array4<Real> const& fine_arr = fine.array(fine_comp);
Array4<Real const> const& crse_arr = crse.const_array(crse_comp);

if (fine_region.type(0) == IndexType::NODE)
{
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
{
face_linear_interp_x(i,j,k,n,fine_arr,crse_arr,ratio);
});
}
#if (AMREX_SPACEDIM >= 2)
else if (fine_region.type(1) == IndexType::NODE)
{
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
{
face_linear_interp_y(i,j,k,n,fine_arr,crse_arr,ratio);
});
}
#if (AMREX_SPACEDIM == 3)
else
{
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
{
face_linear_interp_z(i,j,k,n,fine_arr,crse_arr,ratio);
});
}
#endif
#endif
}

void
Expand Down
45 changes: 25 additions & 20 deletions Src/Base/AMReX_BLBackTrace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,10 +239,12 @@ BLBackTrace::print_backtrace_info (FILE* f)
std::fprintf(f, "%2d: %s\n", i, strings[i]);

#if !defined(AMREX_USE_OMP) || !defined(__INTEL_COMPILER)
const bool stack_ptr_not_null = (bt_buffer[i] != nullptr);

std::string addr2line_result;
bool try_addr2line = false;
if (amrex::system::call_addr2line && have_eu_addr2line) {
if (bt_buffer[i] != nullptr) {
if (stack_ptr_not_null) {
char print_buff[32];
std::snprintf(print_buff,sizeof(print_buff),"%p",bt_buffer[i]);
const std::string full_cmd = eu_cmd + " " + print_buff;
Expand Down Expand Up @@ -280,7 +282,7 @@ BLBackTrace::print_backtrace_info (FILE* f)
addr2line_result.clear();
}
}
if (addr2line_result.empty()) {
if (addr2line_result.empty() && stack_ptr_not_null) {
char print_buff[32];
std::snprintf(print_buff,sizeof(print_buff),"%p",bt_buffer[i]);
std::string full_cmd = cmd;
Expand All @@ -307,27 +309,30 @@ BLBackTrace::print_backtrace_info (FILE* f)

for (int i = 0; i < nentries; ++i) {
Dl_info info;
if (dladdr(bt_buffer[i], &info))
if (bt_buffer[i] != nullptr)
{
std::string line;
if (amrex::system::call_addr2line && have_atos) {
char print_buff[32];
std::snprintf(print_buff,sizeof(print_buff),"%p",bt_buffer[i]);
const std::string full_cmd = cmd + " " + print_buff;
line = run_command(full_cmd);
}
if (line.empty()) {
int status;
char * demangled_name = abi::__cxa_demangle(info.dli_sname, nullptr, 0, &status);
if (status == 0) {
line += demangled_name;
} else {
line += info.dli_fname;
if (dladdr(bt_buffer[i], &info))
{
std::string line;
if (amrex::system::call_addr2line && have_atos) {
char print_buff[32];
std::snprintf(print_buff,sizeof(print_buff),"%p",bt_buffer[i]);
const std::string full_cmd = cmd + " " + print_buff;
line = run_command(full_cmd);
}
if (line.empty()) {
int status;
char * demangled_name = abi::__cxa_demangle(info.dli_sname, nullptr, 0, &status);
if (status == 0) {
line += demangled_name;
} else {
line += info.dli_fname;
}
line += '\n';
std::free(demangled_name);
}
line += '\n';
std::free(demangled_name);
std::fprintf(f, "%2d: %s\n", i, line.c_str());
}
std::fprintf(f, "%2d: %s\n", i, line.c_str());
}
}

Expand Down
9 changes: 1 addition & 8 deletions Src/Base/AMReX_GpuAsyncArray.H
Original file line number Diff line number Diff line change
Expand Up @@ -86,26 +86,19 @@ public:
amrex_asyncarray_delete, p));
#endif
#elif defined(AMREX_USE_SYCL)
#ifdef AMREX_USE_CODEPLAY_HOST_TASK
auto* pd = d_data;
auto* ph = h_data;
auto& q = *(Gpu::gpuStream().queue);
try {
q.submit([&] (sycl::handler& h) {
h.codeplay_host_task([=] () {
h.host_task([=] () {
The_Arena()->free(pd);
The_Pinned_Arena()->free(ph);
});
});
} catch (sycl::exception const& ex) {
amrex::Abort(std::string("host_task: ")+ex.what()+"!!!!!");
}
#else
// xxxxx SYCL todo
Gpu::streamSynchronize();
The_Arena()->free(d_data);
The_Pinned_Arena()->free(h_data);
#endif
#endif
}
}
Expand Down
32 changes: 12 additions & 20 deletions Src/Base/AMReX_GpuElixir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,27 +48,19 @@ Elixir::clear () noexcept
amrex_elixir_delete, (void*)p));
#endif
#elif defined(AMREX_USE_SYCL)
#ifdef AMREX_USE_CODEPLAY_HOST_TASK
auto lpa = std::move(m_pa);
auto& q = *(Gpu::gpuStream().queue);
try {
q.submit([&] (sycl::handler& h) {
h.codeplay_host_task([=] () {
for (auto const& pa : lpa) {
pa.second->free(pa.first);
}
});
auto lpa = std::move(m_pa);
auto& q = *(Gpu::gpuStream().queue);
try {
q.submit([&] (sycl::handler& h) {
h.host_task([=] () {
for (auto const& pa : lpa) {
pa.second->free(pa.first);
}
});
} catch (sycl::exception const& ex) {
amrex::Abort(std::string("host_task: ")+ex.what()+"!!!!!");
}
#else
// xxxxx SYCL todo
Gpu::streamSynchronize();
for (auto const& pa : m_pa) {
pa.second->free(pa.first);
}
#endif
});
} catch (sycl::exception const& ex) {
amrex::Abort(std::string("host_task: ")+ex.what()+"!!!!!");
}
#endif
}
}
Expand Down
10 changes: 10 additions & 0 deletions Src/Base/AMReX_GpuError.H
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ namespace Gpu {
+ ": " + cudaGetErrorString(amrex_i_err)); \
amrex::Abort(errStr); \
}}

#define AMREX_CURAND_SAFE_CALL(x) do { if((x)!=CURAND_STATUS_SUCCESS) { \
std::string errStr(std::string("CURAND error in file ") + __FILE__ \
+ " line " + std::to_string(__LINE__)); \
amrex::Abort(errStr); }} while(0)
#endif

#ifdef AMREX_USE_HIP
Expand All @@ -90,6 +95,11 @@ namespace Gpu {
+ " " + hipGetErrorString(amrex_i_err)); \
amrex::Abort(errStr); \
}}

#define AMREX_HIPRAND_SAFE_CALL(x) do { if((x)!=HIPRAND_STATUS_SUCCESS) { \
std::string errStr(std::string("HIPRAND error in file ") + __FILE__ \
+ " line " + std::to_string(__LINE__)); \
amrex::Abort(errStr); }} while(0)
#endif

#define AMREX_GPU_ERROR_CHECK() amrex::Gpu::ErrorCheck(__FILE__, __LINE__)
Expand Down
25 changes: 25 additions & 0 deletions Src/Base/AMReX_MultiFabUtil.H
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,31 @@ namespace amrex
void FourthOrderInterpFromFineToCoarse (MultiFab& cmf, int scomp, int ncomp,
MultiFab const& fmf,
IntVect const& ratio);

/**
* \brief Fill MultiFab with random numbers from uniform distribution
*
* The uniform distribution range is [0.0, 1.0) for CPU and SYCL, it's
* (0,1] for CUDA and HIP. All cells including ghost cells are filled.
*
* \param mf MultiFab
* \param scomp starting component
* \param ncomp number of component
*/
void FillRandom (MultiFab& mf, int scomp, int ncomp);

/**
* \brief Fill MultiFab with random numbers from nornmal distribution
*
* All cells including ghost cells are filled.
*
* \param mf MultiFab
* \param scomp starting component
* \param ncomp number of component
* \param mean mean of normal distribution
* \param stddev standard deviation of normal distribution
*/
void FillRandomNormal (MultiFab& mf, int scomp, int ncomp, Real mean, Real stddev);
}

namespace amrex {
Expand Down
Loading

0 comments on commit 15091f1

Please sign in to comment.