From dc0e7c12b764117112b986368bbbdea1bb5fd99e Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Thu, 23 Jun 2022 10:49:36 -0700 Subject: [PATCH 001/111] Begin refactor of particle classes to allow positions and ids in SoA form. --- Src/AmrCore/AMReX_AmrParticles.H | 44 ++++--- Src/Extern/HDF5/AMReX_ParticleHDF5.H | 84 ++++++------ Src/Particle/AMReX_ParIter.H | 8 +- Src/Particle/AMReX_ParticleContainer.H | 52 ++++---- Src/Particle/AMReX_ParticleContainerI.H | 164 ++++++++++++------------ Src/Particle/AMReX_ParticleIO.H | 92 ++++++------- Src/Particle/AMReX_ParticleInit.H | 52 ++++---- 7 files changed, 254 insertions(+), 242 deletions(-) diff --git a/Src/AmrCore/AMReX_AmrParticles.H b/Src/AmrCore/AMReX_AmrParticles.H index 7b97d015801..1db0a501b69 100644 --- a/Src/AmrCore/AMReX_AmrParticles.H +++ b/Src/AmrCore/AMReX_AmrParticles.H @@ -10,10 +10,10 @@ namespace amrex { -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::AssignDensity (int rho_index, Vector >& mf_to_be_filled, int lev_min, int ncomp, int finest_level, int ngrow) const @@ -249,43 +249,47 @@ ParticleToMesh (PC const& pc, const Vector& mf, } } -template class Allocator=DefaultAllocator> -class AmrParticleContainer - : public ParticleContainer +class AmrParticleContainer_impl + : public ParticleContainer_impl { public: - typedef Particle ParticleType; + using ParticleType = T_ParticleType; - AmrParticleContainer () - : ParticleContainer() + AmrParticleContainer_impl () + : ParticleContainer_impl() { } - AmrParticleContainer (AmrCore* amr_core) - : ParticleContainer(amr_core->GetParGDB()) + AmrParticleContainer_impl (AmrCore* amr_core) + : ParticleContainer_impl(amr_core->GetParGDB()) { } - AmrParticleContainer (const Vector & geom, - const Vector & dmap, - const Vector & ba, - const Vector & rr) - : ParticleContainer(geom, dmap, ba, rr) + AmrParticleContainer_impl (const Vector & geom, + const Vector & dmap, + const Vector & ba, + const Vector & rr) + : 
ParticleContainer_impl(geom, dmap, ba, rr) { } - ~AmrParticleContainer () {} + ~AmrParticleContainer_impl () {} - AmrParticleContainer ( const AmrParticleContainer &) = delete; - AmrParticleContainer& operator= ( const AmrParticleContainer & ) = delete; + AmrParticleContainer_impl ( const AmrParticleContainer_impl &) = delete; + AmrParticleContainer_impl& operator= ( const AmrParticleContainer_impl & ) = delete; - AmrParticleContainer ( AmrParticleContainer && ) = default; - AmrParticleContainer& operator= ( AmrParticleContainer && ) = default; + AmrParticleContainer_impl ( AmrParticleContainer_impl && ) = default; + AmrParticleContainer_impl& operator= ( AmrParticleContainer_impl && ) = default; }; +template class Allocator=DefaultAllocator> +using AmrParticleContainer = AmrParticleContainer_impl, T_NArrayReal, T_NArrayInt, Allocator>; + class AmrTracerParticleContainer : public TracerParticleContainer { diff --git a/Src/Extern/HDF5/AMReX_ParticleHDF5.H b/Src/Extern/HDF5/AMReX_ParticleHDF5.H index d861a3684d8..9bfd8296b95 100644 --- a/Src/Extern/HDF5/AMReX_ParticleHDF5.H +++ b/Src/Extern/HDF5/AMReX_ParticleHDF5.H @@ -17,10 +17,10 @@ #include "H5Z_SZ.h" #endif -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CheckpointHDF5 (const std::string& dir, const std::string& name, bool /*is_checkpoint*/, const Vector& real_comp_names, @@ -70,10 +70,10 @@ ParticleContainer }, true); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CheckpointHDF5 (const std::string& dir, const std::string& name, const std::string& compression) const { @@ -106,10 +106,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFileHDF5 (const std::string& dir, const std::string& name, const std::string& compression) const { @@ -142,10 +142,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFileHDF5 (const std::string& 
dir, const std::string& name, const Vector& real_comp_names, const Vector& int_comp_names, @@ -170,10 +170,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFileHDF5 (const std::string& dir, const std::string& name, const Vector& real_comp_names, const std::string& compression) const @@ -204,10 +204,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFileHDF5 (const std::string& dir, const std::string& name, const Vector& write_real_comp, @@ -242,10 +242,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: WritePlotFileHDF5 (const std::string& dir, const std::string& name, const Vector& write_real_comp, const Vector& write_int_comp, @@ -265,11 +265,11 @@ WritePlotFileHDF5 (const std::string& dir, const std::string& name, }); } -template class Allocator> template >::value>::type*> void -ParticleContainer +ParticleContainer_impl ::WritePlotFileHDF5 (const std::string& dir, const std::string& name, const std::string& compression, F&& f) const { @@ -298,11 +298,11 @@ ParticleContainer std::forward(f)); } -template class Allocator> template void -ParticleContainer +ParticleContainer_impl ::WritePlotFileHDF5 (const std::string& dir, const std::string& name, const Vector& real_comp_names, const Vector& int_comp_names, @@ -323,11 +323,11 @@ ParticleContainer compression, std::forward(f)); } -template class Allocator> template >::value>::type*> void -ParticleContainer +ParticleContainer_impl ::WritePlotFileHDF5 (const std::string& dir, const std::string& name, const Vector& real_comp_names, const std::string& compression, F&& f) const @@ -354,11 +354,11 @@ ParticleContainer compression, std::forward(f)); } -template class Allocator> template void -ParticleContainer +ParticleContainer_impl ::WritePlotFileHDF5 (const std::string& dir, const std::string& name, const Vector& write_real_comp, @@ 
-389,11 +389,11 @@ ParticleContainer compression, std::forward(f)); } -template class Allocator> template void -ParticleContainer:: +ParticleContainer_impl:: WritePlotFileHDF5 (const std::string& dir, const std::string& name, const Vector& write_real_comp, const Vector& write_int_comp, @@ -409,11 +409,11 @@ WritePlotFileHDF5 (const std::string& dir, const std::string& name, compression, std::forward(f)); } -template class Allocator> template void -ParticleContainer +ParticleContainer_impl ::WriteHDF5ParticleData (const std::string& dir, const std::string& name, const Vector& write_real_comp, const Vector& write_int_comp, @@ -437,10 +437,10 @@ ParticleContainer /* } */ } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CheckpointPreHDF5 () { if( ! usePrePost) { @@ -494,10 +494,10 @@ ParticleContainer } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CheckpointPostHDF5 () { if( ! usePrePost) { @@ -550,30 +550,30 @@ ParticleContainer } } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFilePreHDF5 () { CheckpointPreHDF5(); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFilePostHDF5 () { CheckpointPostHDF5(); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WriteParticlesHDF5 (int lev, hid_t grp, Vector& which, Vector& count, Vector& where, const Vector& write_real_comp, @@ -964,19 +964,19 @@ ParticleContainer return; } // End WriteParticlesHDF5 -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::RestartHDF5 (const std::string& dir, const std::string& file, bool /*is_checkpoint*/) { RestartHDF5(dir, file); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::RestartHDF5 (const std::string& dir, const std::string& file) { BL_PROFILE("ParticleContainer::RestartHDF5()"); @@ -1340,11 +1340,11 @@ ParticleContainer } // Read a batch of particles from the 
checkpoint file -template class Allocator> template void -ParticleContainer +ParticleContainer_impl ::ReadParticlesHDF5 (hsize_t offset, hsize_t cnt, int grd, int lev, hid_t int_dset, hid_t real_dset, int finest_level_in_file, bool convert_ids) diff --git a/Src/Particle/AMReX_ParIter.H b/Src/Particle/AMReX_ParIter.H index b0384778f04..486f8a27c5d 100644 --- a/Src/Particle/AMReX_ParIter.H +++ b/Src/Particle/AMReX_ParIter.H @@ -8,9 +8,13 @@ namespace amrex { -template class Allocator> -class ParticleContainer; +class ParticleContainer_impl; + +template class Allocator=DefaultAllocator> +using ParticleContainer = ParticleContainer_impl, T_NArrayReal, T_NArrayInt, Allocator>; template class Allocator=DefaultAllocator> diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index a0a96852862..1b394071af2 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -132,15 +132,17 @@ struct ParticleInitType * \tparam T_NArrayInt The number of extra integer components stored in struct-of-array form * */ -template class Allocator=DefaultAllocator> -class ParticleContainer : public ParticleContainerBase +class ParticleContainer_impl : public ParticleContainerBase { public: + //! \brief The type of the "Particle" + using ParticleType = T_ParticleType; //! \brief Number of extra Real components in the particle struct - static constexpr int NStructReal = T_NStructReal; + static constexpr int NStructReal = ParticleType::NReal; //! \brief Number of extra integer components in the particle struct - static constexpr int NStructInt = T_NStructInt; + static constexpr int NStructInt = ParticleType::NInt; //! \brief Number of extra Real components stored in struct-of-array form static constexpr int NArrayReal = T_NArrayReal; //! \brief Number of extra integer components stored in struct-of-array form @@ -154,8 +156,6 @@ public: //! \brief The memory allocator in use. 
template using AllocatorType = Allocator; - //! \brief The type of Particles we hold. - using ParticleType = Particle; //! \brief The type of the "SuperParticle" which stored all components in AoS form using SuperParticleType = Particle; //! \brief The type of the Real data @@ -187,7 +187,7 @@ public: //! \brief Default constructor - construct an empty particle container that has no concept //! of a level hierarchy. Must be properly initialized later. - ParticleContainer () + ParticleContainer_impl () : ParticleContainerBase(), h_redistribute_real_comp(AMREX_SPACEDIM + NStructReal + NArrayReal, true), @@ -206,7 +206,7 @@ public: //! DistributionMapping, and BoxArray objects that define the AMR hierarchy. Usually, //! this is generated by an AmrCore or AmrLevel object. //! - ParticleContainer (ParGDBBase* gdb) + ParticleContainer_impl (ParGDBBase* gdb) : ParticleContainerBase(gdb), h_redistribute_real_comp(AMREX_SPACEDIM + NStructReal + NArrayReal, true), @@ -227,9 +227,9 @@ public: //! \param A DistributionMapping, which describes how the boxes are distributed onto MPI tasks //! \param A BoxArray, which gives the set of grid boxes //! - ParticleContainer (const Geometry & geom, - const DistributionMapping & dmap, - const BoxArray & ba) + ParticleContainer_impl (const Geometry & geom, + const DistributionMapping & dmap, + const BoxArray & ba) : ParticleContainerBase(geom, dmap, ba), h_redistribute_real_comp(AMREX_SPACEDIM + NStructReal + NArrayReal, true), @@ -252,10 +252,10 @@ public: //! \param rr A Vector of integer refinement ratios, of size num_levels - 1. rr[n] gives the //! refinement ratio between levels n and n+1 //! 
- ParticleContainer (const Vector & geom, - const Vector & dmap, - const Vector & ba, - const Vector & rr) + ParticleContainer_impl (const Vector & geom, + const Vector & dmap, + const Vector & ba, + const Vector & rr) : ParticleContainerBase(geom, dmap, ba, rr), h_redistribute_real_comp(AMREX_SPACEDIM + NStructReal + NArrayReal, true), @@ -277,10 +277,10 @@ public: //! \param rr A Vector of IntVect refinement ratios, of size num_levels - 1. rr[n] gives the //! refinement ratio between levels n and n+1 //! - ParticleContainer (const Vector & geom, - const Vector & dmap, - const Vector & ba, - const Vector & rr) + ParticleContainer_impl (const Vector & geom, + const Vector & dmap, + const Vector & ba, + const Vector & rr) : ParticleContainerBase(geom, dmap, ba, rr), h_redistribute_real_comp(AMREX_SPACEDIM + NStructReal + NArrayReal, true), @@ -294,13 +294,13 @@ public: resizeData(); } - virtual ~ParticleContainer () = default; + virtual ~ParticleContainer_impl () = default; - ParticleContainer ( const ParticleContainer &) = delete; - ParticleContainer& operator= ( const ParticleContainer & ) = delete; + ParticleContainer_impl ( const ParticleContainer_impl &) = delete; + ParticleContainer_impl& operator= ( const ParticleContainer_impl & ) = delete; - ParticleContainer ( ParticleContainer && ) = default; - ParticleContainer& operator= ( ParticleContainer && ) = default; + ParticleContainer_impl ( ParticleContainer_impl && ) = default; + ParticleContainer_impl& operator= ( ParticleContainer_impl && ) = default; //! \brief Define a default-constructed ParticleContainer using a ParGDB object. 
@@ -1363,6 +1363,10 @@ private: Vector m_particles; }; +template class Allocator=DefaultAllocator> +using ParticleContainer = ParticleContainer_impl, T_NArrayReal, T_NArrayInt, Allocator>; + #include "AMReX_ParticleInit.H" #include "AMReX_ParticleContainerI.H" #include "AMReX_ParticleIO.H" diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index f4ababb3a82..5719f74de24 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -1,8 +1,8 @@ -template class Allocator> void -ParticleContainer::SetParticleSize () +ParticleContainer_impl::SetParticleSize () { num_real_comm_comps = 0; int comm_comps_start = AMREX_SPACEDIM + NStructReal; @@ -21,10 +21,10 @@ ParticleContainer::Se num_real_comm_comps*sizeof(ParticleReal) + num_int_comm_comps*sizeof(int); } -template class Allocator> void -ParticleContainer :: Initialize () +ParticleContainer_impl :: Initialize () { levelDirectoriesCreated = false; usePrePost = false; @@ -58,11 +58,11 @@ ParticleContainer :: } } -template class Allocator> template IntVect -ParticleContainer::Index (const P& p, int lev) const +ParticleContainer_impl::Index (const P& p, int lev) const { IntVect iv; const Geometry& geom = Geom(lev); @@ -76,11 +76,11 @@ ParticleContainer::In return iv; } -template class Allocator> template bool -ParticleContainer +ParticleContainer_impl ::Where (const P& p, ParticleLocData& pld, int lev_min, @@ -156,10 +156,10 @@ ParticleContainer return false; } -template class Allocator> bool -ParticleContainer +ParticleContainer_impl ::EnforcePeriodicWhere (ParticleType& p, ParticleLocData& pld, int lev_min, @@ -230,10 +230,10 @@ ParticleContainer } -template class Allocator> bool -ParticleContainer +ParticleContainer_impl ::PeriodicShift (ParticleType& p) const { const auto& geom = Geom(0); @@ -245,10 +245,10 @@ ParticleContainer return enforcePeriodic(p, plo, phi, rhi, is_per); } -template class Allocator> ParticleLocData 
-ParticleContainer:: +ParticleContainer_impl:: Reset (ParticleType& p, bool /*update*/, bool verbose, @@ -280,29 +280,29 @@ Reset (ParticleType& p, return pld; } -template class Allocator> void -ParticleContainer::reserveData () +ParticleContainer_impl::reserveData () { this->ParticleContainerBase::reserveData(); m_particles.reserve(maxLevel()+1); } -template class Allocator> void -ParticleContainer::resizeData () +ParticleContainer_impl::resizeData () { this->ParticleContainerBase::resizeData(); int nlevs = std::max(0, finestLevel()+1); m_particles.resize(nlevs); } -template class Allocator> void -ParticleContainer::locateParticle (ParticleType& p, ParticleLocData& pld, +ParticleContainer_impl::locateParticle (ParticleType& p, ParticleLocData& pld, int lev_min, int lev_max, int nGrow, int local_grid) const { bool outside = AMREX_D_TERM(p.pos(0) < Geom(0).ProbLo(0) @@ -360,10 +360,10 @@ ParticleContainer::lo } } -template class Allocator> Long -ParticleContainer::TotalNumberOfParticles (bool only_valid, bool only_local) const +ParticleContainer_impl::TotalNumberOfParticles (bool only_valid, bool only_local) const { Long nparticles = 0; for (int lev = 0; lev <= finestLevel(); lev++) { @@ -375,10 +375,10 @@ ParticleContainer::To return nparticles; } -template class Allocator> Vector -ParticleContainer::NumberOfParticlesInGrid (int lev, bool only_valid, bool only_local) const +ParticleContainer_impl::NumberOfParticlesInGrid (int lev, bool only_valid, bool only_local) const { AMREX_ASSERT(lev >= 0 && lev < int(m_particles.size())); @@ -432,10 +432,10 @@ ParticleContainer::Nu return nparticles; } -template class Allocator> Long -ParticleContainer::NumberOfParticlesAtLevel (int lev, bool only_valid, bool only_local) const +ParticleContainer_impl::NumberOfParticlesAtLevel (int lev, bool only_valid, bool only_local) const { Long nparticles = 0; @@ -477,10 +477,10 @@ ParticleContainer::Nu // This includes both valid and invalid particles since invalid particles still take 
up space. // -template class Allocator> void -ParticleContainer::ByteSpread () const +ParticleContainer_impl::ByteSpread () const { Long cnt = 0; @@ -516,10 +516,10 @@ ParticleContainer::By #endif } -template class Allocator> void -ParticleContainer::PrintCapacity () const +ParticleContainer_impl::PrintCapacity () const { Long cnt = 0; @@ -554,10 +554,10 @@ ParticleContainer::Pr #endif } -template class Allocator> void -ParticleContainer::ShrinkToFit () +ParticleContainer_impl::ShrinkToFit () { for (unsigned lev = 0; lev < m_particles.size(); lev++) { auto& pmap = m_particles[lev]; @@ -574,10 +574,10 @@ ParticleContainer::Sh */ -template class Allocator> void -ParticleContainer::Increment (MultiFab& mf, int lev) +ParticleContainer_impl::Increment (MultiFab& mf, int lev) { BL_PROFILE("ParticleContainer::Increment"); @@ -599,20 +599,20 @@ ParticleContainer::In }, false); } -template class Allocator> Long -ParticleContainer::IncrementWithTotal (MultiFab& mf, int lev, bool local) +ParticleContainer_impl::IncrementWithTotal (MultiFab& mf, int lev, bool local) { BL_PROFILE("ParticleContainer::IncrementWithTotal(lev)"); Increment(mf, lev); return TotalNumberOfParticles(true, local); } -template class Allocator> void -ParticleContainer::RemoveParticlesAtLevel (int level) +ParticleContainer_impl::RemoveParticlesAtLevel (int level) { BL_PROFILE("ParticleContainer::RemoveParticlesAtLevel()"); if (level >= int(this->m_particles.size())) return; @@ -623,10 +623,10 @@ ParticleContainer::Re } } -template class Allocator> void -ParticleContainer::RemoveParticlesNotAtFinestLevel () +ParticleContainer_impl::RemoveParticlesNotAtFinestLevel () { BL_PROFILE("ParticleContainer::RemoveParticlesNotAtFinestLevel()"); AMREX_ASSERT(this->finestLevel()+1 == int(this->m_particles.size())); @@ -689,10 +689,10 @@ struct TransformerVirt }; -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CreateVirtualParticles (int level, AoS& virts) const { ParticleTileType ptile; @@ 
-700,10 +700,10 @@ ParticleContainer ptile.GetArrayOfStructs().swap(virts); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CreateVirtualParticles (int level, ParticleTileType& virts) const { BL_PROFILE("ParticleContainer::CreateVirtualParticles()"); @@ -931,10 +931,10 @@ struct TransformerGhost } }; -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CreateGhostParticles (int level, int nGrow, AoS& ghosts) const { ParticleTileType ptile; @@ -942,10 +942,10 @@ ParticleContainer ptile.GetArrayOfStructs().swap(ghosts); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CreateGhostParticles (int level, int nGrow, ParticleTileType& ghosts) const { BL_PROFILE("ParticleContainer::CreateGhostParticles()"); @@ -976,10 +976,10 @@ ParticleContainer Gpu::streamSynchronize(); } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: clearParticles () { BL_PROFILE("ParticleContainer::clearParticles()"); @@ -991,35 +991,35 @@ clearParticles () } } -template class Allocator> template ::value, int> foo> void -ParticleContainer:: +ParticleContainer_impl:: copyParticles (const PCType& other, bool local) { using PData = ConstParticleTileData; copyParticles(other, [=] AMREX_GPU_HOST_DEVICE (const PData& /*data*/, int /*i*/) { return 1; }, local); } -template class Allocator> template ::value, int> foo> void -ParticleContainer:: +ParticleContainer_impl:: addParticles (const PCType& other, bool local) { using PData = ConstParticleTileData; addParticles(other, [=] AMREX_GPU_HOST_DEVICE (const PData& /*data*/, int /*i*/) { return 1; }, local); } -template class Allocator> template ::value, int> foo, std::enable_if_t::value, int> bar> void -ParticleContainer:: +ParticleContainer_impl:: copyParticles (const PCType& other, F&& f, bool local) { BL_PROFILE("ParticleContainer::copyParticles"); @@ -1027,13 +1027,13 @@ copyParticles (const PCType& other, F&& f, bool local) 
addParticles(other, std::forward(f), local); } -template class Allocator> template ::value, int> foo, std::enable_if_t::value, int> bar> void -ParticleContainer:: +ParticleContainer_impl:: addParticles (const PCType& other, F&& f, bool local) { BL_PROFILE("ParticleContainer::addParticles"); @@ -1066,10 +1066,10 @@ addParticles (const PCType& other, F&& f, bool local) // // This redistributes valid particles and discards invalid ones. // -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::Redistribute (int lev_min, int lev_max, int nGrow, int local, bool remove_negative) { #ifdef AMREX_USE_GPU @@ -1086,18 +1086,18 @@ ParticleContainer #endif } -template class Allocator> void -ParticleContainer::SortParticlesByCell () +ParticleContainer_impl::SortParticlesByCell () { SortParticlesByBin(IntVect(AMREX_D_DECL(1, 1, 1))); } -template class Allocator> void -ParticleContainer::SortParticlesByBin (IntVect bin_size) +ParticleContainer_impl::SortParticlesByBin (IntVect bin_size) { BL_PROFILE("ParticleContainer::SortParticlesByBin()"); @@ -1180,10 +1180,10 @@ ParticleContainer::So // // The GPU implementation of Redistribute // -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::RedistributeGPU (int lev_min, int lev_max, int nGrow, int local, bool remove_negative) { #ifdef AMREX_USE_GPU @@ -1371,10 +1371,10 @@ ParticleContainer // // The CPU implementation of Redistribute // -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::RedistributeCPU (int lev_min, int lev_max, int nGrow, int local, bool remove_negative) { BL_PROFILE("ParticleContainer::RedistributeCPU()"); @@ -1703,10 +1703,10 @@ ParticleContainer } } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: RedistributeMPI (std::map >& not_ours, int lev_min, int lev_max, int nGrow, int local) { @@ -1998,10 +1998,10 @@ RedistributeMPI (std::map >& not_ours, #endif } -template class Allocator> bool -ParticleContainer::OK (int 
lev_min, int lev_max, int nGrow) const +ParticleContainer_impl::OK (int lev_min, int lev_max, int nGrow) const { BL_PROFILE("ParticleContainer::OK()"); @@ -2011,10 +2011,10 @@ ParticleContainer::OK return (numParticlesOutOfRange(*this, lev_min, lev_max, nGrow) == 0); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::AddParticlesAtLevel (AoS& particles, int level, int nGrow) { ParticleTileType ptile; @@ -2022,10 +2022,10 @@ ParticleContainer AddParticlesAtLevel(ptile, level, nGrow); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::AddParticlesAtLevel (ParticleTileType& particles, int level, int nGrow) { BL_PROFILE("ParticleContainer::AddParticlesAtLevel()"); @@ -2057,10 +2057,10 @@ ParticleContainer } // This is the single-level version for cell-centered density -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: AssignCellDensitySingleLevel (int rho_index, MultiFab& mf_to_be_filled, int lev, @@ -2195,10 +2195,10 @@ AssignCellDensitySingleLevel (int rho_index, } } -template class Allocator> void -ParticleContainer::Interpolate (Vector >& mesh_data, +ParticleContainer_impl::Interpolate (Vector >& mesh_data, int lev_min, int lev_max) { BL_PROFILE("ParticleContainer::Interpolate()"); @@ -2207,10 +2207,10 @@ ParticleContainer::In } } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: InterpolateSingleLevel (MultiFab& mesh_data, int lev) { BL_PROFILE("ParticleContainer::InterpolateSingleLevel()"); diff --git a/Src/Particle/AMReX_ParticleIO.H b/Src/Particle/AMReX_ParticleIO.H index cedbdb329c4..733ec8c3671 100644 --- a/Src/Particle/AMReX_ParticleIO.H +++ b/Src/Particle/AMReX_ParticleIO.H @@ -4,10 +4,10 @@ #include -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WriteParticleRealData (void* data, size_t size, std::ostream& os) const { if (sizeof(typename ParticleType::RealType) == 4) { @@ -18,10 +18,10 @@ ParticleContainer } } 
-template class Allocator> void -ParticleContainer +ParticleContainer_impl ::ReadParticleRealData (void* data, size_t size, std::istream& is) { if (sizeof(typename ParticleType::RealType) == 4) { @@ -32,10 +32,10 @@ ParticleContainer } } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::Checkpoint (const std::string& dir, const std::string& name, bool /*is_checkpoint*/, const Vector& real_comp_names, @@ -83,10 +83,10 @@ ParticleContainer }, true); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFile (const std::string& dir, const std::string& name) const { Vector write_real_comp; @@ -117,10 +117,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFile (const std::string& dir, const std::string& name, const Vector& real_comp_names, const Vector& int_comp_names) const @@ -143,10 +143,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFile (const std::string& dir, const std::string& name, const Vector& real_comp_names) const { @@ -175,10 +175,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFile (const std::string& dir, const std::string& name, const Vector& write_real_comp, @@ -211,10 +211,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: WritePlotFile (const std::string& dir, const std::string& name, const Vector& write_real_comp, const Vector& write_int_comp, @@ -232,11 +232,11 @@ WritePlotFile (const std::string& dir, const std::string& name, }); } -template class Allocator> template &>::value>::type*> void -ParticleContainer +ParticleContainer_impl ::WritePlotFile (const std::string& dir, const std::string& name, F&& f) const { Vector write_real_comp; @@ -264,11 +264,11 @@ ParticleContainer std::forward(f)); } -template class Allocator> template 
void -ParticleContainer +ParticleContainer_impl ::WritePlotFile (const std::string& dir, const std::string& name, const Vector& real_comp_names, const Vector& int_comp_names, F&& f) const @@ -288,11 +288,11 @@ ParticleContainer std::forward(f)); } -template class Allocator> template >::value>::type*> void -ParticleContainer +ParticleContainer_impl ::WritePlotFile (const std::string& dir, const std::string& name, const Vector& real_comp_names, F&& f) const { @@ -318,11 +318,11 @@ ParticleContainer std::forward(f)); } -template class Allocator> template void -ParticleContainer +ParticleContainer_impl ::WritePlotFile (const std::string& dir, const std::string& name, const Vector& write_real_comp, @@ -352,11 +352,11 @@ ParticleContainer std::forward(f)); } -template class Allocator> template void -ParticleContainer:: +ParticleContainer_impl:: WritePlotFile (const std::string& dir, const std::string& name, const Vector& write_real_comp, const Vector& write_int_comp, @@ -372,11 +372,11 @@ WritePlotFile (const std::string& dir, const std::string& name, std::forward(f)); } -template class Allocator> template void -ParticleContainer +ParticleContainer_impl ::WriteBinaryParticleData (const std::string& dir, const std::string& name, const Vector& write_real_comp, const Vector& write_int_comp, @@ -397,10 +397,10 @@ ParticleContainer } } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CheckpointPre () { if( ! usePrePost) { @@ -454,10 +454,10 @@ ParticleContainer } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CheckpointPost () { if( ! 
usePrePost) { @@ -510,30 +510,30 @@ ParticleContainer } } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFilePre () { CheckpointPre(); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFilePost () { CheckpointPost(); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WriteParticles (int lev, std::ofstream& ofs, int fnum, Vector& which, Vector& count, Vector& where, const Vector& write_real_comp, @@ -585,19 +585,19 @@ ParticleContainer } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::Restart (const std::string& dir, const std::string& file, bool /*is_checkpoint*/) { Restart(dir, file); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::Restart (const std::string& dir, const std::string& file) { BL_PROFILE("ParticleContainer::Restart()"); @@ -880,11 +880,11 @@ ParticleContainer } // Read a batch of particles from the checkpoint file -template class Allocator> template void -ParticleContainer +ParticleContainer_impl ::ReadParticles (int cnt, int grd, int lev, std::ifstream& ifs, int finest_level_in_file, bool convert_ids) { @@ -1017,10 +1017,10 @@ ParticleContainer Gpu::streamSynchronize(); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WriteAsciiFile (const std::string& filename) { BL_PROFILE("ParticleContainer::WriteAsciiFile()"); diff --git a/Src/Particle/AMReX_ParticleInit.H b/Src/Particle/AMReX_ParticleInit.H index ee8afc778e2..6d1b39e678a 100644 --- a/Src/Particle/AMReX_ParticleInit.H +++ b/Src/Particle/AMReX_ParticleInit.H @@ -30,10 +30,10 @@ across the domain so that you only need to specify a sub-volume of them. By default particles are not replicated. 
*/ -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::InitFromAsciiFile (const std::string& file, int extradata, const IntVect* Nrep) { BL_PROFILE("ParticleContainer::InitFromAsciiFile()"); @@ -233,7 +233,7 @@ ParticleContainer if (m_verbose) { amrex::AllPrint() << "BAD REPLICATED PARTICLE ID WOULD BE " << ParticleType::NextID() << "\n"; } - amrex::Abort("ParticleContainer::InitFromAsciiFile(): invalid replicated particle"); + amrex::Abort("ParticleContainer_impl::InitFromAsciiFile(): invalid replicated particle"); } } @@ -470,10 +470,10 @@ ParticleContainer // Note that there is nothing separating all these values. // They're packed into the binary file like sardines. // -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: InitFromBinaryFile (const std::string& file, int extradata) { @@ -614,22 +614,22 @@ InitFromBinaryFile (const std::string& file, // NP MUST be positive! // if (NP <= 0) - amrex::Abort("ParticleContainer::InitFromBinaryFile(): NP <= 0"); + amrex::Abort("ParticleContainer_impl::InitFromBinaryFile(): NP <= 0"); // // DM must equal AMREX_SPACEDIM. // if (DM != AMREX_SPACEDIM) - amrex::Abort("ParticleContainer::InitFromBinaryFile(): DM != AMREX_SPACEDIM"); + amrex::Abort("ParticleContainer_impl::InitFromBinaryFile(): DM != AMREX_SPACEDIM"); // // NX MUST be in [0,N]. // if (NX < 0 || NX > NStructReal) - amrex::Abort("ParticleContainer::InitFromBinaryFile(): NX < 0 || NX > N"); + amrex::Abort("ParticleContainer_impl::InitFromBinaryFile(): NX < 0 || NX > N"); // // Can't ask for more data than exists in the file! // if (extradata > NX) - amrex::Abort("ParticleContainer::InitFromBinaryFile(): extradata > NX"); + amrex::Abort("ParticleContainer_impl::InitFromBinaryFile(): extradata > NX"); // // Figure out whether we're dealing with floats or doubles. 
// @@ -813,7 +813,7 @@ InitFromBinaryFile (const std::string& file, << p.pos(2)) << "\n"; } - amrex::Abort("ParticleContainer::InitFromBinaryFile(): invalid particle"); + amrex::Abort("ParticleContainer_impl::InitFromBinaryFile(): invalid particle"); } } @@ -905,10 +905,10 @@ InitFromBinaryFile (const std::string& file, // one file name per line. // -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: InitFromBinaryMetaFile (const std::string& metafile, int extradata) { @@ -943,10 +943,10 @@ InitFromBinaryMetaFile (const std::string& metafile, } } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: InitRandom (Long icount, ULong iseed, const ParticleInitData& pdata, @@ -1170,7 +1170,7 @@ InitRandom (Long icount, // locate the particle if (!Where(p, pld)) { - amrex::Abort("ParticleContainer::InitRandom(): invalid particle"); + amrex::Abort("ParticleContainer_impl::InitRandom(): invalid particle"); } AMREX_ASSERT(pld.m_lev >= 0 && pld.m_lev <= finestLevel()); std::pair ind(pld.m_grid, pld.m_tile); @@ -1231,16 +1231,16 @@ InitRandom (Long icount, ParallelDescriptor::ReduceRealMax(stoptime,IOProc); - amrex::Print() << "ParticleContainer::InitRandom() time: " << stoptime << '\n'; + amrex::Print() << "ParticleContainer_impl::InitRandom() time: " << stoptime << '\n'; } Gpu::streamSynchronize(); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::InitRandomPerBox (Long icount_per_box, ULong iseed, const ParticleInitData& pdata) @@ -1331,14 +1331,14 @@ ParticleContainer ParallelDescriptor::ReduceRealMax(stoptime,IOProc); - amrex::Print() << "ParticleContainer::InitRandomPerBox() time: " << stoptime << '\n'; + amrex::Print() << "ParticleContainer_impl::InitRandomPerBox() time: " << stoptime << '\n'; } } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: InitOnePerCell (Real x_off, Real y_off, Real z_off, const ParticleInitData& pdata) { 
amrex::ignore_unused(y_off,z_off); @@ -1415,14 +1415,14 @@ InitOnePerCell (Real x_off, Real y_off, Real z_off, const ParticleInitData& pdat ParallelDescriptor::ReduceRealMax(stoptime,IOProc); - amrex::Print() << "ParticleContainer::InitOnePerCell() time: " << stoptime << '\n'; + amrex::Print() << "ParticleContainer_impl::InitOnePerCell() time: " << stoptime << '\n'; } } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: InitNRandomPerCell (int n_per_cell, const ParticleInitData& pdata) { BL_PROFILE("ParticleContainer::InitNRandomPerCell()"); @@ -1495,7 +1495,7 @@ InitNRandomPerCell (int n_per_cell, const ParticleInitData& pdata) // locate the particle if (!Where(p, pld)) { - amrex::Abort("ParticleContainer::InitNRandomPerCell(): invalid particle"); + amrex::Abort("ParticleContainer_impl::InitNRandomPerCell(): invalid particle"); } AMREX_ASSERT(pld.m_lev >= 0 && pld.m_lev <= finestLevel()); std::pair ind(pld.m_grid, pld.m_tile); @@ -1556,7 +1556,7 @@ InitNRandomPerCell (int n_per_cell, const ParticleInitData& pdata) ParallelDescriptor::ReduceRealMax(stoptime,IOProc); - amrex::Print() << "ParticleContainer::InitNRandomPerCell() time: " << stoptime << '\n'; + amrex::Print() << "ParticleContainer_impl::InitNRandomPerCell() time: " << stoptime << '\n'; } Gpu::streamSynchronize(); From 95357fa354c5aeb9bcda281a9acc5f4587bfe19e Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Thu, 23 Jun 2022 12:30:02 -0700 Subject: [PATCH 002/111] do not redefine default arguments --- Src/Particle/AMReX_ParticleContainer.H | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index 1b394071af2..a3c24bc5f62 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -1363,8 +1363,7 @@ private: Vector m_particles; }; -template class Allocator=DefaultAllocator> +template class Allocator> using ParticleContainer = 
ParticleContainer_impl, T_NArrayReal, T_NArrayInt, Allocator>; #include "AMReX_ParticleInit.H" From 9900521d576432a7dc3abfb8fe66c70527e246b7 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Fri, 15 Jul 2022 12:17:08 -0700 Subject: [PATCH 003/111] Sprint: Add New Tile Interfaces --- Src/Particle/AMReX_Particle.H | 7 ++ Src/Particle/AMReX_ParticleContainer.H | 3 + Src/Particle/AMReX_ParticleTile.H | 114 +++++++++++++++++++++ Tests/Particles/SOAParticle/CMakeLists.txt | 8 ++ Tests/Particles/SOAParticle/main.cpp | 79 ++++++++++++++ 5 files changed, 211 insertions(+) create mode 100644 Tests/Particles/SOAParticle/CMakeLists.txt create mode 100644 Tests/Particles/SOAParticle/main.cpp diff --git a/Src/Particle/AMReX_Particle.H b/Src/Particle/AMReX_Particle.H index 0548886010c..39eccaba3ee 100644 --- a/Src/Particle/AMReX_Particle.H +++ b/Src/Particle/AMReX_Particle.H @@ -187,6 +187,11 @@ struct ParticleBase uint64_t m_idcpu = 0; }; +struct SoAParticleBase +{ + static constexpr int NReal = 0; + static constexpr int NInt = 0; +}; /** \brief The struct used to store particles. * @@ -197,6 +202,8 @@ template struct Particle : ParticleBase { + static constexpr bool is_soa_particle = false; + //! 
\brief number of extra Real components in the particle struct static constexpr int NReal = T_NReal; diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index a3c24bc5f62..6c6a64eb275 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -1366,6 +1366,9 @@ private: template class Allocator> using ParticleContainer = ParticleContainer_impl, T_NArrayReal, T_NArrayInt, Allocator>; +template class Allocator=DefaultAllocator> +using ParticleContainerPureSoA = ParticleContainer_impl, T_NArrayReal, T_NArrayInt, Allocator>; + #include "AMReX_ParticleInit.H" #include "AMReX_ParticleContainerI.H" #include "AMReX_ParticleIO.H" diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 4374197d335..8f74f0fe398 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include @@ -160,6 +162,60 @@ struct ParticleTileData } }; +// SOA Particle Structure +template +struct SoAParticle : SoAParticleBase +{ + + static constexpr bool is_soa_particle = true; + + using RealType = ParticleReal; + + SoAParticle(ParticleTileData<0, 0, NArrayReal, NArrayInt> *const ptd, int const index) + { + m_particle_tile_data=ptd; + m_index=index; + } + + //functions to get id and cpu in the SOA data + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + ParticleCPUWrapper cpu () & { return ParticleCPUWrapper(this->m_particle_tile_data.m_idata[0][m_index]); } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + ParticleIDWrapper id () & { return ParticleIDWrapper(this->m_particle_tile_data.m_idata[1][m_index]); } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + ConstParticleCPUWrapper cpu () const & { return ConstParticleCPUWrapper(this->m_particle_tile_data.m_idata[0][m_index]); } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + ConstParticleIDWrapper id () const & { return 
ConstParticleIDWrapper(this->m_particle_tile_data.m_idata[1][m_index]); } + + //functions to get positions of the particle in the SOA data + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_particle_tile_data.m_rdata[0][m_index], this->m_particle_tile_data.m_rdata[1][m_index], this->m_particle_tile_data.m_rdata[2][m_index]));} + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RealType& pos (int position_index) & + { + AMREX_ASSERT(position_index < AMREX_SPACEDIM); + return this->m_particle_tile_data.m_rdata[position_index][m_index]; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RealType pos (int position_index) const & + { + AMREX_ASSERT(position_index < AMREX_SPACEDIM); + return this->m_particle_tile_data.m_rdata[position_index][m_index]; + } + + private : + + ParticleTileData<0, 0, NReal, NInt>* m_particle_tile_data; + int m_index; +}; + template struct ConstParticleTileData { @@ -255,6 +311,7 @@ struct ParticleTile using ParticleType = Particle; static constexpr int NAR = NArrayReal; static constexpr int NAI = NArrayInt; + using RealType = typename ParticleType::RealType; using SuperParticleType = Particle; @@ -282,6 +339,63 @@ struct ParticleTile m_runtime_i_cptrs.resize(a_num_runtime_int); } + template ::type = 0> + ParticleCPUWrapper cpu (int index) & { + ParticleType p(this->getParticleTileData(), index); + return p.cpu(); + } + + template ::type = 0> + ParticleCPUWrapper cpu (int index) & { + // ... todo for AoS + } + +/* TODO: + ParticleIDWrapper id (int index) & { ... } + + ConstParticleCPUWrapper cpu (int index) const & { .. } + + ConstParticleIDWrapper id (int index) const & { ... } +*/ + + template ::type = 0> + RealVect pos (int index) & { + ParticleType p(this->getParticleTileData(), index); + return p.pos(); + } + + template ::type = 0> + RealVect pos (int index) & { + // ... 
todo for AoS + } + + template ::type = 0> + RealType& pos (int index, int position_index) & + { + ParticleType p(this->getParticleTileData(), index); + return p.pos(position_index); + } + + template ::type = 0> + RealType& pos (int index, int position_index) & + { + // ... todo for AoS + } + + template ::type = 0> + RealType pos (int index, int position_index) const & + { + ParticleType p(this->getParticleTileData(), index); + return p.pos(position_index); + } + + template ::type = 0> + RealType pos (int index, int position_index) const & + { + // ... todo for AoS + } + + AoS& GetArrayOfStructs () { return m_aos_tile; } const AoS& GetArrayOfStructs () const { return m_aos_tile; } diff --git a/Tests/Particles/SOAParticle/CMakeLists.txt b/Tests/Particles/SOAParticle/CMakeLists.txt new file mode 100644 index 00000000000..26eec60d2d0 --- /dev/null +++ b/Tests/Particles/SOAParticle/CMakeLists.txt @@ -0,0 +1,8 @@ +set(_sources main.cpp) +#set(_input_files) +#set(_input_files inputs) + +setup_test(_sources _input_files NTHREADS 2) + +unset(_sources) +unset(_input_files) diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp new file mode 100644 index 00000000000..d58c867e9ea --- /dev/null +++ b/Tests/Particles/SOAParticle/main.cpp @@ -0,0 +1,79 @@ +#include +#include +#include +#include + +using namespace amrex; + +template +void addParticles () +{ + T_PC pc; + using ParticleType = typename T_PC::ParticleType; + + const int add_num_particles = 5; + + auto& ptile1 = pc.DefineAndReturnParticleTile(0, 0, 0); + //{ + // ParticleType p(...); + // p.pos(0) = 12.0; + // p.pos(1) = 12.0; + // p.pos(2) = 12.0; + // ptile1.push_back(p); + //} + + //DefineAndReturnParticleTile(0,0,0); + + for (int i = 0; i < add_num_particles; ++i) + { + ptile1.pos(i, 0) = 12.0; + ptile1.pos(i, 1) = 12.0; + ptile1.pos(i, 2) = 12.0; + + // TODO + //ptile1.id(i) = ParticleType::NextID(); + ptile1.cpu(i) = amrex::ParallelDescriptor::MyProc(); + } + 
//ptile1.push_back_int(3, ...std::vector); + //ptile1.push_back_int(4, ...std::vector); + + + pc.Redistribute(); +} + + +int main(int argc, char* argv[]) + { + amrex::Initialize(argc,argv); + { + // for (int n = 0; n < BL_SPACEDIM; n++) + // { + // real_box.setLo(n, 0.0); + // real_box.setHi(n, params.size[n]); + // } + + // IntVect domain_lo(AMREX_D_DECL(0, 0, 0)); + // IntVect domain_hi(AMREX_D_DECL(params.size[0]-1,params.size[1]-1,params.size[2]-1)); + // const Box domain(domain_lo, domain_hi); + + // int coord = 0; + // int is_per[BL_SPACEDIM]; + // for (int i = 0; i < BL_SPACEDIM; i++) + // is_per[i] = params.is_periodic; + // Geometry geom(domain, &real_box, coord, is_per); + + // BoxArray ba(domain); + // ba.maxSize(params.max_grid_size); + // DistributionMapping dm(ba); + + // const int ncells = 1; + + //addParticles< ParticleContainer<1,2,3,4> > (); + addParticles< ParticleContainerPureSoA<3,4> > (); + } + amrex::Finalize(); + } + + + + From 27cf58d2d01978fd1361ee256c61a1261bb5439e Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Tue, 19 Jul 2022 15:44:58 -0700 Subject: [PATCH 004/111] Last functions added to the Soa Particle struct --- Src/Particle/AMReX_ParticleTile.H | 75 ++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 20 deletions(-) diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 8f74f0fe398..fa8b5ad5b79 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -339,49 +339,76 @@ struct ParticleTile m_runtime_i_cptrs.resize(a_num_runtime_int); } + // Get cpu data + + // SoA template ::type = 0> ParticleCPUWrapper cpu (int index) & { ParticleType p(this->getParticleTileData(), index); return p.cpu(); } + // const + template ::type = 0> + ConstParticleCPUWrapper cpu (int index) const & { + ParticleType p(this->getParticleTileData(), index); + return p.cpu(); + } + + // AoS template ::type = 0> - ParticleCPUWrapper cpu (int index) & { - // ... 
todo for AoS + ParticleCPUWrapper cpu () & { + ParticleType& p = m_aos_tile().dataPtr()[index]; + return p.cpu(); } -/* TODO: - ParticleIDWrapper id (int index) & { ... } + // const - ConstParticleCPUWrapper cpu (int index) const & { .. } + template ::type = 0> + ConstParticleCPUWrapper cpu () const & { + ParticleType& p = m_aos_tile().dataPtr()[index]; + return p.cpu(); + } + + // Get id data - ConstParticleIDWrapper id (int index) const & { ... } -*/ + // SoA + template ::type = 0> + ParticleIDWrapper id (int index) & { + ParticleType p(this->getParticleTileData(), index); + return p.id(); + } + // const template ::type = 0> - RealVect pos (int index) & { + ConstParticleIDWrapper id (int index) const & { ParticleType p(this->getParticleTileData(), index); - return p.pos(); + return p.id(); } + // AoS template ::type = 0> - RealVect pos (int index) & { - // ... todo for AoS + ParticleIDWrapper id (int index) & { + ParticleType& p = m_aos_tile().dataPtr()[index]; + return p.id(); } + // const + template ::type = 0> + ConstParticleIDWrapper id (int index) const & { + ParticleType& p = m_aos_tile().dataPtr()[index]; + return p.id(); + } + + + // Get positions data + template ::type = 0> - RealType& pos (int index, int position_index) & - { + RealType& pos (int index, int position_index) & { ParticleType p(this->getParticleTileData(), index); return p.pos(position_index); } - template ::type = 0> - RealType& pos (int index, int position_index) & - { - // ... todo for AoS - } - template ::type = 0> RealType pos (int index, int position_index) const & { @@ -389,10 +416,18 @@ struct ParticleTile return p.pos(position_index); } + template ::type = 0> + RealType& pos (int index, int position_index) & { + ParticleType& p = m_aos_tile().dataPtr()[index]; + return p.pos(position_index); + } + + template ::type = 0> RealType pos (int index, int position_index) const & { - // ... 
todo for AoS + ParticleType& p = m_aos_tile().dataPtr()[index]; + return p.pos(position_index); } From b750561380468c350303606e0dc80e7d61160c78 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Mon, 1 Aug 2022 16:03:18 -0700 Subject: [PATCH 005/111] main file --- Tests/Particles/SOAParticle/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp index d58c867e9ea..d0071b8d3ab 100644 --- a/Tests/Particles/SOAParticle/main.cpp +++ b/Tests/Particles/SOAParticle/main.cpp @@ -31,8 +31,8 @@ void addParticles () ptile1.pos(i, 2) = 12.0; // TODO - //ptile1.id(i) = ParticleType::NextID(); - ptile1.cpu(i) = amrex::ParallelDescriptor::MyProc(); + ptile1.id(i) = 1; + //ptile1.cpu(i) = 1; } //ptile1.push_back_int(3, ...std::vector); //ptile1.push_back_int(4, ...std::vector); From 0fc0c4a09b070c69fc100b4479a028fd7e3ad26f Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Tue, 2 Aug 2022 18:13:42 -0700 Subject: [PATCH 006/111] Adding particle type corrections --- Src/Particle/AMReX_ArrayOfStructs.H | 10 ++--- Src/Particle/AMReX_ParticleContainer.H | 2 +- Src/Particle/AMReX_ParticleIO.H | 2 +- Src/Particle/AMReX_ParticleInit.H | 2 +- Src/Particle/AMReX_ParticleTile.H | 44 +++++++++++--------- Src/Particle/AMReX_WriteBinaryParticleData.H | 2 +- 6 files changed, 33 insertions(+), 29 deletions(-) diff --git a/Src/Particle/AMReX_ArrayOfStructs.H b/Src/Particle/AMReX_ArrayOfStructs.H index 25339dee444..48f99dc1e13 100644 --- a/Src/Particle/AMReX_ArrayOfStructs.H +++ b/Src/Particle/AMReX_ArrayOfStructs.H @@ -8,11 +8,11 @@ namespace amrex { -template class Allocator=DefaultAllocator> class ArrayOfStructs { public: - using ParticleType = Particle; + using ParticleType = T_ParticleType; using RealType = typename ParticleType::RealType; using ParticleVector = amrex::PODVector >; @@ -89,7 +89,7 @@ public: const ParticleType& operator[] (int i) const { return m_data[i]; } ParticleType & 
operator[] (int i) { return m_data[i]; } - void swap (ArrayOfStructs& other) + void swap (ArrayOfStructs& other) { m_data.swap(other.m_data); } @@ -116,9 +116,9 @@ private: }; #if __cplusplus < 201703L -template class Allocator> -constexpr int ArrayOfStructs::SizeInReal; +constexpr int ArrayOfStructs::SizeInReal; #endif } // namespace amrex diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index 6c6a64eb275..c06050785a9 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -168,7 +168,7 @@ public: #endif using ParticleContainerType = ParticleContainer; - using ParticleTileType = ParticleTile; + using ParticleTileType = ParticleTile; using ParticleInitData = ParticleInitType; //! A single level worth of particles is indexed (grid id, tile id) diff --git a/Src/Particle/AMReX_ParticleIO.H b/Src/Particle/AMReX_ParticleIO.H index 733ec8c3671..d08ca38aeb5 100644 --- a/Src/Particle/AMReX_ParticleIO.H +++ b/Src/Particle/AMReX_ParticleIO.H @@ -1108,7 +1108,7 @@ ParticleContainer_impl for (int lev = 0; lev < m_particles.size(); lev++) { auto& pmap = m_particles[lev]; for (const auto& kv : pmap) { - ParticleTile pinned_ptile; pinned_ptile.define(NumRuntimeRealComps(), NumRuntimeIntComps()); pinned_ptile.resize(kv.second.numParticles()); diff --git a/Src/Particle/AMReX_ParticleInit.H b/Src/Particle/AMReX_ParticleInit.H index 6d1b39e678a..7aa2141c0b0 100644 --- a/Src/Particle/AMReX_ParticleInit.H +++ b/Src/Particle/AMReX_ParticleInit.H @@ -1365,7 +1365,7 @@ InitOnePerCell (Real x_off, Real y_off, Real z_off, const ParticleInitData& pdat Box grid = ParticleBoxArray(0)[mfi.index()]; auto ind = std::make_pair(mfi.index(), mfi.LocalTileIndex()); RealBox grid_box (grid,dx,geom.ProbLo()); - ParticleTile ptile_tmp; + ParticleTile ptile_tmp; for (IntVect beg = grid.smallEnd(), end=grid.bigEnd(), cell = grid.smallEnd(); cell <= end; grid.next(cell)) { // the real struct data diff --git 
a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index fa8b5ad5b79..a62048b856a 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -301,21 +301,24 @@ struct ConstParticleTileData } }; -template class Allocator=DefaultAllocator> struct ParticleTile { template using AllocatorType = Allocator; - using ParticleType = Particle; + using ParticleType = T_ParticleType; static constexpr int NAR = NArrayReal; static constexpr int NAI = NArrayInt; using RealType = typename ParticleType::RealType; + static constexpr int NStructReal = ParticleType::NReal; + static constexpr int NStructInt = ParticleType::NInt; + using SuperParticleType = Particle; - using AoS = ArrayOfStructs; + using AoS = ArrayOfStructs; using ParticleVector = typename AoS::ParticleVector; using SoA = StructOfArrays; @@ -341,22 +344,22 @@ struct ParticleTile // Get cpu data - // SoA - template ::type = 0> + // AoS + template ::type = 0> ParticleCPUWrapper cpu (int index) & { ParticleType p(this->getParticleTileData(), index); return p.cpu(); } // const - template ::type = 0> + template ::type = 0> ConstParticleCPUWrapper cpu (int index) const & { ParticleType p(this->getParticleTileData(), index); return p.cpu(); } - // AoS - template ::type = 0> + // SoA + template ::type = 0> ParticleCPUWrapper cpu () & { ParticleType& p = m_aos_tile().dataPtr()[index]; return p.cpu(); @@ -364,7 +367,7 @@ struct ParticleTile // const - template ::type = 0> + template ::type = 0> ConstParticleCPUWrapper cpu () const & { ParticleType& p = m_aos_tile().dataPtr()[index]; return p.cpu(); @@ -372,29 +375,29 @@ struct ParticleTile // Get id data - // SoA - template ::type = 0> + // AoS + template ::type = 0> ParticleIDWrapper id (int index) & { ParticleType p(this->getParticleTileData(), index); return p.id(); } // const - template ::type = 0> + template ::type = 0> ConstParticleIDWrapper id (int index) const & { ParticleType p(this->getParticleTileData(), index); 
return p.id(); } - // AoS - template ::type = 0> + // SoA + template ::type = 0> ParticleIDWrapper id (int index) & { ParticleType& p = m_aos_tile().dataPtr()[index]; return p.id(); } // const - template ::type = 0> + template ::type = 0> ConstParticleIDWrapper id (int index) const & { ParticleType& p = m_aos_tile().dataPtr()[index]; return p.id(); @@ -403,27 +406,27 @@ struct ParticleTile // Get positions data - template ::type = 0> + template ::type = 0> RealType& pos (int index, int position_index) & { ParticleType p(this->getParticleTileData(), index); return p.pos(position_index); } - template ::type = 0> + template ::type = 0> RealType pos (int index, int position_index) const & { ParticleType p(this->getParticleTileData(), index); return p.pos(position_index); } - template ::type = 0> + template ::type = 0> RealType& pos (int index, int position_index) & { ParticleType& p = m_aos_tile().dataPtr()[index]; return p.pos(position_index); } - template ::type = 0> + template ::type = 0> RealType pos (int index, int position_index) const & { ParticleType& p = m_aos_tile().dataPtr()[index]; @@ -668,7 +671,7 @@ struct ParticleTile return nbytes; } - void swap (ParticleTile& other) + void swap (ParticleTile& other) { m_aos_tile().swap(other.GetArrayOfStructs()()); for (int j = 0; j < NumRealComps(); ++j) @@ -811,3 +814,4 @@ private: } // namespace amrex; #endif // AMREX_PARTICLETILE_H_ + diff --git a/Src/Particle/AMReX_WriteBinaryParticleData.H b/Src/Particle/AMReX_WriteBinaryParticleData.H index 5a71d015763..6c9494f88c5 100644 --- a/Src/Particle/AMReX_WriteBinaryParticleData.H +++ b/Src/Particle/AMReX_WriteBinaryParticleData.H @@ -738,7 +738,7 @@ void WriteBinaryParticleDataAsync (PC const& pc, } // make tmp particle tiles in pinned memory to write - using PinnedPTile = ParticleTile; auto myptiles = std::make_shared,PinnedPTile> > >(); myptiles->resize(pc.finestLevel()+1); From 56d6297c897dce200eaff8c77dbdd6c3a1c9259e Mon Sep 17 00:00:00 2001 From: Thierry Antoun 
Date: Tue, 2 Aug 2022 18:17:54 -0700 Subject: [PATCH 007/111] Adding particle type corrections --- Src/Particle/AMReX_ArrayOfStructs.H | 10 ++--- Src/Particle/AMReX_ParticleContainer.H | 2 +- Src/Particle/AMReX_ParticleIO.H | 2 +- Src/Particle/AMReX_ParticleInit.H | 2 +- Src/Particle/AMReX_ParticleTile.H | 44 +++++++++++--------- Src/Particle/AMReX_WriteBinaryParticleData.H | 2 +- 6 files changed, 33 insertions(+), 29 deletions(-) diff --git a/Src/Particle/AMReX_ArrayOfStructs.H b/Src/Particle/AMReX_ArrayOfStructs.H index 25339dee444..48f99dc1e13 100644 --- a/Src/Particle/AMReX_ArrayOfStructs.H +++ b/Src/Particle/AMReX_ArrayOfStructs.H @@ -8,11 +8,11 @@ namespace amrex { -template class Allocator=DefaultAllocator> class ArrayOfStructs { public: - using ParticleType = Particle; + using ParticleType = T_ParticleType; using RealType = typename ParticleType::RealType; using ParticleVector = amrex::PODVector >; @@ -89,7 +89,7 @@ public: const ParticleType& operator[] (int i) const { return m_data[i]; } ParticleType & operator[] (int i) { return m_data[i]; } - void swap (ArrayOfStructs& other) + void swap (ArrayOfStructs& other) { m_data.swap(other.m_data); } @@ -116,9 +116,9 @@ private: }; #if __cplusplus < 201703L -template class Allocator> -constexpr int ArrayOfStructs::SizeInReal; +constexpr int ArrayOfStructs::SizeInReal; #endif } // namespace amrex diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index 6c6a64eb275..c06050785a9 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -168,7 +168,7 @@ public: #endif using ParticleContainerType = ParticleContainer; - using ParticleTileType = ParticleTile; + using ParticleTileType = ParticleTile; using ParticleInitData = ParticleInitType; //! 
A single level worth of particles is indexed (grid id, tile id) diff --git a/Src/Particle/AMReX_ParticleIO.H b/Src/Particle/AMReX_ParticleIO.H index 733ec8c3671..d08ca38aeb5 100644 --- a/Src/Particle/AMReX_ParticleIO.H +++ b/Src/Particle/AMReX_ParticleIO.H @@ -1108,7 +1108,7 @@ ParticleContainer_impl for (int lev = 0; lev < m_particles.size(); lev++) { auto& pmap = m_particles[lev]; for (const auto& kv : pmap) { - ParticleTile pinned_ptile; pinned_ptile.define(NumRuntimeRealComps(), NumRuntimeIntComps()); pinned_ptile.resize(kv.second.numParticles()); diff --git a/Src/Particle/AMReX_ParticleInit.H b/Src/Particle/AMReX_ParticleInit.H index 6d1b39e678a..7aa2141c0b0 100644 --- a/Src/Particle/AMReX_ParticleInit.H +++ b/Src/Particle/AMReX_ParticleInit.H @@ -1365,7 +1365,7 @@ InitOnePerCell (Real x_off, Real y_off, Real z_off, const ParticleInitData& pdat Box grid = ParticleBoxArray(0)[mfi.index()]; auto ind = std::make_pair(mfi.index(), mfi.LocalTileIndex()); RealBox grid_box (grid,dx,geom.ProbLo()); - ParticleTile ptile_tmp; + ParticleTile ptile_tmp; for (IntVect beg = grid.smallEnd(), end=grid.bigEnd(), cell = grid.smallEnd(); cell <= end; grid.next(cell)) { // the real struct data diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index fa8b5ad5b79..a62048b856a 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -301,21 +301,24 @@ struct ConstParticleTileData } }; -template class Allocator=DefaultAllocator> struct ParticleTile { template using AllocatorType = Allocator; - using ParticleType = Particle; + using ParticleType = T_ParticleType; static constexpr int NAR = NArrayReal; static constexpr int NAI = NArrayInt; using RealType = typename ParticleType::RealType; + static constexpr int NStructReal = ParticleType::NReal; + static constexpr int NStructInt = ParticleType::NInt; + using SuperParticleType = Particle; - using AoS = ArrayOfStructs; + using AoS = ArrayOfStructs; using ParticleVector = 
typename AoS::ParticleVector; using SoA = StructOfArrays; @@ -341,22 +344,22 @@ struct ParticleTile // Get cpu data - // SoA - template ::type = 0> + // AoS + template ::type = 0> ParticleCPUWrapper cpu (int index) & { ParticleType p(this->getParticleTileData(), index); return p.cpu(); } // const - template ::type = 0> + template ::type = 0> ConstParticleCPUWrapper cpu (int index) const & { ParticleType p(this->getParticleTileData(), index); return p.cpu(); } - // AoS - template ::type = 0> + // SoA + template ::type = 0> ParticleCPUWrapper cpu () & { ParticleType& p = m_aos_tile().dataPtr()[index]; return p.cpu(); @@ -364,7 +367,7 @@ struct ParticleTile // const - template ::type = 0> + template ::type = 0> ConstParticleCPUWrapper cpu () const & { ParticleType& p = m_aos_tile().dataPtr()[index]; return p.cpu(); @@ -372,29 +375,29 @@ struct ParticleTile // Get id data - // SoA - template ::type = 0> + // AoS + template ::type = 0> ParticleIDWrapper id (int index) & { ParticleType p(this->getParticleTileData(), index); return p.id(); } // const - template ::type = 0> + template ::type = 0> ConstParticleIDWrapper id (int index) const & { ParticleType p(this->getParticleTileData(), index); return p.id(); } - // AoS - template ::type = 0> + // SoA + template ::type = 0> ParticleIDWrapper id (int index) & { ParticleType& p = m_aos_tile().dataPtr()[index]; return p.id(); } // const - template ::type = 0> + template ::type = 0> ConstParticleIDWrapper id (int index) const & { ParticleType& p = m_aos_tile().dataPtr()[index]; return p.id(); @@ -403,27 +406,27 @@ struct ParticleTile // Get positions data - template ::type = 0> + template ::type = 0> RealType& pos (int index, int position_index) & { ParticleType p(this->getParticleTileData(), index); return p.pos(position_index); } - template ::type = 0> + template ::type = 0> RealType pos (int index, int position_index) const & { ParticleType p(this->getParticleTileData(), index); return p.pos(position_index); } - template 
::type = 0> + template ::type = 0> RealType& pos (int index, int position_index) & { ParticleType& p = m_aos_tile().dataPtr()[index]; return p.pos(position_index); } - template ::type = 0> + template ::type = 0> RealType pos (int index, int position_index) const & { ParticleType& p = m_aos_tile().dataPtr()[index]; @@ -668,7 +671,7 @@ struct ParticleTile return nbytes; } - void swap (ParticleTile& other) + void swap (ParticleTile& other) { m_aos_tile().swap(other.GetArrayOfStructs()()); for (int j = 0; j < NumRealComps(); ++j) @@ -811,3 +814,4 @@ private: } // namespace amrex; #endif // AMREX_PARTICLETILE_H_ + diff --git a/Src/Particle/AMReX_WriteBinaryParticleData.H b/Src/Particle/AMReX_WriteBinaryParticleData.H index 5a71d015763..6c9494f88c5 100644 --- a/Src/Particle/AMReX_WriteBinaryParticleData.H +++ b/Src/Particle/AMReX_WriteBinaryParticleData.H @@ -738,7 +738,7 @@ void WriteBinaryParticleDataAsync (PC const& pc, } // make tmp particle tiles in pinned memory to write - using PinnedPTile = ParticleTile; auto myptiles = std::make_shared,PinnedPTile> > >(); myptiles->resize(pc.finestLevel()+1); From bb0f1c9249cd0ddbaad7a93020ff8fd4c514ef20 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Wed, 10 Aug 2022 11:08:21 -0700 Subject: [PATCH 008/111] Adding correction to the cpu function --- Src/Particle/AMReX_ParticleTile.H | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index a62048b856a..efb991e9239 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -360,7 +360,7 @@ struct ParticleTile // SoA template ::type = 0> - ParticleCPUWrapper cpu () & { + ParticleCPUWrapper cpu (int index) & { ParticleType& p = m_aos_tile().dataPtr()[index]; return p.cpu(); } @@ -368,7 +368,7 @@ struct ParticleTile // const template ::type = 0> - ConstParticleCPUWrapper cpu () const & { + ConstParticleCPUWrapper cpu (int index) const & { ParticleType& p = 
m_aos_tile().dataPtr()[index]; return p.cpu(); } From 1fcfc227b028c9db9094f62b67228af42e0a494d Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Wed, 10 Aug 2022 13:17:18 -0700 Subject: [PATCH 009/111] test git push --- Tests/Particles/SOAParticle/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp index d0071b8d3ab..284c984a5ea 100644 --- a/Tests/Particles/SOAParticle/main.cpp +++ b/Tests/Particles/SOAParticle/main.cpp @@ -32,7 +32,7 @@ void addParticles () // TODO ptile1.id(i) = 1; - //ptile1.cpu(i) = 1; + ptile1.cpu(i) = 1; } //ptile1.push_back_int(3, ...std::vector); //ptile1.push_back_int(4, ...std::vector); From eba049fc9d025d70e3a2089c5e122ed5624daff8 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Wed, 10 Aug 2022 13:58:40 -0700 Subject: [PATCH 010/111] Correcting the "!" and switching id/cpu index --- Src/Particle/AMReX_ParticleTile.H | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index eb5ae59941c..7aa3afbd2cb 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -17,7 +17,7 @@ namespace amrex { template struct ParticleTileData { - + static constexpr int NAR = NArrayReal; static constexpr int NAI = NArrayInt; using ParticleType = Particle; @@ -181,16 +181,16 @@ struct SoAParticle : SoAParticleBase //functions to get id and cpu in the SOA data AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ParticleCPUWrapper cpu () & { return ParticleCPUWrapper(this->m_particle_tile_data.m_idata[0][m_index]); } + ParticleCPUWrapper cpu () & { return ParticleCPUWrapper(this->m_particle_tile_data.m_idata[1][m_index]); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ParticleIDWrapper id () & { return ParticleIDWrapper(this->m_particle_tile_data.m_idata[1][m_index]); } + ParticleIDWrapper id () & { return 
ParticleIDWrapper(this->m_particle_tile_data.m_idata[0][m_index]); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ConstParticleCPUWrapper cpu () const & { return ConstParticleCPUWrapper(this->m_particle_tile_data.m_idata[0][m_index]); } + ConstParticleCPUWrapper cpu () const & { return ConstParticleCPUWrapper(this->m_particle_tile_data.m_idata[1][m_index]); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ConstParticleIDWrapper id () const & { return ConstParticleIDWrapper(this->m_particle_tile_data.m_idata[1][m_index]); } + ConstParticleIDWrapper id () const & { return ConstParticleIDWrapper(this->m_particle_tile_data.m_idata[0][m_index]); } //functions to get positions of the particle in the SOA data @@ -377,28 +377,28 @@ struct ParticleTile // Get id data // AoS - template ::type = 0> + template ::type = 0> ParticleIDWrapper id (int index) & { ParticleType p(this->getParticleTileData(), index); return p.id(); } // const - template ::type = 0> + template ::type = 0> ConstParticleIDWrapper id (int index) const & { ParticleType p(this->getParticleTileData(), index); return p.id(); } // SoA - template ::type = 0> + template ::type = 0> ParticleIDWrapper id (int index) & { ParticleType& p = m_aos_tile().dataPtr()[index]; return p.id(); } // const - template ::type = 0> + template ::type = 0> ConstParticleIDWrapper id (int index) const & { ParticleType& p = m_aos_tile().dataPtr()[index]; return p.id(); @@ -407,27 +407,27 @@ struct ParticleTile // Get positions data - template ::type = 0> + template ::type = 0> RealType& pos (int index, int position_index) & { ParticleType p(this->getParticleTileData(), index); return p.pos(position_index); } - template ::type = 0> + template ::type = 0> RealType pos (int index, int position_index) const & { ParticleType p(this->getParticleTileData(), index); return p.pos(position_index); } - template ::type = 0> + template ::type = 0> RealType& pos (int index, int position_index) & { ParticleType& p = m_aos_tile().dataPtr()[index]; return 
p.pos(position_index); } - template ::type = 0> + template ::type = 0> RealType pos (int index, int position_index) const & { ParticleType& p = m_aos_tile().dataPtr()[index]; From ba3e58ebd3c683272afaab35b467fa1cbeefa7f2 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Tue, 16 Aug 2022 10:02:04 -0700 Subject: [PATCH 011/111] Still correcting Type errors --- Src/Particle/AMReX_ParticleContainerI.H | 4 +- Src/Particle/AMReX_ParticleTile.H | 39 ++++++++++------ Src/Particle/AMReX_ParticleTransformation.H | 18 +++---- Src/Particle/AMReX_ParticleUtil.H | 52 ++++++++++----------- 4 files changed, 61 insertions(+), 52 deletions(-) diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index 5719f74de24..3c970dac200 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -998,7 +998,7 @@ void ParticleContainer_impl:: copyParticles (const PCType& other, bool local) { - using PData = ConstParticleTileData; + using PData = ConstParticleTileData; copyParticles(other, [=] AMREX_GPU_HOST_DEVICE (const PData& /*data*/, int /*i*/) { return 1; }, local); } @@ -1009,7 +1009,7 @@ void ParticleContainer_impl:: addParticles (const PCType& other, bool local) { - using PData = ConstParticleTileData; + using PData = ConstParticleTileData; addParticles(other, [=] AMREX_GPU_HOST_DEVICE (const PData& /*data*/, int /*i*/) { return 1; }, local); } diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 7aa3afbd2cb..394a1f94cf4 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -14,13 +14,18 @@ namespace amrex { -template +template struct ParticleTileData { static constexpr int NAR = NArrayReal; static constexpr int NAI = NArrayInt; - using ParticleType = Particle; + + using ParticleType = T_ParticleType; + + static constexpr int NStructReal = ParticleType::NReal; + static constexpr int NStructInt = ParticleType::NInt; + using 
SuperParticleType = Particle; Long m_size; @@ -172,7 +177,7 @@ struct SoAParticle : SoAParticleBase using RealType = ParticleReal; - SoAParticle(ParticleTileData<0, 0, NArrayReal, NArrayInt> *const ptd, int const index) + SoAParticle(ParticleTileData *const ptd, int const index) { m_particle_tile_data=ptd; m_index=index; @@ -181,48 +186,52 @@ struct SoAParticle : SoAParticleBase //functions to get id and cpu in the SOA data AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ParticleCPUWrapper cpu () & { return ParticleCPUWrapper(this->m_particle_tile_data.m_idata[1][m_index]); } + ParticleCPUWrapper cpu () & { return ParticleCPUWrapper(this->m_particle_tile_data->m_idata[1][m_index]); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ParticleIDWrapper id () & { return ParticleIDWrapper(this->m_particle_tile_data.m_idata[0][m_index]); } + ParticleIDWrapper id () & { return ParticleIDWrapper(this->m_particle_tile_data->m_idata[0][m_index]); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ConstParticleCPUWrapper cpu () const & { return ConstParticleCPUWrapper(this->m_particle_tile_data.m_idata[1][m_index]); } + ConstParticleCPUWrapper cpu () const & { return ConstParticleCPUWrapper(this->m_particle_tile_data->m_idata[1][m_index]); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ConstParticleIDWrapper id () const & { return ConstParticleIDWrapper(this->m_particle_tile_data.m_idata[0][m_index]); } + ConstParticleIDWrapper id () const & { return ConstParticleIDWrapper(this->m_particle_tile_data->m_idata[0][m_index]); } //functions to get positions of the particle in the SOA data AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_particle_tile_data.m_rdata[0][m_index], this->m_particle_tile_data.m_rdata[1][m_index], this->m_particle_tile_data.m_rdata[2][m_index]));} + RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_particle_tile_data->m_rdata[0][m_index], this->m_particle_tile_data->m_rdata[1][m_index], 
this->m_particle_tile_data->m_rdata[2][m_index]));} AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE RealType& pos (int position_index) & { AMREX_ASSERT(position_index < AMREX_SPACEDIM); - return this->m_particle_tile_data.m_rdata[position_index][m_index]; + return this->m_particle_tile_data->m_rdata[position_index][m_index]; } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE RealType pos (int position_index) const & { AMREX_ASSERT(position_index < AMREX_SPACEDIM); - return this->m_particle_tile_data.m_rdata[position_index][m_index]; + return this->m_particle_tile_data->m_rdata[position_index][m_index]; } private : - ParticleTileData<0, 0, NReal, NInt>* m_particle_tile_data; + ParticleTileData* m_particle_tile_data; int m_index; }; -template +template struct ConstParticleTileData { static constexpr int NAR = NArrayReal; static constexpr int NAI = NArrayInt; - using ParticleType = Particle; + using ParticleType = T_ParticleType; + + static constexpr int NStructReal = ParticleType::NReal; + static constexpr int NStructInt = ParticleType::NInt; + using SuperParticleType = Particle; Long m_size; @@ -326,8 +335,8 @@ struct ParticleTile using RealVector = typename SoA::RealVector; using IntVector = typename SoA::IntVector; - using ParticleTileDataType = ParticleTileData; - using ConstParticleTileDataType = ConstParticleTileData; + using ParticleTileDataType = ParticleTileData; + using ConstParticleTileDataType = ConstParticleTileData; ParticleTile () : m_defined(false) diff --git a/Src/Particle/AMReX_ParticleTransformation.H b/Src/Particle/AMReX_ParticleTransformation.H index 8fe57749c4a..dbf45f0db7e 100644 --- a/Src/Particle/AMReX_ParticleTransformation.H +++ b/Src/Particle/AMReX_ParticleTransformation.H @@ -26,10 +26,10 @@ namespace amrex * \param dst_i the index in the destination to write to * */ -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void copyParticle (const ParticleTileData& dst, - const ConstParticleTileData& src, +void copyParticle (const ParticleTileData& 
dst, + const ConstParticleTileData& src, int src_i, int dst_i) noexcept { AMREX_ASSERT(dst.m_num_runtime_real == src.m_num_runtime_real); @@ -60,10 +60,10 @@ void copyParticle (const ParticleTileData& dst, * \param dst_i the index in the destination to write to * */ -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void copyParticle (const ParticleTileData& dst, - const ParticleTileData& src, +void copyParticle (const ParticleTileData& dst, + const ParticleTileData& src, int src_i, int dst_i) noexcept { AMREX_ASSERT(dst.m_num_runtime_real == src.m_num_runtime_real); @@ -94,10 +94,10 @@ void copyParticle (const ParticleTileData& dst, * \param dst_i the index in the destination to write to * */ -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void swapParticle (const ParticleTileData& dst, - const ParticleTileData& src, +void swapParticle (const ParticleTileData& dst, + const ParticleTileData& src, int src_i, int dst_i) noexcept { AMREX_ASSERT(dst.m_num_runtime_real == src.m_num_runtime_real); diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index 6732a271810..02171b18fac 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -56,10 +56,10 @@ auto call_f (F const& f, SrcData const& src, N i, amrex::RandomEngine const&) no // The next several functions are used by ParticleReduce // Lambda takes a Particle -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i) noexcept -> decltype(f(p.m_aos[i])) { @@ -67,11 +67,11 @@ auto call_f (F const& f, } // Lambda takes a SuperParticle -template ::type = 0> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i) noexcept -> decltype(f(p.getSuperParticle(i))) { @@ -79,10 +79,10 @@ auto call_f (F const& f, } // Lambda takes a 
ConstParticleTileData -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i) noexcept -> decltype(f(p, i)) { @@ -92,10 +92,10 @@ auto call_f (F const& f, // These next several functions are used by ParticleToMesh and MeshToParticle // Lambda takes a Particle -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const& plo, GpuArray const& dxi) noexcept @@ -105,10 +105,10 @@ auto call_f (F const& f, } // Lambda takes a Particle -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const&, GpuArray const&) noexcept @@ -118,10 +118,10 @@ auto call_f (F const& f, } // Lambda takes a Particle -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ParticleTileData& p, + const ParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const& plo, GpuArray const& dxi) noexcept @@ -131,10 +131,10 @@ auto call_f (F const& f, } // Lambda takes a Particle -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ParticleTileData& p, + const ParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const&, GpuArray const&) noexcept @@ -144,11 +144,11 @@ auto call_f (F const& f, } // Lambda takes a SuperParticle -template ::type = 0> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const& plo, GpuArray const& dxi) noexcept @@ -158,11 +158,11 @@ auto call_f (F const& f, } // Lambda takes a SuperParticle -template ::type = 0> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto 
call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const&, GpuArray const&) noexcept @@ -172,10 +172,10 @@ auto call_f (F const& f, } // Lambda takes a ConstParticleTileData -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const&, GpuArray const&) noexcept @@ -185,10 +185,10 @@ auto call_f (F const& f, } // Lambda takes a ConstParticleTileData -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const& plo, GpuArray const& dxi) noexcept @@ -198,10 +198,10 @@ auto call_f (F const& f, } // Lambda takes a ParticleTileData -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ParticleTileData& p, + const ParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const& plo, GpuArray const& dxi) noexcept @@ -211,10 +211,10 @@ auto call_f (F const& f, } // Lambda takes a ParticleTileData -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ParticleTileData& p, + const ParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const&, GpuArray const&) noexcept From 8b0daafeaa2dd642d6cc94b44aa376bf45c8ba7d Mon Sep 17 00:00:00 2001 From: atmyers Date: Wed, 17 Aug 2022 16:00:03 -0700 Subject: [PATCH 012/111] fixing some compile errors, some still remain. 
--- Src/Particle/AMReX_Particle.H | 3 ++- Src/Particle/AMReX_ParticleContainer.H | 1 + Src/Particle/AMReX_ParticleTile.H | 32 +++++++++++++------------- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/Src/Particle/AMReX_Particle.H b/Src/Particle/AMReX_Particle.H index 39eccaba3ee..fe94e6d9a89 100644 --- a/Src/Particle/AMReX_Particle.H +++ b/Src/Particle/AMReX_Particle.H @@ -203,7 +203,8 @@ struct Particle : ParticleBase { static constexpr bool is_soa_particle = false; - + using StorageParticleType = Particle; + //! \brief number of extra Real components in the particle struct static constexpr int NReal = T_NReal; diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index c06050785a9..b43fc2624c7 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -1369,6 +1369,7 @@ using ParticleContainer = ParticleContainer_impl class Allocator=DefaultAllocator> using ParticleContainerPureSoA = ParticleContainer_impl, T_NArrayReal, T_NArrayInt, Allocator>; + #include "AMReX_ParticleInit.H" #include "AMReX_ParticleContainerI.H" #include "AMReX_ParticleIO.H" diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 394a1f94cf4..81207141b46 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -172,12 +172,12 @@ struct ParticleTileData template struct SoAParticle : SoAParticleBase { - + using StorageParticleType = SoAParticleBase; static constexpr bool is_soa_particle = true; using RealType = ParticleReal; - SoAParticle(ParticleTileData *const ptd, int const index) + SoAParticle (ParticleTileData ptd, int const index) { m_particle_tile_data=ptd; m_index=index; @@ -186,39 +186,39 @@ struct SoAParticle : SoAParticleBase //functions to get id and cpu in the SOA data AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ParticleCPUWrapper cpu () & { return ParticleCPUWrapper(this->m_particle_tile_data->m_idata[1][m_index]); } + 
ParticleCPUWrapper cpu () & { return ParticleCPUWrapper(this->m_particle_tile_data.m_idata[1][m_index]); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ParticleIDWrapper id () & { return ParticleIDWrapper(this->m_particle_tile_data->m_idata[0][m_index]); } + ParticleIDWrapper id () & { return ParticleIDWrapper(this->m_particle_tile_data.m_idata[0][m_index]); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ConstParticleCPUWrapper cpu () const & { return ConstParticleCPUWrapper(this->m_particle_tile_data->m_idata[1][m_index]); } + ConstParticleCPUWrapper cpu () const & { return ConstParticleCPUWrapper(this->m_particle_tile_data.m_idata[1][m_index]); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ConstParticleIDWrapper id () const & { return ConstParticleIDWrapper(this->m_particle_tile_data->m_idata[0][m_index]); } + ConstParticleIDWrapper id () const & { return ConstParticleIDWrapper(this->m_particle_tile_data.m_idata[0][m_index]); } //functions to get positions of the particle in the SOA data AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_particle_tile_data->m_rdata[0][m_index], this->m_particle_tile_data->m_rdata[1][m_index], this->m_particle_tile_data->m_rdata[2][m_index]));} + RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_particle_tile_data->m_rdata[0][m_index], this->m_particle_tile_data.m_rdata[1][m_index], this->m_particle_tile_data->m_rdata[2][m_index]));} AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE RealType& pos (int position_index) & { AMREX_ASSERT(position_index < AMREX_SPACEDIM); - return this->m_particle_tile_data->m_rdata[position_index][m_index]; + return this->m_particle_tile_data.m_rdata[position_index][m_index]; } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE RealType pos (int position_index) const & { AMREX_ASSERT(position_index < AMREX_SPACEDIM); - return this->m_particle_tile_data->m_rdata[position_index][m_index]; + return this->m_particle_tile_data.m_rdata[position_index][m_index]; } 
private : - ParticleTileData* m_particle_tile_data; + ParticleTileData m_particle_tile_data; int m_index; }; @@ -335,8 +335,8 @@ struct ParticleTile using RealVector = typename SoA::RealVector; using IntVector = typename SoA::IntVector; - using ParticleTileDataType = ParticleTileData; - using ConstParticleTileDataType = ConstParticleTileData; + using ParticleTileDataType = ParticleTileData; + using ConstParticleTileDataType = ConstParticleTileData; ParticleTile () : m_defined(false) @@ -355,21 +355,21 @@ struct ParticleTile // Get cpu data // AoS - template ::type = 0> + template ::type = 0> ParticleCPUWrapper cpu (int index) & { ParticleType p(this->getParticleTileData(), index); return p.cpu(); } // const - template ::type = 0> + template ::type = 0> ConstParticleCPUWrapper cpu (int index) const & { ParticleType p(this->getParticleTileData(), index); return p.cpu(); } // SoA - template ::type = 0> + template ::type = 0> ParticleCPUWrapper cpu (int index) & { ParticleType& p = m_aos_tile().dataPtr()[index]; return p.cpu(); @@ -377,7 +377,7 @@ struct ParticleTile // const - template ::type = 0> + template ::type = 0> ConstParticleCPUWrapper cpu (int index) const & { ParticleType& p = m_aos_tile().dataPtr()[index]; return p.cpu(); From 9221328479f756789b30ecdb051484accff3f778 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Fri, 19 Aug 2022 10:45:09 -0700 Subject: [PATCH 013/111] Adding new struct with N=0 output to GPU Array --- Src/Base/AMReX_Array.H | 82 ++++++++++++++++++++- Src/Particle/AMReX_Particle.H | 4 +- Src/Particle/AMReX_ParticleContainer.H | 2 +- Src/Particle/AMReX_ParticleTransformation.H | 10 ++- 4 files changed, 89 insertions(+), 9 deletions(-) diff --git a/Src/Base/AMReX_Array.H b/Src/Base/AMReX_Array.H index efdced8057a..f5f7c4c672a 100644 --- a/Src/Base/AMReX_Array.H +++ b/Src/Base/AMReX_Array.H @@ -127,7 +127,85 @@ namespace amrex { return p; } - T arr[amrex::max(N,1u)]; + T arr[N]; + }; + + template + struct GpuArray + { + using value_type = 
T; + using reference_type = T&; + + /** + * GpuArray elements are indexed using square brackets, as with any + * other array. + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + const T& operator [] (int) const noexcept { return *static_cast(nullptr); } + + /** + * GpuArray elements are indexed using square brackets, as with any + * other array. + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + T& operator [] (int) noexcept { return *static_cast(nullptr); } + + /** + * Returns a \c const pointer to the underlying data of a GpuArray object. + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + const T* data () const noexcept { return nullptr; } + + /** + * Returns a pointer to the underlying data of a GpuArray object. + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + T* data () noexcept { return nullptr; } + + /** + * Returns the number of elements in the GpuArray object as an + * unsigned integer. + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr unsigned int size () noexcept { return 0u; } + + /** + * Returns a \c const pointer address to the first element of the + * GpuArray object. + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + const T* begin () const noexcept { return nullptr; } + + /** + * Returns a const pointer address right after the last element of the + * GpuArray object. + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + const T* end () const noexcept { return nullptr; } + + /** + * Returns a pointer address to the first element of the + * GpuArray object. + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + T* begin () noexcept { return nullptr; } + + /** + * Returns a pointer address right after the last element of the + * GpuArray object. + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + T* end () noexcept { return nullptr; } + + /** + * Fills in all of the elements in the GpuArray object to the same + * value. 
+ * + * \param value The fill value + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void fill ( const T& value ) noexcept + { (void) value; } }; } @@ -896,4 +974,4 @@ namespace amrex } } -#endif +#endif \ No newline at end of file diff --git a/Src/Particle/AMReX_Particle.H b/Src/Particle/AMReX_Particle.H index fe94e6d9a89..5295fa0c8ba 100644 --- a/Src/Particle/AMReX_Particle.H +++ b/Src/Particle/AMReX_Particle.H @@ -189,8 +189,8 @@ struct ParticleBase struct SoAParticleBase { - static constexpr int NReal = 0; - static constexpr int NInt = 0; + static constexpr int NReal=0; + static constexpr int NInt=0; }; /** \brief The struct used to store particles. diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index b43fc2624c7..6dc5d736193 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -132,7 +132,7 @@ struct ParticleInitType * \tparam T_NArrayInt The number of extra integer components stored in struct-of-array form * */ -template class Allocator=DefaultAllocator> class ParticleContainer_impl : public ParticleContainerBase { diff --git a/Src/Particle/AMReX_ParticleTransformation.H b/Src/Particle/AMReX_ParticleTransformation.H index dbf45f0db7e..af932788734 100644 --- a/Src/Particle/AMReX_ParticleTransformation.H +++ b/Src/Particle/AMReX_ParticleTransformation.H @@ -36,12 +36,14 @@ void copyParticle (const ParticleTileData& dst, AMREX_ASSERT(dst.m_num_runtime_int == src.m_num_runtime_int ); dst.m_aos[dst_i] = src.m_aos[src_i]; - for (int j = 0; j < NAR; ++j) - dst.m_rdata[j][dst_i] = src.m_rdata[j][src_i]; + if constexpr(NAR > 0) + for (int j = 0; j < NAR; ++j) + dst.m_rdata[j][dst_i] = src.m_rdata[j][src_i]; for (int j = 0; j < dst.m_num_runtime_real; ++j) dst.m_runtime_rdata[j][dst_i] = src.m_runtime_rdata[j][src_i]; - for (int j = 0; j < NAI; ++j) - dst.m_idata[j][dst_i] = src.m_idata[j][src_i]; + if constexpr(NAI > 0) + for (int j = 0; j < NAI; ++j) + dst.m_idata[j][dst_i] 
= src.m_idata[j][src_i]; for (int j = 0; j < dst.m_num_runtime_int; ++j) dst.m_runtime_idata[j][dst_i] = src.m_runtime_idata[j][src_i]; } From d75a35c17d9cc244c6b615034ff12119bcf3998a Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Fri, 19 Aug 2022 10:56:22 -0700 Subject: [PATCH 014/111] Correction to a typing error --- Src/Particle/AMReX_Particle.H | 1 + 1 file changed, 1 insertion(+) diff --git a/Src/Particle/AMReX_Particle.H b/Src/Particle/AMReX_Particle.H index 5295fa0c8ba..7876379cf08 100644 --- a/Src/Particle/AMReX_Particle.H +++ b/Src/Particle/AMReX_Particle.H @@ -187,6 +187,7 @@ struct ParticleBase uint64_t m_idcpu = 0; }; + struct SoAParticleBase { static constexpr int NReal=0; From 517970f1b98ef748365cff4aa4d3a1b8e9d738c9 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Fri, 19 Aug 2022 16:42:28 -0700 Subject: [PATCH 015/111] Compute a forward Declaration of SoAPartile class --- Src/Particle/AMReX_ParticleTile.H | 94 ++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 32 deletions(-) diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 81207141b46..405fcb4cf94 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -14,24 +14,34 @@ namespace amrex { +// Forward Declaration +template +struct SoAParticle; + template -struct ParticleTileData +struct ParticleTileData { static constexpr int NAR = NArrayReal; static constexpr int NAI = NArrayInt; using ParticleType = T_ParticleType; + using Self = ParticleTileData; + + static_assert(!std::is_same>::value || ParticleType::NReal != 0, "ParticleTileData mismatch in R"); + static_assert(!std::is_same>::value || ParticleType::NInt != 0, "ParticleTileData mismatch in R"); static constexpr int NStructReal = ParticleType::NReal; static constexpr int NStructInt = ParticleType::NInt; - using SuperParticleType = Particle; + using SuperParticleType = Particle; + + static_assert(!std::is_same>::value || NAR != 0, "ParticleTileData 
NAR==0"); Long m_size; ParticleType* AMREX_RESTRICT m_aos; - GpuArray m_rdata; - GpuArray m_idata; + GpuArray m_rdata; + GpuArray m_idata; int m_num_runtime_real; int m_num_runtime_int; @@ -47,7 +57,7 @@ struct ParticleTileData memcpy(dst, m_aos + src_index, sizeof(ParticleType)); dst += sizeof(ParticleType); int array_start_index = AMREX_SPACEDIM + NStructReal; - for (int i = 0; i < NArrayReal; ++i) + for (int i = 0; i < NAR; ++i) { if (comm_real[array_start_index + i]) { @@ -55,7 +65,7 @@ struct ParticleTileData dst += sizeof(ParticleReal); } } - int runtime_start_index = AMREX_SPACEDIM + NStructReal + NArrayReal; + int runtime_start_index = AMREX_SPACEDIM + NStructReal + NAR; for (int i = 0; i < m_num_runtime_real; ++i) { if (comm_real[runtime_start_index + i]) @@ -65,7 +75,7 @@ struct ParticleTileData } } array_start_index = 2 + NStructInt; - for (int i = 0; i < NArrayInt; ++i) + for (int i = 0; i < NAI; ++i) { if (comm_int[array_start_index + i]) { @@ -73,7 +83,7 @@ struct ParticleTileData dst += sizeof(int); } } - runtime_start_index = 2 + NStructInt + NArrayInt; + runtime_start_index = 2 + NStructInt + NAI; for (int i = 0; i < m_num_runtime_int; ++i) { if (comm_int[runtime_start_index + i]) @@ -93,7 +103,7 @@ struct ParticleTileData memcpy(m_aos + dst_index, src, sizeof(ParticleType)); src += sizeof(ParticleType); int array_start_index = AMREX_SPACEDIM + NStructReal; - for (int i = 0; i < NArrayReal; ++i) + for (int i = 0; i < NAR; ++i) { if (comm_real[array_start_index + i]) { @@ -101,7 +111,7 @@ struct ParticleTileData src += sizeof(ParticleReal); } } - int runtime_start_index = AMREX_SPACEDIM + NStructReal + NArrayReal; + int runtime_start_index = AMREX_SPACEDIM + NStructReal + NAR; for (int i = 0; i < m_num_runtime_real; ++i) { if (comm_real[runtime_start_index + i]) @@ -111,7 +121,7 @@ struct ParticleTileData } } array_start_index = 2 + NStructInt; - for (int i = 0; i < NArrayInt; ++i) + for (int i = 0; i < NAI; ++i) { if (comm_int[array_start_index + 
i]) { @@ -119,7 +129,7 @@ struct ParticleTileData src += sizeof(int); } } - runtime_start_index = 2 + NStructInt + NArrayInt; + runtime_start_index = 2 + NStructInt + NAI; for (int i = 0; i < m_num_runtime_int; ++i) { if (comm_int[runtime_start_index + i]) @@ -139,13 +149,13 @@ struct ParticleTileData sp.pos(i) = m_aos[index].pos(i); for (int i = 0; i < NStructReal; ++i) sp.rdata(i) = m_aos[index].rdata(i); - for (int i = 0; i < NArrayReal; ++i) + for (int i = 0; i < NAR; ++i) sp.rdata(NStructReal+i) = m_rdata[i][index]; sp.id() = m_aos[index].id(); sp.cpu() = m_aos[index].cpu(); for (int i = 0; i < NStructInt; ++i) sp.idata(i) = m_aos[index].idata(i); - for (int i = 0; i < NArrayInt; ++i) + for (int i = 0; i < NAI; ++i) sp.idata(NStructInt+i) = m_idata[i][index]; return sp; } @@ -157,13 +167,13 @@ struct ParticleTileData m_aos[index].pos(i) = sp.pos(i); for (int i = 0; i < NStructReal; ++i) m_aos[index].rdata(i) = sp.rdata(i); - for (int i = 0; i < NArrayReal; ++i) + for (int i = 0; i < NAR; ++i) m_rdata[i][index] = sp.rdata(NStructReal+i); m_aos[index].id() = sp.id(); m_aos[index].cpu() = sp.cpu(); for (int i = 0; i < NStructInt; ++i) m_aos[index].idata(i) = sp.idata(i); - for (int i = 0; i < NArrayInt; ++i) + for (int i = 0; i < NAI; ++i) m_idata[i][index] = sp.idata(NStructInt+i); } }; @@ -173,12 +183,19 @@ template struct SoAParticle : SoAParticleBase { using StorageParticleType = SoAParticleBase; + using PTD = ParticleTileData; static constexpr bool is_soa_particle = true; using RealType = ParticleReal; - SoAParticle (ParticleTileData ptd, int const index) + SoAParticle (PTD ptd, int const index) { + + static_assert(NArrayReal == PTD::NAR, "ParticleTileData mismatch in R"); + static_assert(NArrayInt == PTD::NAI, "ParticleTileData mismatch in I"); + static_assert(NArrayReal == 3, "NArrayReal mismatch in R"); + static_assert(NArrayInt == 4, "NArrayInt mismatch in R"); + m_particle_tile_data=ptd; m_index=index; } @@ -218,7 +235,9 @@ struct SoAParticle : 
SoAParticleBase private : - ParticleTileData m_particle_tile_data; + static_assert(std::is_trivially_copyable>(), "ParticleTileData is not trivially copyable"); + + PTD m_particle_tile_data; int m_index; }; @@ -299,14 +318,16 @@ struct ConstParticleTileData sp.pos(i) = m_aos[index].pos(i); for (int i = 0; i < NStructReal; ++i) sp.rdata(i) = m_aos[index].rdata(i); - for (int i = 0; i < NArrayReal; ++i) - sp.rdata(NStructReal+i) = m_rdata[i][index]; + if constexpr(NArrayReal > 0) + for (int i = 0; i < NArrayReal; ++i) + sp.rdata(NStructReal+i) = m_rdata[i][index]; sp.id() = m_aos[index].id(); sp.cpu() = m_aos[index].cpu(); for (int i = 0; i < NStructInt; ++i) sp.idata(i) = m_aos[index].idata(i); - for (int i = 0; i < NArrayInt; ++i) - sp.idata(NStructInt+i) = m_idata[i][index]; + if constexpr(NArrayInt > 0) + for (int i = 0; i < NArrayInt; ++i) + sp.idata(NStructInt+i) = m_idata[i][index]; return sp; } }; @@ -329,7 +350,7 @@ struct ParticleTile using SuperParticleType = Particle; using AoS = ArrayOfStructs; - using ParticleVector = typename AoS::ParticleVector; + //using ParticleVector = typename AoS::ParticleVector; using SoA = StructOfArrays; using RealVector = typename SoA::RealVector; @@ -418,7 +439,12 @@ struct ParticleTile template ::type = 0> RealType& pos (int index, int position_index) & { - ParticleType p(this->getParticleTileData(), index); + static_assert(NArrayReal == T::PTD::NAR, "ParticleTile mismatch in R"); + static_assert(NArrayInt == T::PTD::NAI, "ParticleTile mismatch in I"); + static_assert(0 == T::StorageParticleType::NReal, "ParticleTile 2 mismatch in R"); + static_assert(0 == T::StorageParticleType::NInt, "ParticleTile 2 mismatch in I"); + ParticleTileDataType x=this->getParticleTileData(); + ParticleType p(x, index); return p.pos(position_index); } @@ -733,10 +759,12 @@ struct ParticleTile ParticleTileDataType ptd; ptd.m_aos = m_aos_tile().dataPtr(); - for (int i = 0; i < NArrayReal; ++i) - ptd.m_rdata[i] = 
m_soa_tile.GetRealData(i).dataPtr(); - for (int i = 0; i < NArrayInt; ++i) - ptd.m_idata[i] = m_soa_tile.GetIntData(i).dataPtr(); + if constexpr(NArrayReal > 0) + for (int i = 0; i < NArrayReal; ++i) + ptd.m_rdata[i] = m_soa_tile.GetRealData(i).dataPtr(); + if constexpr(NArrayInt > 0) + for (int i = 0; i < NArrayInt; ++i) + ptd.m_idata[i] = m_soa_tile.GetIntData(i).dataPtr(); ptd.m_size = size(); ptd.m_num_runtime_real = m_runtime_r_ptrs.size(); ptd.m_num_runtime_int = m_runtime_i_ptrs.size(); @@ -788,10 +816,12 @@ struct ParticleTile ConstParticleTileDataType ptd; ptd.m_aos = m_aos_tile().dataPtr(); - for (int i = 0; i < NArrayReal; ++i) - ptd.m_rdata[i] = m_soa_tile.GetRealData(i).dataPtr(); - for (int i = 0; i < NArrayInt; ++i) - ptd.m_idata[i] = m_soa_tile.GetIntData(i).dataPtr(); + if constexpr(NArrayReal > 0) + for (int i = 0; i < NArrayReal; ++i) + ptd.m_rdata[i] = m_soa_tile.GetRealData(i).dataPtr(); + if constexpr(NArrayInt > 0) + for (int i = 0; i < NArrayInt; ++i) + ptd.m_idata[i] = m_soa_tile.GetIntData(i).dataPtr(); ptd.m_size = size(); ptd.m_num_runtime_real = m_runtime_r_cptrs.size(); ptd.m_num_runtime_int = m_runtime_i_cptrs.size(); From 158116cdade83fa405a4c66cf8759acf1276f09b Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Mon, 22 Aug 2022 17:10:27 -0700 Subject: [PATCH 016/111] Correcting the *void error --- Src/Particle/AMReX_ParticleTile.H | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 405fcb4cf94..c585ffeba31 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -28,20 +28,22 @@ struct ParticleTileData using ParticleType = T_ParticleType; using Self = ParticleTileData; - static_assert(!std::is_same>::value || ParticleType::NReal != 0, "ParticleTileData mismatch in R"); - static_assert(!std::is_same>::value || ParticleType::NInt != 0, "ParticleTileData mismatch in R"); - static 
constexpr int NStructReal = ParticleType::NReal; static constexpr int NStructInt = ParticleType::NInt; + static_assert(!std::is_same>::value || ParticleType::NReal != 0, "ParticleTileData mismatch in R"); + static_assert(!std::is_same>::value || ParticleType::NInt != 0, "ParticleTileData mismatch in R"); + using SuperParticleType = Particle; static_assert(!std::is_same>::value || NAR != 0, "ParticleTileData NAR==0"); Long m_size; + ParticleType* AMREX_RESTRICT m_aos; - GpuArray m_rdata; - GpuArray m_idata; + + GpuArray m_rdata; + GpuArray m_idata; int m_num_runtime_real; int m_num_runtime_int; @@ -179,9 +181,11 @@ struct ParticleTileData }; // SOA Particle Structure -template +template struct SoAParticle : SoAParticleBase { + static constexpr int NArrayReal = T_NArrayReal; + static constexpr int NArrayInt = T_NArrayInt; using StorageParticleType = SoAParticleBase; using PTD = ParticleTileData; static constexpr bool is_soa_particle = true; @@ -190,12 +194,6 @@ struct SoAParticle : SoAParticleBase SoAParticle (PTD ptd, int const index) { - - static_assert(NArrayReal == PTD::NAR, "ParticleTileData mismatch in R"); - static_assert(NArrayInt == PTD::NAI, "ParticleTileData mismatch in I"); - static_assert(NArrayReal == 3, "NArrayReal mismatch in R"); - static_assert(NArrayInt == 4, "NArrayInt mismatch in R"); - m_particle_tile_data=ptd; m_index=index; } @@ -209,7 +207,7 @@ struct SoAParticle : SoAParticleBase ParticleIDWrapper id () & { return ParticleIDWrapper(this->m_particle_tile_data.m_idata[0][m_index]); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ConstParticleCPUWrapper cpu () const & { return ConstParticleCPUWrapper(this->m_particle_tile_data.m_idata[1][m_index]); } + ConstParticleCPUWrapper cpu () const & { return ConstParticleCPUWrapper(this-> m_particle_tile_data.m_idata[1][m_index]); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE ConstParticleIDWrapper id () const & { return ConstParticleIDWrapper(this->m_particle_tile_data.m_idata[0][m_index]); } From 
d03bd323f1632af431358457f235cab386860f30 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Thu, 25 Aug 2022 09:54:31 -0700 Subject: [PATCH 017/111] First version of the code that compiles ! --- Src/Particle/AMReX_ParticleContainerI.H | 49 ++++++++++++++++++++++++- Src/Particle/AMReX_ParticleTile.H | 20 ++++++++-- Tests/Particles/SOAParticle/main.cpp | 32 ---------------- 3 files changed, 63 insertions(+), 38 deletions(-) diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index 3c970dac200..b0a81eea62c 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -1,3 +1,42 @@ +#include + +// template +// class A {}; // primary template + +// template +// class A::value>::type> { +// }; // specialization for floating point types +template< class T > +struct is_soa_particle + : std::integral_constant< + bool, + T::is_soa_particle + > {}; + + +template +struct make_particle +{ + template + auto + operator()(PTD, int) + { + // legacy Particle (AoS) + return T_ParticleType(); + } +}; + +template +struct make_particle::value>::type> +{ + template + auto + operator()(PTD ptd, int index) + { + // SoAParticle + return T_ParticleType(ptd, index); + } +}; template class Allocator> @@ -1832,7 +1871,11 @@ RedistributeMPI (std::map >& not_ours, for (int i = 0; i < int(Cnt); ++i) { char* pbuf = ((char*) &recvdata[offset]) + i*superparticle_size; - ParticleType p; + auto& ptile = m_particles[rcv_levs[ipart]][std::make_pair(rcv_grid[ipart], + rcv_tile[ipart])]; + + auto p = make_particle{}(ptile.getParticleTileData(),ipart); + std::memcpy(&p, pbuf, sizeof(ParticleType)); locateParticle(p, pld, lev_min, lev_max, nGrow); rcv_levs[ipart] = pld.m_lev; @@ -1859,7 +1902,8 @@ RedistributeMPI (std::map >& not_ours, rcv_tile[ipart])]; char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size; - ParticleType p; + auto p = make_particle{}(ptile.getParticleTileData(),ipart); + std::memcpy(&p, pbuf, 
sizeof(ParticleType)); pbuf += sizeof(ParticleType); ptile.push_back(p); @@ -1889,6 +1933,7 @@ RedistributeMPI (std::map >& not_ours, ++ipart; } } + #else Vector, Gpu::HostVector > > host_particles; host_particles.reserve(15); diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index c585ffeba31..a795ad07e5c 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -201,16 +201,28 @@ struct SoAParticle : SoAParticleBase //functions to get id and cpu in the SOA data AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ParticleCPUWrapper cpu () & { return ParticleCPUWrapper(this->m_particle_tile_data.m_idata[1][m_index]); } + ParticleCPUWrapper cpu () & { + uint64_t unsigned_cpu_value; + unsigned_cpu_value = (uint64_t) this->m_particle_tile_data.m_idata[1][m_index]; + return ParticleCPUWrapper(unsigned_cpu_value); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ParticleIDWrapper id () & { return ParticleIDWrapper(this->m_particle_tile_data.m_idata[0][m_index]); } + ParticleIDWrapper id () & { + uint64_t unsigned_id_value; + unsigned_id_value = (uint64_t) this->m_particle_tile_data.m_idata[0][m_index]; + return ParticleIDWrapper(unsigned_id_value); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ConstParticleCPUWrapper cpu () const & { return ConstParticleCPUWrapper(this-> m_particle_tile_data.m_idata[1][m_index]); } + ConstParticleCPUWrapper cpu () const & { + uint64_t unsigned_cpu_value; + unsigned_cpu_value = (uint64_t) this->m_particle_tile_data.m_idata[1][m_index]; + return ConstParticleCPUWrapper(unsigned_cpu_value); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ConstParticleIDWrapper id () const & { return ConstParticleIDWrapper(this->m_particle_tile_data.m_idata[0][m_index]); } + ConstParticleIDWrapper id () const & { + uint64_t unsigned_id_value; + unsigned_id_value = (uint64_t) this->m_particle_tile_data.m_idata[0][m_index]; + return ConstParticleIDWrapper(unsigned_id_value); } //functions to get positions of the 
particle in the SOA data diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp index 284c984a5ea..d592ca93d88 100644 --- a/Tests/Particles/SOAParticle/main.cpp +++ b/Tests/Particles/SOAParticle/main.cpp @@ -14,15 +14,6 @@ void addParticles () const int add_num_particles = 5; auto& ptile1 = pc.DefineAndReturnParticleTile(0, 0, 0); - //{ - // ParticleType p(...); - // p.pos(0) = 12.0; - // p.pos(1) = 12.0; - // p.pos(2) = 12.0; - // ptile1.push_back(p); - //} - - //DefineAndReturnParticleTile(0,0,0); for (int i = 0; i < add_num_particles; ++i) { @@ -37,7 +28,6 @@ void addParticles () //ptile1.push_back_int(3, ...std::vector); //ptile1.push_back_int(4, ...std::vector); - pc.Redistribute(); } @@ -46,28 +36,6 @@ int main(int argc, char* argv[]) { amrex::Initialize(argc,argv); { - // for (int n = 0; n < BL_SPACEDIM; n++) - // { - // real_box.setLo(n, 0.0); - // real_box.setHi(n, params.size[n]); - // } - - // IntVect domain_lo(AMREX_D_DECL(0, 0, 0)); - // IntVect domain_hi(AMREX_D_DECL(params.size[0]-1,params.size[1]-1,params.size[2]-1)); - // const Box domain(domain_lo, domain_hi); - - // int coord = 0; - // int is_per[BL_SPACEDIM]; - // for (int i = 0; i < BL_SPACEDIM; i++) - // is_per[i] = params.is_periodic; - // Geometry geom(domain, &real_box, coord, is_per); - - // BoxArray ba(domain); - // ba.maxSize(params.max_grid_size); - // DistributionMapping dm(ba); - - // const int ncells = 1; - //addParticles< ParticleContainer<1,2,3,4> > (); addParticles< ParticleContainerPureSoA<3,4> > (); } From 79ab70ddaba1c3f7c001b8ca2e53ea8306382922 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Thu, 25 Aug 2022 11:17:49 -0700 Subject: [PATCH 018/111] Fixed test ParticleTransformation --- Tests/Particles/ParticleTransformations/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/Particles/ParticleTransformations/main.cpp b/Tests/Particles/ParticleTransformations/main.cpp index cc13b24f32d..7dea6cf4d71 100644 --- 
a/Tests/Particles/ParticleTransformations/main.cpp +++ b/Tests/Particles/ParticleTransformations/main.cpp @@ -39,7 +39,7 @@ class TestParticleContainer public: - using ParticleTileType = ParticleTile; + using ParticleTileType = ParticleTile, NAR, NAI>; TestParticleContainer (const amrex::Geometry & a_geom, From 7f9795294a15ceb092941e41151c3db35ff266ed Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Thu, 25 Aug 2022 16:42:21 -0700 Subject: [PATCH 019/111] First compile with debug mode --- Src/Base/AMReX_TypeTraits.H | 18 +++--- Src/Particle/AMReX_ParIter.H | 85 +++++++++++++++++--------- Src/Particle/AMReX_ParticleContainer.H | 6 +- Src/Particle/AMReX_ParticleTile.H | 3 - 4 files changed, 69 insertions(+), 43 deletions(-) diff --git a/Src/Base/AMReX_TypeTraits.H b/Src/Base/AMReX_TypeTraits.H index 34cc016e85e..f4052c6cdfc 100644 --- a/Src/Base/AMReX_TypeTraits.H +++ b/Src/Base/AMReX_TypeTraits.H @@ -49,17 +49,17 @@ namespace amrex struct IsMultiFabIterator : public std::is_base_of::type {}; #ifdef AMREX_PARTICLES - template class Allocator> - class ParIterBase; + // template class Allocator> + // class ParIterBase; - template class Allocator> - class ParIter; + // template class Allocator> + // class ParIter; - template class Allocator> - class ParConstIter; + // template class Allocator> + // class ParConstIter; class ParticleContainerBase; diff --git a/Src/Particle/AMReX_ParIter.H b/Src/Particle/AMReX_ParIter.H index 486f8a27c5d..b7f43ba3793 100644 --- a/Src/Particle/AMReX_ParIter.H +++ b/Src/Particle/AMReX_ParIter.H @@ -16,14 +16,16 @@ template class Allocator=DefaultAllocator> using ParticleContainer = ParticleContainer_impl, T_NArrayReal, T_NArrayInt, Allocator>; -template class Allocator=DefaultAllocator> -class ParIterBase +class ParIterBase_impl : public MFIter { + private: - using PCType = ParticleContainer; + using PCType = ParticleContainer_impl; using ContainerRef = typename std::conditional::type; using ParticleTileRef = typename std::conditional 
::type; @@ -36,18 +38,20 @@ private: public: - using ContainerType = ParticleContainer; + using ContainerType = ParticleContainer_impl; using ParticleTileType = typename ContainerType::ParticleTileType; using AoS = typename ContainerType::AoS; using SoA = typename ContainerType::SoA; - using ParticleType = typename ContainerType::ParticleType; + using ParticleType = T_ParticleType; using RealVector = typename SoA::RealVector; using IntVector = typename SoA::IntVector; using ParticleVector = typename ContainerType::ParticleVector; + static constexpr int NStructReal = ParticleType::NReal; + static constexpr int NStructInt = ParticleType::NInt; - ParIterBase (ContainerRef pc, int level); + ParIterBase_impl (ContainerRef pc, int level); - ParIterBase (ContainerRef pc, int level, MFItInfo& info); + ParIterBase_impl (ContainerRef pc, int level, MFItInfo& info); #ifdef AMREX_USE_OMP void operator++ () @@ -98,59 +102,62 @@ protected: ContainerRef m_pc; }; -template class Allocator=DefaultAllocator> -class ParIter - : public ParIterBase +class ParIter_impl + : public ParIterBase_impl { public: + using ParticleType=T_ParticleType; + static constexpr int NStructReal = ParticleType::NReal; + static constexpr int NStructInt = ParticleType::NInt; + using ContainerType = ParticleContainer; using ParticleTileType = typename ContainerType::ParticleTileType; using AoS = typename ContainerType::AoS; using SoA = typename ContainerType::SoA; - using ParticleType = typename ContainerType::ParticleType; using RealVector = typename SoA::RealVector; using IntVector = typename SoA::IntVector; - ParIter (ContainerType& pc, int level) - : ParIterBase(pc,level) + ParIter_impl (ContainerType& pc, int level) + : ParIterBase_impl(pc,level) {} - ParIter (ContainerType& pc, int level, MFItInfo& info) - : ParIterBase(pc,level,info) + ParIter_impl (ContainerType& pc, int level, MFItInfo& info) + : ParIterBase_impl(pc,level,info) {} }; -template class Allocator=DefaultAllocator> -class ParConstIter - 
: public ParIterBase +class ParConstIter_impl + : public ParIterBase_impl { public: - using ContainerType = ParticleContainer; using ParticleTileType = typename ContainerType::ParticleTileType; using AoS = typename ContainerType::AoS; using SoA = typename ContainerType::SoA; - using ParticleType = typename ContainerType::ParticleType; using RealVector = typename SoA::RealVector; using IntVector = typename SoA::IntVector; - ParConstIter (ContainerType const& pc, int level) - : ParIterBase(pc,level) + ParConstIter_impl (ContainerType const& pc, int level) + : ParIterBase_impl(pc,level) {} - ParConstIter (ContainerType const& pc, int level, MFItInfo& info) - : ParIterBase(pc,level,info) + ParConstIter_impl (ContainerType const& pc, int level, MFItInfo& info) + : ParIterBase_impl(pc,level,info) {} }; -template class Allocator> -ParIterBase::ParIterBase +ParIterBase_impl::ParIterBase_impl (ContainerRef pc, int level, MFItInfo& info) : MFIter(*pc.m_dummy_mf[level], pc.do_tiling ? info.EnableTiling(pc.tile_size) : info), @@ -199,9 +206,9 @@ ParIterBase } } -template class Allocator> -ParIterBase::ParIterBase +ParIterBase_impl::ParIterBase_impl (ContainerRef pc, int level) : MFIter(*pc.m_dummy_mf[level], @@ -236,6 +243,28 @@ ParIterBase } } +template class Allocator=DefaultAllocator> +using ParIterBase = ParIterBase_impl, T_NArrayReal, T_NArrayInt, Allocator>; + +template class Allocator=DefaultAllocator> +using ParIterBaseSoA = ParIterBase_impl, T_NArrayReal, T_NArrayInt, Allocator>; + +template class Allocator=DefaultAllocator> +using ParConstIter = ParConstIter_impl, T_NArrayReal, T_NArrayInt, Allocator>; + +template class Allocator=DefaultAllocator> +using ParConstIterSoA = ParConstIter_impl, T_NArrayReal, T_NArrayInt, Allocator>; + +template class Allocator=DefaultAllocator> +using ParIter = ParIter_impl, T_NArrayReal, T_NArrayInt, Allocator>; + +template class Allocator=DefaultAllocator> +using ParIterSoA = ParIter_impl, T_NArrayReal, T_NArrayInt, Allocator>; + } 
#endif diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index 6dc5d736193..9f6f17a62f0 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -149,8 +149,8 @@ public: static constexpr int NArrayInt = T_NArrayInt; private: - friend class ParIterBase; - friend class ParIterBase; + friend class ParIterBase_impl; + friend class ParIterBase_impl; public: //! \brief The memory allocator in use. @@ -183,7 +183,7 @@ public: using CharVector = Gpu::DeviceVector; using SendBuffer = Gpu::PolymorphicVector; using ParIterType = ParIter; - using ParConstIterType = ParConstIter; + using ParConstIterType = ParConstIter_impl; //! \brief Default constructor - construct an empty particle container that has no concept //! of a level hierarchy. Must be properly initialized later. diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index a795ad07e5c..7169bc23367 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -31,9 +31,6 @@ struct ParticleTileData static constexpr int NStructReal = ParticleType::NReal; static constexpr int NStructInt = ParticleType::NInt; - static_assert(!std::is_same>::value || ParticleType::NReal != 0, "ParticleTileData mismatch in R"); - static_assert(!std::is_same>::value || ParticleType::NInt != 0, "ParticleTileData mismatch in R"); - using SuperParticleType = Particle; static_assert(!std::is_same>::value || NAR != 0, "ParticleTileData NAR==0"); From 6ab3e315a0192481554a161c250493f78c0b6c82 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Fri, 26 Aug 2022 16:48:14 -0700 Subject: [PATCH 020/111] Correcting Particle Container Type in ParIter.H --- Src/Particle/AMReX_ParIter.H | 10 +++---- Tests/Particles/SOAParticle/main.cpp | 40 +++++++++++++++++++++++++--- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/Src/Particle/AMReX_ParIter.H b/Src/Particle/AMReX_ParIter.H index 
b7f43ba3793..e91080349e2 100644 --- a/Src/Particle/AMReX_ParIter.H +++ b/Src/Particle/AMReX_ParIter.H @@ -4,6 +4,7 @@ #include #include +#include namespace amrex { @@ -12,13 +13,14 @@ template class Allocator> class ParticleContainer_impl; +// for backwards compatibility template class Allocator=DefaultAllocator> using ParticleContainer = ParticleContainer_impl, T_NArrayReal, T_NArrayInt, Allocator>; - template class Allocator=DefaultAllocator> + class ParIterBase_impl : public MFIter { @@ -113,8 +115,7 @@ public: static constexpr int NStructReal = ParticleType::NReal; static constexpr int NStructInt = ParticleType::NInt; - using ContainerType = ParticleContainer; + using ContainerType = ParticleContainer_impl; using ParticleTileType = typename ContainerType::ParticleTileType; using AoS = typename ContainerType::AoS; using SoA = typename ContainerType::SoA; @@ -138,8 +139,7 @@ class ParConstIter_impl public: using ParticleType = T_ParticleType; - using ContainerType = ParticleContainer_impl; + using ContainerType = ParticleContainer_impl; using ParticleTileType = typename ContainerType::ParticleTileType; using AoS = typename ContainerType::AoS; using SoA = typename ContainerType::SoA; diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp index d592ca93d88..a6bdef0d618 100644 --- a/Tests/Particles/SOAParticle/main.cpp +++ b/Tests/Particles/SOAParticle/main.cpp @@ -2,14 +2,27 @@ #include #include #include +#include +#include +#include +#include + +#include using namespace amrex; -template +template class Allocator=DefaultAllocator> void addParticles () { T_PC pc; + int const NReal = pc.NStructReal; + int const NInt = pc.NStructInt; + int const NArrayReal = pc.NArrayReal; + int const NArrayInt = pc.NArrayInt; + using ParticleType = typename T_PC::ParticleType; + using RealVector = amrex::PODVector >; + using IntVector = amrex::PODVector >; const int add_num_particles = 5; @@ -25,8 +38,29 @@ void addParticles () ptile1.id(i) = 1; 
ptile1.cpu(i) = 1; } - //ptile1.push_back_int(3, ...std::vector); - //ptile1.push_back_int(4, ...std::vector); + + int lev=0; + int numparticles=0; + using MyParIter = ParIter_impl; + for (MyParIter pti(pc, lev); pti.isValid(); ++pti) { + const auto& particles = pti.GetArrayOfStructs(); + const auto& tile = pti.GetParticleTile(); + int np = pti.numParticles(); + ParallelFor( np, [=] AMREX_GPU_DEVICE (long ip) + { + tile.pos(ip,0)=1; + }); + } + + for (MyParIter pti(pc, lev); pti.isValid(); ++pti) { + auto& particle_attributes = pti.GetStructOfArrays(); + RealVector& real_comp0 = particle_attributes.GetRealData(0); + IntVector& int_comp1 = particle_attributes.GetIntData(1); + for (int i = 0; i < pti.numParticles(); ++i) { + real_comp0[i] += 1; + int_comp1[i] += 1; + } + } pc.Redistribute(); } From bb9d53ce638b9b93340b82cd4c6199870924ed4b Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Mon, 29 Aug 2022 17:51:20 -0700 Subject: [PATCH 021/111] new main function --- Tests/Particles/SOAParticle/main.cpp | 54 ++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp index a6bdef0d618..c60341d9ce7 100644 --- a/Tests/Particles/SOAParticle/main.cpp +++ b/Tests/Particles/SOAParticle/main.cpp @@ -21,6 +21,7 @@ void addParticles () int const NArrayInt = pc.NArrayInt; using ParticleType = typename T_PC::ParticleType; + using ParticleTileDataType = typename T_PC::ParticleTileType::ParticleTileDataType; using RealVector = amrex::PODVector >; using IntVector = amrex::PODVector >; @@ -40,16 +41,55 @@ void addParticles () } int lev=0; - int numparticles=0; + // int numparticles=0; using MyParIter = ParIter_impl; for (MyParIter pti(pc, lev); pti.isValid(); ++pti) { - const auto& particles = pti.GetArrayOfStructs(); - const auto& tile = pti.GetParticleTile(); - int np = pti.numParticles(); + const int np = pti.numParticles(); + //const auto t_lev = pti.GetLevel(); + //const 
auto index = pti.GetPairIndex(); + // ... + + // preparing access to particle data: AoS + //using PType = ImpactXParticleContainer::ParticleType; + auto& aos = pti.GetArrayOfStructs(); + ParticleType* AMREX_RESTRICT aos_ptr = aos().dataPtr(); + + // preparing access to particle data: SoA of Reals + auto& soa_real = pti.GetStructOfArrays().GetRealData(); + amrex::ParticleReal* const AMREX_RESTRICT part_x = soa_real[0].dataPtr(); + amrex::ParticleReal* const AMREX_RESTRICT part_y = soa_real[1].dataPtr(); + amrex::ParticleReal* const AMREX_RESTRICT part_z = soa_real[2].dataPtr(); + amrex::ParticleReal* const AMREX_RESTRICT part_aaa = soa_real[3].dataPtr(); + auto& soa_int = pti.GetStructOfArrays().GetIntData(); + ParallelFor( np, [=] AMREX_GPU_DEVICE (long ip) - { - tile.pos(ip,0)=1; - }); + { + ParticleType& AMREX_RESTRICT p = aos_ptr[ip]; + p.pos(0) += 1; + p.pos(1) += 1; + p.pos(2) += 1; + + amrex::ParticleReal & AMREX_RESTRICT x = part_x[ip]; + amrex::ParticleReal & AMREX_RESTRICT y = part_y[ip]; + amrex::ParticleReal & AMREX_RESTRICT z = part_z[ip]; + + x += 1.0; + y += 1.0; + z += 1.0; + }); + + // new way of creating + ParticleTileDataType ptd = pti.GetParticleTile().getParticleTileData(); + + ParallelFor( np, [=] AMREX_GPU_DEVICE (long ip) + { + ParticleType p(ptd, ip); + p.pos(0) += 1; + p.pos(1) += 1; + p.pos(2) += 1; + }); + + } for (MyParIter pti(pc, lev); pti.isValid(); ++pti) { From 0c9aa59ebcd5d8cfbb1cb3309026f11c3ae83322 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Tue, 6 Sep 2022 16:14:36 -0700 Subject: [PATCH 022/111] Reset: `AMReX_Array.H` --- Src/Base/AMReX_Array.H | 83 ++---------------------------------------- 1 file changed, 3 insertions(+), 80 deletions(-) diff --git a/Src/Base/AMReX_Array.H b/Src/Base/AMReX_Array.H index f5f7c4c672a..82265a2e21f 100644 --- a/Src/Base/AMReX_Array.H +++ b/Src/Base/AMReX_Array.H @@ -127,85 +127,7 @@ namespace amrex { return p; } - T arr[N]; - }; - - template - struct GpuArray - { - using value_type = T; - 
using reference_type = T&; - - /** - * GpuArray elements are indexed using square brackets, as with any - * other array. - */ - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - const T& operator [] (int) const noexcept { return *static_cast(nullptr); } - - /** - * GpuArray elements are indexed using square brackets, as with any - * other array. - */ - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - T& operator [] (int) noexcept { return *static_cast(nullptr); } - - /** - * Returns a \c const pointer to the underlying data of a GpuArray object. - */ - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - const T* data () const noexcept { return nullptr; } - - /** - * Returns a pointer to the underlying data of a GpuArray object. - */ - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - T* data () noexcept { return nullptr; } - - /** - * Returns the number of elements in the GpuArray object as an - * unsigned integer. - */ - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - static constexpr unsigned int size () noexcept { return 0u; } - - /** - * Returns a \c const pointer address to the first element of the - * GpuArray object. - */ - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - const T* begin () const noexcept { return nullptr; } - - /** - * Returns a const pointer address right after the last element of the - * GpuArray object. - */ - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - const T* end () const noexcept { return nullptr; } - - /** - * Returns a pointer address to the first element of the - * GpuArray object. - */ - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - T* begin () noexcept { return nullptr; } - - /** - * Returns a pointer address right after the last element of the - * GpuArray object. - */ - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - T* end () noexcept { return nullptr; } - - /** - * Fills in all of the elements in the GpuArray object to the same - * value. 
- * - * \param value The fill value - */ - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - void fill ( const T& value ) noexcept - { (void) value; } + T arr[amrex::max(N,1u)]; }; } @@ -974,4 +896,5 @@ namespace amrex } } -#endif \ No newline at end of file +#endif + From 5f2cab647a433e72993e9850a7f6ea2dffd48c3f Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Tue, 6 Sep 2022 17:24:08 -0700 Subject: [PATCH 023/111] Example: Fix Unused Vars --- Tests/Particles/SOAParticle/main.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp index c60341d9ce7..e9b36e7a399 100644 --- a/Tests/Particles/SOAParticle/main.cpp +++ b/Tests/Particles/SOAParticle/main.cpp @@ -15,8 +15,8 @@ template class Allocator=DefaultAllocator> void addParticles () { T_PC pc; - int const NReal = pc.NStructReal; - int const NInt = pc.NStructInt; + // int const NReal = pc.NStructReal; + // int const NInt = pc.NStructInt; int const NArrayReal = pc.NArrayReal; int const NArrayInt = pc.NArrayInt; @@ -72,10 +72,12 @@ void addParticles () amrex::ParticleReal & AMREX_RESTRICT x = part_x[ip]; amrex::ParticleReal & AMREX_RESTRICT y = part_y[ip]; amrex::ParticleReal & AMREX_RESTRICT z = part_z[ip]; + amrex::ParticleReal & AMREX_RESTRICT a = part_aaa[ip]; x += 1.0; y += 1.0; z += 1.0; + a += 1.0; }); // new way of creating From 31523708f857df088d2384cb5ee496d9b8aa2b60 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Mon, 12 Sep 2022 09:47:13 -0700 Subject: [PATCH 024/111] Creating MakeParticle file to adapt to the new particle constructor --- Src/Particle/AMReX_MakeParticle.H | 33 ++++++++++++++++++ Src/Particle/AMReX_ParticleCommunication.H | 4 ++- Src/Particle/AMReX_ParticleContainerI.H | 39 +--------------------- 3 files changed, 37 insertions(+), 39 deletions(-) create mode 100644 Src/Particle/AMReX_MakeParticle.H diff --git a/Src/Particle/AMReX_MakeParticle.H b/Src/Particle/AMReX_MakeParticle.H new file mode 
100644 index 00000000000..efe9f0cdbcc --- /dev/null +++ b/Src/Particle/AMReX_MakeParticle.H @@ -0,0 +1,33 @@ +#include + +template< class T > +struct is_soa_particle + : std::integral_constant< + bool, + T::is_soa_particle + > {}; + + +template +struct make_particle +{ + template + auto + operator()(PTD, int) + { + // legacy Particle (AoS) + return T_ParticleType(); + } +}; + +template +struct make_particle::value>::type> +{ + template + auto + operator()(PTD ptd, int index) + { + // SoAParticle + return T_ParticleType(ptd, index); + } +}; \ No newline at end of file diff --git a/Src/Particle/AMReX_ParticleCommunication.H b/Src/Particle/AMReX_ParticleCommunication.H index 83604915f18..8dca8804882 100644 --- a/Src/Particle/AMReX_ParticleCommunication.H +++ b/Src/Particle/AMReX_ParticleCommunication.H @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -367,7 +368,8 @@ void packBuffer (const PC& pc, const ParticleCopyOp& op, const ParticleCopyPlan& if (do_periodic_shift) { - ParticleType p; + auto p = make_particle{}(ptd,kv); + //ParticleType p; amrex::Gpu::memcpy(&p, &p_snd_buffer[dst_offset], sizeof(ParticleType)); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index b0a81eea62c..bf4b5d878ca 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -1,42 +1,5 @@ #include - -// template -// class A {}; // primary template - -// template -// class A::value>::type> { -// }; // specialization for floating point types -template< class T > -struct is_soa_particle - : std::integral_constant< - bool, - T::is_soa_particle - > {}; - - -template -struct make_particle -{ - template - auto - operator()(PTD, int) - { - // legacy Particle (AoS) - return T_ParticleType(); - } -}; - -template -struct make_particle::value>::type> -{ - template - auto - operator()(PTD ptd, int index) - { - // SoAParticle - return 
T_ParticleType(ptd, index); - } -}; +#include template class Allocator> From 68d46527506fd5efe4425b723b4d7d6a2af51f17 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Mon, 12 Sep 2022 15:52:49 -0700 Subject: [PATCH 025/111] Correcting ParIter.H file header --- Src/Particle/AMReX_ParIter.H | 1 - Src/Particle/AMReX_ParticleContainer.H | 12 +++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/Src/Particle/AMReX_ParIter.H b/Src/Particle/AMReX_ParIter.H index e91080349e2..0796940d524 100644 --- a/Src/Particle/AMReX_ParIter.H +++ b/Src/Particle/AMReX_ParIter.H @@ -4,7 +4,6 @@ #include #include -#include namespace amrex { diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index 9f6f17a62f0..3c83b393b90 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -35,8 +35,8 @@ #include #include #include -#include #include +#include #ifdef AMREX_LAZY #include @@ -119,6 +119,11 @@ struct ParticleInitType std::array int_array_data; }; +template class Allocator> + +class ParIterBase_impl; + /** * \brief A distributed container for Particles sorted onto the levels, grids, * and tiles of a block-structured AMR hierarchy. @@ -134,6 +139,7 @@ struct ParticleInitType */ template class Allocator=DefaultAllocator> + class ParticleContainer_impl : public ParticleContainerBase { public: @@ -167,7 +173,7 @@ public: RealDescriptor ParticleRealDescriptor = FPC::Native64RealDescriptor(); #endif - using ParticleContainerType = ParticleContainer; + using ParticleContainerType = ParticleContainer_impl; using ParticleTileType = ParticleTile; using ParticleInitData = ParticleInitType; @@ -182,7 +188,7 @@ public: using ParticleVector = typename AoS::ParticleVector; using CharVector = Gpu::DeviceVector; using SendBuffer = Gpu::PolymorphicVector; - using ParIterType = ParIter; + using ParIterType = ParIter_impl; using ParConstIterType = ParConstIter_impl; //! 
\brief Default constructor - construct an empty particle container that has no concept From 090e7625de98eb9a3661463936abdd18718bf9e1 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Mon, 12 Sep 2022 16:13:13 -0700 Subject: [PATCH 026/111] Forward Declarations in ParIter.H --- Src/Particle/AMReX_ParIter.H | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Src/Particle/AMReX_ParIter.H b/Src/Particle/AMReX_ParIter.H index 0796940d524..deee365a3e4 100644 --- a/Src/Particle/AMReX_ParIter.H +++ b/Src/Particle/AMReX_ParIter.H @@ -12,6 +12,12 @@ template class Allocator> class ParticleContainer_impl; +template +struct Particle; + +template +struct SoAParticle; + // for backwards compatibility template class Allocator=DefaultAllocator> From 1d7a48d63e2a7ba369a8bd58367fb24b18488c8c Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Tue, 13 Sep 2022 08:44:14 -0700 Subject: [PATCH 027/111] Add NexId function to the SoAParticle Structure --- Src/Particle/AMReX_ParticleTile.H | 55 +++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 7169bc23367..a265abb3f7f 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -189,6 +189,8 @@ struct SoAParticle : SoAParticleBase using RealType = ParticleReal; + static Long the_next_id; + SoAParticle (PTD ptd, int const index) { m_particle_tile_data=ptd; @@ -240,6 +242,20 @@ struct SoAParticle : SoAParticleBase return this->m_particle_tile_data.m_rdata[position_index][m_index]; } + static Long NextID (); + + /** + * \brief This version can only be used inside omp critical. + */ + static Long UnprotectedNextID (); + + /** + * \brief Reset on restart. 
+ * + * \param nextid + */ + static void NextID (Long nextid); + private : static_assert(std::is_trivially_copyable>(), "ParticleTileData is not trivially copyable"); @@ -248,6 +264,45 @@ struct SoAParticle : SoAParticleBase int m_index; }; +template Long SoAParticle::the_next_id = 1; + +template +Long +SoAParticle::NextID () +{ + Long next; +// we should be able to test on _OPENMP < 201107 for capture (version 3.1) +// but we must work around a bug in gcc < 4.9 +#if defined(AMREX_USE_OMP) && defined(_OPENMP) && _OPENMP < 201307 +#pragma omp critical (amrex_particle_nextid) +#elif defined(AMREX_USE_OMP) +#pragma omp atomic capture +#endif + next = the_next_id++; + + if (next > LastParticleID) + amrex::Abort("SoAParticle::NextID() -- too many particles"); + + return next; +} + +template +Long +SoAParticle::UnprotectedNextID () +{ + Long next = the_next_id++; + if (next > LastParticleID) + amrex::Abort("SoAParticle::NextID() -- too many particles"); + return next; +} + +template +void +SoAParticle::NextID (Long nextid) +{ + the_next_id = nextid; +} + template struct ConstParticleTileData { From 3ee712fcd773a5cacd0cc707fd8a3fcb5f41bb31 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Tue, 13 Sep 2022 10:58:52 -0700 Subject: [PATCH 028/111] Replace the ConstParticleTileData at the top --- Src/Particle/AMReX_ParticleTile.H | 182 +++++++++++++++--------------- 1 file changed, 91 insertions(+), 91 deletions(-) diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index a265abb3f7f..76c1c556df0 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -177,6 +177,97 @@ struct ParticleTileData } }; +template +struct ConstParticleTileData +{ + static constexpr int NAR = NArrayReal; + static constexpr int NAI = NArrayInt; + using ParticleType = T_ParticleType; + + static constexpr int NStructReal = ParticleType::NReal; + static constexpr int NStructInt = ParticleType::NInt; + + using SuperParticleType = Particle; 
+ + Long m_size; + const ParticleType* AMREX_RESTRICT m_aos; + GpuArray m_rdata; + GpuArray m_idata; + + int m_num_runtime_real; + int m_num_runtime_int; + const ParticleReal* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_rdata; + const int* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_idata; + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void packParticleData(char* buffer, int src_index, Long dst_offset, + const int* comm_real, const int * comm_int) const noexcept + { + AMREX_ASSERT(src_index < m_size); + auto dst = buffer + dst_offset; + memcpy(dst, m_aos + src_index, sizeof(ParticleType)); + dst += sizeof(ParticleType); + int array_start_index = AMREX_SPACEDIM + NStructReal; + for (int i = 0; i < NArrayReal; ++i) + { + if (comm_real[array_start_index + i]) + { + memcpy(dst, m_rdata[i] + src_index, sizeof(ParticleReal)); + dst += sizeof(ParticleReal); + } + } + int runtime_start_index = AMREX_SPACEDIM + NStructReal + NArrayReal; + for (int i = 0; i < m_num_runtime_real; ++i) + { + if (comm_real[runtime_start_index + i]) + { + memcpy(dst, m_runtime_rdata[i] + src_index, sizeof(ParticleReal)); + dst += sizeof(ParticleReal); + } + } + array_start_index = 2 + NStructInt; + for (int i = 0; i < NArrayInt; ++i) + { + if (comm_int[array_start_index + i]) + { + memcpy(dst, m_idata[i] + src_index, sizeof(int)); + dst += sizeof(int); + } + } + runtime_start_index = 2 + NStructInt + NArrayInt; + for (int i = 0; i < m_num_runtime_int; ++i) + { + if (comm_int[runtime_start_index + i]) + { + memcpy(dst, m_runtime_idata[i] + src_index, sizeof(int)); + dst += sizeof(int); + } + } + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + SuperParticleType getSuperParticle (int index) const noexcept + { + AMREX_ASSERT(index < m_size); + SuperParticleType sp; + for (int i = 0; i < AMREX_SPACEDIM; ++i) + sp.pos(i) = m_aos[index].pos(i); + for (int i = 0; i < NStructReal; ++i) + sp.rdata(i) = m_aos[index].rdata(i); + if constexpr(NArrayReal > 0) + for (int i = 0; i < NArrayReal; ++i) + 
sp.rdata(NStructReal+i) = m_rdata[i][index]; + sp.id() = m_aos[index].id(); + sp.cpu() = m_aos[index].cpu(); + for (int i = 0; i < NStructInt; ++i) + sp.idata(i) = m_aos[index].idata(i); + if constexpr(NArrayInt > 0) + for (int i = 0; i < NArrayInt; ++i) + sp.idata(NStructInt+i) = m_idata[i][index]; + return sp; + } +}; + // SOA Particle Structure template struct SoAParticle : SoAParticleBase @@ -303,97 +394,6 @@ SoAParticle::NextID (Long nextid) the_next_id = nextid; } -template -struct ConstParticleTileData -{ - static constexpr int NAR = NArrayReal; - static constexpr int NAI = NArrayInt; - using ParticleType = T_ParticleType; - - static constexpr int NStructReal = ParticleType::NReal; - static constexpr int NStructInt = ParticleType::NInt; - - using SuperParticleType = Particle; - - Long m_size; - const ParticleType* AMREX_RESTRICT m_aos; - GpuArray m_rdata; - GpuArray m_idata; - - int m_num_runtime_real; - int m_num_runtime_int; - const ParticleReal* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_rdata; - const int* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_idata; - - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - void packParticleData(char* buffer, int src_index, Long dst_offset, - const int* comm_real, const int * comm_int) const noexcept - { - AMREX_ASSERT(src_index < m_size); - auto dst = buffer + dst_offset; - memcpy(dst, m_aos + src_index, sizeof(ParticleType)); - dst += sizeof(ParticleType); - int array_start_index = AMREX_SPACEDIM + NStructReal; - for (int i = 0; i < NArrayReal; ++i) - { - if (comm_real[array_start_index + i]) - { - memcpy(dst, m_rdata[i] + src_index, sizeof(ParticleReal)); - dst += sizeof(ParticleReal); - } - } - int runtime_start_index = AMREX_SPACEDIM + NStructReal + NArrayReal; - for (int i = 0; i < m_num_runtime_real; ++i) - { - if (comm_real[runtime_start_index + i]) - { - memcpy(dst, m_runtime_rdata[i] + src_index, sizeof(ParticleReal)); - dst += sizeof(ParticleReal); - } - } - array_start_index = 2 + NStructInt; - for (int i = 0; i < 
NArrayInt; ++i) - { - if (comm_int[array_start_index + i]) - { - memcpy(dst, m_idata[i] + src_index, sizeof(int)); - dst += sizeof(int); - } - } - runtime_start_index = 2 + NStructInt + NArrayInt; - for (int i = 0; i < m_num_runtime_int; ++i) - { - if (comm_int[runtime_start_index + i]) - { - memcpy(dst, m_runtime_idata[i] + src_index, sizeof(int)); - dst += sizeof(int); - } - } - } - - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - SuperParticleType getSuperParticle (int index) const noexcept - { - AMREX_ASSERT(index < m_size); - SuperParticleType sp; - for (int i = 0; i < AMREX_SPACEDIM; ++i) - sp.pos(i) = m_aos[index].pos(i); - for (int i = 0; i < NStructReal; ++i) - sp.rdata(i) = m_aos[index].rdata(i); - if constexpr(NArrayReal > 0) - for (int i = 0; i < NArrayReal; ++i) - sp.rdata(NStructReal+i) = m_rdata[i][index]; - sp.id() = m_aos[index].id(); - sp.cpu() = m_aos[index].cpu(); - for (int i = 0; i < NStructInt; ++i) - sp.idata(i) = m_aos[index].idata(i); - if constexpr(NArrayInt > 0) - for (int i = 0; i < NArrayInt; ++i) - sp.idata(NStructInt+i) = m_idata[i][index]; - return sp; - } -}; - template class Allocator=DefaultAllocator> struct ParticleTile From 52e90aee74d51a1cb912df8fb2338d9a94c8fbb3 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Wed, 14 Sep 2022 10:42:12 -0700 Subject: [PATCH 029/111] Add rdata and idata to SoAParticle --- Src/Particle/AMReX_ParticleTile.H | 222 +++++++++++++++++------------- 1 file changed, 130 insertions(+), 92 deletions(-) diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 76c1c556df0..8a06beee143 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -139,6 +139,7 @@ struct ParticleTileData } } + //template ::type = 0> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE SuperParticleType getSuperParticle (int index) const noexcept { @@ -177,97 +178,6 @@ struct ParticleTileData } }; -template -struct ConstParticleTileData -{ - static constexpr int NAR = NArrayReal; 
- static constexpr int NAI = NArrayInt; - using ParticleType = T_ParticleType; - - static constexpr int NStructReal = ParticleType::NReal; - static constexpr int NStructInt = ParticleType::NInt; - - using SuperParticleType = Particle; - - Long m_size; - const ParticleType* AMREX_RESTRICT m_aos; - GpuArray m_rdata; - GpuArray m_idata; - - int m_num_runtime_real; - int m_num_runtime_int; - const ParticleReal* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_rdata; - const int* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_idata; - - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - void packParticleData(char* buffer, int src_index, Long dst_offset, - const int* comm_real, const int * comm_int) const noexcept - { - AMREX_ASSERT(src_index < m_size); - auto dst = buffer + dst_offset; - memcpy(dst, m_aos + src_index, sizeof(ParticleType)); - dst += sizeof(ParticleType); - int array_start_index = AMREX_SPACEDIM + NStructReal; - for (int i = 0; i < NArrayReal; ++i) - { - if (comm_real[array_start_index + i]) - { - memcpy(dst, m_rdata[i] + src_index, sizeof(ParticleReal)); - dst += sizeof(ParticleReal); - } - } - int runtime_start_index = AMREX_SPACEDIM + NStructReal + NArrayReal; - for (int i = 0; i < m_num_runtime_real; ++i) - { - if (comm_real[runtime_start_index + i]) - { - memcpy(dst, m_runtime_rdata[i] + src_index, sizeof(ParticleReal)); - dst += sizeof(ParticleReal); - } - } - array_start_index = 2 + NStructInt; - for (int i = 0; i < NArrayInt; ++i) - { - if (comm_int[array_start_index + i]) - { - memcpy(dst, m_idata[i] + src_index, sizeof(int)); - dst += sizeof(int); - } - } - runtime_start_index = 2 + NStructInt + NArrayInt; - for (int i = 0; i < m_num_runtime_int; ++i) - { - if (comm_int[runtime_start_index + i]) - { - memcpy(dst, m_runtime_idata[i] + src_index, sizeof(int)); - dst += sizeof(int); - } - } - } - - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - SuperParticleType getSuperParticle (int index) const noexcept - { - AMREX_ASSERT(index < m_size); - SuperParticleType sp; - for 
(int i = 0; i < AMREX_SPACEDIM; ++i) - sp.pos(i) = m_aos[index].pos(i); - for (int i = 0; i < NStructReal; ++i) - sp.rdata(i) = m_aos[index].rdata(i); - if constexpr(NArrayReal > 0) - for (int i = 0; i < NArrayReal; ++i) - sp.rdata(NStructReal+i) = m_rdata[i][index]; - sp.id() = m_aos[index].id(); - sp.cpu() = m_aos[index].cpu(); - for (int i = 0; i < NStructInt; ++i) - sp.idata(i) = m_aos[index].idata(i); - if constexpr(NArrayInt > 0) - for (int i = 0; i < NArrayInt; ++i) - sp.idata(NStructInt+i) = m_idata[i][index]; - return sp; - } -}; - // SOA Particle Structure template struct SoAParticle : SoAParticleBase @@ -287,6 +197,19 @@ struct SoAParticle : SoAParticleBase m_particle_tile_data=ptd; m_index=index; } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RealType& rdata (int index) & + { + return this->m_particle_tile_data.m_rdata[index]; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RealType& idata (int index) & + { + return this->m_particle_tile_data.m_idata[index]; + } + //functions to get id and cpu in the SOA data @@ -394,6 +317,122 @@ SoAParticle::NextID (Long nextid) the_next_id = nextid; } +template +struct ConstParticleTileData +{ + static constexpr int NAR = NArrayReal; + static constexpr int NAI = NArrayInt; + using ParticleType = T_ParticleType; + + static constexpr int NStructReal = ParticleType::NReal; + static constexpr int NStructInt = ParticleType::NInt; + + using SuperParticleType = Particle; + + Long m_size; + const ParticleType* AMREX_RESTRICT m_aos; + + GpuArray m_rdata; + GpuArray m_idata; + + int m_num_runtime_real; + int m_num_runtime_int; + const ParticleReal* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_rdata; + const int* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_idata; + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void packParticleData(char* buffer, int src_index, Long dst_offset, + const int* comm_real, const int * comm_int) const noexcept + { + AMREX_ASSERT(src_index < m_size); + auto dst = buffer + dst_offset; + memcpy(dst, m_aos 
+ src_index, sizeof(ParticleType)); + dst += sizeof(ParticleType); + int array_start_index = AMREX_SPACEDIM + NStructReal; + for (int i = 0; i < NArrayReal; ++i) + { + if (comm_real[array_start_index + i]) + { + memcpy(dst, m_rdata[i] + src_index, sizeof(ParticleReal)); + dst += sizeof(ParticleReal); + } + } + int runtime_start_index = AMREX_SPACEDIM + NStructReal + NArrayReal; + for (int i = 0; i < m_num_runtime_real; ++i) + { + if (comm_real[runtime_start_index + i]) + { + memcpy(dst, m_runtime_rdata[i] + src_index, sizeof(ParticleReal)); + dst += sizeof(ParticleReal); + } + } + array_start_index = 2 + NStructInt; + for (int i = 0; i < NArrayInt; ++i) + { + if (comm_int[array_start_index + i]) + { + memcpy(dst, m_idata[i] + src_index, sizeof(int)); + dst += sizeof(int); + } + } + runtime_start_index = 2 + NStructInt + NArrayInt; + for (int i = 0; i < m_num_runtime_int; ++i) + { + if (comm_int[runtime_start_index + i]) + { + memcpy(dst, m_runtime_idata[i] + src_index, sizeof(int)); + dst += sizeof(int); + } + } + } + + template ::type = 0> + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + SuperParticleType getSuperParticle (int index) const noexcept + { + AMREX_ASSERT(index < m_size); + SuperParticleType sp; + for (int i = 0; i < AMREX_SPACEDIM; ++i) + sp.pos(i) = m_aos[index].pos(i); + for (int i = 0; i < NStructReal; ++i) + sp.rdata(i) = m_aos[index].rdata(i); + if constexpr(NArrayReal > 0) + for (int i = 0; i < NArrayReal; ++i) + sp.rdata(NStructReal+i) = m_rdata[i][index]; + sp.id() = m_aos[index].id(); + sp.cpu() = m_aos[index].cpu(); + for (int i = 0; i < NStructInt; ++i) + sp.idata(i) = m_aos[index].idata(i); + if constexpr(NArrayInt > 0) + for (int i = 0; i < NArrayInt; ++i) + sp.idata(NStructInt+i) = m_idata[i][index]; + return sp; + } + + template ::type = 0> + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + SuperParticleType getSuperParticle (int index) const noexcept + { + AMREX_ASSERT(index < m_size); + SuperParticleType sp; + for (int i = 0; i < 
AMREX_SPACEDIM; ++i) + sp.pos(i) = m_aos[index].pos(i); + for (int i = 0; i < NStructReal; ++i) + sp.rdata(i) = m_aos[index].m_particle_tile_data.rdata(i); + if constexpr(NArrayReal > 0) + for (int i = 0; i < NArrayReal; ++i) + sp.rdata(NStructReal+i) = m_rdata[i][index]; + sp.id() = m_aos[index].id(); + sp.cpu() = m_aos[index].cpu(); + for (int i = 0; i < NStructInt; ++i) + sp.idata(i) = m_aos[index].m_particle_tile_data.idata(i); + if constexpr(NArrayInt > 0) + for (int i = 0; i < NArrayInt; ++i) + sp.idata(NStructInt+i) = m_idata[i][index]; + return sp; + } +}; + template class Allocator=DefaultAllocator> struct ParticleTile @@ -496,7 +535,6 @@ struct ParticleTile return p.id(); } - // Get positions data template ::type = 0> From 6c392782bee67e34d848b265b08985afec5f0915 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Wed, 14 Sep 2022 11:10:24 -0700 Subject: [PATCH 030/111] deleted one redundance getsuperparticle --- Src/Particle/AMReX_ParticleTile.H | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 8a06beee143..e0dc608d739 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -386,7 +386,6 @@ struct ConstParticleTileData } } - template ::type = 0> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE SuperParticleType getSuperParticle (int index) const noexcept { @@ -408,29 +407,6 @@ struct ConstParticleTileData sp.idata(NStructInt+i) = m_idata[i][index]; return sp; } - - template ::type = 0> - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - SuperParticleType getSuperParticle (int index) const noexcept - { - AMREX_ASSERT(index < m_size); - SuperParticleType sp; - for (int i = 0; i < AMREX_SPACEDIM; ++i) - sp.pos(i) = m_aos[index].pos(i); - for (int i = 0; i < NStructReal; ++i) - sp.rdata(i) = m_aos[index].m_particle_tile_data.rdata(i); - if constexpr(NArrayReal > 0) - for (int i = 0; i < NArrayReal; ++i) - sp.rdata(NStructReal+i) = 
m_rdata[i][index]; - sp.id() = m_aos[index].id(); - sp.cpu() = m_aos[index].cpu(); - for (int i = 0; i < NStructInt; ++i) - sp.idata(i) = m_aos[index].m_particle_tile_data.idata(i); - if constexpr(NArrayInt > 0) - for (int i = 0; i < NArrayInt; ++i) - sp.idata(NStructInt+i) = m_idata[i][index]; - return sp; - } }; template Date: Wed, 14 Sep 2022 11:28:54 -0700 Subject: [PATCH 031/111] Write GetSuperParticle for SoAParticle --- Src/Particle/AMReX_ParticleTile.H | 58 +++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index e0dc608d739..be8fe059386 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -139,7 +139,7 @@ struct ParticleTileData } } - //template ::type = 0> + template ::type = 0> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE SuperParticleType getSuperParticle (int index) const noexcept { @@ -160,6 +160,27 @@ struct ParticleTileData return sp; } + template ::type = 0> + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + SuperParticleType getSuperParticle (int index) const noexcept + { + AMREX_ASSERT(index < m_size); + SuperParticleType sp; + for (int i = 0; i < AMREX_SPACEDIM; ++i) + sp.pos(i) = m_rdata[i][index]; + for (int i = 0; i < NStructReal; ++i) + sp.rdata(i) = m_rdata[i][index]; + for (int i = 0; i < NAR; ++i) + sp.rdata(NStructReal+i) = m_rdata[i][index]; + sp.id() = m_idata[0][index]; + sp.cpu() = m_idata[1][index]; + for (int i = 0; i < NStructInt; ++i) + sp.idata(i) = m_idata[i][index]; + for (int i = 0; i < NAI; ++i) + sp.idata(NStructInt+i) = m_idata[i][index]; + return sp; + } + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void setSuperParticle (const SuperParticleType& sp, int index) const noexcept { @@ -197,19 +218,6 @@ struct SoAParticle : SoAParticleBase m_particle_tile_data=ptd; m_index=index; } - - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - RealType& rdata (int index) & - { - return 
this->m_particle_tile_data.m_rdata[index]; - } - - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - RealType& idata (int index) & - { - return this->m_particle_tile_data.m_idata[index]; - } - //functions to get id and cpu in the SOA data @@ -386,6 +394,7 @@ struct ConstParticleTileData } } + template ::type = 0> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE SuperParticleType getSuperParticle (int index) const noexcept { @@ -407,6 +416,27 @@ struct ConstParticleTileData sp.idata(NStructInt+i) = m_idata[i][index]; return sp; } + + template ::type = 0> + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + SuperParticleType getSuperParticle (int index) const noexcept + { + AMREX_ASSERT(index < m_size); + SuperParticleType sp; + for (int i = 0; i < AMREX_SPACEDIM; ++i) + sp.pos(i) = m_rdata[i][index]; + for (int i = 0; i < NStructReal; ++i) + sp.rdata(i) = m_rdata[i][index]; + for (int i = 0; i < NAR; ++i) + sp.rdata(NStructReal+i) = m_rdata[i][index]; + sp.id() = m_idata[0][index]; + sp.cpu() = m_idata[1][index]; + for (int i = 0; i < NStructInt; ++i) + sp.idata(i) = m_idata[i][index]; + for (int i = 0; i < NAI; ++i) + sp.idata(NStructInt+i) = m_idata[i][index]; + return sp; + } }; template Date: Mon, 19 Sep 2022 16:37:36 -0700 Subject: [PATCH 032/111] Fix the ReduceOp for the SoA Particle struct --- Src/Particle/AMReX_ParticleContainer.H | 2 +- Src/Particle/AMReX_ParticleTile.H | 26 +++++++++++++++++++++++-- Tests/Particles/SOAParticle/main.cpp | 27 ++++++++++++++++++++++---- 3 files changed, 48 insertions(+), 7 deletions(-) diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index 3c83b393b90..46cf3f82070 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -1249,7 +1249,7 @@ public: /** type trait to translate one particle container to another, with changed allocator */ template class NewAllocator=amrex::DefaultAllocator> - using ContainerLike = amrex::ParticleContainer; + using ContainerLike = 
amrex::ParticleContainer_impl; /** Create an empty particle container * diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index be8fe059386..717f317eba0 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -42,6 +42,16 @@ struct ParticleTileData GpuArray m_rdata; GpuArray m_idata; + amrex::Real Get_rdata(int index, int particle_index) + { + return this->m_rdata[index][particle_index]; + } + + int Get_idata(int index, int particle_index) + { + return this->m_idata[index][particle_index]; + } + int m_num_runtime_real; int m_num_runtime_int; ParticleReal* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_rdata; @@ -218,6 +228,7 @@ struct SoAParticle : SoAParticleBase m_particle_tile_data=ptd; m_index=index; } + //functions to get id and cpu in the SOA data @@ -343,6 +354,16 @@ struct ConstParticleTileData GpuArray m_rdata; GpuArray m_idata; + auto const* rdata(const int attribute_index) const + { + return this->m_rdata[attribute_index]; + } + + auto const* idata(const int attribute_index) const + { + return this->m_idata[attribute_index]; + } + int m_num_runtime_real; int m_num_runtime_int; const ParticleReal* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_rdata; @@ -462,9 +483,10 @@ struct ParticleTile using SoA = StructOfArrays; using RealVector = typename SoA::RealVector; using IntVector = typename SoA::IntVector; + using StorageParticleType = typename ParticleType::StorageParticleType; - using ParticleTileDataType = ParticleTileData; - using ConstParticleTileDataType = ConstParticleTileData; + using ParticleTileDataType = ParticleTileData; + using ConstParticleTileDataType = ConstParticleTileData; ParticleTile () : m_defined(false) diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp index e9b36e7a399..13609e39e83 100644 --- a/Tests/Particles/SOAParticle/main.cpp +++ b/Tests/Particles/SOAParticle/main.cpp @@ -15,8 +15,8 @@ template class Allocator=DefaultAllocator> void 
addParticles () { T_PC pc; - // int const NReal = pc.NStructReal; - // int const NInt = pc.NStructInt; + int const NReal = pc.NStructReal; + int const NInt = pc.NStructInt; int const NArrayReal = pc.NArrayReal; int const NArrayInt = pc.NArrayInt; @@ -24,6 +24,7 @@ void addParticles () using ParticleTileDataType = typename T_PC::ParticleTileType::ParticleTileDataType; using RealVector = amrex::PODVector >; using IntVector = amrex::PODVector >; + using SPType = typename T_PC::SuperParticleType; const int add_num_particles = 5; @@ -62,6 +63,7 @@ void addParticles () amrex::ParticleReal* const AMREX_RESTRICT part_aaa = soa_real[3].dataPtr(); auto& soa_int = pti.GetStructOfArrays().GetIntData(); + // Iterating over old Particles ParallelFor( np, [=] AMREX_GPU_DEVICE (long ip) { ParticleType& AMREX_RESTRICT p = aos_ptr[ip]; @@ -80,7 +82,7 @@ void addParticles () a += 1.0; }); - // new way of creating + // Iterating over SoA Particles ParticleTileDataType ptd = pti.GetParticleTile().getParticleTileData(); ParallelFor( np, [=] AMREX_GPU_DEVICE (long ip) @@ -105,6 +107,24 @@ void addParticles () } pc.Redistribute(); + + // Reduce for SoA Particle Struct + using PTDType = typename T_PC::ParticleTileType::ConstParticleTileDataType; + amrex::ReduceOps reduce_ops; + auto r = amrex::ParticleReduce> ( + pc, [=] AMREX_GPU_DEVICE (const PTDType& ptd, const int i) noexcept + -> amrex::GpuTuple + { + + const amrex::Real a = ptd.rdata(1)[i]; + const amrex::Real b = ptd.rdata(2)[i]; + const int c = ptd.idata(1)[i]; + return {a, b, c}; + }, reduce_ops); + + AMREX_ALWAYS_ASSERT(amrex::get<0>(r) == amrex::Real(std::pow(256, AMREX_SPACEDIM))); + AMREX_ALWAYS_ASSERT(amrex::get<1>(r) == 2.0); + AMREX_ALWAYS_ASSERT(amrex::get<2>(r) == 1); } @@ -112,7 +132,6 @@ int main(int argc, char* argv[]) { amrex::Initialize(argc,argv); { - //addParticles< ParticleContainer<1,2,3,4> > (); addParticles< ParticleContainerPureSoA<3,4> > (); } amrex::Finalize(); From ada7a1f6c00426072016138e456b891527a9bde6 
Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Wed, 21 Sep 2022 10:57:55 -0700 Subject: [PATCH 033/111] Function to get rdata and idata from ParticleTileData --- Src/Particle/AMReX_ParticleTile.H | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 717f317eba0..340e3d7a0c8 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -42,14 +42,14 @@ struct ParticleTileData GpuArray m_rdata; GpuArray m_idata; - amrex::Real Get_rdata(int index, int particle_index) + auto * rdata(const int attribute_index) { - return this->m_rdata[index][particle_index]; + return this->m_rdata[attribute_index]; } - int Get_idata(int index, int particle_index) + auto * idata(const int attribute_index) { - return this->m_idata[index][particle_index]; + return this->m_idata[attribute_index]; } int m_num_runtime_real; From 58e72747dc3586d6c15c935db6f7124f74d0965d Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Thu, 22 Sep 2022 16:07:50 -0700 Subject: [PATCH 034/111] Adding test to SoAParticle main.cpp --- Tests/Particles/SOAParticle/main.cpp | 33 ++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp index 13609e39e83..31b8301e027 100644 --- a/Tests/Particles/SOAParticle/main.cpp +++ b/Tests/Particles/SOAParticle/main.cpp @@ -96,7 +96,9 @@ void addParticles () } - for (MyParIter pti(pc, lev); pti.isValid(); ++pti) { + auto tmp = pc.template make_alike(); + + for (MyParIter pti(tmp, lev); pti.isValid(); ++pti) { auto& particle_attributes = pti.GetStructOfArrays(); RealVector& real_comp0 = particle_attributes.GetRealData(0); IntVector& int_comp1 = particle_attributes.GetIntData(1); @@ -106,9 +108,33 @@ void addParticles () } } - pc.Redistribute(); + tmp.Redistribute(); + + using ConstPTDType = typename 
T_PC::ParticleTileType::ConstParticleTileDataType; + amrex::ReduceOps reduce_ops; + auto r = amrex::ParticleReduce< + amrex::ReduceData< + amrex::ParticleReal, amrex::ParticleReal, amrex::ParticleReal, + amrex::ParticleReal, amrex::ParticleReal, amrex::ParticleReal, + amrex::ParticleReal> + >( + pc, + [=] AMREX_GPU_DEVICE(const ConstPTDType& ptd, const int i) noexcept + { + + const amrex::ParticleReal x = ptd.rdata(0)[i]; + const amrex::ParticleReal y = ptd.rdata(1)[i]; + const amrex::ParticleReal z = ptd.rdata(2)[i]; + + amrex::ParticleReal const w = ptd.rdata(1)[i]; + + return amrex::makeTuple(x, x*x, y, y*y, z, z*z, w); + }, + reduce_ops + ); // Reduce for SoA Particle Struct + /* using PTDType = typename T_PC::ParticleTileType::ConstParticleTileDataType; amrex::ReduceOps reduce_ops; auto r = amrex::ParticleReduce> ( @@ -125,9 +151,12 @@ void addParticles () AMREX_ALWAYS_ASSERT(amrex::get<0>(r) == amrex::Real(std::pow(256, AMREX_SPACEDIM))); AMREX_ALWAYS_ASSERT(amrex::get<1>(r) == 2.0); AMREX_ALWAYS_ASSERT(amrex::get<2>(r) == 1); + */ } + + int main(int argc, char* argv[]) { amrex::Initialize(argc,argv); From d4388f9c6d7aa445d721f7ed3106f9b81011d622 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 22 Sep 2022 19:29:37 -0700 Subject: [PATCH 035/111] Fix Particle Iterator Type & Pinned Real Vector --- Tests/Particles/SOAParticle/main.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp index 31b8301e027..f8d0e2f06d9 100644 --- a/Tests/Particles/SOAParticle/main.cpp +++ b/Tests/Particles/SOAParticle/main.cpp @@ -97,11 +97,12 @@ void addParticles () } auto tmp = pc.template make_alike(); + using MyPinnedParIter = ParIter_impl; - for (MyParIter pti(tmp, lev); pti.isValid(); ++pti) { + for (MyPinnedParIter pti(tmp, lev); pti.isValid(); ++pti) { auto& particle_attributes = pti.GetStructOfArrays(); - RealVector& real_comp0 = particle_attributes.GetRealData(0); - 
IntVector& int_comp1 = particle_attributes.GetIntData(1); + auto& real_comp0 = particle_attributes.GetRealData(0); + auto& int_comp1 = particle_attributes.GetIntData(1); for (int i = 0; i < pti.numParticles(); ++i) { real_comp0[i] += 1; int_comp1[i] += 1; From 80d41aa0debe727ff4bc1b6a120edb6adcd4258f Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 22 Sep 2022 19:30:10 -0700 Subject: [PATCH 036/111] Clean up Formatting --- Tests/Particles/SOAParticle/main.cpp | 44 ++++++++++++++-------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp index f8d0e2f06d9..fb86f60d7a7 100644 --- a/Tests/Particles/SOAParticle/main.cpp +++ b/Tests/Particles/SOAParticle/main.cpp @@ -111,28 +111,28 @@ void addParticles () tmp.Redistribute(); - using ConstPTDType = typename T_PC::ParticleTileType::ConstParticleTileDataType; - amrex::ReduceOps reduce_ops; - auto r = amrex::ParticleReduce< - amrex::ReduceData< - amrex::ParticleReal, amrex::ParticleReal, amrex::ParticleReal, - amrex::ParticleReal, amrex::ParticleReal, amrex::ParticleReal, - amrex::ParticleReal> - >( - pc, - [=] AMREX_GPU_DEVICE(const ConstPTDType& ptd, const int i) noexcept - { - - const amrex::ParticleReal x = ptd.rdata(0)[i]; - const amrex::ParticleReal y = ptd.rdata(1)[i]; - const amrex::ParticleReal z = ptd.rdata(2)[i]; - - amrex::ParticleReal const w = ptd.rdata(1)[i]; - - return amrex::makeTuple(x, x*x, y, y*y, z, z*z, w); - }, - reduce_ops - ); + using ConstPTDType = typename T_PC::ParticleTileType::ConstParticleTileDataType; + amrex::ReduceOps reduce_ops; + auto r = amrex::ParticleReduce< + amrex::ReduceData< + amrex::ParticleReal, amrex::ParticleReal, amrex::ParticleReal, + amrex::ParticleReal, amrex::ParticleReal, amrex::ParticleReal, + amrex::ParticleReal> + >( + pc, + [=] AMREX_GPU_DEVICE(const ConstPTDType& ptd, const int i) noexcept + { + + const amrex::ParticleReal x = ptd.rdata(0)[i]; + const amrex::ParticleReal 
y = ptd.rdata(1)[i]; + const amrex::ParticleReal z = ptd.rdata(2)[i]; + + amrex::ParticleReal const w = ptd.rdata(1)[i]; + + return amrex::makeTuple(x, x*x, y, y*y, z, z*z, w); + }, + reduce_ops + ); // Reduce for SoA Particle Struct /* From b237dc5899de21b18b4d89b485c18540f25de6b3 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 22 Sep 2022 19:51:47 -0700 Subject: [PATCH 037/111] Add tmp.copyParticles --- Tests/Particles/SOAParticle/main.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp index fb86f60d7a7..8219963a0a8 100644 --- a/Tests/Particles/SOAParticle/main.cpp +++ b/Tests/Particles/SOAParticle/main.cpp @@ -96,7 +96,13 @@ void addParticles () } + // create a host-side particle buffer auto tmp = pc.template make_alike(); + + // copy device-to-host + bool const local = true; + tmp.copyParticles(pc, local); + using MyPinnedParIter = ParIter_impl; for (MyPinnedParIter pti(tmp, lev); pti.isValid(); ++pti) { From c36f149252a6636e9f857787a00cdd4a74c95739 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Mon, 26 Sep 2022 09:52:05 -0700 Subject: [PATCH 038/111] Adding corrections and test to amrex soa particle --- Src/Particle/AMReX_ParticleTile.H | 10 ---------- Tests/Particles/SOAParticle/main.cpp | 3 ++- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 340e3d7a0c8..162e41cb813 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -42,16 +42,6 @@ struct ParticleTileData GpuArray m_rdata; GpuArray m_idata; - auto * rdata(const int attribute_index) - { - return this->m_rdata[attribute_index]; - } - - auto * idata(const int attribute_index) - { - return this->m_idata[attribute_index]; - } - int m_num_runtime_real; int m_num_runtime_int; ParticleReal* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_rdata; diff --git a/Tests/Particles/SOAParticle/main.cpp 
b/Tests/Particles/SOAParticle/main.cpp index 31b8301e027..416a12c8f3e 100644 --- a/Tests/Particles/SOAParticle/main.cpp +++ b/Tests/Particles/SOAParticle/main.cpp @@ -97,8 +97,9 @@ void addParticles () } auto tmp = pc.template make_alike(); + using MyPinnedParIter = typename decltype(tmp); - for (MyParIter pti(tmp, lev); pti.isValid(); ++pti) { + for (MyPinnedParIter pti(tmp, lev); pti.isValid(); ++pti) { auto& particle_attributes = pti.GetStructOfArrays(); RealVector& real_comp0 = particle_attributes.GetRealData(0); IntVector& int_comp1 = particle_attributes.GetIntData(1); From 4cc5a4996f894785e3ba32f7960372b2b3330d39 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Mon, 26 Sep 2022 15:20:00 -0700 Subject: [PATCH 039/111] trying to correct .copyParticles for SoA --- Src/Particle/AMReX_ParticleUtil.H | 10 ++++++++++ Tests/Particles/SOAParticle/main.cpp | 14 +++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index 02171b18fac..47f0e6f64bc 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -89,6 +89,16 @@ auto call_f (F const& f, return f(p, i); } +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +auto call_f (F const& f, + ConstParticleTileData& p, + const int i) noexcept + -> decltype(f(p, i)) +{ + return f(p, i); +} + // These next several functions are used by ParticleToMesh and MeshToParticle // Lambda takes a Particle diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp index 464f418c9ce..105cabd9ece 100644 --- a/Tests/Particles/SOAParticle/main.cpp +++ b/Tests/Particles/SOAParticle/main.cpp @@ -97,18 +97,13 @@ void addParticles () } // create a host-side particle buffer + //ParticleContainer<1,1> pc_og; auto tmp = pc.template make_alike(); - using MyPinnedParIter = typename decltype(tmp); - -<<<<<<< HEAD -======= - // copy device-to-host - bool const local = true; - 
tmp.copyParticles(pc, local); - + tmp.copyParticles(pc, true); + + /* using MyPinnedParIter = ParIter_impl; ->>>>>>> b237dc5899de21b18b4d89b485c18540f25de6b3 for (MyPinnedParIter pti(tmp, lev); pti.isValid(); ++pti) { auto& particle_attributes = pti.GetStructOfArrays(); auto& real_comp0 = particle_attributes.GetRealData(0); @@ -118,6 +113,7 @@ void addParticles () int_comp1[i] += 1; } } + */ tmp.Redistribute(); From 9accbf19b9eb8449b09e59e02adde13c16d3d675 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Tue, 27 Sep 2022 14:47:03 -0700 Subject: [PATCH 040/111] Adapting the copyParticles function to SoAParticle --- Src/Particle/AMReX_ParticleContainerI.H | 4 ++-- Src/Particle/AMReX_ParticleUtil.H | 10 ---------- Tests/Particles/SOAParticle/main.cpp | 2 -- 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index bf4b5d878ca..9e433ed876a 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -1000,7 +1000,7 @@ void ParticleContainer_impl:: copyParticles (const PCType& other, bool local) { - using PData = ConstParticleTileData; + using PData = ConstParticleTileData; copyParticles(other, [=] AMREX_GPU_HOST_DEVICE (const PData& /*data*/, int /*i*/) { return 1; }, local); } @@ -1011,7 +1011,7 @@ void ParticleContainer_impl:: addParticles (const PCType& other, bool local) { - using PData = ConstParticleTileData; + using PData = ConstParticleTileData; addParticles(other, [=] AMREX_GPU_HOST_DEVICE (const PData& /*data*/, int /*i*/) { return 1; }, local); } diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index 47f0e6f64bc..02171b18fac 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -89,16 +89,6 @@ auto call_f (F const& f, return f(p, i); } -template -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -auto call_f (F const& f, - ConstParticleTileData& p, - const int i) 
noexcept - -> decltype(f(p, i)) -{ - return f(p, i); -} - // These next several functions are used by ParticleToMesh and MeshToParticle // Lambda takes a Particle diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp index 105cabd9ece..7f574127d91 100644 --- a/Tests/Particles/SOAParticle/main.cpp +++ b/Tests/Particles/SOAParticle/main.cpp @@ -101,7 +101,6 @@ void addParticles () auto tmp = pc.template make_alike(); tmp.copyParticles(pc, true); - /* using MyPinnedParIter = ParIter_impl; for (MyPinnedParIter pti(tmp, lev); pti.isValid(); ++pti) { @@ -113,7 +112,6 @@ void addParticles () int_comp1[i] += 1; } } - */ tmp.Redistribute(); From 691a93c71eefe739a358cc91577e2e796cf087a9 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Wed, 28 Sep 2022 10:10:04 -0700 Subject: [PATCH 041/111] Adapt new features to SoAParticles --- Src/Particle/AMReX_ParticleTile.H | 20 ++++++++++++++++++++ Tests/Particles/SOAParticle/main.cpp | 4 +++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 162e41cb813..56c2ab2b083 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -601,32 +601,52 @@ struct ParticleTile * */ + template ::type = 0> std::size_t size () const { return m_aos_tile.size(); } + template ::type = 0> + std::size_t size () const { return m_soa_tile.size(); } + /** * \brief Returns the number of real particles (excluding neighbors) * */ + template ::type = 0> int numParticles () const { return m_aos_tile.numParticles(); } + template ::type = 0> + int numParticles () const { return m_soa_tile.numParticles(); } + /** * \brief Returns the number of real particles (excluding neighbors) * */ + template ::type = 0> int numRealParticles () const { return m_aos_tile.numRealParticles(); } + template ::type = 0> + int numRealParticles () const { return m_soa_tile.numRealParticles(); } + /** * \brief Returns the number of 
neighbor particles (excluding reals) * */ + template ::type = 0> int numNeighborParticles () const { return m_aos_tile.numNeighborParticles(); } + template ::type = 0> + int numNeighborParticles () const { return m_soa_tile.numNeighborParticles(); } + /** * \brief Returns the total number of particles, real and neighbor * */ + template ::type = 0> int numTotalParticles () const { return m_aos_tile.numTotalParticles() ; } + template ::type = 0> + int numTotalParticles () const { return m_soa_tile.numTotalParticles() ; } + void setNumNeighbors (int num_neighbors) { m_soa_tile.setNumNeighbors(num_neighbors); diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp index 7f574127d91..45ccab8331b 100644 --- a/Tests/Particles/SOAParticle/main.cpp +++ b/Tests/Particles/SOAParticle/main.cpp @@ -56,7 +56,9 @@ void addParticles () ParticleType* AMREX_RESTRICT aos_ptr = aos().dataPtr(); // preparing access to particle data: SoA of Reals - auto& soa_real = pti.GetStructOfArrays().GetRealData(); + auto& soa = pti.GetStructOfArrays(); + auto soa_real = soa.GetRealData(); + auto size = soa.size(); amrex::ParticleReal* const AMREX_RESTRICT part_x = soa_real[0].dataPtr(); amrex::ParticleReal* const AMREX_RESTRICT part_y = soa_real[1].dataPtr(); amrex::ParticleReal* const AMREX_RESTRICT part_z = soa_real[2].dataPtr(); From 8516860a83ef0799aa374ff3ec155ef3181148fa Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Thu, 29 Sep 2022 16:11:49 -0700 Subject: [PATCH 042/111] Adapting ParticleContainerI.H to SoA --- Src/Particle/AMReX_ParticleContainerI.H | 177 ++++++++++++++++++++---- 1 file changed, 147 insertions(+), 30 deletions(-) diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index 9e433ed876a..33e7d7f5c75 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -1472,25 +1472,101 @@ ParticleContainer_impl int tile = grid_tile_ids[pmap_it].second; auto& aos = 
ptile_ptrs[pmap_it]->GetArrayOfStructs(); auto& soa = ptile_ptrs[pmap_it]->GetStructOfArrays(); + AMREX_ASSERT_WITH_MESSAGE((NumRealComps() == 0 && NumIntComps() == 0) || aos.size() == soa.size(), "The AoS and SoA data on this tile are different sizes - " "perhaps particles have not been initialized correctly?"); unsigned npart = aos.numParticles(); ParticleLocData pld; - if (npart != 0) { - Long last = npart - 1; - Long pindex = 0; - while (pindex <= last) { - ParticleType& p = aos[pindex]; - if ((remove_negative == false) && (p.id() < 0)) { - ++pindex; - continue; + if constexpr(!ParticleType::is_soa_particle){ + + if (npart != 0) { + Long last = npart - 1; + Long pindex = 0; + while (pindex <= last) { + ParticleType& p = aos[pindex]; + + if ((remove_negative == false) && (p.id() < 0)) { + ++pindex; + continue; + } + + if (p.id() < 0) + { + aos[pindex] = aos[last]; + for (int comp = 0; comp < NumRealComps(); comp++) + soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; + for (int comp = 0; comp < NumIntComps(); comp++) + soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last]; + correctCellVectors(last, pindex, grid, aos[pindex]); + --last; + continue; + } + + locateParticle(p, pld, lev_min, lev_max, nGrow, local ? grid : -1); + + particlePostLocate(p, pld, lev); + + if (p.id() < 0) + { + aos[pindex] = aos[last]; + for (int comp = 0; comp < NumRealComps(); comp++) + soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; + for (int comp = 0; comp < NumIntComps(); comp++) + soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last]; + correctCellVectors(last, pindex, grid, aos[pindex]); + --last; + continue; + } + + const int who = ParallelContext::global_to_local_rank(ParticleDistributionMap(pld.m_lev)[pld.m_grid]); + if (who == MyProc) { + if (pld.m_lev != lev || pld.m_grid != grid || pld.m_tile != tile) { + // We own it but must shift it to another place. 
+ auto index = std::make_pair(pld.m_grid, pld.m_tile); + AMREX_ASSERT(tmp_local[pld.m_lev][index].size() == num_threads); + tmp_local[pld.m_lev][index][thread_num].push_back(p); + for (int comp = 0; comp < NumRealComps(); ++comp) { + RealVector& arr = soa_local[pld.m_lev][index][thread_num].GetRealData(comp); + arr.push_back(soa.GetRealData(comp)[pindex]); + } + for (int comp = 0; comp < NumIntComps(); ++comp) { + IntVector& arr = soa_local[pld.m_lev][index][thread_num].GetIntData(comp); + arr.push_back(soa.GetIntData(comp)[pindex]); + } + + p.id() = -p.id(); // Invalidate the particle + } + } + else { + auto& particles_to_send = tmp_remote[who][thread_num]; + auto old_size = particles_to_send.size(); + auto new_size = old_size + superparticle_size; + particles_to_send.resize(new_size); + std::memcpy(&particles_to_send[old_size], &p, particle_size); + char* dst = &particles_to_send[old_size] + particle_size; + int array_comp_start = AMREX_SPACEDIM + NStructReal; + for (int comp = 0; comp < NumRealComps(); comp++) { + if (h_redistribute_real_comp[array_comp_start + comp]) { + std::memcpy(dst, &soa.GetRealData(comp)[pindex], sizeof(ParticleReal)); + dst += sizeof(ParticleReal); + } + } + array_comp_start = 2 + NStructInt; + for (int comp = 0; comp < NumIntComps(); comp++) { + if (h_redistribute_int_comp[array_comp_start + comp]) { + std::memcpy(dst, &soa.GetIntData(comp)[pindex], sizeof(int)); + dst += sizeof(int); + } + } + + p.id() = -p.id(); // Invalidate the particle } - if (p.id() < 0) - { + if (p.id() < 0) + { aos[pindex] = aos[last]; for (int comp = 0; comp < NumRealComps(); comp++) soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; @@ -1499,15 +1575,56 @@ ParticleContainer_impl correctCellVectors(last, pindex, grid, aos[pindex]); --last; continue; - } + } + + ++pindex; + } + + aos().erase(aos().begin() + last + 1, aos().begin() + npart); + for (int comp = 0; comp < NumRealComps(); comp++) { + RealVector& rdata = soa.GetRealData(comp); + 
rdata.erase(rdata.begin() + last + 1, rdata.begin() + npart); + } + for (int comp = 0; comp < NumIntComps(); comp++) { + IntVector& idata = soa.GetIntData(comp); + idata.erase(idata.begin() + last + 1, idata.begin() + npart); + } + } + + } else{ + + auto particle_tile = ptile_ptrs[pmap_it]; + if (npart != 0) { + Long last = npart - 1; + Long pindex = 0; + while (pindex <= last) { + auto ptd = particle_tile->getParticleTileData(); + ParticleType p(ptd,pindex); + + if ((remove_negative == false) && (p.id() < 0)) { + ++pindex; + continue; + } + + if (p.id() < 0){ + p = ParticleType(ptd,last); + for (int comp = 0; comp < NumRealComps(); comp++) + soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; + for (int comp = 0; comp < NumIntComps(); comp++) + soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last]; + correctCellVectors(last, pindex, grid, aos[pindex]); + --last; + continue; + } + //ParticleType& p2(ptd,last) locateParticle(p, pld, lev_min, lev_max, nGrow, local ? grid : -1); particlePostLocate(p, pld, lev); - if (p.id() < 0) - { - aos[pindex] = aos[last]; + if (p.id() < 0){ + + p = ParticleType(ptd,last); for (int comp = 0; comp < NumRealComps(); comp++) soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; for (int comp = 0; comp < NumIntComps(); comp++) @@ -1515,7 +1632,7 @@ ParticleContainer_impl correctCellVectors(last, pindex, grid, aos[pindex]); --last; continue; - } + } const int who = ParallelContext::global_to_local_rank(ParticleDistributionMap(pld.m_lev)[pld.m_grid]); if (who == MyProc) { @@ -1534,8 +1651,8 @@ ParticleContainer_impl } p.id() = -p.id(); // Invalidate the particle - } - } + } + } else { auto& particles_to_send = tmp_remote[who][thread_num]; auto old_size = particles_to_send.size(); @@ -1548,22 +1665,21 @@ ParticleContainer_impl if (h_redistribute_real_comp[array_comp_start + comp]) { std::memcpy(dst, &soa.GetRealData(comp)[pindex], sizeof(ParticleReal)); dst += sizeof(ParticleReal); - } - } + } + } array_comp_start = 2 
+ NStructInt; for (int comp = 0; comp < NumIntComps(); comp++) { if (h_redistribute_int_comp[array_comp_start + comp]) { std::memcpy(dst, &soa.GetIntData(comp)[pindex], sizeof(int)); dst += sizeof(int); - } - } + } + } p.id() = -p.id(); // Invalidate the particle - } + } - if (p.id() < 0) - { - aos[pindex] = aos[last]; + if (p.id() < 0){ + p = ParticleType(ptd,last); for (int comp = 0; comp < NumRealComps(); comp++) soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; for (int comp = 0; comp < NumIntComps(); comp++) @@ -1571,10 +1687,10 @@ ParticleContainer_impl correctCellVectors(last, pindex, grid, aos[pindex]); --last; continue; - } + } ++pindex; - } + } aos().erase(aos().begin() + last + 1, aos().begin() + npart); for (int comp = 0; comp < NumRealComps(); comp++) { @@ -1584,8 +1700,9 @@ ParticleContainer_impl for (int comp = 0; comp < NumIntComps(); comp++) { IntVector& idata = soa.GetIntData(comp); idata.erase(idata.begin() + last + 1, idata.begin() + npart); - } - } + } + } + } } } @@ -2230,7 +2347,7 @@ InterpolateSingleLevel (MultiFab& mesh_data, int lev) const auto plo = gm.ProbLoArray(); const auto dxi = gm.InvCellSizeArray(); - using ParIter = ParIter; + using ParIter = ParIter_impl; #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) From 5f51d4b4f626a9be7020955f45d7a83d1bff235a Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Mon, 3 Oct 2022 11:11:39 -0700 Subject: [PATCH 043/111] Rewrote numParticlesOutOfRange and add constructor to SoAParticle --- Src/Particle/AMReX_ParIter.H | 2 +- Src/Particle/AMReX_ParticleTile.H | 17 ++++++++++--- Src/Particle/AMReX_ParticleUtil.H | 41 ++++++++++++++++++++++++++++++- 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/Src/Particle/AMReX_ParIter.H b/Src/Particle/AMReX_ParIter.H index deee365a3e4..25e8d18ad91 100644 --- a/Src/Particle/AMReX_ParIter.H +++ b/Src/Particle/AMReX_ParIter.H @@ -157,7 +157,7 @@ public: ParConstIter_impl (ContainerType const& pc, int level, 
MFItInfo& info) : ParIterBase_impl(pc,level,info) - {} + {} }; template struct SoAParticle; + +template +struct ConstParticleTileData; template struct ParticleTileData @@ -207,18 +210,25 @@ struct SoAParticle : SoAParticleBase static constexpr int NArrayInt = T_NArrayInt; using StorageParticleType = SoAParticleBase; using PTD = ParticleTileData; + using ConstPTD = ConstParticleTileData; static constexpr bool is_soa_particle = true; using RealType = ParticleReal; static Long the_next_id; - SoAParticle (PTD ptd, int const index) + SoAParticle (PTD ptd, int const index) { m_particle_tile_data=ptd; m_index=index; } + SoAParticle (ConstPTD ptd, int const index) + { + m_constparticle_tile_data=ptd; + m_index=index; + } + //functions to get id and cpu in the SOA data @@ -284,6 +294,7 @@ struct SoAParticle : SoAParticleBase static_assert(std::is_trivially_copyable>(), "ParticleTileData is not trivially copyable"); PTD m_particle_tile_data; + ConstPTD m_constparticle_tile_data; int m_index; }; @@ -341,8 +352,8 @@ struct ConstParticleTileData Long m_size; const ParticleType* AMREX_RESTRICT m_aos; - GpuArray m_rdata; - GpuArray m_idata; + GpuArray m_rdata; + GpuArray m_idata; auto const* rdata(const int attribute_index) const { diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index 02171b18fac..a6a5b0988ee 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -254,7 +254,8 @@ numParticlesOutOfRange (Iterator const& pti, int nGrow) * \param nGrow the number of grow cells allowed. 
* */ -template ::value, int> foo = 0> + +template ::value && !Iterator::ContainerType::ParticleType::is_soa_particle, int> foo = 0> int numParticlesOutOfRange (Iterator const& pti, IntVect nGrow) { @@ -293,6 +294,44 @@ numParticlesOutOfRange (Iterator const& pti, IntVect nGrow) return hv; } +template ::value && Iterator::ContainerType::ParticleType::is_soa_particle, int> foo = 0> +int +numParticlesOutOfRange (Iterator const& pti, IntVect nGrow) +{ + using ParticleType = typename Iterator::ContainerType::ParticleType; + + const auto tile = pti.GetParticleTile(); + const auto tile_data = tile.getConstParticleTileData(); + const auto np = tile.numParticles(); + const auto& geom = pti.Geom(pti.GetLevel()); + + const auto domain = geom.Domain(); + const auto plo = geom.ProbLoArray(); + const auto dxi = geom.InvCellSizeArray(); + + Box box = pti.tilebox(); + box.grow(nGrow); + + ReduceOps reduce_op; + ReduceData reduce_data(reduce_op); + using ReduceTuple = typename decltype(reduce_data)::Type; + + reduce_op.eval(np, reduce_data, + [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple + { + const ParticleType p(tile_data,i); + if ((p.id() < 0)) return false; + IntVect iv = IntVect( + AMREX_D_DECL(int(amrex::Math::floor((p.pos(0)-plo[0])*dxi[0])), + int(amrex::Math::floor((p.pos(1)-plo[1])*dxi[1])), + int(amrex::Math::floor((p.pos(2)-plo[2])*dxi[2])))); + iv += domain.smallEnd(); + return !box.contains(iv); + }); + int hv = amrex::get<0>(reduce_data.value(reduce_op)); + return hv; +} + /** * \brief Returns the number of particles that are more than nGrow cells * from their assigned box. 
From e9c91e5b171144f48fb327fa0c07debc81368cb3 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Mon, 3 Oct 2022 16:52:19 -0700 Subject: [PATCH 044/111] Create a ConstSoAParticle class --- Src/Particle/AMReX_ParticleTile.H | 97 ++++++++++++++++++++++++++++--- Src/Particle/AMReX_ParticleUtil.H | 2 +- 2 files changed, 90 insertions(+), 9 deletions(-) diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 2038974c3e5..edc94142d70 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -210,8 +210,8 @@ struct SoAParticle : SoAParticleBase static constexpr int NArrayInt = T_NArrayInt; using StorageParticleType = SoAParticleBase; using PTD = ParticleTileData; - using ConstPTD = ConstParticleTileData; static constexpr bool is_soa_particle = true; + static constexpr bool is_constsoa_particle = false; using RealType = ParticleReal; @@ -222,13 +222,6 @@ struct SoAParticle : SoAParticleBase m_particle_tile_data=ptd; m_index=index; } - - SoAParticle (ConstPTD ptd, int const index) - { - m_constparticle_tile_data=ptd; - m_index=index; - } - //functions to get id and cpu in the SOA data @@ -294,6 +287,94 @@ struct SoAParticle : SoAParticleBase static_assert(std::is_trivially_copyable>(), "ParticleTileData is not trivially copyable"); PTD m_particle_tile_data; + int m_index; +}; + +// SOA Particle Structure +template +struct ConstSoAParticle : SoAParticleBase +{ + static constexpr int NArrayReal = T_NArrayReal; + static constexpr int NArrayInt = T_NArrayInt; + using StorageParticleType = SoAParticleBase; + using ConstPTD = ConstParticleTileData; + static constexpr bool is_soa_particle = false; + static constexpr bool is_constsoa_particle = true; + + using RealType = ParticleReal; + + static Long the_next_id; + + + ConstSoAParticle (ConstPTD ptd, int const index) + { + m_constparticle_tile_data=ptd; + m_index=index; + } + + //functions to get id and cpu in the SOA data + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE 
+ const ParticleCPUWrapper cpu () & { + uint64_t unsigned_cpu_value; + unsigned_cpu_value = (uint64_t) this->m_constparticle_tile_data.m_idata[1][m_index]; + return ParticleCPUWrapper(unsigned_cpu_value); } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + const ParticleIDWrapper id () & { + uint64_t unsigned_id_value; + unsigned_id_value = (uint64_t) this->m_constparticle_tile_data.m_idata[0][m_index]; + return ParticleIDWrapper(unsigned_id_value); } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + const ConstParticleCPUWrapper cpu () const & { + uint64_t unsigned_cpu_value; + unsigned_cpu_value = (uint64_t) this->m_constparticle_tile_data.m_idata[1][m_index]; + return ConstParticleCPUWrapper(unsigned_cpu_value); } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + const ConstParticleIDWrapper id () const & { + uint64_t unsigned_id_value; + unsigned_id_value = (uint64_t) this->m_constparticle_tile_data.m_idata[0][m_index]; + return ConstParticleIDWrapper(unsigned_id_value); } + + //functions to get positions of the particle in the SOA data + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + const RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_constparticle_tile_data->m_rdata[0][m_index], this->m_constparticle_tile_data.m_rdata[1][m_index], this->m_constparticle_tile_data->m_rdata[2][m_index]));} + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + const RealType& pos (int position_index) & + { + AMREX_ASSERT(position_index < AMREX_SPACEDIM); + return this->m_constparticle_tile_data.m_rdata[position_index][m_index]; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + const RealType pos (int position_index) const & + { + AMREX_ASSERT(position_index < AMREX_SPACEDIM); + return this->m_constparticle_tile_data.m_rdata[position_index][m_index]; + } + + static Long NextID (); + + /** + * \brief This version can only be used inside omp critical. + */ + static Long UnprotectedNextID (); + + /** + * \brief Reset on restart. 
+ * + * \param nextid + */ + static void NextID (Long nextid); + + private : + + static_assert(std::is_trivially_copyable>(), "ParticleTileData is not trivially copyable"); + ConstPTD m_constparticle_tile_data; int m_index; }; diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index a6a5b0988ee..414e77c5ff0 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -294,7 +294,7 @@ numParticlesOutOfRange (Iterator const& pti, IntVect nGrow) return hv; } -template ::value && Iterator::ContainerType::ParticleType::is_soa_particle, int> foo = 0> +template ::value && Iterator::ContainerType::ParticleType::is_constsoa_particle, int> foo = 0> int numParticlesOutOfRange (Iterator const& pti, IntVect nGrow) { From db5b1f4e15e5eef8734da9f48ee03cd4ac9ef1ce Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Tue, 4 Oct 2022 15:10:31 -0700 Subject: [PATCH 045/111] Adding ConstSoAParticle --- Src/Particle/AMReX_ParIter.H | 1 + Src/Particle/AMReX_ParticleContainer.H | 5 +++-- Src/Particle/AMReX_ParticleTile.H | 19 ------------------- Src/Particle/AMReX_ParticleUtil.H | 6 +++--- 4 files changed, 7 insertions(+), 24 deletions(-) diff --git a/Src/Particle/AMReX_ParIter.H b/Src/Particle/AMReX_ParIter.H index 25e8d18ad91..fae27c074df 100644 --- a/Src/Particle/AMReX_ParIter.H +++ b/Src/Particle/AMReX_ParIter.H @@ -121,6 +121,7 @@ public: static constexpr int NStructInt = ParticleType::NInt; using ContainerType = ParticleContainer_impl; + using ConstParticleType = typename ContainerType::ConstParticleType; using ParticleTileType = typename ContainerType::ParticleTileType; using AoS = typename ContainerType::AoS; using SoA = typename ContainerType::SoA; diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index 46cf3f82070..ba9ee35bd78 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -143,7 +143,6 @@ template >::type; + using 
ConstParticleType = ConstSoAParticle; private: friend class ParIterBase_impl; friend class ParIterBase_impl; diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index edc94142d70..1a44ceefcb8 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -314,18 +314,6 @@ struct ConstSoAParticle : SoAParticleBase //functions to get id and cpu in the SOA data - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - const ParticleCPUWrapper cpu () & { - uint64_t unsigned_cpu_value; - unsigned_cpu_value = (uint64_t) this->m_constparticle_tile_data.m_idata[1][m_index]; - return ParticleCPUWrapper(unsigned_cpu_value); } - - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - const ParticleIDWrapper id () & { - uint64_t unsigned_id_value; - unsigned_id_value = (uint64_t) this->m_constparticle_tile_data.m_idata[0][m_index]; - return ParticleIDWrapper(unsigned_id_value); } - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE const ConstParticleCPUWrapper cpu () const & { uint64_t unsigned_cpu_value; @@ -343,13 +331,6 @@ struct ConstSoAParticle : SoAParticleBase AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE const RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_constparticle_tile_data->m_rdata[0][m_index], this->m_constparticle_tile_data.m_rdata[1][m_index], this->m_constparticle_tile_data->m_rdata[2][m_index]));} - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - const RealType& pos (int position_index) & - { - AMREX_ASSERT(position_index < AMREX_SPACEDIM); - return this->m_constparticle_tile_data.m_rdata[position_index][m_index]; - } - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE const RealType pos (int position_index) const & { diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index 414e77c5ff0..3dfee74e637 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -294,11 +294,11 @@ numParticlesOutOfRange (Iterator const& pti, IntVect nGrow) return hv; } -template ::value && 
Iterator::ContainerType::ParticleType::is_constsoa_particle, int> foo = 0> +template ::value && Iterator::ContainerType::ParticleType::is_soa_particle, int> foo = 0> int numParticlesOutOfRange (Iterator const& pti, IntVect nGrow) { - using ParticleType = typename Iterator::ContainerType::ParticleType; + using ParticleType = typename Iterator::ContainerType::ConstParticleType; const auto tile = pti.GetParticleTile(); const auto tile_data = tile.getConstParticleTileData(); @@ -319,7 +319,7 @@ numParticlesOutOfRange (Iterator const& pti, IntVect nGrow) reduce_op.eval(np, reduce_data, [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple { - const ParticleType p(tile_data,i); + ParticleType p(tile_data,i); if ((p.id() < 0)) return false; IntVect iv = IntVect( AMREX_D_DECL(int(amrex::Math::floor((p.pos(0)-plo[0])*dxi[0])), From 9a47bfac1ff61f57edbc31d9babcd683a5e9d2ef Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Fri, 7 Oct 2022 17:11:13 -0700 Subject: [PATCH 046/111] Still have to solve the enable_if error message --- Src/Particle/AMReX_ParticleContainer.H | 9 ++++- Src/Particle/AMReX_ParticleContainerI.H | 51 +++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index ba9ee35bd78..dbd8c7e1040 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -546,7 +546,14 @@ public: * \param only_valid * \param only_local */ - Long NumberOfParticlesAtLevel (int level, bool only_valid = true, bool only_local = false) const; + + template + auto NumberOfParticlesAtLevel (int level, bool only_valid = true, bool only_local = false) const-> + typename std::enable_if::value, Long>::type; + + template + auto NumberOfParticlesAtLevel (int level, bool only_valid = true, bool only_local = false) const-> + typename std::enable_if::value, Long>::type; Vector NumberOfParticlesInGrid (int level, bool only_valid = true, bool only_local = 
false) const; diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index 33e7d7f5c75..265bc8f0db1 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -436,19 +436,20 @@ ParticleContainer_impl::NumberOf template class Allocator> -Long -ParticleContainer_impl::NumberOfParticlesAtLevel (int lev, bool only_valid, bool only_local) const +template +auto ParticleContainer_impl::NumberOfParticlesAtLevel (int level, bool only_valid, bool only_local) const-> +typename std::enable_if::value, Long>::type { Long nparticles = 0; - if (lev < 0 || lev >= int(m_particles.size())) return nparticles; + if (level < 0 || level >= int(m_particles.size())) return nparticles; if (only_valid) { ReduceOps reduce_op; ReduceData reduce_data(reduce_op); using ReduceTuple = typename decltype(reduce_data)::Type; - for (const auto& kv : GetParticles(lev)) { + for (const auto& kv : GetParticles(level)) { const auto& ptile = kv.second; auto const& ptaos = ptile.GetArrayOfStructs(); ParticleType const* pp = ptaos().data(); @@ -461,6 +462,48 @@ ParticleContainer_impl::NumberOf } nparticles = static_cast(amrex::get<0>(reduce_data.value(reduce_op))); } + else { + for (const auto& kv : GetParticles(level)) { + const auto& ptile = kv.second; + nparticles += ptile.numParticles(); + } + } + + if (!only_local) { + ParallelAllReduce::Sum(nparticles, ParallelContext::CommunicatorSub()); + } + + return nparticles; +} + +template class Allocator> +template +auto ParticleContainer_impl::NumberOfParticlesAtLevel (int lev, bool only_valid, bool only_local) const-> +typename std::enable_if::value, Long>::type +{ + Long nparticles = 0; + + if (lev < 0 || lev >= int(m_particles.size())) return nparticles; + + if (only_valid) { + ReduceOps reduce_op; + ReduceData reduce_data(reduce_op); + using ReduceTuple = typename decltype(reduce_data)::Type; + + for (const auto& kv : GetParticles(lev)) { + const auto& ptile = kv.second; + 
auto const tile_data = ptile.getParticleTileData(); + auto const& ptaos = ptile.GetStructsOfArray(); + + reduce_op.eval(ptaos.numParticles(), reduce_data, + [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple + { + return (tile_data.idata(0)[i] > 0) ? 1 : 0; //get the id in SoA particle way + }); + } + nparticles = static_cast(amrex::get<0>(reduce_data.value(reduce_op))); + } else { for (const auto& kv : GetParticles(lev)) { const auto& ptile = kv.second; From 972095ca4d44a0a92251fec5c4df1439d57b2773 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Mon, 10 Oct 2022 17:03:14 -0700 Subject: [PATCH 047/111] Adapating ParticleContainerI.H file to SoA --- Src/Particle/AMReX_ParticleContainerI.H | 70 +++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index 265bc8f0db1..21ed54ad1b8 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -434,6 +434,63 @@ ParticleContainer_impl::NumberOf return nparticles; } +template class Allocator> +Vector +ParticleContainer_impl::NumberOfParticlesInGrid (int lev, bool only_valid, bool only_local) const +{ + AMREX_ASSERT(lev >= 0 && lev < int(m_particles.size())); + + LayoutData np_per_grid_local(ParticleBoxArray(lev), + ParticleDistributionMap(lev)); + + for (ParConstIterType pti(*this, lev); pti.isValid(); ++pti) + { + int gid = pti.index(); + if (only_valid) + { + const auto& ptile = ParticlesAt(lev, pti); + const int np = ptile.numParticles(); + auto const tile_data = ptile.getParticleTileData(); + + ReduceOps reduce_op; + ReduceData reduce_data(reduce_op); + using ReduceTuple = typename decltype(reduce_data)::Type; + + reduce_op.eval(np, reduce_data, + [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple + { + return (tile_data.idata(0)[i] > 0) ? 
1 : 0; + }); + + int np_valid = amrex::get<0>(reduce_data.value(reduce_op)); + np_per_grid_local[gid] += np_valid; + } else + { + np_per_grid_local[gid] += pti.numParticles(); + } + } + + Vector nparticles(np_per_grid_local.size(), 0); + if (only_local) + { + for (ParConstIterType pti(*this, lev); pti.isValid(); ++pti) + { + nparticles[pti.index()] = np_per_grid_local[pti.index()]; + } + } + else + { + ParallelDescriptor::GatherLayoutDataToVector(np_per_grid_local, nparticles, + ParallelContext::IOProcessorNumberSub()); + ParallelDescriptor::Bcast(&nparticles[0], nparticles.size(), + ParallelContext::IOProcessorNumberSub()); + } + + return nparticles; +} + + template class Allocator> template @@ -476,12 +533,14 @@ typename std::enable_if::value, Long>::type return nparticles; } + template class Allocator> template -auto ParticleContainer_impl::NumberOfParticlesAtLevel (int lev, bool only_valid, bool only_local) const-> +auto ParticleContainer_impl::NumberOfParticlesAtLevel (int lev, bool only_valid, bool only_local) const -> typename std::enable_if::value, Long>::type { + /* Long nparticles = 0; if (lev < 0 || lev >= int(m_particles.size())) return nparticles; @@ -494,7 +553,7 @@ typename std::enable_if::value, Long>::type for (const auto& kv : GetParticles(lev)) { const auto& ptile = kv.second; auto const tile_data = ptile.getParticleTileData(); - auto const& ptaos = ptile.GetStructsOfArray(); + auto const& ptaos = ptile.GetStructOfArrays(); reduce_op.eval(ptaos.numParticles(), reduce_data, [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple @@ -516,6 +575,8 @@ typename std::enable_if::value, Long>::type } return nparticles; + */ + return 0; } // @@ -2398,7 +2459,7 @@ InterpolateSingleLevel (MultiFab& mesh_data, int lev) for (ParIter pti(*this, lev); pti.isValid(); ++pti) { auto& particles = pti.GetArrayOfStructs(); - auto pstruct = particles().data(); + auto ptd = pti.GetParticleTile().getParticleTileData(); FArrayBox& fab = mesh_data[pti]; const auto fabarr = 
fab.array(); const Long np = particles.numParticles(); @@ -2406,7 +2467,8 @@ InterpolateSingleLevel (MultiFab& mesh_data, int lev) int nComp = fab.nComp(); AMREX_FOR_1D( np, i, { - amrex_interpolate_cic(pstruct[i], nComp, fabarr, plo, dxi); + auto p = make_particle{}(ptd,i); + amrex_interpolate_cic(p, nComp, fabarr, plo, dxi); }); } } From 4b67ffad0902837710d3a4277119621de4b407af Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Tue, 11 Oct 2022 15:30:44 -0700 Subject: [PATCH 048/111] Correcting the ParticleContainerI.H file to SoA --- Src/Particle/AMReX_ParticleContainer.H | 8 +- Src/Particle/AMReX_ParticleContainerI.H | 165 +++++++----------------- 2 files changed, 51 insertions(+), 122 deletions(-) diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index dbd8c7e1040..c164e7214d3 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -547,13 +547,7 @@ public: * \param only_local */ - template - auto NumberOfParticlesAtLevel (int level, bool only_valid = true, bool only_local = false) const-> - typename std::enable_if::value, Long>::type; - - template - auto NumberOfParticlesAtLevel (int level, bool only_valid = true, bool only_local = false) const-> - typename std::enable_if::value, Long>::type; + Long NumberOfParticlesAtLevel (int level, bool only_valid = true, bool only_local = false) const; Vector NumberOfParticlesInGrid (int level, bool only_valid = true, bool only_local = false) const; diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index 21ed54ad1b8..95cf269b195 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -392,79 +392,43 @@ ParticleContainer_impl::NumberOf int gid = pti.index(); if (only_valid) { - const auto& ptile = ParticlesAt(lev, pti); - const auto& aos = ptile.GetArrayOfStructs(); - const auto pstruct = aos().dataPtr(); - const int np = ptile.numParticles(); + 
if constexpr(!ParticleType::is_soa_particle){ + const auto& ptile = ParticlesAt(lev, pti); + const auto& aos = ptile.GetArrayOfStructs(); + const auto pstruct = aos().dataPtr(); + const int np = ptile.numParticles(); - ReduceOps reduce_op; - ReduceData reduce_data(reduce_op); - using ReduceTuple = typename decltype(reduce_data)::Type; + ReduceOps reduce_op; + ReduceData reduce_data(reduce_op); + using ReduceTuple = typename decltype(reduce_data)::Type; - reduce_op.eval(np, reduce_data, + reduce_op.eval(np, reduce_data, [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple - { + { return (pstruct[i].id() > 0) ? 1 : 0; - }); + }); - int np_valid = amrex::get<0>(reduce_data.value(reduce_op)); - np_per_grid_local[gid] += np_valid; - } else - { - np_per_grid_local[gid] += pti.numParticles(); - } - } + int np_valid = amrex::get<0>(reduce_data.value(reduce_op)); + np_per_grid_local[gid] += np_valid; - Vector nparticles(np_per_grid_local.size(), 0); - if (only_local) - { - for (ParConstIterType pti(*this, lev); pti.isValid(); ++pti) - { - nparticles[pti.index()] = np_per_grid_local[pti.index()]; - } - } - else - { - ParallelDescriptor::GatherLayoutDataToVector(np_per_grid_local, nparticles, - ParallelContext::IOProcessorNumberSub()); - ParallelDescriptor::Bcast(&nparticles[0], nparticles.size(), - ParallelContext::IOProcessorNumberSub()); - } - - return nparticles; -} - -template class Allocator> -Vector -ParticleContainer_impl::NumberOfParticlesInGrid (int lev, bool only_valid, bool only_local) const -{ - AMREX_ASSERT(lev >= 0 && lev < int(m_particles.size())); - - LayoutData np_per_grid_local(ParticleBoxArray(lev), - ParticleDistributionMap(lev)); - - for (ParConstIterType pti(*this, lev); pti.isValid(); ++pti) - { - int gid = pti.index(); - if (only_valid) - { - const auto& ptile = ParticlesAt(lev, pti); - const int np = ptile.numParticles(); - auto const tile_data = ptile.getParticleTileData(); + } else{ + const auto& ptile = ParticlesAt(lev, pti); + const int np = 
ptile.numParticles(); + auto const tile_data = ptile.getParticleTileData(); - ReduceOps reduce_op; - ReduceData reduce_data(reduce_op); - using ReduceTuple = typename decltype(reduce_data)::Type; + ReduceOps reduce_op; + ReduceData reduce_data(reduce_op); + using ReduceTuple = typename decltype(reduce_data)::Type; - reduce_op.eval(np, reduce_data, - [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple - { + reduce_op.eval(np, reduce_data, + [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple + { return (tile_data.idata(0)[i] > 0) ? 1 : 0; - }); + }); - int np_valid = amrex::get<0>(reduce_data.value(reduce_op)); - np_per_grid_local[gid] += np_valid; + int np_valid = amrex::get<0>(reduce_data.value(reduce_op)); + np_per_grid_local[gid] += np_valid; + } } else { np_per_grid_local[gid] += pti.numParticles(); @@ -490,12 +454,9 @@ ParticleContainer_impl::NumberOf return nparticles; } - template class Allocator> -template -auto ParticleContainer_impl::NumberOfParticlesAtLevel (int level, bool only_valid, bool only_local) const-> -typename std::enable_if::value, Long>::type +Long ParticleContainer_impl::NumberOfParticlesAtLevel (int level, bool only_valid, bool only_local) const { Long nparticles = 0; @@ -506,53 +467,24 @@ typename std::enable_if::value, Long>::type ReduceData reduce_data(reduce_op); using ReduceTuple = typename decltype(reduce_data)::Type; - for (const auto& kv : GetParticles(level)) { - const auto& ptile = kv.second; - auto const& ptaos = ptile.GetArrayOfStructs(); - ParticleType const* pp = ptaos().data(); - - reduce_op.eval(ptaos.numParticles(), reduce_data, - [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple - { - return (pp[i].id() > 0) ? 
1 : 0; - }); - } - nparticles = static_cast(amrex::get<0>(reduce_data.value(reduce_op))); - } - else { - for (const auto& kv : GetParticles(level)) { - const auto& ptile = kv.second; - nparticles += ptile.numParticles(); - } - } - - if (!only_local) { - ParallelAllReduce::Sum(nparticles, ParallelContext::CommunicatorSub()); - } + if constexpr(!ParticleType::is_soa_particle){ - return nparticles; -} - - -template class Allocator> -template -auto ParticleContainer_impl::NumberOfParticlesAtLevel (int lev, bool only_valid, bool only_local) const -> -typename std::enable_if::value, Long>::type -{ - /* - Long nparticles = 0; + for (const auto& kv : GetParticles(level)) { + const auto& ptile = kv.second; + auto const& ptaos = ptile.GetArrayOfStructs(); + ParticleType const* pp = ptaos().data(); - if (lev < 0 || lev >= int(m_particles.size())) return nparticles; - - if (only_valid) { - ReduceOps reduce_op; - ReduceData reduce_data(reduce_op); - using ReduceTuple = typename decltype(reduce_data)::Type; + reduce_op.eval(ptaos.numParticles(), reduce_data, + [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple + { + return (pp[i].id() > 0) ? 1 : 0; + }); + } - for (const auto& kv : GetParticles(lev)) { + } else{ + for (const auto& kv : GetParticles(level)) { const auto& ptile = kv.second; - auto const tile_data = ptile.getParticleTileData(); + auto const tile_data = ptile.getConstParticleTileData(); auto const& ptaos = ptile.GetStructOfArrays(); reduce_op.eval(ptaos.numParticles(), reduce_data, @@ -560,11 +492,13 @@ typename std::enable_if::value, Long>::type { return (tile_data.idata(0)[i] > 0) ? 
1 : 0; //get the id in SoA particle way }); + } } + nparticles = static_cast(amrex::get<0>(reduce_data.value(reduce_op))); } else { - for (const auto& kv : GetParticles(lev)) { + for (const auto& kv : GetParticles(level)) { const auto& ptile = kv.second; nparticles += ptile.numParticles(); } @@ -575,8 +509,6 @@ typename std::enable_if::value, Long>::type } return nparticles; - */ - return 0; } // @@ -2345,6 +2277,7 @@ AssignCellDensitySingleLevel (int rho_index, const auto& particles = pti.GetArrayOfStructs(); const auto pstruct = particles().data(); const Long np = pti.numParticles(); + auto ptd = pti.GetParticleTile().getConstParticleTileData(); FArrayBox& fab = (*mf_pointer)[pti]; auto rhoarr = fab.array(); #ifdef AMREX_USE_OMP @@ -2363,14 +2296,16 @@ AssignCellDensitySingleLevel (int rho_index, { AMREX_HOST_DEVICE_FOR_1D( np, i, { - amrex_deposit_cic(pstruct[i], ncomp, rhoarr, plo, dxi); + auto p = make_particle{}(ptd,i); + amrex_deposit_cic(p, ncomp, rhoarr, plo, dxi); }); } else { AMREX_HOST_DEVICE_FOR_1D( np, i, { - amrex_deposit_particle_dx_cic(pstruct[i], ncomp, rhoarr, plo, dxi, pdxi); + auto p = make_particle{}(ptd,i); + amrex_deposit_particle_dx_cic(p, ncomp, rhoarr, plo, dxi, pdxi); }); } From 8b0a5274caf0dcc086cefe66b3749f3ad5fe9457 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Thu, 13 Oct 2022 11:53:13 -0700 Subject: [PATCH 049/111] Commenting memcpy part --- Src/Particle/AMReX_ParticleContainerI.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index 95cf269b195..9adda100b99 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -1694,7 +1694,7 @@ ParticleContainer_impl auto old_size = particles_to_send.size(); auto new_size = old_size + superparticle_size; particles_to_send.resize(new_size); - std::memcpy(&particles_to_send[old_size], &p, particle_size); + 
\\std::memcpy(&particles_to_send[old_size], &p, particle_size); char* dst = &particles_to_send[old_size] + particle_size; int array_comp_start = AMREX_SPACEDIM + NStructReal; for (int comp = 0; comp < NumRealComps(); comp++) { From 7fef7115e4cd265cc746906bb39271faddfab051 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Thu, 13 Oct 2022 13:28:28 -0700 Subject: [PATCH 050/111] Commenting memcpy part --- Src/Particle/AMReX_ParticleContainerI.H | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index 9adda100b99..062d5e4ebc6 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -1694,7 +1694,9 @@ ParticleContainer_impl auto old_size = particles_to_send.size(); auto new_size = old_size + superparticle_size; particles_to_send.resize(new_size); - \\std::memcpy(&particles_to_send[old_size], &p, particle_size); + /* + std::memcpy(&particles_to_send[old_size], &p, particle_size); + */ char* dst = &particles_to_send[old_size] + particle_size; int array_comp_start = AMREX_SPACEDIM + NStructReal; for (int comp = 0; comp < NumRealComps(); comp++) { From a7b7344dc584baa5cbb95e9a0ea282c63c2641f5 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Thu, 13 Oct 2022 13:32:24 -0700 Subject: [PATCH 051/111] Not incrementing the dst pointer by 1 --- Src/Particle/AMReX_ParticleContainerI.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index 062d5e4ebc6..a1db32070cc 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -1697,7 +1697,7 @@ ParticleContainer_impl /* std::memcpy(&particles_to_send[old_size], &p, particle_size); */ - char* dst = &particles_to_send[old_size] + particle_size; + char* dst = &particles_to_send[old_size]; int array_comp_start = AMREX_SPACEDIM + NStructReal; for (int 
comp = 0; comp < NumRealComps(); comp++) { if (h_redistribute_real_comp[array_comp_start + comp]) { From 8a7038a0d27fe505e905e1462ec2e049677c4c63 Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Thu, 13 Oct 2022 15:12:15 -0700 Subject: [PATCH 052/111] Not incrementing the +1 in superparticle_size --- Src/Particle/AMReX_ParticleContainerI.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index a1db32070cc..65229a511c7 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -18,7 +18,7 @@ ParticleContainer_impl::SetParti if (h_redistribute_int_comp[i]) {++num_int_comm_comps;} } - particle_size = sizeof(ParticleType); + particle_size = 0; superparticle_size = particle_size + num_real_comm_comps*sizeof(ParticleReal) + num_int_comm_comps*sizeof(int); } From 9b1da5ceb74143572873d2a0e1d9731aad67f03f Mon Sep 17 00:00:00 2001 From: Thierry Antoun Date: Thu, 13 Oct 2022 15:39:09 -0700 Subject: [PATCH 053/111] set Particlesize to zero --- Src/Particle/AMReX_ParticleContainerI.H | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index 65229a511c7..2de5ab5072f 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -18,7 +18,7 @@ ParticleContainer_impl::SetParti if (h_redistribute_int_comp[i]) {++num_int_comm_comps;} } - particle_size = 0; + particle_size = sizeof(ParticleType); superparticle_size = particle_size + num_real_comm_comps*sizeof(ParticleReal) + num_int_comm_comps*sizeof(int); } @@ -1873,6 +1873,11 @@ RedistributeMPI (std::map >& not_ours, #ifdef AMREX_USE_MPI + int particle_size=0; + int superparticle_size = particle_size + + num_real_comm_comps*sizeof(ParticleReal) + num_int_comm_comps*sizeof(int); + +} using buffer_type = unsigned long long; std::map > mpi_snd_data; 
@@ -2269,7 +2274,7 @@ AssignCellDensitySingleLevel (int rho_index, mf_pointer->setVal(0); - using ParConstIter = ParConstIter; + using ParConstIter = ParConstIter_impl; #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif From 74e05d821e3f105e1f53ff6f94e56d2c2a4ce23e Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Fri, 14 Oct 2022 15:50:50 -0700 Subject: [PATCH 054/111] fix test --- Tests/Particles/SOAParticle/main.cpp | 61 +++++++++++++++++++--------- 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp index 45ccab8331b..17b7ccdc7c1 100644 --- a/Tests/Particles/SOAParticle/main.cpp +++ b/Tests/Particles/SOAParticle/main.cpp @@ -14,7 +14,29 @@ using namespace amrex; template class Allocator=DefaultAllocator> void addParticles () { - T_PC pc; + int is_per[BL_SPACEDIM]; + for (int i = 0; i < BL_SPACEDIM; i++) + is_per[i] = 1; + + RealBox real_box; + for (int n = 0; n < AMREX_SPACEDIM; n++) + { + real_box.setLo(n, 0.0); + real_box.setHi(n, 100.0); + } + + IntVect domain_lo(AMREX_D_DECL(0, 0, 0)); + IntVect domain_hi(AMREX_D_DECL(127, 127, 127)); + const Box base_domain(domain_lo, domain_hi); + + Geometry geom(base_domain, &real_box, CoordSys::cartesian, is_per); + BoxArray ba(base_domain); + ba.maxSize(64); + + DistributionMapping dm(ba); + + T_PC pc(geom, dm, ba); + int const NReal = pc.NStructReal; int const NInt = pc.NStructInt; int const NArrayReal = pc.NArrayReal; @@ -29,6 +51,7 @@ void addParticles () const int add_num_particles = 5; auto& ptile1 = pc.DefineAndReturnParticleTile(0, 0, 0); + ptile1.resize(add_num_particles); for (int i = 0; i < add_num_particles; ++i) { @@ -66,26 +89,26 @@ void addParticles () auto& soa_int = pti.GetStructOfArrays().GetIntData(); // Iterating over old Particles - ParallelFor( np, [=] AMREX_GPU_DEVICE (long ip) - { - ParticleType& AMREX_RESTRICT p = aos_ptr[ip]; - p.pos(0) += 1; - p.pos(1) += 1; - p.pos(2) += 1; - - 
amrex::ParticleReal & AMREX_RESTRICT x = part_x[ip]; - amrex::ParticleReal & AMREX_RESTRICT y = part_y[ip]; - amrex::ParticleReal & AMREX_RESTRICT z = part_z[ip]; - amrex::ParticleReal & AMREX_RESTRICT a = part_aaa[ip]; - - x += 1.0; - y += 1.0; - z += 1.0; - a += 1.0; - }); + // ParallelFor( np, [=] AMREX_GPU_DEVICE (long ip) + // { + // ParticleType& AMREX_RESTRICT p = aos_ptr[ip]; + // p.pos(0) += 1; + // p.pos(1) += 1; + // p.pos(2) += 1; + + // amrex::ParticleReal & AMREX_RESTRICT x = part_x[ip]; + // amrex::ParticleReal & AMREX_RESTRICT y = part_y[ip]; + // amrex::ParticleReal & AMREX_RESTRICT z = part_z[ip]; + // amrex::ParticleReal & AMREX_RESTRICT a = part_aaa[ip]; + + // x += 1.0; + // y += 1.0; + // z += 1.0; + // a += 1.0; + // }); // Iterating over SoA Particles - ParticleTileDataType ptd = pti.GetParticleTile().getParticleTileData(); + ParticleTileDataType ptd = pti.GetParticleTile().getParticleTileData(); ParallelFor( np, [=] AMREX_GPU_DEVICE (long ip) { From 203346b2f8a07ae023161660aa63d8b770eafa82 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Fri, 14 Oct 2022 15:51:09 -0700 Subject: [PATCH 055/111] fix extraneous bracket --- Src/Particle/AMReX_ParticleContainerI.H | 1 - 1 file changed, 1 deletion(-) diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index 2de5ab5072f..316fccb277f 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -1877,7 +1877,6 @@ RedistributeMPI (std::map >& not_ours, int superparticle_size = particle_size + num_real_comm_comps*sizeof(ParticleReal) + num_int_comm_comps*sizeof(int); -} using buffer_type = unsigned long long; std::map > mpi_snd_data; From af5d227f756e603ba8566a4300575f0121d1f4d6 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Wed, 2 Nov 2022 14:19:36 -0700 Subject: [PATCH 056/111] Squashed commit of the following: commit 10e99fbeea248b6dfc34a6a4bcce2ba587a87ecf Merge: d03045d81 f1e1d6fb9 Author: Andrew Myers Date: 
Wed Nov 2 14:06:00 2022 -0700 Merge branch 'particle_soa_refactor' of github.com:Thierry992/amrex into HEAD commit d03045d8160f02fa8980f17f29d3dbe468348c12 Author: Andrew Myers Date: Wed Nov 2 14:04:23 2022 -0700 fix buffer pack / unpack commit d771fc8a34e88a6dd2896ebf9ce20e3b3a9a81a0 Author: Andrew Myers Date: Wed Nov 2 14:04:08 2022 -0700 revert to one int for each id for now commit f1e1d6fb97ae9c143da1c85f34079e563eb98ee6 Merge: 4dbfbac39 c4a4811c3 Author: Axel Huebl Date: Tue Nov 1 15:18:54 2022 -0500 Merge remote-tracking branch 'mainline/development' into particle_soa_refactor commit c4a4811c373d9b599bb710c7029365b1ca7f2c22 Author: Axel Huebl Date: Tue Nov 1 14:08:38 2022 -0500 C++17 Transition (#2992) ## Summary Update AMReX to require C++17 or newer. - [x] docs - [x] CMake - [x] GNUmake - [x] CI ## Additional background Requires a mature [C++17](https://en.wikipedia.org/wiki/C%2B%2B17) compiler, e.g., GCC 8, Clang 7, NVCC 11.0, MSVC 19.15 or newer. Already used since 1+ year in production by downstream codes such as Castro and WarpX. Needed for modernization and new features such as #2878 Co-authored-by: Weiqun Zhang commit d2b82938c171a4b1ada48839ed6891b5b0183b43 Author: Weiqun Zhang Date: Tue Nov 1 09:01:54 2022 -0700 Update CHANGES for 22.11 (#3006) commit 5ec270b4d534a486aeabf478ae553f1df53f2e5b Author: Weiqun Zhang Date: Tue Nov 1 08:59:44 2022 -0700 Fix compilation for PETSc (#3005) We cannot include PETSc headers too early because it might redefine MPI routines as macros (https://github.com/petsc/petsc/blob/main/include/petsclog.h#L441). They break MPI calls like below, MPI_Allreduce(&tmp, &vi, 1, ParallelDescriptor::Mpi_typemap::type(), ParallelDescriptor::Mpi_op>(), comm); because of the `,` in `>`. 
commit 735c3513153f1d06f783e64f455816be85fb3602 Author: Weiqun Zhang Date: Sat Oct 29 10:57:23 2022 -0700 MPI Reduce for ValLocPair (#3003) Add ParallelReduce::Min, ParallelReduce::Max, ParallelAllReduce::Min, and ParallelAllReduce::Max for ValLocPair, where TV and TI are types that have corresponding MPI types (e.g., int, Real, IntVect, Box, etc.). commit 3ec07681574afa658f4f53117d7ab618459a514b Author: Axel Huebl Date: Wed Oct 26 16:49:40 2022 -0700 `FabArray::isDefined` (#2997) ## Summary Add a new query to `define_function_called`. ## Additional background This is a cheaper check than `ok()` for finding out if a MultiFab has been allocated or not yet, assuming that the calling code follows the convention that `define()` is called collectively. Update: It turns out you can also call `empty` inherited from `FabArrayBase`. The new API is quite explicit, which is ok, too. Co-authored-by: Weiqun Zhang commit 7f3c90893d70ca33c6dec499436dd503c77eeddf Author: Weiqun Zhang Date: Wed Oct 26 16:40:16 2022 -0700 Make The_Device_Arena non-managed (#2998) The_Device_Arena used to be a separate Arena. We changed it to be an alias of The_Arena to avoid memory fragmentation. However, the issue is we don't have an Arena that can allocate non-managed memory unless The_Arena is not managed. Because of performance concerns, we sometimes want to allocate non-managed memory. Therefore, we make The_Device_Arena an alias if and only if The_Arena is not managed. commit ab8c892e1dd8943a6f0f759693757c6a186668a7 Author: Weiqun Zhang Date: Wed Oct 26 15:59:39 2022 -0700 Add alias template Gpu::NonManagedDeviceVector (#2999) commit b3e0a62ba4d8c66b7cc40ab439b94835a5f4247c Author: Weiqun Zhang Date: Wed Oct 26 15:02:13 2022 -0700 Pre- and Post-interpolation hook interface (#2991) Support both Fab and MultiFab versions of pre- and post-interpolation hooks. Because the pre-interp hook might modify the data, we need to make a copy to avoid modifying cached coarse data. Close #2989. 
commit 3082028e42870b1ed37f0d26160ef078580511e3 Author: Weiqun Zhang Date: Wed Oct 19 19:24:10 2022 -0700 Update GitHub Actions (#2996) https://github.blog/changelog/2022-09-22-github-actions-all-actions-will-begin-running-on-node16-instead-of-node12/ ## Summary ## Additional background ## Checklist The proposed changes: - [ ] fix a bug or incorrect behavior in AMReX - [ ] add new capabilities to AMReX - [ ] changes answers in the test suite to more than roundoff level - [ ] are likely to significantly affect the results of downstream AMReX users - [ ] include documentation in the code and/or rst files, if appropriate commit 0b88bfd3718ab226fc4a03a6598d320976744346 Author: Weiqun Zhang Date: Wed Oct 19 13:39:18 2022 -0700 Add user defined BC types (#2995) Add BCType::user_1, BCType::user_2 and BCType::user_3. Previously the only "user" type was ext_dir (external Dirichlet). The BC types are passed from the user's code to FillPatch, which in turn passes them back to the user provided BC filling function. These new types will make it easy for the user to determine the user defined BC types in their BC filling functions. commit 9502b99cd98cc1aa70e5f19804c50252438ec1a6 Author: Weiqun Zhang Date: Tue Oct 18 10:20:06 2022 -0700 Add BCRec::set for convenience (#2993) commit 4dbfbac39594c44dc6611af9657d4c4443c62327 Author: Thierry Antoun Date: Mon Oct 17 15:05:54 2022 -0700 Adding AMReX_RESTRICT for GPU Test commit 7051a6cdf1b1a6c21f8d48d8708530b3d2255342 Author: Thierry Antoun Date: Mon Oct 17 15:03:19 2022 -0700 Modifying RedistributeMPI to make it work with 2 ranks commit 56b6402d238979fca6e7c57fdc644a54c4cf6fce Author: Weiqun Zhang Date: Sat Oct 15 14:59:38 2022 -0700 ParallelFor with compile time optimization of kernels with run time parameters (#2954) Branches inside ParallelFor can be very expensive.
If a branch uses a lot of resources (e.g., registers), it can significantly affect the performance even if at run time the branch is never executed because it affects the GPU occupancy. For CPUs, it can affect vectorization of the kernel. The new ParallelFor functions use C++17 fold expressions to generate kernel launches for all run time variants. Only one will be executed. Which one is chosen at run time depends on the run time parameters. The kernel function can use constexpr if to discard unused code blocks for better run time performance. Here are two examples of how to use them. int runtime_option = ...; enum All_options : int { A0, A1, A2, A3}; // Four ParallelFors will be generated. ParallelFor(TypeList<CompileTimeOptions<A0,A1,A2,A3>>{}, {runtime_option}, box, [=] AMREX_GPU_DEVICE (int i, int j, int k, auto control) { ... if constexpr (control.value == A0) { ... } else if constexpr (control.value == A1) { ... } else if constexpr (control.value == A2) { ... } else { ... } ... }); and int A_runtime_option = ...; int B_runtime_option = ...; enum A_options : int { A0, A1, A2, A3}; enum B_options : int { B0, B1 }; // 4*2=8 ParallelFors will be generated. ParallelFor(TypeList<CompileTimeOptions<A0,A1,A2,A3>, CompileTimeOptions<B0,B1>>{}, {A_runtime_option, B_runtime_option}, N, [=] AMREX_GPU_DEVICE (int i, auto A_control, auto B_control) { ... if constexpr (A_control.value == A0) { ... } else if constexpr (A_control.value == A1) { ... } else if constexpr (A_control.value == A2) { ... } else { ... } if constexpr (A_control.value != A3 && B_control.value == B1) { ... } ... }); Note that due to a limitation of CUDA's extended device lambda, the constexpr if block cannot be the one that captures a variable first. If nvcc complains about it, you will have to manually capture it outside constexpr if. The data type for the parameters is int. Thanks to Maikel Nadolski and Alex Sinn for showing us the meta-programming techniques used here.
commit bcbf17f1cee4cd3209552cd0cafb2558c9254f20 Author: Weiqun Zhang Date: Fri Oct 14 19:48:14 2022 -0700 2D RZ solver for WarpX: Arbitrary coefficient (#2986) The assumption in the 2D RZ solver for WarpX used to be there was no sigma_r (i.e., sigma_r == 1). In this PR, we allow arbitrary sigma_r coefficient. commit 9a3cd5d985ad357ab78d8f06f397cfc741448fdc Author: Axel Huebl Date: Fri Oct 14 17:27:41 2022 -0700 CMake Docs: Fix User-Guidance (Link) (#2990) Update the user-guidance on CMake dependency linking to CMake 3.0+ (anno. 2014+). Seen in #2978 commit 1ad4144668b0656d42950be92936073c64c56db7 Author: Weiqun Zhang Date: Fri Oct 14 10:36:17 2022 -0700 Runge-Kutta support for AMR (#2974) This adds RK2, RK3 and RK4 in a new namespace RungeKutta. Together with the enhanced FillPatcher class, these functions can be used for RK time stepping in AMR simulations. A new function AmrLevel::RK is added for AmrLevel based codes. See CNS::advance in Tests/GPU/CNS/CNS_advance.cpp for an example of using the new AmrLevel::RK function. The main motivation for this PR is that ghost cell filling for high order (> 2) RK methods at coarse/fine boundary is non-trivial when there is subcycling. Co-authored-by: Jean M. Sexton commit c841ae81ddd519c088b29523aa71b6b280da440e Author: Weiqun Zhang Date: Fri Oct 14 10:03:34 2022 -0700 Fourth-order interpolation from fine to coarse level (#2987) For fourth-order finite-difference methods with data at cell centers, we cannot use the usual averageDown function to overwrite coarse level data with fine data. We actually need to do interpolation. commit 975b830a012e4677d070b46d2f92353c117ad65a Author: Weiqun Zhang Date: Fri Oct 14 09:53:22 2022 -0700 Fix EB data inconsistency when fixing small cells and multiple cuts (#2943) ## Summary For consistency, we need to call the function that zeros out the level set even if that box does not have any small cells or multiple cuts. This is because a node could exist in multiple boxes. 
Furthermore, a covered cell or covered face may have a node with a level set < 0. ## Additional background This is usually not an issue. However, in WarpX, we use the level set to decide whether a node is an unknown in the linear system. The inconsistency makes the solver fail in some cases. commit 9c2264bb5ff60b353250b3654866aef06f93bdcc Author: Axel Huebl Date: Fri Oct 14 07:41:06 2022 -0700 `MFIter::Finalize`: Free `m_fa` (#2988) This `free` should potentially not be delayed until the destructor is called. Follow-up to #2985 #2983 commit f84c7a8f77d6f80f6f8ba4ee9161ee5a73a839a5 Author: Weiqun Zhang Date: Wed Oct 12 10:44:11 2022 -0700 Fix MLMG::getGradSolution & getFluxes for inhomogeneous Neumann and Robin BC (#2984) Because of the way how inhomogeneous and Robin BC are handled, we must add the inhomogeneous fluxes back, otherwise they would be zero at those boundaries. commit ed1ecd62acb3fd7d39b8a23aa4e9ad09669741bb Author: Axel Huebl Date: Wed Oct 12 08:46:34 2022 -0700 MFIter: Make Finalize Public (#2985) Follow-up to #2983 commit 5acfe07a830305cc7cbafd1e5dd26e3c3598435b Author: Axel Huebl Date: Tue Oct 11 14:51:48 2022 -0700 MFIter::Finalize (#2983) Add a Finalize function to MFIter. The idea about this is, that we can call this already before destruction in Python, where `for` loops do not create scope. This function must be robust enough to be called again in the constructor (or we need to add an extra bool to guard that it is not called again in the destructor). Co-authored-by: Weiqun Zhang commit 53e34d17913cc76bdd4bbaad1582dd1b04058914 Author: Andy Nonaka Date: Tue Oct 11 12:00:34 2022 -0700 fix docs; Robin BC's for MLMG (#2982) Update the MLMG Robin BC description in the docs. commit 0019b3a41065caf6d9486000b9c6fbf86ad9837e Author: Weiqun Zhang Date: Tue Oct 11 11:00:13 2022 -0700 MLLinOp::postSolve (#2981) Add a virtual function MLLinOp::postSolve. 
This allows WarpX to set EB covered nodes to prescribed values in the solver's output for visualization purpose. commit 2d87a4c8ad5d375008ee9b1c23a50404fe0dfa21 Author: Brandon Runnels Date: Mon Oct 10 09:49:29 2022 -0600 add templating for the cell bilinear interpolators (#2979) This templates the `mf_cell_bilin_interp` functions so that the interpolators can be used with `BaseFab`s of arbitrary type. commit e4ab0485621d5566c96cae58a816860ee7d4997f Author: Weiqun Zhang Date: Wed Oct 5 12:03:41 2022 -0700 FillPatcher class (#2972) This adds a class FillPatcher for filling fine level data. It's not as general as the various FillPatch functions (e.g., FillPatchTwoLevels). However, it can reduce the amount of communication data. Suppose we use RK2 with subcycling and the refinement ratio is 2. For each step on level 0, there are two steps on level 1. With RK2, each fine step needs to call FillPatch twice. So the total number of FillPatch calls is 4 in the two fine steps. Using the free function, one ParallelCopy per FillPatch call is needed for copying coarse data for spatial interpolation. With the FillPatcher class, two ParallelCopy calls will be done to copy old and new coarse data. Then these data will be used in the four FillPatcher::fill calls. This new approach saves two ParallelCopy calls per coarse step for a two levels run. It could save more if the time stepping requires more substeps or the refinement ratio is higher. Note that many of our AMReX codes use a time stepping algorithm that needs only one FillPatch call per step. For those codes, this new approach will not save any communication for a refinement ratio of 2. However, it will save communication when the refinement ratio is 4. commit 1bc4e4eb5a25f4bdf9933695ead86f17dfdee9ed Author: Weiqun Zhang Date: Mon Oct 3 16:50:45 2022 -0700 Remove sycl namespace alias (#2971) This causes a conflict with new compilers. 
commit de7b7f44afda2227368a30646faeeea0d4679bec Author: Weiqun Zhang Date: Mon Oct 3 14:06:58 2022 -0700 Fix Tensor Solver BC (#2930) This fixes some bugs in the physical domain BC of tensor linear solver. At the corner of two no-slip walls (e.g., (0,0)), we have u(-1,0) = -u(0,0) and u(0,-1) = -u(0,0). It's incorrect to fill the corner ghost cell with u(-1,-1) = u(-1,0) + u(0,-1) - u(0,0), because it will result in u(-1,-1) = -3 * u(0,0). In the old approach, to avoid branches in computing transverse derivatives on cell faces, we fill the ghost cells first. For example, to compute du/dy at the lo-x boundary, we use the data in i = -1 and 0, just like we compute du/dy(i) using u(i-1) and u(i) for interior faces. The problem is the normal velocity in the ghost cells outside a wall is filled with extrapolation of the Dirichlet value (which is zero) and more than 1 interior cells. Because of the high-order extrapolation, u(-1) != -u(0). This is the desired approach for computing du/dx on the wall. However, this produces incorrect results in dudy. In the new approach, we explicitly handle the boundaries in the derivative stencil. For example, to compute transverse derivatives on an inflow face, we use the boundary values directly. Co-authored-by: cgilet commit 13aa4df0f5a4af40270963ad5b42ac7ce662e045 Author: Weiqun Zhang Date: Fri Sep 30 17:48:22 2022 -0700 Disable host device for macros for SYCL/DPC++ (#2969) The host part of the AMREX_HOST_DEVICE_FOR_* macros is disabled for SYCL/DPC++. It's really slow for compilation. 
commit 62379fbac96867437070c4852d3d741a76dc1a4b Author: Weiqun Zhang Date: Fri Sep 30 15:37:35 2022 -0700 Update CHANGES for 22.10 (#2968) commit d65e09e4a85dd2765a8cbe0ac9eba6223c47121b Author: Roberto Porcu <53792251+rporcu@users.noreply.github.com> Date: Thu Sep 29 15:46:19 2022 -0400 Solve an issue with particles async IO when having runtime added variables (#2966) commit cd07b0d84244d08cf2690a19e0312f349ec0aeaa Author: Weiqun Zhang Date: Wed Sep 28 09:20:42 2022 -0700 Fix int overflow in amrex::bisect (#2964) Change from (lo+hi)/2 to lo+(hi-lo)/2. Although it's very unlikely, it's possible (lo+hi), where both lo and hi are integers, could overflow. commit e55d6b4f5375efb22ebed9b467878e301763073b Author: Junghyeon Park Date: Thu Sep 29 01:20:15 2022 +0900 Update the SWFFT project site (#2965) commit b84d7c069cef7470f195b250926ca0e84ec46fb2 Author: Weiqun Zhang Date: Mon Sep 26 16:05:10 2022 -0700 Fix MLEBNodeFDLaplacian bottom solver (#2963) MLEBNodeFDLaplacian is never singular because it has Dirichlet boundary on the EB surface. We did set the singular flag to false, but forgot about the bottom solver used a different function to query. This fixes it by overriding the isBottomSingular function. commit 5e84f43241edfec7754d3ebfc369154bf249d992 Author: asalmgren Date: Sun Sep 25 09:38:51 2022 -0700 make tagging routines EB_aware (#2962) commit 8b367b0071787f8688d6f7eac55f7be251de6841 Author: Weiqun Zhang Date: Sun Sep 25 09:22:13 2022 -0700 Volume weighted sum (#2961) Add a new function doing volume weighted sum across AMR levels. This may not be exactly what amrex application codes want. But it should work for many cases. commit 2a3cc05dac916961b1a5ae4c18b21bacd889e7fc Author: Weiqun Zhang Date: Fri Sep 23 12:24:05 2022 -0700 CellData: data in a single cell (#2959) This adds struct CellData that allows for accessing data in a single cell in Array4. This is convenient sometimes because one can omit the i, j and k indices. 
It might also be faster sometimes because it can skip the repeated index calculation involving i,j,k. commit 27ef10654c4810fc7cfc0f941a3eec67b018bf34 Author: Weiqun Zhang Date: Fri Sep 23 12:23:34 2022 -0700 Quartic interpolation for cell centered data (#2960) New Interpolator for interpolation of cell centered data using a fourth-degree polynomial. Note that the interpolation is not conservative and does not do any slope limiting. commit c4b7982d067497cc97ccb501ec08720b404d957e Author: Luca Fedeli Date: Fri Sep 23 21:17:12 2022 +0200 Add GPU-compatible upper bound and lower bound algorithms to AMReX_Algorithm (#2958) commit 3e5cc778028030ecb06bb079c5a6045f8f5fba6e Author: Don E. Willcox Date: Tue Sep 20 17:59:48 2022 -0700 add option for makebuildsources to specify the style arguments for 'git describe'. (#2957) commit a6e0c11989d34b976245db5719eedd0e9040f264 Author: Weiqun Zhang Date: Tue Sep 20 10:01:21 2022 -0700 Add more warnings (#2956) * Add -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches to gcc. * Add -Wnon-virtual-dtor to clang. * Add more warnings to CI. * Fix some non-virtual dtors and some other warnings. commit 826cd378f8ba0d844c64e1029f7914c3b066debd Author: Phil Miller Date: Thu Sep 15 17:26:00 2022 -0700 Add roundoff_lo corresponding to roundoff_hi for domains that don't start at 0 (#2950) * Lay groundwork for roundoff_lo * Add dummy implementation of roundoff_lo computation * implement bisect_prob_lo * change idx -> dxinv * use rlo instead of plo in locateParticle Co-authored-by: atmyers commit 6a5a0561076f62af588d5a3d54f0deb232f3a6af Author: Weiqun Zhang Date: Thu Sep 15 13:23:40 2022 -0700 Add template parameter to ParallelFor and launch specifying block size (#2947) By default, amrex::ParallelFor launches AMREX_GPU_MAX_THREADS threads per block. We can now explicitly specify the block size with `ParallelFor<BLOCK_SIZE>(...)`, where BLOCK_SIZE should be a multiple of the warp size (e.g., 64, 128, etc.).
A similar change has also been made to `launch`. The changes are backward compatible. commit 2cdb9df08e4668bbc9a9b6560217514518f41573 Author: Andrew Myers Date: Thu Sep 15 10:55:41 2022 -0700 Byte spread fixes (#2949) commit 17c94cc196d779e9f7ec48f7d004088a1c1e11c6 Author: Candace Gilet Date: Wed Sep 14 11:49:35 2022 -0400 Correct MultiFab::norm0 doxygen brief description (#2946) commit 0351c9958be7fdc7e3e0c419fc68d36a0c00f288 Author: Axel Huebl Date: Wed Sep 14 08:48:25 2022 -0700 CMake: HIP_PATH from ROCM_PATH (#2948) * On machines like Crusher, `ROCM_PATH` is more likely to be available than a `HIP_PATH` environment variable. This is mainly needed for our hacky ROCTX hints. * ROCTX: New Include Supposedly, there is a new include we shall use: Ref.: https://github.com/ROCm-Developer-Tools/roctracer/issues/79 * ROCtracer: Include as System library Because of GNU extensions in the roctracer include files for the legacy include. But we should make this `-isystem` anyway to be robust for the future.
The 5.2 deprecated include file `` throws warnings because they rely on GNU extensions: ``` In file included from /opt/rocm/hip/../roctracer/include/ext/prof_protocol.h:27: /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:70:7: warning: anonymous structs are a GNU extension [-Wgnu-anonymous-struct] struct { ^ /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:70:7: warning: anonymous types declared in an anonymous union are an extension [-Wnested-anon-types] /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:75:7: warning: anonymous types declared in an anonymous union are an extension [-Wnested-anon-types] struct { ^ /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:82:7: warning: anonymous structs are a GNU extension [-Wgnu-anonymous-struct] struct { ^ /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:86:7: warning: anonymous structs are a GNU extension [-Wgnu-anonymous-struct] struct { ^ /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:90:7: warning: anonymous structs are a GNU extension [-Wgnu-anonymous-struct] struct { ^ /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:82:7: warning: anonymous types declared in an anonymous union are an extension [-Wnested-anon-types] struct { ^ /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:86:7: warning: anonymous types declared in an anonymous union are an extension [-Wnested-anon-types] struct { ^ /opt/rocm/hip/../roctracer/include/ext/../../../include/roctracer/ext/prof_protocol.h:90:7: warning: anonymous types declared in an anonymous union are an extension [-Wnested-anon-types] struct { ^ ``` * GNUmake: Update Includes in `hip.mak` Use public prefix. 
commit 9aa23c202a13eee489a06030b9aeda6b89856944 Author: Cody Balos Date: Mon Sep 12 11:49:37 2022 -0700 Fix minor typo in fcompare docs (#2945) commit bfbd68f4ed31ca07572be9bf138a59cacb7e800c Author: Axel Huebl Date: Mon Sep 12 11:40:55 2022 -0700 Fix: Make Finalize->Initialize->F->I->... Work (#2944) Fix assertions in Arena::Initialize. The_BArena never dies (tm) Co-authored-by: Weiqun Zhang commit 67384701a808ca973ad2c24ec86cee4c7a81fd05 Author: Weiqun Zhang Date: Wed Sep 7 14:12:34 2022 -0700 Changes for Cray & Clang (#2941) * It seems that the new Cray compilers no longer define `_CRAYC`. However, they do define `__cray__`. * For Clang based Cray compilers, use -O3 instead of -O2 for optimization. * Clang's vectorization pragma is very aggressive. For some codes, it makes ParallelFor with many if statements on CPU much slower than without vectorization. Unfortunately, it does not have an ivdep pragma. So we disable AMREX_PRAGMA for clang for safety. * No longer need to use -Wno-pass-failed for Clang based compilers. commit 5b0c598cc71a5e914bfc4dbb7ea44313d45c8f57 Author: Weiqun Zhang Date: Wed Sep 7 09:42:57 2022 -0700 Fix a warning in packing communication send buffer (#2940) When we communicate double precision data in single precision, there is a conversion from double to float in packing the send buffer. A static cast is added to fix the warning.
commit 3e397bb6ba2854245a10d49a5ee37e1ba9f33f0e Author: Weiqun Zhang Date: Wed Sep 7 09:13:53 2022 -0700 Link to cublas when using CUDA and Hypre (#2933) commit 9525ea8892b9c0910acc2bf2ae8950f6068c34e5 Author: Weiqun Zhang Date: Wed Sep 7 09:13:20 2022 -0700 HIP: use coarse grained host memory (#2932) commit 7e040166efc8208e60d8796d4d99b1dd47146ef2 Author: Marco Garten Date: Wed Sep 7 08:53:20 2022 -0700 Update Testing Docs (#2937) - document `abort_on_unused_inputs` - remove duplicate superfluous argument in regtest call commit 539427a19b20e49c4f7399c8ea0b0515fb5c79a0 Author: drangara <69211175+drangara@users.noreply.github.com> Date: Tue Sep 6 18:13:42 2022 -0400 EB checkpoint files (#2897) * support for loading EB from checkpoint file * add support for writing chkpt file as well Co-authored-by: Weiqun Zhang commit 35ed6b4d343215c1ccf6e4d0a59813fc236c9f22 Author: Axel Huebl Date: Tue Sep 6 15:07:16 2022 -0700 Fix: Loading Files Again (#2936) This allows `amrex::ParmParse::addfile` to be called multiple times. Before this, we accidentally overwrote the `FILE` static keyword. Follow-up to #2842 commit 8f8198c2fb1868704d2b4d14b5b93d8d1d264ea0 Author: hengjiew <86926839+hengjiew@users.noreply.github.com> Date: Tue Sep 6 13:36:35 2022 -0400 Check if boundary particles container has been created before clearance. (#2935) This fixes a segmentation fault when using more GPUs for updating particles than fluid. commit fb0b31e1439b089074514f45ae900af257c66dba Author: Nuno Miguel Nobre Date: Sun Sep 4 05:18:49 2022 +0100 SYCL: Replace deprecated atomic types and operations (#2921) * SYCL: Replace deprecated atomic types and operations * Change atomic refs to device memory scope When using the relaxed memory order, the memory scope is ignored. Thus, for cosmetic reasons only, we set the memory scope to device, the broadest option when using the global address space.
Co-authored-by: Weiqun Zhang commit cc3cd1470254d37f0cea4f212c2b0f6ffa8d0bee Author: Weiqun Zhang Date: Thu Sep 1 07:39:25 2022 -0700 Update CHANGES for 22.09 (#2934) commit acc223f9918284e7d8e595d3861c5e456d84a968 Author: Weiqun Zhang Date: Tue Aug 30 16:04:43 2022 -0700 Add hypre as an option for OpenBCSolver (#2931) commit 3d29fd7d0e816f3c436112d90bdefe815e0ff72a Author: hengjiew <86926839+hengjiew@users.noreply.github.com> Date: Wed Aug 24 16:10:22 2022 -0400 Preserve neighbor particles when sorting particles. (#2923) commit 8294c3afbcbbc503f77e493196d380fbe1666d02 Author: Weiqun Zhang Date: Mon Aug 22 10:46:05 2022 -0700 Scope of NonLocalBC::ParallelCopy (#2922) Make NonLocalBC::ParallelCopy accessible in namespace amrex, because it can be useful in situations other than non-local BC. commit 0911fc4b2e066209a590c330bf2ddf7178dca76b Author: Weiqun Zhang Date: Sun Aug 21 18:13:07 2022 -0700 Open Boundary Poisson Solver (#2912) This adds an open boundary Poisson solver based on James's algorithm. To use it, the user builds an amrex::OpenBCSolver object, which can be reused until the grids change, and then calls OpenBCSolver::solver. Currently, this is for 3D cell-centered data only. The solver works on CPU, Nvidia GPUs, and AMD GPUs. The SYCL versions of a couple of kernels for Intel GPUs are yet to be implemented. commit f270b3d5db8f8b7ab010bc9134632361b8a9009c Author: Marc T. Henry de Frahan Date: Thu Aug 18 13:51:56 2022 -0600 Fix OOB access of ref ratio on HDF write header (#2919) commit fa8e20f946b661bd49af2a60898ffca2c5b21cff Author: Jean M. Sexton Date: Thu Aug 18 08:57:51 2022 -0700 Add Polaris to GNUMake (#2908) commit bd5f6a9f6a1a3a66c51eefd7950432d3bf3319a1 Author: Axel Huebl Date: Mon Aug 15 14:24:21 2022 -0700 Export GpuDevice Globals (#2918) * Export GpuDevice Globals Implement symbol export via `AMREX_EXPORT` for the global variables in `Src/Base/AMReX_GpuDevice.H`.
Follow-up to #1847 #1847 Fix #2917 * Fix: Export `AMReX::m_instance` commit 4f639294606d47185d31eaee4af66fc6b590e5a2 Author: asalmgren Date: Sat Aug 13 09:00:02 2022 -0700 enable LinOp to use the right Factory (fixes moving geometry problem) (#2916) commit 659351846da6f930b4f04cc6cd6b9f78e7752e8a Author: Andrew Myers Date: Thu Aug 11 15:24:16 2022 -0700 Use 1 atomic instead of two per item in DenseBins::build (#2911) commit d295f2299101705f7c470c813b80542296087328 Author: Nuno Miguel Nobre Date: Thu Aug 11 03:40:09 2022 +0100 [SYCL] Remove amrex::oneapi and update deprecated device descriptors (#2910) * Remove amrex::oneapi in favour of standard features * Change deprecated device descriptors commit 1bda173b489024d5f4ec79627f3f612c350e521f Author: Axel Huebl Date: Wed Aug 10 15:46:43 2022 -0600 Add: `MultiFab::sum_unique` (#2909) This provides a new method to sum values in a `MultiFab`. For non-cell-centered data, `MultiFab::sum` double counts box boundary values that are owned by multiple boxes. This provides a function that does not double count these and provides a quick way to get only the sum of physically unique values. Co-authored-by: Weiqun Zhang commit 3f715d29c94b473e624aa9ff3fea9b502da25f97 Author: Candace Gilet Date: Mon Aug 8 14:40:28 2022 -0400 In MLMG::mgFcycle, assert that for EB the linop is cell-centered. (#2905) commit 59b0742b9b8c543b2896d76dd07e03c2fe4f1f94 Author: hengjiew <86926839+hengjiew@users.noreply.github.com> Date: Mon Aug 8 14:17:57 2022 -0400 Clear the boundary particle indices' container before updating it. (#2907) This avoids potential segmentation faults when one grid's particles all move to other grids. commit 103db6ebe2b570910ac4dbd7d6611e59d80f1a0b Author: Weiqun Zhang Date: Fri Aug 5 15:25:33 2022 -0700 EB: Add Fine Levels (#2881) Add a new function EB2::addFineLevels() that can be used to add more fine levels to the existing EB IndexSpace without changing the coarse levels. 
This is useful for restarting with a larger amr.max_level. The issue is we build EB at the finest level first and then coarsen it to the coarse levels. If the restart run has a different finest level, the EB on the coarse levels could be different without using this new capability. commit 6ebf8ffc2689e23ff2686627e660caf0a10ea315 Author: Jon Rood Date: Thu Aug 4 14:32:59 2022 -0600 Add rpath to lib64 for ZFP. (#2902) commit ed23627d6487306e26b37ed9a97d60fd8148a935 Author: Yadong_Zeng <30739800+ruohai0925@users.noreply.github.com> Date: Thu Aug 4 16:32:21 2022 -0400 change data types from double to amrex::Real, and thus we can use single precision for the hypre IJ interface (#2896) Co-authored-by: yzeng commit 9ed4f5955b1d5d0e400fd2f233e5e7b83db4e41b Author: Weiqun Zhang Date: Wed Aug 3 16:53:20 2022 -0700 Fix a new bug introduced in #2858 (#2901) We need to take into account that `amrex::Any` stores `MultiFab&` or `MultiFab const&`. commit 6eaab8c1c9f0e2a21531526dfd170ebe3aad507b Author: Weiqun Zhang Date: Wed Aug 3 13:39:44 2022 -0700 MPMD Support (#2895) Add support for multiple programs multiple data (MPMD). For now, we assume there are only two programs (i.e., executables) in the MPMD mode. During the initialization, MPI_COMM_WORLD is split into two communicators. The MPMD::Copier class can be used to copy FabArray/MultiFab data between two programs. This new capability can be used by FHDeX to couple FHD with SPPARKS. commit 94693291667bd0435819aa09cf28a293da226bf4 Author: Weiqun Zhang Date: Mon Aug 1 09:43:21 2022 -0700 MLMG interface (#2858) These changes are made to support a generic type (i.e., amrex::Any) in MLMG. This is still work in progress. But it should not break any existing codes. 
commit 5a3b3037950937343b7eafd292e5032cb8c7221c Author: Weiqun Zhang Date: Mon Aug 1 09:34:44 2022 -0700 Update CHANGES for 22.08 (#2894) commit 48702b48836d9aeb0db931e23ea9cc7d4ad4ccdc Author: hengjiew <86926839+hengjiew@users.noreply.github.com> Date: Thu Jul 28 14:14:19 2022 -0400 Let `selectActualNeighbors` return right after starting if there are (#2886) no particles for communication. commit 6a47d89fd12cb06d48e3e0d85eea415274e84a69 Author: kngott Date: Wed Jul 27 17:03:04 2022 -0700 Add Comm Sync to Redistribute (#2891) commit 51542c85ac18642a2cfb69ea3df3cf544d3d6f42 Author: philip-blakely <46958218+philip-blakely@users.noreply.github.com> Date: Wed Jul 27 17:29:26 2022 +0100 Multi-materials and derived variable output (#2888) ## Summary Output small plots if only derived variables are specified. Also, make DeriveFuncFab a std::function<> instead of plain function-pointer. ## Additional background We have been implementing small-plots for outputting variables at gauges (e.g. pressure at specific gauge locations). We may want to output the derived variable pressure only, and not all state-variables. The if-condition was incorrect in this case. Further, multi-material simulations require a material index in order to compute derived variables, in addition to existing parameters. Making DeriveFuncFab a std::function is sufficient for our purposes. commit ce0fb7412dff3ceeec00941ba525e7ecf5ce8015 Author: Andrew Myers Date: Tue Jul 26 16:20:38 2022 -0700 Fix host / device sync bug in PODVector (#2890) commit 06753e60aca7d063b28be93379c948e92afb8c5e Author: Axel Huebl Date: Tue Jul 26 12:54:35 2022 -0700 `TagBoxArray::collate`: Fujitsu Clang (#2889) `mpiFCC -Nclang` only defines `__CLANG_FUJITSU`, not `__FUJITSU` as in the classic compiler mode.
commit 7cf77dc60e149ebe822f6b5428556f9208e150fa Author: Weiqun Zhang Date: Tue Jul 26 11:01:21 2022 -0700 MinLoc and MaxLoc Support (#2885) Add struct ValLocPair that can be used by ReduceOps/ReduceData and ParReduce to find the location of the min/max value. Add warp shuffle down function for more general types. This is needed for MinLoc/MaxLoc with CUDA < 11, because we don't use CUB for earlier versions of CUDA. The Intel GPU support is not done yet. We need to allocate enough shared local memory when the size of ValLocPair is larger than the size of unsigned long long. commit 4b7e20057a3dff84beae21812d826d24e19f2109 Author: Weiqun Zhang Date: Thu Jul 21 10:25:57 2022 -0700 HIP: Remove the call to hipDeviceSetSharedMemConfig (#2884) AMD devices do not support shared cache banking. Thanks @afanfa for reporting this. (#2883) commit 8e40952af9ab0600174f491c81100132f9b24c6e Author: Weiqun Zhang Date: Wed Jul 20 12:10:26 2022 -0700 Add Frontier to GNU Make (#2879) commit b673d81723c5585a1290126233d38f50833939d7 Author: Max Katz Date: Mon Jul 18 15:14:19 2022 -0400 Add option to derefine to AMRErrorTag (#2875) This allows a refinement field to specify *derefinement* (by setting a zone's tagging value to the clear value). commit 73dbf2f909bdc6c497eb5245b4e707b4814e699f Author: hengjiew <86926839+hengjiew@users.noreply.github.com> Date: Mon Jul 18 12:53:35 2022 -0400 Fix the segmentation fault in selecting actual neighbor particles. (#2877) commit 40b3d2176b17785191050482a2ead5539993fac6 Author: Weiqun Zhang Date: Wed Jul 13 13:24:15 2022 -0700 Add extra braces in initialization of GpuArray (#2876) It should not be needed since C++14. But some compilers seem to need the double braces. 
commit a633d2bff1db1a3335efd077a34b6a8dcfb4e793 Author: Luca Fedeli Date: Fri Jul 8 20:34:18 2022 +0200 Workaround to bypass issue observed at very large scale with Fujitsu MPI (#2874) We have observed some MPI issues at very large scale when WarpX is compiled using Fujitsu MPI (i.e., with the Fujitsu compiler). These issues seem to be related to the use of MPI Gatherv with MPI_Datatype. This PR implements a possible workaround, initially proposed by @WeiqunZhang . The idea is that, when WarpX is compiled with the Fujitsu compiler, simpler integer arrays instead of MPI_Datatype are used in the routine where the issue was observed. commit 7660c885d46779367344adf88af75e630a0bc77a Author: Weiqun Zhang Date: Fri Jul 8 08:48:14 2022 -0700 Allow zero components MultiFab and BaseFab (#2873) This is useful for particle I/O that does not have any mesh data. yt needs a header file associated with a MultiFab. commit c849dd1994388cebd78a6a1624e80bc3ab640970 Author: Weiqun Zhang Date: Fri Jul 8 08:06:37 2022 -0700 New EB optimization parameter: eb2.num_coarsen_opt (#2872) At the beginning of EB generation, we chop the entire finest domain into boxes and find out the type of the boxes. We then collect the completely covered boxes and cut boxes into two BoxArrays. This process can be costly because of the number of calls to the implicit functions. In this commit, we have introduced a new ParmParse parameter, eb2.num_coarsen_opt with a default value of zero. If for instance it is set to 3, we start the box type categorization at a resolution that is coarsened by a factor of 2^3. For the provisional cut boxes, we refine them by a factor of 2, Then we chop them into small boxes and categorize the new boxes. This process is performed recursively until we are at the original finest resolution. 
The users should be aware that, if eb2.num_coarsen_opt is too big, this could produce erroneous results because evaluating the implicit function on coarse boxes could miss fine structures in the EB. Thanks to Robert Marskar for sharing this algorithm. commit 557aae84902f63a84edc8b49831ee66af7d1a46a Author: Erik Date: Wed Jul 6 08:54:24 2022 -0700 point to new location of AMReX images, AMReX website repo (#2867) commit cbdc6580ee3d78cccdd37172e4ba077ee181f483 Author: Axel Huebl Date: Tue Jul 5 01:41:03 2022 +0200 SENSEI 4.0: Fix Build for Particles (#2869) ## Summary This part causes a compile error now in WarpX. cc @burlen @kwryankrattiger ## Additional background X-ref: Blocks WarpX 22.07 release https://github.com/ECP-WarpX/WarpX/pull/3211 Follow-up to: - #2785 - #2834 commit dc8b734b6a70583602150cfbee1b7d51f8dacdeb Author: Andrew Myers Date: Fri Jul 1 17:19:20 2022 -0700 Cache the neighbor comm tags for the CPU implementation of fillNeighbors. (#2862) * Cache the neighbor comm tags for the CPU implementation of fillNeighbors. * fix areMasksValid function commit 2b42fb56a96e752d301916ca23160098c5369386 Author: drangara <69211175+drangara@users.noreply.github.com> Date: Fri Jul 1 18:44:35 2022 -0400 Remove some hard checks in check_mvmc for 3D (#2864) Removing some hard checks in 3D coarsening logic as it appears that those are not necessarily bad states, and a soft failure to coarsen should suffice. commit 19c70685cdb0c3322712e9f442092b1140cfe7ec Author: Erik Date: Fri Jul 1 18:24:24 2022 -0400 Carry over fix for ngbxy.smallEnd typo (#2868) This is a typo that got corrected in other places but didn't get fixed here. commit d736ef299b724b96b34d41103dfc5318d0ecdee4 Author: Weiqun Zhang Date: Fri Jul 1 11:00:15 2022 -0700 Update CHANGES for 22.07 (#2866) commit be813d024e6b314e41c727734b8e53481898e08e Author: Weiqun Zhang Date: Fri Jul 1 10:29:13 2022 -0700 Hypre: add version check (#2865) These HYPRE_SetSp* are only available in hypre >= 22500.
commit 8fb23ec17a58284af6bdafbcda3eea0d86d8ce69 Author: Jon Rood Date: Wed Jun 29 16:52:35 2022 -0600 Refactor Make.nrel to use MPT for MPI with the Intel compiler on Eagle. (#2861) commit 6f9a46c7e834046970d46d684927a078671355bc Author: PaulMullowney <60452402+PaulMullowney@users.noreply.github.com> Date: Wed Jun 29 11:09:57 2022 -0600 Adding control APIs and namespacing for core algorithm paths like SpGEMM, SpMV, and SpTrans. (#2859) Co-authored-by: Paul Mullowney commit e4c83cfddc8afb1bd091c45a6ad3040d23f019bc Author: Jon Rood Date: Wed Jun 29 11:08:42 2022 -0600 Add lib64 library location for ZFP since it may exist there instead of lib. (#2860) commit b2b9150ada12af878a07e0628be03668a9d17270 Author: Burlen Loring Date: Tue Jun 28 13:42:41 2022 -0700 update the SENSEI in situ coupling for SENSEI v4.0.0 (#2785) In this release, an install of VTK is no longer required. To compile AMReX w/ SENSEI use: ```cmake -DAMReX_SENSEI=ON -DSENSEI_DIR=//cmake ``` Note: may be `lib` or `lib64` or something else depending on your OS and is determined by CMake at configure time. See the CMake GNUInstallDirs documentation for more information. commit 2c5f475d451aede47fe2cad2bbd8681c9ca1f456 Author: Andrew Myers Date: Tue Jun 28 12:51:19 2022 -0700 Write runtime attribs to checkpoints on GPUs (#2856) commit d2cb54668b5e49fd35a60164f40ad6f36720f806 Author: Jon Rood Date: Tue Jun 28 13:27:02 2022 -0600 Fix gnu make on Crusher for mpi_gtl_hsa (#2857) Update environment variable at OLCF for mpi_gtl_hsa. commit 21fe4b3016a796b99c409760cfad7ae00a7475ba Author: Axel Huebl Date: Tue Jun 28 19:53:09 2022 +0200 CMake: FindDependency CUDAToolkit (#2849) If we install AMReX with CUDA support using a modern CMake, we need to repopulate targets such as `CUDA::curand` from `find_dependency` for downstream. Downstream users find us via `find_package` and that target link dependency showed up to be unpopulated in MFIX. 
commit 027f2ff77fed33a191cfc735d8adaabb42d21743 Author: Weiqun Zhang Date: Thu Jun 23 16:15:57 2022 -0700 Fix make help (#2854) This reverts the change in #2845, which fixed an issue with `make print-%`, but broke `make help`. This is now fixed in a different way. Both `make print-%` and `make help` should work now. commit 3d3ad213ca4b60421c9a80328e1316b23435958f Author: kngott Date: Thu Jun 23 13:39:59 2022 -0700 NERSC Programming Environment prototype (#2848) commit 487267625412e4f8a4fa1ab2492cb578955c4239 Author: Weiqun Zhang Date: Thu Jun 23 12:41:20 2022 -0700 GNU Make: No need to query mpif90 if Fortran is not used. (#2852) This minimizes potential issues. commit fc0d6469f4ad590d576a7109d8719b018838dd86 Author: Weiqun Zhang Date: Thu Jun 23 12:23:55 2022 -0700 Remove f90doc (#2851) We no longer use it. commit 5188a6a28e64dc627c3333d13bebeb0d7250b506 Author: Weiqun Zhang Date: Thu Jun 23 11:09:15 2022 -0700 Explicitly invoke python3 (#2850) According to PEP 394, a python distributor may choose to not provide the python command. In fact, that's what recent versions of macOS do.
commit 2d931f63cb4d611d0d23d694726889647f8a482d Author: Andrew Myers Date: Wed Jun 22 15:03:50 2022 -0500 Maintain the high end of the 'roundoff domain' in both float and double precision (#2839) * Maintain the high end of the 'roundoff domain' in both float and double precision * fix shadowing * fix warning * fix float conversion warning * fix logic * Update Src/Base/AMReX_Geometry.H * Update Src/Base/AMReX_Geometry.H --- .github/workflows/clang.yml | 32 +- .github/workflows/cuda.yml | 47 +- .../workflows/dependencies/dependencies.sh | 2 +- ...ncies_clang6.sh => dependencies_clang7.sh} | 4 +- .../dependencies/dependencies_gcc8.sh | 17 + .../dependencies/dependencies_nofortran.sh | 3 +- .../dependencies/dependencies_nvcc10.sh | 30 - .../dependencies/dependencies_nvcc11.sh | 7 +- .github/workflows/docs.yml | 2 +- .github/workflows/gcc.yml | 109 +- .github/workflows/hip.yml | 12 +- .github/workflows/intel.yml | 6 +- .github/workflows/macos.yml | 8 +- .github/workflows/sensei.yml | 8 +- .github/workflows/style.yml | 4 +- .github/workflows/windows.yml | 6 +- CHANGES | 233 ++ Docs/sphinx_documentation/source/Basics.rst | 8 +- .../source/BuildingAMReX.rst | 12 +- .../source/BuildingAMReX_Chapter.rst | 2 +- Docs/sphinx_documentation/source/GPU.rst | 13 +- .../source/LinearSolvers.rst | 22 +- .../source/Post_Processing.rst | 4 +- Docs/sphinx_documentation/source/SWFFT.rst | 2 +- Docs/sphinx_documentation/source/Testing.rst | 3 +- .../source/Visualization.rst | 10 +- GNUmakefile.in | 3 + INSTALL | 2 +- README.md | 4 +- Src/Amr/AMReX_Amr.cpp | 2 +- Src/Amr/AMReX_AmrLevel.H | 142 +- Src/Amr/AMReX_AmrLevel.cpp | 87 + Src/Amr/AMReX_Derive.H | 6 +- Src/Amr/AMReX_StateDescriptor.cpp | 65 +- Src/AmrCore/AMReX_ErrorList.H | 9 + Src/AmrCore/AMReX_ErrorList.cpp | 227 +- Src/AmrCore/AMReX_FillPatchUtil.H | 11 +- Src/AmrCore/AMReX_FillPatchUtil_I.H | 69 +- Src/AmrCore/AMReX_FillPatcher.H | 585 ++++ Src/AmrCore/AMReX_Interp_C.H | 48 + Src/AmrCore/AMReX_Interpolater.H | 69 + 
Src/AmrCore/AMReX_Interpolater.cpp | 93 + Src/AmrCore/AMReX_MFInterp_1D_C.H | 5 +- Src/AmrCore/AMReX_MFInterp_2D_C.H | 5 +- Src/AmrCore/AMReX_MFInterp_3D_C.H | 5 +- Src/AmrCore/AMReX_TagBox.cpp | 27 +- Src/AmrCore/CMakeLists.txt | 1 + Src/AmrCore/Make.package | 2 + Src/Base/AMReX.H | 2 +- Src/Base/AMReX.cpp | 22 +- Src/Base/AMReX_Algorithm.H | 53 +- Src/Base/AMReX_Any.H | 23 +- Src/Base/AMReX_Arena.cpp | 39 +- Src/Base/AMReX_Array4.H | 49 + Src/Base/AMReX_BCRec.H | 11 + Src/Base/AMReX_BC_TYPES.H | 6 +- Src/Base/AMReX_BLBackTrace.cpp | 13 +- Src/Base/AMReX_BaseFab.H | 20 +- Src/Base/AMReX_Box.cpp | 4 +- Src/Base/AMReX_BoxList.H | 4 +- Src/Base/AMReX_CTOParallelForImpl.H | 331 ++ Src/Base/AMReX_DistributionMapping.cpp | 4 +- Src/Base/AMReX_Extension.H | 4 +- Src/Base/AMReX_FArrayBox.H | 2 +- Src/Base/AMReX_FBI.H | 2 +- Src/Base/AMReX_FabArray.H | 19 +- Src/Base/AMReX_FabArrayCommI.H | 4 +- Src/Base/AMReX_Geometry.H | 86 +- Src/Base/AMReX_Geometry.cpp | 72 +- Src/Base/AMReX_GpuAtomic.H | 63 +- Src/Base/AMReX_GpuContainers.H | 15 +- Src/Base/AMReX_GpuDevice.H | 33 +- Src/Base/AMReX_GpuDevice.cpp | 14 +- Src/Base/AMReX_GpuLaunch.H | 139 +- Src/Base/AMReX_GpuLaunchFunctsC.H | 504 ++- Src/Base/AMReX_GpuLaunchFunctsG.H | 709 +++- Src/Base/AMReX_GpuLaunchMacrosG.H | 68 +- Src/Base/AMReX_GpuQualifiers.H | 4 - Src/Base/AMReX_GpuReduce.H | 52 +- Src/Base/AMReX_GpuTypes.H | 1 - Src/Base/AMReX_MFIter.H | 3 + Src/Base/AMReX_MFIter.cpp | 16 + Src/Base/AMReX_MPMD.H | 178 ++ Src/Base/AMReX_MPMD.cpp | 225 ++ Src/Base/AMReX_Math.H | 1 - Src/Base/AMReX_MultiFab.H | 10 +- Src/Base/AMReX_MultiFab.cpp | 53 + Src/Base/AMReX_MultiFabUtil.H | 29 + Src/Base/AMReX_MultiFabUtil.cpp | 241 ++ Src/Base/AMReX_NonLocalBC.H | 9 + Src/Base/AMReX_Orientation.H | 26 +- Src/Base/AMReX_PODVector.H | 8 +- Src/Base/AMReX_ParallelDescriptor.H | 73 + Src/Base/AMReX_ParallelDescriptor.cpp | 15 + Src/Base/AMReX_ParallelReduce.H | 55 + Src/Base/AMReX_ParmParse.H | 2 +- Src/Base/AMReX_ParmParse.cpp | 8 +- 
Src/Base/AMReX_RandomEngine.H | 1 - Src/Base/AMReX_Reduce.H | 19 +- Src/Base/AMReX_RungeKutta.H | 293 ++ Src/Base/AMReX_Scan.H | 22 +- Src/Base/AMReX_TableData.H | 12 +- Src/Base/AMReX_TinyProfiler.H | 2 +- Src/Base/AMReX_ValLocPair.H | 35 + Src/Base/AMReX_VisMF.H | 2 - Src/Base/AMReX_bc_types_mod.F90 | 3 + Src/Base/CMakeLists.txt | 11 +- Src/Base/Make.package | 8 +- Src/Boundary/AMReX_LOUtil_K.H | 16 + Src/EB/AMReX_EB2.H | 32 +- Src/EB/AMReX_EB2.cpp | 58 +- Src/EB/AMReX_EB2_2D_C.cpp | 7 + Src/EB/AMReX_EB2_3D_C.H | 18 - Src/EB/AMReX_EB2_3D_C.cpp | 159 +- Src/EB/AMReX_EB2_C.H | 8 + Src/EB/AMReX_EB2_GeometryShop.H | 1 + Src/EB/AMReX_EB2_IndexSpaceI.H | 40 +- Src/EB/AMReX_EB2_IndexSpace_STL.H | 3 +- Src/EB/AMReX_EB2_IndexSpace_STL.cpp | 12 +- Src/EB/AMReX_EB2_IndexSpace_chkpt_file.H | 47 + Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp | 86 + Src/EB/AMReX_EB2_Level.H | 116 +- Src/EB/AMReX_EB2_Level.cpp | 11 + Src/EB/AMReX_EB2_Level_STL.H | 2 +- Src/EB/AMReX_EB2_Level_STL.cpp | 4 +- Src/EB/AMReX_EB2_Level_chkpt_file.H | 31 + Src/EB/AMReX_EB2_Level_chkpt_file.cpp | 203 ++ Src/EB/AMReX_EB_chkpt_file.H | 60 + Src/EB/AMReX_EB_chkpt_file.cpp | 324 ++ Src/EB/AMReX_distFcnElement.H | 6 +- Src/EB/CMakeLists.txt | 6 + Src/EB/Make.package | 6 + Src/Extern/HDF5/AMReX_PlotFileUtilHDF5.cpp | 5 +- Src/Extern/HYPRE/AMReX_HypreIJIface.H | 6 +- Src/Extern/HYPRE/AMReX_HypreIJIface.cpp | 2 +- Src/Extern/PETSc/AMReX_PETSc.cpp | 6 +- Src/Extern/SENSEI/AMReX_AmrDataAdaptor.H | 8 +- Src/Extern/SENSEI/AMReX_AmrDataAdaptor.cpp | 118 +- Src/Extern/SENSEI/AMReX_AmrInSituBridge.cpp | 2 +- Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.H | 8 +- .../SENSEI/AMReX_AmrMeshDataAdaptor.cpp | 116 +- .../SENSEI/AMReX_AmrMeshInSituBridge.cpp | 2 +- .../SENSEI/AMReX_AmrMeshParticleDataAdaptor.H | 8 +- .../AMReX_AmrMeshParticleDataAdaptorI.H | 8 +- .../AMReX_AmrMeshParticleInSituBridge.H | 2 +- .../SENSEI/AMReX_AmrParticleDataAdaptor.H | 8 +- .../SENSEI/AMReX_AmrParticleDataAdaptorI.H | 8 +- 
Src/Extern/SENSEI/AMReX_InSituUtils.H | 20 +- Src/Extern/SENSEI/AMReX_InSituUtils.cpp | 8 +- Src/Extern/SENSEI/AMReX_ParticleDataAdaptor.H | 24 +- .../SENSEI/AMReX_ParticleDataAdaptorI.H | 142 +- Src/LinearSolvers/CMakeLists.txt | 12 + .../MLMG/AMReX_MLABecLaplacian.cpp | 4 +- Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H | 5 + Src/LinearSolvers/MLMG/AMReX_MLCGSolver.cpp | 7 + Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H | 8 +- .../MLMG/AMReX_MLCellABecLap.cpp | 122 +- Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H | 32 +- Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp | 453 ++- Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp | 3 +- .../MLMG/AMReX_MLEBNodeFDLap_2D_K.H | 44 +- .../MLMG/AMReX_MLEBNodeFDLaplacian.H | 11 +- .../MLMG/AMReX_MLEBNodeFDLaplacian.cpp | 73 +- Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H | 3 +- Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp | 300 +- .../MLMG/AMReX_MLEBTensorOp_bc.cpp | 58 +- .../MLMG/AMReX_MLEBTensor_2D_K.H | 138 +- .../MLMG/AMReX_MLEBTensor_3D_K.H | 539 +++- Src/LinearSolvers/MLMG/AMReX_MLEBTensor_K.H | 139 + Src/LinearSolvers/MLMG/AMReX_MLLinOp.H | 139 +- Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp | 280 +- Src/LinearSolvers/MLMG/AMReX_MLLinOp_temp.H | 486 +++ Src/LinearSolvers/MLMG/AMReX_MLMG.H | 42 +- Src/LinearSolvers/MLMG/AMReX_MLMG.cpp | 985 ++---- .../MLMG/AMReX_MLNodeLaplacian.H | 8 +- .../MLMG/AMReX_MLNodeLaplacian.cpp | 29 +- .../MLMG/AMReX_MLNodeLaplacian_misc.cpp | 8 + Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H | 38 +- Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp | 156 +- Src/LinearSolvers/MLMG/AMReX_MLPoisson.H | 4 + Src/LinearSolvers/MLMG/AMReX_MLPoisson.cpp | 59 + Src/LinearSolvers/MLMG/AMReX_MLTensorOp.cpp | 140 +- .../MLMG/AMReX_MLTensorOp_grad.cpp | 160 +- Src/LinearSolvers/MLMG/AMReX_MLTensor_2D_K.H | 382 ++- Src/LinearSolvers/MLMG/AMReX_MLTensor_3D_K.H | 2844 +++++++++++------ Src/LinearSolvers/MLMG/AMReX_MLTensor_K.H | 117 + Src/LinearSolvers/OpenBC/AMReX_OpenBC.H | 141 + Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp | 864 
+++++ Src/LinearSolvers/OpenBC/AMReX_OpenBC_K.H | 166 + Src/LinearSolvers/OpenBC/Make.package | 6 + Src/Particle/AMReX_DenseBins.H | 12 +- Src/Particle/AMReX_NeighborParticles.H | 5 + Src/Particle/AMReX_NeighborParticlesCPUImpl.H | 6 +- Src/Particle/AMReX_NeighborParticlesGPUImpl.H | 2 +- Src/Particle/AMReX_NeighborParticlesI.H | 75 +- Src/Particle/AMReX_Particle.H | 2 +- Src/Particle/AMReX_ParticleContainer.H | 4 +- Src/Particle/AMReX_ParticleContainerI.H | 147 +- Src/Particle/AMReX_ParticleInit.H | 4 - Src/Particle/AMReX_ParticleTile.H | 34 +- Src/Particle/AMReX_ParticleUtil.H | 18 +- Src/Particle/AMReX_WriteBinaryParticleData.H | 34 +- .../Source/AdvancePhiAllLevels.cpp | 3 +- .../Source/AdvancePhiAtLevel.cpp | 3 +- .../Amr/Advection_AmrCore/Source/AmrCoreAdv.H | 11 +- .../Advection_AmrCore/Source/AmrCoreAdv.cpp | 62 +- .../Source/DefineVelocity.cpp | 2 +- .../Source/Src_K/Make.package | 2 +- .../Advection_AmrLevel/Source/AmrLevelAdv.H | 2 +- .../Advection_AmrLevel/Source/AmrLevelAdv.cpp | 37 +- Tests/CMakeLists.txt | 2 +- Tests/CTOParFor/CMakeLists.txt | 7 + Tests/CTOParFor/GNUmakefile | 20 + Tests/CTOParFor/Make.package | 4 + Tests/CTOParFor/main.cpp | 64 + Tests/EB/CNS/Source/main.cpp | 8 +- Tests/GPU/CNS/Source/CNS.H | 2 + Tests/GPU/CNS/Source/CNS.cpp | 5 + Tests/GPU/CNS/Source/CNS_advance.cpp | 34 +- .../CNS/Source/diffusion/CNS_diffusion_K.H | 20 +- Tests/LinearSolvers/CellEB2/inputs.rt.2d | 1 + Tests/LinearSolvers/CellEB2/inputs.rt.3d | 1 + Tools/AMRProfParser/GNUmakefile | 1 - Tools/Backtrace/parse_bt.py | 2 +- Tools/CMake/AMReXConfig.cmake.in | 10 +- Tools/CMake/AMReXFlagsTargets.cmake | 18 +- Tools/CMake/AMReXParallelBackends.cmake | 22 +- Tools/CMake/AMReXSYCL.cmake | 2 +- Tools/CMake/AMReXThirdPartyLibraries.cmake | 2 +- Tools/CMake/AMReXTypecheck.cmake | 2 +- Tools/CMake/AMReX_Config.cmake | 14 +- Tools/C_scripts/describe_sources.py | 6 +- Tools/C_scripts/gatherbuildtime.py | 6 +- Tools/C_scripts/makebuildinfo_C.py | 12 +- 
Tools/CompileTesting/compiletesting.py | 4 +- Tools/F_scripts/dep.py | 13 +- Tools/F_scripts/f90doc/README | 36 - Tools/F_scripts/f90doc/expr_parse.pl | 793 ----- Tools/F_scripts/f90doc/expr_parse.y | 234 -- Tools/F_scripts/f90doc/f90doc | 160 - Tools/F_scripts/f90doc/htmling.pl | 376 --- Tools/F_scripts/f90doc/stmts.pl | 891 ------ Tools/F_scripts/f90doc/typing.pl | 516 --- Tools/F_scripts/f90doc/utils.pl | 87 - Tools/F_scripts/fcheck.py | 8 +- Tools/F_scripts/find_files_vpath.py | 4 +- Tools/F_scripts/findparams.py | 4 +- Tools/F_scripts/makebuildinfo.py | 2 +- Tools/F_scripts/write_probin.py | 4 +- Tools/GNUMake/Make.defs | 12 +- Tools/GNUMake/Make.machines | 17 +- Tools/GNUMake/Make.rules | 11 +- Tools/GNUMake/comps/armclang.mak | 6 +- Tools/GNUMake/comps/cray.mak | 18 +- Tools/GNUMake/comps/dpcpp.mak | 4 +- Tools/GNUMake/comps/gnu.mak | 43 +- Tools/GNUMake/comps/hip.mak | 17 +- Tools/GNUMake/comps/intel.mak | 19 +- Tools/GNUMake/comps/llvm-flang.mak | 4 +- Tools/GNUMake/comps/llvm.mak | 8 +- Tools/GNUMake/comps/nag.mak | 9 +- Tools/GNUMake/comps/nvcc.mak | 63 +- Tools/GNUMake/comps/nvhpc.mak | 12 +- Tools/GNUMake/comps/pgi.mak | 10 +- Tools/GNUMake/packages/Make.hdf5 | 3 +- Tools/GNUMake/packages/Make.hypre | 2 +- Tools/GNUMake/sites/Make.alcf | 75 + Tools/GNUMake/sites/Make.nersc | 29 +- Tools/GNUMake/sites/Make.nrel | 43 +- Tools/GNUMake/sites/Make.olcf | 26 +- Tools/GNUMake/sites/Make.unknown | 7 +- Tools/Plotfile/CMakeLists.txt | 2 +- Tools/Postprocessing/python/column_depth.py | 2 +- Tools/Postprocessing/python/conv_slopes.py | 2 +- .../Postprocessing/python/dumpparthistory.py | 7 +- Tools/Postprocessing/python/test_helmeos.py | 2 +- .../python/test_parseparticles.py | 3 +- Tools/Py_util/plotsinglevar.py | 4 +- Tools/Release/ppCleanup.py | 2 +- Tools/Release/ppCleanupDir.py | 2 +- Tools/Release/release.py | 2 +- Tools/libamrex/configure.py | 8 +- Tools/libamrex/mkconfig.py | 8 +- Tools/libamrex/mkpkgconfig.py | 8 +- Tools/libamrex/mkversionheader.py | 8 
+- Tools/typechecker/typechecker.py | 4 +- 286 files changed, 13996 insertions(+), 6935 deletions(-) rename .github/workflows/dependencies/{dependencies_clang6.sh => dependencies_clang7.sh} (73%) create mode 100755 .github/workflows/dependencies/dependencies_gcc8.sh delete mode 100755 .github/workflows/dependencies/dependencies_nvcc10.sh create mode 100644 Src/AmrCore/AMReX_FillPatcher.H create mode 100644 Src/Base/AMReX_CTOParallelForImpl.H create mode 100644 Src/Base/AMReX_MPMD.H create mode 100644 Src/Base/AMReX_MPMD.cpp create mode 100644 Src/Base/AMReX_RungeKutta.H create mode 100644 Src/Base/AMReX_ValLocPair.H create mode 100644 Src/EB/AMReX_EB2_IndexSpace_chkpt_file.H create mode 100644 Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp create mode 100644 Src/EB/AMReX_EB2_Level_chkpt_file.H create mode 100644 Src/EB/AMReX_EB2_Level_chkpt_file.cpp create mode 100644 Src/EB/AMReX_EB_chkpt_file.H create mode 100644 Src/EB/AMReX_EB_chkpt_file.cpp create mode 100644 Src/LinearSolvers/MLMG/AMReX_MLLinOp_temp.H create mode 100644 Src/LinearSolvers/OpenBC/AMReX_OpenBC.H create mode 100644 Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp create mode 100644 Src/LinearSolvers/OpenBC/AMReX_OpenBC_K.H create mode 100644 Src/LinearSolvers/OpenBC/Make.package create mode 100644 Tests/CTOParFor/CMakeLists.txt create mode 100644 Tests/CTOParFor/GNUmakefile create mode 100644 Tests/CTOParFor/Make.package create mode 100644 Tests/CTOParFor/main.cpp delete mode 100644 Tools/F_scripts/f90doc/README delete mode 100644 Tools/F_scripts/f90doc/expr_parse.pl delete mode 100644 Tools/F_scripts/f90doc/expr_parse.y delete mode 100755 Tools/F_scripts/f90doc/f90doc delete mode 100644 Tools/F_scripts/f90doc/htmling.pl delete mode 100644 Tools/F_scripts/f90doc/stmts.pl delete mode 100644 Tools/F_scripts/f90doc/typing.pl delete mode 100644 Tools/F_scripts/f90doc/utils.pl diff --git a/.github/workflows/clang.yml b/.github/workflows/clang.yml index bdd629ce11f..afd37544c12 100644 --- 
a/.github/workflows/clang.yml +++ b/.github/workflows/clang.yml @@ -12,13 +12,13 @@ jobs: # Build and install libamrex as AMReX CMake project # Note: this is an intentional "minimal" build that does not enable (many) options library_clang: - name: Clang@6.0 C++14 SP NOMPI Debug [lib] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions"} + name: Clang@7.0 C++17 SP NOMPI Debug [lib] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies - run: .github/workflows/dependencies/dependencies_clang6.sh + run: .github/workflows/dependencies/dependencies_clang7.sh - name: Build & Install run: | mkdir build @@ -34,7 +34,6 @@ jobs: -DAMReX_PLOTFILE_TOOLS=ON \ -DAMReX_PRECISION=SINGLE \ -DAMReX_PARTICLES_PRECISION=SINGLE \ - -DCMAKE_CXX_STANDARD=14 \ -DCMAKE_C_COMPILER=$(which clang) \ -DCMAKE_CXX_COMPILER=$(which clang++) \ -DCMAKE_Fortran_COMPILER=$(which gfortran) @@ -48,14 +47,14 @@ jobs: ctest --output-on-failure tests_clang: - name: Clang@6.0 C++14 SP Particles DP Mesh Debug [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -O1"} + name: Clang@7.0 C++17 SP Particles DP Mesh Debug [tests] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -O1 -Wnon-virtual-dtor"} # It's too slow with -O0 steps: - - uses: actions/checkout@v2 + - uses: 
actions/checkout@v3 - name: Dependencies - run: .github/workflows/dependencies/dependencies_clang6.sh + run: .github/workflows/dependencies/dependencies_clang7.sh - name: Build & Install run: | mkdir build @@ -70,7 +69,6 @@ jobs: -DAMReX_PARTICLES=ON \ -DAMReX_PRECISION=DOUBLE \ -DAMReX_PARTICLES_PRECISION=SINGLE \ - -DCMAKE_CXX_STANDARD=14 \ -DCMAKE_C_COMPILER=$(which clang) \ -DCMAKE_CXX_COMPILER=$(which clang++) \ -DCMAKE_Fortran_COMPILER=$(which gfortran) @@ -80,14 +78,14 @@ jobs: # Build 2D libamrex with configure configure-2d: - name: Clang@6.0 NOMPI Release [configure 2D] - runs-on: ubuntu-18.04 + name: Clang@7.0 NOMPI Release [configure 2D] + runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies - run: .github/workflows/dependencies/dependencies_clang6.sh + run: .github/workflows/dependencies/dependencies_clang7.sh - name: Build & Install run: | ./configure --dim 2 --with-fortran no --comp llvm --with-mpi no - make -j2 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS="-fno-operator-names -Wno-c++17-extensions" + make -j2 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS="-fno-operator-names" make install diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index c5fbceb5d7e..98a2b001760 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -7,44 +7,13 @@ concurrency: cancel-in-progress: true jobs: - # Build libamrex and all tests with CUDA 10.2 - tests-cuda10: - name: CUDA@10.2 GNU@6.5.0 C++14 Release [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code"} - steps: - - uses: actions/checkout@v2 - - name: Dependencies - run: .github/workflows/dependencies/dependencies_nvcc10.sh - - name: Build & Install - run: | - export PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} - export 
LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:${LD_LIBRARY_PATH} - which nvcc || echo "nvcc not in PATH!" - mkdir build - cd build - cmake .. \ - -DCMAKE_VERBOSE_MAKEFILE=ON \ - -DAMReX_EB=OFF \ - -DAMReX_ENABLE_TESTS=ON \ - -DAMReX_FORTRAN=OFF \ - -DAMReX_PARTICLES=ON \ - -DAMReX_GPU_BACKEND=CUDA \ - -DCMAKE_C_COMPILER=$(which gcc-6) \ - -DCMAKE_CXX_COMPILER=$(which g++-6) \ - -DCMAKE_CUDA_HOST_COMPILER=$(which g++-6) \ - -DCMAKE_Fortran_COMPILER=$(which gfortran-6) \ - -DAMReX_CUDA_ARCH=7.0 \ - -DAMReX_CUDA_ERROR_CROSS_EXECUTION_SPACE_CALL=ON - make -j 2 - # Build libamrex and all tests with CUDA 11.0.2 (recent supported) tests-cuda11: name: CUDA@11.2 GNU@9.3.0 C++17 Release [tests] runs-on: ubuntu-20.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_nvcc11.sh - name: Build & Install @@ -64,9 +33,7 @@ jobs: -DCMAKE_CXX_COMPILER=$(which g++) \ -DCMAKE_CUDA_HOST_COMPILER=$(which g++) \ -DCMAKE_Fortran_COMPILER=$(which gfortran) \ - -DCMAKE_CUDA_STANDARD=17 \ - -DCMAKE_CXX_STANDARD=17 \ - -DAMReX_CUDA_ARCH=8.0 \ + -DAMReX_CUDA_ARCH=7.0 \ -DAMReX_CUDA_ERROR_CROSS_EXECUTION_SPACE_CALL=ON \ -DAMReX_CUDA_ERROR_CAPTURE_THIS=ON @@ -78,7 +45,7 @@ jobs: runs-on: ubuntu-20.04 env: {CXXFLAGS: "-Werror -Wall -Wextra -Wpedantic -Wshadow"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_nvhpc21-11.sh - name: Build & Install @@ 
-106,8 +73,6 @@ jobs: -DCMAKE_CXX_COMPILER=$(which nvc++) \ -DCMAKE_CUDA_HOST_COMPILER=$(which nvc++) \ -DCMAKE_Fortran_COMPILER=$(which nvfortran) \ - -DCMAKE_CUDA_STANDARD=17 \ - -DCMAKE_CXX_STANDARD=17 \ -DAMReX_CUDA_ARCH=8.0 \ -DAMReX_CUDA_ERROR_CROSS_EXECUTION_SPACE_CALL=ON \ -DAMReX_CUDA_ERROR_CAPTURE_THIS=ON @@ -119,12 +84,12 @@ jobs: name: CUDA@11.2 GNU@9.3.0 [configure 3D] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_nvcc11.sh - name: Build & Install run: | export PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} ./configure --dim 3 --with-cuda yes --enable-eb yes --enable-xsdk-defaults yes --with-fortran no - make -j2 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS=-fno-operator-names CXXSTD=c++17 + make -j2 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS=-fno-operator-names make install diff --git a/.github/workflows/dependencies/dependencies.sh b/.github/workflows/dependencies/dependencies.sh index d0e86e99c0a..c9bb080831c 100755 --- a/.github/workflows/dependencies/dependencies.sh +++ b/.github/workflows/dependencies/dependencies.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2020 The AMReX Community +# Copyright 2020-2022 The AMReX Community # # License: BSD-3-Clause-LBNL # Authors: Axel Huebl diff --git a/.github/workflows/dependencies/dependencies_clang6.sh b/.github/workflows/dependencies/dependencies_clang7.sh similarity index 73% rename from .github/workflows/dependencies/dependencies_clang6.sh rename to .github/workflows/dependencies/dependencies_clang7.sh index 19b348b920b..85396a2f73c 100755 --- a/.github/workflows/dependencies/dependencies_clang6.sh +++ b/.github/workflows/dependencies/dependencies_clang7.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2020 The AMReX Community +# Copyright 2020-2022 The AMReX Community # # License: BSD-3-Clause-LBNL # Authors: Axel Huebl @@ -11,4 +11,4 @@ sudo apt-get update sudo apt-get 
install -y \ build-essential \ - clang gfortran + clang-7 gfortran diff --git a/.github/workflows/dependencies/dependencies_gcc8.sh b/.github/workflows/dependencies/dependencies_gcc8.sh new file mode 100755 index 00000000000..c216e6a8c51 --- /dev/null +++ b/.github/workflows/dependencies/dependencies_gcc8.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# +# Copyright 2020-2022 The AMReX Community +# +# License: BSD-3-Clause-LBNL +# Authors: Axel Huebl + +set -eu -o pipefail + +sudo add-apt-repository ppa:ubuntu-toolchain-r/test +sudo apt-get update + +sudo apt-get install -y --no-install-recommends \ + build-essential \ + g++-8 gfortran-8 \ + libopenmpi-dev \ + openmpi-bin diff --git a/.github/workflows/dependencies/dependencies_nofortran.sh b/.github/workflows/dependencies/dependencies_nofortran.sh index 36d759f66fa..61089ad8bf7 100755 --- a/.github/workflows/dependencies/dependencies_nofortran.sh +++ b/.github/workflows/dependencies/dependencies_nofortran.sh @@ -1,8 +1,9 @@ #!/usr/bin/env bash # -# Copyright 2020 Axel Huebl +# Copyright 2020-2022 The AMReX Community # # License: BSD-3-Clause-LBNL +# Authors: Axel Huebl # search recursive inside a folder if a file contains tabs # diff --git a/.github/workflows/dependencies/dependencies_nvcc10.sh b/.github/workflows/dependencies/dependencies_nvcc10.sh deleted file mode 100755 index 591dd04d79b..00000000000 --- a/.github/workflows/dependencies/dependencies_nvcc10.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright 2020 Axel Huebl -# -# License: BSD-3-Clause-LBNL - -set -eu -o pipefail - -sudo apt-get update - -sudo apt-get install -y --no-install-recommends\ - build-essential \ - g++-6 \ - gfortran-6 \ - libopenmpi-dev \ - openmpi-bin - -sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub -echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" \ - | sudo tee /etc/apt/sources.list.d/cuda.list -sudo apt-get 
update -sudo apt-get install -y \ - cuda-command-line-tools-10-2 \ - cuda-compiler-10-2 \ - cuda-cupti-dev-10-2 \ - cuda-minimal-build-10-2 \ - cuda-nvml-dev-10-2 \ - cuda-nvtx-10-2 \ - cuda-curand-dev-10-2 -sudo ln -s cuda-10.2 /usr/local/cuda diff --git a/.github/workflows/dependencies/dependencies_nvcc11.sh b/.github/workflows/dependencies/dependencies_nvcc11.sh index 79c8c6c31f6..a4b2f335a99 100755 --- a/.github/workflows/dependencies/dependencies_nvcc11.sh +++ b/.github/workflows/dependencies/dependencies_nvcc11.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2020 Axel Huebl +# Copyright 2020-2022 Axel Huebl # # License: BSD-3-Clause-LBNL @@ -19,9 +19,8 @@ sudo apt-get install -y \ pkg-config \ wget -sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub -echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 /" \ - | sudo tee /etc/apt/sources.list.d/cuda.list +curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb +sudo dpkg -i cuda-keyring_1.0-1_all.deb sudo apt-get update sudo apt-get install -y \ cuda-command-line-tools-11-2 \ diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index c0d50aa99e1..82e387cbff4 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2.3.1 # If you're using actions/checkout@v2 you must set persist-credentials to false in most cases for the deployment to work correctly. 
+ uses: actions/checkout@v3 with: persist-credentials: false diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml index 188d7d32f95..32726a4767a 100644 --- a/.github/workflows/gcc.yml +++ b/.github/workflows/gcc.yml @@ -13,13 +13,13 @@ jobs: # Build and install libamrex as AMReX CMake project # Note: this is an intentional "minimal" build that does not enable (many) options library: - name: GNU@7.5 C++17 Release [lib] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual"} + name: GNU@8.4 C++17 Release [lib] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies - run: .github/workflows/dependencies/dependencies.sh + run: .github/workflows/dependencies/dependencies_gcc8.sh - name: Build & Install run: | mkdir build @@ -29,7 +29,9 @@ jobs: -DAMReX_PLOTFILE_TOOLS=ON \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DCMAKE_INSTALL_PREFIX=/tmp/my-amrex \ - -DCMAKE_CXX_STANDARD=17 + -DCMAKE_C_COMPILER=$(which gcc-8) \ + -DCMAKE_CXX_COMPILER=$(which g++-8) \ + -DCMAKE_Fortran_COMPILER=$(which gfortran-8) make -j 2 make install make test_install @@ -41,12 +43,12 @@ jobs: # Build libamrex and all tests tests_build_3D: - name: GNU@7.5 C++14 3D Debug Fortran [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1"} + name: GNU@9.3 C++17 3D Debug Fortran [tests] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1 -Wnon-virtual-dtor -Wlogical-op 
-Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} # It's too slow with -O0 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -64,12 +66,12 @@ jobs: ctest --test-dir build --output-on-failure tests_build_2D: - name: GNU@7.5 C++14 2D Debug Fortran [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1"} + name: GNU@9.3 C++17 2D Debug Fortran [tests] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1 -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} # It's too slow with -O0 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -87,13 +89,13 @@ jobs: ctest --test-dir build --output-on-failure tests_build_1D: - name: GNU@7.5 C++14 1D Debug Fortran [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1"} + name: GNU@9.3 C++17 1D Debug Fortran [tests] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -O1 -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} # -Werror temporarily skipped until we have functional testing established # It's too slow with -O0 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -113,10 +115,10 @@ jobs: # Build libamrex and all tests 
tests_cxx20: name: GNU@10.1 C++20 OMP [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi"} + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_gcc10.sh - name: Build & Install @@ -145,13 +147,13 @@ jobs: # Build libamrex and all tests w/o MPI tests-nonmpi: - name: GNU@7.5 C++14 NOMPI [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual"} + name: GNU@8.4 C++17 NOMPI [tests] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies - run: .github/workflows/dependencies/dependencies.sh + run: .github/workflows/dependencies/dependencies_gcc8.sh - name: Build & Install run: | mkdir build @@ -167,18 +169,21 @@ jobs: -DAMReX_ENABLE_TESTS=ON \ -DAMReX_FORTRAN=ON \ -DAMReX_MPI=OFF \ - -DAMReX_PARTICLES=ON + -DAMReX_PARTICLES=ON \ + -DCMAKE_C_COMPILER=$(which gcc-8) \ + -DCMAKE_CXX_COMPILER=$(which g++-8) \ + -DCMAKE_Fortran_COMPILER=$(which gfortran-8) make -j 2 ctest --output-on-failure # Build libamrex and all tests tests-nofortran: - name: GNU@7.5 C++14 w/o Fortran [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic 
-Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code"} + name: GNU@9.3 C++17 w/o Fortran [tests] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_nofortran.sh - name: Build & Install @@ -203,10 +208,10 @@ jobs: # Build 1D libamrex with configure configure-1d: - name: GNU@7.5 Release [configure 1D] - runs-on: ubuntu-18.04 + name: GNU@9.3 Release [configure 1D] + runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -217,10 +222,10 @@ jobs: # Build 3D libamrex with configure configure-3d: - name: GNU@7.5 Release [configure 3D] - runs-on: ubuntu-18.04 + name: GNU@11.2 Release [configure 3D] + runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -231,10 +236,10 @@ jobs: # Build 3D libamrex with single precision and tiny profiler configure-3d-single-tprof: - name: GNU@7.5 Release [configure 3D] - runs-on: ubuntu-18.04 + name: GNU@9.3 Release [configure 3D] + runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -245,10 +250,10 @@ jobs: # Build 3D libamrex debug omp build with configure configure-3d-omp-debug: - name: GNU@7.5 OMP Debug [configure 3D] - runs-on: ubuntu-18.04 + name: GNU@9.3 OMP Debug [configure 3D] + runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + 
- uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -260,9 +265,9 @@ jobs: # Build Tools/Plotfile plotfile-tools: name: GNU Plotfile Tools [tools] - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -272,11 +277,11 @@ jobs: # Build libamrex and run all tests tests_run: - name: GNU@7.5 C++14 [tests] - runs-on: ubuntu-18.04 - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code"} + name: GNU@9.3 C++17 [tests] + runs-on: ubuntu-20.04 + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wunreachable-code -Wnon-virtual-dtor -Wlogical-op -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies.sh - name: Build & Install @@ -295,13 +300,13 @@ jobs: ctest --output-on-failure -R test_hdf5: - name: GNU@7.5 HDF5 I/O Test [tests] - runs-on: ubuntu-18.04 + name: GNU@9.3 HDF5 I/O Test [tests] + runs-on: ubuntu-20.04 env: CXX: h5pcc CC: h5cc steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: | .github/workflows/dependencies/dependencies.sh diff --git a/.github/workflows/hip.yml b/.github/workflows/hip.yml index a128eabf664..d542fb603a2 100644 --- a/.github/workflows/hip.yml +++ b/.github/workflows/hip.yml @@ -20,9 +20,9 @@ jobs: # ^ # /opt/rocm-4.1.1/hip/include/hip/hcc_detail/hip_runtime.h:176:9: note: macro 'select_impl_' defined here # #define select_impl_(_1, _2, impl_, ...) 
impl_ - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments -Wno-pass-failed"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_hip.sh - name: Build & Install @@ -47,6 +47,7 @@ jobs: -DAMReX_LINEAR_SOLVERS=ON \ -DAMReX_GPU_BACKEND=HIP \ -DAMReX_AMD_ARCH=gfx908 \ + -DAMReX_ROCTX=ON \ -DCMAKE_C_COMPILER=$(which clang) \ -DCMAKE_CXX_COMPILER=$(which clang++) \ -DCMAKE_Fortran_COMPILER=$(which flang) \ @@ -66,9 +67,9 @@ jobs: # ^ # /opt/rocm-4.1.1/hip/include/hip/hcc_detail/hip_runtime.h:176:9: note: macro 'select_impl_' defined here # #define select_impl_(_1, _2, impl_, ...) 
impl_ - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments -Wno-pass-failed"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-deprecated-declarations -Wno-gnu-zero-variadic-macro-arguments"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_hip.sh - name: Build & Install @@ -91,6 +92,7 @@ jobs: -DAMReX_LINEAR_SOLVERS=ON \ -DAMReX_GPU_BACKEND=HIP \ -DAMReX_AMD_ARCH=gfx908 \ + -DAMReX_ROCTX=ON \ -DCMAKE_C_COMPILER=$(which clang) \ -DCMAKE_CXX_COMPILER=$(which hipcc) \ -DCMAKE_Fortran_COMPILER=$(which gfortran) \ @@ -102,7 +104,7 @@ jobs: name: HIP EB [configure 2D] runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_hip.sh - name: Build & Install diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml index 6fef4fc0459..6e7d87a299e 100644 --- a/.github/workflows/intel.yml +++ b/.github/workflows/intel.yml @@ -11,9 +11,9 @@ jobs: name: DPCPP GFortran@7.5 C++17 [tests] runs-on: ubuntu-20.04 # mkl/rng/device/detail/mrg32k3a_impl.hpp has a number of sign-compare error - env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-sign-compare"} + env: {CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-sign-compare"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: 
Dependencies run: .github/workflows/dependencies/dependencies_dpcpp.sh - name: Build & Install @@ -41,7 +41,7 @@ jobs: runs-on: ubuntu-20.04 env: {CXXFLAGS: "-Werror"} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: install dependencies run: | export DEBIAN_FRONTEND=noninteractive diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index e1446a038da..be5a1e738ca 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -14,10 +14,10 @@ jobs: env: # build universal binaries for M1 "Apple Silicon" and Intel CPUs CMAKE_OSX_ARCHITECTURES: "arm64;x86_64" - CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -Wno-range-loop-analysis -Wno-pass-failed" + CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-c++17-extensions -Wno-range-loop-analysis" # -Wno-range-loop-analysis: Apple clang has a bug in range-loop-analysis steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_mac.sh - name: Build & Install @@ -39,10 +39,10 @@ jobs: name: AppleClang@11.0 GFortran@9.3 [tests] runs-on: macos-latest env: - CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wno-c++17-extensions -Wno-range-loop-analysis -Wno-pass-failed" + CXXFLAGS: "-fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-c++17-extensions -Wno-range-loop-analysis" # -Wno-range-loop-analysis: Apple clang has a bug in range-loop-analysis steps: - - uses: actions/checkout@v2 + 
- uses: actions/checkout@v3 - name: Dependencies run: .github/workflows/dependencies/dependencies_mac.sh - name: Build & Install diff --git a/.github/workflows/sensei.yml b/.github/workflows/sensei.yml index 19121889d6b..163456a924a 100644 --- a/.github/workflows/sensei.yml +++ b/.github/workflows/sensei.yml @@ -17,17 +17,17 @@ jobs: CC: clang CXXFLAGS: "-Werror -Wshadow -Woverloaded-virtual -Wunreachable-code -fno-operator-names" CMAKE_GENERATOR: Ninja - CMAKE_PREFIX_PATH: /root/install/sensei/develop/lib/cmake + CMAKE_PREFIX_PATH: /root/install/sensei/v4.0.0/lib64/cmake container: - image: ryankrattiger/sensei:fedora33-vtk-mpi-20210616 + image: senseiinsitu/ci:fedora35-amrex-20220613 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup run: mkdir build - name: Configure run: | cd build - cmake .. \ + cmake .. \ -DCMAKE_BUILD_TYPE=Debug \ -DAMReX_ENABLE_TESTS=ON \ -DAMReX_FORTRAN=OFF \ diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index b459865f587..9c32554218d 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -10,13 +10,13 @@ jobs: tabs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Tabs run: .github/workflows/style/check_tabs.sh trailing_whitespaces: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Trailing Whitespaces run: .github/workflows/style/check_trailing_whitespaces.sh diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index b066ba6c98c..fba862d26dd 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -12,7 +12,7 @@ jobs: name: MSVC C++17 w/o Fortran w/o MPI runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Build & Install run: | cmake -S . 
-B build ` @@ -31,7 +31,7 @@ jobs: name: MSVC C++17 w/o Fortran w/o MPI static runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Build & Install run: | cmake -S . -B build ` @@ -49,7 +49,7 @@ jobs: name: Clang C++17 w/o Fortran w/o MPI runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: seanmiddleditch/gha-setup-ninja@master - name: Build & Install shell: cmd diff --git a/CHANGES b/CHANGES index 8104566abe2..648db385c07 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,236 @@ +# 22.11 + + -- MPI Reduce for ValLocPair (#3003) + + -- `FabArray::isDefined` (#2997) + + -- Make The_Device_Arena non-managed (#2998) + + -- Add alias template Gpu::NonManagedDeviceVector (#2999) + + -- Pre- and Post-interpolation hook interface (#2991) + + -- Add user defined BC types (#2995) + + -- Add BCRec::set for convenience (#2993) + + -- ParallelFor with compile time optimization of kernels with run time parameters (#2954) + + -- 2D RZ solver for WarpX: Arbitrary coefficient (#2986) + + -- Runge-Kutta support for AMR (#2974) + + -- Fourth-order interpolation from fine to coarse level (#2987) + + -- Fix EB data inconsistency when fixing small cells and multiple cuts (#2943) + + -- MFIter::Finalize (#2983, #2985, #2988) + + -- Fix MLMG::getGradSolution & getFluxes for inhomogeneous Neumann and Robin BC (#2984) + + -- MLLinOp::postSolve (#2981) + + -- add templating for the cell bilinear interpolators (#2979) + + -- FillPatcher class (#2972) + + -- Remove sycl namespace alias (#2971) + + -- Fix Tensor Solver BC (#2930) + + -- Disable host device for macros for SYCL/DPC++ (#2969) + +# 22.10 + + -- Solve an issue with particles async IO when having runtime added variables (#2966) + + -- Fix int overflow in amrex::bisect (#2964) + + -- Fix MLEBNodeFDLaplacian bottom solver (#2963) + + -- make tagging routines EB_aware (#2962) + + -- Volume weighted sum (#2961) + + -- CellData: data in a single cell 
(#2959) + + -- Quartic interpolation for cell centered data (#2960) + + -- Add GPU-compatible upper bound and lower bound algorithms to AMReX_Algorithm (#2958) + + -- add option for makebuildsources to specify the style arguments for 'git describe'. (#2957) + + -- Add roundoff_lo corresponding to roundoff_hi for domains that don't start at 0 (#2950) + + -- Add template parameter to ParallelFor and launch specifying block size (#2947) + + -- Byte spread fixes (#2949) + + -- CMake: HIP_PATH from ROCM_PATH (#2948) + + -- Fix: Make Finalize->Initialize->F->I->... Work (#2944) + + -- Changes for Cray & Clang (#2941) + + -- Link to cublas when using CUDA and Hypre (#2933) + + -- HIP: use coarse grained host memory (#2932) + + -- EB checkpoint files (#2897) + + -- Fix: Loading Files Again (#2936) + + -- Check if boundary particles container has been created before clearance. (#2935) + + -- SYCL: Replace deprecated atomic types and operations (#2921) + +# 22.09 + + -- Preserve neighbor particles when sorting particles. (#2923) + + -- Scope of NonLocalBC::ParallelCopy (#2922) + + -- Open Boundary Poisson Solver (#2912) + Add hypre as an option for OpenBCSolver (#2931) + + -- Fix OOB access of ref ratio on HDF write header (#2919) + + -- Add Polaris to GNUMake (#2908) + + -- Export GpuDevice Globals (#2918) + + -- enable LinOp to use the right Factory (fixes moving geometry problem) (#2916) + + -- Use 1 atomic instead of two per item in DenseBins::build (#2911) + + -- [SYCL] Remove amrex::oneapi and update deprecated device descriptors (#2910) + + -- Add: `MultiFab::sum_unique` (#2909) + + -- In MLMG::mgFcycle, assert that for EB the linop is cell-centered. (#2905) + + -- EB: Add Fine Levels (#2881) + + -- Add rpath to lib64 for ZFP. 
(#2902) + + -- change data types from double to amrex::Real, and thus we can use single precision for the hypre IJ interface (#2896) + + -- MPMD Support (#2895) + + -- MLMG interface (#2858) + +# 22.08 + + -- Let `selectActualNeighbors` return right after starting if there are no + particles for communication. (#2886) + + -- Add Comm Sync to Redistribute (#2891) + + -- Multi-materials and derived variable output (#2888) + + -- Fix host / device sync bug in PODVector (#2890) + + -- MinLoc and MaxLoc Support (#2885) + + -- HIP: Remove the call to hipDeviceSetSharedMemConfig (#2884) + + -- Add Frontier to GNU Make (#2879) + + -- Add option to derefine to AMRErrorTag (#2875) + + -- Fix the segmentation fault in selecting actual neighbor particles. (#2877) + + -- Workaround to bypass issue observed at very large scale with Fujitsu MPI (#2874) + `TagBoxArray::collate`: Fujitsu Clang (#2889) + + -- Allow zero components MultiFab and BaseFab (#2873) + + -- New EB optimization parameter: eb2.num_coarsen_opt (#2872) + + -- SENSEI 4.0: Fix Build for Particles (#2869) + + -- Cache the neighbor comm tags for the CPU implementation of fillNeighbors. (#2862) + + -- Remove some hard checks in check_mvmc for 3D (#2864) + + -- Carry over fix for ngbxy.smallEnd typo (#2868) + +# 22.07 + + -- Adding control APIs and namespacing for core algorithm paths like SpGEMM, SpMV, and SpTrans. (#2859) + + -- update the SENSEI in situ coupling for SENSEI v4.0.0 (#2785) + + -- Write runtime attribs to checkpoints on GPUs (#2856) + + -- Fix gnu make on Crusher for mpi_gtl_hsa (#2857) + + -- CMake: FindDependency CUDAToolkit (#2849) + + -- NERSC Programming Environment prototype (#2848) + + -- GNU Make: No need to query mpif90 if Fortran is not used. 
(#2852) + + -- Remove f90doc (#2851) + + -- Explicitly invoke python3 (#2850) + + -- Maintain the high end of the 'roundoff domain' in both float and double precision (#2839) + + -- add Ok to coordsys (#2844) + + -- ParamParse: Add Files at Runtime (#2842) + + -- Fix a pathological case for 2d EB (#2840) + + -- add fvolumesum to GNUmakefile (#2836) + + -- Clamp particles shifted from plo boundary against rhi, rather than back to plo (#2814) + + -- Fix: CMake NVTX not only Hypre (#2837) + + -- Update sensei CI container for sensei v4.0 integration (#2834) + + -- HIP Memory Advise : Set managed memory to coarse grain (#2835) + + -- CMake: Fix `export` with `AMReX_INSTALL=OFF` (#2838) + + -- make PODVector work with PolymorphicArenaAllocator (#2829) + + -- Re-implement FaceLinear::interp() for InterpFromCoarseLevel (#2831) + + -- Make regrid method of Amr class public (#2833) + + -- amrex::Any (#2827) + + -- Fix line integral computation (#2830) + + -- Fix a bug in multigrid grids (#2823) + + -- Add html, additional sections to README.md (#2775) + + -- Allow StateDataPhysBCFunct to operate on face-centered data (#2819) + + -- Fix Parser ODR (#2820) + + -- CMake: Cleanup old nvToolsExt (#2817) + + -- Handle the case where we don't have enough device memory for the snd_buffer (#2705) + + -- CMake: 3.17+ (#2813) + + -- Landon/fix bug ghost particles (#2812) + + -- Follow-on to 2809; update selectActualNeighbors as well. (#2810) + + -- Generalize the type of callables that can be passed into the neighbor list build function (#2809) + + -- Add AVX2 instructions flag. 
(#2803) + + -- Avoid M_PI because it's not in the C++ standard (#2807) + + -- In the array version of FillPatchTwoLevels, allow specifying an (#2800) + # 22.06 -- Fix solvability issue in the nodal solver RAP approach (#2783, #2801) diff --git a/Docs/sphinx_documentation/source/Basics.rst b/Docs/sphinx_documentation/source/Basics.rst index dd4e53d455e..dc3022f7e12 100644 --- a/Docs/sphinx_documentation/source/Basics.rst +++ b/Docs/sphinx_documentation/source/Basics.rst @@ -2549,7 +2549,11 @@ The basic idea behind physical boundary conditions is as follows: Reflection from interior cells with sign changed, :math:`q(-i) = -q(i)`. -- For external Dirichlet boundaries, the user needs to provide a + user_1, user_2 and user_3 + "User". It is the user's responsibility to write a routine + to fill ghost cells (more details below). + +- For external Dirichlet and user boundaries, the user needs to provide a callable object like below. .. highlight:: c++ @@ -2564,7 +2568,7 @@ The basic idea behind physical boundary conditions is as follows: const BCRec* bcr, const int bcomp, const int orig_comp) const { - // external Dirichlet for cell iv + // external Dirichlet or user BC for cell iv } }; diff --git a/Docs/sphinx_documentation/source/BuildingAMReX.rst b/Docs/sphinx_documentation/source/BuildingAMReX.rst index 7b3273bf874..331f9b8c9f6 100644 --- a/Docs/sphinx_documentation/source/BuildingAMReX.rst +++ b/Docs/sphinx_documentation/source/BuildingAMReX.rst @@ -35,8 +35,8 @@ list of important variables. 
+-----------------+-------------------------------------+--------------------+ | COMP | gnu, cray, ibm, intel, llvm, or pgi | none | +-----------------+-------------------------------------+--------------------+ - | CXXSTD | C++ standard (``c++14``, ``c++17``, | compiler default, | - | | ``c++20``) | at least ``c++14`` | + | CXXSTD | C++ standard (``c++17``, ``c++20``) | compiler default, | + | | | at least ``c++17`` | +-----------------+-------------------------------------+--------------------+ | DEBUG | TRUE or FALSE | FALSE | +-----------------+-------------------------------------+--------------------+ @@ -584,7 +584,7 @@ the following line in the appropriate CMakeLists.txt file: :: - target_link_libraries( <your-target-name> AMReX::<amrex-target-name> ) + target_link_libraries( <your-target-name> PUBLIC AMReX::<amrex-target-name> ) In the above snippet, ``<amrex-target-name>`` is any of the targets listed in the table below. @@ -709,7 +709,7 @@ As an example, consider the following CMake code: :: find_package(AMReX REQUIRED 3D EB) - target_link_libraries( Foo AMReX::amrex AMReX::Flags_CXX ) + target_link_libraries( Foo PUBLIC AMReX::amrex ) The code in the snippet above checks whether an AMReX installation with 3D and Embedded Boundary support is available on the system. If so, AMReX is linked to target ``Foo`` and AMReX flags preset is used @@ -740,8 +740,8 @@ The AMReX team does development on Linux machines, from laptops to supercomputer We do not officially support AMReX on Windows, and many of us do not have access to any Windows machines. However, we believe there are no fundamental issues for it to work on Windows. -(1) AMReX mostly uses standard C++14, but for Windows C++17 is required. This is because we use - C++17 to support file system operations when POSIX I/O is not available. +(1) AMReX mostly uses standard C++17. +We run continuous integration tests on Windows with MSVC and Clang compilers. (2) We use POSIX signal handling when floating point exceptions, segmentation faults, etc. happen. This capability is not supported on Windows. 
diff --git a/Docs/sphinx_documentation/source/BuildingAMReX_Chapter.rst b/Docs/sphinx_documentation/source/BuildingAMReX_Chapter.rst index dd61bb254d3..3ecbc775c17 100644 --- a/Docs/sphinx_documentation/source/BuildingAMReX_Chapter.rst +++ b/Docs/sphinx_documentation/source/BuildingAMReX_Chapter.rst @@ -18,7 +18,7 @@ an application code then uses its own build system and links to AMReX as an exte Finally, AMReX can also be built with CMake, as detailed in the section on :ref:`sec:build:cmake`. -AMReX requires a C++ compiler that supports the C++14 standard, a +AMReX requires a C++ compiler that supports the C++17 standard, a Fortran compiler that supports the Fortran 2003 standard, and a C compiler that supports the C99 standard. Prerequisites for building with GNU Make include Python (>= 2.7, including 3) and standard tools diff --git a/Docs/sphinx_documentation/source/GPU.rst b/Docs/sphinx_documentation/source/GPU.rst index 4101c806be2..4984b839132 100644 --- a/Docs/sphinx_documentation/source/GPU.rst +++ b/Docs/sphinx_documentation/source/GPU.rst @@ -315,7 +315,7 @@ we provide the helper function ``setup_target_for_cuda_compilation()``: setup_target_for_cuda_compilation(my_target) # Link against amrex - target_link_libraries(my_target AMReX::amrex) + target_link_libraries(my_target PUBLIC AMReX::amrex) @@ -1001,7 +1001,7 @@ launch function. ``amrex::ParallelFor()`` expands into different variations of a quadruply-nested :cpp:`for` loop depending dimensionality and whether it is being implemented on CPU or GPU. -The best way to understand this macro is to take a look at the 4D :cpp:`amrex::ParallelFor` +The best way to understand this function is to take a look at the 4D :cpp:`amrex::ParallelFor` that is implemented when ``USE_CUDA=FALSE``. A simplified version is reproduced here: .. 
highlight:: c++ @@ -1103,6 +1103,15 @@ bounds, a :cpp:`long` or :cpp:`int` number of elements is passed to bound the si passing the number of elements to work on and indexing the pointer to the starting element: :cpp:`p[idx + 15]`. +GPU block size +-------------- + +By default, :cpp:`ParallelFor` launches ``AMREX_GPU_MAX_THREADS`` threads +per GPU block, where ``AMREX_GPU_MAX_THREADS`` is a compile-time constant +with a default value of 256. The users can also explicitly specify the +number of threads per block by :cpp:`ParallelFor<MY_BLOCK_SIZE>(...)`, where +``MY_BLOCK_SIZE`` is a multiple of the warp size (e.g., 128). This allows +the users to do performance tuning for individual kernels. Launching general kernels ------------------------- diff --git a/Docs/sphinx_documentation/source/LinearSolvers.rst b/Docs/sphinx_documentation/source/LinearSolvers.rst index c8743a3e8e2..d893859e7c2 100644 --- a/Docs/sphinx_documentation/source/LinearSolvers.rst +++ b/Docs/sphinx_documentation/source/LinearSolvers.rst @@ -209,8 +209,8 @@ function :: - void setDomainBC (const Array& lobc, // for lower ends - const Array& hibc); // for higher ends + void setDomainBC (const Array& lobc, // for lower ends + const Array& hibc); // for higher ends The supported BC types at the physical domain boundaries are @@ -222,6 +222,8 @@ The supported BC types at the physical domain boundaries are - :cpp:`LinOpBCType::inhomogNeumann` for inhomogeneous Neumann boundary condition. +- :cpp:`LinOpBCType::Robin` for Robin boundary conditions, :math:`a\phi + b\frac{\partial\phi}{\partial n} = f`. + - :cpp:`LinOpBCType::reflect_odd` for reflection with sign changed. 
2) Cell-centered solvers only: @@ -255,12 +257,12 @@ before the solve one must always call the :cpp:`MLLinOp` member function :: virtual void setLevelBC (int amrlev, const MultiFab* levelbcdata, - const MultiFab* robinbc_a, - const MultiFab* robinbc_b, - const MultiFab* robinbc_f) = 0; + const MultiFab* robinbc_a = nullptr, + const MultiFab* robinbc_b = nullptr, + const MultiFab* robinbc_f = nullptr) = 0; -If we want to supply an inhomogeneous Dirichlet, inhomogeneous Neumann, or -Robin boundary conditions at the domain boundaries, we must supply those values +If we want to supply an inhomogeneous Dirichlet or inhomogeneous Neumann +boundary condition at the domain boundaries, we must supply those values in ``MultiFab* levelbcdata``, which must have at least one ghost cell. Note that the argument :cpp:`amrlev` is relative to the solve, not necessarily the full AMR hierarchy; amrlev = 0 refers to the coarsest @@ -286,6 +288,11 @@ Dirichlet or Neumann boundaries are assumed to be exactly on the face of the physical domain; storing these values in the ghost cell of a cell-centered array is a convenience of implementation. +For Robin boundary conditions, the ghost cells in +``MultiFab* robinbc_a``, ``MultiFab* robinbc_b``, and ``MultiFab* robinbc_f`` +store the numerical values in the condition, +:math:`a\phi + b\frac{\partial\phi}{\partial n} = f`. + .. _sec:linearsolver:pars: Parameters @@ -754,4 +761,3 @@ An example (implemented in the ``MultiComponent`` tutorial) might be: See ``amrex-tutorials/ExampleCodes/LinearSolvers/MultiComponent`` for a complete working example. .. solver reuse - diff --git a/Docs/sphinx_documentation/source/Post_Processing.rst b/Docs/sphinx_documentation/source/Post_Processing.rst index c2cce7fd7b2..fd707f221db 100644 --- a/Docs/sphinx_documentation/source/Post_Processing.rst +++ b/Docs/sphinx_documentation/source/Post_Processing.rst @@ -76,8 +76,8 @@ variable. 
**How to build and run** -In ``amrex/Tools/Plotfile``, type ``make`` and then ``./fextract.gnu.ex`` to run. -Typing ``./fextract.gnu.ex`` without inputs will bring up usage and options. +In ``amrex/Tools/Plotfile``, type ``make`` and then ``./fcompare.gnu.ex`` to run. +Typing ``./fcompare.gnu.ex`` without inputs will bring up usage and options. **Example** diff --git a/Docs/sphinx_documentation/source/SWFFT.rst b/Docs/sphinx_documentation/source/SWFFT.rst index 3e886dcc2a8..9e6192ff048 100644 --- a/Docs/sphinx_documentation/source/SWFFT.rst +++ b/Docs/sphinx_documentation/source/SWFFT.rst @@ -98,7 +98,7 @@ AMReX contains two SWFFT tutorials, `SWFFT Poisson`_ and `SWFFT Simple`_: .. _`SWFFT Simple`: https://amrex-codes.github.io/amrex/tutorials_html/SWFFT_Tutorial.html#swfft-simple .. [1] - https://xgitlab.cels.anl.gov/hacc/SWFFT + https://git.cels.anl.gov/hacc/SWFFT .. [2] SWFFT source code directory in AMReX: amrex/Src/Extern/SWFFT diff --git a/Docs/sphinx_documentation/source/Testing.rst b/Docs/sphinx_documentation/source/Testing.rst index b7e32c9477b..bbceae1d1ad 100644 --- a/Docs/sphinx_documentation/source/Testing.rst +++ b/Docs/sphinx_documentation/source/Testing.rst @@ -18,6 +18,7 @@ application codes that use it as a framework. We use an in-house test runner scr operation, originally developed by Michael Zingale for the Castro code, and later expanded to other application codes as well. The results for each night are collected and stored on a web page; see https://ccse.lbl.gov/pub/RegressionTesting/ for the latest set of results. +The runtime option ``amrex.abort_on_unused_inputs`` (``0`` or ``1``; default is ``0`` for false) is useful for making sure that tests always stay up to date with API changes as it will abort the application after the test run if any unused input parameters were detected. 
Running the test suite locally ============================== @@ -73,7 +74,7 @@ re-run the script without the :cpp:`--make_benchmarks` option: :: - python regtest.py --make_benchmarks 'generating initial benchmarks' AMReX-tests.ini + python regtest.py AMReX-tests.ini The script will generate a set of html pages in the directory specified in your :cpp:`AMReX-tests.ini` file that you can examine using the browser of your choice. diff --git a/Docs/sphinx_documentation/source/Visualization.rst b/Docs/sphinx_documentation/source/Visualization.rst index ea8b4ab8c0b..59f95f76090 100644 --- a/Docs/sphinx_documentation/source/Visualization.rst +++ b/Docs/sphinx_documentation/source/Visualization.rst @@ -873,9 +873,12 @@ and point to the CMake configuration installed with SENSEI. .. code-block:: bash - cmake -DAMReX_SENSEI=ON -DSENSEI_DIR=/lib/cmake .. + cmake -DAMReX_SENSEI=ON -DSENSEI_DIR=//cmake .. -When CMake generates the make files proceed as usual. +When CMake generates the make files proceed as usual. Note: may be +`lib` or `lib64` or something else depending on what CMake decided to use for +your particular OS. See the CMake GNUInstallDirs documentation for more +information. .. code-block:: bash @@ -952,8 +955,7 @@ dataset. Obtaining SENSEI ----------------- -SENSEI is hosted on Kitware's Gitlab site at https://gitlab.kitware.com/sensei/sensei -It's best to checkout the latest release rather than working on the master branch. 
+SENSEI is hosted on github at https://github.com/SENSEI-insitu/SENSEI.git To ease the burden of wrangling back end installs SENSEI provides two platforms with all dependencies pre-installed, a VirtualBox VM, and a NERSC Cori diff --git a/GNUmakefile.in b/GNUmakefile.in index 8a6ce69df09..ad6238543dc 100644 --- a/GNUmakefile.in +++ b/GNUmakefile.in @@ -19,6 +19,9 @@ ifeq ($(USE_FORTRAN_INTERFACE),TRUE) endif ifeq ($(USE_LINEAR_SOLVERS),TRUE) Pdirs += LinearSolvers/MLMG + ifeq ($(DIM),3) + Pdirs += LinearSolvers/OpenBC + endif ifeq ($(USE_FORTRAN_INTERFACE),TRUE) Pdirs += F_Interfaces/LinearSolvers endif diff --git a/INSTALL b/INSTALL index efb40fbdb2e..ed1e0dfb36e 100644 --- a/INSTALL +++ b/INSTALL @@ -10,7 +10,7 @@ There are three ways to use AMReX. Fortran modules via `./configure` followed by `make` and `make install`. Type `./configure -h` to show help message. An application code uses its build system to compile and link to the - AMReX library. Because AMReX uses C++14 and Fortran, the linker + AMReX library. Because AMReX uses C++17 and Fortran, the linker needs to link the libraries. See `Tutorials/Basic/Build_with_libamrex` for an example of this approach. Note that this approach relies the make system in diff --git a/README.md b/README.md index 72c182470e1..da3a1abcbd2 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@
-AMReX Logo +AMReX Logo

@@ -71,7 +71,7 @@ in a wide variety of other scientific simulations, some of which, can be seen in our application [gallery](https://amrex-codes.github.io/amrex/gallery.html).

## Get Help diff --git a/Src/Amr/AMReX_Amr.cpp b/Src/Amr/AMReX_Amr.cpp index 66ec4664c5a..02f0452eac9 100644 --- a/Src/Amr/AMReX_Amr.cpp +++ b/Src/Amr/AMReX_Amr.cpp @@ -910,7 +910,7 @@ Amr::writeSmallPlotFile () // Don't continue if we have no variables to plot. - if (stateSmallPlotVars().size() == 0) { + if (stateSmallPlotVars().size() == 0 && deriveSmallPlotVars().size() == 0) { return; } diff --git a/Src/Amr/AMReX_AmrLevel.H b/Src/Amr/AMReX_AmrLevel.H index 0aaf7fc2620..5034df1b5e5 100644 --- a/Src/Amr/AMReX_AmrLevel.H +++ b/Src/Amr/AMReX_AmrLevel.H @@ -15,6 +15,8 @@ #include #include #include +#include +#include #ifdef AMREX_USE_EB #include #endif @@ -152,11 +154,10 @@ public: int ncycle) = 0; /** - * \brief Contains operations to be done after a timestep. This is a - * pure virtual function and hence MUST be implemented by derived - * classes. + * \brief Contains operations to be done after a timestep. If this + * function is overridden, don't forget to reset FillPatcher. */ - virtual void post_timestep (int iteration) = 0; + virtual void post_timestep (int iteration); /** * \brief Contains operations to be done only after a full coarse * timestep. The default implementation does nothing. @@ -243,12 +244,14 @@ public: Long countCells () const noexcept; //! Get the area not to tag. - const BoxArray& getAreaNotToTag() noexcept; - const Box& getAreaToTag() noexcept; + const BoxArray& getAreaNotToTag () noexcept; + const Box& getAreaToTag () noexcept; //! Construct the area not to tag. - void constructAreaNotToTag(); + void constructAreaNotToTag (); //! Set the area not to tag. - void setAreaNotToTag(BoxArray& ba) noexcept; + void setAreaNotToTag (BoxArray& ba) noexcept; + + void resetFillPatcher (); /** * \brief Error estimation for regridding. 
This is a pure virtual @@ -365,6 +368,20 @@ public: virtual void particle_redistribute (int /*lbase*/ = 0, bool /*a_init*/ = false) {;} #endif + /** + * \brief Fill with FillPatcher on level > 0 and AmrLevel::FillPatch on level 0. + * + * \param mf destination MultiFab + * \param dcomp starting component for the destination + * \param ncomp number of components to fill + * \param nghost number of ghost cells to fill + * \param time time + * \param state_index StateData index + * \param scomp starting component in the StateData + */ + void FillPatcherFill (amrex::MultiFab& mf, int dcomp, int ncomp, int nghost, + amrex::Real time, int state_index, int scomp); + static void FillPatch (AmrLevel& amrlevel, MultiFab& leveldata, int boxGrow, @@ -380,8 +397,33 @@ public: Real time, int index, int scomp, - int ncomp, - int dcomp=0); + int ncomp, + int dcomp=0); + + /** + * \brief Evolve one step with Runge-Kutta (2, 3, or 4) + * + * To use RK, the StateData must have all the ghost cells needed. See + * namespace RungeKutta for expected function signatures of the callable + * parameters. + * + * \param order order of RK + * \param state_type index of StateData + * \param time time at the beginning of the step. + * \param dt time step + * \param iteration iteration number on fine level during a coarse time + * step. For an AMR simulation with subcycling and a + * refinement ratio of 2, the number is either 1 or 2, + * denoting the first and second substep, respectively. + * \param ncycle number of subcycling steps. It's usually 2 or 4. + * Without subcycling, this will be 1. + * \param f computing right-hand side for evolving the StateData. + * One can also register data for flux registers in this. 
+ * \param p optionally post-processing RK stage results + */ + template + void RK (int order, int state_type, Real time, Real dt, int iteration, + int ncycle, F&& f, P&& p = RungeKutta::PostStageNoOp()); #ifdef AMREX_USE_EB static void SetEBMaxGrowCells (int nbasic, int nvolume, int nfull) noexcept { @@ -425,7 +467,7 @@ protected: IntVect fine_ratio; // Refinement ratio to finer level. static DeriveList derive_lst; // List of derived quantities. static DescriptorList desc_lst; // List of state variables. - Vector state; // Array of state data. + Vector state; // Array of state data. BoxArray m_AreaNotToTag; //Area which shouldn't be tagged on this level. Box m_AreaToTag; //Area which is allowed to be tagged on this level. @@ -436,8 +478,18 @@ protected: std::unique_ptr > m_factory; + Vector>> m_fillpatcher; + private: + template + void storeRKCoarseData (int state_type, Real time, Real dt, + MultiFab const& S_old, + Array const& rkk); + + void FillRKPatch (int state_index, MultiFab& S, Real time, + int stage, int iteration, int ncycle); + mutable BoxArray edge_grids[AMREX_SPACEDIM]; // face-centered grids mutable BoxArray nodal_grids; // all nodal grids }; @@ -558,6 +610,74 @@ private: std::map< int,Vector< Vector< Vector > > > m_fbid; // [grid][level][fillablesubbox][oldnew] }; +template +void AmrLevel::RK (int order, int state_type, Real time, Real dt, int iteration, + int ncycle, F&& f, P&& p) +{ + BL_PROFILE("AmrLevel::RK()"); + + AMREX_ASSERT(AmrLevel::desc_lst[state_type].nExtra() > 0); // Need ghost cells in StateData + + MultiFab& S_old = get_old_data(state_type); + MultiFab& S_new = get_new_data(state_type); + const Real t_old = state[state_type].prevTime(); + const Real t_new = state[state_type].curTime(); + AMREX_ALWAYS_ASSERT(amrex::almostEqual(time,t_old) && amrex::almostEqual(time+dt,t_new)); + + if (order == 2) { + RungeKutta::RK2(S_old, S_new, time, dt, std::forward(f), + [&] (int /*stage*/, MultiFab& mf, Real t) { + FillPatcherFill(mf, 0, 
mf.nComp(), mf.nGrow(), t, + state_type, 0); }, + std::forward

(p)); + } else if (order == 3) { + RungeKutta::RK3(S_old, S_new, time, dt, std::forward(f), + [&] (int stage, MultiFab& mf, Real t) { + FillRKPatch(state_type, mf, t, stage, iteration, ncycle); + }, + [&] (Array const& rkk) { + if (level < parent->finestLevel()) { + storeRKCoarseData(state_type, time, dt, S_old, rkk); + } + }, + std::forward

(p)); + } else if (order == 4) { + RungeKutta::RK4(S_old, S_new, time, dt, std::forward(f), + [&] (int stage, MultiFab& mf, Real t) { + FillRKPatch(state_type, mf, t, stage, iteration, ncycle); + }, + [&] (Array const& rkk) { + if (level < parent->finestLevel()) { + storeRKCoarseData(state_type, time, dt, S_old, rkk); + } + }, + std::forward

(p)); + } else { + amrex::Abort("AmrLevel::RK: order = "+std::to_string(order)+" is not supported"); + } +} + +template +void AmrLevel::storeRKCoarseData (int state_type, Real time, Real dt, + MultiFab const& S_old, + Array const& rkk) +{ + if (level == parent->finestLevel()) { return; } + + const StateDescriptor& desc = AmrLevel::desc_lst[state_type]; + + auto& fillpatcher = parent->getLevel(level+1).m_fillpatcher[state_type]; + fillpatcher = std::make_unique> + (parent->boxArray(level+1), parent->DistributionMap(level+1), + parent->Geom(level+1), + parent->boxArray(level), parent->DistributionMap(level), + parent->Geom(level), + IntVect(desc.nExtra()), desc.nComp(), desc.interp(0)); + + fillpatcher->storeRKCoarseData(time, dt, S_old, rkk); +} + + } #endif /*_AmrLevel_H_*/ diff --git a/Src/Amr/AMReX_AmrLevel.cpp b/Src/Amr/AMReX_AmrLevel.cpp index a88489f9512..c10a1e6277b 100644 --- a/Src/Amr/AMReX_AmrLevel.cpp +++ b/Src/Amr/AMReX_AmrLevel.cpp @@ -31,6 +31,14 @@ EBSupport AmrLevel::m_eb_support_level = EBSupport::volume; DescriptorList AmrLevel::desc_lst; DeriveList AmrLevel::derive_lst; +void +AmrLevel::post_timestep (int /*iteration*/) +{ + if (level < parent->finestLevel()) { + parent->getLevel(level+1).resetFillPatcher(); + } +} + void AmrLevel::postCoarseTimeStep (Real time) { @@ -102,6 +110,7 @@ AmrLevel::AmrLevel (Amr& papa, } state.resize(desc_lst.size()); + m_fillpatcher.resize(desc_lst.size()); #ifdef AMREX_USE_EB if (EB2::TopIndexSpaceIfPresent()) { @@ -451,6 +460,8 @@ AmrLevel::restart (Amr& papa, } } + m_fillpatcher.resize(ndesc); + if (parent->useFixedCoarseGrids()) constructAreaNotToTag(); post_step_regrid = 0; @@ -2096,6 +2107,63 @@ void AmrLevel::constructAreaNotToTag () } } +void +AmrLevel::resetFillPatcher () +{ + for (auto& fp : m_fillpatcher) { + fp.reset(); + } +} + +void +AmrLevel::FillPatcherFill (MultiFab& mf, int dcomp, int ncomp, int nghost, + Real time, int state_index, int scomp) +{ + if (level == 0) { + FillPatch(*this, mf, nghost, 
time, state_index, scomp, ncomp, dcomp); + } else { + AmrLevel& fine_level = *this; + AmrLevel& crse_level = parent->getLevel(level-1); + const Geometry& geom_fine = fine_level.geom; + const Geometry& geom_crse = crse_level.geom; + + Vector smf_crse; + Vector stime_crse; + StateData& statedata_crse = crse_level.state[state_index]; + statedata_crse.getData(smf_crse,stime_crse,time); + StateDataPhysBCFunct physbcf_crse(statedata_crse,scomp,geom_crse); + + Vector smf_fine; + Vector stime_fine; + StateData& statedata_fine = fine_level.state[state_index]; + statedata_fine.getData(smf_fine,stime_fine,time); + StateDataPhysBCFunct physbcf_fine(statedata_fine,scomp,geom_fine); + + const StateDescriptor& desc = AmrLevel::desc_lst[state_index]; + + if (level > 1 &&!amrex::ProperlyNested(fine_level.crse_ratio, + parent->blockingFactor(fine_level.level), + nghost, mf.ixType(), + desc.interp(scomp))) { + amrex::Abort("FillPatcherFill: Grids are not properly nested. Must increase blocking factor."); + } + + auto& fillpatcher = m_fillpatcher[state_index]; + if (fillpatcher == nullptr) { + fillpatcher = std::make_unique> + (parent->boxArray(level), parent->DistributionMap(level), geom_fine, + parent->boxArray(level-1), parent->DistributionMap(level-1), geom_crse, + IntVect(nghost), desc.nComp(), desc.interp(scomp)); + } + + fillpatcher->fill(mf, IntVect(nghost), time, + smf_crse, stime_crse, smf_fine, stime_fine, + scomp, dcomp, ncomp, + physbcf_crse, scomp, physbcf_fine, scomp, + desc.getBCs(), scomp); + } +} + void AmrLevel::FillPatch (AmrLevel& amrlevel, MultiFab& leveldata, @@ -2163,4 +2231,23 @@ AmrLevel::CreateLevelDirectory (const std::string &dir) levelDirectoryCreated = true; } +void +AmrLevel::FillRKPatch (int state_index, MultiFab& S, Real time, + int stage, int iteration, int ncycle) +{ + StateDataPhysBCFunct physbcf(state[state_index], 0, geom); + + if (level == 0) { + S.FillBoundary(geom.periodicity()); + physbcf(S, 0, S.nComp(), S.nGrowVect(), time, 0); + } else { + 
auto& crse_level = parent->getLevel(level-1); + StateDataPhysBCFunct physbcf_crse(crse_level.state[state_index], 0, + crse_level.geom); + auto& fillpatcher = m_fillpatcher[state_index]; + fillpatcher->fillRK(stage, iteration, ncycle, S, time, physbcf_crse, + physbcf, AmrLevel::desc_lst[state_index].getBCs()); + } +} + } diff --git a/Src/Amr/AMReX_Derive.H b/Src/Amr/AMReX_Derive.H index 2a7c2e26713..7d5b32d7aa6 100644 --- a/Src/Amr/AMReX_Derive.H +++ b/Src/Amr/AMReX_Derive.H @@ -84,9 +84,9 @@ extern "C" const int* level, const int* grid_no) ; } -typedef void (*DeriveFuncFab) (const amrex::Box& bx, amrex::FArrayBox& derfab, int dcomp, int ncomp, - const amrex::FArrayBox& datafab, const amrex::Geometry& geomdata, - amrex::Real time, const int* bcrec, int level); + typedef std::function DeriveFuncFab; class DescriptorList; diff --git a/Src/Amr/AMReX_StateDescriptor.cpp b/Src/Amr/AMReX_StateDescriptor.cpp index 932479feeb2..1910dcf7b3f 100644 --- a/Src/Amr/AMReX_StateDescriptor.cpp +++ b/Src/Amr/AMReX_StateDescriptor.cpp @@ -42,23 +42,31 @@ StateDescriptor::BndryFunc::operator () (Real* data,const int* lo,const int* hi, { BL_ASSERT(m_func != 0 || m_func3D != 0); +#ifdef AMREX_USE_OMP bool thread_safe = bf_thread_safety(lo, hi, dom_lo, dom_hi, a_bc, 1); if (thread_safe) { - if (m_func != 0) - m_func(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); - else - m_func3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), - AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); - } else { +#endif + { + if (m_func != 0) { + m_func(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); + } else { + m_func3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), + AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + } + } #ifdef AMREX_USE_OMP + } else { #pragma omp critical (bndryfunc) -#endif - if (m_func != 0) - 
m_func(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); - else - m_func3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), - AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + { + if (m_func != 0) { + m_func(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); + } else { + m_func3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), + AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + } + } } +#endif } void @@ -69,23 +77,32 @@ StateDescriptor::BndryFunc::operator () (Real* data,const int* lo,const int* hi, { BL_ASSERT(m_gfunc != 0 || m_gfunc3D != 0); + amrex::ignore_unused(ng); +#ifdef AMREX_USE_OMP bool thread_safe = bf_thread_safety(lo, hi, dom_lo, dom_hi, a_bc, ng); if (thread_safe) { - if (m_gfunc != 0) - m_gfunc(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); - else - m_gfunc3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), - AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); - } else { +#endif + { + if (m_gfunc != 0) { + m_gfunc(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); + } else { + m_gfunc3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), + AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + } + } #ifdef AMREX_USE_OMP + } else { #pragma omp critical (bndryfunc) -#endif - if (m_gfunc != 0) - m_gfunc(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); - else - m_gfunc3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), - AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + { + if (m_gfunc != 0) { + m_gfunc(data,AMREX_ARLIM(lo),AMREX_ARLIM(hi),dom_lo,dom_hi,dx,grd_lo,time,a_bc); + } else { + m_gfunc3D(data,AMREX_ARLIM_3D(lo),AMREX_ARLIM_3D(hi),AMREX_ARLIM_3D(dom_lo),AMREX_ARLIM_3D(dom_hi), + AMREX_ZFILL(dx),AMREX_ZFILL(grd_lo),time,a_bc); + } + } } 
+#endif } void diff --git a/Src/AmrCore/AMReX_ErrorList.H b/Src/AmrCore/AMReX_ErrorList.H index 90f49b02749..1cc8d61fd07 100644 --- a/Src/AmrCore/AMReX_ErrorList.H +++ b/Src/AmrCore/AMReX_ErrorList.H @@ -383,6 +383,7 @@ std::ostream& operator << (std::ostream& os, const ErrorList& elst); Real m_min_time = std::numeric_limits::lowest(); Real m_max_time = std::numeric_limits::max(); int m_volume_weighting = 0; + int m_derefine = 0; RealBox m_realbox; AMRErrorTagInfo& SetMaxLevel (int max_level) noexcept { @@ -405,6 +406,10 @@ std::ostream& operator << (std::ostream& os, const ErrorList& elst); m_volume_weighting = volume_weighting; return *this; } + AMRErrorTagInfo& SetDerefine (int derefine) noexcept { + m_derefine = derefine; + return *this; + } }; class AMRErrorTag @@ -415,6 +420,8 @@ std::ostream& operator << (std::ostream& os, const ErrorList& elst); struct UserFunc { + virtual ~UserFunc () {} + virtual void operator() (const amrex::Box& bx, amrex::Array4 const& dat, amrex::Array4 const& tag, @@ -465,6 +472,8 @@ std::ostream& operator << (std::ostream& os, const ErrorList& elst); const AMRErrorTagInfo& info = AMRErrorTagInfo()) noexcept : m_userfunc(userfunc), m_field(field), m_info(info), m_ngrow(ngrow) {} + virtual ~AMRErrorTag () {} + virtual void operator() (amrex::TagBoxArray& tb, const amrex::MultiFab* mf, char clearval, diff --git a/Src/AmrCore/AMReX_ErrorList.cpp b/Src/AmrCore/AMReX_ErrorList.cpp index 1594ba740a9..6dcb5565227 100644 --- a/Src/AmrCore/AMReX_ErrorList.cpp +++ b/Src/AmrCore/AMReX_ErrorList.cpp @@ -293,80 +293,225 @@ AMRErrorTag::operator() (TagBoxArray& tba, auto threshold = m_value[level]; auto const volume_weighting = m_info.m_volume_weighting; auto geomdata = geom.data(); + auto tag_update = tagval; + if (m_info.m_derefine) { + tag_update = clearval; + } + if (m_test == GRAD) { - ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept +#ifdef AMREX_USE_EB + if (mf->hasEBFabFactory()) { + auto const& ebfact = + 
dynamic_cast(mf->Factory()); + auto const& flags = ebfact.getMultiEBCellFlagFab().arrays(); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + auto const& dat = datma[bi]; + auto const& flag = flags[bi]; + + Real ax = 0.; Real ay = 0.; + if (flag(i,j,k).isConnected(1,0,0)) { + ax = amrex::max(ax,amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(-1,0,0)) { + ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); + } + if (flag(i,j,k).isConnected(0,1,0)) { + ay = amrex::max(ay,amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(0,-1,0)) { + ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); + } +#if AMREX_SPACEDIM > 2 + Real az = 0.; + if (flag(i,j,k).isConnected(0,0,1)) { + az = amrex::max(az,amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(0,0,-1)) { + az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); + } +#endif + if (amrex::max(AMREX_D_DECL(ax,ay,az)) >= threshold) { + tagma[bi](i,j,k) = tag_update; + } + }); + } else +#endif { - auto const& dat = datma[bi]; - auto ax = amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k)); - ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + auto const& dat = datma[bi]; + + Real ax = 0.; + ax = amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k)); + ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); #if AMREX_SPACEDIM == 1 - if (ax >= threshold) { tagma[bi](i,j,k) = tagval;} + if (ax >= threshold) { tagma[bi](i,j,k) = tag_update;} #else - auto ay = amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k)); - ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); + Real ay = 0.; + ay = amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k)); + ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); #if AMREX_SPACEDIM > 2 - auto az = amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k)); - az = 
amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); -#endif - if (amrex::max(AMREX_D_DECL(ax,ay,az)) >= threshold) { - tagma[bi](i,j,k) = tagval; - } -#endif - }); + Real az = 0.; + az = amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k)); + az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); +#endif // DIM > 2 + if (amrex::max(AMREX_D_DECL(ax,ay,az)) >= threshold) { + tagma[bi](i,j,k) = tag_update; + } +#endif // DIM > 1 + }); + } } else if (m_test == RELGRAD) { - ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept +#ifdef AMREX_USE_EB + if (mf->hasEBFabFactory()) { + auto const& ebfact = + dynamic_cast(mf->Factory()); + auto const& flags = ebfact.getMultiEBCellFlagFab().arrays(); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + auto const& dat = datma[bi]; + auto const& flag = flags[bi]; + + Real ax = 0.; Real ay = 0.; + + if (flag(i,j,k).isConnected(1,0,0)) { + ax = amrex::max(ax,amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(-1,0,0)) { + ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); + } + if (flag(i,j,k).isConnected(0,1,0)) { + ay = amrex::max(ay,amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(0,-1,0)) { + ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); + } +#if AMREX_SPACEDIM > 2 + Real az = 0.; + if (flag(i,j,k).isConnected(0,0,1)) { + az = amrex::max(az,amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k))); + } + if (flag(i,j,k).isConnected(0,0,-1)) { + az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); + } +#endif // DIM > 2 + if (amrex::max(AMREX_D_DECL(ax,ay,az)) + >= threshold * amrex::Math::abs(dat(i,j,k))) { + tagma[bi](i,j,k) = tag_update; + } + }); + } else +#endif { - auto const& dat = datma[bi]; - auto ax = amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k)); - ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, 
int j, int k) noexcept + { + auto const& dat = datma[bi]; + + Real ax = amrex::Math::abs(dat(i+1,j,k) - dat(i,j,k)); + ax = amrex::max(ax,amrex::Math::abs(dat(i,j,k) - dat(i-1,j,k))); #if AMREX_SPACEDIM == 1 - if (ax >= threshold * amrex::Math::abs(dat(i,j,k))) { tagma[bi](i,j,k) = tagval;} + if (ax >= threshold * amrex::Math::abs(dat(i,j,k))) { tagma[bi](i,j,k) = tag_update;} #else - auto ay = amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k)); - ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); + Real ay = amrex::Math::abs(dat(i,j+1,k) - dat(i,j,k)); + ay = amrex::max(ay,amrex::Math::abs(dat(i,j,k) - dat(i,j-1,k))); #if AMREX_SPACEDIM > 2 - auto az = amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k)); - az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); -#endif - if (amrex::max(AMREX_D_DECL(ax,ay,az)) - >= threshold * amrex::Math::abs(dat(i,j,k))) { - tagma[bi](i,j,k) = tagval; - } -#endif - }); + Real az = amrex::Math::abs(dat(i,j,k+1) - dat(i,j,k)); + az = amrex::max(az,amrex::Math::abs(dat(i,j,k) - dat(i,j,k-1))); +#endif // DIM > 2 + if (amrex::max(AMREX_D_DECL(ax,ay,az)) + >= threshold * amrex::Math::abs(dat(i,j,k))) { + tagma[bi](i,j,k) = tag_update; + } +#endif // DIM > 1 + }); + } } else if (m_test == LESS) { - ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept +#ifdef AMREX_USE_EB + if (mf->hasEBFabFactory()) { + auto const& ebfact = + dynamic_cast(mf->Factory()); + auto const& flags = ebfact.getMultiEBCellFlagFab().arrays(); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + Real vol = volume_weighting ? Geometry::Volume(IntVect{AMREX_D_DECL(i,j,k)}, geomdata) : 1.0_rt; + auto const& flag = flags[bi]; + if (!flag(i,j,k).isCovered()) { + if (datma[bi](i,j,k) * vol <= threshold) { + tagma[bi](i,j,k) = tag_update; + } + } + }); + } else +#endif { + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept { Real vol = volume_weighting ? 
Geometry::Volume(IntVect{AMREX_D_DECL(i,j,k)}, geomdata) : 1.0_rt; if (datma[bi](i,j,k) * vol <= threshold) { - tagma[bi](i,j,k) = tagval; + tagma[bi](i,j,k) = tag_update; } }); + } } else if (m_test == GREATER) { +#ifdef AMREX_USE_EB + if (mf->hasEBFabFactory()) { + auto const& ebfact = + dynamic_cast(mf->Factory()); + auto const& flags = ebfact.getMultiEBCellFlagFab().arrays(); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + Real vol = volume_weighting ? Geometry::Volume(IntVect{AMREX_D_DECL(i,j,k)}, geomdata) : 1.0_rt; + auto const& flag = flags[bi]; + if (!flag(i,j,k).isCovered()) { + if (datma[bi](i,j,k) * vol >= threshold) { + tagma[bi](i,j,k) = tag_update; + } + } + }); + } else +#endif ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept { Real vol = volume_weighting ? Geometry::Volume(IntVect{AMREX_D_DECL(i,j,k)}, geomdata) : 1.0_rt; - if (datma[bi](i,j,k) * vol >= threshold) { - tagma[bi](i,j,k) = tagval; - } + if (datma[bi](i,j,k) * vol >= threshold) { + tagma[bi](i,j,k) = tag_update; + } }); } else if (m_test == VORT) { const Real fac = threshold * Real(std::pow(2,level)); - ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept +#ifdef AMREX_USE_EB + if (mf->hasEBFabFactory()) { + auto const& ebfact = + dynamic_cast(mf->Factory()); + auto const& flags = ebfact.getMultiEBCellFlagFab().arrays(); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + auto const& flag = flags[bi]; + if (!flag(i,j,k).isCovered()) { + if (datma[bi](i,j,k) >= fac) { + tagma[bi](i,j,k) = tag_update; + } + } + }); + } else +#endif { - if (datma[bi](i,j,k) >= fac) { - tagma[bi](i,j,k) = tagval; - } - }); + ParallelFor(tba, [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + if (datma[bi](i,j,k) >= fac) { + tagma[bi](i,j,k) = tag_update; + } + }); + } } else { diff --git a/Src/AmrCore/AMReX_FillPatchUtil.H b/Src/AmrCore/AMReX_FillPatchUtil.H index 
51a5f457391..495cbc180b6 100644 --- a/Src/AmrCore/AMReX_FillPatchUtil.H +++ b/Src/AmrCore/AMReX_FillPatchUtil.H @@ -28,12 +28,17 @@ namespace amrex { - template + template struct NullInterpHook { - void operator() (FAB& /*fab*/, const Box& /*bx*/, int /*icomp*/, int /*ncomp*/) const {} + template ::value,int> = 0> + void operator() (MFFAB& /*fab*/, const Box& /*bx*/, int /*icomp*/, int /*ncomp*/) const {} - void operator() (Array /*fab*/, const Box& /*bx*/, int /*icomp*/, int /*ncomp*/) const {} + template ::value,int> = 0> + void operator() (Array /*fab*/, const Box& /*bx*/, int /*icomp*/, int /*ncomp*/) const {} + + template ::value,int> = 0> + void operator() (MFFAB& /*mf*/, int /*icomp*/, int /*ncomp*/) const {} }; template diff --git a/Src/AmrCore/AMReX_FillPatchUtil_I.H b/Src/AmrCore/AMReX_FillPatchUtil_I.H index 8d8f210a0fe..3e94abfad27 100644 --- a/Src/AmrCore/AMReX_FillPatchUtil_I.H +++ b/Src/AmrCore/AMReX_FillPatchUtil_I.H @@ -4,6 +4,31 @@ namespace amrex { +namespace detail { + +template +auto call_interp_hook (F const& f, MF& mf, int icomp, int ncomp) + -> decltype(f(mf[0],Box(),icomp,ncomp)) +{ +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(mf); mfi.isValid(); ++mfi) { + auto& dfab = mf[mfi]; + const Box& dbx = dfab.box(); + f(dfab, dbx, icomp, ncomp); + } +} + +template +auto call_interp_hook (F const& f, MF& mf, int icomp, int ncomp) + -> decltype(f(mf,icomp,ncomp)) +{ + f(mf, icomp, ncomp); +} + +} + template bool ProperlyNested (const IntVect& ratio, const IntVect& blocking_factor, int ngrow, const IndexType& boxType, Interp* mapper) @@ -459,9 +484,6 @@ namespace { if ( ! fpc.ba_crse_patch.empty()) { - - using FAB = typename MF::FABType::value_type; - MF mf_crse_patch = make_mf_crse_patch (fpc, ncomp, mf.boxArray().ixType()); // Must make sure fine exists under needed coarse faces. 
// It stores values for the final (interior) interpolation, @@ -491,20 +513,12 @@ namespace { solve_mask.setVal(1); // Values to solve. solve_mask.setVal(0, mask_cpc, 0, 1); // Known values. - for (MFIter mfi(mf_refined_patch); mfi.isValid(); ++mfi) - { - FAB& sfab = mf_crse_patch[mfi]; - pre_interp(sfab, sfab.box(), 0, ncomp); - } + detail::call_interp_hook(pre_interp, mf_crse_patch, 0, ncomp); InterpFace(mapper, mf_crse_patch, 0, mf_refined_patch, 0, ncomp, ratio, solve_mask, cgeom, fgeom, bcscomp, RunOn::Gpu, bcs); - for (MFIter mfi(mf_refined_patch); mfi.isValid(); ++mfi) - { - FAB& dfab = mf_refined_patch[mfi]; - post_interp(dfab, dfab.box(), 0, ncomp); - } + detail::call_interp_hook(post_interp, mf_refined_patch, 0, ncomp); bool aliasing = false; for (auto const& fmf_a : fmf) { @@ -538,30 +552,14 @@ namespace { MF mf_fine_patch = make_mf_fine_patch(fpc, ncomp); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(mf_crse_patch); mfi.isValid(); ++mfi) - { - auto& sfab = mf_crse_patch[mfi]; - const Box& sbx = sfab.box(); - pre_interp(sfab, sbx, 0, ncomp); - } + detail::call_interp_hook(pre_interp, mf_crse_patch, 0, ncomp); FillPatchInterp(mf_fine_patch, 0, mf_crse_patch, 0, ncomp, IntVect(0), cgeom, fgeom, amrex::grow(amrex::convert(fgeom.Domain(),mf.ixType()),nghost), ratio, mapper, bcs, bcscomp); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(mf_fine_patch); mfi.isValid(); ++mfi) - { - auto& dfab = mf_fine_patch[mfi]; - const Box& dbx = dfab.box(); - post_interp(dfab, dbx, 0, ncomp); - } + detail::call_interp_hook(post_interp, mf_fine_patch, 0, ncomp); mf.ParallelCopy(mf_fine_patch, 0, dcomp, ncomp, IntVect{0}, nghost); } @@ -1024,14 +1022,7 @@ InterpFromCoarseLevel (MF& mf, IntVect const& nghost, Real time, cbc(mf_crse_patch, 0, ncomp, mf_crse_patch.nGrowVect(), time, cbccomp); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - 
for (MFIter mfi(mf_crse_patch); mfi.isValid(); ++mfi) - { - FAB& sfab = mf_crse_patch[mfi]; - pre_interp(sfab, sfab.box(), 0, ncomp); - } + detail::call_interp_hook(pre_interp, mf_crse_patch, 0, ncomp); FillPatchInterp(mf, dcomp, mf_crse_patch, 0, ncomp, nghost, cgeom, fgeom, fdomain_g, ratio, mapper, bcs, bcscomp); diff --git a/Src/AmrCore/AMReX_FillPatcher.H b/Src/AmrCore/AMReX_FillPatcher.H new file mode 100644 index 00000000000..d0e775416ee --- /dev/null +++ b/Src/AmrCore/AMReX_FillPatcher.H @@ -0,0 +1,585 @@ +#ifndef AMREX_FILLPATCHER_H_ +#define AMREX_FILLPATCHER_H_ +#include + +#include + +namespace amrex { + +/** + * \brief FillPatcher is for filling a fine level MultiFab/FabArray. + * + * This class is not as general as the FillPatchTwoLevels functions. It + * fills the fine ghost cells not overlapping any fine level valid cells + * with interpolation of the coarse data. Then it fills the fine ghost + * cells overlapping fine level valid cells with the fine level data. If + * the valid cells of the destination need to be filled, it will be done as + * well. Finally, it will fill the physical boundary using the user + * provided functor. The `fill` member function can be used to do the + * operations just described. Alternatively, one can also use the + * `fillCoarseFineBoundary` to fill the ghost cells at the coarse/fine + * boundary only. Then one can manually call FillBoundary to fill the other + * ghost cells, and use the physical BC functor to handle the physical + * boundary. + * + * The communication of the coarse data needed for spatial interpolation is + * optimized at the cost of being error-prone. One must follow the + * following guidelines. + * + * (1) This class is for filling data during time stepping, not during + * regrid. The fine level data passed as input must have the same BoxArray + * and DistributionMapping as the destination. It's OK they are the same + * MultiFab. 
For AmrLevel based codes, AmrLevel::FillPatcherFill will try to + * use FillPatcher if it can, and AmrLevel::FillPatch will use the fillpatch + * functions. + * + * (2) When to build? It is recommended that one uses `std::unique_ptr` to + * store the FillPatcher object, and build it only when it is needed and + * it's a nullptr. For AmrLevel based codes, the AmrLevel class will build + * it for you as needed when you call the AmrLevel::FillPatcherFill + * function. + * + * (3) When to destroy? Usually, we do time stepping on a coarse level + * first. Then we recursively do time stepping on fine levels. After the + * finer level finishes, we do reflux and average the fine data down to the + * coarse level. After that we should destroy the FillPatcher object + * associated with these two levels, because the coarse data stored in the + * object has become outdated. For AmrCore based codes, you could use + * Tests/Amr/Advection_AmrCore as an example. For AmrLevel based codes, you + * should do this in the post_timestep virtual function (see + * Tests/Amr/Advection_AmrLevel for an example). + * + * (4) The source MultiFabs/FabArrays (i.e., the crse_data and fine_data + * arguments of the fill function) need to have exactly the same number of + * components as the ncomp argument of the constructor, even though it's + * allowed to fill only some of the components with the fill function. + * + * (5) This only works for cell-centered and nodal data. + * + * This class also provides support for RungeKutta::RK3 and RungeKutta::RK4. + * The storeRKCoarseData function can be used to store coarse AMR level + * data that are needed for filling fine level data's ghost cells in this + * class. The `fillRK` function can be used to fill ghost cells for fine + * AMR levels. This operation at the coarse/fine boundary is non-trivial + * for RK orders higher than 2. 
Note that it is expected that time stepping + * on the coarse level is performed before any fine level time stepping, and + * it's the user's responsibility to properly create and destroy this object. + * See AmrLevel::RK for an example of using the RungeKutta functions and + * FillPatcher together. + */ + +template +class FillPatcher +{ +public: + + /** + * \brief Constructor of FillPatcher + * + * \param fba fine level BoxArray + * \param fdm fine level DistributionMapping + * \param fgeom fine level Geometry + * \param cba coarse level BoxArray + * \param cdm coarse level DistributionMapping + * \param cgeom coarse level Geometry + * \param nghost max number of ghost cells to be filled at coarse/fine boundary + * \param ncomp the number of components + * \param interp for spatial interpolation + * \param eb_index_space optional argument for specifying EB IndexSpace + */ + FillPatcher (BoxArray const& fba, DistributionMapping const& fdm, + Geometry const& fgeom, + BoxArray const& cba, DistributionMapping const& cdm, + Geometry const& cgeom, + IntVect const& nghost, int ncomp, InterpBase* interp, +#ifdef AMREX_USE_EB + EB2::IndexSpace const* eb_index_space = EB2::TopIndexSpaceIfPresent()); +#else + EB2::IndexSpace const* eb_index_space = nullptr); +#endif + + /** + * \brief Function to fill data + * + * \param mf destination MultiFab/FabArray + * \param nghost number of ghost cells to fill. 
This must be <= what's + * provided to the constructor + * \param time time associated with the destination + * \param crse_data coarse level data + * \param crse_time time associated with the coarse data + * \param fine_data fine level data + * \param fine_time time associated with the fine data + * \param scomp starting component of the source + * \param dcomp starting component of the destination + * \param ncomp the number of components to fill + * \param cbc for filling coarse level physical BC + * \param cbccomp starting component of the coarse level BC functor + * \param fbc for filling fine level physical BC + * \param fbccomp starting component of the fine level BC functor + * \param bcs BCRec specifying physical boundary types + * \param bcscomp starting component of the BCRec Vector. + * \param pre_interp optional pre-interpolation hook for modifying the coarse data + * \param post_interp optional post-interpolation hook for modifying the fine data + */ + template , + typename PostInterpHook=NullInterpHook > + void fill (MF& mf, IntVect const& nghost, Real time, + Vector const& crse_data, Vector const& crse_time, + Vector const& fine_data, Vector const& fine_time, + int scomp, int dcomp, int ncomp, + BC& cbc, int cbccomp, BC& fbc, int fbccomp, + Vector const& bcs, int bcscomp, + PreInterpHook const& pre_interp = {}, + PostInterpHook const& post_interp = {}); + + /** + * \brief Function to fill data at coarse/fine boundary only + * + * \param mf destination MultiFab/FabArray + * \param nghost number of ghost cells to fill. 
This must be <= what's + * provided to the constructor + * \param time time associated with the destination + * \param crse_data coarse level data + * \param crse_time time associated with the coarse data + * \param scomp starting component of the source + * \param dcomp starting component of the destination + * \param ncomp the number of components to fill + * \param cbc for filling coarse level physical BC + * \param cbccomp starting component of the coarse level BC functor + * \param bcs BCRec specifying physical boundary types + * \param bcscomp starting component of the BCRec Vector. + * \param pre_interp optional pre-interpolation hook for modifying the coarse data + * \param post_interp optional post-interpolation hook for modifying the fine data + */ + template , + typename PostInterpHook=NullInterpHook > + void fillCoarseFineBoundary (MF& mf, IntVect const& nghost, Real time, + Vector const& crse_data, + Vector const& crse_time, + int scomp, int dcomp, int ncomp, + BC& cbc, int cbccomp, + Vector const& bcs, int bcscomp, + PreInterpHook const& pre_interp = {}, + PostInterpHook const& post_interp = {}); + + /** + * \brief Store coarse AMR level data for RK3 and RK4 + * + * \tparam order RK order. Must be 3 or 4. + * \param time time at the beginning of the step + * \param dt time step + * \param S_old data at time + * \param RK_k right-hand side at RK stages + */ + template + void storeRKCoarseData (Real time, Real dt, MF const& S_old, + Array const& RK_k); + + /** + * \brief Fill ghost cells of fine AMR level for RK3 and RK4 + * + * \param stage RK stage number starting from 1 + * \param iteration iteration number on fine level during a coarse time + * step. For an AMR simulation with subcycling and a + * refinement ratio of 2, the number is either 1 or 2, + * denoting the first and second substep, respectively. + * \param ncycle number of subcycling steps. It's usually 2 or 4. + * Without subcycling, this will be 1. 
+ * \param cbc filling physical boundary on coarse level + * \param fbc filling physical boundary on fine level + * \param bcs physical BC types + */ + template + void fillRK (int stage, int iteration, int ncycle, MF& mf, Real time, + BC& cbc, BC& fbc, Vector const& bcs); + +private: + + BoxArray m_fba; + BoxArray m_cba; + DistributionMapping m_fdm; + DistributionMapping m_cdm; + Geometry m_fgeom; + Geometry m_cgeom; + IntVect m_nghost; + int m_ncomp; + InterpBase* m_interp; + EB2::IndexSpace const* m_eb_index_space = nullptr; + MF m_sfine; + IntVect m_ratio; + Vector>> m_cf_crse_data; + std::unique_ptr m_cf_crse_data_tmp; + std::unique_ptr m_cf_fine_data; + Real m_dt_coarse = std::numeric_limits::lowest(); + + FabArrayBase::FPinfo const& getFPinfo (); +}; + +template +FillPatcher::FillPatcher (BoxArray const& fba, DistributionMapping const& fdm, + Geometry const& fgeom, + BoxArray const& cba, DistributionMapping const& cdm, + Geometry const& cgeom, + IntVect const& nghost, int ncomp, InterpBase* interp, + EB2::IndexSpace const* eb_index_space) + : m_fba(fba), + m_cba(cba), + m_fdm(fdm), + m_cdm(cdm), + m_fgeom(fgeom), + m_cgeom(cgeom), + m_nghost(nghost), + m_ncomp(ncomp), + m_interp(interp), + m_eb_index_space(eb_index_space), + m_sfine(fba, fdm, 1, nghost, MFInfo().SetAlloc(false)) +{ + static_assert(IsFabArray::value, + "FillPatcher: MF must be FabArray type"); + AMREX_ALWAYS_ASSERT(m_fba.ixType().cellCentered() || m_fba.ixType().nodeCentered()); + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + m_ratio[idim] = m_fgeom.Domain().length(idim) / m_cgeom.Domain().length(idim); + } + AMREX_ASSERT(m_fgeom.Domain() == amrex::refine(m_cgeom.Domain(),m_ratio)); +} + +template +template +void +FillPatcher::fill (MF& mf, IntVect const& nghost, Real time, + Vector const& cmf, Vector const& ct, + Vector const& fmf, Vector const& ft, + int scomp, int dcomp, int ncomp, + BC& cbc, int cbccomp, + BC& fbc, int fbccomp, + Vector const& bcs, int bcscomp, + PreInterpHook 
const& pre_interp, + PostInterpHook const& post_interp) +{ + BL_PROFILE("FillPatcher::fill()"); + + AMREX_ALWAYS_ASSERT(m_fba == fmf[0]->boxArray() && + m_fdm == fmf[0]->DistributionMap()); + + fillCoarseFineBoundary(mf, nghost, time, cmf, ct, scomp, dcomp, ncomp, + cbc, cbccomp, bcs, bcscomp, pre_interp, post_interp); + + FillPatchSingleLevel(mf, nghost, time, fmf, ft, scomp, dcomp, ncomp, + m_fgeom, fbc, fbccomp); +} + +template +FabArrayBase::FPinfo const& +FillPatcher::getFPinfo () +{ + const InterpolaterBoxCoarsener& coarsener = m_interp->BoxCoarsener(m_ratio); + return FabArrayBase::TheFPinfo(m_sfine, m_sfine, m_nghost, coarsener, + m_fgeom, m_cgeom, m_eb_index_space); +} + +template +template +void +FillPatcher::fillCoarseFineBoundary (MF& mf, IntVect const& nghost, Real time, + Vector const& cmf, + Vector const& ct, + int scomp, int dcomp, int ncomp, + BC& cbc, int cbccomp, + Vector const& bcs, int bcscomp, + PreInterpHook const& pre_interp, + PostInterpHook const& post_interp) +{ + BL_PROFILE("FillPatcher::fillCFB"); + + AMREX_ALWAYS_ASSERT(nghost.allLE(m_nghost) && + m_fba == mf.boxArray() && + m_fdm == mf.DistributionMap() && + m_cba == cmf[0]->boxArray() && + m_cdm == cmf[0]->DistributionMap() && + m_ncomp >= ncomp && + m_ncomp == cmf[0]->nComp()); + + auto const& fpc = getFPinfo(); + + if ( ! 
fpc.ba_crse_patch.empty()) + { + if (m_cf_fine_data == nullptr) { + m_cf_fine_data = std::make_unique + (make_mf_fine_patch(fpc, m_ncomp)); + } + + int ncmfs = cmf.size(); + for (int icmf = 0; icmf < ncmfs; ++icmf) { + Real t = ct[icmf]; + auto it = std::find_if(m_cf_crse_data.begin(), m_cf_crse_data.end(), + [=] (auto const& x) { + return amrex::almostEqual(x.first,t,5); + }); + + if (it == std::end(m_cf_crse_data)) { + MF mf_crse_patch = make_mf_crse_patch(fpc, m_ncomp); + mf_crse_patch.ParallelCopy(*cmf[icmf], m_cgeom.periodicity()); + + std::pair> tmp; + tmp.first = t; + tmp.second = std::make_unique(std::move(mf_crse_patch)); + m_cf_crse_data.push_back(std::move(tmp)); + } + } + + if (m_cf_crse_data_tmp == nullptr) { + m_cf_crse_data_tmp = std::make_unique + (make_mf_crse_patch(fpc, m_ncomp)); + } + + if (m_cf_crse_data.size() > 0 && + amrex::almostEqual(time, m_cf_crse_data[0].first,5)) + { + amrex::Copy(*m_cf_crse_data_tmp, *m_cf_crse_data[0].second, + scomp, 0, ncomp, 0); + } + else if (m_cf_crse_data.size() > 1 && + amrex::almostEqual(time, m_cf_crse_data[1].first,5)) + { + amrex::Copy(*m_cf_crse_data_tmp, *m_cf_crse_data[1].second, + scomp, 0, ncomp, 0); + } + else if (m_cf_crse_data.size() == 2) + { + int const ng_space_interp = 8; // Need to be big enough + Box domain = m_cgeom.growPeriodicDomain(ng_space_interp); + domain.convert(mf.ixType()); + Real t0 = m_cf_crse_data[0].first; + Real t1 = m_cf_crse_data[1].first; + Real alpha = (t1-time)/(t1-t0); + Real beta = (time-t0)/(t1-t0); + AMREX_ASSERT(alpha >= 0._rt && beta >= 0._rt); + auto const& a = m_cf_crse_data_tmp->arrays(); + auto const& a0 = m_cf_crse_data[0].second->const_arrays(); + auto const& a1 = m_cf_crse_data[1].second->const_arrays(); + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + if (domain.contains(i,j,k)) { + a[bi](i,j,k,n) + = alpha*a0[bi](i,j,k,scomp+n) + + beta*a1[bi](i,j,k,scomp+n); + } + }); + 
Gpu::streamSynchronize(); + } + else + { + amrex::Abort("FillPatcher: High order interpolation in time not supported. Or FillPatcher was not properly deleted."); + } + + cbc(*m_cf_crse_data_tmp, 0, ncomp, nghost, time, cbccomp); + + detail::call_interp_hook(pre_interp, *m_cf_crse_data_tmp, 0, ncomp); + + FillPatchInterp(*m_cf_fine_data, scomp, *m_cf_crse_data_tmp, 0, + ncomp, IntVect(0), m_cgeom, m_fgeom, + amrex::grow(amrex::convert(m_fgeom.Domain(), + mf.ixType()),nghost), + m_ratio, m_interp, bcs, bcscomp); + + detail::call_interp_hook(post_interp, *m_cf_fine_data, scomp, ncomp); + + mf.ParallelCopy(*m_cf_fine_data, scomp, dcomp, ncomp, IntVect{0}, nghost); + } +} + +template +template +void FillPatcher::storeRKCoarseData (Real /*time*/, Real dt, MF const& S_old, + Array const& RK_k) +{ + m_dt_coarse = dt; + m_cf_crse_data.resize(order+1); + + auto const& fpc = getFPinfo(); + + for (auto& tmf : m_cf_crse_data) { + tmf.first = std::numeric_limits::lowest(); // because we dont' need it + tmf.second = std::make_unique(make_mf_crse_patch(fpc, m_ncomp)); + } + m_cf_crse_data[0].second->ParallelCopy(S_old, m_cgeom.periodicity()); + for (std::size_t i = 0; i < order; ++i) { + m_cf_crse_data[i+1].second->ParallelCopy(RK_k[i], m_cgeom.periodicity()); + } +} + +template +template +void FillPatcher::fillRK (int stage, int iteration, int ncycle, + MF& mf, Real time, BC& cbc, BC& fbc, + Vector const& bcs) +{ + int rk_order = m_cf_crse_data.size()-1; + if (rk_order != 3 && rk_order != 4) { + amrex::Abort("FillPatcher: unsupported RK order "+std::to_string(rk_order)); + return; + } + AMREX_ASSERT(stage > 0 && stage <= rk_order); + + auto const& fpc = getFPinfo(); + if (m_cf_crse_data_tmp == nullptr) { + m_cf_crse_data_tmp = std::make_unique + (make_mf_crse_patch(fpc, m_ncomp)); + } + + auto const& u = m_cf_crse_data_tmp->arrays(); + auto const& u0 = m_cf_crse_data[0].second->const_arrays(); + auto const& k1 = m_cf_crse_data[1].second->const_arrays(); + auto const& k2 = 
m_cf_crse_data[2].second->const_arrays(); + auto const& k3 = m_cf_crse_data[3].second->const_arrays(); + + Real dtc = m_dt_coarse; + Real r = Real(1) / Real(ncycle); + Real xsi = Real(iteration-1) / Real(ncycle); + + if (rk_order == 3) { + // coefficients for U + Real b1 = xsi - Real(5./6.)*xsi*xsi; + Real b2 = Real(1./6.)*xsi*xsi; + Real b3 = Real(2./3)*xsi*xsi; + // coefficients for Ut + Real c1 = Real(1.) - Real(5./3.)*xsi; + Real c2 = Real(1./3.)*xsi; + Real c3 = Real(4./3.)*xsi; + // coefficients for Utt + constexpr Real d1 = Real(-5./3.); + constexpr Real d2 = Real(1./3.); + constexpr Real d3 = Real(4./3.); + if (stage == 1) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc*uu; + }); + } else if (stage == 2) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3; + Real ut = c1*kk1 + c2*kk2 + c3*kk3; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc*(uu + r*ut); + }); + } else if (stage == 3) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3; + Real ut = c1*kk1 + c2*kk2 + c3*kk3; + Real utt = d1*kk1 + d2*kk2 + d3*kk3; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc* + (uu + Real(0.5)*r*ut + Real(0.25)*r*r*utt); + }); + } + } else if (rk_order == 4) { + auto const& k4 = m_cf_crse_data[4].second->const_arrays(); + Real xsi2 = xsi*xsi; + Real xsi3 = xsi2*xsi; + // coefficients for U + Real b1 = xsi - 
Real(1.5)*xsi2 + Real(2./3.)*xsi3; + Real b2 = xsi2 - Real(2./3.)*xsi3; + Real b3 = b2; + Real b4 = Real(-0.5)*xsi2 + Real(2./3.)*xsi3; + // coefficients for Ut + Real c1 = Real(1.) - Real(3.)*xsi + Real(2.)*xsi2; + Real c2 = Real(2.)*xsi - Real(2.)*xsi2; + Real c3 = c2; + Real c4 = -xsi + Real(2.)*xsi2; + // coefficients for Utt + Real d1 = Real(-3.) + Real(4.)*xsi; + Real d2 = Real( 2.) - Real(4.)*xsi; + Real d3 = d2; + Real d4 = Real(-1.) + Real(4.)*xsi; + // coefficients for Uttt + constexpr Real e1 = Real( 4.); + constexpr Real e2 = Real(-4.); + constexpr Real e3 = Real(-4.); + constexpr Real e4 = Real( 4.); + if (stage == 1) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real kk4 = k4[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3 + b4*kk4; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc*uu; + }); + } else if (stage == 2) { + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real kk4 = k4[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3 + b4*kk4; + Real ut = c1*kk1 + c2*kk2 + c3*kk3 + c4*kk4; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc*(uu + Real(0.5)*r*ut); + }); + } else if (stage == 3 || stage == 4) { + Real r2 = r*r; + Real r3 = r2*r; + Real at = (stage == 3) ? Real(0.5)*r : r; + Real att = (stage == 3) ? Real(0.25)*r2 : Real(0.5)*r2; + Real attt = (stage == 3) ? Real(0.0625)*r3 : Real(0.125)*r3; + Real akk = (stage == 3) ? Real(-4.) 
: Real(4.); + amrex::ParallelFor(*m_cf_crse_data_tmp, IntVect(0), m_ncomp, + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k, int n) noexcept + { + Real kk1 = k1[bi](i,j,k,n); + Real kk2 = k2[bi](i,j,k,n); + Real kk3 = k3[bi](i,j,k,n); + Real kk4 = k4[bi](i,j,k,n); + Real uu = b1*kk1 + b2*kk2 + b3*kk3 + b4*kk4; + Real ut = c1*kk1 + c2*kk2 + c3*kk3 + c4*kk4; + Real utt = d1*kk1 + d2*kk2 + d3*kk3 + d4*kk4; + Real uttt = e1*kk1 + e2*kk2 + e3*kk3 + e4*kk4; + u[bi](i,j,k,n) = u0[bi](i,j,k,n) + dtc * + (uu + at*ut + att*utt + attt*(uttt+akk*(kk3-kk2))); + }); + } + } + Gpu::streamSynchronize(); + + cbc(*m_cf_crse_data_tmp, 0, m_ncomp, m_nghost, time, 0); + + if (m_cf_fine_data == nullptr) { + m_cf_fine_data = std::make_unique(make_mf_fine_patch(fpc, m_ncomp)); + } + + FillPatchInterp(*m_cf_fine_data, 0, *m_cf_crse_data_tmp, 0, + m_ncomp, IntVect(0), m_cgeom, m_fgeom, + amrex::grow(amrex::convert(m_fgeom.Domain(), + mf.ixType()),m_nghost), + m_ratio, m_interp, bcs, 0); + + // xxxxx We can optimize away this ParallelCopy by making a special fpinfo. 
+ mf.ParallelCopy(*m_cf_fine_data, 0, 0, m_ncomp, IntVect(0), m_nghost); + + mf.FillBoundary(m_fgeom.periodicity()); + fbc(mf, 0, m_ncomp, m_nghost, time, 0); +} + +} + +#endif diff --git a/Src/AmrCore/AMReX_Interp_C.H b/Src/AmrCore/AMReX_Interp_C.H index e12c4495fde..967d3aaa177 100644 --- a/Src/AmrCore/AMReX_Interp_C.H +++ b/Src/AmrCore/AMReX_Interp_C.H @@ -135,5 +135,53 @@ face_linear_interp_z (int i, int j, int k, int n, amrex::Array4 con } } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void cell_quartic_interp_x (int i, int j, int k, int n, Array4 const& fine, + Array4 const& crse) noexcept +{ + constexpr Array1D c = {Real(0.01708984), Real(-0.12304688), + Real(0.92285156), Real(0.20507812), + Real(-0.02197266)}; + int ii = amrex::coarsen(i,2); + int s = 2*(i-ii*2) - 1; // if i == ii*2, s = -1; if i == ii*2+1, s = 1; + fine(i,j,k,n) = c(-2*s)*crse(ii-2,j,k,n) + + c( -s)*crse(ii-1,j,k,n) + + c( 0)*crse(ii ,j,k,n) + + c( s)*crse(ii+1,j,k,n) + + c( 2*s)*crse(ii+2,j,k,n); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void cell_quartic_interp_y (int i, int j, int k, int n, Array4 const& fine, + Array4 const& crse) noexcept +{ + constexpr Array1D c = {Real(0.01708984), Real(-0.12304688), + Real(0.92285156), Real(0.20507812), + Real(-0.02197266)}; + int jj = amrex::coarsen(j,2); + int s = 2*(j-jj*2) - 1; // if j == jj*2, s = -1; if j == jj*2+1, s = 1; + fine(i,j,k,n) = c(-2*s)*crse(i,jj-2,k,n) + + c( -s)*crse(i,jj-1,k,n) + + c( 0)*crse(i,jj ,k,n) + + c( s)*crse(i,jj+1,k,n) + + c( 2*s)*crse(i,jj+2,k,n); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void cell_quartic_interp_z (int i, int j, int k, int n, Array4 const& fine, + Array4 const& crse) noexcept +{ + constexpr Array1D c = {Real(0.01708984), Real(-0.12304688), + Real(0.92285156), Real(0.20507812), + Real(-0.02197266)}; + int kk = amrex::coarsen(k,2); + int s = 2*(k-kk*2) - 1; // if k == kk*2, s = -1; if k == kk*2+1, s = 1; + fine(i,j,k,n) = c(-2*s)*crse(i,j,kk-2,n) + + c( -s)*crse(i,j,kk-1,n) + + c( 
0)*crse(i,j,kk ,n) + + c( s)*crse(i,j,kk+1,n) + + c( 2*s)*crse(i,j,kk+2,n); +} + } #endif diff --git a/Src/AmrCore/AMReX_Interpolater.H b/Src/AmrCore/AMReX_Interpolater.H index 06398b73097..bdb6cf9d46b 100644 --- a/Src/AmrCore/AMReX_Interpolater.H +++ b/Src/AmrCore/AMReX_Interpolater.H @@ -844,6 +844,74 @@ public: }; +/** +* \brief Quartic interpolation on cell centered data. +* +* Quartic interpolation on cell centered data. +*/ + +class CellQuartic + : + public Interpolater +{ +public: + + /** + * \brief The constructor. + */ + explicit CellQuartic (); + + /** + * \brief The destructor. + */ + virtual ~CellQuartic () override; + + /** + * \brief Returns coarsened box given fine box and refinement ratio. + * + * \param fine + * \param ratio + */ + virtual Box CoarseBox (const Box& fine, int ratio) override; + + /** + * \brief Returns coarsened box given fine box and refinement ratio. + * + * \param fine + * \param ratio + */ + virtual Box CoarseBox (const Box& fine, const IntVect& ratio) override; + + /** + * \brief Coarse to fine interpolation in space. + * + * \param crse + * \param crse_comp + * \param fine + * \param fine_comp + * \param ncomp + * \param fine_region + * \param ratio + * \param crse_geom + * \param fine_geom + * \param bcr + * \param actual_comp + * \param actual_state + */ + virtual void interp (const FArrayBox& crse, + int crse_comp, + FArrayBox& fine, + int fine_comp, + int ncomp, + const Box& fine_region, + const IntVect& ratio, + const Geometry& crse_geom, + const Geometry& fine_geom, + Vector const& bcr, + int actual_comp, + int actual_state, + RunOn gpu_or_cpu) override; +}; //! CONSTRUCT A GLOBAL OBJECT OF EACH VERSION. 
extern AMREX_EXPORT PCInterp pc_interp; @@ -856,6 +924,7 @@ extern AMREX_EXPORT CellBilinear cell_bilinear_interp; extern AMREX_EXPORT CellConservativeProtected protected_interp; extern AMREX_EXPORT CellConservativeQuartic quartic_interp; extern AMREX_EXPORT CellQuadratic quadratic_interp; +extern AMREX_EXPORT CellQuartic cell_quartic_interp; } diff --git a/Src/AmrCore/AMReX_Interpolater.cpp b/Src/AmrCore/AMReX_Interpolater.cpp index a78eac89aa0..8042aa2f322 100644 --- a/Src/AmrCore/AMReX_Interpolater.cpp +++ b/Src/AmrCore/AMReX_Interpolater.cpp @@ -18,6 +18,8 @@ namespace amrex { * * CellQuadratic only works in 2D and 3D on cpu and gpu. * + * CellQuartic works in 1D, 2D and 3D on cpu and gpu with ref ratio of 2 + * * CellConservativeQuartic only works with ref ratio of 2 on cpu and gpu. * * FaceDivFree works in 2D and 3D on cpu and gpu. @@ -37,6 +39,7 @@ CellConservativeProtected protected_interp; CellConservativeQuartic quartic_interp; CellBilinear cell_bilinear_interp; CellQuadratic quadratic_interp; +CellQuartic cell_quartic_interp; NodeBilinear::~NodeBilinear () {} @@ -988,4 +991,94 @@ FaceDivFree::interp_arr (Array const& crse, }); } +CellQuartic::CellQuartic () {} + +CellQuartic::~CellQuartic () {} + +Box +CellQuartic::CoarseBox (const Box& fine, const IntVect& ratio) +{ + Box crse = amrex::coarsen(fine,ratio); + crse.grow(2); + return crse; +} + +Box +CellQuartic::CoarseBox (const Box& fine, int ratio) +{ + Box crse = amrex::coarsen(fine,ratio); + crse.grow(2); + return crse; +} + +void +CellQuartic::interp (const FArrayBox& crse, + int crse_comp, + FArrayBox& fine, + int fine_comp, + int ncomp, + const Box& fine_region, + const IntVect& ratio, + const Geometry& /*crse_geom*/, + const Geometry& /*fine_geom*/, + Vector const& /*bcr*/, + int /* actual_comp */, + int /* actual_state */, + RunOn runon) +{ + BL_PROFILE("CellQuartic::interp()"); + amrex::ignore_unused(ratio); + AMREX_ASSERT(ratio == 2); + + Box target_fine_region = fine_region & fine.box(); + + 
bool run_on_gpu = (runon == RunOn::Gpu && Gpu::inLaunchRegion()); + amrex::ignore_unused(run_on_gpu); + + Array4 const& crsearr = crse.const_array(crse_comp); + Array4 const& finearr = fine.array(fine_comp); + +#if (AMREX_SPACEDIM == 3) + Box bz = amrex::coarsen(target_fine_region, IntVect(2,2,1)); + bz.grow(IntVect(2,2,0)); + FArrayBox tmpz(bz, ncomp); + Elixir tmpz_eli; + if (run_on_gpu) tmpz_eli = tmpz.elixir(); + Array4 const& tmpzarr = tmpz.array(); + AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon, bz, ncomp, i, j, k, n, + { + cell_quartic_interp_z(i,j,k,n,tmpzarr,crsearr); + }); +#endif + +#if (AMREX_SPACEDIM >= 2) + Box by = amrex::coarsen(target_fine_region, IntVect(AMREX_D_DECL(2,1,1))); + by.grow(IntVect(AMREX_D_DECL(2,0,0))); + FArrayBox tmpy(by, ncomp); + Elixir tmpy_eli; + if (run_on_gpu) tmpy_eli = tmpy.elixir(); + Array4 const& tmpyarr = tmpy.array(); +#if (AMREX_SPACEDIM == 2) + Array4 srcarr = crsearr; +#else + Array4 srcarr = tmpz.const_array(); +#endif + AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon, by, ncomp, i, j, k, n, + { + cell_quartic_interp_y(i,j,k,n,tmpyarr,srcarr); + }); +#endif + +#if (AMREX_SPACEDIM == 1) + Array4 srcarr = crsearr; +#else + srcarr = tmpy.const_array(); +#endif + AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon, target_fine_region, ncomp, + i, j, k, n, + { + cell_quartic_interp_x(i,j,k,n,finearr,srcarr); + }); +} + } diff --git a/Src/AmrCore/AMReX_MFInterp_1D_C.H b/Src/AmrCore/AMReX_MFInterp_1D_C.H index 37751acc3b9..8fcadec5794 100644 --- a/Src/AmrCore/AMReX_MFInterp_1D_C.H +++ b/Src/AmrCore/AMReX_MFInterp_1D_C.H @@ -149,9 +149,10 @@ void mf_cell_cons_lin_interp_sph (int i, int ns, Array4 const& fine, int f + xoff * slope(ic,0,0,ns); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mf_cell_bilin_interp (int i, int, int, int n, Array4 const& fine, int fcomp, - Array4 const& crse, int ccomp, IntVect const& ratio) noexcept +void mf_cell_bilin_interp (int i, int, int, int n, Array4 const& fine, int fcomp, + Array4 
const& crse, int ccomp, IntVect const& ratio) noexcept { int ic = amrex::coarsen(i,ratio[0]); int ioff = i - ic*ratio[0]; diff --git a/Src/AmrCore/AMReX_MFInterp_2D_C.H b/Src/AmrCore/AMReX_MFInterp_2D_C.H index c505ef2655c..e02084e2e8e 100644 --- a/Src/AmrCore/AMReX_MFInterp_2D_C.H +++ b/Src/AmrCore/AMReX_MFInterp_2D_C.H @@ -189,9 +189,10 @@ void mf_cell_cons_lin_interp_rz (int i, int j, int ns, Array4 const& fine, + yoff * slope(ic,jc,0,ns+ncomp); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mf_cell_bilin_interp (int i, int j, int, int n, Array4 const& fine, int fcomp, - Array4 const& crse, int ccomp, IntVect const& ratio) noexcept +void mf_cell_bilin_interp (int i, int j, int, int n, Array4 const& fine, int fcomp, + Array4 const& crse, int ccomp, IntVect const& ratio) noexcept { int ic = amrex::coarsen(i,ratio[0]); int jc = amrex::coarsen(j,ratio[1]); diff --git a/Src/AmrCore/AMReX_MFInterp_3D_C.H b/Src/AmrCore/AMReX_MFInterp_3D_C.H index dc0da5dba40..17d14ff689b 100644 --- a/Src/AmrCore/AMReX_MFInterp_3D_C.H +++ b/Src/AmrCore/AMReX_MFInterp_3D_C.H @@ -128,9 +128,10 @@ void mf_cell_cons_lin_interp (int i, int j, int k, int ns, Array4 const& f + zoff * slope(ic,jc,kc,ns+ncomp*2); } +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mf_cell_bilin_interp (int i, int j, int k, int n, Array4 const& fine, int fcomp, - Array4 const& crse, int ccomp, IntVect const& ratio) noexcept +void mf_cell_bilin_interp (int i, int j, int k, int n, Array4 const& fine, int fcomp, + Array4 const& crse, int ccomp, IntVect const& ratio) noexcept { int ic = amrex::coarsen(i,ratio[0]); int jc = amrex::coarsen(j,ratio[1]); diff --git a/Src/AmrCore/AMReX_TagBox.cpp b/Src/AmrCore/AMReX_TagBox.cpp index 6a989ffbbf1..3ec7425e283 100644 --- a/Src/AmrCore/AMReX_TagBox.cpp +++ b/Src/AmrCore/AMReX_TagBox.cpp @@ -441,7 +441,7 @@ TagBoxArray::local_collate_gpu (Gpu::PinnedVector& v) const std::partial_sum(nblocks.begin(), nblocks.end(), blockoffset.begin()+1); int ntotblocks = 
blockoffset.back(); - PODVector > dv_ntags(ntotblocks); + Gpu::NonManagedDeviceVector dv_ntags(ntotblocks); for (MFIter fai(*this); fai.isValid(); ++fai) { @@ -491,21 +491,21 @@ TagBoxArray::local_collate_gpu (Gpu::PinnedVector& v) const #endif } - PODVector > hv_ntags(ntotblocks); + Gpu::PinnedVector hv_ntags(ntotblocks); Gpu::dtoh_memcpy(hv_ntags.data(), dv_ntags.data(), ntotblocks*sizeof(int)); - PODVector > hv_tags_offset(ntotblocks+1); + Gpu::PinnedVector hv_tags_offset(ntotblocks+1); hv_tags_offset[0] = 0; std::partial_sum(hv_ntags.begin(), hv_ntags.end(), hv_tags_offset.begin()+1); int ntotaltags = hv_tags_offset.back(); if (ntotaltags == 0) return; - PODVector > dv_tags_offset(ntotblocks); + Gpu::NonManagedDeviceVector dv_tags_offset(ntotblocks); int* dp_tags_offset = dv_tags_offset.data(); Gpu::htod_memcpy_async(dp_tags_offset, hv_tags_offset.data(), ntotblocks*sizeof(int)); - PODVector > dv_tags(ntotaltags); + Gpu::NonManagedDeviceVector dv_tags(ntotaltags); IntVect* dp_tags = dv_tags.data(); int iblock = 0; @@ -649,7 +649,24 @@ TagBoxArray::collate (Gpu::PinnedVector& TheGlobalCollateSpace) const // const IntVect* psend = (count > 0) ? TheLocalCollateSpace.data() : nullptr; IntVect* precv = TheGlobalCollateSpace.data(); + + // Issues have been observed with the following call at very large scale when using + // FujitsuMPI. The issue seems to be related to the use of MPI_Datatype. We can + // bypasses the issue by exchanging simpler integer arrays. 
+#if !(defined(__FUJITSU) || defined(__CLANG_FUJITSU)) ParallelDescriptor::Gatherv(psend, count, precv, countvec, offset, IOProcNumber); +#else + const int* psend_int = (psend != nullptr) ? psend->begin() : nullptr; + int* precv_int = (precv != nullptr) ? precv->begin() : nullptr; + Long count_int = count * AMREX_SPACEDIM; + auto countvec_int = std::vector(countvec.size()); + auto offset_int = std::vector(offset.size()); + const auto mul_funct = [](const auto el){return el*AMREX_SPACEDIM;}; + std::transform(countvec.begin(), countvec.end(), countvec_int.begin(), mul_funct); + std::transform(offset.begin(), offset.end(), offset_int.begin(), mul_funct); + ParallelDescriptor::Gatherv( + psend_int, count_int, precv_int, countvec_int, offset_int, IOProcNumber); +#endif #else TheGlobalCollateSpace = std::move(TheLocalCollateSpace); diff --git a/Src/AmrCore/CMakeLists.txt b/Src/AmrCore/CMakeLists.txt index f9ff24f243b..be7c87eee4f 100644 --- a/Src/AmrCore/CMakeLists.txt +++ b/Src/AmrCore/CMakeLists.txt @@ -12,6 +12,7 @@ target_sources(amrex AMReX_FluxRegister.cpp AMReX_FillPatchUtil.H AMReX_FillPatchUtil_I.H + AMReX_FillPatcher.H AMReX_FluxRegister.H AMReX_InterpBase.H AMReX_InterpBase.cpp diff --git a/Src/AmrCore/Make.package b/Src/AmrCore/Make.package index 5b3afa61ccb..df3c2e83d40 100644 --- a/Src/AmrCore/Make.package +++ b/Src/AmrCore/Make.package @@ -6,6 +6,8 @@ CEXE_sources += AMReX_AmrCore.cpp AMReX_Cluster.cpp AMReX_ErrorList.cpp AMReX_Fi AMReX_Interpolater.cpp AMReX_MFInterpolater.cpp AMReX_TagBox.cpp AMReX_AmrMesh.cpp \ AMReX_InterpBase.cpp +CEXE_headers += AMReX_FillPatcher.H + CEXE_headers += AMReX_Interp_C.H AMReX_Interp_$(DIM)D_C.H CEXE_headers += AMReX_MFInterp_C.H AMReX_MFInterp_$(DIM)D_C.H diff --git a/Src/Base/AMReX.H b/Src/Base/AMReX.H index e02280f3e3b..91f8fc43b7c 100644 --- a/Src/Base/AMReX.H +++ b/Src/Base/AMReX.H @@ -271,7 +271,7 @@ namespace amrex private: - static std::vector > m_instance; + static AMREX_EXPORT std::vector > m_instance; Geometry* m_geom = nullptr; }; diff --git a/Src/Base/AMReX.cpp
b/Src/Base/AMReX.cpp index f06806babcd..76488bf81e0 100644 --- a/Src/Base/AMReX.cpp +++ b/Src/Base/AMReX.cpp @@ -123,6 +123,11 @@ namespace { #ifdef AMREX_USE_HYPRE namespace { int init_hypre = 1; +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + int hypre_spgemm_use_vendor = 0; + int hypre_spmv_use_vendor = 0; + int hypre_sptrans_use_vendor = 0; +#endif } #endif @@ -489,6 +494,11 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, #ifdef AMREX_USE_HYPRE pp.queryAdd("init_hypre", init_hypre); +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + pp.queryAdd("hypre_spgemm_use_vendor", hypre_spgemm_use_vendor); + pp.queryAdd("hypre_spmv_use_vendor", hypre_spmv_use_vendor); + pp.queryAdd("hypre_sptrans_use_vendor", hypre_sptrans_use_vendor); +#endif #endif } @@ -526,7 +536,7 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, #ifdef AMREX_USE_HYPRE if (init_hypre) { HYPRE_Init(); -#ifdef HYPRE_USING_CUDA +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) #if defined(HYPRE_RELEASE_NUMBER) && (HYPRE_RELEASE_NUMBER >= 22400) @@ -541,9 +551,13 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse, HYPRE_SetGPUMemoryPoolSize( mempool_bin_growth, mempool_min_bin, mempool_max_bin, mempool_max_cached_bytes ); #endif - /* This API below used to be HYPRE_SetSpGemmUseCusparse(). 
This was changed in commit - Hypre master commit dfdd1cd12f */ - HYPRE_SetSpGemmUseVendor(false); +#if (HYPRE_RELEASE_NUMBER >= 22500) + HYPRE_SetSpGemmUseVendor(hypre_spgemm_use_vendor); + HYPRE_SetSpMVUseVendor(hypre_spmv_use_vendor); + HYPRE_SetSpTransUseVendor(hypre_sptrans_use_vendor); +#elif (HYPRE_USING_CUDA) + HYPRE_SetSpGemmUseCusparse(hypre_spgemm_use_vendor); +#endif HYPRE_SetMemoryLocation(HYPRE_MEMORY_DEVICE); HYPRE_SetExecutionPolicy(HYPRE_EXEC_DEVICE); HYPRE_SetUseGpuRand(true); diff --git a/Src/Base/AMReX_Algorithm.H b/Src/Base/AMReX_Algorithm.H index b5a5f4973c7..65a5f8cb763 100644 --- a/Src/Base/AMReX_Algorithm.H +++ b/Src/Base/AMReX_Algorithm.H @@ -145,7 +145,7 @@ namespace amrex AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE I bisect (T const* d, I lo, I hi, T const& v) { while (lo <= hi) { - int mid = (lo+hi)/2; + I mid = lo + (hi-lo)/2; if (v >= d[mid] && v < d[mid+1]) { return mid; } else if (v < d[mid]) { @@ -157,6 +157,57 @@ namespace amrex return hi; } + template<typename ItType, typename ValType> + AMREX_GPU_HOST_DEVICE + ItType upper_bound (ItType first, ItType last, const ValType& val) + { +#if AMREX_DEVICE_COMPILE + std::ptrdiff_t count = last-first; + while(count>0){ + auto it = first; + const auto step = count/2; + it += step; + if (!(val < *it)){ + first = ++it; + count -= step + 1; + } + else{ + count = step; + } + } + + return first; +#else + return std::upper_bound(first, last, val); +#endif + } + + template<typename ItType, typename ValType> + AMREX_GPU_HOST_DEVICE + ItType lower_bound (ItType first, ItType last, const ValType& val) + { +#if AMREX_DEVICE_COMPILE + std::ptrdiff_t count = last-first; + while(count>0) + { + auto it = first; + const auto step = count/2; + it += step; + if (*it < val){ + first = ++it; + count -= step + 1; + } + else{ + count = step; + } + } + + return first; +#else + return std::lower_bound(first, last, val); +#endif + } + namespace detail { struct clzll_tag {}; diff --git a/Src/Base/AMReX_Any.H b/Src/Base/AMReX_Any.H index b57aa9a39ef..2c7d9688d36 100644 ---
a/Src/Base/AMReX_Any.H +++ b/Src/Base/AMReX_Any.H @@ -48,11 +48,25 @@ public: //! Returns a reference to the contained object. template - MF& get () { return dynamic_cast&>(*m_ptr).m_mf; } + MF& get () { + if (auto p0 = dynamic_cast*>(m_ptr.get())) { + return p0->m_mf; + } else { + return dynamic_cast&>(*m_ptr).m_mf; + } + } //! Returns a const reference to the contained object. template - MF const& get () const { return dynamic_cast const&>(*m_ptr).m_mf; } + MF const& get () const { + if (auto p0 = dynamic_cast*>(m_ptr.get())) { + return p0->m_mf; + } else if (auto p1 = dynamic_cast*>(m_ptr.get())) { + return p1->m_mf; + } else { + return dynamic_cast const&>(*m_ptr).m_mf; + } + } template bool is () const { return m_ptr->Type() == typeid(MF); } @@ -60,15 +74,18 @@ public: private: struct innards_base { virtual const std::type_info& Type () const = 0; + virtual ~innards_base () = default; }; template struct innards : innards_base { - innards(MF && mf) + innards (MF && mf) : m_mf(std::forward(mf)) {} + virtual ~innards () = default; + virtual const std::type_info& Type () const override { return typeid(MF); } diff --git a/Src/Base/AMReX_Arena.cpp b/Src/Base/AMReX_Arena.cpp index c14fced3872..f7a46dc25c8 100644 --- a/Src/Base/AMReX_Arena.cpp +++ b/Src/Base/AMReX_Arena.cpp @@ -14,11 +14,11 @@ ///#include //#define AMREX_MLOCK(x,y) VirtualLock(x,y) //#define AMREX_MUNLOCK(x,y) VirtualUnlock(x,y) -#define AMREX_MLOCK(x,y) ((void)0) +//#define AMREX_MLOCK(x,y) ((void)0) #define AMREX_MUNLOCK(x,y) ((void)0) #else #include -#define AMREX_MLOCK(x,y) mlock(x,y) +//#define AMREX_MLOCK(x,y) mlock(x,y) #define AMREX_MUNLOCK(x,y) munlock(x,y) #endif @@ -132,19 +132,21 @@ Arena::allocate_system (std::size_t nbytes) if (arena_info.use_cpu_memory) { p = std::malloc(nbytes); +#ifndef _WIN32 #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #endif - if (p && arena_info.device_use_hostalloc) AMREX_MLOCK(p, 
nbytes); + if (p && (nbytes > 0) && arena_info.device_use_hostalloc) mlock(p, nbytes); #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop +#endif #endif } else if (arena_info.device_use_hostalloc) { AMREX_HIP_OR_CUDA_OR_DPCPP( - AMREX_HIP_SAFE_CALL (hipHostMalloc(&p, nbytes, hipHostMallocMapped));, + AMREX_HIP_SAFE_CALL (hipHostMalloc(&p, nbytes, hipHostMallocMapped|hipHostMallocNonCoherent));, AMREX_CUDA_SAFE_CALL(cudaHostAlloc(&p, nbytes, cudaHostAllocMapped));, p = sycl::malloc_host(nbytes, Gpu::Device::syclContext())); } @@ -190,7 +192,16 @@ Arena::allocate_system (std::size_t nbytes) } #else p = std::malloc(nbytes); - if (p && arena_info.device_use_hostalloc) AMREX_MLOCK(p, nbytes); +#ifndef _WIN32 +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif + if (p && (nbytes > 0) && arena_info.device_use_hostalloc) mlock(p, nbytes); +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif +#endif #endif if (p == nullptr) amrex::Abort("Sorry, malloc failed"); return p; @@ -253,12 +264,13 @@ Arena::Initialize () if (initialized) return; initialized = true; - BL_ASSERT(the_arena == nullptr); + // see reason on allowed reuse of the default CPU BArena in Arena::Finalize + BL_ASSERT(the_arena == nullptr || the_arena == The_BArena()); BL_ASSERT(the_async_arena == nullptr); - BL_ASSERT(the_device_arena == nullptr); - BL_ASSERT(the_managed_arena == nullptr); + BL_ASSERT(the_device_arena == nullptr || the_device_arena == The_BArena()); + BL_ASSERT(the_managed_arena == nullptr || the_managed_arena == The_BArena()); BL_ASSERT(the_pinned_arena == nullptr); - BL_ASSERT(the_cpu_arena == nullptr); + BL_ASSERT(the_cpu_arena == nullptr || the_cpu_arena == The_BArena()); #ifdef AMREX_USE_GPU #ifdef AMREX_USE_DPCPP @@ -304,7 +316,7 @@ Arena::Initialize () the_async_arena = new PArena(the_async_arena_release_threshold); #ifdef AMREX_USE_GPU - if 
(the_arena->isDevice() || the_arena->isManaged()) { + if (the_arena->isDevice()) { the_device_arena = the_arena; } else { the_device_arena = new CArena(0, ArenaInfo{}.SetDeviceMemory().SetReleaseThreshold @@ -468,6 +480,13 @@ Arena::Finalize () initialized = false; + // we reset Arenas unless they are the default "CPU malloc/free" BArena + // this is because we want to allow users to free their UB objects + // that they forgot to destruct after amrex::Finalize(): + // amrex::Initialize(...); + // MultiFab mf(...); // this should be scoped in { ... } + // amrex::Finalize(); + // mf cannot be used now, but it can at least be freed without a segfault if (!dynamic_cast(the_device_arena)) { if (the_device_arena != the_arena) { delete the_device_arena; diff --git a/Src/Base/AMReX_Array4.H b/Src/Base/AMReX_Array4.H index 0fc4c049437..296762614d3 100644 --- a/Src/Base/AMReX_Array4.H +++ b/Src/Base/AMReX_Array4.H @@ -11,6 +11,50 @@ namespace amrex { + template + struct CellData // Data in a single cell + { + T* AMREX_RESTRICT p = nullptr; + Long stride = 0; + int ncomp = 0; + + AMREX_GPU_HOST_DEVICE + constexpr CellData (T* a_p, Long a_stride, int a_ncomp) + : p(a_p), stride(a_stride), ncomp(a_ncomp) + {} + + template ::value,int> = 0> + AMREX_GPU_HOST_DEVICE + constexpr CellData (CellData::type> const& rhs) noexcept + : p(rhs.p), stride(rhs.stride), ncomp(rhs.ncomp) + {} + + AMREX_GPU_HOST_DEVICE + explicit operator bool() const noexcept { return p != nullptr; } + + AMREX_GPU_HOST_DEVICE + int nComp() const noexcept { return ncomp; } + + template ::value,int> = 0> + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + U& operator[] (int n) const noexcept { +#if defined(AMREX_DEBUG) || defined(AMREX_BOUND_CHECK) + if (n < 0 || n >= ncomp) { +#if AMREX_DEVICE_COMPILE + AMREX_DEVICE_PRINTF(" %d is out of bound (0:%d)", n, ncomp-1); +#else + std::stringstream ss; + ss << " " << n << " is out of bound: (0:" << ncomp-1 << ")"; + amrex::Abort(ss.str()); +#endif + } +#endif + return 
p[n*stride]; + } + }; + template struct Array4 { @@ -207,6 +251,11 @@ namespace amrex { } } #endif + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + CellData cellData (int i, int j, int k) const noexcept { + return CellData{this->ptr(i,j,k), nstride, ncomp}; + } }; template diff --git a/Src/Base/AMReX_BCRec.H b/Src/Base/AMReX_BCRec.H index 1980c727e81..d76760df9d9 100644 --- a/Src/Base/AMReX_BCRec.H +++ b/Src/Base/AMReX_BCRec.H @@ -74,6 +74,17 @@ public: AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void setHi (int dir, int bc_val) noexcept { bc[AMREX_SPACEDIM+dir] = bc_val; } /** + * \brief Explicitly set bndry value for given face. + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void set (Orientation face, int bc_val) noexcept { + if (face.isLow()) { + setLo(face.coordDir(), bc_val); + } else { + setHi(face.coordDir(), bc_val); + } + } + /** * \brief Return bndry values (used in calls to FORTRAN). */ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE diff --git a/Src/Base/AMReX_BC_TYPES.H b/Src/Base/AMReX_BC_TYPES.H index ea24a64addf..b735da6fddb 100644 --- a/Src/Base/AMReX_BC_TYPES.H +++ b/Src/Base/AMReX_BC_TYPES.H @@ -73,7 +73,10 @@ enum mathematicalBndryTypes : int { foextrap = 2, ext_dir = 3, hoextrap = 4, - hoextrapcc = 5 + hoextrapcc = 5, + user_1 = 1001, + user_2 = 1002, + user_3 = 1003 }; } @@ -102,4 +105,3 @@ enum mathematicalBndryTypes : int { #endif #endif - diff --git a/Src/Base/AMReX_BLBackTrace.cpp b/Src/Base/AMReX_BLBackTrace.cpp index 477e0b6bac2..0c304d30011 100644 --- a/Src/Base/AMReX_BLBackTrace.cpp +++ b/Src/Base/AMReX_BLBackTrace.cpp @@ -5,6 +5,9 @@ #include #include #include +#ifdef AMREX_USE_MPI +#include +#endif #ifdef AMREX_TINY_PROFILING #include @@ -71,7 +74,15 @@ BLBackTrace::handler(int s) std::string errfilename; { std::ostringstream ss; - ss << "Backtrace." 
<< ParallelDescriptor::MyProc(); +#ifdef AMREX_USE_MPI + if (MPMD::Initialized()) { + ss << "Backtrace.prog" << MPMD::MyProgId() << "."; + } else +#endif + { + ss << "Backtrace."; + } + ss << ParallelDescriptor::MyProc(); #ifdef AMREX_USE_OMP ss << "." << omp_get_thread_num(); #endif diff --git a/Src/Base/AMReX_BaseFab.H b/Src/Base/AMReX_BaseFab.H index 3a9f5eea018..f0e50ecac48 100644 --- a/Src/Base/AMReX_BaseFab.H +++ b/Src/Base/AMReX_BaseFab.H @@ -260,7 +260,7 @@ public: */ void clear () noexcept; - // Release ownership of memory + //! Release ownership of memory std::unique_ptr release () noexcept; //! Returns how many bytes used @@ -350,10 +350,22 @@ public: * order, with the component index coming last. In other words, * dataPtr returns a pointer to all the Nth components. */ - T* dataPtr (int n = 0) noexcept { AMREX_ASSERT(!(this->dptr == 0)); return &(this->dptr[n*this->domain.numPts()]); } + T* dataPtr (int n = 0) noexcept { + if (this->dptr) { + return &(this->dptr[n*this->domain.numPts()]); + } else { + return nullptr; + } + } //! Same as above except works on const FABs. 
- const T* dataPtr (int n = 0) const noexcept { AMREX_ASSERT(!(this->dptr == 0)); return &(this->dptr[n*this->domain.numPts()]); } + const T* dataPtr (int n = 0) const noexcept { + if (this->dptr) { + return &(this->dptr[n*this->domain.numPts()]); + } else { + return nullptr; + } + } T* dataPtr (const IntVect& iv, int n = 0) noexcept; @@ -1882,9 +1894,9 @@ BaseFab::define () { AMREX_ASSERT(this->dptr == 0); AMREX_ASSERT(this->domain.numPts() > 0); - AMREX_ASSERT(std::numeric_limits::max()/this->nvar > this->domain.numPts()); AMREX_ASSERT(this->nvar >= 0); if (this->nvar == 0) return; + AMREX_ASSERT(std::numeric_limits::max()/this->nvar > this->domain.numPts()); this->truesize = this->nvar*this->domain.numPts(); this->ptr_owner = true; diff --git a/Src/Base/AMReX_Box.cpp b/Src/Base/AMReX_Box.cpp index f93818e784d..e61942c2a48 100644 --- a/Src/Base/AMReX_Box.cpp +++ b/Src/Base/AMReX_Box.cpp @@ -126,7 +126,7 @@ AllGatherBoxes (Vector& bxs, int n_extra_reserve) if (count_tot == 0) return; if (count_tot > static_cast(std::numeric_limits::max())) { - amrex::Abort("AllGatherBoxes: not many boxes"); + amrex::Abort("AllGatherBoxes: too many boxes"); } Vector recv_buffer; @@ -161,7 +161,7 @@ AllGatherBoxes (Vector& bxs, int n_extra_reserve) if (count_tot == 0) return; if (count_tot > static_cast(std::numeric_limits::max())) { - amrex::Abort("AllGatherBoxes: not many boxes"); + amrex::Abort("AllGatherBoxes: too many boxes"); } Vector recv_buffer; diff --git a/Src/Base/AMReX_BoxList.H b/Src/Base/AMReX_BoxList.H index 04e93eab97e..1dc8f15c536 100644 --- a/Src/Base/AMReX_BoxList.H +++ b/Src/Base/AMReX_BoxList.H @@ -206,9 +206,9 @@ public: BoxList& convert (IndexType typ) noexcept; //! Returns a reference to the Vector. - Vector& data() noexcept { return m_lbox; } + Vector& data () noexcept { return m_lbox; } //! Returns a constant reference to the Vector. 
- const Vector& data() const noexcept { return m_lbox; } + const Vector& data () const noexcept { return m_lbox; } void swap (BoxList& rhs) { std::swap(m_lbox, rhs.m_lbox); diff --git a/Src/Base/AMReX_CTOParallelForImpl.H b/Src/Base/AMReX_CTOParallelForImpl.H new file mode 100644 index 00000000000..e79122de24d --- /dev/null +++ b/Src/Base/AMReX_CTOParallelForImpl.H @@ -0,0 +1,331 @@ +#ifndef AMREX_CTO_PARALLEL_FOR_H_ +#define AMREX_CTO_PARALLEL_FOR_H_ + +#include +#include +#include + +#include +#include + +/* This header is not for the users to include directly. It's meant to be + * included in AMReX_GpuLaunch.H, which has included the headers needed + * here. */ + +/* Thank Maikel Nadolski and Alex Sinn for the techniques used here! */ + +namespace amrex { + +template +struct CompileTimeOptions { + // TypeList is defined in AMReX_Tuple.H + using list_type = TypeList...>; +}; + +#if (__cplusplus >= 201703L) + +//namespace meta +//{ + template + constexpr auto operator+ (TypeList, TypeList) { + return TypeList{}; + } + + template + constexpr auto single_product (TypeList, A) { + return TypeList{})...>{}; + } + + template + constexpr auto operator* (LLs, TypeList) { + return (TypeList<>{} + ... + single_product(LLs{}, As{})); + } + + template + constexpr auto cartesian_product_n (TypeList) { + return (TypeList>{} * ... 
* Ls{}); + } +//} + +namespace detail +{ + template + std::enable_if_t::value || std::is_same::value, bool> + ParallelFor_helper2 (T const& N, F&& f, TypeList, + std::array const& runtime_options) + { + if (runtime_options == std::array{As::value...}) { + if constexpr (std::is_integral::value) { + ParallelFor(N, [f] AMREX_GPU_DEVICE (T i) noexcept + { + f(i, As{}...); + }); + } else { + ParallelFor(N, [f] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + f(i, j, k, As{}...); + }); + } + return true; + } else { + return false; + } + } + + template + std::enable_if_t::value, bool> + ParallelFor_helper2 (Box const& box, T ncomp, F&& f, TypeList, + std::array const& runtime_options) + { + if (runtime_options == std::array{As::value...}) { + ParallelFor(box, ncomp, [f] AMREX_GPU_DEVICE (int i, int j, int k, T n) noexcept + { + f(i, j, k, n, As{}...); + }); + return true; + } else { + return false; + } + } + + template + std::enable_if_t::value || std::is_same::value> + ParallelFor_helper1 (T const& N, F&& f, TypeList, + RO const& runtime_options) + { + bool found_option = (false || ... || + ParallelFor_helper2(N, std::forward(f), + PPs{}, runtime_options)); + amrex::ignore_unused(found_option); + AMREX_ASSERT(found_option); + } + + template + std::enable_if_t::value> + ParallelFor_helper1 (Box const& box, T ncomp, F&& f, TypeList, + RO const& runtime_options) + { + bool found_option = (false || ... 
|| + ParallelFor_helper2(box, ncomp, std::forward(f), + PPs{}, runtime_options)); + amrex::ignore_unused(found_option); + AMREX_ASSERT(found_option); + } +} + +#endif + +template +std::enable_if_t::value> +ParallelFor (TypeList /*list_of_compile_time_options*/, + std::array const& runtime_options, + T N, F&& f) +{ +#if (__cplusplus >= 201703L) + using OptionsListList = TypeList; + detail::ParallelFor_helper1(N, std::forward(f), + cartesian_product_n(OptionsListList{}), + runtime_options); +#else + amrex::ignore_unused(N, f, runtime_options); + static_assert(std::is_integral::value, "This requires C++17"); +#endif +} + +template +void ParallelFor (TypeList /*list_of_compile_time_options*/, + std::array const& runtime_options, + Box const& box, F&& f) +{ +#if (__cplusplus >= 201703L) + using OptionsListList = TypeList; + detail::ParallelFor_helper1(box, std::forward(f), + cartesian_product_n(OptionsListList{}), + runtime_options); +#else + amrex::ignore_unused(box, f, runtime_options); + static_assert(std::is_integral::value, "This requires C++17"); +#endif +} + +template +std::enable_if_t::value> +ParallelFor (TypeList /*list_of_compile_time_options*/, + std::array const& runtime_options, + Box const& box, T ncomp, F&& f) +{ +#if (__cplusplus >= 201703L) + using OptionsListList = TypeList; + detail::ParallelFor_helper1(box, ncomp, std::forward(f), + cartesian_product_n(OptionsListList{}), + runtime_options); +#else + amrex::ignore_unused(box, ncomp, f, runtime_options); + static_assert(std::is_integral::value, "This requires C++17"); +#endif +} + +/** + * \brief ParallelFor with compile time optimization of kernels with run time options. + * + * It uses fold expression to generate kernel launches for all combinations + * of the run time options. The kernel function can use constexpr if to + * discard unused code blocks for better run time performance. 
In the + * example below, the code will be expanded into 4*2=8 normal ParallelFors + * for all combinations of the run time parameters. + \verbatim + int A_runtime_option = ...; + int B_runtime_option = ...; + enum A_options : int { A0, A1, A2, A3}; + enum B_options : int { B0, B1 }; + ParallelFor(TypeList<CompileTimeOptions<A0,A1,A2,A3>, + CompileTimeOptions<B0,B1>>{}, + {A_runtime_option, B_runtime_option}, + N, [=] AMREX_GPU_DEVICE (int i, auto A_control, auto B_control) + { + ... + if constexpr (A_control.value == A0) { + ... + } else if constexpr (A_control.value == A1) { + ... + } else if constexpr (A_control.value == A2) { + ... + } else { + ... + } + if constexpr (A_control.value != A3 && B_control.value == B1) { + ... + } + ... + }); + \endverbatim + * Note that due to a limitation of CUDA's extended device lambda, the + * constexpr if block cannot be the one that captures a variable first. + * If nvcc complains about it, you will have to manually capture it outside + * constexpr if. The data type for the parameters is int. + * + * \param ctos list of all possible values of the parameters. + * \param option the run time parameters. + * \param N an integer specifying the 1D for loop's range. + * \param f a callable object taking an integer and working on that iteration. + */ +template +std::enable_if_t::value> +ParallelFor (TypeList ctos, + std::array const& option, + T N, F&& f) +{ + ParallelFor(ctos, option, N, std::forward(f)); +} + +/** + * \brief ParallelFor with compile time optimization of kernels with run time options. + * + * It uses fold expression to generate kernel launches for all combinations + * of the run time options. The kernel function can use constexpr if to + * discard unused code blocks for better run time performance. In the + * example below, the code will be expanded into 4*2=8 normal ParallelFors + * for all combinations of the run time parameters.
+ \verbatim + int A_runtime_option = ...; + int B_runtime_option = ...; + enum A_options : int { A0, A1, A2, A3}; + enum B_options : int { B0, B1 }; + ParallelFor(TypeList<CompileTimeOptions<A0,A1,A2,A3>, + CompileTimeOptions<B0,B1>>{}, + {A_runtime_option, B_runtime_option}, + box, [=] AMREX_GPU_DEVICE (int i, int j, int k, + auto A_control, auto B_control) + { + ... + if constexpr (A_control.value == A0) { + ... + } else if constexpr (A_control.value == A1) { + ... + } else if constexpr (A_control.value == A2) { + ... + } else { + ... + } + if constexpr (A_control.value != A3 && B_control.value == B1) { + ... + } + ... + }); + \endverbatim + * Note that due to a limitation of CUDA's extended device lambda, the + * constexpr if block cannot be the one that captures a variable first. + * If nvcc complains about it, you will have to manually capture it outside + * constexpr if. The data type for the parameters is int. + * + * \param ctos list of all possible values of the parameters. + * \param option the run time parameters. + * \param box a Box specifying the 3D for loop's range. + * \param f a callable object taking three integers and working on the given cell. + */ +template +void ParallelFor (TypeList ctos, + std::array const& option, + Box const& box, F&& f) +{ + ParallelFor(ctos, option, box, std::forward(f)); +} + +/** + * \brief ParallelFor with compile time optimization of kernels with run time options. + * + * It uses fold expression to generate kernel launches for all combinations + * of the run time options. The kernel function can use constexpr if to + * discard unused code blocks for better run time performance. In the + * example below, the code will be expanded into 4*2=8 normal ParallelFors + * for all combinations of the run time parameters.
+ \verbatim + int A_runtime_option = ...; + int B_runtime_option = ...; + enum A_options : int { A0, A1, A2, A3}; + enum B_options : int { B0, B1 }; + ParallelFor(TypeList<CompileTimeOptions<A0,A1,A2,A3>, + CompileTimeOptions<B0,B1>>{}, + {A_runtime_option, B_runtime_option}, + box, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n, + auto A_control, auto B_control) + { + ... + if constexpr (A_control.value == A0) { + ... + } else if constexpr (A_control.value == A1) { + ... + } else if constexpr (A_control.value == A2) { + ... + } else { + ... + } + if constexpr (A_control.value != A3 && B_control.value == B1) { + ... + } + ... + }); + \endverbatim + * Note that due to a limitation of CUDA's extended device lambda, the + * constexpr if block cannot be the one that captures a variable first. + * If nvcc complains about it, you will have to manually capture it outside + * constexpr if. The data type for the parameters is int. + * + * \param ctos list of all possible values of the parameters. + * \param option the run time parameters. + * \param box a Box specifying the iteration in 3D space. + * \param ncomp an integer specifying the range for iteration over components. + * \param f a callable object taking four integers (cell indices i, j, k and component n) and working on the given cell and component.
+ */ +template +std::enable_if_t::value> +ParallelFor (TypeList ctos, + std::array const& option, + Box const& box, T ncomp, F&& f) +{ + ParallelFor(ctos, option, box, ncomp, std::forward(f)); +} + +} + +#endif diff --git a/Src/Base/AMReX_DistributionMapping.cpp b/Src/Base/AMReX_DistributionMapping.cpp index a61d5b2f591..6b4c0c8925c 100644 --- a/Src/Base/AMReX_DistributionMapping.cpp +++ b/Src/Base/AMReX_DistributionMapping.cpp @@ -1300,7 +1300,7 @@ DistributionMapping::SFCProcessorMap (const BoxArray& boxes, for (int i = 0, N = boxes.size(); i < N; ++i) { - wgts.push_back(boxes[i].volume()); + wgts.push_back(boxes[i].numPts()); } SFCProcessorMapDoIt(boxes,wgts,nprocs); @@ -1769,7 +1769,7 @@ DistributionMapping::makeSFC (const BoxArray& ba, bool use_box_vol, const int np { const Box& bx = ba[i]; tokens.push_back(makeSFCToken(i, bx.smallEnd())); - const Long v = use_box_vol ? bx.volume() : Long(1); + const Long v = use_box_vol ? bx.numPts() : Long(1); vol_sum += v; wgts.push_back(v); } diff --git a/Src/Base/AMReX_Extension.H b/Src/Base/AMReX_Extension.H index a084777f1a0..753b43995f3 100644 --- a/Src/Base/AMReX_Extension.H +++ b/Src/Base/AMReX_Extension.H @@ -57,7 +57,7 @@ #elif defined(__INTEL_COMPILER) #define AMREX_PRAGMA_SIMD _Pragma("ivdep") -#elif defined(_CRAYC) +#elif defined(_CRAYC) || defined(__cray__) #define AMREX_PRAGMA_SIMD _Pragma("ivdep") #elif defined(__PGI) @@ -73,7 +73,7 @@ #define AMREX_PRAGMA_SIMD _Pragma("ibm independent_loop") #elif defined(__clang__) -#define AMREX_PRAGMA_SIMD _Pragma("clang loop vectorize(enable)") +#define AMREX_PRAGMA_SIMD #elif defined(__GNUC__) #define AMREX_PRAGMA_SIMD _Pragma("GCC ivdep") diff --git a/Src/Base/AMReX_FArrayBox.H b/Src/Base/AMReX_FArrayBox.H index 3d3cda3674b..b678986c0e9 100644 --- a/Src/Base/AMReX_FArrayBox.H +++ b/Src/Base/AMReX_FArrayBox.H @@ -272,7 +272,7 @@ public: virtual ~FArrayBox () noexcept override {} FArrayBox (FArrayBox&& rhs) noexcept = default; - FArrayBox& operator= (FArrayBox&&) = 
default; + FArrayBox& operator= (FArrayBox&&) noexcept = default; FArrayBox (const FArrayBox&) = delete; FArrayBox& operator= (const FArrayBox&) = delete; diff --git a/Src/Base/AMReX_FBI.H b/Src/Base/AMReX_FBI.H index 61ef452b601..cc0bfeecbce 100644 --- a/Src/Base/AMReX_FBI.H +++ b/Src/Base/AMReX_FBI.H @@ -924,7 +924,7 @@ FabArray::pack_send_buffer_cpu (FabArray const& src, int scomp, int nc amrex::LoopConcurrentOnCpu( bx, ncomp, [=] (int ii, int jj, int kk, int n) noexcept { - pfab(ii,jj,kk,n) = sfab(ii,jj,kk,n+scomp); + pfab(ii,jj,kk,n) = static_cast(sfab(ii,jj,kk,n+scomp)); }); dptr += (bx.numPts() * ncomp * sizeof(BUF)); } diff --git a/Src/Base/AMReX_FabArray.H b/Src/Base/AMReX_FabArray.H index 6eef7caa579..736c39567ae 100644 --- a/Src/Base/AMReX_FabArray.H +++ b/Src/Base/AMReX_FabArray.H @@ -438,6 +438,15 @@ public: */ bool ok () const; + /** Has define() been called on this rank? + * + * \return true if `define` has been called on this `FabArray`. Note that all constructors except `FabArray ()` + * and `FabArray(Arena*a)` call `define`, even if the `MFInfo` argument has `alloc=false`. One could + * also use `FabArrayBase::empty()` to find whether `define` is called or not, although they are not exactly + * the same. + */ + bool isDefined () const; + //! Return a constant reference to the FAB associated with mfi. const FAB& operator[] (const MFIter& mfi) const noexcept { return *(this->fabPtr(mfi)); } @@ -1128,6 +1137,7 @@ protected: std::unique_ptr > m_factory; DataAllocator m_dallocator; + //! has define() been called? 
bool define_function_called = false; // @@ -1768,6 +1778,13 @@ FabArray::ok () const return isok == 1; } +template +bool +FabArray::isDefined () const +{ + return define_function_called; +} + template void FabArray::define (const BoxArray& bxs, @@ -2848,7 +2865,7 @@ FabArray::SumBoundary_nowait (int scomp, int ncomp, IntVect const& src_ngho FabArray* tmp = new FabArray( boxArray(), DistributionMap(), ncomp, src_nghost, MFInfo(), Factory() ); amrex::Copy(*tmp, *this, scomp, 0, ncomp, src_nghost); - this->setVal(0.0, scomp, ncomp, dst_nghost); + this->setVal(typename FAB::value_type(0), scomp, ncomp, dst_nghost); this->ParallelCopy_nowait(*tmp,0,scomp,ncomp,src_nghost,dst_nghost,period,FabArrayBase::ADD); // All local. Operation complete. diff --git a/Src/Base/AMReX_FabArrayCommI.H b/Src/Base/AMReX_FabArrayCommI.H index c894fe0b2c7..3d3fe1743a2 100644 --- a/Src/Base/AMReX_FabArrayCommI.H +++ b/Src/Base/AMReX_FabArrayCommI.H @@ -10,7 +10,7 @@ FabArray::FBEP_nowait (int scomp, int ncomp, const IntVect& nghost, bool enforce_periodicity_only, bool override_sync) { - BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms"); + BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms: FB"); BL_PROFILE("FillBoundary_nowait()"); AMREX_ASSERT_WITH_MESSAGE(!fbd, "FillBoundary_nowait() called when comm operation already in progress."); @@ -316,7 +316,7 @@ FabArray::ParallelCopy_nowait (const FabArray& src, const FabArrayBase::CPC * a_cpc, bool to_ghost_cells_only) { - BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms"); + BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms: PC"); BL_PROFILE("FabArray::ParallelCopy_nowait()"); AMREX_ASSERT_WITH_MESSAGE(!pcd, "ParallelCopy_nowait() called when comm operation already in progress."); diff --git a/Src/Base/AMReX_Geometry.H b/Src/Base/AMReX_Geometry.H index 54a8b8630d3..890ec2e0f7e 100644 --- a/Src/Base/AMReX_Geometry.H +++ b/Src/Base/AMReX_Geometry.H @@ -67,6 +67,56 @@ public: int coord; }; + namespace detail { + template + T bisect_prob_lo (amrex::Real plo, 
amrex::Real /*phi*/, amrex::Real dxinv, int ilo, int ihi, amrex::Real tol) { + T lo = static_cast(plo + tol); + bool safe; + { + int i = int(Math::floor((lo - plo)*dxinv)) + ilo; + safe = i >= ilo && i <= ihi; + } + if (safe) { + return lo; + } else { + // bisect the point at which the cell no longer maps to inside the domain + T hi = static_cast(plo + 0.5_rt/dxinv); + T mid = bisect(lo, hi, + [=] AMREX_GPU_HOST_DEVICE (T x) -> T + { + int i = int(Math::floor((x - plo)*dxinv)) + ilo; + bool inside = i >= ilo && i <= ihi; + return static_cast(inside) - T(0.5); + }, static_cast(tol)); + return mid - static_cast(tol); + } + } + + template + T bisect_prob_hi (amrex::Real plo, amrex::Real phi, amrex::Real dxinv, int ilo, int ihi, amrex::Real tol) { + T hi = static_cast(phi - tol); + bool safe; + { + int i = int(Math::floor((hi - plo)*dxinv)) + ilo; + safe = i >= ilo && i <= ihi; + } + if (safe) { + return hi; + } else { + // bisect the point at which the cell no longer maps to inside the domain + T lo = static_cast(phi - 0.5_rt/dxinv); + T mid = bisect(lo, hi, + [=] AMREX_GPU_HOST_DEVICE (T x) -> T + { + int i = int(Math::floor((x - plo)*dxinv)) + ilo; + bool inside = i >= ilo && i <= ihi; + return static_cast(inside) - T(0.5); + }, static_cast(tol)); + return mid - static_cast(tol); + } + } + } + class Geometry : public CoordSys @@ -168,8 +218,6 @@ public: //! Returns the problem domain. const RealBox& ProbDomain () const noexcept { return prob_domain; } - //! Returns the roundoff domain. - const RealBox& RoundoffDomain () const noexcept { return roundoff_domain; } //! Sets the problem domain. 
void ProbDomain (const RealBox& rb) noexcept { @@ -193,12 +241,19 @@ public: return {{AMREX_D_DECL(prob_domain.hi(0),prob_domain.hi(1),prob_domain.hi(2))}}; } - GpuArray RoundoffLoArray () const noexcept { - return {{AMREX_D_DECL(roundoff_domain.lo(0),roundoff_domain.lo(1),roundoff_domain.lo(2))}}; + GpuArray ProbLoArrayInParticleReal () const noexcept { +#ifdef AMREX_SINGLE_PRECISION_PARTICLES + return roundoff_lo_f; +#else + return roundoff_lo_d; +#endif } - - GpuArray RoundoffHiArray () const noexcept { - return {{AMREX_D_DECL(roundoff_domain.hi(0),roundoff_domain.hi(1),roundoff_domain.hi(2))}}; + GpuArray ProbHiArrayInParticleReal () const noexcept { +#ifdef AMREX_SINGLE_PRECISION_PARTICLES + return roundoff_hi_f; +#else + return roundoff_hi_d; +#endif } //! Returns the overall size of the domain by multiplying the ProbLength's together @@ -365,9 +420,13 @@ public: const Box& src, Vector& out) const noexcept; + //! Return domain box with non-periodic directions grown by ngrow. + Box growNonPeriodicDomain (IntVect const& ngrow) const noexcept; //! Return domain box with non-periodic directions grown by ngrow. Box growNonPeriodicDomain (int ngrow) const noexcept; //! Return domain box with periodic directions grown by ngrow. + Box growPeriodicDomain (IntVect const& ngrow) const noexcept; + //! Return domain box with periodic directions grown by ngrow. Box growPeriodicDomain (int ngrow) const noexcept; //! Set periodicity flags and return the old flags. @@ -406,7 +465,7 @@ public: * are sure to be mapped to cells inside the Domain() box. Note that * the same need not be true for all points inside ProbDomain(). */ - bool outsideRoundoffDomain (AMREX_D_DECL(Real x, Real y, Real z)) const; + bool outsideRoundoffDomain (AMREX_D_DECL(ParticleReal x, ParticleReal y, ParticleReal z)) const; /** * \brief Returns true if a point is inside the roundoff domain. @@ -414,7 +473,7 @@ public: * are sure to be mapped to cells inside the Domain() box. 
Note that * the same need not be true for all points inside ProbDomain(). */ - bool insideRoundoffDomain (AMREX_D_DECL(Real x, Real y, Real z)) const; + bool insideRoundoffDomain (AMREX_D_DECL(ParticleReal x, ParticleReal y, ParticleReal z)) const; /** * \brief Compute the roundoff domain. Public because it contains an @@ -430,10 +489,11 @@ private: RealBox prob_domain; // Due to round-off errors, not all floating point numbers for which plo >= x < phi - // will map to a cell that is inside "domain". "roundoff_domain" stores a phi - // that is very close to that in prob_domain, and for which all floating point numbers - // inside it according to a naive inequality check will map to a cell inside domain. - RealBox roundoff_domain; + // will map to a cell that is inside "domain". "roundoff_{lo,hi}_{f,d}" each store + // a position that is very close to that in prob_domain, and for which all doubles and floats less than + // it will map to a cell inside domain. + GpuArray roundoff_lo_d, roundoff_hi_d; + GpuArray roundoff_lo_f, roundoff_hi_f; // Box domain; diff --git a/Src/Base/AMReX_Geometry.cpp b/Src/Base/AMReX_Geometry.cpp index 395f17e352b..235c7bb7674 100644 --- a/Src/Base/AMReX_Geometry.cpp +++ b/Src/Base/AMReX_Geometry.cpp @@ -473,29 +473,41 @@ Geometry::periodicShift (const Box& target, } Box -Geometry::growNonPeriodicDomain (int ngrow) const noexcept +Geometry::growNonPeriodicDomain (IntVect const& ngrow) const noexcept { Box b = Domain(); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { if (!isPeriodic(idim)) { - b.grow(idim,ngrow); + b.grow(idim,ngrow[idim]); } } return b; } Box -Geometry::growPeriodicDomain (int ngrow) const noexcept +Geometry::growPeriodicDomain (IntVect const& ngrow) const noexcept { Box b = Domain(); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { if (isPeriodic(idim)) { - b.grow(idim,ngrow); + b.grow(idim,ngrow[idim]); } } return b; } +Box +Geometry::growNonPeriodicDomain (int ngrow) const noexcept +{ + return 
growNonPeriodicDomain(IntVect(ngrow)); +} + +Box +Geometry::growPeriodicDomain (int ngrow) const noexcept +{ + return growPeriodicDomain(IntVect(ngrow)); +} + void Geometry::computeRoundoffDomain () { @@ -506,50 +518,48 @@ Geometry::computeRoundoffDomain () inv_dx[k] = 1.0_rt/dx[k]; } - roundoff_domain = prob_domain; for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { int ilo = Domain().smallEnd(idim); int ihi = Domain().bigEnd(idim); Real plo = ProbLo(idim); Real phi = ProbHi(idim); - Real idx = InvCellSize(idim); + Real dxinv = InvCellSize(idim); Real deltax = CellSize(idim); -#ifdef AMREX_SINGLE_PRECISION_PARTICLES - Real tolerance = std::max(1.e-4_rt*deltax, 2.e-7_rt*phi); -#else - Real tolerance = std::max(1.e-8_rt*deltax, 1.e-14_rt*phi); -#endif - // bisect the point at which the cell no longer maps to inside the domain - Real lo = static_cast(phi) - Real(0.5)*static_cast(deltax); - Real hi = static_cast(phi) + Real(0.5)*static_cast(deltax); - - Real mid = bisect(lo, hi, - [=] AMREX_GPU_HOST_DEVICE (Real x) -> Real - { - int i = int(Math::floor((x - plo)*idx)) + ilo; - bool inside = i >= ilo && i <= ihi; - return static_cast(inside) - Real(0.5); - }, tolerance); - roundoff_domain.setHi(idim, mid - tolerance); + Real ftol = std::max(1.e-4_rt*deltax, 2.e-7_rt*phi); + Real dtol = std::max(1.e-8_rt*deltax, 1.e-14_rt*phi); + + roundoff_lo_f[idim] = detail::bisect_prob_lo (plo, phi, dxinv, ilo, ihi, ftol); + roundoff_lo_d[idim] = detail::bisect_prob_lo(plo, phi, dxinv, ilo, ihi, dtol); + roundoff_hi_f[idim] = detail::bisect_prob_hi (plo, phi, dxinv, ilo, ihi, ftol); + roundoff_hi_d[idim] = detail::bisect_prob_hi(plo, phi, dxinv, ilo, ihi, dtol); } } bool -Geometry::outsideRoundoffDomain (AMREX_D_DECL(Real x, Real y, Real z)) const +Geometry::outsideRoundoffDomain (AMREX_D_DECL(ParticleReal x, ParticleReal y, ParticleReal z)) const { - bool outside = AMREX_D_TERM(x < roundoff_domain.lo(0) - || x >= roundoff_domain.hi(0), - || y < roundoff_domain.lo(1) - || y >= 
roundoff_domain.hi(1), - || z < roundoff_domain.lo(2) - || z >= roundoff_domain.hi(2)); +#ifdef AMREX_SINGLE_PRECISION_PARTICLES + bool outside = AMREX_D_TERM(x < roundoff_lo_f[0] + || x >= roundoff_hi_f[0], + || y < roundoff_lo_f[1] + || y >= roundoff_hi_f[1], + || z < roundoff_lo_f[2] + || z >= roundoff_hi_f[2]); +#else + bool outside = AMREX_D_TERM(x < roundoff_lo_d[0] + || x >= roundoff_hi_d[0], + || y < roundoff_lo_d[1] + || y >= roundoff_hi_d[1], + || z < roundoff_lo_d[2] + || z >= roundoff_hi_d[2]); +#endif return outside; } bool -Geometry::insideRoundoffDomain (AMREX_D_DECL(Real x, Real y, Real z)) const +Geometry::insideRoundoffDomain (AMREX_D_DECL(ParticleReal x, ParticleReal y, ParticleReal z)) const { return !outsideRoundoffDomain(AMREX_D_DECL(x, y, z)); } diff --git a/Src/Base/AMReX_GpuAtomic.H b/Src/Base/AMReX_GpuAtomic.H index e6b2780abe0..a07704cb86b 100644 --- a/Src/Base/AMReX_GpuAtomic.H +++ b/Src/Base/AMReX_GpuAtomic.H @@ -30,15 +30,16 @@ namespace detail { { #if defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; static_assert(sizeof(R) == sizeof(I), "sizeof R != sizeof I"); I* const add_as_I = reinterpret_cast(address); - sycl::atomic a{sycl::multi_ptr(add_as_I)}; - I old_I = a.load(mo), new_I; + sycl::atomic_ref a{*add_as_I}; + I old_I = a.load(), new_I; do { R const new_R = f(*(reinterpret_cast(&old_I)), val); new_I = *(reinterpret_cast(&new_R)); - } while (! a.compare_exchange_strong(old_I, new_I, mo)); + } while (! 
a.compare_exchange_strong(old_I, new_I)); return *(reinterpret_cast(&old_I)); #else R old = *address; @@ -53,17 +54,18 @@ namespace detail { { #if defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; static_assert(sizeof(R) == sizeof(I), "sizeof R != sizeof I"); I* const add_as_I = reinterpret_cast(address); - sycl::atomic a{sycl::multi_ptr(add_as_I)}; - I old_I = a.load(mo), new_I; + sycl::atomic_ref a{*add_as_I}; + I old_I = a.load(), new_I; bool test_success; do { R const tmp = op(*(reinterpret_cast(&old_I)), val); new_I = *(reinterpret_cast(&tmp)); test_success = cond(tmp); - } while (test_success && ! a.compare_exchange_strong(old_I, new_I, mo)); + } while (test_success && ! a.compare_exchange_strong(old_I, new_I)); return test_success; #else R old = *address; @@ -131,9 +133,10 @@ namespace detail { return atomicAdd(sum, value); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(sum)}; - return a.fetch_add(value, mo); + sycl::atomic_ref a{*sum}; + return a.fetch_add(value); #else amrex::ignore_unused(sum, value); return T(); // should never get here, but have to return something @@ -313,9 +316,10 @@ namespace detail { return atomicMin(m, value); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(m)}; - return a.fetch_min(value, mo); + sycl::atomic_ref a{*m}; + return a.fetch_min(value); #else amrex::ignore_unused(m,value); return T(); // should never get here, but have to return something @@ -373,9 +377,10 @@ namespace detail { return atomicMax(m, value); #elif 
defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(m)}; - return a.fetch_max(value, mo); + sycl::atomic_ref a{*m}; + return a.fetch_max(value); #else amrex::ignore_unused(m,value); return T(); // should never get here, but have to return something @@ -430,9 +435,10 @@ namespace detail { return atomicOr(m, value); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(m)}; - return a.fetch_or(value, mo); + sycl::atomic_ref a{*m}; + return a.fetch_or(value); #else int const old = *m; *m = (*m) || value; @@ -451,9 +457,10 @@ namespace detail { return atomicAnd(m, value ? ~0x0 : 0); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(m)}; - return a.fetch_and(value ? ~0x0 : 0, mo); + sycl::atomic_ref a{*m}; + return a.fetch_and(value ? ~0x0 : 0); #else int const old = *m; *m = (*m) && value; @@ -472,11 +479,12 @@ namespace detail { { #if defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; - sycl::atomic a{sycl::multi_ptr(m)}; - unsigned int oldi = a.load(mo), newi; + constexpr auto ms = sycl::memory_scope::device; + sycl::atomic_ref a{*m}; + unsigned int oldi = a.load(), newi; do { newi = (oldi >= value) ? 0u : (oldi+1u); - } while (! a.compare_exchange_strong(oldi, newi, mo)); + } while (! 
a.compare_exchange_strong(oldi, newi)); return oldi; #else auto const old = *m; @@ -509,12 +517,13 @@ namespace detail { return atomicDec(m, value); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(m)}; - unsigned int oldi = a.load(mo), newi; + sycl::atomic_ref a{*m}; + unsigned int oldi = a.load(), newi; do { newi = ((oldi == 0u) || (oldi > value)) ? value : (oldi-1u); - } while (! a.compare_exchange_strong(oldi, newi, mo)); + } while (! a.compare_exchange_strong(oldi, newi)); return oldi; #else auto const old = *m; @@ -535,9 +544,10 @@ namespace detail { return atomicExch(address, val); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(address)}; - return sycl::atomic_exchange(a, val, mo); + sycl::atomic_ref a{*address}; + return a.exchange(val); #else auto const old = *address; *address = val; @@ -557,9 +567,10 @@ namespace detail { return atomicCAS(address, compare, val); #elif defined(__SYCL_DEVICE_ONLY__) constexpr auto mo = sycl::memory_order::relaxed; + constexpr auto ms = sycl::memory_scope::device; constexpr auto as = sycl::access::address_space::global_space; - sycl::atomic a{sycl::multi_ptr(address)}; - a.compare_exchange_strong(compare, val, mo); + sycl::atomic_ref a{*address}; + a.compare_exchange_strong(compare, val); return compare; #else auto const old = *address; diff --git a/Src/Base/AMReX_GpuContainers.H b/Src/Base/AMReX_GpuContainers.H index cc68770ff3f..faccec1d2ef 100644 --- a/Src/Base/AMReX_GpuContainers.H +++ b/Src/Base/AMReX_GpuContainers.H @@ -19,13 +19,19 @@ namespace Gpu { /** * \brief A PODVector that uses the standard memory Arena. 
- * Note that, on NVIDIA architectures, this Arena is actually - * managed. - * + * Note that the memory might or might not be managed depending + * on the amrex.the_arena_is_managed ParmParse parameter. */ template using DeviceVector = PODVector >; + /** + * \brief A PODVector that uses the non-managed device memory arena. + * + */ + template + using NonManagedDeviceVector = PODVector >; + /** * \brief A PODVector that uses the managed memory arena. * @@ -83,6 +89,9 @@ namespace Gpu { template using HostVector = PODVector; + template + using NonManagedVector = PODVector; + template using ManagedVector = PODVector; diff --git a/Src/Base/AMReX_GpuDevice.H b/Src/Base/AMReX_GpuDevice.H index 8a327704a1d..a61ab4fe406 100644 --- a/Src/Base/AMReX_GpuDevice.H +++ b/Src/Base/AMReX_GpuDevice.H @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -148,9 +149,9 @@ public: // definition: https://github.com/llvm/llvm-project/blob/62ec4ac90738a5f2d209ed28c822223e58aaaeb7/clang/lib/Basic/Targets/AMDGPU.cpp#L400 // overview wavefront size: https://github.com/llvm/llvm-project/blob/efc063b621ea0c4d1e452bcade62f7fc7e1cc937/clang/test/Driver/amdgpu-macros.cl#L70-L115 // gfx10XX has 32 threads per wavefront else 64 - static constexpr int warp_size = __AMDGCN_WAVEFRONT_SIZE; + static AMREX_EXPORT constexpr int warp_size = __AMDGCN_WAVEFRONT_SIZE; # else - static constexpr int warp_size = AMREX_HIP_OR_CUDA_OR_DPCPP(64,32,16); + static AMREX_EXPORT constexpr int warp_size = AMREX_HIP_OR_CUDA_OR_DPCPP(64,32,16); # endif static unsigned int maxBlocksPerLaunch () noexcept { return max_blocks_per_launch; } @@ -166,28 +167,28 @@ private: static void initialize_gpu (); - static int device_id; - static int num_devices_used; - static int verbose; - static int max_gpu_streams; + static AMREX_EXPORT int device_id; + static AMREX_EXPORT int num_devices_used; + static AMREX_EXPORT int verbose; + static AMREX_EXPORT int max_gpu_streams; #ifdef AMREX_USE_GPU - static dim3 
numThreadsMin; - static dim3 numBlocksOverride, numThreadsOverride; + static AMREX_EXPORT dim3 numThreadsMin; + static AMREX_EXPORT dim3 numBlocksOverride, numThreadsOverride; // We build gpu_default_stream and gpu_stream_pool. // The non-owning gpu_stream is used to store the current stream that will be used. // gpu_stream is a vector so that it's thread safe to write to it. - static gpuStream_t gpu_default_stream; - static Vector gpu_stream_pool; // The size of this is max_gpu_stream - static Vector gpu_stream; // The size of this is omp_max_threads - static gpuDeviceProp_t device_prop; - static int memory_pools_supported; - static unsigned int max_blocks_per_launch; + static AMREX_EXPORT gpuStream_t gpu_default_stream; + static AMREX_EXPORT Vector gpu_stream_pool; // The size of this is max_gpu_stream + static AMREX_EXPORT Vector gpu_stream; // The size of this is omp_max_threads + static AMREX_EXPORT gpuDeviceProp_t device_prop; + static AMREX_EXPORT int memory_pools_supported; + static AMREX_EXPORT unsigned int max_blocks_per_launch; #ifdef AMREX_USE_DPCPP - static std::unique_ptr sycl_context; - static std::unique_ptr sycl_device; + static AMREX_EXPORT std::unique_ptr sycl_context; + static AMREX_EXPORT std::unique_ptr sycl_device; #endif #endif }; diff --git a/Src/Base/AMReX_GpuDevice.cpp b/Src/Base/AMReX_GpuDevice.cpp index 8d42363f0a7..fe7257ea971 100644 --- a/Src/Base/AMReX_GpuDevice.cpp +++ b/Src/Base/AMReX_GpuDevice.cpp @@ -22,9 +22,9 @@ #if defined(AMREX_USE_HIP) #include #if defined(AMREX_USE_ROCTX) -#include +#include #if defined(AMREX_PROFILING) || defined (AMREX_TINY_PROFILING) -#include +#include #endif #endif #endif @@ -397,11 +397,7 @@ Device::initialize_gpu () // check compute capability - if (sizeof(Real) == 8) { - AMREX_HIP_SAFE_CALL(hipDeviceSetSharedMemConfig(hipSharedMemBankSizeEightByte)); - } else if (sizeof(Real) == 4) { - AMREX_HIP_SAFE_CALL(hipDeviceSetSharedMemConfig(hipSharedMemBankSizeFourByte)); - } + // AMD devices do not support 
shared cache banking. AMREX_HIP_SAFE_CALL(hipStreamCreate(&gpu_default_stream)); for (int i = 0; i < max_gpu_streams; ++i) { @@ -467,8 +463,8 @@ Device::initialize_gpu () device_prop.warpSize = warp_size; auto sgss = d.get_info(); device_prop.maxMemAllocSize = d.get_info(); - device_prop.managedMemory = d.get_info(); - device_prop.concurrentManagedAccess = d.get_info(); + device_prop.managedMemory = d.has(sycl::aspect::usm_host_allocations); + device_prop.concurrentManagedAccess = d.has(sycl::aspect::usm_shared_allocations); device_prop.maxParameterSize = d.get_info(); { amrex::Print() << "Device Properties:\n" diff --git a/Src/Base/AMReX_GpuLaunch.H b/Src/Base/AMReX_GpuLaunch.H index d31bae568c1..7e877140629 100644 --- a/Src/Base/AMReX_GpuLaunch.H +++ b/Src/Base/AMReX_GpuLaunch.H @@ -30,11 +30,11 @@ #define AMREX_GPU_Z_STRIDE 1 #ifdef AMREX_USE_CUDA -# define AMREX_LAUNCH_KERNEL(blocks, threads, sharedMem, stream, ... ) \ - amrex::launch_global<<>>(__VA_ARGS__); +# define AMREX_LAUNCH_KERNEL(MT, blocks, threads, sharedMem, stream, ... ) \ + amrex::launch_global<<>>(__VA_ARGS__) #elif defined(AMREX_USE_HIP) -# define AMREX_LAUNCH_KERNEL(blocks, threads, sharedMem, stream, ... ) \ - hipLaunchKernelGGL(launch_global, blocks, threads, sharedMem, stream, __VA_ARGS__); +# define AMREX_LAUNCH_KERNEL(MT, blocks, threads, sharedMem, stream, ... 
) \ + hipLaunchKernelGGL(launch_global, blocks, threads, sharedMem, stream, __VA_ARGS__) #endif @@ -151,6 +151,28 @@ namespace Gpu { dim3 numThreads; std::size_t sharedMem = 0; }; + + template + ExecutionConfig + makeExecutionConfig (Long N) noexcept + { + ExecutionConfig ec(dim3{}, dim3{}); + ec.numBlocks.x = (std::max(N,Long(1)) + MT - 1) / MT; + ec.numThreads.x = MT; + AMREX_ASSERT(MT % Gpu::Device::warp_size == 0); + return ec; + } + + template + ExecutionConfig + makeExecutionConfig (const Box& box) noexcept + { + ExecutionConfig ec(dim3{}, dim3{}); + ec.numBlocks.x = (std::max(box.numPts(),Long(1)) + MT - 1) / MT; + ec.numThreads.x = MT; + AMREX_ASSERT(MT % Gpu::Device::warp_size == 0); + return ec; + } #endif } @@ -221,6 +243,8 @@ namespace Gpu { #ifdef AMREX_USE_GPU +#ifndef AMREX_USE_DPCPP + #define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \ { using amrex_i_inttype = typename std::remove_const::type; \ if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ @@ -344,6 +368,111 @@ namespace Gpu { block3; \ } +#else +// xxxxx DPCPP todo: host disabled in host device + +#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \ + { using amrex_i_inttype = typename std::remove_const::type; \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + }} + +#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); \ + } + +#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \ + { using amrex_i_inttype = typename std::remove_const::type; \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + }} + +#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \ + block \ + ); \ + } \ + else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + AMREX_LAUNCH_DEVICE_LAMBDA(box,tbox,block); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); \ + } + +#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } + +#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \ + if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \ + { \ + AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); \ + } + +#endif + #else #define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \ @@ -421,4 +550,6 @@ namespace Gpu { #endif +#include + #endif diff --git a/Src/Base/AMReX_GpuLaunchFunctsC.H b/Src/Base/AMReX_GpuLaunchFunctsC.H index 025b43fec0a..6ce9cca0f3a 100644 --- a/Src/Base/AMReX_GpuLaunchFunctsC.H +++ b/Src/Base/AMReX_GpuLaunchFunctsC.H @@ -55,11 +55,18 @@ namespace detail { } template -void launch (T const& n, L&& f, std::size_t /*shared_mem_bytes*/=0) noexcept +void launch (T const& n, L&& f) noexcept { f(n); } +template +void launch (T const& n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + f(n); +} + template ::value> > void For (T n, L&& f) noexcept { @@ -68,12 +75,26 @@ void For (T n, L&& f) noexcept } } +template ::value> > +void For (T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(n, std::forward(f)); +} + template ::value> > void For (Gpu::KernelInfo const&, T n, L&& f) noexcept { For(n, std::forward(f)); } +template ::value> > +void For (Gpu::KernelInfo const&, T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(n, std::forward(f)); +} + template ::value> > void ParallelFor (T n, L&& f) noexcept { @@ -83,12 +104,26 @@ void ParallelFor (T n, L&& f) noexcept } } +template ::value> > +void ParallelFor (T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(n, std::forward(f)); +} + template ::value> > void ParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept { ParallelFor(n, std::forward(f)); } +template ::value> > +void ParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(n, std::forward(f)); +} + template void For (Box const& box, L&& f) noexcept { @@ -101,12 +136,26 @@ void For (Box const& box, L&& f) noexcept }}} } +template +void For (Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box, std::forward(f)); +} + template void For (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept { For(box, 
std::forward(f)); } +template +void For (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box, std::forward(f)); +} + template void ParallelFor (Box const& box, L&& f) noexcept { @@ -120,12 +169,26 @@ void ParallelFor (Box const& box, L&& f) noexcept }}} } +template +void ParallelFor (Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box, std::forward(f)); +} + template void ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept { ParallelFor(box, std::forward(f)); } +template +void ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box, std::forward(f)); +} + template ::value> > void For (Box const& box, T ncomp, L&& f) noexcept { @@ -140,12 +203,26 @@ void For (Box const& box, T ncomp, L&& f) noexcept } } +template ::value> > +void For (Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box, ncomp, std::forward(f)); +} + template ::value> > void For (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept { For(box, ncomp, std::forward(f)); } +template ::value> > +void For (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box, ncomp, std::forward(f)); +} + template ::value> > void ParallelFor (Box const& box, T ncomp, L&& f) noexcept { @@ -161,12 +238,26 @@ void ParallelFor (Box const& box, T ncomp, L&& f) noexcept } } +template ::value> > +void ParallelFor (Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box, ncomp, std::forward(f)); +} + template ::value> > void ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept { ParallelFor(box, ncomp, std::forward(f)); } +template ::value> > +void ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box, ncomp, std::forward(f)); +} + template void For (Box 
const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { @@ -174,12 +265,27 @@ void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept For(box2, std::forward(f2)); } +template +void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1, std::forward(f1)); + For(box2, std::forward(f2)); +} + template void For (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { For (box1, box2, std::forward(f1), std::forward(f2)); } +template +void For (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For (box1, box2, std::forward(f1), std::forward(f2)); +} + template void For (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { @@ -188,12 +294,28 @@ void For (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L For(box3, std::forward(f3)); } +template +void For (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1, std::forward(f1)); + For(box2, std::forward(f2)); + For(box3, std::forward(f3)); +} + template void For (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { For(box1, box2, box3, std::forward(f1), std::forward(f2), std::forward(f3)); } +template +void For (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1, box2, box3, std::forward(f1), std::forward(f2), std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -204,6 +326,17 @@ void For (Box const& box1, T1 ncomp1, L1&& f1, For(box2, ncomp2, std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void For (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + 
amrex::ignore_unused(MT); + For(box1, ncomp1, std::forward(f1)); + For(box2, ncomp2, std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -214,6 +347,17 @@ void For (Gpu::KernelInfo const&, For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void For (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -227,6 +371,20 @@ void For (Box const& box1, T1 ncomp1, L1&& f1, For(box3, ncomp3, std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void For (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1, ncomp1, std::forward(f1)); + For(box2, ncomp2, std::forward(f2)); + For(box3, ncomp3, std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -241,6 +399,21 @@ void For (Gpu::KernelInfo const&, box3,ncomp3,std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void For (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + template void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { @@ -248,12 +421,27 @@ void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept ParallelFor(box2, std::forward(f2)); } +template +void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + 
amrex::ignore_unused(MT); + ParallelFor(box1, std::forward(f1)); + ParallelFor(box2, std::forward(f2)); +} + template void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(box1,box2,f1,f2); } +template +void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,f1,f2); +} + template void ParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { @@ -262,12 +450,28 @@ void ParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2 ParallelFor(box3, std::forward(f3)); } +template +void ParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1, std::forward(f1)); + ParallelFor(box2, std::forward(f2)); + ParallelFor(box3, std::forward(f3)); +} + template void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } +template +void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -278,6 +482,17 @@ void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, ParallelFor(box2, ncomp2, std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1, ncomp1, std::forward(f1)); + ParallelFor(box2, ncomp2, std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -289,6 +504,18 @@ 
void ParallelFor (Gpu::KernelInfo const&, box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void ParallelFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -302,6 +529,20 @@ void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, ParallelFor(box3, ncomp3, std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1, ncomp1, std::forward(f1)); + ParallelFor(box2, ncomp2, std::forward(f2)); + ParallelFor(box3, ncomp3, std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -316,30 +557,73 @@ void ParallelFor (Gpu::KernelInfo const&, box3, ncomp3, std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void ParallelFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1, ncomp1, std::forward(f1), + box2, ncomp2, std::forward(f2), + box3, ncomp3, std::forward(f3)); +} + template ::value> > void HostDeviceParallelFor (T n, L&& f) noexcept { ParallelFor(n,std::forward(f)); } +template ::value> > +void HostDeviceParallelFor (T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(n,std::forward(f)); +} + template void HostDeviceParallelFor (Box const& box, L&& f) noexcept { ParallelFor(box,std::forward(f)); } +template +void HostDeviceParallelFor (Box const& box, L&& f) noexcept +{ + 
amrex::ignore_unused(MT); + ParallelFor(box,std::forward(f)); +} + template ::value> > void HostDeviceParallelFor (Box const& box, T ncomp, L&& f) noexcept { ParallelFor(box,ncomp,std::forward(f)); } +template ::value> > +void HostDeviceParallelFor (Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box,ncomp,std::forward(f)); +} + template void HostDeviceParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); } +template +void HostDeviceParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); +} + template void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept @@ -347,6 +631,14 @@ void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } +template +void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -356,6 +648,16 @@ void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, ParallelFor(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -369,30 +671,72 @@ void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, box3,ncomp3,std::forward(f3)); } +template ::value>, + typename 
M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + template ::value> > void HostDeviceFor (T n, L&& f) noexcept { For(n,std::forward(f)); } +template ::value> > +void HostDeviceFor (T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(n,std::forward(f)); +} + template void HostDeviceFor (Box const& box, L&& f) noexcept { For(box,std::forward(f)); } +template +void HostDeviceFor (Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box,std::forward(f)); +} + template ::value> > void HostDeviceFor (Box const& box, T ncomp, L&& f) noexcept { For(box,ncomp,std::forward(f)); } +template ::value> > +void HostDeviceFor (Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box,ncomp,std::forward(f)); +} + template void HostDeviceFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { For(box1,box2,std::forward(f1),std::forward(f2)); } +template +void HostDeviceFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1,box2,std::forward(f1),std::forward(f2)); +} + template void HostDeviceFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept @@ -400,6 +744,14 @@ void HostDeviceFor (Box const& box1, Box const& box2, Box const& box3, For(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } +template +void HostDeviceFor (Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -409,6 +761,16 @@ void 
HostDeviceFor (Box const& box1, T1 ncomp1, L1&& f1, For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -422,30 +784,72 @@ void HostDeviceFor (Box const& box1, T1 ncomp1, L1&& f1, box3,ncomp3,std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + template ::value> > void HostDeviceParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept { ParallelFor(n,std::forward(f)); } +template ::value> > +void HostDeviceParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(n,std::forward(f)); +} + template void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept { ParallelFor(box,std::forward(f)); } +template +void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box,std::forward(f)); +} + template ::value> > void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept { ParallelFor(box,ncomp,std::forward(f)); } +template ::value> > +void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box,ncomp,std::forward(f)); +} + template void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) 
noexcept { ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); } +template +void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,std::forward(f1),std::forward(f2)); +} + template void HostDeviceParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, @@ -454,6 +858,15 @@ void HostDeviceParallelFor (Gpu::KernelInfo const&, ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } +template +void HostDeviceParallelFor (Gpu::KernelInfo const&, + Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -464,6 +877,17 @@ void HostDeviceParallelFor (Gpu::KernelInfo const&, ParallelFor(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceParallelFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -478,30 +902,73 @@ void HostDeviceParallelFor (Gpu::KernelInfo const&, box3,ncomp3,std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceParallelFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + ParallelFor(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + template ::value> > void HostDeviceFor (Gpu::KernelInfo const&, 
T n, L&& f) noexcept { For(n,std::forward(f)); } +template ::value> > +void HostDeviceFor (Gpu::KernelInfo const&, T n, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(n,std::forward(f)); +} + template void HostDeviceFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept { For(box,std::forward(f)); } +template +void HostDeviceFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box,std::forward(f)); +} + template ::value> > void HostDeviceFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept { For(box,ncomp,std::forward(f)); } +template ::value> > +void HostDeviceFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept +{ + amrex::ignore_unused(MT); + For(box,ncomp,std::forward(f)); +} + template void HostDeviceFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { For(box1,box2,std::forward(f1),std::forward(f2)); } +template +void HostDeviceFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1,box2,std::forward(f1),std::forward(f2)); +} + template void HostDeviceFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, @@ -510,6 +977,15 @@ void HostDeviceFor (Gpu::KernelInfo const&, For(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } +template +void HostDeviceFor (Gpu::KernelInfo const&, + Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + template ::value>, typename M2=std::enable_if_t::value> > @@ -520,6 +996,17 @@ void HostDeviceFor (Gpu::KernelInfo const&, For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& 
box2, T2 ncomp2, L2&& f2) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + template ::value>, typename M2=std::enable_if_t::value>, @@ -534,6 +1021,21 @@ void HostDeviceFor (Gpu::KernelInfo const&, box3,ncomp3,std::forward(f3)); } +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceFor (Gpu::KernelInfo const&, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + amrex::ignore_unused(MT); + For(box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + template ::value> > void ParallelForRNG (T n, L&& f) noexcept { diff --git a/Src/Base/AMReX_GpuLaunchFunctsG.H b/Src/Base/AMReX_GpuLaunchFunctsG.H index 12206f69b70..7940b5589a0 100644 --- a/Src/Base/AMReX_GpuLaunchFunctsG.H +++ b/Src/Base/AMReX_GpuLaunchFunctsG.H @@ -64,11 +64,24 @@ void launch (int nblocks, int nthreads_per_block, gpuStream_t stream, L&& f) noe } } -template +template +void launch (int nblocks, std::size_t shared_mem_bytes, gpuStream_t stream, + L&& f) noexcept +{ + launch(nblocks, MT, shared_mem_bytes, stream, std::forward(f)); +} + +template +void launch (int nblocks, gpuStream_t stream, L&& f) noexcept +{ + launch(nblocks, MT, stream, std::forward(f)); +} + +template void launch (T const& n, L&& f) noexcept { if (amrex::isEmpty(n)) return; - const auto ec = Gpu::ExecutionConfig(n); + const auto ec = Gpu::makeExecutionConfig(n); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -139,11 +152,11 @@ namespace detail { } } -template ::value> > +template ::value> > void ParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept { if (amrex::isEmpty(n)) return; - const auto ec = Gpu::ExecutionConfig(n); + const auto ec = Gpu::makeExecutionConfig(n); int 
nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -186,7 +199,7 @@ void ParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept } } -template +template void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept { if (amrex::isEmpty(box)) return; @@ -195,7 +208,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept const auto len = amrex::length(box); const auto lenxy = len.x*len.y; const auto lenx = len.x; - const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -250,7 +263,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept } } -template ::value> > +template ::value> > void ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept { if (amrex::isEmpty(box)) return; @@ -259,7 +272,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) n const auto len = amrex::length(box); const auto lenxy = len.x*len.y; const auto lenx = len.x; - const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -437,7 +450,7 @@ void ParallelForRNG (Box const& box, T ncomp, L&& f) noexcept } } -template +template void ParallelFor (Gpu::KernelInfo const& /*info*/, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { if (amrex::isEmpty(box1) && amrex::isEmpty(box2)) return; @@ -452,7 +465,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, Box const& box1, Box const& b const auto len2xy = len2.x*len2.y; const auto len1x = len1.x; const auto len2x = len2.x; - const auto ec = 
Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -491,7 +504,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, Box const& box1, Box const& b } } -template +template void ParallelFor (Gpu::KernelInfo const& /*info*/, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept @@ -513,7 +526,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, const auto len1x = len1.x; const auto len2x = len2.x; const auto len3x = len3.x; - const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -561,7 +574,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, } } -template ::value>, typename M2=std::enable_if_t::value> > void ParallelFor (Gpu::KernelInfo const& /*info*/, @@ -580,7 +593,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, const auto len2xy = len2.x*len2.y; const auto len1x = len1.x; const auto len2x = len2.x; - const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = Gpu::Device::streamQueue(); @@ -623,7 +636,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, } } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > @@ -649,7 +662,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, const auto len1x = len1.x; const auto len2x = len2.x; const auto len3x = len3.x; - const auto ec = Gpu::ExecutionConfig(ncells); + const auto ec = Gpu::makeExecutionConfig(ncells); int nthreads_per_block = ec.numThreads.x; int nthreads_total = nthreads_per_block * ec.numBlocks.x; auto& q = 
Gpu::Device::streamQueue(); @@ -709,16 +722,34 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/, template void single_task (gpuStream_t stream, L&& f) noexcept { - AMREX_LAUNCH_KERNEL(1, 1, 0, stream, + AMREX_LAUNCH_KERNEL(Gpu::Device::warp_size, 1, 1, 0, stream, [=] AMREX_GPU_DEVICE () noexcept {f();}); AMREX_GPU_ERROR_CHECK(); } +template +void launch (int nblocks, std::size_t shared_mem_bytes, gpuStream_t stream, + L&& f) noexcept +{ + AMREX_LAUNCH_KERNEL(MT, nblocks, MT, shared_mem_bytes, stream, + [=] AMREX_GPU_DEVICE () noexcept { f(); }); + AMREX_GPU_ERROR_CHECK(); +} + +template +void launch (int nblocks, gpuStream_t stream, L&& f) noexcept +{ + AMREX_LAUNCH_KERNEL(MT, nblocks, MT, 0, stream, + [=] AMREX_GPU_DEVICE () noexcept { f(); }); + AMREX_GPU_ERROR_CHECK(); +} + template void launch (int nblocks, int nthreads_per_block, std::size_t shared_mem_bytes, gpuStream_t stream, L&& f) noexcept { - AMREX_LAUNCH_KERNEL(nblocks, nthreads_per_block, shared_mem_bytes, + AMREX_ASSERT(nthreads_per_block <= AMREX_GPU_MAX_THREADS); + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, nblocks, nthreads_per_block, shared_mem_bytes, stream, [=] AMREX_GPU_DEVICE () noexcept { f(); }); AMREX_GPU_ERROR_CHECK(); } @@ -729,12 +760,12 @@ void launch (int nblocks, int nthreads_per_block, gpuStream_t stream, L&& f) noe launch(nblocks, nthreads_per_block, 0, stream, std::forward(f)); } -template +template void launch (T const& n, L&& f) noexcept { if (amrex::isEmpty(n)) return; - const auto ec = Gpu::ExecutionConfig(n); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(n); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (auto const i : Gpu::Range(n)) { f(i); @@ -793,13 +824,13 @@ namespace detail { } } -template ::value> > +template ::value> > std::enable_if_t::value> ParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept { if (amrex::isEmpty(n)) 
return; - const auto ec = Gpu::ExecutionConfig(n); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(n); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (T i = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; i < n; i += stride) { @@ -809,7 +840,7 @@ ParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept AMREX_GPU_ERROR_CHECK(); } -template +template std::enable_if_t::value> ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept { @@ -819,8 +850,8 @@ ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept const auto len = amrex::length(box); const auto lenxy = len.x*len.y; const auto lenx = len.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; icell < ncells; icell += stride) @@ -837,7 +868,7 @@ ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept AMREX_GPU_ERROR_CHECK(); } -template ::value> > +template ::value> > std::enable_if_t::value> ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept { @@ -847,8 +878,8 @@ ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept const auto len = amrex::length(box); const auto lenxy = len.x*len.y; const auto lenx = len.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = 
blockDim.x*gridDim.x; icell < ncells; icell += stride) { @@ -871,7 +902,8 @@ ParallelForRNG (T n, L&& f) noexcept if (amrex::isEmpty(n)) return; randState_t* rand_state = getRandState(); const auto ec = Gpu::ExecutionConfig(n); - AMREX_LAUNCH_KERNEL(amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, + amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { int tid = blockDim.x*blockIdx.x+threadIdx.x; @@ -896,7 +928,8 @@ ParallelForRNG (Box const& box, L&& f) noexcept const auto lenxy = len.x*len.y; const auto lenx = len.x; const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, + amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { int tid = blockDim.x*blockIdx.x+threadIdx.x; @@ -927,7 +960,8 @@ ParallelForRNG (Box const& box, T ncomp, L&& f) noexcept const auto lenxy = len.x*len.y; const auto lenx = len.x; const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, + amrex::min(ec.numBlocks.x, Gpu::Device::maxBlocksPerLaunch()), ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { int tid = blockDim.x*blockIdx.x+threadIdx.x; @@ -948,7 +982,7 @@ ParallelForRNG (Box const& box, T ncomp, L&& f) noexcept AMREX_GPU_ERROR_CHECK(); } -template +template std::enable_if_t::value && MaybeDeviceRunnable::value> ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept @@ -965,8 +999,8 @@ ParallelFor (Gpu::KernelInfo const&, const auto len2xy = len2.x*len2.y; const auto len1x = len1.x; const auto len2x = len2.x; - const auto ec = Gpu::ExecutionConfig(ncells); - 
AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; icell < ncells; icell += stride) { @@ -993,7 +1027,7 @@ ParallelFor (Gpu::KernelInfo const&, AMREX_GPU_ERROR_CHECK(); } -template +template std::enable_if_t::value && MaybeDeviceRunnable::value && MaybeDeviceRunnable::value> ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, Box const& box3, @@ -1016,8 +1050,8 @@ ParallelFor (Gpu::KernelInfo const&, const auto len1x = len1.x; const auto len2x = len2.x; const auto len3x = len3.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; icell < ncells; icell += stride) { @@ -1053,7 +1087,7 @@ ParallelFor (Gpu::KernelInfo const&, AMREX_GPU_ERROR_CHECK(); } -template ::value>, typename M2=std::enable_if_t::value> > std::enable_if_t::value && MaybeDeviceRunnable::value> @@ -1073,8 +1107,8 @@ ParallelFor (Gpu::KernelInfo const&, const auto len2xy = len2.x*len2.y; const auto len1x = len1.x; const auto len2x = len2.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; icell < ncells; icell += stride) { @@ -1105,7 +1139,7 @@ ParallelFor (Gpu::KernelInfo const&, 
AMREX_GPU_ERROR_CHECK(); } -template ::value>, typename M2=std::enable_if_t::value>, typename M3=std::enable_if_t::value> > @@ -1132,8 +1166,8 @@ ParallelFor (Gpu::KernelInfo const&, const auto len1x = len1.x; const auto len2x = len2.x; const auto len3x = len3.x; - const auto ec = Gpu::ExecutionConfig(ncells); - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + const auto ec = Gpu::makeExecutionConfig(ncells); + AMREX_LAUNCH_KERNEL(MT, ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { for (int icell = blockDim.x*blockIdx.x+threadIdx.x, stride = blockDim.x*gridDim.x; icell < ncells; icell += stride) { @@ -1183,29 +1217,127 @@ void single_task (L&& f) noexcept single_task(Gpu::gpuStream(), std::forward(f)); } +template +void launch (T const& n, L&& f) noexcept +{ + launch(n, std::forward(f)); +} + +template ::value> > +std::enable_if_t::value> +ParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept +{ + ParallelFor(info, n, std::forward(f)); +} + +template +std::enable_if_t::value> +ParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +{ + ParallelFor(info, box, std::forward(f)); +} + +template ::value> > +std::enable_if_t::value> +ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +{ + ParallelFor(info, box, ncomp, std::forward(f)); +} + +template +std::enable_if_t::value && MaybeDeviceRunnable::value> +ParallelFor (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + ParallelFor(info, box1, box2, std::forward(f1), + std::forward(f2)); +} + +template +std::enable_if_t::value && MaybeDeviceRunnable::value && MaybeDeviceRunnable::value> +ParallelFor (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + ParallelFor(info, box1, box2, box3, std::forward(f1), + std::forward(f2), std::forward(f3)); +} + +template ::value>, + typename 
M2=std::enable_if_t::value> > +std::enable_if_t::value && MaybeDeviceRunnable::value> +ParallelFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + ParallelFor(info, box1, ncomp1, std::forward(f1), + box2, ncomp2, std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +std::enable_if_t::value && MaybeDeviceRunnable::value && MaybeDeviceRunnable::value> +ParallelFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + ParallelFor(info, box1, ncomp1, std::forward(f1), + box2, ncomp2, std::forward(f2), + box3, ncomp3, std::forward(f3)); +} + template ::value> > void For (Gpu::KernelInfo const& info, T n, L&& f) noexcept { - ParallelFor(info, n,std::forward(f)); + ParallelFor(info, n,std::forward(f)); +} + +template ::value> > +void For (Gpu::KernelInfo const& info, T n, L&& f) noexcept +{ + ParallelFor(info, n,std::forward(f)); } template void For (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept { - ParallelFor(info, box,std::forward(f)); + ParallelFor(info, box,std::forward(f)); +} + +template +void For (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +{ + ParallelFor(info, box,std::forward(f)); } template ::value> > void For (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept { - ParallelFor(info,box,ncomp,std::forward(f)); + ParallelFor(info,box,ncomp,std::forward(f)); +} + +template ::value> > +void For (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +{ + ParallelFor(info,box,ncomp,std::forward(f)); } template void For (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { - ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); + ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); +} + 
+template +void For (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); } template @@ -1213,7 +1345,15 @@ void For (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { - ParallelFor(info,box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); + ParallelFor(info,box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + +template +void For (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + ParallelFor(info,box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } template (f1),box2,ncomp2,std::forward(f2)); + ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +void For (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } template (info, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void For (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + ParallelFor(info, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); @@ -1244,32 +1409,63 @@ void For (Gpu::KernelInfo const& info, template ::value> > void ParallelFor (T n, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); + ParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); +} + +template ::value> > +void ParallelFor (T n, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, n, 
std::forward(f)); } template void ParallelFor (Box const& box, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{}, box, std::forward(f)); + ParallelFor(Gpu::KernelInfo{}, box, std::forward(f)); +} + +template +void ParallelFor (Box const& box, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, box, std::forward(f)); } template ::value> > void ParallelFor (Box const& box, T ncomp, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); + ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); +} + +template ::value> > +void ParallelFor (Box const& box, T ncomp, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); } template void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { - ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); + ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); +} + +template +void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); } template void ParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { - ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); + ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + +template +void ParallelFor (Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } template (f1),box2,ncomp2,std::forward(f2)); + ParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + 
ParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } template (Gpu::KernelInfo{}, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); @@ -1298,32 +1517,63 @@ void ParallelFor (Box const& box1, T1 ncomp1, L1&& f1, template ::value> > void For (T n, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{}, n,std::forward(f)); + ParallelFor(Gpu::KernelInfo{}, n,std::forward(f)); +} + +template ::value> > +void For (T n, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, n,std::forward(f)); } template void For (Box const& box, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{}, box,std::forward(f)); + ParallelFor(Gpu::KernelInfo{}, box,std::forward(f)); +} + +template +void For (Box const& box, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, box,std::forward(f)); } template ::value> > void For (Box const& box, T ncomp, L&& f) noexcept { - ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); + ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); +} + +template ::value> > +void For (Box const& box, T ncomp, L&& f) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); } template void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { - ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); + ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); +} + +template +void For (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); } template void For (Box 
const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { - ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); + ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); +} + +template +void For (Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,box2,box3,std::forward(f1),std::forward(f2),std::forward(f3)); } template (f1),box2,ncomp2,std::forward(f2)); + ParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +void For (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + ParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } template (Gpu::KernelInfo{}, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void For (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + ParallelFor(Gpu::KernelInfo{}, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); @@ -1354,10 +1627,30 @@ std::enable_if_t::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info,n,std::forward(f)); + ParallelFor(info,n,std::forward(f)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); +#else AMREX_PRAGMA_SIMD for (T i = 0; i < n; ++i) f(i); +#endif + } +} + +template ::value> > +std::enable_if_t::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info,n,std::forward(f)); + } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else + AMREX_PRAGMA_SIMD + for (T i = 0; i < n; ++i) f(i); +#endif } } @@ -1365,7 +1658,14 @@ template ::value> HostDeviceParallelFor (T n, L&& f) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); + HostDeviceParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); +} + +template ::value> > +std::enable_if_t::value> +HostDeviceParallelFor (T n, L&& f) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{}, n, std::forward(f)); } template @@ -1373,9 +1673,28 @@ std::enable_if_t::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info, box,std::forward(f)); + ParallelFor(info, box,std::forward(f)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box,std::forward(f)); +#endif + } +} + +template +std::enable_if_t::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info, box,std::forward(f)); + } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); +#else + LoopConcurrentOnCpu(box,std::forward(f)); +#endif } } @@ -1384,9 +1703,28 @@ std::enable_if_t::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info, box,ncomp,std::forward(f)); + ParallelFor(info, box,ncomp,std::forward(f)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box,ncomp,std::forward(f)); +#endif + } +} + +template ::value> > +std::enable_if_t::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info, box,ncomp,std::forward(f)); + } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else + LoopConcurrentOnCpu(box,ncomp,std::forward(f)); +#endif } } @@ -1396,26 +1734,51 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); + ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box1,std::forward(f1)); LoopConcurrentOnCpu(box2,std::forward(f2)); +#endif } } -template +template +std::enable_if_t::value && MaybeHostDeviceRunnable::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); + } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); +#else + LoopConcurrentOnCpu(box1,std::forward(f1)); + LoopConcurrentOnCpu(box2,std::forward(f2)); +#endif + } +} + +template std::enable_if_t::value && MaybeHostDeviceRunnable::value && MaybeHostDeviceRunnable::value> HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info,box1,box2,box3, + ParallelFor(info,box1,box2,box3, std::forward(f1),std::forward(f2),std::forward(f3)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box1,std::forward(f1)); LoopConcurrentOnCpu(box2,std::forward(f2)); LoopConcurrentOnCpu(box3,std::forward(f3)); +#endif } } @@ -1428,10 +1791,34 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box2, T2 ncomp2, L2&& f2) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); + ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else + LoopConcurrentOnCpu(box1,ncomp1,std::forward(f1)); + LoopConcurrentOnCpu(box2,ncomp2,std::forward(f2)); +#endif + } +} + +template ::value>, + typename M2=std::enable_if_t::value> > +std::enable_if_t::value && MaybeHostDeviceRunnable::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); + } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); +#else LoopConcurrentOnCpu(box1,ncomp1,std::forward(f1)); LoopConcurrentOnCpu(box2,ncomp2,std::forward(f2)); +#endif } } @@ -1446,40 +1833,95 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box3, T3 ncomp3, L3&& f3) noexcept { if (Gpu::inLaunchRegion()) { - ParallelFor(info, + ParallelFor(info, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); +#else LoopConcurrentOnCpu(box1,ncomp1,std::forward(f1)); LoopConcurrentOnCpu(box2,ncomp2,std::forward(f2)); LoopConcurrentOnCpu(box3,ncomp3,std::forward(f3)); +#endif + } +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +std::enable_if_t::value && MaybeHostDeviceRunnable::value && MaybeHostDeviceRunnable::value> +HostDeviceParallelFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + if (Gpu::inLaunchRegion()) { + ParallelFor(info, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); + } else { +#ifdef AMREX_USE_DPCPP + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); +#else + LoopConcurrentOnCpu(box1,ncomp1,std::forward(f1)); + LoopConcurrentOnCpu(box2,ncomp2,std::forward(f2)); + LoopConcurrentOnCpu(box3,ncomp3,std::forward(f3)); +#endif } } template ::value> > void HostDeviceFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept { - HostDeviceParallelFor(info,n,std::forward(f)); + HostDeviceParallelFor(info,n,std::forward(f)); +} + +template ::value> > +void HostDeviceFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept +{ + HostDeviceParallelFor(info,n,std::forward(f)); } template void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept { - HostDeviceParallelFor(info,box,std::forward(f)); + HostDeviceParallelFor(info,box,std::forward(f)); +} + +template +void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept +{ + HostDeviceParallelFor(info,box,std::forward(f)); } template ::value> > void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept { - HostDeviceParallelFor(info,box,ncomp,std::forward(f)); + HostDeviceParallelFor(info,box,ncomp,std::forward(f)); +} + +template ::value> > +void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept +{ + HostDeviceParallelFor(info,box,ncomp,std::forward(f)); } template void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { - HostDeviceParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); + HostDeviceParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); +} + +template +void HostDeviceFor (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + HostDeviceParallelFor(info,box1,box2,std::forward(f1),std::forward(f2)); } template @@ -1487,7 +1929,16 @@ void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { - HostDeviceParallelFor(info, box1,box2,box3, + 
HostDeviceParallelFor(info, box1,box2,box3, + std::forward(f1),std::forward(f2),std::forward(f3)); +} + +template +void HostDeviceFor (Gpu::KernelInfo const& info, + Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + HostDeviceParallelFor(info, box1,box2,box3, std::forward(f1),std::forward(f2),std::forward(f3)); } @@ -1498,7 +1949,17 @@ void HostDeviceFor (Gpu::KernelInfo const& info, Box const& box1, T1 ncomp1, L1&& f1, Box const& box2, T2 ncomp2, L2&& f2) noexcept { - HostDeviceParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); + HostDeviceParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + HostDeviceParallelFor(info,box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } template (info, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceFor (Gpu::KernelInfo const& info, + Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + HostDeviceParallelFor(info, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); @@ -1519,32 +1995,64 @@ void HostDeviceFor (Gpu::KernelInfo const& info, template ::value> > void HostDeviceParallelFor (T n, L&& f) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{},n,std::forward(f)); + HostDeviceParallelFor(Gpu::KernelInfo{},n,std::forward(f)); +} + +template ::value> > +void HostDeviceParallelFor (T n, L&& f) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{},n,std::forward(f)); } template void HostDeviceParallelFor (Box const& box, L&& f) noexcept { - 
HostDeviceParallelFor(Gpu::KernelInfo{},box,std::forward(f)); + HostDeviceParallelFor(Gpu::KernelInfo{},box,std::forward(f)); +} + +template +void HostDeviceParallelFor (Box const& box, L&& f) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{},box,std::forward(f)); } template ::value> > void HostDeviceParallelFor (Box const& box, T ncomp, L&& f) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); + HostDeviceParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); +} + +template ::value> > +void HostDeviceParallelFor (Box const& box, T ncomp, L&& f) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{},box,ncomp,std::forward(f)); } template void HostDeviceParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); + HostDeviceParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); +} + +template +void HostDeviceParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{},box1,box2,std::forward(f1),std::forward(f2)); } template void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, L1&& f1, L2&& f2, L3&& f3) noexcept { - HostDeviceParallelFor(Gpu::KernelInfo{}, box1,box2,box3, + HostDeviceParallelFor(Gpu::KernelInfo{}, box1,box2,box3, + std::forward(f1),std::forward(f2),std::forward(f3)); +} + +template +void HostDeviceParallelFor (Box const& box1, Box const& box2, Box const& box3, + L1&& f1, L2&& f2, L3&& f3) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{}, box1,box2,box3, std::forward(f1),std::forward(f2),std::forward(f3)); } @@ -1554,7 +2062,16 @@ template (f1),box2,ncomp2,std::forward(f2)); + HostDeviceParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); +} + +template ::value>, + typename M2=std::enable_if_t::value> > +void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + 
Box const& box2, T2 ncomp2, L2&& f2) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{},box1,ncomp1,std::forward(f1),box2,ncomp2,std::forward(f2)); } template (Gpu::KernelInfo{}, + box1,ncomp1,std::forward(f1), + box2,ncomp2,std::forward(f2), + box3,ncomp3,std::forward(f3)); +} + +template ::value>, + typename M2=std::enable_if_t::value>, + typename M3=std::enable_if_t::value> > +void HostDeviceParallelFor (Box const& box1, T1 ncomp1, L1&& f1, + Box const& box2, T2 ncomp2, L2&& f2, + Box const& box3, T3 ncomp3, L3&& f3) noexcept +{ + HostDeviceParallelFor(Gpu::KernelInfo{}, box1,ncomp1,std::forward(f1), box2,ncomp2,std::forward(f2), box3,ncomp3,std::forward(f3)); diff --git a/Src/Base/AMReX_GpuLaunchMacrosG.H b/Src/Base/AMReX_GpuLaunchMacrosG.H index 89aa1f24bc9..e1c643454bc 100644 --- a/Src/Base/AMReX_GpuLaunchMacrosG.H +++ b/Src/Base/AMReX_GpuLaunchMacrosG.H @@ -29,10 +29,16 @@ } \ } \ else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + }}} + +#if 0 for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \ block \ } \ }}} +#endif + #else #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE(TN,TI,block) \ { auto const& amrex_i_tn = TN; \ @@ -40,7 +46,7 @@ if (amrex::Gpu::inLaunchRegion()) \ { \ const auto amrex_i_ec = amrex::Gpu::ExecutionConfig(amrex_i_tn); \ - AMREX_LAUNCH_KERNEL(amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \ block \ @@ -93,6 +99,10 @@ } \ } \ else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); \ + }}} + +#if 0 for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ block1 \ } \ @@ -100,6 +110,8 @@ block2 \ } \ }}} +#endif + #else #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(TN1,TI1,block1,TN2,TI2,block2) \ { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; \ @@ -111,7 +123,7 @@ dim3 amrex_i_nblocks = amrex::max(amrex_i_ec1.numBlocks.x, \ amrex_i_ec2.numBlocks.x); \ amrex_i_nblocks.y = 2; \ - AMREX_LAUNCH_KERNEL(amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ switch (blockIdx.y) { \ case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ @@ -179,6 +191,10 @@ } \ } \ else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + }}} + +#if 0 for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ block1 \ } \ @@ -189,6 +205,8 @@ block3 \ } \ }}} +#endif + #else #define AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(TN1,TI1,block1,TN2,TI2,block2,TN3,TI3,block3) \ { auto const& amrex_i_tn1 = TN1; auto const& amrex_i_tn2 = TN2; auto const& amrex_i_tn3 = TN3; \ @@ -202,7 +220,7 @@ amrex_i_ec2.numBlocks.x), \ amrex_i_ec3.numBlocks.x); \ amrex_i_nblocks.y = 3; \ - AMREX_LAUNCH_KERNEL(amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ switch (blockIdx.y) { \ case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ @@ -269,7 +287,7 @@ if (amrex::Gpu::inLaunchRegion()) \ { \ auto amrex_i_ec = amrex::Gpu::ExecutionConfig(amrex_i_tn); \ - AMREX_LAUNCH_KERNEL(amrex_i_ec.numBlocks, amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_ec.numBlocks, 
amrex_i_ec.numThreads, amrex_i_ec.sharedMem, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ for (auto const TI : amrex::Gpu::Range(amrex_i_tn)) { \ block \ @@ -333,7 +351,7 @@ dim3 amrex_i_nblocks = amrex::max(amrex_i_ec1.numBlocks.x, \ amrex_i_ec2.numBlocks.x); \ amrex_i_nblocks.y = 2; \ - AMREX_LAUNCH_KERNEL(amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ switch (blockIdx.y) { \ case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ @@ -410,7 +428,7 @@ amrex_i_ec2.numBlocks.x), \ amrex_i_ec3.numBlocks.x); \ amrex_i_nblocks.y = 3; \ - AMREX_LAUNCH_KERNEL(amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, amrex_i_nblocks, amrex_i_ec1.numThreads, 0, amrex::Gpu::gpuStream(), \ [=] AMREX_GPU_DEVICE () noexcept { \ switch (blockIdx.y) { \ case 0: for (auto const TI1 : amrex::Gpu::Range(amrex_i_tn1)) { \ @@ -434,6 +452,18 @@ // FOR_1D +#ifdef AMREX_USE_DPCPP +#define AMREX_GPU_HOST_DEVICE_FOR_1D(n,i,block) \ +{ \ + auto const& amrex_i_n = n; \ + using amrex_i_inttype = typename std::remove_const::type; \ + if (amrex::Gpu::inLaunchRegion()) { \ + amrex::ParallelFor(amrex_i_n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); \ + } \ +} +#else #define AMREX_GPU_HOST_DEVICE_FOR_1D(n,i,block) \ { \ auto const& amrex_i_n = n; \ @@ -446,6 +476,7 @@ for (amrex_i_inttype i = 0; i < amrex_i_n; ++i) amrex_i_lambda(i); \ } \ } +#endif #define AMREX_GPU_DEVICE_FOR_1D(n,i,block) \ { \ @@ -455,6 +486,17 @@ // FOR_3D +#ifdef AMREX_USE_DPCPP +#define AMREX_GPU_HOST_DEVICE_FOR_3D(box,i,j,k,block) \ +{ \ + auto const& amrex_i_box = box; \ + if (amrex::Gpu::inLaunchRegion()) { \ + amrex::ParallelFor(amrex_i_box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } \ +} +#else #define AMREX_GPU_HOST_DEVICE_FOR_3D(box,i,j,k,block) \ { \ auto const& amrex_i_box = box; \ @@ -464,6 +506,7 @@ amrex::LoopConcurrentOnCpu(amrex_i_box,[=] (int i, int j, int k) noexcept block); \ } \ } +#endif #define AMREX_GPU_DEVICE_FOR_3D(box,i,j,k,block) \ { \ @@ -472,6 +515,18 @@ // FOR_4D +#ifdef AMREX_USE_DPCPP +#define AMREX_GPU_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \ +{ \ + auto const& amrex_i_box = box; \ + auto const& amrex_i_ncomp = ncomp; \ + if (amrex::Gpu::inLaunchRegion()) { \ + amrex::ParallelFor(amrex_i_box,amrex_i_ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. 
It takes too long to compile"); \ + } \ +} +#else #define AMREX_GPU_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \ { \ auto const& amrex_i_box = box; \ @@ -482,6 +537,7 @@ amrex::LoopConcurrentOnCpu(amrex_i_box,amrex_i_ncomp,[=] (int i, int j, int k, int n) noexcept block); \ } \ } +#endif #define AMREX_GPU_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \ { \ diff --git a/Src/Base/AMReX_GpuQualifiers.H b/Src/Base/AMReX_GpuQualifiers.H index ce07a3e52c2..b5d5ea58fbd 100644 --- a/Src/Base/AMReX_GpuQualifiers.H +++ b/Src/Base/AMReX_GpuQualifiers.H @@ -41,10 +41,6 @@ # include -namespace amrex { - namespace oneapi = sycl::ext::oneapi; -} - # define AMREX_REQUIRE_SUBGROUP_SIZE(x) \ _Pragma("clang diagnostic push") \ _Pragma("clang diagnostic ignored \"-Wattributes\"") \ diff --git a/Src/Base/AMReX_GpuReduce.H b/Src/Base/AMReX_GpuReduce.H index 9b48138940c..7b9b0e42355 100644 --- a/Src/Base/AMReX_GpuReduce.H +++ b/Src/Base/AMReX_GpuReduce.H @@ -8,6 +8,7 @@ #include #include #include +#include #if !defined(AMREX_USE_CUB) && defined(AMREX_USE_CUDA) && defined(__CUDACC__) && (__CUDACC_VER_MAJOR__ >= 11) #define AMREX_USE_CUB 1 @@ -54,10 +55,10 @@ template struct warpReduce { AMREX_GPU_DEVICE AMREX_FORCE_INLINE - T operator() (T x, amrex::oneapi::sub_group const& sg) const noexcept + T operator() (T x, sycl::sub_group const& sg) const noexcept { for (int offset = warpSize/2; offset > 0; offset /= 2) { - T y = sg.shuffle_down(x, offset); + T y = sycl::shift_group_left(sg, x, offset); x = F()(x,y); } return x; @@ -70,7 +71,7 @@ T blockReduce (T x, WARPREDUCE && warp_reduce, T x0, Gpu::Handler const& h) { T* shared = (T*)h.local; int tid = h.item->get_local_id(0); - amrex::oneapi::sub_group const& sg = h.item->get_sub_group(); + sycl::sub_group const& sg = h.item->get_sub_group(); int lane = sg.get_local_id()[0]; int wid = sg.get_group_id()[0]; int numwarps = sg.get_group_range()[0]; @@ -93,7 +94,7 @@ AMREX_GPU_DEVICE AMREX_FORCE_INLINE void blockReduce_partial (T* dest, T x, WARPREDUCE 
&& warp_reduce, ATOMICOP && atomic_op, Gpu::Handler const& handler) { - amrex::oneapi::sub_group const& sg = handler.item->get_sub_group(); + sycl::sub_group const& sg = handler.item->get_sub_group(); int wid = sg.get_group_id()[0]; if ((wid+1)*warpSize <= handler.numActiveThreads) { x = warp_reduce(x, sg); // full warp @@ -249,15 +250,54 @@ void deviceReduceLogicalOr (int * dest, int source, Gpu::Handler const& h) noexc #elif defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP) +namespace detail { + +template +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +T shuffle_down (T x, int offset) noexcept +{ + return AMREX_HIP_OR_CUDA(__shfl_down(x, offset), + __shfl_down_sync(0xffffffff, x, offset)); +} + +// If other sizeof is needed, we can implement it later. +template = 0> +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +T multi_shuffle_down (T x, int offset) noexcept +{ + constexpr int nwords = (sizeof(T) + sizeof(unsigned int) - 1) / sizeof(unsigned int); + T y; + auto py = reinterpret_cast(&y); + auto px = reinterpret_cast(&x); + for (int i = 0; i < nwords; ++i) { + py[i] = shuffle_down(px[i],offset); + } + return y; +} + +} + template struct warpReduce { + // Not all arithmetic types can be taken by shuffle_down, but it's good enough. 
+ template ::value,int> = 0> + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + T operator() (T x) const noexcept + { + for (int offset = warpSize/2; offset > 0; offset /= 2) { + T y = detail::shuffle_down(x, offset); + x = F()(x,y); + } + return x; + } + + template ::value,int> = 0> AMREX_GPU_DEVICE AMREX_FORCE_INLINE T operator() (T x) const noexcept { for (int offset = warpSize/2; offset > 0; offset /= 2) { - AMREX_HIP_OR_CUDA(T y = __shfl_down(x, offset);, - T y = __shfl_down_sync(0xffffffff, x, offset); ) + T y = detail::multi_shuffle_down(x, offset); x = F()(x,y); } return x; diff --git a/Src/Base/AMReX_GpuTypes.H b/Src/Base/AMReX_GpuTypes.H index 737a47e665c..12b8fbc1829 100644 --- a/Src/Base/AMReX_GpuTypes.H +++ b/Src/Base/AMReX_GpuTypes.H @@ -8,7 +8,6 @@ #ifdef AMREX_USE_DPCPP #include -namespace sycl = cl::sycl; #endif namespace amrex { diff --git a/Src/Base/AMReX_MFIter.H b/Src/Base/AMReX_MFIter.H index eb259ac7b6d..9c01e38b138 100644 --- a/Src/Base/AMReX_MFIter.H +++ b/Src/Base/AMReX_MFIter.H @@ -164,6 +164,8 @@ public: static int allowMultipleMFIters (int allow); + void Finalize (); + protected: std::unique_ptr m_fa; //!< This must be the first member! 
@@ -180,6 +182,7 @@ protected: IndexType typ; bool dynamic; + bool finalized = false; struct DeviceSync { DeviceSync () = default; diff --git a/Src/Base/AMReX_MFIter.cpp b/Src/Base/AMReX_MFIter.cpp index e8a97256d3d..c761c466449 100644 --- a/Src/Base/AMReX_MFIter.cpp +++ b/Src/Base/AMReX_MFIter.cpp @@ -209,6 +209,19 @@ MFIter::MFIter (const FabArrayBase& fabarray_, const MFItInfo& info) MFIter::~MFIter () { + Finalize(); +} + +void +MFIter::Finalize () +{ + // avoid double finalize + if (finalized) return; + finalized = true; + + // mark as invalid + currentIndex = endIndex; + #ifdef AMREX_USE_OMP #pragma omp master #endif @@ -237,6 +250,9 @@ MFIter::~MFIter () #endif m_fa->clearThisBD(); } + if (m_fa) { + m_fa.reset(nullptr); + } } void diff --git a/Src/Base/AMReX_MPMD.H b/Src/Base/AMReX_MPMD.H new file mode 100644 index 00000000000..2b8ef399866 --- /dev/null +++ b/Src/Base/AMReX_MPMD.H @@ -0,0 +1,178 @@ +#ifndef AMREX_MPMD_H_ +#define AMREX_MPMD_H_ +#include + +#ifdef AMREX_USE_MPI + +#include + +#include + +namespace amrex { namespace MPMD { + +MPI_Comm Initialize (int argc, char* argv[]); + +void Finalize (); + +bool Initialized (); + +int MyProc (); //! Process ID in MPI_COMM_WORLD +int NProcs (); //! Number of processes in MPI_COMM_WORLD +int MyProgId (); //! 
Program ID + +class Copier +{ +public: + Copier (BoxArray const& ba, DistributionMapping const& dm); + + template + void send (FabArray const& fa, int icomp, int ncomp) const; + + template + void recv (FabArray& fa, int icomp, int ncomp) const; + +private: + std::map m_SndTags; + std::map m_RcvTags; +}; + +template +void Copier::send (FabArray const& mf, int icomp, int ncomp) const +{ + const int N_snds = m_SndTags.size(); + + if (N_snds == 0) return; + + // Prepare buffer + + Vector send_data; + Vector send_size; + Vector send_rank; + Vector send_reqs; + Vector send_cctc; + + Vector offset; + std::size_t total_volume = 0; + for (auto const& kv : m_SndTags) { + auto const& cctc = kv.second; + + std::size_t nbytes = 0; + for (auto const& cct : cctc) { + nbytes += cct.sbox.numPts() * ncomp * sizeof(typename FAB::value_type); + } + + std::size_t acd = ParallelDescriptor::alignof_comm_data(nbytes); + nbytes = amrex::aligned_size(acd, nbytes); // so that bytes are aligned + + // Also need to align the offset properly + total_volume = amrex::aligned_size(std::max(alignof(typename FAB::value_type), + acd), total_volume); + + offset.push_back(total_volume); + total_volume += nbytes; + + send_data.push_back(nullptr); + send_size.push_back(nbytes); + send_rank.push_back(kv.first); + send_reqs.push_back(MPI_REQUEST_NULL); + send_cctc.push_back(&cctc); + } + + Gpu::PinnedVector send_buffer(total_volume); + char* the_send_data = send_buffer.data(); + for (int i = 0; i < N_snds; ++i) { + send_data[i] = the_send_data + offset[i]; + } + + // Pack buffer +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion() && (mf.arena()->isDevice() || mf.arena()->isManaged())) { + mf.pack_send_buffer_gpu(mf, icomp, ncomp, send_data, send_size, send_cctc); + } else +#endif + { + mf.pack_send_buffer_cpu(mf, icomp, ncomp, send_data, send_size, send_cctc); + } + + // Send + for (int i = 0; i < N_snds; ++i) { + send_reqs[i] = ParallelDescriptor::Asend + (send_data[i], send_size[i], send_rank[i], 100, 
MPI_COMM_WORLD).req(); + } + Vector stats(N_snds); + ParallelDescriptor::Waitall(send_reqs, stats); +} + +template +void Copier::recv (FabArray& mf, int icomp, int ncomp) const +{ + const int N_rcvs = m_RcvTags.size(); + + if (N_rcvs == 0) return; + + // Prepare buffer + + Vector recv_data; + Vector recv_size; + Vector recv_from; + Vector recv_reqs; + + Vector offset; + std::size_t TotalRcvsVolume = 0; + for (auto const& kv : m_RcvTags) { + std::size_t nbytes = 0; + for (auto const& cct : kv.second) { + nbytes += cct.dbox.numPts() * ncomp * sizeof(typename FAB::value_type); + } + + std::size_t acd = ParallelDescriptor::alignof_comm_data(nbytes); + nbytes = amrex::aligned_size(acd, nbytes); // so that nbytes are aligned + + // Also need to align the offset properly + TotalRcvsVolume = amrex::aligned_size(std::max(alignof(typename FAB::value_type), + acd), TotalRcvsVolume); + + offset.push_back(TotalRcvsVolume); + TotalRcvsVolume += nbytes; + + recv_data.push_back(nullptr); + recv_size.push_back(nbytes); + recv_from.push_back(kv.first); + recv_reqs.push_back(MPI_REQUEST_NULL); + } + + Gpu::PinnedVector recv_buffer(TotalRcvsVolume); + char* the_recv_data = recv_buffer.data(); + + // Recv + for (int i = 0; i < N_rcvs; ++i) { + recv_data[i] = the_recv_data + offset[i]; + recv_reqs[i] = ParallelDescriptor::Arecv + (recv_data[i], recv_size[i], recv_from[i], 100, MPI_COMM_WORLD).req(); + } + + Vector recv_cctc(N_rcvs, nullptr); + for (int i = 0; i < N_rcvs; ++i) { + recv_cctc[i] = &(m_RcvTags.at(recv_from[i])); + } + + Vector stats(N_rcvs); + ParallelDescriptor::Waitall(recv_reqs, stats); + + // Unpack buffer +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion() && (mf.arena()->isDevice() || mf.arena()->isManaged())) { + mf.unpack_recv_buffer_gpu(mf, icomp, ncomp, recv_data, recv_size, recv_cctc, + FabArrayBase::COPY, true); + } else +#endif + { + mf.unpack_recv_buffer_cpu(mf, icomp, ncomp, recv_data, recv_size, recv_cctc, + FabArrayBase::COPY, true); + } +} + +}} + +#endif 
+#endif diff --git a/Src/Base/AMReX_MPMD.cpp b/Src/Base/AMReX_MPMD.cpp new file mode 100644 index 00000000000..917c741c2a6 --- /dev/null +++ b/Src/Base/AMReX_MPMD.cpp @@ -0,0 +1,225 @@ +#include +#include + +#include +#include +#include +#include +#include + +#ifdef AMREX_USE_MPI + +namespace amrex { namespace MPMD { + +namespace { + bool initialized = false; + bool mpi_initialized_by_us = false; + MPI_Comm app_comm = MPI_COMM_NULL; + int myproc; + int nprocs; +} + +namespace { + +template +int num_unique_elements (std::vector& v) +{ + std::sort(v.begin(), v.end()); + auto last = std::unique(v.begin(), v.end()); + return last - v.begin(); +} + +} + +MPI_Comm Initialize (int argc, char* argv[]) +{ + initialized = true; + int flag; + MPI_Initialized(&flag); + if (!flag) { + MPI_Init(&argc, &argv); + mpi_initialized_by_us = true; + } + + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + int* p; + MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_APPNUM, &p, &flag); + int appnum = *p; + + std::vector all_appnum(nprocs); + MPI_Allgather(&appnum, 1, MPI_INT, all_appnum.data(), 1, MPI_INT, MPI_COMM_WORLD); + int napps = num_unique_elements(all_appnum); + + // MPI_APPNUM does not appear to work with slurm on some systems. 
+ if (napps != 2) { + std::vector all_argc(nprocs); + MPI_Allgather(&argc, 1, MPI_INT, all_argc.data(), 1, MPI_INT, MPI_COMM_WORLD); + napps = num_unique_elements(all_argc); + if (napps == 2) { + appnum = static_cast(argc != all_argc[0]); + } + } + + if (napps != 2) { + std::string exename; + if (argc > 0) { + exename = std::string(argv[0]); + } + unsigned long long hexe = std::hash{}(exename); + std::vector all_hexe(nprocs); + MPI_Allgather(&hexe, 1, MPI_UNSIGNED_LONG_LONG, + all_hexe.data(), 1, MPI_UNSIGNED_LONG_LONG, MPI_COMM_WORLD); + napps = num_unique_elements(all_hexe); + if (napps == 2) { + appnum = static_cast(hexe != all_hexe[0]); + } + } + + if (napps == 2) { + MPI_Comm_split(MPI_COMM_WORLD, appnum, myproc, &app_comm); + } else { + std::cout << "amrex::MPMD only supports two programs." << std::endl; + MPI_Abort(MPI_COMM_WORLD, 1); + } + + return app_comm; +} + +void Finalize () +{ + MPI_Comm_free(&app_comm); + if (mpi_initialized_by_us) { + MPI_Finalize(); + mpi_initialized_by_us = false; + } + initialized = false; +} + +bool Initialized () { return initialized; } + +int MyProc () +{ + return myproc; +} + +int NProcs () +{ + return nprocs; +} + +int MyProgId () +{ + return (myproc == ParallelDescriptor::MyProc()) ? 
0 : 1; +} + +Copier::Copier (BoxArray const& ba, DistributionMapping const& dm) +{ + int rank_offset = myproc - ParallelDescriptor::MyProc(); + int this_root, other_root; + if (rank_offset == 0) { // First program + this_root = 0; + other_root = ParallelDescriptor::NProcs(); + } else { + this_root = rank_offset; + other_root = 0; + } + + Vector bv = ba.boxList().data(); + + int this_nboxes = ba.size(); + Vector procs = dm.ProcessorMap(); + if (rank_offset != 0) { + for (int i = 0; i < this_nboxes; ++i) { + procs[i] += rank_offset; + } + } + + Vector obv; + Vector oprocs; + int other_nboxes; + if (myproc == this_root) { + if (rank_offset == 0) // the first program + { + MPI_Send(&this_nboxes, 1, MPI_INT, other_root, 0, MPI_COMM_WORLD); + MPI_Recv(&other_nboxes, 1, MPI_INT, other_root, 1, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + obv.resize(other_nboxes); + MPI_Send(bv.data(), this_nboxes, + ParallelDescriptor::Mpi_typemap::type(), + other_root, 2, MPI_COMM_WORLD); + MPI_Recv(obv.data(), other_nboxes, + ParallelDescriptor::Mpi_typemap::type(), + other_root, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + oprocs.resize(other_nboxes); + MPI_Send(procs.data(), this_nboxes, MPI_INT, other_root, 4, MPI_COMM_WORLD); + MPI_Recv(oprocs.data(), other_nboxes, MPI_INT, other_root, 5, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + else // the second program + { + MPI_Recv(&other_nboxes, 1, MPI_INT, other_root, 0, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Send(&this_nboxes, 1, MPI_INT, other_root, 1, MPI_COMM_WORLD); + obv.resize(other_nboxes); + MPI_Recv(obv.data(), other_nboxes, + ParallelDescriptor::Mpi_typemap::type(), + other_root, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Send(bv.data(), this_nboxes, + ParallelDescriptor::Mpi_typemap::type(), + other_root, 3, MPI_COMM_WORLD); + oprocs.resize(other_nboxes); + MPI_Recv(oprocs.data(), other_nboxes, MPI_INT, other_root, 4, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + MPI_Send(procs.data(), this_nboxes, MPI_INT, other_root, 5, 
MPI_COMM_WORLD); + } + } + + ParallelDescriptor::Bcast(&other_nboxes, 1); + if (obv.empty()) { + obv.resize(other_nboxes); + oprocs.resize(other_nboxes); + } + ParallelDescriptor::Bcast(obv.data(), obv.size()); + ParallelDescriptor::Bcast(oprocs.data(), oprocs.size()); + + BoxArray oba(BoxList(std::move(obv))); + + // At this point, ba and bv hold our boxes, and oba holds the other + // program's boxes. procs holds mpi ranks of our boxes, and oprocs holds + // mpi ranks of the other program's boxes. All mpi ranks are in + // MPI_COMM_WORLD. + + // Build communication meta-data + + AMREX_ALWAYS_ASSERT(ba.ixType().cellCentered()); + + std::vector > isects; + + for (int i = 0; i < this_nboxes; ++i) { + if (procs[i] == myproc) { + oba.intersections(bv[i], isects); + for (auto const& isec : isects) { + const int oi = isec.first; + const Box& bx = isec.second; + const int orank = oprocs[oi]; + m_SndTags[orank].push_back + (FabArrayBase::CopyComTag(bx, bx, oi, i)); + m_RcvTags[orank].push_back + (FabArrayBase::CopyComTag(bx, bx, i, oi)); + } + } + } + + for (auto& kv : m_SndTags) { + std::sort(kv.second.begin(), kv.second.end()); + } + for (auto& kv : m_RcvTags) { + std::sort(kv.second.begin(), kv.second.end()); + } +} + +}} + +#endif diff --git a/Src/Base/AMReX_Math.H b/Src/Base/AMReX_Math.H index 7996830d534..3eed941fb00 100644 --- a/Src/Base/AMReX_Math.H +++ b/Src/Base/AMReX_Math.H @@ -9,7 +9,6 @@ #ifdef AMREX_USE_DPCPP #include -namespace sycl = cl::sycl; #endif namespace amrex { inline namespace disabled { diff --git a/Src/Base/AMReX_MultiFab.H b/Src/Base/AMReX_MultiFab.H index dfb75dacbf9..70e6facaee7 100644 --- a/Src/Base/AMReX_MultiFab.H +++ b/Src/Base/AMReX_MultiFab.H @@ -8,6 +8,7 @@ #include #include #include +#include #ifdef AMREX_USE_EB #include @@ -190,7 +191,7 @@ public: /** * \brief Returns the maximum *absolute* values contained in - * each component of "comps" of the MultiFab. No ghost cells are used. + * each component of "comps" of the MultiFab. 
"nghost" ghost cells are used. */ Vector norm0 (const Vector& comps, int nghost = 0, bool local = false, bool ignore_covered = false ) const; Vector norminf (const Vector& comps, int nghost = 0, bool local = false, bool ignore_covered = false) const { @@ -232,6 +233,13 @@ public: */ Real sum (int comp = 0, bool local = false) const; /** + * \brief Same as sum with local=false, but for non-cell-centered data, this + * skips non-unique points that are owned by multiple boxes. + */ + Real sum_unique (int comp = 0, + bool local = false, + const Periodicity& period = Periodicity::NonPeriodic()) const; + /** * \brief Adds the scalar value val to the value of each cell in the * specified subregion of the MultiFab. The subregion consists * of the num_comp components starting at component comp. diff --git a/Src/Base/AMReX_MultiFab.cpp b/Src/Base/AMReX_MultiFab.cpp index 9e2f37adf37..83664b307d4 100644 --- a/Src/Base/AMReX_MultiFab.cpp +++ b/Src/Base/AMReX_MultiFab.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #ifdef AMREX_MEM_PROFILING #include @@ -1586,6 +1587,58 @@ MultiFab::sum (int comp, bool local) const return sm; } +Real +MultiFab::sum_unique (int comp, + bool local, + const Periodicity& period) const +{ + BL_PROFILE("MultiFab::sum_unique()"); + + // no duplicatly distributed points if cell centered + if (ixType().cellCentered()) + return this->sum(comp, local); + + // Owner is the grid with the lowest grid number containing the data + std::unique_ptr owner_mask = OwnerMask(period); + + Real sm = Real(0.0); +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& ma = this->const_arrays(); + auto const& msk = owner_mask->const_arrays(); + sm = ParReduce(TypeList{}, TypeList{}, *this, IntVect(0), + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + -> GpuTuple + { + return msk[box_no](i,j,k) ? 
ma[box_no](i,j,k,comp) : 0.0_rt; + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm) +#endif + for (MFIter mfi(*this,true); mfi.isValid(); ++mfi) + { + Box const& bx = mfi.tilebox(); + Array4 const& a = this->const_array(mfi); + Array4 const& msk = owner_mask->const_array(mfi); + Real tmp = 0.0_rt; + AMREX_LOOP_3D(bx, i, j, k, + { + tmp += msk(i,j,k) ? a(i,j,k,comp) : 0.0_rt; + }); + sm += tmp; // Do it this way so that it does not break regression tests. + } + } + + if (!local) { + ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub()); + } + + return sm; +} + void MultiFab::minus (const MultiFab& mf, int strt_comp, int num_comp, int nghost) { diff --git a/Src/Base/AMReX_MultiFabUtil.H b/Src/Base/AMReX_MultiFabUtil.H index 1444bb90484..21f89c8ed6c 100644 --- a/Src/Base/AMReX_MultiFabUtil.H +++ b/Src/Base/AMReX_MultiFabUtil.H @@ -231,6 +231,35 @@ namespace amrex */ Gpu::HostVector sumToLine (MultiFab const& mf, int icomp, int ncomp, Box const& domain, int direction, bool local = false); + + /** \brief Volume weighted sum for a vector of MultiFabs + * + * Return a volume weighted sum of MultiFabs of AMR data. The sum is + * perform on a single component of the data. If the MultiFabs are + * built with EB Factories, the cut cell volume fraction will be + * included in the weight. + */ + Real volumeWeightedSum (Vector const& mf, int icomp, + Vector const& geom, + Vector const& ratio, + bool local = false); + + /** + * \brief Fourth-order interpolation from fine to coarse level. + * + * This is for high-order "average-down" of finite-difference data. If + * ghost cell data are used, it's the caller's responsibility to fill + * the ghost cells before calling this function. + * + * \param cmf coarse data + * \param scomp starting component + * \param ncomp number of component + * \param fmf fine data + * \param ratio refinement ratio. 
+ */ + void FourthOrderInterpFromFineToCoarse (MultiFab& cmf, int scomp, int ncomp, + MultiFab const& fmf, + IntVect const& ratio); } namespace amrex { diff --git a/Src/Base/AMReX_MultiFabUtil.cpp b/Src/Base/AMReX_MultiFabUtil.cpp index 26a7242e89d..3ae4aa91b9f 100644 --- a/Src/Base/AMReX_MultiFabUtil.cpp +++ b/Src/Base/AMReX_MultiFabUtil.cpp @@ -1226,4 +1226,245 @@ namespace amrex } return hv; } + + Real volumeWeightedSum (Vector const& mf, int icomp, + Vector const& geom, + Vector const& ratio, + bool local) + { + ReduceOps reduce_op; + ReduceData reduce_data(reduce_op); + +#ifdef AMREX_USE_EB + bool has_eb = !(mf[0]->isAllRegular()); +#endif + + int nlevels = mf.size(); + for (int ilev = 0; ilev < nlevels-1; ++ilev) { + iMultiFab mask = makeFineMask(*mf[ilev], *mf[ilev+1], IntVect(0), + ratio[ilev],Periodicity::NonPeriodic(), + 0, 1); + auto const& m = mask.const_arrays(); + auto const& a = mf[ilev]->const_arrays(); + auto const dx = geom[ilev].CellSizeArray(); + Real dv = AMREX_D_TERM(dx[0],*dx[1],*dx[2]); +#ifdef AMREX_USE_EB + if (has_eb) { + AMREX_ASSERT(mf[ilev]->hasEBFabFactory()); + auto const& f = dynamic_cast + (mf[ilev]->Factory()); + auto const& vfrac = f.getVolFrac(); + auto const& va = vfrac.const_arrays(); + reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + -> Real + { + return m[box_no](i,j,k) ? Real(0.) 
+ : dv*a[box_no](i,j,k,icomp)*va[box_no](i,j,k); + }); + } else +#endif + { +#if (AMREX_SPACEDIM == 1) + if (geom[ilev].IsSPHERICAL()) { + const auto rlo = geom[ilev].CellSize(0); + reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + if (m[box_no](i,j,k)) { + return Real(0.); + } else { + constexpr Real pi = Real(3.1415926535897932); + Real ri = rlo + dx[0]*i; + Real ro = ri + dx[0]; + return Real(4./3.)*pi*(ro-ri)*(ro*ro+ro*ri+ri*ri) + * a[box_no](i,j,k,icomp); + } + }); + } else +#elif (AMREX_SPACEDIM == 2) + if (geom[ilev].IsRZ()) { + const auto rlo = geom[ilev].CellSize(0); + reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + if (m[box_no](i,j,k)) { + return Real(0.); + } else { + Real ri = rlo + dx[0]*i; + Real ro = ri + dx[0]; + constexpr Real pi = Real(3.1415926535897932); + return pi*dx[1]*dx[0]*(ro+ri) + * a[box_no](i,j,k,icomp); + } + }); + } else +#endif + { + reduce_op.eval(*mf[ilev], IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + return m[box_no](i,j,k) ? Real(0.) 
+ : dv*a[box_no](i,j,k,icomp); + }); + } + } + Gpu::streamSynchronize(); + } + + auto const& a = mf.back()->const_arrays(); + auto const dx = geom[nlevels-1].CellSizeArray(); + Real dv = AMREX_D_TERM(dx[0],*dx[1],*dx[2]); +#ifdef AMREX_USE_EB + if (has_eb) { + AMREX_ASSERT(mf.back()->hasEBFabFactory()); + auto const& f = dynamic_cast + (mf.back()->Factory()); + auto const& vfrac = f.getVolFrac(); + auto const& va = vfrac.const_arrays(); + reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + -> Real + { + return dv*a[box_no](i,j,k,icomp)*va[box_no](i,j,k); + }); + } else +#endif + { +#if (AMREX_SPACEDIM == 1) + if (geom[nlevels-1].IsSPHERICAL()) { + const auto rlo = geom[nlevels-1].CellSize(0); + reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + constexpr Real pi = Real(3.1415926535897932); + Real ri = rlo + dx[0]*i; + Real ro = ri + dx[0]; + return Real(4./3.)*pi*(ro-ri)*(ro*ro+ro*ri+ri*ri) + * a[box_no](i,j,k,icomp); + }); + } else +#elif (AMREX_SPACEDIM == 2) + if (geom[nlevels-1].IsRZ()) { + const auto rlo = geom[nlevels-1].CellSize(0); + reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) + noexcept -> Real + { + Real ri = rlo + dx[0]*i; + Real ro = ri + dx[0]; + constexpr Real pi = Real(3.1415926535897932); + return pi*dx[1]*dx[0]*(ro+ri) + * a[box_no](i,j,k,icomp); + }); + } else +#endif + { + reduce_op.eval(*mf.back(), IntVect(0), reduce_data, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + { + return dv*a[box_no](i,j,k,icomp); + }); + } + } + + auto const& hv = reduce_data.value(reduce_op); + Real r = amrex::get<0>(hv); + + if (!local) { + ParallelAllReduce::Sum(r, ParallelContext::CommunicatorSub()); + } + return r; + } + + void FourthOrderInterpFromFineToCoarse (MultiFab& cmf, int scomp, int ncomp, + MultiFab const& fmf, + IntVect 
const& ratio) + { + AMREX_ASSERT(AMREX_D_TERM( (ratio[0] == 2 || ratio[0] == 4), + && (ratio[1] == 2 || ratio[1] == 4), + && (ratio[2] == 2 || ratio[2] == 4))); + + MultiFab tmp(amrex::coarsen(fmf.boxArray(), ratio), fmf.DistributionMap(), + ncomp, 0); + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + { +#if (AMREX_SPACEDIM > 1) + FArrayBox xtmp; +#if (AMREX_SPACEDIM > 2) + FArrayBox ytmp; +#endif +#endif + for (MFIter mfi(tmp,TilingIfNotGPU()); mfi.isValid(); ++mfi) { + Box const& bx = mfi.tilebox(); + auto const& fa = fmf.const_array(mfi,scomp); + + Box xbx = bx; +#if (AMREX_SPACEDIM == 1) + auto const& xa = tmp.array(mfi); +#else + xbx.refine(IntVect(AMREX_D_DECL(1,ratio[1],ratio[2]))); + if (ratio[1] == 2) { xbx.grow(1,1); } +#if (AMREX_SPACEDIM == 3) + if (ratio[2] == 2) { xbx.grow(2,1); } +#endif + xtmp.resize(xbx,ncomp); + Elixir eli = xtmp.elixir(); + auto const& xa = xtmp.array(); +#endif + AMREX_HOST_DEVICE_PARALLEL_FOR_4D(xbx, ncomp, i, j, k, n, + { + int ii = 2*i; + xa(i,j,k,n) = Real(1./16)*(Real(9.)*(fa(ii ,j,k,n) + + fa(ii+1,j,k,n)) + - fa(ii-1,j,k,n) + - fa(ii+2,j,k,n)); + }); + +#if (AMREX_SPACEDIM > 1) + Box ybx = bx; + auto const& xca = xtmp.const_array(); +#if (AMREX_SPACEDIM == 2) + auto const& ya = tmp.array(mfi); +#else + ybx.refine(IntVect(AMREX_D_DECL(1,1,ratio[2]))); + if (ratio[2] == 2) { ybx.grow(2,1); } + ytmp.resize(ybx,ncomp); + eli.append(ytmp.elixir()); + auto const& ya = ytmp.array(); +#endif + AMREX_HOST_DEVICE_PARALLEL_FOR_4D(ybx, ncomp, i, j, k, n, + { + int jj = 2*j; + ya(i,j,k,n) = Real(1./16)*(Real(9.)*(xca(i,jj ,k,n) + + xca(i,jj+1,k,n)) + - xca(i,jj-1,k,n) + - xca(i,jj+2,k,n)); + }); + +#if (AMREX_SPACEDIM == 3) + auto const& yca = ytmp.const_array(); + auto const& ca = tmp.array(mfi); + AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n, + { + int kk = 2*k; + ca(i,j,k,n) = Real(1./16)*(Real(9.)*(yca(i,j,kk ,n) + + yca(i,j,kk+1,n)) + - yca(i,j,kk-1,n) + - yca(i,j,kk+2,n)); + }); 
+#endif +#endif + } + } + + cmf.ParallelCopy(tmp, 0, scomp, ncomp); + } } diff --git a/Src/Base/AMReX_NonLocalBC.H b/Src/Base/AMReX_NonLocalBC.H index 7613a35de5b..fd534685a7b 100644 --- a/Src/Base/AMReX_NonLocalBC.H +++ b/Src/Base/AMReX_NonLocalBC.H @@ -1038,4 +1038,13 @@ FillPolar (FabArray& mf, Box const& domain); #include +namespace amrex { + using NonLocalBC::ParallelCopy; + using NonLocalBC::ParallelCopy_nowait; + using NonLocalBC::ParallelCopy_finish; + using NonLocalBC::MultiBlockIndexMapping; + using NonLocalBC::MultiBlockCommMetaData; + using NonLocalBC::CommHandler; +} + #endif diff --git a/Src/Base/AMReX_Orientation.H b/Src/Base/AMReX_Orientation.H index 064344cafd4..de9c54a1b6c 100644 --- a/Src/Base/AMReX_Orientation.H +++ b/Src/Base/AMReX_Orientation.H @@ -75,7 +75,7 @@ public: * according to the above ordering. */ AMREX_GPU_HOST_DEVICE - operator int () const noexcept { return val; } + constexpr operator int () const noexcept { return val; } //! Return opposite orientation. AMREX_GPU_HOST_DEVICE Orientation flip () const noexcept @@ -97,6 +97,30 @@ public: //! Read from an istream. friend std::istream& operator>> (std::istream& os, Orientation& o); + //! Int value of the x-lo-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int xlo () noexcept { return 0; } + + //! Int value of the x-hi-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int xhi () noexcept { return AMREX_SPACEDIM; } + + //! Int value of the y-lo-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int ylo () noexcept { return 1; } + + //! Int value of the y-hi-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int yhi () noexcept { return 1+AMREX_SPACEDIM; } + + //! Int value of the z-lo-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int zlo () noexcept { return 2; } + + //! 
Int value of the z-hi-face + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr int zhi () noexcept { return 2+AMREX_SPACEDIM; } + private: //! Used internally. AMREX_GPU_HOST_DEVICE diff --git a/Src/Base/AMReX_PODVector.H b/Src/Base/AMReX_PODVector.H index 7217b4e814e..bfae2c01627 100644 --- a/Src/Base/AMReX_PODVector.H +++ b/Src/Base/AMReX_PODVector.H @@ -608,7 +608,10 @@ namespace amrex void AllocateBuffer (size_type a_capacity) noexcept { pointer new_data = allocate(a_capacity); - if (m_data) detail::memCopyImpl(new_data, m_data, size() * sizeof(T), *this); + if (m_data) { + detail::memCopyImpl(new_data, m_data, size() * sizeof(T), *this); + amrex::Gpu::streamSynchronize(); + } deallocate(m_data, capacity()); m_data = new_data; m_capacity = a_capacity; @@ -621,9 +624,10 @@ namespace amrex pointer new_data = allocate(a_capacity); if (m_data) { - memCopyImpl(new_data, m_data, a_index * sizeof(T), *this); + memCopyImpl(new_data, m_data, a_index * sizeof(T), *this); memCopyImpl(new_data + a_index + a_count, m_data + a_index, (size() - a_index)*sizeof(T), *this); + amrex::Gpu::streamSynchronize(); } deallocate(m_data, capacity()); m_data = new_data; diff --git a/Src/Base/AMReX_ParallelDescriptor.H b/Src/Base/AMReX_ParallelDescriptor.H index 38cd4cdf167..03c431d135a 100644 --- a/Src/Base/AMReX_ParallelDescriptor.H +++ b/Src/Base/AMReX_ParallelDescriptor.H @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef BL_AMRPROF #include @@ -211,6 +212,11 @@ while ( false ) extern AMREX_EXPORT MPI_Comm m_comm; inline MPI_Comm Communicator () noexcept { return m_comm; } +#ifdef AMREX_USE_MPI + extern Vector m_mpi_types; + extern Vector m_mpi_ops; +#endif + //! 
return the number of MPI ranks local to the current Parallel Context inline int NProcs () noexcept @@ -1479,6 +1485,73 @@ void DoReduce (T* r, MPI_Op op, int cnt, int cpu) #endif } +#ifdef AMREX_USE_MPI +namespace ParallelDescriptor { + +template +struct Mpi_typemap> +{ + static MPI_Datatype type () + { + static MPI_Datatype mpi_type = MPI_DATATYPE_NULL; + if (mpi_type == MPI_DATATYPE_NULL) { + using T = ValLocPair; + static_assert(std::is_trivially_copyable::value, + "To communicate with MPI, ValLocPair must be trivially copyable."); + static_assert(std::is_standard_layout::value, + "To communicate with MPI, ValLocPair must be standard layout"); + + T vlp[2]; + MPI_Datatype types[] = { + Mpi_typemap::type(), + Mpi_typemap::type(), + }; + int blocklens[] = { 1, 1 }; + MPI_Aint disp[2]; + BL_MPI_REQUIRE( MPI_Get_address(&vlp[0].value, &disp[0]) ); + BL_MPI_REQUIRE( MPI_Get_address(&vlp[0].index, &disp[1]) ); + disp[1] -= disp[0]; + disp[0] = 0; + BL_MPI_REQUIRE( MPI_Type_create_struct(2, blocklens, disp, types, + &mpi_type) ); + MPI_Aint lb, extent; + BL_MPI_REQUIRE( MPI_Type_get_extent(mpi_type, &lb, &extent) ); + if (extent != sizeof(T)) { + MPI_Datatype tmp = mpi_type; + BL_MPI_REQUIRE( MPI_Type_create_resized(tmp, 0, sizeof(vlp[0]), &mpi_type) ); + BL_MPI_REQUIRE( MPI_Type_free(&tmp) ); + } + BL_MPI_REQUIRE( MPI_Type_commit( &mpi_type ) ); + + m_mpi_types.push_back(&mpi_type); + } + return mpi_type; + } +}; + +template +MPI_Op Mpi_op () +{ + static MPI_Op mpi_op = MPI_OP_NULL; + if (mpi_op == MPI_OP_NULL) { + static auto user_fn = [] (void *invec, void *inoutvec, int* len, + MPI_Datatype * /*datatype*/) + { + auto in = static_cast(invec); + auto out = static_cast(inoutvec); + for (int i = 0; i < *len; ++i) { + out[i] = F()(in[i],out[i]); + } + }; + BL_MPI_REQUIRE( MPI_Op_create(user_fn, 1, &mpi_op) ); + m_mpi_ops.push_back(&mpi_op); + } + return mpi_op; +} + +} +#endif + } #endif /*BL_PARALLELDESCRIPTOR_H*/ diff --git a/Src/Base/AMReX_ParallelDescriptor.cpp 
b/Src/Base/AMReX_ParallelDescriptor.cpp index 6d457d28398..3ea202d9b50 100644 --- a/Src/Base/AMReX_ParallelDescriptor.cpp +++ b/Src/Base/AMReX_ParallelDescriptor.cpp @@ -65,6 +65,11 @@ namespace amrex { namespace ParallelDescriptor { MPI_Comm m_comm = MPI_COMM_NULL; // communicator for all ranks, probably MPI_COMM_WORLD +#ifdef AMREX_USE_MPI + Vector m_mpi_types; + Vector m_mpi_ops; +#endif + int m_MinTag = 1000, m_MaxTag = -1; const int ioProcessor = 0; @@ -357,10 +362,20 @@ EndParallel () BL_MPI_REQUIRE( MPI_Type_free(&mpi_type_indextype) ); BL_MPI_REQUIRE( MPI_Type_free(&mpi_type_box) ); BL_MPI_REQUIRE( MPI_Type_free(&mpi_type_lull_t) ); + for (auto t : m_mpi_types) { + BL_MPI_REQUIRE( MPI_Type_free(t) ); + *t = MPI_DATATYPE_NULL; + } + for (auto op : m_mpi_ops) { + BL_MPI_REQUIRE( MPI_Op_free(op) ); + *op = MPI_OP_NULL; + } mpi_type_intvect = MPI_DATATYPE_NULL; mpi_type_indextype = MPI_DATATYPE_NULL; mpi_type_box = MPI_DATATYPE_NULL; mpi_type_lull_t = MPI_DATATYPE_NULL; + m_mpi_types.clear(); + m_mpi_ops.clear(); } if (!call_mpi_finalize) { diff --git a/Src/Base/AMReX_ParallelReduce.H b/Src/Base/AMReX_ParallelReduce.H index e0e1e98b66e..3a6db500a2a 100644 --- a/Src/Base/AMReX_ParallelReduce.H +++ b/Src/Base/AMReX_ParallelReduce.H @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -120,6 +121,32 @@ namespace ParallelGather { namespace ParallelAllReduce { + template + void Max (ValLocPair& vi, MPI_Comm comm) { +#ifdef AMREX_USE_MPI + auto tmp = vi; + using T = ValLocPair; + MPI_Allreduce(&tmp, &vi, 1, + ParallelDescriptor::Mpi_typemap::type(), + ParallelDescriptor::Mpi_op>(), comm); +#else + amrex::ignore_unused(vi, comm); +#endif + } + + template + void Min (ValLocPair& vi, MPI_Comm comm) { +#ifdef AMREX_USE_MPI + auto tmp = vi; + using T = ValLocPair; + MPI_Allreduce(&tmp, &vi, 1, + ParallelDescriptor::Mpi_typemap::type(), + ParallelDescriptor::Mpi_op>(), comm); +#else + amrex::ignore_unused(vi, comm); +#endif + } + template void Max (T& 
v, MPI_Comm comm) { detail::Reduce(detail::ReduceOp::max, v, -1, comm); @@ -174,6 +201,34 @@ namespace ParallelAllReduce { namespace ParallelReduce { + template + void Max (ValLocPair& vi, int root, MPI_Comm comm) { +#ifdef AMREX_USE_MPI + auto tmp = vi; + using T = ValLocPair; + MPI_Reduce(&tmp, &vi, 1, + ParallelDescriptor::Mpi_typemap::type(), + ParallelDescriptor::Mpi_op>(), + root, comm); +#else + amrex::ignore_unused(vi, root, comm); +#endif + } + + template + void Min (ValLocPair& vi, int root, MPI_Comm comm) { +#ifdef AMREX_USE_MPI + auto tmp = vi; + using T = ValLocPair; + MPI_Reduce(&tmp, &vi, 1, + ParallelDescriptor::Mpi_typemap::type(), + ParallelDescriptor::Mpi_op>(), + root, comm); +#else + amrex::ignore_unused(vi, root, comm); +#endif + } + template void Max (T& v, int root, MPI_Comm comm) { detail::Reduce(detail::ReduceOp::max, v, root, comm); diff --git a/Src/Base/AMReX_ParmParse.H b/Src/Base/AMReX_ParmParse.H index 6555ee5aec0..504aaa4f256 100644 --- a/Src/Base/AMReX_ParmParse.H +++ b/Src/Base/AMReX_ParmParse.H @@ -554,7 +554,7 @@ public: const std::string& val); //! keyword for files to load - static std::string FileKeyword; + static std::string const FileKeyword; //! Add keys and values from a file to the end of the PP table. static void addfile (std::string const filename); diff --git a/Src/Base/AMReX_ParmParse.cpp b/Src/Base/AMReX_ParmParse.cpp index 79e80fbb8bd..253ad0e37e0 100644 --- a/Src/Base/AMReX_ParmParse.cpp +++ b/Src/Base/AMReX_ParmParse.cpp @@ -34,7 +34,7 @@ static bool finalize_verbose = false; static bool finalize_verbose = true; #endif -std::string ParmParse::FileKeyword = "FILE"; +std::string const ParmParse::FileKeyword = "FILE"; // // Used by constructor to build table. 
@@ -609,7 +609,8 @@ addDefn (std::string& def, tab.push_back(ParmParse::PP_entry(def,val)); } val.clear(); - def = std::string(); + if ( def != ParmParse::FileKeyword ) + def = std::string(); } void @@ -991,7 +992,8 @@ ParmParse::prefixedName (const std::string& str) const void ParmParse::addfile (std::string const filename) { auto l = std::list{filename}; - addDefn(FileKeyword, + auto file = FileKeyword; + addDefn(file, l, g_table); } diff --git a/Src/Base/AMReX_RandomEngine.H b/Src/Base/AMReX_RandomEngine.H index a639e4731d7..967b9e66569 100644 --- a/Src/Base/AMReX_RandomEngine.H +++ b/Src/Base/AMReX_RandomEngine.H @@ -15,7 +15,6 @@ #include #elif defined(AMREX_USE_DPCPP) #include -namespace sycl = cl::sycl; #include namespace mkl = oneapi::mkl; #endif diff --git a/Src/Base/AMReX_Reduce.H b/Src/Base/AMReX_Reduce.H index 9c07b7b4a2a..9076e984828 100644 --- a/Src/Base/AMReX_Reduce.H +++ b/Src/Base/AMReX_Reduce.H @@ -6,9 +6,11 @@ #include #include #include +#include #include #include +#include namespace amrex { @@ -133,7 +135,12 @@ struct ReduceOpMin void local_update (T& d, T const& s) const noexcept { d = amrex::min(d,s); } template - constexpr void init (T& t) const noexcept { t = std::numeric_limits::max(); } + constexpr std::enable_if_t::is_specialized> + init (T& t) const noexcept { t = std::numeric_limits::max(); } + + template + constexpr std::enable_if_t::is_specialized> + init (T& t) const noexcept { t = T::max(); } }; struct ReduceOpMax @@ -161,7 +168,12 @@ struct ReduceOpMax void local_update (T& d, T const& s) const noexcept { d = amrex::max(d,s); } template - constexpr void init (T& t) const noexcept { t = std::numeric_limits::lowest(); } + constexpr std::enable_if_t::is_specialized> + init (T& t) const noexcept { t = std::numeric_limits::lowest(); } + + template + constexpr std::enable_if_t::is_specialized> + init (T& t) const noexcept { t = T::lowest(); } }; struct ReduceOpLogicalAnd @@ -899,7 +911,8 @@ bool AnyOf (Box const& box, P&& pred) } }); 
#else - AMREX_LAUNCH_KERNEL(ec.numBlocks, ec.numThreads, 0, Gpu::gpuStream(), + AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, ec.numBlocks, ec.numThreads, 0, + Gpu::gpuStream(), [=] AMREX_GPU_DEVICE () noexcept { __shared__ int has_any; if (threadIdx.x == 0) has_any = *dp; diff --git a/Src/Base/AMReX_RungeKutta.H b/Src/Base/AMReX_RungeKutta.H new file mode 100644 index 00000000000..b5e35f783c5 --- /dev/null +++ b/Src/Base/AMReX_RungeKutta.H @@ -0,0 +1,293 @@ +#ifndef AMREX_RUNGE_KUTTA_H_ +#define AMREX_RUNGE_KUTTA_H_ +#include + +#include + +namespace amrex { + +/** + * \brief Functions for Runge-Kutta methods + * + * This namespace RungeKutta has functions for a number of RK methods, RK2, RK3 + * and RK4. Here, RK2 refers to the explicit trapezoid rule, RK3 refers to + * the SSPRK3 + * (https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Third-order_Strong_Stability_Preserving_Runge-Kutta_(SSPRK3)), + * and RK4 is the classical fourth-order method + * (https://en.wikipedia.org/wiki/List_of_Runge%E2%80%93Kutta_methods#Classic_fourth-order_method). + * The function templates take the old data in FabArray/MultiFab as input, + * and evolve the system for one time step. The result is stored in another + * FabArray/MultiFab. These two FabArrays must have ghost cells if they are + * needed for evaluating the right-hand side. The functions take three + * callable objects for computing the right-hand side, filling ghost cells, + * and optionally post-processing RK stage results. For RK3 and RK4, they + * also need a callable object for storing the data needed for filling + * coarse/fine boundaries in AMR simulations.
+ * + * The callable object for right-hand side has the signature of `void(int + * stage, MF& dudt, MF const& u, Real t, Real dt)`, where `stage` is the RK + * stage number starting from 1, `dudt` is the output, `u` is the input, `t` + * is the first-order approximate time of the stage, and `dt` is the + * sub-time step, which can be used for reflux operations in AMR + * simulations. + * + * The callable object for filling ghost cells has the signature of + * `void(int stage, MF& u, Real t)`, where `stage` is the RK stage number + * starting from 1, `u` is a FabArray/MultiFab whose ghost cells need to be + * filled, and `t` is the first-order approximate time of the data at that + * stage. The FillPatcher class can be useful for implementing such a + * callable. See AmrLevel::RK for an example. + * + * The callable object for post-processing stage results is optional. It is + * a no-op by default. Its function signature is `void(int stage, MF& u)`, + * where `stage` is the RK stage number and `u` is the result of that stage. + * + * For RK3 and RK4, one must also provide a callable object with the + * signature of `void(Array<MF,order> const& rkk)`, where `order` is the RK + * order and `rkk` contains the right-hand side at all the RK stages. The + * FillPatcher class can be useful for implementing such a callable. See + * AmrLevel::RK for an example. + */ +namespace RungeKutta { + +struct PostStageNoOp { + template + std::enable_if_t::value> operator() (int, MF&) const {} +}; + +namespace detail { +//! Unew = Uold + dUdt * dt +template +void rk_update (MF& Unew, MF const& Uold, MF const& dUdt, Real dt) +{ + auto const& snew = Unew.arrays(); + auto const& sold = Uold.const_arrays(); + auto const& sdot = dUdt.const_arrays(); + amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE + (int bi, int i, int j, int k, int n) noexcept + { + snew[bi](i,j,k,n) = sold[bi](i,j,k,n) + dt*sdot[bi](i,j,k,n); + }); + Gpu::streamSynchronize(); +} + +//! 
Unew = Uold + (dUdt1 + dUdt2) * dt +template +void rk_update (MF& Unew, MF const& Uold, MF const& dUdt1, MF const& dUdt2, Real dt) +{ + auto const& snew = Unew.arrays(); + auto const& sold = Uold.const_arrays(); + auto const& sdot1 = dUdt1.const_arrays(); + auto const& sdot2 = dUdt2.const_arrays(); + amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE + (int bi, int i, int j, int k, int n) noexcept + { + snew[bi](i,j,k,n) = sold[bi](i,j,k,n) + dt*(sdot1[bi](i,j,k,n) + + sdot2[bi](i,j,k,n)); + }); + Gpu::streamSynchronize(); +} + +//! Unew = (Uold+Unew)/2 + dUdt * dt/2 +template +void rk2_update_2 (MF& Unew, MF const& Uold, MF const& dUdt, Real dt) +{ + auto const& snew = Unew.arrays(); + auto const& sold = Uold.const_arrays(); + auto const& sdot = dUdt.const_arrays(); + amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE + (int bi, int i, int j, int k, int n) noexcept + { + snew[bi](i,j,k,n) = Real(0.5)*(snew[bi](i,j,k,n) + + sold[bi](i,j,k,n) + + sdot[bi](i,j,k,n) * dt); + }); + Gpu::streamSynchronize(); +} + +//! Unew = Uold + (k1 + k2 + 4*k3) * dt6, where dt6 = dt/6 +template +void rk3_update_3 (MF& Unew, MF const& Uold, Array const& rkk, Real dt6) +{ + auto const& snew = Unew.arrays(); + auto const& sold = Uold.const_arrays(); + auto const& k1 = rkk[0].const_arrays(); + auto const& k2 = rkk[1].const_arrays(); + auto const& k3 = rkk[2].const_arrays(); + amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE + (int bi, int i, int j, int k, int n) noexcept + { + snew[bi](i,j,k,n) = sold[bi](i,j,k,n) + + dt6 * (k1[bi](i,j,k,n) + k2[bi](i,j,k,n) + + Real(4.) * k3[bi](i,j,k,n)); + }); + Gpu::streamSynchronize(); +} + +//! 
Unew = Uold + (k1+k4+2*(k2+k3))*dt6, where dt6 = dt/6 +template +void rk4_update_4 (MF& Unew, MF const& Uold, Array const& rkk, Real dt6) +{ + auto const& snew = Unew.arrays(); + auto const& sold = Uold.const_arrays(); + auto const& k1 = rkk[0].const_arrays(); + auto const& k2 = rkk[1].const_arrays(); + auto const& k3 = rkk[2].const_arrays(); + auto const& k4 = rkk[3].const_arrays(); + amrex::ParallelFor(Unew, IntVect(0), Unew.nComp(), [=] AMREX_GPU_DEVICE + (int bi, int i, int j, int k, int n) noexcept + { + snew[bi](i,j,k,n) = sold[bi](i,j,k,n) + + dt6 * ( k1[bi](i,j,k,n) + k4[bi](i,j,k,n) + + Real(2.)*(k2[bi](i,j,k,n) + k3[bi](i,j,k,n))); + }); + Gpu::streamSynchronize(); +} +} + +/** + * \brief Time stepping with RK2 + * + * \param Uold input FabArray/MultiFab data at time + * \param Unew output FabArray/MultiFab data at time+dt + * \param time time at the beginning of the step + * \param dt time step + * \param frhs computing the right-hand side + * \param fillbndry filling ghost cells + * \param post_stage post-processing stage results + */ +template +void RK2 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry, + P&& post_stage = PostStageNoOp()) +{ + BL_PROFILE("RungeKutta2"); + + MF dUdt(Unew.boxArray(), Unew.DistributionMap(), Unew.nComp(), 0, + MFInfo(), Unew.Factory()); + + // RK2 stage 1 + fillbndry(1, Uold, time); + frhs(1, dUdt, Uold, time, Real(0.5)*dt); + // Unew = Uold + dt * dUdt + detail::rk_update(Unew, Uold, dUdt, dt); + post_stage(1, Unew); + + // RK2 stage 2: the RHS is evaluated at time+dt, the same stage time passed to fillbndry + fillbndry(2, Unew, time+dt); + frhs(2, dUdt, Unew, time+dt, Real(0.5)*dt); + // Unew = (Uold+Unew)/2 + dUdt_2 * dt/2, + // which is Unew = Uold + dt/2 * (dUdt_1 + dUdt_2) + detail::rk2_update_2(Unew, Uold, dUdt, dt); + post_stage(2, Unew); +} + +/** + * \brief Time stepping with RK3 + * + * \param Uold input FabArray/MultiFab data at time + * \param Unew output FabArray/MultiFab data at time+dt + * \param time time at the beginning of the step + * \param dt time step + * 
\param frhs computing the right-hand side + * \param fillbndry filling ghost cells + * \param store_crse_data storing right-hand side data for AMR + * \param post_stage post-processing stage results + */ +template +void RK3 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry, + R&& store_crse_data, P&& post_stage = PostStageNoOp()) +{ + BL_PROFILE("RungeKutta3"); + + Array rkk; + for (auto& mf : rkk) { + mf.define(Unew.boxArray(), Unew.DistributionMap(), Unew.nComp(), 0, + MFInfo(), Unew.Factory()); + } + + // RK3 stage 1 + fillbndry(1, Uold, time); + frhs(1, rkk[0], Uold, time, dt/Real(6.)); + // Unew = Uold + k1 * dt + detail::rk_update(Unew, Uold, rkk[0], dt); + post_stage(1, Unew); + + // RK3 stage 2 + fillbndry(2, Unew, time+dt); + frhs(2, rkk[1], Unew, time+dt, dt/Real(6.)); + // Unew = Uold + (k1+k2) * dt/4 + detail::rk_update(Unew, Uold, rkk[0], rkk[1], Real(0.25)*dt); + post_stage(2, Unew); + + // RK3 stage 3 + Real t_half = time + Real(0.5)*dt; + fillbndry(3, Unew, t_half); + frhs(3, rkk[2], Unew, t_half, dt*Real(2./3.)); + // Unew = Uold + (k1/6 + k2/6 + k3*(2/3)) * dt + detail::rk3_update_3(Unew, Uold, rkk, Real(1./6.)*dt); + post_stage(3, Unew); + + store_crse_data(rkk); +} + +/** + * \brief Time stepping with RK4 + * + * \param Uold input FabArray/MultiFab data at time + * \param Unew output FabArray/MultiFab data at time+dt + * \param time time at the beginning of the step + * \param dt time step + * \param frhs computing the right-hand side + * \param fillbndry filling ghost cells + * \param store_crse_data storing right-hand side data for AMR + * \param post_stage post-processing stage results + */ +template +void RK4 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry, + R&& store_crse_data, P&& post_stage = PostStageNoOp()) +{ + BL_PROFILE("RungeKutta4"); + + Array rkk; + for (auto& mf : rkk) { + mf.define(Unew.boxArray(), Unew.DistributionMap(), Unew.nComp(), 0, + MFInfo(), Unew.Factory()); + } + + // RK4 stage 1 + 
fillbndry(1, Uold, time); + frhs(1, rkk[0], Uold, time, dt/Real(6.)); + // Unew = Uold + k1 * dt/2 + detail::rk_update(Unew, Uold, rkk[0], Real(0.5)*dt); + post_stage(1, Unew); + + // RK4 stage 2 + Real t_half = time + Real(0.5)*dt; + fillbndry(2, Unew, t_half); + frhs(2, rkk[1], Unew, t_half, dt/Real(3.)); + // Unew = Uold + k2 * dt/2 + detail::rk_update(Unew, Uold, rkk[1], Real(0.5)*dt); + post_stage(2, Unew); + + // RK4 stage 3 + fillbndry(3, Unew, t_half); + frhs(3, rkk[2], Unew, t_half, dt/Real(3.)); + // Unew = Uold + k3 * dt; + detail::rk_update(Unew, Uold, rkk[2], dt); + post_stage(3, Unew); + + // RK4 stage 4 + fillbndry(4, Unew, time+dt); + frhs(4, rkk[3], Unew, time+dt, dt/Real(6.)); + // Unew = Uold + (k1/6 + k2/3 + k3/3 + k4/6) * dt + detail::rk4_update_4(Unew, Uold, rkk, Real(1./6.)*dt); + post_stage(4, Unew); + + store_crse_data(rkk); +} + +}} + +#endif diff --git a/Src/Base/AMReX_Scan.H b/Src/Base/AMReX_Scan.H index 96aefb870b6..3dc5cb98f9a 100644 --- a/Src/Base/AMReX_Scan.H +++ b/Src/Base/AMReX_Scan.H @@ -197,7 +197,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) amrex::launch(nblocks, nthreads, sm, stream, [=] AMREX_GPU_DEVICE (Gpu::Handler const& gh) noexcept { - amrex::oneapi::sub_group const& sg = gh.item->get_sub_group(); + sycl::sub_group const& sg = gh.item->get_sub_group(); int lane = sg.get_local_id()[0]; int warp = sg.get_group_id()[0]; int nwarps = sg.get_group_range()[0]; @@ -226,7 +226,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) T x = x0; // Scan within a warp for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(x, i); + T s = sycl::shift_group_right(sg, x, i); if (lane >= i) x += s; } @@ -244,7 +244,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) if (warp == 0) { T y = (lane < nwarps) ? 
shared[lane] : 0; for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(y, i); + T s = sycl::shift_group_right(sg, y, i); if (lane >= i) y += s; } @@ -277,7 +277,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) amrex::launch(1, nthreads, sm, stream, [=] AMREX_GPU_DEVICE (Gpu::Handler const& gh) noexcept { - amrex::oneapi::sub_group const& sg = gh.item->get_sub_group(); + sycl::sub_group const& sg = gh.item->get_sub_group(); int lane = sg.get_local_id()[0]; int warp = sg.get_group_id()[0]; int nwarps = sg.get_group_range()[0]; @@ -293,7 +293,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) T x = (offset < nblocks) ? blocksum_p[offset] : 0; // Scan within a warp for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(x, i); + T s = sycl::shift_group_right(sg, x, i); if (lane >= i) x += s; } @@ -311,7 +311,7 @@ T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum) if (warp == 0) { T y = (lane < nwarps) ? 
shared[lane] : 0; for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(y, i); + T s = sycl::shift_group_right(sg, y, i); if (lane >= i) y += s; } @@ -417,7 +417,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum amrex::launch(nblocks, nthreads, sm, stream, [=] AMREX_GPU_DEVICE (Gpu::Handler const& gh) noexcept { - amrex::oneapi::sub_group const& sg = gh.item->get_sub_group(); + sycl::sub_group const& sg = gh.item->get_sub_group(); int lane = sg.get_local_id()[0]; int warp = sg.get_group_id()[0]; int nwarps = sg.get_group_range()[0]; @@ -472,7 +472,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum T x = x0; // Scan within a warp for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(x, i); + T s = sycl::shift_group_right(sg, x, i); if (lane >= i) x += s; } @@ -490,7 +490,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum if (warp == 0) { T y = (lane < nwarps) ? shared[lane] : 0; for (int i = 1; i <= Gpu::Device::warp_size; i *= 2) { - T s = sg.shuffle_up(y, i); + T s = sycl::shift_group_right(sg, y, i); if (lane >= i) y += s; } @@ -543,7 +543,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum // implement our own __ballot unsigned status_bf = (stva.status == 'p') ? 
(0x1u << lane) : 0; for (int i = 1; i < Gpu::Device::warp_size; i *= 2) { - status_bf |= sg.shuffle_xor(status_bf, i); + status_bf |= sycl::permute_group_by_xor(sg, status_bf, i); } bool stop_lookback = status_bf & 0x1u; @@ -563,7 +563,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum } for (int i = Gpu::Device::warp_size/2; i > 0; i /= 2) { - x += sg.shuffle_down(x,i); + x += sycl::shift_group_left(sg, x,i); } } diff --git a/Src/Base/AMReX_TableData.H b/Src/Base/AMReX_TableData.H index e44758bde6d..f44157160a7 100644 --- a/Src/Base/AMReX_TableData.H +++ b/Src/Base/AMReX_TableData.H @@ -77,8 +77,8 @@ struct Table2D { T* AMREX_RESTRICT p = nullptr; Long jstride = 0; - GpuArray begin{1,1}; - GpuArray end{0,0}; + GpuArray begin{{1,1}}; + GpuArray end{{0,0}}; AMREX_GPU_HOST_DEVICE constexpr Table2D () noexcept {} @@ -142,8 +142,8 @@ struct Table3D T* AMREX_RESTRICT p = nullptr; Long jstride = 0; Long kstride = 0; - GpuArray begin{1,1,1}; - GpuArray end{0,0,0}; + GpuArray begin{{1,1,1}}; + GpuArray end{{0,0,0}}; AMREX_GPU_HOST_DEVICE constexpr Table3D () noexcept {} @@ -213,8 +213,8 @@ struct Table4D Long jstride = 0; Long kstride = 0; Long nstride = 0; - GpuArray begin{1,1,1,1}; - GpuArray end{0,0,0,0}; + GpuArray begin{{1,1,1,1}}; + GpuArray end{{0,0,0,0}}; AMREX_GPU_HOST_DEVICE constexpr Table4D () noexcept {} diff --git a/Src/Base/AMReX_TinyProfiler.H b/Src/Base/AMReX_TinyProfiler.H index 677b4448d3b..57c9ea0479c 100644 --- a/Src/Base/AMReX_TinyProfiler.H +++ b/Src/Base/AMReX_TinyProfiler.H @@ -10,7 +10,7 @@ #endif #if defined(AMREX_USE_HIP) && defined(AMREX_USE_ROCTX) -#include +#include #endif #include diff --git a/Src/Base/AMReX_ValLocPair.H b/Src/Base/AMReX_ValLocPair.H new file mode 100644 index 00000000000..b7b480b1dba --- /dev/null +++ b/Src/Base/AMReX_ValLocPair.H @@ -0,0 +1,35 @@ +#ifndef AMREX_VALLOCPAIR_H_ +#define AMREX_VALLOCPAIR_H_ + +#include + +namespace amrex { + +template +struct ValLocPair +{ + TV value; + TI 
index; + + static constexpr ValLocPair max () { + return ValLocPair{std::numeric_limits::max(), TI()}; + } + + static constexpr ValLocPair lowest () { + return ValLocPair{std::numeric_limits::lowest(), TI()}; + } + + friend constexpr bool operator< (ValLocPair const& a, ValLocPair const& b) + { + return a.value < b.value; + } + + friend constexpr bool operator> (ValLocPair const& a, ValLocPair const& b) + { + return a.value > b.value; + } +}; + +} + +#endif diff --git a/Src/Base/AMReX_VisMF.H b/Src/Base/AMReX_VisMF.H index 12777a08307..bfab54abf8d 100644 --- a/Src/Base/AMReX_VisMF.H +++ b/Src/Base/AMReX_VisMF.H @@ -638,7 +638,6 @@ Read (FabArray& fa, const std::string& name) } int totalioreqs = nboxes; - int messtotal = 0; int reqspending = 0; int iopfileindex; std::deque iopreads; @@ -669,7 +668,6 @@ Read (FabArray& fa, const std::string& name) } } else { ParallelDescriptor::Send(vreads, tryproc, readtag); - ++messtotal; ++reqspending; } availablefiles.erase(afilesiter); diff --git a/Src/Base/AMReX_bc_types_mod.F90 b/Src/Base/AMReX_bc_types_mod.F90 index c326d49e419..c1c6f237ba8 100644 --- a/Src/Base/AMReX_bc_types_mod.F90 +++ b/Src/Base/AMReX_bc_types_mod.F90 @@ -15,6 +15,9 @@ module amrex_bc_types_module integer, parameter, public :: amrex_bc_ext_dir = 3 integer, parameter, public :: amrex_bc_hoextrap = 4 integer, parameter, public :: amrex_bc_hoextrapcc = 5 + integer, parameter, public :: amrex_bc_user_1 = 1001 + integer, parameter, public :: amrex_bc_user_2 = 1002 + integer, parameter, public :: amrex_bc_user_3 = 1003 integer, parameter, public :: amrex_pbc_interior = 0 integer, parameter, public :: amrex_pbc_inflow = 1 diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt index 6a2db4526cd..7af11a24b5a 100644 --- a/Src/Base/CMakeLists.txt +++ b/Src/Base/CMakeLists.txt @@ -30,6 +30,7 @@ target_sources( amrex AMReX_Utility.cpp AMReX_FileSystem.H AMReX_FileSystem.cpp + AMReX_ValLocPair.H AMReX_Reduce.H AMReX_Scan.H AMReX_Partition.H @@ -71,6 +72,7 @@ 
target_sources( amrex AMReX_DataAllocator.H AMReX_BLProfiler.H AMReX_BLBackTrace.H + AMReX_BLBackTrace.cpp AMReX_BLFort.H AMReX_NFiles.H AMReX_NFiles.cpp @@ -187,6 +189,7 @@ target_sources( amrex AMReX_IntegratorBase.H AMReX_RKIntegrator.H AMReX_TimeIntegrator.H + AMReX_RungeKutta.H # GPU -------------------------------------------------------------------- AMReX_Gpu.H AMReX_GpuQualifiers.H @@ -222,6 +225,7 @@ target_sources( amrex AMReX_MFParallelForC.H AMReX_MFParallelForG.H AMReX_TagParallelFor.H + AMReX_CTOParallelForImpl.H AMReX_ParReduce.H # CUDA -------------------------------------------------------------------- AMReX_CudaGraph.H @@ -231,8 +235,6 @@ target_sources( amrex # Memory pool ------------------------------------------------------------- AMReX_MemPool.cpp AMReX_MemPool.H - # Profiling --------------------------------------------------------------- - AMReX_BLBackTrace.cpp # Parser --------------------------------------------------------------- Parser/AMReX_Parser.cpp Parser/AMReX_Parser.H @@ -305,3 +307,8 @@ endif () if (AMReX_TINY_PROFILE) target_sources(amrex PRIVATE AMReX_TinyProfiler.cpp AMReX_TinyProfiler.H ) endif () + +# MPMD +if (AMReX_MPI) + target_sources(amrex PRIVATE AMReX_MPMD.cpp AMReX_MPMD.H ) +endif () diff --git a/Src/Base/Make.package b/Src/Base/Make.package index d7c4e520e7b..9dd615b3251 100644 --- a/Src/Base/Make.package +++ b/Src/Base/Make.package @@ -22,6 +22,7 @@ C$(AMREX_BASE)_sources += AMReX_BlockMutex.cpp C$(AMREX_BASE)_sources += AMReX_ParmParse.cpp AMReX_parmparse_fi.cpp AMReX_Utility.cpp C$(AMREX_BASE)_headers += AMReX_ParmParse.H AMReX_Utility.H AMReX_BLassert.H AMReX_ArrayLim.H C$(AMREX_BASE)_headers += AMReX_Functional.H AMReX_Reduce.H AMReX_Scan.H AMReX_Partition.H +C$(AMREX_BASE)_headers += AMReX_ValLocPair.H C$(AMREX_BASE)_headers += AMReX_FileSystem.H C$(AMREX_BASE)_sources += AMReX_FileSystem.cpp @@ -100,6 +101,7 @@ C$(AMREX_BASE)_headers += AMReX_MFParallelForC.H C$(AMREX_BASE)_headers += AMReX_MFParallelForG.H 
C$(AMREX_BASE)_headers += AMReX_TagParallelFor.H +C$(AMREX_BASE)_headers += AMReX_CTOParallelForImpl.H C$(AMREX_BASE)_headers += AMReX_ParReduce.H @@ -203,7 +205,7 @@ C$(AMREX_BASE)_headers += AMReX_FEIntegrator.H C$(AMREX_BASE)_headers += AMReX_IntegratorBase.H C$(AMREX_BASE)_headers += AMReX_RKIntegrator.H C$(AMREX_BASE)_headers += AMReX_TimeIntegrator.H - +C$(AMREX_BASE)_headers += AMReX_RungeKutta.H # # Fortran interface routines. @@ -271,6 +273,10 @@ CEXE_sources += AMReX_Machine.cpp # Forward declaration CEXE_headers += AMReX_BaseFwd.H +ifeq ($(USE_MPI),TRUE) + CEXE_headers += AMReX_MPMD.H + CEXE_sources += AMReX_MPMD.cpp +endif VPATH_LOCATIONS += $(AMREX_HOME)/Src/Base INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Base diff --git a/Src/Boundary/AMReX_LOUtil_K.H b/Src/Boundary/AMReX_LOUtil_K.H index b8fdb2a37ce..71bb1dd41d1 100644 --- a/Src/Boundary/AMReX_LOUtil_K.H +++ b/Src/Boundary/AMReX_LOUtil_K.H @@ -34,6 +34,22 @@ void poly_interp_coeff (Real xInt, Real const* AMREX_RESTRICT x, int N, Real* AM } } +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void poly_interp_coeff (Real xInt, Real const* AMREX_RESTRICT x, Real* AMREX_RESTRICT c) noexcept +{ + for (int j = 0; j < N; ++j) { + Real num = 1.0, den = 1.0; + for (int i = 0; i < N; ++i) { + if (i != j) { + num *= xInt-x[i]; + den *= x[j]-x[i]; + } + } + c[j] = num / den; + } +} + } #endif diff --git a/Src/EB/AMReX_EB2.H b/Src/EB/AMReX_EB2.H index ad56d532520..def8d2de9e0 100644 --- a/Src/EB/AMReX_EB2.H +++ b/Src/EB/AMReX_EB2.H @@ -49,6 +49,7 @@ public: virtual const Level& getLevel (const Geometry & geom) const = 0; virtual const Geometry& getGeometry (const Box& domain) const = 0; virtual const Box& coarsestDomain () const = 0; + virtual void addFineLevels (int num_new_fine_levels) = 0; protected: static AMREX_EXPORT Vector > m_instance; @@ -66,7 +67,7 @@ public: IndexSpaceImp (const G& gshop, const Geometry& geom, int required_coarsening_level, int max_coarsening_level, int ngrow, bool 
build_coarse_level_by_coarsening, - bool extend_domain_face); + bool extend_domain_face, int num_coarsen_opt); IndexSpaceImp (IndexSpaceImp const&) = delete; IndexSpaceImp (IndexSpaceImp &&) = delete; @@ -80,46 +81,67 @@ public: virtual const Box& coarsestDomain () const final { return m_geom.back().Domain(); } + virtual void addFineLevels (int num_new_fine_levels) final; using F = typename G::FunctionType; private: + G m_gshop; + bool m_build_coarse_level_by_coarsening; + bool m_extend_domain_face; + int m_num_coarsen_opt; + Vector > m_gslevel; Vector m_geom; Vector m_domain; Vector m_ngrow; - std::unique_ptr m_impfunc; }; #include bool ExtendDomainFace (); +int NumCoarsenOpt (); template void Build (const G& gshop, const Geometry& geom, int required_coarsening_level, int max_coarsening_level, int ngrow = 4, bool build_coarse_level_by_coarsening = true, - bool extend_domain_face = ExtendDomainFace()) + bool extend_domain_face = ExtendDomainFace(), + int num_coarsen_opt = NumCoarsenOpt()) { BL_PROFILE("EB2::Initialize()"); IndexSpace::push(new IndexSpaceImp(gshop, geom, required_coarsening_level, max_coarsening_level, ngrow, build_coarse_level_by_coarsening, - extend_domain_face)); + extend_domain_face, + num_coarsen_opt)); } void Build (const Geometry& geom, int required_coarsening_level, int max_coarsening_level, int ngrow = 4, - bool build_coarse_level_by_coarsening = true); + bool build_coarse_level_by_coarsening = true, + bool extend_domain_face = ExtendDomainFace(), + int num_coarsen_opt = NumCoarsenOpt()); + + +void BuildFromChkptFile (std::string const& fname, + const Geometry& geom, + int required_coarsening_level, + int max_coarsening_level, + int ngrow = 4, + bool build_coarse_level_by_coarsening = true, + bool extend_domain_face = ExtendDomainFace()); int maxCoarseningLevel (const Geometry& geom); int maxCoarseningLevel (IndexSpace const* ebis, const Geometry& geom); +void addFineLevels (int num_new_fine_levels); + }} #endif diff --git 
a/Src/EB/AMReX_EB2.cpp b/Src/EB/AMReX_EB2.cpp index 3bdf44ee4e9..fc2d75e0a01 100644 --- a/Src/EB/AMReX_EB2.cpp +++ b/Src/EB/AMReX_EB2.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -21,12 +22,14 @@ AMREX_EXPORT Vector > IndexSpace::m_instance; AMREX_EXPORT int max_grid_size = 64; AMREX_EXPORT bool extend_domain_face = true; +AMREX_EXPORT int num_coarsen_opt = 0; void Initialize () { ParmParse pp("eb2"); pp.queryAdd("max_grid_size", max_grid_size); pp.queryAdd("extend_domain_face", extend_domain_face); + pp.queryAdd("num_coarsen_opt", num_coarsen_opt); amrex::ExecOnFinalize(Finalize); } @@ -41,6 +44,11 @@ bool ExtendDomainFace () return extend_domain_face; } +int NumCoarsenOpt () +{ + return num_coarsen_opt; +} + void IndexSpace::push (IndexSpace* ispace) { @@ -74,7 +82,8 @@ const IndexSpace* TopIndexSpaceIfPresent() noexcept { void Build (const Geometry& geom, int required_coarsening_level, - int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening) + int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening, + bool a_extend_domain_face, int a_num_coarsen_opt) { ParmParse pp("eb2"); std::string geom_type; @@ -85,7 +94,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::AllRegularIF rif; EB2::GeometryShop gshop(rif); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "box") { @@ -102,7 +112,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::GeometryShop gshop(bf); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "cylinder") { @@ -127,7 +138,8 @@ Build (const 
Geometry& geom, int required_coarsening_level, EB2::GeometryShop gshop(cf); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "plane") { @@ -141,7 +153,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::GeometryShop gshop(pf); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "sphere") { @@ -158,7 +171,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::GeometryShop gshop(sf); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "torus") { @@ -177,7 +191,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::GeometryShop gshop(sf); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "parser") { @@ -188,7 +203,8 @@ Build (const Geometry& geom, int required_coarsening_level, EB2::ParserIF pif(parser.compile<3>()); EB2::GeometryShop gshop(pif,parser); EB2::Build(gshop, geom, required_coarsening_level, - max_coarsening_level, ngrow, build_coarse_level_by_coarsening); + max_coarsening_level, ngrow, build_coarse_level_by_coarsening, + a_extend_domain_face, a_num_coarsen_opt); } else if (geom_type == "stl") { @@ -206,7 +222,8 @@ Build (const Geometry& geom, int required_coarsening_level, geom, required_coarsening_level, max_coarsening_level, 
ngrow, build_coarse_level_by_coarsening, - extend_domain_face)); + a_extend_domain_face, + a_num_coarsen_opt)); } else { @@ -214,6 +231,29 @@ Build (const Geometry& geom, int required_coarsening_level, } } +void addFineLevels (int num_new_fine_levels) +{ + BL_PROFILE("EB2::addFineLevels()"); + auto p = const_cast(TopIndexSpace()); + if (p) { + p->addFineLevels(num_new_fine_levels); + } +} + +void +BuildFromChkptFile (std::string const& fname, + const Geometry& geom, int required_coarsening_level, + int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening, + bool a_extend_domain_face) +{ + ChkptFile chkpt_file(fname); + IndexSpace::push(new IndexSpaceChkptFile(chkpt_file, + geom, required_coarsening_level, + max_coarsening_level, ngrow, + build_coarse_level_by_coarsening, + a_extend_domain_face)); +} + namespace { static int comp_max_crse_level (Box cdomain, const Box& domain) { diff --git a/Src/EB/AMReX_EB2_2D_C.cpp b/Src/EB/AMReX_EB2_2D_C.cpp index bf17844658c..060ed8f4df4 100644 --- a/Src/EB/AMReX_EB2_2D_C.cpp +++ b/Src/EB/AMReX_EB2_2D_C.cpp @@ -391,6 +391,13 @@ void build_cells (Box const& bx, Array4 const& cell, }); } + set_connection_flags(bxg1, cell, fx, fy); +} + +void set_connection_flags (Box const& bxg1, + Array4 const& cell, + Array4 const& fx, Array4 const& fy) noexcept +{ // Build neighbors. By default, all neighbors are already set. AMREX_HOST_DEVICE_FOR_3D ( bxg1, i, j, k, { diff --git a/Src/EB/AMReX_EB2_3D_C.H b/Src/EB/AMReX_EB2_3D_C.H index 14543f81d25..3ea77f149fe 100644 --- a/Src/EB/AMReX_EB2_3D_C.H +++ b/Src/EB/AMReX_EB2_3D_C.H @@ -200,11 +200,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine) nxm = 0; } else if (n == 2) { nxm = 1; - } else if (n == 4) { - ierr = 1; } else { ierr = 1; - amrex::Abort("amrex::check_mvmc: how did this happen? 
wrong number of cuts on xlo-face"); } int nxp = -1; @@ -213,11 +210,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine) nxp = 0; } else if (n == 2) { nxp = 1; - } else if (n == 4) { - ierr = 1; } else { ierr = 1; - amrex::Abort("amrex::check_mvmc: how did this happen? wrong number of cuts on xhi-face"); } // y-faces @@ -227,11 +221,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine) nym = 0; } else if (n == 2) { nym = 1; - } else if (n == 4) { - ierr = 1; } else { ierr = 1; - amrex::Abort("amrex::check_mvmc: how did this happen? wrong number of cuts on ylo-face"); } int nyp = -1; @@ -240,11 +231,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine) nyp = 0; } else if (n == 2) { nyp = 1; - } else if (n == 4) { - ierr = 1; } else { ierr = 1; - amrex::Abort("amrex::check_mvmc: how did this happen? wrong number of cuts on yhi-face"); } // z-faces @@ -254,11 +242,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine) nzm = 0; } else if (n == 2) { nzm = 1; - } else if (n == 4) { - ierr = 1; } else { ierr = 1; - amrex::Abort("amrex::check_mvmc: how did this happen? wrong number of cuts on zlo-face"); } int nzp = -1; @@ -267,11 +252,8 @@ int check_mvmc (int i, int j, int k, Array4 const& fine) nzp = 0; } else if (n == 2) { nzp = 1; - } else if (n == 4) { - ierr = 1; } else { ierr = 1; - amrex::Abort("amrex::check_mvmc: how did this happen? 
wrong number of cuts on zhi-face"); } if (nxm == 1 && nym == 1 && nzm == 1 && nxp == 1 && nyp == 1 && nzp == 1) { diff --git a/Src/EB/AMReX_EB2_3D_C.cpp b/Src/EB/AMReX_EB2_3D_C.cpp index 0077d817ae4..767626eb9e9 100644 --- a/Src/EB/AMReX_EB2_3D_C.cpp +++ b/Src/EB/AMReX_EB2_3D_C.cpp @@ -853,89 +853,96 @@ void build_cells (Box const& bx, Array4 const& cell, nsmallcells += hp[0]; nmulticuts += hp[1]; + Box const& nbxg1 = amrex::surroundingNodes(bxg1); + Box const& bxg1x = amrex::surroundingNodes(bxg1,0); + Box const& bxg1y = amrex::surroundingNodes(bxg1,1); + Box const& bxg1z = amrex::surroundingNodes(bxg1,2); + AMREX_HOST_DEVICE_FOR_3D(nbxg1, i, j, k, + { + if (levset(i,j,k) < Real(0.0)) { + bool zero_levset = false; + if (bxg1.contains(i-1,j-1,k-1) + && cell(i-1,j-1,k-1).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i ,j-1,k-1) + && cell(i ,j-1,k-1).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i-1,j ,k-1) + && cell(i-1,j ,k-1).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i ,j ,k-1) + && cell(i ,j ,k-1).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i-1,j-1,k ) + && cell(i-1,j-1,k ).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i ,j-1,k ) + && cell(i ,j-1,k ).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i-1,j ,k ) + && cell(i-1,j ,k ).isCovered()) { + zero_levset = true; + } else if (bxg1.contains(i ,j ,k ) + && cell(i ,j ,k ).isCovered()) { + zero_levset = true; + } else if (bxg1x.contains(i ,j-1,k-1) + && fx(i ,j-1,k-1) == Type::covered) { + zero_levset = true; + } else if (bxg1x.contains(i ,j ,k-1) + && fx(i ,j ,k-1) == Type::covered) { + zero_levset = true; + } else if (bxg1x.contains(i ,j-1,k ) + && fx(i ,j-1,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1x.contains(i ,j ,k ) + && fx(i ,j ,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1y.contains(i-1,j ,k-1) + && fy(i-1,j ,k-1) == Type::covered) { + zero_levset = true; + } 
else if (bxg1y.contains(i ,j ,k-1) + && fy(i ,j ,k-1) == Type::covered) { + zero_levset = true; + } else if (bxg1y.contains(i-1,j ,k ) + && fy(i-1,j ,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1y.contains(i ,j ,k ) + && fy(i ,j ,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1z.contains(i-1,j-1,k ) + && fz(i-1,j-1,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1z.contains(i ,j-1,k ) + && fz(i ,j-1,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1z.contains(i-1,j ,k ) + && fz(i-1,j ,k ) == Type::covered) { + zero_levset = true; + } else if (bxg1z.contains(i ,j ,k ) + && fz(i ,j ,k ) == Type::covered) { + zero_levset = true; + } + if (zero_levset) { + levset(i,j,k) = Real(0.0); + } + } + }); + if (nsmallcells > 0 || nmulticuts > 0) { if (!cover_multiple_cuts && nmulticuts > 0) { amrex::Abort("amrex::EB2::build_cells: multi-cuts not supported"); } - Box const& nbxg1 = amrex::surroundingNodes(bxg1); - Box const& bxg1x = amrex::surroundingNodes(bxg1,0); - Box const& bxg1y = amrex::surroundingNodes(bxg1,1); - Box const& bxg1z = amrex::surroundingNodes(bxg1,2); - AMREX_HOST_DEVICE_FOR_3D(nbxg1, i, j, k, - { - if (levset(i,j,k) < Real(0.0)) { - bool zero_levset = false; - if (bxg1.contains(i-1,j-1,k-1) - && cell(i-1,j-1,k-1).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i ,j-1,k-1) - && cell(i ,j-1,k-1).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i-1,j ,k-1) - && cell(i-1,j ,k-1).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i ,j ,k-1) - && cell(i ,j ,k-1).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i-1,j-1,k ) - && cell(i-1,j-1,k ).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i ,j-1,k ) - && cell(i ,j-1,k ).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i-1,j ,k ) - && cell(i-1,j ,k ).isCovered()) { - zero_levset = true; - } else if (bxg1.contains(i ,j ,k ) - && cell(i ,j ,k ).isCovered()) { - 
zero_levset = true; - } else if (cover_multiple_cuts) { - if (bxg1x.contains(i ,j-1,k-1) - && fx(i ,j-1,k-1) == Type::covered) { - zero_levset = true; - } else if (bxg1x.contains(i ,j ,k-1) - && fx(i ,j ,k-1) == Type::covered) { - zero_levset = true; - } else if (bxg1x.contains(i ,j-1,k ) - && fx(i ,j-1,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1x.contains(i ,j ,k ) - && fx(i ,j ,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1y.contains(i-1,j ,k-1) - && fy(i-1,j ,k-1) == Type::covered) { - zero_levset = true; - } else if (bxg1y.contains(i ,j ,k-1) - && fy(i ,j ,k-1) == Type::covered) { - zero_levset = true; - } else if (bxg1y.contains(i-1,j ,k ) - && fy(i-1,j ,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1y.contains(i ,j ,k ) - && fy(i ,j ,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1z.contains(i-1,j-1,k ) - && fz(i-1,j-1,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1z.contains(i ,j-1,k ) - && fz(i ,j-1,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1z.contains(i-1,j ,k ) - && fz(i-1,j ,k ) == Type::covered) { - zero_levset = true; - } else if (bxg1z.contains(i ,j ,k ) - && fz(i ,j ,k ) == Type::covered) { - zero_levset = true; - } - } - if (zero_levset) { - levset(i,j,k) = Real(0.0); - } - } - }); return; + } else { + set_connection_flags(bx, bxg1, cell, ctmp, fx, fy, fz); } +} +void set_connection_flags (Box const& bx, + Box const& bxg1, Array4 const& cell, + Array4 const& ctmp, Array4 const& fx, + Array4 const& fy, Array4 const& fz) noexcept +{ // Build neighbors. By default all 26 neighbors are already set. 
AMREX_HOST_DEVICE_FOR_3D ( bxg1, i, j, k, { diff --git a/Src/EB/AMReX_EB2_C.H b/Src/EB/AMReX_EB2_C.H index 7e752f3d051..0be84fdc913 100644 --- a/Src/EB/AMReX_EB2_C.H +++ b/Src/EB/AMReX_EB2_C.H @@ -36,6 +36,9 @@ void build_cells (Box const& bx, Array4 const& cell, Real small_volfrac, Geometry const& geom, bool extend_domain_face, int& nsmallcells, int const nmulticuts) noexcept; +void set_connection_flags(Box const& bxg1, Array4 const& cell, + Array4 const& fx, Array4 const& fy) noexcept; + #elif (AMREX_SPACEDIM == 3) int build_faces (Box const& bx, Array4 const& cell, @@ -67,6 +70,11 @@ void build_cells (Box const& bx, Array4 const& cell, bool extend_domain_face, bool cover_multiple_cuts, int& nsmallcells, int& nmulticuts) noexcept; +void set_connection_flags(Box const& bx, Box const& bxg1, + Array4 const& cell, Array4 const& ctmp, + Array4 const& fx, Array4 const& fy, + Array4 const& fz) noexcept; + #endif void intercept_to_edge_centroid (AMREX_D_DECL(Array4 const& excent, diff --git a/Src/EB/AMReX_EB2_GeometryShop.H b/Src/EB/AMReX_EB2_GeometryShop.H index ff80dd20593..2a7565abad2 100644 --- a/Src/EB/AMReX_EB2_GeometryShop.H +++ b/Src/EB/AMReX_EB2_GeometryShop.H @@ -244,6 +244,7 @@ public: } } } + amrex::ignore_unused(nzero); if (nbody == 0) { return allregular; diff --git a/Src/EB/AMReX_EB2_IndexSpaceI.H b/Src/EB/AMReX_EB2_IndexSpaceI.H index 192df9f43a0..e7db810b03b 100644 --- a/Src/EB/AMReX_EB2_IndexSpaceI.H +++ b/Src/EB/AMReX_EB2_IndexSpaceI.H @@ -4,7 +4,11 @@ IndexSpaceImp::IndexSpaceImp (const G& gshop, const Geometry& geom, int required_coarsening_level, int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening, - bool extend_domain_face) + bool extend_domain_face, int num_coarsen_opt) + : m_gshop(gshop), + m_build_coarse_level_by_coarsening(build_coarse_level_by_coarsening), + m_extend_domain_face(extend_domain_face), + m_num_coarsen_opt(num_coarsen_opt) { // build finest level (i.e., level 0) first 
AMREX_ALWAYS_ASSERT(required_coarsening_level >= 0 && required_coarsening_level <= 30); @@ -20,7 +24,8 @@ IndexSpaceImp::IndexSpaceImp (const G& gshop, const Geometry& geom, m_domain.push_back(geom.Domain()); m_ngrow.push_back(ngrow_finest); m_gslevel.reserve(max_coarsening_level+1); - m_gslevel.emplace_back(this, gshop, geom, EB2::max_grid_size, ngrow_finest, extend_domain_face); + m_gslevel.emplace_back(this, gshop, geom, EB2::max_grid_size, ngrow_finest, extend_domain_face, + num_coarsen_opt); for (int ilev = 1; ilev <= max_coarsening_level; ++ilev) { @@ -44,7 +49,8 @@ IndexSpaceImp::IndexSpaceImp (const G& gshop, const Geometry& geom, if (build_coarse_level_by_coarsening) { amrex::Abort("Failed to build required coarse EB level "+std::to_string(ilev)); } else { - m_gslevel.emplace_back(this, gshop, cgeom, EB2::max_grid_size, ng, extend_domain_face); + m_gslevel.emplace_back(this, gshop, cgeom, EB2::max_grid_size, ng, extend_domain_face, + num_coarsen_opt-ilev); } } else { break; @@ -54,8 +60,6 @@ IndexSpaceImp::IndexSpaceImp (const G& gshop, const Geometry& geom, m_domain.push_back(cdomain); m_ngrow.push_back(ng); } - - m_impfunc = std::make_unique(gshop.GetImpFunc()); } @@ -76,3 +80,29 @@ IndexSpaceImp::getGeometry (const Box& dom) const int i = std::distance(m_domain.begin(), it); return m_geom[i]; } + +template +void +IndexSpaceImp::addFineLevels (int num_new_fine_levels) +{ + if (num_new_fine_levels <= 0) { return; } + + if (m_num_coarsen_opt > 0) { + m_num_coarsen_opt += num_new_fine_levels; + } + + IndexSpaceImp fine_isp(m_gshop, amrex::refine(m_geom[0], 1< + +#include +#include + +#include + +namespace amrex { namespace EB2 { + +class IndexSpaceChkptFile + : public IndexSpace +{ +public: + + IndexSpaceChkptFile (const ChkptFile& chkptfile, + const Geometry& geom, int required_coarsening_level, + int max_coarsening_level, int ngrow, + bool build_coarse_level_by_coarsening, + bool extend_domain_face); + + IndexSpaceChkptFile (IndexSpaceChkptFile const&) = 
delete; + IndexSpaceChkptFile (IndexSpaceChkptFile &&) = delete; + void operator= (IndexSpaceChkptFile const&) = delete; + void operator= (IndexSpaceChkptFile &&) = delete; + + virtual ~IndexSpaceChkptFile () {} + + virtual const Level& getLevel (const Geometry& geom) const final; + virtual const Geometry& getGeometry (const Box& dom) const final; + virtual const Box& coarsestDomain () const final { + return m_geom.back().Domain(); + } + virtual void addFineLevels (int num_new_fine_levels) final; + +private: + + Vector m_chkpt_file_level; + Vector m_geom; + Vector m_domain; + Vector m_ngrow; +}; + +}} + +#endif diff --git a/Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp b/Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp new file mode 100644 index 00000000000..b0318dd402c --- /dev/null +++ b/Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp @@ -0,0 +1,86 @@ +#include + +namespace amrex { namespace EB2 { + +IndexSpaceChkptFile::IndexSpaceChkptFile (const ChkptFile& chkpt_file, + const Geometry& geom, int required_coarsening_level, + int max_coarsening_level, int ngrow, + bool build_coarse_level_by_coarsening, + bool extend_domain_face) +{ + Gpu::LaunchSafeGuard lsg(true); // Always use GPU + + // build finest level (i.e., level 0) first + AMREX_ALWAYS_ASSERT(required_coarsening_level >= 0 && required_coarsening_level <= 30); + max_coarsening_level = std::max(required_coarsening_level,max_coarsening_level); + max_coarsening_level = std::min(30,max_coarsening_level); + + int ngrow_finest = std::max(ngrow,0); + for (int i = 1; i <= required_coarsening_level; ++i) { + ngrow_finest *= 2; + } + + m_geom.push_back(geom); + m_domain.push_back(geom.Domain()); + m_ngrow.push_back(ngrow_finest); + m_chkpt_file_level.reserve(max_coarsening_level+1); + m_chkpt_file_level.emplace_back(this, chkpt_file, geom, EB2::max_grid_size, ngrow_finest, + extend_domain_face); + + for (int ilev = 1; ilev <= max_coarsening_level; ++ilev) + { + bool coarsenable = m_geom.back().Domain().coarsenable(2,2); + if 
(!coarsenable) { + if (ilev <= required_coarsening_level) { + amrex::Abort("IndexSpaceImp: domain is not coarsenable at level "+std::to_string(ilev)); + } else { + break; + } + } + + int ng = (ilev > required_coarsening_level) ? 0 : m_ngrow.back()/2; + + Box cdomain = amrex::coarsen(m_geom.back().Domain(),2); + Geometry cgeom = amrex::coarsen(m_geom.back(),2); + m_chkpt_file_level.emplace_back(this, ilev, EB2::max_grid_size, ng, cgeom, m_chkpt_file_level[ilev-1]); + if (!m_chkpt_file_level.back().isOK()) { + m_chkpt_file_level.pop_back(); + if (ilev <= required_coarsening_level) { + if (build_coarse_level_by_coarsening) { + amrex::Abort("Failed to build required coarse EB level "+std::to_string(ilev)); + } else { + amrex::Abort("Chkptfile only stored for finest level. Failed to build "+std::to_string(ilev)); + } + } else { + break; + } + } + m_geom.push_back(cgeom); + m_domain.push_back(cdomain); + m_ngrow.push_back(ng); + } +} + +const Level& +IndexSpaceChkptFile::getLevel (const Geometry& geom) const +{ + auto it = std::find(std::begin(m_domain), std::end(m_domain), geom.Domain()); + int i = std::distance(m_domain.begin(), it); + return m_chkpt_file_level[i]; +} + +const Geometry& +IndexSpaceChkptFile::getGeometry (const Box& dom) const +{ + auto it = std::find(std::begin(m_domain), std::end(m_domain), dom); + int i = std::distance(m_domain.begin(), it); + return m_geom[i]; +} + +void +IndexSpaceChkptFile::addFineLevels (int /*num_new_fine_levels*/) +{ + amrex::Abort("IndexSpaceChkptFile::addFineLevels: not supported"); +} + +}} diff --git a/Src/EB/AMReX_EB2_Level.H b/Src/EB/AMReX_EB2_Level.H index d47917328c5..8ebc864b903 100644 --- a/Src/EB/AMReX_EB2_Level.H +++ b/Src/EB/AMReX_EB2_Level.H @@ -60,6 +60,8 @@ public: const Geometry& Geom () const noexcept { return m_geom; } IndexSpace const* getEBIndexSpace () const noexcept { return m_parent; } + void write_to_chkpt_file (const std::string& fname, bool extend_domain_face, int max_grid_size) const; + protected: 
Level (Level && rhs) = default; @@ -98,12 +100,13 @@ class GShopLevel : public Level { public: - GShopLevel (IndexSpace const* is, G const& gshop, const Geometry& geom, int max_grid_size, int ngrow, bool extend_domain_face); + GShopLevel (IndexSpace const* is, G const& gshop, const Geometry& geom, int max_grid_size, + int ngrow, bool extend_domain_face, int num_crse_opt); GShopLevel (IndexSpace const* is, int ilev, int max_grid_size, int ngrow, const Geometry& geom, GShopLevel& fineLevel); GShopLevel (IndexSpace const* is, const Geometry& geom); void define_fine (G const& gshop, const Geometry& geom, - int max_grid_size, int ngrow, bool extend_domain_face); + int max_grid_size, int ngrow, bool extend_domain_face, int num_crse_opt); }; template @@ -113,7 +116,7 @@ GShopLevel::GShopLevel (IndexSpace const* is, const Geometry& geom) template GShopLevel::GShopLevel (IndexSpace const* is, G const& gshop, const Geometry& geom, - int max_grid_size, int ngrow, bool extend_domain_face) + int max_grid_size, int ngrow, bool extend_domain_face, int num_crse_opt) : Level(is, geom) { if (std::is_same::value) { @@ -122,13 +125,13 @@ GShopLevel::GShopLevel (IndexSpace const* is, G const& gshop, const Geometry& return; } - define_fine(gshop, geom, max_grid_size, ngrow, extend_domain_face); + define_fine(gshop, geom, max_grid_size, ngrow, extend_domain_face, num_crse_opt); } template void GShopLevel::define_fine (G const& gshop, const Geometry& geom, - int max_grid_size, int ngrow, bool extend_domain_face) + int max_grid_size, int ngrow, bool extend_domain_face, int num_crse_opt) { if (amrex::Verbose() > 0 && extend_domain_face == false) { amrex::Print() << "AMReX WARNING: extend_domain_face=false is not recommended!\n"; @@ -166,57 +169,84 @@ GShopLevel::define_fine (G const& gshop, const Geometry& geom, Box bounding_box = (extend_domain_face) ? 
domain : domain_grown; bounding_box.surroundingNodes(); - BoxList bl(domain); - bl.maxSize(max_grid_size); - if (m_ngrow != 0) { - const IntVect& domlo = domain.smallEnd(); - const IntVect& domhi = domain.bigEnd(); - for (auto& b : bl) { - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - if (m_ngrow[idim] != 0) { - if (b.smallEnd(idim) == domlo[idim]) { - b.growLo(idim,m_ngrow[idim]); - } - if (b.bigEnd(idim) == domhi[idim]) { - b.growHi(idim,m_ngrow[idim]); - } + BoxList cut_boxes; + BoxList covered_boxes; + + const int nprocs = ParallelDescriptor::NProcs(); + const int iproc = ParallelDescriptor::MyProc(); + + num_crse_opt = std::max(0,std::min(8,num_crse_opt)); + for (int clev = num_crse_opt; clev >= 0; --clev) { + IntVect crse_ratio(1 << clev); + if (domain.coarsenable(crse_ratio)) { + Box const& crse_bounding_box = amrex::coarsen(bounding_box, crse_ratio); + Geometry const& crse_geom = amrex::coarsen(geom, crse_ratio); + BoxList test_boxes; + if (cut_boxes.isEmpty()) { + covered_boxes.clear(); + test_boxes = BoxList(crse_geom.Domain()); + test_boxes.maxSize(max_grid_size); + } else { + test_boxes.swap(cut_boxes); + test_boxes.coarsen(crse_ratio); + test_boxes.maxSize(max_grid_size); + } + + const Long nboxes = test_boxes.size(); + const auto& boxes = test_boxes.data(); + for (Long i = iproc; i < nboxes; i += nprocs) { + const Box& vbx = boxes[i]; + const Box& gbx = amrex::surroundingNodes(amrex::grow(vbx,1)); + auto box_type = gshop.getBoxType(gbx&crse_bounding_box,crse_geom,RunOn::Gpu); + if (box_type == gshop.allcovered) { + covered_boxes.push_back(amrex::refine(vbx, crse_ratio)); + } else if (box_type == gshop.mixedcells) { + cut_boxes.push_back(amrex::refine(vbx, crse_ratio)); } } + + amrex::AllGatherBoxes(cut_boxes.data()); } } - m_grids.define(std::move(bl)); - m_dmap.define(m_grids); - - Vector cut_boxes; - Vector covered_boxes; + amrex::AllGatherBoxes(covered_boxes.data()); - for (MFIter mfi(m_grids, m_dmap); mfi.isValid(); ++mfi) - { - const Box& 
vbx = mfi.validbox(); - const Box& gbx = amrex::surroundingNodes(amrex::grow(vbx,1)); - int box_type = gshop.getBoxType(gbx & bounding_box, geom, RunOn::Gpu); - if (box_type == gshop.allcovered) { - covered_boxes.push_back(vbx); - } else if (box_type == gshop.mixedcells) { - cut_boxes.push_back(vbx); - } + if (m_ngrow != 0) { + auto grow_at_domain_boundary = [&] (BoxList& bl) + { + const IntVect& domlo = domain.smallEnd(); + const IntVect& domhi = domain.bigEnd(); + for (auto& b : bl) { + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + if (m_ngrow[idim] != 0) { + if (b.smallEnd(idim) == domlo[idim]) { + b.growLo(idim,m_ngrow[idim]); + } + if (b.bigEnd(idim) == domhi[idim]) { + b.growHi(idim,m_ngrow[idim]); + } + } + } + } + }; + grow_at_domain_boundary(covered_boxes); + grow_at_domain_boundary(cut_boxes); } - amrex::AllGatherBoxes(cut_boxes); - amrex::AllGatherBoxes(covered_boxes); - - if ( cut_boxes.empty() && - !covered_boxes.empty()) + if ( cut_boxes.isEmpty() && + !covered_boxes.isEmpty()) { amrex::Abort("AMReX_EB2_Level.H: Domain is completely covered"); } - if (!covered_boxes.empty()) { - m_covered_grids = BoxArray(BoxList(std::move(covered_boxes))); + if (!covered_boxes.isEmpty()) { + if (num_crse_opt > 2) { // don't want the box too big + covered_boxes.maxSize(max_grid_size*4); + } + m_covered_grids = BoxArray(std::move(covered_boxes)); } - if (cut_boxes.empty()) { + if (cut_boxes.isEmpty()) { m_grids = BoxArray(); m_dmap = DistributionMapping(); m_allregular = true; @@ -224,7 +254,7 @@ GShopLevel::define_fine (G const& gshop, const Geometry& geom, return; } - m_grids = BoxArray(BoxList(std::move(cut_boxes))); + m_grids = BoxArray(std::move(cut_boxes)); m_dmap = DistributionMapping(m_grids); m_mgf.define(m_grids, m_dmap); diff --git a/Src/EB/AMReX_EB2_Level.cpp b/Src/EB/AMReX_EB2_Level.cpp index 46277b59ab1..09b6db4a54c 100644 --- a/Src/EB/AMReX_EB2_Level.cpp +++ b/Src/EB/AMReX_EB2_Level.cpp @@ -1,6 +1,7 @@ #include #include +#include #include 
namespace amrex { namespace EB2 { @@ -916,4 +917,14 @@ Level::fillLevelSet (MultiFab& levelset, const Geometry& geom) const } } +void +Level::write_to_chkpt_file (const std::string& fname, bool extend_domain_face, int max_grid_size) const +{ + ChkptFile chkptFile(fname); + chkptFile.write_to_chkpt_file(m_grids, m_covered_grids, + m_volfrac, m_centroid, m_bndryarea, m_bndrycent, + m_bndrynorm, m_areafrac, m_facecent, m_edgecent, m_levelset, + m_geom, m_ngrow, extend_domain_face, max_grid_size); +} + }} diff --git a/Src/EB/AMReX_EB2_Level_STL.H b/Src/EB/AMReX_EB2_Level_STL.H index f29460d7a92..19cb31ef93b 100644 --- a/Src/EB/AMReX_EB2_Level_STL.H +++ b/Src/EB/AMReX_EB2_Level_STL.H @@ -13,7 +13,7 @@ class STLLevel public: STLLevel (IndexSpace const* is, STLtools const& stl_tools, const Geometry& geom, - int max_grid_size, int ngrow, bool extend_domain_face); + int max_grid_size, int ngrow, bool extend_domain_face, int num_crse_opt); STLLevel (IndexSpace const* is, int ilev, int max_grid_size, int ngrow, const Geometry& geom, STLLevel& fineLevel); diff --git a/Src/EB/AMReX_EB2_Level_STL.cpp b/Src/EB/AMReX_EB2_Level_STL.cpp index 00f29958714..53243cd754a 100644 --- a/Src/EB/AMReX_EB2_Level_STL.cpp +++ b/Src/EB/AMReX_EB2_Level_STL.cpp @@ -3,12 +3,12 @@ namespace amrex { namespace EB2 { STLLevel::STLLevel (IndexSpace const* is, STLtools const& stl_tools, const Geometry& geom, - int max_grid_size, int ngrow, bool extend_domain_face) + int max_grid_size, int ngrow, bool extend_domain_face, int num_crse_opt) : GShopLevel(is, geom) { BL_PROFILE("EB2::STLLevel()-fine"); - define_fine(stl_tools, geom, max_grid_size, ngrow, extend_domain_face); + define_fine(stl_tools, geom, max_grid_size, ngrow, extend_domain_face, num_crse_opt); } STLLevel::STLLevel (IndexSpace const* is, int ilev, int max_grid_size, int ngrow, diff --git a/Src/EB/AMReX_EB2_Level_chkpt_file.H b/Src/EB/AMReX_EB2_Level_chkpt_file.H new file mode 100644 index 00000000000..881dd8f22f0 --- /dev/null +++ 
b/Src/EB/AMReX_EB2_Level_chkpt_file.H @@ -0,0 +1,31 @@ +#ifndef AMREX_EB2_LEVEL_CHKPT_FILE_H_ +#define AMREX_EB2_LEVEL_CHKPT_FILE_H_ +#include + +#include +#include + +namespace amrex { namespace EB2 { + +class ChkptFileLevel + : public GShopLevel +{ +public: + + ChkptFileLevel (IndexSpace const* is, ChkptFile const& chkpt_file, const Geometry& geom, + int max_grid_size, int ngrow, bool extend_domain_face); + + ChkptFileLevel (IndexSpace const* is, int ilev, int max_grid_size, int ngrow, + const Geometry& geom, ChkptFileLevel& fineLevel); + +// for cuda support + void define_fine_chkpt_file (ChkptFile const& chkpt_file, + Geometry const& geom, int max_grid_size, int ngrow, + bool extend_domain_face); + + void finalize_cell_flags (); //sets the connection flags and adjustments to cellflags +}; + +}} + +#endif diff --git a/Src/EB/AMReX_EB2_Level_chkpt_file.cpp b/Src/EB/AMReX_EB2_Level_chkpt_file.cpp new file mode 100644 index 00000000000..0b2d88e828f --- /dev/null +++ b/Src/EB/AMReX_EB2_Level_chkpt_file.cpp @@ -0,0 +1,203 @@ +#include +#include + +#include + +namespace amrex { namespace EB2 { + +ChkptFileLevel::ChkptFileLevel (IndexSpace const* is, ChkptFile const& chkpt_file, + Geometry const& geom, int max_grid_size, int ngrow, bool extend_domain_face) + : GShopLevel(is, geom) +{ + BL_PROFILE("EB2::ChkptFileLevel()-fine"); + + define_fine_chkpt_file(chkpt_file, geom, max_grid_size, ngrow, extend_domain_face); +} + +void +ChkptFileLevel::define_fine_chkpt_file (ChkptFile const& chkpt_file, + Geometry const& geom, int max_grid_size, + int ngrow, bool extend_domain_face) +{ + BL_PROFILE("EB2::ChkptFileLevel()-define-fine-chkptfile"); + + m_ngrow = IntVect{static_cast(std::ceil(ngrow/16.)) * 16}; + + Box const& domain = geom.Domain(); + Box domain_grown = domain; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + if (geom.isPeriodic(idim)) { + m_ngrow[idim] = 0; + } else { + m_ngrow[idim] = std::min(m_ngrow[idim], domain_grown.length(idim)); + } + } + + const int 
ng = GFab::ng; + chkpt_file.read_from_chkpt_file(m_grids, m_covered_grids, + m_dmap, m_volfrac, m_centroid, m_bndryarea, + m_bndrycent, m_bndrynorm, m_areafrac, m_facecent, + m_edgecent, m_levelset, ng, geom, m_ngrow, + extend_domain_face, max_grid_size); + + + if ( m_grids.empty() && + !m_covered_grids.empty()) + { + Abort("AMReX_EB2_Level.H: Domain is completely covered"); + } + + if (m_grids.empty()) { + m_allregular = true; + m_ok = true; + return; + } + + + m_mgf.define(m_grids, m_dmap); + MFInfo mf_info; + m_cellflag.define(m_grids, m_dmap, 1, ng, mf_info); + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(m_mgf); mfi.isValid(); ++mfi) + { + auto& gfab = m_mgf[mfi]; + + const auto& levelset = m_levelset.const_array(mfi); + const Box& bxg2 = amrex::grow(gfab.validbox(),ng); + const Box& nodal_box = amrex::surroundingNodes(bxg2); + const auto& ls = gfab.getLevelSet().array(); + + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(nodal_box, i, j, k, + { + ls(i,j,k) = levelset(i,j,k); + }); + + auto& cellflag = m_cellflag[mfi]; + gfab.buildTypes(cellflag); + } + + finalize_cell_flags(); +} + +void +ChkptFileLevel::finalize_cell_flags () +{ + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + { + EBCellFlagFab cellflagtmp; + for (MFIter mfi(m_mgf); mfi.isValid(); ++mfi) + { + auto& gfab = m_mgf[mfi]; + const Box& vbx = mfi.validbox(); + const Box& bxg1 = amrex::grow(vbx,1); + Array4 const& cell = m_cellflag.array(mfi); + + cellflagtmp.resize(m_cellflag[mfi].box()); + Elixir cellflagtmp_eli = cellflagtmp.elixir(); + Array4 const& ctmp = cellflagtmp.array(); + + auto& facetype = gfab.getFaceType(); + AMREX_D_TERM(Array4 const& fx = facetype[0].array();, + Array4 const& fy = facetype[1].array();, + Array4 const& fz = facetype[2].array();); + + + AMREX_D_TERM(Array4 const& apx = m_areafrac[0].const_array(mfi);, + Array4 const& apy = m_areafrac[1].const_array(mfi);, + Array4 const& apz = 
m_areafrac[2].const_array(mfi);); + + const Box& xbx = amrex::grow(amrex::surroundingNodes(vbx,0),1); + AMREX_HOST_DEVICE_FOR_3D ( xbx, i, j, k, + { + if (apx(i,j,k) == 0.0_rt) { + fx(i,j,k) = Type::covered; + } else if (apx(i,j,k) == 1.0_rt) { + fx(i,j,k) = Type::regular; + } + }); + + const Box& ybx = amrex::grow(amrex::surroundingNodes(vbx,1),1); + AMREX_HOST_DEVICE_FOR_3D ( ybx, i, j, k, + { + if (apy(i,j,k) == 0.0_rt) { + fy(i,j,k) = Type::covered; + } else if (apy(i,j,k) == 1.0_rt) { + fy(i,j,k) = Type::regular; + } + }); + + #if (AMREX_SPACEDIM == 3) + const Box& zbx = amrex::grow(amrex::surroundingNodes(vbx,2),1); + AMREX_HOST_DEVICE_FOR_3D ( zbx, i, j, k, + { + if (apz(i,j,k) == 0.0_rt) { + fz(i,j,k) = Type::covered; + } else if (apz(i,j,k) == 1.0_rt) { + fz(i,j,k) = Type::regular; + } + }); + #endif + + + #if (AMREX_SPACEDIM == 2) + ignore_unused(ctmp); + AMREX_HOST_DEVICE_FOR_3D ( bxg1, i, j, k, + { + ignore_unused(k); + if (cell(i,j,0).isSingleValued()) { + if (fx(i,j,0) == Type::regular && fx(i+1,j,0) == Type::regular && + fy(i,j,0) == Type::regular && fy(i,j+1,0) == Type::regular) + { + cell(i,j,0).setRegular(); + } + else if (fx(i,j,0) == Type::covered && fx(i+1,j,0) == Type::covered && + fy(i,j,0) == Type::covered && fy(i,j+1,0) == Type::covered) + { + cell(i,j,0).setCovered(); + } + } + }); + + set_connection_flags(bxg1, cell, fx, fy); + + #else + AMREX_HOST_DEVICE_FOR_3D ( bxg1, i, j, k, + { + if (cell(i,j,k).isSingleValued()) { + if (fx(i,j,k) == Type::covered && fx(i+1,j,k) == Type::covered && + fy(i,j,k) == Type::covered && fy(i,j+1,k) == Type::covered && + fz(i,j,k) == Type::covered && fz(i,j,k+1) == Type::covered) + { + cell(i,j,k).setCovered(); + } + else if (fx(i,j,k) == Type::regular && fx(i+1,j,k) == Type::regular && + fy(i,j,k) == Type::regular && fy(i,j+1,k) == Type::regular && + fz(i,j,k) == Type::regular && fz(i,j,k+1) == Type::regular) + { + cell(i,j,k).setRegular(); + } + } + }); + + set_connection_flags(vbx, bxg1, cell, ctmp, fx, 
fy, fz); + + #endif + + } + + m_ok = true; + } +} + +ChkptFileLevel::ChkptFileLevel (IndexSpace const* is, int ilev, int max_grid_size, int ngrow, + const Geometry& geom, ChkptFileLevel& fineLevel) +: GShopLevel(is, ilev, max_grid_size, ngrow, geom, fineLevel) +{} + +}} diff --git a/Src/EB/AMReX_EB_chkpt_file.H b/Src/EB/AMReX_EB_chkpt_file.H new file mode 100644 index 00000000000..781db55a1d8 --- /dev/null +++ b/Src/EB/AMReX_EB_chkpt_file.H @@ -0,0 +1,60 @@ +#ifndef AMREX_EB_CHKPT_FILE_H_ +#define AMREX_EB_CHKPT_FILE_H_ + +#include + +namespace amrex { namespace EB2 { + +class ChkptFile +{ +private: + std::string m_restart_file = ""; + + const std::string m_volfrac_name = "volfrac"; + const std::string m_centroid_name = "centroid"; + const std::string m_bndryarea_name = "bndryarea"; + const std::string m_bndrycent_name = "bndrycent"; + const std::string m_bndrynorm_name = "bndrynorm"; + const std::string m_levelset_name = "levelset"; + + const amrex::Vector m_areafrac_name + = {AMREX_D_DECL("areafrac_x", "areafrac_y", "areafrac_z")}; + const amrex::Vector m_facecent_name + = {AMREX_D_DECL("facecent_x", "facecent_y", "facecent_z")}; + const amrex::Vector m_edgecent_name + = {AMREX_D_DECL("edgecent_x", "edgecent_y", "edgecent_z")}; + + void writeHeader (const BoxArray& cut_ba, const BoxArray& covered_ba, const Geometry& geom, + const IntVect& ngrow, bool extend_domain_face, int max_grid_size) const; + + void writeToFile (const MultiFab& mf, const std::string& mf_name) const; + + +public: + ChkptFile (const std::string &fname); + + void read_from_chkpt_file (BoxArray& cut_grids, BoxArray& covered_grids, + DistributionMapping& dmap, + MultiFab& volfrac, MultiFab& centroid, MultiFab& bndryarea, + MultiFab& bndrycent, MultiFab& bndrynorm, + Array& areafrac, + Array& facecent, + Array& edgecent, + MultiFab& levelset, int ng_gfab, const Geometry& geom, + const IntVect& ngrow_finest, bool extend_domain_face, int max_grid_size) const; + + void write_to_chkpt_file (const 
BoxArray& cut_grids, + const BoxArray& covered_grids, + const MultiFab& volfrac, + const MultiFab& centroid, const MultiFab& bndryarea, + const MultiFab& bndrycent, const MultiFab& bndrynorm, + const Array& areafrac, + const Array& facecent, + const Array& edgecent, + const MultiFab& levelset, const Geometry& geom, + const IntVect& ngrow, bool extend_domain_face, int max_grid_size) const; +}; + +}} + +#endif diff --git a/Src/EB/AMReX_EB_chkpt_file.cpp b/Src/EB/AMReX_EB_chkpt_file.cpp new file mode 100644 index 00000000000..cd1c00e9ee5 --- /dev/null +++ b/Src/EB/AMReX_EB_chkpt_file.cpp @@ -0,0 +1,324 @@ +#include + +#include +#include +#include // amrex::VisMF::Write(MultiFab) +#include // amrex::[read,write]IntData(array_of_ints) + +namespace { + +const std::string level_prefix = "Level_"; + +void gotoNextLine (std::istream& is) +{ + constexpr std::streamsize bl_ignore_max { 100000 }; + is.ignore(bl_ignore_max, '\n'); +} + +} + +namespace amrex { namespace EB2 { + +// Header information includes the cut and covered boxes (if any) +// Checkpoint file contains data for cut boxes +void +ChkptFile::writeHeader (const BoxArray& cut_ba, const BoxArray& covered_ba, + const Geometry& geom, + const IntVect& ngrow, bool extend_domain_face, + int max_grid_size) const +{ + if (ParallelDescriptor::IOProcessor()) + { + std::string HeaderFileName(m_restart_file + "/Header"); + VisMF::IO_Buffer io_buffer(VisMF::IO_Buffer_Size); + std::ofstream HeaderFile; + + HeaderFile.rdbuf()->pubsetbuf(io_buffer.dataPtr(), io_buffer.size()); + + HeaderFile.open(HeaderFileName.c_str(), std::ofstream::out | + std::ofstream::trunc | + std::ofstream::binary); + + if ( ! 
HeaderFile.good() ) + FileOpenFailed(HeaderFileName); + + HeaderFile.precision(17); + + HeaderFile << "Checkpoint version: 1\n"; + + const int nlevels = 1; + HeaderFile << nlevels << "\n"; + + // Geometry + for (int i = 0; i < AMREX_SPACEDIM; ++i) + HeaderFile << geom.ProbLo(i) << ' '; + HeaderFile << '\n'; + + for (int i = 0; i < AMREX_SPACEDIM; ++i) + HeaderFile << geom.ProbHi(i) << ' '; + HeaderFile << '\n'; + + // ngrow + for (int i = 0; i < AMREX_SPACEDIM; ++i) + HeaderFile << ngrow[i] << ' '; + HeaderFile << '\n'; + + // extend domain face + HeaderFile << extend_domain_face << "\n"; + + // max grid size + HeaderFile << max_grid_size << "\n"; + + // BoxArray + for (int lev = 0; lev < nlevels; ++lev) + { + cut_ba.writeOn(HeaderFile); + HeaderFile << '\n'; + + if (! covered_ba.empty()) { + covered_ba.writeOn(HeaderFile); + HeaderFile << '\n'; + } + } + } +} + +void +ChkptFile::writeToFile (const MultiFab& mf, const std::string& mf_name) const +{ + VisMF::Write(mf, MultiFabFileFullPrefix(0, m_restart_file, + level_prefix, mf_name)); +} + + +ChkptFile::ChkptFile (const std::string &fname) + : m_restart_file(fname) +{} + +void +ChkptFile::read_from_chkpt_file (BoxArray& cut_grids, BoxArray& covered_grids, + DistributionMapping& dmap, + MultiFab& volfrac, MultiFab& centroid, + MultiFab& bndryarea, MultiFab& bndrycent, + MultiFab& bndrynorm, Array& areafrac, + Array& facecent, + Array& edgecent, + MultiFab& levelset, int ng_gfab, const Geometry& geom, + const IntVect& ngrow_finest, bool extend_domain_face, + int max_grid_size) const +{ + Real prob_lo[AMREX_SPACEDIM]; + Real prob_hi[AMREX_SPACEDIM]; + + std::string File(m_restart_file + "/Header"); + + if (amrex::Verbose()) amrex::Print() << "file=" << File << std::endl; + + VisMF::IO_Buffer io_buffer(VisMF::GetIOBufferSize()); + + Vector fileCharPtr; + ParallelDescriptor::ReadAndBcastFile(File, fileCharPtr); + std::string fileCharPtrString(fileCharPtr.dataPtr()); + std::istringstream is(fileCharPtrString, 
std::istringstream::in); + + std::string line, word; + + std::getline(is, line); + + int nlevs; + is >> nlevs; + gotoNextLine(is); + AMREX_ASSERT(nlevs == 1); + + std::getline(is, line); + { + std::istringstream lis(line); + int i = 0; + while (lis >> word) { + prob_lo[i++] = std::stod(word); + } + } + + std::getline(is, line); + { + std::istringstream lis(line); + int i = 0; + while (lis >> word) { + prob_hi[i++] = std::stod(word); + } + } + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(Math::abs(prob_lo[idim] - geom.ProbLo()[idim]) < std::numeric_limits::epsilon(), + "EB2::ChkptFile cannot read from a different problem domain"); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(Math::abs(prob_hi[idim] - geom.ProbHi()[idim]) < std::numeric_limits::epsilon(), + "EB2::ChkptFile cannot read from a different problem domain"); + } + + IntVect ngrow_chkptfile; + std::getline(is, line); + { + std::istringstream lis(line); + int i = 0; + while (lis >> word) { + ngrow_chkptfile[i++] = std::stoi(word); + } + } + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(ngrow_chkptfile == ngrow_finest, "EB2::ChkptFile cannot read from different ngrow"); + + bool edf_chkptfile; + is >> edf_chkptfile; + gotoNextLine(is); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(extend_domain_face == edf_chkptfile, + "EB2::ChkptFile cannot read from different extend_domain_face"); + + int mgs_chkptfile; + is >> mgs_chkptfile; + gotoNextLine(is); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(max_grid_size == mgs_chkptfile, + "EB2::ChkptFile cannot read from different max_grid_size"); + + if (amrex::Verbose()) amrex::Print() << "Loading cut_grids\n"; + cut_grids.readFrom(is); + gotoNextLine(is); + + if (is.peek() != EOF) { + if (amrex::Verbose()) amrex::Print() << "Loading covered_grids\n"; + covered_grids.readFrom(is); + gotoNextLine(is); + } + + dmap.define(cut_grids, ParallelDescriptor::NProcs()); + + // volfrac + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_volfrac_name << std::endl; + + 
volfrac.define(cut_grids, dmap, 1, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_volfrac_name); + VisMF::Read(volfrac, prefix); + } + + // centroid + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_centroid_name << std::endl; + + centroid.define(cut_grids, dmap, AMREX_SPACEDIM, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_centroid_name); + VisMF::Read(centroid, prefix); + } + + // bndryarea + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_bndryarea_name << std::endl; + + bndryarea.define(cut_grids, dmap, 1, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_bndryarea_name); + VisMF::Read(bndryarea, prefix); + } + + // bndrycent + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_bndrycent_name << std::endl; + + bndrycent.define(cut_grids, dmap, AMREX_SPACEDIM, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_bndrycent_name); + VisMF::Read(bndrycent, prefix); + } + + // bndrynorm + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_bndrynorm_name << std::endl; + + bndrynorm.define(cut_grids, dmap, AMREX_SPACEDIM, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_bndrynorm_name); + VisMF::Read(bndrynorm, prefix); + } + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + // areafrac + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_areafrac_name[idim] << std::endl; + + areafrac[idim].define(convert(cut_grids, IntVect::TheDimensionVector(idim)), dmap, 1, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_areafrac_name[idim]); + VisMF::Read(areafrac[idim], prefix); + } + + // facecent + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_facecent_name[idim] << std::endl; + + facecent[idim].define(convert(cut_grids, IntVect::TheDimensionVector(idim)), dmap, AMREX_SPACEDIM-1, 
ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_facecent_name[idim]); + VisMF::Read(facecent[idim], prefix); + } + + // edgecent + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_edgecent_name[idim] << std::endl; + + IntVect edge_type{1}; edge_type[idim] = 0; + edgecent[idim].define(convert(cut_grids, edge_type), dmap, 1, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_edgecent_name[idim]); + VisMF::Read(edgecent[idim], prefix); + } + } + + // levelset + { + if (amrex::Verbose()) amrex::Print() << " Loading " << m_levelset_name << std::endl; + + levelset.define(convert(cut_grids,IntVect::TheNodeVector()), dmap, 1, ng_gfab); + + auto prefix = MultiFabFileFullPrefix(0, m_restart_file, level_prefix, m_levelset_name); + VisMF::Read(levelset, prefix); + } +} + +void +ChkptFile::write_to_chkpt_file (const BoxArray& cut_grids, + const BoxArray& covered_grids, + const MultiFab& volfrac, + const MultiFab& centroid, const MultiFab& bndryarea, + const MultiFab& bndrycent, const MultiFab& bndrynorm, + const Array& areafrac, + const Array& facecent, + const Array& edgecent, + const MultiFab& levelset, const Geometry& geom, + const IntVect& ngrow, bool extend_domain_face, + int max_grid_size) const +{ + + if (ParallelDescriptor::IOProcessor()) { + std::cout << "\n\t Writing checkpoint " << m_restart_file << std::endl; + } + + const int nlevels = 1; + PreBuildDirectorHierarchy(m_restart_file, level_prefix, nlevels, true); + + writeHeader(cut_grids, covered_grids, geom, ngrow, extend_domain_face, max_grid_size); + + writeToFile(volfrac, m_volfrac_name); + writeToFile(centroid, m_centroid_name); + writeToFile(bndryarea, m_bndryarea_name); + writeToFile(bndrycent, m_bndrycent_name); + writeToFile(bndrynorm, m_bndrynorm_name); + writeToFile(levelset, m_levelset_name); + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + writeToFile(areafrac[idim], m_areafrac_name[idim]); + 
writeToFile(facecent[idim], m_facecent_name[idim]); + writeToFile(edgecent[idim], m_edgecent_name[idim]); + } +} + +}} diff --git a/Src/EB/AMReX_distFcnElement.H b/Src/EB/AMReX_distFcnElement.H index f839bdb5747..2a9c7a0c2f4 100644 --- a/Src/EB/AMReX_distFcnElement.H +++ b/Src/EB/AMReX_distFcnElement.H @@ -12,7 +12,7 @@ class distFcnElement2d { public: //! Constructor distFcnElement2d() {} - ~distFcnElement2d() {} + virtual ~distFcnElement2d() {} virtual distFcnElement2d* newDistFcnElement2d() const = 0; @@ -29,7 +29,7 @@ class distFcnElement2d { class LineDistFcnElement2d: public distFcnElement2d { public: LineDistFcnElement2d() {} - ~LineDistFcnElement2d() {} + virtual ~LineDistFcnElement2d() {} virtual distFcnElement2d* newDistFcnElement2d() const override; @@ -58,7 +58,7 @@ class LineDistFcnElement2d: public distFcnElement2d { class SplineDistFcnElement2d: public distFcnElement2d { public: SplineDistFcnElement2d() {} - ~SplineDistFcnElement2d() {} + virtual ~SplineDistFcnElement2d() {} virtual distFcnElement2d* newDistFcnElement2d() const override; diff --git a/Src/EB/CMakeLists.txt b/Src/EB/CMakeLists.txt index 8ceb433e159..017e4d783a8 100644 --- a/Src/EB/CMakeLists.txt +++ b/Src/EB/CMakeLists.txt @@ -70,11 +70,17 @@ target_sources(amrex AMReX_EB2_${AMReX_SPACEDIM}D_C.H AMReX_EB_STL_utils.H AMReX_EB_STL_utils.cpp + AMReX_EB_chkpt_file.H + AMReX_EB_chkpt_file.cpp AMReX_EB_triGeomOps_K.H AMReX_EB2_Level_STL.H AMReX_EB2_Level_STL.cpp AMReX_EB2_IndexSpace_STL.H AMReX_EB2_IndexSpace_STL.cpp + AMReX_EB2_Level_chkpt_file.H + AMReX_EB2_Level_chkpt_file.cpp + AMReX_EB2_IndexSpace_chkpt_file.H + AMReX_EB2_IndexSpace_chkpt_file.cpp ) if (AMReX_SPACEDIM EQUAL 3) diff --git a/Src/EB/Make.package b/Src/EB/Make.package index 5865a2da982..b684523924f 100644 --- a/Src/EB/Make.package +++ b/Src/EB/Make.package @@ -79,6 +79,12 @@ CEXE_headers += AMReX_EB_triGeomOps_K.H CEXE_headers += AMReX_EB2_Level_STL.H AMReX_EB2_IndexSpace_STL.H CEXE_sources += AMReX_EB2_Level_STL.cpp 
AMReX_EB2_IndexSpace_STL.cpp +CEXE_sources += AMReX_EB_chkpt_file.cpp +CEXE_headers += AMReX_EB_chkpt_file.H + +CEXE_headers += AMReX_EB2_Level_chkpt_file.H AMReX_EB2_IndexSpace_chkpt_file.H +CEXE_sources += AMReX_EB2_Level_chkpt_file.cpp AMReX_EB2_IndexSpace_chkpt_file.cpp + ifeq ($(DIM),3) CEXE_sources += AMReX_WriteEBSurface.cpp AMReX_EBToPVD.cpp CEXE_headers += AMReX_WriteEBSurface.H AMReX_EBToPVD.H diff --git a/Src/Extern/HDF5/AMReX_PlotFileUtilHDF5.cpp b/Src/Extern/HDF5/AMReX_PlotFileUtilHDF5.cpp index 021ed8c4f60..49a761da801 100644 --- a/Src/Extern/HDF5/AMReX_PlotFileUtilHDF5.cpp +++ b/Src/Extern/HDF5/AMReX_PlotFileUtilHDF5.cpp @@ -232,11 +232,8 @@ WriteGenericPlotfileHeaderHDF5 (hid_t fid, int ratio = 1; if (ref_ratio.size() > 0) - ratio = ref_ratio[level][0]; + ratio = (level == finest_level)? 1: ref_ratio[level][0]; - if (level == finest_level) { - ratio = 1; - } CreateWriteHDF5AttrInt(grp, "ref_ratio", 1, &ratio); for (int k = 0; k < AMREX_SPACEDIM; ++k) { diff --git a/Src/Extern/HYPRE/AMReX_HypreIJIface.H b/Src/Extern/HYPRE/AMReX_HypreIJIface.H index 6d0dbacd95f..2ac96748b24 100644 --- a/Src/Extern/HYPRE/AMReX_HypreIJIface.H +++ b/Src/Extern/HYPRE/AMReX_HypreIJIface.H @@ -93,11 +93,11 @@ private: HypreIntType (*m_precondSolvePtr)( HYPRE_Solver, HYPRE_ParCSRMatrix, HYPRE_ParVector, HYPRE_ParVector){nullptr}; - HypreIntType (*m_solverSetTolPtr)(HYPRE_Solver, double){nullptr}; - HypreIntType (*m_solverSetAbsTolPtr)(HYPRE_Solver, double){nullptr}; + HypreIntType (*m_solverSetTolPtr)(HYPRE_Solver, amrex::Real){nullptr}; + HypreIntType (*m_solverSetAbsTolPtr)(HYPRE_Solver, amrex::Real){nullptr}; HypreIntType (*m_solverSetMaxIterPtr)(HYPRE_Solver, HypreIntType){nullptr}; HypreIntType (*m_solverNumItersPtr)(HYPRE_Solver, HypreIntType*){nullptr}; - HypreIntType (*m_solverFinalResidualNormPtr)(HYPRE_Solver, double*){nullptr}; + HypreIntType (*m_solverFinalResidualNormPtr)(HYPRE_Solver, amrex::Real*){nullptr}; HypreIntType m_ilower{0}; HypreIntType m_iupper{0}; 
diff --git a/Src/Extern/HYPRE/AMReX_HypreIJIface.cpp b/Src/Extern/HYPRE/AMReX_HypreIJIface.cpp index 9e7a42dbb5b..c2e4f126252 100644 --- a/Src/Extern/HYPRE/AMReX_HypreIJIface.cpp +++ b/Src/Extern/HYPRE/AMReX_HypreIJIface.cpp @@ -275,7 +275,7 @@ void HypreIJIface::boomeramg_precond_configure (const std::string& prefix) if (hpp.pp.contains("bamg_non_galerkin_level_tols")) { std::vector levels; - std::vector tols; + std::vector tols; hpp.pp.getarr("bamg_non_galerkin_level_levels", levels); hpp.pp.getarr("bamg_non_galerkin_level_tols", tols); diff --git a/Src/Extern/PETSc/AMReX_PETSc.cpp b/Src/Extern/PETSc/AMReX_PETSc.cpp index bf0bf68a99c..7d8cd79b582 100644 --- a/Src/Extern/PETSc/AMReX_PETSc.cpp +++ b/Src/Extern/PETSc/AMReX_PETSc.cpp @@ -1,7 +1,4 @@ -#include -#include - #ifdef AMREX_USE_EB #include #include @@ -9,6 +6,9 @@ #include +#include +#include + #include #include #include diff --git a/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.H b/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.H index 061ff14c301..602a6298126 100644 --- a/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.H +++ b/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.H @@ -30,10 +30,10 @@ public: int GetNumberOfArrays(const std::string &meshName, int association, unsigned int &numberOfArrays) override; int GetArrayName(const std::string &meshName, int association, unsigned int index, std::string &arrayName) override; #endif - int GetMesh(const std::string &meshName, bool structureOnly, vtkDataObject *&mesh) override; - int AddGhostNodesArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddGhostCellsArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddArray(vtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; + int GetMesh(const std::string &meshName, bool structureOnly, svtkDataObject *&mesh) override; + int AddGhostNodesArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddGhostCellsArray(svtkDataObject* mesh, 
const std::string &meshName) override; + int AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; int ReleaseData() override; protected: diff --git a/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.cpp b/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.cpp index 135c21ef0e2..aa801eb0993 100644 --- a/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.cpp +++ b/Src/Extern/SENSEI/AMReX_AmrDataAdaptor.cpp @@ -1,22 +1,22 @@ #include "AMReX_AmrDataAdaptor.H" +#include "senseiConfig.h" #include "MPIUtils.h" #include "STLUtils.h" -#include "VTKUtils.h" +#include "SVTKUtils.h" #include "Profiler.h" #include "Error.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -62,15 +62,15 @@ int DescriptorMap::Initialize(const DescriptorList &descriptors) if (itype.cellCentered()) { - this->Map[vtkDataObject::CELL][arrayName] = std::make_pair(i,j); + this->Map[svtkDataObject::CELL][arrayName] = std::make_pair(i,j); } else if (itype.nodeCentered()) { - this->Map[vtkDataObject::POINT][arrayName] = std::make_pair(i,j); + this->Map[svtkDataObject::POINT][arrayName] = std::make_pair(i,j); } else { - this->Map[vtkDataObject::FIELD][arrayName] = std::make_pair(i,j); + this->Map[svtkDataObject::FIELD][arrayName] = std::make_pair(i,j); } } } @@ -156,7 +156,7 @@ struct AmrDataAdaptor::InternalsType int PinMesh; amrex::InSituUtils::DescriptorMap SimMetadata; #if SENSEI_VERSION_MAJOR < 3 - std::vector ManagedObjects; + std::vector ManagedObjects; #endif std::vector> Masks; }; @@ -225,11 +225,11 @@ int AmrDataAdaptor::GetMeshMetadata(unsigned int id, metadata->GlobalView = true; metadata->MeshName = "mesh"; - metadata->MeshType = VTK_OVERLAPPING_AMR; - metadata->BlockType = VTK_UNIFORM_GRID; + metadata->MeshType = SVTK_OVERLAPPING_AMR; + metadata->BlockType = SVTK_UNIFORM_GRID; 
metadata->NumBlocks = 0; metadata->NumBlocksLocal = {-1}; - metadata->CoordinateType = InSituUtils::amrex_tt::vtk_type_enum(); + metadata->CoordinateType = InSituUtils::amrex_tt::svtk_type_enum(); metadata->StaticMesh = 0; // TODO @@ -318,14 +318,14 @@ int AmrDataAdaptor::GetMeshMetadata(unsigned int id, std::string arrayName = desc.name(j); metadata->ArrayName.push_back(arrayName); metadata->ArrayComponents.push_back(1); - metadata->ArrayType.push_back(InSituUtils::amrex_tt::vtk_type_enum()); + metadata->ArrayType.push_back(InSituUtils::amrex_tt::svtk_type_enum()); if (itype.cellCentered()) - metadata->ArrayCentering.push_back(vtkDataObject::CELL); + metadata->ArrayCentering.push_back(svtkDataObject::CELL); else if (itype.nodeCentered()) - metadata->ArrayCentering.push_back(vtkDataObject::POINT); + metadata->ArrayCentering.push_back(svtkDataObject::POINT); else - metadata->ArrayCentering.push_back(vtkDataObject::FIELD); + metadata->ArrayCentering.push_back(svtkDataObject::FIELD); } } @@ -557,8 +557,8 @@ int AmrDataAdaptor::GetNumberOfArrays(const std::string &meshName, return -1; } - if ((association != vtkDataObject::POINT) && - (association != vtkDataObject::CELL)) + if ((association != svtkDataObject::POINT) && + (association != svtkDataObject::CELL)) { SENSEI_ERROR("Invalid association " << association) return -1; @@ -590,7 +590,7 @@ int AmrDataAdaptor::GetArrayName(const std::string &meshName, if (this->Internals->SimMetadata.GetName(association, index, arrayName)) { SENSEI_ERROR("No array named \"" << arrayName << "\" in " - << sensei::VTKUtils::GetAttributesName(association) + << sensei::SVTKUtils::GetAttributesName(association) << " data") return -1; } @@ -603,7 +603,7 @@ int AmrDataAdaptor::GetArrayName(const std::string &meshName, //----------------------------------------------------------------------------- int AmrDataAdaptor::GetMesh(const std::string &meshName, - bool structureOnly, vtkDataObject *&mesh) + bool structureOnly, svtkDataObject *&mesh) { 
amrex::ignore_unused(structureOnly); @@ -626,8 +626,8 @@ int AmrDataAdaptor::GetMesh(const std::string &meshName, unsigned int nLevels = InSituUtils::NumActiveLevels(levels); - // initialize new vtk datasets - vtkOverlappingAMR *amrMesh = vtkOverlappingAMR::New(); + // initialize new svtk datasets + svtkOverlappingAMR *amrMesh = svtkOverlappingAMR::New(); #if SENSEI_VERSION_MAJOR < 3 Internals->ManagedObjects.push_back(amrMesh); #endif @@ -685,12 +685,12 @@ int AmrDataAdaptor::GetMesh(const std::string &meshName, int cboxLo[3] = {AMREX_ARLIM(cbox.loVect())}; int cboxHi[3] = {AMREX_ARLIM(cbox.hiVect())}; - // vtk's representation of box metadata - vtkAMRBox block(cboxLo, cboxHi); + // svtk's representation of box metadata + svtkAMRBox block(cboxLo, cboxHi); amrMesh->SetAMRBox(i, j, block); amrMesh->SetAMRBlockSourceIndex(i, j, gid++); - // skip building a vtk amrMesh for the non local boxes + // skip building a svtk amrMesh for the non local boxes if (dmap[j] != rank) continue; @@ -705,14 +705,14 @@ int AmrDataAdaptor::GetMesh(const std::string &meshName, int nboxLo[3] = {AMREX_ARLIM(nbox.loVect())}; int nboxHi[3] = {AMREX_ARLIM(nbox.hiVect())}; - // new vtk uniform amrMesh, node centered - vtkUniformGrid *ug = vtkUniformGrid::New(); + // new svtk uniform amrMesh, node centered + svtkUniformGrid *ug = svtkUniformGrid::New(); ug->SetOrigin(origin); ug->SetSpacing(spacing); ug->SetExtent(nboxLo[0], nboxHi[0], nboxLo[1], nboxHi[1], nboxLo[2], nboxHi[2]); - // pass the block into vtk + // pass the block into svtk amrMesh->SetDataSet(i, j, ug); ug->Delete(); } @@ -722,7 +722,7 @@ int AmrDataAdaptor::GetMesh(const std::string &meshName, } //----------------------------------------------------------------------------- -int AmrDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, +int AmrDataAdaptor::AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) { sensei::TimeEvent<64> event("AmrDataAdaptor::AddGhostCellsArray"); @@ -733,7 +733,7 @@ int 
AmrDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, return -1; } - vtkOverlappingAMR *amrMesh = dynamic_cast(mesh); + svtkOverlappingAMR *amrMesh = dynamic_cast(mesh); if (!amrMesh) { SENSEI_ERROR("Invalid mesh type " @@ -780,7 +780,7 @@ int AmrDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, if (dMap[j] != rank) continue; - vtkUniformGrid *blockMesh = amrMesh->GetDataSet(i, j); + svtkUniformGrid *blockMesh = amrMesh->GetDataSet(i, j); if (!blockMesh) { @@ -790,24 +790,24 @@ int AmrDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, long nCells = blockMesh->GetNumberOfCells(); - // transfer mask array into vtk - vtkUnsignedCharArray *ga = vtkUnsignedCharArray::New(); - ga->SetName("vtkGhostType"); + // transfer mask array into svtk + svtkUnsignedCharArray *ga = svtkUnsignedCharArray::New(); + ga->SetName("svtkGhostType"); ga->SetArray(mask[j], nCells, 0); blockMesh->GetCellData()->AddArray(ga); ga->Delete(); // for debug can visualize the ghost cells // FIXME -- a bug in Catalyst ignores internal ghost zones - // when using the VTK writrer. Until that bug gets fixed, one + // when using the SVTK writer.
Until that bug gets fixed, one // can manually inject this copy using a PV Python filter - ga = vtkUnsignedCharArray::New(); + ga = svtkUnsignedCharArray::New(); ga->SetName("GhostType"); ga->SetArray(mask[j], nCells, 1); blockMesh->GetCellData()->AddArray(ga); ga->Delete(); - // because VTK takes ownership + // because SVTK takes ownership mask[j] = nullptr; } } @@ -816,7 +816,7 @@ int AmrDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, } //----------------------------------------------------------------------------- -int AmrDataAdaptor::AddGhostNodesArray(vtkDataObject *mesh, +int AmrDataAdaptor::AddGhostNodesArray(svtkDataObject *mesh, const std::string &meshName) { amrex::ignore_unused(mesh); @@ -834,7 +834,7 @@ int AmrDataAdaptor::AddGhostNodesArray(vtkDataObject *mesh, } //----------------------------------------------------------------------------- -int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, +int AmrDataAdaptor::AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) { sensei::TimeEvent<64> event("AmrDataAdaptor::AddArray"); @@ -848,7 +848,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, return -1; } - vtkOverlappingAMR *amrMesh = dynamic_cast(mesh); + svtkOverlappingAMR *amrMesh = dynamic_cast(mesh); if (!amrMesh) { SENSEI_ERROR("Invalid mesh type " @@ -861,8 +861,8 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, return -1; } - if ((association != vtkDataObject::CELL) && - (association != vtkDataObject::POINT)) + if ((association != svtkDataObject::CELL) && + (association != svtkDataObject::POINT)) { SENSEI_ERROR("Invalid association " << association) return -1; @@ -878,7 +878,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, if (this->Internals->SimMetadata.GetIndex(arrayName, association, fab, comp)) { SENSEI_ERROR("Failed to locate descriptor for " - << 
sensei::VTKUtils::GetAttributesName(association) + << sensei::SVTKUtils::GetAttributesName(association) << " data array \"" << arrayName << "\"") return -1; } @@ -894,8 +894,8 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, amrex::MultiFab& state = levels[i]->get_new_data(fab); unsigned int ng = state.nGrow(); - if (!((association == vtkDataObject::CELL) && state.is_cell_centered()) && - !((association == vtkDataObject::POINT) && state.is_nodal())) + if (!((association == svtkDataObject::CELL) && state.is_cell_centered()) && + !((association == svtkDataObject::POINT) && state.is_nodal())) { SENSEI_ERROR("association does not match MultiFAB centering") return -1; @@ -926,7 +926,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, int cboxLo[3] = {AMREX_ARLIM(cbox.loVect())}; int cboxHi[3] = {AMREX_ARLIM(cbox.hiVect())}; - // skip building a vtk mesh for the non local boxes + // skip building a svtk mesh for the non local boxes if (dmap[j] != rank) continue; @@ -938,7 +938,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, int nboxHi[3] = {AMREX_ARLIM(nbox.hiVect())}; // get the block mesh - vtkUniformGrid *ug = amrMesh->GetDataSet(i, j); + svtkUniformGrid *ug = amrMesh->GetDataSet(i, j); // node centered size long nlen = 1; @@ -953,9 +953,9 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, // pointer to the data amrex_real *pcd = state[j].dataPtr(comp); - // allocate vtk array - InSituUtils::amrex_tt::vtk_type *da = - InSituUtils::amrex_tt::vtk_type::New(); + // allocate svtk array + InSituUtils::amrex_tt::svtk_type *da = + InSituUtils::amrex_tt::svtk_type::New(); // set component name da->SetName(arrayName.c_str()); @@ -981,7 +981,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, #if defined(SENSEI_DEBUG) // mark level id - vtkFloatArray *la = vtkFloatArray::New(); + svtkFloatArray *la = 
svtkFloatArray::New(); la->SetName("amrex_level_id"); la->SetNumberOfTuples(clen); la->Fill(i); @@ -989,7 +989,7 @@ int AmrDataAdaptor::AddArray(vtkDataObject* mesh, const std::string &meshName, la->Delete(); // mark mpi rank - vtkFloatArray *ra = vtkFloatArray::New(); + svtkFloatArray *ra = svtkFloatArray::New(); ra->SetName("amrex_mpi_rank"); ra->SetNumberOfTuples(clen); ra->Fill(rank); diff --git a/Src/Extern/SENSEI/AMReX_AmrInSituBridge.cpp b/Src/Extern/SENSEI/AMReX_AmrInSituBridge.cpp index 26f63d2a101..018669a4bfc 100644 --- a/Src/Extern/SENSEI/AMReX_AmrInSituBridge.cpp +++ b/Src/Extern/SENSEI/AMReX_AmrInSituBridge.cpp @@ -29,7 +29,7 @@ AmrInSituBridge::update(Amr *dataSource) data_adaptor->SetDataSource(dataSource); data_adaptor->SetDataTime(dataSource->cumTime()); data_adaptor->SetDataTimeStep(dataSource->levelSteps(0)); - ret = analysis_adaptor->Execute(data_adaptor) ? 0 : -1; + ret = analysis_adaptor->Execute(data_adaptor, nullptr) ? 0 : -1; data_adaptor->ReleaseData(); data_adaptor->Delete(); diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.H b/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.H index 5a8a88552af..54277505bd4 100644 --- a/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.H +++ b/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.H @@ -34,10 +34,10 @@ public: int GetNumberOfArrays(const std::string &meshName, int association, unsigned int &numberOfArrays) override; int GetArrayName(const std::string &meshName, int association, unsigned int index, std::string &arrayName) override; #endif - int GetMesh(const std::string &meshName, bool structureOnly, vtkDataObject *&mesh) override; - int AddGhostNodesArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddGhostCellsArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddArray(vtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; + int GetMesh(const std::string &meshName, bool structureOnly, svtkDataObject *&mesh) 
override; + int AddGhostNodesArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; int ReleaseData() override; protected: diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.cpp b/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.cpp index 2e4968cc8b2..34b92c1d25d 100644 --- a/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.cpp +++ b/Src/Extern/SENSEI/AMReX_AmrMeshDataAdaptor.cpp @@ -2,18 +2,18 @@ #include "Profiler.h" #include "Error.h" -#include "VTKUtils.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "SVTKUtils.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -58,11 +58,11 @@ int MeshStateMap::Initialize( if (state.is_cell_centered()) { - this->Map[vtkDataObject::CELL][arrayName] = std::make_pair(i,j); + this->Map[svtkDataObject::CELL][arrayName] = std::make_pair(i,j); } else if (state.is_nodal()) { - this->Map[vtkDataObject::POINT][arrayName] = std::make_pair(i,j); + this->Map[svtkDataObject::POINT][arrayName] = std::make_pair(i,j); } } } @@ -83,7 +83,7 @@ struct AmrMeshDataAdaptor::InternalsType std::vector> Names; amrex::InSituUtils::MeshStateMap StateMetadata; #if SENSEI_VERSION_MAJOR < 3 - std::vector ManagedObjects; + std::vector ManagedObjects; #endif }; @@ -149,13 +149,13 @@ int AmrMeshDataAdaptor::GetMeshMetadata(unsigned int id, metadata->GlobalView = true; metadata->MeshName = "mesh"; - metadata->MeshType = VTK_OVERLAPPING_AMR; - metadata->BlockType = VTK_UNIFORM_GRID; + metadata->MeshType = SVTK_OVERLAPPING_AMR; + metadata->BlockType = SVTK_UNIFORM_GRID; metadata->NumBlocks = 0; metadata->NumCells = 0; metadata->NumPoints = 0; metadata->NumBlocksLocal = {-1}; - metadata->CoordinateType = 
InSituUtils::amrex_tt::vtk_type_enum(); + metadata->CoordinateType = InSituUtils::amrex_tt::svtk_type_enum(); metadata->StaticMesh = 0; // num levels @@ -224,7 +224,7 @@ int AmrMeshDataAdaptor::GetMeshMetadata(unsigned int id, {pdLo[0], pdHi[0], pdLo[1], pdHi[1], pdLo[2], pdHi[2]}); } - // global extent (note: VTK uses point centered indexing) + // global extent (note: SVTK uses point centered indexing) const amrex::Box& cdom = this->Internals->Mesh->Geom(0).Domain(); amrex::Box ndom = surroundingNodes(cdom); @@ -261,19 +261,19 @@ int AmrMeshDataAdaptor::GetMeshMetadata(unsigned int id, // scalar, vector, tensor metadata->ArrayComponents[j] = 1; // POD type - metadata->ArrayType[j] = InSituUtils::amrex_tt::vtk_type_enum(); + metadata->ArrayType[j] = InSituUtils::amrex_tt::svtk_type_enum(); // mesh centering if (state0.is_cell_centered()) { - metadata->ArrayCentering[j] = vtkDataObject::CELL; + metadata->ArrayCentering[j] = svtkDataObject::CELL; } else if (state0.is_nodal()) { - metadata->ArrayCentering[j] = vtkDataObject::POINT; + metadata->ArrayCentering[j] = svtkDataObject::POINT; } else { - metadata->ArrayCentering[j] = vtkDataObject::FIELD; + metadata->ArrayCentering[j] = svtkDataObject::FIELD; } } @@ -396,8 +396,8 @@ int AmrMeshDataAdaptor::GetNumberOfArrays(const std::string &meshName, return -1; } - if ((association != vtkDataObject::POINT) && - (association != vtkDataObject::CELL)) + if ((association != svtkDataObject::POINT) && + (association != svtkDataObject::CELL)) { SENSEI_ERROR("Invalid association " << association) return -1; @@ -427,7 +427,7 @@ int AmrMeshDataAdaptor::GetArrayName(const std::string &meshName, if (this->Internals->StateMetadata.GetName(association, index, arrayName)) { SENSEI_ERROR("No array named \"" << arrayName << "\" in " - << sensei::VTKUtils::GetAttributesName(association) + << sensei::SVTKUtils::GetAttributesName(association) << " data") return -1; } @@ -475,7 +475,7 @@ int AmrMeshDataAdaptor::GetMeshHasGhostCells(const 
std::string &meshName, int &n //----------------------------------------------------------------------------- int AmrMeshDataAdaptor::GetMesh(const std::string &meshName, - bool structureOnly, vtkDataObject *&mesh) + bool structureOnly, svtkDataObject *&mesh) { amrex::ignore_unused(structureOnly); @@ -498,8 +498,8 @@ int AmrMeshDataAdaptor::GetMesh(const std::string &meshName, int nLevels = this->Internals->Mesh->finestLevel() + 1; - // initialize new vtk datasets - vtkOverlappingAMR *amrMesh = vtkOverlappingAMR::New(); + // initialize new svtk datasets + svtkOverlappingAMR *amrMesh = svtkOverlappingAMR::New(); #if SENSEI_VERSION_MAJOR < 3 Internals->ManagedObjects.push_back(amrMesh); #endif @@ -560,12 +560,12 @@ int AmrMeshDataAdaptor::GetMesh(const std::string &meshName, int cboxLo[3] = {AMREX_ARLIM(cbox.loVect())}; int cboxHi[3] = {AMREX_ARLIM(cbox.hiVect())}; - // vtk's representation of box metadata - vtkAMRBox block(cboxLo, cboxHi); + // svtk's representation of box metadata + svtkAMRBox block(cboxLo, cboxHi); amrMesh->SetAMRBox(i, j, block); amrMesh->SetAMRBlockSourceIndex(i, j, gid++); - // skip building a vtk amrMesh for the non local boxes + // skip building a svtk amrMesh for the non local boxes if (dmap[j] != rank) continue; @@ -580,14 +580,14 @@ int AmrMeshDataAdaptor::GetMesh(const std::string &meshName, int nboxLo[3] = {AMREX_ARLIM(nbox.loVect())}; int nboxHi[3] = {AMREX_ARLIM(nbox.hiVect())}; - // new vtk uniform amrMesh, node centered - vtkUniformGrid *ug = vtkUniformGrid::New(); + // new svtk uniform amrMesh, node centered + svtkUniformGrid *ug = svtkUniformGrid::New(); ug->SetOrigin(origin); ug->SetSpacing(spacing); ug->SetExtent(nboxLo[0], nboxHi[0], nboxLo[1], nboxHi[1], nboxLo[2], nboxHi[2]); - // pass the block into vtk + // pass the block into svtk amrMesh->SetDataSet(i, j, ug); ug->Delete(); } @@ -597,7 +597,7 @@ int AmrMeshDataAdaptor::GetMesh(const std::string &meshName, } 
//----------------------------------------------------------------------------- -int AmrMeshDataAdaptor::AddGhostNodesArray(vtkDataObject *mesh, +int AmrMeshDataAdaptor::AddGhostNodesArray(svtkDataObject *mesh, const std::string &meshName) { amrex::ignore_unused(mesh); @@ -613,7 +613,7 @@ int AmrMeshDataAdaptor::AddGhostNodesArray(vtkDataObject *mesh, } //----------------------------------------------------------------------------- -int AmrMeshDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, +int AmrMeshDataAdaptor::AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) { if (meshName != "mesh") @@ -622,7 +622,7 @@ int AmrMeshDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, return -1; } - vtkOverlappingAMR *amrMesh = dynamic_cast(mesh); + svtkOverlappingAMR *amrMesh = dynamic_cast(mesh); if (!amrMesh) { SENSEI_ERROR("Invalid mesh type " @@ -701,7 +701,7 @@ int AmrMeshDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, if (dmap[j] != rank) continue; - vtkUniformGrid *blockMesh = amrMesh->GetDataSet(i, j); + svtkUniformGrid *blockMesh = amrMesh->GetDataSet(i, j); if (!blockMesh) { @@ -711,18 +711,18 @@ int AmrMeshDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, long nCells = blockMesh->GetNumberOfCells(); - // transfer mask array into vtk - vtkUnsignedCharArray *ga = vtkUnsignedCharArray::New(); - ga->SetName("vtkGhostType"); + // transfer mask array into svtk + svtkUnsignedCharArray *ga = svtkUnsignedCharArray::New(); + ga->SetName("svtkGhostType"); ga->SetArray(mask[j], nCells, 0); blockMesh->GetCellData()->AddArray(ga); ga->Delete(); // for debug can visualize the ghost cells // FIXME -- a bug in Catalyst ignores internal ghost zones - // when using the VTK writrer. Until that bug gets fixed, one + // when using the SVTK writer. 
Until that bug gets fixed, one // can manually inject this copy using a PV Python filter - ga = vtkUnsignedCharArray::New(); + ga = svtkUnsignedCharArray::New(); ga->SetName("GhostType"); ga->SetArray(mask[j], nCells, 1); blockMesh->GetCellData()->AddArray(ga); @@ -734,7 +734,7 @@ int AmrMeshDataAdaptor::AddGhostCellsArray(vtkDataObject* mesh, } //----------------------------------------------------------------------------- -int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, +int AmrMeshDataAdaptor::AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) { @@ -747,7 +747,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, return -1; } - vtkOverlappingAMR *amrMesh = dynamic_cast(mesh); + svtkOverlappingAMR *amrMesh = dynamic_cast(mesh); if (!amrMesh) { SENSEI_ERROR("Invalid mesh type " @@ -760,8 +760,8 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, return -1; } - if ((association != vtkDataObject::CELL) && - (association != vtkDataObject::CELL)) + if ((association != svtkDataObject::CELL) && + (association != svtkDataObject::CELL)) { SENSEI_ERROR("Invalid association " << association) return -1; @@ -774,7 +774,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, if (this->Internals->StateMetadata.GetIndex(arrayName, association, fab, comp)) { SENSEI_ERROR("Failed to locate descriptor for " - << sensei::VTKUtils::GetAttributesName(association) + << sensei::SVTKUtils::GetAttributesName(association) << " data array \"" << arrayName << "\"") return -1; } @@ -792,8 +792,8 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, unsigned int ng = state.nGrow(); // check centering - if (!((association == vtkDataObject::CELL) && state.is_cell_centered()) && - !((association == vtkDataObject::POINT) && state.is_nodal())) + if (!((association == svtkDataObject::CELL) && state.is_cell_centered()) && + !((association == svtkDataObject::POINT) && state.is_nodal())) { SENSEI_ERROR("association does not 
match MultiFab centering") return -1; @@ -824,7 +824,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, int cboxLo[3] = {AMREX_ARLIM(cbox.loVect())}; int cboxHi[3] = {AMREX_ARLIM(cbox.hiVect())}; - // skip building a vtk mesh for the non local boxes + // skip building a svtk mesh for the non local boxes if (dmap[j] != rank) continue; @@ -836,7 +836,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, int nboxHi[3] = {AMREX_ARLIM(nbox.hiVect())}; // get the block mesh - vtkUniformGrid *ug = amrMesh->GetDataSet(i, j); + svtkUniformGrid *ug = amrMesh->GetDataSet(i, j); // node centered size long nlen = 1; @@ -851,9 +851,9 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, // pointer to the data amrex_real *pcd = state[j].dataPtr(comp); - // allocate vtk array - InSituUtils::amrex_tt::vtk_type *da = - InSituUtils::amrex_tt::vtk_type::New(); + // allocate svtk array + InSituUtils::amrex_tt::svtk_type *da = + InSituUtils::amrex_tt::svtk_type::New(); // set component name da->SetName(arrayName.c_str()); @@ -879,7 +879,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, #if defined(SENSEI_DEBUG) // mark level id - vtkFloatArray *la = vtkFloatArray::New(); + svtkFloatArray *la = svtkFloatArray::New(); la->SetName("amrex_level_id"); la->SetNumberOfTuples(clen); la->Fill(i); @@ -887,7 +887,7 @@ int AmrMeshDataAdaptor::AddArray(vtkDataObject* mesh, la->Delete(); // mark mpi rank - vtkFloatArray *ra = vtkFloatArray::New(); + svtkFloatArray *ra = svtkFloatArray::New(); ra->SetName("amrex_mpi_rank"); ra->SetNumberOfTuples(clen); ra->Fill(rank); diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshInSituBridge.cpp b/Src/Extern/SENSEI/AMReX_AmrMeshInSituBridge.cpp index 55adb1b5c59..cd6b6794171 100644 --- a/Src/Extern/SENSEI/AMReX_AmrMeshInSituBridge.cpp +++ b/Src/Extern/SENSEI/AMReX_AmrMeshInSituBridge.cpp @@ -35,7 +35,7 @@ AmrMeshInSituBridge::update(unsigned int step, double time, data_adaptor->SetDataSource(mesh, states, names); 
data_adaptor->SetDataTime(time); data_adaptor->SetDataTimeStep(step); - ret = analysis_adaptor->Execute(data_adaptor) ? 0 : -1; + ret = analysis_adaptor->Execute(data_adaptor, nullptr) ? 0 : -1; data_adaptor->ReleaseData(); data_adaptor->Delete(); diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptor.H b/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptor.H index 61e4d510745..fbd5227824f 100644 --- a/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptor.H +++ b/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptor.H @@ -45,10 +45,10 @@ public: int GetArrayName(const std::string &meshName, int association, unsigned int index, std::string &arrayName) override; #endif int GetNumberOfMeshes(unsigned int &numMeshes) override; - int GetMesh(const std::string &meshName, bool structureOnly, vtkDataObject *&mesh) override; - int AddGhostNodesArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddGhostCellsArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddArray(vtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; + int GetMesh(const std::string &meshName, bool structureOnly, svtkDataObject *&mesh) override; + int AddGhostNodesArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; int ReleaseData() override; protected: diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptorI.H b/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptorI.H index a93357d5043..4cbb53203b6 100644 --- a/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptorI.H +++ b/Src/Extern/SENSEI/AMReX_AmrMeshParticleDataAdaptorI.H @@ -148,7 +148,7 @@ template int AmrMeshParticleDataAdaptor::GetMesh( const std::string &meshName, bool structureOnly, - vtkDataObject *&mesh) + svtkDataObject *&mesh) 
{ if(meshName == m_meshName) { @@ -164,7 +164,7 @@ int AmrMeshParticleDataAdaptor:: template int AmrMeshParticleDataAdaptor::AddGhostNodesArray( - vtkDataObject* mesh, + svtkDataObject* mesh, const std::string &meshName) { if(meshName == m_meshName) @@ -181,7 +181,7 @@ int AmrMeshParticleDataAdaptor:: template int AmrMeshParticleDataAdaptor::AddGhostCellsArray( - vtkDataObject* mesh, + svtkDataObject* mesh, const std::string &meshName) { if(meshName == m_meshName) @@ -198,7 +198,7 @@ int AmrMeshParticleDataAdaptor:: template int AmrMeshParticleDataAdaptor::AddArray( - vtkDataObject* mesh, + svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) diff --git a/Src/Extern/SENSEI/AMReX_AmrMeshParticleInSituBridge.H b/Src/Extern/SENSEI/AMReX_AmrMeshParticleInSituBridge.H index bede5908cdc..9208c8a753b 100644 --- a/Src/Extern/SENSEI/AMReX_AmrMeshParticleInSituBridge.H +++ b/Src/Extern/SENSEI/AMReX_AmrMeshParticleInSituBridge.H @@ -86,7 +86,7 @@ int AmrMeshParticleInSituBridge::update( data_adaptor->SetDataTime(time); data_adaptor->SetDataTimeStep(step); - ret = analysis_adaptor->Execute(data_adaptor) ? 0 : -1; + ret = analysis_adaptor->Execute(data_adaptor, nullptr) ? 
0 : -1; data_adaptor->ReleaseData(); data_adaptor->Delete(); diff --git a/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptor.H b/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptor.H index 886a7df6d18..3f7a945e019 100644 --- a/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptor.H +++ b/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptor.H @@ -43,10 +43,10 @@ public: int GetArrayName(const std::string &meshName, int association, unsigned int index, std::string &arrayName) override; #endif int GetNumberOfMeshes(unsigned int &numMeshes) override; - int GetMesh(const std::string &meshName, bool structureOnly, vtkDataObject *&mesh) override; - int AddGhostNodesArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddGhostCellsArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddArray(vtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; + int GetMesh(const std::string &meshName, bool structureOnly, svtkDataObject *&mesh) override; + int AddGhostNodesArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; int ReleaseData() override; protected: diff --git a/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptorI.H b/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptorI.H index 813466fc0f8..9035cd0c39c 100644 --- a/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptorI.H +++ b/Src/Extern/SENSEI/AMReX_AmrParticleDataAdaptorI.H @@ -146,7 +146,7 @@ template int AmrParticleDataAdaptor::GetMesh( const std::string &meshName, bool structureOnly, - vtkDataObject *&mesh) + svtkDataObject *&mesh) { if(meshName == m_meshName) { @@ -162,7 +162,7 @@ int AmrParticleDataAdaptor::GetM template int AmrParticleDataAdaptor::AddGhostNodesArray( - vtkDataObject* mesh, + svtkDataObject* mesh, const std::string &meshName) { 
if(meshName == m_meshName) @@ -179,7 +179,7 @@ int AmrParticleDataAdaptor::AddG template int AmrParticleDataAdaptor::AddGhostCellsArray( - vtkDataObject* mesh, + svtkDataObject* mesh, const std::string &meshName) { if(meshName == m_meshName) @@ -196,7 +196,7 @@ int AmrParticleDataAdaptor::AddG template int AmrParticleDataAdaptor::AddArray( - vtkDataObject* mesh, + svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) diff --git a/Src/Extern/SENSEI/AMReX_InSituUtils.H b/Src/Extern/SENSEI/AMReX_InSituUtils.H index e7c212d7b4e..2799e21b367 100644 --- a/Src/Extern/SENSEI/AMReX_InSituUtils.H +++ b/Src/Extern/SENSEI/AMReX_InSituUtils.H @@ -2,10 +2,10 @@ #define AMReX_InSituUtils_H #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include @@ -47,21 +47,21 @@ enum PointGhostTypes -// traits helper for mapping between amrex_real and vtkDataArray +// traits helper for mapping between amrex_real and svtkDataArray template struct amrex_tt {}; -#define amrex_tt_specialize(cpp_t, vtk_t, vtk_t_e) \ +#define amrex_tt_specialize(cpp_t, svtk_t, svtk_t_e) \ template <> \ struct amrex_tt \ { \ - using vtk_type = vtk_t; \ + using svtk_type = svtk_t; \ \ static \ - constexpr int vtk_type_enum() { return vtk_t_e; } \ + constexpr int svtk_type_enum() { return svtk_t_e; } \ }; -amrex_tt_specialize(float, vtkFloatArray, VTK_FLOAT) -amrex_tt_specialize(double, vtkDoubleArray, VTK_DOUBLE) +amrex_tt_specialize(float, svtkFloatArray, SVTK_FLOAT) +amrex_tt_specialize(double, svtkDoubleArray, SVTK_DOUBLE) // helpers to modify values diff --git a/Src/Extern/SENSEI/AMReX_InSituUtils.cpp b/Src/Extern/SENSEI/AMReX_InSituUtils.cpp index 64c429e8bb6..d13c8738aeb 100644 --- a/Src/Extern/SENSEI/AMReX_InSituUtils.cpp +++ b/Src/Extern/SENSEI/AMReX_InSituUtils.cpp @@ -1,7 +1,7 @@ #include "AMReX_InSituUtils.H" #include "Error.h" -#include "VTKUtils.h" +#include "SVTKUtils.h" namespace amrex { namespace InSituUtils { 
@@ -14,7 +14,7 @@ int StateMap::GetIndex(const std::string &name, int centering, if (cit == this->Map.end()) { - SENSEI_ERROR("No " << sensei::VTKUtils::GetAttributesName(centering) + SENSEI_ERROR("No " << sensei::SVTKUtils::GetAttributesName(centering) << " arrays") return -1; } @@ -23,7 +23,7 @@ int StateMap::GetIndex(const std::string &name, int centering, if (nit == cit->second.end()) { SENSEI_ERROR("No array named \"" << name << "\" in " - << sensei::VTKUtils::GetAttributesName(centering) + << sensei::SVTKUtils::GetAttributesName(centering) << " centered data") return -1; } @@ -41,7 +41,7 @@ int StateMap::GetName(int centering, int id, std::string &name) if (cit == this->Map.end()) { - SENSEI_ERROR("No " << sensei::VTKUtils::GetAttributesName(centering) + SENSEI_ERROR("No " << sensei::SVTKUtils::GetAttributesName(centering) << " arrays") return -1; } diff --git a/Src/Extern/SENSEI/AMReX_ParticleDataAdaptor.H b/Src/Extern/SENSEI/AMReX_ParticleDataAdaptor.H index 73ca142ec0b..f284b15831b 100644 --- a/Src/Extern/SENSEI/AMReX_ParticleDataAdaptor.H +++ b/Src/Extern/SENSEI/AMReX_ParticleDataAdaptor.H @@ -8,7 +8,7 @@ #include #include -class vtkPolyData; +class svtkPolyData; namespace amrex { @@ -40,22 +40,22 @@ public: void SetPinMesh(int val); // get particle id numbers - int AddParticlesIDArray(vtkDataObject* mesh); + int AddParticlesIDArray(svtkDataObject* mesh); // get particle cpu numbers (process each particle was generated on) - int AddParticlesCPUArray(vtkDataObject* mesh); + int AddParticlesCPUArray(svtkDataObject* mesh); // get particle integer arrays in Structs of Arrays format - int AddParticlesSOAIntArray(const std::string &arrayName, vtkDataObject* mesh); + int AddParticlesSOAIntArray(const std::string &arrayName, svtkDataObject* mesh); // get particle real arrays in Structs of Arrays format - int AddParticlesSOARealArray(const std::string &arrayName, vtkDataObject* mesh); + int AddParticlesSOARealArray(const std::string &arrayName, svtkDataObject* 
mesh); // get particle integer arrays in Array Of Structs format - int AddParticlesAOSIntArray(const std::string &arrayName, vtkDataObject* mesh); + int AddParticlesAOSIntArray(const std::string &arrayName, svtkDataObject* mesh); // get particle real arrays in Array Of Structs format - int AddParticlesAOSRealArray(const std::string &arrayName, vtkDataObject* mesh); + int AddParticlesAOSRealArray(const std::string &arrayName, svtkDataObject* mesh); // SENSEI API #if SENSEI_VERSION_MAJOR >= 3 @@ -68,10 +68,10 @@ public: int GetArrayName(const std::string &meshName, int association, unsigned int index, std::string &arrayName) override; #endif int GetNumberOfMeshes(unsigned int &numMeshes) override; - int GetMesh(const std::string &meshName, bool structureOnly, vtkDataObject *&mesh) override; - int AddGhostNodesArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddGhostCellsArray(vtkDataObject* mesh, const std::string &meshName) override; - int AddArray(vtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; + int GetMesh(const std::string &meshName, bool structureOnly, svtkDataObject *&mesh) override; + int AddGhostNodesArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddGhostCellsArray(svtkDataObject* mesh, const std::string &meshName) override; + int AddArray(svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) override; int ReleaseData() override; protected: @@ -79,7 +79,7 @@ protected: ~ParticleDataAdaptor() = default; private: - vtkPolyData* BuildParticles(); + svtkPolyData* BuildParticles(); const std::string m_particlesName = "particles"; diff --git a/Src/Extern/SENSEI/AMReX_ParticleDataAdaptorI.H b/Src/Extern/SENSEI/AMReX_ParticleDataAdaptorI.H index 26174f83e1b..8a2d15562d3 100644 --- a/Src/Extern/SENSEI/AMReX_ParticleDataAdaptorI.H +++ b/Src/Extern/SENSEI/AMReX_ParticleDataAdaptorI.H @@ -1,13 +1,13 @@ #include 
"Profiler.h" #include "Error.h" -#include "VTKUtils.h" +#include "SVTKUtils.h" #include "MeshMetadata.h" -// vtk includes -#include -#include -#include -#include -#include +// svtk includes +#include +#include +#include +#include +#include @@ -194,7 +194,7 @@ int ParticleDataAdaptor::GetNumb unsigned int &numberOfArrays) { numberOfArrays = 0; - if(association == vtkDataObject::POINT) + if(association == svtkDataObject::POINT) { numberOfArrays = m_realStructs.size() + m_intStructs.size() @@ -213,7 +213,7 @@ int ParticleDataAdaptor::GetArra unsigned int index, std::string &arrayName) { - if(association == vtkDataObject::POINT) + if(association == svtkDataObject::POINT) { if(index < m_realStructs.size()) { @@ -253,7 +253,7 @@ template int ParticleDataAdaptor::GetMesh( const std::string &meshName, bool structureOnly, - vtkDataObject *&mesh) + svtkDataObject *&mesh) { mesh = nullptr; int nprocs = 1; @@ -266,7 +266,7 @@ int ParticleDataAdaptor::GetMesh SENSEI_ERROR("No mesh named \"" << meshName << "\"") return -1; } - vtkMultiBlockDataSet* mb = vtkMultiBlockDataSet::New(); + svtkMultiBlockDataSet* mb = svtkMultiBlockDataSet::New(); if (structureOnly) { @@ -275,7 +275,7 @@ int ParticleDataAdaptor::GetMesh } mb->SetNumberOfBlocks(nprocs); - vtkPolyData *pd = BuildParticles(); + svtkPolyData *pd = BuildParticles(); mb->SetBlock(rank, pd); pd->Delete(); mesh = mb; @@ -286,7 +286,7 @@ int ParticleDataAdaptor::GetMesh //----------------------------------------------------------------------------- template int ParticleDataAdaptor::AddGhostNodesArray( - vtkDataObject*, + svtkDataObject*, const std::string &meshName) { if (meshName != m_particlesName) @@ -300,7 +300,7 @@ int ParticleDataAdaptor::AddGhos //----------------------------------------------------------------------------- template int ParticleDataAdaptor::AddGhostCellsArray( - vtkDataObject*, + svtkDataObject*, const std::string &meshName) { if (meshName != m_particlesName) @@ -314,7 +314,7 @@ int 
ParticleDataAdaptor::AddGhos //----------------------------------------------------------------------------- template int ParticleDataAdaptor::AddArray( - vtkDataObject* mesh, + svtkDataObject* mesh, const std::string &meshName, int association, const std::string &arrayName) @@ -325,7 +325,7 @@ int ParticleDataAdaptor::AddArra return -1; } - if (association != vtkDataObject::POINT) + if (association != svtkDataObject::POINT) { SENSEI_ERROR("Invalid association " << association); return -1; @@ -393,10 +393,10 @@ int ParticleDataAdaptor::GetMesh metadata->MeshName = m_particlesName; // container mesh type (all) - metadata->MeshType = VTK_MULTIBLOCK_DATA_SET; + metadata->MeshType = SVTK_MULTIBLOCK_DATA_SET; // block mesh type (all) - metadata->BlockType = VTK_POLY_DATA; + metadata->BlockType = SVTK_POLY_DATA; // global number of blocks (all) metadata->NumBlocks = nprocs; @@ -412,9 +412,9 @@ int ParticleDataAdaptor::GetMesh // type enum of point data (unstructured, optional) #ifdef AMREX_SINGLE_PRECISION_PARTICLES - metadata->CoordinateType = VTK_FLOAT; + metadata->CoordinateType = SVTK_FLOAT; #else - metadata->CoordinateType = VTK_DOUBLE; + metadata->CoordinateType = SVTK_DOUBLE; #endif // total number of points in all blocks (all, optional) @@ -467,19 +467,19 @@ int ParticleDataAdaptor::GetMesh metadata->ArrayCentering = {}; for(auto s : m_realStructs) { - metadata->ArrayCentering.push_back(vtkDataObject::POINT); + metadata->ArrayCentering.push_back(svtkDataObject::POINT); } for(auto s : m_intStructs) { - metadata->ArrayCentering.push_back(vtkDataObject::POINT); + metadata->ArrayCentering.push_back(svtkDataObject::POINT); } for(auto s : m_realArrays) { - metadata->ArrayCentering.push_back(vtkDataObject::POINT); + metadata->ArrayCentering.push_back(svtkDataObject::POINT); } for(auto s : m_intArrays) { - metadata->ArrayCentering.push_back(vtkDataObject::POINT); + metadata->ArrayCentering.push_back(svtkDataObject::POINT); } // number of components of each array (all) @@ 
-506,26 +506,26 @@ int ParticleDataAdaptor::GetMesh for(auto s : m_realStructs) { #ifdef AMREX_SINGLE_PRECISION_PARTICLES - metadata->ArrayType.push_back(VTK_FLOAT); + metadata->ArrayType.push_back(SVTK_FLOAT); #else - metadata->ArrayType.push_back(VTK_DOUBLE); + metadata->ArrayType.push_back(SVTK_DOUBLE); #endif } for(auto s : m_intStructs) { - metadata->ArrayType.push_back(VTK_INT); + metadata->ArrayType.push_back(SVTK_INT); } for(auto s : m_realArrays) { #ifdef AMREX_SINGLE_PRECISION_PARTICLES - metadata->ArrayType.push_back(VTK_FLOAT); + metadata->ArrayType.push_back(SVTK_FLOAT); #else - metadata->ArrayType.push_back(VTK_DOUBLE); + metadata->ArrayType.push_back(SVTK_DOUBLE); #endif } for(auto s : m_intArrays) { - metadata->ArrayType.push_back(VTK_INT); + metadata->ArrayType.push_back(SVTK_INT); } // global min,max of each array (all, optional) @@ -646,19 +646,19 @@ int ParticleDataAdaptor::GetMesh //----------------------------------------------------------------------------- template -vtkPolyData* ParticleDataAdaptor::BuildParticles() +svtkPolyData* ParticleDataAdaptor::BuildParticles() { // return particle data pd - vtkPolyData* pd = vtkPolyData::New(); + svtkPolyData* pd = svtkPolyData::New(); const auto& particles = this->m_particles->GetParticles(); long long numParticles = this->m_particles->TotalNumberOfParticles(true, true); // allocate vertex storage for particles #ifdef AMREX_SINGLE_PRECISION_PARTICLES - vtkNew coords; + svtkNew coords; #else - vtkNew coords; + svtkNew coords; #endif coords->SetName("coords"); coords->SetNumberOfComponents(3); @@ -669,12 +669,12 @@ vtkPolyData* ParticleDataAdaptor double *pCoords = coords->GetPointer(0); #endif - // use this to index into the VTK array as we copy level by level and tile by + // use this to index into the SVTK array as we copy level by level and tile by // tile long long ptId = 0; // allocate connectivity array for particles - vtkNew vertex; + svtkNew vertex; vertex->AllocateExact(numParticles, 1); // 
points->SetNumberOfPoints(numParticles); @@ -717,8 +717,8 @@ vtkPolyData* ParticleDataAdaptor } } - // pass the particle coordinates into VTK's point data structure. - vtkNew points; + // pass the particle coordinates into SVTK's point data structure. + svtkNew points; points->SetData(coords); // add point and vertex data to output mesh @@ -731,14 +731,14 @@ vtkPolyData* ParticleDataAdaptor //----------------------------------------------------------------------------- template int ParticleDataAdaptor::AddParticlesIDArray( - vtkDataObject* mesh) + svtkDataObject* mesh) { - auto vtk_particles = dynamic_cast(mesh); + auto svtk_particles = dynamic_cast(mesh); const auto& particles = this->m_particles->GetParticles(); auto nptsOnProc = this->m_particles->TotalNumberOfParticles(true, true); - // allocate a VTK array for the data - vtkNew idArray; + // allocate a SVTK array for the data + svtkNew idArray; idArray->SetName("id"); idArray->SetNumberOfComponents(1); idArray->SetNumberOfValues(nptsOnProc); @@ -767,8 +767,8 @@ int ParticleDataAdaptor::AddPart } } - // the association for this array is vtkDataObject::POINT - vtk_particles->GetPointData()->AddArray(idArray); + // the association for this array is svtkDataObject::POINT + svtk_particles->GetPointData()->AddArray(idArray); return 0; } @@ -776,14 +776,14 @@ int ParticleDataAdaptor::AddPart //----------------------------------------------------------------------------- template int ParticleDataAdaptor::AddParticlesCPUArray( - vtkDataObject* mesh) + svtkDataObject* mesh) { - auto vtk_particles = dynamic_cast(mesh); + auto svtk_particles = dynamic_cast(mesh); const auto& particles = this->m_particles->GetParticles(); auto nptsOnProc = this->m_particles->TotalNumberOfParticles(true, true); - // allocate a VTK array for the data - vtkNew cpuArray; + // allocate a SVTK array for the data + svtkNew cpuArray; cpuArray->SetName("cpu"); cpuArray->SetNumberOfComponents(1); cpuArray->SetNumberOfValues(nptsOnProc); @@ -811,8 
+811,8 @@ int ParticleDataAdaptor::AddPart } } - // the association for this array is vtkDataObject::POINT - vtk_particles->GetPointData()->AddArray(cpuArray); + // the association for this array is svtkDataObject::POINT + svtk_particles->GetPointData()->AddArray(cpuArray); return 0; } @@ -821,7 +821,7 @@ int ParticleDataAdaptor::AddPart template int ParticleDataAdaptor::AddParticlesSOARealArray( const std::string &arrayName, - vtkDataObject* mesh) + svtkDataObject* mesh) { const long nParticles = this->m_particles->TotalNumberOfParticles(true, true); @@ -847,11 +847,11 @@ int ParticleDataAdaptor::AddPart } } - // allocate the vtkArray + // allocate the svtkArray #ifdef AMREX_SINGLE_PRECISION_PARTICLES - vtkNew data; + svtkNew data; #else - vtkNew data; + svtkNew data; #endif data->SetName(arrayName.c_str()); data->SetNumberOfComponents(nComps); @@ -896,9 +896,9 @@ int ParticleDataAdaptor::AddPart int rank = 0; MPI_Comm_rank(this->GetCommunicator(), &rank); - auto blocks = dynamic_cast(mesh); + auto blocks = dynamic_cast(mesh); - auto block = dynamic_cast(blocks->GetBlock(rank)); + auto block = dynamic_cast(blocks->GetBlock(rank)); block->GetPointData()->AddArray(data); return 0; @@ -908,7 +908,7 @@ int ParticleDataAdaptor::AddPart template int ParticleDataAdaptor::AddParticlesSOAIntArray( const std::string &arrayName, - vtkDataObject* mesh) + svtkDataObject* mesh) { // get the particles from the particle container auto nptsOnProc = this->m_particles->TotalNumberOfParticles(true, true); @@ -931,7 +931,7 @@ int ParticleDataAdaptor::AddPart return -1; } - vtkNew data; + svtkNew data; data->SetName(arrayName.c_str()); data->SetNumberOfComponents(1); data->SetNumberOfValues(nptsOnProc); @@ -967,9 +967,9 @@ int ParticleDataAdaptor::AddPart int rank = 0; MPI_Comm_rank(this->GetCommunicator(), &rank); - auto blocks = dynamic_cast(mesh); + auto blocks = dynamic_cast(mesh); - auto block = dynamic_cast(blocks->GetBlock(rank)); + auto block = 
dynamic_cast(blocks->GetBlock(rank)); block->GetPointData()->AddArray(data); return 0; @@ -979,7 +979,7 @@ int ParticleDataAdaptor::AddPart template int ParticleDataAdaptor::AddParticlesAOSRealArray( const std::string &arrayName, - vtkDataObject* mesh) + svtkDataObject* mesh) { // get the particles from the particle container const auto& particles = this->m_particles->GetParticles(); @@ -1007,11 +1007,11 @@ int ParticleDataAdaptor::AddPart } } - // allocate the vtk array + // allocate the svtk array #ifdef AMREX_SINGLE_PRECISION_PARTICLES - vtkNew data; + svtkNew data; #else - vtkNew data; + svtkNew data; #endif data->SetName(arrayName.c_str()); @@ -1053,9 +1053,9 @@ int ParticleDataAdaptor::AddPart int rank = 0; MPI_Comm_rank(this->GetCommunicator(), &rank); - auto blocks = dynamic_cast(mesh); + auto blocks = dynamic_cast(mesh); - auto block = dynamic_cast(blocks->GetBlock(rank)); + auto block = dynamic_cast(blocks->GetBlock(rank)); block->GetPointData()->AddArray(data); return 0; @@ -1065,7 +1065,7 @@ int ParticleDataAdaptor::AddPart template int ParticleDataAdaptor::AddParticlesAOSIntArray( const std::string &arrayName, - vtkDataObject* mesh) + svtkDataObject* mesh) { // get the particles from the particle container const auto& particles = this->m_particles->GetParticles(); @@ -1090,8 +1090,8 @@ int ParticleDataAdaptor::AddPart return -1; } - // allocate vtkArray - vtkNew data; + // allocate svtkArray + svtkNew data; data->SetName(arrayName.c_str()); data->SetNumberOfComponents(1); data->SetNumberOfValues(nptsOnProc); @@ -1121,9 +1121,9 @@ int ParticleDataAdaptor::AddPart int rank = 0; MPI_Comm_rank(this->GetCommunicator(), &rank); - auto blocks = dynamic_cast(mesh); + auto blocks = dynamic_cast(mesh); - auto block = dynamic_cast(blocks->GetBlock(rank)); + auto block = dynamic_cast(blocks->GetBlock(rank)); block->GetPointData()->AddArray(data); diff --git a/Src/LinearSolvers/CMakeLists.txt b/Src/LinearSolvers/CMakeLists.txt index bbefab67999..63de2af0113 100644 
--- a/Src/LinearSolvers/CMakeLists.txt +++ b/Src/LinearSolvers/CMakeLists.txt @@ -98,3 +98,15 @@ if (AMReX_HYPRE) MLMG/AMReX_MLNodeLaplacian_hypre.cpp ) endif () + +if (AMReX_SPACEDIM EQUAL 3) + + target_include_directories(amrex PUBLIC $) + + target_sources(amrex + PRIVATE + OpenBC/AMReX_OpenBC.H + OpenBC/AMReX_OpenBC_K.H + OpenBC/AMReX_OpenBC.cpp + ) +endif () diff --git a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.cpp index 89dbb268e10..e5a9b0b31af 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.cpp @@ -323,10 +323,10 @@ MLABecLaplacian::applyMetricTermsCoeffs () for (int alev = 0; alev < m_num_amr_levels; ++alev) { const int mglev = 0; - applyMetricTerm(alev, mglev, m_a_coeffs[alev][mglev]); + applyMetricTermToMF(alev, mglev, m_a_coeffs[alev][mglev]); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - applyMetricTerm(alev, mglev, m_b_coeffs[alev][mglev][idim]); + applyMetricTermToMF(alev, mglev, m_b_coeffs[alev][mglev][idim]); } } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H index 45464bbeb9c..a33d70b4771 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H @@ -40,6 +40,11 @@ public: Real eps_rel, Real eps_abs); + int solve (Any& solnL, + const Any& rhsL, + Real eps_rel, + Real eps_abs); + void setVerbose (int _verbose) { verbose = _verbose; } int getVerbose () const { return verbose; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.cpp index c32b0d6199d..76144e6d42f 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.cpp @@ -78,6 +78,13 @@ MLCGSolver::solve (MultiFab& sol, } } +int +MLCGSolver::solve (Any& sol, const Any& rhs, Real eps_rel, Real eps_abs) +{ + AMREX_ASSERT(sol.is()); // xxxxx TODO: MLCGSolver Any + return 
solve(sol.get(), rhs.get(), eps_rel, eps_abs); +} + int MLCGSolver::solve_bicgstab (MultiFab& sol, const MultiFab& rhs, diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H index 985bc9855b4..0cc6456b7c8 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H @@ -59,9 +59,13 @@ public: virtual MultiFab const* getACoeffs (int amrlev, int mglev) const = 0; virtual Array getBCoeffs (int amrlev, int mglev) const = 0; - virtual void applyInhomogNeumannTerm (int amrlev, MultiFab& rhs) const final override; + virtual void applyInhomogNeumannTerm (int amrlev, Any& rhs) const final override; - virtual void applyOverset (int amlev, MultiFab& rhs) const override; + virtual void addInhomogNeumannFlux ( + int amrlev, const Array& grad, + MultiFab const& sol, bool mult_bcoef) const final override; + + virtual void applyOverset (int amlev, Any& rhs) const override; #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) virtual std::unique_ptr makeHypre (Hypre::Interface hypre_interface) const override; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.cpp index b5580b3c15c..db57162c21f 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.cpp @@ -108,7 +108,7 @@ MLCellABecLap::define (const Vector& a_geom, amrlev = 0; for (int mglev = 1; mglev < m_num_mg_levels[amrlev]; ++mglev) { MultiFab foo(m_grids[amrlev][mglev], m_dmap[amrlev][mglev], 1, 0, MFInfo().SetAlloc(false)); - if (! isMFIterSafe(*m_overset_mask[amrlev][mglev], foo)) { + if (! 
amrex::isMFIterSafe(*m_overset_mask[amrlev][mglev], foo)) { auto osm = std::make_unique(m_grids[amrlev][mglev], m_dmap[amrlev][mglev], 1, 1); osm->ParallelCopy(*m_overset_mask[amrlev][mglev]); @@ -189,17 +189,21 @@ MLCellABecLap::getFluxes (const Vector >& a_flux a_flux[alev][idim]->mult(betainv); } } + addInhomogNeumannFlux(alev, a_flux[alev], *a_sol[alev], true); } } void -MLCellABecLap::applyInhomogNeumannTerm (int amrlev, MultiFab& rhs) const +MLCellABecLap::applyInhomogNeumannTerm (int amrlev, Any& a_rhs) const { bool has_inhomog_neumann = hasInhomogNeumannBC(); bool has_robin = hasRobinBC(); if (!has_inhomog_neumann && !has_robin) return; + AMREX_ASSERT(a_rhs.is()); + MultiFab& rhs = a_rhs.get(); + int ncomp = getNComp(); const int mglev = 0; @@ -414,9 +418,121 @@ MLCellABecLap::applyInhomogNeumannTerm (int amrlev, MultiFab& rhs) const } void -MLCellABecLap::applyOverset (int amrlev, MultiFab& rhs) const +MLCellABecLap::addInhomogNeumannFlux ( + int amrlev, const Array& grad, MultiFab const& sol, + bool mult_bcoef) const +{ + /* + * if (mult_bcoef == true) + * grad is -bceof*grad phi + * else + * grad is grad phi + */ + Real fac = mult_bcoef ? 
Real(-1.0) : Real(1.0); + + bool has_inhomog_neumann = hasInhomogNeumannBC(); + bool has_robin = hasRobinBC(); + + if (!has_inhomog_neumann && !has_robin) return; + + int ncomp = getNComp(); + const int mglev = 0; + + const auto dxinv = m_geom[amrlev][mglev].InvCellSize(); + const Box domain = m_geom[amrlev][mglev].growPeriodicDomain(1); + + Array bcoef = {AMREX_D_DECL(nullptr,nullptr,nullptr)}; + if (mult_bcoef) { + bcoef = getBCoeffs(amrlev,mglev); + } + + const auto& bndry = *m_bndry_sol[amrlev]; + + MFItInfo mfi_info; + if (Gpu::notInLaunchRegion()) mfi_info.SetDynamic(true); + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(sol, mfi_info); mfi.isValid(); ++mfi) + { + Box const& vbx = mfi.validbox(); + for (OrientationIter orit; orit.isValid(); ++orit) { + const Orientation ori = orit(); + const int idim = ori.coordDir(); + const Box& ccb = amrex::adjCell(vbx, ori); + const Dim3 os = IntVect::TheDimensionVector(idim).dim3(); + const Real dxi = dxinv[idim]; + if (! domain.contains(ccb)) { + for (int icomp = 0; icomp < ncomp; ++icomp) { + auto const& phi = sol.const_array(mfi,icomp); + auto const bv = bndry.bndryValues(ori).multiFab().const_array(mfi,icomp); + auto const bc = bcoef[idim] ? bcoef[idim]->const_array(mfi,icomp) + : Array4{}; + auto const& f = grad[idim]->array(mfi,icomp); + if (ori.isLow()) { + if (m_lobc_orig[icomp][idim] == + LinOpBCType::inhomogNeumann) { + AMREX_HOST_DEVICE_FOR_3D(ccb, i, j, k, + { + int ii = i+os.x; + int jj = j+os.y; + int kk = k+os.z; + Real b = bc ? 
bc(ii,jj,kk) : Real(1.0); + f(ii,jj,kk) = fac*b*bv(i,j,k); + }); + } else if (m_lobc_orig[icomp][idim] == + LinOpBCType::Robin) { + Array4 const& rbc = (*m_robin_bcval[amrlev])[mfi].const_array(icomp*3); + AMREX_HOST_DEVICE_FOR_3D(ccb, i, j, k, + { + int ii = i+os.x; + int jj = j+os.y; + int kk = k+os.z; + Real tmp = Real(1.0) / + (rbc(i,j,k,1)*dxi + rbc(i,j,k,0)*Real(0.5)); + Real RA = rbc(i,j,k,2) * tmp; + Real RB = (rbc(i,j,k,1)*dxi - rbc(i,j,k,0)*Real(0.5)) * tmp; + Real b = bc ? bc(ii,jj,kk) : Real(1.0); + f(ii,jj,kk) = fac*b*dxi*((Real(1.0)-RB)*phi(ii,jj,kk)-RA); + }); + } + } else { + if (m_hibc_orig[icomp][idim] == + LinOpBCType::inhomogNeumann) { + AMREX_HOST_DEVICE_FOR_3D(ccb, i, j, k, + { + Real b = bc ? bc(i,j,k) : Real(1.0); + f(i,j,k) = fac*b*bv(i,j,k); + }); + } else if (m_hibc_orig[icomp][idim] == + LinOpBCType::Robin) { + Array4 const& rbc = (*m_robin_bcval[amrlev])[mfi].const_array(icomp*3); + AMREX_HOST_DEVICE_FOR_3D(ccb, i, j, k, + { + Real tmp = Real(1.0) / + (rbc(i,j,k,1)*dxi + rbc(i,j,k,0)*Real(0.5)); + Real RA = rbc(i,j,k,2) * tmp; + Real RB = (rbc(i,j,k,1)*dxi - rbc(i,j,k,0)*Real(0.5)) * tmp; + Real b = bc ? 
bc(i,j,k) : Real(1.0); + f(i,j,k) = fac*b*dxi*(RA+(RB-Real(1.0))* + phi(i-os.x,j-os.y,k-os.z)); + }); + } + } + } + } + } + } +} + + +void +MLCellABecLap::applyOverset (int amrlev, Any& a_rhs) const { if (m_overset_mask[amrlev][0]) { + AMREX_ASSERT(a_rhs.is()); + auto& rhs = a_rhs.get(); const int ncomp = getNComp(); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H index f1168e5c41e..9a6bb222113 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H @@ -3,6 +3,7 @@ #include #include +#include namespace amrex { @@ -109,6 +110,8 @@ public: virtual void interpolation (int amrlev, int fmglev, MultiFab& fine, const MultiFab& crse) const override; + virtual void interpAssign (int amrlev, int fmglev, MultiFab& fine, MultiFab& crse) const override; + virtual void averageDownSolutionRHS (int camrlev, MultiFab& crse_sol, MultiFab& crse_rhs, const MultiFab& fine_sol, const MultiFab& fine_rhs) override; @@ -132,9 +135,12 @@ public: virtual void compGrad (int amrlev, const Array& grad, MultiFab& sol, Location loc) const override; - virtual void applyMetricTerm (int amrlev, int mglev, MultiFab& rhs) const final override; + virtual void applyMetricTerm (int amrlev, int mglev, Any& rhs) const final override; virtual void unapplyMetricTerm (int amrlev, int mglev, MultiFab& rhs) const final override; - virtual void fillSolutionBC (int amrlev, MultiFab& sol, const MultiFab* crse_bcdata=nullptr) final override; + virtual Vector getSolvabilityOffset (int amrlev, int mglev, + Any const& rhs) const override; + virtual void fixSolvabilityByOffset (int amrlev, int mglev, Any& rhs, + Vector const& offset) const override; virtual void prepareForSolve () override; @@ -146,6 +152,23 @@ public: const Array& flux, const FArrayBox& sol, Location loc, const int face_only=0) const = 0; + // This could be turned into template if 
needed. + void applyMetricTermToMF (int amrlev, int mglev, MultiFab& rhs) const; + + virtual Real AnyNormInfMask (int amrlev, Any const& a, bool local) const override; + + virtual void AnyAvgDownResAmr (int clev, Any& cres, Any const& fres) const override; + + virtual void AnyInterpolationAmr (int famrlev, Any& fine, const Any& crse, + IntVect const& /*nghost*/) const override; + + virtual void AnyAverageDownAndSync (Vector& sol) const override; + + virtual void addInhomogNeumannFlux (int /*amrlev*/, + const Array& /*grad*/, + MultiFab const& /*sol*/, + bool /*mult_bcoef*/) const {} + struct BCTL { BoundCond type; Real location; @@ -210,12 +233,17 @@ protected: // boundary cell flags for covered, not_covered, outside_domain Vector > > m_maskvals; + Vector > m_norm_fine_mask; + mutable Vector m_fluxreg; private: void defineAuxData (); void defineBC (); + + void computeVolInv () const; + mutable Vector > m_volinv; // used by solvability fix }; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp index 8f6921950e7..5c8edcbb1a6 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #ifndef BL_NO_FORT @@ -9,6 +10,11 @@ namespace amrex { +#ifdef AMREX_SOFT_PERF_COUNTERS +// perf_counters +MLCellLinOp::Counters MLCellLinOp::perf_counters; +#endif + namespace { // Have to put it here due to CUDA extended lambda limitation struct ABCTag { @@ -97,6 +103,7 @@ MLCellLinOp::defineAuxData () m_undrrelxr.resize(m_num_amr_levels); m_maskvals.resize(m_num_amr_levels); m_fluxreg.resize(m_num_amr_levels-1); + m_norm_fine_mask.resize(m_num_amr_levels-1); const int ncomp = getNComp(); @@ -136,6 +143,9 @@ MLCellLinOp::defineAuxData () m_dmap[amrlev+1][0], m_dmap[amrlev][0], m_geom[amrlev+1][0], m_geom[amrlev][0], ratio, amrlev+1, ncomp); + m_norm_fine_mask[amrlev] = std::make_unique + (makeFineMask(m_grids[amrlev][0], 
m_dmap[amrlev][0], m_grids[amrlev+1][0], + ratio, 1, 0)); } #if (AMREX_SPACEDIM != 3) @@ -530,18 +540,6 @@ MLCellLinOp::solutionResidual (int amrlev, MultiFab& resid, MultiFab& x, const M MultiFab::Xpay(resid, Real(-1.0), b, 0, 0, ncomp, 0); } -void -MLCellLinOp::fillSolutionBC (int amrlev, MultiFab& sol, const MultiFab* crse_bcdata) -{ - BL_PROFILE("MLCellLinOp::fillSolutionBC()"); - if (crse_bcdata != nullptr) { - updateSolBC(amrlev, *crse_bcdata); - } - const int mglev = 0; - applyBC(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, - m_bndry_sol[amrlev].get()); -} - void MLCellLinOp::correctionResidual (int amrlev, int mglev, MultiFab& resid, MultiFab& x, const MultiFab& b, BCMode bc_mode, const MultiFab* crse_bcdata) @@ -940,6 +938,8 @@ MLCellLinOp::compGrad (int amrlev, const Array& grad, }); #endif } + + addInhomogNeumannFlux(amrlev, grad, sol, false); } void @@ -1316,7 +1316,20 @@ MLCellLinOp::BndryCondLoc::setLOBndryConds (const Geometry& geom, const Real* dx } void -MLCellLinOp::applyMetricTerm (int amrlev, int mglev, MultiFab& rhs) const +MLCellLinOp::applyMetricTerm (int amrlev, int mglev, Any& rhs) const +{ + amrex::ignore_unused(amrlev,mglev,rhs); +#if (AMREX_SPACEDIM != 3) + + if (!m_has_metric_term) return; + + AMREX_ASSERT(rhs.is()); + applyMetricTermToMF(amrlev, mglev, rhs.get()); +#endif +} + +void +MLCellLinOp::applyMetricTermToMF (int amrlev, int mglev, MultiFab& rhs) const { amrex::ignore_unused(amrlev,mglev,rhs); #if (AMREX_SPACEDIM != 3) @@ -1435,9 +1448,417 @@ MLCellLinOp::update () if (MLLinOp::needsUpdate()) MLLinOp::update(); } -#ifdef AMREX_SOFT_PERF_COUNTERS -// perf_counters -MLCellLinOp::Counters MLCellLinOp::perf_counters; +void +MLCellLinOp::computeVolInv () const +{ + if (!m_volinv.empty()) return; + + m_volinv.resize(m_num_amr_levels); + for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { + m_volinv[amrlev].resize(NMGLevels(amrlev)); + } + + // We don't need to compute for every level + + auto f = [&] (int 
amrlev, int mglev) { +#ifdef AMREX_USE_EB + auto factory = dynamic_cast(Factory(amrlev,mglev)); + if (factory) + { + const MultiFab& vfrac = factory->getVolFrac(); + m_volinv[amrlev][mglev] = vfrac.sum(0,true); + } + else +#endif + { + m_volinv[amrlev][mglev] + = Real(1.0 / compactify(Geom(amrlev,mglev).Domain()).d_numPts()); + } + }; + + // amrlev = 0, mglev = 0 + f(0,0); + + int mgbottom = NMGLevels(0)-1; + f(0,mgbottom); + +#ifdef AMREX_USE_EB + Real temp1, temp2; + auto factory = dynamic_cast(Factory(0,0)); + if (factory) + { + ParallelAllReduce::Sum({m_volinv[0][0], m_volinv[0][mgbottom]}, + ParallelContext::CommunicatorSub()); + temp1 = Real(1.0)/m_volinv[0][0]; + temp2 = Real(1.0)/m_volinv[0][mgbottom]; + } + else + { + temp1 = m_volinv[0][0]; + temp2 = m_volinv[0][mgbottom]; + } + m_volinv[0][0] = temp1; + m_volinv[0][mgbottom] = temp2; +#endif +} + +Vector +MLCellLinOp::getSolvabilityOffset (int amrlev, int mglev, Any const& a_rhs) const +{ + AMREX_ASSERT(a_rhs.is()); + auto const& rhs = a_rhs.get(); + + computeVolInv(); + + const int ncomp = getNComp(); + Vector offset(ncomp); + +#ifdef AMREX_USE_EB + auto factory = dynamic_cast(Factory(amrlev,mglev)); + if (factory) + { + const MultiFab& vfrac = factory->getVolFrac(); + for (int c = 0; c < ncomp; ++c) { + offset[c] = MultiFab::Dot(rhs, c, vfrac, 0, 1, 0, true) * m_volinv[amrlev][mglev]; + } + } + else +#endif + { + for (int c = 0; c < ncomp; ++c) { + offset[c] = rhs.sum(c,true) * m_volinv[amrlev][mglev]; + } + } + + ParallelAllReduce::Sum(offset.data(), ncomp, ParallelContext::CommunicatorSub()); + + return offset; +} + +Real +MLCellLinOp::AnyNormInfMask (int amrlev, Any const& a, bool local) const +{ + AMREX_ASSERT(a.is()); + auto& mf = a.get(); + + const int finest_level = NAMRLevels() - 1; + Real norm = 0._rt; +#ifdef AMREX_USE_EB + const int ncomp = getNComp(); + if (! 
mf.isAllRegular()) { + auto factory = dynamic_cast(Factory(amrlev)); + const MultiFab& vfrac = factory->getVolFrac(); + if (amrlev == finest_level) { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& ma = mf.const_arrays(); + auto const& vfrac_ma = vfrac.const_arrays(); + norm = ParReduce(TypeList{}, TypeList{}, + mf, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) + -> GpuTuple + { + return amrex::Math::abs(ma[box_no](i,j,k,n) + *vfrac_ma[box_no](i,j,k)); + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel reduction(max:norm) +#endif + for (MFIter mfi(mf,true); mfi.isValid(); ++mfi) { + Box const& bx = mfi.tilebox(); + auto const& fab = mf.const_array(mfi); + auto const& v = vfrac.const_array(mfi); + AMREX_LOOP_4D(bx, ncomp, i, j, k, n, + { + norm = std::max(norm, amrex::Math::abs(fab(i,j,k,n)*v(i,j,k))); + }); + } + } + } else { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& ma = mf.const_arrays(); + auto const& mask_ma = m_norm_fine_mask[amrlev]->const_arrays(); + auto const& vfrac_ma = vfrac.const_arrays(); + norm = ParReduce(TypeList{}, TypeList{}, + mf, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) + -> GpuTuple + { + if (mask_ma[box_no](i,j,k)) { + return amrex::Math::abs(ma[box_no](i,j,k,n) + *vfrac_ma[box_no](i,j,k)); + } else { + return Real(0.0); + } + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel reduction(max:norm) +#endif + for (MFIter mfi(mf,true); mfi.isValid(); ++mfi) { + Box const& bx = mfi.tilebox(); + auto const& fab = mf.const_array(mfi); + auto const& mask = m_norm_fine_mask[amrlev]->const_array(mfi); + auto const& v = vfrac.const_array(mfi); + AMREX_LOOP_4D(bx, ncomp, i, j, k, n, + { + if (mask(i,j,k)) { + norm = std::max(norm, amrex::Math::abs(fab(i,j,k,n)*v(i,j,k))); + } + }); + } + } + } + } else +#endif + { + iMultiFab const* fine_mask = (amrlev == finest_level) + ? 
nullptr : m_norm_fine_mask[amrlev].get(); + norm = MFNormInf(mf, fine_mask, true); + } + + if (!local) ParallelAllReduce::Max(norm, ParallelContext::CommunicatorSub()); + return norm; +} + +void +MLCellLinOp::AnyAvgDownResAmr (int clev, Any& cres, Any const& fres) const +{ + AMREX_ASSERT(cres.is() && fres.is()); +#ifdef AMREX_USE_EB + amrex::EB_average_down +#else + amrex::average_down +#endif + (fres.get(), cres.get(), 0, getNComp(), AMRRefRatio(clev)); +} + +void +MLCellLinOp::AnyInterpolationAmr (int famrlev, Any& a_fine, const Any& a_crse, + IntVect const& /*nghost*/) const +{ + AMREX_ASSERT(a_fine.is()); + MultiFab& fine = a_fine.get(); + MultiFab const& crse = a_crse.get(); + + const int ncomp = getNComp(); + const int refratio = AMRRefRatio(famrlev-1); + +#ifdef AMREX_USE_EB + auto factory = dynamic_cast(Factory(famrlev)); + const FabArray* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr; +#endif + + MFItInfo mfi_info; + if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(fine, mfi_info); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + Array4 const& ff = fine.array(mfi); + Array4 const& cc = crse.const_array(mfi); +#ifdef AMREX_USE_EB + bool call_lincc; + if (factory) + { + const auto& flag = (*flags)[mfi]; + if (flag.getType(amrex::grow(bx,1)) == FabType::regular) { + call_lincc = true; + } else { + Array4 const& flg = flag.const_array(); + switch(refratio) { + case 2: + { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_eb_cc_interp_r<2>(tbx, ff, cc, flg, ncomp); + }); + break; + } + case 4: + { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_eb_cc_interp_r<4>(tbx, ff, cc, flg, ncomp); + }); + break; + } + default: + amrex::Abort("mlmg_eb_cc_interp: only refratio 2 and 4 are supported"); + } + + call_lincc = false; + } + } + else + { + call_lincc = true; + } +#else + const bool call_lincc = 
true; +#endif + if (call_lincc) + { + switch(refratio) { + case 2: + { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_lin_cc_interp_r2(tbx, ff, cc, ncomp); + }); + break; + } + case 4: + { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_lin_cc_interp_r4(tbx, ff, cc, ncomp); + }); + break; + } + default: + amrex::Abort("mlmg_lin_cc_interp: only refratio 2 and 4 are supported"); + } + } + } +} + +void +MLCellLinOp::interpAssign (int amrlev, int fmglev, MultiFab& fine, MultiFab& crse) const +{ + const int ncomp = getNComp(); + + const Geometry& crse_geom = Geom(amrlev,fmglev+1); + const IntVect refratio = (amrlev > 0) ? IntVect(2) : mg_coarsen_ratio_vec[fmglev]; + const IntVect ng = crse.nGrowVect(); + + MultiFab cfine; + const MultiFab* cmf; + + if (amrex::isMFIterSafe(crse, fine)) + { + crse.FillBoundary(crse_geom.periodicity()); + cmf = &crse; + } + else + { + BoxArray cba = fine.boxArray(); + cba.coarsen(refratio); + cfine.define(cba, fine.DistributionMap(), ncomp, ng); + cfine.setVal(0.0); + cfine.ParallelCopy(crse, 0, 0, ncomp, IntVect(0), ng, crse_geom.periodicity()); + cmf = & cfine; + } + + bool isEB = fine.hasEBFabFactory(); + ignore_unused(isEB); + +#ifdef AMREX_USE_EB + auto factory = dynamic_cast(&(fine.Factory())); + const FabArray* flags = (factory) ? 
&(factory->getMultiEBCellFlagFab()) : nullptr; +#endif + + MFItInfo mfi_info; + if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(fine, mfi_info); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + const auto& ff = fine.array(mfi); + const auto& cc = cmf->array(mfi); +#ifdef AMREX_USE_EB + bool call_lincc; + if (isEB) + { + const auto& flag = (*flags)[mfi]; + if (flag.getType(amrex::grow(bx,1)) == FabType::regular) { + call_lincc = true; + } else { + Array4 const& flg = flag.const_array(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_eb_cc_interp_r<2>(tbx, ff, cc, flg, ncomp); + }); + + call_lincc = false; + } + } + else + { + call_lincc = true; + } +#else + const bool call_lincc = true; +#endif + if (call_lincc) + { +#if (AMREX_SPACEDIM == 3) + if (hasHiddenDimension()) { + Box const& bx_2d = compactify(bx); + auto const& ff_2d = compactify(ff); + auto const& cc_2d = compactify(cc); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx_2d, tbx, + { + TwoD::mlmg_lin_cc_interp_r2(tbx, ff_2d, cc_2d, ncomp); + }); + } else #endif + { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, + { + mlmg_lin_cc_interp_r2(tbx, ff, cc, ncomp); + }); + } + } + } +} + +void +MLCellLinOp::AnyAverageDownAndSync (Vector& sol) const +{ + AMREX_ASSERT(sol[0].is()); + + int ncomp = getNComp(); + for (int falev = NAMRLevels()-1; falev > 0; --falev) + { +#ifdef AMREX_USE_EB + amrex::EB_average_down(sol[falev ].get(), + sol[falev-1].get(), 0, ncomp, AMRRefRatio(falev-1)); +#else + amrex::average_down(sol[falev ].get(), + sol[falev-1].get(), 0, ncomp, AMRRefRatio(falev-1)); +#endif + } +} + +void +MLCellLinOp::fixSolvabilityByOffset (int amrlev, int mglev, Any& a_rhs, + Vector const& offset) const +{ + amrex::ignore_unused(amrlev, mglev); + AMREX_ASSERT(a_rhs.is()); + auto& rhs = a_rhs.get(); + + const int ncomp = getNComp(); + for (int c = 0; c < ncomp; ++c) { + 
rhs.plus(-offset[c], c, 1); + } +#ifdef AMREX_USE_EB + if (rhs.hasEBFabFactory()) { + Vector val(ncomp, 0.0_rt); + amrex::EB_set_covered(rhs, 0, ncomp, val); + } +#endif +} } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp index a006976dc08..c8bea8dd2d2 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp @@ -34,7 +34,8 @@ MLEBABecLap::MLEBABecLap (const Vector& a_geom, std::unique_ptr > MLEBABecLap::makeFactory (int amrlev, int mglev) const { - return makeEBFabFactory(m_geom[amrlev][mglev], + return makeEBFabFactory(static_cast(Factory(0,0))->getEBIndexSpace(), + m_geom[amrlev][mglev], m_grids[amrlev][mglev], m_dmap[amrlev][mglev], {1,1,1}, EBSupport::full); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H index 1b490726405..08439f9f99b 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H @@ -200,7 +200,7 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_adotx_rz_eb_doit (int i, int j, int k, Array4 const& y, Array4 const& x, Array4 const& dmsk, Array4 const& ecx, Array4 const& ecy, - F && xeb, Real dr, Real dz, Real rlo) noexcept + F && xeb, Real sigr, Real dr, Real dz, Real rlo) noexcept { if (dmsk(i,j,k)) { y(i,j,k) = Real(0.0); @@ -211,11 +211,11 @@ void mlebndfdlap_adotx_rz_eb_doit (int i, int j, int k, Array4 const& y, Real const r = rlo + Real(i) * dr; if (r == Real(0.0)) { if (ecx(i,j,k) == Real(1.0)) { // regular - out = Real(4.0) * (x(i+1,j,k)-x(i,j,k)) / (dr*dr); + out = Real(4.0) * sigr * (x(i+1,j,k)-x(i,j,k)) / (dr*dr); scale = Real(1.0); } else { hp = Real(1.0) + Real(2.) 
* ecx(i,j,k); - out = Real(4.0) * (xeb(i+1,j,k)-x(i,j,k)) / (dr*dr*hp*hp); + out = Real(4.0) * sigr * (xeb(i+1,j,k)-x(i,j,k)) / (dr*dr*hp*hp); scale = hp; } } else { @@ -235,7 +235,7 @@ void mlebndfdlap_adotx_rz_eb_doit (int i, int j, int k, Array4 const& y, tmp += (xeb(i-1,j,k) - x(i,j,k)) / hm * (r - Real(0.5) * hp * dr); } - out = tmp * Real(2.0) / ((hp+hm) * r * dr * dr); + out = tmp * Real(2.0) * sigr / ((hp+hm) * r * dr * dr); scale = amrex::min(hm, hp); } @@ -266,29 +266,29 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_adotx_rz_eb (int i, int j, int k, Array4 const& y, Array4 const& x, Array4 const& dmsk, Array4 const& ecx, Array4 const& ecy, - Real xeb, Real dr, Real dz, Real rlo) noexcept + Real xeb, Real sigr, Real dr, Real dz, Real rlo) noexcept { mlebndfdlap_adotx_rz_eb_doit(i, j, k, y, x, dmsk, ecx, ecy, [=] (int, int, int) -> Real { return xeb; }, - dr, dz, rlo); + sigr, dr, dz, rlo); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_adotx_rz_eb (int i, int j, int k, Array4 const& y, Array4 const& x, Array4 const& dmsk, Array4 const& ecx, Array4 const& ecy, - Array4 const& xeb, Real dr, Real dz, Real rlo) noexcept + Array4 const& xeb, Real sigr, Real dr, Real dz, Real rlo) noexcept { mlebndfdlap_adotx_rz_eb_doit(i, j, k, y, x, dmsk, ecx, ecy, [=] (int i1, int i2, int i3) -> Real { return xeb(i1,i2,i3); }, - dr, dz, rlo); + sigr, dr, dz, rlo); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_adotx_rz (int i, int j, int k, Array4 const& y, Array4 const& x, Array4 const& dmsk, - Real dr, Real dz, Real rlo) noexcept + Real sigr, Real dr, Real dz, Real rlo) noexcept { if (dmsk(i,j,k)) { y(i,j,k) = Real(0.0); @@ -296,11 +296,11 @@ void mlebndfdlap_adotx_rz (int i, int j, int k, Array4 const& y, Real Ax = (x(i,j-1,k) - Real(2.0)*x(i,j,k) + x(i,j+1,k)) / (dz*dz); Real const r = rlo + Real(i)*dr; if (r == Real(0.0)) { - Ax += Real(4.0) * (x(i+1,j,k)-x(i,j,k)) / (dr*dr); + Ax += Real(4.0) * sigr * (x(i+1,j,k)-x(i,j,k)) / 
(dr*dr); } else { Real const rp = r + Real(0.5)*dr; Real const rm = r - Real(0.5)*dr; - Ax += (rp*x(i+1,j,k) - (rp+rm)*x(i,j,k) + rm*x(i-1,j,k)) / (r*dr*dr); + Ax += sigr * (rp*x(i+1,j,k) - (rp+rm)*x(i,j,k) + rm*x(i-1,j,k)) / (r*dr*dr); } y(i,j,k) = Ax; } @@ -310,7 +310,7 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_gsrb_rz_eb (int i, int j, int k, Array4 const& x, Array4 const& rhs, Array4 const& dmsk, Array4 const& ecx, Array4 const& ecy, - Real dr, Real dz, Real rlo, int redblack) noexcept + Real sigr, Real dr, Real dz, Real rlo, int redblack) noexcept { if ((i+j+k+redblack)%2 == 0) { if (dmsk(i,j,k)) { @@ -322,12 +322,12 @@ void mlebndfdlap_gsrb_rz_eb (int i, int j, int k, Array4 const& x, Real const r = rlo + Real(i) * dr; if (r == Real(0.0)) { if (ecx(i,j,k) == Real(1.0)) { // regular - Ax = (Real(4.0) / (dr*dr)) * (x(i+1,j,k)-x(i,j,k)); - gamma = -(Real(4.0) / (dr*dr)); + Ax = (Real(4.0) * sigr / (dr*dr)) * (x(i+1,j,k)-x(i,j,k)); + gamma = -(Real(4.0) * sigr / (dr*dr)); scale = Real(1.0); } else { hp = Real(1.0) + Real(2.) 
* ecx(i,j,k); - gamma = -(Real(4.0) / (dr*dr*hp*hp)); + gamma = -(Real(4.0) * sigr / (dr*dr*hp*hp)); Ax = gamma * x(i,j,k); scale = hp; } @@ -352,8 +352,8 @@ void mlebndfdlap_gsrb_rz_eb (int i, int j, int k, Array4 const& x, tmp0 += Real(-1.0) / hm * (r - Real(0.5) * hp * dr); } - Ax = tmp * Real(2.0) / ((hp+hm) * r * dr * dr); - gamma = tmp0 * Real(2.0) / ((hp+hm) * r * dr * dr); + Ax = tmp * Real(2.0) * sigr / ((hp+hm) * r * dr * dr); + gamma = tmp0 * Real(2.0) * sigr / ((hp+hm) * r * dr * dr); scale = amrex::min(hm, hp); } @@ -390,7 +390,7 @@ void mlebndfdlap_gsrb_rz_eb (int i, int j, int k, Array4 const& x, AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mlebndfdlap_gsrb_rz (int i, int j, int k, Array4 const& x, Array4 const& rhs, Array4 const& dmsk, - Real dr, Real dz, Real rlo, int redblack) noexcept + Real sigr, Real dr, Real dz, Real rlo, int redblack) noexcept { if ((i+j+k+redblack)%2 == 0) { if (dmsk(i,j,k)) { @@ -400,13 +400,13 @@ void mlebndfdlap_gsrb_rz (int i, int j, int k, Array4 const& x, Real gamma = -Real(2.0) / (dz*dz); Real const r = rlo + Real(i)*dr; if (r == Real(0.0)) { - Ax += (Real(4.0)/(dr*dr)) * (x(i+1,j,k)-x(i,j,k)); - gamma += -(Real(4.0)/(dr*dr)); + Ax += (Real(4.0)*sigr/(dr*dr)) * (x(i+1,j,k)-x(i,j,k)); + gamma += -(Real(4.0)*sigr/(dr*dr)); } else { Real const rp = r + Real(0.5)*dr; Real const rm = r - Real(0.5)*dr; - Ax += (rp*x(i+1,j,k) - (rp+rm)*x(i,j,k) + rm*x(i-1,j,k)) / (r*dr*dr); - gamma += -(rp+rm) / (r*dr*dr); + Ax += sigr*(rp*x(i+1,j,k) - (rp+rm)*x(i,j,k) + rm*x(i-1,j,k)) / (r*dr*dr); + gamma += -sigr*(rp+rm) / (r*dr*dr); } constexpr Real omega = Real(1.25); x(i,j,k) += (rhs(i,j,k) - Ax) * (omega / gamma); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H index 1215eda1f6c..404aefc8c0b 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H @@ -19,8 +19,8 @@ namespace amrex { // with only diagonal 
components. The EB is assumed to be Dirichlet. // // del dot (simga grad phi) - alpha/r^2 phi = rhs, for RZ where alpha is a -// scalar constant that is zero by default. sigma is non-zero in -// z-direction only. For now the `alpha` term has not been implemented yet. +// scalar constant that is zero by default. For now the `alpha` term has +// not been implemented yet. class MLEBNodeFDLaplacian : public MLNodeLinOp @@ -72,7 +72,7 @@ public: virtual std::unique_ptr > makeFactory (int amrlev, int mglev) const final override; - virtual void scaleRHS (int amrlev, MultiFab& rhs) const final; + virtual void scaleRHS (int amrlev, Any& rhs) const final; #endif @@ -100,6 +100,7 @@ public: virtual void fixUpResidualMask (int amrlev, iMultiFab& resmsk) final override; virtual bool isSingular (int) const final override { return false; } + virtual bool isBottomSingular () const final override { return false; } virtual void compGrad (int amrlev, const Array& grad, MultiFab& sol, Location /*loc*/) const override; @@ -118,8 +119,10 @@ public: Array4 const& bfab) const override; #endif + virtual void postSolve (Vector& sol) const override; + private: - GpuArray m_sigma{AMREX_D_DECL(1_rt,1_rt,1_rt)}; + GpuArray m_sigma{{AMREX_D_DECL(1_rt,1_rt,1_rt)}}; Real m_s_phi_eb = std::numeric_limits::lowest(); Vector m_phi_eb; int m_rz = false; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp index cfa7595b515..920e8540200 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.cpp @@ -310,16 +310,20 @@ MLEBNodeFDLaplacian::prepareForSolve () AMREX_ALWAYS_ASSERT_WITH_MESSAGE(m_lobc[0][0] == BCType::Neumann, "The lo-x BC must be Neumann for 2d RZ"); } - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(m_sigma[0] == 0._rt, - "r-direction sigma must be zero"); + if (m_sigma[0] == 0._rt) { + m_sigma[0] = 1._rt; // For backward compatibility + } } #endif } #ifdef AMREX_USE_EB void 
-MLEBNodeFDLaplacian::scaleRHS (int amrlev, MultiFab& rhs) const +MLEBNodeFDLaplacian::scaleRHS (int amrlev, Any& a_rhs) const { + AMREX_ASSERT(a_rhs.is()); + auto& rhs = a_rhs.get(); + auto const& dmask = *m_dirichlet_mask[amrlev][0]; auto factory = dynamic_cast(m_factory[amrlev][0].get()); auto const& edgecent = factory->getEdgeCent(); @@ -353,6 +357,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); #if (AMREX_SPACEDIM == 2) + const auto sig0 = m_sigma[0]; const auto dx0 = m_geom[amrlev][mglev].CellSize(0); const auto dx1 = m_geom[amrlev][mglev].CellSize(1)/std::sqrt(m_sigma[1]); const auto xlo = m_geom[amrlev][mglev].ProbLo(0); @@ -393,7 +398,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_adotx_rz_eb(i,j,k,yarr,xarr,dmarr,ecx,ecy, - phiebarr, dx0, dx1, xlo); + phiebarr, sig0, dx0, dx1, xlo); }); } else #endif @@ -410,7 +415,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_adotx_rz_eb(i,j,k,yarr,xarr,dmarr,ecx,ecy, - phieb, dx0, dx1, xlo); + phieb, sig0, dx0, dx1, xlo); }); } else #endif @@ -429,7 +434,7 @@ MLEBNodeFDLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa if (m_rz) { AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { - mlebndfdlap_adotx_rz(i,j,k,yarr,xarr,dmarr,dx0,dx1,xlo); + mlebndfdlap_adotx_rz(i,j,k,yarr,xarr,dmarr,sig0,dx0,dx1,xlo); }); } else #endif @@ -450,6 +455,7 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); #if (AMREX_SPACEDIM == 2) + const auto sig0 = m_sigma[0]; const auto dx0 = m_geom[amrlev][mglev].CellSize(0); const auto dx1 = m_geom[amrlev][mglev].CellSize(1)/std::sqrt(m_sigma[1]); const auto xlo = m_geom[amrlev][mglev].ProbLo(0); @@ -492,7 +498,7 @@ 
MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_gsrb_rz_eb(i,j,k,solarr,rhsarr,dmskarr,ecx,ecy, - dx0, dx1, xlo, redblack); + sig0, dx0, dx1, xlo, redblack); }); } else #endif @@ -511,7 +517,7 @@ MLEBNodeFDLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF AMREX_HOST_DEVICE_FOR_3D(box, i, j, k, { mlebndfdlap_gsrb_rz(i,j,k,solarr,rhsarr,dmskarr, - dx0, dx1, xlo, redblack); + sig0, dx0, dx1, xlo, redblack); }); } else #endif @@ -634,22 +640,57 @@ MLEBNodeFDLaplacian::compGrad (int amrlev, const Array #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) void -MLEBNodeFDLaplacian::fillIJMatrix (MFIter const& mfi, - Array4 const& gid, - Array4 const& lid, - HypreNodeLap::Int* const ncols, - HypreNodeLap::Int* const cols, - Real* const mat) const +MLEBNodeFDLaplacian::fillIJMatrix (MFIter const& /*mfi*/, + Array4 const& /*gid*/, + Array4 const& /*lid*/, + HypreNodeLap::Int* const /*ncols*/, + HypreNodeLap::Int* const /*cols*/, + Real* const /*mat*/) const { amrex::Abort("MLEBNodeFDLaplacian::fillIJMatrix: todo"); } void -MLEBNodeFDLaplacian::fillRHS (MFIter const& mfi, Array4 const& lid, - Real* const rhs, Array4 const& bfab) const +MLEBNodeFDLaplacian::fillRHS (MFIter const& /*mfi*/, Array4 const& /*lid*/, + Real* const /*rhs*/, Array4 const& /*bfab*/) const { amrex::Abort("MLEBNodeFDLaplacian::fillRHS: todo"); } #endif +void +MLEBNodeFDLaplacian::postSolve (Vector& sol) const +{ +#ifdef AMREX_USE_EB + for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { + const auto phieb = m_s_phi_eb; + auto factory = dynamic_cast(m_factory[amrlev][0].get()); + auto const& levset_mf = factory->getLevelSet(); + auto const& levset_ar = levset_mf.const_arrays(); + MultiFab& mf = sol[amrlev].get(); + auto const& sol_ar = mf.arrays(); + if (phieb == std::numeric_limits::lowest()) { + auto const& phieb_ar = m_phi_eb[amrlev].const_arrays(); + amrex::ParallelFor(mf, IntVect(1), + 
[=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + if (levset_ar[bi](i,j,k) >= Real(0.0)) { + sol_ar[bi](i,j,k) = phieb_ar[bi](i,j,k); + } + }); + } else { + amrex::ParallelFor(mf, IntVect(1), + [=] AMREX_GPU_DEVICE (int bi, int i, int j, int k) noexcept + { + if (levset_ar[bi](i,j,k) >= Real(0.0)) { + sol_ar[bi](i,j,k) = phieb; + } + }); + } + } +#else + amrex::ignore_unused(sol); +#endif +} + } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H index a522d5aa927..1ed29a84801 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H @@ -105,7 +105,8 @@ public: // for cuda void applyBCTensor (int amrlev, int mglev, MultiFab& vel, BCMode bc_mode, StateMode s_mode, const MLMGBndry* bndry) const; - void compCrossTerms(int amrlev, int mglev, MultiFab const& mf) const; + void compCrossTerms(int amrlev, int mglev, MultiFab const& mf, + const MLMGBndry* bndry) const; }; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp index 247e0fb292e..87bb78da730 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp @@ -226,7 +226,7 @@ MLEBTensorOp::apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode MultiFab const& kapebmf = m_eb_kappa[amrlev][mglev]; Real bscalar = m_b_scalar; - compCrossTerms(amrlev, mglev, in); + compCrossTerms(amrlev, mglev, in, bndry); MFItInfo mfi_info; if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); @@ -289,15 +289,23 @@ MLEBTensorOp::apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode } void -MLEBTensorOp::compCrossTerms(int amrlev, int mglev, MultiFab const& mf) const +MLEBTensorOp::compCrossTerms(int amrlev, int mglev, MultiFab const& mf, + const MLMGBndry* bndry) const { auto factory = dynamic_cast(m_factory[amrlev][mglev].get()); const FabArray* flags = (factory) ? 
&(factory->getMultiEBCellFlagFab()) : nullptr; auto area = (factory) ? factory->getAreaFrac() : Array{AMREX_D_DECL(nullptr,nullptr,nullptr)}; + const auto& bcondloc = *m_bcondloc[amrlev][mglev]; + + Array4 foo; + const Geometry& geom = m_geom[amrlev][mglev]; const auto dxinv = geom.InvCellSizeArray(); + const Box& domain = geom.growPeriodicDomain(1); + const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); Array const& etamf = m_b_coeffs[amrlev][mglev]; Array const& kapmf = m_kappa[amrlev][mglev]; @@ -346,56 +354,143 @@ MLEBTensorOp::compCrossTerms(int amrlev, int mglev, MultiFab const& mf) const } ); } else { - AMREX_D_TERM(Array4 const fxfab = fluxmf[0].array(mfi);, - Array4 const fyfab = fluxmf[1].array(mfi);, - Array4 const fzfab = fluxmf[2].array(mfi);); - Array4 const vfab = mf.const_array(mfi); - AMREX_D_TERM(Array4 const etaxfab = etamf[0].const_array(mfi);, - Array4 const etayfab = etamf[1].const_array(mfi);, - Array4 const etazfab = etamf[2].const_array(mfi);); - AMREX_D_TERM(Array4 const kapxfab = kapmf[0].const_array(mfi);, - Array4 const kapyfab = kapmf[1].const_array(mfi);, - Array4 const kapzfab = kapmf[2].const_array(mfi);); - - if (fabtyp == FabType::regular) - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM - ( xbx, txbx, - { - mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); - } - , ybx, tybx, - { - mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); - } - , zbx, tzbx, - { - mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); + AMREX_D_TERM(Array4 const fxfab = fluxmf[0].array(mfi);, + Array4 const fyfab = fluxmf[1].array(mfi);, + Array4 const fzfab = fluxmf[2].array(mfi);); + Array4 const vfab = mf.const_array(mfi); + AMREX_D_TERM(Array4 const etaxfab = etamf[0].const_array(mfi);, + Array4 const etayfab = etamf[1].const_array(mfi);, + Array4 const etazfab = etamf[2].const_array(mfi);); + AMREX_D_TERM(Array4 const kapxfab = kapmf[0].const_array(mfi);, + Array4 const kapyfab = 
kapmf[1].const_array(mfi);, + Array4 const kapzfab = kapmf[2].const_array(mfi);); + + if (fabtyp == FabType::regular) + { + if (domain.strictly_contains(bx)) { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); + } + ); + } else { + const auto & bdcv = bcondloc.bndryConds(mfi); + + Array2D bct; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + } + } + + const auto& bvxlo = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::low )].array(mfi) : foo; + const auto& bvylo = (bndry != nullptr) ? + (*bndry)[Orientation(1,Orientation::low )].array(mfi) : foo; + const auto& bvxhi = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::high)].array(mfi) : foo; + const auto& bvyhi = (bndry != nullptr) ? + (*bndry)[Orientation(1,Orientation::high)].array(mfi) : foo; +#if (AMREX_SPACEDIM == 3) + const auto& bvzlo = (bndry != nullptr) ? + (*bndry)[Orientation(2,Orientation::low )].array(mfi) : foo; + const auto& bvzhi = (bndry != nullptr) ? 
+ (*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo; +#endif + + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv, + bvxlo, bvxhi, bct, dlo, dhi); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv, + bvylo, bvyhi, bct, dlo, dhi); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv, + bvzlo, bvzhi, bct, dlo, dhi); + } + ); } - ); - } - else - { - AMREX_D_TERM(Array4 const& apx = area[0]->const_array(mfi);, - Array4 const& apy = area[1]->const_array(mfi);, - Array4 const& apz = area[2]->const_array(mfi);); - Array4 const& flag = flags->const_array(mfi); + } + else + { + AMREX_D_TERM(Array4 const& apx = area[0]->const_array(mfi);, + Array4 const& apy = area[1]->const_array(mfi);, + Array4 const& apz = area[2]->const_array(mfi);); + Array4 const& flag = flags->const_array(mfi); + + if (domain.strictly_contains(bx)) { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mlebtensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,apx,flag,dxinv); + } + , ybx, tybx, + { + mlebtensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,apy,flag,dxinv); + } + , zbx, tzbx, + { + mlebtensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,apz,flag,dxinv); + } + ); + } else { + const auto & bdcv = bcondloc.bndryConds(mfi); + + Array2D bct; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + } + } + + const auto& bvxlo = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::low )].array(mfi) : foo; + const auto& bvylo = (bndry != nullptr) ? + (*bndry)[Orientation(1,Orientation::low )].array(mfi) : foo; + const auto& bvxhi = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::high)].array(mfi) : foo; + const auto& bvyhi = (bndry != nullptr) ? 
+ (*bndry)[Orientation(1,Orientation::high)].array(mfi) : foo; +#if (AMREX_SPACEDIM == 3) + const auto& bvzlo = (bndry != nullptr) ? + (*bndry)[Orientation(2,Orientation::low )].array(mfi) : foo; + const auto& bvzhi = (bndry != nullptr) ? + (*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo; +#endif - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM - ( xbx, txbx, - { - mlebtensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,apx,flag,dxinv); - } - , ybx, tybx, - { - mlebtensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,apy,flag,dxinv); - } - , zbx, tzbx, - { - mlebtensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,apz,flag,dxinv); - } - ); - } + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mlebtensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,apx,flag,dxinv, bvxlo, bvxhi, bct, dlo, dhi); + } + , ybx, tybx, + { + mlebtensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,apy,flag,dxinv, bvylo, bvyhi, bct, dlo, dhi); + } + , zbx, tzbx, + { + mlebtensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,apz,flag,dxinv, bvzlo, bvzhi, bct, dlo, dhi); + } + ); + } + } } } @@ -411,7 +506,7 @@ MLEBTensorOp::compFlux (int amrlev, const Array& fluxe BL_PROFILE("MLEBTensorOp::compFlux()"); if ( !(loc==Location::FaceCenter || loc==Location::FaceCentroid) ) - amrex::Abort("MLEBTensorOp::compFlux() unknown location for fluxes."); + amrex::Abort("MLEBTensorOp::compFlux() unknown location for fluxes."); const int mglev = 0; const int ncomp = getNComp(); @@ -429,7 +524,7 @@ MLEBTensorOp::compFlux (int amrlev, const Array& fluxe Array& fluxmf = m_tauflux[amrlev][mglev]; Real bscalar = m_b_scalar; - compCrossTerms(amrlev, mglev, sol); + compCrossTerms(amrlev, mglev, sol, m_bndry_sol[amrlev].get()); MFItInfo mfi_info; if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); @@ -515,104 +610,11 @@ MLEBTensorOp::compFlux (int amrlev, const Array& fluxe } void -MLEBTensorOp::compVelGrad (int amrlev, const Array& fluxes, - MultiFab& sol, Location loc) const 
+MLEBTensorOp::compVelGrad (int /*amrlev*/, + const Array& /*fluxes*/, + MultiFab& /*sol*/, Location /*loc*/) const { - BL_PROFILE("MLEBTensorOp::compVelGrad()"); - - if ( !(loc==Location::FaceCenter || loc==Location::FaceCentroid) ) - amrex::Abort("MLEBTensorOp::compVelGrad() unknown location for VelGradients."); - - const int mglev = 0; - - applyBCTensor(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, m_bndry_sol[amrlev].get()); - - auto factory = dynamic_cast(m_factory[amrlev][mglev].get()); - const FabArray* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr; - - const Geometry& geom = m_geom[amrlev][mglev]; - const auto dxinv = geom.InvCellSizeArray(); - - const int dim_fluxes = AMREX_SPACEDIM*AMREX_SPACEDIM; - - MFItInfo mfi_info; - if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - { - Array fluxfab_tmp; - for (MFIter mfi(sol, mfi_info); mfi.isValid(); ++mfi) - { - const Box& bx = mfi.tilebox(); - - auto fabtyp = (flags) ? 
(*flags)[mfi].getType(bx) : FabType::regular; - if (fabtyp == FabType::covered) continue; - - if (fabtyp == FabType::regular) - { - - Array4 const vfab = sol.const_array(mfi); - AMREX_D_TERM(Box const xbx = mfi.nodaltilebox(0);, - Box const ybx = mfi.nodaltilebox(1);, - Box const zbx = mfi.nodaltilebox(2);); - AMREX_D_TERM(fluxfab_tmp[0].resize(xbx,dim_fluxes);, - fluxfab_tmp[1].resize(ybx,dim_fluxes);, - fluxfab_tmp[2].resize(zbx,dim_fluxes);); - AMREX_D_TERM(Elixir fxeli = fluxfab_tmp[0].elixir();, - Elixir fyeli = fluxfab_tmp[1].elixir();, - Elixir fzeli = fluxfab_tmp[2].elixir();); - AMREX_D_TERM(Array4 const fxfab = fluxfab_tmp[0].array();, - Array4 const fyfab = fluxfab_tmp[1].array();, - Array4 const fzfab = fluxfab_tmp[2].array();); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM - ( xbx, txbx, - { - mltensor_vel_grads_fx(txbx,fxfab,vfab,dxinv); - } - , ybx, tybx, - { - mltensor_vel_grads_fy(tybx,fyfab,vfab,dxinv); - } - , zbx, tzbx, - { - mltensor_vel_grads_fz(tzbx,fzfab,vfab,dxinv); - } - ); - -// The derivatives are put in the array with the following order: -// component: 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 -// in 2D: dU/dx, dV/dx, dU/dy, dV/dy -// in 3D: dU/dx, dV/dx, dW/dx, dU/dy, dV/dy, dW/dy, dU/dz, dV/dz, dW/dz - - - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - const Box& nbx = mfi.nodaltilebox(idim); - Array4 dst = fluxes[idim]->array(mfi); - Array4 src = fluxfab_tmp[idim].const_array(); - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (nbx, dim_fluxes, i, j, k, n, - { - dst(i,j,k,n) = src(i,j,k,n); - }); - } - - - } - else if ( loc==Location::FaceCenter ) - { - - amrex::Abort("compVelGrad not yet implemented for cut-cells "); - - } - else // loc==Location::FaceCentroid - { - - amrex::Abort("compVelGrad not yet implemented for cut-cells "); - - } - - } - } + amrex::Abort("compVelGrad not yet implemented for EB."); } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp index c9c6eb232bb..98beecf01df 
100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp @@ -13,11 +13,12 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, const auto& bcondloc = *m_bcondloc[amrlev][mglev]; const auto& maskvals = m_maskvals[amrlev][mglev]; - FArrayBox foofab(Box::TheUnitBox(),3); - const auto& foo = foofab.array(); + Array4 foo; const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1); + const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); auto factory = dynamic_cast(m_factory[amrlev][mglev].get()); const FabArray* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr; @@ -39,14 +40,13 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, const auto & bdlv = bcondloc.bndryLocs(mfi); const auto & bdcv = bcondloc.bndryConds(mfi); - GpuArray bct; - GpuArray bcl; - for (OrientationIter face; face; ++face) { - Orientation ori = face(); - const int iface = ori; - for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { - bct[iface*AMREX_SPACEDIM+icomp] = bdcv[icomp][ori]; - bcl[iface*AMREX_SPACEDIM+icomp] = bdlv[icomp][ori]; + Array2D bct; + Array2D bcl; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + bcl(ori,icomp) = bdlv[icomp][ori]; } } @@ -72,7 +72,7 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, mxlo, mylo, mxhi, myhi, bvxlo, bvylo, bvxhi, bvyhi, bct, bcl, inhomog, imaxorder, - dxinv, domain); + dxinv, dlo, dhi); }); #else const auto& mzlo = maskvals[Orientation(2,Orientation::low )].array(mfi); @@ -83,14 +83,37 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, const auto& bvzhi = (bndry != nullptr) ? 
(*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo; - AMREX_HOST_DEVICE_FOR_1D ( 12, iedge, +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + amrex::launch(12, 64, Gpu::gpuStream(), +#ifdef AMREX_USE_DPCPP + [=] AMREX_GPU_DEVICE (sycl::nd_item<1> const& item) + { + int bid = item.get_group_linear_id(); + int tid = item.get_local_linear_id(); + int bdim = item.get_local_range(0); +#else + [=] AMREX_GPU_DEVICE () + { + int bid = blockIdx.x; + int tid = threadIdx.x; + int bdim = blockDim.x; +#endif + mltensor_fill_edges(bid, tid, bdim, vbx, velfab, + mxlo, mylo, mzlo, mxhi, myhi, mzhi, + bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi, + bct, bcl, inhomog, imaxorder, + dxinv, dlo, dhi); + }); + } else +#endif { - mltensor_fill_edges(iedge, vbx, velfab, + mltensor_fill_edges(vbx, velfab, mxlo, mylo, mzlo, mxhi, myhi, mzhi, bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi, bct, bcl, inhomog, imaxorder, - dxinv, domain); - }); + dxinv, dlo, dhi); + } AMREX_HOST_DEVICE_FOR_1D ( 8, icorner, { @@ -98,13 +121,12 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, mxlo, mylo, mzlo, mxhi, myhi, mzhi, bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi, bct, bcl, inhomog, imaxorder, - dxinv, domain); + dxinv, dlo, dhi); }); + #endif } } - - // Notet that it is incorrect to call EnforcePeriodicity on vel. } } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H index 165497d1a20..d93ea3a5d1a 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H @@ -6,10 +6,95 @@ namespace amrex { -namespace { - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - Real mlebtensor_weight (int d) { - return (d==2) ? 0.5 : ((d==1) ? 
1.0 : 0.0); +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebtensor_cross_terms_fx (Box const& box, Array4 const& fx, + Array4 const& vel, + Array4 const& etax, + Array4 const& kapx, + Array4 const& apx, + Array4 const& flag, + GpuArray const& dxinv) noexcept +{ + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = 2./3.; + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (apx(i,j,0) == 0.0) + { + fx(i,j,0,0) = 0.0; + fx(i,j,0,1) = 0.0; + } + else + { + int jhip = j + flag(i ,j,0).isConnected(0, 1,0); + int jhim = j - flag(i ,j,0).isConnected(0,-1,0); + int jlop = j + flag(i-1,j,0).isConnected(0, 1,0); + int jlom = j - flag(i-1,j,0).isConnected(0,-1,0); + Real whi = mlebtensor_weight(jhip-jhim); + Real wlo = mlebtensor_weight(jlop-jlom); + Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi, + whi,wlo,jhip,jhim,jlop,jlom); + Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi, + whi,wlo,jhip,jhim,jlop,jlom); + Real divu = dvdy; + Real xif = kapx(i,j,0); + Real mun = Real(0.75)*(etax(i,j,0,0)-xif);// restore the original eta + Real mut = etax(i,j,0,1); + fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu; + fx(i,j,0,1) = -mut*dudy; + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebtensor_cross_terms_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + Array4 const& etay, + Array4 const& kapy, + Array4 const& apy, + Array4 const& flag, + GpuArray const& dxinv) noexcept +{ + const Real dxi = dxinv[0]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = 2./3.; + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (apy(i,j,0) == 0.0) + { + fy(i,j,0,0) = 0.0; + fy(i,j,0,1) = 0.0; + } + else + { + int ihip = i + flag(i,j ,0).isConnected( 1,0,0); + int ihim = i - flag(i,j 
,0).isConnected(-1,0,0); + int ilop = i + flag(i,j-1,0).isConnected( 1,0,0); + int ilom = i - flag(i,j-1,0).isConnected(-1,0,0); + Real whi = mlebtensor_weight(ihip-ihim); + Real wlo = mlebtensor_weight(ilop-ilom); + Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); + Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); + Real divu = dudx; + Real xif = kapy(i,j,0); + Real mun = Real(0.75)*(etay(i,j,0,1)-xif);// restore the original eta + Real mut = etay(i,j,0,0); + fy(i,j,0,0) = -mut*dvdx; + fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu; + } + } } } @@ -20,13 +105,20 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4 const& fx, Array4 const& kapx, Array4 const& apx, Array4 const& flag, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept { const Real dyi = dxinv[1]; const auto lo = amrex::lbound(box); const auto hi = amrex::ubound(box); constexpr Real twoThirds = 2./3.; + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { @@ -43,13 +135,15 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4 const& fx, int jlom = j - flag(i-1,j,0).isConnected(0,-1,0); Real whi = mlebtensor_weight(jhip-jhim); Real wlo = mlebtensor_weight(jlop-jlom); - Real dudy = (0.5*dyi) * ((vel(i ,jhip,0,0)-vel(i ,jhim,0,0))*whi - +(vel(i-1,jlop,0,0)-vel(i-1,jlom,0,0))*wlo); - Real dvdy = (0.5*dyi) * ((vel(i ,jhip,0,1)-vel(i ,jhim,0,1))*whi - +(vel(i-1,jlop,0,1)-vel(i-1,jlom,0,1))*wlo); + Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); + Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); Real divu = dvdy; Real xif = kapx(i,j,0); - Real mun = 0.75*(etax(i,j,0,0)-xif); // restore the original eta + Real mun = 
Real(0.75)*(etax(i,j,0,0)-xif);// restore the original eta Real mut = etax(i,j,0,1); fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu; fx(i,j,0,1) = -mut*dudy; @@ -65,13 +159,20 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4 const& fy, Array4 const& kapy, Array4 const& apy, Array4 const& flag, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept { const Real dxi = dxinv[0]; const auto lo = amrex::lbound(box); const auto hi = amrex::ubound(box); constexpr Real twoThirds = 2./3.; + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { @@ -88,15 +189,16 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4 const& fy, int ilom = i - flag(i,j-1,0).isConnected(-1,0,0); Real whi = mlebtensor_weight(ihip-ihim); Real wlo = mlebtensor_weight(ilop-ilom); - Real dudx = (0.5*dxi) * ((vel(ihip,j ,0,0)-vel(ihim,j ,0,0))*whi - +(vel(ilop,j-1,0,0)-vel(ilom,j-1,0,0))*wlo); - Real dvdx = (0.5*dxi) * ((vel(ihip,j ,0,1)-vel(ihim,j ,0,1))*whi - +(vel(ilop,j-1,0,1)-vel(ilom,j-1,0,1))*wlo); - + Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,ihip,ihim,ilop,ilom); + Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,ihip,ihim,ilop,ilom); Real divu = dudx; Real xif = kapy(i,j,0); - Real mun = 0.75*(etay(i,j,0,1)-xif); // restore the original eta - Real mut = etay(i,j,0,0); + Real mun = Real(0.75)*(etay(i,j,0,1)-xif);// restore the original eta + Real mut = etay(i,j,0,0); fy(i,j,0,0) = -mut*dvdx; fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_3D_K.H index 3c26566e7ac..2651addee2c 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_3D_K.H @@ -6,11 +6,44 @@ namespace amrex { 
-namespace { - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - Real mlebtensor_weight (int d) { - return (d==2) ? 0.5 : ((d==1) ? 1.0 : 0.0); - } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dz_on_xface (int i, int j, int, int n, + Array4 const& vel, Real dzi, + Real whi, Real wlo, + int khip, int khim, int klop, int klom) noexcept +{ + return Real(0.5)*dzi * ((vel(i ,j,khip,n)-vel(i ,j,khim,n))*whi + + (vel(i-1,j,klop,n)-vel(i-1,j,klom,n))*wlo); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dz_on_yface (int i, int j, int, int n, + Array4 const& vel, Real dzi, + Real whi, Real wlo, + int khip, int khim, int klop, int klom) noexcept +{ + return Real(0.5)*dzi * ((vel(i,j ,khip,n)-vel(i,j ,khim,n))*whi + + (vel(i,j-1,klop,n)-vel(i,j-1,klom,n))*wlo); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dx_on_zface (int, int j, int k, int n, + Array4 const& vel, Real dxi, + Real whi, Real wlo, + int ihip, int ihim, int ilop, int ilom) noexcept +{ + return Real(0.5)*dxi * ((vel(ihip,j,k ,n)-vel(ihim,j,k ,n))*whi + + (vel(ilop,j,k-1,n)-vel(ilom,j,k-1,n))*wlo); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dy_on_zface (int i, int, int k, int n, + Array4 const& vel, Real dyi, + Real whi, Real wlo, + int jhip, int jhim, int jlop, int jlom) noexcept +{ + return Real(0.5)*dyi * ((vel(i,jhip,k ,n)-vel(i,jhim,k ,n))*whi + + (vel(i,jlop,k-1,n)-vel(i,jlom,k-1,n))*wlo); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE @@ -46,26 +79,24 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4 const& fx, int jlom = j - flag(i-1,j,k).isConnected(0,-1,0); Real whi = mlebtensor_weight(jhip-jhim); Real wlo = mlebtensor_weight(jlop-jlom); - Real dudy = (0.5*dyi) * ((vel(i ,jhip,k,0)-vel(i ,jhim,k,0))*whi - +(vel(i-1,jlop,k,0)-vel(i-1,jlom,k,0))*wlo); - Real dvdy = (0.5*dyi) * ((vel(i ,jhip,k,1)-vel(i ,jhim,k,1))*whi - +(vel(i-1,jlop,k,1)-vel(i-1,jlom,k,1))*wlo); - + Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi, + 
whi,wlo,jhip,jhim,jlop,jlom); + Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi, + whi,wlo,jhip,jhim,jlop,jlom); int khip = k + flag(i ,j,k).isConnected(0,0, 1); int khim = k - flag(i ,j,k).isConnected(0,0,-1); int klop = k + flag(i-1,j,k).isConnected(0,0, 1); int klom = k - flag(i-1,j,k).isConnected(0,0,-1); whi = mlebtensor_weight(khip-khim); wlo = mlebtensor_weight(klop-klom); - Real dudz = (0.5*dzi) * ((vel(i ,j,khip,0)-vel(i ,j,khim,0))*whi - +(vel(i-1,j,klop,0)-vel(i-1,j,klom,0))*wlo); - Real dwdz = (0.5*dzi) * ((vel(i ,j,khip,2)-vel(i ,j,khim,2))*whi - +(vel(i-1,j,klop,2)-vel(i-1,j,klom,2))*wlo); - + Real dudz = mlebtensor_dz_on_xface(i,j,k,0,vel,dzi, + whi,wlo,khip,khim,klop,klom); + Real dwdz = mlebtensor_dz_on_xface(i,j,k,2,vel,dzi, + whi,wlo,khip,khim,klop,klom); Real divu = dvdy + dwdz; Real xif = kapx(i,j,k); - Real mun = 0.75*(etax(i,j,k,0)-xif); // restore the original eta - Real mut = etax(i,j,k,1); + Real mun = Real(0.75)*(etax(i,j,k,0)-xif);// restore the original eta + Real mut = etax(i,j,k,1); fx(i,j,k,0) = -mun*(-twoThirds*divu) - xif*divu; fx(i,j,k,1) = -mut*dudy; fx(i,j,k,2) = -mut*dudz; @@ -108,26 +139,24 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4 const& fy, int ilom = i - flag(i,j-1,k).isConnected(-1,0,0); Real whi = mlebtensor_weight(ihip-ihim); Real wlo = mlebtensor_weight(ilop-ilom); - Real dudx = (0.5*dxi) * ((vel(ihip,j ,k,0)-vel(ihim,j ,k,0))*whi - +(vel(ilop,j-1,k,0)-vel(ilom,j-1,k,0))*wlo); - Real dvdx = (0.5*dxi) * ((vel(ihip,j ,k,1)-vel(ihim,j ,k,1))*whi - +(vel(ilop,j-1,k,1)-vel(ilom,j-1,k,1))*wlo); - + Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); + Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); int khip = k + flag(i,j ,k).isConnected(0,0, 1); int khim = k - flag(i,j ,k).isConnected(0,0,-1); int klop = k + flag(i,j-1,k).isConnected(0,0, 1); int klom = k - flag(i,j-1,k).isConnected(0,0,-1); whi = mlebtensor_weight(khip-khim); wlo = 
mlebtensor_weight(klop-klom); - Real dvdz = (0.5*dzi) * ((vel(i,j ,khip,1)-vel(i,j ,khim,1))*whi - +(vel(i,j-1,klop,1)-vel(i,j-1,klom,1))*wlo); - Real dwdz = (0.5*dzi) * ((vel(i,j ,khip,2)-vel(i,j ,khim,2))*whi - +(vel(i,j-1,klop,2)-vel(i,j-1,klom,2))*wlo); - + Real dvdz = mlebtensor_dz_on_yface(i,j,k,1,vel,dzi, + whi,wlo,khip,khim,klop,klom); + Real dwdz = mlebtensor_dz_on_yface(i,j,k,2,vel,dzi, + whi,wlo,khip,khim,klop,klom); Real divu = dudx + dwdz; Real xif = kapy(i,j,k); - Real mun = 0.75*(etay(i,j,k,1)-xif); // restore the original eta - Real mut = etay(i,j,k,0); + Real mun = Real(0.75)*(etay(i,j,k,1)-xif);// restore the original eta + Real mut = etay(i,j,k,0); fy(i,j,k,0) = -mut*dvdx; fy(i,j,k,1) = -mun*(-twoThirds*divu) - xif*divu; fy(i,j,k,2) = -mut*dvdz; @@ -170,27 +199,457 @@ void mlebtensor_cross_terms_fz (Box const& box, Array4 const& fz, int ilom = i - flag(i,j,k-1).isConnected(-1,0,0); Real whi = mlebtensor_weight(ihip-ihim); Real wlo = mlebtensor_weight(ilop-ilom); + Real dudx = mlebtensor_dx_on_zface(i,j,k,0,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); + Real dwdx = mlebtensor_dx_on_zface(i,j,k,2,vel,dxi, + whi,wlo,ihip,ihim,ilop,ilom); + int jhip = j + flag(i,j,k ).isConnected(0, 1,0); + int jhim = j - flag(i,j,k ).isConnected(0,-1,0); + int jlop = j + flag(i,j,k-1).isConnected(0, 1,0); + int jlom = j - flag(i,j,k-1).isConnected(0,-1,0); + whi = mlebtensor_weight(jhip-jhim); + wlo = mlebtensor_weight(jlop-jlom); + Real dvdy = mlebtensor_dy_on_zface(i,j,k,1,vel,dyi, + whi,wlo,jhip,jhim,jlop,jlom); + Real dwdy = mlebtensor_dy_on_zface(i,j,k,2,vel,dyi, + whi,wlo,jhip,jhim,jlop,jlom); + Real divu = dudx + dvdy; + Real xif = kapz(i,j,k); + Real mun = Real(0.75)*(etaz(i,j,k,2)-xif);// restore the original eta + Real mut = etaz(i,j,k,0); + + fz(i,j,k,0) = -mut*dwdx; + fz(i,j,k,1) = -mut*dwdy; + fz(i,j,k,2) = -mun*(-twoThirds*divu) - xif*divu; + } + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dz_on_xface (int i, int j, int k, int 
n, + Array4 const& vel, Real dzi, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int khip, int khim, int klop, int klom) noexcept +{ + Real ddz; + if (i == dlo.x) { + if (bct(Orientation::xlo(),n) == AMREX_LO_DIRICHLET && bvxlo) { + if (k == dlo.z) { + ddz = (bvxlo(i-1,j,k ,n) * Real(-1.5) + + bvxlo(i-1,j,k+1,n) * Real(2.) + + bvxlo(i-1,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvxlo(i-1,j,k ,n) * Real(-1.5) + + bvxlo(i-1,j,k-1,n) * Real(2.) + + bvxlo(i-1,j,k-2,n) * Real(-0.5)) * dzi; + } else { + ddz = whi*dzi*(bvxlo(i-1,j,khip,n)-bvxlo(i-1,j,khim,n)); + } + } else if (bct(Orientation::xlo(),n) == AMREX_LO_NEUMANN) { + ddz = whi*dzi*(vel(i,j,khip,n)-vel(i,j,khim,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); + } + } else if (i == dhi.x+1) { + if (bct(Orientation::xhi(),n) == AMREX_LO_DIRICHLET && bvxhi) { + if (k == dlo.z) { + ddz = (bvxhi(i,j,k ,n) * Real(-1.5) + + bvxhi(i,j,k+1,n) * Real(2.) + + bvxhi(i,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvxhi(i,j,k ,n) * Real(-1.5) + + bvxhi(i,j,k-1,n) * Real(2.) 
+ + bvxhi(i,j,k-2,n) * Real(-0.5)) * dzi; + } else { + ddz = wlo*dzi*(bvxhi(i,j,klop,n)-bvxhi(i,j,klom,n)); + } + } else if (bct(Orientation::xhi(),n) == AMREX_LO_NEUMANN) { + ddz = wlo*dzi*(vel(i-1,j,klop,n)-vel(i-1,j,klom,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); + } + } else { + ddz = mlebtensor_dz_on_xface(i,j,k,n,vel,dzi,whi,wlo,khip,khim,klop,klom); + } + return ddz; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dz_on_yface (int i, int j, int k, int n, + Array4 const& vel, Real dzi, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int khip, int khim, int klop, int klom) noexcept +{ + Real ddz; + if (j == dlo.y) { + if (bct(Orientation::ylo(),n) == AMREX_LO_DIRICHLET && bvylo) { + if (k == dlo.z) { + ddz = (bvylo(i,j-1,k ,n) * Real(-1.5) + + bvylo(i,j-1,k+1,n) * Real(2.) + + bvylo(i,j-1,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvylo(i,j-1,k ,n) * Real(-1.5) + + bvylo(i,j-1,k-1,n) * Real(2.) + + bvylo(i,j-1,k-2,n) * Real(-0.5)) * dzi; + } else { + ddz = whi*dzi*(bvylo(i,j-1,khip,n)-bvylo(i,j-1,khim,n)); + } + } else if (bct(Orientation::ylo(),n) == AMREX_LO_NEUMANN) { + ddz = whi*dzi*(vel(i,j,khip,n)-vel(i,j,khim,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); + } + } else if (j == dhi.y+1) { + if (bct(Orientation::yhi(),n) == AMREX_LO_DIRICHLET && bvyhi) { + if (k == dlo.z) { + ddz = (bvyhi(i,j,k ,n) * Real(-1.5) + + bvyhi(i,j,k+1,n) * Real(2.) + + bvyhi(i,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvyhi(i,j,k ,n) * Real(-1.5) + + bvyhi(i,j,k-1,n) * Real(2.) 
+ + bvyhi(i,j,k-2,n) * Real(-0.5)) * dzi; + } else { + ddz = wlo*dzi*(bvyhi(i,j,klop,n)-bvyhi(i,j,klom,n)); + } + } else if (bct(Orientation::yhi(),n) == AMREX_LO_NEUMANN) { + ddz = wlo*dzi*(vel(i,j-1,klop,n)-vel(i,j-1,klom,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); + } + } else { + ddz = mlebtensor_dz_on_yface(i,j,k,n,vel,dzi,whi,wlo,khip,khim,klop,klom); + } + return ddz; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dx_on_zface (int i, int j, int k, int n, + Array4 const& vel, Real dxi, + Array4 const& bvzlo, + Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int ihip, int ihim, int ilop, int ilom) noexcept +{ + Real ddx; + if (k == dlo.z) { + if (bct(Orientation::zlo(),n) == AMREX_LO_DIRICHLET && bvzlo) { + if (i == dlo.x) { + ddx = (bvzlo(i ,j,k-1,n) * Real(-1.5) + + bvzlo(i+1,j,k-1,n) * Real(2.) + + bvzlo(i+2,j,k-1,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvzlo(i ,j,k-1,n) * Real(-1.5) + + bvzlo(i-1,j,k-1,n) * Real(2.) + + bvzlo(i-2,j,k-1,n) * Real(-0.5)) * dxi; + } else { + ddx = whi*dxi*(bvzlo(ihip,j,k-1,n)-bvzlo(ihim,j,k-1,n)); + } + } else if (bct(Orientation::zlo(),n) == AMREX_LO_NEUMANN) { + ddx = whi*dxi*(vel(ihip,j,k,n)-vel(ihim,j,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else if (k == dhi.z+1) { + if (bct(Orientation::zhi(),n) == AMREX_LO_DIRICHLET && bvzhi) { + if (i == dlo.x) { + ddx = (bvzhi(i ,j,k,n) * Real(-1.5) + + bvzhi(i+1,j,k,n) * Real(2.) + + bvzhi(i+2,j,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvzhi(i ,j,k,n) * Real(-1.5) + + bvzhi(i-1,j,k,n) * Real(2.) 
+ + bvzhi(i-2,j,k,n) * Real(-0.5)) * dxi; + } else { + ddx = wlo*dxi*(bvzhi(ilop,j,k,n)-bvzhi(ilom,j,k,n)); + } + } else if (bct(Orientation::zhi(),n) == AMREX_LO_NEUMANN) { + ddx = wlo*dxi*(vel(ilop,j,k-1,n)-vel(ilom,j,k-1,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else { + ddx = mlebtensor_dx_on_zface(i,j,k,n,vel,dxi,whi,wlo,ihip,ihim,ilop,ilom); + + } + return ddx; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dy_on_zface (int i, int j, int k, int n, + Array4 const& vel, Real dyi, + Array4 const& bvzlo, + Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int jhip, int jhim, int jlop, int jlom) noexcept +{ + Real ddy; + if (k == dlo.z) { + if (bct(Orientation::zlo(),n) == AMREX_LO_DIRICHLET && bvzlo) { + if (j == dlo.y) { + ddy = (bvzlo(i,j ,k-1,n) * Real(-1.5) + + bvzlo(i,j+1,k-1,n) * Real(2.) + + bvzlo(i,j+2,k-1,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvzlo(i,j ,k-1,n) * Real(-1.5) + + bvzlo(i,j-1,k-1,n) * Real(2.) + + bvzlo(i,j-2,k-1,n) * Real(-0.5)) * dyi; + } else { + ddy = whi*dyi*(bvzlo(i,jhip,k-1,n)-bvzlo(i,jhim,k-1,n)); + } + } else if (bct(Orientation::zlo(),n) == AMREX_LO_NEUMANN) { + ddy = whi*dyi*(vel(i,jhip,k,n)-vel(i,jhim,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else if (k == dhi.z+1) { + if (bct(Orientation::zhi(),n) == AMREX_LO_DIRICHLET && bvzhi) { + if (j == dlo.y) { + ddy = (bvzhi(i,j ,k,n) * Real(-1.5) + + bvzhi(i,j+1,k,n) * Real(2.) + + bvzhi(i,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvzhi(i,j ,k,n) * Real(-1.5) + + bvzhi(i,j-1,k,n) * Real(2.) 
+ + bvzhi(i,j-2,k,n) * Real(-0.5)) * dyi; + } else { + ddy = wlo*dyi*(bvzhi(i,jlop,k,n)-bvzhi(i,jlom,k,n)); + } + } else if (bct(Orientation::zhi(),n) == AMREX_LO_NEUMANN) { + ddy = wlo*dyi*(vel(i,jlop,k-1,n)-vel(i,jlom,k-1,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else { + ddy = mlebtensor_dy_on_zface(i,j,k,n,vel,dyi,whi,wlo,jhip,jhim,jlop,jlom); + } + return ddy; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebtensor_cross_terms_fx (Box const& box, Array4 const& fx, + Array4 const& vel, + Array4 const& etax, + Array4 const& kapx, + Array4 const& apx, + Array4 const& flag, + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept + +{ + const Real dyi = dxinv[1]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = 2./3.; + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (apx(i,j,k) == 0.0) + { + fx(i,j,k,0) = 0.0; + fx(i,j,k,1) = 0.0; + fx(i,j,k,2) = 0.0; + } + else + { + int jhip = j + flag(i ,j,k).isConnected(0, 1,0); + int jhim = j - flag(i ,j,k).isConnected(0,-1,0); + int jlop = j + flag(i-1,j,k).isConnected(0, 1,0); + int jlom = j - flag(i-1,j,k).isConnected(0,-1,0); + Real whi = mlebtensor_weight(jhip-jhim); + Real wlo = mlebtensor_weight(jlop-jlom); + Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); + Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); + int khip = k + flag(i ,j,k).isConnected(0,0, 1); + int khim = k - flag(i ,j,k).isConnected(0,0,-1); + int klop = k + flag(i-1,j,k).isConnected(0,0, 1); + int klom = k - flag(i-1,j,k).isConnected(0,0,-1); + whi = mlebtensor_weight(khip-khim); + wlo = mlebtensor_weight(klop-klom); + 
Real dudz = mlebtensor_dz_on_xface(i,j,k,0,vel,dzi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,khip,khim,klop,klom); + Real dwdz = mlebtensor_dz_on_xface(i,j,k,2,vel,dzi, + bvxlo,bvxhi,bct,dlo,dhi, + whi,wlo,khip,khim,klop,klom); + Real divu = dvdy + dwdz; + Real xif = kapx(i,j,k); + Real mun = Real(0.75)*(etax(i,j,k,0)-xif);// restore the original eta + Real mut = etax(i,j,k,1); + fx(i,j,k,0) = -mun*(-twoThirds*divu) - xif*divu; + fx(i,j,k,1) = -mut*dudy; + fx(i,j,k,2) = -mut*dudz; + } + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebtensor_cross_terms_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + Array4 const& etay, + Array4 const& kapy, + Array4 const& apy, + Array4 const& flag, + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = 2./3.; + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (apy(i,j,k) == 0.0) + { + fy(i,j,k,0) = 0.0; + fy(i,j,k,1) = 0.0; + fy(i,j,k,2) = 0.0; + } + else + { + int ihip = i + flag(i,j ,k).isConnected( 1,0,0); + int ihim = i - flag(i,j ,k).isConnected(-1,0,0); + int ilop = i + flag(i,j-1,k).isConnected( 1,0,0); + int ilom = i - flag(i,j-1,k).isConnected(-1,0,0); + Real whi = mlebtensor_weight(ihip-ihim); + Real wlo = mlebtensor_weight(ilop-ilom); + Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,ihip,ihim,ilop,ilom); + Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,ihip,ihim,ilop,ilom); + int khip = k + flag(i,j ,k).isConnected(0,0, 1); + int khim = k - flag(i,j ,k).isConnected(0,0,-1); + int klop = k + flag(i,j-1,k).isConnected(0,0, 1); + int klom = k - 
flag(i,j-1,k).isConnected(0,0,-1); + whi = mlebtensor_weight(khip-khim); + wlo = mlebtensor_weight(klop-klom); + Real dvdz = mlebtensor_dz_on_yface(i,j,k,1,vel,dzi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,khip,khim,klop,klom); + Real dwdz = mlebtensor_dz_on_yface(i,j,k,2,vel,dzi, + bvylo,bvyhi,bct,dlo,dhi, + whi,wlo,khip,khim,klop,klom); + Real divu = dudx + dwdz; + Real xif = kapy(i,j,k); + Real mun = Real(0.75)*(etay(i,j,k,1)-xif);// restore the original eta + Real mut = etay(i,j,k,0); + fy(i,j,k,0) = -mut*dvdx; + fy(i,j,k,1) = -mun*(-twoThirds*divu) - xif*divu; + fy(i,j,k,2) = -mut*dvdz; + } + } + } + } +} - Real dudx = (0.5*dxi) * ((vel(ihip,j,k ,0)-vel(ihim,j,k ,0))*whi - +(vel(ilop,j,k-1,0)-vel(ilom,j,k-1,0))*wlo); - Real dwdx = (0.5*dxi) * ((vel(ihip,j,k ,2)-vel(ihim,j,k ,2))*whi - +(vel(ilop,j,k-1,2)-vel(ilom,j,k-1,2))*wlo); +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mlebtensor_cross_terms_fz (Box const& box, Array4 const& fz, + Array4 const& vel, + Array4 const& etaz, + Array4 const& kapz, + Array4 const& apz, + Array4 const& flag, + GpuArray const& dxinv, + Array4 const& bvzlo, + Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = 2./3.; + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + if (apz(i,j,k) == 0.0) + { + fz(i,j,k,0) = 0.0; + fz(i,j,k,1) = 0.0; + fz(i,j,k,2) = 0.0; + } + else + { + int ihip = i + flag(i,j,k ).isConnected( 1,0,0); + int ihim = i - flag(i,j,k ).isConnected(-1,0,0); + int ilop = i + flag(i,j,k-1).isConnected( 1,0,0); + int ilom = i - flag(i,j,k-1).isConnected(-1,0,0); + Real whi = mlebtensor_weight(ihip-ihim); + Real wlo = mlebtensor_weight(ilop-ilom); + Real dudx = mlebtensor_dx_on_zface(i,j,k,0,vel,dxi, + bvzlo,bvzhi,bct,dlo,dhi, + 
whi,wlo,ihip,ihim,ilop,ilom); + Real dwdx = mlebtensor_dx_on_zface(i,j,k,2,vel,dxi, + bvzlo,bvzhi,bct,dlo,dhi, + whi,wlo,ihip,ihim,ilop,ilom); int jhip = j + flag(i,j,k ).isConnected(0, 1,0); int jhim = j - flag(i,j,k ).isConnected(0,-1,0); int jlop = j + flag(i,j,k-1).isConnected(0, 1,0); int jlom = j - flag(i,j,k-1).isConnected(0,-1,0); whi = mlebtensor_weight(jhip-jhim); wlo = mlebtensor_weight(jlop-jlom); - Real dvdy = (0.5*dyi) * ((vel(i,jhip,k ,1)-vel(i,jhim,k ,1))*whi - +(vel(i,jlop,k-1,1)-vel(i,jlom,k-1,1))*wlo); - Real dwdy = (0.5*dyi) * ((vel(i,jhip,k ,2)-vel(i,jhim,k ,2))*whi - +(vel(i,jlop,k-1,2)-vel(i,jlom,k-1,2))*wlo); - + Real dvdy = mlebtensor_dy_on_zface(i,j,k,1,vel,dyi, + bvzlo,bvzhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); + Real dwdy = mlebtensor_dy_on_zface(i,j,k,2,vel,dyi, + bvzlo,bvzhi,bct,dlo,dhi, + whi,wlo,jhip,jhim,jlop,jlom); Real divu = dudx + dvdy; Real xif = kapz(i,j,k); - Real mun = 0.75*(etaz(i,j,k,2)-xif); // restore the original eta - Real mut = etaz(i,j,k,0); + Real mun = Real(0.75)*(etaz(i,j,k,2)-xif);// restore the original eta + Real mut = etaz(i,j,k,0); fz(i,j,k,0) = -mut*dwdx; fz(i,j,k,1) = -mut*dwdy; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_K.H index c814b3b8e41..8abdde8a7c0 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_K.H @@ -4,6 +4,145 @@ #include +namespace amrex { + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_weight (int d) { + return (d==2) ? 0.5 : ((d==1) ? 
1.0 : 0.0); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dy_on_xface (int i, int, int k, int n, + Array4 const& vel, Real dyi, + Real whi, Real wlo, + int jhip, int jhim, int jlop, int jlom) noexcept +{ + return Real(0.5)*dyi * ((vel(i ,jhip,k,n)-vel(i ,jhim,k,n))*whi + + (vel(i-1,jlop,k,n)-vel(i-1,jlom,k,n))*wlo); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dx_on_yface (int, int j, int k, int n, + Array4 const& vel, Real dxi, + Real whi, Real wlo, + int ihip, int ihim, int ilop, int ilom) noexcept +{ + return Real(0.5)*dxi * ((vel(ihip,j ,k,n)-vel(ihim,j ,k,n))*whi + + (vel(ilop,j-1,k,n)-vel(ilom,j-1,k,n))*wlo); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dy_on_xface (int i, int j, int k, int n, + Array4 const& vel, Real dyi, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int jhip, int jhim, int jlop, int jlom) noexcept +{ + Real ddy; + if (i == dlo.x) { + if (bct(Orientation::xlo(),n) == AMREX_LO_DIRICHLET && bvxlo) { + if (j == dlo.y) { + ddy = (bvxlo(i-1,j ,k,n) * Real(-1.5) + + bvxlo(i-1,j+1,k,n) * Real(2.) + + bvxlo(i-1,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvxlo(i-1,j ,k,n) * Real(-1.5) + + bvxlo(i-1,j-1,k,n) * Real(2.) + + bvxlo(i-1,j-2,k,n) * Real(-0.5)) * dyi; + } else { + ddy = whi*dyi*(bvxlo(i-1,jhip,k,n)-bvxlo(i-1,jhim,k,n)); + } + } else if (bct(Orientation::xlo(),n) == AMREX_LO_NEUMANN) { + ddy = whi*dyi*(vel(i,jhip,k,n)-vel(i,jhim,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else if (i == dhi.x+1) { + if (bct(Orientation::xhi(),n) == AMREX_LO_DIRICHLET && bvxhi) { + if (j == dlo.y) { + ddy = (bvxhi(i,j ,k,n) * Real(-1.5) + + bvxhi(i,j+1,k,n) * Real(2.) + + bvxhi(i,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvxhi(i,j ,k,n) * Real(-1.5) + + bvxhi(i,j-1,k,n) * Real(2.) 
+ + bvxhi(i,j-2,k,n) * Real(-0.5)) * dyi; + } else { + ddy = wlo*dyi*(bvxhi(i,jlop,k,n)-bvxhi(i,jlom,k,n)); + } + } else if (bct(Orientation::xhi(),n) == AMREX_LO_NEUMANN) { + ddy = wlo*dyi*(vel(i-1,jlop,k,n)-vel(i-1,jlom,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else { + ddy = mlebtensor_dy_on_xface(i,j,k,n,vel,dyi,whi,wlo,jhip,jhim,jlop,jlom); + } + return ddy; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mlebtensor_dx_on_yface (int i, int j, int k, int n, + Array4 const& vel, Real dxi, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi, + Real whi, Real wlo, + int ihip, int ihim, int ilop, int ilom) noexcept +{ + Real ddx; + if (j == dlo.y) { + if (bct(Orientation::ylo(),n) == AMREX_LO_DIRICHLET && bvylo) { + if (i == dlo.x) { + ddx = (bvylo(i ,j-1,k,n) * Real(-1.5) + + bvylo(i+1,j-1,k,n) * Real(2.) + + bvylo(i+2,j-1,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvylo(i ,j-1,k,n) * Real(-1.5) + + bvylo(i-1,j-1,k,n) * Real(2.) + + bvylo(i-2,j-1,k,n) * Real(-0.5)) * dxi; + } else { + ddx = whi*dxi*(bvylo(ihip,j-1,k,n)-bvylo(ihim,j-1,k,n)); + } + } else if (bct(Orientation::ylo(),n) == AMREX_LO_NEUMANN) { + ddx = whi*dxi*(vel(ihip,j,k,n)-vel(ihim,j,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else if (j == dhi.y+1) { + if (bct(Orientation::yhi(),n) == AMREX_LO_DIRICHLET && bvyhi) { + if (i == dlo.x) { + ddx = (bvyhi(i ,j,k,n) * Real(-1.5) + + bvyhi(i+1,j,k,n) * Real(2.) + + bvyhi(i+2,j,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvyhi(i ,j,k,n) * Real(-1.5) + + bvyhi(i-1,j,k,n) * Real(2.) 
+ + bvyhi(i-2,j,k,n) * Real(-0.5)) * dxi; + } else { + ddx = wlo*dxi*(bvyhi(ilop,j,k,n)-bvyhi(ilom,j,k,n)); + } + } else if (bct(Orientation::yhi(),n) == AMREX_LO_NEUMANN) { + ddx = wlo*dxi*(vel(ilop,j-1,k,n)-vel(ilom,j-1,k,n)); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else { + ddx = mlebtensor_dx_on_yface(i,j,k,n,vel,dxi,whi,wlo,ihip,ihim,ilop,ilom); + } + return ddx; +} + +} + #if (AMREX_SPACEDIM == 1) #elif (AMREX_SPACEDIM == 2) #include diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H index f744c96e059..09d835d8b86 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H @@ -2,6 +2,7 @@ #define AMREX_ML_LINOP_H_ #include +#include #include #include #include @@ -177,10 +178,10 @@ public: * inhomogeneous Neumann BC, the value in leveldata is assumed to be * `d./dx`. */ - virtual void setLevelBC (int amrlev, const MultiFab* levelbcdata, - const MultiFab* robinbc_a = nullptr, - const MultiFab* robinbc_b = nullptr, - const MultiFab* robinbc_f = nullptr) = 0; + virtual void setLevelBC (int /*amrlev*/, const MultiFab* /*levelbcdata*/, + const MultiFab* /*robinbc_a*/ = nullptr, + const MultiFab* /*robinbc_b*/ = nullptr, + const MultiFab* /*robinbc_f*/ = nullptr) {} void setVerbose (int v) noexcept { verbose = v; } @@ -197,52 +198,51 @@ public: virtual bool needsUpdate () const { return false; } virtual void update () {} - virtual void restriction (int amrlev, int cmglev, MultiFab& crse, MultiFab& fine) const = 0; - virtual void interpolation (int amrlev, int fmglev, MultiFab& fine, const MultiFab& crse) const = 0; - virtual void averageDownSolutionRHS (int camrlev, MultiFab& crse_sol, MultiFab& crse_rhs, - const MultiFab& fine_sol, const MultiFab& fine_rhs) = 0; + virtual void restriction (int /*amrlev*/, int /*cmglev*/, MultiFab& /*crse*/, MultiFab& /*fine*/) const {} + virtual void interpolation (int /*amrlev*/, int /*fmglev*/, MultiFab& 
/*fine*/, const MultiFab& /*crse*/) const {} + virtual void interpAssign (int /*amrlev*/, int /*fmglev*/, MultiFab& /*fine*/, MultiFab& /*crse*/) const {} + virtual void averageDownSolutionRHS (int /*camrlev*/, MultiFab& /*crse_sol*/, MultiFab& /*crse_rhs*/, + const MultiFab& /*fine_sol*/, const MultiFab& /*fine_rhs*/) {} - virtual void apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode bc_mode, - StateMode s_mode, const MLMGBndry* bndry=nullptr) const = 0; - virtual void smooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rhs, - bool skip_fillboundary=false) const = 0; + virtual void apply (int /*amrlev*/, int /*mglev*/, MultiFab& /*out*/, MultiFab& /*in*/, BCMode /*bc_mode*/, + StateMode /*s_mode*/, const MLMGBndry* /*bndry*/=nullptr) const {} + virtual void smooth (int /*amrlev*/, int /*mglev*/, MultiFab& /*sol*/, const MultiFab& /*rhs*/, + bool /*skip_fillboundary*/=false) const {} // Divide mf by the diagonal component of the operator. Used by bicgstab. virtual void normalize (int /*amrlev*/, int /*mglev*/, MultiFab& /*mf*/) const {} - virtual void solutionResidual (int amrlev, MultiFab& resid, MultiFab& x, const MultiFab& b, - const MultiFab* crse_bcdata=nullptr) = 0; - virtual void correctionResidual (int amrlev, int mglev, MultiFab& resid, MultiFab& x, const MultiFab& b, - BCMode bc_mode, const MultiFab* crse_bcdata=nullptr) = 0; - - virtual void reflux (int crse_amrlev, - MultiFab& res, const MultiFab& crse_sol, const MultiFab& crse_rhs, - MultiFab& fine_res, MultiFab& fine_sol, const MultiFab& fine_rhs) const = 0; - virtual void compFlux (int amrlev, const Array& fluxes, - MultiFab& sol, Location loc) const = 0; - virtual void compGrad (int amrlev, const Array& grad, - MultiFab& sol, Location loc) const = 0; - - virtual void applyMetricTerm (int amrlev, int mglev, MultiFab& rhs) const = 0; - virtual void unapplyMetricTerm (int amrlev, int mglev, MultiFab& rhs) const = 0; - virtual void fillSolutionBC (int amrlev, MultiFab& sol, 
const MultiFab* crse_bcdata=nullptr) = 0; - - virtual void unimposeNeumannBC (int /*amrlev*/, MultiFab& /*rhs*/) const {} // only nodal solver might need it - virtual void applyInhomogNeumannTerm (int /*amrlev*/, MultiFab& /*rhs*/) const {} - virtual void applyOverset (int /*amlev*/, MultiFab& /*rhs*/) const {} - virtual void scaleRHS (int /*amrlev*/, MultiFab& /*rhs*/) const {} - virtual Real getSolvabilityOffset (int /*amrlev*/, int /*mglev*/, MultiFab const& /*rhs*/) const { return 0._rt; } // Only nodal solvers need it - virtual void fixSolvabilityByOffset (int /*amrlev*/, int /*mglev*/, MultiFab& /*rhs*/, Real /*offset*/) const {} // Only nodal solvers need it + virtual void solutionResidual (int /*amrlev*/, MultiFab& /*resid*/, MultiFab& /*x*/, const MultiFab& /*b*/, + const MultiFab* /*crse_bcdata*/=nullptr) {} + virtual void correctionResidual (int /*amrlev*/, int /*mglev*/, MultiFab& /*resid*/, MultiFab& /*x*/, const MultiFab& /*b*/, + BCMode /*bc_mode*/, const MultiFab* /*crse_bcdata*/=nullptr) {} + + virtual void reflux (int /*crse_amrlev*/, + MultiFab& /*res*/, const MultiFab& /*crse_sol*/, const MultiFab& /*crse_rhs*/, + MultiFab& /*fine_res*/, MultiFab& /*fine_sol*/, const MultiFab& /*fine_rhs*/) const {} + virtual void compFlux (int /*amrlev*/, const Array& /*fluxes*/, + MultiFab& /*sol*/, Location /*loc*/) const {} + virtual void compGrad (int /*amrlev*/, const Array& /*grad*/, + MultiFab& /*sol*/, Location /*loc*/) const {} + + virtual void applyMetricTerm (int /*amrlev*/, int /*mglev*/, Any& /*rhs*/) const {} + virtual void unapplyMetricTerm (int /*amrlev*/, int /*mglev*/, MultiFab& /*rhs*/) const {} + + virtual void unimposeNeumannBC (int /*amrlev*/, Any& /*rhs*/) const {} // only nodal solver might need it + virtual void applyInhomogNeumannTerm (int /*amrlev*/, Any& /*rhs*/) const {} + virtual void applyOverset (int /*amlev*/, Any& /*rhs*/) const {} + virtual void scaleRHS (int /*amrlev*/, Any& /*rhs*/) const {} + virtual Vector 
getSolvabilityOffset (int /*amrlev*/, int /*mglev*/, + Any const& /*rhs*/) const { return {}; } + virtual void fixSolvabilityByOffset (int /*amrlev*/, int /*mglev*/, Any& /*rhs*/, + Vector const& /*offset*/) const {} virtual void prepareForSolve () = 0; - virtual bool isSingular (int amrlev) const = 0; - virtual bool isBottomSingular () const = 0; - virtual Real xdoty (int amrlev, int mglev, const MultiFab& x, const MultiFab& y, bool local) const = 0; + virtual bool isSingular (int /*amrlev*/) const { return false; } + virtual bool isBottomSingular () const { return false; } + virtual Real xdoty (int /*amrlev*/, int /*mglev*/, const MultiFab& /*x*/, const MultiFab& /*y*/, bool /*local*/) const { return 0._rt; } - virtual void fixUpResidualMask (int /*amrlev*/, iMultiFab& /*resmsk*/) { } - virtual void nodalSync (int /*amrlev*/, int /*mglev*/, MultiFab& /*mf*/) const {} - - virtual std::unique_ptr makeNLinOp (int grid_size) const = 0; + virtual std::unique_ptr makeNLinOp (int /*grid_size*/) const { return {nullptr}; } virtual void getFluxes (const Vector >& /*a_flux*/, const Vector& /*a_sol*/, @@ -283,6 +283,59 @@ public: virtual void copyNSolveSolution (MultiFab&, MultiFab const&) const {} + virtual Any AnyMake (int amrlev, int mglev, IntVect const& ng) const; + virtual Any AnyMakeCoarseMG (int amrlev, int mglev, IntVect const& ng) const; + virtual Any AnyMakeCoarseAmr (int famrlev, IntVect const& ng) const; + virtual Any AnyMakeAlias (Any const& a) const; + virtual IntVect AnyGrowVect (Any const& a) const; + virtual void AnyCopy (Any& dst, Any const& src, IntVect const& ng) const; + virtual void AnyAdd (Any& dst, Any const& src, IntVect const& ng) const; + virtual void AnySetToZero (Any& a) const; + virtual void AnySetBndryToZero (Any& a) const; +#ifdef AMREX_USE_EB + virtual void AnySetCoveredToZero (Any& a) const; +#endif + virtual void AnyParallelCopy (Any& dst, Any const& src, + IntVect const& src_nghost, IntVect const& dst_nghost, + Periodicity const& period 
= Periodicity::NonPeriodic()) const; + + virtual Real AnyNormInf (Any& a) const; + + virtual Real AnyNormInfMask (int amrlev, Any const& a, bool local) const = 0; + + virtual void AnySolutionResidual (int amrlev, Any& resid, Any& x, Any const& b, + Any const* crse_bcdata = nullptr); + virtual void AnyCorrectionResidual (int amrlev, int mglev, Any& resid, Any& x, + const Any& b, BCMode bc_mode, + const Any* crse_bcdata=nullptr); + virtual void AnyReflux (int crse_amrlev, + Any& res, const Any& crse_sol, const Any& crse_rhs, + Any& fine_res, Any& fine_sol, const Any& fine_rhs); + + virtual void AnyAvgDownResAmr (int clev, Any& cres, Any const& fres) const = 0; + virtual void AnyAvgDownResMG (int clev, Any& cres, Any const& fres) const; + + virtual void AnySmooth (int amrlev, int mglev, Any& sol, const Any& rhs, + bool skip_fillboundary=false) const; + + virtual void AnyRestriction (int amrlev, int cmglev, Any& crse, Any& fine) const; + + virtual void AnyInterpolationMG (int amrlev, int fmglev, Any& fine, const Any& crse) const; + virtual void AnyInterpAssignMG (int amrlev, int fmglev, Any& fine, Any& crse) const; + virtual void AnyInterpolationAmr (int famrlev, Any& fine, const Any& crse, + IntVect const& /*nghost*/) const = 0; + + virtual void AnyAverageDownSolutionRHS (int camrlev, Any& crse_sol, Any& crse_rhs, + const Any& fine_sol, const Any& fine_rhs); + + virtual void AnyAverageDownAndSync (Vector& sol) const = 0; + + virtual void postSolve (Vector& sol) const; + + Real MFNormInf (MultiFab const& mf, iMultiFab const* fine_mask, bool local) const; + + bool isMFIterSafe (int amrlev, int mglev1, int mglev2) const; + protected: static constexpr int mg_coarsen_ratio = 2; @@ -401,7 +454,7 @@ protected: bool isCellCentered () const noexcept { return m_ixtype == 0; } - virtual void make (Vector >& mf, int nc, IntVect const& ng) const; + void make (Vector >& mf, IntVect const& ng) const; virtual std::unique_ptr > makeFactory (int /*amrlev*/, int /*mglev*/) const { 
return std::make_unique(); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp index 9c6ccc8ce05..e53ed376d97 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.cpp @@ -4,10 +4,12 @@ #include #include #include +#include #ifdef AMREX_USE_EB #include #include +#include #endif #ifdef AMREX_USE_PETSC @@ -544,7 +546,7 @@ MLLinOp::defineBC () } void -MLLinOp::make (Vector >& mf, int nc, IntVect const& ng) const +MLLinOp::make (Vector >& mf, IntVect const& ng) const { mf.clear(); mf.resize(m_num_amr_levels); @@ -553,8 +555,7 @@ MLLinOp::make (Vector >& mf, int nc, IntVect const& ng) const mf[alev].resize(m_num_mg_levels[alev]); for (int mlev = 0; mlev < m_num_mg_levels[alev]; ++mlev) { - const auto& ba = amrex::convert(m_grids[alev][mlev], m_ixtype); - mf[alev][mlev].define(ba, m_dmap[alev][mlev], nc, ng, MFInfo(), *m_factory[alev][mlev]); + mf[alev][mlev] = AnyMake(alev, mlev, ng); } } } @@ -895,6 +896,279 @@ MLLinOp::resizeMultiGrid (int new_size) } } +Any +MLLinOp::AnyMake (int amrlev, int mglev, IntVect const& ng) const +{ + return Any(MultiFab(amrex::convert(m_grids[amrlev][mglev], m_ixtype), + m_dmap[amrlev][mglev], getNComp(), ng, MFInfo(), + *m_factory[amrlev][mglev])); +} + +Any +MLLinOp::AnyMakeCoarseMG (int amrlev, int mglev, IntVect const& ng) const +{ + BoxArray cba = m_grids[amrlev][mglev]; + IntVect ratio = (amrlev > 0) ? 
IntVect(2) : mg_coarsen_ratio_vec[mglev]; + cba.coarsen(ratio); + cba.convert(m_ixtype); + return Any(MultiFab(cba, m_dmap[amrlev][mglev], getNComp(), ng)); +} + +Any +MLLinOp::AnyMakeCoarseAmr (int famrlev, IntVect const& ng) const +{ + BoxArray cba = m_grids[famrlev][0]; + IntVect ratio(AMRRefRatio(famrlev-1)); + cba.coarsen(ratio); + cba.convert(m_ixtype); + return Any(MultiFab(cba, m_dmap[famrlev][0], getNComp(), ng)); +} + +Any +MLLinOp::AnyMakeAlias (Any const& a) const +{ + AMREX_ASSERT(a.is()); + MultiFab const& mf = a.get(); + return Any(MultiFab(mf, amrex::make_alias, 0, mf.nComp())); +} + +IntVect +MLLinOp::AnyGrowVect (Any const& a) const +{ + AMREX_ASSERT(a.is()); + MultiFab const& mf = a.get(); + return mf.nGrowVect(); +} + +void +MLLinOp::AnySetToZero (Any& a) const +{ + AMREX_ASSERT(a.is()); + MultiFab& mf = a.get(); + mf.setVal(0._rt); +} + +void +MLLinOp::AnySetBndryToZero (Any& a) const +{ + AMREX_ASSERT(a.is()); + MultiFab& mf = a.get(); + mf.setBndry(0._rt, 0, getNComp()); +} + +#ifdef AMREX_USE_EB +void +MLLinOp::AnySetCoveredToZero (Any& a) const +{ + AMREX_ASSERT(a.is()); + auto& mf = a.get(); + EB_set_covered(mf, 0, getNComp(), 0, 0._rt); +} +#endif + +void +MLLinOp::AnyCopy (Any& dst, Any const& src, IntVect const& ng) const +{ + AMREX_ASSERT(dst.is() && src.is()); + MultiFab& dmf = dst.get(); + MultiFab const& smf = src.get(); + MultiFab::Copy(dmf, smf, 0, 0, getNComp(), ng); +} + +void +MLLinOp::AnyAdd (Any& dst, Any const& src, IntVect const& ng) const +{ + AMREX_ASSERT(dst.is() && src.is()); + MultiFab& dmf = dst.get(); + MultiFab const& smf = src.get(); + MultiFab::Add(dmf, smf, 0, 0, getNComp(), ng); +} + +void +MLLinOp::AnyAverageDownSolutionRHS (int camrlev, Any& a_crse_sol, Any& a_crse_rhs, + const Any& a_fine_sol, const Any& a_fine_rhs) +{ + AMREX_ASSERT(a_crse_sol.is() && + a_crse_rhs.is() && + a_fine_sol.is() && + a_fine_rhs.is()); + auto& crse_sol = a_crse_sol.get(); + auto& crse_rhs = a_crse_rhs.get(); + auto& fine_sol = 
a_fine_sol.get(); + auto& fine_rhs = a_fine_rhs.get(); + averageDownSolutionRHS(camrlev, crse_sol, crse_rhs, fine_sol, fine_rhs); +} + +void +MLLinOp::AnyParallelCopy (Any& dst, Any const& src, + IntVect const& src_nghost, IntVect const& dst_nghost, + Periodicity const& period) const +{ + AMREX_ASSERT(dst.is()); + MultiFab& dmf = dst.get(); + MultiFab const& smf = src.get(); + dmf.ParallelCopy(smf, 0, 0, getNComp(), src_nghost, dst_nghost, period); +} + +Real +MLLinOp::AnyNormInf (Any& a) const +{ + AMREX_ASSERT(a.is()); + return a.get().norminf(); +} + +void +MLLinOp::AnySolutionResidual (int amrlev, Any& resid, Any& x, Any const& b, + Any const* crse_bcdata) +{ + AMREX_ASSERT(x.is()); + solutionResidual(amrlev, resid.get(), x.get(), b.get(), + (crse_bcdata) ? &(crse_bcdata->get()) : nullptr); +} + +void +MLLinOp::AnyCorrectionResidual (int amrlev, int mglev, Any& resid, Any& x, const Any& b, + BCMode bc_mode, const Any* crse_bcdata) +{ + AMREX_ASSERT(x.is()); + correctionResidual(amrlev, mglev, resid.get(), x.get(), + b.get(), bc_mode, + (crse_bcdata) ? 
&(crse_bcdata->get()) : nullptr); +} + +void +MLLinOp::AnyReflux (int clev, Any& res, const Any& crse_sol, const Any& crse_rhs, + Any& fine_res, Any& fine_sol, const Any& fine_rhs) +{ + AMREX_ASSERT(res.is()); + reflux(clev,res.get(), crse_sol.get(), crse_rhs.get(), + fine_res.get(), fine_sol.get(), fine_rhs.get()); +} + +Real +MLLinOp::MFNormInf (MultiFab const& mf, iMultiFab const* fine_mask, bool local) const +{ + const int ncomp = getNComp(); + Real norm = 0._rt; + + if (fine_mask == nullptr) { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& ma = mf.const_arrays(); + norm = ParReduce(TypeList{}, TypeList{}, + mf, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) + -> GpuTuple + { + return amrex::Math::abs(ma[box_no](i,j,k,n)); + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel reduction(max:norm) +#endif + for (MFIter mfi(mf,true); mfi.isValid(); ++mfi) { + Box const& bx = mfi.tilebox(); + auto const& fab = mf.const_array(mfi); + AMREX_LOOP_4D(bx, ncomp, i, j, k, n, + { + norm = std::max(norm, amrex::Math::abs(fab(i,j,k,n))); + }); + } + } + } else { +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + auto const& ma = mf.const_arrays(); + auto const& mask_ma = fine_mask->const_arrays(); + norm = ParReduce(TypeList{}, TypeList{}, + mf, IntVect(0), ncomp, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) + -> GpuTuple + { + if (mask_ma[box_no](i,j,k)) { + return amrex::Math::abs(ma[box_no](i,j,k,n)); + } else { + return Real(0.0); + } + }); + } else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel reduction(max:norm) +#endif + for (MFIter mfi(mf,true); mfi.isValid(); ++mfi) { + Box const& bx = mfi.tilebox(); + auto const& fab = mf.const_array(mfi); + auto const& mask = fine_mask->const_array(mfi); + AMREX_LOOP_4D(bx, ncomp, i, j, k, n, + { + if (mask(i,j,k)) { + norm = std::max(norm, amrex::Math::abs(fab(i,j,k,n))); + } + }); + } + } + } + + if (!local) 
ParallelAllReduce::Max(norm, ParallelContext::CommunicatorSub()); + return norm; +} + +void +MLLinOp::AnyAvgDownResMG (int clev, Any& cres, Any const& fres) const +{ + AMREX_ASSERT(cres.is()); +#ifdef AMREX_USE_EB + amrex::EB_average_down +#else + amrex::average_down +#endif + (fres.get(), cres.get(), 0, getNComp(), + mg_coarsen_ratio_vec[clev-1]); +} + +void +MLLinOp::AnySmooth (int amrlev, int mglev, Any& sol, const Any& rhs, + bool skip_fillboundary) const +{ + AMREX_ASSERT(sol.is() && rhs.is()); + smooth(amrlev, mglev, sol.get(), rhs.get(), skip_fillboundary); +} + +void +MLLinOp::AnyRestriction (int amrlev, int cmglev, Any& crse, Any& fine) const +{ + AMREX_ASSERT(crse.is() && fine.is()); + restriction(amrlev, cmglev, crse.get(), fine.get()); +} + +void +MLLinOp::AnyInterpolationMG (int amrlev, int fmglev, Any& fine, const Any& crse) const +{ + AMREX_ASSERT(crse.is() && fine.is()); + interpolation(amrlev, fmglev, fine.get(), crse.get()); +} + +void +MLLinOp::AnyInterpAssignMG (int amrlev, int fmglev, Any& fine, Any& crse) const +{ + AMREX_ASSERT(crse.is() && fine.is()); + interpAssign(amrlev, fmglev, fine.get(), crse.get()); +} + +void +MLLinOp::postSolve (Vector& /* sol */) const {} + +bool +MLLinOp::isMFIterSafe (int amrlev, int mglev1, int mglev2) const +{ + return m_dmap[amrlev][mglev1] == m_dmap[amrlev][mglev2] + && BoxArray::SameRefs(m_grids[amrlev][mglev1], m_grids[amrlev][mglev2]); +} + #ifdef AMREX_USE_PETSC std::unique_ptr MLLinOp::makePETSc () const diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp_temp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp_temp.H new file mode 100644 index 00000000000..68d7c836ba5 --- /dev/null +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp_temp.H @@ -0,0 +1,486 @@ +#ifndef AMREX_MLLINOP_TEMP_H_ +#define AMREX_MLLINOP_TEMP_H_ + +//! This is a template for writing your own linear operator class for Ax=b. + +#include + +namespace amrex_temp +{ + +class MLLinOpTemp + : public amrex::MLLinOp +{ +public: + + //! 
In this example, there are 3 edge based MultiFabs. + using Container = amrex::Array; + + MLLinOpTemp () {} + + virtual ~MLLinOpTemp () {} + + MLLinOpTemp (const MLLinOpTemp&) = delete; + MLLinOpTemp (MLLinOpTemp&&) = delete; + MLLinOpTemp& operator= (const MLLinOpTemp&) = delete; + MLLinOpTemp& operator= (MLLinOpTemp&&) = delete; + + MLLinOpTemp (const amrex::Vector& a_geom, + const amrex::Vector& a_grids, + const amrex::Vector& a_dmap, + const amrex::LPInfo& a_info = amrex::LPInfo(), + const amrex::Vector const*>& a_factory = {}) + { + define(a_geom, a_grids, a_dmap, a_info, a_factory); + } + + void define (const amrex::Vector& a_geom, + const amrex::Vector& a_grids, + const amrex::Vector& a_dmap, + const amrex::LPInfo& a_info = amrex::LPInfo(), + const amrex::Vector const*>& a_factory = {}) + { + amrex::MLLinOp::define(a_geom, a_grids, a_dmap, a_info, a_factory); + } + + /** + * \brief Return the default solver at the bottom of MG cycles. By + * default, MLLinOp uses a BiCGStab solver implemented in + * AMReX::MLCGSolver. However, it only supports a single MultiFab. + * Since our data type is different, we use a smoother instead. In the + * future we can try to generalize MLCGSolver. + */ + virtual amrex::BottomSolver getDefaultBottomSolver () const override { + return amrex::BottomSolver::smoother; + } + + /** + * \brief Make data container (e.g., MultiFabs stored in Any) for given level. + * + * \param amrlev AMR level. Note that the lowest level is always 0. + * \param mglev MG level. Note that mglev+1 is one level coarser than mglev. + * \param ng number of ghost cells. 
+ */ + virtual amrex::Any AnyMake (int amrlev, int mglev, amrex::IntVect const& ng) const override + { + auto const& ba = m_grids[amrlev][mglev]; + auto const& dm = m_dmap [amrlev][mglev]; + auto const& fc = *m_factory[amrlev][mglev]; + return amrex::Any(Container{amrex::MultiFab(amrex::convert(ba,amrex::IntVect(0,1,1)), + dm, 1, ng, amrex::MFInfo(), fc), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,0,1)), + dm, 1, ng, amrex::MFInfo(), fc), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,1,0)), + dm, 1, ng, amrex::MFInfo(), fc)}); + } + + /** + * \brief Make data container with coarsened BoxArray and + * DistributionMapping of the give MG level. + * + * \param amrlev AMR level. Note that the lowest level is always 0. + * \param mglev MG level. The coarser level is mglev+1. + * \param ng number of ghost cells. + */ + virtual amrex::Any AnyMakeCoarseMG (int amrlev, int mglev, amrex::IntVect const& ng) const override + { + auto ratio = (amrlev > 0) ? amrex::IntVect(2) : this->mg_coarsen_ratio_vec[mglev]; + auto const& ba = amrex::coarsen(m_grids[amrlev][mglev], ratio); + auto const& dm = m_dmap[amrlev][mglev]; + return amrex::Any(Container{amrex::MultiFab(amrex::convert(ba,amrex::IntVect(0,1,1)), + dm, 1, ng), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,0,1)), + dm, 1, ng), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,1,0)), + dm, 1, ng)}); + } + + /** + * \brief Make data container with coarsened BoxArray and + * DistributionMapping of the given AMR level. + * + * \param famrlev AMR level. The coarser AMR level is famrlev-1. + * \param ng number of ghost cells. 
+ */ + virtual amrex::Any AnyMakeCoarseAmr (int famrlev, amrex::IntVect const& ng) const override + { + amrex::IntVect ratio(this->AMRRefRatio(famrlev-1)); + auto const& ba = amrex::coarsen(m_grids[famrlev][0], ratio); + auto const& dm = m_dmap[famrlev][0]; + return amrex::Any(Container{amrex::MultiFab(amrex::convert(ba,amrex::IntVect(0,1,1)), + dm, 1, ng), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,0,1)), + dm, 1, ng), + amrex::MultiFab(amrex::convert(ba,amrex::IntVect(1,1,0)), + dm, 1, ng)}); + } + + /** + * \brief Make an alias of the given Any without deepcopying. + * + * \param a an Any object. + */ + virtual amrex::Any AnyMakeAlias (amrex::Any const& a) const override + { + auto const& rhs = a.get(); + return amrex::Any(Container{amrex::MultiFab(rhs[0], amrex::make_alias, 0, 1), + amrex::MultiFab(rhs[1], amrex::make_alias, 0, 1), + amrex::MultiFab(rhs[2], amrex::make_alias, 0, 1)}); + } + + /** + * \brief Retuen the number of ghost cells in the given Any. + * + * \param a an Any object. + */ + virtual amrex::IntVect AnyGrowVect (amrex::Any const& a) const override + { + auto const& mfs = a.get(); + return mfs[0].nGrowVect(); + } + + /** + * \brief Copy data from source Any to destination Any. + * + * \param dst destination Any. + * \param src source Any. + * \param ng number of ghost cells included in the operation. + */ + virtual void AnyCopy (amrex::Any& dst, amrex::Any const& src, amrex::IntVect const& ng) const override + { + auto& dmf = dst.get(); + auto const& smf = src.get(); + for (int idim=0; idim < 3; ++idim) { + amrex::MultiFab::Copy(dmf[idim], smf[idim], 0, 0, 1, ng); + } + } + + /** + * \brief Add data from source Any to destination Any. + * + * \param dst destination Any. + * \param src source Any. + * \param ng number of ghost cells included in the operation. 
+ */ + virtual void AnyAdd (amrex::Any& dst, amrex::Any const& src, amrex::IntVect const& ng) const override + { + auto& dmf = dst.get(); + auto const& smf = src.get(); + for (int idim=0; idim < 3; ++idim) { + amrex::MultiFab::Add(dmf[idim], smf[idim], 0, 0, 1, ng); + } + } + + /** + * \brief Set the given Any to zero. + * + * \param a an Any object. + */ + virtual void AnySetToZero (amrex::Any& a) const override + { + auto& mfs = a.get(); + for (int idim=0; idim < 3; ++idim) { + mfs[idim].setVal(amrex::Real(0.0)); + } + } + + /** + * \brief Set boundary (i.e., ghost cells) the given Any to zero. + * + * \param a an Any object. + */ + virtual void AnySetBndryToZero (amrex::Any& a) const override + { + auto& mfs = a.get(); + for (int idim=0; idim < 3; ++idim) { + mfs[idim].setBndry(amrex::Real(0.0), 0, 1); + } + } + +#ifdef AMREX_USE_EB + /** + * \brief Set covered region of the given Any to zero. + * + * \param a an Any object. + */ + virtual void AnySetCoveredToZero (amrex::Any& a) const override + { + auto& mfs = a.get(); + for (int idim=0; idim < 3; ++idim) { + amrex::EB_set_covered(mfs[idim], 0, 1, 0, amrex::Real(0.0)); + } + } +#endif + + /** + * \brief ParallelCopy from source Any ot destination Any. + * + * \param dst destination Any. + * \param src source Any. + * \param src_nghost number of ghost cells in the source included in the operation. + * \param dst_nghost number of ghost cells in the destination included in the operation. + * \param period Periodicity. 
+ */ + virtual void AnyParallelCopy (amrex::Any& dst, amrex::Any const& src, + amrex::IntVect const& src_nghost, amrex::IntVect const& dst_nghost, + amrex::Periodicity const& period = amrex::Periodicity::NonPeriodic()) const override + { + auto& dmf = dst.get(); + auto const& smf = src.get(); + for (int idim=0; idim < 3; ++idim) { + dmf[idim].ParallelCopy_nowait(smf[idim], 0, 0, 1, src_nghost, dst_nghost, period); + } + for (int idim=0; idim < 3; ++idim) { + dmf[idim].ParallelCopy_finish(); + } + } + + /** + * \brief Return the infinity norm of the given Any. + * + * \param a an Any object. + */ + virtual amrex::Real AnyNormInf (amrex::Any& a) const override + { + auto& mfs = a.get(); + amrex::Real r = amrex::Real(0.0); + for (int idim=0; idim < 3; ++idim) { + auto tmp = mfs[idim].norminf(0, 0, true); + r = std::max(r, tmp); + } + amrex::ParallelAllReduce::Max(r, amrex::ParallelContext::CommunicatorSub()); + return r; + } + + /** + * \brief Return the infinity norm of the masked region of the given Any. + * + * For a composite solve with multiple AMR levels, the region covered by + * finer AMR levels are not included in the operation. + * + * \parame amrlev AMR level. + * \param a an Any object. + * \parame local determines if the reduction is local (i.e., no MPI communication) or not. + */ + virtual amrex::Real AnyNormInfMask (int amrlev, amrex::Any const& a, bool local) const override + { + amrex::ignore_unused(amrlev, a, local); + amrex::Abort("TODO: AnyNormInfMask"); + // This is only needed for multi-level composite solve + return amrex::Real(0.0); + } + + /** + * \brief Compute residual of the original form, r = b - Ax. + * + * \param amrlev AMR level + * \param resid residual + * \param x the solution x + * \param b the RHS b + * \param crse_bcdata provides Dirichlet BC at AMR coarse/fine interface. + * It's a nullptr for single level solve. 
+ */ + virtual void AnySolutionResidual (int amrlev, amrex::Any& resid, amrex::Any& x, amrex::Any const& b, + amrex::Any const* crse_bcdata = nullptr) override + { + amrex::ignore_unused(amrlev, resid, x, b, crse_bcdata); + amrex::Abort("TODO: AnySolutionResidual"); + } + + /** + * \brief Compute residual of the residual correction form, r = b - Ax. + * + * \param amrlev AMR level. + * \param resid residual of the residual correction form. + * \param x the correction. + * \param b the RHS for the residual correction form (i.e., the residual of the original form. + * \param bc_mode is either Homogeneous or Inhomogeneous. + * \param crse_bcdata provides inhomogenous Dirichlet BC at AMR coarse/fine interface. + * It's ignored for homogeneous Dirichlet BC. + */ + virtual void AnyCorrectionResidual (int amrlev, int mglev, amrex::Any& resid, amrex::Any& x, + const amrex::Any& b, MLLinOp::BCMode bc_mode, + const amrex::Any* crse_bcdata=nullptr) override + { + amrex::ignore_unused(amrlev, mglev, resid, x, b, bc_mode, crse_bcdata); + amrex::Abort("TODO: AnyCorrectionResidual"); + } + + /** + * \brief Reflux + * + * This modifies the coarse level residual at the coarse/fine interface. + * + * \param crse_amrlev coarse AMR level. + * \param res coarse level residual. + * \param crse_sol coarse level x. + * \param crse_rhs coarse level b. + * \param fine_res fine level residual. This may not be needed depending on the coarse/fine stencil. + * \param fine_sol fine level x. + * \param fine_rhs fine level b. + */ + virtual void AnyReflux (int crse_amrlev, + amrex::Any& res, const amrex::Any& crse_sol, const amrex::Any& crse_rhs, + amrex::Any& fine_res, amrex::Any& fine_sol, const amrex::Any& fine_rhs) override + { + amrex::ignore_unused(crse_amrlev, res, crse_sol, crse_rhs, fine_res, fine_sol, fine_rhs); + amrex::Abort("TODO: AnyReflux"); + // This is only needed for multi-level composite solve + } + + /** + * \brief Average down residual from fine to coarse AMR level. 
+ * + * \param clev coarse ARR level. + * \param cres coarse level residual. + * \param fres fine level residual. + */ + virtual void AnyAvgDownResAmr (int clev, amrex::Any& cres, amrex::Any const& fres) const override + { + amrex::ignore_unused(clev, cres, fres); + amrex::Abort("TODO: AnyAvgDownResAmr"); + // This is only needed for mulit-level composite solve. + // And maybe there is nothing neeed to be done here, like in the nodal projection solver. + } + + /** + * \brief Average down residual from fine to coarse MG level. + * + * This is only needed for MG F-cycle, and we don't need to implement this for V-cycle. + * + * \param clev coarse MG level. + * \param cres coarse level residual. + * \param fres fine level residual. + */ + virtual void AnyAvgDownResMG (int clev, amrex::Any& cres, amrex::Any const& fres) const override + { + amrex::ignore_unused(clev, cres, fres); + amrex::Abort("TODO: AnyAvgDownResMG"); // Not needed for V-cycle. + } + + /** + * \brief Smooth the given level. + * + * \param amrlev AMR level. Note that the lowest level is always 0. + * \param mglev MG level. Note that mglev+1 is one level coarser than mglev. + * \param sol x + * \param rhs b + * \param skip_fillboundary a flag for if we need to fill ghost cells in this function. + */ + virtual void AnySmooth (int amrlev, int mglev, amrex::Any& sol, const amrex::Any& rhs, + bool skip_fillboundary=false) const override + { + amrex::ignore_unused(amrlev, mglev, sol, rhs, skip_fillboundary); + amrex::Abort("TODO: AnySmooth"); + } + + /** + * \brief Restriction from fine to coarse MG level. + * + * \param amrlev AMR level. + * \param cmglev coarse MG level. The fine MG level is cmglev-1. + * \param crse coarse data. + * \param fine fine data. This is not const& because we may need to fill its ghost cells. 
+ */ + virtual void AnyRestriction (int amrlev, int cmglev, amrex::Any& crse, amrex::Any& fine) const override + { + amrex::ignore_unused(amrlev, cmglev, crse, fine); + amrex::Abort("TODO: AnyRestriction"); + } + + /** + * \brief Add interpolated coarse data onto the fine MG level. + * + * Note that it's an ADD operation. + * + * \param amrlev AMR level. + * \param fmglev fine MG level. The coarse MG level is fmglev+1. + * \param fine fine MG level data. + * \param crse coarse MG level data. + */ + virtual void AnyInterpolationMG (int amrlev, int fmglev, amrex::Any& fine, const amrex::Any& crse) const override + { + amrex::ignore_unused(amrlev, fmglev, fine, crse); + amrex::Abort("TODO: AnyInterpolationMG"); + } + + /** + * \brief Assign (i.e., copy) interpolated coarse data onto the fine MG level. + * + * Note that it's an ASSIGN operation. This is used in MG F-cycle, and + * does not need to be implemented for V-cycle. + * + * \param amrlev AMR level. + * \param fmglev fine MG level. The coarse MG level is fmglev+1. + * \param fine fine MG level data. + * \param crse coarse MG level data. + */ + virtual void AnyInterpAssignMG (int amrlev, int fmglev, amrex::Any& fine, amrex::Any& crse) const override + { + amrex::ignore_unused(amrlev, fmglev, fine, crse); + amrex::Abort("TODO: AnyInterpAssignMG"); // not needed for V-cycle. + } + + /** + * \brief Interpolate data from coarse to fine AMR level. + * + * \param famrlev fine AMR level. The coarse AMR level is famrlev-1. + * \param fine data on fine AMR level. + * \param crse data on coarse AMR level. + */ + virtual void AnyInterpolationAmr (int famrlev, amrex::Any& fine, const amrex::Any& crse, + amrex::IntVect const& /*nghost*/) const override + { + amrex::ignore_unused(famrlev, fine, crse); + // This is only needed for multi-level composite solve + amrex::Abort("TODO: AnyInterpolationAmr"); + } + + /** + * \brief Average down x and b from fine to coarse AMR level. 
+ * + * This is called before V-cycle to make data on AMR levels consistent. + * + * \param camrlev coarse AMR level. The fine level is camrlev+1. + * \param crse_sol x on coarse level. + * \param crse_rhs b on coarse level. + * \param fine_sol x on fine level. + * \param fine_rhs b on fine level. + */ + virtual void AnyAverageDownSolutionRHS (int camrlev, amrex::Any& crse_sol, amrex::Any& crse_rhs, + const amrex::Any& fine_sol, const amrex::Any& fine_rhs) override + { + amrex::ignore_unused(camrlev, crse_sol, crse_rhs, fine_sol, fine_rhs); + // This is only needed for multi-level composite solve + amrex::Abort("AnyAverageDownSolutionRHS"); + } + + /** + * \brief Average down and synchronize AMR data. + * + * Synchronize the data on each level. That is the nodal data in the + * same MultiFab needs to be synchronized. This function also needs to + * average down the data from fine to coarse AMR levels. + * + * \param sol data on all AMR levels. + */ + virtual void AnyAverageDownAndSync (amrex::Vector& sol) const override + { + amrex::ignore_unused(sol); + // Even for single level, we shoudl synchronize the data on level 0. + amrex::Abort("TODO: AnyAverageDownAndSync"); + } + + /** + * \brief Prepare the solver for MG cycle. + */ + virtual void prepareForSolve () override + { + amrex::Abort("TODO: prepareForSolve"); + } +}; + +} + + +#endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H index 32980d74c45..e884f877fbc 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.H @@ -36,6 +36,10 @@ public: Real solve (const Vector& a_sol, const Vector& a_rhs, Real a_tol_rel, Real a_tol_abs, const char* checkpoint_file = nullptr); + // For this version of solve, Any holds MultiFab like objects. 
+ Real solve (Vector& a_sol, const Vector& a_rhs, + Real a_tol_rel, Real a_tol_abs, const char* checkpoint_file = nullptr); + void getGradSolution (const Vector >& a_grad_sol, Location a_loc = Location::FaceCenter); @@ -121,7 +125,7 @@ public: void setHypreStrongThreshold (Real t) noexcept {hypre_strong_threshold = t;} #endif - void prepareForSolve (const Vector& a_sol, const Vector& a_rhs); + void prepareForSolve (Vector& a_sol, const Vector& a_rhs); void prepareForNSolve (); @@ -151,19 +155,16 @@ public: Real MLRhsNormInf (bool local = false); void buildFineMask (); - void averageDownAndSync (); - - void computeVolInv (); void makeSolvable (); - void makeSolvable (int amrlev, int mglev, MultiFab& mf); + void makeSolvable (int amrlev, int mglev, Any& mf); #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) - void bottomSolveWithHypre (MultiFab& x, const MultiFab& b); + void bottomSolveWithHypre (Any& x, const Any& b); #endif - void bottomSolveWithPETSc (MultiFab& x, const MultiFab& b); + void bottomSolveWithPETSc (Any& x, const Any& b); - int bottomSolveWithCG (MultiFab& x, const MultiFab& b, MLCGSolver::Type type); + int bottomSolveWithCG (Any& x, const Any& b, MLCGSolver::Type type); Real getInitRHS () const noexcept { return m_rhsnorm0; } // Initial composite residual @@ -242,26 +243,21 @@ private: * \brief To avoid confusion, terms like sol, cor, rhs, res, ... etc. are * in the frame of the original equation, not the correction form */ - Vector > sol_raii; - Vector sol; //!< alias to argument a_sol - Vector rhs; //!< Copy of original rhs - //! L(sol) = rhs + Vector sol; //!< Might be alias to argument a_sol + Vector rhs; //!< Copy of original rhs + //! L(sol) = rhs + + Vector sol_is_alias; /** * \brief First Vector: Amr levels. 0 is the coarest level * Second Vector: MG levels. 0 is the finest level */ - Vector > res; //! = rhs - L(sol) - Vector > > cor; //!< L(cor) = res - Vector > > cor_hold; - Vector > rescor; //!< = res - L(cor) - //! 
Residual of the correction form - - Vector > fine_mask; - - Vector > volinv; //!< used by makeSolvable - - Vector > scratch; + Vector > res; //! = rhs - L(sol) + Vector > cor; //!< L(cor) = res + Vector > cor_hold; + Vector > rescor; //!< = res - L(cor) + //! Residual of the correction form enum timer_types { solve_time=0, iter_time, bottom_time, ntimers }; Vector timer; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp b/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp index 2bdb9222b4b..28c833397b4 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #ifdef AMREX_USE_PETSC @@ -51,25 +50,52 @@ MLMG::~MLMG () Real MLMG::solve (const Vector& a_sol, const Vector& a_rhs, Real a_tol_rel, Real a_tol_abs, const char* checkpoint_file) +{ + Vector any_sol(namrlevs); + Vector any_rhs(namrlevs); + for (int lev = 0; lev < namrlevs; ++lev) { + any_sol[lev] = MultiFab(*a_sol[lev], amrex::make_alias, 0, a_sol[lev]->nComp()); + any_rhs[lev] = MultiFab(*a_rhs[lev], amrex::make_alias, 0, a_rhs[lev]->nComp()); + } + return solve(any_sol, any_rhs, a_tol_rel, a_tol_abs, checkpoint_file); +} + +Real +MLMG::solve (Vector& a_sol, const Vector& a_rhs, + Real a_tol_rel, Real a_tol_abs, const char* checkpoint_file) { BL_PROFILE("MLMG::solve()"); if (checkpoint_file != nullptr) { - checkPoint(a_sol, a_rhs, a_tol_rel, a_tol_abs, checkpoint_file); + if (a_sol[0].is()) { + Vector mf_sol(namrlevs); + Vector mf_rhs(namrlevs); + for (int lev = 0; lev < namrlevs; ++lev) { + mf_sol[lev] = &(a_sol[lev].get()); + mf_rhs[lev] = &(a_rhs[lev].get()); + } + checkPoint(mf_sol, mf_rhs, a_tol_rel, a_tol_abs, checkpoint_file); + } else { + amrex::Abort("MLMG::solve: checkpoint not supported for non-MultiFab type"); + } } if (bottom_solver == BottomSolver::Default) { bottom_solver = linop.getDefaultBottomSolver(); } +#if defined(AMREX_USE_HYPRE) || defined(AMREX_USE_PETSC) if (bottom_solver == BottomSolver::hypre || 
bottom_solver == BottomSolver::petsc) { + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(a_sol[0].is(), + "Non-MultiFab type not supported for hypre and petsc"); int mo = linop.getMaxOrder(); - if (a_sol[0]->hasEBFabFactory()) { + if (a_sol[0].get().hasEBFabFactory()) { linop.setMaxOrder(2); } else { linop.setMaxOrder(std::min(3,mo)); // maxorder = 4 not supported } } +#endif bool is_nsolve = linop.m_parent; @@ -84,8 +110,6 @@ MLMG::solve (const Vector& a_sol, const Vector& a_rh computeMLResidual(finest_amr_lev); - int ncomp = linop.getNComp(); - bool local = true; Real resnorm0 = MLResNormInf(finest_amr_lev, local); Real rhsnorm0 = MLRhsNormInf(local); @@ -194,15 +218,16 @@ MLMG::solve (const Vector& a_sol, const Vector& a_rh timer[iter_time] = amrex::second() - iter_start_time; } + linop.postSolve(sol); + IntVect ng_back = final_fill_bc ? IntVect(1) : IntVect(0); if (linop.hasHiddenDimension()) { ng_back[linop.hiddenDirection()] = 0; } for (int alev = 0; alev < namrlevs; ++alev) { - if (a_sol[alev] != sol[alev]) - { - MultiFab::Copy(*a_sol[alev], *sol[alev], 0, 0, ncomp, ng_back); + if (!sol_is_alias[alev]) { + linop.AnyCopy(a_sol[alev], sol[alev], ng_back); } } @@ -229,16 +254,13 @@ void MLMG::oneIter (int iter) { BL_PROFILE("MLMG::oneIter()"); - int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(); - for (int alev = finest_amr_lev; alev > 0; --alev) { - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(alev); miniCycle(alev); - MultiFab::Add(*sol[alev], *cor[alev][0], 0, 0, ncomp, nghost); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(alev)); + linop.AnyAdd(sol[alev], cor[alev][0], nghost); // compute residual for the coarse AMR level computeResWithCrseSolFineCor(alev-1,alev); @@ -250,7 +272,6 @@ void MLMG::oneIter (int iter) // coarsest amr level { - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(0); // enforce 
solvability if appropriate if (linop.isSingular(0) && linop.getEnforceSingularSolvable()) { @@ -258,24 +279,27 @@ void MLMG::oneIter (int iter) } if (iter < max_fmg_iters) { - mgFcycle (); + mgFcycle(); } else { - mgVcycle (0, 0); + mgVcycle(0, 0); } - MultiFab::Add(*sol[0], *cor[0][0], 0, 0, ncomp, nghost); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(0)); + linop.AnyAdd(sol[0], cor[0][0], nghost); } for (int alev = 1; alev <= finest_amr_lev; ++alev) { - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(alev); // (Fine AMR correction) = I(Coarse AMR correction) interpCorrection(alev); - MultiFab::Add(*sol[alev], *cor[alev][0], 0, 0, ncomp, nghost); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(alev)); + linop.AnyAdd(sol[alev], cor[alev][0], nghost); if (alev != finest_amr_lev) { - MultiFab::Add(*cor_hold[alev][0], *cor[alev][0], 0, 0, ncomp, nghost); + linop.AnyAdd(cor_hold[alev][0], cor[alev][0], nghost); } // Update fine AMR level correction @@ -283,14 +307,14 @@ void MLMG::oneIter (int iter) miniCycle(alev); - MultiFab::Add(*sol[alev], *cor[alev][0], 0, 0, ncomp, nghost); + linop.AnyAdd(sol[alev], cor[alev][0], nghost); if (alev != finest_amr_lev) { - MultiFab::Add(*cor[alev][0], *cor_hold[alev][0], 0, 0, ncomp, nghost); + linop.AnyAdd(cor[alev][0], cor_hold[alev][0], nghost); } } - averageDownAndSync(); + linop.AnyAverageDownAndSync(sol); } // Compute multi-level Residual (res) up to amrlevmax. @@ -301,11 +325,11 @@ MLMG::computeMLResidual (int amrlevmax) const int mglev = 0; for (int alev = amrlevmax; alev >= 0; --alev) { - const MultiFab* crse_bcdata = (alev > 0) ? sol[alev-1] : nullptr; - linop.solutionResidual(alev, res[alev][mglev], *sol[alev], rhs[alev], crse_bcdata); + const Any* crse_bcdata = (alev > 0) ? 
&(sol[alev-1]) : nullptr; + linop.AnySolutionResidual(alev, res[alev][mglev], sol[alev], rhs[alev], crse_bcdata); if (alev < finest_amr_lev) { - linop.reflux(alev, res[alev][mglev], *sol[alev], rhs[alev], - res[alev+1][mglev], *sol[alev+1], rhs[alev+1]); + linop.AnyReflux(alev, res[alev][mglev], sol[alev], rhs[alev], + res[alev+1][mglev], sol[alev+1], rhs[alev+1]); } } } @@ -315,16 +339,8 @@ void MLMG::computeResidual (int alev) { BL_PROFILE("MLMG::computeResidual()"); - - MultiFab& x = *sol[alev]; - const MultiFab& b = rhs[alev]; - MultiFab& r = res[alev][0]; - - const MultiFab* crse_bcdata = nullptr; - if (alev > 0) { - crse_bcdata = sol[alev-1]; - } - linop.solutionResidual(alev, r, x, b, crse_bcdata); + const Any* crse_bcdata = (alev > 0) ? &(sol[alev-1]) : nullptr; + linop.AnySolutionResidual(alev, res[alev][0], sol[alev], rhs[alev], crse_bcdata); } // Compute coarse AMR level composite residual with coarse solution and fine correction @@ -333,39 +349,28 @@ MLMG::computeResWithCrseSolFineCor (int calev, int falev) { BL_PROFILE("MLMG::computeResWithCrseSolFineCor()"); - int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = std::min(linop.getNGrow(falev),linop.getNGrow(calev)); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(std::min(linop.getNGrow(falev),linop.getNGrow(calev))); - MultiFab& crse_sol = *sol[calev]; - const MultiFab& crse_rhs = rhs[calev]; - MultiFab& crse_res = res[calev][0]; + Any& crse_sol = sol[calev]; + const Any& crse_rhs = rhs[calev]; + Any& crse_res = res[calev][0]; - MultiFab& fine_sol = *sol[falev]; - const MultiFab& fine_rhs = rhs[falev]; - MultiFab& fine_cor = *cor[falev][0]; - MultiFab& fine_res = res[falev][0]; - MultiFab& fine_rescor = rescor[falev][0]; + Any& fine_sol = sol[falev]; + const Any& fine_rhs = rhs[falev]; + Any& fine_cor = cor[falev][0]; + Any& fine_res = res[falev][0]; + Any& fine_rescor = rescor[falev][0]; - const MultiFab* crse_bcdata 
= nullptr; - if (calev > 0) { - crse_bcdata = sol[calev-1]; - } - linop.solutionResidual(calev, crse_res, crse_sol, crse_rhs, crse_bcdata); + const Any* crse_bcdata = (calev > 0) ? &(sol[calev-1]) : nullptr; + linop.AnySolutionResidual(calev, crse_res, crse_sol, crse_rhs, crse_bcdata); - linop.correctionResidual(falev, 0, fine_rescor, fine_cor, fine_res, BCMode::Homogeneous); - MultiFab::Copy(fine_res, fine_rescor, 0, 0, ncomp, nghost); + linop.AnyCorrectionResidual(falev, 0, fine_rescor, fine_cor, fine_res, BCMode::Homogeneous); + linop.AnyCopy(fine_res, fine_rescor, nghost); - linop.reflux(calev, crse_res, crse_sol, crse_rhs, fine_res, fine_sol, fine_rhs); + linop.AnyReflux(calev, crse_res, crse_sol, crse_rhs, fine_res, fine_sol, fine_rhs); - if (linop.isCellCentered()) { - const int amrrr = linop.AMRRefRatio(calev); -#ifdef AMREX_USE_EB - amrex::EB_average_down(fine_res, crse_res, 0, ncomp, amrrr); -#else - amrex::average_down(fine_res, crse_res, 0, ncomp, amrrr); -#endif - } + linop.AnyAvgDownResAmr(calev, crse_res, fine_res); } // Compute fine AMR level residual fine_res = fine_res - L(fine_cor) with coarse providing BC. 
@@ -374,20 +379,19 @@ MLMG::computeResWithCrseCorFineCor (int falev) { BL_PROFILE("MLMG::computeResWithCrseCorFineCor()"); - int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(falev); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(falev)); - const MultiFab& crse_cor = *cor[falev-1][0]; + const Any& crse_cor = cor[falev-1][0]; - MultiFab& fine_cor = *cor[falev][0]; - MultiFab& fine_res = res[falev][0]; - MultiFab& fine_rescor = rescor[falev][0]; + Any& fine_cor = cor [falev][0]; + Any& fine_res = res [falev][0]; + Any& fine_rescor = rescor[falev][0]; // fine_rescor = fine_res - L(fine_cor) - linop.correctionResidual(falev, 0, fine_rescor, fine_cor, fine_res, - BCMode::Inhomogeneous, &crse_cor); - MultiFab::Copy(fine_res, fine_rescor, 0, 0, ncomp, nghost); + linop.AnyCorrectionResidual(falev, 0, fine_rescor, fine_cor, fine_res, + BCMode::Inhomogeneous, &crse_cor); + linop.AnyCopy(fine_res, fine_rescor, nghost); } void @@ -413,16 +417,16 @@ MLMG::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { - Real norm = res[amrlev][mglev].norm0(); + Real norm = linop.AnyNormInf(res[amrlev][mglev]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev << " DN: Norm before smooth " << norm << "\n"; } - cor[amrlev][mglev]->setVal(0.0); + linop.AnySetToZero(cor[amrlev][mglev]); bool skip_fillboundary = true; for (int i = 0; i < nu1; ++i) { - linop.smooth(amrlev, mglev, *cor[amrlev][mglev], res[amrlev][mglev], - skip_fillboundary); + linop.AnySmooth(amrlev, mglev, cor[amrlev][mglev], res[amrlev][mglev], + skip_fillboundary); skip_fillboundary = false; } @@ -431,14 +435,13 @@ MLMG::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { - Real norm = rescor[amrlev][mglev].norm0(); + Real norm = linop.AnyNormInf(rescor[amrlev][mglev]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev << " DN: Norm after smooth " << norm << "\n"; } // res_crse = 
R(rescor_fine); this provides res/b to the level below - linop.restriction(amrlev, mglev+1, res[amrlev][mglev+1], rescor[amrlev][mglev]); - + linop.AnyRestriction(amrlev, mglev+1, res[amrlev][mglev+1], rescor[amrlev][mglev]); } BL_PROFILE_VAR("MLMG::mgVcycle_bottom", blp_bottom); @@ -446,7 +449,7 @@ MLMG::mgVcycle (int amrlev, int mglev_top) { if (verbose >= 4) { - Real norm = res[amrlev][mglev_bottom].norm0(); + Real norm = linop.AnyNormInf(res[amrlev][mglev_bottom]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom << " DN: Norm before bottom " << norm << "\n"; } @@ -454,7 +457,7 @@ MLMG::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { computeResOfCorrection(amrlev, mglev_bottom); - Real norm = rescor[amrlev][mglev_bottom].norm0(); + Real norm = linop.AnyNormInf(rescor[amrlev][mglev_bottom]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom << " UP: Norm after bottom " << norm << "\n"; @@ -464,21 +467,21 @@ MLMG::mgVcycle (int amrlev, int mglev_top) { if (verbose >= 4) { - Real norm = res[amrlev][mglev_bottom].norm0(); + Real norm = linop.AnyNormInf(res[amrlev][mglev_bottom]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom << " Norm before smooth " << norm << "\n"; } - cor[amrlev][mglev_bottom]->setVal(0.0); + linop.AnySetToZero(cor[amrlev][mglev_bottom]); bool skip_fillboundary = true; for (int i = 0; i < nu1; ++i) { - linop.smooth(amrlev, mglev_bottom, *cor[amrlev][mglev_bottom], res[amrlev][mglev_bottom], - skip_fillboundary); + linop.AnySmooth(amrlev, mglev_bottom, cor[amrlev][mglev_bottom], + res[amrlev][mglev_bottom], skip_fillboundary); skip_fillboundary = false; } if (verbose >= 4) { computeResOfCorrection(amrlev, mglev_bottom); - Real norm = rescor[amrlev][mglev_bottom].norm0(); + Real norm = linop.AnyNormInf(rescor[amrlev][mglev_bottom]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev_bottom << " Norm after smooth " << norm << "\n"; } @@ -493,12 +496,12 @@ MLMG::mgVcycle (int amrlev, int 
mglev_top) if (verbose >= 4) { computeResOfCorrection(amrlev, mglev); - Real norm = rescor[amrlev][mglev].norm0(); + Real norm = linop.AnyNormInf(rescor[amrlev][mglev]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev << " UP: Norm before smooth " << norm << "\n"; } for (int i = 0; i < nu2; ++i) { - linop.smooth(amrlev, mglev, *cor[amrlev][mglev], res[amrlev][mglev]); + linop.AnySmooth(amrlev, mglev, cor[amrlev][mglev], res[amrlev][mglev]); } if (cf_strategy == CFStrategy::ghostnodes) computeResOfCorrection(amrlev, mglev); @@ -506,7 +509,7 @@ MLMG::mgVcycle (int amrlev, int mglev_top) if (verbose >= 4) { computeResOfCorrection(amrlev, mglev); - Real norm = rescor[amrlev][mglev].norm0(); + Real norm = linop.AnyNormInf(rescor[amrlev][mglev]); amrex::Print() << "AT LEVEL " << amrlev << " " << mglev << " UP: Norm after smooth " << norm << "\n"; } @@ -521,21 +524,18 @@ MLMG::mgFcycle () { BL_PROFILE("MLMG::mgFcycle()"); +#ifdef AMREX_USE_EB + AMREX_ASSERT(linop.isCellCentered()); +#endif + const int amrlev = 0; const int mg_bottom_lev = linop.NMGLevels(amrlev) - 1; - const int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(amrlev); + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(amrlev)); for (int mglev = 1; mglev <= mg_bottom_lev; ++mglev) { -#ifdef AMREX_USE_EB - amrex::EB_average_down(res[amrlev][mglev-1], res[amrlev][mglev], 0, ncomp, - linop.mg_coarsen_ratio_vec[mglev-1]); -#else - amrex::average_down(res[amrlev][mglev-1], res[amrlev][mglev], 0, ncomp, - linop.mg_coarsen_ratio_vec[mglev-1]); -#endif + linop.AnyAvgDownResMG(mglev, res[amrlev][mglev], res[amrlev][mglev-1]); } bottomSolve(); @@ -543,17 +543,17 @@ MLMG::mgFcycle () for (int mglev = mg_bottom_lev-1; mglev >= 0; --mglev) { // cor_fine = I(cor_crse) - interpCorrection (amrlev, mglev); + interpCorrection(amrlev, mglev); // rescor = res - L(cor) computeResOfCorrection(amrlev, mglev); 
// res = rescor; this provides b to the vcycle below - MultiFab::Copy(res[amrlev][mglev], rescor[amrlev][mglev], 0,0,ncomp,nghost); + linop.AnyCopy(res[amrlev][mglev], rescor[amrlev][mglev], nghost); // save cor; do v-cycle; add the saved to cor std::swap(cor[amrlev][mglev], cor_hold[amrlev][mglev]); mgVcycle(amrlev, mglev); - MultiFab::Add(*cor[amrlev][mglev], *cor_hold[amrlev][mglev], 0, 0, ncomp, nghost); + linop.AnyAdd(cor[amrlev][mglev], cor_hold[amrlev][mglev], nghost); } } @@ -563,17 +563,11 @@ MLMG::interpCorrection (int alev) { BL_PROFILE("MLMG::interpCorrection_1"); - const int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(alev); - - const MultiFab& crse_cor = *cor[alev-1][0]; - MultiFab& fine_cor = *cor[alev][0]; + IntVect nghost(0); + if (cf_strategy == CFStrategy::ghostnodes) nghost = IntVect(linop.getNGrow(alev)); - BoxArray ba = fine_cor.boxArray(); - const int amrrr = linop.AMRRefRatio(alev-1); - IntVect refratio{amrrr}; - ba.coarsen(refratio); + Any const& crse_cor = cor[alev-1][0]; + Any & fine_cor = cor[alev ][0]; const Geometry& crse_geom = linop.Geom(alev-1,0); @@ -584,121 +578,12 @@ MLMG::interpCorrection (int alev) ng_src = linop.getNGrow(alev-1); ng_dst = linop.getNGrow(alev-1); } - MultiFab cfine(ba, fine_cor.DistributionMap(), ncomp, ng_dst); - cfine.setVal(0.0); - cfine.ParallelCopy(crse_cor, 0, 0, ncomp, ng_src, ng_dst, crse_geom.periodicity()); - - bool isEB = fine_cor.hasEBFabFactory(); - ignore_unused(isEB); -#ifdef AMREX_USE_EB - auto factory = dynamic_cast(&(fine_cor.Factory())); - const FabArray* flags = (factory) ? 
&(factory->getMultiEBCellFlagFab()) : nullptr; -#endif - - if (linop.isCellCentered()) - { - MFItInfo mfi_info; - if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(fine_cor, mfi_info); mfi.isValid(); ++mfi) - { - const Box& bx = mfi.tilebox(); - Array4 const& ff = fine_cor.array(mfi); - Array4 const& cc = cfine.const_array(mfi); -#ifdef AMREX_USE_EB - bool call_lincc; - if (isEB) - { - const auto& flag = (*flags)[mfi]; - if (flag.getType(amrex::grow(bx,1)) == FabType::regular) { - call_lincc = true; - } else { - Array4 const& flg = flag.const_array(); - switch(refratio[0]) { - case 2: - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_eb_cc_interp_r<2>(tbx, ff, cc, flg, ncomp); - }); - break; - } - case 4: - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_eb_cc_interp_r<4>(tbx, ff, cc, flg, ncomp); - }); - break; - } - default: - amrex::Abort("mlmg_eb_cc_interp: only refratio 2 and 4 are supported"); - } + Any cfine = linop.AnyMakeCoarseAmr(alev, IntVect(ng_dst)); + linop.AnySetToZero(cfine); + linop.AnyParallelCopy(cfine, crse_cor, IntVect(ng_src), IntVect(ng_dst), crse_geom.periodicity()); - call_lincc = false; - } - } - else - { - call_lincc = true; - } -#else - const bool call_lincc = true; -#endif - if (call_lincc) - { - switch(refratio[0]) { - case 2: - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_lin_cc_interp_r2(tbx, ff, cc, ncomp); - }); - break; - } - case 4: - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_lin_cc_interp_r4(tbx, ff, cc, ncomp); - }); - break; - } - default: - amrex::Abort("mlmg_lin_cc_interp: only refratio 2 and 4 are supported"); - } - } - } - } - else - { - AMREX_ALWAYS_ASSERT(amrrr == 2 || amrrr == 4); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(fine_cor, TilingIfNotGPU()); mfi.isValid(); ++mfi) - { - Box fbx = 
mfi.tilebox(); - if (cf_strategy == CFStrategy::ghostnodes && nghost >1) fbx.grow(nghost); - Array4 const& ffab = fine_cor.array(mfi); - Array4 const& cfab = cfine.const_array(mfi); - - if (amrrr == 2) { - AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, - { - mlmg_lin_nd_interp_r2(i,j,k,n,ffab,cfab); - }); - } else { - AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, - { - mlmg_lin_nd_interp_r4(i,j,k,n,ffab,cfab); - }); - } - } - } + linop.AnyInterpolationAmr(alev, fine_cor, cfine, nghost); } // Interpolate correction between MG levels @@ -709,119 +594,9 @@ MLMG::interpCorrection (int alev, int mglev) { BL_PROFILE("MLMG::interpCorrection_2"); - MultiFab& crse_cor = *cor[alev][mglev+1]; - MultiFab& fine_cor = *cor[alev][mglev ]; - - const int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(alev); - - const Geometry& crse_geom = linop.Geom(alev,mglev+1); - const IntVect refratio = (alev > 0) ? IntVect(2) : linop.mg_coarsen_ratio_vec[mglev]; - - MultiFab cfine; - const MultiFab* cmf; - - if (amrex::isMFIterSafe(crse_cor, fine_cor)) - { - crse_cor.FillBoundary(crse_geom.periodicity()); - cmf = &crse_cor; - } - else - { - BoxArray cba = fine_cor.boxArray(); - cba.coarsen(refratio); - IntVect ng = linop.isCellCentered() ? crse_cor.nGrowVect() : IntVect(0); - if (cf_strategy == CFStrategy::ghostnodes) ng = IntVect(nghost); - cfine.define(cba, fine_cor.DistributionMap(), ncomp, ng); - cfine.setVal(0.0); - cfine.ParallelCopy(crse_cor, 0, 0, ncomp, IntVect(0), ng, crse_geom.periodicity()); - cmf = & cfine; - } - - bool isEB = fine_cor.hasEBFabFactory(); - ignore_unused(isEB); - -#ifdef AMREX_USE_EB - auto factory = dynamic_cast(&(fine_cor.Factory())); - const FabArray* flags = (factory) ? 
&(factory->getMultiEBCellFlagFab()) : nullptr; -#endif - - if (linop.isCellCentered()) - { - MFItInfo mfi_info; - if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(fine_cor, mfi_info); mfi.isValid(); ++mfi) - { - const Box& bx = mfi.tilebox(); - const auto& ff = fine_cor.array(mfi); - const auto& cc = cmf->array(mfi); -#ifdef AMREX_USE_EB - bool call_lincc; - if (isEB) - { - const auto& flag = (*flags)[mfi]; - if (flag.getType(amrex::grow(bx,1)) == FabType::regular) { - call_lincc = true; - } else { - Array4 const& flg = flag.const_array(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_eb_cc_interp_r<2>(tbx, ff, cc, flg, ncomp); - }); - - call_lincc = false; - } - } - else - { - call_lincc = true; - } -#else - const bool call_lincc = true; -#endif - if (call_lincc) - { -#if (AMREX_SPACEDIM == 3) - if (linop.hasHiddenDimension()) { - Box const& bx_2d = linop.compactify(bx); - auto const& ff_2d = linop.compactify(ff); - auto const& cc_2d = linop.compactify(cc); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx_2d, tbx, - { - TwoD::mlmg_lin_cc_interp_r2(tbx, ff_2d, cc_2d, ncomp); - }); - } else -#endif - { - AMREX_LAUNCH_HOST_DEVICE_LAMBDA (bx, tbx, - { - mlmg_lin_cc_interp_r2(tbx, ff, cc, ncomp); - }); - } - } - } - } - else - { -#ifdef AMREX_USE_OMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for (MFIter mfi(fine_cor, TilingIfNotGPU()); mfi.isValid(); ++mfi) - { - const Box& fbx = mfi.tilebox(); - Array4 const& ffab = fine_cor.array(mfi); - Array4 const& cfab = cmf->const_array(mfi); - - AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, - { - mlmg_lin_nd_interp_r2(i,j,k,n,ffab,cfab); - }); - } - } + Any& crse_cor = cor[alev][mglev+1]; + Any& fine_cor = cor[alev][mglev ]; + linop.AnyInterpAssignMG(alev, mglev, fine_cor, crse_cor); } // (Fine MG level correction) += I(Coarse MG level correction) @@ -830,31 +605,24 @@ 
MLMG::addInterpCorrection (int alev, int mglev) { BL_PROFILE("MLMG::addInterpCorrection()"); - const int ncomp = linop.getNComp(); - - const MultiFab& crse_cor = *cor[alev][mglev+1]; - MultiFab& fine_cor = *cor[alev][mglev ]; + const Any& crse_cor = cor[alev][mglev+1]; + Any& fine_cor = cor[alev][mglev ]; - MultiFab cfine; - const MultiFab* cmf; + Any cfine; + const Any* cany; - if (amrex::isMFIterSafe(crse_cor, fine_cor)) + if (linop.isMFIterSafe(alev, mglev, mglev+1)) { - cmf = &crse_cor; + cany = &crse_cor; } else { - BoxArray cba = fine_cor.boxArray(); - IntVect ratio = (alev > 0) ? IntVect(2) : linop.mg_coarsen_ratio_vec[mglev]; - - cba.coarsen(ratio); - const int ng = 0; - cfine.define(cba, fine_cor.DistributionMap(), ncomp, ng); - cfine.ParallelCopy(crse_cor); - cmf = &cfine; + cfine = linop.AnyMakeCoarseMG(alev, mglev, IntVect(0)); + linop.AnyParallelCopy(cfine,crse_cor,IntVect(0),IntVect(0)); + cany = &cfine; } - linop.interpolation(alev, mglev, fine_cor, *cmf); + linop.AnyInterpolationMG(alev, mglev, fine_cor, *cany); } // Compute rescor = res - L(cor) @@ -865,10 +633,10 @@ void MLMG::computeResOfCorrection (int amrlev, int mglev) { BL_PROFILE("MLMG:computeResOfCorrection()"); - MultiFab& x = *cor[amrlev][mglev]; - const MultiFab& b = res[amrlev][mglev]; - MultiFab& r = rescor[amrlev][mglev]; - linop.correctionResidual(amrlev, mglev, r, x, b, BCMode::Homogeneous); + Any & x = cor[amrlev][mglev]; + const Any& b = res[amrlev][mglev]; + Any & r = rescor[amrlev][mglev]; + linop.AnyCorrectionResidual(amrlev, mglev, r, x, b, BCMode::Homogeneous); } // At the true bottom of the coarset AMR level. 
@@ -894,7 +662,7 @@ MLMG::NSolve (MLMG& a_solver, MultiFab& a_sol, MultiFab& a_rhs) a_sol.setVal(0.0); - MultiFab const& res_bottom = res[0].back(); + MultiFab const& res_bottom = res[0].back().get(); if (BoxArray::SameRefs(a_rhs.boxArray(),res_bottom.boxArray()) && DistributionMapping::SameRefs(a_rhs.DistributionMap(),res_bottom.DistributionMap())) { @@ -906,7 +674,7 @@ MLMG::NSolve (MLMG& a_solver, MultiFab& a_sol, MultiFab& a_rhs) a_solver.solve({&a_sol}, {&a_rhs}, Real(-1.0), Real(-1.0)); - linop.copyNSolveSolution(*cor[0].back(), a_sol); + linop.copyNSolveSolution(cor[0].back().get(), a_sol); } void @@ -914,8 +682,6 @@ MLMG::actualBottomSolve () { BL_PROFILE("MLMG::actualBottomSolve()"); - const int ncomp = linop.getNComp(); - if (!linop.isBottomActive()) return; auto bottom_start_time = amrex::second(); @@ -924,28 +690,28 @@ MLMG::actualBottomSolve () const int amrlev = 0; const int mglev = linop.NMGLevels(amrlev) - 1; - MultiFab& x = *cor[amrlev][mglev]; - MultiFab& b = res[amrlev][mglev]; + auto& x = cor[amrlev][mglev]; + auto& b = res[amrlev][mglev]; - x.setVal(0.0); + linop.AnySetToZero(x); if (bottom_solver == BottomSolver::smoother) { bool skip_fillboundary = true; for (int i = 0; i < nuf; ++i) { - linop.smooth(amrlev, mglev, x, b, skip_fillboundary); + linop.AnySmooth(amrlev, mglev, x, b, skip_fillboundary); skip_fillboundary = false; } } else { - MultiFab* bottom_b = &b; - MultiFab raii_b; + Any* bottom_b = &b; + Any raii_b; if (linop.isBottomSingular() && linop.getEnforceSingularSolvable()) { - raii_b.define(b.boxArray(), b.DistributionMap(), ncomp, b.nGrowVect(), - MFInfo(), *linop.Factory(amrlev,mglev)); - MultiFab::Copy(raii_b,b,0,0,ncomp,b.nGrowVect()); + const IntVect ng = linop.AnyGrowVect(b); + raii_b = linop.AnyMake(amrlev, mglev, ng); + linop.AnyCopy(raii_b, b, ng); bottom_b = &raii_b; makeSolvable(amrlev,mglev,*bottom_b); @@ -973,7 +739,7 @@ MLMG::actualBottomSolve () int ret = bottomSolveWithCG(x, *bottom_b, cg_type); // If the MLMG solve 
failed then set the correction to zero if (ret != 0) { - cor[amrlev][mglev]->setVal(0.0); + linop.AnySetToZero(cor[amrlev][mglev]); if (bottom_solver == BottomSolver::cgbicg || bottom_solver == BottomSolver::bicgcg) { if (bottom_solver == BottomSolver::cgbicg) { @@ -983,7 +749,7 @@ MLMG::actualBottomSolve () } ret = bottomSolveWithCG(x, *bottom_b, cg_type); if (ret != 0) { - cor[amrlev][mglev]->setVal(0.0); + linop.AnySetToZero(cor[amrlev][mglev]); } else { // switch permanently if (cg_type == MLCGSolver::Type::CG) { bottom_solver = BottomSolver::cg; @@ -995,7 +761,7 @@ MLMG::actualBottomSolve () } const int n = (ret==0) ? nub : nuf; for (int i = 0; i < n; ++i) { - linop.smooth(amrlev, mglev, x, b); + linop.AnySmooth(amrlev, mglev, x, b); } } } @@ -1006,7 +772,7 @@ MLMG::actualBottomSolve () } int -MLMG::bottomSolveWithCG (MultiFab& x, const MultiFab& b, MLCGSolver::Type type) +MLMG::bottomSolveWithCG (Any& x, const Any& b, MLCGSolver::Type type) { MLCGSolver cg_solver(this, linop); cg_solver.setSolver(type); @@ -1027,37 +793,7 @@ Real MLMG::ResNormInf (int alev, bool local) { BL_PROFILE("MLMG::ResNormInf()"); - const int ncomp = linop.getNComp(); - const int mglev = 0; - Real norm = 0.0; - MultiFab* pmf = &(res[alev][mglev]); -#ifdef AMREX_USE_EB - if (linop.isCellCentered() && scratch[alev]) { - pmf = scratch[alev].get(); - MultiFab::Copy(*pmf, res[alev][mglev], 0, 0, ncomp, 0); - auto factory = dynamic_cast(linop.Factory(alev)); - if (factory) { - const MultiFab& vfrac = factory->getVolFrac(); - for (int n=0; n < ncomp; ++n) { - MultiFab::Multiply(*pmf, vfrac, 0, n, 1, 0); - } - } else { - amrex::Abort("MLMG::ResNormInf: not EB Factory"); - } - } -#endif - for (int n = 0; n < ncomp; n++) - { - Real newnorm = 0.0; - if (fine_mask[alev]) { - newnorm = pmf->norm0(*fine_mask[alev],n,0,true); - } else { - newnorm = pmf->norm0(n,0,true); - } - norm = std::max(norm, newnorm); - } - if (!local) ParallelAllReduce::Max(norm, ParallelContext::CommunicatorSub()); - return 
norm; + return linop.AnyNormInfMask(alev, res[alev][0], local); } // Computes multi-level masked inf-norm of Residual (res). @@ -1079,66 +815,17 @@ Real MLMG::MLRhsNormInf (bool local) { BL_PROFILE("MLMG::MLRhsNormInf()"); - const int ncomp = linop.getNComp(); - Real r = 0.0; - for (int alev = 0; alev <= finest_amr_lev; ++alev) - { - MultiFab* pmf = &(rhs[alev]); -#ifdef AMREX_USE_EB - if (linop.isCellCentered() && scratch[alev]) { - pmf = scratch[alev].get(); - MultiFab::Copy(*pmf, rhs[alev], 0, 0, ncomp, 0); - auto factory = dynamic_cast(linop.Factory(alev)); - if (factory) { - const MultiFab& vfrac = factory->getVolFrac(); - for (int n=0; n < ncomp; ++n) { - MultiFab::Multiply(*pmf, vfrac, 0, n, 1, 0); - } - } else { - amrex::Abort("MLMG::MLRhsNormInf: not EB Factory"); - } - } -#endif - for (int n=0; nnorm0(*fine_mask[alev],n,0,true)); - } else { - r = std::max(r, pmf->norm0(n,0,true)); - } - } + Real r = 0.0_rt; + for (int alev = 0; alev <= finest_amr_lev; ++alev) { + auto t = linop.AnyNormInfMask(alev, rhs[alev], true); + r = std::max(r, t); } if (!local) ParallelAllReduce::Max(r, ParallelContext::CommunicatorSub()); return r; } void -MLMG::buildFineMask () -{ - BL_PROFILE("MLMG::buildFineMask()"); - - if (!fine_mask.empty()) return; - - fine_mask.clear(); - fine_mask.resize(namrlevs); - - const auto& amrrr = linop.AMRRefRatio(); - for (int alev = 0; alev < finest_amr_lev; ++alev) - { - fine_mask[alev] = std::make_unique - (makeFineMask(rhs[alev], rhs[alev+1], IntVect(0), IntVect(amrrr[alev]), - Periodicity::NonPeriodic(), 1, 0)); - } - - if (!linop.isCellCentered()) { - for (int alev = 0; alev < finest_amr_lev; ++alev) { - linop.fixUpResidualMask(alev, *fine_mask[alev]); - } - } -} - -void -MLMG::prepareForSolve (const Vector& a_sol, const Vector& a_rhs) +MLMG::prepareForSolve (Vector& a_sol, const Vector& a_rhs) { BL_PROFILE("MLMG::prepareForSolve()"); @@ -1147,7 +834,6 @@ MLMG::prepareForSolve (const Vector& a_sol, const Vector& a_sol, const 
VectornGrowVect() == ng_sol) + else if (linop.AnyGrowVect(a_sol[alev]) == ng_sol) { - sol[alev] = a_sol[alev]; - sol[alev]->setBndry(0.0); + sol[alev] = linop.AnyMakeAlias(a_sol[alev]); + linop.AnySetBndryToZero(sol[alev]); + sol_is_alias[alev] = true; } else { if (!solve_called) { - sol_raii[alev] = std::make_unique(a_sol[alev]->boxArray(), - a_sol[alev]->DistributionMap(), - ncomp, ng_sol, MFInfo(), - *linop.Factory(alev)); + sol[alev] = linop.AnyMake(alev, 0, ng_sol); } - MultiFab::Copy(*sol_raii[alev], *a_sol[alev], 0, 0, ncomp, 0); - sol_raii[alev]->setBndry(0.0); - sol[alev] = sol_raii[alev].get(); + linop.AnyCopy(sol[alev], a_sol[alev], IntVect(0)); + linop.AnySetBndryToZero(sol[alev]); + sol_is_alias[alev] = false; } } @@ -1202,10 +887,9 @@ MLMG::prepareForSolve (const Vector& a_sol, const VectorboxArray(), a_rhs[alev]->DistributionMap(), ncomp, ng_rhs, - MFInfo(), *linop.Factory(alev)); + rhs[alev] = linop.AnyMake(alev, 0, ng_rhs); } - MultiFab::Copy(rhs[alev], *a_rhs[alev], 0, 0, ncomp, ng_rhs); + linop.AnyCopy(rhs[alev], a_rhs[alev], ng_rhs); linop.applyMetricTerm(alev, 0, rhs[alev]); linop.unimposeNeumannBC(alev, rhs[alev]); linop.applyInhomogNeumannTerm(alev, rhs[alev]); @@ -1215,38 +899,37 @@ MLMG::prepareForSolve (const Vector& a_sol, const Vector(linop.Factory(alev)); if (factory) { - Vector val(ncomp, 0.0); - amrex::EB_set_covered(rhs[alev], 0, ncomp, val); - amrex::EB_set_covered(*sol[alev], 0, ncomp, val); + linop.AnySetCoveredToZero(rhs[alev]); + linop.AnySetCoveredToZero(sol[alev]); } #endif } for (int falev = finest_amr_lev; falev > 0; --falev) { - linop.averageDownSolutionRHS(falev-1, *sol[falev-1], rhs[falev-1], *sol[falev], rhs[falev]); + linop.AnyAverageDownSolutionRHS(falev-1, sol[falev-1], rhs[falev-1], + sol[falev], rhs[falev]); } // enforce solvability if appropriate if (linop.isSingular(0) && linop.getEnforceSingularSolvable()) { - computeVolInv(); makeSolvable(); } IntVect ng = linop.isCellCentered() ? 
IntVect(0) : IntVect(1); if (cf_strategy == CFStrategy::ghostnodes) ng = ng_rhs; if (!solve_called) { - linop.make(res, ncomp, ng); - linop.make(rescor, ncomp, ng); + linop.make(res, ng); + linop.make(rescor, ng); } for (int alev = 0; alev <= finest_amr_lev; ++alev) { const int nmglevs = linop.NMGLevels(alev); for (int mglev = 0; mglev < nmglevs; ++mglev) { - res[alev][mglev].setVal(0.0); - rescor[alev][mglev].setVal(0.0); + linop.AnySetToZero(res [alev][mglev]); + linop.AnySetToZero(rescor[alev][mglev]); } } @@ -1261,12 +944,9 @@ MLMG::prepareForSolve (const Vector& a_sol, const Vector(res[alev][mglev].boxArray(), - res[alev][mglev].DistributionMap(), - ncomp, _ng, MFInfo(), - *linop.Factory(alev,mglev)); + cor[alev][mglev] = linop.AnyMake(alev, mglev, _ng); } - cor[alev][mglev]->setVal(0.0); + linop.AnySetToZero(cor[alev][mglev]); } } @@ -1280,12 +960,9 @@ MLMG::prepareForSolve (const Vector& a_sol, const Vector(cor[alev][mglev]->boxArray(), - cor[alev][mglev]->DistributionMap(), - ncomp, _ng, MFInfo(), - *linop.Factory(alev,mglev)); + cor_hold[alev][mglev] = linop.AnyMake(alev, mglev, _ng); } - cor_hold[alev][mglev]->setVal(0.0); + linop.AnySetToZero(cor_hold[alev][mglev]); } } for (int alev = 1; alev < finest_amr_lev; ++alev) @@ -1294,31 +971,9 @@ MLMG::prepareForSolve (const Vector& a_sol, const Vector(cor[alev][0]->boxArray(), - cor[alev][0]->DistributionMap(), - ncomp, _ng, MFInfo(), - *linop.Factory(alev,0)); - } - cor_hold[alev][0]->setVal(0.0); - } - - buildFineMask(); - - if (!solve_called) - { - scratch.resize(namrlevs); -#ifdef AMREX_USE_EB - if (linop.isCellCentered()) { - for (int alev=0; alev < namrlevs; ++alev) { - if (rhs[alev].hasEBFabFactory()) { - scratch[alev] = std::make_unique(rhs[alev].boxArray(), - rhs[alev].DistributionMap(), - ncomp, 0, MFInfo(), - *linop.Factory(alev)); - } - } + cor_hold[alev][0] = linop.AnyMake(alev, 0, _ng); } -#endif + linop.AnySetToZero(cor_hold[alev][0]); } if (linop.m_parent) { @@ -1379,7 +1034,7 @@ 
MLMG::getGradSolution (const Vector >& a_grad_so { BL_PROFILE("MLMG::getGradSolution()"); for (int alev = 0; alev <= finest_amr_lev; ++alev) { - linop.compGrad(alev, a_grad_sol[alev], *sol[alev], a_loc); + linop.compGrad(alev, a_grad_sol[alev], sol[alev].get(), a_loc); } } @@ -1392,7 +1047,11 @@ MLMG::getFluxes (const Vector >& a_flux, } AMREX_ASSERT(sol.size() == a_flux.size()); - getFluxes(a_flux, sol, a_loc); + Vector solmf; + for (auto & s : sol) { + solmf.push_back(&(s.get())); + } + getFluxes(a_flux, solmf, a_loc); } void @@ -1413,7 +1072,11 @@ void MLMG::getFluxes (const Vector & a_flux, Location a_loc) { AMREX_ASSERT(sol.size() == a_flux.size()); - getFluxes(a_flux, sol, a_loc); + Vector solmf; + for (auto & s : sol) { + solmf.push_back(&(s.get())); + } + getFluxes(a_flux, solmf, a_loc); } void @@ -1459,7 +1122,11 @@ MLMG::getEBFluxes (const Vector& a_eb_flux) } AMREX_ASSERT(sol.size() == a_eb_flux.size()); - getEBFluxes(a_eb_flux, sol); + Vector solmf; + for (auto & s : sol) { + solmf.push_back(&(s.get())); + } + getEBFluxes(a_eb_flux, solmf); } void @@ -1486,28 +1153,21 @@ MLMG::compResidual (const Vector& a_res, const Vector& a_s if (linop.hasHiddenDimension()) ng_sol[linop.hiddenDirection()] = 0; sol.resize(namrlevs); - sol_raii.resize(namrlevs); + sol_is_alias.resize(namrlevs,true); for (int alev = 0; alev < namrlevs; ++alev) { - if (cf_strategy == CFStrategy::ghostnodes) + if (cf_strategy == CFStrategy::ghostnodes || a_sol[alev]->nGrowVect() == ng_sol) { - sol[alev] = a_sol[alev]; - } - else if (a_sol[alev]->nGrowVect() == ng_sol) - { - sol[alev] = a_sol[alev]; + sol[alev] = linop.AnyMakeAlias(*a_sol[alev]); + sol_is_alias[alev] = true; } else { - if (sol_raii[alev] == nullptr) + if (sol_is_alias[alev]) { - sol_raii[alev] = std::make_unique(a_sol[alev]->boxArray(), - a_sol[alev]->DistributionMap(), - ncomp, ng_sol, MFInfo(), - *linop.Factory(alev)); + sol[alev] = linop.AnyMake(alev, 0, ng_sol); } - MultiFab::Copy(*sol_raii[alev], *a_sol[alev], 0, 0, 
ncomp, 0); - sol[alev] = sol_raii[alev].get(); + MultiFab::Copy(sol[alev].get(), *a_sol[alev], 0, 0, ncomp, 0); } } @@ -1521,22 +1181,23 @@ MLMG::compResidual (const Vector& a_res, const Vector& a_s const auto& amrrr = linop.AMRRefRatio(); for (int alev = finest_amr_lev; alev >= 0; --alev) { - const MultiFab* crse_bcdata = (alev > 0) ? sol[alev-1] : nullptr; + const MultiFab* crse_bcdata = (alev > 0) ? &(sol[alev-1].get()) : nullptr; const MultiFab* prhs = a_rhs[alev]; #if (AMREX_SPACEDIM != 3) int nghost = (cf_strategy == CFStrategy::ghostnodes) ? linop.getNGrow(alev) : 0; - MultiFab rhstmp(prhs->boxArray(), prhs->DistributionMap(), ncomp, nghost, - MFInfo(), *linop.Factory(alev)); + Any rhstmp_a(MultiFab(prhs->boxArray(), prhs->DistributionMap(), ncomp, nghost, + MFInfo(), *linop.Factory(alev))); + MultiFab& rhstmp = rhstmp_a.get(); MultiFab::Copy(rhstmp, *prhs, 0, 0, ncomp, nghost); - linop.applyMetricTerm(alev, 0, rhstmp); - linop.unimposeNeumannBC(alev, rhstmp); - linop.applyInhomogNeumannTerm(alev, rhstmp); + linop.applyMetricTerm(alev, 0, rhstmp_a); + linop.unimposeNeumannBC(alev, rhstmp_a); + linop.applyInhomogNeumannTerm(alev, rhstmp_a); prhs = &rhstmp; #endif - linop.solutionResidual(alev, *a_res[alev], *sol[alev], *prhs, crse_bcdata); + linop.solutionResidual(alev, *a_res[alev], sol[alev].get(), *prhs, crse_bcdata); if (alev < finest_amr_lev) { - linop.reflux(alev, *a_res[alev], *sol[alev], *prhs, - *a_res[alev+1], *sol[alev+1], *a_rhs[alev+1]); + linop.reflux(alev, *a_res[alev], sol[alev].get(), *prhs, + *a_res[alev+1], sol[alev+1].get(), *a_rhs[alev+1]); if (linop.isCellCentered()) { #ifdef AMREX_USE_EB amrex::EB_average_down(*a_res[alev+1], *a_res[alev], 0, ncomp, amrrr[alev]); @@ -1604,7 +1265,8 @@ MLMG::apply (const Vector& out, const Vector& a_in) } for (int alev = 0; alev < namrlevs; ++alev) { - linop.applyInhomogNeumannTerm(alev, rh[alev]); + Any a(MultiFab(rh[alev], amrex::make_alias, 0, rh[alev].nComp())); + linop.applyInhomogNeumannTerm(alev, 
a); } const auto& amrrr = linop.AMRRefRatio(); @@ -1637,215 +1299,45 @@ MLMG::apply (const Vector& out, const Vector& a_in) } } -void -MLMG::averageDownAndSync () -{ - const auto& amrrr = linop.AMRRefRatio(); - - int ncomp = linop.getNComp(); - int nghost = 0; - if (cf_strategy == CFStrategy::ghostnodes) nghost = linop.getNGrow(); - - if (linop.isCellCentered()) - { - for (int falev = finest_amr_lev; falev > 0; --falev) - { -#ifdef AMREX_USE_EB - amrex::EB_average_down(*sol[falev], *sol[falev-1], 0, ncomp, amrrr[falev-1]); -#else - amrex::average_down(*sol[falev], *sol[falev-1], 0, ncomp, amrrr[falev-1]); -#endif - } - } - else - { - linop.nodalSync(finest_amr_lev, 0, *sol[finest_amr_lev]); - - for (int falev = finest_amr_lev; falev > 0; --falev) - { - const auto& fmf = *sol[falev]; - auto& cmf = *sol[falev-1]; - - MultiFab tmpmf(amrex::coarsen(fmf.boxArray(), amrrr[falev-1]), fmf.DistributionMap(), ncomp, nghost); - amrex::average_down(fmf, tmpmf, 0, ncomp, amrrr[falev-1]); - cmf.ParallelCopy(tmpmf, 0, 0, ncomp); - linop.nodalSync(falev-1, 0, cmf); - } - } -} - -void -MLMG::computeVolInv () -{ - if (solve_called) return; - - if (linop.isCellCentered()) - { - volinv.resize(namrlevs); - for (int amrlev = 0; amrlev < namrlevs; ++amrlev) { - volinv[amrlev].resize(linop.NMGLevels(amrlev)); - } - - // We don't need to compute for every level - - auto f = [&] (int amrlev, int mglev) { -#ifdef AMREX_USE_EB - auto factory = dynamic_cast(linop.Factory(amrlev,mglev)); - if (factory) - { - const MultiFab& vfrac = factory->getVolFrac(); - volinv[amrlev][mglev] = vfrac.sum(0,true); - } - else -#endif - { - volinv[amrlev][mglev] - = Real(1.0 / linop.compactify(linop.Geom(amrlev,mglev).Domain()).d_numPts()); - } - }; - - // amrlev = 0, mglev = 0 - f(0,0); - - int mgbottom = linop.NMGLevels(0)-1; - f(0,mgbottom); - -#ifdef AMREX_USE_EB - Real temp1, temp2; - if (rhs[0].hasEBFabFactory()) - { - ParallelAllReduce::Sum({volinv[0][0], volinv[0][mgbottom]}, - 
ParallelContext::CommunicatorSub()); - temp1 = Real(1.0)/volinv[0][0]; - temp2 = Real(1.0)/volinv[0][mgbottom]; - } - else - { - temp1 = volinv[0][0]; - temp2 = volinv[0][mgbottom]; - } - volinv[0][0] = temp1; - volinv[0][mgbottom] = temp2; -#endif - } -} - void MLMG::makeSolvable () { - const int ncomp = linop.getNComp(); - - if (linop.isCellCentered()) - { - Vector offset(ncomp); -#ifdef AMREX_USE_EB - auto factory = dynamic_cast(linop.Factory(0)); - if (factory) - { - const MultiFab& vfrac = factory->getVolFrac(); - for (int c = 0; c < ncomp; ++c) { - offset[c] = MultiFab::Dot(rhs[0], c, vfrac, 0, 1, 0, true) * volinv[0][0]; - } - } - else -#endif - { - for (int c = 0; c < ncomp; ++c) { - offset[c] = rhs[0].sum(c,true) * volinv[0][0]; - } - } - ParallelAllReduce::Sum(offset.data(), ncomp, ParallelContext::CommunicatorSub()); - if (verbose >= 4) { - for (int c = 0; c < ncomp; ++c) { - amrex::Print() << "MLMG: Subtracting " << offset[c] - << " from rhs component " << c << "\n"; - } - } - for (int alev = 0; alev < namrlevs; ++alev) { - for (int c = 0; c < ncomp; ++c) { - rhs[alev].plus(-offset[c], c, 1); - } -#ifdef AMREX_USE_EB - if (rhs[alev].hasEBFabFactory()) { - Vector val(ncomp, 0.0); - amrex::EB_set_covered(rhs[alev], 0, ncomp, val); - } -#endif + auto const& offset = linop.getSolvabilityOffset(0, 0, rhs[0]); + if (verbose >= 4) { + const int ncomp = offset.size(); + for (int c = 0; c < ncomp; ++c) { + amrex::Print() << "MLMG: Subtracting " << offset[c] << " from rhs component " + << c << "\n"; } } - else - { - AMREX_ASSERT_WITH_MESSAGE(ncomp==1, "ncomp > 1 not supported for singular nodal problem"); - Real offset = linop.getSolvabilityOffset(0, 0, rhs[0]); - if (verbose >= 4) { - amrex::Print() << "MLMG: Subtracting " << offset << " from rhs\n"; - } - for (int alev = 0; alev < namrlevs; ++alev) { - linop.fixSolvabilityByOffset(alev, 0, rhs[alev], offset); - } + for (int alev = 0; alev < namrlevs; ++alev) { + linop.fixSolvabilityByOffset(alev, 0, rhs[alev], 
offset); } } void -MLMG::makeSolvable (int amrlev, int mglev, MultiFab& mf) +MLMG::makeSolvable (int amrlev, int mglev, Any& mf) { - const int ncomp = linop.getNComp(); - - if (linop.isCellCentered()) - { - Vector offset(ncomp); -#ifdef AMREX_USE_EB - auto factory = dynamic_cast(linop.Factory(amrlev,mglev)); - if (factory) - { - const MultiFab& vfrac = factory->getVolFrac(); - for (int c = 0; c < ncomp; ++c) { - offset[c] = MultiFab::Dot(mf, c, vfrac, 0, 1, 0, true) * volinv[amrlev][mglev]; - } - } - else -#endif - { - for (int c = 0; c < ncomp; ++c) { - offset[c] = mf.sum(c,true) * volinv[amrlev][mglev]; - } - } - - ParallelAllReduce::Sum(offset.data(), ncomp, ParallelContext::CommunicatorSub()); - - if (verbose >= 4) { - for (int c = 0; c < ncomp; ++c) { - amrex::Print() << "MLMG: Subtracting " << offset[c] - << " from mf component c = " << c << "\n"; - } - } - + auto const& offset = linop.getSolvabilityOffset(amrlev, mglev, mf); + if (verbose >= 4) { + const int ncomp = offset.size(); for (int c = 0; c < ncomp; ++c) { - mf.plus(-offset[c], c, 1); + amrex::Print() << "MLMG: Subtracting " << offset[c] + << " from mf component c = " << c + << " on level (" << amrlev << ", " << mglev << ")\n"; } -#ifdef AMREX_USE_EB - if (mf.hasEBFabFactory()) { - Vector val(ncomp, 0.0); - amrex::EB_set_covered(mf, 0, ncomp, val); - } -#endif - } - else - { - AMREX_ASSERT_WITH_MESSAGE(ncomp==1, "ncomp > 1 not supported for singular nodal problem"); - Real offset = linop.getSolvabilityOffset(amrlev, mglev, mf); - if (verbose >= 4) { - amrex::Print() << "MLMG: Subtracting " << offset << " on level (" << amrlev << ", " - << mglev << ")\n"; - } - linop.fixSolvabilityByOffset(amrlev, mglev, mf, offset); } + linop.fixSolvabilityByOffset(amrlev, mglev, mf, offset); } #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) void -MLMG::bottomSolveWithHypre (MultiFab& x, const MultiFab& b) +MLMG::bottomSolveWithHypre (Any& a_x, const Any& a_b) { + AMREX_ASSERT(a_x.is()); + MultiFab& x = 
a_x.get(); + MultiFab const& b = a_b.get(); + const int amrlev = 0; const int mglev = linop.NMGLevels(amrlev) - 1; @@ -1905,18 +1397,21 @@ MLMG::bottomSolveWithHypre (MultiFab& x, const MultiFab& b) // For precision reasons we enforce that the average of the correction from hypre is 0 if (linop.isSingular(amrlev) && linop.getEnforceSingularSolvable()) { - makeSolvable(amrlev, mglev, x); + makeSolvable(amrlev, mglev, a_x); } } #endif void -MLMG::bottomSolveWithPETSc (MultiFab& x, const MultiFab& b) +MLMG::bottomSolveWithPETSc (Any& a_x, const Any& a_b) { #if !defined(AMREX_USE_PETSC) - amrex::ignore_unused(x,b); + amrex::ignore_unused(a_x,a_b); amrex::Abort("bottomSolveWithPETSc is called without building with PETSc"); #else + AMREX_ASSERT(a_x.is()); + MultiFab& x = a_x.get(); + MultiFab const& b = a_b.get(); const int ncomp = linop.getNComp(); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(ncomp == 1, "bottomSolveWithPETSc doesn't work with ncomp > 1"); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H index affe4c73eaf..50f20e22915 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.H @@ -116,9 +116,11 @@ public : } virtual void getFluxes (const Vector& a_flux, const Vector& a_sol) const final override; - virtual void unimposeNeumannBC (int amrlev, MultiFab& rhs) const final override; - virtual Real getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const override; - virtual void fixSolvabilityByOffset (int amrlev, int mglev, MultiFab& rhs, Real offset) const override; + virtual void unimposeNeumannBC (int amrlev, Any& rhs) const final override; + virtual Vector getSolvabilityOffset (int amrlev, int mglev, + Any const& rhs) const override; + virtual void fixSolvabilityByOffset (int amrlev, int mglev, Any& rhs, + Vector const& offset) const override; virtual void compGrad (int /*amrlev*/, const Array& /*grad*/, MultiFab& /*sol*/, Location /*loc*/) 
const final override { diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp index 79358b58898..c0efaed25d6 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp @@ -150,13 +150,16 @@ MLNodeLaplacian::resizeMultiGrid (int new_size) } void -MLNodeLaplacian::unimposeNeumannBC (int amrlev, MultiFab& rhs) const +MLNodeLaplacian::unimposeNeumannBC (int amrlev, Any& a_rhs) const { if (m_coarsening_strategy == CoarseningStrategy::RAP) { const Box& nddom = amrex::surroundingNodes(Geom(amrlev).Domain()); const auto lobc = LoBC(); const auto hibc = HiBC(); + AMREX_ASSERT(a_rhs.is()); + MultiFab& rhs = a_rhs.get(); + MFItInfo mfi_info; if (Gpu::notInLaunchRegion()) mfi_info.EnableTiling().SetDynamic(true); #ifdef AMREX_USE_OMP @@ -171,14 +174,17 @@ MLNodeLaplacian::unimposeNeumannBC (int amrlev, MultiFab& rhs) const } } -Real -MLNodeLaplacian::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const +Vector +MLNodeLaplacian::getSolvabilityOffset (int amrlev, int mglev, Any const& a_rhs) const { amrex::ignore_unused(amrlev); - AMREX_ASSERT(amrlev==0); - AMREX_ASSERT(mglev+1==m_num_mg_levels[0] || mglev==0); + AMREX_ASSERT(amrlev==0 && (mglev+1==m_num_mg_levels[0] || mglev==0)); + AMREX_ASSERT(getNComp() == 1); if (m_coarsening_strategy == CoarseningStrategy::RAP) { + AMREX_ASSERT(a_rhs.is()); + auto const& rhs = a_rhs.get(); + #ifdef AMREX_USE_EB auto factory = dynamic_cast(m_factory[amrlev][0].get()); if (mglev == 0 && factory && !factory->isAllRegular()) { @@ -229,7 +235,7 @@ MLNodeLaplacian::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rh Real s1 = amrex::get<0>(r); Real s2 = amrex::get<1>(r); ParallelAllReduce::Sum({s1,s2}, ParallelContext::CommunicatorSub()); - return s1/s2; + return {s1/s2}; } else #endif { @@ -279,16 +285,21 @@ MLNodeLaplacian::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rh Real s1 = 
amrex::get<0>(r); Real s2 = amrex::get<1>(r); ParallelAllReduce::Sum({s1,s2}, ParallelContext::CommunicatorSub()); - return s1/s2; + return {s1/s2}; } } else { - return MLNodeLinOp::getSolvabilityOffset(amrlev, mglev, rhs); + return MLNodeLinOp::getSolvabilityOffset(amrlev, mglev, a_rhs); } } void -MLNodeLaplacian::fixSolvabilityByOffset (int amrlev, int mglev, MultiFab& rhs, Real offset) const +MLNodeLaplacian::fixSolvabilityByOffset (int amrlev, int mglev, Any& a_rhs, + Vector const& a_offset) const { + AMREX_ASSERT(a_rhs.is()); + auto& rhs = a_rhs.get(); + Real offset = a_offset[0]; + if (m_coarsening_strategy == CoarseningStrategy::RAP) { #ifdef AMREX_USE_EB auto factory = dynamic_cast(m_factory[amrlev][0].get()); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp index df5ab489d2f..339ca98e072 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp @@ -26,7 +26,11 @@ MLNodeLaplacian::averageDownCoeffs () { for (int mglev = 0; mglev < m_num_mg_levels[amrlev]; ++mglev) { +#if (AMREX_SPACEDIM == 1) + int ndims = 1; +#else int ndims = (m_use_harmonic_average || m_use_mapped) ? AMREX_SPACEDIM : 1; +#endif for (int idim = 0; idim < ndims; ++idim) { if (m_sigma[amrlev][mglev][idim] == nullptr) { @@ -101,7 +105,11 @@ MLNodeLaplacian::averageDownCoeffsSameAmrLevel (int amrlev) if (m_coarsening_strategy != CoarseningStrategy::Sigma) return; +#if (AMREX_SPACEDIM == 1) + const int nsigma = 1; +#else const int nsigma = (m_use_harmonic_average || m_use_mapped) ? 
AMREX_SPACEDIM : 1; +#endif for (int mglev = 1; mglev < m_num_mg_levels[amrlev]; ++mglev) { diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H index c46f4a250f2..1935be89f1d 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.H @@ -36,10 +36,6 @@ public: const Vector const*>& a_factory = {}, int a_eb_limit_coarsening = -1); - virtual void setLevelBC (int /*amrlev*/, const MultiFab* /*levelbcdata*/, - const MultiFab* = nullptr, const MultiFab* = nullptr, - const MultiFab* = nullptr) final override {} - virtual void apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode bc_mode, StateMode s_mode, const MLMGBndry* bndry=nullptr) const final override; @@ -59,20 +55,15 @@ public: amrex::Abort("AMReX_MLNodeLinOp::compGrad::How did we get here?"); } - virtual void applyMetricTerm (int /*amrlev*/, int /*mglev*/, MultiFab& /*rhs*/) const final override {} + virtual void applyMetricTerm (int /*amrlev*/, int /*mglev*/, Any& /*rhs*/) const final override {} virtual void unapplyMetricTerm (int /*amrlev*/, int /*mglev*/, MultiFab& /*rhs*/) const final override {} - virtual void fillSolutionBC (int /*amrlev*/, MultiFab& /*sol*/, - const MultiFab* /*crse_bcdata*/=nullptr) final override { - amrex::Abort("AMReX_MLNodeLinOp::fillSolutionBC::How did we get here?"); - } - - virtual void applyInhomogNeumannTerm (int amrlev, MultiFab& rhs) const override; + virtual Vector getSolvabilityOffset (int amrlev, int mglev, + Any const& rhs) const override; + virtual void fixSolvabilityByOffset (int amrlev, int mglev, Any& rhs, + Vector const& offset) const override; - virtual Real getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const override; - virtual void fixSolvabilityByOffset (int amrlev, int mglev, MultiFab& rhs, Real offset) const override; - - virtual void prepareForSolve () override {} + virtual void prepareForSolve () override; virtual bool isSingular (int 
amrlev) const override { return (amrlev == 0) ? m_is_bottom_singular : false; } @@ -86,7 +77,7 @@ public: virtual void Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& in) const = 0; virtual void Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab& rsh) const = 0; - virtual void nodalSync (int amrlev, int mglev, MultiFab& mf) const final override; + void nodalSync (int amrlev, int mglev, MultiFab& mf) const; virtual std::unique_ptr makeNLinOp (int /*grid_size*/) const final override { amrex::Abort("MLNodeLinOp::makeNLinOp: N-Solve not supported"); @@ -102,6 +93,19 @@ public: // omask is either 0 or 1. 1 means the node is an unknown. 0 means it's known. void setOversetMask (int amrlev, const iMultiFab& a_omask); + virtual void fixUpResidualMask (int /*amrlev*/, iMultiFab& /*resmsk*/) { } + + virtual Real AnyNormInfMask (int amrlev, Any const& a, bool local) const override; + + virtual void AnyAvgDownResAmr (int, Any&, Any const&) const final override { } + + virtual void AnyInterpolationAmr (int famrlev, Any& fine, const Any& crse, + IntVect const& nghost) const override; + + virtual void AnyAverageDownAndSync (Vector& sol) const override; + + virtual void interpAssign (int amrlev, int fmglev, MultiFab& fine, MultiFab& crse) const override; + #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) virtual std::unique_ptr makeHypreNodeLap( int bottom_verbose, @@ -139,6 +143,8 @@ protected: MultiFab m_bottom_dot_mask; MultiFab m_coarse_dot_mask; + Vector > m_norm_fine_mask; + #ifdef AMREX_USE_EB CoarseningStrategy m_coarsening_strategy = CoarseningStrategy::RAP; #else diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp index baf0f5edb42..b5173b71f5f 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #ifdef AMREX_USE_OMP @@ -83,6 +84,22 @@ MLNodeLinOp::define (const Vector& a_geom, 
m_has_fine_bndry[amrlev] = std::make_unique >(m_grids[amrlev][0], m_dmap[amrlev][0]); } + + m_norm_fine_mask.resize(m_num_amr_levels-1); + for (int amrlev = 0; amrlev < m_num_amr_levels-1; ++amrlev) { + m_norm_fine_mask[amrlev] = std::make_unique + (makeFineMask(amrex::convert(m_grids[amrlev][0], IntVect(1)), m_dmap[amrlev][0], + amrex::convert(m_grids[amrlev+1][0], IntVect(1)), + IntVect(m_amr_ref_ratio[amrlev]), 1, 0)); + } +} + +void +MLNodeLinOp::prepareForSolve () +{ + for (int amrlev = 0; amrlev < m_num_amr_levels-1; ++amrlev) { + fixUpResidualMask(amrlev, *m_norm_fine_mask[amrlev]); + } } std::unique_ptr @@ -177,17 +194,16 @@ MLNodeLinOp::xdoty (int amrlev, int mglev, const MultiFab& x, const MultiFab& y, return result; } -void -MLNodeLinOp::applyInhomogNeumannTerm (int /*amrlev*/, MultiFab& /*rhs*/) const -{ -} - -Real -MLNodeLinOp::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) const +Vector +MLNodeLinOp::getSolvabilityOffset (int amrlev, int mglev, Any const& a_rhs) const { amrex::ignore_unused(amrlev); - AMREX_ASSERT(amrlev==0); - AMREX_ASSERT(mglev+1==m_num_mg_levels[0] || mglev==0); + AMREX_ASSERT(amrlev==0 && (mglev+1==m_num_mg_levels[0] || mglev==0)); + AMREX_ASSERT(getNComp() == 1); + + AMREX_ASSERT(a_rhs.is()); + auto const& rhs = a_rhs.get(); + const auto& mask = (mglev+1 == m_num_mg_levels[0]) ? 
m_bottom_dot_mask : m_coarse_dot_mask; const auto& mask_ma = mask.const_arrays(); const auto& rhs_ma = rhs.const_arrays(); @@ -203,13 +219,16 @@ MLNodeLinOp::getSolvabilityOffset (int amrlev, int mglev, MultiFab const& rhs) c Real s1 = amrex::get<0>(r); Real s2 = amrex::get<1>(r); ParallelAllReduce::Sum({s1,s2}, ParallelContext::CommunicatorSub()); - return s1/s2; + return {s1/s2}; } void -MLNodeLinOp::fixSolvabilityByOffset (int /*amrlev*/, int /*mglev*/, MultiFab& rhs, Real offset) const +MLNodeLinOp::fixSolvabilityByOffset (int /*amrlev*/, int /*mglev*/, Any& a_rhs, + Vector const& offset) const { - rhs.plus(-offset, 0, 1); + AMREX_ASSERT(a_rhs.is()); + auto& rhs = a_rhs.get(); + rhs.plus(-offset[0], 0, 1); } namespace { @@ -448,6 +467,119 @@ MLNodeLinOp::resizeMultiGrid (int new_size) MLLinOp::resizeMultiGrid(new_size); } +Real +MLNodeLinOp::AnyNormInfMask (int amrlev, Any const& a, bool local) const +{ + AMREX_ASSERT(a.is()); + auto& mf = a.get(); + + const int finest_level = NAMRLevels() - 1; + iMultiFab const* fine_mask = (amrlev == finest_level) + ? 
nullptr : m_norm_fine_mask[amrlev].get(); + return MFNormInf(mf, fine_mask, local); +} + +void +MLNodeLinOp::AnyInterpolationAmr (int famrlev, Any& a_fine, const Any& a_crse, + IntVect const& nghost) const +{ + AMREX_ASSERT(a_fine.is()); + MultiFab& fine = a_fine.get(); + MultiFab const& crse = a_crse.get(); + + const int ncomp = getNComp(); + const int refratio = AMRRefRatio(famrlev-1); + + AMREX_ALWAYS_ASSERT(refratio == 2 || refratio == 4); +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(fine, TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + Box fbx = mfi.tilebox(); + fbx.grow(nghost); + Array4 const& ffab = fine.array(mfi); + Array4 const& cfab = crse.const_array(mfi); + + if (refratio == 2) { + AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, + { + mlmg_lin_nd_interp_r2(i,j,k,n,ffab,cfab); + }); + } else { + AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, + { + mlmg_lin_nd_interp_r4(i,j,k,n,ffab,cfab); + }); + } + } +} + +void +MLNodeLinOp::AnyAverageDownAndSync (Vector& sol) const +{ + AMREX_ASSERT(sol[0].is()); + + const int ncomp = getNComp(); + const int finest_amr_lev = NAMRLevels() - 1; + + nodalSync(finest_amr_lev, 0, sol[finest_amr_lev].get()); + + for (int falev = finest_amr_lev; falev > 0; --falev) + { + const auto& fmf = sol[falev ].get(); + auto& cmf = sol[falev-1].get(); + + auto rr = AMRRefRatio(falev-1); + MultiFab tmpmf(amrex::coarsen(fmf.boxArray(), rr), fmf.DistributionMap(), ncomp, 0); + amrex::average_down(fmf, tmpmf, 0, ncomp, rr); + cmf.ParallelCopy(tmpmf, 0, 0, ncomp); + nodalSync(falev-1, 0, cmf); + } +} + +void +MLNodeLinOp::interpAssign (int amrlev, int fmglev, MultiFab& fine, MultiFab& crse) const +{ + const int ncomp = getNComp(); + + const Geometry& crse_geom = Geom(amrlev,fmglev+1); + const IntVect refratio = (amrlev > 0) ? 
IntVect(2) : mg_coarsen_ratio_vec[fmglev]; + AMREX_ALWAYS_ASSERT(refratio == 2); + + MultiFab cfine; + const MultiFab* cmf; + + if (amrex::isMFIterSafe(crse, fine)) + { + crse.FillBoundary(crse_geom.periodicity()); + cmf = &crse; + } + else + { + BoxArray cba = fine.boxArray(); + cba.coarsen(refratio); + cfine.define(cba, fine.DistributionMap(), ncomp, 0); + cfine.ParallelCopy(crse, 0, 0, ncomp, 0, 0, crse_geom.periodicity()); + cmf = & cfine; + } + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(fine, TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& fbx = mfi.tilebox(); + Array4 const& ffab = fine.array(mfi); + Array4 const& cfab = cmf->const_array(mfi); + + AMREX_HOST_DEVICE_FOR_4D ( fbx, ncomp, i, j, k, n, + { + mlmg_lin_nd_interp_r2(i,j,k,n,ffab,cfab); + }); + } +} + #if defined(AMREX_USE_HYPRE) && (AMREX_SPACEDIM > 1) std::unique_ptr MLNodeLinOp::makeHypreNodeLap (int bottom_verbose, const std::string& options_namespace) const diff --git a/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H b/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H index 81dd431d953..41f8fbf1cae 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLPoisson.H @@ -70,6 +70,10 @@ public: virtual void copyNSolveSolution (MultiFab& dst, MultiFab const& src) const final override; + //! Compute dphi/dn on domain faces after the solver has converged. 
+ void get_dpdn_on_domain_faces (Array const& dpdn, + MultiFab const& phi); + private: Vector m_is_singular; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLPoisson.cpp b/Src/LinearSolvers/MLMG/AMReX_MLPoisson.cpp index ce27eb936fd..15ee75e961a 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLPoisson.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLPoisson.cpp @@ -702,4 +702,63 @@ MLPoisson::copyNSolveSolution (MultiFab& dst, MultiFab const& src) const dst.ParallelCopy(src); } +void +MLPoisson::get_dpdn_on_domain_faces (Array const& dpdn, + MultiFab const& phi) +{ + BL_PROFILE("MLPoisson::dpdn_faces()"); + + // We do not need to call applyBC because this function is used by the + // OpenBC solver after solver has converged. That means the BC has been + // filled to check the residual. + + Box const& domain0 = m_geom[0][0].Domain(); + AMREX_D_TERM(const Real dxi = m_geom[0][0].InvCellSize(0);, + const Real dyi = m_geom[0][0].InvCellSize(1);, + const Real dzi = m_geom[0][0].InvCellSize(2);) + +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(phi); mfi.isValid(); ++mfi) + { + Box const& vbx = mfi.validbox(); + for (OrientationIter oit; oit.isValid(); ++oit) { + Orientation face = oit(); + if (vbx[face] == domain0[face]) { + int dir = face.coordDir(); + Array4 const& p = phi.const_array(mfi); + Array4 const& gp = dpdn[dir]->array(mfi); + Box const& b2d = amrex::bdryNode(vbx,face); + if (dir == 0) { + // because it's dphi/dn, not dphi/dx. + Real fac = dxi * (face.isLow() ? -1.0_rt : 1._rt); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(b2d, i, j, k, + { + gp(i,j,k) = fac * (p(i,j,k) - p(i-1,j,k)); + }); + } +#if (AMREX_SPACEDIM > 1) + else if (dir == 1) { + Real fac = dyi * (face.isLow() ? -1.0_rt : 1._rt); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(b2d, i, j, k, + { + gp(i,j,k) = fac * (p(i,j,k) - p(i,j-1,k)); + }); + } +#if (AMREX_SPACEDIM > 2) + else { + Real fac = dzi * (face.isLow() ? 
-1.0_rt : 1._rt); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(b2d, i, j, k, + { + gp(i,j,k) = fac * (p(i,j,k) - p(i,j,k-1)); + }); + } +#endif +#endif + } + } + } +} + } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLTensorOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLTensorOp.cpp index d4e77f312dc..0750ffdd969 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLTensorOp.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLTensorOp.cpp @@ -210,9 +210,16 @@ MLTensorOp::apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode bc if (mglev >= m_kappa[amrlev].size()) return; - applyBCTensor(amrlev, mglev, in, bc_mode, s_mode, bndry ); + applyBCTensor(amrlev, mglev, in, bc_mode, s_mode, bndry); + + const auto& bcondloc = *m_bcondloc[amrlev][mglev]; + + Array4 foo; const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); + const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1); + const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); Array const& etamf = m_b_coeffs[amrlev][mglev]; Array const& kapmf = m_kappa[amrlev][mglev]; @@ -247,20 +254,65 @@ MLTensorOp::apply (int amrlev, int mglev, MultiFab& out, MultiFab& in, BCMode bc Array4 const fyfab = fluxfab_tmp[1].array();, Array4 const fzfab = fluxfab_tmp[2].array();); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM - ( xbx, txbx, - { - mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); - } - , ybx, tybx, - { - mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); - } - , zbx, tzbx, - { - mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); - } - ); + if (domain.strictly_contains(bx)) { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); + } + ); + } else { + const auto & bdcv = bcondloc.bndryConds(mfi); + + Array2D bct; + for (int icomp = 0; 
icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + } + } + + const auto& bvxlo = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::low )].array(mfi) : foo; + const auto& bvylo = (bndry != nullptr) ? + (*bndry)[Orientation(1,Orientation::low )].array(mfi) : foo; + const auto& bvxhi = (bndry != nullptr) ? + (*bndry)[Orientation(0,Orientation::high)].array(mfi) : foo; + const auto& bvyhi = (bndry != nullptr) ? + (*bndry)[Orientation(1,Orientation::high)].array(mfi) : foo; +#if (AMREX_SPACEDIM == 3) + const auto& bvzlo = (bndry != nullptr) ? + (*bndry)[Orientation(2,Orientation::low )].array(mfi) : foo; + const auto& bvzhi = (bndry != nullptr) ? + (*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo; +#endif + + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv, + bvxlo, bvxhi, bct, dlo, dhi); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv, + bvylo, bvyhi, bct, dlo, dhi); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv, + bvzlo, bvzhi, bct, dlo, dhi); + } + ); + } if (m_overset_mask[amrlev][mglev]) { const auto& osm = m_overset_mask[amrlev][mglev]->array(mfi); @@ -288,18 +340,18 @@ MLTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, #if (AMREX_SPACEDIM == 1) amrex::ignore_unused(amrlev,mglev,vel,bc_mode,bndry); #else + const int inhomog = bc_mode == BCMode::Inhomogeneous; const int imaxorder = maxorder; const auto& bcondloc = *m_bcondloc[amrlev][mglev]; const auto& maskvals = m_maskvals[amrlev][mglev]; - FArrayBox foofab(Box::TheUnitBox(),3); - const auto& foo = foofab.array(); + Array4 foo; const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1); - - // Domain and coarse-fine boundaries are handled below. 
+ const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); MFItInfo mfi_info; if (Gpu::notInLaunchRegion()) mfi_info.SetDynamic(true); @@ -315,14 +367,13 @@ MLTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, const auto & bdlv = bcondloc.bndryLocs(mfi); const auto & bdcv = bcondloc.bndryConds(mfi); - GpuArray bct; - GpuArray bcl; - for (OrientationIter face; face; ++face) { - Orientation ori = face(); - const int iface = ori; - for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { - bct[iface*AMREX_SPACEDIM+icomp] = bdcv[icomp][ori]; - bcl[iface*AMREX_SPACEDIM+icomp] = bdlv[icomp][ori]; + Array2D bct; + Array2D bcl; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + bcl(ori,icomp) = bdlv[icomp][ori]; } } @@ -341,14 +392,13 @@ MLTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, (*bndry)[Orientation(1,Orientation::high)].array(mfi) : foo; #if (AMREX_SPACEDIM == 2) - AMREX_HOST_DEVICE_FOR_1D ( 4, icorner, { mltensor_fill_corners(icorner, vbx, velfab, mxlo, mylo, mxhi, myhi, bvxlo, bvylo, bvxhi, bvyhi, bct, bcl, inhomog, imaxorder, - dxinv, domain); + dxinv, dlo, dhi); }); #else const auto& mzlo = maskvals[Orientation(2,Orientation::low )].array(mfi); @@ -360,18 +410,40 @@ MLTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel, (*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo; // only edge vals used in 3D stencil - AMREX_HOST_DEVICE_FOR_1D ( 12, iedge, +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) { + amrex::launch(12, 64, Gpu::gpuStream(), +#ifdef AMREX_USE_DPCPP + [=] AMREX_GPU_DEVICE (sycl::nd_item<1> const& item) + { + int bid = item.get_group_linear_id(); + int tid = item.get_local_linear_id(); + int bdim = item.get_local_range(0); +#else + [=] AMREX_GPU_DEVICE () + { + int bid = blockIdx.x; + int tid = threadIdx.x; + int bdim = blockDim.x; +#endif + 
mltensor_fill_edges(bid, tid, bdim, vbx, velfab, + mxlo, mylo, mzlo, mxhi, myhi, mzhi, + bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi, + bct, bcl, inhomog, imaxorder, + dxinv, dlo, dhi); + }); + } else +#endif { - mltensor_fill_edges(iedge, vbx, velfab, + mltensor_fill_edges(vbx, velfab, mxlo, mylo, mzlo, mxhi, myhi, mzhi, bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi, bct, bcl, inhomog, imaxorder, - dxinv, domain); - }); + dxinv, dlo, dhi); + } #endif } - // Notet that it is incorrect to call EnforcePeriodicity on vel. #endif } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLTensorOp_grad.cpp b/Src/LinearSolvers/MLMG/AMReX_MLTensorOp_grad.cpp index 705f38052d1..d395ecdac13 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLTensorOp_grad.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLTensorOp_grad.cpp @@ -16,9 +16,15 @@ MLTensorOp::compFlux (int amrlev, const Array& fluxes, const int ncomp = getNComp(); MLABecLaplacian::compFlux(amrlev, fluxes, sol, loc); - applyBCTensor(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, m_bndry_sol[amrlev].get()); + MLMGBndry const* bndry = m_bndry_sol[amrlev].get(); + applyBCTensor(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, bndry); + + const auto& bcondloc = *m_bcondloc[amrlev][mglev]; const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); + const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1); + const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); Array const& etamf = m_b_coeffs[amrlev][mglev]; Array const& kapmf = m_kappa[amrlev][mglev]; @@ -52,20 +58,59 @@ MLTensorOp::compFlux (int amrlev, const Array& fluxes, Array4 const fyfab = fluxfab_tmp[1].array();, Array4 const fzfab = fluxfab_tmp[2].array();); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM - ( xbx, txbx, - { - mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); - } - , ybx, tybx, - { - mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); - } - , zbx, tzbx, - { - 
mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); - } - ); + if (domain.strictly_contains(mfi.tilebox())) { + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv); + } + ); + } else { + const auto & bdcv = bcondloc.bndryConds(mfi); + + Array2D bct; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + } + } + + const auto& bvxlo = (*bndry)[Orientation(0,Orientation::low )].array(mfi); + const auto& bvylo = (*bndry)[Orientation(1,Orientation::low )].array(mfi); + const auto& bvxhi = (*bndry)[Orientation(0,Orientation::high)].array(mfi); + const auto& bvyhi = (*bndry)[Orientation(1,Orientation::high)].array(mfi); +#if (AMREX_SPACEDIM == 3) + const auto& bvzlo = (*bndry)[Orientation(2,Orientation::low )].array(mfi); + const auto& bvzhi = (*bndry)[Orientation(2,Orientation::high)].array(mfi); +#endif + + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_cross_terms_fx(txbx,fxfab,vfab,etaxfab,kapxfab,dxinv, + bvxlo, bvxhi, bct, dlo, dhi); + } + , ybx, tybx, + { + mltensor_cross_terms_fy(tybx,fyfab,vfab,etayfab,kapyfab,dxinv, + bvylo, bvyhi, bct, dlo, dhi); + } + , zbx, tzbx, + { + mltensor_cross_terms_fz(tzbx,fzfab,vfab,etazfab,kapzfab,dxinv, + bvzlo, bvzhi, bct, dlo, dhi); + } + ); + } for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { const Box& nbx = mfi.nodaltilebox(idim); @@ -95,33 +140,36 @@ MLTensorOp::compVelGrad (int amrlev, const Array& flux const int mglev = 0; - applyBCTensor(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, m_bndry_sol[amrlev].get()); + MLMGBndry const* bndry = m_bndry_sol[amrlev].get(); + applyBC(amrlev, mglev, sol, BCMode::Inhomogeneous, 
StateMode::Solution, bndry); + applyBCTensor(amrlev, mglev, sol, BCMode::Inhomogeneous, StateMode::Solution, bndry); + + const auto& bcondloc = *m_bcondloc[amrlev][mglev]; const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray(); - const int dim_fluxes = AMREX_SPACEDIM*AMREX_SPACEDIM; + const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1); + const auto dlo = amrex::lbound(domain); + const auto dhi = amrex::ubound(domain); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif + for (MFIter mfi(sol, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - Array fluxfab_tmp; + Array4 const vfab = sol.const_array(mfi); + AMREX_D_TERM(Box const xbx = mfi.nodaltilebox(0);, + Box const ybx = mfi.nodaltilebox(1);, + Box const zbx = mfi.nodaltilebox(2);) + AMREX_D_TERM(Array4 const fxfab = fluxes[0]->array(mfi);, + Array4 const fyfab = fluxes[1]->array(mfi);, + Array4 const fzfab = fluxes[2]->array(mfi);) - for (MFIter mfi(sol, TilingIfNotGPU()); mfi.isValid(); ++mfi) - { - Array4 const vfab = sol.const_array(mfi); - AMREX_D_TERM(Box const xbx = mfi.nodaltilebox(0);, - Box const ybx = mfi.nodaltilebox(1);, - Box const zbx = mfi.nodaltilebox(2);); - AMREX_D_TERM(fluxfab_tmp[0].resize(xbx,dim_fluxes);, - fluxfab_tmp[1].resize(ybx,dim_fluxes);, - fluxfab_tmp[2].resize(zbx,dim_fluxes);); - AMREX_D_TERM(Elixir fxeli = fluxfab_tmp[0].elixir();, - Elixir fyeli = fluxfab_tmp[1].elixir();, - Elixir fzeli = fluxfab_tmp[2].elixir();); - AMREX_D_TERM(Array4 const fxfab = fluxfab_tmp[0].array();, - Array4 const fyfab = fluxfab_tmp[1].array();, - Array4 const fzfab = fluxfab_tmp[2].array();); +// The derivatives are put in the array with the following order: +// component: 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 +// in 2D: dU/dx, dV/dx, dU/dy, dV/dy +// in 3D: dU/dx, dV/dx, dW/dx, dU/dy, dV/dy, dW/dy, dU/dz, dV/dz, dW/dz + if (domain.strictly_contains(mfi.tilebox())) { AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM ( xbx, txbx, { @@ -136,23 +184,39 @@ MLTensorOp::compVelGrad 
(int amrlev, const Array& flux mltensor_vel_grads_fz(tzbx,fzfab,vfab,dxinv); } ); - -// The derivatives are put in the array with the following order: -// component: 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 -// in 2D: dU/dx, dV/dx, dU/dy, dV/dy -// in 3D: dU/dx, dV/dx, dW/dx, dU/dy, dV/dy, dW/dy, dU/dz, dV/dz, dW/dz - - - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - const Box& nbx = mfi.nodaltilebox(idim); - Array4 dst = fluxes[idim]->array(mfi); - Array4 src = fluxfab_tmp[idim].const_array(); - AMREX_HOST_DEVICE_PARALLEL_FOR_4D (nbx, dim_fluxes, i, j, k, n, - { - dst(i,j,k,n) = src(i,j,k,n); - }); + } else { + const auto & bdcv = bcondloc.bndryConds(mfi); + + Array2D bct; + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + for (OrientationIter face; face; ++face) { + Orientation ori = face(); + bct(ori,icomp) = bdcv[icomp][ori]; + } } + const auto& bvxlo = (*bndry)[Orientation(0,Orientation::low )].array(mfi); + const auto& bvylo = (*bndry)[Orientation(1,Orientation::low )].array(mfi); + const auto& bvxhi = (*bndry)[Orientation(0,Orientation::high)].array(mfi); + const auto& bvyhi = (*bndry)[Orientation(1,Orientation::high)].array(mfi); +#if (AMREX_SPACEDIM == 3) + const auto& bvzlo = (*bndry)[Orientation(2,Orientation::low )].array(mfi); + const auto& bvzhi = (*bndry)[Orientation(2,Orientation::high)].array(mfi); +#endif + AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM + ( xbx, txbx, + { + mltensor_vel_grads_fx(txbx,fxfab,vfab,dxinv,bvxlo,bvxhi,bct,dlo,dhi); + } + , ybx, tybx, + { + mltensor_vel_grads_fy(tybx,fyfab,vfab,dxinv,bvylo,bvyhi,bct,dlo,dhi); + } + , zbx, tzbx, + { + mltensor_vel_grads_fz(tzbx,fzfab,vfab,dxinv,bvzlo,bvzhi,bct,dlo,dhi); + } + ); } } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLTensor_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLTensor_2D_K.H index 8f10f08ec58..a40fa4611a8 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLTensor_2D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLTensor_2D_K.H @@ -17,110 +17,168 @@ void mltensor_fill_corners (int 
icorner, Box const& vbox, // vbox: the valid box Array4 const& bcvalylo, Array4 const& bcvalxhi, Array4 const& bcvalyhi, - GpuArray const& bct, - GpuArray const& bcl, + Array2D const& bct, + Array2D const& bcl, int inhomog, int maxorder, - GpuArray const& dxinv, Box const& domain) noexcept + GpuArray const& dxinv, + Dim3 const& dlo, Dim3 const& dhi) noexcept { - constexpr int oxlo = 0; - constexpr int oylo = 1; - constexpr int oxhi = 2; - constexpr int oyhi = 3; - constexpr int xdir = 0; - constexpr int ydir = 1; + constexpr int k = 0; const auto blen = amrex::length(vbox); const auto vlo = amrex::lbound(vbox); const auto vhi = amrex::ubound(vbox); - const auto dlo = amrex::lbound(domain); - const auto dhi = amrex::ubound(domain); - for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { - switch (icorner) { - case 0: { - // xlo & ylo - if (mxlo(vlo.x-1,vlo.y-1,0) != BndryData::covered) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),ydir,1); - if (vlo.x == dlo.x && vlo.y == dlo.y) { - vel(vlo.x-1,vlo.y-1,0,icomp) = vel(vlo.x-1,vlo.y,0,icomp) - + vel(vlo.x,vlo.y-1,0,icomp) - vel(vlo.x,vlo.y,0,icomp); - } else if (vlo.x == dlo.x || mylo(vlo.x,vlo.y-1,0) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); + if (icorner == 0) { // xlo & ylo + int const i = vlo.x-1; + int const j = vlo.y-1; + if (mxlo(i,j,k) != BndryData::covered && (dlo.x != vlo.x || dlo.y != vlo.y)) { + bool x_interior = mylo(i+1,j ,k) == BndryData::covered; // i+1,j is a valid cell inside domain + bool x_exterior = mylo(i+1,j ,k) == BndryData::not_covered; // i+1,j is a ghost cell inside domain + bool y_interior = mxlo(i 
,j+1,k) == BndryData::covered; + bool y_exterior = mxlo(i ,j+1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || dlo.x == vlo.x) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || dlo.y == vlo.y) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); } } - break; } - case 1: { - // xhi & ylo - if (mxhi(vhi.x+1,vlo.y-1,0) != BndryData::covered) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),ydir,1); - if (vhi.x == dhi.x && vlo.y == dlo.y) { - vel(vhi.x+1,vlo.y-1,0,icomp) = vel(vhi.x+1,vlo.y,0,icomp) - + vel(vhi.x,vlo.y-1,0,icomp) - vel(vhi.x,vlo.y,0,icomp); - } else if (vhi.x == dhi.x || mylo(vhi.x,vlo.y-1,0) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, 
maxorder, dxinv[ydir], inhomog, icomp); + } else if (icorner == 1) { // xhi & ylo + int const i = vhi.x+1; + int const j = vlo.y-1; + if (mxhi(i,j,k) != BndryData::covered && (dhi.x != vhi.x || dlo.y != vlo.y)) { + bool x_interior = mylo(i-1,j ,k) == BndryData::covered; + bool x_exterior = mylo(i-1,j ,k) == BndryData::not_covered; + bool y_interior = mxhi(i ,j+1,k) == BndryData::covered; + bool y_exterior = mxhi(i ,j+1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || dhi.x == vhi.x) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || dlo.y == vlo.y) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); } } - break; } - case 2: { - // xlo & yhi - if (mxlo(vlo.x-1,vhi.y+1,0) != BndryData::covered) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),ydir,1); - if (vlo.x == dlo.x && vhi.y == dhi.y) { - vel(vlo.x-1,vhi.y+1,0,icomp) = vel(vlo.x-1,vhi.y,0,icomp) - + vel(vlo.x,vhi.y+1,0,icomp) - vel(vlo.x,vhi.y,0,icomp); - } else if (vlo.x == dlo.x || myhi(vlo.x,vhi.y+1,0) == 
BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); + } else if (icorner == 2) { // xlo & yhi + int const i = vlo.x-1; + int const j = vhi.y+1; + if (mxlo(i,j,k) != BndryData::covered && (dlo.x != vlo.x || dhi.y != vhi.y)) { + bool x_interior = myhi(i+1,j ,k) == BndryData::covered; + bool x_exterior = myhi(i+1,j ,k) == BndryData::not_covered; + bool y_interior = mxlo(i ,j-1,k) == BndryData::covered; + bool y_exterior = mxlo(i ,j-1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || dlo.x == vlo.x) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || dhi.y == vhi.y) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); } } - break; } - 
case 3: { - // xhi & yhi - if (mxhi(vhi.x+1,vhi.y+1,0) != BndryData::covered) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),ydir,1); - if (vhi.x == dhi.x && vhi.y == dhi.y) { - vel(vhi.x+1,vhi.y+1,0,icomp) = vel(vhi.x+1,vhi.y,0,icomp) - + vel(vhi.x,vhi.y+1,0,icomp) - vel(vhi.x,vhi.y,0,icomp); - } else if (vhi.x == dhi.x || myhi(vhi.x,vhi.y+1,0) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); + } else if (icorner == 3) { // xhi & yhi + int const i = vhi.x+1; + int const j = vhi.y+1; + if (mxhi(i,j,k) != BndryData::covered && (dhi.x != vhi.x || dhi.y != vhi.y)) { + bool x_interior = myhi(i-1,j ,k) == BndryData::covered; + bool x_exterior = myhi(i-1,j ,k) == BndryData::not_covered; + bool y_interior = mxhi(i ,j-1,k) == BndryData::covered; + bool y_exterior = mxhi(i ,j-1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || dhi.x == vhi.x) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), 
icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || dhi.y == vhi.y) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); } } - break; - } - default: {} } } } @@ -137,11 +195,12 @@ void mltensor_cross_terms_fx (Box const& box, Array4 const& fx, const auto hi = amrex::ubound(box); constexpr Real twoThirds = Real(2./3.); + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudy = (vel(i,j+1,0,0)+vel(i-1,j+1,0,0)-vel(i,j-1,0,0)-vel(i-1,j-1,0,0))*(Real(0.25)*dyi); - Real dvdy = (vel(i,j+1,0,1)+vel(i-1,j+1,0,1)-vel(i,j-1,0,1)-vel(i-1,j-1,0,1))*(Real(0.25)*dyi); + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi); Real divu = dvdy; Real xif = kapx(i,j,0); Real mun = Real(0.75)*(etax(i,j,0,0)-xif); // restore the original eta @@ -164,11 +223,80 @@ void mltensor_cross_terms_fy (Box const& box, Array4 const& fy, const auto hi = amrex::ubound(box); constexpr Real twoThirds = Real(2./3.); + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,0,0)+vel(i+1,j-1,0,0)-vel(i-1,j,0,0)-vel(i-1,j-1,0,0))*(Real(0.25)*dxi); - Real dvdx = (vel(i+1,j,0,1)+vel(i+1,j-1,0,1)-vel(i-1,j,0,1)-vel(i-1,j-1,0,1))*(Real(0.25)*dxi); + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi); + Real divu = dudx; + Real xif = kapy(i,j,0); + Real mun = Real(0.75)*(etay(i,j,0,1)-xif); // restore the original eta + Real mut = etay(i,j,0,0); + fy(i,j,0,0) = -mut*dvdx; + fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu; + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_cross_terms_fx (Box const& box, Array4 const& fx, + Array4 const& vel, 
+ Array4 const& etax, + Array4 const& kapx, + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = Real(2./3.); + + // Three BC types: reflect odd, neumann, and dirichlet + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real divu = dvdy; + Real xif = kapx(i,j,0); + Real mun = Real(0.75)*(etax(i,j,0,0)-xif); // restore the original eta + Real mut = etax(i,j,0,1); + fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu; + fx(i,j,0,1) = -mut*dudy; + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_cross_terms_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + Array4 const& etay, + Array4 const& kapy, + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = Real(2./3.); + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); Real divu = dudx; Real xif = kapy(i,j,0); Real mun = Real(0.75)*(etay(i,j,0,1)-xif); // restore the original eta @@ -241,13 +369,14 @@ void mltensor_vel_grads_fx (Box const& box, Array4 const& fx, const auto lo = amrex::lbound(box); const auto hi = amrex::ubound(box); + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { Real dudx = (vel(i,j,0,0) - 
vel(i-1,j,0,0))*dxi; Real dvdx = (vel(i,j,0,1) - vel(i-1,j,0,1))*dxi; - Real dudy = (vel(i,j+1,0,0)+vel(i-1,j+1,0,0)-vel(i,j-1,0,0)-vel(i-1,j-1,0,0))*(Real(0.25)*dyi); - Real dvdy = (vel(i,j+1,0,1)+vel(i-1,j+1,0,1)-vel(i,j-1,0,1)-vel(i-1,j-1,0,1))*(Real(0.25)*dyi); + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi); fx(i,j,0,0) = dudx; fx(i,j,0,1) = dvdx; fx(i,j,0,2) = dudy; @@ -266,11 +395,74 @@ void mltensor_vel_grads_fy (Box const& box, Array4 const& fy, const auto lo = amrex::lbound(box); const auto hi = amrex::ubound(box); + int k = 0; for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,0,0)+vel(i+1,j-1,0,0)-vel(i-1,j,0,0)-vel(i-1,j-1,0,0))*(Real(0.25)*dxi); - Real dvdx = (vel(i+1,j,0,1)+vel(i+1,j-1,0,1)-vel(i-1,j,0,1)-vel(i-1,j-1,0,1))*(Real(0.25)*dxi); + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi); + Real dudy = (vel(i,j,0,0) - vel(i,j-1,0,0))*dyi; + Real dvdy = (vel(i,j,0,1) - vel(i,j-1,0,1))*dyi; + fy(i,j,0,0) = dudx; + fy(i,j,0,1) = dvdx; + fy(i,j,0,2) = dudy; + fy(i,j,0,3) = dvdy; + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_vel_grads_fx (Box const& box, Array4 const& fx, + Array4 const& vel, + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = (vel(i,j,0,0) - vel(i-1,j,0,0))*dxi; + Real dvdx = (vel(i,j,0,1) - vel(i-1,j,0,1))*dxi; + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + fx(i,j,0,0) = dudx; + fx(i,j,0,1) = dvdx; + 
fx(i,j,0,2) = dudy; + fx(i,j,0,3) = dvdy; + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_vel_grads_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + int k = 0; + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); Real dudy = (vel(i,j,0,0) - vel(i,j-1,0,0))*dyi; Real dvdy = (vel(i,j,0,1) - vel(i,j-1,0,1))*dyi; fy(i,j,0,0) = dudx; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLTensor_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLTensor_3D_K.H index a4a4c7df9ef..a5de05a385e 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLTensor_3D_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLTensor_3D_K.H @@ -6,6 +6,643 @@ namespace amrex { +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xlo_ylo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxlo, + Array4 const& mylo, + Array4 const& bcvalxlo, + Array4 const& bcvalylo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xlo_domain, bool ylo_domain) noexcept +{ + if (mxlo(i,j,k) != BndryData::covered && (!xlo_domain || !ylo_domain)) { + bool x_interior = mylo(i+1,j ,k) == BndryData::covered; + bool x_exterior = mylo(i+1,j ,k) == BndryData::not_covered; + bool y_interior = mxlo(i ,j+1,k) == BndryData::covered; + bool y_exterior = mxlo(i ,j+1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, 
mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || ylo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xhi_ylo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxhi, + Array4 const& mylo, + Array4 const& bcvalxhi, + Array4 const& bcvalylo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xhi_domain, bool ylo_domain) noexcept +{ + if (mxhi(i,j,k) != BndryData::covered && (!xhi_domain || !ylo_domain)) { + bool x_interior = mylo(i-1,j ,k) == BndryData::covered; + bool x_exterior = mylo(i-1,j ,k) == BndryData::not_covered; + bool y_interior = mxhi(i ,j+1,k) == BndryData::covered; + bool y_exterior = mxhi(i ,j+1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real 
tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || ylo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xlo_yhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxlo, + Array4 const& myhi, + Array4 const& bcvalxlo, + Array4 const& bcvalyhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xlo_domain, bool yhi_domain) noexcept +{ + if (mxlo(i,j,k) != BndryData::covered && (!xlo_domain || !yhi_domain)) { + bool x_interior = myhi(i+1,j ,k) == BndryData::covered; + bool x_exterior = myhi(i+1,j ,k) == BndryData::not_covered; + bool y_interior = mxlo(i ,j-1,k) == BndryData::covered; + bool y_exterior = mxlo(i ,j-1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + 
bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || yhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xhi_yhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxhi, + Array4 const& myhi, + Array4 const& bcvalxhi, + Array4 const& bcvalyhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xhi_domain, bool yhi_domain) noexcept +{ + if (mxhi(i,j,k) != BndryData::covered && (!xhi_domain || !yhi_domain)) { + bool x_interior = myhi(i-1,j ,k) == BndryData::covered; + bool x_exterior = myhi(i-1,j ,k) == BndryData::not_covered; + bool y_interior = mxhi(i ,j-1,k) == BndryData::covered; + bool y_exterior = mxhi(i ,j-1,k) == BndryData::not_covered; + if ((x_interior && y_interior) || (x_exterior && y_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 
0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (y_interior || yhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xlo_zlo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxlo, + Array4 const& mzlo, + Array4 const& bcvalxlo, + Array4 const& bcvalzlo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xlo_domain, bool zlo_domain) noexcept +{ + if (mxlo(i,j,k) != BndryData::covered && (!xlo_domain || !zlo_domain)) { + bool x_interior = mzlo(i+1,j,k ) == BndryData::covered; + bool x_exterior = mzlo(i+1,j,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j,k+1) == BndryData::covered; + bool z_exterior = mxlo(i ,j,k+1) == BndryData::not_covered; + if ((x_interior && z_interior) || (x_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) 
{ + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (z_interior || zlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xhi_zlo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxhi, + Array4 const& mzlo, + Array4 const& bcvalxhi, + Array4 const& bcvalzlo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xhi_domain, bool zlo_domain) noexcept +{ + if (mxhi(i,j,k) != BndryData::covered && (!xhi_domain || !zlo_domain)) { + bool x_interior = mzlo(i-1,j,k ) == BndryData::covered; + bool x_exterior = mzlo(i-1,j,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j,k+1) == BndryData::covered; + bool z_exterior = mxhi(i ,j,k+1) == BndryData::not_covered; + if ((x_interior && z_interior) || (x_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), 
icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (z_interior || zlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xlo_zhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxlo, + Array4 const& mzhi, + Array4 const& bcvalxlo, + Array4 const& bcvalzhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xlo_domain, bool zhi_domain) noexcept +{ + if (mxlo(i,j,k) != BndryData::covered && (!xlo_domain || !zhi_domain)) { + bool x_interior = mzhi(i+1,j,k ) == BndryData::covered; + bool x_exterior = mzhi(i+1,j,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j,k-1) == BndryData::covered; + bool z_exterior = mxlo(i ,j,k-1) == BndryData::not_covered; + if ((x_interior && z_interior) || (x_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } + } else if (z_interior || zhi_domain) { + for (int icomp = 0; icomp < 
AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_xhi_zhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mxhi, + Array4 const& mzhi, + Array4 const& bcvalxhi, + Array4 const& bcvalzhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool xhi_domain, bool zhi_domain) noexcept +{ + if (mxhi(i,j,k) != BndryData::covered && (!xhi_domain || !zhi_domain)) { + bool x_interior = mzhi(i-1,j,k ) == BndryData::covered; + bool x_exterior = mzhi(i-1,j,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j,k-1) == BndryData::covered; + bool z_exterior = mxhi(i ,j,k-1) == BndryData::not_covered; + if ((x_interior && z_interior) || (x_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (x_interior || xhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } + } else if (z_interior || zhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + 
bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_ylo_zlo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& mylo, + Array4 const& mzlo, + Array4 const& bcvalylo, + Array4 const& bcvalzlo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool ylo_domain, bool zlo_domain) noexcept +{ + if (mylo(i,j,k) != BndryData::covered && (!ylo_domain || !zlo_domain)) { + bool y_interior = mzlo(i,j+1,k ) == BndryData::covered; + bool y_exterior = mzlo(i,j+1,k ) == BndryData::not_covered; + bool z_interior = mylo(i,j ,k+1) == BndryData::covered; + bool z_exterior = mylo(i,j ,k+1) == BndryData::not_covered; + if ((y_interior && z_interior) || (y_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (y_interior || ylo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } + } else if (z_interior || zlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE 
AMREX_FORCE_INLINE +void mltensor_fill_edges_yhi_zlo (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& myhi, + Array4 const& mzlo, + Array4 const& bcvalyhi, + Array4 const& bcvalzlo, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool yhi_domain, bool zlo_domain) noexcept +{ + if (myhi(i,j,k) != BndryData::covered && (!yhi_domain || !zlo_domain)) { + bool y_interior = mzlo(i,j-1,k ) == BndryData::covered; + bool y_exterior = mzlo(i,j-1,k ) == BndryData::not_covered; + bool z_interior = myhi(i,j ,k+1) == BndryData::covered; + bool z_exterior = myhi(i,j ,k+1) == BndryData::not_covered; + if ((y_interior && z_interior) || (y_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (y_interior || yhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } + } else if (z_interior || zlo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_ylo_zhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& 
vel, + Array4 const& mylo, + Array4 const& mzhi, + Array4 const& bcvalylo, + Array4 const& bcvalzhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool ylo_domain, bool zhi_domain) noexcept +{ + if (mylo(i,j,k) != BndryData::covered && (!ylo_domain || !zhi_domain)) { + bool y_interior = mzhi(i,j+1,k ) == BndryData::covered; + bool y_exterior = mzhi(i,j+1,k ) == BndryData::not_covered; + bool z_interior = mylo(i,j ,k-1) == BndryData::covered; + bool z_exterior = mylo(i,j ,k-1) == BndryData::not_covered; + if ((y_interior && z_interior) || (y_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (y_interior || ylo_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } + } else if (z_interior || zhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges_yhi_zhi (int const i, int const j, int const k, Dim3 const& blen, + Array4 const& vel, + Array4 const& myhi, + Array4 const& mzhi, + Array4 const& bcvalyhi, + Array4 const& bcvalzhi, + Array2D const& bct, + 
Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + bool yhi_domain, bool zhi_domain) noexcept +{ + if (myhi(i,j,k) != BndryData::covered && (!yhi_domain || !zhi_domain)) { + bool y_interior = mzhi(i,j-1,k ) == BndryData::covered; + bool y_exterior = mzhi(i,j-1,k ) == BndryData::not_covered; + bool z_interior = myhi(i,j ,k-1) == BndryData::covered; + bool z_exterior = myhi(i,j ,k-1) == BndryData::not_covered; + if ((y_interior && z_interior) || (y_exterior && z_exterior)) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } + } else if (y_interior || yhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } + } else if (z_interior || zhi_domain) { + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + } + } + } +} + +#ifdef AMREX_USE_EB AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void mltensor_fill_corners (int icorner, Box const& vbox, // vbox: the valid box Array4 const& vel, @@ -21,495 +658,680 @@ void mltensor_fill_corners (int icorner, Box const& vbox, // vbox: the valid box Array4 const& bcvalxhi, Array4 const& bcvalyhi, Array4 const& bcvalzhi, - GpuArray const& bct, - GpuArray const& bcl, + Array2D const& 
bct, + Array2D const& bcl, int inhomog, int maxorder, - GpuArray const& dxinv, Box const& domain) noexcept + GpuArray const& dxinv, + Dim3 const& dlo, Dim3 const& dhi) noexcept { - constexpr int oxlo = 0; - constexpr int oylo = 1; - constexpr int ozlo = 2; - constexpr int oxhi = 3; - constexpr int oyhi = 4; - constexpr int ozhi = 5; - constexpr int xdir = 0; - constexpr int ydir = 1; - constexpr int zdir = 2; const auto blen = amrex::length(vbox); const auto vlo = amrex::lbound(vbox); const auto vhi = amrex::ubound(vbox); - const auto dlo = amrex::lbound(domain); - const auto dhi = amrex::ubound(domain); + bool xlo_domain = (vlo.x == dlo.x); + bool ylo_domain = (vlo.y == dlo.y); + bool zlo_domain = (vlo.z == dlo.z); + bool xhi_domain = (vhi.x == dhi.x); + bool yhi_domain = (vhi.y == dhi.y); + bool zhi_domain = (vhi.z == dhi.z); + for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { switch (icorner) { case 0: { // xlo & ylo & zlo - Box bx = amrex::adjCellLo(amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),ydir,1),zdir,1); - if (vlo.x == dlo.x && vlo.y == dlo.y && vlo.z == dlo.z) { - vel (vlo.x-1,vlo.y-1,vlo.z-1,icomp) - = vel(vlo.x-1,vlo.y ,vlo.z ,icomp) - + vel(vlo.x ,vlo.y-1,vlo.z ,icomp) - + vel(vlo.x ,vlo.y ,vlo.z-1,icomp) - - vel(vlo.x ,vlo.y ,vlo.z ,icomp) * Real(2.0); - } else if (vlo.x == dlo.x && vlo.y == dlo.y) { - vel (vlo.x-1,vlo.y-1,vlo.z-1,icomp) - = vel(vlo.x-1,vlo.y ,vlo.z-1,icomp) - + vel(vlo.x ,vlo.y-1,vlo.z-1,icomp) - - vel(vlo.x ,vlo.y ,vlo.z-1,icomp); - } else if (vlo.x == dlo.x && vlo.z == dlo.z) { - vel (vlo.x-1,vlo.y-1,vlo.z-1,icomp) - = vel(vlo.x-1,vlo.y-1,vlo.z ,icomp) - + vel(vlo.x ,vlo.y-1,vlo.z-1,icomp) - - vel(vlo.x ,vlo.y-1,vlo.z ,icomp); - } else if (vlo.y == dlo.y && vlo.z == dlo.z) { - vel (vlo.x-1,vlo.y-1,vlo.z-1,icomp) - = vel(vlo.x-1,vlo.y-1,vlo.z ,icomp) - + vel(vlo.x-1,vlo.y ,vlo.z-1,icomp) - - vel(vlo.x-1,vlo.y ,vlo.z ,icomp); - } else if (vlo.x == dlo.x) { - int offset = AMREX_SPACEDIM * oxlo; - 
mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y == dlo.y) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vlo.y-1,vlo.z-1) != BndryData::covered) { - if (mylo(vlo.x,vlo.y-1,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vlo.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + int i = vlo.x-1; + int j = vlo.y-1; + int k = vlo.z-1; + if (mxlo(i,j,k) != BndryData::covered && + (!xlo_domain || !ylo_domain || !zlo_domain)) { + bool x_interior = mylo(i+1,j ,k ) == BndryData::covered; + bool x_exterior = mylo(i+1,j ,k ) == BndryData::not_covered; + bool y_interior = mxlo(i ,j+1,k ) == BndryData::covered; + bool y_exterior = mxlo(i ,j+1,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j ,k+1) == BndryData::covered; + bool z_exterior = mxlo(i ,j ,k+1) == BndryData::not_covered; + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + 
mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + 
bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); } } break; } case 1: { // xhi & ylo & zlo - Box bx = amrex::adjCellLo(amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),ydir,1),zdir,1); - if (vhi.x == dhi.x && vlo.y == dlo.y && vlo.z == dlo.z) { - vel (vhi.x+1,vlo.y-1,vlo.z-1,icomp) - = vel(vhi.x+1,vlo.y ,vlo.z ,icomp) - + vel(vhi.x ,vlo.y-1,vlo.z ,icomp) - + vel(vhi.x ,vlo.y ,vlo.z-1,icomp) - - vel(vhi.x ,vlo.y ,vlo.z ,icomp) * Real(2.0); - } else if (vhi.x == dhi.x && vlo.y == dlo.y) { - vel (vhi.x+1,vlo.y-1,vlo.z-1,icomp) - = vel(vhi.x+1,vlo.y ,vlo.z-1,icomp) - + vel(vhi.x ,vlo.y-1,vlo.z-1,icomp) - - vel(vhi.x ,vlo.y ,vlo.z-1,icomp); - } else if (vhi.x == dhi.x && vlo.z == dlo.z) { - vel (vhi.x+1,vlo.y-1,vlo.z-1,icomp) - = vel(vhi.x+1,vlo.y-1,vlo.z ,icomp) - + vel(vhi.x ,vlo.y-1,vlo.z-1,icomp) - - vel(vhi.x ,vlo.y-1,vlo.z ,icomp); - } else if (vlo.y == dlo.y && vlo.z == dlo.z) { - vel (vhi.x+1,vlo.y-1,vlo.z-1,icomp) - = vel(vhi.x+1,vlo.y-1,vlo.z ,icomp) - + vel(vhi.x+1,vlo.y ,vlo.z-1,icomp) - - vel(vhi.x+1,vlo.y ,vlo.z ,icomp); - } else if (vhi.x == dhi.x) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y 
== dlo.y) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vlo.y-1,vlo.z-1) != BndryData::covered) { - if (mylo(vhi.x,vlo.y-1,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vlo.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + int i = vhi.x+1; + int j = vlo.y-1; + int k = vlo.z-1; + bool x_interior = mylo(i-1,j ,k ) == BndryData::covered; + bool x_exterior = mylo(i-1,j ,k ) == BndryData::not_covered; + bool y_interior = mxhi(i ,j+1,k ) == BndryData::covered; + bool y_exterior = mxhi(i ,j+1,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j ,k+1) == BndryData::covered; + bool z_exterior = mxhi(i ,j ,k+1) == BndryData::not_covered; + if (mxhi(i,j,k) != BndryData::covered && + (!xhi_domain || !ylo_domain || !zlo_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, 
icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + 
mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); } } break; } case 2: { // xlo & yhi & zlo - Box bx = amrex::adjCellLo(amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),ydir,1),zdir,1); - if (vlo.x == dlo.x && vhi.y == dhi.y && vlo.z == dlo.z) { - vel (vlo.x-1,vhi.y+1,vlo.z-1,icomp) - = vel(vlo.x-1,vhi.y ,vlo.z ,icomp) - + vel(vlo.x ,vhi.y+1,vlo.z ,icomp) - + vel(vlo.x ,vhi.y ,vlo.z-1,icomp) - - vel(vlo.x ,vhi.y ,vlo.z ,icomp) * Real(2.0); - } else if (vlo.x == dlo.x && vhi.y == dhi.y) { - vel (vlo.x-1,vhi.y+1,vlo.z-1,icomp) - = vel(vlo.x-1,vhi.y ,vlo.z-1,icomp) - + vel(vlo.x ,vhi.y+1,vlo.z-1,icomp) - - vel(vlo.x ,vhi.y ,vlo.z-1,icomp); - } else if (vlo.x == dlo.x && vlo.z == dlo.z) { - vel (vlo.x-1,vhi.y+1,vlo.z-1,icomp) - = vel(vlo.x-1,vhi.y+1,vlo.z ,icomp) - + vel(vlo.x ,vhi.y+1,vlo.z-1,icomp) - - vel(vlo.x ,vhi.y+1,vlo.z ,icomp); - } else if (vhi.y == dhi.y && vlo.z == dlo.z) { - vel (vlo.x-1,vhi.y+1,vlo.z-1,icomp) - = vel(vlo.x-1,vhi.y+1,vlo.z ,icomp) - + vel(vlo.x-1,vhi.y ,vlo.z-1,icomp) - - vel(vlo.x-1,vhi.y ,vlo.z ,icomp); - } else if (vlo.x == dlo.x) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - 
bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vhi.y+1,vlo.z-1) != BndryData::covered) { - if (myhi(vlo.x,vhi.y+1,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vhi.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + int i = vlo.x-1; + int j = vhi.y+1; + int k = vlo.z-1; + bool x_interior = myhi(i+1,j ,k ) == BndryData::covered; + bool x_exterior = myhi(i+1,j ,k ) == BndryData::not_covered; + bool y_interior = mxlo(i ,j-1,k ) == BndryData::covered; + bool y_exterior = mxlo(i ,j-1,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j ,k+1) == BndryData::covered; + bool z_exterior = mxlo(i ,j ,k+1) == BndryData::not_covered; + if (mxlo(i,j,k) != BndryData::covered && + (!xlo_domain || !yhi_domain || !zlo_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + 
bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, 
dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); } } break; } case 3: { // xhi & yhi & zlo - Box bx = amrex::adjCellLo(amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),ydir,1),zdir,1); - if (vhi.x == dhi.x && vhi.y == dhi.y && vlo.z == dlo.z) { - vel (vhi.x+1,vhi.y+1,vlo.z-1,icomp) - = vel(vhi.x+1,vhi.y ,vlo.z ,icomp) - + vel(vhi.x ,vhi.y+1,vlo.z ,icomp) - + vel(vhi.x ,vhi.y ,vlo.z-1,icomp) - - vel(vhi.x ,vhi.y ,vlo.z ,icomp) * Real(2.0); - } else if (vhi.x == dhi.x && vhi.y == dhi.y) { - vel (vhi.x+1,vhi.y+1,vlo.z-1,icomp) - = vel(vhi.x+1,vhi.y ,vlo.z-1,icomp) - + vel(vhi.x ,vhi.y+1,vlo.z-1,icomp) - - vel(vhi.x ,vhi.y ,vlo.z-1,icomp); - } else if (vhi.x == dhi.x && vlo.z == dlo.z) { - vel (vhi.x+1,vhi.y+1,vlo.z-1,icomp) - = vel(vhi.x+1,vhi.y+1,vlo.z ,icomp) - + vel(vhi.x ,vhi.y+1,vlo.z-1,icomp) - - vel(vhi.x ,vhi.y+1,vlo.z ,icomp); - } else if (vhi.y == dhi.y && vlo.z == dlo.z) { - vel (vhi.x+1,vhi.y+1,vlo.z-1,icomp) - = vel(vhi.x+1,vhi.y+1,vlo.z ,icomp) - + vel(vhi.x+1,vhi.y ,vlo.z-1,icomp) - - vel(vhi.x+1,vhi.y ,vlo.z ,icomp); - } else if (vhi.x == dhi.x) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, 
bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vhi.y+1,vlo.z-1) != BndryData::covered) { - if (myhi(vhi.x,vhi.y+1,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vhi.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + int i = vhi.x+1; + int j = vhi.y+1; + int k = vlo.z-1; + bool x_interior = myhi(i-1,j ,k ) == BndryData::covered; + bool x_exterior = myhi(i-1,j ,k ) == BndryData::not_covered; + bool y_interior = mxhi(i ,j-1,k ) == BndryData::covered; + bool y_exterior = mxhi(i ,j-1,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j ,k+1) == BndryData::covered; + bool z_exterior = mxhi(i ,j ,k+1) == BndryData::not_covered; + if (mxhi(i,j,k) != BndryData::covered && + (!xhi_domain || !yhi_domain || !zlo_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + 
bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + 
bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::low, i,j,k, blen.z, vel, mzlo, + bct(Orientation::zlo(), icomp), + bcl(Orientation::zlo(), icomp), + bcvalzlo, maxorder, dxinv[2], inhomog, icomp); } } break; } case 4: { // xlo & ylo & zhi - Box bx = amrex::adjCellHi(amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),ydir,1),zdir,1); - if (vlo.x == dlo.x && vlo.y == dlo.y && vhi.z == dhi.z) { - vel (vlo.x-1, vlo.y-1, vhi.z+1,icomp) - = vel(vlo.x-1, vlo.y , vhi.z ,icomp) - + vel(vlo.x , vlo.y-1, vhi.z ,icomp) - + vel(vlo.x , vlo.y , vhi.z+1,icomp) - - vel(vlo.x , vlo.y , vhi.z ,icomp) * Real(2.0); - } else if (vlo.x == dlo.x && vlo.y == dlo.y) { - vel (vlo.x-1, vlo.y-1, vhi.z+1,icomp) - = vel(vlo.x-1, vlo.y , vhi.z+1,icomp) - + vel(vlo.x , vlo.y-1, vhi.z+1,icomp) - - vel(vlo.x , vlo.y , vhi.z+1,icomp); - } else if (vlo.x == dlo.x && vhi.z == dhi.z) { - vel (vlo.x-1, vlo.y-1, vhi.z+1,icomp) - = vel(vlo.x-1, vlo.y-1, vhi.z ,icomp) - + vel(vlo.x , vlo.y-1, vhi.z+1,icomp) - - vel(vlo.x , vlo.y-1, vhi.z ,icomp); - } else if (vlo.y == dlo.y && vhi.z == dhi.z) { - vel (vlo.x-1, vlo.y-1, vhi.z+1,icomp) - = vel(vlo.x-1, vlo.y-1, vhi.z ,icomp) - + vel(vlo.x-1, vlo.y , vhi.z+1,icomp) - - vel(vlo.x-1, vlo.y , vhi.z ,icomp); - } else if (vlo.x == dlo.x) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y == dlo.y) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } else 
if (mxlo(vlo.x-1,vlo.y-1,vhi.z+1) != BndryData::covered) { - if (mylo(vlo.x,vlo.y-1,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vlo.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + int i = vlo.x-1; + int j = vlo.y-1; + int k = vhi.z+1; + bool x_interior = mylo(i+1,j ,k ) == BndryData::covered; + bool x_exterior = mylo(i+1,j ,k ) == BndryData::not_covered; + bool y_interior = mxlo(i ,j+1,k ) == BndryData::covered; + bool y_exterior = mxlo(i ,j+1,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j ,k-1) == BndryData::covered; + bool z_exterior = mxlo(i ,j ,k-1) == BndryData::not_covered; + if (mxlo(i,j,k) != BndryData::covered && + (!xlo_domain || !ylo_domain || !zhi_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + 
vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + 
mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); } } break; } case 5: { // xhi & ylo & zhi - Box bx = amrex::adjCellHi(amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),ydir,1),zdir,1); - if (vhi.x == dhi.x && vlo.y == dlo.y && vhi.z == dhi.z) { - vel (vhi.x+1,vlo.y-1,vhi.z+1,icomp) - = vel(vhi.x+1,vlo.y ,vhi.z ,icomp) - + vel(vhi.x ,vlo.y-1,vhi.z ,icomp) - + vel(vhi.x ,vlo.y ,vhi.z+1,icomp) - - vel(vhi.x ,vlo.y ,vhi.z ,icomp) * Real(2.0); - } else if (vhi.x == dhi.x && vlo.y == dlo.y) { - vel (vhi.x+1,vlo.y-1,vhi.z+1,icomp) - = vel(vhi.x+1,vlo.y ,vhi.z+1,icomp) - + vel(vhi.x ,vlo.y-1,vhi.z+1,icomp) - - vel(vhi.x ,vlo.y ,vhi.z+1,icomp); - } else if (vhi.x == dhi.x && vhi.z == dhi.z) { - vel (vhi.x+1,vlo.y-1,vhi.z+1,icomp) - = vel(vhi.x+1,vlo.y-1,vhi.z ,icomp) - + vel(vhi.x ,vlo.y-1,vhi.z+1,icomp) - - vel(vhi.x ,vlo.y-1,vhi.z ,icomp); - } else if (vlo.y == dlo.y && vhi.z == dhi.z) { - vel (vhi.x+1,vlo.y-1,vhi.z+1,icomp) - = vel(vhi.x+1,vlo.y-1,vhi.z ,icomp) - + vel(vhi.x+1,vlo.y ,vhi.z+1,icomp) - - vel(vhi.x+1,vlo.y ,vhi.z ,icomp); - } else if (vhi.x == dhi.x) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y == dlo.y) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vlo.y-1,vhi.z+1) != BndryData::covered) { - if (mylo(vhi.x,vlo.y-1,vhi.z+1) == BndryData::covered) { - int offset = 
AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vlo.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + int i = vhi.x+1; + int j = vlo.y-1; + int k = vhi.z+1; + bool x_interior = mylo(i-1,j ,k ) == BndryData::covered; + bool x_exterior = mylo(i-1,j ,k ) == BndryData::not_covered; + bool y_interior = mxhi(i ,j+1,k ) == BndryData::covered; + bool y_exterior = mxhi(i ,j+1,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j ,k-1) == BndryData::covered; + bool z_exterior = mxhi(i ,j ,k-1) == BndryData::not_covered; + if (mxhi(i,j,k) != BndryData::covered && + (!xhi_domain || !ylo_domain || !zhi_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::high, 
i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::low, i,j,k, blen.y, vel, mylo, + bct(Orientation::ylo(), icomp), + bcl(Orientation::ylo(), icomp), + bcvalylo, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], 
inhomog, icomp); } } break; } case 6: { // xlo & yhi & zhi - Box bx = amrex::adjCellHi(amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),ydir,1),zdir,1); - if (vlo.x == dlo.x && vhi.y == dhi.y && vhi.z == dhi.z) { - vel (vlo.x-1,vhi.y+1,vhi.z+1,icomp) - = vel(vlo.x-1,vhi.y ,vhi.z ,icomp) - + vel(vlo.x ,vhi.y+1,vhi.z ,icomp) - + vel(vlo.x ,vhi.y ,vhi.z+1,icomp) - - vel(vlo.x ,vhi.y ,vhi.z ,icomp) * Real(2.0); - } else if (vlo.x == dlo.x && vhi.y == dhi.y) { - vel (vlo.x-1,vhi.y+1,vhi.z+1,icomp) - = vel(vlo.x-1,vhi.y ,vhi.z+1,icomp) - + vel(vlo.x ,vhi.y+1,vhi.z+1,icomp) - - vel(vlo.x ,vhi.y ,vhi.z+1,icomp); - } else if (vlo.x == dlo.x && vhi.z == dhi.z) { - vel (vlo.x-1,vhi.y+1,vhi.z+1,icomp) - = vel(vlo.x-1,vhi.y+1,vhi.z ,icomp) - + vel(vlo.x ,vhi.y+1,vhi.z+1,icomp) - - vel(vlo.x ,vhi.y+1,vhi.z ,icomp); - } else if (vhi.y == dhi.y && vhi.z == dhi.z) { - vel (vlo.x-1,vhi.y+1,vhi.z+1,icomp) - = vel(vlo.x-1,vhi.y+1,vhi.z ,icomp) - + vel(vlo.x-1,vhi.y ,vhi.z+1,icomp) - - vel(vlo.x-1,vhi.y ,vhi.z ,icomp); - } else if (vlo.x == dlo.x) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxlo(vlo.x-1,vhi.y+1,vhi.z+1) != BndryData::covered) { - if (myhi(vlo.x,vhi.y+1,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, 
icomp); - } else if (mxlo(vlo.x-1,vhi.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + int i = vlo.x-1; + int j = vhi.y+1; + int k = vhi.z+1; + bool x_interior = myhi(i+1,j ,k ) == BndryData::covered; + bool x_exterior = myhi(i+1,j ,k ) == BndryData::not_covered; + bool y_interior = mxlo(i ,j-1,k ) == BndryData::covered; + bool y_exterior = mxlo(i ,j-1,k ) == BndryData::not_covered; + bool z_interior = mxlo(i ,j ,k-1) == BndryData::covered; + bool z_exterior = mxlo(i ,j ,k-1) == BndryData::not_covered; + if (mxlo(i,j,k) != BndryData::covered && + (!xlo_domain || !yhi_domain || !zhi_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); 
+ mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::low, i,j,k, blen.x, vel, mxlo, + bct(Orientation::xlo(), icomp), + bcl(Orientation::xlo(), icomp), + bcvalxlo, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); } } break; } case 7: { // xhi & yhi & zhi - Box bx = amrex::adjCellHi(amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),ydir,1),zdir,1); - if (vhi.x == dhi.x 
&& vhi.y == dhi.y && vhi.z == dhi.z) { - vel (vhi.x+1,vhi.y+1,vhi.z+1,icomp) - = vel(vhi.x+1,vhi.y ,vhi.z ,icomp) - + vel(vhi.x ,vhi.y+1,vhi.z ,icomp) - + vel(vhi.x ,vhi.y ,vhi.z+1,icomp) - - vel(vhi.x ,vhi.y ,vhi.z ,icomp) * Real(2.0); - } else if (vhi.x == dhi.x && vhi.y == dhi.y) { - vel (vhi.x+1,vhi.y+1,vhi.z+1,icomp) - = vel(vhi.x+1,vhi.y ,vhi.z+1,icomp) - + vel(vhi.x ,vhi.y+1,vhi.z+1,icomp) - - vel(vhi.x ,vhi.y ,vhi.z+1,icomp); - } else if (vhi.x == dhi.x && vhi.z == dhi.z) { - vel (vhi.x+1,vhi.y+1,vhi.z+1,icomp) - = vel(vhi.x+1,vhi.y+1,vhi.z ,icomp) - + vel(vhi.x ,vhi.y+1,vhi.z+1,icomp) - - vel(vhi.x ,vhi.y+1,vhi.z ,icomp); - } else if (vhi.y == dhi.y && vhi.z == dhi.z) { - vel (vhi.x+1,vhi.y+1,vhi.z+1,icomp) - = vel(vhi.x+1,vhi.y+1,vhi.z ,icomp) - + vel(vhi.x+1,vhi.y ,vhi.z+1,icomp) - - vel(vhi.x+1,vhi.y ,vhi.z ,icomp); - } else if (vhi.x == dhi.x) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vhi.y+1,vhi.z+1) != BndryData::covered) { - if (myhi(vhi.x,vhi.y+1,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (mxhi(vhi.x+1,vhi.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, 
myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + int i = vhi.x+1; + int j = vhi.y+1; + int k = vhi.z+1; + bool x_interior = myhi(i-1,j ,k ) == BndryData::covered; + bool x_exterior = myhi(i-1,j ,k ) == BndryData::not_covered; + bool y_interior = mxhi(i ,j-1,k ) == BndryData::covered; + bool y_exterior = mxhi(i ,j-1,k ) == BndryData::not_covered; + bool z_interior = mxhi(i ,j ,k-1) == BndryData::covered; + bool z_exterior = mxhi(i ,j ,k-1) == BndryData::not_covered; + if (mxhi(i,j,k) != BndryData::covered && + (!xhi_domain || !yhi_domain || !zhi_domain)) { + if ((x_interior && y_interior && z_interior) || + (x_exterior && y_exterior && z_exterior)) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + tmp += vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = Real(1./3.)*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && y_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], 
inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior && z_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (y_interior && z_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + Real tmp = vel(i,j,k,icomp); + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); + vel(i,j,k,icomp) = 0.5_rt*(tmp+vel(i,j,k,icomp)); + } else if (x_interior) { + mllinop_apply_bc_x(Orientation::high, i,j,k, blen.x, vel, mxhi, + bct(Orientation::xhi(), icomp), + bcl(Orientation::xhi(), icomp), + bcvalxhi, maxorder, dxinv[0], inhomog, icomp); + } else if (y_interior) { + mllinop_apply_bc_y(Orientation::high, i,j,k, blen.y, vel, myhi, + bct(Orientation::yhi(), icomp), + bcl(Orientation::yhi(), icomp), + bcvalyhi, maxorder, dxinv[1], inhomog, icomp); + } else if (z_interior) { + mllinop_apply_bc_z(Orientation::high, i,j,k, blen.z, vel, mzhi, + bct(Orientation::zhi(), icomp), + bcl(Orientation::zhi(), icomp), + bcvalzhi, maxorder, dxinv[2], inhomog, icomp); } } break; @@ -518,9 +1340,10 @@ void mltensor_fill_corners (int icorner, Box const& vbox, // vbox: the valid box } } } +#endif -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void mltensor_fill_edges (int iedge, Box const& vbox, // vbox: the valid box +inline +void mltensor_fill_edges (Box const& vbox, // vbox: the valid 
box Array4 const& vel, Array4 const& mxlo, Array4 const& mylo, @@ -534,522 +1357,486 @@ void mltensor_fill_edges (int iedge, Box const& vbox, // vbox: the valid box Array4 const& bcvalxhi, Array4 const& bcvalyhi, Array4 const& bcvalzhi, - GpuArray const& bct, - GpuArray const& bcl, + Array2D const& bct, + Array2D const& bcl, int inhomog, int maxorder, - GpuArray const& dxinv, Box const& domain) noexcept + GpuArray const& dxinv, + Dim3 const& dlo, Dim3 const& dhi) noexcept + { - constexpr int oxlo = 0; - constexpr int oylo = 1; - constexpr int ozlo = 2; - constexpr int oxhi = 3; - constexpr int oyhi = 4; - constexpr int ozhi = 5; - constexpr int xdir = 0; - constexpr int ydir = 1; - constexpr int zdir = 2; const auto blen = amrex::length(vbox); const auto vlo = amrex::lbound(vbox); const auto vhi = amrex::ubound(vbox); - const auto dlo = amrex::lbound(domain); - const auto dhi = amrex::ubound(domain); - for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) { - switch (iedge) { - case 0: { - // xlo & ylo - if (vlo.x == dlo.x && vlo.y == dlo.y) { - for (int k = vlo.z; k <= vhi.z; ++k) { - vel (vlo.x-1,vlo.y-1,k,icomp) - = vel(vlo.x ,vlo.y-1,k,icomp) - + vel(vlo.x-1,vlo.y ,k,icomp) - - vel(vlo.x ,vlo.y ,k,icomp); - } - } else if (vlo.x == dlo.x) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y == dlo.y) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - for (int k = vlo.z; k <= vhi.z; ++k) { - if (mxlo(vlo.x-1,vlo.y-1,k) != BndryData::covered) { - Box bx(IntVect(vlo.x-1,vlo.y-1,k),IntVect(vlo.x-1,vlo.y-1,k)); - if 
(mylo(vlo.x,vlo.y-1,k) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } - } - } - } - break; + bool xlo_domain = (vlo.x == dlo.x); + bool ylo_domain = (vlo.y == dlo.y); + bool zlo_domain = (vlo.z == dlo.z); + bool xhi_domain = (vhi.x == dhi.x); + bool yhi_domain = (vhi.y == dhi.y); + bool zhi_domain = (vhi.z == dhi.z); + + for (int k = vlo.z; k <= vhi.z; ++k) { + mltensor_fill_edges_xlo_ylo(vlo.x-1, vlo.y-1, k, blen, vel, mxlo, mylo, bcvalxlo, bcvalylo, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, ylo_domain); + mltensor_fill_edges_xhi_ylo(vhi.x+1, vlo.y-1, k, blen, vel, mxhi, mylo, bcvalxhi, bcvalylo, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, ylo_domain); + mltensor_fill_edges_xlo_yhi(vlo.x-1, vhi.y+1, k, blen, vel, mxlo, myhi, bcvalxlo, bcvalyhi, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, yhi_domain); + mltensor_fill_edges_xhi_yhi(vhi.x+1, vhi.y+1, k, blen, vel, mxhi, myhi, bcvalxhi, bcvalyhi, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, yhi_domain); + } + + for (int j = vlo.y; j <= vhi.y; ++j) { + mltensor_fill_edges_xlo_zlo(vlo.x-1, j, vlo.z-1, blen, vel, mxlo, mzlo, bcvalxlo, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, zlo_domain); + mltensor_fill_edges_xhi_zlo(vhi.x+1, j, vlo.z-1, blen, vel, mxhi, mzlo, bcvalxhi, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, zlo_domain); + mltensor_fill_edges_xlo_zhi(vlo.x-1, j, vhi.z+1, blen, vel, mxlo, mzhi, bcvalxlo, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, zhi_domain); + mltensor_fill_edges_xhi_zhi(vhi.x+1, j, vhi.z+1, blen, vel, mxhi, mzhi, bcvalxhi, bcvalzhi, + bct, bcl, inhomog, 
maxorder, dxinv, xhi_domain, zhi_domain); + } + + for (int i = vlo.x; i <= vhi.x; ++i) { + mltensor_fill_edges_ylo_zlo(i, vlo.y-1, vlo.z-1, blen, vel, mylo, mzlo, bcvalylo, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, ylo_domain, zlo_domain); + mltensor_fill_edges_yhi_zlo(i, vhi.y+1, vlo.z-1, blen, vel, myhi, mzlo, bcvalyhi, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, yhi_domain, zlo_domain); + mltensor_fill_edges_ylo_zhi(i, vlo.y-1, vhi.z+1, blen, vel, mylo, mzhi, bcvalylo, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, ylo_domain, zhi_domain); + mltensor_fill_edges_yhi_zhi(i, vhi.y+1, vhi.z+1, blen, vel, myhi, mzhi, bcvalyhi, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, yhi_domain, zhi_domain); + } +} + +#ifdef AMREX_USE_GPU +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void mltensor_fill_edges (int const bid, int const tid, int const bdim, + Box const& vbox, // vbox: the valid box + Array4 const& vel, + Array4 const& mxlo, + Array4 const& mylo, + Array4 const& mzlo, + Array4 const& mxhi, + Array4 const& myhi, + Array4 const& mzhi, + Array4 const& bcvalxlo, + Array4 const& bcvalylo, + Array4 const& bcvalzlo, + Array4 const& bcvalxhi, + Array4 const& bcvalyhi, + Array4 const& bcvalzhi, + Array2D const& bct, + Array2D const& bcl, + int inhomog, int maxorder, + GpuArray const& dxinv, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const auto blen = amrex::length(vbox); + const auto vlo = amrex::lbound(vbox); + const auto vhi = amrex::ubound(vbox); + bool xlo_domain = (vlo.x == dlo.x); + bool ylo_domain = (vlo.y == dlo.y); + bool zlo_domain = (vlo.z == dlo.z); + bool xhi_domain = (vhi.x == dhi.x); + bool yhi_domain = (vhi.y == dhi.y); + bool zhi_domain = (vhi.z == dhi.z); + if (bid == 0) { + for (int k = vlo.z + tid; k <= vhi.z; k += bdim) { + mltensor_fill_edges_xlo_ylo(vlo.x-1, vlo.y-1, k, blen, vel, mxlo, mylo, bcvalxlo, bcvalylo, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, ylo_domain); } - case 1: { - // xhi & ylo - if (vhi.x == dhi.x && vlo.y == 
dlo.y) { - for (int k = vlo.z; k <= vhi.z; ++k) { - vel (vhi.x+1,vlo.y-1,k,icomp) - = vel(vhi.x ,vlo.y-1,k,icomp) - + vel(vhi.x+1,vlo.y ,k,icomp) - - vel(vhi.x ,vlo.y ,k,icomp); - } - } else if (vhi.x == dhi.x) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.y == dlo.y) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - for (int k = vlo.z; k <= vhi.z; ++k) { - if (mxhi(vhi.x+1,vlo.y-1,k) != BndryData::covered) { - Box bx(IntVect(vhi.x+1,vlo.y-1,k),IntVect(vhi.x+1,vlo.y-1,k)); - if (mylo(vhi.x,vlo.y-1,k) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } - } - } + } else if (bid == 1) { + for (int k = vlo.z + tid; k <= vhi.z; k += bdim) { + mltensor_fill_edges_xhi_ylo(vhi.x+1, vlo.y-1, k, blen, vel, mxhi, mylo, bcvalxhi, bcvalylo, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, ylo_domain); + } + } else if (bid == 2) { + for (int k = vlo.z + tid; k <= vhi.z; k += bdim) { + mltensor_fill_edges_xlo_yhi(vlo.x-1, vhi.y+1, k, blen, vel, mxlo, myhi, bcvalxlo, bcvalyhi, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, yhi_domain); + } + } else if (bid == 3) { + for (int k = vlo.z + tid; k <= vhi.z; k += bdim) { + mltensor_fill_edges_xhi_yhi(vhi.x+1, vhi.y+1, 
k, blen, vel, mxhi, myhi, bcvalxhi, bcvalyhi, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, yhi_domain); + } + } else if (bid == 4) { + for (int j = vlo.y + tid; j <= vhi.y; j += bdim) { + mltensor_fill_edges_xlo_zlo(vlo.x-1, j, vlo.z-1, blen, vel, mxlo, mzlo, bcvalxlo, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, zlo_domain); + } + } else if (bid == 5) { + for (int j = vlo.y + tid; j <= vhi.y; j += bdim) { + mltensor_fill_edges_xhi_zlo(vhi.x+1, j, vlo.z-1, blen, vel, mxhi, mzlo, bcvalxhi, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, zlo_domain); + } + } else if (bid == 6) { + for (int j = vlo.y + tid; j <= vhi.y; j += bdim) { + mltensor_fill_edges_xlo_zhi(vlo.x-1, j, vhi.z+1, blen, vel, mxlo, mzhi, bcvalxlo, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, xlo_domain, zhi_domain); + } + } else if (bid == 7) { + for (int j = vlo.y + tid; j <= vhi.y; j += bdim) { + mltensor_fill_edges_xhi_zhi(vhi.x+1, j, vhi.z+1, blen, vel, mxhi, mzhi, bcvalxhi, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, xhi_domain, zhi_domain); + } + } else if (bid == 8) { + for (int i = vlo.x + tid; i <= vhi.x; i += bdim) { + mltensor_fill_edges_ylo_zlo(i, vlo.y-1, vlo.z-1, blen, vel, mylo, mzlo, bcvalylo, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, ylo_domain, zlo_domain); + } + } else if (bid == 9) { + for (int i = vlo.x + tid; i <= vhi.x; i += bdim) { + mltensor_fill_edges_yhi_zlo(i, vhi.y+1, vlo.z-1, blen, vel, myhi, mzlo, bcvalyhi, bcvalzlo, + bct, bcl, inhomog, maxorder, dxinv, yhi_domain, zlo_domain); + } + } else if (bid == 10) { + for (int i = vlo.x + tid; i <= vhi.x; i += bdim) { + mltensor_fill_edges_ylo_zhi(i, vlo.y-1, vhi.z+1, blen, vel, mylo, mzhi, bcvalylo, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, ylo_domain, zhi_domain); + } + } else if (bid == 11) { + for (int i = vlo.x + tid; i <= vhi.x; i += bdim) { + mltensor_fill_edges_yhi_zhi(i, vhi.y+1, vhi.z+1, blen, vel, myhi, mzhi, bcvalyhi, bcvalzhi, + bct, bcl, inhomog, maxorder, dxinv, 
yhi_domain, zhi_domain); + } + } +} +#endif + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dz_on_xface (int i, int j, int k, int n, Array4 const& vel, Real dzi) noexcept +{ + return (vel(i,j,k+1,n)+vel(i-1,j,k+1,n)-vel(i,j,k-1,n)-vel(i-1,j,k-1,n))*(Real(0.25)*dzi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dz_on_yface (int i, int j, int k, int n, Array4 const& vel, Real dzi) noexcept +{ + return (vel(i,j,k+1,n)+vel(i,j-1,k+1,n)-vel(i,j,k-1,n)-vel(i,j-1,k-1,n))*(Real(0.25)*dzi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dx_on_zface (int i, int j, int k, int n, Array4 const& vel, Real dxi) noexcept +{ + return (vel(i+1,j,k,n)+vel(i+1,j,k-1,n)-vel(i-1,j,k,n)-vel(i-1,j,k-1,n))*(Real(0.25)*dxi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dy_on_zface (int i, int j, int k, int n, Array4 const& vel, Real dyi) noexcept +{ + return (vel(i,j+1,k,n)+vel(i,j+1,k-1,n)-vel(i,j-1,k,n)-vel(i,j-1,k-1,n))*(Real(0.25)*dyi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_cross_terms_fx (Box const& box, Array4 const& fx, + Array4 const& vel, + Array4 const& etax, + Array4 const& kapx, + GpuArray const& dxinv) noexcept +{ + const Real dyi = dxinv[1]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = Real(2./3.); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi); + Real dudz = mltensor_dz_on_xface(i,j,k,0,vel,dzi); + Real dwdz = mltensor_dz_on_xface(i,j,k,2,vel,dzi); + Real divu = dvdy + dwdz; + Real xif = kapx(i,j,k); + Real mun = Real(0.75)*(etax(i,j,k,0)-xif); // restore the original eta + Real mut = etax(i,j,k,1); + fx(i,j,k,0) = -mun*(-twoThirds*divu) - xif*divu; + fx(i,j,k,1) = -mut*(dudy); + fx(i,j,k,2) = -mut*(dudz); } - 
break; } - case 2: { - // xlo & yhi - if (vlo.x == dlo.x && vhi.y == dhi.y) { - for (int k = vlo.z; k <= vhi.z; ++k) { - vel (vlo.x-1,vhi.y+1,k,icomp) - = vel(vlo.x ,vhi.y+1,k,icomp) - + vel(vlo.x-1,vhi.y ,k,icomp) - - vel(vlo.x ,vhi.y ,k,icomp); - } - } else if (vlo.x == dlo.x) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - for (int k = vlo.z; k <= vhi.z; ++k) { - if (mxlo(vlo.x-1,vhi.y+1,k) != BndryData::covered) { - Box bx(IntVect(vlo.x-1,vhi.y+1,k),IntVect(vlo.x-1,vhi.y+1,k)); - if (myhi(vlo.x,vhi.y+1,k) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } - } - } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_cross_terms_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + Array4 const& etay, + Array4 const& kapy, + GpuArray const& dxinv) noexcept +{ + const Real dxi = dxinv[0]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = Real(2./3.); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + 
Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi); + Real dvdz = mltensor_dz_on_yface(i,j,k,1,vel,dzi); + Real dwdz = mltensor_dz_on_yface(i,j,k,2,vel,dzi); + Real divu = dudx + dwdz; + Real xif = kapy(i,j,k); + Real mun = Real(0.75)*(etay(i,j,k,1)-xif); // restore the original eta + Real mut = etay(i,j,k,0); + fy(i,j,k,0) = -mut*(dvdx); + fy(i,j,k,1) = -mun*(-twoThirds*divu) - xif*divu; + fy(i,j,k,2) = -mut*(dvdz); } - break; } - case 3: { - // xhi & yhi - if (vhi.x == dhi.x && vhi.y == dhi.y) { - for (int k = vlo.z; k <= vhi.z; ++k) { - vel (vhi.x+1,vhi.y+1,k,icomp) - = vel(vhi.x ,vhi.y+1,k,icomp) - + vel(vhi.x+1,vhi.y ,k,icomp) - - vel(vhi.x ,vhi.y ,k,icomp); - } - } else if (vhi.x == dhi.x) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.y == dhi.y) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),ydir,1); - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - for (int k = vlo.z; k <= vhi.z; ++k) { - if (mxhi(vhi.x+1,vhi.y+1,k) != BndryData::covered) { - Box bx(IntVect(vhi.x+1,vhi.y+1,k),IntVect(vhi.x+1,vhi.y+1,k)); - if (myhi(vhi.x,vhi.y+1,k) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } - } - } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE 
+void mltensor_cross_terms_fz (Box const& box, Array4 const& fz, + Array4 const& vel, + Array4 const& etaz, + Array4 const& kapz, + GpuArray const& dxinv) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + constexpr Real twoThirds = Real(2./3.); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + AMREX_PRAGMA_SIMD + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = mltensor_dx_on_zface(i,j,k,0,vel,dxi); + Real dwdx = mltensor_dx_on_zface(i,j,k,2,vel,dxi); + Real dvdy = mltensor_dy_on_zface(i,j,k,1,vel,dyi); + Real dwdy = mltensor_dy_on_zface(i,j,k,2,vel,dyi); + Real divu = dudx + dvdy; + Real xif = kapz(i,j,k); + Real mun = Real(0.75)*(etaz(i,j,k,2)-xif); // restore the original eta + Real mut = etaz(i,j,k,0); + fz(i,j,k,0) = -mut*(dwdx); + fz(i,j,k,1) = -mut*(dwdy); + fz(i,j,k,2) = -mun*(-twoThirds*divu) - xif*divu; } - break; } - case 4: { - // xlo & zlo - if (vlo.x == dlo.x && vlo.z == dlo.z) { - for (int j = vlo.y; j <= vhi.y; ++j) { - vel (vlo.x-1,j,vlo.z-1,icomp) - = vel(vlo.x ,j,vlo.z-1,icomp) - + vel(vlo.x-1,j,vlo.z ,icomp) - - vel(vlo.x ,j,vlo.z ,icomp); - } - } else if (vlo.x == dlo.x) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dz_on_xface (int i, int j, int k, int n, Array4 const& vel, Real dzi, + Array4 const& bvxlo, Array4 const& bvxhi, + Array2D const& bct, + 
Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddz; + if (i == dlo.x) { + if (bct(Orientation::xlo(),n) == AMREX_LO_DIRICHLET && bvxlo) { + if (k == dlo.z) { + ddz = (bvxlo(i-1,j,k ,n) * Real(-1.5) + + bvxlo(i-1,j,k+1,n) * Real(2.) + + bvxlo(i-1,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvxlo(i-1,j,k ,n) * Real(-1.5) + + bvxlo(i-1,j,k-1,n) * Real(2.) + + bvxlo(i-1,j,k-2,n) * Real(-0.5)) * dzi; } else { - for (int j = vlo.y; j <= vhi.y; ++j) { - if (mxlo(vlo.x-1,j,vlo.z-1) != BndryData::covered) { - Box bx(IntVect(vlo.x-1,j,vlo.z-1),IntVect(vlo.x-1,j,vlo.z-1)); - if (mzlo(vlo.x,j,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddz = (bvxlo(i-1,j,k+1,n)-bvxlo(i-1,j,k-1,n))*(Real(0.5)*dzi); } - break; + } else if (bct(Orientation::xlo(),n) == AMREX_LO_NEUMANN) { + ddz = (vel(i,j,k+1,n)-vel(i,j,k-1,n))*(Real(0.5)*dzi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); } - case 5: { - // xhi & zlo - if (vhi.x == dhi.x && vlo.z == dlo.z) { - for (int j = vlo.y; j <= vhi.y; ++j) { - vel (vhi.x+1,j,vlo.z-1,icomp) - = vel(vhi.x ,j,vlo.z-1,icomp) - + vel(vhi.x+1,j,vlo.z ,icomp) - - vel(vhi.x ,j,vlo.z ,icomp); - } - } else if (vhi.x == dhi.x) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * 
ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + } else if (i == dhi.x+1) { + if (bct(Orientation::xhi(),n) == AMREX_LO_DIRICHLET && bvxhi) { + if (k == dlo.z) { + ddz = (bvxhi(i,j,k ,n) * Real(-1.5) + + bvxhi(i,j,k+1,n) * Real(2.) + + bvxhi(i,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvxhi(i,j,k ,n) * Real(-1.5) + + bvxhi(i,j,k-1,n) * Real(2.) + + bvxhi(i,j,k-2,n) * Real(-0.5)) * dzi; } else { - for (int j = vlo.y; j <= vhi.y; ++j) { - if (mxhi(vhi.x+1,j,vlo.z-1) != BndryData::covered) { - Box bx(IntVect(vhi.x+1,j,vlo.z-1),IntVect(vhi.x+1,j,vlo.z-1)); - if (mzlo(vhi.x,j,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddz = (bvxhi(i,j,k+1,n)-bvxhi(i,j,k-1,n))*(Real(0.5)*dzi); } - break; + } else if (bct(Orientation::xhi(),n) == AMREX_LO_NEUMANN) { + ddz = (vel(i-1,j,k+1,n)-vel(i-1,j,k-1,n))*(Real(0.5)*dzi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); } - case 6: { - // xlo & zhi - if (vlo.x == dlo.x && vhi.z == dhi.z) { - for (int j = vlo.y; j <= vhi.y; ++j) { - vel (vlo.x-1,j,vhi.z+1,icomp) - = vel(vlo.x ,j,vhi.z+1,icomp) - + vel(vlo.x-1,j,vhi.z ,icomp) - - vel(vlo.x ,j,vhi.z ,icomp); - } - } else if (vlo.x == dlo.x) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - 
Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + } else { + ddz = mltensor_dz_on_xface(i,j,k,n,vel,dzi); + } + return ddz; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dz_on_yface (int i, int j, int k, int n, Array4 const& vel, Real dzi, + Array4 const& bvylo, Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddz; + if (j == dlo.y) { + if (bct(Orientation::ylo(),n) == AMREX_LO_DIRICHLET && bvylo) { + if (k == dlo.z) { + ddz = (bvylo(i,j-1,k ,n) * Real(-1.5) + + bvylo(i,j-1,k+1,n) * Real(2.) + + bvylo(i,j-1,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvylo(i,j-1,k ,n) * Real(-1.5) + + bvylo(i,j-1,k-1,n) * Real(2.) + + bvylo(i,j-1,k-2,n) * Real(-0.5)) * dzi; } else { - for (int j = vlo.y; j <= vhi.y; ++j) { - if (mxlo(vlo.x-1,j,vhi.z+1) != BndryData::covered) { - Box bx(IntVect(vlo.x-1,j,vhi.z+1),IntVect(vlo.x-1,j,vhi.z+1)); - if (mzhi(vlo.x,j,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxlo; - mllinop_apply_bc_x(Orientation::low, bx, blen.x, - vel, mxlo, bct[offset+icomp], bcl[offset+icomp], - bcvalxlo, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddz = (bvylo(i,j-1,k+1,n)-bvylo(i,j-1,k-1,n))*(Real(0.5)*dzi); } - break; + } else if (bct(Orientation::ylo(),n) == AMREX_LO_NEUMANN) { + ddz = (vel(i,j,k+1,n)-vel(i,j,k-1,n))*(Real(0.5)*dzi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); } - case 7: { - // xhi & zhi - if (vhi.x == dhi.x && vhi.z == dhi.z) { - for (int j = vlo.y; j <= vhi.y; ++j) { - vel 
(vhi.x+1,j,vhi.z+1,icomp) - = vel(vhi.x ,j,vhi.z+1,icomp) - + vel(vhi.x+1,j,vhi.z ,icomp) - - vel(vhi.x ,j,vhi.z ,icomp); - } - } else if (vhi.x == dhi.x) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,xdir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + } else if (j == dhi.y+1) { + if (bct(Orientation::yhi(),n) == AMREX_LO_DIRICHLET && bvyhi) { + if (k == dlo.z) { + ddz = (bvyhi(i,j,k ,n) * Real(-1.5) + + bvyhi(i,j,k+1,n) * Real(2.) + + bvyhi(i,j,k+2,n) * Real(-0.5)) * dzi; + } else if (k == dhi.z) { + ddz = -(bvyhi(i,j,k ,n) * Real(-1.5) + + bvyhi(i,j,k-1,n) * Real(2.) 
+ + bvyhi(i,j,k-2,n) * Real(-0.5)) * dzi; } else { - for (int j = vlo.y; j <= vhi.y; ++j) { - if (mxhi(vhi.x+1,j,vhi.z+1) != BndryData::covered) { - Box bx(IntVect(vhi.x+1,j,vhi.z+1),IntVect(vhi.x+1,j,vhi.z+1)); - if (mzhi(vhi.x,j,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oxhi; - mllinop_apply_bc_x(Orientation::high, bx, blen.x, - vel, mxhi, bct[offset+icomp], bcl[offset+icomp], - bcvalxhi, maxorder, dxinv[xdir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddz = (bvyhi(i,j,k+1,n)-bvyhi(i,j,k-1,n))*(Real(0.5)*dzi); } - break; + } else if (bct(Orientation::yhi(),n) == AMREX_LO_NEUMANN) { + ddz = (vel(i,j-1,k+1,n)-vel(i,j-1,k-1,n))*(Real(0.5)*dzi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddz = Real(0.); } - case 8: { - // ylo & zlo - if (vlo.y == dlo.y && vlo.z == dlo.z) { - for (int i = vlo.x; i <= vhi.x; ++i) { - vel (i,vlo.y-1,vlo.z-1,icomp) - = vel(i,vlo.y ,vlo.z-1,icomp) - + vel(i,vlo.y-1,vlo.z ,icomp) - - vel(i,vlo.y ,vlo.z ,icomp); - } - } else if (vlo.y == dlo.y) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - Box bx = amrex::adjCellLo(amrex::adjCellLo(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + } else { + ddz = mltensor_dz_on_yface(i,j,k,n,vel,dzi); + } + return ddz; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dx_on_zface (int i, int j, int k, int n, Array4 const& vel, Real dxi, + Array4 const& 
bvzlo, Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddx; + if (k == dlo.z) { + if (bct(Orientation::zlo(),n) == AMREX_LO_DIRICHLET && bvzlo) { + if (i == dlo.x) { + ddx = (bvzlo(i ,j,k-1,n) * Real(-1.5) + + bvzlo(i+1,j,k-1,n) * Real(2.) + + bvzlo(i+2,j,k-1,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvzlo(i ,j,k-1,n) * Real(-1.5) + + bvzlo(i-1,j,k-1,n) * Real(2.) + + bvzlo(i-2,j,k-1,n) * Real(-0.5)) * dxi; } else { - for (int i = vlo.x; i <= vhi.x; ++i) { - if (mylo(i,vlo.y-1,vlo.z-1) != BndryData::covered) { - Box bx(IntVect(i,vlo.y-1,vlo.z-1),IntVect(i,vlo.y-1,vlo.z-1)); - if (mzlo(i,vlo.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddx = (bvzlo(i+1,j,k-1,n)-bvzlo(i-1,j,k-1,n))*(Real(0.5)*dxi); } - break; + } else if (bct(Orientation::zlo(),n) == AMREX_LO_NEUMANN) { + ddx = (vel(i+1,j,k,n)-vel(i-1,j,k,n))*(Real(0.5)*dxi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); } - case 9: { - // yhi & zlo - if (vhi.y == dhi.y && vlo.z == dlo.z) { - for (int i = vlo.x; i <= vhi.x; ++i) { - vel (i,vhi.y+1,vlo.z-1,icomp) - = vel(i,vhi.y ,vlo.z-1,icomp) - + vel(i,vhi.y+1,vlo.z ,icomp) - - vel(i,vhi.y ,vlo.z ,icomp); - } - } else if (vhi.y == dhi.y) { - Box bx = amrex::adjCellLo(amrex::adjCellHi(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vlo.z == dlo.z) { - Box bx = 
amrex::adjCellLo(amrex::adjCellHi(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); + } else if (k == dhi.z+1) { + if (bct(Orientation::zhi(),n) == AMREX_LO_DIRICHLET && bvzhi) { + if (i == dlo.x) { + ddx = (bvzhi(i ,j,k,n) * Real(-1.5) + + bvzhi(i+1,j,k,n) * Real(2.) + + bvzhi(i+2,j,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvzhi(i ,j,k,n) * Real(-1.5) + + bvzhi(i-1,j,k,n) * Real(2.) + + bvzhi(i-2,j,k,n) * Real(-0.5)) * dxi; } else { - for (int i = vlo.x; i <= vhi.x; ++i) { - if (myhi(i,vhi.y+1,vlo.z-1) != BndryData::covered) { - Box bx(IntVect(i,vhi.y+1,vlo.z-1),IntVect(i,vhi.y+1,vlo.z-1)); - if (mzlo(i,vhi.y,vlo.z-1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozlo; - mllinop_apply_bc_z(Orientation::low, bx, blen.z, - vel, mzlo, bct[offset+icomp], bcl[offset+icomp], - bcvalzlo, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddx = (bvzhi(i+1,j,k,n)-bvzhi(i-1,j,k,n))*(Real(0.5)*dxi); } - break; + } else if (bct(Orientation::zhi(),n) == AMREX_LO_NEUMANN) { + ddx = (vel(i+1,j,k-1,n)-vel(i-1,j,k-1,n))*(Real(0.5)*dxi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); } - case 10: { - // ylo & zhi - if (vlo.y == dlo.y && vhi.z == dhi.z) { - for (int i = vlo.x; i <= vhi.x; ++i) { - vel (i,vlo.y-1,vhi.z+1,icomp) - = vel(i,vlo.y ,vhi.z+1,icomp) - + vel(i,vlo.y-1,vhi.z ,icomp) - - vel(i,vlo.y ,vhi.z ,icomp); - } - } else if (vlo.y == dlo.y) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], 
bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - Box bx = amrex::adjCellHi(amrex::adjCellLo(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + } else { + ddx = mltensor_dx_on_zface(i,j,k,n,vel,dxi); + } + return ddx; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dy_on_zface (int i, int j, int k, int n, Array4 const& vel, Real dyi, + Array4 const& bvzlo, Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddy; + if (k == dlo.z) { + if (bct(Orientation::zlo(),n) == AMREX_LO_DIRICHLET && bvzlo) { + if (j == dlo.y) { + ddy = (bvzlo(i,j ,k-1,n) * Real(-1.5) + + bvzlo(i,j+1,k-1,n) * Real(2.) + + bvzlo(i,j+2,k-1,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvzlo(i,j ,k-1,n) * Real(-1.5) + + bvzlo(i,j-1,k-1,n) * Real(2.) 
+ + bvzlo(i,j-2,k-1,n) * Real(-0.5)) * dyi; } else { - for (int i = vlo.x; i <= vhi.x; ++i) { - if (mylo(i,vlo.y-1,vhi.z+1) != BndryData::covered) { - Box bx(IntVect(i,vlo.y-1,vhi.z+1),IntVect(i,vlo.y-1,vhi.z+1)); - if (mzhi(i,vlo.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oylo; - mllinop_apply_bc_y(Orientation::low, bx, blen.y, - vel, mylo, bct[offset+icomp], bcl[offset+icomp], - bcvalylo, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddy = (bvzlo(i,j+1,k-1,n)-bvzlo(i,j-1,k-1,n))*(Real(0.5)*dyi); } - break; + } else if (bct(Orientation::zlo(),n) == AMREX_LO_NEUMANN) { + ddy = (vel(i,j+1,k,n)-vel(i,j-1,k,n))*(Real(0.5)*dyi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); } - case 11: { - // yhi & zhi - if (vhi.y == dhi.y && vhi.z == dhi.z) { - for (int i = vlo.x; i <= vhi.x; ++i) { - vel (i,vhi.y+1,vhi.z+1,icomp) - = vel(i,vhi.y ,vhi.z+1,icomp) - + vel(i,vhi.y+1,vhi.z ,icomp) - - vel(i,vhi.y ,vhi.z ,icomp); - } - } else if (vhi.y == dhi.y) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else if (vhi.z == dhi.z) { - Box bx = amrex::adjCellHi(amrex::adjCellHi(vbox,ydir,1),zdir,1); - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); + } else if (k == dhi.z+1) { + if (bct(Orientation::zhi(),n) == AMREX_LO_DIRICHLET && bvzhi) { + if (j == dlo.y) { + ddy = (bvzhi(i,j ,k,n) * Real(-1.5) + + bvzhi(i,j+1,k,n) * Real(2.) 
+ + bvzhi(i,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvzhi(i,j ,k,n) * Real(-1.5) + + bvzhi(i,j-1,k,n) * Real(2.) + + bvzhi(i,j-2,k,n) * Real(-0.5)) * dyi; } else { - for (int i = vlo.x; i <= vhi.x; ++i) { - if (myhi(i,vhi.y+1,vhi.z+1) != BndryData::covered) { - Box bx(IntVect(i,vhi.y+1,vhi.z+1),IntVect(i,vhi.y+1,vhi.z+1)); - if (mzhi(i,vhi.y,vhi.z+1) == BndryData::covered) { - int offset = AMREX_SPACEDIM * oyhi; - mllinop_apply_bc_y(Orientation::high, bx, blen.y, - vel, myhi, bct[offset+icomp], bcl[offset+icomp], - bcvalyhi, maxorder, dxinv[ydir], inhomog, icomp); - } else { - int offset = AMREX_SPACEDIM * ozhi; - mllinop_apply_bc_z(Orientation::high, bx, blen.z, - vel, mzhi, bct[offset+icomp], bcl[offset+icomp], - bcvalzhi, maxorder, dxinv[zdir], inhomog, icomp); - } - } - } + ddy = (bvzhi(i,j+1,k,n)-bvzhi(i,j-1,k,n))*(Real(0.5)*dyi); } - break; - } - default: {} + } else if (bct(Orientation::zhi(),n) == AMREX_LO_NEUMANN) { + ddy = (vel(i,j+1,k-1,n)-vel(i,j-1,k-1,n))*(Real(0.5)*dyi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); } + } else { + ddy = mltensor_dy_on_zface(i,j,k,n,vel,dyi); } + return ddy; } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE @@ -1057,7 +1844,13 @@ void mltensor_cross_terms_fx (Box const& box, Array4 const& fx, Array4 const& vel, Array4 const& etax, Array4 const& kapx, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept { const Real dyi = dxinv[1]; const Real dzi = dxinv[2]; @@ -1067,12 +1860,11 @@ void mltensor_cross_terms_fx (Box const& box, Array4 const& fx, for (int k = lo.z; k <= hi.z; ++k) { for (int j = lo.y; j <= hi.y; ++j) { - AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudy = (vel(i,j+1,k,0)+vel(i-1,j+1,k,0)-vel(i,j-1,k,0)-vel(i-1,j-1,k,0))*(Real(0.25)*dyi); - Real dvdy = 
(vel(i,j+1,k,1)+vel(i-1,j+1,k,1)-vel(i,j-1,k,1)-vel(i-1,j-1,k,1))*(Real(0.25)*dyi); - Real dudz = (vel(i,j,k+1,0)+vel(i-1,j,k+1,0)-vel(i,j,k-1,0)-vel(i-1,j,k-1,0))*(Real(0.25)*dzi); - Real dwdz = (vel(i,j,k+1,2)+vel(i-1,j,k+1,2)-vel(i,j,k-1,2)-vel(i-1,j,k-1,2))*(Real(0.25)*dzi); + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dudz = mltensor_dz_on_xface(i,j,k,0,vel,dzi,bvxlo,bvxhi,bct,dlo,dhi); + Real dwdz = mltensor_dz_on_xface(i,j,k,2,vel,dzi,bvxlo,bvxhi,bct,dlo,dhi); Real divu = dvdy + dwdz; Real xif = kapx(i,j,k); Real mun = Real(0.75)*(etax(i,j,k,0)-xif); // restore the original eta @@ -1090,7 +1882,13 @@ void mltensor_cross_terms_fy (Box const& box, Array4 const& fy, Array4 const& vel, Array4 const& etay, Array4 const& kapy, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept { const Real dxi = dxinv[0]; const Real dzi = dxinv[2]; @@ -1100,12 +1898,11 @@ void mltensor_cross_terms_fy (Box const& box, Array4 const& fy, for (int k = lo.z; k <= hi.z; ++k) { for (int j = lo.y; j <= hi.y; ++j) { - AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,k,0)+vel(i+1,j-1,k,0)-vel(i-1,j,k,0)-vel(i-1,j-1,k,0))*(Real(0.25)*dxi); - Real dvdx = (vel(i+1,j,k,1)+vel(i+1,j-1,k,1)-vel(i-1,j,k,1)-vel(i-1,j-1,k,1))*(Real(0.25)*dxi); - Real dvdz = (vel(i,j,k+1,1)+vel(i,j-1,k+1,1)-vel(i,j,k-1,1)-vel(i,j-1,k-1,1))*(Real(0.25)*dzi); - Real dwdz = (vel(i,j,k+1,2)+vel(i,j-1,k+1,2)-vel(i,j,k-1,2)-vel(i,j-1,k-1,2))*(Real(0.25)*dzi); + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdz = mltensor_dz_on_yface(i,j,k,1,vel,dzi,bvylo,bvyhi,bct,dlo,dhi); + Real dwdz = 
mltensor_dz_on_yface(i,j,k,2,vel,dzi,bvylo,bvyhi,bct,dlo,dhi); Real divu = dudx + dwdz; Real xif = kapy(i,j,k); Real mun = Real(0.75)*(etay(i,j,k,1)-xif); // restore the original eta @@ -1123,7 +1920,13 @@ void mltensor_cross_terms_fz (Box const& box, Array4 const& fz, Array4 const& vel, Array4 const& etaz, Array4 const& kapz, - GpuArray const& dxinv) noexcept + GpuArray const& dxinv, + Array4 const& bvzlo, + Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept { const Real dxi = dxinv[0]; const Real dyi = dxinv[1]; @@ -1133,12 +1936,11 @@ void mltensor_cross_terms_fz (Box const& box, Array4 const& fz, for (int k = lo.z; k <= hi.z; ++k) { for (int j = lo.y; j <= hi.y; ++j) { - AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,k,0)+vel(i+1,j,k-1,0)-vel(i-1,j,k,0)-vel(i-1,j,k-1,0))*(Real(0.25)*dxi); - Real dwdx = (vel(i+1,j,k,2)+vel(i+1,j,k-1,2)-vel(i-1,j,k,2)-vel(i-1,j,k-1,2))*(Real(0.25)*dxi); - Real dvdy = (vel(i,j+1,k,1)+vel(i,j+1,k-1,1)-vel(i,j-1,k,1)-vel(i,j-1,k-1,1))*(Real(0.25)*dyi); - Real dwdy = (vel(i,j+1,k,2)+vel(i,j+1,k-1,2)-vel(i,j-1,k,2)-vel(i,j-1,k-1,2))*(Real(0.25)*dyi); + Real dudx = mltensor_dx_on_zface(i,j,k,0,vel,dxi,bvzlo,bvzhi,bct,dlo,dhi); + Real dwdx = mltensor_dx_on_zface(i,j,k,2,vel,dxi,bvzlo,bvzhi,bct,dlo,dhi); + Real dvdy = mltensor_dy_on_zface(i,j,k,1,vel,dyi,bvzlo,bvzhi,bct,dlo,dhi); + Real dwdy = mltensor_dy_on_zface(i,j,k,2,vel,dyi,bvzlo,bvzhi,bct,dlo,dhi); Real divu = dudx + dvdy; Real xif = kapz(i,j,k); Real mun = Real(0.75)*(etaz(i,j,k,2)-xif); // restore the original eta @@ -1242,13 +2044,13 @@ void mltensor_vel_grads_fx (Box const& box, Array4 const& fx, Real dvdx = (vel(i,j,k,1) - vel(i-1,j,k,1))*dxi; Real dwdx = (vel(i,j,k,2) - vel(i-1,j,k,2))*dxi; - Real dudy = (vel(i,j+1,k,0)+vel(i-1,j+1,k,0)-vel(i,j-1,k,0)-vel(i-1,j-1,k,0))*(Real(0.25)*dyi); - Real dvdy = (vel(i,j+1,k,1)+vel(i-1,j+1,k,1)-vel(i,j-1,k,1)-vel(i-1,j-1,k,1))*(Real(0.25)*dyi); - Real dwdy = 
(vel(i,j+1,k,2)+vel(i-1,j+1,k,2)-vel(i,j-1,k,2)-vel(i-1,j-1,k,2))*(Real(0.25)*dyi); + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi); + Real dvdy = mltensor_dy_on_xface(i,j,k,1,vel,dyi); + Real dwdy = mltensor_dy_on_xface(i,j,k,2,vel,dyi); - Real dudz = (vel(i,j,k+1,0)+vel(i-1,j,k+1,0)-vel(i,j,k-1,0)-vel(i-1,j,k-1,0))*(Real(0.25)*dzi); - Real dvdz = (vel(i,j,k+1,1)+vel(i-1,j,k+1,1)-vel(i,j,k-1,1)-vel(i-1,j,k-1,1))*(Real(0.25)*dzi); - Real dwdz = (vel(i,j,k+1,2)+vel(i-1,j,k+1,2)-vel(i,j,k-1,2)-vel(i-1,j,k-1,2))*(Real(0.25)*dzi); + Real dudz = mltensor_dz_on_xface(i,j,k,0,vel,dzi); + Real dvdz = mltensor_dz_on_xface(i,j,k,1,vel,dzi); + Real dwdz = mltensor_dz_on_xface(i,j,k,2,vel,dzi); fx(i,j,k,0) = dudx; fx(i,j,k,1) = dvdx; @@ -1281,17 +2083,17 @@ void mltensor_vel_grads_fy (Box const& box, Array4 const& fy, AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,k,0)+vel(i+1,j-1,k,0)-vel(i-1,j,k,0)-vel(i-1,j-1,k,0))*(Real(0.25)*dxi); - Real dvdx = (vel(i+1,j,k,1)+vel(i+1,j-1,k,1)-vel(i-1,j,k,1)-vel(i-1,j-1,k,1))*(Real(0.25)*dxi); - Real dwdx = (vel(i+1,j,k,2)+vel(i+1,j-1,k,2)-vel(i-1,j,k,2)-vel(i-1,j-1,k,2))*(Real(0.25)*dxi); + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi); + Real dwdx = mltensor_dx_on_yface(i,j,k,2,vel,dxi); Real dudy = (vel(i,j,k,0) - vel(i,j-1,k,0))*dyi; Real dvdy = (vel(i,j,k,1) - vel(i,j-1,k,1))*dyi; Real dwdy = (vel(i,j,k,2) - vel(i,j-1,k,2))*dyi; - Real dudz = (vel(i,j,k+1,0)+vel(i,j-1,k+1,0)-vel(i,j,k-1,0)-vel(i,j-1,k-1,0))*(Real(0.25)*dzi); - Real dvdz = (vel(i,j,k+1,1)+vel(i,j-1,k+1,1)-vel(i,j,k-1,1)-vel(i,j-1,k-1,1))*(Real(0.25)*dzi); - Real dwdz = (vel(i,j,k+1,2)+vel(i,j-1,k+1,2)-vel(i,j,k-1,2)-vel(i,j-1,k-1,2))*(Real(0.25)*dzi); + Real dudz = mltensor_dz_on_yface(i,j,k,0,vel,dzi); + Real dvdz = mltensor_dz_on_yface(i,j,k,1,vel,dzi); + Real dwdz = mltensor_dz_on_yface(i,j,k,2,vel,dzi); fy(i,j,k,0) = dudx; fy(i,j,k,1) = dvdx; @@ -1324,13 +2126,13 @@ void 
mltensor_vel_grads_fz (Box const& box, Array4 const& fz, AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { - Real dudx = (vel(i+1,j,k,0)+vel(i+1,j,k-1,0)-vel(i-1,j,k,0)-vel(i-1,j,k-1,0))*(Real(0.25)*dxi); - Real dvdx = (vel(i+1,j,k,1)+vel(i+1,j,k-1,1)-vel(i-1,j,k,1)-vel(i-1,j,k-1,1))*(Real(0.25)*dxi); - Real dwdx = (vel(i+1,j,k,2)+vel(i+1,j,k-1,2)-vel(i-1,j,k,2)-vel(i-1,j,k-1,2))*(Real(0.25)*dxi); + Real dudx = mltensor_dx_on_zface(i,j,k,0,vel,dxi); + Real dvdx = mltensor_dx_on_zface(i,j,k,1,vel,dxi); + Real dwdx = mltensor_dx_on_zface(i,j,k,2,vel,dxi); - Real dudy = (vel(i,j+1,k,0)+vel(i,j+1,k-1,0)-vel(i,j-1,k,0)-vel(i,j-1,k-1,0))*(Real(0.25)*dyi); - Real dvdy = (vel(i,j+1,k,1)+vel(i,j+1,k-1,1)-vel(i,j-1,k,1)-vel(i,j-1,k-1,1))*(Real(0.25)*dyi); - Real dwdy = (vel(i,j+1,k,2)+vel(i,j+1,k-1,2)-vel(i,j-1,k,2)-vel(i,j-1,k-1,2))*(Real(0.25)*dyi); + Real dudy = mltensor_dy_on_zface(i,j,k,0,vel,dyi); + Real dvdy = mltensor_dy_on_zface(i,j,k,1,vel,dyi); + Real dwdy = mltensor_dy_on_zface(i,j,k,2,vel,dyi); Real dudz = (vel(i,j,k,0) - vel(i,j,k-1,0))*dzi; Real dvdz = (vel(i,j,k,1) - vel(i,j,k-1,1))*dzi; @@ -1351,6 +2153,138 @@ void mltensor_vel_grads_fz (Box const& box, Array4 const& fz, } } +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_vel_grads_fx (Box const& box, Array4 const& fx, + Array4 const& vel, + GpuArray const& dxinv, + Array4 const& bvxlo, + Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = (vel(i,j,k,0) - vel(i-1,j,k,0))*dxi; + Real dvdx = (vel(i,j,k,1) - vel(i-1,j,k,1))*dxi; + Real dwdx = (vel(i,j,k,2) - vel(i-1,j,k,2))*dxi; + Real dudy = mltensor_dy_on_xface(i,j,k,0,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dvdy = 
mltensor_dy_on_xface(i,j,k,1,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dwdy = mltensor_dy_on_xface(i,j,k,2,vel,dyi,bvxlo,bvxhi,bct,dlo,dhi); + Real dudz = mltensor_dz_on_xface(i,j,k,0,vel,dzi,bvxlo,bvxhi,bct,dlo,dhi); + Real dvdz = mltensor_dz_on_xface(i,j,k,1,vel,dzi,bvxlo,bvxhi,bct,dlo,dhi); + Real dwdz = mltensor_dz_on_xface(i,j,k,2,vel,dzi,bvxlo,bvxhi,bct,dlo,dhi); + fx(i,j,k,0) = dudx; + fx(i,j,k,1) = dvdx; + fx(i,j,k,2) = dwdx; + fx(i,j,k,3) = dudy; + fx(i,j,k,4) = dvdy; + fx(i,j,k,5) = dwdy; + fx(i,j,k,6) = dudz; + fx(i,j,k,7) = dvdz; + fx(i,j,k,8) = dwdz; + + } + } + } +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void mltensor_vel_grads_fy (Box const& box, Array4 const& fy, + Array4 const& vel, + GpuArray const& dxinv, + Array4 const& bvylo, + Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = mltensor_dx_on_yface(i,j,k,0,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdx = mltensor_dx_on_yface(i,j,k,1,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dwdx = mltensor_dx_on_yface(i,j,k,2,vel,dxi,bvylo,bvyhi,bct,dlo,dhi); + Real dudy = (vel(i,j,k,0) - vel(i,j-1,k,0))*dyi; + Real dvdy = (vel(i,j,k,1) - vel(i,j-1,k,1))*dyi; + Real dwdy = (vel(i,j,k,2) - vel(i,j-1,k,2))*dyi; + Real dudz = mltensor_dz_on_yface(i,j,k,0,vel,dzi,bvylo,bvyhi,bct,dlo,dhi); + Real dvdz = mltensor_dz_on_yface(i,j,k,1,vel,dzi,bvylo,bvyhi,bct,dlo,dhi); + Real dwdz = mltensor_dz_on_yface(i,j,k,2,vel,dzi,bvylo,bvyhi,bct,dlo,dhi); + fy(i,j,k,0) = dudx; + fy(i,j,k,1) = dvdx; + fy(i,j,k,2) = dwdx; + fy(i,j,k,3) = dudy; + fy(i,j,k,4) = dvdy; + fy(i,j,k,5) = dwdy; + fy(i,j,k,6) = dudz; + fy(i,j,k,7) = dvdz; + fy(i,j,k,8) = dwdz; + + } + } + } +} + +AMREX_GPU_HOST_DEVICE 
AMREX_FORCE_INLINE +void mltensor_vel_grads_fz (Box const& box, Array4 const& fz, + Array4 const& vel, + GpuArray const& dxinv, + Array4 const& bvzlo, + Array4 const& bvzhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + const Real dxi = dxinv[0]; + const Real dyi = dxinv[1]; + const Real dzi = dxinv[2]; + const auto lo = amrex::lbound(box); + const auto hi = amrex::ubound(box); + + for (int k = lo.z; k <= hi.z; ++k) { + for (int j = lo.y; j <= hi.y; ++j) { + for (int i = lo.x; i <= hi.x; ++i) { + Real dudx = mltensor_dx_on_zface(i,j,k,0,vel,dxi,bvzlo,bvzhi,bct,dlo,dhi); + Real dvdx = mltensor_dx_on_zface(i,j,k,1,vel,dxi,bvzlo,bvzhi,bct,dlo,dhi); + Real dwdx = mltensor_dx_on_zface(i,j,k,2,vel,dxi,bvzlo,bvzhi,bct,dlo,dhi); + Real dudy = mltensor_dy_on_zface(i,j,k,0,vel,dyi,bvzlo,bvzhi,bct,dlo,dhi); + Real dvdy = mltensor_dy_on_zface(i,j,k,1,vel,dyi,bvzlo,bvzhi,bct,dlo,dhi); + Real dwdy = mltensor_dy_on_zface(i,j,k,2,vel,dyi,bvzlo,bvzhi,bct,dlo,dhi); + Real dudz = (vel(i,j,k,0) - vel(i,j,k-1,0))*dzi; + Real dvdz = (vel(i,j,k,1) - vel(i,j,k-1,1))*dzi; + Real dwdz = (vel(i,j,k,2) - vel(i,j,k-1,2))*dzi; + fz(i,j,k,0) = dudx; + fz(i,j,k,1) = dvdx; + fz(i,j,k,2) = dwdx; + fz(i,j,k,3) = dudy; + fz(i,j,k,4) = dvdy; + fz(i,j,k,5) = dwdy; + fz(i,j,k,6) = dudz; + fz(i,j,k,7) = dvdz; + fz(i,j,k,8) = dwdz; + + } + } + } +} + } #endif diff --git a/Src/LinearSolvers/MLMG/AMReX_MLTensor_K.H b/Src/LinearSolvers/MLMG/AMReX_MLTensor_K.H index 4440f57e7a8..33457ec1ced 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLTensor_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLTensor_K.H @@ -5,6 +5,123 @@ #include #include +namespace amrex { + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dy_on_xface (int i, int j, int k, int n, Array4 const& vel, Real dyi) noexcept +{ + return (vel(i,j+1,k,n)+vel(i-1,j+1,k,n)-vel(i,j-1,k,n)-vel(i-1,j-1,k,n))*(Real(0.25)*dyi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dx_on_yface (int i, int j, int k, int n, Array4 
const& vel, Real dxi) noexcept +{ + return (vel(i+1,j,k,n)+vel(i+1,j-1,k,n)-vel(i-1,j,k,n)-vel(i-1,j-1,k,n))*(Real(0.25)*dxi); +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dy_on_xface (int i, int j, int k, int n, Array4 const& vel, Real dyi, + Array4 const& bvxlo, Array4 const& bvxhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddy; + if (i == dlo.x) { + if (bct(Orientation::xlo(),n) == AMREX_LO_DIRICHLET && bvxlo) { + if (j == dlo.y) { + ddy = (bvxlo(i-1,j ,k,n) * Real(-1.5) + + bvxlo(i-1,j+1,k,n) * Real(2.) + + bvxlo(i-1,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvxlo(i-1,j ,k,n) * Real(-1.5) + + bvxlo(i-1,j-1,k,n) * Real(2.) + + bvxlo(i-1,j-2,k,n) * Real(-0.5)) * dyi; + } else { + ddy = (bvxlo(i-1,j+1,k,n)-bvxlo(i-1,j-1,k,n))*(Real(0.5)*dyi); + } + } else if (bct(Orientation::xlo(),n) == AMREX_LO_NEUMANN) { + ddy = (vel(i,j+1,k,n)-vel(i,j-1,k,n))*(Real(0.5)*dyi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else if (i == dhi.x+1) { + if (bct(Orientation::xhi(),n) == AMREX_LO_DIRICHLET && bvxhi) { + if (j == dlo.y) { + ddy = (bvxhi(i,j ,k,n) * Real(-1.5) + + bvxhi(i,j+1,k,n) * Real(2.) + + bvxhi(i,j+2,k,n) * Real(-0.5)) * dyi; + } else if (j == dhi.y) { + ddy = -(bvxhi(i,j ,k,n) * Real(-1.5) + + bvxhi(i,j-1,k,n) * Real(2.) 
+ + bvxhi(i,j-2,k,n) * Real(-0.5)) * dyi; + } else { + ddy = (bvxhi(i,j+1,k,n)-bvxhi(i,j-1,k,n))*(Real(0.5)*dyi); + } + } else if (bct(Orientation::xhi(),n) == AMREX_LO_NEUMANN) { + ddy = (vel(i-1,j+1,k,n)-vel(i-1,j-1,k,n))*(Real(0.5)*dyi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddy = Real(0.); + } + } else { + ddy = mltensor_dy_on_xface(i,j,k,n,vel,dyi); + } + return ddy; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real mltensor_dx_on_yface (int i, int j, int k, int n, Array4 const& vel, Real dxi, + Array4 const& bvylo, Array4 const& bvyhi, + Array2D const& bct, + Dim3 const& dlo, Dim3 const& dhi) noexcept +{ + Real ddx; + if (j == dlo.y) { + if (bct(Orientation::ylo(),n) == AMREX_LO_DIRICHLET && bvylo) { + if (i == dlo.x) { + ddx = (bvylo(i ,j-1,k,n) * Real(-1.5) + + bvylo(i+1,j-1,k,n) * Real(2.) + + bvylo(i+2,j-1,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvylo(i ,j-1,k,n) * Real(-1.5) + + bvylo(i-1,j-1,k,n) * Real(2.) + + bvylo(i-2,j-1,k,n) * Real(-0.5)) * dxi; + } else { + ddx = (bvylo(i+1,j-1,k,n)-bvylo(i-1,j-1,k,n))*(Real(0.5)*dxi); + } + } else if (bct(Orientation::ylo(),n) == AMREX_LO_NEUMANN) { + ddx = (vel(i+1,j,k,n)-vel(i-1,j,k,n))*(Real(0.5)*dxi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else if (j == dhi.y+1) { + if (bct(Orientation::yhi(),n) == AMREX_LO_DIRICHLET && bvyhi) { + if (i == dlo.x) { + ddx = (bvyhi(i ,j,k,n) * Real(-1.5) + + bvyhi(i+1,j,k,n) * Real(2.) + + bvyhi(i+2,j,k,n) * Real(-0.5)) * dxi; + } else if (i == dhi.x) { + ddx = -(bvyhi(i ,j,k,n) * Real(-1.5) + + bvyhi(i-1,j,k,n) * Real(2.) 
+ + bvyhi(i-2,j,k,n) * Real(-0.5)) * dxi; + } else { + ddx = (bvyhi(i+1,j,k,n)-bvyhi(i-1,j,k,n))*(Real(0.5)*dxi); + } + } else if (bct(Orientation::yhi(),n) == AMREX_LO_NEUMANN) { + ddx = (vel(i+1,j-1,k,n)-vel(i-1,j-1,k,n))*(Real(0.5)*dxi); + } else { // AMREX_LO_REFLECT_ODD or homogeneous Dirichlet + ddx = Real(0.); + } + } else { + ddx = mltensor_dx_on_yface(i,j,k,n,vel,dxi); + } + return ddx; +} +} + #if (AMREX_SPACEDIM == 1) #include #elif (AMREX_SPACEDIM == 2) diff --git a/Src/LinearSolvers/OpenBC/AMReX_OpenBC.H b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.H new file mode 100644 index 00000000000..00d589e34b4 --- /dev/null +++ b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.H @@ -0,0 +1,141 @@ +#ifndef AMREX_OPENBC_H_ +#define AMREX_OPENBC_H_ +#include + +#include +#include + +namespace amrex +{ + +namespace openbc { + + static constexpr int M = 7; // highest order of moments + static constexpr int P = 3; + + struct Moments + { + typedef GpuArray array_type; + array_type mom; + Real x, y, z; + Orientation face; + }; + + struct MomTag + { + Array4 gp; + Box b2d; + Orientation face; + int offset; + }; + + std::ostream& operator<< (std::ostream& os, Moments const& mom); +} + +#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP) +template<> +struct Gpu::SharedMemory +{ + AMREX_GPU_DEVICE openbc::Moments::array_type* dataPtr () noexcept { + AMREX_HIP_OR_CUDA(HIP_DYNAMIC_SHARED(openbc::Moments::array_type,amrex_openbc_momarray);, + extern __shared__ openbc::Moments::array_type amrex_openbc_momarray[];) + return amrex_openbc_momarray; + } +}; +#endif + +/** + * \brief Open Boundary Poisson Solver + * + * References: + * (1) The Solution of Poisson's Equation for Isolated Source + * Distributions, R. A. James, 1977, JCP 25, 71 + * (2) A Local Corrections Algorithm for Solving Poisson's Equation in Three + * Dimensions, P. McCorquodale, P. Colella, G. T. Balls, & S. B. 
Baden, + * 2007, Communications in Applied Mathematics and Computational Science, + * 2, 1, 57-81 + */ +class OpenBCSolver +{ +public: + OpenBCSolver (); + + OpenBCSolver (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info = LPInfo()); + + ~OpenBCSolver (); + + OpenBCSolver (const OpenBCSolver&) = delete; + OpenBCSolver (OpenBCSolver&&) = delete; + OpenBCSolver& operator= (const OpenBCSolver&) = delete; + OpenBCSolver& operator= (OpenBCSolver&&) = delete; + + void define (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info = LPInfo()); + + void setVerbose (int v) noexcept; + void setBottomVerbose (int v) noexcept; + + void useHypre (bool use_hypre) noexcept; + + Real solve (const Vector& a_sol, const Vector& a_rhs, + Real a_tol_rel, Real a_tol_abs); + +public: // public for cuda + + void compute_moments (Gpu::DeviceVector& moments); + void compute_potential (Gpu::DeviceVector const& moments); + void interpolate_potential (MultiFab& solg); + +private: + +#ifdef AMREX_USE_MPI + void bcast_moments (Gpu::DeviceVector& moments); +#endif + + int m_verbose = 0; + int m_bottom_verbose = 0; + Vector m_geom; + Vector m_grids; + Vector m_dmap; + LPInfo m_info; + std::unique_ptr m_poisson_1; + std::unique_ptr m_poisson_2; + std::unique_ptr m_mlmg_1; + std::unique_ptr m_mlmg_2; + BottomSolver m_bottom_solver_type = BottomSolver::bicgstab; + + int m_coarsen_ratio = 0; + Array m_dpdn; + Gpu::PinnedVector m_momtags_h; +#ifdef AMREX_USE_GPU + Gpu::DeviceVector m_momtags_d; + Gpu::PinnedVector m_ngpublocks_h; + Gpu::DeviceVector m_ngpublocks_d; + int m_nthreads_momtag; +#endif + + int m_nblocks_local = 0; + int m_nblocks = 0; +#ifdef AMREX_USE_MPI + Vector m_countvec; + Vector m_offset; +#endif + + IntVect m_ngrowdomain; + MultiFab m_crse_grown_faces_phi; + MultiFab m_phind; + BoxArray m_bag; + + BoxArray m_ba_all; + DistributionMapping m_dm_all; + Geometry m_geom_all; +}; + +} + +#endif diff --git 
a/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp new file mode 100644 index 00000000000..9e320d7a55f --- /dev/null +++ b/Src/LinearSolvers/OpenBC/AMReX_OpenBC.cpp @@ -0,0 +1,864 @@ +#include +#include +#include + +namespace amrex +{ + +OpenBCSolver::OpenBCSolver () {} + +OpenBCSolver::OpenBCSolver (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info) +{ + define(a_geom, a_grids, a_dmap, a_info); +} + +OpenBCSolver::~OpenBCSolver () {} + +void OpenBCSolver::define (const Vector& a_geom, + const Vector& a_grids, + const Vector& a_dmap, + const LPInfo& a_info) +{ + BL_PROFILE("OpenBCSoler::define()"); + + m_geom = a_geom; + m_grids = a_grids; + m_dmap = a_dmap; + m_info = a_info; + for (auto& grids : m_grids) { + grids.enclosedCells(); + } + + Box const domain0 = m_geom[0].Domain(); + m_coarsen_ratio = 8; + AMREX_ALWAYS_ASSERT(domain0.coarsenable(m_coarsen_ratio)); + int N1d = static_cast(std::round(std::pow(domain0.d_numPts(),1./3.))); + while (domain0.coarsenable(m_coarsen_ratio*2) + && 4*m_coarsen_ratio*m_coarsen_ratio <= N1d) { + m_coarsen_ratio *= 2; + } + + int ntags = 0; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + Box lo = amrex::coarsen(amrex::bdryLo(domain0, idim), m_coarsen_ratio); + Box hi = amrex::coarsen(amrex::bdryHi(domain0, idim), m_coarsen_ratio); + BoxList bl({lo,hi}); + IntVect chunk = lo.length(); + while (bl.size() < ParallelContext::NProcsSub()) { + IntVect chunk_prev = chunk; + for (int jdim = AMREX_SPACEDIM-1; jdim >= 0; --jdim) { + if (jdim != idim) { + int new_chunk_size = chunk[jdim] / 2; + if (bl.size() < ParallelContext::NProcsSub() + && new_chunk_size > 0) { + chunk[jdim] = new_chunk_size; + bl.maxSize(chunk); + } + } + } + if (chunk == chunk_prev) { + break; + } + } + int mgs = std::max(1, 256/m_coarsen_ratio); + bl.maxSize(mgs); + bl.refine(m_coarsen_ratio); + BoxArray ba2d(std::move(bl)); + DistributionMapping dm2d{ba2d}; + 
m_dpdn[idim].define(ba2d, dm2d, 1, 0); + ntags += m_dpdn[idim].local_size(); + } + + m_momtags_h.reserve(ntags); + int nblocks = 0; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + for (MFIter mfi(m_dpdn[idim]); mfi.isValid(); ++mfi) { + Box const& b2d = mfi.validbox(); + Orientation::Side side = (b2d.smallEnd(idim) == domain0.smallEnd(idim)) + ? Orientation::low : Orientation::high; + Orientation face(idim, side); + m_momtags_h.push_back({m_dpdn[idim].const_array(mfi), b2d, face, + nblocks}); + nblocks += static_cast(b2d.numPts()) + / (m_coarsen_ratio*m_coarsen_ratio); + } + } + m_nblocks_local = nblocks; + +#ifdef AMREX_USE_GPU + if (ntags > 0) { + m_momtags_d.resize(ntags); + Gpu::copyAsync(Gpu::hostToDevice, m_momtags_h.begin(), m_momtags_h.end(), m_momtags_d.begin()); + + m_nthreads_momtag = (m_coarsen_ratio == 8) ? 64 : 128; + int ntotgpublocks = 0; + m_ngpublocks_h.reserve(ntags+1); + for (auto const& tag : m_momtags_h) { + m_ngpublocks_h.push_back(ntotgpublocks); + Box cb2d = amrex::coarsen(tag.b2d, m_coarsen_ratio); + ntotgpublocks += static_cast(cb2d.numPts()); + } + m_ngpublocks_h.push_back(ntotgpublocks); + m_ngpublocks_d.resize(m_ngpublocks_h.size()); + Gpu::copyAsync(Gpu::hostToDevice, m_ngpublocks_h.begin(), m_ngpublocks_h.end(), + m_ngpublocks_d.begin()); + } +#endif + + auto const dx = m_geom[0].CellSize(); + Real dmax = amrex::max(std::sqrt(dx[0]*dx[0]+dx[1]*dx[1]), + std::sqrt(dx[0]*dx[0]+dx[2]*dx[2]), + std::sqrt(dx[1]*dx[1]+dx[2]*dx[2])); + m_ngrowdomain[0] = static_cast(std::ceil(dmax/dx[0])) * m_coarsen_ratio; + m_ngrowdomain[1] = static_cast(std::ceil(dmax/dx[1])) * m_coarsen_ratio; + m_ngrowdomain[2] = static_cast(std::ceil(dmax/dx[2])) * m_coarsen_ratio; + // This is the minimal size we need to embiggen the domain. 
+ + Box const domain1 = amrex::grow(domain0, m_ngrowdomain); + BoxList bl_crse_grown_faces(IndexType::TheNodeType()); + for (OrientationIter oit; oit.isValid(); ++oit) { + Orientation face = oit(); + Box face_box = amrex::surroundingNodes(amrex::bdryNode(domain1,face)); + face_box.coarsen(m_coarsen_ratio); + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + if (idim != face.coordDir()) { + face_box.grow(idim,openbc::P); + } + } + bl_crse_grown_faces.push_back(face_box); + } + + bl_crse_grown_faces.maxSize(16); // xxxxx make this a parameter? + BoxArray ba_crse_grown_faces(std::move(bl_crse_grown_faces)); + DistributionMapping dm_crse_grown_faces(ba_crse_grown_faces); + m_crse_grown_faces_phi.define(ba_crse_grown_faces, dm_crse_grown_faces, 1, 0); + + BoxList blg = amrex::boxDiff(domain1, domain0); + blg.maxSize(std::max(64,m_coarsen_ratio)); // xxxxx make this a parameter? + m_bag = BoxArray(std::move(blg)); + DistributionMapping dmg(m_bag); + m_phind.define(amrex::coarsen(amrex::convert(m_bag,IntVect(1)),m_coarsen_ratio), + dmg, 1, openbc::P); + + BoxList bl0 = m_grids[0].boxList(); + BoxList bl1 = m_bag.boxList(); + Vector p0 = m_dmap[0].ProcessorMap(); + Vector p1 = dmg.ProcessorMap(); + bl0.join(bl1); + p0.insert(p0.end(), p1.begin(), p1.end()); + IntVect const offset = -domain1.smallEnd(); + for (auto& b : bl0) { + b.shift(offset); + } + m_ba_all = BoxArray(std::move(bl0)); + m_dm_all = DistributionMapping(std::move(p0)); + + auto const problo = m_geom[0].ProbLo(); + auto const probhi = m_geom[0].ProbHi(); + std::array problo_all, probhi_all; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + problo_all[idim] = problo[idim] - m_ngrowdomain[idim]*dx[idim]; + probhi_all[idim] = probhi[idim] + m_ngrowdomain[idim]*dx[idim]; + } + m_geom_all = Geometry(amrex::shift(domain1,offset), + RealBox(problo_all,probhi_all), + m_geom[0].Coord(), m_geom[0].isPeriodic()); +} + +void OpenBCSolver::setVerbose (int v) noexcept +{ + m_verbose = v; +} + +void 
OpenBCSolver::setBottomVerbose (int v) noexcept +{ + m_bottom_verbose = v; +} + +void OpenBCSolver::useHypre (bool use_hypre) noexcept +{ + if (use_hypre) { + m_bottom_solver_type = BottomSolver::hypre; + m_info.setMaxCoarseningLevel(0); +#ifndef AMREX_USE_HYPRE + amrex::Abort("OpenBCSolver: Must enable Hypre support to use it."); +#endif + } +} + +Real OpenBCSolver::solve (const Vector& a_sol, + const Vector& a_rhs, + Real a_tol_rel, Real a_tol_abs) +{ + BL_PROFILE("OpenBCSolver::solve()"); + + auto solve_start_time = amrex::second(); + + int nlevels = m_geom.size(); + + BL_PROFILE_VAR("OpenBCSolver::MG1", blp_mg1); + + if (m_poisson_1 == nullptr) { + m_poisson_1 = std::make_unique(m_geom, m_grids, m_dmap, m_info); + m_poisson_1->setVerbose(m_verbose); + m_poisson_1->setMaxOrder(4); + m_poisson_1->setDomainBC({AMREX_D_DECL(LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet)}, + {AMREX_D_DECL(LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet)}); + for (int ilev = 0; ilev < nlevels; ++ilev) { + m_poisson_1->setLevelBC(ilev, nullptr); + } + + m_mlmg_1 = std::make_unique(*m_poisson_1); + m_mlmg_1->setVerbose(m_verbose); + m_mlmg_1->setBottomVerbose(m_bottom_verbose); + m_mlmg_1->setBottomSolver(m_bottom_solver_type); +#ifdef AMREX_USE_HYPRE + if (m_bottom_solver_type == BottomSolver::hypre) { + m_mlmg_1->setHypreInterface(Hypre::Interface::structed); + } +#endif + } + m_mlmg_1->solve(a_sol, a_rhs, a_tol_rel, a_tol_abs); + + BL_PROFILE_VAR_STOP(blp_mg1); + + Array dpdn_tmp; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + dpdn_tmp[idim].define(amrex::convert(m_grids[0], + IntVect::TheDimensionVector(idim)), + m_dmap[0], 1, 0); + } + m_poisson_1->get_dpdn_on_domain_faces(GetArrOfPtrs(dpdn_tmp), *a_sol[0]); + + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + m_dpdn[idim].ParallelCopy(dpdn_tmp[idim]); + } + + { + Gpu::DeviceVector moments(m_nblocks_local); + compute_moments(moments); + 
compute_potential(moments); + } + + MultiFab rhsg(m_bag, m_phind.DistributionMap(), 1, a_rhs[0]->nGrowVect()); + rhsg.setVal(0._rt); + + MultiFab solg(m_bag, m_phind.DistributionMap(), 1, 1); + solg.setVal(0._rt); + interpolate_potential(solg); + + const int nboxes0 = m_grids[0].size(); + MultiFab sol_all(m_ba_all, m_dm_all, 1, solg.nGrowVect(), + MFInfo().SetAlloc(false)); + MultiFab rhs_all(m_ba_all, m_dm_all, 1, rhsg.nGrowVect(), + MFInfo().SetAlloc(false)); + + Box const domain1 = amrex::grow(m_geom[0].Domain(), m_ngrowdomain); + IntVect const offset = -domain1.smallEnd(); + for (MFIter mfi(sol_all); mfi.isValid(); ++mfi) { + const int index = mfi.index(); + FArrayBox solfab, rhsfab; + if (index < nboxes0) { + FArrayBox& sfab0 = (*a_sol[0])[index]; + if (sol_all.nGrowVect() == a_sol[0]->nGrowVect()) { + solfab = FArrayBox(sfab0, amrex::make_alias, 0, 1); + } else { + Box b = sfab0.box(); + b.grow(sol_all.nGrowVect()-a_sol[0]->nGrowVect()); + solfab.resize(b,1); + solfab.template setVal(0._rt); + } + rhsfab = FArrayBox((*a_rhs[0])[index], amrex::make_alias, 0, 1); + } else { + solfab = FArrayBox(solg[index-nboxes0], amrex::make_alias, 0, 1); + rhsfab = FArrayBox(rhsg[index-nboxes0], amrex::make_alias, 0, 1); + } + solfab.shift(offset); + rhsfab.shift(offset); + sol_all.setFab(index, std::move(solfab)); + rhs_all.setFab(index, std::move(rhsfab)); + } + + BL_PROFILE_VAR("OpenBCSolver::MG2", blp_mg2); + + if (m_poisson_2 == nullptr) { + Vector geom_all = m_geom; + Vector grids_all = m_grids; + Vector dmap_all = m_dmap; + geom_all[0] = m_geom_all; + grids_all[0] = m_ba_all; + dmap_all[0] = m_dm_all; + m_poisson_2 = std::make_unique(geom_all, grids_all, dmap_all, + m_info); + m_poisson_2->setVerbose(m_verbose); + m_poisson_2->setMaxOrder(4); + m_poisson_2->setDomainBC({AMREX_D_DECL(LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet)}, + {AMREX_D_DECL(LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet, + LinOpBCType::Dirichlet)}); + 
m_poisson_2->setLevelBC(0, &sol_all); + for (int ilev = 1; ilev < nlevels; ++ilev) { + m_poisson_2->setLevelBC(ilev, nullptr); + } + + m_mlmg_2 = std::make_unique(*m_poisson_2); + m_mlmg_2->setVerbose(m_verbose); + m_mlmg_2->setBottomVerbose(m_bottom_verbose); + m_mlmg_2->setBottomSolver(m_bottom_solver_type); +#ifdef AMREX_USE_HYPRE + if (m_bottom_solver_type == BottomSolver::hypre) { + m_mlmg_2->setHypreInterface(Hypre::Interface::structed); + } +#endif + } + Vector solv_all = a_sol; + Vector rhsv_all = a_rhs; + solv_all[0] = &sol_all; + rhsv_all[0] = &rhs_all; + Real err = m_mlmg_2->solve(solv_all, rhsv_all, a_tol_rel, a_tol_abs); + + BL_PROFILE_VAR_STOP(blp_mg2); + + if (sol_all.nGrowVect() != a_sol[0]->nGrowVect()) { +#ifdef AMREX_USE_OMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(*a_sol[0], TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + Box const& bx = mfi.tilebox(); + Array4 const& sall = sol_all.const_array(mfi.index()); + Array4 const& s = a_sol[0]->array(mfi); + AMREX_HOST_DEVICE_PARALLEL_FOR_3D(bx, i, j, k, + { + s(i,j,k) = sall(i,j,k); + }); + } + } + + auto solve_stop_time = amrex::second(); + if (m_verbose >= 1) { + amrex::Print() << "OpenBCSolver time = " + << solve_stop_time - solve_start_time << "\n"; + } + + return err; +} + +void OpenBCSolver::compute_moments (Gpu::DeviceVector& moments) +{ + BL_PROFILE("OpenBCSolver::comp_mom()"); + + auto const problo = m_geom[0].ProbLoArray(); + auto const probhi = m_geom[0].ProbHiArray(); + auto const dx = m_geom[0].CellSizeArray(); + +#ifdef AMREX_USE_GPU + if (m_momtags_h.size() > 0) + { + int crse_ratio = m_coarsen_ratio; + int ntags = m_momtags_h.size(); + openbc::Moments* pm = moments.data(); + openbc::MomTag const* ptag = m_momtags_d.data(); + int const* pnblks = m_ngpublocks_d.data(); + std::size_t shared_mem_bytes = m_nthreads_momtag * sizeof(openbc::Moments::array_type); + +#ifdef AMREX_USE_DPCPP + 
amrex::ignore_unused(problo,probhi,dx,crse_ratio,ntags,pm,ptag,pnblks, + shared_mem_bytes); + amrex::Abort("xxxx DPCPP todo: openbc compute_moments"); +#else + amrex::launch(m_ngpublocks_h.back(), m_nthreads_momtag, shared_mem_bytes, Gpu::gpuStream(), + [=] AMREX_GPU_DEVICE () noexcept + { + Gpu::SharedMemory gsm; + openbc::Moments::array_type* const shared = gsm.dataPtr(); + openbc::Moments::array_type& tmom = shared[threadIdx.x]; + for (int i = 0; i < (openbc::M+1)*(openbc::M+2)/2; ++i) { + tmom[i] = Real(0.); + } + + int tag_id = amrex::bisect(pnblks, 0, ntags, static_cast(blockIdx.x)); + int iblock = blockIdx.x - pnblks[tag_id]; // iblock'th gpublock on this box. + auto const& tag = ptag[tag_id]; + openbc::Moments& mom = pm[tag.offset+iblock]; + if (tag.face.coordDir() == 0) { + int const nby = tag.b2d.length(1) / crse_ratio; + int const kb = iblock / nby; + int const jb = iblock - kb*nby; + int const i = tag.b2d.smallEnd(0); + int const jlo = tag.b2d.smallEnd(1) + jb*crse_ratio; + int const klo = tag.b2d.smallEnd(2) + kb*crse_ratio; + Real const fac = dx[1]*dx[2]; + Real const xc = tag.face.isLow() ? 
problo[0] : probhi[0]; + for (int icell = threadIdx.x; icell < crse_ratio*crse_ratio; icell += blockDim.x) { + int k = icell/crse_ratio; + int j = icell - k*crse_ratio; + Real const yy = (j-crse_ratio/2+Real(0.5))*dx[1]; + Real const zz = (k-crse_ratio/2+Real(0.5))*dx[2]; + j += jlo; + k += klo; + Real const charge = tag.gp(i,j,k) * fac; + Real zpow = Real(1.); + int m = 0; + for (int q = 0; q <= openbc::M; ++q) { + Real ypow = Real(1.); + for (int p = 0; p <= openbc::M-q; ++p) { + tmom[m++] += charge*ypow*zpow; + ypow *= yy; + } + zpow *= zz; + } + } + if (threadIdx.x == 0) { + mom.x = xc; + mom.y = problo[1] + dx[1]*(jlo + crse_ratio/2); + mom.z = problo[2] + dx[2]*(klo + crse_ratio/2); + mom.face = tag.face; + } + } else if (tag.face.coordDir() == 1) { + int const nbx = tag.b2d.length(0) / crse_ratio; + int const kb = iblock / nbx; + int const ib = iblock - kb*nbx; + int const j = tag.b2d.smallEnd(1); + int const ilo = tag.b2d.smallEnd(0) + ib*crse_ratio; + int const klo = tag.b2d.smallEnd(2) + kb*crse_ratio; + Real const fac = dx[0]*dx[2]; + Real const yc = tag.face.isLow() ? 
problo[1] : probhi[1]; + for (int icell = threadIdx.x; icell < crse_ratio*crse_ratio; icell += blockDim.x) { + int k = icell/crse_ratio; + int i = icell - k*crse_ratio; + Real const xx = (i-crse_ratio/2+Real(0.5))*dx[0]; + Real const zz = (k-crse_ratio/2+Real(0.5))*dx[2]; + i += ilo; + k += klo; + Real const charge = tag.gp(i,j,k) * fac; + Real zpow = Real(1.); + int m = 0; + for (int q = 0; q <= openbc::M; ++q) { + Real xpow = Real(1.); + for (int p = 0; p <= openbc::M-q; ++p) { + tmom[m++] += charge*xpow*zpow; + xpow *= xx; + } + zpow *= zz; + } + } + if (threadIdx.x == 0) { + mom.x = problo[0] + dx[0]*(ilo + crse_ratio/2); + mom.y = yc; + mom.z = problo[2] + dx[2]*(klo + crse_ratio/2); + mom.face = tag.face; + } + } else { + int const nbx = tag.b2d.length(0) / crse_ratio; + int const jb = iblock / nbx; + int const ib = iblock - jb*nbx; + int const k = tag.b2d.smallEnd(2); + int const ilo = tag.b2d.smallEnd(0) + ib*crse_ratio; + int const jlo = tag.b2d.smallEnd(1) + jb*crse_ratio; + Real const fac = dx[0]*dx[1]; + Real const zc = tag.face.isLow() ? 
problo[2] : probhi[2]; + for (int icell = threadIdx.x; icell < crse_ratio*crse_ratio; icell += blockDim.x) { + int j = icell/crse_ratio; + int i = icell - j*crse_ratio; + Real const xx = (i-crse_ratio/2+Real(0.5))*dx[0]; + Real const yy = (j-crse_ratio/2+Real(0.5))*dx[1]; + i += ilo; + j += jlo; + Real const charge = tag.gp(i,j,k) * fac; + Real ypow = Real(1.); + int m = 0; + for (int q=0; q <= openbc::M; ++q) { + Real xpow = Real(1.); + for (int p = 0; p <= openbc::M-q; ++p) { + tmom[m++] += charge*xpow*ypow; + xpow *= xx; + } + ypow *= yy; + } + } + if (threadIdx.x == 0) { + mom.x = problo[0] + dx[0]*(ilo + crse_ratio/2); + mom.y = problo[1] + dx[1]*(jlo + crse_ratio/2); + mom.z = zc; + mom.face = tag.face; + } + } + openbc::scale_moments(tmom); + + __syncthreads(); + + if (threadIdx.x < (openbc::M+1)*(openbc::M+2)/2) { + mom.mom[threadIdx.x] = Real(0.); + for (unsigned int i = 0; i < blockDim.x; ++i) { + mom.mom[threadIdx.x] += shared[i][threadIdx.x]; + } + } + }); +#endif + } +#else + for (auto const& tag : m_momtags_h) { + if (tag.face.coordDir() == 0) { + int nby = tag.b2d.length(1) / m_coarsen_ratio; + int nbz = tag.b2d.length(2) / m_coarsen_ratio; + int i = tag.b2d.smallEnd(0); + int jlo = tag.b2d.smallEnd(1); + int klo = tag.b2d.smallEnd(2); + Real fac = dx[1]*dx[2]; + Real xc = tag.face.isLow() ? 
problo[0] : probhi[0]; + for (int kb = 0; kb < nbz; ++kb) { + for (int jb = 0; jb < nby; ++jb) { + openbc::Moments& mom = moments[tag.offset+jb+kb*nby]; + for (auto& m : mom.mom) { + m = 0._rt; + } + for (int kk = 0; kk < m_coarsen_ratio; ++kk) { + for (int jj = 0; jj < m_coarsen_ratio; ++jj) { + Real charge = tag.gp(i, jlo+jb*m_coarsen_ratio+jj, + klo+kb*m_coarsen_ratio+kk) * fac; + Real yy = (jj-m_coarsen_ratio/2+0.5_rt)*dx[1]; + Real zz = (kk-m_coarsen_ratio/2+0.5_rt)*dx[2]; + Real zpow = 1._rt; + int m = 0; + for (int q = 0; q <= openbc::M; ++q) { + Real ypow = 1._rt; + for (int p = 0; p <= openbc::M-q; ++p) { + mom.mom[m++] += charge*ypow*zpow; + ypow *= yy; + } + zpow *= zz; + } + }} + openbc::scale_moments(mom.mom); + // center of the block + mom.x = xc; + mom.y = problo[1] + dx[1]*(tag.b2d.smallEnd(1) + + jb*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.z = problo[2] + dx[2]*(tag.b2d.smallEnd(2) + + kb*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.face = tag.face; + }} + } else if (tag.face.coordDir() == 1) { + int nbx = tag.b2d.length(0) / m_coarsen_ratio; + int nbz = tag.b2d.length(2) / m_coarsen_ratio; + int j = tag.b2d.smallEnd(1); + int ilo = tag.b2d.smallEnd(0); + int klo = tag.b2d.smallEnd(2); + Real fac = dx[0]*dx[2]; + Real yc = tag.face.isLow() ? 
problo[1] : probhi[1]; + for (int kb = 0; kb < nbz; ++kb) { + for (int ib = 0; ib < nbx; ++ib) { + openbc::Moments& mom = moments[tag.offset+ib+kb*nbx]; + for (auto& m : mom.mom) { + m = 0._rt; + } + for (int kk = 0; kk < m_coarsen_ratio; ++kk) { + for (int ii = 0; ii < m_coarsen_ratio; ++ii) { + Real charge = tag.gp(ilo+ib*m_coarsen_ratio+ii, j, + klo+kb*m_coarsen_ratio+kk) * fac; + Real xx = (ii-m_coarsen_ratio/2+0.5_rt)*dx[0]; + Real zz = (kk-m_coarsen_ratio/2+0.5_rt)*dx[2]; + Real zpow = 1._rt; + int m = 0; + for (int q = 0; q <= openbc::M; ++q) { + Real xpow = 1._rt; + for (int p = 0; p <= openbc::M-q; ++p) { + mom.mom[m++] += charge*xpow*zpow; + xpow *= xx; + } + zpow *= zz; + } + }} + openbc::scale_moments(mom.mom); + mom.x = problo[0] + dx[0]*(tag.b2d.smallEnd(0) + + ib*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.y = yc; + mom.z = problo[2] + dx[2]*(tag.b2d.smallEnd(2) + + kb*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.face = tag.face; + }} + } else { + int nbx = tag.b2d.length(0) / m_coarsen_ratio; + int nby = tag.b2d.length(1) / m_coarsen_ratio; + int k = tag.b2d.smallEnd(2); + int ilo = tag.b2d.smallEnd(0); + int jlo = tag.b2d.smallEnd(1); + Real fac = dx[0]*dx[1]; + Real zc = tag.face.isLow() ? 
problo[2] : probhi[2]; + for (int jb = 0; jb < nby; ++jb) { + for (int ib = 0; ib < nbx; ++ib) { + openbc::Moments& mom = moments[tag.offset+ib+jb*nbx]; + for (auto& m : mom.mom) { + m = 0._rt; + } + for (int jj = 0; jj < m_coarsen_ratio; ++jj) { + for (int ii = 0; ii < m_coarsen_ratio; ++ii) { + Real charge = tag.gp(ilo+ib*m_coarsen_ratio+ii, + jlo+jb*m_coarsen_ratio+jj, k) * fac; + Real xx = (ii-m_coarsen_ratio/2+0.5_rt)*dx[0]; + Real yy = (jj-m_coarsen_ratio/2+0.5_rt)*dx[1]; + Real ypow = 1._rt; + int m = 0; + for (int q = 0; q <= openbc::M; ++q) { + Real xpow = 1._rt; + for (int p = 0; p <= openbc::M-q; ++p) { + mom.mom[m++] += charge*xpow*ypow; + xpow *= xx; + } + ypow *= yy; + } + }} + openbc::scale_moments(mom.mom); + mom.x = problo[0] + dx[0]*(tag.b2d.smallEnd(0) + + ib*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.y = problo[1] + dx[1]*(tag.b2d.smallEnd(1) + + jb*m_coarsen_ratio + + m_coarsen_ratio/2); + mom.z = zc; + mom.face = tag.face; + }} + } + } +#endif + +#ifdef AMREX_USE_MPI + bcast_moments(moments); +#endif + m_nblocks = moments.size(); +} + +#ifdef AMREX_USE_MPI +void OpenBCSolver::bcast_moments (Gpu::DeviceVector& moments) +{ + if (ParallelContext::NProcsSub() > 1) + { + MPI_Comm comm = ParallelContext::CommunicatorSub(); + if (m_nblocks == 0) { + int count = moments.size(); + count *= static_cast(sizeof(openbc::Moments)); + m_countvec.resize(ParallelContext::NProcsSub()); + MPI_Allgather(&count, 1, MPI_INT, m_countvec.data(), 1, MPI_INT, comm); + + m_offset.resize(m_countvec.size(), 0); + Long count_tot = m_countvec[0]; + for (int i = 1, N = m_offset.size(); i < N; ++i) { + m_offset[i] = m_offset[i-1] + m_countvec[i-1]; + count_tot += m_countvec[i]; + } + + if (count_tot > static_cast(std::numeric_limits::max())) { + amrex::Abort("OpenBC: integer overflow. 
Let us know and we will fix this."); + } + + m_nblocks = count_tot/sizeof(openbc::Moments); + } + + Gpu::DeviceVector moments_all(m_nblocks); + +#ifdef AMREX_USE_GPU + Gpu::PinnedVector h_moments(moments.size()); + Gpu::PinnedVector h_moments_all(moments_all.size()); + Gpu::copyAsync(Gpu::deviceToHost, moments.begin(), moments.end(), + h_moments.begin()); + Gpu::streamSynchronize(); +#else + auto const& h_moments = moments; + auto& h_moments_all = moments_all; +#endif + + int count = m_nblocks_local*static_cast(sizeof(openbc::Moments)); + MPI_Allgatherv(h_moments.data(), count, MPI_CHAR, h_moments_all.data(), + m_countvec.data(), m_offset.data(), MPI_CHAR, comm); + +#ifdef AMREX_USE_GPU + Gpu::copyAsync(Gpu::hostToDevice, h_moments_all.begin(), h_moments_all.end(), + moments_all.begin()); + Gpu::streamSynchronize(); +#endif + + std::swap(moments, moments_all); + } +} +#endif + +void OpenBCSolver::compute_potential (Gpu::DeviceVector const& moments) +{ + BL_PROFILE("OpenBCSolver::comp_phi()"); + + auto const problo = m_geom[0].ProbLoArray(); + auto const dx = m_geom[0].CellSizeArray(); + + int crse_ratio = m_coarsen_ratio; + int nblocks = m_nblocks; + openbc::Moments const* pmom = moments.data(); + for (MFIter mfi(m_crse_grown_faces_phi); mfi.isValid(); ++mfi) { + Box const& b = mfi.validbox(); + Array4 const& phi_arr = m_crse_grown_faces_phi.array(mfi); +#if defined(AMREX_USE_GPU) + const auto lo = amrex::lbound(b); + const auto len = amrex::length(b); + const auto lenxy = len.x*len.y; + const auto lenx = len.x; +#ifdef AMREX_USE_DPCPP + amrex::ignore_unused(problo,dx,crse_ratio,nblocks,pmom,b,phi_arr,lo, + lenxy,lenx); + amrex::Abort("xxxxx DPCPP todo: openbc compute_potential"); +#else + amrex::launch(b.numPts(), AMREX_GPU_MAX_THREADS, Gpu::gpuStream(), + [=] AMREX_GPU_DEVICE () noexcept + { + int icell = blockIdx.x; + int k = icell / lenxy; + int j = (icell - k*lenxy) / lenx; + int i = (icell - k*lenxy) - j*lenx; + i += lo.x; + j += lo.y; + k += lo.z; + Real xb 
= problo[0] + i*crse_ratio*dx[0]; + Real yb = problo[1] + j*crse_ratio*dx[1]; + Real zb = problo[2] + k*crse_ratio*dx[2]; + Real phi = Real(0.); + for (int iblock = threadIdx.x; iblock < nblocks; iblock += blockDim.x) { + phi += openbc::block_potential(pmom[iblock], xb, yb, zb); + } + Real phitot = Gpu::blockReduceSum(phi); + if (threadIdx.x == 0) { + phi_arr(i,j,k) = phitot; + } + }); +#endif +#else + amrex::LoopOnCpu(b, [&] (int i, int j, int k) noexcept + { + Real xb = problo[0] + i*crse_ratio*dx[0]; + Real yb = problo[1] + j*crse_ratio*dx[1]; + Real zb = problo[2] + k*crse_ratio*dx[2]; + Real phi = 0._rt; + for (int iblock = 0; iblock < nblocks; ++iblock) { + phi += openbc::block_potential(pmom[iblock], xb, yb, zb); + } + phi_arr(i,j,k) = phi; + }); +#endif + } + + m_phind.ParallelCopy(m_crse_grown_faces_phi, 0, 0, 1, IntVect(0), + m_phind.nGrowVect()); +} + +void OpenBCSolver::interpolate_potential (MultiFab& solg) +{ + BL_PROFILE("OpenBCSolver::interp_phi"); + + Box const domain1 = amrex::grow(m_geom[0].Domain(), m_ngrowdomain); + int crse_ratio = m_coarsen_ratio; + + for (MFIter mfi(solg); mfi.isValid(); ++mfi) { + Box const& vbx = mfi.validbox(); + for (OrientationIter oit; oit.isValid(); ++oit) { + Orientation face = oit(); + if (vbx[face] == domain1[face]) { + Array4 const& solg_arr = solg.array(mfi); + Array4 const& phi_arr = m_phind.const_array(mfi); + Box const& b2d = amrex::bdryNode(vbx, face); + int offset = face.isLow() ? 
-1 : 0; + if (face.coordDir() == 0) { + Box b = amrex::coarsen(b2d,IntVect(crse_ratio,crse_ratio,1)); + b.grow(1,openbc::P).surroundingNodes(1); + FArrayBox tmpfab(b,1,The_Async_Arena()); + Array4 const& tmp = tmpfab.array(); + Array4 const& ctmp = tmpfab.const_array(); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int ic, int jc, int k) noexcept + { + tmp(ic,jc,k) = openbc::interpccz(ic,jc,k,phi_arr,crse_ratio); + }); + b = amrex::coarsen(b2d,IntVect(crse_ratio,1,1)); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int ic, int j, int k) noexcept + { + int i = ic*crse_ratio+offset; + solg_arr(i,j,k) = openbc::interpccy(ic,j,k,ctmp,crse_ratio); + }); + } else if (face.coordDir() == 1) { + Box b = amrex::coarsen(b2d,IntVect(crse_ratio,crse_ratio,1)); + b.grow(0,openbc::P).surroundingNodes(0); + FArrayBox tmpfab(b,1,The_Async_Arena()); + Array4 const& tmp = tmpfab.array(); + Array4 const& ctmp = tmpfab.const_array(); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int ic, int jc, int k) noexcept + { + tmp(ic,jc,k) = openbc::interpccz(ic,jc,k,phi_arr,crse_ratio); + }); + b = amrex::coarsen(b2d,IntVect(1,crse_ratio,1)); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int i, int jc, int k) noexcept + { + int j = jc*crse_ratio+offset; + solg_arr(i,j,k) = openbc::interpccx(i,jc,k,ctmp,crse_ratio); + }); + } else { + Box b = amrex::coarsen(b2d,IntVect(crse_ratio,1,crse_ratio)); + b.grow(0,openbc::P).surroundingNodes(0); + FArrayBox tmpfab(b,1,The_Async_Arena()); + Array4 const& tmp = tmpfab.array(); + Array4 const& ctmp = tmpfab.const_array(); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int ic, int j, int kc) noexcept + { + tmp(ic,j,kc) = openbc::interpccy(ic,j,kc,phi_arr,crse_ratio); + }); + b = amrex::coarsen(b2d,IntVect(1,1,crse_ratio)); + amrex::ParallelFor(b, + [=] AMREX_GPU_DEVICE (int i, int j, int kc) noexcept + { + int k = kc*crse_ratio+offset; + solg_arr(i,j,k) = openbc::interpccx(i,j,kc,ctmp,crse_ratio); + }); + } + } + } + } +} + +namespace openbc { 
+std::ostream& operator<< (std::ostream& os, Moments const& mom) +{ + os << "Face " << mom.face << ", x = " << mom.x << ", y = " << mom.y + << ", z = " << mom.z << "\n" + << " " << mom.mom[0] << "\n" + << " " << mom.mom[1] << ", " << mom.mom[8] << "\n" + << " " << mom.mom[2] << ", " << mom.mom[9] << ", " << mom.mom[15] << "\n" + << " " << mom.mom[3] << ", " << mom.mom[10] << ", " << mom.mom[16] + << ", " << mom.mom[21] << "\n" + << " " << mom.mom[4] << ", " << mom.mom[11] << ", " << mom.mom[17] + << ", " << mom.mom[22] << ", " << mom.mom[26] << "\n" + << " " << mom.mom[5] << ", " << mom.mom[12] << ", " << mom.mom[18] + << ", " << mom.mom[23] << ", " << mom.mom[27] << ", " << mom.mom[30] << "\n" + << " " << mom.mom[6] << ", " << mom.mom[13] << ", " << mom.mom[19] + << ", " << mom.mom[24] << ", " << mom.mom[28] << ", " << mom.mom[31] + << ", " << mom.mom[33] << "\n" + << " " << mom.mom[7] << ", " << mom.mom[14] << ", " << mom.mom[20] + << ", " << mom.mom[25] << ", " << mom.mom[29] << ", " << mom.mom[32] + << ", " << mom.mom[34] << ", " << mom.mom[35] << "\n"; + return os; +} +} + +} diff --git a/Src/LinearSolvers/OpenBC/AMReX_OpenBC_K.H b/Src/LinearSolvers/OpenBC/AMReX_OpenBC_K.H new file mode 100644 index 00000000000..7a6b2643b68 --- /dev/null +++ b/Src/LinearSolvers/OpenBC/AMReX_OpenBC_K.H @@ -0,0 +1,166 @@ +#ifndef AMREX_OPENBC_K_H_ +#define AMREX_OPENBC_K_H_ + +#include +#include + +namespace amrex { namespace openbc { + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void scale_moments (openbc::Moments::array_type& mom) +{ // p!*q! in the order of 0!*0!, 1!*0!, ..., 7!*0!, 0!*1!, 1!*1!, 2!*1!, ..., 6!*1!, 0!*2!, ..., 0!*7!. 
+ mom[ 2] *= Real(0.5); + mom[ 3] *= Real(1./6.); + mom[ 4] *= Real(1./24.); + mom[ 5] *= Real(1./120.); + mom[ 6] *= Real(1./720.); + mom[ 7] *= Real(1./5040.); + mom[10] *= Real(0.5); + mom[11] *= Real(1./6.); + mom[12] *= Real(1./24.); + mom[13] *= Real(1./120.); + mom[14] *= Real(1./720.); + mom[15] *= Real(0.5); + mom[16] *= Real(0.5); + mom[17] *= Real(0.25); + mom[18] *= Real(1./12.); + mom[19] *= Real(1./48.); + mom[20] *= Real(1./240.); + mom[21] *= Real(1./6.); + mom[22] *= Real(1./6.); + mom[23] *= Real(1./12.); + mom[24] *= Real(1./36.); + mom[25] *= Real(1./144.); + mom[26] *= Real(1./24.); + mom[27] *= Real(1./24.); + mom[28] *= Real(1./48.); + mom[29] *= Real(1./144.); + mom[30] *= Real(1./120.); + mom[31] *= Real(1./120.); + mom[32] *= Real(1./240.); + mom[33] *= Real(1./720.); + mom[34] *= Real(1./720.); + mom[35] *= Real(1./5040.); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +Real block_potential (openbc::Moments const& mom, Real xb, Real yb, Real zb) +{ + constexpr Real oneover4pi = Real(1.)/Real(4.*3.1415926535897932); + + xb -= mom.x; + yb -= mom.y; + zb -= mom.z; + Real ri = Real(1.)/std::sqrt(xb*xb+yb*yb+zb*zb); + Real ri2 = ri*ri; + Real ri3 = ri2*ri; + Real ri4 = ri3*ri; + Real xr, yr; + if (mom.face.coordDir() == 0) { + xr = yb*ri; + yr = zb*ri; + } else if (mom.face.coordDir() == 1) { + xr = xb*ri; + yr = zb*ri; + } else { + xr = xb*ri; + yr = yb*ri; + } + Real xr2 = xr *xr; + Real xr4 = xr2*xr2; + Real xr6 = xr4*xr2; + Real yr2 = yr *yr; + Real yr4 = yr2*yr2; + Real yr6 = yr4*yr2; + Real phi = ri * mom.mom[0] + + ri2*(xr*mom.mom[1] + yr*mom.mom[8]) + + ri3*((Real(3.) * xr2 - Real(1.)) * mom.mom[2] + + (Real(3.) * xr * yr ) * mom.mom[9] + + (Real(3.) * yr2 - Real(1.)) * mom.mom[15]) + + ri4 * (xr * (Real(15.) * xr2 - Real(9.)) * mom.mom[3] + + yr * (Real(15.) * xr2 - Real(3.)) * mom.mom[10] + + xr * (Real(15.) * yr2 - Real(3.)) * mom.mom[16] + + yr * (Real(15.) * yr2 - Real(9.)) * mom.mom[21]) + + ri4*ri * ((Real(105.) * xr4 - Real(90.) 
* xr2 + Real(9.)) * mom.mom[4] + + (xr * yr * (Real(105.) * xr2 - Real(45.))) * mom.mom[11] + + (Real(105.) * xr2 * yr2 - Real(15.) * xr2 - Real(15.) * yr2 + Real(3.)) * mom.mom[17] + + (xr * yr * (Real(105.) * yr2 - Real(45.))) * mom.mom[22] + + (Real(105.) * yr4 - Real(90.) * yr2 + Real(9.)) * mom.mom[26]) + + ri4*ri2 * (xr * (Real(945.)*xr4 - Real(1050.)*xr2 + Real(225.)) * mom.mom[5] + + yr * (Real(945.)*xr4 - Real(630.)*xr2 + Real(45.)) * mom.mom[12] + + xr * (Real(945.)*xr2*yr2 - Real(105.)*xr2 - Real(315.)*yr2 + Real(45.)) * mom.mom[18] + + yr * (Real(945.)*xr2*yr2 - Real(315.)*xr2 - Real(105.)*yr2 + Real(45.)) * mom.mom[23] + + xr * (Real(945.)*yr4 - Real(630.)*yr2 + Real(45.)) * mom.mom[27] + + yr * (Real(945.)*yr4 - Real(1050.)*yr2 + Real(225.)) * mom.mom[30]) + + ri4*ri3 * (Real(45.) * (Real(231.)*xr6 - Real(315.)*xr4 + Real(105.)*xr2 - Real(5.)) * mom.mom[6] + + Real(315.)*xr*yr * (Real(33.)*xr4 - Real(30.)*xr2 + Real(5.)) * mom.mom[13] + + Real(45.) * (Real(231.)*xr4*yr2 - Real(21.)*xr4 - Real(126.)*xr2*yr2 + Real(14.)*xr2 + Real(7.)*yr2 - Real(1.)) * mom.mom[19] + + Real(945.)*xr*yr * (Real(11.)*xr2*yr2 - Real(3.)*xr2 - Real(3.)*yr2 + Real(1.)) * mom.mom[24] + + Real(45.) * (Real(231.)*xr2*yr4 - Real(126.)*xr2*yr2 + Real(7.)*xr2 - Real(21.)*yr4 + Real(14.)*yr2 - Real(1.)) * mom.mom[28] + + Real(315.)*xr*yr * (Real(33.)*yr4 - Real(30.)*yr2 + Real(5.)) * mom.mom[31] + + Real(45.) 
* (Real(231.)*yr6 - Real(315.)*yr4 + Real(105.)*yr2 - Real(5.)) * mom.mom[33]) + + ri4*ri4*(Real(315.)*xr*(Real(429.)*xr6 - Real(693.)*xr4 + Real(315.)*xr2 - Real(35.)) * mom.mom[7] + + Real(315.)*yr*(Real(429.)*xr6 - Real(495.)*xr4 + Real(135.)*xr2 - Real(5.)) * mom.mom[14] + + Real(315.)*xr*(Real(429.)*xr4*yr2 - Real(33.)*xr4 - Real(330.)*xr2*yr2 + Real(30.)*xr2 + Real(45.)*yr2 - Real(5.)) * mom.mom[20] + + Real(945.)*yr*(Real(143.)*xr4*yr2 - Real(33.)*xr4 - Real(66.)*xr2*yr2 + Real(18.)*xr2 + Real(3.)*yr2 - Real(1.)) * mom.mom[25] + + Real(945.)*xr*(Real(143.)*xr2*yr4 - Real(66.)*xr2*yr2 + Real(3.)*xr2 - Real(33.)*yr4 + Real(18.)*yr2 - Real(1.)) * mom.mom[29] + + Real(315.)*yr*(Real(429.)*xr2*yr4 - Real(330.)*xr2*yr2 + Real(45.)*xr2 - Real(33.)*yr4 + Real(30.)*yr2 - Real(5.)) * mom.mom[32] + + Real(315.)*xr*(Real(429.)*yr6 - Real(495.)*yr4 + Real(135.)*yr2 - Real(5.)) * mom.mom[34] + + Real(315.)*yr*(Real(429.)*yr6 - Real(693.)*yr4 + Real(315.)*yr2 - Real(35.)) * mom.mom[35]); + return phi*(-oneover4pi); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void interp_coef (int i, int ii, Real* AMREX_RESTRICT c, int crse_ratio) +{ + static_assert(openbc::P == 3, "openbc::P is assumed to be 3 here"); + Real xint = (ii-i*crse_ratio + Real(0.5))/static_cast(crse_ratio); + constexpr Real x[] = {-3._rt, -2._rt, -1._rt, 0._rt, 1._rt, 2._rt, 3._rt, 4._rt}; + poly_interp_coeff<8>(xint, x, c); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +Real interpccx (int ii, int j, int k, Array4 const& phi, int crse_ratio) +{ + int i = amrex::coarsen(ii,crse_ratio); + Real c[8]; + interp_coef(i,ii,c,crse_ratio); + + Real p = Real(0.); + for (int n = 0; n < 8; ++n) { + p += c[n] * phi(i-3+n,j,k); + } + return p; +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +Real interpccy (int i, int jj, int k, Array4 const& phi, int crse_ratio) +{ + int j = amrex::coarsen(jj,crse_ratio); + Real c[8]; + interp_coef(j,jj,c,crse_ratio); + + Real p = Real(0.); + for (int n = 0; n < 8; ++n) { + p += c[n] * 
phi(i,j-3+n,k); + } + return p; +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +Real interpccz (int i, int j, int kk, Array4 const& phi, int crse_ratio) +{ + int k = amrex::coarsen(kk,crse_ratio); + Real c[8]; + interp_coef(k,kk,c,crse_ratio); + + Real p = Real(0.); + for (int n = 0; n < 8; ++n) { + p += c[n] * phi(i,j,k-3+n); + } + return p; +} + +}} + +#endif diff --git a/Src/LinearSolvers/OpenBC/Make.package b/Src/LinearSolvers/OpenBC/Make.package new file mode 100644 index 00000000000..5fc39f69371 --- /dev/null +++ b/Src/LinearSolvers/OpenBC/Make.package @@ -0,0 +1,6 @@ + +CEXE_headers += AMReX_OpenBC.H AMReX_OpenBC_K.H +CEXE_sources += AMReX_OpenBC.cpp + +VPATH_LOCATIONS += $(AMREX_HOME)/Src/LinearSolvers/OpenBC +INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/LinearSolvers/OpenBC diff --git a/Src/Particle/AMReX_DenseBins.H b/Src/Particle/AMReX_DenseBins.H index 93c9415ad25..0f1e94bb176 100644 --- a/Src/Particle/AMReX_DenseBins.H +++ b/Src/Particle/AMReX_DenseBins.H @@ -200,6 +200,7 @@ public: m_bins.resize(nitems); m_perm.resize(nitems); + m_local_offsets.resize(nitems); m_counts.resize(0); m_counts.resize(nbins+1, 0); @@ -209,21 +210,21 @@ public: index_type* pbins = m_bins.dataPtr(); index_type* pcount = m_counts.dataPtr(); + index_type* plocal_offsets = m_local_offsets.dataPtr(); amrex::ParallelFor(nitems, [=] AMREX_GPU_DEVICE (int i) noexcept { pbins[i] = f(v[i]); - Gpu::Atomic::AddNoRet(&pcount[pbins[i]], index_type{ 1 }); + index_type off = Gpu::Atomic::Add(&pcount[pbins[i]], index_type{ 1 }); + plocal_offsets[i] = off; }); Gpu::exclusive_scan(m_counts.begin(), m_counts.end(), m_offsets.begin()); - Gpu::copyAsync(Gpu::deviceToDevice, m_offsets.begin(), m_offsets.end(), m_counts.begin()); - index_type* pperm = m_perm.dataPtr(); - constexpr index_type max_index = std::numeric_limits::max(); + index_type* poffsets = m_offsets.dataPtr(); amrex::ParallelFor(nitems, [=] AMREX_GPU_DEVICE (int i) noexcept { - index_type index = Gpu::Atomic::Inc(&pcount[pbins[i]], max_index); 
+ index_type index = poffsets[pbins[i]] + plocal_offsets[i]; pperm[index] = i; }); @@ -503,6 +504,7 @@ private: Gpu::DeviceVector m_bins; Gpu::DeviceVector m_counts; + Gpu::DeviceVector m_local_offsets; Gpu::DeviceVector m_offsets; Gpu::DeviceVector m_perm; }; diff --git a/Src/Particle/AMReX_NeighborParticles.H b/Src/Particle/AMReX_NeighborParticles.H index 36d2c5351d7..344d39f778e 100644 --- a/Src/Particle/AMReX_NeighborParticles.H +++ b/Src/Particle/AMReX_NeighborParticles.H @@ -348,6 +348,11 @@ protected: /// void BuildMasks (); + /// + /// Are the masks computed by the above function still valid? + /// + bool areMasksValid (); + void GetNeighborCommTags (); void GetCommTagsBox (Vector& tags, const int lev, const Box& in_box); diff --git a/Src/Particle/AMReX_NeighborParticlesCPUImpl.H b/Src/Particle/AMReX_NeighborParticlesCPUImpl.H index d5fb9fc40ee..4d5ecb4fcc8 100644 --- a/Src/Particle/AMReX_NeighborParticlesCPUImpl.H +++ b/Src/Particle/AMReX_NeighborParticlesCPUImpl.H @@ -7,8 +7,10 @@ void NeighborParticleContainer ::fillNeighborsCPU () { BL_PROFILE("NeighborParticleContainer::fillNeighborsCPU"); - BuildMasks(); - GetNeighborCommTags(); + if (!areMasksValid()) { + BuildMasks(); + GetNeighborCommTags(); + } cacheNeighborInfo(); updateNeighborsCPU(false); } diff --git a/Src/Particle/AMReX_NeighborParticlesGPUImpl.H b/Src/Particle/AMReX_NeighborParticlesGPUImpl.H index 6e112318757..81bef1302e2 100644 --- a/Src/Particle/AMReX_NeighborParticlesGPUImpl.H +++ b/Src/Particle/AMReX_NeighborParticlesGPUImpl.H @@ -121,7 +121,7 @@ buildNeighborCopyOp (bool use_boundary_neighbor) { BL_PROFILE("NeighborParticleContainer::buildNeighborCopyOp()"); - AMREX_ASSERT(hasNeighbors() == false); + AMREX_ASSERT(!hasNeighbors() || use_boundary_neighbor); const int lev = 0; const auto& geom = this->Geom(lev); diff --git a/Src/Particle/AMReX_NeighborParticlesI.H b/Src/Particle/AMReX_NeighborParticlesI.H index a07cfab92c4..202f41f87f3 100644 --- a/Src/Particle/AMReX_NeighborParticlesI.H 
+++ b/Src/Particle/AMReX_NeighborParticlesI.H @@ -119,6 +119,30 @@ NeighborParticleContainer this->Redistribute(); } +template +bool +NeighborParticleContainer +::areMasksValid () { + + BL_PROFILE("NeighborParticleContainer::areMasksValid"); + + resizeContainers(this->numLevels()); + + for (int lev = 0; lev < this->numLevels(); ++lev) + { + BoxArray ba = this->ParticleBoxArray(lev); + const DistributionMapping& dmap = this->ParticleDistributionMap(lev); + + if (mask_ptr[lev] == nullptr || + ! BoxArray::SameRefs(mask_ptr[lev]->boxArray(), ba) || + ! DistributionMapping::SameRefs(mask_ptr[lev]->DistributionMap(), dmap)) + { + return false; + } + } + return true; +} + template void NeighborParticleContainer @@ -136,30 +160,25 @@ NeighborParticleContainer BoxArray ba = this->ParticleBoxArray(lev); const DistributionMapping& dmap = this->ParticleDistributionMap(lev); - if (mask_ptr[lev] == nullptr || - ! BoxArray::SameRefs(mask_ptr[lev]->boxArray(), ba) || - ! DistributionMapping::SameRefs(mask_ptr[lev]->DistributionMap(), dmap)) - { - const Geometry& geom = this->Geom(lev); + const Geometry& geom = this->Geom(lev); - mask_ptr[lev] = std::make_unique(ba, dmap, int(num_mask_comps), m_num_neighbor_cells); - mask_ptr[lev]->setVal(-1, m_num_neighbor_cells); + mask_ptr[lev] = std::make_unique(ba, dmap, int(num_mask_comps), m_num_neighbor_cells); + mask_ptr[lev]->setVal(-1, m_num_neighbor_cells); #ifdef AMREX_USE_OMP #pragma omp parallel #endif - for (MFIter mfi(*mask_ptr[lev],this->do_tiling ? 
this->tile_size : IntVect::TheZeroVector()); - mfi.isValid(); ++mfi) { - const Box& box = mfi.tilebox(); - const int grid_id = mfi.index(); - const int tile_id = mfi.LocalTileIndex(); - (*mask_ptr[lev])[mfi].template setVal(grid_id, box, MaskComps::grid, 1); - (*mask_ptr[lev])[mfi].template setVal(tile_id, box, MaskComps::tile, 1); - (*mask_ptr[lev])[mfi].template setVal(lev , box, MaskComps::level, 1); - } - - mask_ptr[lev]->FillBoundary(geom.periodicity()); + for (MFIter mfi(*mask_ptr[lev],this->do_tiling ? this->tile_size : IntVect::TheZeroVector()); + mfi.isValid(); ++mfi) { + const Box& box = mfi.tilebox(); + const int grid_id = mfi.index(); + const int tile_id = mfi.LocalTileIndex(); + (*mask_ptr[lev])[mfi].template setVal(grid_id, box, MaskComps::grid, 1); + (*mask_ptr[lev])[mfi].template setVal(tile_id, box, MaskComps::tile, 1); + (*mask_ptr[lev])[mfi].template setVal(lev , box, MaskComps::level, 1); } + + mask_ptr[lev]->FillBoundary(geom.periodicity()); } } @@ -794,9 +813,21 @@ NeighborParticleContainer:: selectActualNeighbors (CheckPair&& check_pair, int num_cells) { BL_PROFILE("NeighborParticleContainer::selectActualNeighbors"); + const auto& geom_fine = this->Geom(0); + const auto& ba_fine = this->ParticleBoxArray(0); + if (ba_fine.size() == 1 && !geom_fine.isAnyPeriodic()) { + return; + } for (int lev = 0; lev < this->numLevels(); ++lev) { + // clear previous neighbor particle ids + if (!m_boundary_particle_ids.empty()) { + for (auto& keyval: m_boundary_particle_ids[lev]) { + keyval.second.clear(); + } + } + for (MyParIter pti(*this, lev); pti.isValid(); ++pti) { PairIndex index(pti.index(), pti.LocalTileIndex()); @@ -838,8 +869,8 @@ selectActualNeighbors (CheckPair&& check_pair, int num_cells) auto pperm = bins.permutationPtr(); auto poffset = bins.offsetsPtr(); - unsigned int np_boundary = 0; - unsigned int* p_np_boundary = &np_boundary; + Gpu::Buffer np_boundary({0}); + unsigned int* p_np_boundary = np_boundary.data(); constexpr unsigned int 
max_unsigned_int = std::numeric_limits::max(); AMREX_FOR_1D ( np_real, i, @@ -880,9 +911,9 @@ selectActualNeighbors (CheckPair&& check_pair, int num_cells) } } });// end amrex_for_1d - Gpu::streamSynchronize(); - m_boundary_particle_ids[lev][index].resize(np_boundary); + unsigned int* p_np_boundary_h = np_boundary.copyToHost(); + m_boundary_particle_ids[lev][index].resize(*p_np_boundary_h); }// end mypariter }// end lev diff --git a/Src/Particle/AMReX_Particle.H b/Src/Particle/AMReX_Particle.H index 7876379cf08..225f2120e5c 100644 --- a/Src/Particle/AMReX_Particle.H +++ b/Src/Particle/AMReX_Particle.H @@ -205,7 +205,7 @@ struct Particle { static constexpr bool is_soa_particle = false; using StorageParticleType = Particle; - + //! \brief number of extra Real components in the particle struct static constexpr int NReal = T_NReal; diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index c164e7214d3..d604a36c896 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -531,9 +531,9 @@ public: */ bool OK (int lev_min = 0, int lev_max = -1, int nGrow = 0) const; - void ByteSpread () const; + std::array ByteSpread () const; - void PrintCapacity () const; + std::array PrintCapacity () const; void ShrinkToFit (); diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index 316fccb277f..f257ff17ddd 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -241,10 +241,11 @@ ParticleContainer_impl const auto& geom = Geom(0); const auto plo = geom.ProbLoArray(); const auto phi = geom.ProbHiArray(); - const auto rhi = geom.RoundoffHiArray(); + const auto rlo = geom.ProbLoArrayInParticleReal(); + const auto rhi = geom.ProbHiArrayInParticleReal(); const auto is_per = geom.isPeriodicArray(); - return enforcePeriodic(p, plo, phi, rhi, is_per); + return enforcePeriodic(p, plo, phi, rlo, rhi, is_per); } template class 
Allocator> void ParticleContainer_impl::locateParticle (ParticleType& p, ParticleLocData& pld, - int lev_min, int lev_max, int nGrow, int local_grid) const + int lev_min, int lev_max, int nGrow, int local_grid) const { bool outside = AMREX_D_TERM(p.pos(0) < Geom(0).ProbLo(0) || p.pos(0) >= Geom(0).ProbHi(0), @@ -316,20 +317,21 @@ ParticleContainer_impl::locatePa if (! outside) { - if (Geom(0).outsideRoundoffDomain(AMREX_D_DECL(Real(p.pos(0)), Real(p.pos(1)), Real(p.pos(2))))) + if (Geom(0).outsideRoundoffDomain(AMREX_D_DECL(p.pos(0), p.pos(1), p.pos(2)))) { - RealBox roundoff_domain = Geom(0).RoundoffDomain(); + GpuArray rhi = Geom(0).ProbHiArrayInParticleReal(); + GpuArray rlo = Geom(0).ProbLoArrayInParticleReal(); for (int idim=0; idim < AMREX_SPACEDIM; ++idim) { - if (p.pos(idim) <= roundoff_domain.lo(idim)) { - p.pos(idim) = std::nextafter((ParticleReal) roundoff_domain.lo(idim), (ParticleReal) roundoff_domain.hi(idim)); + if (p.pos(idim) <= rlo[idim]) { + p.pos(idim) = std::nextafter(rlo[idim], rhi[idim]); } - if (p.pos(idim) >= roundoff_domain.hi(idim)) { - p.pos(idim) = std::nextafter((ParticleReal) roundoff_domain.hi(idim), (ParticleReal) roundoff_domain.lo(idim)); + if (p.pos(idim) >= rhi[idim]) { + p.pos(idim) = std::nextafter(rhi[idim], rlo[idim]); } } - AMREX_ASSERT(! Geom(0).outsideRoundoffDomain(AMREX_D_DECL(Real(p.pos(0)), Real(p.pos(1)), Real(p.pos(2))))); + AMREX_ASSERT(! 
Geom(0).outsideRoundoffDomain(AMREX_D_DECL(p.pos(0), p.pos(1), p.pos(2)))); } } @@ -517,8 +519,9 @@ Long ParticleContainer_impl::Num template class Allocator> -void -ParticleContainer_impl::ByteSpread () const +std::array +ParticleContainer_impl +::ByteSpread () const { Long cnt = 0; @@ -533,7 +536,7 @@ ParticleContainer_impl::ByteSpre Long mn = cnt, mx = mn; const int IOProc = ParallelContext::IOProcessorNumberSub(); - const std::size_t sz = sizeof(ParticleType)+NumRealComps()*sizeof(Real)+NumIntComps()*sizeof(int); + const Long sz = sizeof(ParticleType)+NumRealComps()*sizeof(ParticleReal)+NumIntComps()*sizeof(int); #ifdef AMREX_LAZY Lazy::QueueReduction( [=] () mutable { @@ -542,22 +545,27 @@ ParticleContainer_impl::ByteSpre ParallelReduce::Max(mx, IOProc, ParallelContext::CommunicatorSub()); ParallelReduce::Sum(cnt, IOProc, ParallelContext::CommunicatorSub()); - amrex::Print() << "ParticleContainer byte spread across MPI nodes: [" + amrex::Print() << "ParticleContainer spread across MPI nodes - bytes (num particles): [Min: " << mn*sz << " (" << mn << ")" - << " ... " + << ", Max: " << mx*sz << " (" << mx << ")" - << "] total particles: (" << cnt << ")\n"; + << ", Total: " + << cnt*sz + << " (" << cnt << ")]\n"; #ifdef AMREX_LAZY }); #endif + + return {mn*sz, mx*sz, cnt*sz}; } template class Allocator> -void -ParticleContainer_impl::PrintCapacity () const +std::array +ParticleContainer_impl +::PrintCapacity () const { Long cnt = 0; @@ -580,16 +588,18 @@ ParticleContainer_impl::PrintCap ParallelReduce::Max(mx, IOProc, ParallelContext::CommunicatorSub()); ParallelReduce::Sum(cnt, IOProc, ParallelContext::CommunicatorSub()); - amrex::Print() << "ParticleContainer byte spread across MPI nodes: [" + amrex::Print() << "ParticleContainer spread across MPI nodes - bytes: [Min: " << mn - << " (" << mn << ")" - << " ... 
" + << ", Max: " << mx - << " (" << mx << ")" - << "] total memory: (" << cnt << ")\n"; + << ", Total: " + << cnt + << "]\n"; #ifdef AMREX_LAZY }); #endif + + return {mn, mx, cnt}; } template ::Redistribute (int lev_min, int lev_max, int nGrow, int local, bool remove_negative) { + BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms: Redist"); + #ifdef AMREX_USE_GPU if ( Gpu::inLaunchRegion() ) { @@ -1122,6 +1134,8 @@ ParticleContainer_impl #else RedistributeCPU(lev_min, lev_max, nGrow, local, remove_negative); #endif + + BL_PROFILE_SYNC_STOP(); } template ::SortPart for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) { - auto& ptile = ParticlesAt(lev, mfi); - auto& aos = ptile.GetArrayOfStructs(); - const size_t np = aos.numParticles(); - auto pstruct_ptr = aos().dataPtr(); + auto& ptile = ParticlesAt(lev, mfi); + auto& aos = ptile.GetArrayOfStructs(); + auto pstruct_ptr = aos().dataPtr(); + const size_t np = aos.numParticles(); + const size_t np_total = np + aos.numNeighborParticles(); const Box& box = mfi.validbox(); @@ -1164,26 +1179,26 @@ ParticleContainer_impl::SortPart if (memEfficientSort) { { - ParticleVector tmp_particles(np); + ParticleVector tmp_particles(np_total); auto src = ptile.getParticleTileData(); ParticleType* dst = tmp_particles.data(); - AMREX_HOST_DEVICE_FOR_1D( np, i, + AMREX_HOST_DEVICE_FOR_1D( np_total, i, { - dst[i] = src.m_aos[inds[i]]; + dst[i] = i < np ? src.m_aos[inds[i]] : src.m_aos[i]; }); Gpu::streamSynchronize(); ptile.GetArrayOfStructs()().swap(tmp_particles); } - RealVector tmp_real(np); + RealVector tmp_real(np_total); for (int comp = 0; comp < NArrayReal + m_num_runtime_real; ++comp) { auto src = ptile.GetStructOfArrays().GetRealData(comp).data(); ParticleReal* dst = tmp_real.data(); - AMREX_HOST_DEVICE_FOR_1D( np, i, + AMREX_HOST_DEVICE_FOR_1D( np_total, i, { - dst[i] = src[inds[i]]; + dst[i] = i < np ? 
src[inds[i]] : src[i]; }); Gpu::streamSynchronize(); @@ -1191,13 +1206,13 @@ ParticleContainer_impl::SortPart ptile.GetStructOfArrays().GetRealData(comp).swap(tmp_real); } - IntVector tmp_int(np); + IntVector tmp_int(np_total); for (int comp = 0; comp < NArrayInt + m_num_runtime_int; ++comp) { auto src = ptile.GetStructOfArrays().GetIntData(comp).data(); int* dst = tmp_int.data(); - AMREX_HOST_DEVICE_FOR_1D( np, i, + AMREX_HOST_DEVICE_FOR_1D( np_total , i, { - dst[i] = src[inds[i]]; + dst[i] = i < np ? src[inds[i]] : src[i]; }); Gpu::streamSynchronize(); @@ -1207,8 +1222,11 @@ ParticleContainer_impl::SortPart } else { ParticleTileType ptile_tmp; ptile_tmp.define(m_num_runtime_real, m_num_runtime_int); - ptile_tmp.resize(np); + ptile_tmp.resize(np_total); + // copy re-ordered particles gatherParticles(ptile_tmp, ptile, np, m_bins.permutationPtr()); + // copy neighbor particles + amrex::copyParticles(ptile_tmp, ptile, np, np, np_total-np); ptile.swap(ptile_tmp); } } @@ -1271,7 +1289,8 @@ ParticleContainer_impl Vector > new_sizes(num_levels); const auto plo = Geom(0).ProbLoArray(); const auto phi = Geom(0).ProbHiArray(); - const auto rhi = Geom(0).RoundoffHiArray(); + const auto rlo = Geom(0).ProbLoArrayInParticleReal(); + const auto rhi = Geom(0).ProbHiArrayInParticleReal(); const auto is_per = Geom(0).isPeriodicArray(); for (int lev = lev_min; lev <= finest_lev_particles; ++lev) { @@ -1292,7 +1311,7 @@ ParticleContainer_impl "perhaps particles have not been initialized correctly?"); int num_stay = partitionParticlesByDest(src_tile, assign_grid, BufferMap(), - plo, phi, rhi, is_per, lev, gid, tid, + plo, phi, rlo, rhi, is_per, lev, gid, tid, lev_min, lev_max, nGrow, remove_negative); int num_move = np - num_stay; @@ -1575,7 +1594,7 @@ ParticleContainer_impl p.id() = -p.id(); // Invalidate the particle } - } + } else { auto& particles_to_send = tmp_remote[who][thread_num]; auto old_size = particles_to_send.size(); @@ -1627,7 +1646,7 @@ ParticleContainer_impl } } - } 
else{ + } else{ // soa particle auto particle_tile = ptile_ptrs[pmap_it]; if (npart != 0) { @@ -1659,7 +1678,6 @@ ParticleContainer_impl particlePostLocate(p, pld, lev); if (p.id() < 0){ - p = ParticleType(ptd,last); for (int comp = 0; comp < NumRealComps(); comp++) soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; @@ -1687,7 +1705,7 @@ ParticleContainer_impl } p.id() = -p.id(); // Invalidate the particle - } + } } else { auto& particles_to_send = tmp_remote[who][thread_num]; @@ -1712,7 +1730,6 @@ ParticleContainer_impl dst += sizeof(int); } } - p.id() = -p.id(); // Invalidate the particle } @@ -1873,10 +1890,6 @@ RedistributeMPI (std::map >& not_ours, #ifdef AMREX_USE_MPI - int particle_size=0; - int superparticle_size = particle_size + - num_real_comm_comps*sizeof(ParticleReal) + num_int_comm_comps*sizeof(int); - using buffer_type = unsigned long long; std::map > mpi_snd_data; @@ -1996,10 +2009,28 @@ RedistributeMPI (std::map >& not_ours, auto& ptile = m_particles[rcv_levs[ipart]][std::make_pair(rcv_grid[ipart], rcv_tile[ipart])]; - auto p = make_particle{}(ptile.getParticleTileData(),ipart); - - std::memcpy(&p, pbuf, sizeof(ParticleType)); - locateParticle(p, pld, lev_min, lev_max, nGrow); + Particle p; + if constexpr(!ParticleType::is_soa_particle) + { + std::memcpy(&p, pbuf, sizeof(ParticleType)); + } else + { + std::memcpy(&p.pos(0), pbuf , sizeof(ParticleReal)); + std::memcpy(&p.pos(1), pbuf + sizeof(ParticleReal), sizeof(ParticleReal)); + std::memcpy(&p.pos(2), pbuf + 2*sizeof(ParticleReal), sizeof(ParticleReal)); + } + + bool success = Where(p, pld, lev_min, lev_max, 0); + if (!success) + { + success = (nGrow > 0) && Where(p, pld, lev_min, lev_min, nGrow); + pld.m_grown_gridbox = pld.m_gridbox; // reset grown box for subsequent calls. 
+ } + if (!success) + { + amrex::Abort("RedistributeMPI_locate:: invalid particle."); + } + rcv_levs[ipart] = pld.m_lev; rcv_grid[ipart] = pld.m_grid; rcv_tile[ipart] = pld.m_tile; @@ -2024,11 +2055,13 @@ RedistributeMPI (std::map >& not_ours, rcv_tile[ipart])]; char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size; - auto p = make_particle{}(ptile.getParticleTileData(),ipart); + if constexpr(! ParticleType::is_soa_particle) { + ParticleType p; + std::memcpy(&p, pbuf, sizeof(ParticleType)); + pbuf += sizeof(ParticleType); + ptile.push_back(p); + } - std::memcpy(&p, pbuf, sizeof(ParticleType)); - pbuf += sizeof(ParticleType); - ptile.push_back(p); int array_comp_start = AMREX_SPACEDIM + NStructReal; for (int comp = 0; comp < NumRealComps(); ++comp) { if (h_redistribute_real_comp[array_comp_start + comp]) { @@ -2055,7 +2088,7 @@ RedistributeMPI (std::map >& not_ours, ++ipart; } } - + #else Vector, Gpu::HostVector > > host_particles; host_particles.reserve(15); diff --git a/Src/Particle/AMReX_ParticleInit.H b/Src/Particle/AMReX_ParticleInit.H index 7aa2141c0b0..c21d0ea3da7 100644 --- a/Src/Particle/AMReX_ParticleInit.H +++ b/Src/Particle/AMReX_ParticleInit.H @@ -1022,8 +1022,6 @@ InitRandom (Long icount, ParticleLocData pld; - int cnt = 0; - Vector, Gpu::HostVector > > host_particles; host_particles.reserve(15); host_particles.resize(finestLevel()+1); @@ -1079,8 +1077,6 @@ InitRandom (Long icount, for (int i = 0; i < NArrayInt; i++) { host_int_attribs[pld.m_lev][ind][i].push_back(pdata.int_array_data[i]); } - - cnt++; } } diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index 1a44ceefcb8..1e2515cce40 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -42,8 +42,8 @@ struct ParticleTileData ParticleType* AMREX_RESTRICT m_aos; - GpuArray m_rdata; - GpuArray m_idata; + GpuArray m_rdata; + GpuArray m_idata; int m_num_runtime_real; int m_num_runtime_int; @@ -202,7 +202,7 @@ struct 
ParticleTileData } }; -// SOA Particle Structure +// SOA Particle Structure template struct SoAParticle : SoAParticleBase { @@ -217,37 +217,25 @@ struct SoAParticle : SoAParticleBase static Long the_next_id; - SoAParticle (PTD ptd, int const index) + SoAParticle (PTD ptd, int const index) { m_particle_tile_data=ptd; m_index=index; } - + //functions to get id and cpu in the SOA data AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ParticleCPUWrapper cpu () & { - uint64_t unsigned_cpu_value; - unsigned_cpu_value = (uint64_t) this->m_particle_tile_data.m_idata[1][m_index]; - return ParticleCPUWrapper(unsigned_cpu_value); } + int& cpu () & { return this->m_particle_tile_data.m_idata[1][m_index]; } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ParticleIDWrapper id () & { - uint64_t unsigned_id_value; - unsigned_id_value = (uint64_t) this->m_particle_tile_data.m_idata[0][m_index]; - return ParticleIDWrapper(unsigned_id_value); } + int& id () & { return this->m_particle_tile_data.m_idata[0][m_index]; } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ConstParticleCPUWrapper cpu () const & { - uint64_t unsigned_cpu_value; - unsigned_cpu_value = (uint64_t) this->m_particle_tile_data.m_idata[1][m_index]; - return ConstParticleCPUWrapper(unsigned_cpu_value); } + const int& cpu () const & { return this->m_particle_tile_data.m_idata[1][m_index]; } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - ConstParticleIDWrapper id () const & { - uint64_t unsigned_id_value; - unsigned_id_value = (uint64_t) this->m_particle_tile_data.m_idata[0][m_index]; - return ConstParticleIDWrapper(unsigned_id_value); } + const int& id () const & { return this->m_particle_tile_data.m_idata[0][m_index]; } //functions to get positions of the particle in the SOA data @@ -414,8 +402,8 @@ struct ConstParticleTileData Long m_size; const ParticleType* AMREX_RESTRICT m_aos; - GpuArray m_rdata; - GpuArray m_idata; + GpuArray m_rdata; + GpuArray m_idata; auto const* rdata(const int attribute_index) const { diff --git 
a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index 3dfee74e637..e0ec8944361 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -320,7 +320,7 @@ numParticlesOutOfRange (Iterator const& pti, IntVect nGrow) [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple { ParticleType p(tile_data,i); - if ((p.id() < 0)) return false; + if ((p.id() < 0)) { return false; } IntVect iv = IntVect( AMREX_D_DECL(int(amrex::Math::floor((p.pos(0)-plo[0])*dxi[0])), int(amrex::Math::floor((p.pos(1)-plo[1])*dxi[1])), @@ -556,7 +556,8 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool enforcePeriodic (P& p, amrex::GpuArray const& plo, amrex::GpuArray const& phi, - amrex::GpuArray const& rhi, + amrex::GpuArray const& rlo, + amrex::GpuArray const& rhi, amrex::GpuArray const& is_per) noexcept { bool shifted = false; @@ -568,7 +569,9 @@ bool enforcePeriodic (P& p, p.pos(idim) -= static_cast(phi[idim] - plo[idim]); } // clamp to avoid precision issues; - if (p.pos(idim) < plo[idim]) p.pos(idim) = static_cast(plo[idim]); + if (p.pos(idim) < rlo[idim]) { + p.pos(idim) = rlo[idim]; + } shifted = true; } else if (p.pos(idim) < plo[idim]) { @@ -576,8 +579,8 @@ bool enforcePeriodic (P& p, p.pos(idim) += static_cast(phi[idim] - plo[idim]); } // clamp to avoid precision issues; - if (p.pos(idim) >= rhi[idim]) { - p.pos(idim) = static_cast(rhi[idim]); + if (p.pos(idim) > rhi[idim]) { + p.pos(idim) = rhi[idim]; } shifted = true; } @@ -594,7 +597,8 @@ int partitionParticlesByDest (PTile& ptile, const PLocator& ploc, const ParticleBufferMap& pmap, const GpuArray& plo, const GpuArray& phi, - const GpuArray& rhi, + const GpuArray& rlo, + const GpuArray& rhi, const GpuArray& is_per, int lev, int gid, int /*tid*/, int lev_min, int lev_max, int nGrow, bool remove_negative) @@ -641,7 +645,7 @@ partitionParticlesByDest (PTile& ptile, const PLocator& ploc, const ParticleBuff else { auto p_prime = p; - enforcePeriodic(p_prime, plo, phi, rhi, is_per); + 
enforcePeriodic(p_prime, plo, phi, rlo, rhi, is_per); auto tup_prime = ploc(p_prime, lev_min, lev_max, nGrow); assigned_grid = amrex::get<0>(tup_prime); assigned_lev = amrex::get<1>(tup_prime); diff --git a/Src/Particle/AMReX_WriteBinaryParticleData.H b/Src/Particle/AMReX_WriteBinaryParticleData.H index 6c9494f88c5..b5c59174ae3 100644 --- a/Src/Particle/AMReX_WriteBinaryParticleData.H +++ b/Src/Particle/AMReX_WriteBinaryParticleData.H @@ -231,12 +231,26 @@ packIOData (Vector& idata, Vector& rdata, const PC& pc, int l } } + for (int j = 0; j < ptd.m_num_runtime_int; j++) { + if (write_int_comp_d_ptr[PC::SuperParticleType::NInt + j]) { + idata_d_ptr[iout_index] = ptd.m_runtime_idata[j][pindex]; + iout_index++; + } + } + for (int j = 0; j < PC::SuperParticleType::NReal; j++) { if (write_real_comp_d_ptr[j]) { rdata_d_ptr[rout_index] = p.rdata(j); rout_index++; } } + + for (int j = 0; j < ptd.m_num_runtime_real; j++) { + if (write_real_comp_d_ptr[PC::SuperParticleType::NReal + j]) { + rdata_d_ptr[rout_index] = ptd.m_runtime_rdata[j][pindex]; + rout_index++; + } + } } }); @@ -752,7 +766,25 @@ void WriteBinaryParticleDataAsync (PC const& pc, if (np_per_grid_local[lev][mfi.index()] > 0) { const auto& ptile = pc.ParticlesAt(lev, mfi); - new_ptile.resize(np_per_grid_local[lev][mfi.index()]); + + const auto np = np_per_grid_local[lev][mfi.index()]; + + new_ptile.resize(np); + + const auto runtime_real_comps = ptile.NumRuntimeRealComps(); + const auto runtime_int_comps = ptile.NumRuntimeIntComps(); + + constexpr auto NReal = NArrayReal + NStructReal; + constexpr auto NInt = NArrayInt + NStructInt; + + new_ptile.define(runtime_real_comps, runtime_int_comps); + + for (auto comp(0); comp < runtime_real_comps; ++comp) + new_ptile.push_back_real(NReal+comp, np, 0.); + + for (auto comp(0); comp < runtime_int_comps; ++comp) + new_ptile.push_back_int(NInt+comp, np, 0); + amrex::filterParticles(new_ptile, ptile, KeepValidFilter()); } } diff --git 
a/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAllLevels.cpp b/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAllLevels.cpp index b5e48e6e409..4f97cbf3184 100644 --- a/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAllLevels.cpp +++ b/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAllLevels.cpp @@ -35,7 +35,8 @@ AmrCoreAdv::AdvancePhiAllLevels (Real time, Real dt_lev, int /*iteration*/) // State with ghost cells MultiFab Sborder(grids[lev], dmap[lev], phi_new[lev].nComp(), num_grow); - FillPatch(lev, time, Sborder, 0, Sborder.nComp()); + FillPatch(lev, time, Sborder, 0, Sborder.nComp(), + FillPatchType::fillpatch_function); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) diff --git a/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAtLevel.cpp b/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAtLevel.cpp index 3ddd055eda0..7a5e1abbaa7 100644 --- a/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAtLevel.cpp +++ b/Tests/Amr/Advection_AmrCore/Source/AdvancePhiAtLevel.cpp @@ -33,7 +33,8 @@ AmrCoreAdv::AdvancePhiAtLevel (int lev, Real time, Real dt_lev, int /*iteration* // State with ghost cells MultiFab Sborder(grids[lev], dmap[lev], S_new.nComp(), num_grow); - FillPatch(lev, time, Sborder, 0, Sborder.nComp()); + FillPatch(lev, time, Sborder, 0, Sborder.nComp(), + FillPatchType::fillpatch_class); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) diff --git a/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H b/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H index e330d30e740..1b6832d8663 100644 --- a/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H +++ b/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.H @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef AMREX_USE_OMP # include @@ -98,15 +99,18 @@ private: // more flexible version of AverageDown() that lets you average down across multiple levels void AverageDownTo (int crse_lev); + enum class FillPatchType { fillpatch_class, fillpatch_function }; + // compute a new multifab by coping 
in phi from valid region and filling ghost cells // works for single level and 2-level cases (fill fine grid ghost by interpolating from coarse) - void FillPatch (int lev, amrex::Real time, amrex::MultiFab& mf, int icomp, int ncomp); + void FillPatch (int lev, amrex::Real time, amrex::MultiFab& mf, int icomp, + int ncomp, FillPatchType fptype); // fill an entire multifab by interpolating from the coarser level // this comes into play when a new level of refinement appears void FillCoarsePatch (int lev, amrex::Real time, amrex::MultiFab& mf, int icomp, int ncomp); - // utility to copy in data from phi_old and/or phi_new into another multifab + // Pack pointers to phi_old and/or phi_new and associated times. void GetData (int lev, amrex::Real time, amrex::Vector& data, amrex::Vector& datatime); @@ -165,6 +169,9 @@ private: // used in the reflux operation amrex::Vector > flux_reg; + // This is for fillpatch during timestepping, but not for regridding. + amrex::Vector>> fillpatcher; + // Velocity on all faces at all levels amrex::Vector< amrex::Array > facevel; diff --git a/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp b/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp index 62c9dc7417e..3300e4622cc 100644 --- a/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp +++ b/Tests/Amr/Advection_AmrCore/Source/AmrCoreAdv.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -95,6 +94,10 @@ AmrCoreAdv::AmrCoreAdv () // with the lev/lev-1 interface (and has grid spacing associated with lev-1) // therefore flux_reg[0] is never actually used in the reflux operation flux_reg.resize(nlevs_max+1); + + // fillpatcher[lev] is for filling data on level lev using the data on + // lev-1 and lev. 
+ fillpatcher.resize(nlevs_max+1); } AmrCoreAdv::~AmrCoreAdv () @@ -230,7 +233,8 @@ AmrCoreAdv::RemakeLevel (int lev, Real time, const BoxArray& ba, MultiFab new_state(ba, dm, ncomp, ng); MultiFab old_state(ba, dm, ncomp, ng); - FillPatch(lev, time, new_state, 0, ncomp); + // Must use fillpatch_function + FillPatch(lev, time, new_state, 0, ncomp, FillPatchType::fillpatch_function); std::swap(new_state, phi_new[lev]); std::swap(old_state, phi_old[lev]); @@ -257,6 +261,7 @@ AmrCoreAdv::ClearLevel (int lev) phi_new[lev].clear(); phi_old[lev].clear(); flux_reg[lev].reset(nullptr); + fillpatcher[lev].reset(nullptr); } // Make a new level from scratch using provided BoxArray and DistributionMapping. @@ -418,7 +423,8 @@ AmrCoreAdv::AverageDownTo (int crse_lev) // compute a new multifab by coping in phi from valid region and filling ghost cells // works for single level and 2-level cases (fill fine grid ghost by interpolating from coarse) void -AmrCoreAdv::FillPatch (int lev, Real time, MultiFab& mf, int icomp, int ncomp) +AmrCoreAdv::FillPatch (int lev, Real time, MultiFab& mf, int icomp, int ncomp, + FillPatchType fptype) { if (lev == 0) { @@ -450,16 +456,31 @@ AmrCoreAdv::FillPatch (int lev, Real time, MultiFab& mf, int icomp, int ncomp) Interpolater* mapper = &cell_cons_interp; + if (fptype == FillPatchType::fillpatch_class) { + if (fillpatcher[lev] == nullptr) { + fillpatcher[lev] = std::make_unique> + (boxArray(lev ), DistributionMap(lev ), Geom(lev ), + boxArray(lev-1), DistributionMap(lev-1), Geom(lev-1), + mf.nGrowVect(), mf.nComp(), mapper); + } + } + if(Gpu::inLaunchRegion()) { GpuBndryFuncFab gpu_bndry_func(AmrCoreFill{}); PhysBCFunct > cphysbc(geom[lev-1],bcs,gpu_bndry_func); PhysBCFunct > fphysbc(geom[lev],bcs,gpu_bndry_func); - amrex::FillPatchTwoLevels(mf, time, cmf, ctime, fmf, ftime, - 0, icomp, ncomp, geom[lev-1], geom[lev], - cphysbc, 0, fphysbc, 0, refRatio(lev-1), - mapper, bcs, 0); + if (fptype == FillPatchType::fillpatch_class) { + 
fillpatcher[lev]->fill(mf, mf.nGrowVect(), time, + cmf, ctime, fmf, ftime, 0, icomp, ncomp, + cphysbc, 0, fphysbc, 0, bcs, 0); + } else { + amrex::FillPatchTwoLevels(mf, time, cmf, ctime, fmf, ftime, + 0, icomp, ncomp, geom[lev-1], geom[lev], + cphysbc, 0, fphysbc, 0, refRatio(lev-1), + mapper, bcs, 0); + } } else { @@ -467,10 +488,16 @@ AmrCoreAdv::FillPatch (int lev, Real time, MultiFab& mf, int icomp, int ncomp) PhysBCFunct cphysbc(geom[lev-1],bcs,bndry_func); PhysBCFunct fphysbc(geom[lev],bcs,bndry_func); - amrex::FillPatchTwoLevels(mf, time, cmf, ctime, fmf, ftime, - 0, icomp, ncomp, geom[lev-1], geom[lev], - cphysbc, 0, fphysbc, 0, refRatio(lev-1), - mapper, bcs, 0); + if (fptype == FillPatchType::fillpatch_class) { + fillpatcher[lev]->fill(mf, mf.nGrowVect(), time, + cmf, ctime, fmf, ftime, 0, icomp, ncomp, + cphysbc, 0, fphysbc, 0, bcs, 0); + } else { + amrex::FillPatchTwoLevels(mf, time, cmf, ctime, fmf, ftime, + 0, icomp, ncomp, geom[lev-1], geom[lev], + cphysbc, 0, fphysbc, 0, refRatio(lev-1), + mapper, bcs, 0); + } } } } @@ -513,21 +540,18 @@ AmrCoreAdv::FillCoarsePatch (int lev, Real time, MultiFab& mf, int icomp, int nc } } -// utility to copy in data from phi_old and/or phi_new into another multifab void AmrCoreAdv::GetData (int lev, Real time, Vector& data, Vector& datatime) { data.clear(); datatime.clear(); - const Real teps = (t_new[lev] - t_old[lev]) * 1.e-3; - - if (time > t_new[lev] - teps && time < t_new[lev] + teps) + if (amrex::almostEqual(time, t_new[lev], 5)) { data.push_back(&phi_new[lev]); datatime.push_back(t_new[lev]); } - else if (time > t_old[lev] - teps && time < t_old[lev] + teps) + else if (amrex::almostEqual(time, t_old[lev], 5)) { data.push_back(&phi_old[lev]); datatime.push_back(t_old[lev]); @@ -631,6 +655,8 @@ AmrCoreAdv::timeStepWithSubcycling (int lev, Real time, int iteration) } AverageDownTo(lev); // average lev+1 down to lev + + fillpatcher[lev+1].reset(); // Because the data on lev have changed. 
} @@ -694,6 +720,10 @@ AmrCoreAdv::timeStepNoSubcycling (Real time, int iteration) // Make sure the coarser levels are consistent with the finer levels AverageDown (); + for (auto& fp : fillpatcher) { + fp.reset(); // Because the data have changed. + } + for (int lev = 0; lev <= finest_level; lev++) ++istep[lev]; diff --git a/Tests/Amr/Advection_AmrCore/Source/DefineVelocity.cpp b/Tests/Amr/Advection_AmrCore/Source/DefineVelocity.cpp index 995393e05f8..4dc1076dec8 100644 --- a/Tests/Amr/Advection_AmrCore/Source/DefineVelocity.cpp +++ b/Tests/Amr/Advection_AmrCore/Source/DefineVelocity.cpp @@ -39,7 +39,7 @@ AmrCoreAdv::DefineVelocityAtLevel (int lev, Real time) facevel[lev][2].array(mfi)) }; const Box& psibox = Box(IntVect(AMREX_D_DECL(std::min(ngbxx.smallEnd(0)-1, ngbxy.smallEnd(0)-1), - std::min(ngbxx.smallEnd(1)-1, ngbxy.smallEnd(0)-1), + std::min(ngbxx.smallEnd(1)-1, ngbxy.smallEnd(1)-1), 0)), IntVect(AMREX_D_DECL(std::max(ngbxx.bigEnd(0), ngbxy.bigEnd(0)+1), std::max(ngbxx.bigEnd(1)+1, ngbxy.bigEnd(1)), diff --git a/Tests/Amr/Advection_AmrCore/Source/Src_K/Make.package b/Tests/Amr/Advection_AmrCore/Source/Src_K/Make.package index e98f493727c..5254ff6f63f 100644 --- a/Tests/Amr/Advection_AmrCore/Source/Src_K/Make.package +++ b/Tests/Amr/Advection_AmrCore/Source/Src_K/Make.package @@ -1,3 +1,3 @@ CEXE_headers += Adv_K.H -CEXE_headers += compute_flux_K_$(DIM).H +CEXE_headers += compute_flux_$(DIM)D_K.H CEXE_headers += slope_K.H diff --git a/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.H b/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.H index 1e5bacbc497..faf56357e29 100644 --- a/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.H +++ b/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.H @@ -231,7 +231,7 @@ protected: /* * The data. */ - amrex::FluxRegister* flux_reg; + std::unique_ptr flux_reg; /* * Static data members. 
diff --git a/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.cpp b/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.cpp index db69749a85f..7fae3038f72 100644 --- a/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.cpp +++ b/Tests/Amr/Advection_AmrLevel/Source/AmrLevelAdv.cpp @@ -36,7 +36,6 @@ int AmrLevelAdv::do_tracers = 0; */ AmrLevelAdv::AmrLevelAdv () { - flux_reg = 0; } /** @@ -51,9 +50,9 @@ AmrLevelAdv::AmrLevelAdv (Amr& papa, : AmrLevel(papa,lev,level_geom,bl,dm,time) { - flux_reg = 0; - if (level > 0 && do_reflux) - flux_reg = new FluxRegister(grids,dmap,crse_ratio,level,NUM_STATE); + if (level > 0 && do_reflux) { + flux_reg = std::make_unique(grids,dmap,crse_ratio,level,NUM_STATE); + } } /** @@ -61,7 +60,6 @@ AmrLevelAdv::AmrLevelAdv (Amr& papa, */ AmrLevelAdv::~AmrLevelAdv () { - delete flux_reg; } /** @@ -74,9 +72,9 @@ AmrLevelAdv::restart (Amr& papa, { AmrLevel::restart(papa,is,bReadSpecial); - BL_ASSERT(flux_reg == 0); - if (level > 0 && do_reflux) - flux_reg = new FluxRegister(grids,dmap,crse_ratio,level,NUM_STATE); + if (level > 0 && do_reflux) { + flux_reg = std::make_unique(grids,dmap,crse_ratio,level,NUM_STATE); + } } /** @@ -88,11 +86,11 @@ AmrLevelAdv::checkPoint (const std::string& dir, VisMF::How how, bool dump_old) { - AmrLevel::checkPoint(dir, os, how, dump_old); + AmrLevel::checkPoint(dir, os, how, dump_old); #ifdef AMREX_PARTICLES - if (do_tracers && level == 0) { - TracerPC->WritePlotFile(dir, "Tracer"); - } + if (do_tracers && level == 0) { + TracerPC->WritePlotFile(dir, "Tracer"); + } #endif } @@ -285,7 +283,8 @@ AmrLevelAdv::advance (Real time, // State with ghost cells MultiFab Sborder(grids, dmap, NUM_STATE, NUM_GROW); - FillPatch(*this, Sborder, NUM_GROW, time, Phi_Type, 0, NUM_STATE); + // We use FillPatcher to do fillpatch here if we can + FillPatcherFill(Sborder, 0, NUM_STATE, NUM_GROW, time, Phi_Type, 0); // MF to hold the mac velocity MultiFab Umac[BL_SPACEDIM]; @@ -601,11 +600,19 @@ AmrLevelAdv::post_timestep (int iteration) // 
int finest_level = parent->finestLevel(); - if (do_reflux && level < finest_level) + if (do_reflux && level < finest_level) { reflux(); + } - if (level < finest_level) + if (level < finest_level) { avgDown(); + } + + if (level < finest_level) { + // fillpatcher on level+1 needs to be reset because data on this + // level have changed. + getLevel(level+1).resetFillPatcher(); + } #ifdef AMREX_PARTICLES if (TracerPC) diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt index 50cc2bb8cb2..8d318f918b8 100644 --- a/Tests/CMakeLists.txt +++ b/Tests/CMakeLists.txt @@ -1,7 +1,7 @@ # # List of subdirectories to search for CMakeLists. # -set( AMREX_TESTS_SUBDIRS AsyncOut MultiBlock Amr CLZ Parser) +set( AMREX_TESTS_SUBDIRS AsyncOut MultiBlock Amr CLZ Parser CTOParFor) if (AMReX_PARTICLES) list(APPEND AMREX_TESTS_SUBDIRS Particles) diff --git a/Tests/CTOParFor/CMakeLists.txt b/Tests/CTOParFor/CMakeLists.txt new file mode 100644 index 00000000000..57c1e7715e2 --- /dev/null +++ b/Tests/CTOParFor/CMakeLists.txt @@ -0,0 +1,7 @@ +set(_sources main.cpp) +set(_input_files) + +setup_test(_sources _input_files) + +unset(_sources) +unset(_input_files) diff --git a/Tests/CTOParFor/GNUmakefile b/Tests/CTOParFor/GNUmakefile new file mode 100644 index 00000000000..0dbc65578af --- /dev/null +++ b/Tests/CTOParFor/GNUmakefile @@ -0,0 +1,20 @@ +AMREX_HOME = ../../ + +DEBUG = FALSE +DIM = 3 +COMP = gcc + +USE_MPI = FALSE +USE_OMP = FALSE +USE_CUDA = FALSE + +TINY_PROFILE = FALSE + +CXXSTD = c++17 + +include $(AMREX_HOME)/Tools/GNUMake/Make.defs + +include ./Make.package +include $(AMREX_HOME)/Src/Base/Make.package + +include $(AMREX_HOME)/Tools/GNUMake/Make.rules diff --git a/Tests/CTOParFor/Make.package b/Tests/CTOParFor/Make.package new file mode 100644 index 00000000000..4497b0e25b9 --- /dev/null +++ b/Tests/CTOParFor/Make.package @@ -0,0 +1,4 @@ +CEXE_sources += main.cpp + + + diff --git a/Tests/CTOParFor/main.cpp b/Tests/CTOParFor/main.cpp new file mode 100644 index 
00000000000..0cf1d7ea35a --- /dev/null +++ b/Tests/CTOParFor/main.cpp @@ -0,0 +1,64 @@ +#include +#include + +using namespace amrex; + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); +#if (__cplusplus >= 201703L) + { + enum A_options: int { + A0 = 0, A1 + }; + + enum B_options: int { + B0 = 0, B1, B2 + }; + + Box box(IntVect(0),IntVect(7)); + IArrayBox fab(box,2); + fab.setVal(-10); + + auto const& arr = fab.array(); + + for (int ia = 0; ia < 2; ++ia) { + for (int ib = 0; ib < 3; ++ib) { + ParallelFor(TypeList, + CompileTimeOptions>{}, + {ia, ib}, + box, [=] AMREX_GPU_DEVICE (int i, int j, int k, + auto A_control, + auto B_control) + { + auto const& larr = arr; + int a, b; + if constexpr (A_control.value == 0) { + a = 0; + } else if constexpr (A_control.value == 1) { + a = 1; + } else { + a = -1; + } + if constexpr (B_control.value == 0) { + b = 0; + } else if constexpr (B_control.value == 1) { + b = 1; + } else if constexpr (B_control.value == 2) { + b = 2; + } else if constexpr (B_control.value == 3) { + b = 3; + } + larr(i,j,k) = a*10 + b; + }); + + auto s = fab.sum(0); + AMREX_ALWAYS_ASSERT(s == box.numPts()*(ia*10+ib)); + } + } + } +#else + amrex::Print() << "This test requires C++17." 
<< std::endl; +#endif + amrex::Finalize(); +} diff --git a/Tests/EB/CNS/Source/main.cpp b/Tests/EB/CNS/Source/main.cpp index aa851c47956..393431f8f79 100644 --- a/Tests/EB/CNS/Source/main.cpp +++ b/Tests/EB/CNS/Source/main.cpp @@ -53,7 +53,13 @@ int main (int argc, char* argv[]) AmrLevel::SetEBSupportLevel(EBSupport::full); AmrLevel::SetEBMaxGrowCells(CNS::numGrow(),4,2); - initialize_EB2(amr.Geom(amr.maxLevel()), amr.maxLevel(), amr.maxLevel()); + int max_eb_level = amr.maxLevel(); + ParmParse pp("amr"); + pp.query("max_eb_level", max_eb_level); + initialize_EB2(amr.Geom(max_eb_level), max_eb_level, max_eb_level); + if (max_eb_level < amr.maxLevel()) { + EB2::addFineLevels(amr.maxLevel() - max_eb_level); + } amr.init(strt_time,stop_time); diff --git a/Tests/GPU/CNS/Source/CNS.H b/Tests/GPU/CNS/Source/CNS.H index 877f0b523da..eedb7d486ba 100644 --- a/Tests/GPU/CNS/Source/CNS.H +++ b/Tests/GPU/CNS/Source/CNS.H @@ -157,6 +157,8 @@ protected: static int do_reflux; + static int rk_order; + static bool do_visc; static bool use_const_visc; diff --git a/Tests/GPU/CNS/Source/CNS.cpp b/Tests/GPU/CNS/Source/CNS.cpp index c3b5e2fb600..1a073c68c8a 100644 --- a/Tests/GPU/CNS/Source/CNS.cpp +++ b/Tests/GPU/CNS/Source/CNS.cpp @@ -19,6 +19,7 @@ int CNS::verbose = 0; IntVect CNS::hydro_tile_size {AMREX_D_DECL(1024,16,16)}; Real CNS::cfl = 0.3; int CNS::do_reflux = 1; +int CNS::rk_order = 2; int CNS::refine_max_dengrad_lev = -1; Real CNS::refine_dengrad = 1.0e10; @@ -241,6 +242,9 @@ CNS::post_timestep (int /*iteration*/) if (level < parent->finestLevel()) { avgDown(); + // fillpatcher on level+1 needs to be reset because data on this + // level have changed. 
+ getLevel(level+1).resetFillPatcher(); } } @@ -354,6 +358,7 @@ CNS::read_params () } pp.query("do_reflux", do_reflux); + pp.query("rk_order", rk_order); pp.query("do_visc", do_visc); diff --git a/Tests/GPU/CNS/Source/CNS_advance.cpp b/Tests/GPU/CNS/Source/CNS_advance.cpp index c086cac0e9f..99749dded19 100644 --- a/Tests/GPU/CNS/Source/CNS_advance.cpp +++ b/Tests/GPU/CNS/Source/CNS_advance.cpp @@ -7,7 +7,7 @@ using namespace amrex; Real -CNS::advance (Real time, Real dt, int /*iteration*/, int /*ncycle*/) +CNS::advance (Real time, Real dt, int iteration, int ncycle) { BL_PROFILE("CNS::advance()"); @@ -16,11 +16,6 @@ CNS::advance (Real time, Real dt, int /*iteration*/, int /*ncycle*/) state[i].swapTimeLevels(dt); } - MultiFab& S_new = get_new_data(State_Type); - MultiFab& S_old = get_old_data(State_Type); - MultiFab dSdt(grids,dmap,NUM_STATE,0,MFInfo(),Factory()); - MultiFab Sborder(grids,dmap,NUM_STATE,NUM_GROW,MFInfo(),Factory()); - FluxRegister* fr_as_crse = nullptr; if (do_reflux && level < parent->finestLevel()) { CNS& fine_level = getLevel(level+1); @@ -36,23 +31,14 @@ CNS::advance (Real time, Real dt, int /*iteration*/, int /*ncycle*/) fr_as_crse->setVal(Real(0.0)); } - // RK2 stage 1 - FillPatch(*this, Sborder, NUM_GROW, time, State_Type, 0, NUM_STATE); - compute_dSdt(Sborder, dSdt, Real(0.5)*dt, fr_as_crse, fr_as_fine); - // U^* = U^n + dt*dUdt^n - MultiFab::LinComb(S_new, Real(1.0), Sborder, 0, dt, dSdt, 0, 0, NUM_STATE, 0); - computeTemp(S_new,0); - - // RK2 stage 2 - // After fillpatch Sborder = U^n+dt*dUdt^n - FillPatch(*this, Sborder, NUM_GROW, time+dt, State_Type, 0, NUM_STATE); - compute_dSdt(Sborder, dSdt, Real(0.5)*dt, fr_as_crse, fr_as_fine); - // S_new = 0.5*(Sborder+S_old) = U^n + 0.5*dt*dUdt^n - MultiFab::LinComb(S_new, Real(0.5), Sborder, 0, Real(0.5), S_old, 0, 0, NUM_STATE, 0); - // S_new += 0.5*dt*dSdt - MultiFab::Saxpy(S_new, Real(0.5)*dt, dSdt, 0, 0, NUM_STATE, 0); - // We now have S_new = U^{n+1} = (U^n+0.5*dt*dUdt^n) + 0.5*dt*dUdt^* - 
computeTemp(S_new,0); + RK(rk_order, State_Type, time, dt, iteration, ncycle, + // Given state S, compute dSdt. dtsub is needed for flux register operations + [&] (int /*stage*/, MultiFab& dSdt, MultiFab const& S, + Real /*t*/, Real dtsub) { + compute_dSdt(S, dSdt, dtsub, fr_as_crse, fr_as_fine); + }, + // Optional. In case if there is anything needed after each RK substep. + [&] (int /*stage*/, MultiFab& S) { computeTemp(S,0); }); return dt; } @@ -254,5 +240,3 @@ CNS::compute_dSdt (const MultiFab& S, MultiFab& dSdt, Real dt, } } } - - diff --git a/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H b/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H index b9bf5a18f78..75f4f784fad 100644 --- a/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H +++ b/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H @@ -17,24 +17,24 @@ cns_diffcoef (int i, int j, int k, { using amrex::Real; - coefs(i,j,k,CETA) = parm.C_S * std::sqrt(q(i,j,k,QTEMP)) * q(i,j,k,QTEMP) / (q(i,j,k,QTEMP)+parm.T_S); - coefs(i,j,k,CXI) = Real(0.0); - coefs(i,j,k,CLAM) = coefs(i,j,k,CETA)*parm.cp/parm.Pr; + coefs(i,j,k,CETA) = parm.C_S * std::sqrt(q(i,j,k,QTEMP)) * q(i,j,k,QTEMP) / (q(i,j,k,QTEMP)+parm.T_S); + coefs(i,j,k,CXI) = Real(0.0); + coefs(i,j,k,CLAM) = coefs(i,j,k,CETA)*parm.cp/parm.Pr; } AMREX_GPU_DEVICE inline void cns_constcoef (int i, int j, int k, - amrex::Array4 const& q, + amrex::Array4 const& /*q*/, amrex::Array4 const& coefs, Parm const& parm) noexcept { using amrex::Real; - coefs(i,j,k,CETA) = parm.const_visc_mu; - coefs(i,j,k,CXI) = parm.const_visc_ki; - coefs(i,j,k,CLAM) = parm.const_lambda; + coefs(i,j,k,CETA) = parm.const_visc_mu; + coefs(i,j,k,CXI) = parm.const_visc_ki; + coefs(i,j,k,CLAM) = parm.const_lambda; } AMREX_GPU_DEVICE @@ -45,7 +45,7 @@ cns_diff_x (int i, int j, int k, amrex::Array4 const& coeffs, amrex::GpuArray const& dxinv, amrex::Array4 const& fx, - Parm const& parm) noexcept + Parm const& /*parm*/) noexcept { using amrex::Real; @@ -81,7 +81,7 @@ cns_diff_y (int i, int j, int 
k, amrex::Array4 const& q, amrex::Array4 const& coeffs, amrex::GpuArray const& dxinv, amrex::Array4 const& fy, - Parm const& parm) noexcept + Parm const& /*parm*/) noexcept { using amrex::Real; @@ -119,7 +119,7 @@ cns_diff_z (int i, int j, int k, amrex::Array4 const& coeffs, amrex::GpuArray const& dxinv, amrex::Array4 const& fz, - Parm const& parm) noexcept + Parm const& /*parm*/) noexcept { using amrex::Real; diff --git a/Tests/LinearSolvers/CellEB2/inputs.rt.2d b/Tests/LinearSolvers/CellEB2/inputs.rt.2d index 8dfd8a7bb3f..4afdf526259 100644 --- a/Tests/LinearSolvers/CellEB2/inputs.rt.2d +++ b/Tests/LinearSolvers/CellEB2/inputs.rt.2d @@ -11,6 +11,7 @@ max_level = 1 n_cell = 128 max_grid_size = 64 eb2.max_grid_size = 32 +eb2.num_coarsen_opt=3 eb2.geom_type = sphere eb2.sphere_center = 0.5 0.5 0.5 diff --git a/Tests/LinearSolvers/CellEB2/inputs.rt.3d b/Tests/LinearSolvers/CellEB2/inputs.rt.3d index 9a8037a68c0..64fcef6281b 100644 --- a/Tests/LinearSolvers/CellEB2/inputs.rt.3d +++ b/Tests/LinearSolvers/CellEB2/inputs.rt.3d @@ -11,6 +11,7 @@ max_level = 1 n_cell = 128 max_grid_size = 64 eb2.max_grid_size = 32 +eb2.num_coarsen_opt=3 eb2.geom_type = sphere eb2.sphere_center = 0.5 0.5 0.5 diff --git a/Tools/AMRProfParser/GNUmakefile b/Tools/AMRProfParser/GNUmakefile index 619d67a557a..59fd2a54b0c 100644 --- a/Tools/AMRProfParser/GNUmakefile +++ b/Tools/AMRProfParser/GNUmakefile @@ -23,7 +23,6 @@ USE_MPI = FALSE USE_OMP = FALSE EBASE = amrprofparser BL_NO_FORT = FALSE -USE_CXX11 = TRUE include $(AMREX_HOME)/Tools/GNUMake/Make.defs include $(AMREX_HOME)/Src/Base/Make.package diff --git a/Tools/Backtrace/parse_bt.py b/Tools/Backtrace/parse_bt.py index ce4a6684911..dd0234f9120 100755 --- a/Tools/Backtrace/parse_bt.py +++ b/Tools/Backtrace/parse_bt.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import sys import re diff --git a/Tools/CMake/AMReXConfig.cmake.in b/Tools/CMake/AMReXConfig.cmake.in index 6b0cdd3fd74..64a112da181 100644 --- 
a/Tools/CMake/AMReXConfig.cmake.in +++ b/Tools/CMake/AMReXConfig.cmake.in @@ -223,10 +223,12 @@ endif () # CUDA # # AMReX 21.06+ supports CUDA_ARCHITECTURES -if(CMAKE_VERSION VERSION_LESS 3.20) - if (@AMReX_CUDA@) - include(AMReX_SetupCUDA) - endif () +if (@AMReX_CUDA@) + if (CMAKE_VERSION VERSION_LESS 3.20) + include(AMReX_SetupCUDA) + else () + find_dependency(CUDAToolkit REQUIRED) + endif () endif () include( "${CMAKE_CURRENT_LIST_DIR}/AMReXTargets.cmake" ) diff --git a/Tools/CMake/AMReXFlagsTargets.cmake b/Tools/CMake/AMReXFlagsTargets.cmake index 64dcf3f3a5f..2e89c32fddc 100644 --- a/Tools/CMake/AMReXFlagsTargets.cmake +++ b/Tools/CMake/AMReXFlagsTargets.cmake @@ -82,15 +82,15 @@ target_compile_options( Flags_CXX $<${_cxx_cray_dbg}:-O0> $<${_cxx_cray_rwdbg}:> $<${_cxx_cray_rel}:> - $<${_cxx_clang_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-pass-failed> - $<${_cxx_clang_rwdbg}:-Wno-pass-failed> - $<${_cxx_clang_rel}:-Wno-pass-failed> - $<${_cxx_appleclang_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-pass-failed> - $<${_cxx_appleclang_rwdbg}:-Wno-pass-failed> - $<${_cxx_appleclang_rel}:-Wno-pass-failed> - $<${_cxx_intelllvm_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-pass-failed> - $<${_cxx_intelllvm_rwdbg}:-Wno-pass-failed> - $<${_cxx_intelllvm_rel}:-Wno-pass-failed> + $<${_cxx_clang_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable> + $<${_cxx_clang_rwdbg}:> + $<${_cxx_clang_rel}:> + $<${_cxx_appleclang_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable> + $<${_cxx_appleclang_rwdbg}:> + $<${_cxx_appleclang_rel}:> + $<${_cxx_intelllvm_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable> + $<${_cxx_intelllvm_rwdbg}:> + $<${_cxx_intelllvm_rel}:> ) # diff --git a/Tools/CMake/AMReXParallelBackends.cmake 
b/Tools/CMake/AMReXParallelBackends.cmake index ebf397266f8..61b563f7c51 100644 --- a/Tools/CMake/AMReXParallelBackends.cmake +++ b/Tools/CMake/AMReXParallelBackends.cmake @@ -198,10 +198,12 @@ if (AMReX_HIP) unset(_valid_hip_compilers) if(NOT DEFINED HIP_PATH) - if(NOT DEFINED ENV{HIP_PATH}) - set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed") - else() + if(DEFINED ENV{HIP_PATH}) set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed") + elseif(DEFINED ENV{ROCM_PATH}) + set(HIP_PATH "$ENV{ROCM_PATH}/hip" CACHE PATH "Path to which HIP has been installed") + else() + set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed") endif() endif() @@ -255,9 +257,15 @@ if (AMReX_HIP) if(AMReX_ROCTX) # To be modernized in the future, please see: # https://github.com/ROCm-Developer-Tools/roctracer/issues/56 - target_include_directories(amrex PUBLIC ${HIP_PATH}/../roctracer/include ${HIP_PATH}/../rocprofiler/include) - target_link_libraries(amrex PUBLIC "-L${HIP_PATH}/../roctracer/lib/ -lroctracer64" "-L${HIP_PATH}/../roctracer/lib -lroctx64") - endif () + target_include_directories(amrex SYSTEM PUBLIC + ${HIP_PATH}/../roctracer/include + ${HIP_PATH}/../rocprofiler/include + ) + target_link_libraries(amrex PUBLIC + "-L${HIP_PATH}/../roctracer/lib -lroctracer64" + "-L${HIP_PATH}/../roctracer/lib -lroctx64" + ) + endif() target_link_libraries(amrex PUBLIC hip::hiprand roc::rocrand roc::rocprim) # avoid forcing the rocm LLVM flags on a gfortran @@ -271,7 +279,7 @@ if (AMReX_HIP) # else there will be a runtime issue (cannot find # missing gpu devices) target_compile_options(amrex PUBLIC - $<$:--amdgpu-target=${AMReX_AMD_ARCH_HIPCC} -Wno-pass-failed>) + $<$:--amdgpu-target=${AMReX_AMD_ARCH_HIPCC}>) endif() target_compile_options(amrex PUBLIC $<$:-m64>) diff --git a/Tools/CMake/AMReXSYCL.cmake b/Tools/CMake/AMReXSYCL.cmake index 8e6c7f2f4d5..007b5f321fe 100644 --- a/Tools/CMake/AMReXSYCL.cmake +++ 
b/Tools/CMake/AMReXSYCL.cmake @@ -45,7 +45,7 @@ target_compile_features(SYCL INTERFACE cxx_std_17) # target_compile_options( SYCL INTERFACE - $<${_cxx_dpcpp}:-Wno-error=sycl-strict -Wno-pass-failed -fsycl> + $<${_cxx_dpcpp}:-Wno-error=sycl-strict -fsycl> $<${_cxx_dpcpp}:$<$:-fsycl-device-code-split=per_kernel>>) # temporary work-around for DPC++ beta08 bug diff --git a/Tools/CMake/AMReXThirdPartyLibraries.cmake b/Tools/CMake/AMReXThirdPartyLibraries.cmake index 1afbcac4ee2..2b0a90febe1 100644 --- a/Tools/CMake/AMReXThirdPartyLibraries.cmake +++ b/Tools/CMake/AMReXThirdPartyLibraries.cmake @@ -45,7 +45,7 @@ endif () # Sensei # if (AMReX_SENSEI) - find_package(SENSEI REQUIRED) + find_package( SENSEI 4.0.0 REQUIRED ) target_link_libraries( amrex PUBLIC sensei ) endif () diff --git a/Tools/CMake/AMReXTypecheck.cmake b/Tools/CMake/AMReXTypecheck.cmake index 926fcda9daf..0b68fb8c274 100644 --- a/Tools/CMake/AMReXTypecheck.cmake +++ b/Tools/CMake/AMReXTypecheck.cmake @@ -250,7 +250,7 @@ function( add_typecheck_target _target) add_custom_command( OUTPUT ${_cppd_file} COMMAND ${CMAKE_C_COMPILER} - ARGS ${_cxx_defines} ${_includes} -E -P -x c -std=c99 ${_fullname} > ${_cppd_file} + ARGS ${_cxx_defines} ${_includes} -E -P -x c -std=c11 ${_fullname} > ${_cppd_file} COMMAND sed ARGS -i -e 's/amrex::Real/${AMREX_REAL}/g' ${_cppd_file} COMMAND sed diff --git a/Tools/CMake/AMReX_Config.cmake b/Tools/CMake/AMReX_Config.cmake index 1754b339094..c842db1e136 100644 --- a/Tools/CMake/AMReX_Config.cmake +++ b/Tools/CMake/AMReX_Config.cmake @@ -37,22 +37,18 @@ function (configure_amrex) # # Setup compilers # - # Set C++ standard and disable compiler-specific extensions, like "-std=gnu++14" for GNU + # Set C++ standard and disable compiler-specific extensions, like "-std=gnu++17" for GNU # This will also enforce the same standard with the CUDA compiler # Moreover, it will also enforce such standard on all the consuming targets # set_target_properties(amrex PROPERTIES CXX_EXTENSIONS OFF) - 
# minimum: C++14 on Linux, C++17 on Windows, C++17 for dpc++ and hip - if (AMReX_DPCPP OR AMReX_HIP) - target_compile_features(amrex PUBLIC cxx_std_17) - else () - target_compile_features(amrex PUBLIC $,Windows>,cxx_std_17,cxx_std_14>) - endif () + # minimum: C++17 + target_compile_features(amrex PUBLIC cxx_std_17) if (AMReX_CUDA) set_target_properties(amrex PROPERTIES CUDA_EXTENSIONS OFF) - # minimum: C++14 on Linux, C++17 on Windows - target_compile_features(amrex PUBLIC $,Windows>,cuda_std_17,cuda_std_14>) + # minimum: C++17 + target_compile_features(amrex PUBLIC cuda_std_17) endif() # diff --git a/Tools/C_scripts/describe_sources.py b/Tools/C_scripts/describe_sources.py index c49d16694a9..97cfe5e1e1c 100755 --- a/Tools/C_scripts/describe_sources.py +++ b/Tools/C_scripts/describe_sources.py @@ -1,10 +1,6 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import sys - -if sys.version_info < (2, 7): - sys.exit("ERROR: need python 2.7 or later for dep.py") - import argparse import os import subprocess diff --git a/Tools/C_scripts/gatherbuildtime.py b/Tools/C_scripts/gatherbuildtime.py index 082ec766c28..b0b1740847f 100755 --- a/Tools/C_scripts/gatherbuildtime.py +++ b/Tools/C_scripts/gatherbuildtime.py @@ -1,11 +1,7 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 -from __future__ import print_function import sys, os, glob, operator, time -if sys.version_info < (2, 7): - sys.exit("ERROR: need python 2.7 or later for dep.py") - if __name__ == "__main__": dt = float(sys.argv[3])-float(sys.argv[2]) hours, rem = divmod(dt, 3600) diff --git a/Tools/C_scripts/makebuildinfo_C.py b/Tools/C_scripts/makebuildinfo_C.py index 8a05cd3f75d..07f31c0585a 100755 --- a/Tools/C_scripts/makebuildinfo_C.py +++ b/Tools/C_scripts/makebuildinfo_C.py @@ -186,11 +186,11 @@ def runcommand(command): out = p.stdout.read() return out.strip().decode("ascii") -def get_git_hash(d): +def get_git_hash(d, git_style): cwd = os.getcwd() os.chdir(d) try: - ghash = runcommand("git describe --always 
--tags --dirty") + ghash = runcommand("git describe " + git_style) except: ghash = "" os.chdir(cwd) @@ -259,6 +259,10 @@ def get_git_hash(d): help="the full path to the build directory that corresponds to build_git_name", type=str, default="") + parser.add_argument("--GIT_STYLE", + help="style options for the 'git describe' command used to construct hash strings", + type=str, default="--always --tags --dirty") + # parse and convert to a dictionary args = parser.parse_args() @@ -281,7 +285,7 @@ def get_git_hash(d): git_hashes = [] for d in GIT: if d and os.path.isdir(d): - git_hashes.append(get_git_hash(d)) + git_hashes.append(get_git_hash(d, args.GIT_STYLE)) else: git_hashes.append("") @@ -291,7 +295,7 @@ def get_git_hash(d): except: build_git_hash = "directory not valid" else: - build_git_hash = get_git_hash(args.build_git_dir) + build_git_hash = get_git_hash(args.build_git_dir, args.GIT_STYLE) os.chdir(running_dir) else: build_git_hash = "" diff --git a/Tools/CompileTesting/compiletesting.py b/Tools/CompileTesting/compiletesting.py index 129e83ca960..9cb5f59bac5 100755 --- a/Tools/CompileTesting/compiletesting.py +++ b/Tools/CompileTesting/compiletesting.py @@ -1,6 +1,5 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 -from __future__ import print_function import sys import os import shlex @@ -148,4 +147,3 @@ def run(command, outfile=None): if __name__ == "__main__": compiletesting(sys.argv[1:]) - diff --git a/Tools/F_scripts/dep.py b/Tools/F_scripts/dep.py index 894dcdb65e6..24bd8318fb8 100755 --- a/Tools/F_scripts/dep.py +++ b/Tools/F_scripts/dep.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # automatically generate Makefile dependencies for Fortran 90 source. # @@ -20,18 +20,7 @@ # (e.g. iso_c_binding). 
Add any system-provided modules to the # `IGNORES` list below -from __future__ import print_function - import sys - -if sys.version_info < (2, 7): - sys.exit("ERROR: need python 2.7 or later for dep.py") - -if sys.version[0] == "2": - reload(sys) - sys.setdefaultencoding('latin-1') - - import io import re import os diff --git a/Tools/F_scripts/f90doc/README b/Tools/F_scripts/f90doc/README deleted file mode 100644 index 6edb2de011f..00000000000 --- a/Tools/F_scripts/f90doc/README +++ /dev/null @@ -1,36 +0,0 @@ -This is f90doc version 0.3.4, a documentation tool for Fortran 90. For -more information (e.g., documentation), see - - http://theory.lcs.mit.edu/~edemaine/f90doc - -or contact Erik Demaine (edemaine@mit.edu). Comments, suggestions, -criticisms, and bug reports go to this e-mail address. If you modify f90doc or -use it in a serious way, please contact me (I'd be interested). - -COPYRIGHT - -f90doc is freeware. If you use it in a research or commercial project, you -must acknowledge the software and its author. I would also appreciate it if -you contact me -- I'd like to know how f90doc is used. If you base code on -f90doc, you must acknowledge this. Again, please let me know if you think your -changes would be at all useful to the rest of the world (even if you are not -willing to share it, the ideas may be useful). - -This information must accompany any copy of f90doc. - -INSTALLATION - -You shouldn't have to compile anything. You can put the file f90doc in -a more accessible place, but the .pl files have to be in the same directory. -Alternatively, you can create a symlink to the real f90doc, where the .pl -files are held. For example, - - ln -s /usr/local/lib/f90doc-0.3.4/f90doc /usr/local/bin/f90doc - -If you don't have a command /usr/bin/env, you'll need to replace the first line -of f90doc with - - #!/path/to/perl5/bin/perl -w - -Otherwise, Perl version 5.003 or higher must be the first program called "perl" -in your path. 
diff --git a/Tools/F_scripts/f90doc/expr_parse.pl b/Tools/F_scripts/f90doc/expr_parse.pl deleted file mode 100644 index 3e831337041..00000000000 --- a/Tools/F_scripts/f90doc/expr_parse.pl +++ /dev/null @@ -1,793 +0,0 @@ -$yysccsid = "@(#)yaccpar 1.8 (Berkeley) 01/20/91 (Perl 2.0 12/31/92)"; -#define YYBYACC 1 -#line 2 "expr_parse.y" -package expr_parse; - -;# On failure, print out this as the line we were working on. -$expr_parse::line = ""; - -;# Portion of line left to parse -$expr_parse::left = ""; -#line 12 "y.tab.pl" -$COMMA=257; -$LPAREN=258; -$RPAREN=259; -$NOT=260; -$OR=261; -$AND=262; -$EQV=263; -$NEQV=264; -$COMPARISON=265; -$DBLSLASH=266; -$PERCENT=267; -$PLUS=268; -$MINUS=269; -$UPLUS=270; -$UMINUS=271; -$ASTERIK=272; -$SLASH=273; -$DBLASTERIK=274; -$CONST=275; -$NAME=276; -$COLON=277; -$LARRAY=278; -$RARRAY=279; -$EQUALS=280; -$YYERRCODE=256; -@yylhs = ( -1, - 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 5, 5, 5, 5, 5, 4, 4, 7, 6, - 6, 3, 3, 3, 8, 8, 9, 9, 10, 10, - 10, 12, 11, 11, 11, 11, -); -@yylen = ( 2, - 1, 2, 1, 1, 1, 3, 2, 2, 2, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 1, 3, 1, 3, 3, 3, 1, 1, 5, - 7, 1, 3, 4, 0, 1, 3, 1, 1, 1, - 1, 3, 1, 2, 2, 3, -); -@yydefred = ( 0, - 0, 0, 0, 0, 3, 32, 0, 0, 0, 4, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 28, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 10, 0, 6, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 38, 40, 41, 33, - 23, 0, 26, 25, 27, 0, 0, 0, 34, 0, - 0, 0, 0, 37, 0, 0, 0, 0, 0, -); -@yydgoto = ( 8, - 19, 10, 11, 20, 15, 63, 21, 55, 56, 57, - 58, 59, -); -@yysindex = ( -212, - -157, -212, -212, -212, 0, 0, -212, 0, -137, 0, - -246, -241, -29, -234, -235, -19, -223, -223, -29, -257, - 0, 0, -212, -212, -212, -212, -212, -212, -212, -212, - -212, -212, -212, -216, -229, -267, -222, 0, -212, 0, - -255, -19, 227, 227, 236, -164, -223, -223, -233, -233, - -233, -205, -212, -76, -174, -162, 0, 0, 0, 0, - 0, -180, 0, 0, 0, -212, -29, -212, 0, -216, - -212, -29, -29, 0, -118, -212, 
-95, -212, -29, -); -@yyrindex = ( 0, - 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, - 1, -59, 0, -43, 0, 163, 77, 96, -242, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, -152, 0, 0, 0, 0, 0, 0, - 191, 172, 199, 208, 182, 153, 115, 134, 20, 39, - 58, -175, -219, -214, 0, -146, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, -192, -188, 0, 0, - 0, -183, -178, 0, 0, 0, -145, 0, -143, -); -@yygindex = ( 0, - 2, 116, 0, 0, 0, 85, 84, 0, 0, 60, - 0, 0, -); -$YYTABLESIZE=510; -@yytable = ( 39, - 5, 9, 13, 16, 17, 18, 24, 61, 62, 27, - 28, 34, 29, 30, 29, 36, 31, 32, 33, 12, - 35, 40, 37, 38, 41, 42, 43, 44, 45, 46, - 47, 48, 49, 50, 51, 54, 29, 43, 13, 43, - 33, 1, 39, 2, 39, 1, 60, 2, 31, 32, - 33, 3, 4, 62, 67, 3, 4, 11, 5, 52, - 53, 7, 5, 6, 45, 7, 45, 72, 44, 73, - 44, 54, 75, 42, 66, 42, 7, 77, 46, 79, - 46, 32, 32, 32, 69, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 70, 8, 32, 32, 32, 71, - 1, 32, 2, 29, 30, 1, 35, 31, 32, 33, - 3, 4, 36, 30, 14, 31, 14, 12, 6, 22, - 7, 64, 65, 23, 24, 25, 26, 27, 28, 74, - 29, 30, 0, 15, 31, 32, 33, 0, 76, 0, - 0, 0, 23, 24, 25, 26, 27, 28, 0, 29, - 30, 0, 16, 31, 32, 33, 0, 0, 0, 0, - 0, 78, 9, 0, 0, 23, 24, 25, 26, 27, - 28, 18, 29, 30, 0, 0, 31, 32, 33, 0, - 0, 17, 0, 0, 23, 24, 25, 26, 27, 28, - 19, 29, 30, 0, 0, 31, 32, 33, 20, 22, - 68, 3, 3, 3, 3, 3, 3, 21, 3, 3, - 0, 0, 3, 3, 3, 24, 0, 4, 4, 4, - 4, 4, 4, 0, 4, 4, 0, 0, 4, 4, - 4, 23, 24, 25, 26, 27, 28, 0, 29, 30, - 0, 0, 31, 32, 33, 27, 28, 0, 29, 30, - 0, 0, 31, 32, 33, 0, 0, 5, 0, 5, - 0, 5, 5, 5, 5, 5, 5, 0, 5, 5, - 0, 0, 5, 5, 5, 0, 12, 5, 12, 5, - 12, 12, 12, 12, 12, 12, 0, 12, 12, 0, - 0, 12, 12, 0, 0, 13, 12, 13, 12, 13, - 13, 13, 13, 13, 13, 0, 13, 13, 0, 0, - 13, 13, 0, 0, 11, 13, 11, 13, 11, 11, - 11, 11, 11, 11, 0, 11, 11, 0, 0, 11, - 11, 0, 0, 7, 11, 7, 11, 7, 7, 7, - 7, 7, 7, 0, 7, 7, 0, 0, 0, 0, - 0, 0, 8, 7, 8, 7, 8, 8, 8, 8, - 8, 8, 0, 8, 8, 0, 0, 0, 0, 0, - 0, 14, 8, 14, 8, 14, 14, 14, 14, 14, - 14, 0, 14, 14, 0, 0, 0, 0, 0, 0, - 15, 14, 15, 14, 15, 15, 15, 15, 15, 15, - 0, 15, 15, 0, 0, 
0, 0, 0, 0, 16, - 15, 16, 15, 16, 16, 16, 16, 16, 16, 9, - 0, 9, 0, 9, 9, 9, 9, 0, 18, 16, - 18, 16, 18, 18, 18, 18, 0, 0, 17, 9, - 17, 9, 17, 17, 17, 17, 0, 19, 18, 19, - 18, 19, 0, 19, 19, 20, 0, 20, 17, 0, - 17, 20, 20, 0, 21, 0, 21, 19, 0, 19, - 21, 21, 0, 0, 0, 20, 0, 20, 0, 0, - 0, 0, 0, 0, 21, 0, 21, 23, 24, 0, - 0, 27, 28, 0, 29, 30, 0, 0, 31, 32, - 33, 28, 0, 29, 30, 0, 0, 31, 32, 33, -); -@yycheck = ( 257, - 0, 0, 1, 2, 3, 4, 262, 275, 276, 265, - 266, 258, 268, 269, 257, 257, 272, 273, 274, 0, - 267, 279, 257, 259, 23, 24, 25, 26, 27, 28, - 29, 30, 31, 32, 33, 34, 279, 257, 0, 259, - 274, 258, 257, 260, 259, 258, 276, 260, 272, 273, - 274, 268, 269, 276, 53, 268, 269, 0, 275, 276, - 277, 278, 275, 276, 257, 278, 259, 66, 257, 68, - 259, 70, 71, 257, 280, 259, 0, 76, 257, 78, - 259, 257, 258, 259, 259, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 257, 0, 272, 273, 274, 280, - 258, 277, 260, 268, 269, 0, 259, 272, 273, 274, - 268, 269, 259, 259, 0, 259, 1, 275, 276, 257, - 278, 37, 39, 261, 262, 263, 264, 265, 266, 70, - 268, 269, -1, 0, 272, 273, 274, -1, 257, -1, - -1, -1, 261, 262, 263, 264, 265, 266, -1, 268, - 269, -1, 0, 272, 273, 274, -1, -1, -1, -1, - -1, 257, 0, -1, -1, 261, 262, 263, 264, 265, - 266, 0, 268, 269, -1, -1, 272, 273, 274, -1, - -1, 0, -1, -1, 261, 262, 263, 264, 265, 266, - 0, 268, 269, -1, -1, 272, 273, 274, 0, 259, - 277, 261, 262, 263, 264, 265, 266, 0, 268, 269, - -1, -1, 272, 273, 274, 259, -1, 261, 262, 263, - 264, 265, 266, -1, 268, 269, -1, -1, 272, 273, - 274, 261, 262, 263, 264, 265, 266, -1, 268, 269, - -1, -1, 272, 273, 274, 265, 266, -1, 268, 269, - -1, -1, 272, 273, 274, -1, -1, 257, -1, 259, - -1, 261, 262, 263, 264, 265, 266, -1, 268, 269, - -1, -1, 272, 273, 274, -1, 257, 277, 259, 279, - 261, 262, 263, 264, 265, 266, -1, 268, 269, -1, - -1, 272, 273, -1, -1, 257, 277, 259, 279, 261, - 262, 263, 264, 265, 266, -1, 268, 269, -1, -1, - 272, 273, -1, -1, 257, 277, 259, 279, 261, 262, - 263, 264, 265, 266, -1, 268, 
269, -1, -1, 272, - 273, -1, -1, 257, 277, 259, 279, 261, 262, 263, - 264, 265, 266, -1, 268, 269, -1, -1, -1, -1, - -1, -1, 257, 277, 259, 279, 261, 262, 263, 264, - 265, 266, -1, 268, 269, -1, -1, -1, -1, -1, - -1, 257, 277, 259, 279, 261, 262, 263, 264, 265, - 266, -1, 268, 269, -1, -1, -1, -1, -1, -1, - 257, 277, 259, 279, 261, 262, 263, 264, 265, 266, - -1, 268, 269, -1, -1, -1, -1, -1, -1, 257, - 277, 259, 279, 261, 262, 263, 264, 265, 266, 257, - -1, 259, -1, 261, 262, 263, 264, -1, 257, 277, - 259, 279, 261, 262, 263, 264, -1, -1, 257, 277, - 259, 279, 261, 262, 263, 264, -1, 257, 277, 259, - 279, 261, -1, 263, 264, 257, -1, 259, 277, -1, - 279, 263, 264, -1, 257, -1, 259, 277, -1, 279, - 263, 264, -1, -1, -1, 277, -1, 279, -1, -1, - -1, -1, -1, -1, 277, -1, 279, 261, 262, -1, - -1, 265, 266, -1, 268, 269, -1, -1, 272, 273, - 274, 266, -1, 268, 269, -1, -1, 272, 273, 274, -); -$YYFINAL=8; -#ifndef YYDEBUG -#define YYDEBUG 0 -#endif -$YYMAXTOKEN=280; -#if YYDEBUG -@yyname = ( -"end-of-file",'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', -'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', -'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', -'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', -'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', -'','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','','', -'','','','','','','','','','','','','','','','','','','','','','','',"COMMA","LPAREN","RPAREN","NOT", -"OR","AND","EQV","NEQV","COMPARISON","DBLSLASH","PERCENT","PLUS","MINUS", -"UPLUS","UMINUS","ASTERIK","SLASH","DBLASTERIK","CONST","NAME","COLON","LARRAY", 
-"RARRAY","EQUALS", -); -@yyrule = ( -"\$accept : expr_with_abort", -"expr_with_abort : expr", -"expr_with_abort : expr COMMA", -"expr : CONST", -"expr : expr_without_const", -"expr_without_const : chain", -"expr_without_const : LARRAY array RARRAY", -"expr_without_const : PLUS expr", -"expr_without_const : MINUS expr", -"expr_without_const : NOT expr", -"expr_without_const : LPAREN potential_complex_or_implied_do RPAREN", -"expr_without_const : expr DBLASTERIK expr", -"expr_without_const : expr ASTERIK expr", -"expr_without_const : expr SLASH expr", -"expr_without_const : expr PLUS expr", -"expr_without_const : expr MINUS expr", -"expr_without_const : expr DBLSLASH expr", -"expr_without_const : expr COMPARISON expr", -"expr_without_const : expr AND expr", -"expr_without_const : expr OR expr", -"expr_without_const : expr EQV expr", -"expr_without_const : expr NEQV expr", -"potential_complex_or_implied_do : CONST", -"potential_complex_or_implied_do : CONST COMMA CONST", -"potential_complex_or_implied_do : expr_without_const", -"potential_complex_or_implied_do : expr_without_const COMMA do_args", -"potential_complex_or_implied_do : CONST COMMA do_args", -"array : array COMMA array_piece", -"array : array_piece", -"array_piece : expr", -"do_args : NAME EQUALS expr COMMA expr", -"do_args : NAME EQUALS expr COMMA expr COMMA expr", -"chain : NAME", -"chain : chain PERCENT NAME", -"chain : chain LPAREN exprlist RPAREN", -"exprlist :", -"exprlist : exprlist_ne", -"exprlist_ne : exprlist_ne COMMA argument", -"exprlist_ne : argument", -"argument : expr", -"argument : colonexpr", -"argument : namedargument", -"namedargument : NAME EQUALS expr", -"colonexpr : COLON", -"colonexpr : expr COLON", -"colonexpr : COLON expr", -"colonexpr : expr COLON expr", -); -#endif -sub yyclearin { $yychar = -1; } -sub yyerrok { $yyerrflag = 0; } -$YYSTACKSIZE = $YYSTACKSIZE || $YYMAXDEPTH || 500; -$YYMAXDEPTH = $YYMAXDEPTH || $YYSTACKSIZE || 500; -$yyss[$YYSTACKSIZE] = 0; -$yyvs[$YYSTACKSIZE] = 
0; -sub YYERROR { ++$yynerrs; &yy_err_recover; } -sub yy_err_recover -{ - if ($yyerrflag < 3) - { - $yyerrflag = 3; - while (1) - { - if (($yyn = $yysindex[$yyss[$yyssp]]) && - ($yyn += $YYERRCODE) >= 0 && - $yycheck[$yyn] == $YYERRCODE) - { -#if YYDEBUG - print "yydebug: state $yyss[$yyssp], error recovery shifting", - " to state $yytable[$yyn]\n" if $yydebug; -#endif - $yyss[++$yyssp] = $yystate = $yytable[$yyn]; - $yyvs[++$yyvsp] = $yylval; - next yyloop; - } - else - { -#if YYDEBUG - print "yydebug: error recovery discarding state ", - $yyss[$yyssp], "\n" if $yydebug; -#endif - return(1) if $yyssp <= 0; - --$yyssp; - --$yyvsp; - } - } - } - else - { - return (1) if $yychar == 0; -#if YYDEBUG - if ($yydebug) - { - $yys = ''; - if ($yychar <= $YYMAXTOKEN) { $yys = $yyname[$yychar]; } - if (!$yys) { $yys = 'illegal-symbol'; } - print "yydebug: state $yystate, error recovery discards ", - "token $yychar ($yys)\n"; - } -#endif - $yychar = -1; - next yyloop; - } -0; -} # yy_err_recover - -sub yyparse -{ -#ifdef YYDEBUG - if ($yys = $ENV{'YYDEBUG'}) - { - $yydebug = int($1) if $yys =~ /^(\d)/; - } -#endif - - $yynerrs = 0; - $yyerrflag = 0; - $yychar = (-1); - - $yyssp = 0; - $yyvsp = 0; - $yyss[$yyssp] = $yystate = 0; - -yyloop: while(1) - { - yyreduce: { - last yyreduce if ($yyn = $yydefred[$yystate]); - if ($yychar < 0) - { - if (($yychar = &yylex) < 0) { $yychar = 0; } -#if YYDEBUG - if ($yydebug) - { - $yys = ''; - if ($yychar <= $#yyname) { $yys = $yyname[$yychar]; } - if (!$yys) { $yys = 'illegal-symbol'; }; - print "yydebug: state $yystate, reading $yychar ($yys)\n"; - } -#endif - } - if (($yyn = $yysindex[$yystate]) && ($yyn += $yychar) >= 0 && - $yycheck[$yyn] == $yychar) - { -#if YYDEBUG - print "yydebug: state $yystate, shifting to state ", - $yytable[$yyn], "\n" if $yydebug; -#endif - $yyss[++$yyssp] = $yystate = $yytable[$yyn]; - $yyvs[++$yyvsp] = $yylval; - $yychar = (-1); - --$yyerrflag if $yyerrflag > 0; - next yyloop; - } - if (($yyn = 
$yyrindex[$yystate]) && ($yyn += $yychar) >= 0 && - $yycheck[$yyn] == $yychar) - { - $yyn = $yytable[$yyn]; - last yyreduce; - } - if (! $yyerrflag) { - &yyerror('syntax error'); - ++$yynerrs; - } - return(1) if &yy_err_recover; - } # yyreduce -#if YYDEBUG - print "yydebug: state $yystate, reducing by rule ", - "$yyn ($yyrule[$yyn])\n" if $yydebug; -#endif - $yym = $yylen[$yyn]; - $yyval = $yyvs[$yyvsp+1-$yym]; - switch: - { -if ($yyn == 1) { -#line 29 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; return 1; -last switch; -} } -if ($yyn == 2) { -#line 30 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-1]; return "s,"; -last switch; -} } -if ($yyn == 3) { -#line 33 "expr_parse.y" -{ $yyval = [ "%const", @{$yyvs[$yyvsp-0]} ]; -last switch; -} } -if ($yyn == 4) { -#line 34 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 5) { -#line 37 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 6) { -#line 38 "expr_parse.y" -{ $yyval = [ "%array", @{$yyvs[$yyvsp-1]} ]; -last switch; -} } -if ($yyn == 7) { -#line 39 "expr_parse.y" -{ $yyval = [ "u+", $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 8) { -#line 40 "expr_parse.y" -{ $yyval = [ "u-", $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 9) { -#line 41 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 10) { -#line 43 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-1]; -last switch; -} } -if ($yyn == 11) { -#line 44 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 12) { -#line 45 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 13) { -#line 46 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 14) { -#line 47 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 15) { -#line 48 
"expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 16) { -#line 49 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 17) { -#line 50 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 18) { -#line 51 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 19) { -#line 52 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 20) { -#line 53 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 21) { -#line 54 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 22) { -#line 57 "expr_parse.y" -{ $yyval = [ "%const", @{$yyvs[$yyvsp-0]} ]; -last switch; -} } -if ($yyn == 23) { -#line 59 "expr_parse.y" -{ my ($type1, $val1) = @{$yyvs[$yyvsp-2]}; - my ($type2, $val2) = @{$yyvs[$yyvsp-0]}; - $yyval = ["%const", typing::make_complex_type ($type1, $type2), - [$val1, $val2]]; - -last switch; -} } -if ($yyn == 24) { -#line 64 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 25) { -#line 66 "expr_parse.y" -{ $yyval = [ "%do", $yyvs[$yyvsp-2], @{$yyvs[$yyvsp-0]} ]; -last switch; -} } -if ($yyn == 26) { -#line 68 "expr_parse.y" -{ $yyval = [ "%do", [ "%const", @{$yyvs[$yyvsp-2]} ], @{$yyvs[$yyvsp-0]} ]; - -last switch; -} } -if ($yyn == 27) { -#line 72 "expr_parse.y" -{ $yyval = [ @{$yyvs[$yyvsp-2]}, $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 28) { -#line 73 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 29) { -#line 76 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 30) { -#line 80 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-4], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; 
-last switch; -} } -if ($yyn == 31) { -#line 82 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-6], $yyvs[$yyvsp-4], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 32) { -#line 85 "expr_parse.y" -{ $yyval = [ "%var", $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 33) { -#line 86 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-1], $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 34) { -#line 87 "expr_parse.y" -{ $yyval = [ "%call", $yyvs[$yyvsp-3], @{$yyvs[$yyvsp-1]} ]; -last switch; -} } -if ($yyn == 35) { -#line 90 "expr_parse.y" -{ $yyval = []; -last switch; -} } -if ($yyn == 36) { -#line 91 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 37) { -#line 94 "expr_parse.y" -{ $yyval = [ @{$yyvs[$yyvsp-2]}, $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 38) { -#line 95 "expr_parse.y" -{ $yyval = [ $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 39) { -#line 98 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 40) { -#line 99 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 41) { -#line 100 "expr_parse.y" -{ $yyval = $yyvs[$yyvsp-0]; -last switch; -} } -if ($yyn == 42) { -#line 103 "expr_parse.y" -{ $yyval = [ "%namedarg", $yyvs[$yyvsp-2], $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 43) { -#line 106 "expr_parse.y" -{ $yyval = [ "%colon", "", "" ]; -last switch; -} } -if ($yyn == 44) { -#line 107 "expr_parse.y" -{ $yyval = [ "%colon", $yyvs[$yyvsp-1], "" ]; -last switch; -} } -if ($yyn == 45) { -#line 108 "expr_parse.y" -{ $yyval = [ "%colon", "", $yyvs[$yyvsp-0] ]; -last switch; -} } -if ($yyn == 46) { -#line 109 "expr_parse.y" -{ $yyval = [ "%colon", $yyvs[$yyvsp-2], $yyvs[$yyvsp-1] ]; -last switch; -} } -#line 624 "y.tab.pl" - } # switch - $yyssp -= $yym; - $yystate = $yyss[$yyssp]; - $yyvsp -= $yym; - $yym = $yylhs[$yyn]; - if ($yystate == 0 && $yym == 0) - { -#if YYDEBUG - print "yydebug: after reduction, shifting from state 0 ", - "to 
state $YYFINAL\n" if $yydebug; -#endif - $yystate = $YYFINAL; - $yyss[++$yyssp] = $YYFINAL; - $yyvs[++$yyvsp] = $yyval; - if ($yychar < 0) - { - if (($yychar = &yylex) < 0) { $yychar = 0; } -#if YYDEBUG - if ($yydebug) - { - $yys = ''; - if ($yychar <= $#yyname) { $yys = $yyname[$yychar]; } - if (!$yys) { $yys = 'illegal-symbol'; } - print "yydebug: state $YYFINAL, reading $yychar ($yys)\n"; - } -#endif - } - return(0) if $yychar == 0; - next yyloop; - } - if (($yyn = $yygindex[$yym]) && ($yyn += $yystate) >= 0 && - $yyn <= $#yycheck && $yycheck[$yyn] == $yystate) - { - $yystate = $yytable[$yyn]; - } else { - $yystate = $yydgoto[$yym]; - } -#if YYDEBUG - print "yydebug: after reduction, shifting from state ", - "$yyss[$yyssp] to state $yystate\n" if $yydebug; -#endif - $yyss[++$yyssp] = $yystate; - $yyvs[++$yyvsp] = $yyval; - } # yyloop -} # yyparse -#line 112 "expr_parse.y" - -sub yylex { - $expr_parse::left =~ s/^\s*//; - return 0 if $expr_parse::left eq ""; - my ($ncharsread, $token, $value) = expr_parse::good_yylex ($expr_parse::left); - # print "yylex: token eof\n" unless $ncharsread; - return 0 unless $ncharsread; - # print "yylex: token $token (" . substr ($expr_parse::left, 0, $ncharsread) . ") with value $value\n"; - # print join (";", @$value) . 
"\n"; - $expr_parse::left = substr ($expr_parse::left, $ncharsread); - $yylval = $value; - return $token; -} - -# returns (ncharsread, token, value) -sub good_yylex { - my ($s) = @_; - my ($c) = substr ($s, 0, 1); - - if ($c eq "") { - return 0; - } elsif ($s =~ /^(\d+(?:\.\d*)?|\.\d+)D[+-]?\d+/i) { - return (length ($&), $CONST, [$typing::double_precision, $&]); - } elsif ($s =~ /^(\d+E[+-]?\d+|(?:\d+\.\d*|\.\d+)(?:E[+-]?\d+)?)(_\w+)?/i) { - if (defined $2) { - return (length ($&), $CONST, [typing::make_type ('real', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'real'}, $1]); - } - } elsif ($s =~ /^(\d+)(_\w+)?/) { - if ($2) { - return (length ($&), $CONST, [typing::make_type ('integer', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'integer'}, $1]); - } - } elsif ($s =~ /^(\.true\.|\.false\.)(_\w+)?/i) { - if (defined $2) { - return (length ($&), $CONST, [typing::make_type ('logical', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'logical'}, $1]); - } - } elsif ($s =~ /^'(\d+)'(_\w+)?/) { - # Interior of string is digits because it has been grabbed already. 
- my ($str) = stmts::get_string ($1); - if (defined $2) { - return (length ($&), $CONST, [typing::make_character_type (substr ($2, 1), length ($str)), $str]); - } else { - return (length ($&), $CONST, [typing::make_character_type ($typing::default_character_kind, length ($str)), $str]); - } - } elsif ($s =~ /^\w+/) { - return (length ($&), $NAME, $&); - } else { - switch: { - $s =~ /^==/ && return (2, $COMPARISON, "=="); - $s =~ /^<=/ && return (2, $COMPARISON, "<="); - $s =~ /^>=/ && return (2, $COMPARISON, ">="); - $s =~ /^/ && return (1, $COMPARISON, ">"); - $s =~ /^\/=/ && return (2, $COMPARISON, "/="); - $s =~ /^=/ && return (1, $EQUALS, "="); - $s =~ /^\.eq\./i && return (4, $COMPARISON, "=="); - $s =~ /^\.le\./i && return (4, $COMPARISON, "<="); - $s =~ /^\.ge\./i && return (4, $COMPARISON, ">="); - $s =~ /^\.lt\./i && return (4, $COMPARISON, "<"); - $s =~ /^\.gt\./i && return (4, $COMPARISON, ">"); - $s =~ /^\.ne\./i && return (4, $COMPARISON, "/="); - $s =~ /^\.neqv\./i && return (6, $NEQV, ".neqv."); - $s =~ /^\.eqv\./i && return (5, $EQV, ".eqv."); - $s =~ /^\.and\./i && return (5, $AND, ".and."); - $s =~ /^\.or\./i && return (4, $OR, ".or."); - $s =~ /^\.not\./i && return (5, $NOT, ".not."); - $s =~ /^\*\*/ && return (2, $DBLASTERIK, "**"); - $s =~ /^\/\// && return (2, $DBLSLASH, "//"); - $s =~ /^\(\// && return (2, $LARRAY, "(/"); - $s =~ /^\/\)/ && return (2, $RARRAY, "/)"); - $c eq "," && return (1, $COMMA, ","); - $c eq "+" && return (1, $PLUS, "+"); - $c eq "-" && return (1, $MINUS, "-"); - $c eq "*" && return (1, $ASTERIK, "*"); - $c eq "/" && return (1, $SLASH, "/"); - $c eq "(" && return (1, $LPAREN, "("); - $c eq ")" && return (1, $RPAREN, ")"); - $c eq "%" && return (1, $PERCENT, "%"); - $c eq ":" && return (1, $COLON, ":"); - } - die "Lexer failed on `$s'"; - } -} - -##### -# Takes a string that consists entirely of an expression, and returns a -# reference to the parse tree it defines. 
-##### -sub parse_expr { - my ($s) = @_; - # print "parsing string: $s.\n"; - $expr_parse::left = $expr_parse::line = $s; - die "Expression `$expr_parse::line' has trailing garbage `$1$expr_parse::left'" - if yyparse () =~ /^s(.*)$/; - return $yyval; -} - -##### -# Takes a string that consists partly of an expression. (The first part -# is an expression.) Returns (parse tree ref, rest string, separator string). -##### -sub parse_part_as_expr { - my ($s) = @_; - # print "parsing part of string: $s.\n"; - $expr_parse::left = $expr_parse::line = $s; - if (yyparse () =~ /^s(.*)$/) { - return ($yyval, $expr_parse::left, $1); - } else { - return ($yyval); - } -} - -sub yyerror { - my ($s) = @_; - die "yyerror: $s during parsing of F90 code `$expr_parse::line'"; -} - -1; -#line 794 "y.tab.pl" diff --git a/Tools/F_scripts/f90doc/expr_parse.y b/Tools/F_scripts/f90doc/expr_parse.y deleted file mode 100644 index 94070cfc768..00000000000 --- a/Tools/F_scripts/f90doc/expr_parse.y +++ /dev/null @@ -1,234 +0,0 @@ -%{ -package expr_parse; - -# On failure, print out this as the line we were working on. 
-$expr_parse::line = ""; - -# Portion of line left to parse -$expr_parse::left = ""; -%} - -%token COMMA LPAREN RPAREN NOT OR AND EQV NEQV COMPARISON DBLSLASH PERCENT -%token PLUS MINUS UPLUS UMINUS ASTERIK SLASH DBLASTERIK CONST NAME COLON -%token LARRAY RARRAY EQUALS - -%left EQV NEQV -%left OR -%left AND -%nonassoc NOT -%nonassoc COMPARISON -%left DBLSLASH -%left PLUS MINUS -%nonassoc UPLUS UMINUS -%left ASTERIK SLASH -%right DBLASTERIK -%left PERCENT - -%% - -expr_with_abort: expr { $$ = $1; return 1; } - | expr COMMA { $$ = $1; return "s,"; } - -expr: - CONST { $$ = [ "%const", @{$1} ]; } - | expr_without_const { $$ = $1; } - -expr_without_const: - chain { $$ = $1; } - | LARRAY array RARRAY { $$ = [ "%array", @{$2} ]; } - | PLUS expr %prec UPLUS { $$ = [ "u+", $2 ]; } - | MINUS expr %prec UMINUS { $$ = [ "u-", $2 ]; } - | NOT expr { $$ = [ $1, $2 ]; } - | LPAREN potential_complex_or_implied_do RPAREN - { $$ = $2; } - | expr DBLASTERIK expr { $$ = [ $2, $1, $3 ]; } - | expr ASTERIK expr { $$ = [ $2, $1, $3 ]; } - | expr SLASH expr { $$ = [ $2, $1, $3 ]; } - | expr PLUS expr { $$ = [ $2, $1, $3 ]; } - | expr MINUS expr { $$ = [ $2, $1, $3 ]; } - | expr DBLSLASH expr { $$ = [ $2, $1, $3 ]; } - | expr COMPARISON expr { $$ = [ $2, $1, $3 ]; } - | expr AND expr { $$ = [ $2, $1, $3 ]; } - | expr OR expr { $$ = [ $2, $1, $3 ]; } - | expr EQV expr { $$ = [ $2, $1, $3 ]; } - | expr NEQV expr { $$ = [ $2, $1, $3 ]; } - -potential_complex_or_implied_do: - CONST { $$ = [ "%const", @{$1} ]; } - | CONST COMMA CONST - { my ($type1, $val1) = @{$1}; - my ($type2, $val2) = @{$3}; - $$ = ["%const", typing::make_complex_type ($type1, $type2), - [$val1, $val2]]; - } - | expr_without_const { $$ = $1; } - | expr_without_const COMMA do_args - { $$ = [ "%do", $1, @{$3} ]; } - | CONST COMMA do_args - { $$ = [ "%do", [ "%const", @{$1} ], @{$3} ]; - } - -array: - array COMMA array_piece { $$ = [ @{$1}, $3 ]; } - | array_piece { $$ = [ $1 ]; } - -array_piece: - expr { $$ = $1; } -# | 
implied_do is handled within expr - -do_args: - NAME EQUALS expr COMMA expr { $$ = [ $1, $3, $5 ]; } - | NAME EQUALS expr COMMA expr COMMA expr - { $$ = [ $1, $3, $5, $7 ]; } - -chain: - NAME { $$ = [ "%var", $1 ]; } - | chain PERCENT NAME { $$ = [ $2, $1, $3 ]; } - | chain LPAREN exprlist RPAREN { $$ = [ "%call", $1, @{$3} ]; } - -exprlist: - { $$ = []; } - | exprlist_ne { $$ = $1; } - -exprlist_ne: - exprlist_ne COMMA argument { $$ = [ @{$1}, $3 ]; } - | argument { $$ = [ $1 ]; } - -argument: - expr { $$ = $1; } - | colonexpr { $$ = $1; } - | namedargument { $$ = $1; } - -namedargument: - NAME EQUALS expr { $$ = [ "%namedarg", $1, $3 ]; } - -colonexpr: - COLON { $$ = [ "%colon", "", "" ]; } - | expr COLON { $$ = [ "%colon", $1, "" ]; } - | COLON expr { $$ = [ "%colon", "", $2 ]; } - | expr COLON expr { $$ = [ "%colon", $1, $2 ]; } - -%% - -sub yylex { - $expr_parse::left =~ s/^\s*//; - return 0 if $expr_parse::left eq ""; - my ($ncharsread, $token, $value) = expr_parse::good_yylex ($expr_parse::left); - # print "yylex: token eof\n" unless $ncharsread; - return 0 unless $ncharsread; - # print "yylex: token $token (" . substr ($expr_parse::left, 0, $ncharsread) . ") with value $value\n"; - # print join (";", @$value) . 
"\n"; - $expr_parse::left = substr ($expr_parse::left, $ncharsread); - $yylval = $value; - return $token; -} - -# returns (ncharsread, token, value) -sub good_yylex { - my ($s) = @_; - my ($c) = substr ($s, 0, 1); - - if ($c eq "") { - return 0; - } elsif ($s =~ /^(\d+(?:\.\d*)?|\.\d+)D[+-]?\d+/i) { - return (length ($&), $CONST, [$typing::double_precision, $&]); - } elsif ($s =~ /^(\d+E[+-]?\d+|(?:\d+\.\d*|\.\d+)(?:E[+-]?\d+)?)(_\w+)?/i) { - if (defined $2) { - return (length ($&), $CONST, [typing::make_type ('real', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'real'}, $1]); - } - } elsif ($s =~ /^(\d+)(_\w+)?/) { - if ($2) { - return (length ($&), $CONST, [typing::make_type ('integer', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'integer'}, $1]); - } - } elsif ($s =~ /^(\.true\.|\.false\.)(_\w+)?/i) { - if (defined $2) { - return (length ($&), $CONST, [typing::make_type ('logical', substr ($2, 1)), $1]); - } else { - return (length ($&), $CONST, [$typing::default_type{'logical'}, $1]); - } - } elsif ($s =~ /^'(\d+)'(_\w+)?/) { - # Interior of string is digits because it has been grabbed already. 
- my ($str) = stmts::get_string ($1); - if (defined $2) { - return (length ($&), $CONST, [typing::make_character_type (substr ($2, 1), length ($str)), $str]); - } else { - return (length ($&), $CONST, [typing::make_character_type ($typing::default_character_kind, length ($str)), $str]); - } - } elsif ($s =~ /^\w+/) { - return (length ($&), $NAME, $&); - } else { - switch: { - $s =~ /^==/ && return (2, $COMPARISON, "=="); - $s =~ /^<=/ && return (2, $COMPARISON, "<="); - $s =~ /^>=/ && return (2, $COMPARISON, ">="); - $s =~ /^/ && return (1, $COMPARISON, ">"); - $s =~ /^\/=/ && return (2, $COMPARISON, "/="); - $s =~ /^=/ && return (1, $EQUALS, "="); - $s =~ /^\.eq\./i && return (4, $COMPARISON, "=="); - $s =~ /^\.le\./i && return (4, $COMPARISON, "<="); - $s =~ /^\.ge\./i && return (4, $COMPARISON, ">="); - $s =~ /^\.lt\./i && return (4, $COMPARISON, "<"); - $s =~ /^\.gt\./i && return (4, $COMPARISON, ">"); - $s =~ /^\.ne\./i && return (4, $COMPARISON, "/="); - $s =~ /^\.neqv\./i && return (6, $NEQV, ".neqv."); - $s =~ /^\.eqv\./i && return (5, $EQV, ".eqv."); - $s =~ /^\.and\./i && return (5, $AND, ".and."); - $s =~ /^\.or\./i && return (4, $OR, ".or."); - $s =~ /^\.not\./i && return (5, $NOT, ".not."); - $s =~ /^\*\*/ && return (2, $DBLASTERIK, "**"); - $s =~ /^\/\// && return (2, $DBLSLASH, "//"); - $s =~ /^\(\// && return (2, $LARRAY, "(/"); - $s =~ /^\/\)/ && return (2, $RARRAY, "/)"); - $c eq "," && return (1, $COMMA, ","); - $c eq "+" && return (1, $PLUS, "+"); - $c eq "-" && return (1, $MINUS, "-"); - $c eq "*" && return (1, $ASTERIK, "*"); - $c eq "/" && return (1, $SLASH, "/"); - $c eq "(" && return (1, $LPAREN, "("); - $c eq ")" && return (1, $RPAREN, ")"); - $c eq "%" && return (1, $PERCENT, "%"); - $c eq ":" && return (1, $COLON, ":"); - } - die "Lexer failed on `$s'"; - } -} - -##### -# Takes a string that consists entirely of an expression, and returns a -# reference to the parse tree it defines. 
-##### -sub parse_expr { - my ($s) = @_; - # print "parsing string: $s.\n"; - $expr_parse::left = $expr_parse::line = $s; - die "Expression `$expr_parse::line' has trailing garbage `$1$expr_parse::left'" - if yyparse () =~ /^s(.*)$/; - return $yyval; -} - -##### -# Takes a string that consists partly of an expression. (The first part -# is an expression.) Returns (parse tree ref, rest string, separator string). -##### -sub parse_part_as_expr { - my ($s) = @_; - # print "parsing part of string: $s.\n"; - $expr_parse::left = $expr_parse::line = $s; - if (yyparse () =~ /^s(.*)$/) { - return ($yyval, $expr_parse::left, $1); - } else { - return ($yyval); - } -} - -sub yyerror { - my ($s) = @_; - die "yyerror: $s during parsing of F90 code `$expr_parse::line'"; -} - -1; diff --git a/Tools/F_scripts/f90doc/f90doc b/Tools/F_scripts/f90doc/f90doc deleted file mode 100755 index 0afe6dafe73..00000000000 --- a/Tools/F_scripts/f90doc/f90doc +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env perl -eval 'exec perl $0 ${1+"$@"}' - if 0; -warn ("Perl 5 not detected, likely a big problem") if $] < 5.0; -warn "Less than Perl 5.003. You may witness mysterious segmentation faults." - if $] < 5.003; - -use strict; - -BEGIN { - my $zero = $0; - while (-l $zero) { - my $nextzero = readlink $zero; - if (substr ($nextzero, 0, 1) eq "/") { - $zero = $nextzero; - } elsif ($zero =~ m#^(.*)/#) { - $zero = "$1/$nextzero"; - } else { - $zero = $nextzero; - } - } - if ($zero =~ m#(.*)/\w+#) { - push @INC, "$1/../common/", $1; - } else { - push @INC, "../common/", "."; - } -} - -require "htmling.pl"; -require "stmts.pl"; -require "utils.pl"; -#require "expr_parse.pl"; -#require "typing.pl"; - -#################### - -if (! @ARGV) { - print <$part in module $1"); - } else { - push (@::see_list, "module $1"); - } - } elsif ($macro =~ /^author\s+/i) { - push (@::authors, $'); - } elsif ($macro =~ /^version\s+/i) { - die "Two versions in a single !! 
block" if $::version_num; - $::version_num = $'; - } else { - die "Unrecognized macro $macro"; - } -} diff --git a/Tools/F_scripts/f90doc/htmling.pl b/Tools/F_scripts/f90doc/htmling.pl deleted file mode 100644 index 956513244d9..00000000000 --- a/Tools/F_scripts/f90doc/htmling.pl +++ /dev/null @@ -1,376 +0,0 @@ -package htmling; - -use strict; - -### CONSTANTS -$htmling::dblspace = " "; -$htmling::indentspace = $htmling::dblspace x 2; -$htmling::headerspace = $htmling::indentspace; -$htmling::comment_indent = $htmling::indentspace x 2; - -### PUBLIC GLOBALS -$htmling::comments_type = "smart"; -$htmling::suppress_calls = 0; -$htmling::calls_make_links = 0; -$htmling::html_filenames_original_case = 0; - -### GLOBALS -$htmling::htmlfile = ""; -$htmling::indent = 0; - -# Return the name of the HTML file for the specified PROGRAM or MODULE -sub html_filename { - my ($name) = @_; - $name = lc $name unless $htmling::html_filenames_original_case; - return $name . ".html"; -} - -# This is the main calling point from f90doc. -# Takes all top-level objects: programs, subroutines, functions, and modules. -# Warns if given something else. -sub do_toplevel { - my ($top, $outfile) = @_; - - my $type = $top->{'type'}; - unless ($type eq 'module' || $type eq 'subroutine' || $type eq 'function' || - $type eq 'program') { - warn "Warning: Unrecognized top-level object $type will not be documented.\n"; - return; - } - - # A positive-length name. Necessary because programs may not have names. - if (defined $outfile) { - $htmling::htmlfile = $outfile; - } else { - $htmling::htmlfile = html_filename ( - ($top->{'name'} eq '' ? $type : $top->{'name'})); - } - print "Generating $htmling::htmlfile...\n"; - open OUT, ">$htmling::htmlfile"; - - print OUT "\n"; - print OUT "\n"; - print OUT " $type $top->{'name'} (generated by f90doc) \n"; - print OUT "\n"; - print OUT "

", ucfirst ($type), " $top->{'name'}

\n"; - print OUT "
$type $top->{'name'}\n";
-
-  list_uses (@{$top->{'uses'}});
-  list_calls (1, keys %{$top->{'calls'}}) if exists $top->{'calls'};
-  list_html ("Types", map (($_->{'type'} eq "type" ? ($_) : ()), @{$top->{'ocontains'}}));
-  list_html ("Variables", map (($_->{'type'} eq "var" ? ($_) : ()), @{$top->{'ocontains'}}));
-  list_html ("Interfaces", map (($_->{'type'} eq "interface" ? ($_) : ()), @{$top->{'ocontains'}}));
-  list_html ("Subroutines and functions", map (($_->{'type'} eq "subroutine" || $_->{'type'} eq "function" ? ($_) : ()), @{$top->{'ocontains'}}));
-
-  print OUT "\nend $type $top->{'name'}\n";
-  do_comments ($top->{'comments'}, 1);
-
-  my @list;
-  @list = map (($_->{'type'} eq "type" ? ($_) : ()), @{$top->{'ocontains'}});
-  print OUT "\n

Description of Types

\n" if @list; - do_html (@list); - @list = map (($_->{'type'} eq "var" ? ($_) : ()), @{$top->{'ocontains'}}); - print OUT "\n

Description of Variables

\n" if @list; - do_html (@list); - @list = map (($_->{'type'} eq "interface" ? ($_) : ()), @{$top->{'ocontains'}}); - print OUT "\n

Description of Interfaces

\n" if @list; - do_html (@list); - @list = map (($_->{'type'} eq "subroutine" || $_->{'type'} eq "function" ? ($_) : ()), @{$top->{'ocontains'}}); - print OUT "\n

Description of Subroutines and Functions

\n" if @list; - do_html (@list); - - print OUT "\n"; - close OUT; -} - -sub list_uses { - if (@_) { - print OUT "\n${htmling::indentspace}${htmling::headerspace}! Uses\n"; - my ($use); - foreach $use (@_) { - my ($module, $extra) = @$use; - $extra = defined $extra ? ", $extra" : ""; - print OUT "${htmling::indentspace}", - "use $module$extra\n"; - } - } -} - -sub list_calls { - return if $htmling::suppress_calls; - my ($big, @calls) = (@_); - if (@calls) { - @calls = sort @calls; - @calls = map { "$_" } @calls - if $htmling::calls_make_links; - if ($big) { - print OUT join ("\n", - "\n${htmling::indentspace}${htmling::headerspace}! Calls", - (map { "${htmling::indentspace}call $_" } @calls), ""); - } else { - print OUT "${htmling::indentspace}! Calls: ", join (", ", @calls), "\n"; - } - } -} - -sub list_html { - my ($title) = shift; - - if (@_) { - print OUT "\n${htmling::indentspace}${htmling::headerspace}! $title\n"; - my ($struct); - foreach $struct (@_) { - my ($name, $type) = (txt2html ($struct->{'name'}), $struct->{'type'}); - my ($href) = "$name"; - print OUT $htmling::indentspace; - if ($type eq "var") { - print OUT var2str ($struct, $href) . "\n"; - } elsif ($type eq "subroutine" || - $type eq "function") { - print OUT join (" ", attriblist ($struct), ""); - print OUT typing::type_to_f90 ($struct->{'rtype'}) . " " - if exists $struct->{'rtype'}; - my $flag; - for $flag ('recursive', 'elemental', 'pure') { - print OUT "$flag " if $struct->{$flag}; - } - print OUT "$type $href"; - print OUT " (" . join (", ", @{$struct->{'parms'}}) . ")"; - print OUT " result ($struct->{'result'})" - if exists $struct->{'result'} && !exists $struct->{'rtype'}; - print OUT "\n"; - } else { - print OUT join (" ", attriblist ($struct), ""); - print OUT "$type $href\n"; - } - } - } -} - -sub do_html { - if (@_) { - my ($struct); - - foreach $struct (@_) { - my ($name, $type) = (txt2html ($struct->{'name'}), $struct->{'type'}); - if (! $htmling::indent) { - print OUT "

$name

\n"; - print OUT "
";
-         }
-
-         print OUT $htmling::indentspace x $htmling::indent;
-         if ($type eq "var") {
-             print OUT var2str ($struct) . "\n";
-         } elsif ($type eq "mprocedure") {
-             die "do_html: bare module procedure $struct->{'name'} (no enclosing module)"
-                 unless exists $struct->{'bind'};
-             print OUT
-                 "module procedure {'bind'}->{'type'}_" .
-                 lc ($struct->{'name'}) . "\">$name\n";
-         } elsif ($type eq "subroutine" || $type eq "function") {
-             print OUT join (" ", attriblist ($struct), "");
-             print OUT typing::type_to_f90 ($struct->{'rtype'}) . " "
-                 if exists $struct->{'rtype'} && !exists $struct->{'result'};
-             my $flag;
-             for $flag ('recursive', 'elemental', 'pure') {
-               print OUT "$flag " if $struct->{$flag};
-             }
-             print OUT "$type $name";
-             print OUT " (" . join (", ", @{$struct->{'parms'}}) . ")";
-             print OUT " result ($struct->{'result'})"
-               if exists $struct->{'result'};
-             print OUT "\n";
-         } else {
-             print OUT join (" ", attriblist ($struct), "");
-             print OUT "$type $name\n";
-         }
-
-         $htmling::indent++;
-
-         if ($type eq "var" || $type eq "mprocedure") {
-         } elsif ($type eq "type") {
-           print OUT $htmling::indentspace x $htmling::indent, "private\n"
-             if exists $struct->{'privatetype'};
-           print OUT $htmling::indentspace x $htmling::indent, "sequence\n"
-             if exists $struct->{'sequencetype'};
-           do_html (@{$struct->{'ocontains'}});
-         } elsif ($type eq "interface") {
-           do_html (@{$struct->{'ocontains'}});
-         } elsif ($type eq "subroutine" || $type eq "function") {
-           my @interest = @{$struct->{'parms'}};
-           push @interest, $struct->{'result'} if exists $struct->{'result'};
-           push @interest, $name
-             if $type eq "function" && !exists $struct->{'result'} &&
-               !exists $struct->{'rtype'};
-           my $arg;
-           foreach $arg (@interest) {
-             my (@things) = values %{$struct->{'contains'}->{lc $arg}};
-             die "Confused by/no declaration for parameter $arg of $type $name"
-               if scalar @things != 1;
-             do_html ($things[0]);
-           }
-         } else {
-           die "do: I don't know what a $type is";
-         }
-
-         list_calls (0, keys %{$struct->{'calls'}}) if exists $struct->{'calls'};
-
-         $htmling::indent--;
-
-         if ($type ne "var" && $type ne "mprocedure") {
-            print OUT $htmling::indentspace x $htmling::indent . "end $type $name\n";
-         }
-
-         do_comments ($struct->{'comments'}, ! $htmling::indent);
-      }
-   }
-}
-
-# Pass comments and a flag saying if you want to end the current 
 block.
-sub do_comments {
-   my ($comments, $endpre) = @_;
-   if ($comments eq "") {
-      print OUT "
\n" if $endpre; - return; - } - - #print OUT "\n" unless $htmling::indent; - - if ($htmling::comments_type eq "preformatted") { - my ($s) = $htmling::indentspace x $htmling::indent . $htmling::comment_indent; - $comments =~ s/^/$s/m if $htmling::indent; - $comments =~ s/^\n*//s; - $comments =~ s/\n*$//s; - print OUT $comments, "\n"; - print OUT "
\n" if $endpre; - } else { - print OUT "
\n"; - print OUT "
\n" if $htmling::indent; - if ($htmling::comments_type eq "html") { - } elsif ($htmling::comments_type eq "smart") { - my @newcomments = (); - my $verbmode = 0; - my @listmode = (); - my $line; - foreach $line (split ("\n", $comments)) { - if ($verbmode) { - if ($line =~ /^>/) { - warn "`$line' found while already in verbatim mode"; - substr ($line, 0, 1) = " "; - push @newcomments, $line; - } elsif ($line =~ /^"; - } elsif ($line =~ /^v/) { - warn "`$line' found while already in verbatim mode"; - substr ($line, 0, 1) = " "; - push @newcomments, $line; - } else { - push @newcomments, $line; - } - next; - } - - # _italic_ and *bold* - while ($line =~ /(\A|\W)_(\w|\w.*?\w)_(\Z|\W)/) { - my ($left, $mid, $right) = ("$`$1", $2, "$3$'"); - $mid =~ s/_/ /g; - $line = $left . $mid . $right; - } - while ($line =~ /(\A|\W)\*(\w|\w.*?\w)\*(\Z|\W)/) { - my ($left, $mid, $right) = ("$`$1", $2, "$3$'"); - $mid =~ s/\*/ /g; - $line = $left . $mid . $right; - } - - # Lists - if ($line =~ /^( *)-/) { - if (! @listmode || length ($1) > $listmode[$#listmode]) { - push @listmode, length $1; - push @newcomments, $1 . "
    "; - } else { - while ($listmode[$#listmode] != length ($1)) { - push @newcomments, " " x $listmode[$#listmode] . "
"; - pop @listmode; - die "Unindented to invalid position in `$line'" - unless @listmode; - } - } - push @newcomments, $1 . "
  • " . substr ($line, length ($&)); - } elsif ($line =~ /^>/) { - #warn "Verbatim mode started in list mode" if @listmode; - $verbmode = 1; - substr ($line, 0, 1) = " "; - push @newcomments, "
    " . $line;
    -            # Ignore $line =~ /^$line
    "; - } elsif ($line =~ /^\s*$/) { - push @newcomments, "

    "; - } elsif (@listmode) { - $line =~ /^( *)(\t?)/; - warn "Tabs have strange effects on indentation detection" - if length ($2) > 0; - while (@listmode && $listmode[$#listmode] > length ($1)) { - push @newcomments, " " x $listmode[$#listmode] . ""; - pop @listmode; - } - push @newcomments, $line; - } else { - push @newcomments, $line; - } - } - my $list; - foreach $list (@listmode) { - push @newcomments, " " x $list . ""; - } - $comments = join ("\n", @newcomments); - } else { - die "Unsupported comments type `$htmling::comments_type'"; - } - $comments =~ s/

    \n(

    \n)+/

    \n/g; - $comments =~ s/

    \n$//; - $comments =~ s/^

    \n//; - $comments =~ s/

    /

    /g if $htmling::indent; - print OUT $comments . "\n"; - print OUT "
  • \n" if $htmling::indent; - print OUT "
    " unless $endpre;
    -   }
    -}
    -
    -sub var2str {
    -    my ($var, $href) = @_;
    -
    -    my ($typestr) = typing::type_to_f90 ($var->{'vartype'});
    -    my ($initial) = (!exists $var->{'initial'} ? ""
    -          : " $var->{'initop'} " . typing::expr_to_f90 ($var->{'initial'}));
    -    $href = txt2html ($var->{'name'}) unless $href;
    -    return $typestr . join (", ", "", attriblist ($var)) . " :: $href$initial";
    -}
    -
    -sub txt2html {
    -    my ($txt) = @_;
    -    $txt =~ s//>/g;
    -    return $txt;
    -}
    -
    -sub attriblist {
    -    my ($struct) = @_;
    -    my @attribs = ();
    -
    -    push @attribs, $struct->{'vis'} if exists $struct->{'vis'};
    -    push @attribs, "optional" if exists $struct->{'optional'};
    -    push @attribs, @{$struct->{'tempattribs'}}
    -        if exists $struct->{'tempattribs'};
    -
    -    return @attribs;
    -}
    -
    -1;
    diff --git a/Tools/F_scripts/f90doc/stmts.pl b/Tools/F_scripts/f90doc/stmts.pl
    deleted file mode 100644
    index 83d20a300af..00000000000
    --- a/Tools/F_scripts/f90doc/stmts.pl
    +++ /dev/null
    @@ -1,891 +0,0 @@
    -package stmts;
    -
    -use strict;
    -
    -require "expr_parse.pl";
    -require "typing.pl";
    -require "utils.pl";
    -
    -#########################################################################
    -# PUBLIC GLOBALS
    -
    -# Set to a reference to a routine to take !! comments if !! comments are
    -# to be caught.
    -$stmts::bangbang = "";
    -
    -# Set to a reference to a routine to return accumulated comments if !! comments
    -# are caught.  You should reset them after each time you call read_line or
    -# read_stmt.
    -$stmts::comments = "";
    -
    -# Set this to disable warnings.  Don't use this for a compiler!  Suitable for
    -# something like f90doc though.  This shouldn't be used once stmts supports
    -# all Fortran 90 statements and attributes; until then, it's pretty much
    -# needed; after then, it should be removed.
    -$stmts::diable_warns = 0;
    -
    -# Set this to use fixed-form Fortran, like good old Fortran 77.
    -$stmts::fixed_form = 0;
    -
    -#########################################################################
    -# PRIVATE GLOBALS
    -
    -# A "left-over" piece of a statement is stored here when semi-colons are
    -# encountered.
    -$stmts::leftover = "";
    -
    -# Number of opened files.
    -$stmts::nfile = 0;
    -
    -# List of string's values.
    -@stmts::strings = ();
    -
    -# List of structure pointers that we're currently nested in.
    -# topnest stores the top of the stack.
    -@stmts::nesting = ();
    -$stmts::topnest = undef;
    -
    -# List of structure pointers that we're currently nested in, but for a
    -# specified type.
    -%stmts::nesting_by = ();
    -
    -#########################################################################
    -# ROUTINES
    -
    -#####
    -# Reads an entire file, and returns all the top-level structures found.
    -# If specified, a given function will be called after every statement
    -# (usually this is for resetting !! comments and such).
    -#####
    -sub read_file {
    -  my ($filename, $every_stmt) = @_;
    -  stmts::open_file ($filename);
    -
    -  my ($stmt, $struct, @rval);
    -  my @toplevel = ();
    -  while ((@rval = stmts::read_stmt ()) [0]) {
    -    push @toplevel, $rval[1] if !defined $stmts::topnest && ref $rval[1];
    -    &$every_stmt () if defined $every_stmt;
    -  }
    -
    -  return @toplevel;
    -}
    -
    -#####
    -# Starts reading the specified filename.
    -#####
    -sub open_file {
    -   my ($filename) = @_;
    -   $stmts::FILE = "";
    -
    -   open IN, $filename
    -     or die "Couldn't open $filename";
    -   $stmts::{'FILE' . $stmts::nfile} = $stmts::{'IN'};
    -}
    -
    -#####
    -# Cleans up from reading the current file.
    -# This is automatically called by read_line, so most don't have to worry
    -# about it.
    -# Returns false if there are no files left.
    -#####
    -sub close_file {
    -   close IN;
    -   $stmts::nfile--;
    -   if ($stmts::nfile > 0) {
    -      # CHECK--does this still do the desired thing, in light of open_file?
    -      $stmts::{'IN'} = $stmts::{'FILE' . $stmts::nfile};
    -      return 1;
    -   } else {
    -      # Clean up strings.
    -      @stmts::strings = ();
    -      return 0;
    -   }
    -}
    -
    -#####
    -# Reads a line of Fortran 90 doing whatever it takes.  This may involve
    -# reading multiple lines from the current file, walking into files, etc.
    -# INCLUDE is parsed at this level.
    -# Note that the returned string may have various cases (lc isn't called).
    -#####
    -sub read_line {
    -
    -ALLOVERAGAIN:
    -  my $line;
    -  if ($stmts::leftover ne '') {
    -    $line = $stmts::leftover;
    -    $stmts::leftover = '';
    -  } else {
    -    $line = ;
    -    until (defined $line) {
    -      return "" unless close_file ();
    -      $line = ;
    -    }
    -    chomp $line;
    -
    -    substr ($line, 0, 1) = '!' if $stmts::fixed_form && $line =~ /^\S/;
    -  }
    -
    -  # This is used for fixed-form continuations.
    -  my $lastlen = length $line;
    -
    -  my $continue = 0;
    -
    -  while (1) {
    -    # Grab doubled comments (!!) if requested.
    -    if ($stmts::bangbang && $line =~ /^([^"'!]|('[^']*')|("[^"]*"))*(!!.*)$/) {
    -      $line = substr ($line, 0, length ($line) - length ($4));
    -      &$stmts::bangbang ($4);
    -    }
    -
    -    # Delete comments.
    -    elsif ($line =~ /^([^"'!]|(\'[^']*')|("[^"]*"))*(!.*)$/) {
    -      $line = substr ($line, 0, length ($line) - length ($4));
    -    }
    -
    -    # Fixed-form continuations.
    -    if ($stmts::fixed_form) {
    -
    -      # Check next line for continuation mark.
    -      $stmts::leftover = ;
    -      $stmts::leftover = '' unless defined $stmts::leftover;
    -      chomp $stmts::leftover;
    -      substr ($stmts::leftover, 0, 1) = '!' if $stmts::leftover =~ /^\S/;
    -      if ($stmts::leftover =~ /^\s....\S/) {
    -
    -        # Pad previous line with spaces if it had less than 72 characters.
    -        $line .= ' ' x (72-$lastlen) if $lastlen < 72;
    -
    -        # Add next (continuation) line to the line.
    -        $line .= substr ($stmts::leftover, 6);
    -        $lastlen = length $stmts::leftover;
    -        
    -        # Continue on to check the next line.
    -        $stmts::leftover = '';
    -        next;
    -      }
    -      
    -    # Free-form continuations.
    -    } elsif ($continue || $line =~ /&\s*$/) {
    -      $line = $` if $line =~ /&\s*$/;
    -      my $rest = ;
    -      chomp $rest;
    -      $rest = $' if $rest =~ /^\s*&/;
    -      $line = "$line$rest";
    -      # Blank lines don't stop the continuation.
    -      $continue = ($rest =~ /^\s*(?:!.*)?$/);
    -      next;
    -    }
    -
    -    last;
    -  }
    -
    -  # Semicolons.
    -  if ($line =~ /^([^;]*);(.*)$/) {
    -    $line = $1;
    -    if ($stmts::leftover eq '') {
    -      $stmts::leftover = $2;
    -    } else {
    -      $stmts::leftover .= ";$2";
    -    }
    -  }
    -
    -  # Replace strings to avoid confusion.
    -  my @quotes;
    -  while ($line =~ / " ([^"]|"")* " | ' ([^']|'')* ' /xg) {
    -    push @quotes, [length $`, length $&, $&];
    -  }
    -  for my $quote (reverse @quotes) {
    -    ## Process in reverse order so that $start is preserved despite replacement
    -    my ($start, $length, $string) = @$quote;
    -    push @stmts::strings, $string;
    -    substr ($line, $start, $length) = "\'" . $#stmts::strings . "\'";
    -  }
    -
    -  # Get rid of spaces on either end.
    -  $line = utils::trim ($line);
    -
    -  goto ALLOVERAGAIN if $line eq '';
    -
    -  #print "read line `$line'\n";
    -
    -  return $line;
    -}
    -
    -#####
    -# Returns the physical value for the given string number.
    -#####
    -sub get_string {
    -   my ($n) = @_;
    -   return $stmts::strings[$n];
    -}
    -
    -#####
    -# Reads a Fortran 90 statement from the current input.
    -# Checks for proper nesting, etc., and keeps tracks of what's in what.
    -# Possible results:
    -#    ('?', $the_line)
    -#    ('program', \%structure)
    -#    ('endprogram', \%structure)
    -#    ('module', \%structure)
    -#    ('endmodule', \%structure)
    -#    ('subroutine', \%structure)
    -#    ('endsubroutine', \%structure)
    -#    ('function', \%structure)
    -#    ('endfunction', \%structure)
    -#    ('program', \%structure)
    -#    ('endprogram', \%structure)
    -#    ('type', \%structure)
    -#    ('endtype', \%structure)
    -#    ('interface', \%structure)
    -#    ('endinterface', \%structure)
    -#    ('var', \%struct1, \%struct2, ...)
    -#    ('contains', \%parent)
    -#    ('public', $name1, $name2, ...)          empty means global default
    -#    ('private', $name1, $name2, ...)         empty means global default
    -#    ('optional', $name1, $name2, ...)
    -#    ('call', $arg1, $arg2, ...)              currently args are unparsed
    -#####
    -sub read_stmt {
    -   my ($line) = read_line ();
    -   if (! $line) {
    -      die "File ended while still nested" if @stmts::nesting;
    -      return ("", "");
    -   }
    -
    -   # MODULE PROCEDURE (must be before module)
    -   if ($line =~ /^module\s+procedure\s+(\w.*)$/i) {
    -      die "module procedure outside of interface block" unless defined $stmts::topnest && $stmts::topnest->{'type'} eq "interface" && $stmts::topnest->{'name'} ne "";
    -      my (@list) = split (/\s*,\s*/, utils::trim ($1));
    -      my ($p);
    -      foreach $p (@list) {
    -         die "Invalid module procedure `$p'" unless $p =~ /^\w+$/;
    -         new_struct ({
    -            'type'   => "mprocedure",
    -            'name'   => $p,
    -            hashed_comments ()
    -         });
    -      }
    -      return ("mprocedure", @list);
    -   }
    -
    -   # MODULE/PROGRAM
    -   elsif ($line =~ /^(module|program)(?:\s+(\w+))?$/i) {
    -      die "$1 begun not at top level" if defined $stmts::topnest;
    -      return new_nest ({
    -         'type' => lc $1,
    -         'name' => (defined $2 ? $2 : ''),
    -         hashed_comments ()
    -      });
    -   }
    -
    -   # END MODULE/SUBROUTINE/FUNCTION/PROGRAM/TYPE/INTERFACE, or general END
    -   elsif ($line =~ /^end\s*(?:(module|subroutine|function|program|type|interface)(?:\s+(\w+))?)?$/i) {
    -      die "END statement outside of any nesting" unless defined $stmts::topnest;
    -      my $top = $stmts::topnest;
    -
    -      # We do some special "fixing up" for modules, which resolves named
    -      # references (module procedures) and computes publicity.
    -      #
    -      # Note that end_nest will ensure that the type of thing ended matches
    -      # the thing the user says it is ending, so we don't have to worry about
    -      # that.
    -      if ($top->{'type'} eq "module") {
    -
    -        # Set publicity (visibility) of objects within the module.
    -
    -        # First, the explicitly set ones.
    -        my $name;
    -        foreach $name (@{$top->{'publiclist'}}) {
    -          do_attrib ($name, "vis", 'public', "visibility");
    -        }
    -        foreach $name (@{$top->{'privatelist'}}) {
    -          do_attrib ($name, "vis", 'private', "visibility");
    -        }
    -
    -        # Second, the globally set ones (those obeying the default).
    -        my $obj;
    -        $top->{'defaultvis'} = "public" unless exists $top->{'defaultvis'};
    -        foreach $obj (@{$top->{'ocontains'}}) {
    -          $obj->{'vis'} = $top->{'defaultvis'} unless exists $obj->{'vis'};
    -        }
    -
    -        # Traverse (arbitrarily deeply) nested structures.
    -        sub traverse {
    -          my ($node) = @_;
    -          my $top = $stmts::topnest;   # HAVE NO IDEA WHY THIS IS NEEDED
    -          
    -          # Graduate nested MODULE PROCEDURE (mprocedure) to point to the
    -          # appropriate thing (either a function or a subroutine with that
    -          # name).
    -          if ($node->{'type'} eq "mprocedure") {
    -            die "Couldn't find module procedure $node->{'name'} (nothing with that name in module $top->{'name'})"
    -              unless exists $top->{'contains'}->{lc $node->{'name'}};
    -            
    -            my ($possibles) =
    -              $top->{'contains'}->{lc $node->{'name'}};
    -            die "Couldn't find module procedure $node->{'name'} in module $top->{'name'} (wrong type)"
    -              if !exists $possibles->{'subroutine'}
    -              && !exists $possibles->{'function'};
    -            die "Found both a subroutine and function to match module procedure $node->{'name'} in module $top->{'name'}"
    -              if exists $possibles->{'subroutine'}
    -              && exists $possibles->{'function'};
    -            
    -            if (exists $possibles->{'subroutine'}) {
    -              $node->{'bind'} = $possibles->{'subroutine'};
    -            } else {
    -              $node->{'bind'} = $possibles->{'function'};
    -            }
    -          }
    -
    -          # Recurse.
    -          map { traverse ($_) } @{$node->{'ocontains'}}
    -          if exists $node->{'ocontains'};
    -        }
    -        map { traverse ($_) } @{$top->{'ocontains'}};
    -      }
    -
    -      my @return_val = end_nest ($1, $2);
    -
    -      # Subroutines and functions in interface blocks must be noted at the
    -      # top level.  We do this with "interface" structures with the names
    -      # of the actual contained routines (unless this is already the
    -      # case).  Make sense?
    -      if ($top->{'type'} eq "interface" && $top->{'name'} eq "") {
    -          my $sub;
    -          foreach $sub (@{$top->{'ocontains'}}) {
    -              next if $sub->{'name'} eq $top->{'name'} ||
    -                      $sub->{'type'} eq "mprocedure";
    -
    -              my %copy = %$top;
    -              $copy{'name'} = $sub->{'name'};
    -              new_nest (\%copy);
    -              my $old_within = $sub->{'within'};
    -              new_struct ($sub);
    -              $sub->{'within'} = $old_within;
    -              end_nest ('interface', $sub->{'name'});
    -          }
    -      }
    -
    -      return @return_val;
    -   }
    -
    -   # SUBROUTINE/FUNCTION
    -   elsif ($line =~ /^(?:(.+?)\s+)?(subroutine|function)\s+(\w+)\s*(\([^()]*\))?(?:\s*result\s*\(\s*(\w+)\s*\))?$/i) {
    -      my ($type, $name, $parmstr, $rtype, $result) =
    -         (lc $2, $3,    $4,       $1,     $5);
    -
    -      die "Start of $type $name before `contains' section of $stmts::topnest->{'type'} $stmts::topnest->{'name'}"
    -          if defined $stmts::topnest && ! $stmts::topnest->{'incontains'} &&
    -             $stmts::topnest->{'type'} ne "interface";
    -      if (exists $stmts::nesting_by{'subroutine'} ||
    -          exists $stmts::nesting_by{'function'}) {
    -         my $n = 0;
    -         $n += scalar @{$stmts::nesting_by{'subroutine'}}
    -            if exists $stmts::nesting_by{'subroutine'};
    -         $n += scalar @{$stmts::nesting_by{'function'}}
    -            if exists $stmts::nesting_by{'function'};
    -#FIXME  #die "Routine nested in routine nested in routine" if $n > 1;
    -      }
    -
    -      $parmstr = "()" unless defined $parmstr;
    -      $parmstr = utils::trim (substr ($parmstr, 1, length ($parmstr) - 2));
    -      my (@parms);
    -      if ($parmstr) {
    -         @parms = split (/\s*,\s*/, $parmstr);
    -         my ($parm);
    -         foreach $parm (@parms) {
    -            die "Parameter `$parm' is not just a word or *"
    -              unless $parm =~ /^\w+|\*$/;
    -            ## * as a final argument allows the calling to specify a statement
    -            ## to jump as an alternative return address.  (Legacy Fortran!)
    -            ## Thanks to Art Olin for this info.
    -         }
    -      } else {
    -         @parms = ();
    -      }
    -
    -      my $struct = {
    -         'type'      => $type,
    -         'name'      => $name,
    -         'parms'     => \@parms,
    -         hashed_comments ()
    -      };
    -      new_nest ($struct);
    -
    -      $struct->{'result'} = $result if defined $result;
    -
    -      $rtype = "" unless defined $rtype;
    -      while ($rtype =~ /(?:^|\s+)(recursive|pure|elemental)$/i ||
    -             $rtype =~ /^(recursive|pure|elemental)(?:\s+|$)/i) {
    -        $rtype = $` . $'; # actually whichever is not blank
    -        $struct->{lc $1} = 1;
    -      }
    -      if ($rtype ne '') {
    -        $struct->{'rtype'} = parse_type ($rtype);
    -        new_struct ({
    -          'type'        => 'var',
    -          'name'        => (defined $result ? $result : $name),
    -          'vartype'     => $struct->{'rtype'},
    -          'comments'    => ''
    -        });
    -      }
    -
    -      return ($type, $struct);
    -   }
    -
    -   # TYPE definition (must go before variable declarations)
    -   elsif ($line =~ /^type(?:\s+|\s*(,.*)?::\s*)(\w+)$/i) {
    -     my $struct = new_nest ({
    -       'type' => 'type',
    -       'name' => $2,
    -       hashed_comments ()
    -     });
    -     if (defined $1) {
    -       my $attrib = utils::trim (substr ($1, 1));
    -       if ($attrib =~ /^(public|private)$/i) {
    -         $struct->{'vis'} = lc $attrib;
    -       } elsif ($attrib) {
    -         warn "Invalid attribute `$attrib' for derived-type declaration--should be just public or private";
    -       }
    -     }
    -     return $struct;
    -   }
    -
    -   # INTERFACE block (for overloading) or statement (for definition of external)
    -   elsif ($line =~ /^interface(?:\s+(\S.+))?$/i) {
    -       return new_nest ({
    -           'type' => 'interface',
    -           'name' => (defined $1 ? $1 : ""),
    -           hashed_comments ()
    -       });
    -   }
    -
    -   # CONTAINS
    -   elsif ($line =~ /^contains$/i) {
    -      die "`contains' found at top level" unless defined $stmts::topnest;
    -      die "`contains' found in $stmts::topnest->{'type'} $stmts::topnest->{'name'}" unless exists $stmts::topnest->{'incontains'};
    -      die "Multiple `contains' found in same scope"
    -         if $stmts::topnest->{'incontains'};
    -      die "`contains' found in interface definition"
    -         if $stmts::topnest->{'interface'};
    -      $stmts::topnest->{'incontains'} = 1;
    -      return ("contains", $stmts::topnest);
    -   }
    -
    -   # PUBLIC/PRIVATE/SEQUENCE
    -   elsif ($line =~ /^(public|private|sequence)(?=\s+[^=(]|::|$)(\s*::\s*)?/i) {
    -     my ($what, $rest) = (lc $1, $');
    -
    -     if (defined $stmts::topnest && $stmts::topnest->{'type'} eq "type") {
    -       die "public statement not allowed in a type declaration"
    -         if $what eq 'public';
    -       die "$1 cannot be qualified inside type declaration" if $rest;
    -       $stmts::topnest->{$what . 'type'} = 1;
    -       return ($what);
    -     } else {
    -       die "sequence statement only allowed immediately inside type declaration"
    -         if $1 eq 'sequence';
    -
    -       die "$1 statement not immediately inside a module or type declaration"
    -         unless defined $stmts::topnest && $stmts::topnest->{'type'} eq "module";
    -       if ($rest eq "") {  # Unqualified
    -         die "Unqualified $what in addition to unqualified " .
    -           $stmts::topnest->{'defaultvis'}
    -         if exists $stmts::topnest->{'defaultvis'};
    -         $stmts::topnest->{'defaultvis'} = $what;
    -         return ($what);
    -         
    -       } else {  # Qualified
    -         my @namelist = map {
    -           die "Invalid name `$_' specified in $what statement"
    -             unless /^\s*(\w+)(?:\s*(\([^()]+\)))?\s*$/i;
    -           $1 . (defined $2 ? $2 : "");
    -         } (split ',', $rest);
    -         push @{$stmts::topnest->{"${what}list"}}, @namelist;
    -         return ($what, @namelist);
    -       }
    -     }
    -   }
    -
    -    # OPTIONAL
    -    elsif ($line =~ /^optional(\s+|\s*::\s*)((\w|\s|,)+)$/i) {
    -        my $name;
    -        my @namelist = split (/\s*,\s*/, utils::trim ($2));
    -        foreach $name (@namelist) {
    -            do_attrib ($name, "optional", 1, "optional attribute");
    -        }
    -        return ('optional', @namelist);
    -    }
    -
    -   # Variable declarations
    -   elsif ($line =~ /^(integer|real|double\s*precision|character|complex|logical|type)\s*(\(|\s\w|[:,*])/i) {
    -      my ($vartype, $rest) = parse_part_as_type ($line);
    -      my (@attribs, @right);
    -      if ($rest =~ /^(.*)\:\:(.*)/) {
    -         my ($a, $b) = ($1, $2);
    -         @attribs = map (( utils::trim ($_) ), utils::balsplit (",", $a));
    -         @right = map (( utils::trim ($_) ), utils::balsplit (",", $b));
    -      } else {
    -         @attribs = ();
    -         @right = map (( &utils::trim ($_) ), utils::balsplit (",", $rest));
    -      }
    -      my ($r, @structs);
    -      foreach $r (@right) {
    -          my ($rl, $rassign) = &utils::balsplit ("=", $r);
    -          my ($rll, $starpart) = &utils::balsplit ("*", $rl);
    -          if (defined $starpart) {
    -            die "Sorry, I don't support 'character var*kind' yet; use 'character*kind var' instead";
    -          }
    -          $rll =~ /^ (\w+) (\s* \(.*\))? \s* $/x
    -              or die "Invalid variable declaration `$rll'";
    -          my ($name, $dimension) = ($1, $2);
    -          my ($initop, $initial);
    -          if (defined $rassign) {
    -            # implicit lead =
    -            $rassign =~ /^ (>?) \s* (.*) $/x
    -              or die "Invalid variable initialization `= $rassign'";
    -            ($initop, $initial) = ("=" . $1, $2);
    -          }
    -
    -          my $struct;
    -          $struct = {
    -              'type'        => 'var',
    -              'name'        => $name,
    -              'vartype'     => $vartype,
    -              hashed_comments ()
    -          };
    -          if (defined $initial) {
    -            $struct->{'initop'} = $initop;
    -            $struct->{'initial'} = expr_parse::parse_expr ($initial);
    -          }
    -          new_struct ($struct);
    -          push @structs, $struct;
    -
    -          my @attribs_copy = @attribs;
    -          push @attribs_copy, "dimension $dimension" if defined $dimension;
    -
    -          my ($attrib, @tempattribs);
    -          foreach $attrib (@attribs_copy) {
    -              if ($attrib =~ /^(public|private)$/i) {
    -                  $attrib = lc $attrib;
    -                  $struct->{'vis'} = $attrib;
    -              } elsif ($attrib =~ /^optional$/i) {
    -                  $attrib = lc $attrib;
    -                  $struct->{$attrib} = 1;
    -              } elsif ($attrib) {
    -                  warn "Unrecognized attribute `$attrib'"
    -                      unless $stmts::disable_warns;
    -                  push @tempattribs, $attrib;
    -              }
    -          }
    -
    -          $struct->{'tempattribs'} = \@tempattribs;
    -      }
    -
    -      return ('var', @structs);
    -   }
    -
    -   # USE
    -   elsif ($line =~ /^use\s+(\w+)($|,\s*)/i) {
    -      die "`use' found at top level" unless defined $stmts::topnest;
    -      die "`use' found in $stmts::topnest->{'type'} $stmts::topnest->{'name'}" unless exists $stmts::topnest->{'uses'};
    -      my $extra = length $' ? $' : undef;
    -      push @{$stmts::topnest->{'uses'}}, [$1, $extra];
    -
    -      return ('use', $1, $extra);
    -   }
    -   
    -   # CALL or IF (...) CALL [hack--xxx]
    -   elsif ($line =~ /^(?:if\s*\(.*\)\s*)?call\s+(\w+)\s*(?:\(\s*(.*?)\s*\))?$/i) {
    -      die "`call' found at top level" unless defined $stmts::topnest;
    -      die "`call' found in $stmts::topnest->{'type'} $stmts::topnest->{'name'}" unless exists $stmts::topnest->{'calls'};
    -      $stmts::topnest->{'calls'}->{$1} = 1;
    -      my @args = ();
    -      @args = split /\s*,\s*/, $2 if defined $2;
    -      return ('call', @args);
    -   }
    -   
    -   # Unrecognized statement
    -   else {
    -      if ($line =~ /^\w+/) {
    -         warn "Unrecognized statement beginning with word $&" unless $stmts::disable_warns;
    -      } else {
    -         warn "Unrecognized statement" unless $stmts::disable_warns;
    -      }
    -      return ('?', $line);
    -   }
    -}
    -
    -#####
    -# Returns a list that would fit right into a hash table you're making.  If
    -# there are no comments, returns the empty list.  The entry is called
    -# 'comments'.
    -#####
    -sub hashed_comments {
    -   if ($stmts::comments) {
    -      return ( 'comments', &$stmts::comments () );
    -   } else {
    -      return ();
    -   }
    -}
    -
    -#####
    -# Makes note of a new structure.  Called by new_nest, for example.
    -#####
    -sub new_struct {
    -   my ($struct) = @_;
    -   my $type = $struct->{'type'};
    -
    -   die "Basic structure must be found at a nesting level"
    -     unless defined $stmts::topnest;
    -
    -   if (exists ($stmts::topnest->{'contains'}->{lc $struct->{'name'}})) {
    -      die "Redefinition of $type $struct->{'name'} in $stmts::topnest->{'type'} $stmts::topnest->{'name'}"
    -         if exists ($stmts::topnest->{'contains'}->{lc $struct->{'name'}}->{$type});
    -      $stmts::topnest->{'contains'}->{lc $struct->{'name'}}->{$type} = $struct;
    -   } else {
    -      $stmts::topnest->{'contains'}->{lc $struct->{'name'}} =
    -         { $type => $struct };
    -   }
    -   push @{$stmts::topnest->{'ocontains'}}, $struct;
    -   $struct->{'within'} = $stmts::topnest;
    -}
    -
    -#####
    -# Starts a new nesting level represented by the given structure.  The
    -# structure must define the 'type' and 'name' entries.  You should not
    -# define the 'contains' or 'defaultvis' entry.
    -#####
    -sub new_nest {
    -   my ($struct) = @_;
    -   my ($type) = $struct->{'type'};
    -
    -   $struct->{'contains'} = { };
    -   $struct->{'ocontains'} = [ ];
    -
    -   # Program unit
    -   if ($type eq "subroutine" || $type eq "function" || $type eq "module" || $type eq "program") {
    -     $struct->{'incontains'} = 0;
    -     $struct->{'uses'} = [ ];
    -     $struct->{'interface'} = 0 if $type eq "subroutine" || $type eq "function";
    -   }
    -
    -   # Program unit with code
    -   if ($type eq "subroutine" || $type eq "function" || $type eq "program") {
    -     $struct->{'calls'} = { };
    -   }
    -
    -   if (defined $stmts::topnest) {
    -      my ($toptype) = $stmts::topnest->{'type'};
    -      if ($toptype eq "interface" && ($struct->{'type'} eq "subroutine" || $struct->{'type'} eq "function")) {
    -         $struct->{'interface'} = 1;
    -      } else {
    -         die "Nesting in $toptype not allowed" unless $toptype eq "subroutine" || $toptype eq "function" || $toptype eq "module" || $toptype eq "program";
    -      }
    -      new_struct ($struct) unless $struct->{'name'} eq "";
    -   }
    -   push @stmts::nesting, $struct;
    -   if (exists ($stmts::nesting_by{$type})) {
    -      push @{$stmts::nesting_by{$type}}, $struct;
    -   } else {
    -      $stmts::nesting_by{$type} = [ $struct ];
    -   }
    -   $stmts::topnest = $struct;
    -   return ( $type, $struct );
    -}
    -
    -#####
    -# Ends the current nesting level.  Optionally, you can pass the 'type' that
    -# it's supposed to be as the first argument.  Optionally, you can pass the
    -# 'name' it should have after that (as the second argument).
    -#####
    -sub end_nest {
    -  my ($type, $name) = @_;
    -  $type = lc $type if defined $type;
    -  unless (defined $stmts::topnest) {
    -    if (defined $name && defined $type) {
    -      die "Ended $type $name at top level";
    -    } elsif (defined $type) {
    -      die "Ended unnamed $type at top level";
    -    } else {
    -      die "END statement at top level";
    -    }
    -  }
    -  my ($struct) = pop @stmts::nesting;
    -  die "Ended $type while in $struct->{'type'} $struct->{'name'}"
    -    if defined $type && $type ne $struct->{'type'};
    -  die "Ended $name while in $struct->{'type'} $struct->{'name'}"
    -    if defined $name && $name !~ /^\Q$struct->{'name'}\E$/i;
    -  if (@stmts::nesting) {
    -    $stmts::topnest = $stmts::nesting[$#stmts::nesting];
    -  } else {
    -    $stmts::topnest = undef;
    -  }
    -  pop @{$stmts::nesting_by{$struct->{'type'}}};
    -  return ( "end" . (defined $type ? $type : ''), $struct );
    -}
    -
    -#####
    -# Parses the basic type that prefixes the given string.
    -# Returns (parsed type, string portion remaining).
    -#####
    -sub parse_part_as_type {
    -  my ($str) = @_;
    -
    -  $str =~ /^integer|real|double\s*precision|character|complex|logical|type/i
    -    or die "parse_part_as_type: Invalid input `$str'";
    -  my ($base, $rest) = ($&, $');
    -
    -  my $level = 0;
    -  ## Wait till we are outside of all parens and see a letter, colon, or comma.
    -  while ($rest =~ /[()a-zA-Z_:,]/g) {
    -    if ($& eq '(') {
    -      $level++;
    -    } elsif ($& eq ')') {
    -      $level--;
    -      die "Unbalanced parens (too many )'s)" if $level < 0;
    -    } elsif ($level == 0) {
    -      return (parse_type ($base . $`), $& . $');
    -    }
    -  }
    -  
    -  die "Couldn't split into type and rest for `$str'";
    -
    -# Some old, presumably less-efficient code:
    -#  my ($level, $len) = (0, length ($str));
    -#  my ($i, $c);
    -#  for ($i = length ($&); $i < $len; $i++) {
    -#    $c = substr ($str, $i, 1);
    -#    if ($c eq "(") {
    -#      $level++;
    -#    } elsif ($c eq ")") {
    -#      $level--;
    -#      die "Unbalanced parens (too many )'s)" if $level < 0;
    -#    } elsif ($level == 0 && $c =~ /^\w|:|,$/) {
    -#      last;
    -#    }
    -#  }
    -#  return (parse_type (substr ($str, 0, $i)), substr ($str, $i));
    -}
    -
    -#####
    -# Parses a basic type, creating a type structure for it:
    -#     integer [( [kind=] kind_val )]
    -#     real [( [kind=] kind_val )]
    -#     double precision                  (no kind is allowed)
    -#     complex [( [kind=] kind_val )]
    -#     character [( char_stuff )]
    -#     logical [( [kind=] kind_val )]
    -#     type (type_name)
    -#
    -# integer*number, real*number, complex*number, and logical*number are also
    -# supported as nonstandard Fortran extensions for kind specification.
    -# "number" can either be a direct integer or an expression in parentheses.
    -# 
    -# char_stuff is empty or (stuff), where stuff is one of:
    -#     len_val [, [kind=] kind_val]
    -#     kind=kind_val [, [len=] len_val]
    -#     len=len_val [, kind=kind_val]
    -# kind_val and len_val are expressions; len_val can also be just `*'.
    -# 
    -# The length can also be specified using the nonstandard Fortran extension
    -# character*number.  If number is `*', it must be in parentheses (indeed,
    -# any expression other than a number must be in parentheses).
    -#####
    -sub parse_type {
    -  my ($str) = @_;
    -
    -  # print "Parsing type: $str\n";
    -
    -  $str = utils::trim ($str);
    -  $str =~ /^(integer|real|double\s*precision|complex|character|logical|type)
    -    \s* (?: \( (.*) \) | \* \s* (\d+ | \(.*\)) )?$/ix
    -    or die "Invalid type `$str'";
    -  my $base = lc $1;
    -
    -  if ($base =~ /^double\s*precision$/) {
    -    die "double precision cannot have kind specification"
    -      if defined $2 || defined $3;
    -    return $typing::double_precision;
    -  }
    -
    -  if (defined $2 || defined $3) {
    -    my $star = defined $3;
    -    my $args = utils::trim ($star ? $3 : $2);
    -
    -    if ($base eq 'type') {
    -      die "type$args invalid--use type($args)" if $star;
    -      die "type(w) for non-word w" unless $args =~ /^\w+$/;
    -      return typing::make_type ($base, $args);
    -    } elsif ($base eq 'character') {
    -      my ($kind, $len, $rest);
    -      if ($star) {
    -        if ($args =~ /^\(\s*\*\s*\)$/) {
    -          $len = '*';
    -        } else {
    -          $len = expr_parse::parse_expr ($args);
    -        }
    -      } elsif ($args =~ /^kind\s*=\s*/i) {
    -        $args = substr ($args, length ($&));
    -        ($kind, $rest) = expr_parse::parse_part_as_expr ($args);
    -        if (defined $rest) {
    -          $rest = utils::trim ($rest);
    -          $rest =~ s/^len\s*=\s*//i;
    -          $len = ($rest eq '*' ? '*' : expr_parse::parse_expr ($rest));
    -        }
    -      } elsif ($args =~ /^len\s*=\s*/i) {
    -        $args = substr ($args, length ($&));
    -        if (substr ($args, 0, 1) eq '*') {
    -          $len = '*';
    -          $rest = $args;
    -          $rest =~ s/^\*\s*,// or $rest = undef;
    -        } else {
    -          ($len, $rest) = expr_parse::parse_part_as_expr ($args);
    -        }
    -        if (defined $rest) {
    -          $rest = utils::trim ($rest);
    -          $rest =~ /^kind\s*=\s*/
    -            or die "kind= specifier needed when len= specifier is given";
    -          $rest = substr ($rest, length ($&));
    -          $kind = expr_parse::parse_expr ($rest);
    -        }
    -      } else {  # len
    -        if (substr ($args, 0, 1) eq '*') {
    -          $len = "*";
    -          $rest = $args;
    -          $rest =~ s/^\*\s*,// or $rest = undef;
    -        } else {
    -          ($len, $rest) = expr_parse::parse_part_as_expr ($args);
    -        }
    -        if (defined $rest) {
    -          $rest = utils::trim ($rest);
    -          $rest = substr ($rest, length ($&)) if $rest =~ /^kind\s*=\s*/i;
    -          $kind = expr_parse::parse_expr ($rest);
    -        }
    -      }
    -      return typing::make_character_type ($kind, $len);
    -    } else {
    -      $args =~ s/^kind\s*=\s*//i unless $star;
    -      return typing::make_type ($base, expr_parse::parse_expr ($args));
    -    }
    -  } else {
    -    die "type without (type-name) after it" if $base eq 'type';
    -    die "No default type for `$base'"
    -      unless exists $typing::default_type{$base};
    -    return $typing::default_type{$base};
    -  }
    -}
    -
    -sub do_attrib {
    -    my ($name, $attrib, $val, $attribname) = @_;
    -    my ($struct);
    -    foreach $struct (values %{$stmts::topnest->{'contains'}->{lc $name}}) {
    -        die "Redefining $attribname of $struct->{'type'} $name from " .
    -            "$struct->{$attrib} to $val" if exists $struct->{$attrib};
    -        $struct->{$attrib} = $val;
    -    }
    -}
    -
    -1;
    diff --git a/Tools/F_scripts/f90doc/typing.pl b/Tools/F_scripts/f90doc/typing.pl
    deleted file mode 100644
    index 9347b8bbb16..00000000000
    --- a/Tools/F_scripts/f90doc/typing.pl
    +++ /dev/null
    @@ -1,516 +0,0 @@
    -package typing;
    -
    -use strict;
    -
    -# Stores the type of each variable.
    -$typing::typeof = "";
    -# Stack: one typeof per scope.
    -@typing::typeofs = ();
    -
    -# Stores the definition of each type.
    -$typing::typedef = "";
    -# Stack: one typedef per scope.
    -@typing::typedefs = ();
    -
    -# Stores the definition of each function/operator.
    -$typing::code = "";
    -# Stack: one code per scope.
    -@typing::codes = ();
    -
    -
    -# DOUBLE PRECISION type.
    -$typing::double_precision = typing::make_type ('real', 8, "double precision");
    -
    -# Default character kind.
    -$typing::default_character_kind = 1;
    -
    -# Default types.
    -%typing::default_type = (
    -  'complex' => typing::make_type ('complex', 8, "complex"),
    -  'integer' => typing::make_type ('integer', 4, "integer"),
    -  'logical' => typing::make_type ('logical', 1, "logical"),
    -  'real'    => typing::make_type ('real', 4, "real"),
    -);
    -$typing::default_type{'character'} = typing::make_character_type ();
    -
    -# Types with wild sub and any other info (just a base defined).
    -$typing::wild_type = {
    -   'complex'   => typing::make_type ('complex'),
    -   'real'      => typing::make_type ('real'),
    -   'integer'   => typing::make_type ('integer'),
    -   'logical'   => typing::make_type ('logical'),
    -   'character' => typing::make_type ('character')
    -};
    -
    -
    -# Precedence of operations; based on that which is in expr_parse.y.
    -# Higher precedence indicated by larger number.
    -$typing::precedence = {
    -  '.eqv.'  => 1,
    -  '.neqv.' => 1,
    -  '.or.'   => 2,
    -  '.and.'  => 3,
    -  '.not.'  => 4,
    -  '<'      => 5,
    -  '>'      => 5,
    -  '<='     => 5,
    -  '>='     => 5,
    -  '=='     => 5,
    -  '/='     => 5,
    -  '//'     => 6,
    -  '+'      => 7,
    -  '-'      => 7,
    -  'u+'     => 8,
    -  'u-'     => 8,
    -  '*'      => 9,
    -  '/'      => 9,
    -  '**'     => 10,
    -  '%'      => 11,
    -  '%call'  => 11,
    -  '%colon' => 30, # this is a guess
    -  '%namedarg' => 30, # this is a guess
    -  '%array' => 40,    # as in "forty days and forty nights," which means
    -  '%const' => 40,    #    "a long time," here we use 40 as an approx. to infty.
    -  '%var'   => 40,
    -  '%do'    => 40,
    -};
    -
    -#####
    -# Starts a new scope.  If this is a top-level scope, initializes the codes
    -# to intrinsics and the like.
    -#####
    -sub new_scope {
    -   my ($newtypeof, $newtypedef, $newcode);
    -
    -   if (@typing::typeofs) {
    -      $typing::typeof = utils::copy_hash ($typing::typeof);
    -      $typing::typedef = utils::copy_hash ($typing::typedef);
    -      $typing::code = utils::copy_hash ($typing::code);
    -   } else {
    -      $typing::typeof = {};
    -      $typing::typedef = {};
    -      $typing::code = {};
    -      $typing::code{"//"} = [ {
    -         'parms' => [ $typing::wild_type{'character'},
    -                      $typing::wild_type{'character'} ],
    -         'return' => $typing::wild_type{'character'}
    -      } ];
    -      my ($int, $real, $logical, $char) = ( $typing::wild_type{'integer'},
    -         $typing::wild_type{'real'}, $typing::wild_type{'logical'},
    -         $typing::wild_type{'character'} );
    -      my ($op);
    -      foreach $op ("+", "-", "*", "/") {
    -         $typing::code->{$op} = [
    -            { 'parms' => [ $int, $int ], 'return' => $int },
    -            { 'parms' => [ $real, $int ], 'return' => $real },
    -            { 'parms' => [ $int, $real ], 'return' => $real },
    -            { 'parms' => [ $real, $real ], 'return' => $real }
    -         ];
    -      }
    -      $typing::code->{"**"} = [
    -         { 'parms' => [ $int, $int ], 'return' => $int },
    -         { 'parms' => [ $real, $int ], 'return' => $real },
    -         { 'parms' => [ $int, $real ], 'return' => $real },
    -         { 'parms' => [ $real, $real ], 'return' => $real },
    -      ];
    -      foreach $op ("u+", "u-") {
    -         $typing::code->{$op} = [
    -            { 'parms' => [ $int ], 'return' => $int },
    -            { 'parms' => [ $real ], 'return' => $real }
    -         ];
    -      }
    -      foreach $op ("<", "<=", "==", "/=", ">", ">=") {
    -         $typing::code->{$op} = [
    -            { 'parms' => [ $int, $int ], 'return' => $logical },
    -            { 'parms' => [ $real, $int ], 'return' => $logical },
    -            { 'parms' => [ $int, $real ], 'return' => $logical },
    -            { 'parms' => [ $real, $real ], 'return' => $logical },
    -            { 'parms' => [ $char, $char ], 'return' => $logical }
    -         ];
    -      }
    -      foreach $op (".or.", ".and.", ".eqv.", ".neqv.") {
    -         $typing::code->{$op} = [
    -            { 'parms' => [ $logical, $logical ], 'return' => $logical }
    -         ];
    -      }
    -      $typing::code->{".not."} = [
    -         { 'parms' => [ $logical ], 'return' => $logical }
    -      ];
    -      $typing::code->{"//"} = [
    -         { 'parms' => [ $char, $char ], 'return' => $char }
    -      ];
    -   }
    -
    -   push @typing::typeofs, $typing::typeof;
    -   push @typing::typedefs, $typing::typedef;
    -   push @typing::codes, $typing::code;
    -}
    -
    -#####
    -# Ends an old scope.
    -#####
    -sub end_scope {
    -   pop @typing::typeofs;
    -   pop @typing::typedefs;
    -   pop @typing::codes;
    -
    -   if ($typing::typeofs) {
    -      $typing::typeof = $typing::typeofs[$#typing::typeofs];
    -      $typing::typedef = $typing::typedefs[$#typing::typedefs];
    -      $typing::code = $typing::codes[$#typing::codes];
    -   }
    -}
    -
    -#####
    -# Creates a new type with specified base and sub.
    -# Note that sub corresponds to kind for built-in types.
    -# sub can be left out for a wild type.
    -# A third argument, print, can specify how the type should print.  Used for
    -# default types, double precision, etc.
    -#####
    -sub make_type {
    -  my ($base, $sub, $print) = @_;
    -  my $type = { 'base' => $base };
    -  $type->{'sub'} = $sub if $sub;
    -  $type->{'print'} = $print;
    -  return $type;
    -}
    -
    -#####
    -# Creates a new complex type with specified types of "sides."
    -#####
    -sub make_complex_type {
    -  my ($type1, $type2) = @_;
    -  my ($base1, $base2) = ($type1->{'base'}, $type2->{'base'});
    -  die "Complex constant must have real and/or integer parts, but I found types $base1 and $base2"
    -    unless ($base1 eq 'integer' || $base1 eq 'real') &&
    -           ($base2 eq 'integer' || $base2 eq 'real');
    -  my $which;
    -  # From Metcalf and Reed's Fortran 90 Explained, if one of the types is an
    -  # integer then the kind of the complex is the kind of the other type.
    -  if ($base1 eq 'integer') {
    -    $which = $type2;
    -  } elsif ($base2 eq 'integer') {
    -    $which = $type1;
    -  } else {
    -    if ($type1->{'sub'} > $type2->{'sub'}) {
    -      $which = $type1;
    -    } else {
    -      $which = $type2;
    -    }
    -  }
    -  return {
    -    'base'    => 'complex',
    -    'sub'     => $which
    -  };
    -}
    -
    -#####
    -# Creates a new character type with specified sub (kind) and len.
    -#####
    -sub make_character_type {
    -  my ($sub, $len) = @_;
    -  $sub = $typing::default_character_kind unless defined $sub;
    -  $sub = [ "%const", $typing::default_type{'integer'}, $sub ] unless ref $sub;
    -  $len = "1" unless defined $len;
    -  $len = [ "%const", $typing::default_type{'integer'}, $len ]
    -    unless ref $len || $len eq "*";
    -  return {
    -    'base' => 'character',
    -    'sub'  => $sub,
    -    'len'  => $len
    -  };
    -}
    -
    -#####
    -# Returns true iff the given type was created to be the default of its kind.
    -# This has no meaning for compound types (hence it returns false).  For
    -# characters, there's a slight bug in that it will say that the type was
    -# created default even if you specify the default explicitly.  No biggie.
    -# Note that the defaultness is only for the KIND, not the LENGTH.
    -# 
    -# I could fix the above-mentioned problem by storing a 'default' entry just for
    -# the default types.  Then is_default_kind just translates to an exists test.
    -# This is much simpler and avoids the weird checks for double precision numbers
    -# (0.0d0 ==> don't show a kind.  This is really "default").  This would be
    -# kinda nice but 'default' is probably the wrong word.
    -#####
    -sub is_default_kind {
    -   my ($type) = @_;
    -
    -   if ($type->{'base'} eq "character") {
    -     my ($top, @rest) = @{$type->{'sub'}};
    -     return ($top eq "%const" && $rest[0] eq $typing::default_type{'integer'}
    -          && $rest[1] == $typing::default_character_kind);
    -   } else {
    -      return (exists $typing::default_type{$type->{'base'}} && $typing::default_type{$type->{'base'}} eq $type);
    -   }
    -}
    -
    -#####
    -# Converts the given type to a string, written in Fortran 90 code.
    -# Only displays the kind if it was specified explicitly.  Slight bug:
    -# if you say character (kind=1) :: c, then it will print character :: c.
    -# (This is only for characters with default kind.  For other types with
    -# default kind explicitly specified, it is printed.)
    -#####
    -sub type_to_f90 {
    -  my ($type) = @_;
    -
    -  # This covers the case where the kind is the default, except for characters.
    -  return $type->{'print'} if defined $type->{'print'};
    -
    -  my $mods = "";
    -  if ($type->{'base'} eq "character") {
    -    if ($type->{'len'} eq "*") {
    -      $mods = "len=*";
    -    } elsif ($type->{'len'}->[0] ne "%const" ||
    -             $type->{'len'}->[1] != $typing::default_type{'integer'} ||
    -             $type->{'len'}->[2] ne "1") {
    -      $mods = "len=" . expr_to_f90 ($type->{'len'});
    -    }
    -    unless (is_default_kind ($type)) {
    -      $mods .= ", " unless $mods eq '';
    -      $mods .= "kind=" . expr_to_f90 ($type->{'sub'});
    -    }
    -  } elsif ($type->{'base'} eq "type") {
    -    $mods = "$type->{'sub'}";
    -  } else {
    -    $mods = "kind=" . expr_to_f90 ($type->{'sub'});
    -  }
    -  $mods = " ($mods)" unless $mods eq '';
    -  return $type->{'base'} . $mods;
    -}
    -
    -#####
    -# Converts an expression right back to a string, doing "no" conversion (i.e.,
    -# output is in Fortran 90).  Optionally returns the precedence of the outmost
    -# operation in the expression (see $typing::precedence).
    -#####
    -sub expr_to_f90 {
    -  my ($exprptr) = @_;
    -  my ($op, @children) = @$exprptr;
    -
    -  die "Unrecognized operation $op",%$op," (has no precedence?)"
    -    unless exists $typing::precedence->{$op};
    -  my $prec = $typing::precedence->{$op};
    -
    -  my $answer;
    -  if ($op eq "%") {
    -    my ($struct, $elem) = @children;
    -    my ($s, $sprec) = expr_to_f90 ($struct);
    -    $s = "($s)" if $prec > $sprec;
    -    $answer = "$s%$elem";
    -  } elsif ($op eq "%var") {
    -    $answer = $children[0];
    -  } elsif ($op eq "%const") {
    -    my ($type, $val) = @children;
    -    if ($type->{'base'} eq 'complex') {
    -      if (!is_default_kind ($type->{'sub'})) {
    -        my ($k1, $k2) = ("", "");
    -        $k1 = "_$type->{'sub'}->{'sub'}" unless $val->[0] =~ /D[+-]?\d+$/i;
    -        $k2 = "_$type->{'sub'}->{'sub'}" unless $val->[1] =~ /D[+-]?\d+$/i;
    -        $answer = "($val->[0]$k1, $val->[1]$k2)";
    -      } else {
    -        $answer = "($val->[0], $val->[1])";
    -      }
    -    } elsif (is_default_kind ($type) || $val =~ /D[+-]?\d+$/i) {
    -      $answer = $val;
    -    } else {
    -      $answer = "${val}_$type->{'sub'}";
    -    }
    -  } elsif ($op eq "%array") {
    -    $answer = "(/ " . join (", ", map { (expr_to_f90 ($_))[0] } @children)
    -            . " /)";
    -  } elsif ($op eq "%colon") {
    -    my ($left, $right) = @children;
    -    $left = (expr_to_f90 ($left))[0] if $left ne '';
    -    $right = (expr_to_f90 ($right))[0] if $right ne '';
    -    $answer = $left . ":" . $right;  # : has ultimately low precedence
    -  } elsif ($op eq "%namedarg") {
    -    my ($left, $right) = @children;
    -    $answer = $left . " = " .
    -              (expr_to_f90 ($right))[0];  # = has ultimately low precedence
    -  } elsif ($op eq "%do") {
    -    my ($child, $var, @args) = @children;
    -    $answer = "(" . expr_to_f90 ($child) . ", " . $var . " = " .
    -              join (", ", map { (expr_to_f90 ($_))[0] } @args) . ")";
    -  } elsif ($op eq "%call") {
    -    ($op, @children) = @children;
    -    my ($s, $sprec) = expr_to_f90 ($op);
    -    $s = "($s)" if $prec > $sprec;
    -    $answer = "$s (" . join (", ", map ((expr_to_f90 ($_))[0], @children))
    -      . ")";
    -  } elsif (scalar @children == 1) {
    -    $op = substr ($op, 1) if substr ($op, 0, 1) eq 'u';
    -    my ($s, $sprec) = expr_to_f90 ($children[0]);
    -    $s = "($s)" if $prec > $sprec;
    -    $answer = "$op$s";
    -  } elsif (scalar @children == 2) {
    -    my ($s1, $sprec1) = expr_to_f90 ($children[0]);
    -    $s1 = "($s1)" if $prec > $sprec1;
    -    my ($s2, $sprec2) = expr_to_f90 ($children[1]);
    -    $s2 = "($s2)" if $prec > $sprec2;
    -    $answer = "$s1 $op $s2";
    -  } else {
    -    die "expr_to_f90: Unrecognized operation $op with " . (scalar @children) .
    -      " children";
    -  }
    -
    -  if (wantarray) {
    -    return ($answer, $prec);
    -  } else {
    -    return $answer;
    -  }
    -}
    -
    -#####
    -# Computes the type of the given expression (which is passed by reference).
    -# Returns a reference to the actual type.
    -#####
    -sub expr_type {
    -   my ($exprptr) = @_;
    -   my ($op, @children) = @$exprptr;
    -
    -   if ($op eq "%") {
    -      my ($struct, $elem) = @children;
    -      my ($type) = expr_type ($struct);
    -      die "expr_type: \%$elem failed: left part is not a compound type" unless $type->{'base'} eq "type";
    -      my ($typedef) = $typing::typedef->{$type->{'sub'}};
    -      my ($elemtype) = $typedef->{$elem};
    -      die "expr_type: \%$elem failed: left part does not include $elem" unless $elemtype;
    -      return $elemtype;
    -   } elsif ($op eq "%var") {
    -      my ($var) = @children;
    -      my ($vartype) = $typing::typeof->{$var};
    -      die "expr_type: Variable $var undefined" unless $vartype;
    -      return $vartype;
    -   } elsif ($op eq "%const") {
    -      my ($type, $val) = @children;
    -      return $type;
    -   } elsif ($op eq "%array") {
    -      # HERE
    -   } elsif ($op eq "%colon") {
    -      my ($string, $left, $right) = @children;
    -      my ($stringtype) = expr_type ($string);
    -      die "expr_type: colon notation for non-character string" if $stringtype->{'base'} ne "character";
    -      die "expr_type: colon notation for character array" if $stringtype->{'dimension'};
    -      return typing::make_character_type ($stringtype->{'sub'}, "*");
    -   } elsif ($op eq "%call") {
    -      ($op, @children) = @children;
    -      my ($subop, @subchildren) = @$op;
    -      if ($subop eq "%var") {
    -         ($op) = @subchildren;
    -         # Fall through: we allow overloaded function name in this special case.
    -      } else {
    -         # Function call without overloading or an array reference.
    -         my ($optype) = expr_type ($op);
    -
    -         if ($optype->{'dimension'}) {  # array reference
    -            return make_type ($optype->{'base'}, $optype->{'sub'});
    -         } else {
    -            die "expr_type: Array/function call for something that is neither" unless $optype->{'base'} eq "interface";
    -            # HERE function call without overloading.
    -         }
    -      }
    -   }
    -
    -   my ($opcodes) = $typing::code->{$op};
    -   die "Operation/function $op undefined" unless $opcodes;
    -   my (@childtypes) = ();
    -   my ($child);
    -   foreach $child (@children) {
    -      print "childtypes was: @childtypes\n";
    -      print "type of $child is ", expr_type ($child), "\n";
    -      push @childtypes, expr_type ($child);
    -      print "childtypes is now: @childtypes\n";
    -   }
    -   my ($opcode);
    -   foreach $opcode (@$opcodes) {
    -      print "children: @children\n";
    -      print "childtypes: @childtypes\n";
    -      if (typing::subtypes_list (\@childtypes, $opcode->{'parms'})) {
    -         my ($parm);
    -         my ($ret) = $opcode->{'return'};
    -         if ($ret->{'base'} eq "character" && ! $ret->{'len'}) {
    -            $ret->{'len'} = 0;
    -find_len:
    -            foreach $parm (@$opcode->{'parms'}) {
    -               if ($parm->{'base'} eq $ret->{'base'}) {
    -                  if ($parm->{'len'} eq "*") {
    -                     $ret->{'len'} = "*";
    -                     last find_len;
    -                  } else {
    -                     $ret->{'len'} += $parm->{'len'};
    -                  }
    -               }
    -            }
    -         }
    -         if ($ret->{'sub'}) {
    -            return $ret;
    -         } else {
    -            # Make intrinsic type's kind: look for all parameters with the same
    -            # base type, and use the maximum kind out of those.
    -            my ($maxkind) = -1;
    -            foreach $parm (@$opcode->{'parms'}) {
    -               if ($parm->{'base'} eq $ret->{'base'}) {
    -                  $maxkind = $parm->{'sub'} if $maxkind < $parm->{'sub'};
    -               }
    -            }
    -            die "expr_type: Internal error caused by new_scope" if $maxkind < 0;
    -            return { %$ret, 'sub' => $maxkind };
    -         }
    -      }
    -   }
    -   die "Operation/function $op defined but not for this (these) type(s)";
    -}
    -
    -#####
    -# Returns if first type is a subtype of the second type.
    -# This currently only supports intrinsic types (integer*4 subtypes integer*?).
    -#####
    -sub subtypes {
    -   my ($t1, $t2) = @_;
    -   return 0 if $t1->{'base'} ne $t2->{'base'};
    -   if ($t1->{'base'} eq "type") {
    -      return 0 if $t1->{'sub'} eq $t2->{'sub'};
    -   } else {
    -      if ($t1->{'base'} eq "character") {
    -         if ($t1->{'len'}) {
    -            return 0 unless $t1->{'len'};
    -            return 0 if $t2->{'len'} != $t1->{'len'};
    -         }
    -      }
    -      if ($t1->{'base'} eq "interface") {
    -         # HERE fill this in when I do function types ("interface").
    -      }
    -      if ($t1->{'sub'}) {
    -         return 0 unless $t1->{'sub'};
    -         return 0 if $t2->{'sub'} ne $t1->{'sub'};
    -      }
    -   }
    -   return 1;
    -}
    -
    -#####
    -# Returns if first type is a subtype of the second type, where the first
    -# and second type are (conceptually) tuples.  That is, the lengths must be
    -# equal, and each element must subtype the corresponding element.
    -# The lists are passed as references.
    -#####
    -sub subtypes_list {
    -   my ($l1ptr, $l2ptr) = @_;
    -   my (@l1) = @$l1ptr;
    -   my (@l2) = @$l2ptr;
    -   return 0 if $#l1 != $#l2;
    -
    -   print "l1 is: @l1\n";
    -   print "l2 is: @l2\n";
    -
    -   my ($i);
    -   for ($i = 0; $i <= $#l1; $i++) {
    -      print "calling subtypes with $l1[$i] and $l2[$i]\n";
    -      return 0 unless typing::subtypes ($l1[$i], $l2[$i]);
    -   }
    -   return 1;
    -}
    diff --git a/Tools/F_scripts/f90doc/utils.pl b/Tools/F_scripts/f90doc/utils.pl
    deleted file mode 100644
    index 8e409f0db1c..00000000000
    --- a/Tools/F_scripts/f90doc/utils.pl
    +++ /dev/null
    @@ -1,87 +0,0 @@
    -package utils;
    -
    -use strict;
    -
    -sub copy_list {
    -   my ($listref) = @_;
    -   my @list;
    -   @list = @$listref;
    -   \@list;
    -}
    -
    -sub copy_hash {
    -   my ($hashref) = @_;
    -   my %hash;
    -   %hash = %$hashref;
    -   \%hash;
    -}
    -
    -sub hash2str {
    -   my ($hash) = @_;
    -   my ($key, $s);
    -   $s = "{\n";
    -   foreach $key (keys %$hash) {
    -      $s .= "   $key => $hash->{$key}\n";
    -   }
    -   $s .= "}";
    -}
    -
    -sub trim {
    -   my ($s) = @_;
    -   $s =~ s/^\s*//;
    -   $s =~ s/\s*$//;
    -   $s;
    -}
    -
    -# balsplit (sep, string) splits string into pieces divided by sep when
    -# sep is "outside" ()s.  Returns a list just like split.
    -sub balsplit {
    -   my ($sep, $str) = @_;
    -   my ($i, $c);
    -   my ($len, $level, $left) = (length ($str), 0, 0);
    -   my (@list) = ();
    -
    -   for ($i = 0; $i < $len; $i++) {
    -      $c = substr ($str, $i, 1);
    -      if ($c eq "(") {
    -         $level++;
    -      } elsif ($c eq ")") {
    -         $level--;
    -         die "balsplit: Unbalanced parens (too many )'s)" if $level < 0;
    -      } elsif ($c eq $sep && $level == 0) {
    -         push (@list, substr ($str, $left, $i-$left));
    -         $left = $i + 1;
    -      }
    -   }
    -
    -   push (@list, substr ($str, $left));
    -   return @list;
    -}
    -
    -# Takes the first word of each element of the list.
    -sub leftword {
    -   my ($listref) = @_;
    -   my @out = ();
    -   my ($x);
    -   foreach $x (@$listref) {
    -      $x =~ s/^\s*//;
    -      $x =~ /^\w*/;
    -      push (@out, $&);
    -   }
    -   @out;
    -}
    -
    -sub remove_blanks {
    -   my ($listref) = @_;
    -   my @out = ();
    -   my ($x);
    -   foreach $x (@$listref) {
    -      push (@out, $x) unless $x =~ /^\s*$/;
    -   }
    -   @out;
    -}
    -
    -sub do_nothing {
    -}
    -
    -1;
    diff --git a/Tools/F_scripts/fcheck.py b/Tools/F_scripts/fcheck.py
    index 20033f85ac9..f5be4efd726 100755
    --- a/Tools/F_scripts/fcheck.py
    +++ b/Tools/F_scripts/fcheck.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     # a simple routine to parse Fortran files and make sure that things are
     # declared double precision, and constants are of the form 1.0_dp_t or
    @@ -122,9 +122,3 @@ def visit(argFiles, dirname, files):
     
             if (badFile == 1):
                 print " "
    -
    -
    -
    -
    -
    -
    diff --git a/Tools/F_scripts/find_files_vpath.py b/Tools/F_scripts/find_files_vpath.py
    index c9dd5485930..a52d0f28f3d 100755
    --- a/Tools/F_scripts/find_files_vpath.py
    +++ b/Tools/F_scripts/find_files_vpath.py
    @@ -1,12 +1,10 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     """
     Take a vpath and a list of files and find where in the first vpath the
     first occurrence of the file.
     """
     
    -from __future__ import print_function
    -
     import sys
     import os
     import argparse
    diff --git a/Tools/F_scripts/findparams.py b/Tools/F_scripts/findparams.py
    index 70280b134de..79d698ade8d 100755
    --- a/Tools/F_scripts/findparams.py
    +++ b/Tools/F_scripts/findparams.py
    @@ -1,6 +1,4 @@
    -#!/usr/bin/env python
    -
    -from __future__ import print_function
    +#!/usr/bin/env python3
     
     import sys
     import os
    diff --git a/Tools/F_scripts/makebuildinfo.py b/Tools/F_scripts/makebuildinfo.py
    index e5f206339b2..4d08a571145 100755
    --- a/Tools/F_scripts/makebuildinfo.py
    +++ b/Tools/F_scripts/makebuildinfo.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     # a simple script that writes the build_info.f90 file that is used
     # to store information for the job_info file that we store in plotfiles.
    diff --git a/Tools/F_scripts/write_probin.py b/Tools/F_scripts/write_probin.py
    index 10ec4489066..54729eb5f5e 100755
    --- a/Tools/F_scripts/write_probin.py
    +++ b/Tools/F_scripts/write_probin.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     """This routine parses plain-text parameter files that list runtime
     parameters for use in our codes.  The general format of a parameter
    @@ -24,8 +24,6 @@
     
     """
     
    -from __future__ import print_function
    -
     import os
     import sys
     import argparse
    diff --git a/Tools/GNUMake/Make.defs b/Tools/GNUMake/Make.defs
    index db1ce350e54..f3f712816a6 100644
    --- a/Tools/GNUMake/Make.defs
    +++ b/Tools/GNUMake/Make.defs
    @@ -1,8 +1,3 @@
    -# Check python version
    -my_python_version := $(word 2, $(shell python --version 2>&1))
    -ifneq ($(firstword $(sort 2.7 $(my_python_version))), 2.7)
    -  $(error Python >= 2.7 required! Your version is $(my_python_version))
    -endif
     
     ifneq (,$(findstring ~,$(AMREX_HOME)))
       $(warning *** AMREX_HOME string constains ~ and make will not like it. So it is replaced.)
    @@ -762,6 +757,7 @@ else ifeq ($(USE_CUDA),TRUE)
             LINK_WITH_FORTRAN_COMPILER=TRUE
         endif
     
    +    $(info Loading $(AMREX_HOME)/Tools/GNUMake/comps/nvcc.mak...)
         include $(AMREX_HOME)/Tools/GNUMake/comps/nvcc.mak
     
         ifeq ($(USE_MPI),TRUE)
    @@ -971,17 +967,17 @@ endif
     F90CACHE =
     
     ifeq ($(TP_PROFILING),VTUNE)
    -  $(into Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.vtune
    +  $(info Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.vtune)
       include        $(AMREX_HOME)/Tools/GNUMake/tools/Make.vtune
     endif
     
     ifeq ($(TP_PROFILING),CRAYPAT)
    -  $(into Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.craypat
    +  $(info Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.craypat)
       include        $(AMREX_HOME)/Tools/GNUMake/tools/Make.craypat
     endif
     
     ifeq ($(TP_PROFILING),FORGE)
    -  $(into Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.forge
    +  $(info Loading $(AMREX_HOME)/Tools/GNUMake/tools/Make.forge)
       include        $(AMREX_HOME)/Tools/GNUMake/tools/Make.forge
     endif
     
    diff --git a/Tools/GNUMake/Make.machines b/Tools/GNUMake/Make.machines
    index 6903ba05125..738461965d0 100644
    --- a/Tools/GNUMake/Make.machines
    +++ b/Tools/GNUMake/Make.machines
    @@ -60,11 +60,22 @@ ifdef OLCF_ROCM_ROOT
         which_site := olcf
         which_computer := crusher
       endif
    +
    +  ifeq ($(findstring frontier, $(host_name)), frontier)
    +    which_site := olcf
    +    which_computer := frontier
    +  endif
     endif
     
    -ifeq ($(findstring theta, $(host_name)), theta)
    -  which_site := alcf
    -  which_computer := theta
    +ifeq ($(findstring alcf.anl.gov, $(host_name)),alcf.anl.gov)
    +  ifeq ($(findstring theta, $(host_name)), theta)
    +    which_site := alcf
    +    which_computer := theta
    +  endif
    +  ifeq ($(findstring polaris, $(host_name)), polaris)
    +    which_site := alcf
    +    which_computer := polaris
    +  endif
     endif
     
     ifeq ($(findstring sierra, $(host_name)), sierra)
    diff --git a/Tools/GNUMake/Make.rules b/Tools/GNUMake/Make.rules
    index 8b014678500..48ef6d9d3f8 100644
    --- a/Tools/GNUMake/Make.rules
    +++ b/Tools/GNUMake/Make.rules
    @@ -441,7 +441,7 @@ $(tmpEXETempDir)/%.F.orig: %.F
     # & --> *
     $(tmpEXETempDir)/%-cppd.h: %.H
     	@if [ ! -d $(tmpEXETempDir) ]; then mkdir -p $(tmpEXETempDir); fi
    -	$(SILENT) $(CC) $(CPPFLAGS) -DAMREX_TYPECHECK $(includes) -E -P -x c -std=c99 $< -o $@
    +	$(SILENT) $(CC) $(CPPFLAGS) -DAMREX_TYPECHECK $(includes) -E -P -x c -std=c11 $< -o $@
     	@$(SHELL) -ec 'sed -i -e '\''s/amrex::Real/$(amrex_real)/g'\'' $@ ; \
     	               sed -i -e '\''s/amrex_real/$(amrex_real)/g'\''  $@ ; \
     	               sed -i -e '\''s/amrex_particle_real/$(amrex_particle_real)/g'\''  $@ ; \
    @@ -512,9 +512,14 @@ endif
     # e.g. libraries, simply do "make print-libraries".  This will
     # print out the value.
     print-%:
    -	@echo $* is '$($*)'
    +	@echo $* is "$($*)"
     	@echo '    origin = $(origin $*)'
    -	@echo '     value = $(value  $*)'
    +	@echo '     value = $(subst ','"'"',$(value  $*))'
    +# We need to use subst on the result of $(value) because it contains single
    +# quotes.  Shell command echo does not like things like 'x'$(filter-out)'y',
    +# because what it sees is 'x', $(filter-out), and 'y'.  With the substitution, it
    +# will see 'x', "'", '$(filter-out)', "'", and 'y', with $(filter-out) inside a
    +# pair of single quotes.
     
     .PHONY: help
     help:
    diff --git a/Tools/GNUMake/comps/armclang.mak b/Tools/GNUMake/comps/armclang.mak
    index efe4a718106..d2826cb1134 100644
    --- a/Tools/GNUMake/comps/armclang.mak
    +++ b/Tools/GNUMake/comps/armclang.mak
    @@ -57,18 +57,18 @@ ifeq ($(WARN_ERROR),TRUE)
     endif
     
     # disable some warnings
    -CXXFLAGS += -Wno-pass-failed -Wno-c++17-extensions
    +CXXFLAGS += -Wno-c++17-extensions
     
     ########################################################################
     
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
     else
    -  CXXSTD := c++14
    +  CXXSTD := c++17
     endif
     
     CXXFLAGS += -std=$(CXXSTD)
    -CFLAGS   += -std=c99
    +CFLAGS   += -std=c11
     
     FMODULES = -J$(fmoddir) -I $(fmoddir)
     
    diff --git a/Tools/GNUMake/comps/cray.mak b/Tools/GNUMake/comps/cray.mak
    index 85a1133e412..cf484e6ec38 100644
    --- a/Tools/GNUMake/comps/cray.mak
    +++ b/Tools/GNUMake/comps/cray.mak
    @@ -53,10 +53,10 @@ else
         # CCE <= 8. So we adjust some flags to achieve similar optimization. See
         # this page:
         # http://pubs.cray.com/content/S-5212/9.0/cray-compiling-environment-cce-release-overview/cce-900-software-enhancements
    -    CXXFLAGS += -O2 -ffast-math #-fsave-loopmark -fsave-decompile
    -    CFLAGS   += -O2 -ffast-math #-fsave-loopmark -fsave-decompile
    -    FFLAGS   += -O2 -h list=a
    -    F90FLAGS += -O2 -h list=a
    +    CXXFLAGS += -O3 -ffast-math #-fsave-loopmark -fsave-decompile
    +    CFLAGS   += -O3 -ffast-math #-fsave-loopmark -fsave-decompile
    +    FFLAGS   += -O3 -h list=a
    +    F90FLAGS += -O3 -h list=a
       else
         GENERIC_COMP_FLAGS += -h list=a
     
    @@ -73,15 +73,15 @@ endif
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
     else
    -  CXXSTD := c++14
    +  CXXSTD := c++17
     endif
     
     ifeq ($(CRAY_IS_CLANG_BASED),TRUE)
       CXXFLAGS += -std=$(CXXSTD)
    -  CFLAGS   += -std=c99
    +  CFLAGS   += -std=c11
     else
       CXXFLAGS += -h std=$(CXXSTD)
    -  CFLAGS   += -h c99
    +  CFLAGS   += -h c11
     endif
     
     F90FLAGS += -N 255 -em
    @@ -119,10 +119,6 @@ else
       endif
     endif
     
    -ifeq ($(CRAY_IS_CLANG_BASED),TRUE)
    -  CXXFLAGS += -Wno-pass-failed -Wno-c++17-extensions
    -endif
    -
     CXXFLAGS += $(GENERIC_COMP_FLAGS)
     CFLAGS   += $(GENERIC_COMP_FLAGS)
     FFLAGS   += $(GENERIC_COMP_FLAGS)
    diff --git a/Tools/GNUMake/comps/dpcpp.mak b/Tools/GNUMake/comps/dpcpp.mak
    index d2f7f72108e..33c05fc0c7a 100644
    --- a/Tools/GNUMake/comps/dpcpp.mak
    +++ b/Tools/GNUMake/comps/dpcpp.mak
    @@ -36,8 +36,6 @@ else
     
     endif
     
    -CXXFLAGS += -Wno-pass-failed # disable this warning
    -
     ifeq ($(WARN_ALL),TRUE)
       warning_flags = -Wall -Wextra -Wno-sign-compare -Wunreachable-code -Wnull-dereference
       warning_flags += -Wfloat-conversion -Wextra-semi
    @@ -71,7 +69,7 @@ else
     endif
     
     CXXFLAGS += -Wno-error=sycl-strict -fsycl
    -CFLAGS   += -std=c99
    +CFLAGS   += -std=c11
     
     ifneq ($(DEBUG),TRUE)  # There is currently a bug that DEBUG build will crash.
     ifeq ($(DPCPP_AOT),TRUE)
    diff --git a/Tools/GNUMake/comps/gnu.mak b/Tools/GNUMake/comps/gnu.mak
    index 10510f30a8d..2d67d418717 100644
    --- a/Tools/GNUMake/comps/gnu.mak
    +++ b/Tools/GNUMake/comps/gnu.mak
    @@ -38,23 +38,23 @@ ifeq ($(EXPORT_DYNAMIC),TRUE)
       GENERIC_GNU_FLAGS += -rdynamic -fno-omit-frame-pointer
     endif
     
    -gcc_major_ge_5 = $(shell expr $(gcc_major_version) \>= 5)
    -gcc_major_ge_6 = $(shell expr $(gcc_major_version) \>= 6)
    -gcc_major_ge_7 = $(shell expr $(gcc_major_version) \>= 7)
     gcc_major_ge_8 = $(shell expr $(gcc_major_version) \>= 8)
     gcc_major_ge_9 = $(shell expr $(gcc_major_version) \>= 9)
     gcc_major_ge_10 = $(shell expr $(gcc_major_version) \>= 10)
     gcc_major_ge_11 = $(shell expr $(gcc_major_version) \>= 11)
    +gcc_major_ge_12 = $(shell expr $(gcc_major_version) \>= 12)
    +
    +ifneq ($(gcc_major_ge_8),1)
    +  $(error GCC < 8 not supported)
    +endif
     
     ifeq ($(THREAD_SANITIZER),TRUE)
       GENERIC_GNU_FLAGS += -fsanitize=thread
     endif
     ifeq ($(FSANITIZER),TRUE)
       GENERIC_GNU_FLAGS += -fsanitize=address -fsanitize=undefined
    -  ifeq ($(gcc_major_ge_8),1)
    -    GENERIC_GNU_FLAGS += -fsanitize=pointer-compare -fsanitize=pointer-subtract
    -    GENERIC_GNU_FLAGS += -fsanitize=builtin -fsanitize=pointer-overflow
    -  endif
    +  GENERIC_GNU_FLAGS += -fsanitize=pointer-compare -fsanitize=pointer-subtract
    +  GENERIC_GNU_FLAGS += -fsanitize=builtin -fsanitize=pointer-overflow
     endif
     
     ifeq ($(USE_OMP),TRUE)
    @@ -97,7 +97,7 @@ else
     endif
     
     ifeq ($(WARN_ALL),TRUE)
    -  warning_flags = -Wall -Wextra
    +  warning_flags = -Wall -Wextra -Wlogical-op -Wfloat-conversion -Wnull-dereference -Wmisleading-indentation -Wduplicated-cond -Wduplicated-branches
     
       ifeq ($(WARN_SIGN_COMPARE),FALSE)
         warning_flags += -Wno-sign-compare
    @@ -108,27 +108,15 @@ ifeq ($(WARN_ALL),TRUE)
         warning_flags += -Wpedantic
       endif
     
    -  ifeq ($(gcc_major_ge_6),1)
    -    warning_flags += -Wnull-dereference
    -  endif
    -
    -  ifeq ($(gcc_major_ge_5),1)
    -    warning_flags += -Wfloat-conversion
    -  endif
    -
       ifneq ($(WARN_SHADOW),FALSE)
         warning_flags += -Wshadow
       endif
     
    -  ifeq ($(gcc_major_version),7)
    -    warning_flags += -Wno-array-bounds
    -  endif
    -
       ifeq ($(gcc_major_ge10),1)
         warning_flags += -Wextra-semi
       endif
     
    -  CXXFLAGS += $(warning_flags) -Woverloaded-virtual
    +  CXXFLAGS += $(warning_flags) -Woverloaded-virtual -Wnon-virtual-dtor
       CFLAGS += $(warning_flags)
     endif
     
    @@ -157,21 +145,12 @@ endif
     
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
    -  ifeq ($(shell expr $(gcc_major_version) \< 5),1)
    -    ifneq ($(NO_CONFIG_CHECKING),TRUE)
    -      ifeq ($(CXXSTD),c++14)
    -        $(error C++14 support requires GCC 5 or newer.)
    -      endif
    -    endif
    -  endif
       CXXFLAGS += -std=$(CXXSTD)
     else
    -  ifeq ($(gcc_major_version),5)
    -    CXXFLAGS += -std=c++14
    -  endif
    +  CXXFLAGS += -std=c++17
     endif
     
    -CFLAGS   += -std=gnu99
    +CFLAGS   += -std=c11
     
     ########################################################################
     
    diff --git a/Tools/GNUMake/comps/hip.mak b/Tools/GNUMake/comps/hip.mak
    index d94f8f3c66f..6005409f9cc 100644
    --- a/Tools/GNUMake/comps/hip.mak
    +++ b/Tools/GNUMake/comps/hip.mak
    @@ -23,7 +23,7 @@ endif
     
     # Generic flags, always used
     CXXFLAGS = -std=$(CXXSTD) -m64
    -CFLAGS   = -std=c99 -m64
    +CFLAGS   = -std=c11 -m64
     
     FFLAGS   = -ffixed-line-length-none -fno-range-check -fno-second-underscore
     F90FLAGS = -ffree-line-length-none -fno-range-check -fno-second-underscore -fimplicit-none
    @@ -86,8 +86,6 @@ ifeq ($(HIP_COMPILER),clang)
     
       endif
     
    -  CXXFLAGS += -Wno-pass-failed  # disable this warning
    -
       ifeq ($(WARN_ALL),TRUE)
         warning_flags = -Wall -Wextra -Wunreachable-code -Wnull-dereference
         warning_flags += -Wfloat-conversion -Wextra-semi
    @@ -109,7 +107,7 @@ ifeq ($(HIP_COMPILER),clang)
     
       # Generic HIP info
       ROC_PATH=$(realpath $(dir $(HIP_PATH)))
    -  SYSTEM_INCLUDE_LOCATIONS += $(HIP_PATH)/include
    +  SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/include $(HIP_PATH)/include
     
       # rocRand
       SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/include/hiprand $(ROC_PATH)/include/rocrand
    @@ -122,13 +120,12 @@ ifeq ($(HIP_COMPILER),clang)
       # rocThrust - Header only
       # SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/include/rocthrust
     
    -  ifeq ($(USE_ROCTX),TRUE)
       # rocTracer
    -  CXXFLAGS += -DAMREX_USE_ROCTX
    -  HIPCC_FLAGS += -DAMREX_USE_ROCTX
    -  SYSTEM_INCLUDE_LOCATIONS += $(ROC_PATH)/include/roctracer $(ROC_PATH)/include/rocprofiler
    -  LIBRARY_LOCATIONS += $(ROC_PATH)/lib
    -  LIBRARIES += -lroctracer64 -lroctx64
    +  ifeq ($(USE_ROCTX),TRUE)
    +    CXXFLAGS += -DAMREX_USE_ROCTX
    +    HIPCC_FLAGS += -DAMREX_USE_ROCTX
    +    LIBRARY_LOCATIONS += $(ROC_PATH)/lib
    +    LIBRARIES += -Wl,--rpath=$(ROC_PATH)/lib -lroctracer64 -lroctx64
       endif
     
       # hipcc passes a lot of unused arguments to clang
    diff --git a/Tools/GNUMake/comps/intel.mak b/Tools/GNUMake/comps/intel.mak
    index 0c4d6e30b2a..2341192d163 100644
    --- a/Tools/GNUMake/comps/intel.mak
    +++ b/Tools/GNUMake/comps/intel.mak
    @@ -39,21 +39,12 @@ endif
     
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
    -  ifneq ($(firstword $(sort 17.0 $(intel_version))), 17.0)
    -    ifeq ($(CXXSTD),c++14)
    -      $(error C++14 support requires Intel icpc 17.0 or newer.)
    -    endif
    -  endif
       CXXFLAGS += -std=$(CXXSTD)
     else
    -  ifeq ($(firstword $(sort 17.0 $(intel_version))), 17.0)
    -    CXXFLAGS += -std=c++14
    -  else
    -    $(error Intel icpc 17.0 or newer is required.)
    -  endif
    +  CXXFLAGS += -std=c++17
     endif
     
    -CFLAGS   += -std=c99
    +CFLAGS   += -std=c11
     
     F90FLAGS += -implicitnone
     
    @@ -64,11 +55,7 @@ FMODULES = -module $(fmoddir) -I$(fmoddir)
     GENERIC_COMP_FLAGS =
     
     ifeq ($(USE_OMP),TRUE)
    -  ifeq ($(firstword $(sort 16.0 $(intel_version))), 16.0) 
    -    GENERIC_COMP_FLAGS += -qopenmp
    -  else
    -    GENERIC_COMP_FLAGS += -openmp
    -  endif
    +  GENERIC_COMP_FLAGS += -qopenmp
     endif
     
     CXXFLAGS += $(GENERIC_COMP_FLAGS) -pthread
    diff --git a/Tools/GNUMake/comps/llvm-flang.mak b/Tools/GNUMake/comps/llvm-flang.mak
    index 58a0a06b64e..c9abdaaaeeb 100644
    --- a/Tools/GNUMake/comps/llvm-flang.mak
    +++ b/Tools/GNUMake/comps/llvm-flang.mak
    @@ -43,11 +43,11 @@ endif
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
     else
    -  CXXSTD := c++14
    +  CXXSTD := c++17
     endif
     
     CXXFLAGS += -std=$(CXXSTD)
    -CFLAGS   += -std=c99
    +CFLAGS   += -std=c11
     
     FMODULES = -J$(fmoddir) -I $(fmoddir)
     
    diff --git a/Tools/GNUMake/comps/llvm.mak b/Tools/GNUMake/comps/llvm.mak
    index 2bf710c0d94..ead1d9290c2 100644
    --- a/Tools/GNUMake/comps/llvm.mak
    +++ b/Tools/GNUMake/comps/llvm.mak
    @@ -50,7 +50,7 @@ ifeq ($(WARN_ALL),TRUE)
         warning_flags += -Wshadow
       endif
     
    -  CXXFLAGS += $(warning_flags) -Woverloaded-virtual
    +  CXXFLAGS += $(warning_flags) -Woverloaded-virtual -Wnon-virtual-dtor
       CFLAGS += $(warning_flags)
     endif
     
    @@ -60,18 +60,18 @@ ifeq ($(WARN_ERROR),TRUE)
     endif
     
     # disable some warnings
    -CXXFLAGS += -Wno-pass-failed -Wno-c++17-extensions
    +CXXFLAGS += -Wno-c++17-extensions
     
     ########################################################################
     
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
     else
    -  CXXSTD := c++14
    +  CXXSTD := c++17
     endif
     
     CXXFLAGS += -std=$(CXXSTD)
    -CFLAGS   += -std=c99
    +CFLAGS   += -std=c11
     
     FFLAGS   += -ffixed-line-length-none -fno-range-check -fno-second-underscore
     F90FLAGS += -ffree-line-length-none -fno-range-check -fno-second-underscore -fimplicit-none
    diff --git a/Tools/GNUMake/comps/nag.mak b/Tools/GNUMake/comps/nag.mak
    index faaf0db7155..55ec14b0620 100644
    --- a/Tools/GNUMake/comps/nag.mak
    +++ b/Tools/GNUMake/comps/nag.mak
    @@ -52,17 +52,12 @@ endif
     
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
    -  ifeq ($(shell expr $(gcc_major_version) \< 5),1)
    -    ifeq ($(CXXSTD),c++14)
    -      $(error C++14 support requires GCC 5 or newer.)
    -    endif
    -  endif
       CXXFLAGS += -std=$(CXXSTD)
     else
    -  CXXFLAGS += -std=c++14
    +  CXXFLAGS += -std=c++17
     endif
     
    -CFLAGS   += -std=gnu99
    +CFLAGS   += -std=c11
     
     FFLAGS   += -mismatch
     F90FLAGS += -mismatch -u
    diff --git a/Tools/GNUMake/comps/nvcc.mak b/Tools/GNUMake/comps/nvcc.mak
    index 9d9bf90ce51..f52dfeb6c86 100644
    --- a/Tools/GNUMake/comps/nvcc.mak
    +++ b/Tools/GNUMake/comps/nvcc.mak
    @@ -10,21 +10,11 @@ else
       nvcc_minor_version := 9
     endif
     
    -# Disallow CUDA toolkit versions < 10
    +# Disallow CUDA toolkit versions < 11
     
    -nvcc_major_lt_10 = $(shell expr $(nvcc_major_version) \< 10)
    -ifeq ($(nvcc_major_lt_10),1)
    -  $(error Your nvcc version is $(nvcc_version). This is unsupported. Please use CUDA toolkit version 10.0 or newer.)
    -endif
    -
    -nvcc_forward_unknowns = 0
    -ifeq ($(shell expr $(nvcc_major_version) \= 10),1)
    -ifeq ($(shell expr $(nvcc_minor_version) \>= 2),1)
    -  nvcc_forward_unknowns = 1
    -endif
    -endif
    -ifeq ($(shell expr $(nvcc_major_version) \>= 11),1)
    -  nvcc_forward_unknowns = 1
    +nvcc_major_lt_11 = $(shell expr $(nvcc_major_version) \< 11)
    +ifeq ($(nvcc_major_lt_11),1)
    +  $(error Your nvcc version is $(nvcc_version). This is unsupported. Please use CUDA toolkit version 11.0 or newer.)
     endif
     
     ifeq ($(shell expr $(nvcc_major_version) \= 11),1)
    @@ -34,24 +24,6 @@ ifeq ($(shell expr $(nvcc_minor_version) \= 0),1)
     endif
     endif
     
    -ifeq ($(shell expr $(nvcc_major_version) \< 11),1)
    -  # -MMD -MP not supported in < 11
    -  USE_LEGACY_DEPFLAGS = TRUE
    -  DEPFLAGS =
    -endif
    -
    -ifeq ($(shell expr $(nvcc_major_version) \< 10),1)
    -  # -MM not supported in < 10
    -  LEGACY_DEPFLAGS = -M
    -endif
    -
    -ifeq ($(shell expr $(nvcc_major_version) \= 10),1)
    -ifeq ($(shell expr $(nvcc_minor_version) \= 0),1)
    -  # -MM not supported in 10.0
    -  LEGACY_DEPFLAGS = -M
    -endif
    -endif
    -
     #
     # nvcc compiler driver does not always accept pgc++
     # as a host compiler at present. However, if we're using
    @@ -72,16 +44,14 @@ endif
     
     ifeq ($(lowercase_nvcc_host_comp),gnu)
     
    -  ifeq ($(shell expr $(gcc_major_version) \< 5),1)
    -    ifneq ($(NO_CONFIG_CHECKING),TRUE)
    -      $(error C++14 support requires GCC 5 or newer.)
    -    endif
    +  ifeq ($(shell expr $(gcc_major_version) \< 8),1)
    +    $(error GCC >= 8 required.)
       endif
     
       ifdef CXXSTD
         CXXSTD := $(strip $(CXXSTD))
       else
    -    CXXSTD = c++14
    +    CXXSTD = c++17
       endif
       CXXFLAGS += -std=$(CXXSTD)
     
    @@ -95,27 +65,22 @@ ifeq ($(lowercase_nvcc_host_comp),gnu)
     else ifeq ($(lowercase_nvcc_host_comp),pgi)
       ifdef CXXSTD
         CXXSTD := $(strip $(CXXSTD))
    -    ifeq ($(shell expr $(gcc_major_version) \< 5),1)
    -      ifeq ($(CXXSTD),c++14)
    -        $(error C++14 support requires GCC 5 or newer.)
    -      endif
    -    endif
       else
    -    CXXSTD := c++14
    +    CXXSTD := c++17
       endif
     
       CXXFLAGS += -std=$(CXXSTD)
     
       NVCC_CCBIN ?= pgc++
     
    -  # In pgi.make, we use gcc_major_version to handle c++14 flag.
    +  # In pgi.mak, we use gcc_major_version to handle the c++17 flag.
       CXXFLAGS_FROM_HOST := -ccbin=$(NVCC_CCBIN) -Xcompiler='$(CXXFLAGS)' --std=$(CXXSTD)
       CFLAGS_FROM_HOST := $(CXXFLAGS_FROM_HOST)
     else
       ifdef CXXSTD
         CXXSTD := $(strip $(CXXSTD))
       else
    -    CXXSTD := c++14
    +    CXXSTD := c++17
       endif
     
       NVCC_CCBIN ?= $(CXX)
    @@ -124,7 +89,7 @@ else
       CFLAGS_FROM_HOST := $(CXXFLAGS_FROM_HOST)
     endif
     
    -NVCC_FLAGS = -Wno-deprecated-gpu-targets -m64 -arch=compute_$(CUDA_ARCH) -code=sm_$(CUDA_ARCH) -maxrregcount=$(CUDA_MAXREGCOUNT) --expt-relaxed-constexpr --expt-extended-lambda
    +NVCC_FLAGS = -Wno-deprecated-gpu-targets -m64 -arch=compute_$(CUDA_ARCH) -code=sm_$(CUDA_ARCH) -maxrregcount=$(CUDA_MAXREGCOUNT) --expt-relaxed-constexpr --expt-extended-lambda --forward-unknown-to-host-compiler
     # This is to work around a bug with nvcc, see: https://github.com/kokkos/kokkos/issues/1473
     NVCC_FLAGS += -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored
     
    @@ -154,11 +119,6 @@ endif
     
     NVCC_FLAGS += $(XTRA_NVCC_FLAGS)
     
    -ifeq ($(nvcc_forward_unknowns),1)
    -  NVCC_FLAGS += --forward-unknown-to-host-compiler
    -endif
    -
    -ifeq ($(shell expr $(nvcc_major_version) \>= 11),1)
     ifeq ($(GPU_ERROR_CAPTURE_THIS),TRUE)
       NVCC_FLAGS += --Werror ext-lambda-captures-this
     else
    @@ -166,7 +126,6 @@ ifeq ($(GPU_WARN_CAPTURE_THIS),TRUE)
       NVCC_FLAGS += --Wext-lambda-captures-this
     endif
     endif
    -endif
     
     nvcc_diag_error = 0
     ifeq ($(shell expr $(nvcc_major_version) \>= 12),1)
    diff --git a/Tools/GNUMake/comps/nvhpc.mak b/Tools/GNUMake/comps/nvhpc.mak
    index 49f815213f1..d76e7c9d36e 100644
    --- a/Tools/GNUMake/comps/nvhpc.mak
    +++ b/Tools/GNUMake/comps/nvhpc.mak
    @@ -94,19 +94,15 @@ endif
     # The logic here should be consistent with what's in nvcc.mak
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
    -  ifeq ($(shell expr $(gcc_major_version) \< 5),1)
    -    ifeq ($(CXXSTD),c++14)
    -      $(error C++14 support requires GCC 5 or newer.)
    -    endif
    +  ifeq ($(shell expr $(gcc_major_version) \< 8),1)
    +    $(error GCC >= 8 required.)
       endif
       CXXFLAGS += -std=$(CXXSTD)
     else
    -  ifeq ($(gcc_major_version),5)
    -    CXXFLAGS += -std=c++14
    -  endif
    +  CXXFLAGS += -std=c++17
     endif
     
    -CFLAGS   += -c99
    +CFLAGS   += -c11
     
     CXXFLAGS += $(GENERIC_NVHPC_FLAGS)
     CFLAGS   += $(GENERIC_NVHPC_FLAGS)
    diff --git a/Tools/GNUMake/comps/pgi.mak b/Tools/GNUMake/comps/pgi.mak
    index 0cf50d77287..d2736c71a33 100644
    --- a/Tools/GNUMake/comps/pgi.mak
    +++ b/Tools/GNUMake/comps/pgi.mak
    @@ -87,20 +87,18 @@ endif
     
     # The logic here should be consistent with what's in nvcc.mak
     
    -ifeq ($(shell expr $(gcc_major_version) \< 5),1)
    -  $(error C++14 support requires GCC 5 or newer.)
    +ifeq ($(shell expr $(gcc_major_version) \< 8),1)
    +  $(error GCC >= 8 required)
     endif
     
     ifdef CXXSTD
       CXXSTD := $(strip $(CXXSTD))
       CXXFLAGS += -std=$(CXXSTD)
     else
    -  ifeq ($(gcc_major_version),5)
    -    CXXFLAGS += -std=c++14
    -  endif
    +  CXXFLAGS += -std=c++17
     endif
     
    -CFLAGS   += -c99
    +CFLAGS   += -c11
     
     CXXFLAGS += $(GENERIC_PGI_FLAGS)
     CFLAGS   += $(GENERIC_PGI_FLAGS)
    diff --git a/Tools/GNUMake/packages/Make.hdf5 b/Tools/GNUMake/packages/Make.hdf5
    index d09fe43a082..9d54463ce4e 100644
    --- a/Tools/GNUMake/packages/Make.hdf5
    +++ b/Tools/GNUMake/packages/Make.hdf5
    @@ -27,8 +27,9 @@ ifeq ($(USE_HDF5_ZFP),TRUE)
           ZFP_ABSPATH = $(abspath $(ZFP_HOME))
           H5Z_ABSPATH = $(abspath $(H5Z_HOME))
           INCLUDE_LOCATIONS += $(ZFP_ABSPATH)/include $(H5Z_ABSPATH)/include
    -      LIBRARY_LOCATIONS += $(ZFP_ABSPATH)/lib $(H5Z_ABSPATH)/lib
    +      LIBRARY_LOCATIONS += $(ZFP_ABSPATH)/lib $(ZFP_ABSPATH)/lib64 $(H5Z_ABSPATH)/lib
           LDFLAGS += -Xlinker -rpath -Xlinker $(ZFP_ABSPATH)/lib
    +      LDFLAGS += -Xlinker -rpath -Xlinker $(ZFP_ABSPATH)/lib64
         endif
       endif
     endif
    diff --git a/Tools/GNUMake/packages/Make.hypre b/Tools/GNUMake/packages/Make.hypre
    index 11e0690a67e..d2cc0d7c17a 100644
    --- a/Tools/GNUMake/packages/Make.hypre
    +++ b/Tools/GNUMake/packages/Make.hypre
    @@ -19,5 +19,5 @@ ifdef AMREX_HYPRE_HOME
     endif
     
     ifeq ($(USE_CUDA),TRUE)
    -  LIBRARIES += -lcusparse -lcurand
    +  LIBRARIES += -lcusparse -lcurand -lcublas
     endif
    diff --git a/Tools/GNUMake/sites/Make.alcf b/Tools/GNUMake/sites/Make.alcf
    index 324d419ccce..cf607596515 100644
    --- a/Tools/GNUMake/sites/Make.alcf
    +++ b/Tools/GNUMake/sites/Make.alcf
    @@ -8,3 +8,78 @@ ifeq ($(which_computer),theta)
         LIBRARIES += -lmpichf90
       endif
     endif
    +
    +ifeq ($(which_computer),$(filter $(which_computer),polaris))
    +
    +  ifdef PE_ENV
    +    ifneq ($(USE_GPU),TRUE)
    +      lowercase_peenv := $(shell echo $(PE_ENV) | tr A-Z a-z)
    +      ifneq ($(lowercase_peenv),$(lowercase_comp))
    +        has_compiler_mismatch = COMP=$(COMP) does not match PrgEnv-$(lowercase_peenv)
    +      endif
    +      ifeq ($(MAKECMDGOALS),)
    +        ifeq ($(lowercase_peenv),nvidia)
    +          $(error PrgEnv-nvidia cannot be used with CPU-only builds. Try PrgEnv-gnu instead.)
    +        endif
    +      endif
    +    endif
    +  endif
    +
    +  ifeq ($(USE_CUDA),TRUE)
    +    CFLAGS += -Xcompiler='$(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))'
    +    CXXFLAGS += -Xcompiler='$(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))'
    +  else ifeq ($(USE_MPI),FALSE)
    +    CFLAGS += $(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))
    +    CXXFLAGS += $(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))
    +  endif
    +
    +  ifeq ($(USE_MPI),TRUE)
    +    ifneq ($(USE_CUDA),TRUE)
    +      CC  = cc
    +      CXX = CC
    +      FC  = ftn
    +      F90 = ftn
    +      LIBRARIES += -lmpichf90
    +    endif
    +
    +    includes += $(shell CC --cray-print-opts=cflags)
    +  endif
    +
    +  ifeq ($(USE_CUDA),TRUE)
    +    CUDA_ARCH = 80
    +
    +    ifeq ($(USE_MPI), FALSE)
    +      includes += $(CRAY_CUDATOOLKIT_INCLUDE_OPTS)
    +    endif
    +
    +    comm := ,
    +    ifneq ($(BL_NO_FORT),TRUE)
    +      LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(shell ftn --cray-print-opts=libs))
    +    else
    +      LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(shell CC --cray-print-opts=libs))
    +    endif
    +
    +    ifneq ($(CUDA_ROOT),)
    +        SYSTEM_CUDA_PATH := $(CUDA_ROOT)
    +        COMPILE_CUDA_PATH := $(CUDA_ROOT)
    +    else ifneq ($(CUDA_HOME),)
    +        SYSTEM_CUDA_PATH := $(CUDA_HOME)
    +        COMPILE_CUDA_PATH := $(CUDA_HOME)
    +    else ifneq ($(CUDA_PATH),)
    +        SYSTEM_CUDA_PATH := $(CUDA_PATH)
    +        COMPILE_CUDA_PATH := $(CUDA_PATH)
    +    else ifneq ($(NVIDIA_PATH),)
    +        SYSTEM_CUDA_PATH := $(NVIDIA_PATH)/cuda
    +        COMPILE_CUDA_PATH := $(NVIDIA_PATH)/cuda
    +    else
    +        $(error No CUDA_ROOT nor CUDA_HOME nor CUDA_PATH found. Please load a cuda module.)
    +    endif
    +
    +    # Provide system configuration information.
    +
    +    GPUS_PER_NODE=4
    +    GPUS_PER_SOCKET=4
    +
    +  endif
    +
    +endif
    \ No newline at end of file
    diff --git a/Tools/GNUMake/sites/Make.nersc b/Tools/GNUMake/sites/Make.nersc
    index c8c938a627a..426b9525887 100644
    --- a/Tools/GNUMake/sites/Make.nersc
    +++ b/Tools/GNUMake/sites/Make.nersc
    @@ -25,11 +25,16 @@ ifeq ($(which_computer),$(filter $(which_computer),perlmutter))
       endif
     
       ifeq ($(USE_CUDA),TRUE)
    -      CFLAGS += -Xcompiler="$(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))"
    -      CXXFLAGS += -Xcompiler="$(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))"
    +    ifdef NPE_VERSION
    +      CFLAGS += -Xcompiler='$(filter-out -Wl%, $(wordlist 2,1024,$(shell mpicc -show 2> /dev/null)))'
    +      CXXFLAGS += -Xcompiler='$(filter-out -Wl%, $(wordlist 2,1024,$(shell mpicxx -show 2> /dev/null)))'
    +    else
    +      CFLAGS += -Xcompiler='$(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))'
    +      CXXFLAGS += -Xcompiler='$(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))'
    +    endif
       else ifeq ($(USE_MPI),FALSE)
    -      CFLAGS += $(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))
    -      CXXFLAGS += $(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))
    +    CFLAGS += $(wordlist 2,1024,$(shell cc -craype-verbose 2> /dev/null))
    +    CXXFLAGS += $(wordlist 2,1024,$(shell CC -craype-verbose 2> /dev/null))
       endif
     
       ifeq ($(USE_MPI),TRUE)
    @@ -41,7 +46,9 @@ ifeq ($(which_computer),$(filter $(which_computer),perlmutter))
           LIBRARIES += -lmpichf90
         endif
     
    -    includes += $(shell CC --cray-print-opts=cflags)
    +    ifndef NPE_VERSION
    +      includes += $(shell CC --cray-print-opts=cflags)
    +    endif
       endif
     
       ifeq ($(USE_CUDA),TRUE)
    @@ -51,11 +58,23 @@ ifeq ($(which_computer),$(filter $(which_computer),perlmutter))
           includes += $(CRAY_CUDATOOLKIT_INCLUDE_OPTS)
         endif
     
    +    ifdef NPE_VERSION
    +      includes += $(CRAY_CUDATOOLKIT_INCLUDE_OPTS)
    +    endif
    +
         comm := ,
         ifneq ($(BL_NO_FORT),TRUE)
    +      ifdef NPE_VERSION
    +        LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(wordlist 2,1024,$(shell mpifort -show)))
    +      else
             LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(shell ftn --cray-print-opts=libs))
    +      endif
         else
    +      ifdef NPE_VERSION
    +        LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(wordlist 2,1024,$(shell mpicxx -show)))
    +      else
             LIBRARIES += $(subst -Wl$(comm),-Xlinker=,$(shell CC --cray-print-opts=libs))
    +      endif
         endif
     
         ifneq ($(CUDA_ROOT),)
    diff --git a/Tools/GNUMake/sites/Make.nrel b/Tools/GNUMake/sites/Make.nrel
    index 68ac8e5116f..ca705698ea1 100644
    --- a/Tools/GNUMake/sites/Make.nrel
    +++ b/Tools/GNUMake/sites/Make.nrel
    @@ -40,27 +40,32 @@ else ifeq ($(which_computer), rhodes)
       endif
     endif
     
    -# Account for Intel-MPI, MPICH, OpenMPI, and HPE MPT
     ifeq ($(USE_MPI),TRUE)
    +  CXX := mpicxx
    +  CC  := mpicc
    +  FC  := mpif90
    +  F90 := mpif90
       ifeq ($(COMP), intel)
    -    CXX := mpiicpc
    -    CC  := mpiicc
    -    FC  := mpiifort
    -    F90 := mpiifort
    -  else
    -    CXX := mpicxx
    -    CC  := mpicc
    -    FC  := mpif90
    -    F90 := mpif90
    -    ifneq ($(findstring mpich, $(shell $(F90) -show 2>&1)),)
    -      mpif90_link_flags := $(shell $(F90) -link_info)
    -      LIBRARIES += $(wordlist 2,1024,$(mpif90_link_flags))
    -    else ifneq ($(findstring Open MPI, $(shell $(F90) -showme:version 2>&1)),)
    -      mpif90_link_flags := $(shell $(F90) -showme:link)
    -      LIBRARIES += $(mpif90_link_flags)
    -    else
    -      # MPT case (no option available to query link flags)
    -      LIBRARIES += -lmpi
    +    ifeq ($(which_computer), eagle)
    +        # Always assume MPT on Eagle
    +        export MPICXX_CXX := icpc
    +        export MPICC_CC   := icc
    +        export MPIF90_F90 := ifort
    +    else ifeq ($(which_computer), rhodes)
    +        CXX := mpiicpc
    +        CC  := mpiicc
    +        FC  := mpiifort
    +        F90 := mpiifort
         endif
       endif
    +  ifneq ($(findstring mpich, $(shell $(F90) -show 2>&1)),)
    +    mpif90_link_flags := $(shell $(F90) -link_info)
    +    LIBRARIES += $(wordlist 2,1024,$(mpif90_link_flags))
    +  else ifneq ($(findstring Open MPI, $(shell $(F90) -showme:version 2>&1)),)
    +    mpif90_link_flags := $(shell $(F90) -showme:link)
    +    LIBRARIES += $(mpif90_link_flags)
    +  else
    +    # MPT case (no option available to query link flags)
    +    LIBRARIES += -lmpi
    +  endif
     endif
    diff --git a/Tools/GNUMake/sites/Make.olcf b/Tools/GNUMake/sites/Make.olcf
    index 651971c6c95..69f557786df 100644
    --- a/Tools/GNUMake/sites/Make.olcf
    +++ b/Tools/GNUMake/sites/Make.olcf
    @@ -2,7 +2,7 @@
     # For Summit et al. at OLCF
     #
     
    -OLCF_MACHINES := summit ascent spock crusher
    +OLCF_MACHINES := summit ascent spock crusher frontier
     
     ifneq ($(which_computer), $(findstring $(which_computer), $(OLCF_MACHINES)))
       $(error Unknown OLCF computer, $(which_computer))
    @@ -60,7 +60,7 @@ ifeq ($(which_computer),spock)
         endif
         # for gpu aware mpi
         ifeq ($(USE_HIP),TRUE)
    -      LIBRARIES += $(PE_MPICH_GTL_DIR_gfx908) -lmpi_gtl_hsa
    +      LIBRARIES += $(PE_MPICH_GTL_DIR_amd_gfx908) -lmpi_gtl_hsa
         endif
       endif
     endif
    @@ -80,7 +80,27 @@ ifeq ($(which_computer),crusher)
         endif
         # for gpu aware mpi
         ifeq ($(USE_HIP),TRUE)
    -      LIBRARIES += -lmpi_gtl_hsa
    +      LIBRARIES += $(PE_MPICH_GTL_DIR_amd_gfx90a) -lmpi_gtl_hsa
    +    endif
    +  endif
    +endif
    +
    +ifeq ($(which_computer),frontier)
    +  ifeq ($(USE_HIP),TRUE)
    +    # MI250X
    +    AMD_ARCH=gfx90a
    +  endif
    +
    +  ifeq ($(USE_MPI),TRUE)
    +    includes += $(shell CC --cray-print-opts=cflags)
    +    ifneq ($(BL_NO_FORT),TRUE)
    +      LIBRARIES += $(shell ftn --cray-print-opts=libs)
    +    else
    +      LIBRARIES += $(shell CC --cray-print-opts=libs)
    +    endif
    +    # for gpu aware mpi
    +    ifeq ($(USE_HIP),TRUE)
    +      LIBRARIES += $(PE_MPICH_GTL_DIR_amd_gfx90a) -lmpi_gtl_hsa
         endif
       endif
     endif
    diff --git a/Tools/GNUMake/sites/Make.unknown b/Tools/GNUMake/sites/Make.unknown
    index 332a7a558de..2ecf6a50ddb 100644
    --- a/Tools/GNUMake/sites/Make.unknown
    +++ b/Tools/GNUMake/sites/Make.unknown
    @@ -29,6 +29,8 @@ ifeq ($(USE_MPI),TRUE)
     
       ifeq ($(LINK_WITH_FORTRAN_COMPILER),TRUE)
         MPI_OTHER_COMP := mpicxx
    +  else ifeq ($(BL_NO_FORT),TRUE)
    +    MPI_OTHER_COMP := mpicxx
       else
         MPI_OTHER_COMP := mpif90
       endif
    @@ -55,7 +57,10 @@ ifeq ($(USE_MPI),TRUE)
          mpi_link_flags := $(filter-out $(mpi_filter), $(mpi_link_flags))
       endif
     
    -  LIBRARIES += $(mpi_link_flags) $(mpicxx_link_libs)
    +  LIBRARIES += $(mpi_link_flags)
    +  ifneq ($(MPI_OTHER_COMP),mpicxx)
    +    LIBRARIES += $(mpicxx_link_libs)
    +  endif
     
       # OpenMPI specific flag
       # Uncomment if statement if flag causes issue with another compiler.
    diff --git a/Tools/Plotfile/CMakeLists.txt b/Tools/Plotfile/CMakeLists.txt
    index 44f99d9523c..9f8f066fbbb 100644
    --- a/Tools/Plotfile/CMakeLists.txt
    +++ b/Tools/Plotfile/CMakeLists.txt
    @@ -34,5 +34,5 @@ target_include_directories(fsnapshot PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
     target_sources(fsnapshot PRIVATE AMReX_PPMUtil.H AMReX_PPMUtil.cpp)
     if (AMReX_CUDA)
        set_source_files_properties(AMReX_PPMUtil.cpp PROPERTIES LANGUAGE CUDA)
    -   target_compile_features(fsnapshot PUBLIC cxx_std_14)
    +   target_compile_features(fsnapshot PUBLIC cxx_std_17)
     endif()
    diff --git a/Tools/Postprocessing/python/column_depth.py b/Tools/Postprocessing/python/column_depth.py
    index 3aff2ac4705..be17d6bb663 100755
    --- a/Tools/Postprocessing/python/column_depth.py
    +++ b/Tools/Postprocessing/python/column_depth.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     import sys
     import numpy
     
    diff --git a/Tools/Postprocessing/python/conv_slopes.py b/Tools/Postprocessing/python/conv_slopes.py
    index f2fe5404aae..9f1a22e3960 100755
    --- a/Tools/Postprocessing/python/conv_slopes.py
    +++ b/Tools/Postprocessing/python/conv_slopes.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     import sys
     import os
     import commands
    diff --git a/Tools/Postprocessing/python/dumpparthistory.py b/Tools/Postprocessing/python/dumpparthistory.py
    index 092f924423b..23f6d22d1a8 100755
    --- a/Tools/Postprocessing/python/dumpparthistory.py
    +++ b/Tools/Postprocessing/python/dumpparthistory.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     # a simple routine to parse particle files and dump out the particle
     # histories into separate files (1 file per particle) so that they can
    @@ -96,8 +96,3 @@ def main(files):
             sys.exit(2)
     
         main(sys.argv[1:])
    -
    -
    -
    -
    -
    diff --git a/Tools/Postprocessing/python/test_helmeos.py b/Tools/Postprocessing/python/test_helmeos.py
    index 890a66aef77..824f369cf60 100755
    --- a/Tools/Postprocessing/python/test_helmeos.py
    +++ b/Tools/Postprocessing/python/test_helmeos.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     #
     # a script showing how to use the helmeos module
     # it reads T, rho, X data from a sample data file, calculates abar and zbar
    diff --git a/Tools/Postprocessing/python/test_parseparticles.py b/Tools/Postprocessing/python/test_parseparticles.py
    index b9181af4d8a..8a85fe2faf6 100755
    --- a/Tools/Postprocessing/python/test_parseparticles.py
    +++ b/Tools/Postprocessing/python/test_parseparticles.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     # simple script showing how to make plots of particles using the parseparticles
     # module
    @@ -92,4 +92,3 @@ def main(fileList):
     
     # this is for profiling
     #    cProfile.run("main(sys.argv[1:])","profile.tmp2")
    -
    diff --git a/Tools/Py_util/plotsinglevar.py b/Tools/Py_util/plotsinglevar.py
    index 616c516c805..bb1c2abacaa 100755
    --- a/Tools/Py_util/plotsinglevar.py
    +++ b/Tools/Py_util/plotsinglevar.py
    @@ -1,11 +1,9 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     # a simple script to plot 2-d or 3-d BoxLib data using the matplotlib
     # library
     #
     
    -from __future__ import print_function
    -
     import matplotlib
     matplotlib.use('agg')
     
    diff --git a/Tools/Release/ppCleanup.py b/Tools/Release/ppCleanup.py
    index 109444daff3..2935d0c1983 100755
    --- a/Tools/Release/ppCleanup.py
    +++ b/Tools/Release/ppCleanup.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     import os
     import shutil
    diff --git a/Tools/Release/ppCleanupDir.py b/Tools/Release/ppCleanupDir.py
    index befebc15f2d..2d8a598291d 100755
    --- a/Tools/Release/ppCleanupDir.py
    +++ b/Tools/Release/ppCleanupDir.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     import os
     import shutil
    diff --git a/Tools/Release/release.py b/Tools/Release/release.py
    index 87de82e5a30..8f2b4d9d5dc 100755
    --- a/Tools/Release/release.py
    +++ b/Tools/Release/release.py
    @@ -1,4 +1,4 @@
    -#!/usr/bin/env python
    +#!/usr/bin/env python3
     
     import os
     import shutil
    diff --git a/Tools/libamrex/configure.py b/Tools/libamrex/configure.py
    index ac4b399a471..ebb3cd369f4 100755
    --- a/Tools/libamrex/configure.py
    +++ b/Tools/libamrex/configure.py
    @@ -1,12 +1,6 @@
    -#!/usr/bin/env python
    -
    -from __future__ import print_function
    +#!/usr/bin/env python3
     
     import sys
    -
    -if sys.version_info < (2, 7):
    -    sys.exit("ERROR: need python 2.7 or later for configure.py")
    -
     import argparse
     
     def configure(argv):
    diff --git a/Tools/libamrex/mkconfig.py b/Tools/libamrex/mkconfig.py
    index 30c54f285a2..21f66348891 100755
    --- a/Tools/libamrex/mkconfig.py
    +++ b/Tools/libamrex/mkconfig.py
    @@ -1,12 +1,6 @@
    -#!/usr/bin/env python
    -
    -from __future__ import print_function
    +#!/usr/bin/env python3
     
     import sys, re
    -
    -if sys.version_info < (2, 7):
    -    sys.exit("ERROR: need python 2.7 or later for mkconfig.py")
    -
     import argparse
     
     def doit(defines, undefines, comp, allow_diff_comp):
    diff --git a/Tools/libamrex/mkpkgconfig.py b/Tools/libamrex/mkpkgconfig.py
    index be91e8736a8..c8a626901da 100755
    --- a/Tools/libamrex/mkpkgconfig.py
    +++ b/Tools/libamrex/mkpkgconfig.py
    @@ -1,12 +1,6 @@
    -#!/usr/bin/env python
    -
    -from __future__ import print_function
    +#!/usr/bin/env python3
     
     import sys
    -
    -if sys.version_info < (2, 7):
    -    sys.exit("ERROR: need python 2.7 or later for mkpkgconfig.py")
    -
     import argparse
     
     def doit(prefix, version, cflags, libs, libpriv, fflags):
    diff --git a/Tools/libamrex/mkversionheader.py b/Tools/libamrex/mkversionheader.py
    index f2f6f8865f9..b1dbf0eb2ad 100755
    --- a/Tools/libamrex/mkversionheader.py
    +++ b/Tools/libamrex/mkversionheader.py
    @@ -1,12 +1,6 @@
    -#!/usr/bin/env python
    -
    -from __future__ import print_function
    +#!/usr/bin/env python3
     
     import sys, re
    -
    -if sys.version_info < (2, 7):
    -    sys.exit("ERROR: need python 2.7 or later for mkversionheader.py")
    -
     import argparse
     
     def doit(code, defines):
    diff --git a/Tools/typechecker/typechecker.py b/Tools/typechecker/typechecker.py
    index 2086b22d1b5..6035b7a6c15 100755
    --- a/Tools/typechecker/typechecker.py
    +++ b/Tools/typechecker/typechecker.py
    @@ -1,6 +1,4 @@
    -#!/usr/bin/env python
    -
    -from __future__ import print_function
    +#!/usr/bin/env python3
     
     import os
     import sys
    
    From 4d9bf4c1ec6dae49dbf1cd4b8e11288c80f807e1 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Wed, 2 Nov 2022 14:33:02 -0700
    Subject: [PATCH 057/111] Squashed commit of the following:
    
    commit 40b3d2176b17785191050482a2ead5539993fac6
    Author: Weiqun Zhang 
    Date:   Wed Jul 13 13:24:15 2022 -0700
    
        Add extra braces in initialization of GpuArray (#2876)
    
        It should not be needed since C++14.  But some compilers seem to need the
        double braces.
    
    commit a633d2bff1db1a3335efd077a34b6a8dcfb4e793
    Author: Luca Fedeli 
    Date:   Fri Jul 8 20:34:18 2022 +0200
    
        Workaround to bypass issue observed at very large scale with Fujitsu MPI (#2874)
    
        We have observed some MPI issues at very large scale when WarpX is compiled using Fujitsu MPI (i.e., with the Fujitsu compiler). These issues seem to be related to the use of MPI Gatherv with MPI_Datatype. This PR implements a possible workaround, initially proposed by @WeiqunZhang . The idea is that, when WarpX is compiled with the Fujitsu compiler, simpler integer arrays instead of MPI_Datatype are used in the routine where the issue was observed.
    
    commit 7660c885d46779367344adf88af75e630a0bc77a
    Author: Weiqun Zhang 
    Date:   Fri Jul 8 08:48:14 2022 -0700
    
        Allow zero components MultiFab and BaseFab (#2873)
    
        This is useful for particle I/O that does not have any mesh data.  yt needs
        a header file associated with a MultiFab.
    
    commit c849dd1994388cebd78a6a1624e80bc3ab640970
    Author: Weiqun Zhang 
    Date:   Fri Jul 8 08:06:37 2022 -0700
    
        New EB optimization parameter: eb2.num_coarsen_opt (#2872)
    
        At the beginning of EB generation, we chop the entire finest domain into
        boxes and find out the type of the boxes.  We then collect the completely
        covered boxes and cut boxes into two BoxArrays.  This process can be costly
        because of the number of calls to the implicit functions.  In this commit,
        we have introduced a new ParmParse parameter, eb2.num_coarsen_opt with a
        default value of zero.  If for instance it is set to 3, we start the box
        type categorization at a resolution that is coarsened by a factor of 2^3.
        For the provisional cut boxes, we refine them by a factor of 2.  Then we chop
        them into small boxes and categorize the new boxes.  This process is
        performed recursively until we are at the original finest resolution.
    
        The users should be aware that, if eb2.num_coarsen_opt is too big, this could
        produce erroneous results because evaluating the implicit function on
        coarse boxes could miss fine structures in the EB.
    
        Thanks to Robert Marskar for sharing this algorithm.
    
    commit 557aae84902f63a84edc8b49831ee66af7d1a46a
    Author: Erik 
    Date:   Wed Jul 6 08:54:24 2022 -0700
    
        point to new location of AMReX images, AMReX website repo (#2867)
    
    commit cbdc6580ee3d78cccdd37172e4ba077ee181f483
    Author: Axel Huebl 
    Date:   Tue Jul 5 01:41:03 2022 +0200
    
        SENSEI 4.0: Fix Build for Particles (#2869)
    
        ## Summary
    
        This part causes a compile error now in WarpX.
    
        cc  @burlen @kwryankrattiger
    
        ## Additional background
    
        X-ref: Blocks WarpX 22.07 release https://github.com/ECP-WarpX/WarpX/pull/3211
    
        Follow-up to:
        - #2785
        - #2834
    
    commit dc8b734b6a70583602150cfbee1b7d51f8dacdeb
    Author: Andrew Myers 
    Date:   Fri Jul 1 17:19:20 2022 -0700
    
        Cache the neighbor comm tags for the CPU implementation of fillNeighbors. (#2862)
    
        * Cache the neighbor comm tags for the CPU implementation of fillNeighbors.
    
        * fix areMasksValid function
    
    commit 2b42fb56a96e752d301916ca23160098c5369386
    Author: drangara <69211175+drangara@users.noreply.github.com>
    Date:   Fri Jul 1 18:44:35 2022 -0400
    
        Remove some hard checks in check_mvmc for 3D (#2864)
    
        Removing some hard checks in 3D coarsening logic as it appears that those are not necessarily bad states, and a soft failure to coarsen should suffice.
    
    commit 19c70685cdb0c3322712e9f442092b1140cfe7ec
    Author: Erik 
    Date:   Fri Jul 1 18:24:24 2022 -0400
    
        Carry over fix for ngbxy.smallEnd typo (#2868)
    
        This is a typo that got corrected in other places but didn't get fixed here.
    
    commit d736ef299b724b96b34d41103dfc5318d0ecdee4
    Author: Weiqun Zhang 
    Date:   Fri Jul 1 11:00:15 2022 -0700
    
        Update CHANGES for 22.07 (#2866)
    
    commit be813d024e6b314e41c727734b8e53481898e08e
    Author: Weiqun Zhang 
    Date:   Fri Jul 1 10:29:13 2022 -0700
    
        Hypre: add version check (#2865)
    
        These HYPRE_SetSp* are only available in hypre >= 22500.
    
    commit 8fb23ec17a58284af6bdafbcda3eea0d86d8ce69
    Author: Jon Rood 
    Date:   Wed Jun 29 16:52:35 2022 -0600
    
        Refactor Make.nrel to use MPT for MPI with the Intel compiler on Eagle. (#2861)
    
    commit 6f9a46c7e834046970d46d684927a078671355bc
    Author: PaulMullowney <60452402+PaulMullowney@users.noreply.github.com>
    Date:   Wed Jun 29 11:09:57 2022 -0600
    
        Adding control APIs and namespacing for core algorithm paths like SpGEMM, SpMV, and SpTrans. (#2859)
    
        Co-authored-by: Paul Mullowney 
    
    commit e4c83cfddc8afb1bd091c45a6ad3040d23f019bc
    Author: Jon Rood 
    Date:   Wed Jun 29 11:08:42 2022 -0600
    
        Add lib64 library location for ZFP since it may exist there instead of lib. (#2860)
    
    commit b2b9150ada12af878a07e0628be03668a9d17270
    Author: Burlen Loring 
    Date:   Tue Jun 28 13:42:41 2022 -0700
    
        update the SENSEI in situ coupling for SENSEI v4.0.0 (#2785)
    
        In this release, an install of VTK is no longer required.
        To compile AMReX w/ SENSEI use:
    
        ```cmake
        -DAMReX_SENSEI=ON -DSENSEI_DIR=<path to install>/<lib dir>/cmake
        ```
    
        Note: `<lib dir>` may be `lib` or `lib64` or something else depending on
        your OS and is determined by CMake at configure time. See the CMake
        GNUInstallDirs documentation for more information.
    
    commit 2c5f475d451aede47fe2cad2bbd8681c9ca1f456
    Author: Andrew Myers 
    Date:   Tue Jun 28 12:51:19 2022 -0700
    
        Write runtime attribs to checkpoints on GPUs (#2856)
    
    commit d2cb54668b5e49fd35a60164f40ad6f36720f806
    Author: Jon Rood 
    Date:   Tue Jun 28 13:27:02 2022 -0600
    
        Fix gnu make on Crusher for mpi_gtl_hsa (#2857)
    
        Update environment variable at OLCF for mpi_gtl_hsa.
    
    commit 21fe4b3016a796b99c409760cfad7ae00a7475ba
    Author: Axel Huebl 
    Date:   Tue Jun 28 19:53:09 2022 +0200
    
        CMake: FindDependency CUDAToolkit (#2849)
    
        If we install AMReX with CUDA support using a modern
        CMake, we need to repopulate targets such as `CUDA::curand`
        from `find_dependency` for downstream.
        Downstream users find us via `find_package` and that target
        link dependency showed up to be unpopulated in MFIX.
    
    commit 027f2ff77fed33a191cfc735d8adaabb42d21743
    Author: Weiqun Zhang 
    Date:   Thu Jun 23 16:15:57 2022 -0700
    
        Fix make help (#2854)
    
        This reverts the change in #2845, which fixed an issue with `make print-%`, but broke
        `make help`.  This is now fixed in a different way.  Both `make print-%` and `make help`
        should work now.
    
    commit 3d3ad213ca4b60421c9a80328e1316b23435958f
    Author: kngott 
    Date:   Thu Jun 23 13:39:59 2022 -0700
    
        NERSC Programming Environment prototype (#2848)
    
    commit 487267625412e4f8a4fa1ab2492cb578955c4239
    Author: Weiqun Zhang 
    Date:   Thu Jun 23 12:41:20 2022 -0700
    
        GNU Make: No need to query mpif90 if Fortran is not used. (#2852)
    
        This minimizes potential issues.
    
    commit fc0d6469f4ad590d576a7109d8719b018838dd86
    Author: Weiqun Zhang 
    Date:   Thu Jun 23 12:23:55 2022 -0700
    
        Remove f90doc (#2851)
    
        We no longer use it.
    
    commit 5188a6a28e64dc627c3333d13bebeb0d7250b506
    Author: Weiqun Zhang 
    Date:   Thu Jun 23 11:09:15 2022 -0700
    
        Explicitly invoke python3 (#2850)
    
        According to PEP 394, a python distributor may choose to not provide the
        python command.  In fact, that's what recent versions of macOS do.
    
    commit 2d931f63cb4d611d0d23d694726889647f8a482d
    Author: Andrew Myers 
    Date:   Wed Jun 22 15:03:50 2022 -0500
    
        Maintain the high end of the 'roundoff domain' in both float and double precision (#2839)
    
        * Maintain the high end of the 'roundoff domain' in both float and double precision
    
        * fix shadowing
    
        * fix warning
    
        * fix float conversion warning
    
        * fix logic
    
        * Update Src/Base/AMReX_Geometry.H
    
        * Update Src/Base/AMReX_Geometry.H
    ---
     Src/Base/AMReX_Geometry.H         | 1 +
     Src/EB/AMReX_EB2.H                | 1 -
     Src/Particle/AMReX_ParticleUtil.H | 2 +-
     3 files changed, 2 insertions(+), 2 deletions(-)
    
    diff --git a/Src/Base/AMReX_Geometry.H b/Src/Base/AMReX_Geometry.H
    index 890ec2e0f7e..110b7078d7f 100644
    --- a/Src/Base/AMReX_Geometry.H
    +++ b/Src/Base/AMReX_Geometry.H
    @@ -248,6 +248,7 @@ public:
             return roundoff_lo_d;
     #endif
         }
    +
         GpuArray ProbHiArrayInParticleReal () const noexcept {
     #ifdef AMREX_SINGLE_PRECISION_PARTICLES
             return roundoff_hi_f;
    diff --git a/Src/EB/AMReX_EB2.H b/Src/EB/AMReX_EB2.H
    index def8d2de9e0..b140ffbb6f9 100644
    --- a/Src/EB/AMReX_EB2.H
    +++ b/Src/EB/AMReX_EB2.H
    @@ -128,7 +128,6 @@ void Build (const Geometry& geom,
                 bool extend_domain_face = ExtendDomainFace(),
                 int num_coarsen_opt = NumCoarsenOpt());
     
    -
     void BuildFromChkptFile (std::string const& fname,
                              const Geometry& geom,
                              int required_coarsening_level,
    diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H
    index e0ec8944361..d1d57fc1807 100644
    --- a/Src/Particle/AMReX_ParticleUtil.H
    +++ b/Src/Particle/AMReX_ParticleUtil.H
    @@ -580,7 +580,7 @@ bool enforcePeriodic (P& p,
                 }
                 // clamp to avoid precision issues;
                 if (p.pos(idim) > rhi[idim]) {
    -                p.pos(idim) = rhi[idim];
    +                p.pos(idim) = static_cast(rhi[idim]);
                 }
                 shifted = true;
             }
    
    From 95b053de390343301d6e4e499327706ed1d3d052 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 3 Nov 2022 10:37:11 -0700
    Subject: [PATCH 058/111] fix no-effect const
    
    ---
     Src/Particle/AMReX_ParticleTile.H | 20 ++++++++++----------
     1 file changed, 10 insertions(+), 10 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H
    index 1e2515cce40..8029697ec34 100644
    --- a/Src/Particle/AMReX_ParticleTile.H
    +++ b/Src/Particle/AMReX_ParticleTile.H
    @@ -20,9 +20,9 @@ struct SoAParticle;
     
     template 
     struct ConstParticleTileData;
    - 
    +
     template 
    -struct ParticleTileData 
    +struct ParticleTileData
     {
     
         static constexpr int NAR = NArrayReal;
    @@ -41,7 +41,7 @@ struct ParticleTileData
         Long m_size;
     
         ParticleType* AMREX_RESTRICT m_aos;
    -    
    +
         GpuArray m_rdata;
         GpuArray m_idata;
     
    @@ -278,7 +278,7 @@ struct SoAParticle : SoAParticleBase
         int m_index;
     };
     
    -// SOA Particle Structure 
    +// SOA Particle Structure
     template 
     struct ConstSoAParticle : SoAParticleBase
     {
    @@ -294,22 +294,22 @@ struct ConstSoAParticle : SoAParticleBase
         static Long the_next_id;
     
     
    -    ConstSoAParticle (ConstPTD ptd, int const index) 
    +    ConstSoAParticle (ConstPTD ptd, int const index)
         {
             m_constparticle_tile_data=ptd;
             m_index=index;
         }
    -     
    +
         //functions to get id and cpu in the SOA data
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    const ConstParticleCPUWrapper cpu () const & { 
    +    const ConstParticleCPUWrapper cpu () const & {
             uint64_t unsigned_cpu_value;
             unsigned_cpu_value = (uint64_t) this->m_constparticle_tile_data.m_idata[1][m_index];
             return ConstParticleCPUWrapper(unsigned_cpu_value); }
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    const ConstParticleIDWrapper id () const & { 
    +    const ConstParticleIDWrapper id () const & {
             uint64_t unsigned_id_value;
             unsigned_id_value = (uint64_t) this->m_constparticle_tile_data.m_idata[0][m_index];
             return ConstParticleIDWrapper(unsigned_id_value); }
    @@ -317,10 +317,10 @@ struct ConstSoAParticle : SoAParticleBase
         //functions to get positions of the particle in the SOA data
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    const RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_constparticle_tile_data->m_rdata[0][m_index], this->m_constparticle_tile_data.m_rdata[1][m_index], this->m_constparticle_tile_data->m_rdata[2][m_index]));}
    +    RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_constparticle_tile_data->m_rdata[0][m_index], this->m_constparticle_tile_data.m_rdata[1][m_index], this->m_constparticle_tile_data->m_rdata[2][m_index]));}
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    const RealType  pos (int position_index) const &
    +    const RealType&  pos (int position_index) const &
         {
             AMREX_ASSERT(position_index < AMREX_SPACEDIM);
             return this->m_constparticle_tile_data.m_rdata[position_index][m_index];
    
    From 226181ca8acff7052c4461a74787730ce44c8379 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 3 Nov 2022 10:38:34 -0700
    Subject: [PATCH 059/111] fix id() and cpu() for ConstSoAParticle too
    
    ---
     Src/Particle/AMReX_ParticleTile.H | 10 ++--------
     1 file changed, 2 insertions(+), 8 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H
    index 8029697ec34..5e3097cecad 100644
    --- a/Src/Particle/AMReX_ParticleTile.H
    +++ b/Src/Particle/AMReX_ParticleTile.H
    @@ -303,16 +303,10 @@ struct ConstSoAParticle : SoAParticleBase
         //functions to get id and cpu in the SOA data
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    const ConstParticleCPUWrapper cpu () const & {
    -        uint64_t unsigned_cpu_value;
    -        unsigned_cpu_value = (uint64_t) this->m_constparticle_tile_data.m_idata[1][m_index];
    -        return ConstParticleCPUWrapper(unsigned_cpu_value); }
    +    int cpu () const { return this->m_constparticle_tile_data.m_idata[1][m_index]; }
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    const ConstParticleIDWrapper id () const & {
    -        uint64_t unsigned_id_value;
    -        unsigned_id_value = (uint64_t) this->m_constparticle_tile_data.m_idata[0][m_index];
    -        return ConstParticleIDWrapper(unsigned_id_value); }
    +    int id () const { return this->m_constparticle_tile_data.m_idata[0][m_index]; }
     
         //functions to get positions of the particle in the SOA data
     
    
    From aa5e7dd607d40533dda1b9299a34ad0529a9542f Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 3 Nov 2022 11:08:29 -0700
    Subject: [PATCH 060/111] add AMReX_MakeParticle.H to Make.package and
     CMakeLists.txt
    
    ---
     Src/Particle/AMReX_MakeParticle.H | 4 ++--
     Src/Particle/CMakeLists.txt       | 1 +
     Src/Particle/Make.package         | 1 +
     3 files changed, 4 insertions(+), 2 deletions(-)
    
    diff --git a/Src/Particle/AMReX_MakeParticle.H b/Src/Particle/AMReX_MakeParticle.H
    index efe9f0cdbcc..5c00a9110c0 100644
    --- a/Src/Particle/AMReX_MakeParticle.H
    +++ b/Src/Particle/AMReX_MakeParticle.H
    @@ -6,7 +6,7 @@ struct is_soa_particle
              bool,
              T::is_soa_particle
          > {};
    - 
    +
     
     template 
     struct make_particle
    @@ -30,4 +30,4 @@ struct make_particle
    Date: Thu, 3 Nov 2022 11:28:37 -0700
    Subject: [PATCH 061/111] remove unused
    
    ---
     Src/Particle/AMReX_ParticleContainerI.H | 1 -
     1 file changed, 1 deletion(-)
    
    diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
    index f257ff17ddd..d0c41723081 100644
    --- a/Src/Particle/AMReX_ParticleContainerI.H
    +++ b/Src/Particle/AMReX_ParticleContainerI.H
    @@ -2314,7 +2314,6 @@ AssignCellDensitySingleLevel (int rho_index,
             FArrayBox local_rho;
             for (ParConstIter pti(*this, lev); pti.isValid(); ++pti) {
                 const auto& particles = pti.GetArrayOfStructs();
    -            const auto pstruct = particles().data();
                 const Long np = pti.numParticles();
                 auto ptd = pti.GetParticleTile().getConstParticleTileData();
                 FArrayBox& fab = (*mf_pointer)[pti];
    
    From a5c800f1ca55271cd77d2e630c67f0a19a9ec81f Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 3 Nov 2022 11:29:45 -0700
    Subject: [PATCH 062/111] remove unused
    
    ---
     Src/Particle/AMReX_ParticleContainerI.H | 2 --
     1 file changed, 2 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
    index d0c41723081..443e511c6b1 100644
    --- a/Src/Particle/AMReX_ParticleContainerI.H
    +++ b/Src/Particle/AMReX_ParticleContainerI.H
    @@ -2006,8 +2006,6 @@ RedistributeMPI (std::map >& not_ours,
                 for (int i = 0; i < int(Cnt); ++i)
                 {
                     char* pbuf = ((char*) &recvdata[offset]) + i*superparticle_size;
    -                auto& ptile = m_particles[rcv_levs[ipart]][std::make_pair(rcv_grid[ipart],
    -                                                                          rcv_tile[ipart])];
     
                     Particle p;
                     if constexpr(!ParticleType::is_soa_particle)
    
    From 5614975cfbedfb0091a51e1653e89c168207055b Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Tue, 14 Feb 2023 21:20:50 -0800
    Subject: [PATCH 063/111] Fix: Unused Variables/Typedefs
    
    ---
     Src/Particle/AMReX_ParticleContainerI.H |  1 -
     Tests/Particles/SOAParticle/main.cpp    | 32 +++++++++++++------------
     2 files changed, 17 insertions(+), 16 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
    index 31d420f32c7..e10172da0b3 100644
    --- a/Src/Particle/AMReX_ParticleContainerI.H
    +++ b/Src/Particle/AMReX_ParticleContainerI.H
    @@ -2311,7 +2311,6 @@ AssignCellDensitySingleLevel (int rho_index,
         {
             FArrayBox local_rho;
             for (ParConstIter pti(*this, lev); pti.isValid(); ++pti) {
    -            const auto& particles = pti.GetArrayOfStructs();
                 const Long np = pti.numParticles();
                 auto ptd = pti.GetParticleTile().getConstParticleTileData();
                 FArrayBox& fab = (*mf_pointer)[pti];
    diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp
    index 17b7ccdc7c1..599f2ec1f5f 100644
    --- a/Tests/Particles/SOAParticle/main.cpp
    +++ b/Tests/Particles/SOAParticle/main.cpp
    @@ -37,16 +37,16 @@ void addParticles ()
     
         T_PC pc(geom, dm, ba);
     
    -    int const NReal = pc.NStructReal;
    -    int const NInt = pc.NStructInt;
    +    //int const NReal = pc.NStructReal;
    +    //int const NInt = pc.NStructInt;
         int const NArrayReal = pc.NArrayReal;
         int const NArrayInt = pc.NArrayInt;
     
         using ParticleType = typename T_PC::ParticleType;
         using ParticleTileDataType = typename T_PC::ParticleTileType::ParticleTileDataType;
    -    using RealVector = amrex::PODVector >;
    -    using IntVector = amrex::PODVector >;
    -    using SPType = typename T_PC::SuperParticleType;
    +    //using RealVector = amrex::PODVector >;
    +    //using IntVector = amrex::PODVector >;
    +    //using SPType = typename T_PC::SuperParticleType;
     
         const int add_num_particles = 5;
     
    @@ -63,7 +63,7 @@ void addParticles ()
             ptile1.id(i) = 1;
             ptile1.cpu(i) = 1;
         }
    -    
    +
         int lev=0;
         // int numparticles=0;
         using MyParIter = ParIter_impl;
    @@ -75,8 +75,8 @@ void addParticles ()
     
             // preparing access to particle data: AoS
             //using PType = ImpactXParticleContainer::ParticleType;
    -        auto& aos = pti.GetArrayOfStructs();
    -        ParticleType* AMREX_RESTRICT aos_ptr = aos().dataPtr();
    +        //auto& aos = pti.GetArrayOfStructs();
    +        //ParticleType* AMREX_RESTRICT aos_ptr = aos().dataPtr();
     
             // preparing access to particle data: SoA of Reals
             auto& soa = pti.GetStructOfArrays();
    @@ -87,6 +87,7 @@ void addParticles ()
             amrex::ParticleReal* const AMREX_RESTRICT part_z = soa_real[2].dataPtr();
             amrex::ParticleReal* const AMREX_RESTRICT part_aaa = soa_real[3].dataPtr();
             auto& soa_int = pti.GetStructOfArrays().GetIntData();
    +        amrex::ignore_unused(size, part_x, part_y, part_z, part_aaa, soa_int);
     
             // Iterating over old Particles
             // ParallelFor( np, [=] AMREX_GPU_DEVICE (long ip)
    @@ -125,7 +126,7 @@ void addParticles ()
         //ParticleContainer<1,1> pc_og;
         auto tmp = pc.template make_alike();
         tmp.copyParticles(pc, true);
    -    
    +
         using MyPinnedParIter = ParIter_impl;
     
         for (MyPinnedParIter pti(tmp, lev); pti.isValid(); ++pti) {
    @@ -162,8 +163,9 @@ void addParticles ()
             },
             reduce_ops
         );
    +    amrex::ignore_unused(r);
     
    -    // Reduce for SoA Particle Struct 
    +    // Reduce for SoA Particle Struct
         /*
         using PTDType = typename T_PC::ParticleTileType::ConstParticleTileDataType;
         amrex::ReduceOps reduce_ops;
    @@ -177,10 +179,10 @@ void addParticles ()
                     const int c = ptd.idata(1)[i];
                     return {a, b, c};
                  }, reduce_ops);
    -  
    -    AMREX_ALWAYS_ASSERT(amrex::get<0>(r) == amrex::Real(std::pow(256, AMREX_SPACEDIM))); 
    -    AMREX_ALWAYS_ASSERT(amrex::get<1>(r) == 2.0); 
    -    AMREX_ALWAYS_ASSERT(amrex::get<2>(r) == 1); 
    +
    +    AMREX_ALWAYS_ASSERT(amrex::get<0>(r) == amrex::Real(std::pow(256, AMREX_SPACEDIM)));
    +    AMREX_ALWAYS_ASSERT(amrex::get<1>(r) == 2.0);
    +    AMREX_ALWAYS_ASSERT(amrex::get<2>(r) == 1);
         */
     }
     
    @@ -196,6 +198,6 @@ int main(int argc, char* argv[])
         amrex::Finalize();
      }
     
    - 
    +
     
     
    
    From 04fd51bcc64ef4d8cfcddc7c94ac94ce99566c19 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Tue, 14 Feb 2023 21:30:52 -0800
    Subject: [PATCH 064/111] PureSoA Test: ID Assignment
    
    ---
     Tests/Particles/SOAParticle/main.cpp | 5 ++---
     1 file changed, 2 insertions(+), 3 deletions(-)
    
    diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp
    index 599f2ec1f5f..900182d87e8 100644
    --- a/Tests/Particles/SOAParticle/main.cpp
    +++ b/Tests/Particles/SOAParticle/main.cpp
    @@ -59,9 +59,8 @@ void addParticles ()
             ptile1.pos(i, 1) = 12.0;
             ptile1.pos(i, 2) = 12.0;
     
    -        // TODO
    -        ptile1.id(i) = 1;
    -        ptile1.cpu(i) = 1;
    +        ptile1.push_back_int(0, ParticleType::NextID());
    +        ptile1.push_back_int(1, amrex::ParallelDescriptor::MyProc());
         }
     
         int lev=0;
    
    From 5df2b0be512303f9fad92f06214c49bf2dbb8a05 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Tue, 14 Feb 2023 21:56:31 -0800
    Subject: [PATCH 065/111] Remove EOL Whitespaces
    
    ---
     Src/Particle/AMReX_ParIter.H      | 2 +-
     Src/Particle/AMReX_ParticleTile.H | 8 ++++----
     2 files changed, 5 insertions(+), 5 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParIter.H b/Src/Particle/AMReX_ParIter.H
    index fae27c074df..41cd42cc1d8 100644
    --- a/Src/Particle/AMReX_ParIter.H
    +++ b/Src/Particle/AMReX_ParIter.H
    @@ -158,7 +158,7 @@ public:
     
         ParConstIter_impl (ContainerType const& pc, int level, MFItInfo& info)
             : ParIterBase_impl(pc,level,info)
    -        {}    
    +        {}
     };
     
     template  0)    
    +        if constexpr(NArrayReal > 0)
                 for (int i = 0; i < NArrayReal; ++i)
                     sp.rdata(NStructReal+i) = m_rdata[i][index];
             sp.id() = m_aos[index].id();
    @@ -547,7 +547,7 @@ struct ParticleTile
             m_runtime_i_cptrs.resize(a_num_runtime_int);
         }
     
    -    // Get cpu data 
    +    // Get cpu data
     
         // AoS
         template ::type = 0>
    @@ -570,7 +570,7 @@ struct ParticleTile
             return p.cpu();
         }
     
    -    // const 
    +    // const
     
         template ::type = 0>
         ConstParticleCPUWrapper cpu (int index) const & {
    @@ -594,7 +594,7 @@ struct ParticleTile
             return p.id();
         }
     
    -    // SoA 
    +    // SoA
         template ::type = 0>
         ParticleIDWrapper id (int index) & {
             ParticleType& p = m_aos_tile().dataPtr()[index];
    
    From a580f7b7cb98cb74cc5cc1b4de14b097cab70926 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Tue, 14 Feb 2023 23:27:59 -0800
    Subject: [PATCH 066/111] [Work-Around] GPUArray & Particle Type
    
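    - Seen with all modern clang-based compilers: the implicit move assignment
      operator of the GpuArray member fails to compile when a temporary
      particle is assigned to an existing variable. As a work-around,
      construct a named particle first and then assign it.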
    
    ```
    Src/Base/AMReX_Array.H:32:12: error: cannot initialize a parameter of type 'void *' with an rvalue of type 'int *__restrict (*)[4]'
        struct GpuArray
               ^~~~~~~~
    Src/Particle/AMReX_ParticleTile.H:25:8: note: in implicit move assignment operator for 'amrex::GpuArray' first required here
    struct ParticleTileData
           ^
    Src/Particle/AMReX_ParticleTile.H:207:8: note: in implicit move assignment operator for 'amrex::ParticleTileData' first required here
    struct SoAParticle : SoAParticleBase
           ^
    Src/Particle/AMReX_ParticleContainerI.H:1666:25: note: in implicit move assignment operator for 'amrex::SoAParticle<3, 4>' first required here
                          p = ParticleType(ptd,last);
                            ^
    Src/Particle/AMReX_ParticleContainerI.H:1135:5: note: in instantiation of member function 'amrex::ParticleContainer_impl, 3, 4, amrex::PinnedArenaAllocator>::RedistributeCPU' requested here
        RedistributeCPU(lev_min, lev_max, nGrow, local, remove_negative);
        ^
    ```
    ---
     Src/Particle/AMReX_ParticleContainerI.H | 9 ++++++---
     1 file changed, 6 insertions(+), 3 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
    index e10172da0b3..5ab743a9a65 100644
    --- a/Src/Particle/AMReX_ParticleContainerI.H
    +++ b/Src/Particle/AMReX_ParticleContainerI.H
    @@ -1663,7 +1663,8 @@ ParticleContainer_impl
     
                       if (p.id() < 0){
     
    -                      p = ParticleType(ptd,last);
    +                      ParticleType p2(ptd,last); // here
    +                      p = p2;
                           for (int comp = 0; comp < NumRealComps(); comp++)
                               soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
                           for (int comp = 0; comp < NumIntComps(); comp++)
    @@ -1678,7 +1679,8 @@ ParticleContainer_impl
                       particlePostLocate(p, pld, lev);
     
                       if (p.id() < 0){
    -                      p = ParticleType(ptd,last);
    +                      ParticleType p2(ptd,last);
    +                      p = p2;
                           for (int comp = 0; comp < NumRealComps(); comp++)
                               soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
                           for (int comp = 0; comp < NumIntComps(); comp++)
    @@ -1734,7 +1736,8 @@ ParticleContainer_impl
                         }
     
                       if (p.id() < 0){
    -                      p = ParticleType(ptd,last);
    +                      ParticleType p2(ptd,last);
    +                      p = p2;
                           for (int comp = 0; comp < NumRealComps(); comp++)
                               soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
                           for (int comp = 0; comp < NumIntComps(); comp++)
    
    From be3598332cbff221cdfb9e9ff954a48e208c8143 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Tue, 14 Feb 2023 23:30:58 -0800
    Subject: [PATCH 067/111] ParticleTileData: Remove Debug Static Assert
    
    ---
     Src/Particle/AMReX_ParticleTile.H | 2 --
     1 file changed, 2 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H
    index 10b3ee8e774..5cf8a065946 100644
    --- a/Src/Particle/AMReX_ParticleTile.H
    +++ b/Src/Particle/AMReX_ParticleTile.H
    @@ -36,8 +36,6 @@ struct ParticleTileData
     
         using SuperParticleType = Particle;
     
    -    static_assert(!std::is_same>::value || NAR != 0, "ParticleTileData NAR==0");
    -
         Long m_size;
     
         ParticleType* AMREX_RESTRICT m_aos;
    
    From fb3783732b85dfaef922a41133183651df10b7da Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Tue, 14 Feb 2023 23:51:32 -0800
    Subject: [PATCH 068/111] Cleanup ConstType Traits
    
    ---
     Src/Particle/AMReX_Particle.H          |  1 +
     Src/Particle/AMReX_ParticleContainer.H |  5 +-
     Src/Particle/AMReX_ParticleTile.H      | 82 +++++++++++++-------------
     3 files changed, 45 insertions(+), 43 deletions(-)
    
    diff --git a/Src/Particle/AMReX_Particle.H b/Src/Particle/AMReX_Particle.H
    index 225f2120e5c..10a3a8dca81 100644
    --- a/Src/Particle/AMReX_Particle.H
    +++ b/Src/Particle/AMReX_Particle.H
    @@ -205,6 +205,7 @@ struct Particle
     {
         static constexpr bool is_soa_particle = false;
         using StorageParticleType = Particle;
    +    using ConstType = Particle const;
     
         //! \brief number of extra Real components in the particle struct
         static constexpr int NReal = T_NReal;
    diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H
    index d604a36c896..9481680ab3b 100644
    --- a/Src/Particle/AMReX_ParticleContainer.H
    +++ b/Src/Particle/AMReX_ParticleContainer.H
    @@ -144,6 +144,8 @@ class ParticleContainer_impl : public ParticleContainerBase
     {
     public:
         using ParticleType = T_ParticleType;
    +    using ConstParticleType = typename ParticleType::ConstType;
    +
         //! \brief Number of extra Real components in the particle struct
         static constexpr int NStructReal = ParticleType::NReal;
         //! \brief Number of extra integer components in the particle struct
    @@ -153,8 +155,7 @@ public:
         //! \brief Number of extra integer components stored in struct-of-array form
         static constexpr int NArrayInt = T_NArrayInt;
             //! \brief The type of the "Particle"
    -    //using ConstParticleType = typename std::conditional >::type;
    -    using ConstParticleType = ConstSoAParticle;
    +
     private:
         friend class ParIterBase_impl;
         friend class ParIterBase_impl;
    diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H
    index 5cf8a065946..f60e42a9d7a 100644
    --- a/Src/Particle/AMReX_ParticleTile.H
    +++ b/Src/Particle/AMReX_ParticleTile.H
    @@ -202,56 +202,43 @@ struct ParticleTileData
     
     // SOA Particle Structure
     template 
    -struct SoAParticle : SoAParticleBase
    +struct ConstSoAParticle : SoAParticleBase
     {
         static constexpr int NArrayReal = T_NArrayReal;
         static constexpr int NArrayInt = T_NArrayInt;
         using StorageParticleType = SoAParticleBase;
    -    using PTD = ParticleTileData;
    -    static constexpr bool is_soa_particle = true;
    -    static constexpr bool is_constsoa_particle = false;
    +    using ConstPTD = ConstParticleTileData;
    +    static constexpr bool is_soa_particle = false;
    +    static constexpr bool is_constsoa_particle = true;
     
         using RealType = ParticleReal;
     
    -    static Long the_next_id;
    +    //static Long the_next_id;
     
    -    SoAParticle (PTD ptd, int const index)
    +    ConstSoAParticle (ConstPTD ptd, int const index)
         {
    -        m_particle_tile_data=ptd;
    +        m_constparticle_tile_data=ptd;
             m_index=index;
         }
     
         //functions to get id and cpu in the SOA data
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    int& cpu () & { return this->m_particle_tile_data.m_idata[1][m_index]; }
    -
    -    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    int& id () & { return this->m_particle_tile_data.m_idata[0][m_index]; }
    -
    -    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    const int& cpu () const & { return this->m_particle_tile_data.m_idata[1][m_index]; }
    +    int cpu () const { return this->m_constparticle_tile_data.m_idata[1][m_index]; }
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    const int& id () const & { return this->m_particle_tile_data.m_idata[0][m_index]; }
    +    int id () const { return this->m_constparticle_tile_data.m_idata[0][m_index]; }
     
         //functions to get positions of the particle in the SOA data
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_particle_tile_data->m_rdata[0][m_index], this->m_particle_tile_data.m_rdata[1][m_index], this->m_particle_tile_data->m_rdata[2][m_index]));}
    -
    -    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    RealType& pos (int position_index) &
    -    {
    -        AMREX_ASSERT(position_index < AMREX_SPACEDIM);
    -        return this->m_particle_tile_data.m_rdata[position_index][m_index];
    -    }
    +    RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_constparticle_tile_data->m_rdata[0][m_index], this->m_constparticle_tile_data.m_rdata[1][m_index], this->m_constparticle_tile_data->m_rdata[2][m_index]));}
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    RealType  pos (int position_index) const &
    +    const RealType&  pos (int position_index) const &
         {
             AMREX_ASSERT(position_index < AMREX_SPACEDIM);
    -        return this->m_particle_tile_data.m_rdata[position_index][m_index];
    +        return this->m_constparticle_tile_data.m_rdata[position_index][m_index];
         }
     
         static Long NextID ();
    @@ -272,50 +259,62 @@ struct SoAParticle : SoAParticleBase
     
         static_assert(std::is_trivially_copyable>(), "ParticleTileData is not trivially copyable");
     
    -    PTD m_particle_tile_data;
    +    ConstPTD m_constparticle_tile_data;
         int m_index;
     };
     
    -// SOA Particle Structure
     template 
    -struct ConstSoAParticle : SoAParticleBase
    +struct SoAParticle : SoAParticleBase
     {
         static constexpr int NArrayReal = T_NArrayReal;
         static constexpr int NArrayInt = T_NArrayInt;
         using StorageParticleType = SoAParticleBase;
    -    using ConstPTD = ConstParticleTileData;
    -    static constexpr bool is_soa_particle = false;
    -    static constexpr bool is_constsoa_particle = true;
    +    using PTD = ParticleTileData;
    +    static constexpr bool is_soa_particle = true;
    +    static constexpr bool is_constsoa_particle = false;
     
    +    using ConstType = ConstSoAParticle;
         using RealType = ParticleReal;
     
         static Long the_next_id;
     
    -
    -    ConstSoAParticle (ConstPTD ptd, int const index)
    +    SoAParticle (PTD ptd, int const index)
         {
    -        m_constparticle_tile_data=ptd;
    +        m_particle_tile_data=ptd;
             m_index=index;
         }
     
         //functions to get id and cpu in the SOA data
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    int cpu () const { return this->m_constparticle_tile_data.m_idata[1][m_index]; }
    +    int& cpu () & { return this->m_particle_tile_data.m_idata[1][m_index]; }
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    int id () const { return this->m_constparticle_tile_data.m_idata[0][m_index]; }
    +    int& id () & { return this->m_particle_tile_data.m_idata[0][m_index]; }
    +
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    +    const int& cpu () const & { return this->m_particle_tile_data.m_idata[1][m_index]; }
    +
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    +    const int& id () const & { return this->m_particle_tile_data.m_idata[0][m_index]; }
     
         //functions to get positions of the particle in the SOA data
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_constparticle_tile_data->m_rdata[0][m_index], this->m_constparticle_tile_data.m_rdata[1][m_index], this->m_constparticle_tile_data->m_rdata[2][m_index]));}
    +    RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_particle_tile_data->m_rdata[0][m_index], this->m_particle_tile_data.m_rdata[1][m_index], this->m_particle_tile_data->m_rdata[2][m_index]));}
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    const RealType&  pos (int position_index) const &
    +    RealType& pos (int position_index) &
         {
             AMREX_ASSERT(position_index < AMREX_SPACEDIM);
    -        return this->m_constparticle_tile_data.m_rdata[position_index][m_index];
    +        return this->m_particle_tile_data.m_rdata[position_index][m_index];
    +    }
    +
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    +    RealType  pos (int position_index) const &
    +    {
    +        AMREX_ASSERT(position_index < AMREX_SPACEDIM);
    +        return this->m_particle_tile_data.m_rdata[position_index][m_index];
         }
     
         static Long NextID ();
    @@ -332,14 +331,15 @@ struct ConstSoAParticle : SoAParticleBase
         */
         static void NextID (Long nextid);
     
    -    private :
    +private :
     
         static_assert(std::is_trivially_copyable>(), "ParticleTileData is not trivially copyable");
     
    -    ConstPTD m_constparticle_tile_data;
    +    PTD m_particle_tile_data;
         int m_index;
     };
     
    +//template  Long ConstSoAParticle::the_next_id = 1;
     template  Long SoAParticle::the_next_id = 1;
     
     template 
    
    From c9f1402380036bbb0661b5bceb9950705179e9a3 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Wed, 15 Feb 2023 00:07:42 -0800
    Subject: [PATCH 069/111] Fix: ConstSoAParticle is SoAParticle
    
    ---
     Src/Particle/AMReX_ParticleTile.H | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H
    index f60e42a9d7a..39dc2c3bd10 100644
    --- a/Src/Particle/AMReX_ParticleTile.H
    +++ b/Src/Particle/AMReX_ParticleTile.H
    @@ -208,7 +208,7 @@ struct ConstSoAParticle : SoAParticleBase
         static constexpr int NArrayInt = T_NArrayInt;
         using StorageParticleType = SoAParticleBase;
         using ConstPTD = ConstParticleTileData;
    -    static constexpr bool is_soa_particle = false;
    +    static constexpr bool is_soa_particle = true;
         static constexpr bool is_constsoa_particle = true;
     
         using RealType = ParticleReal;
    
    From a026399705fd93ab7f515668c130662e48971b70 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Wed, 15 Feb 2023 00:15:22 -0800
    Subject: [PATCH 070/111] ConstSOAParticle: Add Missing Forward Declaration
    
    ---
     Src/Particle/AMReX_ParticleTile.H | 2 ++
     1 file changed, 2 insertions(+)
    
    diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H
    index 39dc2c3bd10..9ae01f81480 100644
    --- a/Src/Particle/AMReX_ParticleTile.H
    +++ b/Src/Particle/AMReX_ParticleTile.H
    @@ -16,6 +16,8 @@ namespace amrex {
     
     // Forward Declaration
     template 
    +struct ConstSoAParticle;
    +template 
     struct SoAParticle;
     
     template 
    
    From 0fc9623c072b2ebb1a2c6d59a1a81f045af29050 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Wed, 15 Feb 2023 00:19:39 -0800
    Subject: [PATCH 071/111] Constructor Cosmetics
    
    ---
     Src/Particle/AMReX_ParticleTile.H | 18 ++++++++----------
     1 file changed, 8 insertions(+), 10 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H
    index 9ae01f81480..bf8cb3369af 100644
    --- a/Src/Particle/AMReX_ParticleTile.H
    +++ b/Src/Particle/AMReX_ParticleTile.H
    @@ -215,14 +215,13 @@ struct ConstSoAParticle : SoAParticleBase
     
         using RealType = ParticleReal;
     
    -    //static Long the_next_id;
    -
    -    ConstSoAParticle (ConstPTD ptd, int const index)
    +    ConstSoAParticle(ConstPTD const ptd, long i) :
    +        m_constparticle_tile_data(ptd), m_index(i)
         {
    -        m_constparticle_tile_data=ptd;
    -        m_index=index;
         }
     
    +    //static Long the_next_id;
    +
         //functions to get id and cpu in the SOA data
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    @@ -278,14 +277,13 @@ struct SoAParticle : SoAParticleBase
         using ConstType = ConstSoAParticle;
         using RealType = ParticleReal;
     
    -    static Long the_next_id;
    -
    -    SoAParticle (PTD ptd, int const index)
    +    SoAParticle(PTD const ptd, long i) :
    +        m_particle_tile_data(ptd), m_index(i)
         {
    -        m_particle_tile_data=ptd;
    -        m_index=index;
         }
     
    +    static Long the_next_id;
    +
         //functions to get id and cpu in the SOA data
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    
    From 0187ac90583784fa106e4e6b4932c078b1264a5b Mon Sep 17 00:00:00 2001
    From: atmyers 
    Date: Wed, 15 Feb 2023 19:04:43 -0800
    Subject: [PATCH 072/111] remove restrict from these members of
     ConstParticleTileData
    
    ---
     Src/Particle/AMReX_ParticleTile.H | 4 ++--
     1 file changed, 2 insertions(+), 2 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H
    index bf8cb3369af..f2b692af6ba 100644
    --- a/Src/Particle/AMReX_ParticleTile.H
    +++ b/Src/Particle/AMReX_ParticleTile.H
    @@ -42,8 +42,8 @@ struct ParticleTileData
     
         ParticleType* AMREX_RESTRICT m_aos;
     
    -    GpuArray m_rdata;
    -    GpuArray m_idata;
    +    GpuArray m_rdata;
    +    GpuArray m_idata;
     
         int m_num_runtime_real;
         int m_num_runtime_int;
    
    From e4a8f0e54607fd4e60c6f53fb139c7551c4b8c4a Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 16 Feb 2023 10:17:22 -0800
    Subject: [PATCH 073/111] fix applying periodic shift to the particle send
     buffer
    
    ---
     Src/Particle/AMReX_ParticleCommunication.H | 11 +++++------
     1 file changed, 5 insertions(+), 6 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleCommunication.H b/Src/Particle/AMReX_ParticleCommunication.H
    index 51d4b6de4b9..cb5dc7f27bc 100644
    --- a/Src/Particle/AMReX_ParticleCommunication.H
    +++ b/Src/Particle/AMReX_ParticleCommunication.H
    @@ -366,18 +366,17 @@ void packBuffer (const PC& pc, const ParticleCopyOp& op, const ParticleCopyPlan&
     
                         if (do_periodic_shift)
                         {
    -                        auto p = make_particle{}(ptd,kv);
    -                        //ParticleType p;
    -                        amrex::Gpu::memcpy(&p, &p_snd_buffer[dst_offset], sizeof(ParticleType));
    +                        ParticleReal pos[AMREX_SPACEDIM];
    +                        amrex::Gpu::memcpy(&pos[0], &p_snd_buffer[dst_offset], 3*sizeof(ParticleReal));
                             for (int idim = 0; idim < AMREX_SPACEDIM; ++idim)
                             {
                                 if (! is_per[idim]) continue;
                                 if (pshift[idim] > 0)
    -                                p.pos(idim) += phi[idim] - plo[idim];
    +                                pos[idim] += phi[idim] - plo[idim];
                                 else if (pshift[idim] < 0)
    -                                p.pos(idim) -= phi[idim] - plo[idim];
    +                                pos[idim] -= phi[idim] - plo[idim];
                             }
    -                        amrex::Gpu::memcpy(&p_snd_buffer[dst_offset], &p, sizeof(ParticleType));
    +                        amrex::Gpu::memcpy(&p_snd_buffer[dst_offset], &pos[0], 3*sizeof(ParticleReal));
                         }
                     }
                 });
    
    From 1c6134f493eca473c69cd5e880e78ba6af4d73c2 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Thu, 16 Feb 2023 10:17:05 -0800
    Subject: [PATCH 074/111] ParticleTile: Fix Const Access
    
    ---
     Src/Particle/AMReX_ParticleTile.H | 9 ++++++---
     1 file changed, 6 insertions(+), 3 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H
    index f2b692af6ba..5ca67e383a8 100644
    --- a/Src/Particle/AMReX_ParticleTile.H
    +++ b/Src/Particle/AMReX_ParticleTile.H
    @@ -557,7 +557,8 @@ struct ParticleTile
         // const
         template ::type = 0>
         ConstParticleCPUWrapper cpu (int index) const & {
    -        ParticleType p(this->getParticleTileData(), index);
    +        using ConstParticleType = typename ParticleType::ConstType;
    +        ConstParticleType p(this->getConstParticleTileData(), index);
             return p.cpu();
         }
     
    @@ -588,7 +589,8 @@ struct ParticleTile
         // const
         template ::type = 0>
         ConstParticleIDWrapper id (int index) const & {
    -        ParticleType p(this->getParticleTileData(), index);
    +        using ConstParticleType = typename ParticleType::ConstType;
    +        ConstParticleType p(this->getConstParticleTileData(), index);
             return p.id();
         }
     
    @@ -622,7 +624,8 @@ struct ParticleTile
         template ::type = 0>
         RealType  pos (int index, int position_index) const &
         {
    -        ParticleType p(this->getParticleTileData(), index);
    +        using ConstParticleType = typename ParticleType::ConstType;
    +        ConstParticleType p(this->getConstParticleTileData(), index);
             return p.pos(position_index);
         }
     
    
    From 2c4a6e5832c47cdbf610be78c16b4028c18b02e1 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 16 Feb 2023 10:21:48 -0800
    Subject: [PATCH 075/111] remove now unused type alias
    
    ---
     Src/Particle/AMReX_ParticleCommunication.H | 2 --
     1 file changed, 2 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleCommunication.H b/Src/Particle/AMReX_ParticleCommunication.H
    index cb5dc7f27bc..b188ac27535 100644
    --- a/Src/Particle/AMReX_ParticleCommunication.H
    +++ b/Src/Particle/AMReX_ParticleCommunication.H
    @@ -294,8 +294,6 @@ void packBuffer (const PC& pc, const ParticleCopyOp& op, const ParticleCopyPlan&
     {
         BL_PROFILE("amrex::packBuffer");
     
    -    using ParticleType = typename PC::ParticleType;
    -
         Long psize = plan.superParticleSize();
     
         int num_levels = op.numLevels();
    
    From d286e8bbe3abb806a43a39385b3f9c3ee7dee72e Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 16 Feb 2023 10:42:02 -0800
    Subject: [PATCH 076/111] fix out of bounds access
    
    ---
     Src/Particle/AMReX_ParticleCommunication.H |  6 ++++--
     Src/Particle/AMReX_ParticleContainerI.H    | 20 +++++++-------------
     2 files changed, 11 insertions(+), 15 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleCommunication.H b/Src/Particle/AMReX_ParticleCommunication.H
    index b188ac27535..9414f477516 100644
    --- a/Src/Particle/AMReX_ParticleCommunication.H
    +++ b/Src/Particle/AMReX_ParticleCommunication.H
    @@ -365,7 +365,8 @@ void packBuffer (const PC& pc, const ParticleCopyOp& op, const ParticleCopyPlan&
                         if (do_periodic_shift)
                         {
                             ParticleReal pos[AMREX_SPACEDIM];
    -                        amrex::Gpu::memcpy(&pos[0], &p_snd_buffer[dst_offset], 3*sizeof(ParticleReal));
    +                        amrex::Gpu::memcpy(&pos[0], &p_snd_buffer[dst_offset],
    +                                           AMREX_SPACEDIM*sizeof(ParticleReal));
                             for (int idim = 0; idim < AMREX_SPACEDIM; ++idim)
                             {
                                 if (! is_per[idim]) continue;
    @@ -374,7 +375,8 @@ void packBuffer (const PC& pc, const ParticleCopyOp& op, const ParticleCopyPlan&
                                 else if (pshift[idim] < 0)
                                     pos[idim] -= phi[idim] - plo[idim];
                             }
    -                        amrex::Gpu::memcpy(&p_snd_buffer[dst_offset], &pos[0], 3*sizeof(ParticleReal));
    +                        amrex::Gpu::memcpy(&p_snd_buffer[dst_offset], &pos[0],
    +                                           AMREX_SPACEDIM*sizeof(ParticleReal));
                         }
                     }
                 });
    diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
    index 5ab743a9a65..8015d2ed43d 100644
    --- a/Src/Particle/AMReX_ParticleContainerI.H
    +++ b/Src/Particle/AMReX_ParticleContainerI.H
    @@ -2010,16 +2010,8 @@ RedistributeMPI (std::map >& not_ours,
                 {
                     char* pbuf = ((char*) &recvdata[offset]) + i*superparticle_size;
     
    -                Particle p;
    -                if constexpr(!ParticleType::is_soa_particle)
    -                {
    -                    std::memcpy(&p, pbuf, sizeof(ParticleType));
    -                } else
    -                {
    -                    std::memcpy(&p.pos(0), pbuf                         , sizeof(ParticleReal));
    -                    std::memcpy(&p.pos(1), pbuf +   sizeof(ParticleReal), sizeof(ParticleReal));
    -                    std::memcpy(&p.pos(2), pbuf + 2*sizeof(ParticleReal), sizeof(ParticleReal));
    -                }
    +                Particle<0, 0> p;
    +                std::memcpy(&p, pbuf, AMREX_SPACEDIM*sizeof(ParticleReal));
     
                     bool success = Where(p, pld, lev_min, lev_max, 0);
                     if (!success)
    @@ -2118,9 +2110,11 @@ RedistributeMPI (std::map >& not_ours,
     
                     char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size;
     
    -                ParticleType p;
    -                std::memcpy(&p, pbuf, sizeof(ParticleType));
    -                pbuf += sizeof(ParticleType);
    +                if constexpr(! ParticleType::is_soa_particle) {
    +                        ParticleType p;
    +                        std::memcpy(&p, pbuf, sizeof(ParticleType));
    +                        pbuf += sizeof(ParticleType);
    +                    }
     
                     host_real_attribs[lev][ind].resize(NumRealComps());
                     host_int_attribs[lev][ind].resize(NumIntComps());
    
    From c044a29fa4a899dfbe3253c50767aa09b59ca259 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 16 Feb 2023 10:43:48 -0800
    Subject: [PATCH 077/111] also use if constexpr when unpacking on CPU for GPU runs
    
    ---
     Src/Particle/AMReX_ParticleContainerI.H | 12 +++++-------
     1 file changed, 5 insertions(+), 7 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
    index 8015d2ed43d..7170505b114 100644
    --- a/Src/Particle/AMReX_ParticleContainerI.H
    +++ b/Src/Particle/AMReX_ParticleContainerI.H
    @@ -2111,17 +2111,15 @@ RedistributeMPI (std::map >& not_ours,
                     char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size;
     
                     if constexpr(! ParticleType::is_soa_particle) {
    -                        ParticleType p;
    -                        std::memcpy(&p, pbuf, sizeof(ParticleType));
    -                        pbuf += sizeof(ParticleType);
    -                    }
    +                    ParticleType p;
    +                    std::memcpy(&p, pbuf, sizeof(ParticleType));
    +                    pbuf += sizeof(ParticleType);
    +                    host_particles[lev][ind].push_back(p);
    +                }
     
                     host_real_attribs[lev][ind].resize(NumRealComps());
                     host_int_attribs[lev][ind].resize(NumIntComps());
     
    -                // add the struct
    -                host_particles[lev][ind].push_back(p);
    -
                     // add the real...
                     int array_comp_start = AMREX_SPACEDIM + NStructReal;
                     for (int comp = 0; comp < NumRealComps(); ++comp) {
    
    From df3d866da0e48d4e94689cfa9540c110b54a5b7e Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 16 Feb 2023 10:49:32 -0800
    Subject: [PATCH 078/111] fix warnings
    
    ---
     Src/Particle/AMReX_ParticleContainerI.H | 9 ++++++++-
     1 file changed, 8 insertions(+), 1 deletion(-)
    
    diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
    index 7170505b114..18cef2acf82 100644
    --- a/Src/Particle/AMReX_ParticleContainerI.H
    +++ b/Src/Particle/AMReX_ParticleContainerI.H
    @@ -2010,8 +2010,15 @@ RedistributeMPI (std::map >& not_ours,
                 {
                     char* pbuf = ((char*) &recvdata[offset]) + i*superparticle_size;
     
    +                ParticleReal pos[AMREX_SPACEDIM];
    +                std::memcpy(&pos[0], pbuf, AMREX_SPACEDIM*sizeof(ParticleReal));
    +
                     Particle<0, 0> p;
    -                std::memcpy(&p, pbuf, AMREX_SPACEDIM*sizeof(ParticleReal));
    +                p.id() = 0;
    +                p.cpu() = 0;
    +                AMREX_D_TERM(p.pos(0) = pos[0];,
    +                             p.pos(1) = pos[1];,
    +                             p.pos(2) = pos[2]);
     
                     bool success = Where(p, pld, lev_min, lev_max, 0);
                     if (!success)
    
    From 6bb4b0eb0c84b1e197521d222c20d0b17b5b0ffa Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 16 Feb 2023 11:13:31 -0800
    Subject: [PATCH 079/111] fix make_particle for legacy particle type
    
    ---
     Src/Particle/AMReX_MakeParticle.H | 4 ++--
     1 file changed, 2 insertions(+), 2 deletions(-)
    
    diff --git a/Src/Particle/AMReX_MakeParticle.H b/Src/Particle/AMReX_MakeParticle.H
    index 5c00a9110c0..438753a995e 100644
    --- a/Src/Particle/AMReX_MakeParticle.H
    +++ b/Src/Particle/AMReX_MakeParticle.H
    @@ -13,10 +13,10 @@ struct make_particle
     {
         template 
         auto
    -    operator()(PTD, int)
    +    operator()(PTD ptd, int i)
         {
             // legacy Particle (AoS)
    -        return T_ParticleType();
    +        return ptd.m_aos[i];
         }
     };
     
    
    From b72780e6b91ea53cfc65cc602e7a3f13a3876f9b Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 16 Feb 2023 11:54:29 -0800
    Subject: [PATCH 080/111] some host / device annotations
    
    ---
     Src/Particle/AMReX_ParticleTile.H | 20 ++++++++++++++++++--
     1 file changed, 18 insertions(+), 2 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H
    index 5ca67e383a8..885123096a2 100644
    --- a/Src/Particle/AMReX_ParticleTile.H
    +++ b/Src/Particle/AMReX_ParticleTile.H
    @@ -50,6 +50,18 @@ struct ParticleTileData
         ParticleReal* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_rdata;
         int* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_idata;
     
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    +    auto* rdata (const int attribute_index)
    +    {
    +        return this->m_rdata[attribute_index];
    +    }
    +
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    +    auto* idata (const int attribute_index)
    +    {
    +        return this->m_idata[attribute_index];
    +    }
    +
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
         void packParticleData (char* buffer, int src_index, std::size_t dst_offset,
                                const int* comm_real, const int * comm_int) const noexcept
    @@ -215,6 +227,7 @@ struct ConstSoAParticle : SoAParticleBase
     
         using RealType = ParticleReal;
     
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
         ConstSoAParticle(ConstPTD const ptd, long i) :
             m_constparticle_tile_data(ptd), m_index(i)
         {
    @@ -277,6 +290,7 @@ struct SoAParticle : SoAParticleBase
         using ConstType = ConstSoAParticle;
         using RealType = ParticleReal;
     
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
         SoAParticle(PTD const ptd, long i) :
             m_particle_tile_data(ptd), m_index(i)
         {
    @@ -397,12 +411,14 @@ struct ConstParticleTileData
         GpuArray m_rdata;
         GpuArray m_idata;
     
    -    auto const* rdata(const int attribute_index) const
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    +    auto const* rdata (const int attribute_index) const
         {
             return this->m_rdata[attribute_index];
         }
     
    -    auto const* idata(const int attribute_index) const
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    +    auto const* idata (const int attribute_index) const
         {
             return this->m_idata[attribute_index];
         }
    
    From 99b49d211fbfc42ed281e43e403187bdd8ef91f2 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Thu, 16 Feb 2023 12:05:43 -0800
    Subject: [PATCH 081/111] More Host-Device
    
    ---
     Src/Particle/AMReX_MakeParticle.H | 2 ++
     1 file changed, 2 insertions(+)
    
    diff --git a/Src/Particle/AMReX_MakeParticle.H b/Src/Particle/AMReX_MakeParticle.H
    index 438753a995e..ab502c09647 100644
    --- a/Src/Particle/AMReX_MakeParticle.H
    +++ b/Src/Particle/AMReX_MakeParticle.H
    @@ -12,6 +12,7 @@ template 
     struct make_particle
     {
         template <typename PTD>
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
         auto
         operator()(PTD ptd, int i)
         {
    @@ -24,6 +25,7 @@ template 
     struct make_particle::value>::type>
     {
         template <typename PTD>
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
         auto
         operator()(PTD ptd, int index)
         {
    
    From 09acfd001f91b2b4fe7ff027d50c0c9c75ddedd1 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 16 Feb 2023 13:52:47 -0800
    Subject: [PATCH 082/111] add missing include guard
    
    ---
     Src/Particle/AMReX_MakeParticle.H | 5 +++++
     1 file changed, 5 insertions(+)
    
    diff --git a/Src/Particle/AMReX_MakeParticle.H b/Src/Particle/AMReX_MakeParticle.H
    index 438753a995e..f064f0b6dc7 100644
    --- a/Src/Particle/AMReX_MakeParticle.H
    +++ b/Src/Particle/AMReX_MakeParticle.H
    @@ -1,3 +1,6 @@
    +#ifndef AMREX_MAKEPARTICLE_H_
    +#define AMREX_MAKEPARTICLE_H_
    +
     #include 
     
     template< class T >
    @@ -31,3 +34,5 @@ struct make_particle
    Date: Thu, 16 Feb 2023 13:53:25 -0800
    Subject: [PATCH 083/111] some make_particle in ParticleUtil.H
    
    ---
     Src/Particle/AMReX_ParticleUtil.H | 6 +++---
     1 file changed, 3 insertions(+), 3 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H
    index ea7550a1697..c96e82fb239 100644
    --- a/Src/Particle/AMReX_ParticleUtil.H
    +++ b/Src/Particle/AMReX_ParticleUtil.H
    @@ -6,6 +6,7 @@
     #include 
     #include 
     #include 
    +#include <AMReX_MakeParticle.H>
     #include 
     #include 
     #include 
    @@ -263,8 +264,7 @@ numParticlesOutOfRange (Iterator const& pti, IntVect nGrow)
     
         const auto& tile = pti.GetParticleTile();
         const auto np = tile.numParticles();
    -    const auto& aos = tile.GetArrayOfStructs();
    -    const auto pstruct = aos().dataPtr();
    +    const auto ptd = tile.getConstParticleTileData();
         const auto& geom = pti.Geom(pti.GetLevel());
     
         const auto domain = geom.Domain();
    @@ -281,7 +281,7 @@ numParticlesOutOfRange (Iterator const& pti, IntVect nGrow)
         reduce_op.eval(np, reduce_data,
         [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
         {
    -        const ParticleType& p = pstruct[i];
    +        auto p = make_particle{}(ptd,i);
             if ((p.id() < 0)) return false;
             IntVect iv = IntVect(
                 AMREX_D_DECL(int(amrex::Math::floor((p.pos(0)-plo[0])*dxi[0])),
    
    From b37fe614a228b0ec9d06e979e7c6b81d3e5b4a75 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 16 Feb 2023 13:53:37 -0800
    Subject: [PATCH 084/111] remove old assert and add constexpr if
    
    ---
     Src/Particle/AMReX_ParticleContainerI.H | 10 +++++-----
     1 file changed, 5 insertions(+), 5 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
    index 18cef2acf82..5c8d5e9ed3b 100644
    --- a/Src/Particle/AMReX_ParticleContainerI.H
    +++ b/Src/Particle/AMReX_ParticleContainerI.H
    @@ -1178,7 +1178,7 @@ ParticleContainer_impl::SortPart
                 auto inds = m_bins.permutationPtr();
     
                 if (memEfficientSort) {
    -                {
    +                if constexpr(!ParticleType::is_soa_particle) {
                         ParticleVector tmp_particles(np_total);
                         auto src = ptile.getParticleTileData();
                         ParticleType* dst = tmp_particles.data();
    @@ -1528,10 +1528,10 @@ ParticleContainer_impl
               auto& aos = ptile_ptrs[pmap_it]->GetArrayOfStructs();
               auto& soa = ptile_ptrs[pmap_it]->GetStructOfArrays();
     
    -          AMREX_ASSERT_WITH_MESSAGE((NumRealComps() == 0 && NumIntComps() == 0)
    -                                    || aos.size() == soa.size(),
    -              "The AoS and SoA data on this tile are different sizes - "
    -              "perhaps particles have not been initialized correctly?");
    +          // AMREX_ASSERT_WITH_MESSAGE((NumRealComps() == 0 && NumIntComps() == 0)
    +          //                           || aos.size() == soa.size(),
    +          //     "The AoS and SoA data on this tile are different sizes - "
    +          //     "perhaps particles have not been initialized correctly?");
               unsigned npart = aos.numParticles();
               ParticleLocData pld;
     
    
    From 49f672ca2932077d295e2da39bd7d8cf2a99b170 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Thu, 16 Feb 2023 14:56:57 -0800
    Subject: [PATCH 085/111] Cleaning
    
    ---
     Src/Particle/AMReX_ParticleTile.H | 3 +--
     1 file changed, 1 insertion(+), 2 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H
    index 885123096a2..d2eb2534f60 100644
    --- a/Src/Particle/AMReX_ParticleTile.H
    +++ b/Src/Particle/AMReX_ParticleTile.H
    @@ -1061,7 +1061,6 @@ private:
         mutable amrex::PODVector >m_runtime_i_cptrs;
     };
     
    -} // namespace amrex;
    +} // namespace amrex
     
     #endif // AMREX_PARTICLETILE_H_
    -
    
    From 5af2224e50b9ef9b5250e5c525c6c8cc5d06f125 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Thu, 16 Feb 2023 15:26:09 -0800
    Subject: [PATCH 086/111] SOAParticle: 1D and 2D Support
    
    ---
     Tests/Particles/SOAParticle/main.cpp | 22 +++++++++-------------
     1 file changed, 9 insertions(+), 13 deletions(-)
    
    diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp
    index 900182d87e8..d137a1a56d7 100644
    --- a/Tests/Particles/SOAParticle/main.cpp
    +++ b/Tests/Particles/SOAParticle/main.cpp
    @@ -14,9 +14,9 @@ using namespace amrex;
     template  class Allocator=DefaultAllocator>
     void addParticles ()
     {
    -    int is_per[BL_SPACEDIM];
    -    for (int i = 0; i < BL_SPACEDIM; i++)
    -        is_per[i] = 1;
    +    int is_per[AMREX_SPACEDIM];
    +    for (int d = 0; d < AMREX_SPACEDIM; d++)
    +        is_per[d] = 1;
     
         RealBox real_box;
         for (int n = 0; n < AMREX_SPACEDIM; n++)
    @@ -55,10 +55,8 @@ void addParticles ()
     
         for (int i = 0; i < add_num_particles; ++i)
         {
    -        ptile1.pos(i, 0) = 12.0;
    -        ptile1.pos(i, 1) = 12.0;
    -        ptile1.pos(i, 2) = 12.0;
    -
    +        for (int d = 0; d < AMREX_SPACEDIM; d++)
    +            ptile1.pos(i, d) = 12.0;
             ptile1.push_back_int(0, ParticleType::NextID());
             ptile1.push_back_int(1, amrex::ParallelDescriptor::MyProc());
         }
    @@ -113,9 +111,8 @@ void addParticles ()
             ParallelFor( np, [=] AMREX_GPU_DEVICE (long ip)
             {
                 ParticleType p(ptd, ip);
    -            p.pos(0) += 1;
    -            p.pos(1) += 1;
    -            p.pos(2) += 1;
    +            for (int d = 0; d < AMREX_SPACEDIM; d++)
    +                p.pos(d) += 1;
             });
     
     
    @@ -151,10 +148,9 @@ void addParticles ()
             pc,
             [=] AMREX_GPU_DEVICE(const ConstPTDType& ptd, const int i) noexcept
             {
    -
                 const amrex::ParticleReal x = ptd.rdata(0)[i];
    -            const amrex::ParticleReal y = ptd.rdata(1)[i];
    -            const amrex::ParticleReal z = ptd.rdata(2)[i];
    +            const amrex::ParticleReal y = AMREX_SPACEDIM >= 2 ? ptd.rdata(1)[i] : 0.0;
    +            const amrex::ParticleReal z = AMREX_SPACEDIM >= 3 ? ptd.rdata(2)[i] : 0.0;
     
                 amrex::ParticleReal const w = ptd.rdata(1)[i];
     
    
    From 3bd8972dbab7459e3a06fe9ef3d10a51edf01f3d Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Thu, 16 Feb 2023 15:56:39 -0800
    Subject: [PATCH 087/111] ParticleTile: Const Ptr Interfaces
    
    ---
     Src/Particle/AMReX_ParticleTile.H | 12 ++++++++++++
     1 file changed, 12 insertions(+)
    
    diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H
    index d2eb2534f60..42dda2dc4f0 100644
    --- a/Src/Particle/AMReX_ParticleTile.H
    +++ b/Src/Particle/AMReX_ParticleTile.H
    @@ -56,12 +56,24 @@ struct ParticleTileData
             return this->m_rdata[attribute_index];
         }
     
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    +    auto const * rdata (const int attribute_index) const
    +    {
    +        return this->m_rdata[attribute_index];
    +    }
    +
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
         auto* idata (const int attribute_index)
         {
             return this->m_idata[attribute_index];
         }
     
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    +    auto const * idata (const int attribute_index) const
    +    {
    +        return this->m_idata[attribute_index];
    +    }
    +
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
         void packParticleData (char* buffer, int src_index, std::size_t dst_offset,
                                const int* comm_real, const int * comm_int) const noexcept
    
    From f2bdf6f3b1c19226d601237cc8975e9fc6a028a4 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Thu, 16 Feb 2023 15:57:59 -0800
    Subject: [PATCH 088/111] SOAParticleTest: Improve & Clean
    
    ---
     Tests/Particles/SOAParticle/main.cpp | 46 ++++++++++++----------------
     1 file changed, 19 insertions(+), 27 deletions(-)
    
    diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp
    index d137a1a56d7..f25317fcdfe 100644
    --- a/Tests/Particles/SOAParticle/main.cpp
    +++ b/Tests/Particles/SOAParticle/main.cpp
    @@ -36,17 +36,11 @@ void addParticles ()
         DistributionMapping dm(ba);
     
         T_PC pc(geom, dm, ba);
    -
    -    //int const NReal = pc.NStructReal;
    -    //int const NInt = pc.NStructInt;
         int const NArrayReal = pc.NArrayReal;
         int const NArrayInt = pc.NArrayInt;
     
         using ParticleType = typename T_PC::ParticleType;
         using ParticleTileDataType = typename T_PC::ParticleTileType::ParticleTileDataType;
    -    //using RealVector = amrex::PODVector >;
    -    //using IntVector = amrex::PODVector >;
    -    //using SPType = typename T_PC::SuperParticleType;
     
         const int add_num_particles = 5;
     
    @@ -57,6 +51,8 @@ void addParticles ()
         {
             for (int d = 0; d < AMREX_SPACEDIM; d++)
                 ptile1.pos(i, d) = 12.0;
    +        ptile1.getParticleTileData().rdata(AMREX_SPACEDIM)[i] = 1.2;  // w
    +
             ptile1.push_back_int(0, ParticleType::NextID());
             ptile1.push_back_int(1, amrex::ParallelDescriptor::MyProc());
         }
    @@ -66,25 +62,16 @@ void addParticles ()
         using MyParIter = ParIter_impl;
         for (MyParIter pti(pc, lev); pti.isValid(); ++pti) {
             const int np = pti.numParticles();
    -        //const auto t_lev = pti.GetLevel();
    -        //const auto index = pti.GetPairIndex();
    -        // ...
    -
    -        // preparing access to particle data: AoS
    -        //using PType = ImpactXParticleContainer::ParticleType;
    -        //auto& aos = pti.GetArrayOfStructs();
    -        //ParticleType* AMREX_RESTRICT aos_ptr = aos().dataPtr();
    -
             // preparing access to particle data: SoA of Reals
             auto& soa = pti.GetStructOfArrays();
             auto soa_real = soa.GetRealData();
             auto size = soa.size();
             amrex::ParticleReal* const AMREX_RESTRICT part_x = soa_real[0].dataPtr();
    -        amrex::ParticleReal* const AMREX_RESTRICT part_y = soa_real[1].dataPtr();
    -        amrex::ParticleReal* const AMREX_RESTRICT part_z = soa_real[2].dataPtr();
    -        amrex::ParticleReal* const AMREX_RESTRICT part_aaa = soa_real[3].dataPtr();
    +        amrex::ParticleReal* const AMREX_RESTRICT part_y = AMREX_SPACEDIM >= 2 ? soa_real[1].dataPtr() : nullptr;
    +        amrex::ParticleReal* const AMREX_RESTRICT part_z = AMREX_SPACEDIM >= 3 ? soa_real[2].dataPtr() : nullptr;
    +        amrex::ParticleReal* const AMREX_RESTRICT part_w = soa_real[AMREX_SPACEDIM].dataPtr();
             auto& soa_int = pti.GetStructOfArrays().GetIntData();
    -        amrex::ignore_unused(size, part_x, part_y, part_z, part_aaa, soa_int);
    +        amrex::ignore_unused(size, part_x, part_y, part_z, part_w, soa_int);
     
             // Iterating over old Particles
             // ParallelFor( np, [=] AMREX_GPU_DEVICE (long ip)
    @@ -111,15 +98,20 @@ void addParticles ()
             ParallelFor( np, [=] AMREX_GPU_DEVICE (long ip)
             {
                 ParticleType p(ptd, ip);
    -            for (int d = 0; d < AMREX_SPACEDIM; d++)
    -                p.pos(d) += 1;
    +            for (int d = 0; d < AMREX_SPACEDIM; d++) {
    +                p.pos(d) += 1_prt;
    +                AMREX_ALWAYS_ASSERT_WITH_MESSAGE(ptd.rdata(d)[ip] == 13_prt,
    +                                                 "pos attribute expected to be 13");
    +            }
    +
    +            AMREX_ALWAYS_ASSERT_WITH_MESSAGE(ptd.rdata(AMREX_SPACEDIM)[ip] == 1.2_prt,
    +                                             "w attribute expected to be 1.2");
             });
     
     
         }
     
         // create a host-side particle buffer
    -    //ParticleContainer<1,1> pc_og;
         auto tmp = pc.template make_alike();
         tmp.copyParticles(pc, true);
     
    @@ -148,11 +140,11 @@ void addParticles ()
             pc,
             [=] AMREX_GPU_DEVICE(const ConstPTDType& ptd, const int i) noexcept
             {
    -            const amrex::ParticleReal x = ptd.rdata(0)[i];
    -            const amrex::ParticleReal y = AMREX_SPACEDIM >= 2 ? ptd.rdata(1)[i] : 0.0;
    -            const amrex::ParticleReal z = AMREX_SPACEDIM >= 3 ? ptd.rdata(2)[i] : 0.0;
    +            amrex::ParticleReal const x = ptd.rdata(0)[i];
    +            amrex::ParticleReal const y = AMREX_SPACEDIM >= 2 ? ptd.rdata(1)[i] : 0.0;
    +            amrex::ParticleReal const z = AMREX_SPACEDIM >= 3 ? ptd.rdata(2)[i] : 0.0;
     
    -            amrex::ParticleReal const w = ptd.rdata(1)[i];
    +            amrex::ParticleReal const w = ptd.rdata(AMREX_SPACEDIM)[i];
     
                 return amrex::makeTuple(x, x*x, y, y*y, z, z*z, w);
             },
    @@ -188,7 +180,7 @@ int main(int argc, char* argv[])
      {
         amrex::Initialize(argc,argv);
         {
    -        addParticles< ParticleContainerPureSoA<3,4> > ();
    +        addParticles< ParticleContainerPureSoA<3, 4> > ();
         }
         amrex::Finalize();
      }
    
    From 602a0812dc0e2e3207791b29564bf0948bdb9ccf Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 16 Feb 2023 16:23:30 -0800
    Subject: [PATCH 089/111] fix a couple of redistribute bugs
    
    ---
     Src/Particle/AMReX_ParticleContainer.H  |  3 +-
     Src/Particle/AMReX_ParticleContainerI.H | 82 +++++++++++++------------
     2 files changed, 44 insertions(+), 41 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H
    index 9481680ab3b..ec9cb8d6521 100644
    --- a/Src/Particle/AMReX_ParticleContainer.H
    +++ b/Src/Particle/AMReX_ParticleContainer.H
    @@ -1192,7 +1192,8 @@ public:
         *
         * \param prt
         */
    -    bool PeriodicShift (ParticleType& prt) const;
    +    template <typename P>
    +    bool PeriodicShift (P& p) const;
     
         void SetLevelDirectoriesCreated (bool tf) { levelDirectoriesCreated = tf; }
     
    diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
    index 5c8d5e9ed3b..5240423360f 100644
    --- a/Src/Particle/AMReX_ParticleContainerI.H
    +++ b/Src/Particle/AMReX_ParticleContainerI.H
    @@ -18,7 +18,11 @@ ParticleContainer_impl::SetParti
             if (h_redistribute_int_comp[i]) {++num_int_comm_comps;}
         }
     
    -    particle_size = sizeof(ParticleType);
    +    if constexpr(!ParticleType::is_soa_particle) {
    +        particle_size = sizeof(ParticleType);
    +    } else {
    +        particle_size = 0;
    +    }
         superparticle_size = particle_size +
             num_real_comm_comps*sizeof(ParticleReal) + num_int_comm_comps*sizeof(int);
     }
    @@ -179,7 +183,10 @@ ParticleContainer_impl
         AMREX_ASSERT(lev_max <= finestLevel());
     
         // Create a copy "dummy" particle to check for periodic outs.
    -    ParticleType p_prime = p;
    +    Particle<0, 0> p_prime;
    +    AMREX_D_TERM(p_prime.pos(0) = p.pos(0);,
    +                 p_prime.pos(0) = p.pos(0);,
    +                 p_prime.pos(0) = p.pos(0));
         if (PeriodicShift(p_prime)) {
             std::vector< std::pair > isects;
             for (int lev = lev_max; lev >= lev_min; lev--) {
    @@ -234,9 +241,10 @@ ParticleContainer_impl
     
     template  class Allocator>
    +template <typename P>
     bool
     ParticleContainer_impl
    -::PeriodicShift (ParticleType& p) const
    +::PeriodicShift (P& p) const
     {
         const auto& geom = Geom(0);
         const auto plo = geom.ProbLoArray();
    @@ -1525,14 +1533,14 @@ ParticleContainer_impl
               int thread_num = OpenMP::get_thread_num();
               int grid = grid_tile_ids[pmap_it].first;
               int tile = grid_tile_ids[pmap_it].second;
    -          auto& aos = ptile_ptrs[pmap_it]->GetArrayOfStructs();
               auto& soa = ptile_ptrs[pmap_it]->GetStructOfArrays();
    +          auto& aos = ptile_ptrs[pmap_it]->GetArrayOfStructs();
     
               // AMREX_ASSERT_WITH_MESSAGE((NumRealComps() == 0 && NumIntComps() == 0)
               //                           || aos.size() == soa.size(),
               //     "The AoS and SoA data on this tile are different sizes - "
               //     "perhaps particles have not been initialized correctly?");
    -          unsigned npart = aos.numParticles();
    +          unsigned npart = ptile_ptrs[pmap_it]->numParticles();
               ParticleLocData pld;
     
               if constexpr(!ParticleType::is_soa_particle){
    @@ -1560,7 +1568,7 @@ ParticleContainer_impl
                               continue;
                           }
     
    -                    locateParticle(p, pld, lev_min, lev_max, nGrow, local ? grid : -1);
    +                     locateParticle(p, pld, lev_min, lev_max, nGrow, local ? grid : -1);
     
                          particlePostLocate(p, pld, lev);
     
    @@ -1593,9 +1601,9 @@ ParticleContainer_impl
                                     }
     
                                 p.id() = -p.id(); // Invalidate the particle
    -                            }
    +                         }
                           }
    -                    else {
    +                      else {
                            auto& particles_to_send = tmp_remote[who][thread_num];
                            auto old_size = particles_to_send.size();
                            auto new_size = old_size + superparticle_size;
    @@ -1603,22 +1611,22 @@ ParticleContainer_impl
                            std::memcpy(&particles_to_send[old_size], &p, particle_size);
                            char* dst = &particles_to_send[old_size] + particle_size;
                            int array_comp_start = AMREX_SPACEDIM + NStructReal;
    -                          for (int comp = 0; comp < NumRealComps(); comp++) {
    -                               if (h_redistribute_real_comp[array_comp_start + comp]) {
    -                                 std::memcpy(dst, &soa.GetRealData(comp)[pindex], sizeof(ParticleReal));
    -                                 dst += sizeof(ParticleReal);
    -                                }
    -                            }
    -                        array_comp_start = 2 + NStructInt;
    -                        for (int comp = 0; comp < NumIntComps(); comp++) {
    -                              if (h_redistribute_int_comp[array_comp_start + comp]) {
    -                                  std::memcpy(dst, &soa.GetIntData(comp)[pindex], sizeof(int));
    -                                  dst += sizeof(int);
    -                                }
    -                            }
    -
    -                        p.id() = -p.id(); // Invalidate the particle
    -                  }
    +                       for (int comp = 0; comp < NumRealComps(); comp++) {
    +                           if (h_redistribute_real_comp[array_comp_start + comp]) {
    +                               std::memcpy(dst, &soa.GetRealData(comp)[pindex], sizeof(ParticleReal));
    +                               dst += sizeof(ParticleReal);
    +                           }
    +                       }
    +                       array_comp_start = 2 + NStructInt;
    +                       for (int comp = 0; comp < NumIntComps(); comp++) {
    +                           if (h_redistribute_int_comp[array_comp_start + comp]) {
    +                               std::memcpy(dst, &soa.GetIntData(comp)[pindex], sizeof(int));
    +                               dst += sizeof(int);
    +                           }
    +                       }
    +
    +                       p.id() = -p.id(); // Invalidate the particle
    +                    }
     
                         if (p.id() < 0)
                         {
    @@ -1662,9 +1670,6 @@ ParticleContainer_impl
                         }
     
                       if (p.id() < 0){
    -
    -                      ParticleType p2(ptd,last); // here
    -                      p = p2;
                           for (int comp = 0; comp < NumRealComps(); comp++)
                               soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
                           for (int comp = 0; comp < NumIntComps(); comp++)
    @@ -1673,14 +1678,12 @@ ParticleContainer_impl
                           --last;
                           continue;
                         }
    -                //ParticleType& p2(ptd,last)
    +
                       locateParticle(p, pld, lev_min, lev_max, nGrow, local ? grid : -1);
     
                       particlePostLocate(p, pld, lev);
     
                       if (p.id() < 0){
    -                      ParticleType p2(ptd,last);
    -                      p = p2;
                           for (int comp = 0; comp < NumRealComps(); comp++)
                               soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
                           for (int comp = 0; comp < NumIntComps(); comp++)
    @@ -1714,30 +1717,26 @@ ParticleContainer_impl
                           auto old_size = particles_to_send.size();
                           auto new_size = old_size + superparticle_size;
                           particles_to_send.resize(new_size);
    -                      /*
    -                      std::memcpy(&particles_to_send[old_size], &p, particle_size);
    -                      */
    +
                           char* dst = &particles_to_send[old_size];
                           int array_comp_start = AMREX_SPACEDIM + NStructReal;
                           for (int comp = 0; comp < NumRealComps(); comp++) {
                               if (h_redistribute_real_comp[array_comp_start + comp]) {
                                   std::memcpy(dst, &soa.GetRealData(comp)[pindex], sizeof(ParticleReal));
                                   dst += sizeof(ParticleReal);
    -                            }
    +                          }
                             }
                           array_comp_start = 2 + NStructInt;
                           for (int comp = 0; comp < NumIntComps(); comp++) {
                               if (h_redistribute_int_comp[array_comp_start + comp]) {
                                   std::memcpy(dst, &soa.GetIntData(comp)[pindex], sizeof(int));
                                   dst += sizeof(int);
    -                            }
    +                          }
                             }
                           p.id() = -p.id(); // Invalidate the particle
                         }
     
                       if (p.id() < 0){
    -                      ParticleType p2(ptd,last);
    -                      p = p2;
                           for (int comp = 0; comp < NumRealComps(); comp++)
                               soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
                           for (int comp = 0; comp < NumIntComps(); comp++)
    @@ -1750,7 +1749,6 @@ ParticleContainer_impl
                       ++pindex;
                     }
     
    -              aos().erase(aos().begin() + last + 1, aos().begin() + npart);
                   for (int comp = 0; comp < NumRealComps(); comp++) {
                       RealVector& rdata = soa.GetRealData(comp);
                       rdata.erase(rdata.begin() + last + 1, rdata.begin() + npart);
    @@ -2013,9 +2011,12 @@ RedistributeMPI (std::map >& not_ours,
                     ParticleReal pos[AMREX_SPACEDIM];
                     std::memcpy(&pos[0], pbuf, AMREX_SPACEDIM*sizeof(ParticleReal));
     
    +                int idcpu[2];
    +                std::memcpy(&idcpu[0], pbuf + NumRealComps()*sizeof(ParticleReal), 2*sizeof(int));
    +
                     Particle<0, 0> p;
    -                p.id() = 0;
    -                p.cpu() = 0;
    +                p.id() = idcpu[0];
    +                p.cpu() = idcpu[1];
                     AMREX_D_TERM(p.pos(0) = pos[0];,
                                  p.pos(1) = pos[1];,
                                  p.pos(2) = pos[2]);
    @@ -2034,6 +2035,7 @@ RedistributeMPI (std::map >& not_ours,
                     rcv_levs[ipart] = pld.m_lev;
                     rcv_grid[ipart] = pld.m_grid;
                     rcv_tile[ipart] = pld.m_tile;
    +
                     ++ipart;
                 }
             }
    
    From aa244d9fdd62e1de02864c33b9ed1f633a1b9b3a Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Fri, 17 Feb 2023 14:13:29 -0800
    Subject: [PATCH 090/111] fix CPU redistribute with pure SOA particles
    
    ---
     Src/Particle/AMReX_ParticleContainerI.H       |   6 +-
     .../Particles/RedistributeSOA/CMakeLists.txt  |  11 +
     Tests/Particles/RedistributeSOA/GNUmakefile   |  22 +
     Tests/Particles/RedistributeSOA/Make.package  |   4 +
     Tests/Particles/RedistributeSOA/inputs        |  16 +
     Tests/Particles/RedistributeSOA/inputs.rt     |  14 +
     .../Particles/RedistributeSOA/inputs.rt.cuda  |  12 +
     .../RedistributeSOA/inputs.rt.cuda.big        |  12 +
     .../RedistributeSOA/inputs.rt.cuda.mr         |  13 +
     .../inputs.rt.cuda.nonperiodic                |  12 +
     .../RedistributeSOA/inputs.rt.cuda.sort       |  14 +
     Tests/Particles/RedistributeSOA/main.cpp      | 482 ++++++++++++++++++
     12 files changed, 615 insertions(+), 3 deletions(-)
     create mode 100644 Tests/Particles/RedistributeSOA/CMakeLists.txt
     create mode 100644 Tests/Particles/RedistributeSOA/GNUmakefile
     create mode 100644 Tests/Particles/RedistributeSOA/Make.package
     create mode 100644 Tests/Particles/RedistributeSOA/inputs
     create mode 100644 Tests/Particles/RedistributeSOA/inputs.rt
     create mode 100644 Tests/Particles/RedistributeSOA/inputs.rt.cuda
     create mode 100644 Tests/Particles/RedistributeSOA/inputs.rt.cuda.big
     create mode 100644 Tests/Particles/RedistributeSOA/inputs.rt.cuda.mr
     create mode 100644 Tests/Particles/RedistributeSOA/inputs.rt.cuda.nonperiodic
     create mode 100644 Tests/Particles/RedistributeSOA/inputs.rt.cuda.sort
     create mode 100644 Tests/Particles/RedistributeSOA/main.cpp
    
    diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
    index 5240423360f..fbea85eb7c4 100644
    --- a/Src/Particle/AMReX_ParticleContainerI.H
    +++ b/Src/Particle/AMReX_ParticleContainerI.H
    @@ -185,8 +185,8 @@ ParticleContainer_impl
         // Create a copy "dummy" particle to check for periodic outs.
         Particle<0, 0> p_prime;
         AMREX_D_TERM(p_prime.pos(0) = p.pos(0);,
    -                 p_prime.pos(0) = p.pos(0);,
    -                 p_prime.pos(0) = p.pos(0));
    +                 p_prime.pos(1) = p.pos(1);,
    +                 p_prime.pos(2) = p.pos(2));
         if (PeriodicShift(p_prime)) {
             std::vector< std::pair > isects;
             for (int lev = lev_max; lev >= lev_min; lev--) {
    @@ -1683,7 +1683,7 @@ ParticleContainer_impl
     
                       particlePostLocate(p, pld, lev);
     
    -                  if (p.id() < 0){
    +                  if (p.id() < 0) {
                           for (int comp = 0; comp < NumRealComps(); comp++)
                               soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last];
                           for (int comp = 0; comp < NumIntComps(); comp++)
    diff --git a/Tests/Particles/RedistributeSOA/CMakeLists.txt b/Tests/Particles/RedistributeSOA/CMakeLists.txt
    new file mode 100644
    index 00000000000..bca883b4de5
    --- /dev/null
    +++ b/Tests/Particles/RedistributeSOA/CMakeLists.txt
    @@ -0,0 +1,11 @@
    +set(_sources     main.cpp)
    +if (AMReX_CUDA)
    +  set(_input_files inputs.rt.cuda  )
    +else ()
    +  set(_input_files inputs.rt  )
    +endif ()
    +
    +setup_test(_sources _input_files NTASKS 2)
    +
    +unset(_sources)
    +unset(_input_files)
    diff --git a/Tests/Particles/RedistributeSOA/GNUmakefile b/Tests/Particles/RedistributeSOA/GNUmakefile
    new file mode 100644
    index 00000000000..c9f05029f0c
    --- /dev/null
    +++ b/Tests/Particles/RedistributeSOA/GNUmakefile
    @@ -0,0 +1,22 @@
    +AMREX_HOME = ../../../
    +
    +DEBUG	= FALSE
    +
    +DIM	= 3
    +
    +COMP    = gcc
    +
    +USE_MPI   = TRUE
    +USE_OMP   = FALSE
    +USE_CUDA  = FALSE
    +
    +TINY_PROFILE = TRUE
    +USE_PARTICLES = TRUE
    +
    +include $(AMREX_HOME)/Tools/GNUMake/Make.defs
    +
    +include ./Make.package
    +include $(AMREX_HOME)/Src/Base/Make.package
    +include $(AMREX_HOME)/Src/Particle/Make.package
    +
    +include $(AMREX_HOME)/Tools/GNUMake/Make.rules
    diff --git a/Tests/Particles/RedistributeSOA/Make.package b/Tests/Particles/RedistributeSOA/Make.package
    new file mode 100644
    index 00000000000..4497b0e25b9
    --- /dev/null
    +++ b/Tests/Particles/RedistributeSOA/Make.package
    @@ -0,0 +1,4 @@
    +CEXE_sources += main.cpp
    +
    +
    +
    diff --git a/Tests/Particles/RedistributeSOA/inputs b/Tests/Particles/RedistributeSOA/inputs
    new file mode 100644
    index 00000000000..73d71ccf1b8
    --- /dev/null
    +++ b/Tests/Particles/RedistributeSOA/inputs
    @@ -0,0 +1,16 @@
    +redistribute.size = (256, 256, 384)
    +redistribute.max_grid_size = 128
    +redistribute.is_periodic = 1
    +redistribute.num_ppc = 1
    +redistribute.move_dir = (1, 1, 1)
    +redistribute.do_random = 1
    +redistribute.nsteps = 500
    +redistribute.nlevs = 1
    +redistribute.do_regrid = 1
    +
    +redistribute.num_runtime_real = 0
    +redistribute.num_runtime_int = 0
    +
    +redistribute.sort = 0
    +
    +amrex.use_gpu_aware_mpi = 0
    diff --git a/Tests/Particles/RedistributeSOA/inputs.rt b/Tests/Particles/RedistributeSOA/inputs.rt
    new file mode 100644
    index 00000000000..2cc83a0dda6
    --- /dev/null
    +++ b/Tests/Particles/RedistributeSOA/inputs.rt
    @@ -0,0 +1,14 @@
    +redistribute.size = (32, 64, 64)
    +redistribute.max_grid_size = 32
    +redistribute.is_periodic = 1
    +redistribute.num_ppc = 1
    +redistribute.move_dir = (1, 1, 1)
    +redistribute.do_random = 1
    +redistribute.nsteps = 100
    +redistribute.nlevs = 1
    +redistribute.do_regrid = 1
    +
    +redistribute.num_runtime_real = 0
    +redistribute.num_runtime_int = 0
    +
    +particles.do_tiling=1
    diff --git a/Tests/Particles/RedistributeSOA/inputs.rt.cuda b/Tests/Particles/RedistributeSOA/inputs.rt.cuda
    new file mode 100644
    index 00000000000..9253741d78c
    --- /dev/null
    +++ b/Tests/Particles/RedistributeSOA/inputs.rt.cuda
    @@ -0,0 +1,12 @@
    +redistribute.size = (32, 64, 64)
    +redistribute.max_grid_size = 32
    +redistribute.is_periodic = 1
    +redistribute.num_ppc = 1
    +redistribute.move_dir = (1, 1, 1)
    +redistribute.do_random = 1
    +redistribute.nsteps = 100
    +redistribute.nlevs = 1
    +redistribute.do_regrid = 1
    +
    +redistribute.num_runtime_real = 2
    +redistribute.num_runtime_int = 3
    diff --git a/Tests/Particles/RedistributeSOA/inputs.rt.cuda.big b/Tests/Particles/RedistributeSOA/inputs.rt.cuda.big
    new file mode 100644
    index 00000000000..d9066ba21df
    --- /dev/null
    +++ b/Tests/Particles/RedistributeSOA/inputs.rt.cuda.big
    @@ -0,0 +1,12 @@
    +redistribute.size = (64, 64, 128)
    +redistribute.max_grid_size = 64
    +redistribute.is_periodic = 1
    +redistribute.num_ppc = 4
    +redistribute.move_dir = (1, 1, 1)
    +redistribute.do_random = 1
    +redistribute.nsteps = 0
    +redistribute.nlevs = 1
    +redistribute.do_regrid = 1
    +
    +redistribute.num_runtime_real = 2
    +redistribute.num_runtime_int = 3
    diff --git a/Tests/Particles/RedistributeSOA/inputs.rt.cuda.mr b/Tests/Particles/RedistributeSOA/inputs.rt.cuda.mr
    new file mode 100644
    index 00000000000..e74b8d243f7
    --- /dev/null
    +++ b/Tests/Particles/RedistributeSOA/inputs.rt.cuda.mr
    @@ -0,0 +1,13 @@
    +redistribute.size = (32, 64, 64)
    +redistribute.max_grid_size = 32
    +redistribute.is_periodic = 1
    +redistribute.num_ppc = 1
    +redistribute.move_dir = (1, 1, 1)
    +redistribute.do_random = 1
    +redistribute.nsteps = 100
    +redistribute.nlevs = 3
    +redistribute.test_level_lost = 3
    +redistribute.do_regrid = 1
    +
    +redistribute.num_runtime_real = 1
    +redistribute.num_runtime_int = 0
    diff --git a/Tests/Particles/RedistributeSOA/inputs.rt.cuda.nonperiodic b/Tests/Particles/RedistributeSOA/inputs.rt.cuda.nonperiodic
    new file mode 100644
    index 00000000000..2fc4168c8aa
    --- /dev/null
    +++ b/Tests/Particles/RedistributeSOA/inputs.rt.cuda.nonperiodic
    @@ -0,0 +1,12 @@
    +redistribute.size = (32, 64, 64)
    +redistribute.max_grid_size = 32
    +redistribute.is_periodic = 0
    +redistribute.num_ppc = 1
    +redistribute.move_dir = (1, 1, 1)
    +redistribute.do_random = 0
    +redistribute.nsteps = 100
    +redistribute.nlevs = 1
    +redistribute.do_regrid = 1
    +
    +redistribute.num_runtime_real = 0
    +redistribute.num_runtime_int = 1
    diff --git a/Tests/Particles/RedistributeSOA/inputs.rt.cuda.sort b/Tests/Particles/RedistributeSOA/inputs.rt.cuda.sort
    new file mode 100644
    index 00000000000..3cfa52d24a2
    --- /dev/null
    +++ b/Tests/Particles/RedistributeSOA/inputs.rt.cuda.sort
    @@ -0,0 +1,14 @@
    +redistribute.size = (32, 64, 64)
    +redistribute.max_grid_size = 32
    +redistribute.is_periodic = 1
    +redistribute.num_ppc = 1
    +redistribute.move_dir = (1, 1, 1)
    +redistribute.do_random = 1
    +redistribute.nsteps = 100
    +redistribute.nlevs = 1
    +redistribute.do_regrid = 1
    +
    +redistribute.sort = 1
    +
    +redistribute.num_runtime_real = 2
    +redistribute.num_runtime_int = 3
    diff --git a/Tests/Particles/RedistributeSOA/main.cpp b/Tests/Particles/RedistributeSOA/main.cpp
    new file mode 100644
    index 00000000000..ff31d81939d
    --- /dev/null
    +++ b/Tests/Particles/RedistributeSOA/main.cpp
    @@ -0,0 +1,482 @@
    +#include 
    +#include 
    +#include 
    +#include 
    +
    +using namespace amrex;
    +
    +static constexpr int NR = 7;  // count of compile-time real components: extent of host_real and bound of the NR loops below
    +static constexpr int NI = 4;  // count of compile-time int components: extent of host_int and bound of the NI loops below
    +
    +int num_runtime_real = 0;  // runtime real components added by the container constructor; set from the num_runtime_real input
    +int num_runtime_int = 0;  // runtime int components added by the container constructor; set from the num_runtime_int input
    +
    +bool remove_negative = true;  // when false, testRedistribute also runs Redistribute with remove_neg=false
    +
    +void get_position_unit_cell (Real* r, const IntVect& nppc, int i_part)  // writes into r the cell-relative position of particle slot i_part
    +{
    +    int nx = nppc[0];  // slots per cell along x
    +#if AMREX_SPACEDIM > 1
    +    int ny = nppc[1];
    +#else
    +    int ny = 1;  // a missing dimension is treated as a single slot
    +#endif
    +#if AMREX_SPACEDIM > 2
    +    int nz = nppc[2];
    +#else
    +    int nz = 1;
    +#endif
    +
    +    int ix_part = i_part/(ny * nz);  // split the linear slot index; x varies slowest
    +    int iy_part = (i_part % (ny * nz)) % ny;  // y varies fastest
    +    int iz_part = (i_part % (ny * nz)) / ny;
    +
    +    r[0] = (0.5+ix_part)/nx;  // slot centre expressed as a fraction of the cell extent
    +    r[1] = (0.5+iy_part)/ny;  // NOTE(review): r[1] and r[2] are written for every AMREX_SPACEDIM, so r must hold three values - confirm at the call site
    +    r[2] = (0.5+iz_part)/nz;
    +}
    +
    +class TestParticleContainer  // pure-SoA particle container extended with the helpers this redistribution test drives
    +    : public amrex::ParticleContainerPureSoA
    +{
    +
    +public:
    +
    +    TestParticleContainer (const Vector            & a_geom,
    +                           const Vector & a_dmap,
    +                           const Vector            & a_ba,
    +                           const Vector             & a_rr)
    +        : amrex::ParticleContainerPureSoA(a_geom, a_dmap, a_ba, a_rr)
    +    {
    +        for (int i = 0; i < num_runtime_real; ++i)  // add as many runtime real components as the global requests
    +        {
    +            AddRealComp(true);
    +        }
    +        for (int i = 0; i < num_runtime_int; ++i)  // same for runtime int components
    +        {
    +            AddIntComp(true);
    +        }
    +    }
    +
    +    void RedistributeLocal (bool remove_neg=true)  // Redistribute over levels 0..finestLevel() with local = 1
    +    {
    +        const int lev_min = 0;
    +        const int lev_max = finestLevel();
    +        const int nGrow = 0;
    +        const int local = 1;  // the only argument that differs from RedistributeGlobal
    +        Redistribute(lev_min, lev_max, nGrow, local, remove_neg);
    +    }
    +
    +    void RedistributeGlobal (bool remove_neg=true)  // Redistribute over levels 0..finestLevel() with local = 0
    +    {
    +        const int lev_min = 0;
    +        const int lev_max = finestLevel();
    +        const int nGrow = 0;
    +        const int local = 0;  // the only argument that differs from RedistributeLocal
    +        Redistribute(lev_min, lev_max, nGrow, local, remove_neg);
    +    }
    +
    +    void InitParticles (const amrex::IntVect& a_num_particles_per_cell)  // stages level-0 particles on the host, copies them into each tile, then redistributes
    +    {
    +        BL_PROFILE("InitParticles");
    +
    +        const int lev = 0;  // only add particles on level 0
    +        const Real* dx = Geom(lev).CellSize();
    +        const Real* plo = Geom(lev).ProbLo();
    +
    +        const int num_ppc = AMREX_D_TERM( a_num_particles_per_cell[0],
    +                                         *a_num_particles_per_cell[1],
    +                                         *a_num_particles_per_cell[2]);
    +
    +        for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi)
    +        {
    +            const Box& tile_box  = mfi.tilebox();
    +
    +            std::array, NR> host_real;  // host staging buffers, one per compile-time component
    +            std::array, NI> host_int;
    +
    +            std::vector > host_runtime_real(NumRuntimeRealComps());  // and one per runtime component
    +            std::vector > host_runtime_int(NumRuntimeIntComps());
    +
    +            for (IntVect iv = tile_box.smallEnd(); iv <= tile_box.bigEnd(); tile_box.next(iv))
    +            {
    +                for (int i_part=0; i_part (plo[0] + (iv[0] + r[0])*dx[0]));
    +#if AMREX_SPACEDIM > 1
    +                    host_real[1].push_back(static_cast (plo[1] + (iv[1] + r[1])*dx[1]));
    +#endif
    +#if AMREX_SPACEDIM > 2
    +                    host_real[2].push_back(static_cast (plo[2] + (iv[2] + r[2])*dx[2]));
    +#endif
    +
    +                    for (int i = AMREX_SPACEDIM; i < NR; ++i)  // non-position real components are filled with id; checkAnswer compares them to p.id()
    +                        host_real[i].push_back(id);
    +                    for (int i = 2; i < NI; ++i)  // int components 0 and 1 are not written by this loop
    +                        host_int[i].push_back(id);
    +                    for (int i = 0; i < NumRuntimeRealComps(); ++i)
    +                        host_runtime_real[i].push_back(id);
    +                    for (int i = 0; i < NumRuntimeIntComps(); ++i)
    +                        host_runtime_int[i].push_back(id);
    +                }
    +            }
    +
    +            auto& particle_tile = DefineAndReturnParticleTile(lev, mfi.index(), mfi.LocalTileIndex());
    +            auto old_size = particle_tile.GetArrayOfStructs().size();  // NOTE(review): size is read from the AoS although the base class is ParticleContainerPureSoA - confirm it reflects the tile's particle count
    +            auto new_size = old_size + host_real[0].size();
    +            particle_tile.resize(new_size);
    +
    +            auto& soa = particle_tile.GetStructOfArrays();
    +            for (int i = 0; i < NR; ++i)  // each staged component is copied in starting at offset old_size
    +            {
    +                Gpu::copyAsync(Gpu::hostToDevice,
    +                               host_real[i].begin(),
    +                               host_real[i].end(),
    +                               soa.GetRealData(i).begin() + old_size);
    +            }
    +
    +            for (int i = 0; i < NI; ++i)
    +            {
    +                Gpu::copyAsync(Gpu::hostToDevice,
    +                               host_int[i].begin(),
    +                               host_int[i].end(),
    +                               soa.GetIntData(i).begin() + old_size);
    +            }
    +            for (int i = 0; i < NumRuntimeRealComps(); ++i)
    +            {
    +                Gpu::copyAsync(Gpu::hostToDevice,
    +                               host_runtime_real[i].begin(),
    +                               host_runtime_real[i].end(),
    +                               soa.GetRealData(NR+i).begin() + old_size);  // runtime components are indexed after the NR compile-time ones
    +            }
    +
    +            for (int i = 0; i < NumRuntimeIntComps(); ++i)
    +            {
    +                Gpu::copyAsync(Gpu::hostToDevice,
    +                               host_runtime_int[i].begin(),
    +                               host_runtime_int[i].end(),
    +                               soa.GetIntData(NI+i).begin() + old_size);  // likewise after the NI compile-time ones
    +            }
    +
    +            Gpu::streamSynchronize();  // the host buffers are local to this iteration, so the async copies are completed here
    +        }
    +
    +        RedistributeLocal();
    +    }
    +
    +    void moveParticles (const IntVect& move_dir, int do_random)  // shifts every particle on every level by move_dir cells, optionally randomised
    +    {
    +        BL_PROFILE("TestParticleContainer::moveParticles");
    +
    +        for (int lev = 0; lev <= finestLevel(); ++lev)
    +        {
    +            const auto dx = Geom(lev).CellSizeArray();
    +            auto& plev  = GetParticles(lev);
    +
    +            for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi)
    +            {
    +                int gid = mfi.index();
    +                int tid = mfi.LocalTileIndex();
    +                auto& ptile = plev[std::make_pair(gid, tid)];  // tiles are keyed by (grid index, local tile index)
    +                auto ptd = ptile.getParticleTileData();
    +                const size_t np = ptile.numParticles();
    +
    +                if (do_random == 0)  // deterministic shift: exactly move_dir[d]*dx[d] per direction
    +                {
    +                    amrex::ParallelFor( np, [=] AMREX_GPU_DEVICE (int i) noexcept
    +                    {
    +                        ParticleType p(ptd, i);
    +                        p.pos(0) += static_cast (move_dir[0]*dx[0]);
    +#if AMREX_SPACEDIM > 1
    +                        p.pos(1) += static_cast (move_dir[1]*dx[1]);
    +#endif
    +#if AMREX_SPACEDIM > 2
    +                        p.pos(2) += static_cast (move_dir[2]*dx[2]);
    +#endif
    +                    });
    +                }
    +                else  // random shift: the same step scaled by (2*Random - 1), drawn separately per direction
    +                {
    +                    amrex::ParallelForRNG( np,
    +                    [=] AMREX_GPU_DEVICE (int i, RandomEngine const& engine) noexcept
    +                    {
    +                        ParticleType p(ptd, i);
    +                        p.pos(0) += static_cast ((2*amrex::Random(engine)-1)*move_dir[0]*dx[0]);
    +#if AMREX_SPACEDIM > 1
    +                        p.pos(1) += static_cast ((2*amrex::Random(engine)-1)*move_dir[1]*dx[1]);
    +#endif
    +#if AMREX_SPACEDIM > 2
    +                        p.pos(2) += static_cast ((2*amrex::Random(engine)-1)*move_dir[2]*dx[2]);
    +#endif
    +                    });
    +                }
    +            }
    +        }
    +    }
    +
    +    void negateEven ()  // flips the sign of every even particle id; calling it twice restores the original ids
    +    {
    +        BL_PROFILE("TestParticleContainer::invalidateEven");  // NOTE(review): the label says invalidateEven while the method is named negateEven
    +
    +        for (int lev = 0; lev <= finestLevel(); ++lev)
    +        {
    +            auto& plev  = GetParticles(lev);
    +            for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi)
    +            {
    +                int gid = mfi.index();
    +                int tid = mfi.LocalTileIndex();
    +                auto& ptile = plev[std::make_pair(gid, tid)];
    +                auto ptd = ptile.getParticleTileData();
    +                const size_t np = ptile.numParticles();
    +                amrex::ParallelFor( np, [=] AMREX_GPU_DEVICE (int i) noexcept
    +                {
    +                    ParticleType p(ptd, i);
    +                    if (p.id() % 2 == 0) {  // holds for negative even ids too, which is what makes the second call undo the first
    +                        p.id() = -p.id();
    +                    }
    +                });
    +            }
    +        }
    +    }
    +
    +    void checkAnswer () const  // asserts OK() and that every id-filled component still equals the particle's id
    +    {
    +        BL_PROFILE("TestParticleContainer::checkAnswer");
    +
    +        AMREX_ALWAYS_ASSERT(OK());
    +
    +        int num_rr = NumRuntimeRealComps();  // read once, outside the per-particle loop
    +        int num_ii = NumRuntimeIntComps();
    +
    +        for (int lev = 0; lev <= finestLevel(); ++lev)
    +        {
    +            auto& plev  = GetParticles(lev);
    +            for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi)
    +            {
    +                int gid = mfi.index();
    +                int tid = mfi.LocalTileIndex();
    +                auto& ptile = plev.at(std::make_pair(gid, tid));  // looked up with .at() here, unlike the operator[] used by the mutating methods
    +                const auto ptd = ptile.getConstParticleTileData();
    +                const size_t np = ptile.numParticles();
    +
    +                AMREX_FOR_1D ( np, i,
    +                {
    +                    ConstParticleType p(ptd, i);
    +                    for (int j = AMREX_SPACEDIM; j < NR; ++j)  // same component ranges that InitParticles filled with id
    +                    {
    +                        AMREX_ALWAYS_ASSERT(ptd.m_rdata[j][i] == p.id());
    +                    }
    +                    for (int j = 2; j < NI; ++j)
    +                    {
    +                        AMREX_ALWAYS_ASSERT(ptd.m_idata[j][i] == p.id());
    +                    }
    +                    for (int j = 0; j < num_rr; ++j)  // runtime components live in their own arrays, indexed from 0
    +                    {
    +                        AMREX_ALWAYS_ASSERT(ptd.m_runtime_rdata[j][i] == p.id());
    +                    }
    +                    for (int j = 0; j < num_ii; ++j)
    +                    {
    +                        AMREX_ALWAYS_ASSERT(ptd.m_runtime_idata[j][i] == p.id());
    +                    }
    +                });
    +            }
    +        }
    +    }
    +};
    +
    +struct TestParams  // settings filled in by get_test_params and consumed by testRedistribute
    +{
    +    IntVect size;  // level-0 domain size in cells; also used as the upper corner of the physical box
    +    int max_grid_size;  // passed to ba[lev].maxSize on every level
    +    int num_ppc;  // particles per cell, applied in each direction
    +    int is_periodic;  // copied into every entry of is_per
    +    IntVect move_dir;  // per-direction displacement in units of the cell size
    +    int do_random;  // nonzero selects the randomised branch of moveParticles
    +    int nsteps;  // number of move/redistribute iterations
    +    int nlevs;  // number of levels to build
    +    int do_regrid;  // nonzero: also test two replacement distribution maps
    +    int sort;  // nonzero: call SortParticlesByCell after init and after each step
    +    int test_level_lost = 0;  // nonzero: redefine the container with one level fewer and check the count
    +};
    +
    +void testRedistribute();
    +
    +int main (int argc, char* argv[])  // runs the single redistribute test between amrex::Initialize and amrex::Finalize
    +{
    +    amrex::Initialize(argc,argv);
    +
    +    amrex::Print() << "Running redistribute test \n";
    +    testRedistribute();  // failures surface through AMREX_ALWAYS_ASSERT inside the test
    +
    +    amrex::Finalize();
    +}
    +
    +void get_test_params(TestParams& params, const std::string& prefix)  // fills params and the file-scope globals from ParmParse entries under prefix
    +{
    +    ParmParse pp(prefix);
    +    pp.get("size", params.size);  // entries read with get()
    +    pp.get("max_grid_size", params.max_grid_size);
    +    pp.get("num_ppc", params.num_ppc);
    +    pp.get("is_periodic", params.is_periodic);
    +    pp.get("move_dir", params.move_dir);
    +    pp.get("do_random", params.do_random);
    +    pp.get("nsteps", params.nsteps);
    +    pp.get("nlevs", params.nlevs);
    +    pp.get("do_regrid", params.do_regrid);
    +    pp.query("test_level_lost", params.test_level_lost);  // entries read with query(); their targets already hold a default
    +    pp.query("num_runtime_real", num_runtime_real);  // these three write the file-scope globals, not params
    +    pp.query("num_runtime_int", num_runtime_int);
    +    pp.query("remove_negative", remove_negative);
    +
    +    params.sort = 0;  // sort has no in-class initializer, so its default is assigned here before the query
    +    pp.query("sort", params.sort);
    +}
    +
    +void testRedistribute ()  // builds the hierarchy, fills it with particles, then checks them after moves, regrids and an optional level removal
    +{
    +    BL_PROFILE("testRedistribute");
    +    TestParams params;
    +    get_test_params(params, "redistribute");
    +
    +    int is_per[BL_SPACEDIM];
    +    for (int i = 0; i < BL_SPACEDIM; i++)
    +        is_per[i] = params.is_periodic;  // one flag applied to every direction
    +
    +    Vector rr(params.nlevs-1);
    +    for (int lev = 1; lev < params.nlevs; lev++)
    +        rr[lev-1] = IntVect(AMREX_D_DECL(2,2,2));  // refinement ratio of 2 between consecutive levels
    +
    +    RealBox real_box;
    +    for (int n = 0; n < BL_SPACEDIM; n++)
    +    {
    +        real_box.setLo(n, 0.0);
    +        real_box.setHi(n, params.size[n]);  // physical extent equals the level-0 cell count in each direction
    +    }
    +
    +    IntVect domain_lo(AMREX_D_DECL(0, 0, 0));
    +    IntVect domain_hi(AMREX_D_DECL(params.size[0]-1,params.size[1]-1,params.size[2]-1));
    +    const Box base_domain(domain_lo, domain_hi);
    +
    +    Vector geom(params.nlevs);
    +    geom[0].define(base_domain, &real_box, CoordSys::cartesian, is_per);
    +    for (int lev = 1; lev < params.nlevs; lev++) {  // finer levels: refine the previous domain, keep the same physical box
    +        geom[lev].define(amrex::refine(geom[lev-1].Domain(), rr[lev-1]),
    +                         &real_box, CoordSys::cartesian, is_per);
    +    }
    +
    +    Vector ba(params.nlevs);
    +    Vector dm(params.nlevs);
    +    IntVect lo = IntVect(AMREX_D_DECL(0, 0, 0));
    +    IntVect size = params.size;
    +    for (int lev = 0; lev < params.nlevs; ++lev)
    +    {
    +        ba[lev].define(Box(lo, lo+params.size-1));  // every level covers params.size cells; only the lower corner changes
    +        ba[lev].maxSize(params.max_grid_size);
    +        dm[lev].define(ba[lev]);
    +        lo += size/2;  // lower corner for the next level: shift by half of the running size
    +        size *= 2;  // the running size doubles with each level
    +    }
    +
    +    TestParticleContainer pc(geom, dm, ba, rr);
    +
    +    int npc = params.num_ppc;
    +    IntVect nppc = IntVect(AMREX_D_DECL(npc, npc, npc));
    +
    +    amrex::Print() << "About to initialize particles \n";
    +
    +    pc.InitParticles(nppc);
    +
    +    pc.checkAnswer();
    +
    +    auto np_old = pc.TotalNumberOfParticles();  // baseline for the final conservation check
    +
    +    if (params.sort) pc.SortParticlesByCell();
    +
    +    for (int i = 0; i < params.nsteps; ++i)  // move, redistribute and re-check once per step
    +    {
    +        amrex::Print() << "step " << i << "\n";
    +        pc.moveParticles(params.move_dir, params.do_random);
    +        if (!remove_negative) {  // extra pass: negate even ids, redistribute with remove_neg=false, then negate back
    +            auto old = pc.TotalNumberOfParticles();
    +            pc.negateEven();
    +            pc.RedistributeLocal(false);
    +            AMREX_ALWAYS_ASSERT(old == pc.TotalNumberOfParticles(false));  // NOTE(review): the false argument presumably counts negative ids too - confirm
    +            pc.negateEven();
    +        }
    +        pc.RedistributeLocal();
    +        if (params.sort) pc.SortParticlesByCell();
    +        pc.checkAnswer();
    +    }
    +
    +    if (params.do_regrid)
    +    {
    +        const int NProcs = ParallelDescriptor::NProcs();
    +        {
    +            for (int lev = 0; lev < params.nlevs; ++lev)  // first new mapping: box i is assigned to rank i % NProcs
    +            {
    +                DistributionMapping new_dm;
    +                Vector pmap;
    +                for (int i = 0; i < ba[lev].size(); ++i) pmap.push_back(i % NProcs);
    +                new_dm.define(pmap);
    +                pc.SetParticleDistributionMap(lev, new_dm);
    +            }
    +            if (!remove_negative) {  // same negate / keep / restore pass as in the step loop, with a global redistribute
    +                auto old = pc.TotalNumberOfParticles();
    +                pc.negateEven();
    +                pc.RedistributeGlobal(false);
    +                AMREX_ALWAYS_ASSERT(old == pc.TotalNumberOfParticles(false));
    +                pc.negateEven();
    +            }
    +            pc.RedistributeGlobal();
    +            pc.checkAnswer();
    +        }
    +
    +        {
    +            for (int lev = 0; lev < params.nlevs; ++lev)  // second new mapping: shifted by one, box i goes to rank (i+1) % NProcs
    +            {
    +                DistributionMapping new_dm;
    +                Vector pmap;
    +                for (int i = 0; i < ba[lev].size(); ++i) pmap.push_back((i+1) % NProcs);
    +                new_dm.define(pmap);
    +                pc.SetParticleDistributionMap(lev, new_dm);
    +            }
    +            if (!remove_negative) {
    +                auto old = pc.TotalNumberOfParticles();
    +                pc.negateEven();
    +                pc.RedistributeGlobal(false);
    +                AMREX_ALWAYS_ASSERT(old == pc.TotalNumberOfParticles(false));
    +                pc.negateEven();
    +            }
    +            pc.RedistributeGlobal();
    +            pc.checkAnswer();
    +        }
    +
    +        if (params.test_level_lost) {  // only reached when do_regrid is set, because it sits inside that branch
    +            AMREX_ALWAYS_ASSERT(params.nlevs > 2);
    +            auto np_before_level_lost = pc.TotalNumberOfParticles();
    +            Vector new_ba = ba; new_ba.resize(ba.size()-1);  // copies of the hierarchy description with the last level dropped
    +            Vector new_dm = dm; new_dm.resize(dm.size()-1);
    +            Vector new_geom = geom; new_geom.resize(geom.size()-1);
    +            Vector new_rr = rr; new_rr.resize(rr.size()-1);
    +            pc.ParticleContainerBase::Define(new_geom, new_dm, new_ba, new_rr);
    +            pc.Redistribute();
    +            amrex::Print() << np_before_level_lost << "\n";
    +            amrex::Print() << pc.TotalNumberOfParticles() << "\n";
    +            AMREX_ALWAYS_ASSERT(np_before_level_lost == pc.TotalNumberOfParticles());  // removing the level must not change the particle count
    +        }
    +    }
    +
    +    if (geom[0].isAllPeriodic()) {  // the count is compared with the initial one only for fully periodic domains
    +        amrex::Print() << np_old << " " << pc.TotalNumberOfParticles() << "\n";
    +        AMREX_ALWAYS_ASSERT(np_old == pc.TotalNumberOfParticles());
    +    }
    +
    +    // the way this test is set up, if we make it here we pass
    +    amrex::Print() << "pass \n";
    +}
    
    From 610c1303430ca7d3059fe1847274845cd6589c97 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 6 Apr 2023 18:31:02 -0700
    Subject: [PATCH 091/111] fix up RedistributeGPU for SOA particles
    
    ---
     Src/Particle/AMReX_MakeParticle.H       |  6 +-
     Src/Particle/AMReX_Particle.H           |  1 +
     Src/Particle/AMReX_ParticleContainerI.H | 90 ++++++++-----------------
     Src/Particle/AMReX_ParticleTile.H       | 70 ++++++++++++++++++-
     Src/Particle/AMReX_ParticleUtil.H       | 29 ++++----
     5 files changed, 117 insertions(+), 79 deletions(-)
    
    diff --git a/Src/Particle/AMReX_MakeParticle.H b/Src/Particle/AMReX_MakeParticle.H
    index d49f256cf1e..31a3e8d777f 100644
    --- a/Src/Particle/AMReX_MakeParticle.H
    +++ b/Src/Particle/AMReX_MakeParticle.H
    @@ -16,8 +16,8 @@ struct make_particle
     {
         template 
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    auto
    -    operator()(PTD ptd, int i)
    +    auto&
    +    operator() (PTD ptd, int i)
         {
             // legacy Particle (AoS)
             return ptd.m_aos[i];
    @@ -30,7 +30,7 @@ struct make_particle
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
         auto
    -    operator()(PTD ptd, int index)
    +    operator() (PTD ptd, int index)
         {
             // SoAParticle
             return T_ParticleType(ptd, index);
    diff --git a/Src/Particle/AMReX_Particle.H b/Src/Particle/AMReX_Particle.H
    index 10a3a8dca81..73e2ce6de8a 100644
    --- a/Src/Particle/AMReX_Particle.H
    +++ b/Src/Particle/AMReX_Particle.H
    @@ -192,6 +192,7 @@ struct SoAParticleBase
     {
         static constexpr int NReal=0;
         static constexpr int NInt=0;
    +    static constexpr bool is_soa_particle = true;
     };
     
     /** \brief The struct used to store particles.
    diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
    index fbea85eb7c4..9b5bcc5ba6f 100644
    --- a/Src/Particle/AMReX_ParticleContainerI.H
    +++ b/Src/Particle/AMReX_ParticleContainerI.H
    @@ -402,43 +402,22 @@ ParticleContainer_impl::NumberOf
             int gid = pti.index();
             if (only_valid)
             {
    -            if constexpr(!ParticleType::is_soa_particle){
    -                const auto& ptile = ParticlesAt(lev, pti);
    -                const auto& aos = ptile.GetArrayOfStructs();
    -                const auto pstruct = aos().dataPtr();
    -                const int np = ptile.numParticles();
    +            const auto& ptile = ParticlesAt(lev, pti);
    +            const int np = ptile.numParticles();
    +            auto const ptd = ptile.getConstParticleTileData();
     
    -                ReduceOps reduce_op;
    -                ReduceData reduce_data(reduce_op);
    -                using ReduceTuple = typename decltype(reduce_data)::Type;
    +            ReduceOps reduce_op;
    +            ReduceData reduce_data(reduce_op);
    +            using ReduceTuple = typename decltype(reduce_data)::Type;
     
    -                reduce_op.eval(np, reduce_data,
    +            reduce_op.eval(np, reduce_data,
                                [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
    -                            {
    -                               return (pstruct[i].id() > 0) ? 1 : 0;
    -                            });
    -
    -                int np_valid = amrex::get<0>(reduce_data.value(reduce_op));
    -                np_per_grid_local[gid] += np_valid;
    -
    -            } else{
    -                const auto& ptile = ParticlesAt(lev, pti);
    -                const int np = ptile.numParticles();
    -                auto const tile_data = ptile.getParticleTileData();
    -
    -                ReduceOps reduce_op;
    -                ReduceData reduce_data(reduce_op);
    -                using ReduceTuple = typename decltype(reduce_data)::Type;
    -
    -                reduce_op.eval(np, reduce_data,
    -                            [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
    -                            {
    -                               return (tile_data.idata(0)[i] > 0) ? 1 : 0;
    -                            });
    -
    -                int np_valid = amrex::get<0>(reduce_data.value(reduce_op));
    -                np_per_grid_local[gid] += np_valid;
    -            }
    +                           {
    +                               return (ptd.id(i) > 0) ? 1 : 0;
    +                           });
    +
    +            int np_valid = amrex::get<0>(reduce_data.value(reduce_op));
    +            np_per_grid_local[gid] += np_valid;
             } else
             {
                 np_per_grid_local[gid] += pti.numParticles();
    @@ -477,32 +456,15 @@ Long ParticleContainer_impl::Num
             ReduceData reduce_data(reduce_op);
             using ReduceTuple = typename decltype(reduce_data)::Type;
     
    -        if constexpr(!ParticleType::is_soa_particle){
    -
    -            for (const auto& kv : GetParticles(level)) {
    -                const auto& ptile = kv.second;
    -                auto const& ptaos = ptile.GetArrayOfStructs();
    -                ParticleType const* pp = ptaos().data();
    -
    -                reduce_op.eval(ptaos.numParticles(), reduce_data,
    -                            [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
    -                            {
    -                               return (pp[i].id() > 0) ? 1 : 0;
    -                            });
    -            }
    -
    -        } else{
    -            for (const auto& kv : GetParticles(level)) {
    +        for (const auto& kv : GetParticles(level)) {
                 const auto& ptile = kv.second;
    -            auto const tile_data = ptile.getConstParticleTileData();
    -            auto const& ptaos = ptile.GetStructOfArrays();
    +            auto const ptd = ptile.getConstParticleTileData();
     
    -            reduce_op.eval(ptaos.numParticles(), reduce_data,
    +            reduce_op.eval(ptile.numParticles(), reduce_data,
                                [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
                                {
    -                               return (tile_data.idata(0)[i] > 0) ? 1 : 0; //get the id in SoA particle way
    +                               return (ptd.id(i) > 0) ? 1 : 0;
                                });
    -            }
             }
     
             nparticles = static_cast(amrex::get<0>(reduce_data.value(reduce_op)));
    @@ -1175,8 +1137,8 @@ ParticleContainer_impl::SortPart
                 auto& ptile           = ParticlesAt(lev, mfi);
                 auto& aos             = ptile.GetArrayOfStructs();
                 auto  pstruct_ptr     = aos().dataPtr();
    -            const size_t np       = aos.numParticles();
    -            const size_t np_total = np + aos.numNeighborParticles();
    +            const size_t np       = ptile.numParticles();
    +            const size_t np_total = np + ptile.numNeighborParticles();
     
                 const Box& box = mfi.validbox();
     
    @@ -1191,10 +1153,11 @@ ParticleContainer_impl::SortPart
                         auto src = ptile.getParticleTileData();
                         ParticleType* dst = tmp_particles.data();
     
    -                    AMREX_HOST_DEVICE_FOR_1D( np_total, i,
    -                    {
    -                        dst[i] = i < np ? src.m_aos[inds[i]] : src.m_aos[i];
    -                    });
    +                    amrex::ParallelFor(np_total,
    +                        [=] AMREX_GPU_DEVICE (int i) noexcept
    +                        {
    +                            dst[i] = i < np ? src.m_aos[inds[i]] : src.m_aos[i];
    +                        });
     
                         Gpu::streamSynchronize();
                         ptile.GetArrayOfStructs()().swap(tmp_particles);
    @@ -1330,11 +1293,12 @@ ParticleContainer_impl
                 auto p_levs = op.m_levels[lev][gid].dataPtr();
                 auto p_src_indices = op.m_src_indices[lev][gid].dataPtr();
                 auto p_periodic_shift = op.m_periodic_shift[lev][gid].dataPtr();
    -            auto p_ptr = &(aos[0]);
    +            auto ptd = src_tile.getParticleTileData();
     
                 AMREX_FOR_1D ( num_move, i,
                 {
    -                const auto& p = p_ptr[i + num_stay];
    +                const auto p = make_particle{}(ptd,i + num_stay);
    +
                     if (p.id() < 0)
                     {
                         p_boxes[i] = -1;
    diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H
    index 42dda2dc4f0..23d66f0c246 100644
    --- a/Src/Particle/AMReX_ParticleTile.H
    +++ b/Src/Particle/AMReX_ParticleTile.H
    @@ -26,7 +26,6 @@ struct ConstParticleTileData;
     template 
     struct ParticleTileData
     {
    -
         static constexpr int NAR = NArrayReal;
         static constexpr int NAI = NArrayInt;
     
    @@ -50,6 +49,65 @@ struct ParticleTileData
         ParticleReal* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_rdata;
         int* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_idata;
     
    +    // AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    +    // ParticleReal pos (const int dir, const int index) const &
    +    // {
    +    //     if constexpr(!ParticleType::is_soa_particle) {
    +    //         return this->m_aos[index].pos(dir);
    +    //     } else {
    +    //         return this->m_rdata[dir][index];
    +    //     }
    +    // }
    +
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    +    ParticleReal& pos (const int dir, const int index) const &
    +    {
    +        if constexpr(!ParticleType::is_soa_particle) {
    +            return this->m_aos[index].pos(dir);
    +        }
    +        return this->m_rdata[dir][index];
    +    }
    +
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    +    auto id (const int index) const &
    +    {
    +        if constexpr(!ParticleType::is_soa_particle) {
    +            return this->m_aos[index].id();
    +        } else {
    +            return this->m_idata[0][index];
    +        }
    +    }
    +
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    +    auto& id (const int index) &
    +    {
    +        if constexpr(!ParticleType::is_soa_particle) {
    +                return this->m_aos[index].id();
    +        } else {
    +            return this->m_idata[0][index];
    +        }
    +    }
    +
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    +    auto cpu (const int index) const &
    +    {
    +        if constexpr(!ParticleType::is_soa_particle) {
    +            return this->m_aos[index].cpu();
    +        } else {
    +            return this->m_idata[1][index];
    +        }
    +    }
    +
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    +    auto& cpu (const int index) &
    +    {
    +        if constexpr(!ParticleType::is_soa_particle) {
    +            return this->m_aos[index].cpu();
    +        } else {
    +            return this->m_idata[1][index];
    +        }
    +    }
    +
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
         auto* rdata (const int attribute_index)
         {
    @@ -423,6 +481,16 @@ struct ConstParticleTileData
         GpuArray m_rdata;
         GpuArray m_idata;
     
    +    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    +    auto id (const int index) const &
    +    {
    +        if constexpr(!ParticleType::is_soa_particle) {
    +            return this->m_aos[index].id();
    +        } else {
    +            return this->m_idata[0][index];
    +        }
    +    }
    +
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
         auto const* rdata (const int attribute_index) const
         {
    diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H
    index c96e82fb239..e68404a3ef5 100644
    --- a/Src/Particle/AMReX_ParticleUtil.H
    +++ b/Src/Particle/AMReX_ParticleUtil.H
    @@ -647,13 +647,12 @@ partitionParticlesByDest (PTile& ptile, const PLocator& ploc, const ParticleBuff
                               int lev, int gid, int /*tid*/,
                               int lev_min, int lev_max, int nGrow, bool remove_negative)
     {
    -    auto& aos = ptile.GetArrayOfStructs();
    -    const int np = aos.numParticles();
    +    using ParticleType = typename PTile::ParticleType;
     
    +    const int np = ptile.numParticles();
         if (np == 0) return 0;
     
         auto getPID = pmap.getPIDFunctor();
    -    auto p_ptr = &(aos[0]);
     
         int pid = ParallelContext::MyProcSub();
         constexpr int chunk_size = 256*256*256;
    @@ -678,36 +677,42 @@ partitionParticlesByDest (PTile& ptile, const PLocator& ploc, const ParticleBuff
                 {
                     int assigned_grid;
                     int assigned_lev;
    +                //                auto& p = make_particle{}(src_data,i+this_offset);
     
    -                auto& p = p_ptr[i+this_offset];
    -
    -                if (p.id() < 0 )
    +                if (src_data.id(i+this_offset) < 0 )
                     {
                         assigned_grid = -1;
                         assigned_lev  = -1;
                     }
                     else
                     {
    -                    auto p_prime = p;
    +                    amrex::Particle<0> p_prime;
    +                    AMREX_D_TERM(p_prime.pos(0) = src_data.pos(0, i+this_offset);,
    +                                 p_prime.pos(1) = src_data.pos(1, i+this_offset);,
    +                                 p_prime.pos(2) = src_data.pos(2, i+this_offset););
    +
                         enforcePeriodic(p_prime, plo, phi, rlo, rhi, is_per);
                         auto tup_prime = ploc(p_prime, lev_min, lev_max, nGrow);
                         assigned_grid = amrex::get<0>(tup_prime);
                         assigned_lev  = amrex::get<1>(tup_prime);
                         if (assigned_grid >= 0)
                         {
    -                      AMREX_D_TERM(p.pos(0) = p_prime.pos(0);,
    -                                   p.pos(1) = p_prime.pos(1);,
    -                                   p.pos(2) = p_prime.pos(2););
    +                      AMREX_D_TERM(src_data.pos(0, i+this_offset) = p_prime.pos(0);,
    +                                   src_data.pos(1, i+this_offset) = p_prime.pos(1);,
    +                                   src_data.pos(2, i+this_offset) = p_prime.pos(2););
                         }
                         else if (lev_min > 0)
                         {
    -                      auto tup = ploc(p, lev_min, lev_max, nGrow);
    +                      AMREX_D_TERM(p_prime.pos(0) = src_data.pos(0, i+this_offset);,
    +                                   p_prime.pos(1) = src_data.pos(1, i+this_offset);,
    +                                   p_prime.pos(2) = src_data.pos(2, i+this_offset););
    +                      auto tup = ploc(p_prime, lev_min, lev_max, nGrow);
                           assigned_grid = amrex::get<0>(tup);
                           assigned_lev  = amrex::get<1>(tup);
                         }
                     }
     
    -                if ((remove_negative == false) && (p.id() < 0)) {
    +                if ((remove_negative == false) && (src_data.id(i+this_offset) < 0)) {
                         return true;
                     }
     
    
    From 1047e019629a5593f7850a3c9519654baaa42d5a Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 6 Apr 2023 18:45:06 -0700
    Subject: [PATCH 092/111] fix bad merge
    
    ---
     Src/Particle/AMReX_ParticleContainerI.H | 8 ++++----
     1 file changed, 4 insertions(+), 4 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
    index c19ba3d357a..b5de7151826 100644
    --- a/Src/Particle/AMReX_ParticleContainerI.H
    +++ b/Src/Particle/AMReX_ParticleContainerI.H
    @@ -1122,9 +1122,9 @@ ParticleContainer_impl
     
         if (memEfficientSort) {
             if constexpr(!ParticleType::is_soa_particle) {
    -                ParticleVector tmp_particles(np_total);
    -                auto src = ptile.getParticleTileData();
    -                ParticleType* dst = tmp_particles.data();
    +            ParticleVector tmp_particles(np_total);
    +            auto src = ptile.getParticleTileData();
    +            ParticleType* dst = tmp_particles.data();
     
                 AMREX_HOST_DEVICE_FOR_1D( np_total, i,
                 {
    @@ -1133,7 +1133,7 @@ ParticleContainer_impl
     
                 Gpu::streamSynchronize();
                 ptile.GetArrayOfStructs()().swap(tmp_particles);
    -        
    +        }
     
             RealVector tmp_real(np_total);
             for (int comp = 0; comp < NArrayReal + m_num_runtime_real; ++comp) {
    
    From 867f298f5949ea7fcfc1a3d78278c9b967c9ac71 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 6 Apr 2023 19:32:22 -0700
    Subject: [PATCH 093/111] remove unused
    
    ---
     Src/Particle/AMReX_ParticleContainerI.H | 1 -
     1 file changed, 1 deletion(-)
    
    diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
    index b5de7151826..846b00a9df4 100644
    --- a/Src/Particle/AMReX_ParticleContainerI.H
    +++ b/Src/Particle/AMReX_ParticleContainerI.H
    @@ -1205,7 +1205,6 @@ ParticleContainer_impl
                 auto& aos             = ptile.GetArrayOfStructs();
                 auto  pstruct_ptr     = aos().dataPtr();
                 const size_t np       = ptile.numParticles();
    -            const size_t np_total = np + ptile.numNeighborParticles();
     
                 const Box& box = mfi.validbox();
     
    
    From 6f0e1c0b289a07bc775766bebceb3e707add7e98 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 6 Apr 2023 20:08:27 -0700
    Subject: [PATCH 094/111] removed unused typedef
    
    ---
     Src/Particle/AMReX_ParticleUtil.H | 3 ---
     1 file changed, 3 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H
    index 76526d89e83..d8a9e0f60b6 100644
    --- a/Src/Particle/AMReX_ParticleUtil.H
    +++ b/Src/Particle/AMReX_ParticleUtil.H
    @@ -647,8 +647,6 @@ partitionParticlesByDest (PTile& ptile, const PLocator& ploc, const ParticleBuff
                               int lev, int gid, int /*tid*/,
                               int lev_min, int lev_max, int nGrow, bool remove_negative)
     {
    -    using ParticleType = typename PTile::ParticleType;
    -
         const int np = ptile.numParticles();
         if (np == 0) return 0;
     
    @@ -677,7 +675,6 @@ partitionParticlesByDest (PTile& ptile, const PLocator& ploc, const ParticleBuff
                 {
                     int assigned_grid;
                     int assigned_lev;
    -                //                auto& p = make_particle{}(src_data,i+this_offset);
     
                     if (src_data.id(i+this_offset) < 0 )
                     {
    
    From 00ebbd936267d0003a920b1fee59de5c9762034b Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 6 Apr 2023 22:10:06 -0700
    Subject: [PATCH 095/111] use auto
    
    ---
     Src/Particle/AMReX_ParticleUtil.H | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H
    index d8a9e0f60b6..2e7b784278c 100644
    --- a/Src/Particle/AMReX_ParticleUtil.H
    +++ b/Src/Particle/AMReX_ParticleUtil.H
    @@ -321,7 +321,7 @@ numParticlesOutOfRange (Iterator const& pti, IntVect nGrow)
         {
             ParticleType p(tile_data,i);
             if ((p.id() < 0)) { return false; }
    -        IntVect iv = IntVect(
    +        auto iv = IntVect(
                 AMREX_D_DECL(int(amrex::Math::floor((p.pos(0)-plo[0])*dxi[0])),
                              int(amrex::Math::floor((p.pos(1)-plo[1])*dxi[1])),
                              int(amrex::Math::floor((p.pos(2)-plo[2])*dxi[2]))));
    
    From cc9880bb5be4b4ee28185a55ebf82bd327e18a79 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Thu, 6 Apr 2023 22:11:21 -0700
    Subject: [PATCH 096/111] use auto*
    
    ---
     Src/Particle/AMReX_ParticleContainerI.H | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
    index 846b00a9df4..71f931867bd 100644
    --- a/Src/Particle/AMReX_ParticleContainerI.H
    +++ b/Src/Particle/AMReX_ParticleContainerI.H
    @@ -1203,7 +1203,7 @@ ParticleContainer_impl
             {
                 auto& ptile           = ParticlesAt(lev, mfi);
                 auto& aos             = ptile.GetArrayOfStructs();
    -            auto  pstruct_ptr     = aos().dataPtr();
    +            auto *pstruct_ptr     = aos().dataPtr();
                 const size_t np       = ptile.numParticles();
     
                 const Box& box = mfi.validbox();
    
    From 07d73cd1b9abc97e6287e20bd1c04de5b9fa12b7 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Fri, 7 Apr 2023 14:15:06 -0700
    Subject: [PATCH 097/111] test did not have enough components for what it's
     doing
    
    ---
     Tests/Particles/SOAParticle/main.cpp | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp
    index f25317fcdfe..2db902f8cfc 100644
    --- a/Tests/Particles/SOAParticle/main.cpp
    +++ b/Tests/Particles/SOAParticle/main.cpp
    @@ -180,7 +180,7 @@ int main(int argc, char* argv[])
      {
         amrex::Initialize(argc,argv);
         {
    -        addParticles< ParticleContainerPureSoA<3, 4> > ();
    +        addParticles< ParticleContainerPureSoA<4, 2> > ();
         }
         amrex::Finalize();
      }
    
    From c4f92eb7e83b4172f913a0a4cf9d81c3dd580ad7 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Fri, 7 Apr 2023 16:47:15 -0700
    Subject: [PATCH 098/111] make the return type of MakeParticle const
    
    ---
     Src/Particle/AMReX_MakeParticle.H | 4 ++--
     1 file changed, 2 insertions(+), 2 deletions(-)
    
    diff --git a/Src/Particle/AMReX_MakeParticle.H b/Src/Particle/AMReX_MakeParticle.H
    index 31a3e8d777f..a3d83412109 100644
    --- a/Src/Particle/AMReX_MakeParticle.H
    +++ b/Src/Particle/AMReX_MakeParticle.H
    @@ -16,7 +16,7 @@ struct make_particle
     {
         template 
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    auto&
    +    const auto
         operator() (PTD ptd, int i)
         {
             // legacy Particle (AoS)
    @@ -29,7 +29,7 @@ struct make_particle
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    auto
    +    const auto
         operator() (PTD ptd, int index)
         {
             // SoAParticle
    
    From 0576ec151a160bd2419286e0313fc2f461d093ba Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Fri, 7 Apr 2023 16:52:00 -0700
    Subject: [PATCH 099/111] pass by const&
    
    ---
     Src/Particle/AMReX_MakeParticle.H | 4 ++--
     Src/Particle/AMReX_ParticleTile.H | 4 ++--
     2 files changed, 4 insertions(+), 4 deletions(-)
    
    diff --git a/Src/Particle/AMReX_MakeParticle.H b/Src/Particle/AMReX_MakeParticle.H
    index a3d83412109..f08af3c23bb 100644
    --- a/Src/Particle/AMReX_MakeParticle.H
    +++ b/Src/Particle/AMReX_MakeParticle.H
    @@ -17,7 +17,7 @@ struct make_particle
         template 
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
         const auto
    -    operator() (PTD ptd, int i)
    +    operator() (PTD const& ptd, int i)
         {
             // legacy Particle (AoS)
             return ptd.m_aos[i];
    @@ -30,7 +30,7 @@ struct make_particle
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
         const auto
    -    operator() (PTD ptd, int index)
    +    operator() (PTD const& ptd, int index)
         {
             // SoAParticle
             return T_ParticleType(ptd, index);
    diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H
    index 035c4c2aa07..6d1c3b03822 100644
    --- a/Src/Particle/AMReX_ParticleTile.H
    +++ b/Src/Particle/AMReX_ParticleTile.H
    @@ -298,7 +298,7 @@ struct ConstSoAParticle : SoAParticleBase
         using RealType = ParticleReal;
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    ConstSoAParticle(ConstPTD const ptd, long i) :
    +    ConstSoAParticle(ConstPTD const& ptd, long i) :
             m_constparticle_tile_data(ptd), m_index(i)
         {
         }
    @@ -361,7 +361,7 @@ struct SoAParticle : SoAParticleBase
         using RealType = ParticleReal;
     
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    SoAParticle(PTD const ptd, long i) :
    +    SoAParticle(PTD const& ptd, long i) :
             m_particle_tile_data(ptd), m_index(i)
         {
         }
    
    From c96cbd8e2af74342c8ba18f4d56b432906aec787 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Sat, 8 Apr 2023 14:54:34 -0700
    Subject: [PATCH 100/111] handle different id numbers in RedistributeMPI
    
    ---
     Src/Particle/AMReX_ParticleContainerI.H | 18 +++++++++++-------
     1 file changed, 11 insertions(+), 7 deletions(-)
    
    diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
    index 71f931867bd..1176d8a7a1b 100644
    --- a/Src/Particle/AMReX_ParticleContainerI.H
    +++ b/Src/Particle/AMReX_ParticleContainerI.H
    @@ -2013,19 +2013,23 @@ RedistributeMPI (std::map >& not_ours,
                 {
                     char* pbuf = ((char*) &recvdata[offset]) + i*superparticle_size;
     
    +                Particle<0, 0> p;
                     ParticleReal pos[AMREX_SPACEDIM];
                     std::memcpy(&pos[0], pbuf, AMREX_SPACEDIM*sizeof(ParticleReal));
    -
    -                int idcpu[2];
    -                std::memcpy(&idcpu[0], pbuf + NumRealComps()*sizeof(ParticleReal), 2*sizeof(int));
    -
    -                Particle<0, 0> p;
    -                p.id() = idcpu[0];
    -                p.cpu() = idcpu[1];
                     AMREX_D_TERM(p.pos(0) = pos[0];,
                                  p.pos(1) = pos[1];,
                                  p.pos(2) = pos[2]);
     
    +                if constexpr (!ParticleType::is_soa_particle) {
    +                    std::memcpy(&(p.m_idcpu), pbuf + NumRealComps()*sizeof(ParticleReal), sizeof(uint64_t));
    +                } else {
    +                    int idcpu[2];
    +                    std::memcpy(&idcpu[0], pbuf + NumRealComps()*sizeof(ParticleReal), 2*sizeof(int));
    +
    +                    p.id() = idcpu[0];
    +                    p.cpu() = idcpu[1];
    +                }
    +
                     bool success = Where(p, pld, lev_min, lev_max, 0);
                     if (!success)
                     {
    
    From b8c7acff4425e29e412984b1ae1f976c0043ec62 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Sat, 8 Apr 2023 15:11:23 -0700
    Subject: [PATCH 101/111] auto const
    
    ---
     Src/Particle/AMReX_MakeParticle.H | 4 ++--
     1 file changed, 2 insertions(+), 2 deletions(-)
    
    diff --git a/Src/Particle/AMReX_MakeParticle.H b/Src/Particle/AMReX_MakeParticle.H
    index f08af3c23bb..cd32402a6ef 100644
    --- a/Src/Particle/AMReX_MakeParticle.H
    +++ b/Src/Particle/AMReX_MakeParticle.H
    @@ -16,7 +16,7 @@ struct make_particle
     {
         template 
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    const auto
    +    auto const
         operator() (PTD const& ptd, int i)
         {
             // legacy Particle (AoS)
    @@ -29,7 +29,7 @@ struct make_particle
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    const auto
    +    auto const
         operator() (PTD const& ptd, int index)
         {
             // SoAParticle
    
    From acaf3a4a088b833a73560a1f35f8901e4baa3e80 Mon Sep 17 00:00:00 2001
    From: Andrew Myers 
    Date: Sat, 8 Apr 2023 15:14:42 -0700
    Subject: [PATCH 102/111] remove auto const
    
    ---
     Src/Particle/AMReX_MakeParticle.H | 4 ++--
     1 file changed, 2 insertions(+), 2 deletions(-)
    
    diff --git a/Src/Particle/AMReX_MakeParticle.H b/Src/Particle/AMReX_MakeParticle.H
    index cd32402a6ef..4b9e35597dc 100644
    --- a/Src/Particle/AMReX_MakeParticle.H
    +++ b/Src/Particle/AMReX_MakeParticle.H
    @@ -16,7 +16,7 @@ struct make_particle
     {
         template 
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    auto const
    +    auto&
         operator() (PTD const& ptd, int i)
         {
             // legacy Particle (AoS)
    @@ -29,7 +29,7 @@ struct make_particle
         AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
    -    auto const
    +    auto
         operator() (PTD const& ptd, int index)
         {
             // SoAParticle
    
    From 94a5ada96d06777e9cd8f2e8056dd26f12958897 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Sat, 8 Apr 2023 23:42:12 -0700
    Subject: [PATCH 103/111] Compiler Warnings: Const & Narrowing ID
    
    ---
     Tests/Particles/RedistributeSOA/main.cpp | 6 +++---
     1 file changed, 3 insertions(+), 3 deletions(-)
    
    diff --git a/Tests/Particles/RedistributeSOA/main.cpp b/Tests/Particles/RedistributeSOA/main.cpp
    index ff31d81939d..25d48847c07 100644
    --- a/Tests/Particles/RedistributeSOA/main.cpp
    +++ b/Tests/Particles/RedistributeSOA/main.cpp
    @@ -104,7 +104,7 @@ class TestParticleContainer
                         Real r[3];
                         get_position_unit_cell(r, a_num_particles_per_cell, i_part);
     
    -                    int id = ParticleType::NextID();
    +                    amrex::Long id = ParticleType::NextID();
     
                         host_int[0].push_back(id);
                         host_int[1].push_back(ParallelDescriptor::MyProc());
    @@ -256,12 +256,12 @@ class TestParticleContainer
     
             for (int lev = 0; lev <= finestLevel(); ++lev)
             {
    -            auto& plev  = GetParticles(lev);
    +            const auto & plev  = GetParticles(lev);
                 for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi)
                 {
                     int gid = mfi.index();
                     int tid = mfi.LocalTileIndex();
    -                auto& ptile = plev.at(std::make_pair(gid, tid));
    +                const auto & ptile = plev.at(std::make_pair(gid, tid));
                     const auto ptd = ptile.getConstParticleTileData();
                     const size_t np = ptile.numParticles();
     
    
    From 8c91aef327b17873e24bc20eda5ec228279ea0cd Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Sun, 9 Apr 2023 08:23:31 -0700
    Subject: [PATCH 104/111] More Narrowing Conversions
    
    ---
     Tests/Particles/RedistributeSOA/main.cpp | 8 ++++----
     1 file changed, 4 insertions(+), 4 deletions(-)
    
    diff --git a/Tests/Particles/RedistributeSOA/main.cpp b/Tests/Particles/RedistributeSOA/main.cpp
    index 25d48847c07..0fb1e6b00a9 100644
    --- a/Tests/Particles/RedistributeSOA/main.cpp
    +++ b/Tests/Particles/RedistributeSOA/main.cpp
    @@ -117,13 +117,13 @@ class TestParticleContainer
     #endif
     
                         for (int i = AMREX_SPACEDIM; i < NR; ++i)
    -                        host_real[i].push_back(id);
    +                        host_real[i].push_back(static_cast(id));
                         for (int i = 2; i < NI; ++i)
    -                        host_int[i].push_back(id);
    +                        host_int[i].push_back(static_cast(id));
                         for (int i = 0; i < NumRuntimeRealComps(); ++i)
    -                        host_runtime_real[i].push_back(id);
    +                        host_runtime_real[i].push_back(static_cast(id));
                         for (int i = 0; i < NumRuntimeIntComps(); ++i)
    -                        host_runtime_int[i].push_back(id);
    +                        host_runtime_int[i].push_back(static_cast(id));
                     }
                 }
     
    
    From 0d8bf583551791942df2c4ea3471e897b1e1296b Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Sun, 9 Apr 2023 11:49:46 -0700
    Subject: [PATCH 105/111] modernize loop as per linter
    
    ---
     Tests/Particles/SOAParticle/main.cpp | 4 ++--
     1 file changed, 2 insertions(+), 2 deletions(-)
    
    diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp
    index 2db902f8cfc..56a621daf40 100644
    --- a/Tests/Particles/SOAParticle/main.cpp
    +++ b/Tests/Particles/SOAParticle/main.cpp
    @@ -15,8 +15,8 @@ template  class Allocator=DefaultAllocator>
     void addParticles ()
     {
         int is_per[AMREX_SPACEDIM];
    -    for (int d = 0; d < AMREX_SPACEDIM; d++)
    -        is_per[d] = 1;
    +    for (int & d : is_per)
    +        d = 1;
     
         RealBox real_box;
         for (int n = 0; n < AMREX_SPACEDIM; n++)
    
    From 57a66d712e891537637f9b1e6fbdcfbe1ae8680f Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Sun, 9 Apr 2023 12:30:06 -0700
    Subject: [PATCH 106/111] Destructor: override -> virtual
    
    Some discussion online for destructors, but virtual seems to be the general choice...
    ---
     Src/AmrCore/AMReX_AmrParticles.H | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/Src/AmrCore/AMReX_AmrParticles.H b/Src/AmrCore/AMReX_AmrParticles.H
    index 8dba8fd4666..d635aaf52df 100644
    --- a/Src/AmrCore/AMReX_AmrParticles.H
    +++ b/Src/AmrCore/AMReX_AmrParticles.H
    @@ -277,7 +277,7 @@ public:
         {
         }
     
    -    ~AmrParticleContainer_impl () override = default;
    +    virtual ~AmrParticleContainer_impl () = default;
     
         AmrParticleContainer_impl ( const AmrParticleContainer_impl &) = delete;
         AmrParticleContainer_impl& operator= ( const AmrParticleContainer_impl & ) = delete;
    
    From 34326a5f570a9b8642f1518c164d2b30759041b7 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Sun, 9 Apr 2023 13:00:44 -0700
    Subject: [PATCH 107/111] Revert last change in destructor
    
    ---
     Src/AmrCore/AMReX_AmrParticles.H | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/Src/AmrCore/AMReX_AmrParticles.H b/Src/AmrCore/AMReX_AmrParticles.H
    index d635aaf52df..8dba8fd4666 100644
    --- a/Src/AmrCore/AMReX_AmrParticles.H
    +++ b/Src/AmrCore/AMReX_AmrParticles.H
    @@ -277,7 +277,7 @@ public:
         {
         }
     
    -    virtual ~AmrParticleContainer_impl () = default;
    +    ~AmrParticleContainer_impl () override = default;
     
         AmrParticleContainer_impl ( const AmrParticleContainer_impl &) = delete;
         AmrParticleContainer_impl& operator= ( const AmrParticleContainer_impl & ) = delete;
    
    From 3d2c49f10c1cf9b17d5187e3c2ada4d0fbd3f4b5 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Sun, 9 Apr 2023 19:05:05 -0700
    Subject: [PATCH 108/111] Re-add NOLINT
    
    ---
     Src/AmrCore/AMReX_AmrParticles.H | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/Src/AmrCore/AMReX_AmrParticles.H b/Src/AmrCore/AMReX_AmrParticles.H
    index 8dba8fd4666..aa6260e6952 100644
    --- a/Src/AmrCore/AMReX_AmrParticles.H
    +++ b/Src/AmrCore/AMReX_AmrParticles.H
    @@ -251,7 +251,7 @@ ParticleToMesh (PC const& pc, const Vector& mf,
     
     template  class Allocator=DefaultAllocator>
    -class AmrParticleContainer_impl
    +class AmrParticleContainer_impl // NOLINT(cppcoreguidelines-virtual-class-destructor)
         : public ParticleContainer_impl
     {
     
    
    From 00c28c892f31193d0166caf72b1aa881a1fc1c0e Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Sun, 9 Apr 2023 19:59:18 -0700
    Subject: [PATCH 109/111] RedistributeSOA/main.cpp more clang-tidy
    
    ---
     Tests/Particles/RedistributeSOA/main.cpp | 6 +++---
     1 file changed, 3 insertions(+), 3 deletions(-)
    
    diff --git a/Tests/Particles/RedistributeSOA/main.cpp b/Tests/Particles/RedistributeSOA/main.cpp
    index 0fb1e6b00a9..2e1d42a78e9 100644
    --- a/Tests/Particles/RedistributeSOA/main.cpp
    +++ b/Tests/Particles/RedistributeSOA/main.cpp
    @@ -106,7 +106,7 @@ class TestParticleContainer
     
                         amrex::Long id = ParticleType::NextID();
     
    -                    host_int[0].push_back(id);
    +                    host_int[0].push_back(static_cast(id));
                         host_int[1].push_back(ParallelDescriptor::MyProc());
                         host_real[0].push_back(static_cast (plo[0] + (iv[0] + r[0])*dx[0]));
     #if AMREX_SPACEDIM > 1
    @@ -345,8 +345,8 @@ void testRedistribute ()
         get_test_params(params, "redistribute");
     
         int is_per[BL_SPACEDIM];
    -    for (int i = 0; i < BL_SPACEDIM; i++)
    -        is_per[i] = params.is_periodic;
    +    for (int & d : is_per)
    +        d = params.is_periodic;
     
         Vector rr(params.nlevs-1);
         for (int lev = 1; lev < params.nlevs; lev++)
    
    From 46171378c59122fb751c0ebbd28b143e286e427f Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Sun, 9 Apr 2023 23:48:38 -0700
    Subject: [PATCH 110/111] 2x modernize-auto
    
    ---
     Tests/Particles/RedistributeSOA/main.cpp | 4 ++--
     1 file changed, 2 insertions(+), 2 deletions(-)
    
    diff --git a/Tests/Particles/RedistributeSOA/main.cpp b/Tests/Particles/RedistributeSOA/main.cpp
    index 2e1d42a78e9..0bf11f6f11c 100644
    --- a/Tests/Particles/RedistributeSOA/main.cpp
    +++ b/Tests/Particles/RedistributeSOA/main.cpp
    @@ -372,7 +372,7 @@ void testRedistribute ()
     
         Vector ba(params.nlevs);
         Vector dm(params.nlevs);
    -    IntVect lo = IntVect(AMREX_D_DECL(0, 0, 0));
    +    auto lo = IntVect(AMREX_D_DECL(0, 0, 0));
         IntVect size = params.size;
         for (int lev = 0; lev < params.nlevs; ++lev)
         {
    @@ -386,7 +386,7 @@ void testRedistribute ()
         TestParticleContainer pc(geom, dm, ba, rr);
     
         int npc = params.num_ppc;
    -    IntVect nppc = IntVect(AMREX_D_DECL(npc, npc, npc));
    +    auto nppc = IntVect(AMREX_D_DECL(npc, npc, npc));
     
         amrex::Print() << "About to initialize particles \n";
     
    
    From adc1db98ba6225177b2eed9fa327eef608fcbf59 Mon Sep 17 00:00:00 2001
    From: Axel Huebl 
    Date: Sun, 9 Apr 2023 23:49:01 -0700
    Subject: [PATCH 111/111] Avoid copy
    
    ---
     Src/Particle/AMReX_ParticleUtil.H | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H
    index 53abc4a141d..a73e6dbffe2 100644
    --- a/Src/Particle/AMReX_ParticleUtil.H
    +++ b/Src/Particle/AMReX_ParticleUtil.H
    @@ -300,7 +300,7 @@ numParticlesOutOfRange (Iterator const& pti, IntVect nGrow)
     {
         using ParticleType = typename Iterator::ContainerType::ConstParticleType;
     
    -    const auto tile = pti.GetParticleTile();
    +    const auto& tile = pti.GetParticleTile();
         const auto tile_data = tile.getConstParticleTileData();
         const auto np = tile.numParticles();
         const auto& geom = pti.Geom(pti.GetLevel());