diff --git a/Src/AmrCore/AMReX_AmrParticles.H b/Src/AmrCore/AMReX_AmrParticles.H index 6a5dfd51687..aa6260e6952 100644 --- a/Src/AmrCore/AMReX_AmrParticles.H +++ b/Src/AmrCore/AMReX_AmrParticles.H @@ -10,10 +10,10 @@ namespace amrex { -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::AssignDensity (int rho_index, Vector >& mf_to_be_filled, int lev_min, int ncomp, int finest_level, int ngrow) const @@ -249,40 +249,47 @@ ParticleToMesh (PC const& pc, const Vector& mf, } } -template class Allocator=DefaultAllocator> -class AmrParticleContainer // NOLINT - : public ParticleContainer +class AmrParticleContainer_impl // NOLINT(cppcoreguidelines-virtual-class-destructor) + : public ParticleContainer_impl { public: - typedef Particle ParticleType; + using ParticleType = T_ParticleType; - AmrParticleContainer () = default; + AmrParticleContainer_impl () + : ParticleContainer_impl() + { + } - AmrParticleContainer (AmrCore* amr_core) - : ParticleContainer(amr_core->GetParGDB()) + AmrParticleContainer_impl (AmrCore* amr_core) + : ParticleContainer_impl(amr_core->GetParGDB()) { } - AmrParticleContainer (const Vector & geom, - const Vector & dmap, - const Vector & ba, - const Vector & rr) - : ParticleContainer(geom, dmap, ba, rr) + AmrParticleContainer_impl (const Vector & geom, + const Vector & dmap, + const Vector & ba, + const Vector & rr) + : ParticleContainer_impl(geom, dmap, ba, rr) { } - ~AmrParticleContainer () override = default; + ~AmrParticleContainer_impl () override = default; - AmrParticleContainer ( const AmrParticleContainer &) = delete; - AmrParticleContainer& operator= ( const AmrParticleContainer & ) = delete; + AmrParticleContainer_impl ( const AmrParticleContainer_impl &) = delete; + AmrParticleContainer_impl& operator= ( const AmrParticleContainer_impl & ) = delete; - AmrParticleContainer ( AmrParticleContainer && ) noexcept = default; - AmrParticleContainer& operator= ( AmrParticleContainer && ) noexcept = default; + AmrParticleContainer_impl ( AmrParticleContainer_impl && ) noexcept = default; + AmrParticleContainer_impl& operator= ( AmrParticleContainer_impl && ) noexcept = default; }; +template class Allocator=DefaultAllocator> +using AmrParticleContainer = AmrParticleContainer_impl, T_NArrayReal, T_NArrayInt, Allocator>; + class AmrTracerParticleContainer : public TracerParticleContainer { diff --git a/Src/Base/AMReX_Array.H b/Src/Base/AMReX_Array.H index 22e8ab2457b..c9a3eb63a56 100644 --- a/Src/Base/AMReX_Array.H +++ b/Src/Base/AMReX_Array.H @@ -897,3 +897,4 @@ namespace amrex } #endif + diff --git a/Src/Base/AMReX_Geometry.H b/Src/Base/AMReX_Geometry.H index 75b1fb1fb24..4017273151a 100644 --- a/Src/Base/AMReX_Geometry.H +++ b/Src/Base/AMReX_Geometry.H @@ -194,6 +194,7 @@ public: [[nodiscard]] GpuArray ProbLoArrayInParticleReal () const noexcept { return roundoff_lo; } + [[nodiscard]] GpuArray ProbHiArrayInParticleReal () const noexcept { return roundoff_hi; } diff --git a/Src/Base/AMReX_TypeTraits.H b/Src/Base/AMReX_TypeTraits.H index bbcaf0bcf34..3c5642e7780 100644 --- a/Src/Base/AMReX_TypeTraits.H +++ b/Src/Base/AMReX_TypeTraits.H @@ -54,17 +54,17 @@ namespace amrex struct IsMultiFabIterator : public std::is_base_of::type {}; #ifdef AMREX_PARTICLES - template class Allocator> - class ParIterBase; + // template class Allocator> + // class ParIterBase; - template class Allocator> - class ParIter; + // template class Allocator> + // class ParIter; - template class Allocator> - class ParConstIter; + // template class Allocator> + // class ParConstIter; class ParticleContainerBase; diff --git a/Src/EB/AMReX_EB2.H b/Src/EB/AMReX_EB2.H index b8019623490..0b6e121e415 100644 --- a/Src/EB/AMReX_EB2.H +++ b/Src/EB/AMReX_EB2.H @@ -134,7 +134,6 @@ void Build (const Geometry& geom, bool extend_domain_face = ExtendDomainFace(), int num_coarsen_opt = NumCoarsenOpt()); - void BuildFromChkptFile (std::string const& fname, const Geometry& geom, int required_coarsening_level, diff --git a/Src/Extern/HDF5/AMReX_ParticleHDF5.H b/Src/Extern/HDF5/AMReX_ParticleHDF5.H index 3ea80dba983..9e0d79e3400 100644 --- a/Src/Extern/HDF5/AMReX_ParticleHDF5.H +++ b/Src/Extern/HDF5/AMReX_ParticleHDF5.H @@ -17,10 +17,10 @@ #include "H5Z_SZ.h" #endif -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CheckpointHDF5 (const std::string& dir, const std::string& name, bool /*is_checkpoint*/, const Vector& real_comp_names, @@ -70,10 +70,10 @@ ParticleContainer }, true); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CheckpointHDF5 (const std::string& dir, const std::string& name, const std::string& compression) const { @@ -106,10 +106,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFileHDF5 (const std::string& dir, const std::string& name, const std::string& compression) const { @@ -142,10 +142,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFileHDF5 (const std::string& dir, const std::string& name, const Vector& real_comp_names, const Vector& int_comp_names, @@ -170,10 +170,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFileHDF5 (const std::string& dir, const std::string& name, const Vector& real_comp_names, const std::string& compression) const @@ -204,10 +204,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFileHDF5 (const std::string& dir, const std::string& name, const Vector& write_real_comp, @@ -242,10 +242,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: WritePlotFileHDF5 (const std::string& dir, const std::string& name, const Vector& write_real_comp, const Vector& write_int_comp, @@ -265,11 +265,11 @@ WritePlotFileHDF5 (const std::string& dir, const std::string& name, }); } -template class Allocator> template >::value>::type*> void -ParticleContainer +ParticleContainer_impl ::WritePlotFileHDF5 (const std::string& dir, const std::string& name, const std::string& compression, F&& f) const { @@ -298,11 +298,11 @@ ParticleContainer std::forward(f)); } -template class Allocator> template void -ParticleContainer +ParticleContainer_impl ::WritePlotFileHDF5 (const std::string& dir, const std::string& name, const Vector& real_comp_names, const Vector& int_comp_names, @@ -323,11 +323,11 @@ ParticleContainer compression, std::forward(f)); } -template class Allocator> template >::value>::type*> void -ParticleContainer +ParticleContainer_impl ::WritePlotFileHDF5 (const std::string& dir, const std::string& name, const Vector& real_comp_names, const std::string& compression, F&& f) const @@ -354,11 +354,11 @@ ParticleContainer compression, std::forward(f)); } -template class Allocator> template void -ParticleContainer +ParticleContainer_impl ::WritePlotFileHDF5 (const std::string& dir, const std::string& name, const Vector& write_real_comp, @@ -389,11 +389,11 @@ ParticleContainer compression, std::forward(f)); } -template class Allocator> template void -ParticleContainer:: +ParticleContainer_impl:: WritePlotFileHDF5 (const std::string& dir, const std::string& name, const Vector& write_real_comp, const Vector& write_int_comp, @@ -409,11 +409,11 @@ WritePlotFileHDF5 (const std::string& dir, const std::string& name, compression, std::forward(f)); } -template class Allocator> template void -ParticleContainer +ParticleContainer_impl ::WriteHDF5ParticleData (const std::string& dir, const std::string& name, const Vector& write_real_comp, const Vector& write_int_comp, @@ -437,10 +437,10 @@ ParticleContainer /* } */ } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CheckpointPreHDF5 () { if( ! usePrePost) { @@ -494,10 +494,10 @@ ParticleContainer } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CheckpointPostHDF5 () { if( ! usePrePost) { @@ -550,30 +550,30 @@ ParticleContainer } } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFilePreHDF5 () { CheckpointPreHDF5(); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFilePostHDF5 () { CheckpointPostHDF5(); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WriteParticlesHDF5 (int lev, hid_t grp, Vector& which, Vector& count, Vector& where, const Vector& write_real_comp, @@ -964,19 +964,19 @@ ParticleContainer return; } // End WriteParticlesHDF5 -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::RestartHDF5 (const std::string& dir, const std::string& file, bool /*is_checkpoint*/) { RestartHDF5(dir, file); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::RestartHDF5 (const std::string& dir, const std::string& file) { BL_PROFILE("ParticleContainer::RestartHDF5()"); @@ -1340,11 +1340,11 @@ ParticleContainer } // Read a batch of particles from the checkpoint file -template class Allocator> template void -ParticleContainer +ParticleContainer_impl ::ReadParticlesHDF5 (hsize_t offset, hsize_t cnt, int grd, int lev, hid_t int_dset, hid_t real_dset, int finest_level_in_file, bool convert_ids) diff --git a/Src/Particle/AMReX_ArrayOfStructs.H b/Src/Particle/AMReX_ArrayOfStructs.H index bc27d8c4e3e..43c84bfd014 100644 --- a/Src/Particle/AMReX_ArrayOfStructs.H +++ b/Src/Particle/AMReX_ArrayOfStructs.H @@ -8,11 +8,11 @@ namespace amrex { -template class Allocator=DefaultAllocator> class ArrayOfStructs { public: - using ParticleType = Particle; + using ParticleType = T_ParticleType; using RealType = typename ParticleType::RealType; using ParticleVector = amrex::PODVector >; @@ -86,7 +86,7 @@ public: [[nodiscard]] const ParticleType& operator[] (int i) const { return m_data[i]; } [[nodiscard]] ParticleType& operator[] (int i) { return m_data[i]; } - void swap (ArrayOfStructs& other) + void swap (ArrayOfStructs& other) { m_data.swap(other.m_data); } @@ -113,9 +113,9 @@ private: }; #if __cplusplus < 201703L -template class Allocator> -constexpr int ArrayOfStructs::SizeInReal; +constexpr int ArrayOfStructs::SizeInReal; #endif } // namespace amrex diff --git a/Src/Particle/AMReX_MakeParticle.H b/Src/Particle/AMReX_MakeParticle.H new file mode 100644 index 00000000000..4b9e35597dc --- /dev/null +++ b/Src/Particle/AMReX_MakeParticle.H @@ -0,0 +1,40 @@ +#ifndef AMREX_MAKEPARTICLE_H_ +#define AMREX_MAKEPARTICLE_H_ + +#include + +template< class T > +struct is_soa_particle + : std::integral_constant< + bool, + T::is_soa_particle + > {}; + + +template +struct make_particle +{ + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto& + operator() (PTD const& ptd, int i) + { + // legacy Particle (AoS) + return ptd.m_aos[i]; + } +}; + +template +struct make_particle::value>::type> +{ + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto + operator() (PTD const& ptd, int index) + { + // SoAParticle + return T_ParticleType(ptd, index); + } +}; + +#endif diff --git a/Src/Particle/AMReX_ParIter.H b/Src/Particle/AMReX_ParIter.H index fef7b854293..b14c5849842 100644 --- a/Src/Particle/AMReX_ParIter.H +++ b/Src/Particle/AMReX_ParIter.H @@ -8,18 +8,31 @@ namespace amrex { -template class Allocator> -class ParticleContainer; +class ParticleContainer_impl; -template +struct Particle; + +template +struct SoAParticle; + +// for backwards compatibility +template class Allocator=DefaultAllocator> -class ParIterBase +using ParticleContainer = ParticleContainer_impl, T_NArrayReal, T_NArrayInt, Allocator>; + +template class Allocator=DefaultAllocator> + +class ParIterBase_impl : public MFIter { + private: - using PCType = ParticleContainer; + using PCType = ParticleContainer_impl; using ContainerRef = typename std::conditional::type; using ParticleTileRef = typename std::conditional ::type; @@ -32,18 +45,20 @@ private: public: - using ContainerType = ParticleContainer; + using ContainerType = ParticleContainer_impl; using ParticleTileType = typename ContainerType::ParticleTileType; using AoS = typename ContainerType::AoS; using SoA = typename ContainerType::SoA; - using ParticleType = typename ContainerType::ParticleType; + using ParticleType = T_ParticleType; using RealVector = typename SoA::RealVector; using IntVector = typename SoA::IntVector; using ParticleVector = typename ContainerType::ParticleVector; + static constexpr int NStructReal = ParticleType::NReal; + static constexpr int NStructInt = ParticleType::NInt; - ParIterBase (ContainerRef pc, int level); + ParIterBase_impl (ContainerRef pc, int level); - ParIterBase (ContainerRef pc, int level, MFItInfo& info); + ParIterBase_impl (ContainerRef pc, int level, MFItInfo& info); #ifdef AMREX_USE_OMP void operator++ () @@ -94,59 +109,61 @@ protected: ContainerRef m_pc; }; -template class Allocator=DefaultAllocator> -class ParIter - : public ParIterBase +class ParIter_impl + : public ParIterBase_impl { public: - using ContainerType = ParticleContainer; + using ParticleType=T_ParticleType; + static constexpr int NStructReal = ParticleType::NReal; + static constexpr int NStructInt = ParticleType::NInt; + + using ContainerType = ParticleContainer_impl; + using ConstParticleType = typename ContainerType::ConstParticleType; using ParticleTileType = typename ContainerType::ParticleTileType; using AoS = typename ContainerType::AoS; using SoA = typename ContainerType::SoA; - using ParticleType = typename ContainerType::ParticleType; using RealVector = typename SoA::RealVector; using IntVector = typename SoA::IntVector; - ParIter (ContainerType& pc, int level) - : ParIterBase(pc,level) + ParIter_impl (ContainerType& pc, int level) + : ParIterBase_impl(pc,level) {} - ParIter (ContainerType& pc, int level, MFItInfo& info) - : ParIterBase(pc,level,info) + ParIter_impl (ContainerType& pc, int level, MFItInfo& info) + : ParIterBase_impl(pc,level,info) {} }; -template class Allocator=DefaultAllocator> -class ParConstIter - : public ParIterBase +class ParConstIter_impl + : public ParIterBase_impl { public: - using ContainerType = ParticleContainer; + using ParticleType = T_ParticleType; + using ContainerType = ParticleContainer_impl; using ParticleTileType = typename ContainerType::ParticleTileType; using AoS = typename ContainerType::AoS; using SoA = typename ContainerType::SoA; - using ParticleType = typename ContainerType::ParticleType; using RealVector = typename SoA::RealVector; using IntVector = typename SoA::IntVector; - ParConstIter (ContainerType const& pc, int level) - : ParIterBase(pc,level) + ParConstIter_impl (ContainerType const& pc, int level) + : ParIterBase_impl(pc,level) {} - ParConstIter (ContainerType const& pc, int level, MFItInfo& info) - : ParIterBase(pc,level,info) + ParConstIter_impl (ContainerType const& pc, int level, MFItInfo& info) + : ParIterBase_impl(pc,level,info) {} }; -template class Allocator> -ParIterBase::ParIterBase +ParIterBase_impl::ParIterBase_impl (ContainerRef pc, int level, MFItInfo& info) : MFIter(*pc.m_dummy_mf[level], pc.do_tiling ? info.EnableTiling(pc.tile_size) : info), @@ -195,9 +212,9 @@ ParIterBase } } -template class Allocator> -ParIterBase::ParIterBase +ParIterBase_impl::ParIterBase_impl (ContainerRef pc, int level) : MFIter(*pc.m_dummy_mf[level], @@ -232,6 +249,28 @@ ParIterBase } } +template class Allocator=DefaultAllocator> +using ParIterBase = ParIterBase_impl, T_NArrayReal, T_NArrayInt, Allocator>; + +template class Allocator=DefaultAllocator> +using ParIterBaseSoA = ParIterBase_impl, T_NArrayReal, T_NArrayInt, Allocator>; + +template class Allocator=DefaultAllocator> +using ParConstIter = ParConstIter_impl, T_NArrayReal, T_NArrayInt, Allocator>; + +template class Allocator=DefaultAllocator> +using ParConstIterSoA = ParConstIter_impl, T_NArrayReal, T_NArrayInt, Allocator>; + +template class Allocator=DefaultAllocator> +using ParIter = ParIter_impl, T_NArrayReal, T_NArrayInt, Allocator>; + +template class Allocator=DefaultAllocator> +using ParIterSoA = ParIter_impl, T_NArrayReal, T_NArrayInt, Allocator>; + } #endif diff --git a/Src/Particle/AMReX_Particle.H b/Src/Particle/AMReX_Particle.H index f8be6df01a2..5a24546c2da 100644 --- a/Src/Particle/AMReX_Particle.H +++ b/Src/Particle/AMReX_Particle.H @@ -208,6 +208,13 @@ struct ParticleBase }; +struct SoAParticleBase +{ + static constexpr int NReal=0; + static constexpr int NInt=0; + static constexpr bool is_soa_particle = true; +}; + /** \brief The struct used to store particles. * * \tparam T_NReal The number of extra Real components @@ -217,6 +224,10 @@ template struct Particle : ParticleBase { + static constexpr bool is_soa_particle = false; + using StorageParticleType = Particle; + using ConstType = Particle const; + //! \brief number of extra Real components in the particle struct static constexpr int NReal = T_NReal; diff --git a/Src/Particle/AMReX_ParticleCommunication.H b/Src/Particle/AMReX_ParticleCommunication.H index 6113bd36248..14ecdfe02b7 100644 --- a/Src/Particle/AMReX_ParticleCommunication.H +++ b/Src/Particle/AMReX_ParticleCommunication.H @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -293,8 +294,6 @@ void packBuffer (const PC& pc, const ParticleCopyOp& op, const ParticleCopyPlan& { BL_PROFILE("amrex::packBuffer"); - using ParticleType = typename PC::ParticleType; - Long psize = plan.superParticleSize(); int num_levels = op.numLevels(); @@ -365,17 +364,19 @@ void packBuffer (const PC& pc, const ParticleCopyOp& op, const ParticleCopyPlan& if (do_periodic_shift) { - ParticleType p; - amrex::Gpu::memcpy(&p, &p_snd_buffer[dst_offset], sizeof(ParticleType)); + ParticleReal pos[AMREX_SPACEDIM]; + amrex::Gpu::memcpy(&pos[0], &p_snd_buffer[dst_offset], + AMREX_SPACEDIM*sizeof(ParticleReal)); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { if (! is_per[idim]) continue; if (pshift[idim] > 0) - p.pos(idim) += phi[idim] - plo[idim]; + pos[idim] += phi[idim] - plo[idim]; else if (pshift[idim] < 0) - p.pos(idim) -= phi[idim] - plo[idim]; + pos[idim] -= phi[idim] - plo[idim]; } - amrex::Gpu::memcpy(&p_snd_buffer[dst_offset], &p, sizeof(ParticleType)); + amrex::Gpu::memcpy(&p_snd_buffer[dst_offset], &pos[0], + AMREX_SPACEDIM*sizeof(ParticleReal)); } } }); diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H index f31b29ee21f..64c44be84ac 100644 --- a/Src/Particle/AMReX_ParticleContainer.H +++ b/Src/Particle/AMReX_ParticleContainer.H @@ -35,8 +35,8 @@ #include #include #include -#include #include +#include #ifdef AMREX_LAZY #include @@ -119,6 +119,11 @@ struct ParticleInitType std::array int_array_data; }; +template class Allocator> + +class ParIterBase_impl; + /** * \brief A distributed container for Particles sorted onto the levels, grids, * and tiles of a block-structured AMR hierarchy. @@ -132,30 +137,33 @@ struct ParticleInitType * \tparam T_NArrayInt The number of extra integer components stored in struct-of-array form * */ -template class Allocator=DefaultAllocator> -class ParticleContainer : public ParticleContainerBase + +class ParticleContainer_impl : public ParticleContainerBase { public: + using ParticleType = T_ParticleType; + using ConstParticleType = typename ParticleType::ConstType; + //! \brief Number of extra Real components in the particle struct - static constexpr int NStructReal = T_NStructReal; + static constexpr int NStructReal = ParticleType::NReal; //! \brief Number of extra integer components in the particle struct - static constexpr int NStructInt = T_NStructInt; + static constexpr int NStructInt = ParticleType::NInt; //! \brief Number of extra Real components stored in struct-of-array form static constexpr int NArrayReal = T_NArrayReal; //! \brief Number of extra integer components stored in struct-of-array form static constexpr int NArrayInt = T_NArrayInt; + //! \brief The type of the "Particle" private: - friend class ParIterBase; - friend class ParIterBase; + friend class ParIterBase_impl; + friend class ParIterBase_impl; public: //! \brief The memory allocator in use. template using AllocatorType = Allocator; - //! \brief The type of Particles we hold. - using ParticleType = Particle; //! \brief The type of the "SuperParticle" which stored all components in AoS form using SuperParticleType = Particle; //! \brief The type of the Real data @@ -167,8 +175,8 @@ public: RealDescriptor ParticleRealDescriptor = FPC::Native64RealDescriptor(); #endif - using ParticleContainerType = ParticleContainer; - using ParticleTileType = ParticleTile; + using ParticleContainerType = ParticleContainer_impl; + using ParticleTileType = ParticleTile; using ParticleInitData = ParticleInitType; //! A single level worth of particles is indexed (grid id, tile id) @@ -182,12 +190,12 @@ public: using ParticleVector = typename AoS::ParticleVector; using CharVector = Gpu::DeviceVector; using SendBuffer = Gpu::PolymorphicVector; - using ParIterType = ParIter; - using ParConstIterType = ParConstIter; + using ParIterType = ParIter_impl; + using ParConstIterType = ParConstIter_impl; //! \brief Default constructor - construct an empty particle container that has no concept //! of a level hierarchy. Must be properly initialized later. - ParticleContainer () + ParticleContainer_impl () : ParticleContainerBase(), h_redistribute_real_comp(AMREX_SPACEDIM + NStructReal + NArrayReal, true), @@ -203,15 +211,15 @@ public: //! DistributionMapping, and BoxArray objects that define the AMR hierarchy. Usually, //! this is generated by an AmrCore or AmrLevel object. //! - ParticleContainer (ParGDBBase* gdb) + ParticleContainer_impl (ParGDBBase* gdb) : ParticleContainerBase(gdb), h_redistribute_real_comp(AMREX_SPACEDIM + NStructReal + NArrayReal, true), h_redistribute_int_comp(2 + NStructInt + NArrayInt, true) { Initialize (); - ParticleContainer::reserveData(); - ParticleContainer::resizeData(); + ParticleContainer_impl::reserveData(); + ParticleContainer_impl::resizeData(); } //! \brief Construct a particle container using a given Geometry, DistributionMapping, @@ -221,17 +229,17 @@ public: //! \param A DistributionMapping, which describes how the boxes are distributed onto MPI tasks //! \param A BoxArray, which gives the set of grid boxes //! - ParticleContainer (const Geometry & geom, - const DistributionMapping & dmap, - const BoxArray & ba) + ParticleContainer_impl (const Geometry & geom, + const DistributionMapping & dmap, + const BoxArray & ba) : ParticleContainerBase(geom, dmap, ba), h_redistribute_real_comp(AMREX_SPACEDIM + NStructReal + NArrayReal, true), h_redistribute_int_comp(2 + NStructInt + NArrayInt, true) { Initialize (); - ParticleContainer::reserveData(); - ParticleContainer::resizeData(); + ParticleContainer_impl::reserveData(); + ParticleContainer_impl::resizeData(); } //! \brief Construct a particle container using a given Geometry, DistributionMapping, @@ -243,18 +251,18 @@ public: //! \param rr A Vector of integer refinement ratios, of size num_levels - 1. rr[n] gives the //! refinement ratio between levels n and n+1 //! - ParticleContainer (const Vector & geom, - const Vector & dmap, - const Vector & ba, - const Vector & rr) + ParticleContainer_impl (const Vector & geom, + const Vector & dmap, + const Vector & ba, + const Vector & rr) : ParticleContainerBase(geom, dmap, ba, rr), h_redistribute_real_comp(AMREX_SPACEDIM + NStructReal + NArrayReal, true), h_redistribute_int_comp(2 + NStructInt + NArrayInt, true) { Initialize (); - ParticleContainer::reserveData(); - ParticleContainer::resizeData(); + ParticleContainer_impl::reserveData(); + ParticleContainer_impl::resizeData(); } //! \brief Same as the above, but accepts different refinement ratios in each direction. @@ -265,27 +273,27 @@ public: //! \param rr A Vector of IntVect refinement ratios, of size num_levels - 1. rr[n] gives the //! refinement ratio between levels n and n+1 //! - ParticleContainer (const Vector & geom, - const Vector & dmap, - const Vector & ba, - const Vector & rr) + ParticleContainer_impl (const Vector & geom, + const Vector & dmap, + const Vector & ba, + const Vector & rr) : ParticleContainerBase(geom, dmap, ba, rr), h_redistribute_real_comp(AMREX_SPACEDIM + NStructReal + NArrayReal, true), h_redistribute_int_comp(2 + NStructInt + NArrayInt, true) { Initialize (); - ParticleContainer::reserveData(); - ParticleContainer::resizeData(); + ParticleContainer_impl::reserveData(); + ParticleContainer_impl::resizeData(); } - ~ParticleContainer () override = default; + ~ParticleContainer_impl () override = default; - ParticleContainer ( const ParticleContainer &) = delete; - ParticleContainer& operator= ( const ParticleContainer & ) = delete; + ParticleContainer_impl ( const ParticleContainer_impl &) = delete; + ParticleContainer_impl& operator= ( const ParticleContainer_impl & ) = delete; - ParticleContainer ( ParticleContainer && ) noexcept = default; - ParticleContainer& operator= ( ParticleContainer && ) noexcept = default; + ParticleContainer_impl ( ParticleContainer_impl && ) noexcept = default; + ParticleContainer_impl& operator= ( ParticleContainer_impl && ) noexcept = default; //! \brief Define a default-constructed ParticleContainer using a ParGDB object. @@ -555,6 +563,7 @@ public: * \param only_valid * \param only_local */ + Long NumberOfParticlesAtLevel (int level, bool only_valid = true, bool only_local = false) const; Vector NumberOfParticlesInGrid (int level, bool only_valid = true, bool only_local = false) const; @@ -1199,7 +1208,8 @@ public: * * \param prt */ - bool PeriodicShift (ParticleType& prt) const; + template + bool PeriodicShift (P& p) const; void SetLevelDirectoriesCreated (bool tf) { levelDirectoriesCreated = tf; } @@ -1262,7 +1272,7 @@ public: /** type trait to translate one particle container to another, with changed allocator */ template class NewAllocator=amrex::DefaultAllocator> - using ContainerLike = amrex::ParticleContainer; + using ContainerLike = amrex::ParticleContainer_impl; /** Create an empty particle container * @@ -1382,6 +1392,13 @@ private: Vector m_particles; }; +template class Allocator> +using ParticleContainer = ParticleContainer_impl, T_NArrayReal, T_NArrayInt, Allocator>; + +template class Allocator=DefaultAllocator> +using ParticleContainerPureSoA = ParticleContainer_impl, T_NArrayReal, T_NArrayInt, Allocator>; + + #include "AMReX_ParticleInit.H" #include "AMReX_ParticleContainerI.H" #include "AMReX_ParticleIO.H" diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index 52215eb71c4..bed21486144 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -1,8 +1,10 @@ +#include +#include -template class Allocator> void -ParticleContainer::SetParticleSize () +ParticleContainer_impl::SetParticleSize () { num_real_comm_comps = 0; int comm_comps_start = AMREX_SPACEDIM + NStructReal; @@ -16,15 +18,19 @@ ParticleContainer::Se if (h_redistribute_int_comp[i]) {++num_int_comm_comps;} } - particle_size = sizeof(ParticleType); + if constexpr(!ParticleType::is_soa_particle) { + particle_size = sizeof(ParticleType); + } else { + particle_size = 0; + } superparticle_size = particle_size + num_real_comm_comps*sizeof(ParticleReal) + num_int_comm_comps*sizeof(int); } -template class Allocator> void -ParticleContainer :: Initialize () +ParticleContainer_impl :: Initialize () { levelDirectoriesCreated = false; usePrePost = false; @@ -58,11 +64,11 @@ ParticleContainer :: } } -template class Allocator> template IntVect -ParticleContainer::Index (const P& p, int lev) const +ParticleContainer_impl::Index (const P& p, int lev) const { IntVect iv; const Geometry& geom = Geom(lev); @@ -76,11 +82,11 @@ ParticleContainer::In return iv; } -template class Allocator> template bool -ParticleContainer +ParticleContainer_impl ::Where (const P& p, ParticleLocData& pld, int lev_min, @@ -156,10 +162,10 @@ ParticleContainer return false; } -template class Allocator> bool -ParticleContainer +ParticleContainer_impl ::EnforcePeriodicWhere (ParticleType& p, ParticleLocData& pld, int lev_min, @@ -177,7 +183,10 @@ ParticleContainer AMREX_ASSERT(lev_max <= finestLevel()); // Create a copy "dummy" particle to check for periodic outs. - ParticleType p_prime = p; + Particle<0, 0> p_prime; + AMREX_D_TERM(p_prime.pos(0) = p.pos(0);, + p_prime.pos(1) = p.pos(1);, + p_prime.pos(2) = p.pos(2)); if (PeriodicShift(p_prime)) { std::vector< std::pair > isects; for (int lev = lev_max; lev >= lev_min; lev--) { @@ -230,11 +239,12 @@ ParticleContainer } -template class Allocator> +template bool -ParticleContainer -::PeriodicShift (ParticleType& p) const +ParticleContainer_impl +::PeriodicShift (P& p) const { const auto& geom = Geom(0); const auto plo = geom.ProbLoArray(); @@ -246,10 +256,10 @@ ParticleContainer return enforcePeriodic(p, plo, phi, rlo, rhi, is_per); } -template class Allocator> ParticleLocData -ParticleContainer:: +ParticleContainer_impl:: Reset (ParticleType& p, bool /*update*/, bool verbose, @@ -281,30 +291,30 @@ Reset (ParticleType& p, return pld; } -template class Allocator> void -ParticleContainer::reserveData () +ParticleContainer_impl::reserveData () { this->ParticleContainerBase::reserveData(); m_particles.reserve(maxLevel()+1); } -template class Allocator> void -ParticleContainer::resizeData () +ParticleContainer_impl::resizeData () { this->ParticleContainerBase::resizeData(); int nlevs = std::max(0, finestLevel()+1); m_particles.resize(nlevs); } -template class Allocator> void -ParticleContainer::locateParticle (ParticleType& p, ParticleLocData& pld, - int lev_min, int lev_max, int nGrow, int local_grid) const +ParticleContainer_impl::locateParticle (ParticleType& p, ParticleLocData& pld, + int lev_min, int lev_max, int nGrow, int local_grid) const { bool success; if (Geom(0).outsideRoundoffDomain(AMREX_D_DECL(p.pos(0), p.pos(1), p.pos(2)))) @@ -335,10 +345,10 @@ ParticleContainer::lo } } -template class Allocator> Long -ParticleContainer::TotalNumberOfParticles (bool only_valid, bool only_local) const +ParticleContainer_impl::TotalNumberOfParticles (bool only_valid, bool only_local) const { Long nparticles = 0; for (int lev = 0; lev <= finestLevel(); lev++) { @@ -350,10 +360,10 @@ ParticleContainer::To return nparticles; } -template class Allocator> Vector -ParticleContainer::NumberOfParticlesInGrid (int lev, bool only_valid, bool only_local) const +ParticleContainer_impl::NumberOfParticlesInGrid (int lev, bool only_valid, bool only_local) const { AMREX_ASSERT(lev >= 0 && lev < int(m_particles.size())); @@ -366,9 +376,8 @@ ParticleContainer::Nu if (only_valid) { const auto& ptile = ParticlesAt(lev, pti); - const auto& aos = ptile.GetArrayOfStructs(); - const auto pstruct = aos().dataPtr(); const int np = ptile.numParticles(); + auto const ptd = ptile.getConstParticleTileData(); ReduceOps reduce_op; ReduceData reduce_data(reduce_op); @@ -377,7 +386,7 @@ ParticleContainer::Nu reduce_op.eval(np, reduce_data, [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple { - return (pstruct[i].id() > 0) ? 1 : 0; + return (ptd.id(i) > 0) ? 1 : 0; }); int np_valid = amrex::get<0>(reduce_data.value(reduce_op)); @@ -407,35 +416,34 @@ ParticleContainer::Nu return nparticles; } -template class Allocator> -Long -ParticleContainer::NumberOfParticlesAtLevel (int lev, bool only_valid, bool only_local) const +Long ParticleContainer_impl::NumberOfParticlesAtLevel (int level, bool only_valid, bool only_local) const { Long nparticles = 0; - if (lev < 0 || lev >= int(m_particles.size())) return nparticles; + if (level < 0 || level >= int(m_particles.size())) return nparticles; if (only_valid) { ReduceOps reduce_op; ReduceData reduce_data(reduce_op); using ReduceTuple = typename decltype(reduce_data)::Type; - for (const auto& kv : GetParticles(lev)) { + for (const auto& kv : GetParticles(level)) { const auto& ptile = kv.second; - auto const& ptaos = ptile.GetArrayOfStructs(); - ParticleType const* pp = ptaos().data(); + auto const ptd = ptile.getConstParticleTileData(); - reduce_op.eval(ptaos.numParticles(), reduce_data, + reduce_op.eval(ptile.numParticles(), reduce_data, [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple { - return (pp[i].id() > 0) ? 1 : 0; + return (ptd.id(i) > 0) ? 1 : 0; }); } + nparticles = static_cast(amrex::get<0>(reduce_data.value(reduce_op))); } else { - for (const auto& kv : GetParticles(lev)) { + for (const auto& kv : GetParticles(level)) { const auto& ptile = kv.second; nparticles += ptile.numParticles(); } @@ -452,10 +460,10 @@ ParticleContainer::Nu // This includes both valid and invalid particles since invalid particles still take up space. // -template class Allocator> std::array -ParticleContainer +ParticleContainer_impl ::ByteSpread () const { Long cnt = 0; @@ -496,10 +504,10 @@ ParticleContainer return {mn*sz, mx*sz, cnt*sz}; } -template class Allocator> std::array -ParticleContainer +ParticleContainer_impl ::PrintCapacity () const { Long cnt = 0; @@ -537,10 +545,10 @@ ParticleContainer return {mn, mx, cnt}; } -template class Allocator> void -ParticleContainer::ShrinkToFit () +ParticleContainer_impl::ShrinkToFit () { for (unsigned lev = 0; lev < m_particles.size(); lev++) { auto& pmap = m_particles[lev]; @@ -557,10 +565,10 @@ ParticleContainer::Sh */ -template class Allocator> void -ParticleContainer::Increment (MultiFab& mf, int lev) +ParticleContainer_impl::Increment (MultiFab& mf, int lev) { BL_PROFILE("ParticleContainer::Increment"); @@ -582,20 +590,20 @@ ParticleContainer::In }, false); } -template class Allocator> Long -ParticleContainer::IncrementWithTotal (MultiFab& mf, int lev, bool local) +ParticleContainer_impl::IncrementWithTotal (MultiFab& mf, int lev, bool local) { BL_PROFILE("ParticleContainer::IncrementWithTotal(lev)"); Increment(mf, lev); return TotalNumberOfParticles(true, local); } -template class Allocator> void -ParticleContainer::RemoveParticlesAtLevel (int level) +ParticleContainer_impl::RemoveParticlesAtLevel (int level) { BL_PROFILE("ParticleContainer::RemoveParticlesAtLevel()"); if (level >= int(this->m_particles.size())) return; @@ -606,10 +614,10 @@ ParticleContainer::Re } } -template class Allocator> void -ParticleContainer::RemoveParticlesNotAtFinestLevel () +ParticleContainer_impl::RemoveParticlesNotAtFinestLevel () { BL_PROFILE("ParticleContainer::RemoveParticlesNotAtFinestLevel()"); AMREX_ASSERT(this->finestLevel()+1 == int(this->m_particles.size())); @@ -672,10 +680,10 @@ struct TransformerVirt }; -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CreateVirtualParticles (int level, AoS& virts) const { ParticleTileType ptile; @@ -683,10 +691,10 @@ ParticleContainer ptile.GetArrayOfStructs().swap(virts); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CreateVirtualParticles (int level, ParticleTileType& virts) const { BL_PROFILE("ParticleContainer::CreateVirtualParticles()"); @@ -914,10 +922,10 @@ struct TransformerGhost } }; -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CreateGhostParticles (int level, int nGrow, AoS& ghosts) const { ParticleTileType ptile; @@ -925,10 +933,10 @@ ParticleContainer ptile.GetArrayOfStructs().swap(ghosts); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CreateGhostParticles (int level, int nGrow, ParticleTileType& ghosts) const { BL_PROFILE("ParticleContainer::CreateGhostParticles()"); @@ -959,10 +967,10 @@ ParticleContainer Gpu::streamSynchronize(); } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: clearParticles () { BL_PROFILE("ParticleContainer::clearParticles()"); @@ -974,35 +982,35 @@ clearParticles () } } -template class Allocator> template ::value, int> foo> void -ParticleContainer:: +ParticleContainer_impl:: copyParticles (const PCType& other, bool local) { - using PData = ConstParticleTileData; + using PData = ConstParticleTileData; copyParticles(other, [=] AMREX_GPU_HOST_DEVICE (const PData& /*data*/, int /*i*/) { return 1; }, local); } -template class Allocator> template ::value, int> foo> void -ParticleContainer:: +ParticleContainer_impl:: addParticles (const PCType& other, bool local) { - using PData = ConstParticleTileData; + using PData = ConstParticleTileData; addParticles(other, [=] AMREX_GPU_HOST_DEVICE (const PData& /*data*/, int /*i*/) { return 1; }, local); } -template class Allocator> template ::value, int> foo, std::enable_if_t::value, int> bar> void -ParticleContainer:: +ParticleContainer_impl:: copyParticles (const PCType& other, F&& f, bool local) { BL_PROFILE("ParticleContainer::copyParticles"); @@ -1010,13 +1018,13 @@ copyParticles (const PCType& other, F&& f, bool local) addParticles(other, std::forward(f), local); } -template class Allocator> template ::value, int> foo, std::enable_if_t::value, int> bar> void -ParticleContainer:: +ParticleContainer_impl:: addParticles (const PCType& other, F&& f, bool local) { BL_PROFILE("ParticleContainer::addParticles"); @@ -1049,10 +1057,10 @@ addParticles (const PCType& other, F&& f, bool local) // // This redistributes valid particles and discards invalid ones. // -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::Redistribute (int lev_min, int lev_max, int nGrow, int local, bool remove_negative) { BL_PROFILE_SYNC_START_TIMED("SyncBeforeComms: Redist"); @@ -1073,11 +1081,11 @@ ParticleContainer BL_PROFILE_SYNC_STOP(); } -template class Allocator> template void -ParticleContainer +ParticleContainer_impl ::ReorderParticles (int lev, const MFIter& mfi, const index_type* permutations) { auto& ptile = ParticlesAt(lev, mfi); @@ -1086,7 +1094,7 @@ ParticleContainer const size_t np_total = np + aos.numNeighborParticles(); if (memEfficientSort) { - { + if constexpr(!ParticleType::is_soa_particle) { ParticleVector tmp_particles(np_total); auto src = ptile.getParticleTileData(); ParticleType* dst = tmp_particles.data(); @@ -1139,18 +1147,18 @@ ParticleContainer } } -template class Allocator> void -ParticleContainer::SortParticlesByCell () +ParticleContainer_impl::SortParticlesByCell () { SortParticlesByBin(IntVect(AMREX_D_DECL(1, 1, 1))); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::SortParticlesByBin (IntVect bin_size) { BL_PROFILE("ParticleContainer::SortParticlesByBin()"); @@ -1168,8 +1176,8 @@ ParticleContainer { auto& ptile = ParticlesAt(lev, mfi); auto& aos = ptile.GetArrayOfStructs(); - auto pstruct_ptr = aos().dataPtr(); - const size_t np = aos.numParticles(); + auto *pstruct_ptr = aos().dataPtr(); + const size_t np = ptile.numParticles(); const Box& box = mfi.validbox(); @@ -1181,10 +1189,10 @@ ParticleContainer } } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::SortParticlesForDeposition (IntVect idx_type) { BL_PROFILE("ParticleContainer::SortParticlesForDeposition()"); @@ -1213,10 +1221,10 @@ ParticleContainer // // The GPU implementation of Redistribute // -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::RedistributeGPU (int lev_min, int lev_max, int nGrow, int local, bool remove_negative) { #ifdef AMREX_USE_GPU @@ -1299,11 +1307,12 @@ ParticleContainer auto p_levs = op.m_levels[lev][gid].dataPtr(); auto p_src_indices = op.m_src_indices[lev][gid].dataPtr(); auto p_periodic_shift = op.m_periodic_shift[lev][gid].dataPtr(); - auto p_ptr = &(aos[0]); + auto ptd = src_tile.getParticleTileData(); AMREX_FOR_1D ( num_move, i, { - const auto& p = p_ptr[i + num_stay]; + const auto p = make_particle{}(ptd,i + num_stay); + if (p.id() < 0) { p_boxes[i] = -1; @@ -1405,10 +1414,10 @@ ParticleContainer // // The CPU implementation of Redistribute // -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::RedistributeCPU (int lev_min, int lev_max, int nGrow, int local, bool remove_negative) { BL_PROFILE("ParticleContainer::RedistributeCPU()"); @@ -1502,28 +1511,143 @@ ParticleContainer int thread_num = OpenMP::get_thread_num(); int grid = grid_tile_ids[pmap_it].first; int tile = grid_tile_ids[pmap_it].second; - auto& aos = ptile_ptrs[pmap_it]->GetArrayOfStructs(); auto& soa = ptile_ptrs[pmap_it]->GetStructOfArrays(); - AMREX_ASSERT_WITH_MESSAGE((NumRealComps() == 0 && NumIntComps() == 0) - || aos.size() == soa.size(), - "The AoS and SoA data on this tile are different sizes - " - "perhaps particles have not been initialized correctly?"); - unsigned npart = aos.numParticles(); + auto& aos = ptile_ptrs[pmap_it]->GetArrayOfStructs(); + + // AMREX_ASSERT_WITH_MESSAGE((NumRealComps() == 0 && NumIntComps() == 0) + // || aos.size() == soa.size(), + // "The AoS and SoA data on this tile are different sizes - " + // "perhaps particles have not been initialized correctly?"); + unsigned npart = ptile_ptrs[pmap_it]->numParticles(); ParticleLocData pld; - if (npart != 0) { - Long last = npart - 1; - Long pindex = 0; - while (pindex <= last) { - ParticleType& p = aos[pindex]; + + if constexpr(!ParticleType::is_soa_particle){ + + if (npart != 0) { + Long last = npart - 1; + Long pindex = 0; + while (pindex <= last) { + ParticleType& p = aos[pindex]; + + if ((remove_negative == false) && (p.id() < 0)) { + ++pindex; + continue; + } + + if (p.id() < 0) + { + aos[pindex] = aos[last]; + for (int comp = 0; comp < NumRealComps(); comp++) + soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; + for (int comp = 0; comp < NumIntComps(); comp++) + soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last]; + correctCellVectors(last, pindex, grid, aos[pindex]); + --last; + continue; + } + + locateParticle(p, pld, lev_min, lev_max, nGrow, local ? grid : -1); + + particlePostLocate(p, pld, lev); + + if (p.id() < 0) + { + aos[pindex] = aos[last]; + for (int comp = 0; comp < NumRealComps(); comp++) + soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; + for (int comp = 0; comp < NumIntComps(); comp++) + soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last]; + correctCellVectors(last, pindex, grid, aos[pindex]); + --last; + continue; + } + + const int who = ParallelContext::global_to_local_rank(ParticleDistributionMap(pld.m_lev)[pld.m_grid]); + if (who == MyProc) { + if (pld.m_lev != lev || pld.m_grid != grid || pld.m_tile != tile) { + // We own it but must shift it to another place. + auto index = std::make_pair(pld.m_grid, pld.m_tile); + AMREX_ASSERT(tmp_local[pld.m_lev][index].size() == num_threads); + tmp_local[pld.m_lev][index][thread_num].push_back(p); + for (int comp = 0; comp < NumRealComps(); ++comp) { + RealVector& arr = soa_local[pld.m_lev][index][thread_num].GetRealData(comp); + arr.push_back(soa.GetRealData(comp)[pindex]); + } + for (int comp = 0; comp < NumIntComps(); ++comp) { + IntVector& arr = soa_local[pld.m_lev][index][thread_num].GetIntData(comp); + arr.push_back(soa.GetIntData(comp)[pindex]); + } + + p.id() = -p.id(); // Invalidate the particle + } + } + else { + auto& particles_to_send = tmp_remote[who][thread_num]; + auto old_size = particles_to_send.size(); + auto new_size = old_size + superparticle_size; + particles_to_send.resize(new_size); + std::memcpy(&particles_to_send[old_size], &p, particle_size); + char* dst = &particles_to_send[old_size] + particle_size; + int array_comp_start = AMREX_SPACEDIM + NStructReal; + for (int comp = 0; comp < NumRealComps(); comp++) { + if (h_redistribute_real_comp[array_comp_start + comp]) { + std::memcpy(dst, &soa.GetRealData(comp)[pindex], sizeof(ParticleReal)); + dst += sizeof(ParticleReal); + } + } + array_comp_start = 2 + NStructInt; + for (int comp = 0; comp < NumIntComps(); comp++) { + if (h_redistribute_int_comp[array_comp_start + comp]) { + std::memcpy(dst, &soa.GetIntData(comp)[pindex], sizeof(int)); + dst += sizeof(int); + } + } + + p.id() = -p.id(); // Invalidate the particle + } + + if (p.id() < 0) + { + aos[pindex] = aos[last]; + for (int comp = 0; comp < NumRealComps(); comp++) + soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; + for (int comp = 0; comp < NumIntComps(); comp++) + soa.GetIntData(comp)[pindex] = soa.GetIntData(comp)[last]; + correctCellVectors(last, pindex, grid, aos[pindex]); + --last; + continue; + } + + ++pindex; + } + + aos().erase(aos().begin() + last + 1, aos().begin() + npart); + for (int comp = 0; comp < NumRealComps(); comp++) { + RealVector& rdata = soa.GetRealData(comp); + rdata.erase(rdata.begin() + last + 1, rdata.begin() + npart); + } + for (int comp = 0; comp < NumIntComps(); comp++) { + IntVector& idata = soa.GetIntData(comp); + idata.erase(idata.begin() + last + 1, idata.begin() + npart); + } + } + + } else{ // soa particle + + auto particle_tile = ptile_ptrs[pmap_it]; + if (npart != 0) { + Long last = npart - 1; + Long pindex = 0; + while (pindex <= last) { + auto ptd = particle_tile->getParticleTileData(); + ParticleType p(ptd,pindex); if ((remove_negative == false) && (p.id() < 0)) { ++pindex; continue; - } + } - if (p.id() < 0) - { - aos[pindex] = aos[last]; + if (p.id() < 0){ for (int comp = 0; comp < NumRealComps(); comp++) soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; for (int comp = 0; comp < NumIntComps(); comp++) @@ -1531,15 +1655,13 @@ ParticleContainer correctCellVectors(last, pindex, grid, aos[pindex]); --last; continue; - } + } locateParticle(p, pld, lev_min, lev_max, nGrow, local ? grid : -1); particlePostLocate(p, pld, lev); - if (p.id() < 0) - { - aos[pindex] = aos[last]; + if (p.id() < 0) { for (int comp = 0; comp < NumRealComps(); comp++) soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; for (int comp = 0; comp < NumIntComps(); comp++) @@ -1547,7 +1669,7 @@ ParticleContainer correctCellVectors(last, pindex, grid, aos[pindex]); --last; continue; - } + } const int who = ParallelContext::global_to_local_rank(ParticleDistributionMap(pld.m_lev)[pld.m_grid]); if (who == MyProc) { @@ -1566,36 +1688,33 @@ ParticleContainer } p.id() = -p.id(); // Invalidate the particle - } - } + } + } else { auto& particles_to_send = tmp_remote[who][thread_num]; auto old_size = particles_to_send.size(); auto new_size = old_size + superparticle_size; particles_to_send.resize(new_size); - std::memcpy(&particles_to_send[old_size], &p, particle_size); - char* dst = &particles_to_send[old_size] + particle_size; + + char* dst = &particles_to_send[old_size]; int array_comp_start = AMREX_SPACEDIM + NStructReal; for (int comp = 0; comp < NumRealComps(); comp++) { if (h_redistribute_real_comp[array_comp_start + comp]) { std::memcpy(dst, &soa.GetRealData(comp)[pindex], sizeof(ParticleReal)); dst += sizeof(ParticleReal); } - } + } array_comp_start = 2 + NStructInt; for (int comp = 0; comp < NumIntComps(); comp++) { if (h_redistribute_int_comp[array_comp_start + comp]) { std::memcpy(dst, &soa.GetIntData(comp)[pindex], sizeof(int)); dst += sizeof(int); } - } - + } p.id() = -p.id(); // Invalidate the particle - } + } - if (p.id() < 0) - { - aos[pindex] = aos[last]; + if (p.id() < 0){ for (int comp = 0; comp < NumRealComps(); comp++) soa.GetRealData(comp)[pindex] = soa.GetRealData(comp)[last]; for (int comp = 0; comp < NumIntComps(); comp++) @@ -1603,12 +1722,11 @@ ParticleContainer correctCellVectors(last, pindex, grid, aos[pindex]); --last; continue; - } + } ++pindex; - } + } - aos().erase(aos().begin() + last + 1, aos().begin() + npart); for (int comp = 0; comp < NumRealComps(); comp++) { RealVector& rdata = soa.GetRealData(comp); rdata.erase(rdata.begin() + last + 1, rdata.begin() + npart); @@ -1616,8 +1734,9 @@ ParticleContainer for (int comp = 0; comp < NumIntComps(); comp++) { IntVector& idata = soa.GetIntData(comp); idata.erase(idata.begin() + last + 1, idata.begin() + npart); - } - } + } + } + } } } @@ -1737,10 +1856,10 @@ ParticleContainer } } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: RedistributeMPI (std::map >& not_ours, int lev_min, int lev_max, int nGrow, int local) { @@ -1866,12 +1985,39 @@ RedistributeMPI (std::map >& not_ours, for (int i = 0; i < int(Cnt); ++i) { char* pbuf = ((char*) &recvdata[offset]) + i*superparticle_size; - ParticleType p; - std::memcpy(&p, pbuf, sizeof(ParticleType)); - locateParticle(p, pld, lev_min, lev_max, nGrow); + + Particle<0, 0> p; + ParticleReal pos[AMREX_SPACEDIM]; + std::memcpy(&pos[0], pbuf, AMREX_SPACEDIM*sizeof(ParticleReal)); + AMREX_D_TERM(p.pos(0) = pos[0];, + p.pos(1) = pos[1];, + p.pos(2) = pos[2]); + + if constexpr (!ParticleType::is_soa_particle) { + std::memcpy(&(p.m_idcpu), pbuf + NumRealComps()*sizeof(ParticleReal), sizeof(uint64_t)); + } else { + int idcpu[2]; + std::memcpy(&idcpu[0], pbuf + NumRealComps()*sizeof(ParticleReal), 2*sizeof(int)); + + p.id() = idcpu[0]; + p.cpu() = idcpu[1]; + } + + bool success = Where(p, pld, lev_min, lev_max, 0); + if (!success) + { + success = (nGrow > 0) && Where(p, pld, lev_min, lev_min, nGrow); + pld.m_grown_gridbox = pld.m_gridbox; // reset grown box for subsequent calls. + } + if (!success) + { + amrex::Abort("RedistributeMPI_locate:: invalid particle."); + } + rcv_levs[ipart] = pld.m_lev; rcv_grid[ipart] = pld.m_grid; rcv_tile[ipart] = pld.m_tile; + ++ipart; } } @@ -1893,10 +2039,13 @@ RedistributeMPI (std::map >& not_ours, rcv_tile[ipart])]; char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size; - ParticleType p; - std::memcpy(&p, pbuf, sizeof(ParticleType)); - pbuf += sizeof(ParticleType); - ptile.push_back(p); + if constexpr(! ParticleType::is_soa_particle) { + ParticleType p; + std::memcpy(&p, pbuf, sizeof(ParticleType)); + pbuf += sizeof(ParticleType); + ptile.push_back(p); + } + int array_comp_start = AMREX_SPACEDIM + NStructReal; for (int comp = 0; comp < NumRealComps(); ++comp) { if (h_redistribute_real_comp[array_comp_start + comp]) { @@ -1923,6 +2072,7 @@ RedistributeMPI (std::map >& not_ours, ++ipart; } } + #else Vector, Gpu::HostVector > > host_particles; host_particles.reserve(15); @@ -1951,16 +2101,16 @@ RedistributeMPI (std::map >& not_ours, char* pbuf = ((char*) &recvdata[offset]) + j*superparticle_size; - ParticleType p; - std::memcpy(&p, pbuf, sizeof(ParticleType)); - pbuf += sizeof(ParticleType); + if constexpr(! ParticleType::is_soa_particle) { + ParticleType p; + std::memcpy(&p, pbuf, sizeof(ParticleType)); + pbuf += sizeof(ParticleType); + host_particles[lev][ind].push_back(p); + } host_real_attribs[lev][ind].resize(NumRealComps()); host_int_attribs[lev][ind].resize(NumIntComps()); - // add the struct - host_particles[lev][ind].push_back(p); - // add the real... int array_comp_start = AMREX_SPACEDIM + NStructReal; for (int comp = 0; comp < NumRealComps(); ++comp) { @@ -2032,10 +2182,10 @@ RedistributeMPI (std::map >& not_ours, #endif } -template class Allocator> bool -ParticleContainer::OK (int lev_min, int lev_max, int nGrow) const +ParticleContainer_impl::OK (int lev_min, int lev_max, int nGrow) const { BL_PROFILE("ParticleContainer::OK()"); @@ -2045,10 +2195,10 @@ ParticleContainer::OK return (numParticlesOutOfRange(*this, lev_min, lev_max, nGrow) == 0); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::AddParticlesAtLevel (AoS& particles, int level, int nGrow) { ParticleTileType ptile; @@ -2056,10 +2206,10 @@ ParticleContainer AddParticlesAtLevel(ptile, level, nGrow); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::AddParticlesAtLevel (ParticleTileType& particles, int level, int nGrow) { BL_PROFILE("ParticleContainer::AddParticlesAtLevel()"); @@ -2091,10 +2241,10 @@ ParticleContainer } // This is the single-level version for cell-centered density -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: AssignCellDensitySingleLevel (int rho_index, MultiFab& mf_to_be_filled, int lev, @@ -2140,16 +2290,15 @@ AssignCellDensitySingleLevel (int rho_index, mf_pointer->setVal(0); - using ParConstIter = ParConstIter; + using ParConstIter = ParConstIter_impl; #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif { FArrayBox local_rho; for (ParConstIter pti(*this, lev); pti.isValid(); ++pti) { - const auto& particles = pti.GetArrayOfStructs(); - const auto pstruct = particles().data(); const Long np = pti.numParticles(); + auto ptd = pti.GetParticleTile().getConstParticleTileData(); FArrayBox& fab = (*mf_pointer)[pti]; auto rhoarr = fab.array(); #ifdef AMREX_USE_OMP @@ -2168,14 +2317,16 @@ AssignCellDensitySingleLevel (int rho_index, { AMREX_HOST_DEVICE_FOR_1D( np, i, { - amrex_deposit_cic(pstruct[i], ncomp, rhoarr, plo, dxi); + auto p = make_particle{}(ptd,i); + amrex_deposit_cic(p, ncomp, rhoarr, plo, dxi); }); } else { AMREX_HOST_DEVICE_FOR_1D( np, i, { - amrex_deposit_particle_dx_cic(pstruct[i], ncomp, rhoarr, plo, dxi, pdxi); + auto p = make_particle{}(ptd,i); + amrex_deposit_particle_dx_cic(p, ncomp, rhoarr, plo, dxi, pdxi); }); } @@ -2229,10 +2380,10 @@ AssignCellDensitySingleLevel (int rho_index, } } -template class Allocator> void -ParticleContainer::Interpolate (Vector >& mesh_data, +ParticleContainer_impl::Interpolate (Vector >& mesh_data, int lev_min, int lev_max) { BL_PROFILE("ParticleContainer::Interpolate()"); @@ -2241,10 +2392,10 @@ ParticleContainer::In } } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: InterpolateSingleLevel (MultiFab& mesh_data, int lev) { BL_PROFILE("ParticleContainer::InterpolateSingleLevel()"); @@ -2256,7 +2407,7 @@ InterpolateSingleLevel (MultiFab& mesh_data, int lev) const auto plo = gm.ProbLoArray(); const auto dxi = gm.InvCellSizeArray(); - using ParIter = ParIter; + using ParIter = ParIter_impl; #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) @@ -2264,7 +2415,7 @@ InterpolateSingleLevel (MultiFab& mesh_data, int lev) for (ParIter pti(*this, lev); pti.isValid(); ++pti) { auto& particles = pti.GetArrayOfStructs(); - auto pstruct = particles().data(); + auto ptd = pti.GetParticleTile().getParticleTileData(); FArrayBox& fab = mesh_data[pti]; const auto fabarr = fab.array(); const Long np = particles.numParticles(); @@ -2272,15 +2423,16 @@ InterpolateSingleLevel (MultiFab& mesh_data, int lev) int nComp = fab.nComp(); AMREX_FOR_1D( np, i, { - amrex_interpolate_cic(pstruct[i], nComp, fabarr, plo, dxi); + auto p = make_particle{}(ptd,i); + amrex_interpolate_cic(p, nComp, fabarr, plo, dxi); }); } } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: ResizeRuntimeRealComp (int new_size, bool communicate) { int old_size = m_num_runtime_real; @@ -2303,10 +2455,10 @@ ResizeRuntimeRealComp (int new_size, bool communicate) } } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: ResizeRuntimeIntComp (int new_size, bool communicate) { int old_size = m_num_runtime_int; diff --git a/Src/Particle/AMReX_ParticleIO.H b/Src/Particle/AMReX_ParticleIO.H index 4ee6d9d78be..a9849ca8a3c 100644 --- a/Src/Particle/AMReX_ParticleIO.H +++ b/Src/Particle/AMReX_ParticleIO.H @@ -4,10 +4,10 @@ #include -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WriteParticleRealData (void* data, size_t size, std::ostream& os) const { if (sizeof(typename ParticleType::RealType) == 4) { @@ -18,10 +18,10 @@ ParticleContainer } } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::ReadParticleRealData (void* data, size_t size, std::istream& is) { if (sizeof(typename ParticleType::RealType) == 4) { @@ -32,10 +32,10 @@ ParticleContainer } } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::Checkpoint (const std::string& dir, const std::string& name, bool /*is_checkpoint*/, const Vector& real_comp_names, @@ -83,10 +83,10 @@ ParticleContainer }, true); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFile (const std::string& dir, const std::string& name) const { Vector write_real_comp; @@ -117,10 +117,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFile (const std::string& dir, const std::string& name, const Vector& real_comp_names, const Vector& int_comp_names) const @@ -143,10 +143,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFile (const std::string& dir, const std::string& name, const Vector& real_comp_names) const { @@ -175,10 +175,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFile (const std::string& dir, const std::string& name, const Vector& write_real_comp, @@ -211,10 +211,10 @@ ParticleContainer }); } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: WritePlotFile (const std::string& dir, const std::string& name, const Vector& write_real_comp, const Vector& write_int_comp, @@ -232,11 +232,11 @@ WritePlotFile (const std::string& dir, const std::string& name, }); } -template class Allocator> template &>::value>::type*> void -ParticleContainer +ParticleContainer_impl ::WritePlotFile (const std::string& dir, const std::string& name, F&& f) const { Vector write_real_comp; @@ -264,11 +264,11 @@ ParticleContainer std::forward(f)); } -template class Allocator> template void -ParticleContainer +ParticleContainer_impl ::WritePlotFile (const std::string& dir, const std::string& name, const Vector& real_comp_names, const Vector& int_comp_names, F&& f) const @@ -288,11 +288,11 @@ ParticleContainer std::forward(f)); } -template class Allocator> template >::value>::type*> void -ParticleContainer +ParticleContainer_impl ::WritePlotFile (const std::string& dir, const std::string& name, const Vector& real_comp_names, F&& f) const { @@ -318,11 +318,11 @@ ParticleContainer std::forward(f)); } -template class Allocator> template void -ParticleContainer +ParticleContainer_impl ::WritePlotFile (const std::string& dir, const std::string& name, const Vector& write_real_comp, @@ -352,11 +352,11 @@ ParticleContainer std::forward(f)); } -template class Allocator> template void -ParticleContainer:: +ParticleContainer_impl:: WritePlotFile (const std::string& dir, const std::string& name, const Vector& write_real_comp, const Vector& write_int_comp, @@ -372,11 +372,11 @@ WritePlotFile (const std::string& dir, const std::string& name, std::forward(f)); } -template class Allocator> template void -ParticleContainer +ParticleContainer_impl ::WriteBinaryParticleData (const std::string& dir, const std::string& name, const Vector& write_real_comp, const Vector& write_int_comp, @@ -397,10 +397,10 @@ ParticleContainer } } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CheckpointPre () { if( ! usePrePost) { @@ -454,10 +454,10 @@ ParticleContainer } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::CheckpointPost () { if( ! usePrePost) { @@ -510,30 +510,30 @@ ParticleContainer } } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFilePre () { CheckpointPre(); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WritePlotFilePost () { CheckpointPost(); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WriteParticles (int lev, std::ofstream& ofs, int fnum, Vector& which, Vector& count, Vector& where, const Vector& write_real_comp, @@ -585,19 +585,19 @@ ParticleContainer } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::Restart (const std::string& dir, const std::string& file, bool /*is_checkpoint*/) { Restart(dir, file); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::Restart (const std::string& dir, const std::string& file) { BL_PROFILE("ParticleContainer::Restart()"); @@ -891,11 +891,11 @@ ParticleContainer } // Read a batch of particles from the checkpoint file -template class Allocator> template void -ParticleContainer +ParticleContainer_impl ::ReadParticles (int cnt, int grd, int lev, std::ifstream& ifs, int finest_level_in_file, bool convert_ids) { @@ -1028,10 +1028,10 @@ ParticleContainer Gpu::streamSynchronize(); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::WriteAsciiFile (const std::string& filename) { BL_PROFILE("ParticleContainer::WriteAsciiFile()"); @@ -1119,7 +1119,7 @@ ParticleContainer for (int lev = 0; lev < m_particles.size(); lev++) { auto& pmap = m_particles[lev]; for (const auto& kv : pmap) { - ParticleTile pinned_ptile; pinned_ptile.define(NumRuntimeRealComps(), NumRuntimeIntComps()); pinned_ptile.resize(kv.second.numParticles()); diff --git a/Src/Particle/AMReX_ParticleInit.H b/Src/Particle/AMReX_ParticleInit.H index 5e536db03a9..dbb354800ef 100644 --- a/Src/Particle/AMReX_ParticleInit.H +++ b/Src/Particle/AMReX_ParticleInit.H @@ -30,10 +30,10 @@ across the domain so that you only need to specify a sub-volume of them. By default particles are not replicated. */ -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::InitFromAsciiFile (const std::string& file, int extradata, const IntVect* Nrep) { BL_PROFILE("ParticleContainer::InitFromAsciiFile()"); @@ -233,7 +233,7 @@ ParticleContainer if (m_verbose) { amrex::AllPrint() << "BAD REPLICATED PARTICLE ID WOULD BE " << ParticleType::NextID() << "\n"; } - amrex::Abort("ParticleContainer::InitFromAsciiFile(): invalid replicated particle"); + amrex::Abort("ParticleContainer_impl::InitFromAsciiFile(): invalid replicated particle"); } } @@ -470,10 +470,10 @@ ParticleContainer // Note that there is nothing separating all these values. // They're packed into the binary file like sardines. // -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: InitFromBinaryFile (const std::string& file, int extradata) { @@ -614,22 +614,22 @@ InitFromBinaryFile (const std::string& file, // NP MUST be positive! // if (NP <= 0) - amrex::Abort("ParticleContainer::InitFromBinaryFile(): NP <= 0"); + amrex::Abort("ParticleContainer_impl::InitFromBinaryFile(): NP <= 0"); // // DM must equal AMREX_SPACEDIM. // if (DM != AMREX_SPACEDIM) - amrex::Abort("ParticleContainer::InitFromBinaryFile(): DM != AMREX_SPACEDIM"); + amrex::Abort("ParticleContainer_impl::InitFromBinaryFile(): DM != AMREX_SPACEDIM"); // // NX MUST be in [0,N]. // if (NX < 0 || NX > NStructReal) - amrex::Abort("ParticleContainer::InitFromBinaryFile(): NX < 0 || NX > N"); + amrex::Abort("ParticleContainer_impl::InitFromBinaryFile(): NX < 0 || NX > N"); // // Can't ask for more data than exists in the file! // if (extradata > NX) - amrex::Abort("ParticleContainer::InitFromBinaryFile(): extradata > NX"); + amrex::Abort("ParticleContainer_impl::InitFromBinaryFile(): extradata > NX"); // // Figure out whether we're dealing with floats or doubles. // @@ -813,7 +813,7 @@ InitFromBinaryFile (const std::string& file, << p.pos(2)) << "\n"; } - amrex::Abort("ParticleContainer::InitFromBinaryFile(): invalid particle"); + amrex::Abort("ParticleContainer_impl::InitFromBinaryFile(): invalid particle"); } } @@ -905,10 +905,10 @@ InitFromBinaryFile (const std::string& file, // one file name per line. // -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: InitFromBinaryMetaFile (const std::string& metafile, int extradata) { @@ -943,10 +943,10 @@ InitFromBinaryMetaFile (const std::string& metafile, } } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: InitRandom (Long icount, ULong iseed, const ParticleInitData& pdata, @@ -1166,7 +1166,7 @@ InitRandom (Long icount, // locate the particle if (!Where(p, pld)) { - amrex::Abort("ParticleContainer::InitRandom(): invalid particle"); + amrex::Abort("ParticleContainer_impl::InitRandom(): invalid particle"); } AMREX_ASSERT(pld.m_lev >= 0 && pld.m_lev <= finestLevel()); std::pair ind(pld.m_grid, pld.m_tile); @@ -1227,16 +1227,16 @@ InitRandom (Long icount, ParallelDescriptor::ReduceRealMax(stoptime,IOProc); - amrex::Print() << "ParticleContainer::InitRandom() time: " << stoptime << '\n'; + amrex::Print() << "ParticleContainer_impl::InitRandom() time: " << stoptime << '\n'; } Gpu::streamSynchronize(); } -template class Allocator> void -ParticleContainer +ParticleContainer_impl ::InitRandomPerBox (Long icount_per_box, ULong iseed, const ParticleInitData& pdata) @@ -1327,14 +1327,14 @@ ParticleContainer ParallelDescriptor::ReduceRealMax(stoptime,IOProc); - amrex::Print() << "ParticleContainer::InitRandomPerBox() time: " << stoptime << '\n'; + amrex::Print() << "ParticleContainer_impl::InitRandomPerBox() time: " << stoptime << '\n'; } } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: InitOnePerCell (Real x_off, Real y_off, Real z_off, const ParticleInitData& pdata) { amrex::ignore_unused(y_off,z_off); @@ -1361,7 +1361,7 @@ InitOnePerCell (Real x_off, Real y_off, Real z_off, const ParticleInitData& pdat Box grid = ParticleBoxArray(0)[mfi.index()]; auto ind = std::make_pair(mfi.index(), mfi.LocalTileIndex()); RealBox grid_box (grid,dx,geom.ProbLo()); - ParticleTile ptile_tmp; + ParticleTile ptile_tmp; for (IntVect beg = grid.smallEnd(), end=grid.bigEnd(), cell = grid.smallEnd(); cell <= end; grid.next(cell)) { // the real struct data @@ -1411,14 +1411,14 @@ InitOnePerCell (Real x_off, Real y_off, Real z_off, const ParticleInitData& pdat ParallelDescriptor::ReduceRealMax(stoptime,IOProc); - amrex::Print() << "ParticleContainer::InitOnePerCell() time: " << stoptime << '\n'; + amrex::Print() << "ParticleContainer_impl::InitOnePerCell() time: " << stoptime << '\n'; } } -template class Allocator> void -ParticleContainer:: +ParticleContainer_impl:: InitNRandomPerCell (int n_per_cell, const ParticleInitData& pdata) { BL_PROFILE("ParticleContainer::InitNRandomPerCell()"); @@ -1491,7 +1491,7 @@ InitNRandomPerCell (int n_per_cell, const ParticleInitData& pdata) // locate the particle if (!Where(p, pld)) { - amrex::Abort("ParticleContainer::InitNRandomPerCell(): invalid particle"); + amrex::Abort("ParticleContainer_impl::InitNRandomPerCell(): invalid particle"); } AMREX_ASSERT(pld.m_lev >= 0 && pld.m_lev <= finestLevel()); std::pair ind(pld.m_grid, pld.m_tile); @@ -1552,7 +1552,7 @@ InitNRandomPerCell (int n_per_cell, const ParticleInitData& pdata) ParallelDescriptor::ReduceRealMax(stoptime,IOProc); - amrex::Print() << "ParticleContainer::InitNRandomPerCell() time: " << stoptime << '\n'; + amrex::Print() << "ParticleContainer_impl::InitNRandomPerCell() time: " << stoptime << '\n'; } Gpu::streamSynchronize(); diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H index a0a762862d4..6d1c3b03822 100644 --- a/Src/Particle/AMReX_ParticleTile.H +++ b/Src/Particle/AMReX_ParticleTile.H @@ -7,29 +7,131 @@ #include #include #include +#include +#include #include namespace amrex { -template +// Forward Declaration +template +struct ConstSoAParticle; +template +struct SoAParticle; + +template +struct ConstParticleTileData; + +template struct ParticleTileData { static constexpr int NAR = NArrayReal; static constexpr int NAI = NArrayInt; - using ParticleType = Particle; - using SuperParticleType = Particle; + + using ParticleType = T_ParticleType; + using Self = ParticleTileData; + + static constexpr int NStructReal = ParticleType::NReal; + static constexpr int NStructInt = ParticleType::NInt; + + using SuperParticleType = Particle; Long m_size; + ParticleType* AMREX_RESTRICT m_aos; - GpuArray m_rdata; - GpuArray m_idata; + + GpuArray m_rdata; + GpuArray m_idata; int m_num_runtime_real; int m_num_runtime_int; ParticleReal* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_rdata; int* AMREX_RESTRICT * AMREX_RESTRICT m_runtime_idata; + // AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + // ParticleReal pos (const int dir, const int index) const & + // { + // if constexpr(!ParticleType::is_soa_particle) { + // return this->m_aos[index].pos(dir); + // } else { + // return this->m_rdata[dir][index]; + // } + // } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + ParticleReal& pos (const int dir, const int index) const & + { + if constexpr(!ParticleType::is_soa_particle) { + return this->m_aos[index].pos(dir); + } + return this->m_rdata[dir][index]; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto id (const int index) const & + { + if constexpr(!ParticleType::is_soa_particle) { + return this->m_aos[index].id(); + } else { + return this->m_idata[0][index]; + } + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto& id (const int index) & + { + if constexpr(!ParticleType::is_soa_particle) { + return this->m_aos[index].id(); + } else { + return this->m_idata[0][index]; + } + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto cpu (const int index) const & + { + if constexpr(!ParticleType::is_soa_particle) { + return this->m_aos[index].cpu(); + } else { + return this->m_idata[1][index]; + } + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto& cpu (const int index) & + { + if constexpr(!ParticleType::is_soa_particle) { + return this->m_aos[index].cpu(); + } else { + return this->m_idata[1][index]; + } + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto* rdata (const int attribute_index) + { + return this->m_rdata[attribute_index]; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto const * rdata (const int attribute_index) const + { + return this->m_rdata[attribute_index]; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto* idata (const int attribute_index) + { + return this->m_idata[attribute_index]; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto const * idata (const int attribute_index) const + { + return this->m_idata[attribute_index]; + } + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void packParticleData (char* buffer, int src_index, std::size_t dst_offset, const int* comm_real, const int * comm_int) const noexcept @@ -39,7 +141,7 @@ struct ParticleTileData memcpy(dst, m_aos + src_index, sizeof(ParticleType)); dst += sizeof(ParticleType); int array_start_index = AMREX_SPACEDIM + NStructReal; - for (int i = 0; i < NArrayReal; ++i) + for (int i = 0; i < NAR; ++i) { if (comm_real[array_start_index + i]) { @@ -47,7 +149,7 @@ struct ParticleTileData dst += sizeof(ParticleReal); } } - int runtime_start_index = AMREX_SPACEDIM + NStructReal + NArrayReal; + int runtime_start_index = AMREX_SPACEDIM + NStructReal + NAR; for (int i = 0; i < m_num_runtime_real; ++i) { if (comm_real[runtime_start_index + i]) @@ -57,7 +159,7 @@ struct ParticleTileData } } array_start_index = 2 + NStructInt; - for (int i = 0; i < NArrayInt; ++i) + for (int i = 0; i < NAI; ++i) { if (comm_int[array_start_index + i]) { @@ -65,7 +167,7 @@ struct ParticleTileData dst += sizeof(int); } } - runtime_start_index = 2 + NStructInt + NArrayInt; + runtime_start_index = 2 + NStructInt + NAI; for (int i = 0; i < m_num_runtime_int; ++i) { if (comm_int[runtime_start_index + i]) @@ -85,7 +187,7 @@ struct ParticleTileData memcpy(m_aos + dst_index, src, sizeof(ParticleType)); src += sizeof(ParticleType); int array_start_index = AMREX_SPACEDIM + NStructReal; - for (int i = 0; i < NArrayReal; ++i) + for (int i = 0; i < NAR; ++i) { if (comm_real[array_start_index + i]) { @@ -93,7 +195,7 @@ struct ParticleTileData src += sizeof(ParticleReal); } } - int runtime_start_index = AMREX_SPACEDIM + NStructReal + NArrayReal; + int runtime_start_index = AMREX_SPACEDIM + NStructReal + NAR; for (int i = 0; i < m_num_runtime_real; ++i) { if (comm_real[runtime_start_index + i]) @@ -103,7 +205,7 @@ struct ParticleTileData } } array_start_index = 2 + NStructInt; - for (int i = 0; i < NArrayInt; ++i) + for (int i = 0; i < NAI; ++i) { if (comm_int[array_start_index + i]) { @@ -111,7 +213,7 @@ struct ParticleTileData src += sizeof(int); } } - runtime_start_index = 2 + NStructInt + NArrayInt; + runtime_start_index = 2 + NStructInt + NAI; for (int i = 0; i < m_num_runtime_int; ++i) { if (comm_int[runtime_start_index + i]) @@ -122,6 +224,7 @@ struct ParticleTileData } } + template ::type = 0> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE SuperParticleType getSuperParticle (int index) const noexcept { @@ -131,13 +234,34 @@ struct ParticleTileData sp.pos(i) = m_aos[index].pos(i); for (int i = 0; i < NStructReal; ++i) sp.rdata(i) = m_aos[index].rdata(i); - for (int i = 0; i < NArrayReal; ++i) + for (int i = 0; i < NAR; ++i) sp.rdata(NStructReal+i) = m_rdata[i][index]; sp.id() = m_aos[index].id(); sp.cpu() = m_aos[index].cpu(); for (int i = 0; i < NStructInt; ++i) sp.idata(i) = m_aos[index].idata(i); - for (int i = 0; i < NArrayInt; ++i) + for (int i = 0; i < NAI; ++i) + sp.idata(NStructInt+i) = m_idata[i][index]; + return sp; + } + + template ::type = 0> + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + SuperParticleType getSuperParticle (int index) const noexcept + { + AMREX_ASSERT(index < m_size); + SuperParticleType sp; + for (int i = 0; i < AMREX_SPACEDIM; ++i) + sp.pos(i) = m_rdata[i][index]; + for (int i = 0; i < NStructReal; ++i) + sp.rdata(i) = m_rdata[i][index]; + for (int i = 0; i < NAR; ++i) + sp.rdata(NStructReal+i) = m_rdata[i][index]; + sp.id() = m_idata[0][index]; + sp.cpu() = m_idata[1][index]; + for (int i = 0; i < NStructInt; ++i) + sp.idata(i) = m_idata[i][index]; + for (int i = 0; i < NAI; ++i) sp.idata(NStructInt+i) = m_idata[i][index]; return sp; } @@ -149,29 +273,235 @@ struct ParticleTileData m_aos[index].pos(i) = sp.pos(i); for (int i = 0; i < NStructReal; ++i) m_aos[index].rdata(i) = sp.rdata(i); - for (int i = 0; i < NArrayReal; ++i) + for (int i = 0; i < NAR; ++i) m_rdata[i][index] = sp.rdata(NStructReal+i); m_aos[index].id() = sp.id(); m_aos[index].cpu() = sp.cpu(); for (int i = 0; i < NStructInt; ++i) m_aos[index].idata(i) = sp.idata(i); - for (int i = 0; i < NArrayInt; ++i) + for (int i = 0; i < NAI; ++i) m_idata[i][index] = sp.idata(NStructInt+i); } }; -template +// SOA Particle Structure +template +struct ConstSoAParticle : SoAParticleBase +{ + static constexpr int NArrayReal = T_NArrayReal; + static constexpr int NArrayInt = T_NArrayInt; + using StorageParticleType = SoAParticleBase; + using ConstPTD = ConstParticleTileData; + static constexpr bool is_soa_particle = true; + static constexpr bool is_constsoa_particle = true; + + using RealType = ParticleReal; + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + ConstSoAParticle(ConstPTD const& ptd, long i) : + m_constparticle_tile_data(ptd), m_index(i) + { + } + + //static Long the_next_id; + + //functions to get id and cpu in the SOA data + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + int cpu () const { return this->m_constparticle_tile_data.m_idata[1][m_index]; } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + int id () const { return this->m_constparticle_tile_data.m_idata[0][m_index]; } + + //functions to get positions of the particle in the SOA data + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_constparticle_tile_data->m_rdata[0][m_index], this->m_constparticle_tile_data.m_rdata[1][m_index], this->m_constparticle_tile_data->m_rdata[2][m_index]));} + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + const RealType& pos (int position_index) const & + { + AMREX_ASSERT(position_index < AMREX_SPACEDIM); + return this->m_constparticle_tile_data.m_rdata[position_index][m_index]; + } + + static Long NextID (); + + /** + * \brief This version can only be used inside omp critical. + */ + static Long UnprotectedNextID (); + + /** + * \brief Reset on restart. + * + * \param nextid + */ + static void NextID (Long nextid); + + private : + + static_assert(std::is_trivially_copyable>(), "ParticleTileData is not trivially copyable"); + + ConstPTD m_constparticle_tile_data; + int m_index; +}; + +template +struct SoAParticle : SoAParticleBase +{ + static constexpr int NArrayReal = T_NArrayReal; + static constexpr int NArrayInt = T_NArrayInt; + using StorageParticleType = SoAParticleBase; + using PTD = ParticleTileData; + static constexpr bool is_soa_particle = true; + static constexpr bool is_constsoa_particle = false; + + using ConstType = ConstSoAParticle; + using RealType = ParticleReal; + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + SoAParticle(PTD const& ptd, long i) : + m_particle_tile_data(ptd), m_index(i) + { + } + + static Long the_next_id; + + //functions to get id and cpu in the SOA data + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + int& cpu () & { return this->m_particle_tile_data.m_idata[1][m_index]; } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + int& id () & { return this->m_particle_tile_data.m_idata[0][m_index]; } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + const int& cpu () const & { return this->m_particle_tile_data.m_idata[1][m_index]; } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + const int& id () const & { return this->m_particle_tile_data.m_idata[0][m_index]; } + + //functions to get positions of the particle in the SOA data + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RealVect pos () const & {return RealVect(AMREX_D_DECL(this->m_particle_tile_data->m_rdata[0][m_index], this->m_particle_tile_data.m_rdata[1][m_index], this->m_particle_tile_data->m_rdata[2][m_index]));} + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RealType& pos (int position_index) & + { + AMREX_ASSERT(position_index < AMREX_SPACEDIM); + return this->m_particle_tile_data.m_rdata[position_index][m_index]; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + RealType pos (int position_index) const & + { + AMREX_ASSERT(position_index < AMREX_SPACEDIM); + return this->m_particle_tile_data.m_rdata[position_index][m_index]; + } + + static Long NextID (); + + /** + * \brief This version can only be used inside omp critical. + */ + static Long UnprotectedNextID (); + + /** + * \brief Reset on restart. + * + * \param nextid + */ + static void NextID (Long nextid); + +private : + + static_assert(std::is_trivially_copyable>(), "ParticleTileData is not trivially copyable"); + + PTD m_particle_tile_data; + int m_index; +}; + +//template Long ConstSoAParticle::the_next_id = 1; +template Long SoAParticle::the_next_id = 1; + +template +Long +SoAParticle::NextID () +{ + Long next; +// we should be able to test on _OPENMP < 201107 for capture (version 3.1) +// but we must work around a bug in gcc < 4.9 +#if defined(AMREX_USE_OMP) && defined(_OPENMP) && _OPENMP < 201307 +#pragma omp critical (amrex_particle_nextid) +#elif defined(AMREX_USE_OMP) +#pragma omp atomic capture +#endif + next = the_next_id++; + + if (next > LastParticleID) + amrex::Abort("SoAParticle::NextID() -- too many particles"); + + return next; +} + +template +Long +SoAParticle::UnprotectedNextID () +{ + Long next = the_next_id++; + if (next > LastParticleID) + amrex::Abort("SoAParticle::NextID() -- too many particles"); + return next; +} + +template +void +SoAParticle::NextID (Long nextid) +{ + the_next_id = nextid; +} + +template struct ConstParticleTileData { static constexpr int NAR = NArrayReal; static constexpr int NAI = NArrayInt; - using ParticleType = Particle; + using ParticleType = T_ParticleType; + + static constexpr int NStructReal = ParticleType::NReal; + static constexpr int NStructInt = ParticleType::NInt; + using SuperParticleType = Particle; Long m_size; const ParticleType* AMREX_RESTRICT m_aos; + GpuArray m_rdata; - GpuArray m_idata; + GpuArray m_idata; + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto id (const int index) const & + { + if constexpr(!ParticleType::is_soa_particle) { + return this->m_aos[index].id(); + } else { + return this->m_idata[0][index]; + } + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto const* rdata (const int attribute_index) const + { + return this->m_rdata[attribute_index]; + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto const* idata (const int attribute_index) const + { + return this->m_idata[attribute_index]; + } int m_num_runtime_real; int m_num_runtime_int; @@ -224,6 +554,7 @@ struct ConstParticleTileData } } + template ::type = 0> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE SuperParticleType getSuperParticle (int index) const noexcept { @@ -233,40 +564,68 @@ struct ConstParticleTileData sp.pos(i) = m_aos[index].pos(i); for (int i = 0; i < NStructReal; ++i) sp.rdata(i) = m_aos[index].rdata(i); - for (int i = 0; i < NArrayReal; ++i) - sp.rdata(NStructReal+i) = m_rdata[i][index]; + if constexpr(NArrayReal > 0) + for (int i = 0; i < NArrayReal; ++i) + sp.rdata(NStructReal+i) = m_rdata[i][index]; sp.id() = m_aos[index].id(); sp.cpu() = m_aos[index].cpu(); for (int i = 0; i < NStructInt; ++i) sp.idata(i) = m_aos[index].idata(i); - for (int i = 0; i < NArrayInt; ++i) + if constexpr(NArrayInt > 0) + for (int i = 0; i < NArrayInt; ++i) + sp.idata(NStructInt+i) = m_idata[i][index]; + return sp; + } + + template ::type = 0> + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + SuperParticleType getSuperParticle (int index) const noexcept + { + AMREX_ASSERT(index < m_size); + SuperParticleType sp; + for (int i = 0; i < AMREX_SPACEDIM; ++i) + sp.pos(i) = m_rdata[i][index]; + for (int i = 0; i < NStructReal; ++i) + sp.rdata(i) = m_rdata[i][index]; + for (int i = 0; i < NAR; ++i) + sp.rdata(NStructReal+i) = m_rdata[i][index]; + sp.id() = m_idata[0][index]; + sp.cpu() = m_idata[1][index]; + for (int i = 0; i < NStructInt; ++i) + sp.idata(i) = m_idata[i][index]; + for (int i = 0; i < NAI; ++i) sp.idata(NStructInt+i) = m_idata[i][index]; return sp; } }; -template class Allocator=DefaultAllocator> struct ParticleTile { template using AllocatorType = Allocator; - using ParticleType = Particle; + using ParticleType = T_ParticleType; static constexpr int NAR = NArrayReal; static constexpr int NAI = NArrayInt; + using RealType = typename ParticleType::RealType; + + static constexpr int NStructReal = ParticleType::NReal; + static constexpr int NStructInt = ParticleType::NInt; using SuperParticleType = Particle; - using AoS = ArrayOfStructs; - using ParticleVector = typename AoS::ParticleVector; + using AoS = ArrayOfStructs; + //using ParticleVector = typename AoS::ParticleVector; using SoA = StructOfArrays; using RealVector = typename SoA::RealVector; using IntVector = typename SoA::IntVector; + using StorageParticleType = typename ParticleType::StorageParticleType; - using ParticleTileDataType = ParticleTileData; - using ConstParticleTileDataType = ConstParticleTileData; + using ParticleTileDataType = ParticleTileData; + using ConstParticleTileDataType = ConstParticleTileData; ParticleTile () : m_defined(false) @@ -282,6 +641,105 @@ struct ParticleTile m_runtime_i_cptrs.resize(a_num_runtime_int); } + // Get cpu data + + // AoS + template ::type = 0> + ParticleCPUWrapper cpu (int index) & { + ParticleType p(this->getParticleTileData(), index); + return p.cpu(); + } + + // const + template ::type = 0> + ConstParticleCPUWrapper cpu (int index) const & { + using ConstParticleType = typename ParticleType::ConstType; + ConstParticleType p(this->getConstParticleTileData(), index); + return p.cpu(); + } + + // SoA + template ::type = 0> + ParticleCPUWrapper cpu (int index) & { + ParticleType& p = m_aos_tile().dataPtr()[index]; + return p.cpu(); + } + + // const + + template ::type = 0> + ConstParticleCPUWrapper cpu (int index) const & { + ParticleType& p = m_aos_tile().dataPtr()[index]; + return p.cpu(); + } + + // Get id data + + // AoS + template ::type = 0> + ParticleIDWrapper id (int index) & { + ParticleType p(this->getParticleTileData(), index); + return p.id(); + } + + // const + template ::type = 0> + ConstParticleIDWrapper id (int index) const & { + using ConstParticleType = typename ParticleType::ConstType; + ConstParticleType p(this->getConstParticleTileData(), index); + return p.id(); + } + + // SoA + template ::type = 0> + ParticleIDWrapper id (int index) & { + ParticleType& p = m_aos_tile().dataPtr()[index]; + return p.id(); + } + + // const + template ::type = 0> + ConstParticleIDWrapper id (int index) const & { + ParticleType& p = m_aos_tile().dataPtr()[index]; + return p.id(); + } + + // Get positions data + + template ::type = 0> + RealType& pos (int index, int position_index) & { + static_assert(NArrayReal == T::PTD::NAR, "ParticleTile mismatch in R"); + static_assert(NArrayInt == T::PTD::NAI, "ParticleTile mismatch in I"); + static_assert(0 == T::StorageParticleType::NReal, "ParticleTile 2 mismatch in R"); + static_assert(0 == T::StorageParticleType::NInt, "ParticleTile 2 mismatch in I"); + ParticleTileDataType x=this->getParticleTileData(); + ParticleType p(x, index); + return p.pos(position_index); + } + + template ::type = 0> + RealType pos (int index, int position_index) const & + { + using ConstParticleType = typename ParticleType::ConstType; + ConstParticleType p(this->getConstParticleTileData(), index); + return p.pos(position_index); + } + + template ::type = 0> + RealType& pos (int index, int position_index) & { + ParticleType& p = m_aos_tile().dataPtr()[index]; + return p.pos(position_index); + } + + + template ::type = 0> + RealType pos (int index, int position_index) const & + { + ParticleType& p = m_aos_tile().dataPtr()[index]; + return p.pos(position_index); + } + + AoS& GetArrayOfStructs () { return m_aos_tile; } const AoS& GetArrayOfStructs () const { return m_aos_tile; } @@ -295,32 +753,52 @@ struct ParticleTile * */ + template ::type = 0> std::size_t size () const { return m_aos_tile.size(); } + template ::type = 0> + std::size_t size () const { return m_soa_tile.size(); } + /** * \brief Returns the number of real particles (excluding neighbors) * */ + template ::type = 0> int numParticles () const { return m_aos_tile.numParticles(); } + template ::type = 0> + int numParticles () const { return m_soa_tile.numParticles(); } + /** * \brief Returns the number of real particles (excluding neighbors) * */ + template ::type = 0> int numRealParticles () const { return m_aos_tile.numRealParticles(); } + template ::type = 0> + int numRealParticles () const { return m_soa_tile.numRealParticles(); } + /** * \brief Returns the number of neighbor particles (excluding reals) * */ + template ::type = 0> int numNeighborParticles () const { return m_aos_tile.numNeighborParticles(); } + template ::type = 0> + int numNeighborParticles () const { return m_soa_tile.numNeighborParticles(); } + /** * \brief Returns the total number of particles, real and neighbor * */ + template ::type = 0> int numTotalParticles () const { return m_aos_tile.numTotalParticles() ; } + template ::type = 0> + int numTotalParticles () const { return m_soa_tile.numTotalParticles() ; } + void setNumNeighbors (int num_neighbors) { m_soa_tile.setNumNeighbors(num_neighbors); @@ -519,7 +997,7 @@ struct ParticleTile return nbytes; } - void swap (ParticleTile& other) + void swap (ParticleTile& other) { m_aos_tile().swap(other.GetArrayOfStructs()()); for (int j = 0; j < NumRealComps(); ++j) @@ -571,10 +1049,12 @@ struct ParticleTile ParticleTileDataType ptd; ptd.m_aos = m_aos_tile().dataPtr(); - for (int i = 0; i < NArrayReal; ++i) - ptd.m_rdata[i] = m_soa_tile.GetRealData(i).dataPtr(); - for (int i = 0; i < NArrayInt; ++i) - ptd.m_idata[i] = m_soa_tile.GetIntData(i).dataPtr(); + if constexpr(NArrayReal > 0) + for (int i = 0; i < NArrayReal; ++i) + ptd.m_rdata[i] = m_soa_tile.GetRealData(i).dataPtr(); + if constexpr(NArrayInt > 0) + for (int i = 0; i < NArrayInt; ++i) + ptd.m_idata[i] = m_soa_tile.GetIntData(i).dataPtr(); ptd.m_size = size(); ptd.m_num_runtime_real = m_runtime_r_ptrs.size(); ptd.m_num_runtime_int = m_runtime_i_ptrs.size(); @@ -626,10 +1106,12 @@ struct ParticleTile ConstParticleTileDataType ptd; ptd.m_aos = m_aos_tile().dataPtr(); - for (int i = 0; i < NArrayReal; ++i) - ptd.m_rdata[i] = m_soa_tile.GetRealData(i).dataPtr(); - for (int i = 0; i < NArrayInt; ++i) - ptd.m_idata[i] = m_soa_tile.GetIntData(i).dataPtr(); + if constexpr(NArrayReal > 0) + for (int i = 0; i < NArrayReal; ++i) + ptd.m_rdata[i] = m_soa_tile.GetRealData(i).dataPtr(); + if constexpr(NArrayInt > 0) + for (int i = 0; i < NArrayInt; ++i) + ptd.m_idata[i] = m_soa_tile.GetIntData(i).dataPtr(); ptd.m_size = size(); ptd.m_num_runtime_real = m_runtime_r_cptrs.size(); ptd.m_num_runtime_int = m_runtime_i_cptrs.size(); @@ -659,6 +1141,6 @@ private: mutable amrex::PODVector >m_runtime_i_cptrs; }; -} // namespace amrex; +} // namespace amrex #endif // AMREX_PARTICLETILE_H_ diff --git a/Src/Particle/AMReX_ParticleTransformation.H b/Src/Particle/AMReX_ParticleTransformation.H index 8fe57749c4a..af932788734 100644 --- a/Src/Particle/AMReX_ParticleTransformation.H +++ b/Src/Particle/AMReX_ParticleTransformation.H @@ -26,22 +26,24 @@ namespace amrex * \param dst_i the index in the destination to write to * */ -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void copyParticle (const ParticleTileData& dst, - const ConstParticleTileData& src, +void copyParticle (const ParticleTileData& dst, + const ConstParticleTileData& src, int src_i, int dst_i) noexcept { AMREX_ASSERT(dst.m_num_runtime_real == src.m_num_runtime_real); AMREX_ASSERT(dst.m_num_runtime_int == src.m_num_runtime_int ); dst.m_aos[dst_i] = src.m_aos[src_i]; - for (int j = 0; j < NAR; ++j) - dst.m_rdata[j][dst_i] = src.m_rdata[j][src_i]; + if constexpr(NAR > 0) + for (int j = 0; j < NAR; ++j) + dst.m_rdata[j][dst_i] = src.m_rdata[j][src_i]; for (int j = 0; j < dst.m_num_runtime_real; ++j) dst.m_runtime_rdata[j][dst_i] = src.m_runtime_rdata[j][src_i]; - for (int j = 0; j < NAI; ++j) - dst.m_idata[j][dst_i] = src.m_idata[j][src_i]; + if constexpr(NAI > 0) + for (int j = 0; j < NAI; ++j) + dst.m_idata[j][dst_i] = src.m_idata[j][src_i]; for (int j = 0; j < dst.m_num_runtime_int; ++j) dst.m_runtime_idata[j][dst_i] = src.m_runtime_idata[j][src_i]; } @@ -60,10 +62,10 @@ void copyParticle (const ParticleTileData& dst, * \param dst_i the index in the destination to write to * */ -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void copyParticle (const ParticleTileData& dst, - const ParticleTileData& src, +void copyParticle (const ParticleTileData& dst, + const ParticleTileData& src, int src_i, int dst_i) noexcept { AMREX_ASSERT(dst.m_num_runtime_real == src.m_num_runtime_real); @@ -94,10 +96,10 @@ void copyParticle (const ParticleTileData& dst, * \param dst_i the index in the destination to write to * */ -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void swapParticle (const ParticleTileData& dst, - const ParticleTileData& src, +void swapParticle (const ParticleTileData& dst, + const ParticleTileData& src, int src_i, int dst_i) noexcept { AMREX_ASSERT(dst.m_num_runtime_real == src.m_num_runtime_real); diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index db9dfef8f21..d9d26e8bacb 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -56,10 +57,10 @@ auto call_f (F const& f, SrcData const& src, N i, amrex::RandomEngine const&) no // The next several functions are used by ParticleReduce // Lambda takes a Particle -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i) noexcept -> decltype(f(p.m_aos[i])) { @@ -67,11 +68,11 @@ auto call_f (F const& f, } // Lambda takes a SuperParticle -template ::type = 0> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i) noexcept -> decltype(f(p.getSuperParticle(i))) { @@ -79,10 +80,10 @@ auto call_f (F const& f, } // Lambda takes a ConstParticleTileData -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i) noexcept -> decltype(f(p, i)) { @@ -92,10 +93,10 @@ auto call_f (F const& f, // These next several functions are used by ParticleToMesh and MeshToParticle // Lambda takes a Particle -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const& plo, GpuArray const& dxi) noexcept @@ -105,10 +106,10 @@ auto call_f (F const& f, } // Lambda takes a Particle -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const&, GpuArray const&) noexcept @@ -118,10 +119,10 @@ auto call_f (F const& f, } // Lambda takes a Particle -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ParticleTileData& p, + const ParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const& plo, GpuArray const& dxi) noexcept @@ -131,10 +132,10 @@ auto call_f (F const& f, } // Lambda takes a Particle -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ParticleTileData& p, + const ParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const&, GpuArray const&) noexcept @@ -144,11 +145,11 @@ auto call_f (F const& f, } // Lambda takes a SuperParticle -template ::type = 0> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const& plo, GpuArray const& dxi) noexcept @@ -158,11 +159,11 @@ auto call_f (F const& f, } // Lambda takes a SuperParticle -template ::type = 0> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const&, GpuArray const&) noexcept @@ -172,10 +173,10 @@ auto call_f (F const& f, } // Lambda takes a ConstParticleTileData -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const&, GpuArray const&) noexcept @@ -185,10 +186,10 @@ auto call_f (F const& f, } // Lambda takes a ConstParticleTileData -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ConstParticleTileData& p, + const ConstParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const& plo, GpuArray const& dxi) noexcept @@ -198,10 +199,10 @@ auto call_f (F const& f, } // Lambda takes a ParticleTileData -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ParticleTileData& p, + const ParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const& plo, GpuArray const& dxi) noexcept @@ -211,10 +212,10 @@ auto call_f (F const& f, } // Lambda takes a ParticleTileData -template +template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE auto call_f (F const& f, - const ParticleTileData& p, + const ParticleTileData& p, const int i, Array4 const& fabarr, GpuArray const&, GpuArray const&) noexcept @@ -254,7 +255,8 @@ numParticlesOutOfRange (Iterator const& pti, int nGrow) * \param nGrow the number of grow cells allowed. * */ -template ::value, int> foo = 0> + +template ::value && !Iterator::ContainerType::ParticleType::is_soa_particle, int> foo = 0> int numParticlesOutOfRange (Iterator const& pti, IntVect nGrow) { @@ -262,8 +264,7 @@ numParticlesOutOfRange (Iterator const& pti, IntVect nGrow) const auto& tile = pti.GetParticleTile(); const auto np = tile.numParticles(); - const auto& aos = tile.GetArrayOfStructs(); - const auto pstruct = aos().dataPtr(); + const auto ptd = tile.getConstParticleTileData(); const auto& geom = pti.Geom(pti.GetLevel()); const auto domain = geom.Domain(); @@ -280,7 +281,7 @@ numParticlesOutOfRange (Iterator const& pti, IntVect nGrow) reduce_op.eval(np, reduce_data, [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple { - const ParticleType& p = pstruct[i]; + auto p = make_particle{}(ptd,i); if ((p.id() < 0)) return false; IntVect iv( AMREX_D_DECL(int(amrex::Math::floor((p.pos(0)-plo[0])*dxi[0])), @@ -293,6 +294,44 @@ numParticlesOutOfRange (Iterator const& pti, IntVect nGrow) return hv; } +template ::value && Iterator::ContainerType::ParticleType::is_soa_particle, int> foo = 0> +int +numParticlesOutOfRange (Iterator const& pti, IntVect nGrow) +{ + using ParticleType = typename Iterator::ContainerType::ConstParticleType; + + const auto& tile = pti.GetParticleTile(); + const auto tile_data = tile.getConstParticleTileData(); + const auto np = tile.numParticles(); + const auto& geom = pti.Geom(pti.GetLevel()); + + const auto domain = geom.Domain(); + const auto plo = geom.ProbLoArray(); + const auto dxi = geom.InvCellSizeArray(); + + Box box = pti.tilebox(); + box.grow(nGrow); + + ReduceOps reduce_op; + ReduceData reduce_data(reduce_op); + using ReduceTuple = typename decltype(reduce_data)::Type; + + reduce_op.eval(np, reduce_data, + [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple + { + ParticleType p(tile_data,i); + if ((p.id() < 0)) { return false; } + auto iv = IntVect( + AMREX_D_DECL(int(amrex::Math::floor((p.pos(0)-plo[0])*dxi[0])), + int(amrex::Math::floor((p.pos(1)-plo[1])*dxi[1])), + int(amrex::Math::floor((p.pos(2)-plo[2])*dxi[2])))); + iv += domain.smallEnd(); + return !box.contains(iv); + }); + int hv = amrex::get<0>(reduce_data.value(reduce_op)); + return hv; +} + /** * \brief Returns the number of particles that are more than nGrow cells * from their assigned box. @@ -584,8 +623,8 @@ bool enforcePeriodic (P& p, p.pos(idim) += static_cast(phi[idim] - plo[idim]); } // clamp to avoid precision issues; - if (p.pos(idim) > rhi[idim]) { - p.pos(idim) = rhi[idim]; + if (p.pos(idim) >= rhi[idim]) { + p.pos(idim) = std::nextafter(p.pos(idim), rlo[idim]); } shifted = true; } @@ -608,13 +647,10 @@ partitionParticlesByDest (PTile& ptile, const PLocator& ploc, const ParticleBuff int lev, int gid, int /*tid*/, int lev_min, int lev_max, int nGrow, bool remove_negative) { - auto& aos = ptile.GetArrayOfStructs(); - const int np = aos.numParticles(); - + const int np = ptile.numParticles(); if (np == 0) return 0; auto getPID = pmap.getPIDFunctor(); - auto p_ptr = &(aos[0]); int pid = ParallelContext::MyProcSub(); constexpr int chunk_size = 256*256*256; @@ -640,35 +676,40 @@ partitionParticlesByDest (PTile& ptile, const PLocator& ploc, const ParticleBuff int assigned_grid; int assigned_lev; - auto& p = p_ptr[i+this_offset]; - - if (p.id() < 0 ) + if (src_data.id(i+this_offset) < 0 ) { assigned_grid = -1; assigned_lev = -1; } else { - auto p_prime = p; + amrex::Particle<0> p_prime; + AMREX_D_TERM(p_prime.pos(0) = src_data.pos(0, i+this_offset);, + p_prime.pos(1) = src_data.pos(1, i+this_offset);, + p_prime.pos(2) = src_data.pos(2, i+this_offset);); + enforcePeriodic(p_prime, plo, phi, rlo, rhi, is_per); auto tup_prime = ploc(p_prime, lev_min, lev_max, nGrow); assigned_grid = amrex::get<0>(tup_prime); assigned_lev = amrex::get<1>(tup_prime); if (assigned_grid >= 0) { - AMREX_D_TERM(p.pos(0) = p_prime.pos(0);, - p.pos(1) = p_prime.pos(1);, - p.pos(2) = p_prime.pos(2);); + AMREX_D_TERM(src_data.pos(0, i+this_offset) = p_prime.pos(0);, + src_data.pos(1, i+this_offset) = p_prime.pos(1);, + src_data.pos(2, i+this_offset) = p_prime.pos(2);); } else if (lev_min > 0) { - auto tup = ploc(p, lev_min, lev_max, nGrow); + AMREX_D_TERM(p_prime.pos(0) = src_data.pos(0, i+this_offset);, + p_prime.pos(1) = src_data.pos(1, i+this_offset);, + p_prime.pos(2) = src_data.pos(2, i+this_offset);); + auto tup = ploc(p_prime, lev_min, lev_max, nGrow); assigned_grid = amrex::get<0>(tup); assigned_lev = amrex::get<1>(tup); } } - if ((remove_negative == false) && (p.id() < 0)) { + if ((remove_negative == false) && (src_data.id(i+this_offset) < 0)) { return true; } diff --git a/Src/Particle/AMReX_WriteBinaryParticleData.H b/Src/Particle/AMReX_WriteBinaryParticleData.H index b021b0378b8..7767972998d 100644 --- a/Src/Particle/AMReX_WriteBinaryParticleData.H +++ b/Src/Particle/AMReX_WriteBinaryParticleData.H @@ -752,7 +752,7 @@ void WriteBinaryParticleDataAsync (PC const& pc, } // make tmp particle tiles in pinned memory to write - using PinnedPTile = ParticleTile; auto myptiles = std::make_shared,PinnedPTile> > >(); myptiles->resize(pc.finestLevel()+1); diff --git a/Src/Particle/CMakeLists.txt b/Src/Particle/CMakeLists.txt index 5898abf6cdf..48bf23825e4 100644 --- a/Src/Particle/CMakeLists.txt +++ b/Src/Particle/CMakeLists.txt @@ -29,6 +29,7 @@ target_sources( amrex AMReX_StructOfArrays.H AMReX_ArrayOfStructs.H AMReX_ParticleTile.H + AMReX_MakeParticle.H AMReX_NeighborParticlesCPUImpl.H AMReX_NeighborParticlesGPUImpl.H AMReX_ParticleBufferMap.H diff --git a/Src/Particle/Make.package b/Src/Particle/Make.package index c3f3b082fbe..7959368ef8b 100644 --- a/Src/Particle/Make.package +++ b/Src/Particle/Make.package @@ -13,6 +13,7 @@ CEXE_headers += AMReX_ParIter.H CEXE_headers += AMReX_StructOfArrays.H CEXE_headers += AMReX_ArrayOfStructs.H CEXE_headers += AMReX_ParticleTile.H +CEXE_headers += AMReX_MakeParticle.H CEXE_headers += AMReX_NeighborParticles.H CEXE_headers += AMReX_NeighborParticlesI.H diff --git a/Tests/Particles/ParticleTransformations/main.cpp b/Tests/Particles/ParticleTransformations/main.cpp index 09d6f9c947c..a55111f4b78 100644 --- a/Tests/Particles/ParticleTransformations/main.cpp +++ b/Tests/Particles/ParticleTransformations/main.cpp @@ -39,7 +39,7 @@ class TestParticleContainer public: - using ParticleTileType = ParticleTile; + using ParticleTileType = ParticleTile, NAR, NAI>; TestParticleContainer (const amrex::Geometry & a_geom, diff --git a/Tests/Particles/RedistributeSOA/CMakeLists.txt b/Tests/Particles/RedistributeSOA/CMakeLists.txt new file mode 100644 index 00000000000..bca883b4de5 --- /dev/null +++ b/Tests/Particles/RedistributeSOA/CMakeLists.txt @@ -0,0 +1,11 @@ +set(_sources main.cpp) +if (AMReX_CUDA) + set(_input_files inputs.rt.cuda ) +else () + set(_input_files inputs.rt ) +endif () + +setup_test(_sources _input_files NTASKS 2) + +unset(_sources) +unset(_input_files) diff --git a/Tests/Particles/RedistributeSOA/GNUmakefile b/Tests/Particles/RedistributeSOA/GNUmakefile new file mode 100644 index 00000000000..c9f05029f0c --- /dev/null +++ b/Tests/Particles/RedistributeSOA/GNUmakefile @@ -0,0 +1,22 @@ +AMREX_HOME = ../../../ + +DEBUG = FALSE + +DIM = 3 + +COMP = gcc + +USE_MPI = TRUE +USE_OMP = FALSE +USE_CUDA = FALSE + +TINY_PROFILE = TRUE +USE_PARTICLES = TRUE + +include $(AMREX_HOME)/Tools/GNUMake/Make.defs + +include ./Make.package +include $(AMREX_HOME)/Src/Base/Make.package +include $(AMREX_HOME)/Src/Particle/Make.package + +include $(AMREX_HOME)/Tools/GNUMake/Make.rules diff --git a/Tests/Particles/RedistributeSOA/Make.package b/Tests/Particles/RedistributeSOA/Make.package new file mode 100644 index 00000000000..4497b0e25b9 --- /dev/null +++ b/Tests/Particles/RedistributeSOA/Make.package @@ -0,0 +1,4 @@ +CEXE_sources += main.cpp + + + diff --git a/Tests/Particles/RedistributeSOA/inputs b/Tests/Particles/RedistributeSOA/inputs new file mode 100644 index 00000000000..73d71ccf1b8 --- /dev/null +++ b/Tests/Particles/RedistributeSOA/inputs @@ -0,0 +1,16 @@ +redistribute.size = (256, 256, 384) +redistribute.max_grid_size = 128 +redistribute.is_periodic = 1 +redistribute.num_ppc = 1 +redistribute.move_dir = (1, 1, 1) +redistribute.do_random = 1 +redistribute.nsteps = 500 +redistribute.nlevs = 1 +redistribute.do_regrid = 1 + +redistribute.num_runtime_real = 0 +redistribute.num_runtime_int = 0 + +redistribute.sort = 0 + +amrex.use_gpu_aware_mpi = 0 diff --git a/Tests/Particles/RedistributeSOA/inputs.rt b/Tests/Particles/RedistributeSOA/inputs.rt new file mode 100644 index 00000000000..2cc83a0dda6 --- /dev/null +++ b/Tests/Particles/RedistributeSOA/inputs.rt @@ -0,0 +1,14 @@ +redistribute.size = (32, 64, 64) +redistribute.max_grid_size = 32 +redistribute.is_periodic = 1 +redistribute.num_ppc = 1 +redistribute.move_dir = (1, 1, 1) +redistribute.do_random = 1 +redistribute.nsteps = 100 +redistribute.nlevs = 1 +redistribute.do_regrid = 1 + +redistribute.num_runtime_real = 0 +redistribute.num_runtime_int = 0 + +particles.do_tiling=1 diff --git a/Tests/Particles/RedistributeSOA/inputs.rt.cuda b/Tests/Particles/RedistributeSOA/inputs.rt.cuda new file mode 100644 index 00000000000..9253741d78c --- /dev/null +++ b/Tests/Particles/RedistributeSOA/inputs.rt.cuda @@ -0,0 +1,12 @@ +redistribute.size = (32, 64, 64) +redistribute.max_grid_size = 32 +redistribute.is_periodic = 1 +redistribute.num_ppc = 1 +redistribute.move_dir = (1, 1, 1) +redistribute.do_random = 1 +redistribute.nsteps = 100 +redistribute.nlevs = 1 +redistribute.do_regrid = 1 + +redistribute.num_runtime_real = 2 +redistribute.num_runtime_int = 3 diff --git a/Tests/Particles/RedistributeSOA/inputs.rt.cuda.big b/Tests/Particles/RedistributeSOA/inputs.rt.cuda.big new file mode 100644 index 00000000000..d9066ba21df --- /dev/null +++ b/Tests/Particles/RedistributeSOA/inputs.rt.cuda.big @@ -0,0 +1,12 @@ +redistribute.size = (64, 64, 128) +redistribute.max_grid_size = 64 +redistribute.is_periodic = 1 +redistribute.num_ppc = 4 +redistribute.move_dir = (1, 1, 1) +redistribute.do_random = 1 +redistribute.nsteps = 0 +redistribute.nlevs = 1 +redistribute.do_regrid = 1 + +redistribute.num_runtime_real = 2 +redistribute.num_runtime_int = 3 diff --git a/Tests/Particles/RedistributeSOA/inputs.rt.cuda.mr b/Tests/Particles/RedistributeSOA/inputs.rt.cuda.mr new file mode 100644 index 00000000000..e74b8d243f7 --- /dev/null +++ b/Tests/Particles/RedistributeSOA/inputs.rt.cuda.mr @@ -0,0 +1,13 @@ +redistribute.size = (32, 64, 64) +redistribute.max_grid_size = 32 +redistribute.is_periodic = 1 +redistribute.num_ppc = 1 +redistribute.move_dir = (1, 1, 1) +redistribute.do_random = 1 +redistribute.nsteps = 100 +redistribute.nlevs = 3 +redistribute.test_level_lost = 3 +redistribute.do_regrid = 1 + +redistribute.num_runtime_real = 1 +redistribute.num_runtime_int = 0 diff --git a/Tests/Particles/RedistributeSOA/inputs.rt.cuda.nonperiodic b/Tests/Particles/RedistributeSOA/inputs.rt.cuda.nonperiodic new file mode 100644 index 00000000000..2fc4168c8aa --- /dev/null +++ b/Tests/Particles/RedistributeSOA/inputs.rt.cuda.nonperiodic @@ -0,0 +1,12 @@ +redistribute.size = (32, 64, 64) +redistribute.max_grid_size = 32 +redistribute.is_periodic = 0 +redistribute.num_ppc = 1 +redistribute.move_dir = (1, 1, 1) +redistribute.do_random = 0 +redistribute.nsteps = 100 +redistribute.nlevs = 1 +redistribute.do_regrid = 1 + +redistribute.num_runtime_real = 0 +redistribute.num_runtime_int = 1 diff --git a/Tests/Particles/RedistributeSOA/inputs.rt.cuda.sort b/Tests/Particles/RedistributeSOA/inputs.rt.cuda.sort new file mode 100644 index 00000000000..3cfa52d24a2 --- /dev/null +++ b/Tests/Particles/RedistributeSOA/inputs.rt.cuda.sort @@ -0,0 +1,14 @@ +redistribute.size = (32, 64, 64) +redistribute.max_grid_size = 32 +redistribute.is_periodic = 1 +redistribute.num_ppc = 1 +redistribute.move_dir = (1, 1, 1) +redistribute.do_random = 1 +redistribute.nsteps = 100 +redistribute.nlevs = 1 +redistribute.do_regrid = 1 + +redistribute.sort = 1 + +redistribute.num_runtime_real = 2 +redistribute.num_runtime_int = 3 diff --git a/Tests/Particles/RedistributeSOA/main.cpp b/Tests/Particles/RedistributeSOA/main.cpp new file mode 100644 index 00000000000..0bf11f6f11c --- /dev/null +++ b/Tests/Particles/RedistributeSOA/main.cpp @@ -0,0 +1,482 @@ +#include +#include +#include +#include + +using namespace amrex; + +static constexpr int NR = 7; +static constexpr int NI = 4; + +int num_runtime_real = 0; +int num_runtime_int = 0; + +bool remove_negative = true; + +void get_position_unit_cell (Real* r, const IntVect& nppc, int i_part) +{ + int nx = nppc[0]; +#if AMREX_SPACEDIM > 1 + int ny = nppc[1]; +#else + int ny = 1; +#endif +#if AMREX_SPACEDIM > 2 + int nz = nppc[2]; +#else + int nz = 1; +#endif + + int ix_part = i_part/(ny * nz); + int iy_part = (i_part % (ny * nz)) % ny; + int iz_part = (i_part % (ny * nz)) / ny; + + r[0] = (0.5+ix_part)/nx; + r[1] = (0.5+iy_part)/ny; + r[2] = (0.5+iz_part)/nz; +} + +class TestParticleContainer + : public amrex::ParticleContainerPureSoA +{ + +public: + + TestParticleContainer (const Vector & a_geom, + const Vector & a_dmap, + const Vector & a_ba, + const Vector & a_rr) + : amrex::ParticleContainerPureSoA(a_geom, a_dmap, a_ba, a_rr) + { + for (int i = 0; i < num_runtime_real; ++i) + { + AddRealComp(true); + } + for (int i = 0; i < num_runtime_int; ++i) + { + AddIntComp(true); + } + } + + void RedistributeLocal (bool remove_neg=true) + { + const int lev_min = 0; + const int lev_max = finestLevel(); + const int nGrow = 0; + const int local = 1; + Redistribute(lev_min, lev_max, nGrow, local, remove_neg); + } + + void RedistributeGlobal (bool remove_neg=true) + { + const int lev_min = 0; + const int lev_max = finestLevel(); + const int nGrow = 0; + const int local = 0; + Redistribute(lev_min, lev_max, nGrow, local, remove_neg); + } + + void InitParticles (const amrex::IntVect& a_num_particles_per_cell) + { + BL_PROFILE("InitParticles"); + + const int lev = 0; // only add particles on level 0 + const Real* dx = Geom(lev).CellSize(); + const Real* plo = Geom(lev).ProbLo(); + + const int num_ppc = AMREX_D_TERM( a_num_particles_per_cell[0], + *a_num_particles_per_cell[1], + *a_num_particles_per_cell[2]); + + for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) + { + const Box& tile_box = mfi.tilebox(); + + std::array, NR> host_real; + std::array, NI> host_int; + + std::vector > host_runtime_real(NumRuntimeRealComps()); + std::vector > host_runtime_int(NumRuntimeIntComps()); + + for (IntVect iv = tile_box.smallEnd(); iv <= tile_box.bigEnd(); tile_box.next(iv)) + { + for (int i_part=0; i_part(id)); + host_int[1].push_back(ParallelDescriptor::MyProc()); + host_real[0].push_back(static_cast (plo[0] + (iv[0] + r[0])*dx[0])); +#if AMREX_SPACEDIM > 1 + host_real[1].push_back(static_cast (plo[1] + (iv[1] + r[1])*dx[1])); +#endif +#if AMREX_SPACEDIM > 2 + host_real[2].push_back(static_cast (plo[2] + (iv[2] + r[2])*dx[2])); +#endif + + for (int i = AMREX_SPACEDIM; i < NR; ++i) + host_real[i].push_back(static_cast(id)); + for (int i = 2; i < NI; ++i) + host_int[i].push_back(static_cast(id)); + for (int i = 0; i < NumRuntimeRealComps(); ++i) + host_runtime_real[i].push_back(static_cast(id)); + for (int i = 0; i < NumRuntimeIntComps(); ++i) + host_runtime_int[i].push_back(static_cast(id)); + } + } + + auto& particle_tile = DefineAndReturnParticleTile(lev, mfi.index(), mfi.LocalTileIndex()); + auto old_size = particle_tile.GetArrayOfStructs().size(); + auto new_size = old_size + host_real[0].size(); + particle_tile.resize(new_size); + + auto& soa = particle_tile.GetStructOfArrays(); + for (int i = 0; i < NR; ++i) + { + Gpu::copyAsync(Gpu::hostToDevice, + host_real[i].begin(), + host_real[i].end(), + soa.GetRealData(i).begin() + old_size); + } + + for (int i = 0; i < NI; ++i) + { + Gpu::copyAsync(Gpu::hostToDevice, + host_int[i].begin(), + host_int[i].end(), + soa.GetIntData(i).begin() + old_size); + } + for (int i = 0; i < NumRuntimeRealComps(); ++i) + { + Gpu::copyAsync(Gpu::hostToDevice, + host_runtime_real[i].begin(), + host_runtime_real[i].end(), + soa.GetRealData(NR+i).begin() + old_size); + } + + for (int i = 0; i < NumRuntimeIntComps(); ++i) + { + Gpu::copyAsync(Gpu::hostToDevice, + host_runtime_int[i].begin(), + host_runtime_int[i].end(), + soa.GetIntData(NI+i).begin() + old_size); + } + + Gpu::streamSynchronize(); + } + + RedistributeLocal(); + } + + void moveParticles (const IntVect& move_dir, int do_random) + { + BL_PROFILE("TestParticleContainer::moveParticles"); + + for (int lev = 0; lev <= finestLevel(); ++lev) + { + const auto dx = Geom(lev).CellSizeArray(); + auto& plev = GetParticles(lev); + + for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) + { + int gid = mfi.index(); + int tid = mfi.LocalTileIndex(); + auto& ptile = plev[std::make_pair(gid, tid)]; + auto ptd = ptile.getParticleTileData(); + const size_t np = ptile.numParticles(); + + if (do_random == 0) + { + amrex::ParallelFor( np, [=] AMREX_GPU_DEVICE (int i) noexcept + { + ParticleType p(ptd, i); + p.pos(0) += static_cast (move_dir[0]*dx[0]); +#if AMREX_SPACEDIM > 1 + p.pos(1) += static_cast (move_dir[1]*dx[1]); +#endif +#if AMREX_SPACEDIM > 2 + p.pos(2) += static_cast (move_dir[2]*dx[2]); +#endif + }); + } + else + { + amrex::ParallelForRNG( np, + [=] AMREX_GPU_DEVICE (int i, RandomEngine const& engine) noexcept + { + ParticleType p(ptd, i); + p.pos(0) += static_cast ((2*amrex::Random(engine)-1)*move_dir[0]*dx[0]); +#if AMREX_SPACEDIM > 1 + p.pos(1) += static_cast ((2*amrex::Random(engine)-1)*move_dir[1]*dx[1]); +#endif +#if AMREX_SPACEDIM > 2 + p.pos(2) += static_cast ((2*amrex::Random(engine)-1)*move_dir[2]*dx[2]); +#endif + }); + } + } + } + } + + void negateEven () + { + BL_PROFILE("TestParticleContainer::invalidateEven"); + + for (int lev = 0; lev <= finestLevel(); ++lev) + { + auto& plev = GetParticles(lev); + for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) + { + int gid = mfi.index(); + int tid = mfi.LocalTileIndex(); + auto& ptile = plev[std::make_pair(gid, tid)]; + auto ptd = ptile.getParticleTileData(); + const size_t np = ptile.numParticles(); + amrex::ParallelFor( np, [=] AMREX_GPU_DEVICE (int i) noexcept + { + ParticleType p(ptd, i); + if (p.id() % 2 == 0) { + p.id() = -p.id(); + } + }); + } + } + } + + void checkAnswer () const + { + BL_PROFILE("TestParticleContainer::checkAnswer"); + + AMREX_ALWAYS_ASSERT(OK()); + + int num_rr = NumRuntimeRealComps(); + int num_ii = NumRuntimeIntComps(); + + for (int lev = 0; lev <= finestLevel(); ++lev) + { + const auto & plev = GetParticles(lev); + for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) + { + int gid = mfi.index(); + int tid = mfi.LocalTileIndex(); + const auto & ptile = plev.at(std::make_pair(gid, tid)); + const auto ptd = ptile.getConstParticleTileData(); + const size_t np = ptile.numParticles(); + + AMREX_FOR_1D ( np, i, + { + ConstParticleType p(ptd, i); + for (int j = AMREX_SPACEDIM; j < NR; ++j) + { + AMREX_ALWAYS_ASSERT(ptd.m_rdata[j][i] == p.id()); + } + for (int j = 2; j < NI; ++j) + { + AMREX_ALWAYS_ASSERT(ptd.m_idata[j][i] == p.id()); + } + for (int j = 0; j < num_rr; ++j) + { + AMREX_ALWAYS_ASSERT(ptd.m_runtime_rdata[j][i] == p.id()); + } + for (int j = 0; j < num_ii; ++j) + { + AMREX_ALWAYS_ASSERT(ptd.m_runtime_idata[j][i] == p.id()); + } + }); + } + } + } +}; + +struct TestParams +{ + IntVect size; + int max_grid_size; + int num_ppc; + int is_periodic; + IntVect move_dir; + int do_random; + int nsteps; + int nlevs; + int do_regrid; + int sort; + int test_level_lost = 0; +}; + +void testRedistribute(); + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); + + amrex::Print() << "Running redistribute test \n"; + testRedistribute(); + + amrex::Finalize(); +} + +void get_test_params(TestParams& params, const std::string& prefix) +{ + ParmParse pp(prefix); + pp.get("size", params.size); + pp.get("max_grid_size", params.max_grid_size); + pp.get("num_ppc", params.num_ppc); + pp.get("is_periodic", params.is_periodic); + pp.get("move_dir", params.move_dir); + pp.get("do_random", params.do_random); + pp.get("nsteps", params.nsteps); + pp.get("nlevs", params.nlevs); + pp.get("do_regrid", params.do_regrid); + pp.query("test_level_lost", params.test_level_lost); + pp.query("num_runtime_real", num_runtime_real); + pp.query("num_runtime_int", num_runtime_int); + pp.query("remove_negative", remove_negative); + + params.sort = 0; + pp.query("sort", params.sort); +} + +void testRedistribute () +{ + BL_PROFILE("testRedistribute"); + TestParams params; + get_test_params(params, "redistribute"); + + int is_per[BL_SPACEDIM]; + for (int & d : is_per) + d = params.is_periodic; + + Vector rr(params.nlevs-1); + for (int lev = 1; lev < params.nlevs; lev++) + rr[lev-1] = IntVect(AMREX_D_DECL(2,2,2)); + + RealBox real_box; + for (int n = 0; n < BL_SPACEDIM; n++) + { + real_box.setLo(n, 0.0); + real_box.setHi(n, params.size[n]); + } + + IntVect domain_lo(AMREX_D_DECL(0, 0, 0)); + IntVect domain_hi(AMREX_D_DECL(params.size[0]-1,params.size[1]-1,params.size[2]-1)); + const Box base_domain(domain_lo, domain_hi); + + Vector geom(params.nlevs); + geom[0].define(base_domain, &real_box, CoordSys::cartesian, is_per); + for (int lev = 1; lev < params.nlevs; lev++) { + geom[lev].define(amrex::refine(geom[lev-1].Domain(), rr[lev-1]), + &real_box, CoordSys::cartesian, is_per); + } + + Vector ba(params.nlevs); + Vector dm(params.nlevs); + auto lo = IntVect(AMREX_D_DECL(0, 0, 0)); + IntVect size = params.size; + for (int lev = 0; lev < params.nlevs; ++lev) + { + ba[lev].define(Box(lo, lo+params.size-1)); + ba[lev].maxSize(params.max_grid_size); + dm[lev].define(ba[lev]); + lo += size/2; + size *= 2; + } + + TestParticleContainer pc(geom, dm, ba, rr); + + int npc = params.num_ppc; + auto nppc = IntVect(AMREX_D_DECL(npc, npc, npc)); + + amrex::Print() << "About to initialize particles \n"; + + pc.InitParticles(nppc); + + pc.checkAnswer(); + + auto np_old = pc.TotalNumberOfParticles(); + + if (params.sort) pc.SortParticlesByCell(); + + for (int i = 0; i < params.nsteps; ++i) + { + amrex::Print() << "step " << i << "\n"; + pc.moveParticles(params.move_dir, params.do_random); + if (!remove_negative) { + auto old = pc.TotalNumberOfParticles(); + pc.negateEven(); + pc.RedistributeLocal(false); + AMREX_ALWAYS_ASSERT(old == pc.TotalNumberOfParticles(false)); + pc.negateEven(); + } + pc.RedistributeLocal(); + if (params.sort) pc.SortParticlesByCell(); + pc.checkAnswer(); + } + + if (params.do_regrid) + { + const int NProcs = ParallelDescriptor::NProcs(); + { + for (int lev = 0; lev < params.nlevs; ++lev) + { + DistributionMapping new_dm; + Vector pmap; + for (int i = 0; i < ba[lev].size(); ++i) pmap.push_back(i % NProcs); + new_dm.define(pmap); + pc.SetParticleDistributionMap(lev, new_dm); + } + if (!remove_negative) { + auto old = pc.TotalNumberOfParticles(); + pc.negateEven(); + pc.RedistributeGlobal(false); + AMREX_ALWAYS_ASSERT(old == pc.TotalNumberOfParticles(false)); + pc.negateEven(); + } + pc.RedistributeGlobal(); + pc.checkAnswer(); + } + + { + for (int lev = 0; lev < params.nlevs; ++lev) + { + DistributionMapping new_dm; + Vector pmap; + for (int i = 0; i < ba[lev].size(); ++i) pmap.push_back((i+1) % NProcs); + new_dm.define(pmap); + pc.SetParticleDistributionMap(lev, new_dm); + } + if (!remove_negative) { + auto old = pc.TotalNumberOfParticles(); + pc.negateEven(); + pc.RedistributeGlobal(false); + AMREX_ALWAYS_ASSERT(old == pc.TotalNumberOfParticles(false)); + pc.negateEven(); + } + pc.RedistributeGlobal(); + pc.checkAnswer(); + } + + if (params.test_level_lost) { + AMREX_ALWAYS_ASSERT(params.nlevs > 2); + auto np_before_level_lost = pc.TotalNumberOfParticles(); + Vector new_ba = ba; new_ba.resize(ba.size()-1); + Vector new_dm = dm; new_dm.resize(dm.size()-1); + Vector new_geom = geom; new_geom.resize(geom.size()-1); + Vector new_rr = rr; new_rr.resize(rr.size()-1); + pc.ParticleContainerBase::Define(new_geom, new_dm, new_ba, new_rr); + pc.Redistribute(); + amrex::Print() << np_before_level_lost << "\n"; + amrex::Print() << pc.TotalNumberOfParticles() << "\n"; + AMREX_ALWAYS_ASSERT(np_before_level_lost == pc.TotalNumberOfParticles()); + } + } + + if (geom[0].isAllPeriodic()) { + amrex::Print() << np_old << " " << pc.TotalNumberOfParticles() << "\n"; + AMREX_ALWAYS_ASSERT(np_old == pc.TotalNumberOfParticles()); + } + + // the way this test is set up, if we make it here we pass + amrex::Print() << "pass \n"; +} diff --git a/Tests/Particles/SOAParticle/CMakeLists.txt b/Tests/Particles/SOAParticle/CMakeLists.txt new file mode 100644 index 00000000000..26eec60d2d0 --- /dev/null +++ b/Tests/Particles/SOAParticle/CMakeLists.txt @@ -0,0 +1,8 @@ +set(_sources main.cpp) +#set(_input_files) +#set(_input_files inputs) + +setup_test(_sources _input_files NTHREADS 2) + +unset(_sources) +unset(_input_files) diff --git a/Tests/Particles/SOAParticle/main.cpp b/Tests/Particles/SOAParticle/main.cpp new file mode 100644 index 00000000000..56a621daf40 --- /dev/null +++ b/Tests/Particles/SOAParticle/main.cpp @@ -0,0 +1,190 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace amrex; + +template class Allocator=DefaultAllocator> +void addParticles () +{ + int is_per[AMREX_SPACEDIM]; + for (int & d : is_per) + d = 1; + + RealBox real_box; + for (int n = 0; n < AMREX_SPACEDIM; n++) + { + real_box.setLo(n, 0.0); + real_box.setHi(n, 100.0); + } + + IntVect domain_lo(AMREX_D_DECL(0, 0, 0)); + IntVect domain_hi(AMREX_D_DECL(127, 127, 127)); + const Box base_domain(domain_lo, domain_hi); + + Geometry geom(base_domain, &real_box, CoordSys::cartesian, is_per); + BoxArray ba(base_domain); + ba.maxSize(64); + + DistributionMapping dm(ba); + + T_PC pc(geom, dm, ba); + int const NArrayReal = pc.NArrayReal; + int const NArrayInt = pc.NArrayInt; + + using ParticleType = typename T_PC::ParticleType; + using ParticleTileDataType = typename T_PC::ParticleTileType::ParticleTileDataType; + + const int add_num_particles = 5; + + auto& ptile1 = pc.DefineAndReturnParticleTile(0, 0, 0); + ptile1.resize(add_num_particles); + + for (int i = 0; i < add_num_particles; ++i) + { + for (int d = 0; d < AMREX_SPACEDIM; d++) + ptile1.pos(i, d) = 12.0; + ptile1.getParticleTileData().rdata(AMREX_SPACEDIM)[i] = 1.2; // w + + ptile1.push_back_int(0, ParticleType::NextID()); + ptile1.push_back_int(1, amrex::ParallelDescriptor::MyProc()); + } + + int lev=0; + // int numparticles=0; + using MyParIter = ParIter_impl; + for (MyParIter pti(pc, lev); pti.isValid(); ++pti) { + const int np = pti.numParticles(); + // preparing access to particle data: SoA of Reals + auto& soa = pti.GetStructOfArrays(); + auto soa_real = soa.GetRealData(); + auto size = soa.size(); + amrex::ParticleReal* const AMREX_RESTRICT part_x = soa_real[0].dataPtr(); + amrex::ParticleReal* const AMREX_RESTRICT part_y = AMREX_SPACEDIM >= 2 ? soa_real[1].dataPtr() : nullptr; + amrex::ParticleReal* const AMREX_RESTRICT part_z = AMREX_SPACEDIM >= 3 ? soa_real[2].dataPtr() : nullptr; + amrex::ParticleReal* const AMREX_RESTRICT part_w = soa_real[AMREX_SPACEDIM].dataPtr(); + auto& soa_int = pti.GetStructOfArrays().GetIntData(); + amrex::ignore_unused(size, part_x, part_y, part_z, part_w, soa_int); + + // Iterating over old Particles + // ParallelFor( np, [=] AMREX_GPU_DEVICE (long ip) + // { + // ParticleType& AMREX_RESTRICT p = aos_ptr[ip]; + // p.pos(0) += 1; + // p.pos(1) += 1; + // p.pos(2) += 1; + + // amrex::ParticleReal & AMREX_RESTRICT x = part_x[ip]; + // amrex::ParticleReal & AMREX_RESTRICT y = part_y[ip]; + // amrex::ParticleReal & AMREX_RESTRICT z = part_z[ip]; + // amrex::ParticleReal & AMREX_RESTRICT a = part_aaa[ip]; + + // x += 1.0; + // y += 1.0; + // z += 1.0; + // a += 1.0; + // }); + + // Iterating over SoA Particles + ParticleTileDataType ptd = pti.GetParticleTile().getParticleTileData(); + + ParallelFor( np, [=] AMREX_GPU_DEVICE (long ip) + { + ParticleType p(ptd, ip); + for (int d = 0; d < AMREX_SPACEDIM; d++) { + p.pos(d) += 1_prt; + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(ptd.rdata(d)[ip] == 13_prt, + "pos attribute expected to be 13"); + } + + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(ptd.rdata(AMREX_SPACEDIM)[ip] == 1.2_prt, + "w attribute expected to be 1.2"); + }); + + + } + + // create a host-side particle buffer + auto tmp = pc.template make_alike(); + tmp.copyParticles(pc, true); + + using MyPinnedParIter = ParIter_impl; + + for (MyPinnedParIter pti(tmp, lev); pti.isValid(); ++pti) { + auto& particle_attributes = pti.GetStructOfArrays(); + auto& real_comp0 = particle_attributes.GetRealData(0); + auto& int_comp1 = particle_attributes.GetIntData(1); + for (int i = 0; i < pti.numParticles(); ++i) { + real_comp0[i] += 1; + int_comp1[i] += 1; + } + } + + tmp.Redistribute(); + + using ConstPTDType = typename T_PC::ParticleTileType::ConstParticleTileDataType; + amrex::ReduceOps reduce_ops; + auto r = amrex::ParticleReduce< + amrex::ReduceData< + amrex::ParticleReal, amrex::ParticleReal, amrex::ParticleReal, + amrex::ParticleReal, amrex::ParticleReal, amrex::ParticleReal, + amrex::ParticleReal> + >( + pc, + [=] AMREX_GPU_DEVICE(const ConstPTDType& ptd, const int i) noexcept + { + amrex::ParticleReal const x = ptd.rdata(0)[i]; + amrex::ParticleReal const y = AMREX_SPACEDIM >= 2 ? ptd.rdata(1)[i] : 0.0; + amrex::ParticleReal const z = AMREX_SPACEDIM >= 3 ? ptd.rdata(2)[i] : 0.0; + + amrex::ParticleReal const w = ptd.rdata(AMREX_SPACEDIM)[i]; + + return amrex::makeTuple(x, x*x, y, y*y, z, z*z, w); + }, + reduce_ops + ); + amrex::ignore_unused(r); + + // Reduce for SoA Particle Struct + /* + using PTDType = typename T_PC::ParticleTileType::ConstParticleTileDataType; + amrex::ReduceOps reduce_ops; + auto r = amrex::ParticleReduce> ( + pc, [=] AMREX_GPU_DEVICE (const PTDType& ptd, const int i) noexcept + -> amrex::GpuTuple + { + + const amrex::Real a = ptd.rdata(1)[i]; + const amrex::Real b = ptd.rdata(2)[i]; + const int c = ptd.idata(1)[i]; + return {a, b, c}; + }, reduce_ops); + + AMREX_ALWAYS_ASSERT(amrex::get<0>(r) == amrex::Real(std::pow(256, AMREX_SPACEDIM))); + AMREX_ALWAYS_ASSERT(amrex::get<1>(r) == 2.0); + AMREX_ALWAYS_ASSERT(amrex::get<2>(r) == 1); + */ +} + + + + +int main(int argc, char* argv[]) + { + amrex::Initialize(argc,argv); + { + addParticles< ParticleContainerPureSoA<4, 2> > (); + } + amrex::Finalize(); + } + + + +