diff --git a/Src/Base/AMReX_Geometry.H b/Src/Base/AMReX_Geometry.H index 4238793861d..0e0a49f540e 100644 --- a/Src/Base/AMReX_Geometry.H +++ b/Src/Base/AMReX_Geometry.H @@ -69,22 +69,46 @@ public: namespace detail { template - T bisect_prob_hi (amrex::Real plo, amrex::Real phi, amrex::Real idx, int ilo, int ihi, amrex::Real tol) { + T bisect_prob_lo (amrex::Real plo, amrex::Real /*phi*/, amrex::Real dxinv, int ilo, int ihi, amrex::Real tol) { + T lo = static_cast(plo + tol); + bool safe; + { + int i = int(Math::floor((lo - plo)*dxinv)) + ilo; + safe = i >= ilo && i <= ihi; + } + if (safe) { + return lo; + } else { + // bisect the point at which the cell no longer maps to inside the domain + T hi = static_cast(plo + 0.5_rt/dxinv); + T mid = bisect(lo, hi, + [=] AMREX_GPU_HOST_DEVICE (T x) -> T + { + int i = int(Math::floor((x - plo)*dxinv)) + ilo; + bool inside = i >= ilo && i <= ihi; + return static_cast(inside) - T(0.5); + }, static_cast(tol)); + return mid - static_cast(tol); + } + } + + template + T bisect_prob_hi (amrex::Real plo, amrex::Real phi, amrex::Real dxinv, int ilo, int ihi, amrex::Real tol) { T hi = static_cast(phi - tol); bool safe; { - int i = int(Math::floor((hi - plo)*idx)) + ilo; + int i = int(Math::floor((hi - plo)*dxinv)) + ilo; safe = i >= ilo && i <= ihi; } if (safe) { return hi; } else { // bisect the point at which the cell no longer maps to inside the domain - T lo = static_cast(phi - 0.5_rt/idx); + T lo = static_cast(phi - 0.5_rt/dxinv); T mid = bisect(lo, hi, [=] AMREX_GPU_HOST_DEVICE (T x) -> T { - int i = int(Math::floor((x - plo)*idx)) + ilo; + int i = int(Math::floor((x - plo)*dxinv)) + ilo; bool inside = i >= ilo && i <= ihi; return static_cast(inside) - T(0.5); }, static_cast(tol)); @@ -217,6 +241,13 @@ public: return {{AMREX_D_DECL(prob_domain.hi(0),prob_domain.hi(1),prob_domain.hi(2))}}; } + GpuArray ProbLoArrayInParticleReal () const noexcept { +#ifdef AMREX_SINGLE_PRECISION_PARTICLES + return roundoff_lo_f; +#else + return roundoff_lo_d; +#endif + } GpuArray ProbHiArrayInParticleReal () const noexcept { #ifdef AMREX_SINGLE_PRECISION_PARTICLES return roundoff_hi_f; @@ -454,11 +485,11 @@ private: RealBox prob_domain; // Due to round-off errors, not all floating point numbers for which plo >= x < phi - // will map to a cell that is inside "domain". "roundoff_hi_d" and "roundoff_hi_f" each store - // a phi that is very close to that in prob_domain, and for which all doubles and floats less than + // will map to a cell that is inside "domain". "roundoff_{lo,hi}_{f,d}" each store + // a position that is very close to that in prob_domain, and for which all doubles and floats less than // it will map to a cell inside domain. - GpuArray roundoff_hi_d; - GpuArray roundoff_hi_f; + GpuArray roundoff_lo_d, roundoff_hi_d; + GpuArray roundoff_lo_f, roundoff_hi_f; // Box domain; diff --git a/Src/Base/AMReX_Geometry.cpp b/Src/Base/AMReX_Geometry.cpp index 1457db6b8d1..2f80f2eb947 100644 --- a/Src/Base/AMReX_Geometry.cpp +++ b/Src/Base/AMReX_Geometry.cpp @@ -512,14 +512,16 @@ Geometry::computeRoundoffDomain () int ihi = Domain().bigEnd(idim); Real plo = ProbLo(idim); Real phi = ProbHi(idim); - Real idx = InvCellSize(idim); + Real dxinv = InvCellSize(idim); Real deltax = CellSize(idim); Real ftol = std::max(1.e-4_rt*deltax, 2.e-7_rt*phi); Real dtol = std::max(1.e-8_rt*deltax, 1.e-14_rt*phi); - roundoff_hi_f[idim] = detail::bisect_prob_hi (plo, phi, idx, ilo, ihi, ftol); - roundoff_hi_d[idim] = detail::bisect_prob_hi(plo, phi, idx, ilo, ihi, dtol); + roundoff_lo_f[idim] = detail::bisect_prob_lo (plo, phi, dxinv, ilo, ihi, ftol); + roundoff_lo_d[idim] = detail::bisect_prob_lo(plo, phi, dxinv, ilo, ihi, dtol); + roundoff_hi_f[idim] = detail::bisect_prob_hi (plo, phi, dxinv, ilo, ihi, ftol); + roundoff_hi_d[idim] = detail::bisect_prob_hi(plo, phi, dxinv, ilo, ihi, dtol); } } @@ -527,18 +529,18 @@ bool Geometry::outsideRoundoffDomain (AMREX_D_DECL(ParticleReal x, ParticleReal y, ParticleReal z)) const { #ifdef AMREX_SINGLE_PRECISION_PARTICLES - bool outside = AMREX_D_TERM(x < prob_domain.lo(0) + bool outside = AMREX_D_TERM(x < roundoff_lo_f[0] || x >= roundoff_hi_f[0], - || y < prob_domain.lo(1) + || y < roundoff_lo_f[1] || y >= roundoff_hi_f[1], - || z < prob_domain.lo(2) + || z < roundoff_lo_f[2] || z >= roundoff_hi_f[2]); #else - bool outside = AMREX_D_TERM(x < prob_domain.lo(0) + bool outside = AMREX_D_TERM(x < roundoff_lo_d[0] || x >= roundoff_hi_d[0], - || y < prob_domain.lo(1) + || y < roundoff_lo_d[1] || y >= roundoff_hi_d[1], - || z < prob_domain.lo(2) + || z < roundoff_lo_d[2] || z >= roundoff_hi_d[2]); #endif return outside; diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index f6fbe9afc3c..062bd603572 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -239,10 +239,11 @@ ParticleContainer const auto& geom = Geom(0); const auto plo = geom.ProbLoArray(); const auto phi = geom.ProbHiArray(); + const auto rlo = geom.ProbLoArrayInParticleReal(); const auto rhi = geom.ProbHiArrayInParticleReal(); const auto is_per = geom.isPeriodicArray(); - return enforcePeriodic(p, plo, phi, rhi, is_per); + return enforcePeriodic(p, plo, phi, rlo, rhi, is_per); } template ::lo { if (Geom(0).outsideRoundoffDomain(AMREX_D_DECL(p.pos(0), p.pos(1), p.pos(2)))) { - RealBox prob_domain = Geom(0).ProbDomain(); - GpuArray phi = Geom(0).ProbHiArrayInParticleReal(); + GpuArray rhi = Geom(0).ProbHiArrayInParticleReal(); + GpuArray rlo = Geom(0).ProbLoArrayInParticleReal(); for (int idim=0; idim < AMREX_SPACEDIM; ++idim) { - if (p.pos(idim) <= prob_domain.lo(idim)) { - p.pos(idim) = std::nextafter((ParticleReal) prob_domain.lo(idim), phi[idim]); + if (p.pos(idim) <= rlo[idim]) { + p.pos(idim) = std::nextafter(rlo[idim], rhi[idim]); } - if (p.pos(idim) >= phi[idim]) { - p.pos(idim) = std::nextafter(phi[idim], (ParticleReal) prob_domain.lo(idim)); + if (p.pos(idim) >= rhi[idim]) { + p.pos(idim) = std::nextafter(rhi[idim], rlo[idim]); } } @@ -1242,6 +1243,7 @@ ParticleContainer Vector > new_sizes(num_levels); const auto plo = Geom(0).ProbLoArray(); const auto phi = Geom(0).ProbHiArray(); + const auto rlo = Geom(0).ProbLoArrayInParticleReal(); const auto rhi = Geom(0).ProbHiArrayInParticleReal(); const auto is_per = Geom(0).isPeriodicArray(); for (int lev = lev_min; lev <= finest_lev_particles; ++lev) @@ -1263,7 +1265,7 @@ ParticleContainer "perhaps particles have not been initialized correctly?"); int num_stay = partitionParticlesByDest(src_tile, assign_grid, BufferMap(), - plo, phi, rhi, is_per, lev, gid, tid, + plo, phi, rlo, rhi, is_per, lev, gid, tid, lev_min, lev_max, nGrow, remove_negative); int num_move = np - num_stay; diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index 6623f353749..ee59cac67d0 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -517,6 +517,7 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool enforcePeriodic (P& p, amrex::GpuArray const& plo, amrex::GpuArray const& phi, + amrex::GpuArray const& rlo, amrex::GpuArray const& rhi, amrex::GpuArray const& is_per) noexcept { @@ -529,7 +530,9 @@ bool enforcePeriodic (P& p, p.pos(idim) -= static_cast(phi[idim] - plo[idim]); } // clamp to avoid precision issues; - if (p.pos(idim) < plo[idim]) p.pos(idim) = static_cast(plo[idim]); + if (p.pos(idim) < rlo[idim]) { + p.pos(idim) = rlo[idim]; + } shifted = true; } else if (p.pos(idim) < plo[idim]) { @@ -538,7 +541,7 @@ bool enforcePeriodic (P& p, } // clamp to avoid precision issues; if (p.pos(idim) > rhi[idim]) { - p.pos(idim) = static_cast(rhi[idim]); + p.pos(idim) = rhi[idim]; } shifted = true; } @@ -555,6 +558,7 @@ int partitionParticlesByDest (PTile& ptile, const PLocator& ploc, const ParticleBufferMap& pmap, const GpuArray& plo, const GpuArray& phi, + const GpuArray& rlo, const GpuArray& rhi, const GpuArray& is_per, int lev, int gid, int /*tid*/, @@ -602,7 +606,7 @@ partitionParticlesByDest (PTile& ptile, const PLocator& ploc, const ParticleBuff else { auto p_prime = p; - enforcePeriodic(p_prime, plo, phi, rhi, is_per); + enforcePeriodic(p_prime, plo, phi, rlo, rhi, is_per); auto tup_prime = ploc(p_prime, lev_min, lev_max, nGrow); assigned_grid = amrex::get<0>(tup_prime); assigned_lev = amrex::get<1>(tup_prime);