diff --git a/Src/Base/AMReX_LUSolver.H b/Src/Base/AMReX_LUSolver.H index bd69822ea5a..bd60fc609dc 100644 --- a/Src/Base/AMReX_LUSolver.H +++ b/Src/Base/AMReX_LUSolver.H @@ -2,7 +2,7 @@ #define AMREX_LU_SOLVER_H_ #include -#include +#include #include #include #include @@ -18,6 +18,7 @@ public: LUSolver () = default; + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE LUSolver (Array2D const& a_mat); void define (Array2D const& a_mat); @@ -74,6 +75,7 @@ public: private: + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void define_innard (); Array2D m_mat; @@ -82,6 +84,7 @@ private: }; template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE LUSolver::LUSolver (Array2D const& a_mat) : m_mat(a_mat) { @@ -96,6 +99,7 @@ void LUSolver::define (Array2D const& a_mat) } template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void LUSolver::define_innard () { static_assert(N > 1); @@ -121,9 +125,9 @@ void LUSolver::define_innard () } if (imax != i) { - std::swap(m_piv(i), m_piv(imax)); + amrex::Swap(m_piv(i), m_piv(imax)); for (int j = 0; j < N; ++j) { - std::swap(m_mat(i,j), m_mat(imax,j)); + amrex::Swap(m_mat(i,j), m_mat(imax,j)); } ++m_npivs; } diff --git a/Src/Base/AMReX_TagParallelFor.H b/Src/Base/AMReX_TagParallelFor.H index ee8d089ee73..7cb5c3e3ef6 100644 --- a/Src/Base/AMReX_TagParallelFor.H +++ b/Src/Base/AMReX_TagParallelFor.H @@ -72,6 +72,26 @@ struct Array4BoxValTag { Box const& box () const noexcept { return dbox; } }; +template +struct Array4BoxOrientationTag { + Array4 fab; + Box bx; + Orientation face; + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + Box const& box() const noexcept { return bx; } +}; + +template +struct Array4BoxOffsetTag { + Array4 fab; + Box bx; + Dim3 offset; + + [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + Box const& box() const noexcept { return bx; } +}; + template struct VectorTag { T* p; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H index 08b5d162ead..8d461d3bb04 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.H @@ -11,7 +11,7 @@ namespace amrex { * \brief curl (alpha curl E) + beta E = rhs * * Here E is an Array of 3 MultiFabs on staggered grid, alpha is a positive - * scalar, and beta is a non-negative scalar. + * scalar, and beta is either a non-negative scalar or a MultiFab. * * It's the caller's responsibility to make sure rhs has consistent nodal * data. If needed, one could call prepareRHS for this. @@ -45,6 +45,9 @@ public: void setScalars (RT a_alpha, RT a_beta) noexcept; + //! This is needed only if there is variable beta coefficient. + void setBeta (const Vector>& a_bcoefs); + //! Synchronize RHS on nodal points. If the user can guarantee it, this //! function does not need to be called. void prepareRHS (Vector const& rhs) const; @@ -133,6 +136,7 @@ private: static constexpr int m_ncomp = 1; Vector>>>> m_lusolver; + Vector,3>>> m_bcoefs; }; } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp index 2ebc49d7b45..740d8d5cc00 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp +++ b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp @@ -22,6 +22,11 @@ void MLCurlCurl::define (const Vector& a_geom, m_dotmask[amrlev].resize(this->m_num_mg_levels[amrlev]); } + m_bcoefs.resize(this->m_num_amr_levels); + for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { + m_bcoefs[amrlev].resize(this->m_num_mg_levels[amrlev]); + } + m_lusolver.resize(this->m_num_amr_levels); for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { m_lusolver[amrlev].resize(this->m_num_mg_levels[amrlev]); @@ -35,6 +40,57 @@ void MLCurlCurl::setScalars (RT a_alpha, RT a_beta) noexcept AMREX_ASSERT(m_beta > RT(0)); } +void MLCurlCurl::setBeta (const Vector>& a_bcoefs) +{ + Array ng; + for (int idim = 0; idim < 3; ++idim) { + ng[idim] = IntVect(1) - m_etype[idim]; // 1 ghost for cell direction, 0 for node + } + + for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { + for (int idim = 0; idim < 3; ++idim) { + if (m_bcoefs[amrlev][0][idim] == nullptr) { + m_bcoefs[amrlev][0][idim] = std::make_unique + (a_bcoefs[amrlev][idim]->boxArray(), + a_bcoefs[amrlev][idim]->DistributionMap(), 1, ng[idim]); + } + MultiFab::Copy(*m_bcoefs[amrlev][0][idim], *a_bcoefs[amrlev][idim], 0, 0, 1, 0); + m_bcoefs[amrlev][0][idim]->FillBoundary(m_geom[amrlev][0].periodicity()); + } + } + + // Need to average down from fine AMR level to coarse level and we need + // to support periodic boundary + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(m_num_amr_levels == 1, + "MLCurlCurl: multi-level not supported yet"); + + for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { + for (int mglev = 1; mglev < m_num_mg_levels[amrlev]; ++mglev) { + IntVect ratio = (amrlev > 0) ? IntVect(2) + : mg_coarsen_ratio_vec[mglev-1]; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + if (m_bcoefs[amrlev][mglev][idim] == nullptr) { + m_bcoefs[amrlev][mglev][idim] = std::make_unique + (amrex::convert(m_grids[amrlev][mglev], m_etype[idim]), + m_dmap[amrlev][mglev], 1, ng[idim]); + } + average_down_edges(*m_bcoefs[amrlev][mglev-1][idim], + *m_bcoefs[amrlev][mglev ][idim], ratio); + m_bcoefs[amrlev][mglev][idim]->FillBoundary(m_geom[amrlev][mglev].periodicity()); + } +#if (AMREX_SPACEDIM == 2) + if (m_bcoefs[amrlev][mglev][2] == nullptr) { + m_bcoefs[amrlev][mglev][2] = std::make_unique + (amrex::convert(m_grids[amrlev][mglev], m_etype[2]), + m_dmap[amrlev][mglev], 1, 0); + } + average_down_nodal(*m_bcoefs[amrlev][mglev-1][2], + *m_bcoefs[amrlev][mglev ][2], ratio); +#endif + } + } +} + void MLCurlCurl::prepareRHS (Vector const& rhs) const { for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { @@ -194,31 +250,62 @@ MLCurlCurl::apply (int amrlev, int mglev, MF& out, MF& in, BCMode /*bc_mode*/, auto const& xin = in[0].array(mfi); auto const& yin = in[1].array(mfi); auto const& zin = in[2].array(mfi); - amrex::ParallelFor(xbx, ybx, zbx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - if (dinfo.is_dirichlet_x_edge(i,j,k)) { - xout(i,j,k) = Real(0.0); - } else { - mlcurlcurl_adotx_x(i,j,k,xout,xin,yin,zin,b,adxinv); - } - }, - [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - if (dinfo.is_dirichlet_y_edge(i,j,k)) { - yout(i,j,k) = Real(0.0); - } else { - mlcurlcurl_adotx_y(i,j,k,yout,xin,yin,zin,b,adxinv); - } - }, - [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - if (dinfo.is_dirichlet_z_edge(i,j,k)) { - zout(i,j,k) = Real(0.0); - } else { - mlcurlcurl_adotx_z(i,j,k,zout,xin,yin,zin,b,adxinv); - } - }); + if (m_bcoefs[amrlev][mglev][0]) { + auto const& bcx = m_bcoefs[amrlev][mglev][0]->const_array(mfi); + auto const& bcy = m_bcoefs[amrlev][mglev][1]->const_array(mfi); + auto const& bcz = m_bcoefs[amrlev][mglev][2]->const_array(mfi); + amrex::ParallelFor(xbx, ybx, zbx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + if (dinfo.is_dirichlet_x_edge(i,j,k)) { + xout(i,j,k) = Real(0.0); + } else { + mlcurlcurl_adotx_x(i,j,k,xout,xin,yin,zin,bcx(i,j,k),adxinv); + } + }, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + if (dinfo.is_dirichlet_y_edge(i,j,k)) { + yout(i,j,k) = Real(0.0); + } else { + mlcurlcurl_adotx_y(i,j,k,yout,xin,yin,zin,bcy(i,j,k),adxinv); + } + }, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + if (dinfo.is_dirichlet_z_edge(i,j,k)) { + zout(i,j,k) = Real(0.0); + } else { + mlcurlcurl_adotx_z(i,j,k,zout,xin,yin,zin,bcz(i,j,k),adxinv); + } + }); + } else { + amrex::ParallelFor(xbx, ybx, zbx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + if (dinfo.is_dirichlet_x_edge(i,j,k)) { + xout(i,j,k) = Real(0.0); + } else { + mlcurlcurl_adotx_x(i,j,k,xout,xin,yin,zin,b,adxinv); + } + }, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + if (dinfo.is_dirichlet_y_edge(i,j,k)) { + yout(i,j,k) = Real(0.0); + } else { + mlcurlcurl_adotx_y(i,j,k,yout,xin,yin,zin,b,adxinv); + } + }, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + if (dinfo.is_dirichlet_z_edge(i,j,k)) { + zout(i,j,k) = Real(0.0); + } else { + mlcurlcurl_adotx_z(i,j,k,zout,xin,yin,zin,b,adxinv); + } + }); + } } } @@ -257,21 +344,32 @@ void MLCurlCurl::smooth4 (int amrlev, int mglev, MF& sol, MF const& rhs, adxinv[idim] *= std::sqrt(m_alpha); } - auto* plusolver = m_lusolver[amrlev][mglev]->dataPtr(); - auto dinfo = getDirichletInfo(amrlev,mglev); auto sinfo = getSymmetryInfo(amrlev,mglev); MultiFab nmf(amrex::convert(rhs[0].boxArray(),IntVect(1)), rhs[0].DistributionMap(), 1, 0, MFInfo().SetAlloc(false)); - ParallelFor(nmf, [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) - { - mlcurlcurl_gs4(i,j,k,ex[bno],ey[bno],ez[bno],rhsx[bno],rhsy[bno],rhsz[bno], + if (m_lusolver[amrlev][mglev]) { + auto* plusolver = m_lusolver[amrlev][mglev]->dataPtr(); + ParallelFor(nmf, [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) + { + mlcurlcurl_gs4(i,j,k,ex[bno],ey[bno],ez[bno],rhsx[bno],rhsy[bno],rhsz[bno], #if (AMREX_SPACEDIM == 2) - b, + b, #endif - adxinv,color,*plusolver,dinfo,sinfo); - }); + adxinv,color,*plusolver,dinfo,sinfo); + }); + } else { + auto const& bcx = m_bcoefs[amrlev][mglev][0]->const_arrays(); + auto const& bcy = m_bcoefs[amrlev][mglev][1]->const_arrays(); + auto const& bcz = m_bcoefs[amrlev][mglev][2]->const_arrays(); + ParallelFor(nmf, [=] AMREX_GPU_DEVICE (int bno, int i, int j, int k) + { + + mlcurlcurl_gs4(i,j,k,ex[bno],ey[bno],ez[bno],rhsx[bno],rhsy[bno],rhsz[bno], + adxinv,color,bcx[bno],bcy[bno],bcz[bno],dinfo,sinfo); + }); + } Gpu::streamSynchronize(); } @@ -341,85 +439,87 @@ void MLCurlCurl::compresid (int amrlev, int mglev, MF& resid, MF const& b) const void MLCurlCurl::prepareForSolve () { - for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { - for (int mglev = 0; mglev < m_num_mg_levels[amrlev]; ++mglev) { - auto const& dxinv = this->m_geom[amrlev][mglev].InvCellSizeArray(); - Real dxx = dxinv[0]*dxinv[0]; - Real dyy = dxinv[1]*dxinv[1]; - Real dxy = dxinv[0]*dxinv[1]; + if (m_bcoefs[0][0][0] == nullptr) { + for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) { + for (int mglev = 0; mglev < m_num_mg_levels[amrlev]; ++mglev) { + auto const& dxinv = this->m_geom[amrlev][mglev].InvCellSizeArray(); + Real dxx = dxinv[0]*dxinv[0]; + Real dyy = dxinv[1]*dxinv[1]; + Real dxy = dxinv[0]*dxinv[1]; #if (AMREX_SPACEDIM == 2) - Array2D A - {m_alpha*dyy*Real(2.0) + m_beta, - Real(0.0), - -m_alpha*dxy, - m_alpha*dxy, - // - Real(0.0), - m_alpha*dyy*Real(2.0) + m_beta, - m_alpha*dxy, - -m_alpha*dxy, - // - -m_alpha*dxy, - m_alpha*dxy, - m_alpha*dxx*Real(2.0) + m_beta, - Real(0.0), - // - m_alpha*dxy, - -m_alpha*dxy, - Real(0.0), - m_alpha*dxx*Real(2.0) + m_beta}; + Array2D A + {m_alpha*dyy*Real(2.0) + m_beta, + Real(0.0), + -m_alpha*dxy, + m_alpha*dxy, + // + Real(0.0), + m_alpha*dyy*Real(2.0) + m_beta, + m_alpha*dxy, + -m_alpha*dxy, + // + -m_alpha*dxy, + m_alpha*dxy, + m_alpha*dxx*Real(2.0) + m_beta, + Real(0.0), + // + m_alpha*dxy, + -m_alpha*dxy, + Real(0.0), + m_alpha*dxx*Real(2.0) + m_beta}; #else - Real dzz = dxinv[2]*dxinv[2]; - Real dxz = dxinv[0]*dxinv[2]; - Real dyz = dxinv[1]*dxinv[2]; - - Array2D A - {m_alpha*(dyy+dzz)*Real(2.0) + m_beta, - Real(0.0), - -m_alpha*dxy, - m_alpha*dxy, - -m_alpha*dxz, - m_alpha*dxz, - // - Real(0.0), - m_alpha*(dyy+dzz)*Real(2.0) + m_beta, - m_alpha*dxy, - -m_alpha*dxy, - m_alpha*dxz, - -m_alpha*dxz, - // - -m_alpha*dxy, - m_alpha*dxy, - m_alpha*(dxx+dzz)*Real(2.0) + m_beta, - Real(0.0), - -m_alpha*dyz, - m_alpha*dyz, - // - m_alpha*dxy, - -m_alpha*dxy, - Real(0.0), - m_alpha*(dxx+dzz)*Real(2.0) + m_beta, - m_alpha*dyz, - -m_alpha*dyz, - // - -m_alpha*dxz, - m_alpha*dxz, - -m_alpha*dyz, - m_alpha*dyz, - m_alpha*(dxx+dyy)*Real(2.0) + m_beta, - Real(0.0), - // - m_alpha*dxz, - -m_alpha*dxz, - m_alpha*dyz, - -m_alpha*dyz, - Real(0.0), - m_alpha*(dxx+dyy)*Real(2.0) + m_beta}; + Real dzz = dxinv[2]*dxinv[2]; + Real dxz = dxinv[0]*dxinv[2]; + Real dyz = dxinv[1]*dxinv[2]; + + Array2D A + {m_alpha*(dyy+dzz)*Real(2.0) + m_beta, + Real(0.0), + -m_alpha*dxy, + m_alpha*dxy, + -m_alpha*dxz, + m_alpha*dxz, + // + Real(0.0), + m_alpha*(dyy+dzz)*Real(2.0) + m_beta, + m_alpha*dxy, + -m_alpha*dxy, + m_alpha*dxz, + -m_alpha*dxz, + // + -m_alpha*dxy, + m_alpha*dxy, + m_alpha*(dxx+dzz)*Real(2.0) + m_beta, + Real(0.0), + -m_alpha*dyz, + m_alpha*dyz, + // + m_alpha*dxy, + -m_alpha*dxy, + Real(0.0), + m_alpha*(dxx+dzz)*Real(2.0) + m_beta, + m_alpha*dyz, + -m_alpha*dyz, + // + -m_alpha*dxz, + m_alpha*dxz, + -m_alpha*dyz, + m_alpha*dyz, + m_alpha*(dxx+dyy)*Real(2.0) + m_beta, + Real(0.0), + // + m_alpha*dxz, + -m_alpha*dxz, + m_alpha*dyz, + -m_alpha*dyz, + Real(0.0), + m_alpha*(dxx+dyy)*Real(2.0) + m_beta}; #endif - m_lusolver[amrlev][mglev] - = std::make_unique>>(A); + m_lusolver[amrlev][mglev] + = std::make_unique>>(A); + } } } } @@ -531,26 +631,6 @@ void MLCurlCurl::applyBC (int amrlev, int mglev, MF& in, CurlCurlStateType type) } } -#ifdef AMREX_USE_GPU -struct MLCurlCurlBCTag { - Array4 fab; - Box bx; - Orientation face; - - [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - Box const& box() const noexcept { return bx; } -}; - -struct MLCurlCurlEdgeBCTag { - Array4 fab; - Box bx; - Dim3 offset; - - [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - Box const& box() const noexcept { return bx; } -}; -#endif - void MLCurlCurl::applyPhysBC (int amrlev, int mglev, MultiFab& mf, CurlCurlStateType type) const { if (CurlCurlStateType::b == type) { return; } @@ -563,7 +643,7 @@ void MLCurlCurl::applyPhysBC (int amrlev, int mglev, MultiFab& mf, CurlCurlState MFItInfo mfi_info{}; #ifdef AMREX_USE_GPU - Vector tags; + Vector> tags; mfi_info.DisableDeviceSync(); #endif @@ -598,7 +678,7 @@ void MLCurlCurl::applyPhysBC (int amrlev, int mglev, MultiFab& mf, CurlCurlState } } #ifdef AMREX_USE_GPU - tags.emplace_back(MLCurlCurlBCTag{a,b,face}); + tags.emplace_back(Array4BoxOrientationTag{a,b,face}); #else amrex::LoopOnCpu(b, [&] (int i, int j, int k) { @@ -611,7 +691,7 @@ void MLCurlCurl::applyPhysBC (int amrlev, int mglev, MultiFab& mf, CurlCurlState #ifdef AMREX_USE_GPU ParallelFor(tags, - [=] AMREX_GPU_DEVICE (int i, int j, int k, MLCurlCurlBCTag const& tag) noexcept + [=] AMREX_GPU_DEVICE (int i, int j, int k, Array4BoxOrientationTag const& tag) noexcept { mlcurlcurl_bc_symmetry(i, j, k, tag.face, idxtype, tag.fab); }); @@ -621,7 +701,7 @@ void MLCurlCurl::applyPhysBC (int amrlev, int mglev, MultiFab& mf, CurlCurlState auto sinfo = getSymmetryInfo(amrlev,mglev); #ifdef AMREX_USE_GPU - Vector tags2; + Vector> tags2; #endif #ifdef AMREX_USE_OMP @@ -659,7 +739,7 @@ void MLCurlCurl::applyPhysBC (int amrlev, int mglev, MultiFab& mf, CurlCurlState b.setRange(jdim,vbx.bigEnd(jdim)+1); } #ifdef AMREX_USE_GPU - tags2.emplace_back(MLCurlCurlEdgeBCTag{a,b,offset}); + tags2.emplace_back(Array4BoxOffsetTag{a,b,offset}); #else amrex::LoopOnCpu(b, [&] (int i, int j, int k) { @@ -676,7 +756,7 @@ void MLCurlCurl::applyPhysBC (int amrlev, int mglev, MultiFab& mf, CurlCurlState #ifdef AMREX_USE_GPU ParallelFor(tags2, - [=] AMREX_GPU_DEVICE (int i, int j, int k, MLCurlCurlEdgeBCTag const& tag) + [=] AMREX_GPU_DEVICE (int i, int j, int k, Array4BoxOffsetTag const& tag) { tag.fab(i,j,k) = tag.fab(i+tag.offset.x,j+tag.offset.y,k+tag.offset.z); }); diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H index 0816d141183..0c1118f7dd3 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCurlCurl_K.H @@ -598,6 +598,267 @@ void mlcurlcurl_gs4 (int i, int j, int k, #endif } +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void mlcurlcurl_gs4 (int i, int j, int k, + Array4 const& ex, + Array4 const& ey, + Array4 const& ez, + Array4 const& rhsx, + Array4 const& rhsy, + Array4 const& rhsz, + GpuArray const& adxinv, + int color, + Array4 const& betax, + Array4 const& betay, + Array4 const& betaz, + CurlCurlDirichletInfo const& dinfo, + CurlCurlSymmetryInfo const& sinfo) +{ + if (dinfo.is_dirichlet_node(i,j,k)) { return; } + + int my_color = i%2 + 2*(j%2); + if (k%2 != 0) { + my_color = 3 - my_color; + } + +#if (AMREX_SPACEDIM == 2) + + Real dxx = adxinv[0] * adxinv[0]; + Real dyy = adxinv[1] * adxinv[1]; + + if (((my_color == 0 || my_color == 3) && (color == 0 || color == 3)) || + ((my_color == 1 || my_color == 2) && (color == 1 || color == 2))) + { + Real gamma = (dxx+dyy)*Real(2.0) + betaz(i,j,k); + Real ccez = - dxx * (ez(i-1,j ,k ) + + ez(i+1,j ,k )) + - dyy * (ez(i ,j-1,k ) + + ez(i ,j+1,k )); + Real res = rhsz(i,j,k) - (gamma*ez(i,j,k) + ccez); + constexpr Real omega = Real(1.15); + ez(i,j,k) += omega/gamma * res; + } + + if (my_color != color) { return; } + + Real dxy = adxinv[0]*adxinv[1]; + + GpuArray b + {rhsx(i-1,j,k) - (-dyy * ( ex(i-1,j-1,k ) + + ex(i-1,j+1,k )) + + dxy * ( ey(i-1,j-1,k ) + -ey(i-1,j ,k ))), + rhsx(i ,j,k) - (-dyy * ( ex(i ,j-1,k ) + + ex(i ,j+1,k )) + + dxy * (-ey(i+1,j-1,k ) + +ey(i+1,j ,k ))), + rhsy(i,j-1,k) - (-dxx * ( ey(i-1,j-1,k ) + + ey(i+1,j-1,k )) + + dxy * ( ex(i-1,j-1,k ) + -ex(i ,j-1,k ))), + rhsy(i,j ,k) - (-dxx * ( ey(i-1,j ,k ) + + ey(i+1,j ,k )) + + dxy * (-ex(i-1,j+1,k ) + +ex(i ,j+1,k )))}; + + GpuArray x; + + if (sinfo.xlo_is_symmetric(i)) { + b[0] = -b[1]; + x[0] = x[1] = betax(i,j,k); + } else if (sinfo.xhi_is_symmetric(i)) { + b[1] = -b[0]; + x[0] = x[1] = betax(i-1,j,k); + } else { + x[0] = betax(i-1,j,k); + x[1] = betax(i ,j,k); + } + + if (sinfo.ylo_is_symmetric(j)) { + b[2] = -b[3]; + x[2] = x[3] = betay(i,j,k); + } else if (sinfo.yhi_is_symmetric(j)) { + b[3] = -b[2]; + x[2] = x[3] = betay(i,j-1,k); + } else { + x[2] = betay(i,j-1,k); + x[3] = betay(i,j ,k); + } + + LUSolver<4,Real> lusolver + ({dyy*Real(2.0) + x[0], + Real(0.0), + -dxy, + dxy, + // + Real(0.0), + dyy*Real(2.0) + x[1], + dxy, + -dxy, + // + -dxy, + dxy, + dxx*Real(2.0) + x[2], + Real(0.0), + // + dxy, + -dxy, + Real(0.0), + dxx*Real(2.0) + x[3]}); + lusolver(x.data(), b.data()); + ex(i-1,j ,k ) = x[0]; + ex(i ,j ,k ) = x[1]; + ey(i ,j-1,k ) = x[2]; + ey(i ,j ,k ) = x[3]; + +#else + + if (my_color != color) { return; } + + Real dxx = adxinv[0]*adxinv[0]; + Real dyy = adxinv[1]*adxinv[1]; + Real dzz = adxinv[2]*adxinv[2]; + Real dxy = adxinv[0]*adxinv[1]; + Real dxz = adxinv[0]*adxinv[2]; + Real dyz = adxinv[1]*adxinv[2]; + + GpuArray b + {rhsx(i-1,j,k) - (-dyy * ( ex(i-1,j-1,k ) + + ex(i-1,j+1,k )) + - dzz * ( ex(i-1,j ,k+1) + + ex(i-1,j ,k-1)) + + dxy * ( ey(i-1,j-1,k ) + -ey(i-1,j ,k )) + + dxz * ( ez(i-1,j ,k-1) + -ez(i-1,j ,k ))), + rhsx(i ,j,k) - (-dyy * ( ex(i ,j-1,k ) + + ex(i ,j+1,k )) + - dzz * ( ex(i ,j ,k+1) + + ex(i ,j ,k-1)) + + dxy * (-ey(i+1,j-1,k ) + +ey(i+1,j ,k )) + + dxz * (-ez(i+1,j ,k-1) + +ez(i+1,j ,k ))), + rhsy(i,j-1,k) - (-dxx * ( ey(i-1,j-1,k ) + + ey(i+1,j-1,k )) + - dzz * ( ey(i ,j-1,k-1) + + ey(i ,j-1,k+1)) + + dxy * ( ex(i-1,j-1,k ) + -ex(i ,j-1,k )) + + dyz * ( ez(i ,j-1,k-1) + -ez(i ,j-1,k ))), + rhsy(i,j ,k) - (-dxx * ( ey(i-1,j ,k ) + + ey(i+1,j ,k )) + - dzz * ( ey(i ,j ,k-1) + + ey(i ,j ,k+1)) + + dxy * (-ex(i-1,j+1,k ) + +ex(i ,j+1,k )) + + dyz * (-ez(i ,j+1,k-1) + +ez(i ,j+1,k ))), + rhsz(i,j,k-1) - (-dxx * ( ez(i-1,j ,k-1) + + ez(i+1,j ,k-1)) + - dyy * ( ez(i ,j-1,k-1) + + ez(i ,j+1,k-1)) + + dxz * ( ex(i-1,j ,k-1) + -ex(i ,j ,k-1)) + + dyz * ( ey(i ,j-1,k-1) + -ey(i ,j ,k-1))), + rhsz(i,j,k ) - (-dxx * ( ez(i-1,j ,k ) + + ez(i+1,j ,k )) + - dyy * ( ez(i ,j-1,k ) + + ez(i ,j+1,k )) + + dxz * (-ex(i-1,j ,k+1) + +ex(i ,j ,k+1)) + + dyz * (-ey(i ,j-1,k+1) + +ey(i ,j ,k+1)))}; + + GpuArray x; + + if (sinfo.xlo_is_symmetric(i)) { + b[0] = -b[1]; + x[0] = x[1] = betax(i,j,k); + } else if (sinfo.xhi_is_symmetric(i)) { + b[1] = -b[0]; + x[0] = x[1] = betax(i-1,j,k); + } else { + x[0] = betax(i-1,j,k); + x[1] = betax(i ,j,k); + } + + if (sinfo.ylo_is_symmetric(j)) { + b[2] = -b[3]; + x[2] = x[3] = betay(i,j,k); + } else if (sinfo.yhi_is_symmetric(j)) { + b[3] = -b[2]; + x[2] = x[3] = betay(i,j-1,k); + } else { + x[2] = betay(i,j-1,k); + x[3] = betay(i,j ,k); + } + + if (sinfo.zlo_is_symmetric(k)) { + b[4] = -b[5]; + x[4] = x[5] = betaz(i,j,k); + } else if (sinfo.zhi_is_symmetric(k)) { + b[5] = -b[4]; + x[4] = x[5] = betaz(i,j,k-1); + } else { + x[4] = betaz(i,j,k-1); + x[5] = betaz(i,j,k ); + } + + LUSolver<6,Real> lusolver + ({(dyy+dzz)*Real(2.0) + x[0], + Real(0.0), + -dxy, + dxy, + -dxz, + dxz, + // + Real(0.0), + (dyy+dzz)*Real(2.0) + x[1], + dxy, + -dxy, + dxz, + -dxz, + // + -dxy, + dxy, + (dxx+dzz)*Real(2.0) + x[2], + Real(0.0), + -dyz, + dyz, + // + dxy, + -dxy, + Real(0.0), + (dxx+dzz)*Real(2.0) + x[3], + dyz, + -dyz, + // + -dxz, + dxz, + -dyz, + dyz, + (dxx+dyy)*Real(2.0) + x[4], + Real(0.0), + // + dxz, + -dxz, + dyz, + -dyz, + Real(0.0), + (dxx+dyy)*Real(2.0) + x[5]}); + lusolver(x.data(), b.data()); + ex(i-1,j ,k ) = x[0]; + ex(i ,j ,k ) = x[1]; + ey(i ,j-1,k ) = x[2]; + ey(i ,j ,k ) = x[3]; + ez(i ,j ,k-1) = x[4]; + ez(i ,j ,k ) = x[5]; +#endif +} + AMREX_GPU_DEVICE AMREX_FORCE_INLINE void mlcurlcurl_interpadd (int dir, int i, int j, int k, Array4 const& fine, diff --git a/Tests/LinearSolvers/CurlCurl/MyTest.H b/Tests/LinearSolvers/CurlCurl/MyTest.H index 050d328c618..73b260b470e 100644 --- a/Tests/LinearSolvers/CurlCurl/MyTest.H +++ b/Tests/LinearSolvers/CurlCurl/MyTest.H @@ -44,6 +44,7 @@ private: amrex::Real beta_factor = 1.e-2; amrex::Real alpha = 1.0; amrex::Real beta = 1.0; + bool variable_beta = false; }; #endif diff --git a/Tests/LinearSolvers/CurlCurl/MyTest.cpp b/Tests/LinearSolvers/CurlCurl/MyTest.cpp index 51625d5ede7..aa7d4353b7b 100644 --- a/Tests/LinearSolvers/CurlCurl/MyTest.cpp +++ b/Tests/LinearSolvers/CurlCurl/MyTest.cpp @@ -33,6 +33,17 @@ MyTest::solve () LinOpBCType::Periodic)}); mlcc.setScalars(alpha, beta); + if (variable_beta) { + Array bcoef; + for (int idim = 0; idim < 3; ++idim) { + bcoef[idim].define(solution[idim].boxArray(), + solution[idim].DistributionMap(), 1, 0); + bcoef[idim].setVal(beta); + } + mlcc.setBeta({Array{bcoef.data(), + bcoef.data()+1, + bcoef.data()+2}}); + } mlcc.prepareRHS({&rhs}); using V = Array; @@ -98,6 +109,7 @@ MyTest::readParameters () pp.query("beta_factor", beta_factor); pp.query("alpha", alpha); + pp.query("variable_beta", variable_beta); } void