AMReX-Codes · WeiqunZhang · Oct 3, 2022 · Aug 25, 2022 · Aug 31, 2022 · Sep 8, 2022
diff --git a/Src/Base/AMReX_Orientation.H b/Src/Base/AMReX_Orientation.H
@@ -75,7 +75,7 @@ public:
     * according to the above ordering.
     */
     AMREX_GPU_HOST_DEVICE
-    operator int () const noexcept { return val; }
+    constexpr operator int () const noexcept { return val; }
     //! Return opposite orientation.
     AMREX_GPU_HOST_DEVICE
     Orientation flip () const noexcept
@@ -97,6 +97,30 @@ public:
     //! Read from an istream.
     friend std::istream& operator>> (std::istream& os, Orientation& o);
 
+    //! Int value of the x-lo-face
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    static constexpr int xlo () noexcept { return 0; }
+
+    //! Int value of the x-hi-face
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    static constexpr int xhi () noexcept { return AMREX_SPACEDIM; }
+
+    //! Int value of the y-lo-face
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    static constexpr int ylo () noexcept { return 1; }
+
+    //! Int value of the y-hi-face
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    static constexpr int yhi () noexcept { return 1+AMREX_SPACEDIM; }
+
+    //! Int value of the z-lo-face
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    static constexpr int zlo () noexcept { return 2; }
+
+    //! Int value of the z-hi-face
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    static constexpr int zhi () noexcept { return 2+AMREX_SPACEDIM; }
+
 private:
     //! Used internally.
     AMREX_GPU_HOST_DEVICE

diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H
@@ -105,7 +105,8 @@ public: // for cuda
 
     void applyBCTensor (int amrlev, int mglev, MultiFab& vel,
                         BCMode bc_mode, StateMode s_mode, const MLMGBndry* bndry) const;
-    void compCrossTerms(int amrlev, int mglev, MultiFab const& mf) const;
+    void compCrossTerms(int amrlev, int mglev, MultiFab const& mf,
+                        const MLMGBndry* bndry) const;
 };
 
 }

diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp b/Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp
@@ -13,11 +13,12 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
     const auto& bcondloc = *m_bcondloc[amrlev][mglev];
     const auto& maskvals = m_maskvals[amrlev][mglev];
 
-    FArrayBox foofab(Box::TheUnitBox(),3);
-    const auto& foo = foofab.array();
+    Array4<Real const> foo;
 
     const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray();
     const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1);
+    const auto dlo = amrex::lbound(domain);
+    const auto dhi = amrex::ubound(domain);
 
     auto factory = dynamic_cast<EBFArrayBoxFactory const*>(m_factory[amrlev][mglev].get());
     const FabArray<EBCellFlagFab>* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr;
@@ -39,14 +40,13 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
             const auto & bdlv = bcondloc.bndryLocs(mfi);
             const auto & bdcv = bcondloc.bndryConds(mfi);
 
-            GpuArray<BoundCond,2*AMREX_SPACEDIM*AMREX_SPACEDIM> bct;
-            GpuArray<Real,2*AMREX_SPACEDIM*AMREX_SPACEDIM> bcl;
-            for (OrientationIter face; face; ++face) {
-                Orientation ori = face();
-                const int iface = ori;
-                for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) {
-                    bct[iface*AMREX_SPACEDIM+icomp] = bdcv[icomp][ori];
-                    bcl[iface*AMREX_SPACEDIM+icomp] = bdlv[icomp][ori];
+            Array2D<BoundCond,0,2*AMREX_SPACEDIM,0,AMREX_SPACEDIM> bct;
+            Array2D<Real,0,2*AMREX_SPACEDIM,0,AMREX_SPACEDIM> bcl;
+            for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) {
+                for (OrientationIter face; face; ++face) {
+                    Orientation ori = face();
+                    bct(ori,icomp) = bdcv[icomp][ori];
+                    bcl(ori,icomp) = bdlv[icomp][ori];
                 }
             }
 
@@ -72,7 +72,7 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
                                       mxlo, mylo, mxhi, myhi,
                                       bvxlo, bvylo, bvxhi, bvyhi,
                                       bct, bcl, inhomog, imaxorder,
-                                      dxinv, domain);
+                                      dxinv, dlo, dhi);
             });
 #else
             const auto& mzlo = maskvals[Orientation(2,Orientation::low )].array(mfi);
@@ -83,28 +83,50 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
             const auto& bvzhi = (bndry != nullptr) ?
                 (*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo;
 
-            AMREX_HOST_DEVICE_FOR_1D ( 12, iedge,
+#ifdef AMREX_USE_GPU
+            if (Gpu::inLaunchRegion()) {
+                amrex::launch(12, 64, Gpu::gpuStream(),
+#ifdef AMREX_USE_DPCPP
+                [=] AMREX_GPU_DEVICE (sycl::nd_item<1> const& item)
+                {
+                    int bid = item.get_group_linear_id();
+                    int tid = item.get_local_linear_id();
+                    int bdim = item.get_local_range(0);
+#else
+                [=] AMREX_GPU_DEVICE ()
+                {
+                    int bid = blockIdx.x;
+                    int tid = threadIdx.x;
+                    int bdim = blockDim.x;
+#endif
+                    mltensor_fill_edges(bid, tid, bdim, vbx, velfab,
+                                        mxlo, mylo, mzlo, mxhi, myhi, mzhi,
+                                        bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi,
+                                        bct, bcl, inhomog, imaxorder,
+                                        dxinv, dlo, dhi);
+                });
+            } else
+#endif
             {
-                mltensor_fill_edges(iedge, vbx, velfab,
+                mltensor_fill_edges(vbx, velfab,
                                     mxlo, mylo, mzlo, mxhi, myhi, mzhi,
                                     bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi,
                                     bct, bcl, inhomog, imaxorder,
-                                    dxinv, domain);
-            });
+                                    dxinv, dlo, dhi);
+            }
 
             AMREX_HOST_DEVICE_FOR_1D ( 8, icorner,
             {
                 mltensor_fill_corners(icorner, vbx, velfab,
                                       mxlo, mylo, mzlo, mxhi, myhi, mzhi,
                                       bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi,
                                       bct, bcl, inhomog, imaxorder,
-                                      dxinv, domain);
+                                      dxinv, dlo, dhi);
             });
+
 #endif
         }
     }
-
-    // Notet that it is incorrect to call EnforcePeriodicity on vel.
 }
 
 }
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H
@@ -6,10 +6,95 @@
 
 namespace amrex {
 
-namespace {
-    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-    Real mlebtensor_weight (int d) {
-        return (d==2) ? 0.5 : ((d==1) ? 1.0 : 0.0);
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlebtensor_cross_terms_fx (Box const& box, Array4<Real> const& fx,
+                                Array4<Real const> const& vel,
+                                Array4<Real const> const& etax,
+                                Array4<Real const> const& kapx,
+                                Array4<Real const> const& apx,
+                                Array4<EBCellFlag const> const& flag,
+                                GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
+{
+    const Real dyi = dxinv[1];
+    const auto lo = amrex::lbound(box);
+    const auto hi = amrex::ubound(box);
+    constexpr Real twoThirds = 2./3.;
+
+    int k = 0;
+    for     (int j = lo.y; j <= hi.y; ++j) {
+        AMREX_PRAGMA_SIMD
+        for (int i = lo.x; i <= hi.x; ++i) {
+            if (apx(i,j,0) == 0.0)
+            {
+                fx(i,j,0,0) = 0.0;
+                fx(i,j,0,1) = 0.0;
+            }
+            else
+            {
+                int jhip = j + flag(i  ,j,0).isConnected(0, 1,0);
+                int jhim = j - flag(i  ,j,0).isConnected(0,-1,0);
+                int jlop = j + flag(i-1,j,0).isConnected(0, 1,0);
+                int jlom = j - flag(i-1,j,0).isConnected(0,-1,0);
+                Real whi = mlebtensor_weight(jhip-jhim);
+                Real wlo = mlebtensor_weight(jlop-jlom);
+                Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi,
+                                                   whi,wlo,jhip,jhim,jlop,jlom);
+                Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi,
+                                                   whi,wlo,jhip,jhim,jlop,jlom);
+                Real divu = dvdy;
+                Real xif = kapx(i,j,0);
+                Real mun = Real(0.75)*(etax(i,j,0,0)-xif);// restore the original eta
+                Real mut =             etax(i,j,0,1);
+                fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu;
+                fx(i,j,0,1) = -mut*dudy;
+            }
+        }
+    }
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void mlebtensor_cross_terms_fy (Box const& box, Array4<Real> const& fy,
+                                Array4<Real const> const& vel,
+                                Array4<Real const> const& etay,
+                                Array4<Real const> const& kapy,
+                                Array4<Real const> const& apy,
+                                Array4<EBCellFlag const> const& flag,
+                                GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
+{
+    const Real dxi = dxinv[0];
+    const auto lo = amrex::lbound(box);
+    const auto hi = amrex::ubound(box);
+    constexpr Real twoThirds = 2./3.;
+
+    int k = 0;
+    for     (int j = lo.y; j <= hi.y; ++j) {
+        AMREX_PRAGMA_SIMD
+        for (int i = lo.x; i <= hi.x; ++i) {
+            if (apy(i,j,0) == 0.0)
+            {
+                fy(i,j,0,0) = 0.0;
+                fy(i,j,0,1) = 0.0;
+            }
+            else
+            {
+                int ihip = i + flag(i,j  ,0).isConnected( 1,0,0);
+                int ihim = i - flag(i,j  ,0).isConnected(-1,0,0);
+                int ilop = i + flag(i,j-1,0).isConnected( 1,0,0);
+                int ilom = i - flag(i,j-1,0).isConnected(-1,0,0);
+                Real whi = mlebtensor_weight(ihip-ihim);
+                Real wlo = mlebtensor_weight(ilop-ilom);
+                Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi,
+                                                   whi,wlo,ihip,ihim,ilop,ilom);
+                Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi,
+                                                   whi,wlo,ihip,ihim,ilop,ilom);
+                Real divu = dudx;
+                Real xif = kapy(i,j,0);
+                Real mun = Real(0.75)*(etay(i,j,0,1)-xif);// restore the original eta
+                Real mut =             etay(i,j,0,0);
+                fy(i,j,0,0) = -mut*dvdx;
+                fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu;
+            }
+        }
     }
 }
 
@@ -20,13 +105,20 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4<Real> const& fx,
                                 Array4<Real const> const& kapx,
                                 Array4<Real const> const& apx,
                                 Array4<EBCellFlag const> const& flag,
-                                GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
+                                GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
+                                Array4<Real const> const& bvxlo,
+                                Array4<Real const> const& bvxhi,
+                                Array2D<BoundCond,
+                                        0,2*AMREX_SPACEDIM,
+                                        0,AMREX_SPACEDIM> const& bct,
+                                Dim3 const& dlo, Dim3 const& dhi) noexcept
 {
     const Real dyi = dxinv[1];
     const auto lo = amrex::lbound(box);
     const auto hi = amrex::ubound(box);
     constexpr Real twoThirds = 2./3.;
 
+    int k = 0;
     for     (int j = lo.y; j <= hi.y; ++j) {
         AMREX_PRAGMA_SIMD
         for (int i = lo.x; i <= hi.x; ++i) {
@@ -43,13 +135,15 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4<Real> const& fx,
                 int jlom = j - flag(i-1,j,0).isConnected(0,-1,0);
                 Real whi = mlebtensor_weight(jhip-jhim);
                 Real wlo = mlebtensor_weight(jlop-jlom);
-                Real dudy = (0.5*dyi) * ((vel(i  ,jhip,0,0)-vel(i  ,jhim,0,0))*whi
-                                        +(vel(i-1,jlop,0,0)-vel(i-1,jlom,0,0))*wlo);
-                Real dvdy = (0.5*dyi) * ((vel(i  ,jhip,0,1)-vel(i  ,jhim,0,1))*whi
-                                        +(vel(i-1,jlop,0,1)-vel(i-1,jlom,0,1))*wlo);
+                Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi,
+                                                   bvxlo,bvxhi,bct,dlo,dhi,
+                                                   whi,wlo,jhip,jhim,jlop,jlom);
+                Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi,
+                                                   bvxlo,bvxhi,bct,dlo,dhi,
+                                                   whi,wlo,jhip,jhim,jlop,jlom);
                 Real divu = dvdy;
                 Real xif = kapx(i,j,0);
-                Real mun = 0.75*(etax(i,j,0,0)-xif);  // restore the original eta
+                Real mun = Real(0.75)*(etax(i,j,0,0)-xif);// restore the original eta
                 Real mut =       etax(i,j,0,1);
                 fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu;
                 fx(i,j,0,1) = -mut*dudy;
@@ -65,13 +159,20 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4<Real> const& fy,
                                 Array4<Real const> const& kapy,
                                 Array4<Real const> const& apy,
                                 Array4<EBCellFlag const> const& flag,
-                                GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
+                                GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
+                                Array4<Real const> const& bvylo,
+                                Array4<Real const> const& bvyhi,
+                                Array2D<BoundCond,
+                                        0,2*AMREX_SPACEDIM,
+                                        0,AMREX_SPACEDIM> const& bct,
+                                Dim3 const& dlo, Dim3 const& dhi) noexcept
 {
     const Real dxi = dxinv[0];
     const auto lo = amrex::lbound(box);
     const auto hi = amrex::ubound(box);
     constexpr Real twoThirds = 2./3.;
 
+    int k = 0;
     for     (int j = lo.y; j <= hi.y; ++j) {
         AMREX_PRAGMA_SIMD
         for (int i = lo.x; i <= hi.x; ++i) {
@@ -88,15 +189,16 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4<Real> const& fy,
                 int ilom = i - flag(i,j-1,0).isConnected(-1,0,0);
                 Real whi = mlebtensor_weight(ihip-ihim);
                 Real wlo = mlebtensor_weight(ilop-ilom);
-                Real dudx = (0.5*dxi) * ((vel(ihip,j  ,0,0)-vel(ihim,j  ,0,0))*whi
-                                        +(vel(ilop,j-1,0,0)-vel(ilom,j-1,0,0))*wlo);
-                Real dvdx = (0.5*dxi) * ((vel(ihip,j  ,0,1)-vel(ihim,j  ,0,1))*whi
-                                        +(vel(ilop,j-1,0,1)-vel(ilom,j-1,0,1))*wlo);
-
+                Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi,
+                                                   bvylo,bvyhi,bct,dlo,dhi,
+                                                   whi,wlo,ihip,ihim,ilop,ilom);
+                Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi,
+                                                   bvylo,bvyhi,bct,dlo,dhi,
+                                                   whi,wlo,ihip,ihim,ilop,ilom);
                 Real divu = dudx;
                 Real xif = kapy(i,j,0);
-                Real mun = 0.75*(etay(i,j,0,1)-xif);  // restore the original eta
-                Real mut =       etay(i,j,0,0);
+                Real mun = Real(0.75)*(etay(i,j,0,1)-xif);// restore the original eta
+                Real mut =             etay(i,j,0,0);
                 fy(i,j,0,0) = -mut*dvdx;
                 fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu;
             }