Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Tensor Solver BC #2930

Merged
merged 19 commits into from
Oct 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion Src/Base/AMReX_Orientation.H
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ public:
* according to the above ordering.
*/
AMREX_GPU_HOST_DEVICE
operator int () const noexcept { return val; }
constexpr operator int () const noexcept { return val; }
//! Return opposite orientation.
AMREX_GPU_HOST_DEVICE
Orientation flip () const noexcept
Expand All @@ -97,6 +97,30 @@ public:
//! Read from an istream.
friend std::istream& operator>> (std::istream& os, Orientation& o);

//! Int value of the x-lo-face
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
static constexpr int xlo () noexcept { return 0; }

//! Int value of the x-hi-face
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
static constexpr int xhi () noexcept { return AMREX_SPACEDIM; }

//! Int value of the y-lo-face
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
static constexpr int ylo () noexcept { return 1; }

//! Int value of the y-hi-face
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
static constexpr int yhi () noexcept { return 1+AMREX_SPACEDIM; }

//! Int value of the z-lo-face
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
static constexpr int zlo () noexcept { return 2; }

//! Int value of the z-hi-face
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
static constexpr int zhi () noexcept { return 2+AMREX_SPACEDIM; }

private:
//! Used internally.
AMREX_GPU_HOST_DEVICE
Expand Down
3 changes: 2 additions & 1 deletion Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.H
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ public: // for cuda

void applyBCTensor (int amrlev, int mglev, MultiFab& vel,
BCMode bc_mode, StateMode s_mode, const MLMGBndry* bndry) const;
void compCrossTerms(int amrlev, int mglev, MultiFab const& mf) const;
void compCrossTerms(int amrlev, int mglev, MultiFab const& mf,
const MLMGBndry* bndry) const;
};

}
Expand Down
300 changes: 151 additions & 149 deletions Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp.cpp

Large diffs are not rendered by default.

58 changes: 40 additions & 18 deletions Src/LinearSolvers/MLMG/AMReX_MLEBTensorOp_bc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
const auto& bcondloc = *m_bcondloc[amrlev][mglev];
const auto& maskvals = m_maskvals[amrlev][mglev];

FArrayBox foofab(Box::TheUnitBox(),3);
const auto& foo = foofab.array();
Array4<Real const> foo;

const auto dxinv = m_geom[amrlev][mglev].InvCellSizeArray();
const Box& domain = m_geom[amrlev][mglev].growPeriodicDomain(1);
const auto dlo = amrex::lbound(domain);
const auto dhi = amrex::ubound(domain);

auto factory = dynamic_cast<EBFArrayBoxFactory const*>(m_factory[amrlev][mglev].get());
const FabArray<EBCellFlagFab>* flags = (factory) ? &(factory->getMultiEBCellFlagFab()) : nullptr;
Expand All @@ -39,14 +40,13 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
const auto & bdlv = bcondloc.bndryLocs(mfi);
const auto & bdcv = bcondloc.bndryConds(mfi);

GpuArray<BoundCond,2*AMREX_SPACEDIM*AMREX_SPACEDIM> bct;
GpuArray<Real,2*AMREX_SPACEDIM*AMREX_SPACEDIM> bcl;
for (OrientationIter face; face; ++face) {
Orientation ori = face();
const int iface = ori;
for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) {
bct[iface*AMREX_SPACEDIM+icomp] = bdcv[icomp][ori];
bcl[iface*AMREX_SPACEDIM+icomp] = bdlv[icomp][ori];
Array2D<BoundCond,0,2*AMREX_SPACEDIM,0,AMREX_SPACEDIM> bct;
Array2D<Real,0,2*AMREX_SPACEDIM,0,AMREX_SPACEDIM> bcl;
for (int icomp = 0; icomp < AMREX_SPACEDIM; ++icomp) {
for (OrientationIter face; face; ++face) {
Orientation ori = face();
bct(ori,icomp) = bdcv[icomp][ori];
bcl(ori,icomp) = bdlv[icomp][ori];
}
}

Expand All @@ -72,7 +72,7 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
mxlo, mylo, mxhi, myhi,
bvxlo, bvylo, bvxhi, bvyhi,
bct, bcl, inhomog, imaxorder,
dxinv, domain);
dxinv, dlo, dhi);
});
#else
const auto& mzlo = maskvals[Orientation(2,Orientation::low )].array(mfi);
Expand All @@ -83,28 +83,50 @@ MLEBTensorOp::applyBCTensor (int amrlev, int mglev, MultiFab& vel,
const auto& bvzhi = (bndry != nullptr) ?
(*bndry)[Orientation(2,Orientation::high)].array(mfi) : foo;

AMREX_HOST_DEVICE_FOR_1D ( 12, iedge,
#ifdef AMREX_USE_GPU
if (Gpu::inLaunchRegion()) {
amrex::launch(12, 64, Gpu::gpuStream(),
#ifdef AMREX_USE_DPCPP
[=] AMREX_GPU_DEVICE (sycl::nd_item<1> const& item)
{
int bid = item.get_group_linear_id();
int tid = item.get_local_linear_id();
int bdim = item.get_local_range(0);
#else
[=] AMREX_GPU_DEVICE ()
{
int bid = blockIdx.x;
int tid = threadIdx.x;
int bdim = blockDim.x;
#endif
mltensor_fill_edges(bid, tid, bdim, vbx, velfab,
mxlo, mylo, mzlo, mxhi, myhi, mzhi,
bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi,
bct, bcl, inhomog, imaxorder,
dxinv, dlo, dhi);
});
} else
#endif
{
mltensor_fill_edges(iedge, vbx, velfab,
mltensor_fill_edges(vbx, velfab,
mxlo, mylo, mzlo, mxhi, myhi, mzhi,
bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi,
bct, bcl, inhomog, imaxorder,
dxinv, domain);
});
dxinv, dlo, dhi);
}

AMREX_HOST_DEVICE_FOR_1D ( 8, icorner,
{
mltensor_fill_corners(icorner, vbx, velfab,
mxlo, mylo, mzlo, mxhi, myhi, mzhi,
bvxlo, bvylo, bvzlo, bvxhi, bvyhi, bvzhi,
bct, bcl, inhomog, imaxorder,
dxinv, domain);
dxinv, dlo, dhi);
});

#endif
}
}

// Notet that it is incorrect to call EnforcePeriodicity on vel.
}

}
138 changes: 120 additions & 18 deletions Src/LinearSolvers/MLMG/AMReX_MLEBTensor_2D_K.H
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,95 @@

namespace amrex {

namespace {
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
Real mlebtensor_weight (int d) {
return (d==2) ? 0.5 : ((d==1) ? 1.0 : 0.0);
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebtensor_cross_terms_fx (Box const& box, Array4<Real> const& fx,
Array4<Real const> const& vel,
Array4<Real const> const& etax,
Array4<Real const> const& kapx,
Array4<Real const> const& apx,
Array4<EBCellFlag const> const& flag,
GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
{
const Real dyi = dxinv[1];
const auto lo = amrex::lbound(box);
const auto hi = amrex::ubound(box);
constexpr Real twoThirds = 2./3.;

int k = 0;
for (int j = lo.y; j <= hi.y; ++j) {
AMREX_PRAGMA_SIMD
for (int i = lo.x; i <= hi.x; ++i) {
if (apx(i,j,0) == 0.0)
{
fx(i,j,0,0) = 0.0;
fx(i,j,0,1) = 0.0;
}
else
{
int jhip = j + flag(i ,j,0).isConnected(0, 1,0);
int jhim = j - flag(i ,j,0).isConnected(0,-1,0);
int jlop = j + flag(i-1,j,0).isConnected(0, 1,0);
int jlom = j - flag(i-1,j,0).isConnected(0,-1,0);
Real whi = mlebtensor_weight(jhip-jhim);
Real wlo = mlebtensor_weight(jlop-jlom);
Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi,
whi,wlo,jhip,jhim,jlop,jlom);
Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi,
whi,wlo,jhip,jhim,jlop,jlom);
Real divu = dvdy;
Real xif = kapx(i,j,0);
Real mun = Real(0.75)*(etax(i,j,0,0)-xif);// restore the original eta
Real mut = etax(i,j,0,1);
fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu;
fx(i,j,0,1) = -mut*dudy;
}
}
}
}

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlebtensor_cross_terms_fy (Box const& box, Array4<Real> const& fy,
Array4<Real const> const& vel,
Array4<Real const> const& etay,
Array4<Real const> const& kapy,
Array4<Real const> const& apy,
Array4<EBCellFlag const> const& flag,
GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
{
const Real dxi = dxinv[0];
const auto lo = amrex::lbound(box);
const auto hi = amrex::ubound(box);
constexpr Real twoThirds = 2./3.;

int k = 0;
for (int j = lo.y; j <= hi.y; ++j) {
AMREX_PRAGMA_SIMD
for (int i = lo.x; i <= hi.x; ++i) {
if (apy(i,j,0) == 0.0)
{
fy(i,j,0,0) = 0.0;
fy(i,j,0,1) = 0.0;
}
else
{
int ihip = i + flag(i,j ,0).isConnected( 1,0,0);
int ihim = i - flag(i,j ,0).isConnected(-1,0,0);
int ilop = i + flag(i,j-1,0).isConnected( 1,0,0);
int ilom = i - flag(i,j-1,0).isConnected(-1,0,0);
Real whi = mlebtensor_weight(ihip-ihim);
Real wlo = mlebtensor_weight(ilop-ilom);
Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi,
whi,wlo,ihip,ihim,ilop,ilom);
Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi,
whi,wlo,ihip,ihim,ilop,ilom);
Real divu = dudx;
Real xif = kapy(i,j,0);
Real mun = Real(0.75)*(etay(i,j,0,1)-xif);// restore the original eta
Real mut = etay(i,j,0,0);
fy(i,j,0,0) = -mut*dvdx;
fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu;
}
}
}
}

Expand All @@ -20,13 +105,20 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4<Real> const& fx,
Array4<Real const> const& kapx,
Array4<Real const> const& apx,
Array4<EBCellFlag const> const& flag,
GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
Array4<Real const> const& bvxlo,
Array4<Real const> const& bvxhi,
Array2D<BoundCond,
0,2*AMREX_SPACEDIM,
0,AMREX_SPACEDIM> const& bct,
Dim3 const& dlo, Dim3 const& dhi) noexcept
{
const Real dyi = dxinv[1];
const auto lo = amrex::lbound(box);
const auto hi = amrex::ubound(box);
constexpr Real twoThirds = 2./3.;

int k = 0;
for (int j = lo.y; j <= hi.y; ++j) {
AMREX_PRAGMA_SIMD
for (int i = lo.x; i <= hi.x; ++i) {
Expand All @@ -43,13 +135,15 @@ void mlebtensor_cross_terms_fx (Box const& box, Array4<Real> const& fx,
int jlom = j - flag(i-1,j,0).isConnected(0,-1,0);
Real whi = mlebtensor_weight(jhip-jhim);
Real wlo = mlebtensor_weight(jlop-jlom);
Real dudy = (0.5*dyi) * ((vel(i ,jhip,0,0)-vel(i ,jhim,0,0))*whi
+(vel(i-1,jlop,0,0)-vel(i-1,jlom,0,0))*wlo);
Real dvdy = (0.5*dyi) * ((vel(i ,jhip,0,1)-vel(i ,jhim,0,1))*whi
+(vel(i-1,jlop,0,1)-vel(i-1,jlom,0,1))*wlo);
Real dudy = mlebtensor_dy_on_xface(i,j,k,0,vel,dyi,
bvxlo,bvxhi,bct,dlo,dhi,
whi,wlo,jhip,jhim,jlop,jlom);
Real dvdy = mlebtensor_dy_on_xface(i,j,k,1,vel,dyi,
bvxlo,bvxhi,bct,dlo,dhi,
whi,wlo,jhip,jhim,jlop,jlom);
Real divu = dvdy;
Real xif = kapx(i,j,0);
Real mun = 0.75*(etax(i,j,0,0)-xif); // restore the original eta
Real mun = Real(0.75)*(etax(i,j,0,0)-xif);// restore the original eta
Real mut = etax(i,j,0,1);
fx(i,j,0,0) = -mun*(-twoThirds*divu) - xif*divu;
fx(i,j,0,1) = -mut*dudy;
Expand All @@ -65,13 +159,20 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4<Real> const& fy,
Array4<Real const> const& kapy,
Array4<Real const> const& apy,
Array4<EBCellFlag const> const& flag,
GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
GpuArray<Real,AMREX_SPACEDIM> const& dxinv,
Array4<Real const> const& bvylo,
Array4<Real const> const& bvyhi,
Array2D<BoundCond,
0,2*AMREX_SPACEDIM,
0,AMREX_SPACEDIM> const& bct,
Dim3 const& dlo, Dim3 const& dhi) noexcept
{
const Real dxi = dxinv[0];
const auto lo = amrex::lbound(box);
const auto hi = amrex::ubound(box);
constexpr Real twoThirds = 2./3.;

int k = 0;
for (int j = lo.y; j <= hi.y; ++j) {
AMREX_PRAGMA_SIMD
for (int i = lo.x; i <= hi.x; ++i) {
Expand All @@ -88,15 +189,16 @@ void mlebtensor_cross_terms_fy (Box const& box, Array4<Real> const& fy,
int ilom = i - flag(i,j-1,0).isConnected(-1,0,0);
Real whi = mlebtensor_weight(ihip-ihim);
Real wlo = mlebtensor_weight(ilop-ilom);
Real dudx = (0.5*dxi) * ((vel(ihip,j ,0,0)-vel(ihim,j ,0,0))*whi
+(vel(ilop,j-1,0,0)-vel(ilom,j-1,0,0))*wlo);
Real dvdx = (0.5*dxi) * ((vel(ihip,j ,0,1)-vel(ihim,j ,0,1))*whi
+(vel(ilop,j-1,0,1)-vel(ilom,j-1,0,1))*wlo);

Real dudx = mlebtensor_dx_on_yface(i,j,k,0,vel,dxi,
bvylo,bvyhi,bct,dlo,dhi,
whi,wlo,ihip,ihim,ilop,ilom);
Real dvdx = mlebtensor_dx_on_yface(i,j,k,1,vel,dxi,
bvylo,bvyhi,bct,dlo,dhi,
whi,wlo,ihip,ihim,ilop,ilom);
Real divu = dudx;
Real xif = kapy(i,j,0);
Real mun = 0.75*(etay(i,j,0,1)-xif); // restore the original eta
Real mut = etay(i,j,0,0);
Real mun = Real(0.75)*(etay(i,j,0,1)-xif);// restore the original eta
Real mut = etay(i,j,0,0);
fy(i,j,0,0) = -mut*dvdx;
fy(i,j,0,1) = -mun*(-twoThirds*divu) - xif*divu;
}
Expand Down
Loading