Commit 8a52706

merge main

BenWibking committed Nov 30, 2024
2 parents b1c3359 + f8497c5
Showing 4 changed files with 112 additions and 126 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -13,8 +13,10 @@
- [[PR 84]](https://github.com/parthenon-hpc-lab/athenapk/pull/84) Bump Parthenon to latest develop (2024-02-15)

### Fixed (not changing behavior/API/variables/...)
- [[PR 128]](https://github.com/parthenon-hpc-lab/athenapk/pull/128) Fixed `dt_diff` in RKL2

### Infrastructure
- [[PR 128]](https://github.com/parthenon-hpc-lab/athenapk/pull/128) Bump Parthenon to support `dn` based outputs
- [[PR 124]](https://github.com/parthenon-hpc-lab/athenapk/pull/124) Bump Kokkos 4.4.1 (and Parthenon to include view-of-view fix)
- [[PR 117]](https://github.com/parthenon-hpc-lab/athenapk/pull/117) Update devcontainer.json to latest CI container
- [[PR 114]](https://github.com/parthenon-hpc-lab/athenapk/pull/114) Bump Parthenon 24.08 and Kokkos to 4.4.00
2 changes: 1 addition & 1 deletion external/parthenon
110 changes: 93 additions & 17 deletions src/hydro/hydro.cpp
@@ -62,12 +62,87 @@ parthenon::Packages_t ProcessPackages(std::unique_ptr<ParameterInput> &pin) {
return packages;
}

// Calculate minimum dx, which is used in calculating the divergence cleaning speed c_h
// TODO(PG) eventually move to calculating the timestep once the timestep calc
// has been moved to be done before Step()
Real CalculateGlobalMinDx(MeshData<Real> *md) {
auto *pmb = md->GetBlockData(0)->GetBlockPointer();
auto hydro_pkg = pmb->packages.Get("Hydro");

const auto &prim_pack = md->PackVariables(std::vector<std::string>{"prim"});

IndexRange ib = md->GetBlockData(0)->GetBoundsI(IndexDomain::interior);
IndexRange jb = md->GetBlockData(0)->GetBoundsJ(IndexDomain::interior);
IndexRange kb = md->GetBlockData(0)->GetBoundsK(IndexDomain::interior);

Real mindx = std::numeric_limits<Real>::max();

bool nx2 = prim_pack.GetDim(2) > 1;
bool nx3 = prim_pack.GetDim(3) > 1;
pmb->par_reduce(
"CalculateGlobalMinDx", 0, prim_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
ib.e,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &lmindx) {
const auto &coords = prim_pack.GetCoords(b);
lmindx = fmin(lmindx, coords.Dxc<1>(k, j, i));
if (nx2) {
lmindx = fmin(lmindx, coords.Dxc<2>(k, j, i));
}
if (nx3) {
lmindx = fmin(lmindx, coords.Dxc<3>(k, j, i));
}
},
Kokkos::Min<Real>(mindx));

return mindx;
}
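
For reference, below is a minimal, self-contained sketch of the reduction pattern used in `CalculateGlobalMinDx`: a Kokkos `parallel_reduce` into a host scalar through the `Kokkos::Min` reducer, stripped of Parthenon's pack and `IndexRange` machinery (the cell count and spacing values are invented for illustration):

```cpp
// Minimal sketch (not AthenaPK code): min-reduction over per-cell spacings
// with Kokkos, mirroring the Kokkos::Min reducer used in CalculateGlobalMinDx.
#include <Kokkos_Core.hpp>
#include <cstdio>
#include <limits>

int main(int argc, char *argv[]) {
  Kokkos::initialize(argc, argv);
  {
    const int ncells = 1024;                  // hypothetical cell count
    Kokkos::View<double *> dx("dx", ncells);  // per-cell grid spacing
    Kokkos::parallel_for(
        "FillDx", ncells,
        KOKKOS_LAMBDA(const int i) { dx(i) = 1.0 + 0.5 * (i % 7); });

    double mindx = std::numeric_limits<double>::max();
    // The reduction into a host scalar blocks until complete, which is why
    // the caller in hydro.cpp can use the return value directly.
    Kokkos::parallel_reduce(
        "MinDx", ncells,
        KOKKOS_LAMBDA(const int i, double &lmin) {
          lmin = lmin < dx(i) ? lmin : dx(i);
        },
        Kokkos::Min<double>(mindx));
    std::printf("min dx = %g\n", mindx);
  }
  Kokkos::finalize();
  return 0;
}
```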

// Using this per-cycle function to populate various variables in
// Params that require global reduction *and* need to be set/known when
// the task list is constructed (versus when the task list is being executed).
// TODO(next person touching this function): If more/separate features are required,
// please separate concerns.
void PreStepMeshUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, SimTime &tm) {}
void PreStepMeshUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, SimTime &tm) {
auto hydro_pkg = pmesh->packages.Get("Hydro");

// Calculate hyperbolic divergence cleaning speed
// TODO(pgrete) Calculating mindx is only required after remeshing. Need to
// find a clean solution for this one-off global reduction.
if (hydro_pkg->Param<bool>("calc_c_h") ||
hydro_pkg->Param<DiffInt>("diffint") != DiffInt::none) {

Real mindx = std::numeric_limits<Real>::max();
// Going over default partitions. Not using a (new) single partition containing
// all blocks here as this (default) split is also used in the main Step() function
// and thus does not create an overhead (such as creating a new MeshBlockPack that is
// just used here). All partitions are executed sequentially. Given that a par_reduce
// to a host var is blocking, it's safe to directly use the return value.
const int num_partitions = pmesh->DefaultNumPartitions();
for (int i = 0; i < num_partitions; i++) {
auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
mindx = std::min(mindx, CalculateGlobalMinDx(mu0.get()));
}
#ifdef MPI_PARALLEL
Real mins[3];
mins[0] = mindx;
mins[1] = hydro_pkg->Param<Real>("dt_hyp");
mins[2] = hydro_pkg->Param<Real>("dt_diff");
PARTHENON_MPI_CHECK(MPI_Allreduce(MPI_IN_PLACE, mins, 3, MPI_PARTHENON_REAL, MPI_MIN,
MPI_COMM_WORLD));

hydro_pkg->UpdateParam("mindx", mins[0]);
hydro_pkg->UpdateParam("dt_hyp", mins[1]);
hydro_pkg->UpdateParam("dt_diff", mins[2]);
#else
hydro_pkg->UpdateParam("mindx", mindx);
// dt_hyp and dt_diff are already set directly in Params when they're calculated
#endif
// Finally update c_h
const auto &cfl_hyp = hydro_pkg->Param<Real>("cfl");
const auto &dt_hyp = hydro_pkg->Param<Real>("dt_hyp");
hydro_pkg->UpdateParam("c_h", cfl_hyp * mindx / dt_hyp);
}
}
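
The in-place MPI reduction above packs three scalars into one buffer so a single `MPI_Allreduce` latency covers all of them. A standalone sketch of that pattern in plain MPI follows, with invented local values standing in for the `Params` lookups:

```cpp
// Minimal sketch (not AthenaPK code): reducing several minima in one
// MPI_Allreduce call using MPI_IN_PLACE, as done for mindx/dt_hyp/dt_diff.
#include <mpi.h>
#include <cstdio>

int main(int argc, char *argv[]) {
  MPI_Init(&argc, &argv);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // Pretend each rank computed its own local minima (values are arbitrary).
  double mins[3] = {1.0 / (rank + 1),   // local mindx
                    0.5 / (rank + 1),   // local dt_hyp
                    2.0 / (rank + 1)};  // local dt_diff

  // MPI_IN_PLACE uses mins as both send and receive buffer; afterwards every
  // rank holds the global minimum of each entry.
  MPI_Allreduce(MPI_IN_PLACE, mins, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);

  if (rank == 0) {
    // c_h = cfl_hyp * mindx / dt_hyp, matching the update at the end of
    // PreStepMeshUserWorkInLoop (the cfl value here is made up).
    const double cfl_hyp = 0.3;
    std::printf("global mindx=%g dt_hyp=%g dt_diff=%g c_h=%g\n", mins[0],
                mins[1], mins[2], cfl_hyp * mins[0] / mins[1]);
  }
  MPI_Finalize();
  return 0;
}
```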

template <Hst hst, int idx = -1>
Real HydroHst(MeshData<Real> *md) {
@@ -235,13 +310,13 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
// they're all used in the main loop.
// TODO(pgrete) think about which approach (selective versus always) is preferable
pkg->AddParam<Real>(
"c_h", 0.0, Params::Mutability::Restart); // hyperbolic divergence cleaning speed
"c_h", 0.0, Params::Mutability::Mutable); // hyperbolic divergence cleaning speed
// global minimum dx (used to calc c_h)
pkg->AddParam<Real>("mindx", std::numeric_limits<Real>::max(),
Params::Mutability::Restart);
Params::Mutability::Mutable);
// hyperbolic timestep constraint
pkg->AddParam<Real>("dt_hyp", std::numeric_limits<Real>::max(),
Params::Mutability::Restart);
Params::Mutability::Mutable);

const auto recon_str = pin->GetString("hydro", "reconstruction");
int recon_need_nghost = 3; // largest number for the choices below
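
As a side note on why the reconstruction choice drives the ghost-zone count (`recon_need_nghost` above): even slope-limited linear reconstruction reaches one cell beyond the interface pair, and wider stencils such as PPM reach further. A minimal sketch of limited linear (PLM-style) reconstruction, independent of AthenaPK's actual reconstruction kernels:

```cpp
// Minimal sketch (not AthenaPK's reconstruction): piecewise-linear
// reconstruction with a minmod limiter. The left/right states at interface
// i+1/2 touch cells i-1..i+2, which is why higher-order methods need
// correspondingly more ghost cells.
#include <cmath>
#include <cstdio>
#include <vector>

double minmod(double a, double b) {
  if (a * b <= 0.0) return 0.0;                  // opposite signs -> flat
  return (std::fabs(a) < std::fabs(b)) ? a : b;  // smaller magnitude wins
}

int main() {
  std::vector<double> u = {1.0, 1.0, 2.0, 4.0, 4.5, 4.5};  // cell averages
  // Left/right states at the interface between cells i and i+1:
  for (std::size_t i = 1; i + 2 < u.size(); ++i) {
    const double sl = minmod(u[i] - u[i - 1], u[i + 1] - u[i]);
    const double sr = minmod(u[i + 1] - u[i], u[i + 2] - u[i + 1]);
    const double uL = u[i] + 0.5 * sl;      // from the left cell
    const double uR = u[i + 1] - 0.5 * sr;  // from the right cell
    std::printf("interface %zu+1/2: uL=%g uR=%g\n", i, uL, uR);
  }
  return 0;
}
```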
@@ -634,7 +709,7 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
pkg->AddParam<>("cfl_diff", cfl_diff);
}
pkg->AddParam<Real>("dt_diff", std::numeric_limits<Real>::max(),
Params::Mutability::Restart); // diffusive timestep constraint
Params::Mutability::Mutable); // diffusive timestep constraint
pkg->AddParam<>("diffint", diffint);

if (fluid == Fluid::euler) {
@@ -1319,10 +1394,10 @@ TaskStatus FirstOrderFluxCorrect(MeshData<Real> *u0_data, MeshData<Real> *u1_dat
const auto &u0_prim = u0_prim_pack(b);
auto &u0_cons = u0_cons_pack(b);

// In principle, the u_cons.fluxes could be updated in parallel by a different
// thread resulting in a race condition here.
// However, if the fluxes of a cell have been updated (anywhere) then the entire
// kernel will be called again anyway, and, at that point the already fixed
// In principle, the u_cons.fluxes could be updated in parallel by a
// different thread resulting in a race condition here. However, if the
// fluxes of a cell have been updated (anywhere) then the entire kernel will
// be called again anyway, and, at that point the already fixed
// u0_cons.fluxes will automatically be used here.
Real new_cons[NVAR];
for (auto v = 0; v < NVAR; v++) {
@@ -1350,13 +1425,13 @@ TaskStatus FirstOrderFluxCorrect(MeshData<Real> *u0_data, MeshData<Real> *u1_dat
lnum_need_floor += 1;
return;
}
// In principle, there could be a race condition as this loop goes over all k,j,i
// and we are updating the i+1 flux here.
// However, the results are identical because u0_prim is never updated in this
// kernel so we don't worry about it.
// TODO(pgrete) as we need to keep the function signature identical for now (due
// to Cuda compiler bug) we could potentially template these functions and get
// rid of the `if constexpr`
// In principle, there could be a race condition as this loop goes over all
// k,j,i and we are updating the i+1 flux here. However, the results are
// identical because u0_prim is never updated in this kernel so we don't
// worry about it.
// TODO(pgrete) as we need to keep the function signature identical for now
// (due to Cuda compiler bug) we could potentially template these functions
// and get rid of the `if constexpr`
riemann.Solve(eos, k, j, i, IV1, u0_prim, u0_cons, c_h);
riemann.Solve(eos, k, j, i + 1, IV1, u0_prim, u0_cons, c_h);

@@ -1373,7 +1448,8 @@ TaskStatus FirstOrderFluxCorrect(MeshData<Real> *u0_data, MeshData<Real> *u1_dat
Kokkos::Sum<std::int64_t>(num_corrected),
Kokkos::Sum<std::int64_t>(num_need_floor));
// TODO(pgrete) make this optional and global (potentially store values in Params)
// std::cout << "[" << parthenon::Globals::my_rank << "] Attempt: " << num_attempts
// std::cout << "[" << parthenon::Globals::my_rank << "] Attempt: " <<
// num_attempts
// << " Corrected (center): " << num_corrected
// << " Failed (will rely on floor): " << num_need_floor << std::endl;
num_attempts += 1;
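
The correction pass above accumulates two counters in a single `parallel_reduce` with two reducers (`num_corrected` and `num_need_floor`). A stripped-down sketch of Kokkos' multi-reducer form, with an invented predicate standing in for the flux-correction checks:

```cpp
// Minimal sketch (not AthenaPK code): one Kokkos::parallel_reduce feeding two
// Kokkos::Sum reducers, as in FirstOrderFluxCorrect's corrected/floor counts.
#include <Kokkos_Core.hpp>
#include <cstdint>
#include <cstdio>

int main(int argc, char *argv[]) {
  Kokkos::initialize(argc, argv);
  {
    const int n = 1000;
    std::int64_t num_corrected = 0, num_need_floor = 0;
    Kokkos::parallel_reduce(
        "CountOutcomes", n,
        KOKKOS_LAMBDA(const int i, std::int64_t &lcorr, std::int64_t &lfloor) {
          // Invented predicate standing in for the flux-correction checks.
          if (i % 3 == 0) {
            lcorr += 1;   // cell was fixable by first-order fluxes
          } else if (i % 7 == 0) {
            lfloor += 1;  // cell must rely on the floor
          }
        },
        Kokkos::Sum<std::int64_t>(num_corrected),
        Kokkos::Sum<std::int64_t>(num_need_floor));
    std::printf("corrected=%lld floor=%lld\n",
                static_cast<long long>(num_corrected),
                static_cast<long long>(num_need_floor));
  }
  Kokkos::finalize();
  return 0;
}
```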
124 changes: 16 additions & 108 deletions src/hydro/hydro_driver.cpp
@@ -37,46 +37,6 @@ HydroDriver::HydroDriver(ParameterInput *pin, ApplicationInput *app_in, Mesh *pm
pin->CheckDesired("parthenon/time", "cfl");
}

// Calculate minimum dx, which is used in calculating the divergence cleaning speed c_h
TaskStatus CalculateGlobalMinDx(MeshData<Real> *md) {
auto pmb = md->GetBlockData(0)->GetBlockPointer();
auto hydro_pkg = pmb->packages.Get("Hydro");

const auto &prim_pack = md->PackVariables(std::vector<std::string>{"prim"});

IndexRange ib = md->GetBlockData(0)->GetBoundsI(IndexDomain::interior);
IndexRange jb = md->GetBlockData(0)->GetBoundsJ(IndexDomain::interior);
IndexRange kb = md->GetBlockData(0)->GetBoundsK(IndexDomain::interior);

Real mindx = std::numeric_limits<Real>::max();

bool nx2 = prim_pack.GetDim(2) > 1;
bool nx3 = prim_pack.GetDim(3) > 1;
pmb->par_reduce(
"CalculateGlobalMinDx", 0, prim_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
ib.e,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &lmindx) {
const auto &coords = prim_pack.GetCoords(b);
lmindx = fmin(lmindx, coords.Dxc<1>(k, j, i));
if (nx2) {
lmindx = fmin(lmindx, coords.Dxc<2>(k, j, i));
}
if (nx3) {
lmindx = fmin(lmindx, coords.Dxc<3>(k, j, i));
}
},
Kokkos::Min<Real>(mindx));

// Reduction to a host var is blocking and only one of these tasks runs at a
// time, so modifying the package should be safe.
auto mindx_pkg = hydro_pkg->Param<Real>("mindx");
if (mindx < mindx_pkg) {
hydro_pkg->UpdateParam("mindx", mindx);
}

return TaskStatus::complete;
}

// Sets all fluxes to 0
TaskStatus ResetFluxes(MeshData<Real> *md) {
auto pmb = md->GetBlockData(0)->GetBlockPointer();
@@ -443,60 +403,6 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
}
}

// Calculate hyperbolic divergence cleaning speed
// TODO(pgrete) Calculating mindx is only required after remeshing. Need to find a clean
// solution for this one-off global reduction.
// TODO(PG) move this to PreStepMeshUserWorkInLoop
if ((hydro_pkg->Param<bool>("calc_c_h") ||
hydro_pkg->Param<DiffInt>("diffint") != DiffInt::none) &&
(stage == 1)) {
// need to make sure that there's only one region in order for the MPI reduction to work
TaskRegion &single_task_region = tc.AddRegion(1);
auto &tl = single_task_region[0];
// Adding one task for each partition. Not using a (new) single partition containing
// all blocks here as this (default) split is also used for the following tasks and
// thus does not create an overhead (such as creating a new MeshBlockPack that is just
// used here). Given that all partitions are in one task list they'll be executed
// sequentially. Given that a par_reduce to a host var is blocking, it's also safe to
// store the variable in the Params for now.
auto prev_task = none;
for (int i = 0; i < num_partitions; i++) {
auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
auto new_mindx = tl.AddTask(prev_task, CalculateGlobalMinDx, mu0.get());
prev_task = new_mindx;
}
auto reduce_c_h = prev_task;
#ifdef MPI_PARALLEL
reduce_c_h = tl.AddTask(
prev_task,
[](StateDescriptor *hydro_pkg) {
Real mins[3];
mins[0] = hydro_pkg->Param<Real>("mindx");
mins[1] = hydro_pkg->Param<Real>("dt_hyp");
mins[2] = hydro_pkg->Param<Real>("dt_diff");
PARTHENON_MPI_CHECK(MPI_Allreduce(MPI_IN_PLACE, mins, 3, MPI_PARTHENON_REAL,
MPI_MIN, MPI_COMM_WORLD));

hydro_pkg->UpdateParam("mindx", mins[0]);
hydro_pkg->UpdateParam("dt_hyp", mins[1]);
hydro_pkg->UpdateParam("dt_diff", mins[2]);
return TaskStatus::complete;
},
hydro_pkg.get());
#endif
// Finally update c_h
auto update_c_h = tl.AddTask(
reduce_c_h,
[](StateDescriptor *hydro_pkg) {
const auto &mindx = hydro_pkg->Param<Real>("mindx");
const auto &cfl_hyp = hydro_pkg->Param<Real>("cfl");
const auto &dt_hyp = hydro_pkg->Param<Real>("dt_hyp");
hydro_pkg->UpdateParam("c_h", cfl_hyp * mindx / dt_hyp);
return TaskStatus::complete;
},
hydro_pkg.get());
}

// calculate magnetic tower scaling
if ((stage == 1) && hydro_pkg->AllParams().hasKey("magnetic_tower_power_scaling") &&
hydro_pkg->Param<bool>("magnetic_tower_power_scaling")) {
@@ -659,8 +565,24 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
pmesh->multilevel);
}

TaskRegion &single_tasklist_per_pack_region_3 = tc.AddRegion(num_partitions);
for (int i = 0; i < num_partitions; i++) {
auto &tl = single_tasklist_per_pack_region_3[i];
auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
auto fill_derived =
tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, mu0.get());
}
const auto &diffint = hydro_pkg->Param<DiffInt>("diffint");
// If any tasks modify the conserved variables before this place and after FillDerived,
// then the STS tasks should be updated to not assume prim and cons are in sync.
if (diffint == DiffInt::rkl2 && stage == integrator->nstages) {
AddSTSTasks(&tc, pmesh, blocks, 0.5 * tm.dt);
}
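
On the "prim and cons in sync" invariant mentioned above: FillDerived recomputes the primitive variables from the conserved ones, so any later task that changes cons without rerunning the conversion breaks that assumption. A toy conserved-to-primitive conversion for a gamma-law gas (1D; not Parthenon's actual FillDerived, and the test state is arbitrary):

```cpp
// Minimal sketch (not AthenaPK code): conserved -> primitive conversion for a
// gamma-law gas, the kind of sync FillDerived maintains between cons and prim.
#include <cstdio>

struct Cons { double rho, mom, etot; };  // density, momentum, total energy
struct Prim { double rho, vel, pres; };  // density, velocity, pressure

Prim ConsToPrim(const Cons &c, double gamma) {
  Prim p;
  p.rho = c.rho;
  p.vel = c.mom / c.rho;
  // p = (gamma - 1) * (E - 0.5 * rho * v^2)
  p.pres = (gamma - 1.0) * (c.etot - 0.5 * c.mom * c.mom / c.rho);
  return p;
}

int main() {
  const double gamma = 5.0 / 3.0;
  Cons c{1.0, 0.5, 1.0};  // arbitrary test state
  Prim p = ConsToPrim(c, gamma);
  std::printf("rho=%g v=%g p=%g\n", p.rho, p.vel, p.pres);
  return 0;
}
```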

// Single task in a single (serial) region to reset global vars used in reductions in
// the first stage.
// TODO(pgrete) check if we logically need this reset or if we can reset within the
// timestep task
if (stage == integrator->nstages &&
(hydro_pkg->Param<bool>("calc_c_h") ||
hydro_pkg->Param<DiffInt>("diffint") != DiffInt::none)) {
@@ -677,20 +599,6 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
hydro_pkg.get());
}

TaskRegion &single_tasklist_per_pack_region_3 = tc.AddRegion(num_partitions);
for (int i = 0; i < num_partitions; i++) {
auto &tl = single_tasklist_per_pack_region_3[i];
auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
auto fill_derived =
tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, mu0.get());
}
const auto &diffint = hydro_pkg->Param<DiffInt>("diffint");
// If any tasks modify the conserved variables before this place and after FillDerived,
// then the STS tasks should be updated to not assume prim and cons are in sync.
if (diffint == DiffInt::rkl2 && stage == integrator->nstages) {
AddSTSTasks(&tc, pmesh, blocks, 0.5 * tm.dt);
}

if (stage == integrator->nstages) {
TaskRegion &tr = tc.AddRegion(num_partitions);
for (int i = 0; i < num_partitions; i++) {
