From 63bd7230f1d82bcbd8d2be88ece78cfc5fd5cc32 Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Tue, 10 Jan 2023 15:40:23 -0500 Subject: [PATCH 01/24] setup --- apps/CMakeLists.txt | 1 + apps/lbmMultiRes/CMakeLists.txt | 17 ++++ apps/lbmMultiRes/lbmMultiRes.cu | 148 ++++++++++++++++++++++++++++++++ 3 files changed, 166 insertions(+) create mode 100644 apps/lbmMultiRes/CMakeLists.txt create mode 100644 apps/lbmMultiRes/lbmMultiRes.cu diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index 403a18e3..bd85d7a7 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -4,3 +4,4 @@ add_subdirectory("fractal") add_subdirectory("lbm") add_subdirectory("gameOfLife") add_subdirectory("poisson") +add_subdirectory("lbmMultiRes") diff --git a/apps/lbmMultiRes/CMakeLists.txt b/apps/lbmMultiRes/CMakeLists.txt new file mode 100644 index 00000000..207cb795 --- /dev/null +++ b/apps/lbmMultiRes/CMakeLists.txt @@ -0,0 +1,17 @@ +cmake_minimum_required(VERSION 3.19 FATAL_ERROR) + +set (APP_NAME app-lbmMultiRes) +file(GLOB_RECURSE SrcFiles lbmMultiRes.cu) + +add_executable(${APP_NAME} ${SrcFiles}) + +target_link_libraries(${APP_NAME} + PUBLIC libNeonSkeleton) + +set_target_properties(${APP_NAME} PROPERTIES + CUDA_SEPARABLE_COMPILATION ON + CUDA_RESOLVE_DEVICE_SYMBOLS ON) +set_target_properties(${APP_NAME} PROPERTIES FOLDER "apps") +source_group(TREE ${CMAKE_CURRENT_LIST_DIR} PREFIX "${APP_NAME}" FILES ${SrcFiles}) + +add_test(NAME ${APP_NAME} COMMAND ${APP_NAME}) \ No newline at end of file diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu new file mode 100644 index 00000000..a345af3f --- /dev/null +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -0,0 +1,148 @@ +#include "Neon/Neon.h" +#include "Neon/domain/mGrid.h" +#include "Neon/skeleton/Skeleton.h" + +template +Neon::domain::Stencil create_stencil(); + +template <> +Neon::domain::Stencil create_stencil<2, 9>() +{ + std::vector stencil; + stencil.reserve(9); + for (int x = -1; x <= 1; ++x) { + for (int y = -1; y <= 1; ++y) { + stencil.emplace_back(Neon::index_3d(x, y, 0)); + } + } + return Neon::domain::Stencil(stencil); +} + +template <> +Neon::domain::Stencil create_stencil<3, 19>() +{ + // filterCenterOut = false; + return Neon::domain::Stencil::s19_t(false); +} + +template +inline void exportVTI(const int t, Field& field) +{ + printf("\n Exporting Frame =%d", t); + int precision = 4; + std::ostringstream oss; + oss << std::setw(precision) << std::setfill('0') << t; + std::string prefix = "lbm" + std::to_string(field.getCardinality()) + "D_"; + std::string fname = prefix + oss.str(); + field.ioToVtk(fname, "field"); +} + +/*Neon::set::Container setOmega(Neon::domain::mGrid& grid, int level) +{ + return grid.getContainer( + "SetOmega" + std::to_string(level), level, + [=](Neon::set::Loader& loader) { + //auto& local = field.load(loader, level, Neon::MultiResCompute::MAP); + return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable {}; + }); +}*/ + +template +NEON_CUDA_HOST_DEVICE T computeOmega(T omega0, int level, int num_levels) +{ + int ilevel = num_levels - level - 1; + // scalbln(1.0, x) = 2^x + return 2 * omega0 / (scalbln(1.0, ilevel + 1) + (1. - scalbln(1.0, ilevel)) * omega0); +} + +template +Neon::set::Container velocity(Neon::domain::mGrid& grid, int level, Neon::domain::mGrid::Field& fin) +{ + return grid.getContainer( + "SetOmega" + std::to_string(level), level, + [=](Neon::set::Loader& loader) { + auto& flocal = fin.load(loader, level, Neon::MultiResCompute::MAP); + return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable {}; + }); +} + +template +void nonUniformTimestepRecursive(Neon::domain::mGrid& grid, + T omega0, + int level, + int max_level, + Neon::domain::mGrid::Field& fin, + Neon::domain::mGrid::Field& fout, + std::vector& containers) +{ +} + +int main(int argc, char** argv) +{ + Neon::init(); + if (Neon::sys::globalSpace::gpuSysObjStorage.numDevs() > 0) { + using T = double; + + //Neon grid + auto runtime = Neon::Runtime::stream; + std::vector gpu_ids{0}; + Neon::Backend backend(gpu_ids, runtime); + + constexpr int DIM = 2; + constexpr int COMP = (DIM == 2) ? 9 : 19; + + const int dim_x = 4; + const int dim_y = 4; + const int dim_z = (DIM < 3) ? 1 : 4; + + const Neon::index_3d grid_dim(dim_x, dim_y, dim_z); + + const Neon::domain::mGridDescriptor descriptor({1, 1, 1}); + + + Neon::domain::mGrid grid( + backend, grid_dim, + {[&](const Neon::index_3d id) -> bool { + return true; + }, + [&](const Neon::index_3d&) -> bool { + return true; + }, + [&](const Neon::index_3d&) -> bool { + return true; + }}, + create_stencil(), descriptor); + + + //LBM problem + const int max_iter = 300; + const T ulb = 0.01; + const T Re = 20; + const T clength = grid_dim.x; + const T visclb = ulb * clength / Re; + const T smagorinskyConstant = 0.02; + const T omega = 1.0 / (3. * visclb + 0.5); + + auto fin = grid.newField("fin", COMP, 0); + auto fout = grid.newField("fout", COMP, 0); + + //TODO init fin and fout + + fin.updateCompute(); + fout.updateCompute(); + + std::vector containers; + + nonUniformTimestepRecursive(grid, omega, 0, descriptor.getDepth(), fin, fout, containers); + + Neon::skeleton::Skeleton skl(grid.getBackend()); + skl.sequence(containers, "MultiResLBM"); + //skl.ioToDot("MultiRes"); + + skl.run(); + + grid.getBackend().syncAll(); + fin.updateIO(); + fout.updateIO(); + } +} \ No newline at end of file From 22991cd7ba84019a2816bcfeab5c785373465f2a Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Thu, 12 Jan 2023 18:59:03 -0500 Subject: [PATCH 02/24] collide --- apps/lbmMultiRes/lbmMultiRes.cu | 197 +++++++++++++++++++++++++++++--- 1 file changed, 178 insertions(+), 19 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index a345af3f..13ca1f45 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -2,7 +2,7 @@ #include "Neon/domain/mGrid.h" #include "Neon/skeleton/Skeleton.h" -template +template Neon::domain::Stencil create_stencil(); template <> @@ -25,6 +25,88 @@ Neon::domain::Stencil create_stencil<3, 19>() return Neon::domain::Stencil::s19_t(false); } +NEON_CUDA_DEVICE_ONLY static constexpr char latticeVelocity2D[9][2] = { + {0, 0}, + {0, -1}, + {0, 1}, + {-1, 0}, + {-1, -1}, + {-1, 1}, + {1, 0}, + {1, -1}, + {1, 1}}; + +NEON_CUDA_DEVICE_ONLY static constexpr char latticeVelocity3D[27][3] = { + {0, 0, 0}, + {0, -1, 0}, + {0, 1, 0}, + {-1, 0, 0}, + {-1, -1, 0}, + {-1, 1, 0}, + {1, 0, 0}, + {1, -1, 0}, + {1, 1, 0}, + + {0, 0, -1}, + {0, -1, -1}, + {0, 1, -1}, + {-1, 0, -1}, + {-1, -1, -1}, + {-1, 1, -1}, + {1, 0, -1}, + {1, -1, -1}, + {1, 1, -1}, + + {0, 0, 1}, + {0, -1, 1}, + {0, 1, 1}, + {-1, 0, 1}, + {-1, -1, 1}, + {-1, 1, 1}, + {1, 0, 1}, + {1, -1, 1}, + {1, 1, 1} + +}; + +template +struct latticeWeight +{ + NEON_CUDA_HOST_DEVICE __inline__ constexpr latticeWeight() + : t() + { + if constexpr (DIM == 2) { + + for (int i = 0; i < Q; ++i) { + if (latticeVelocity2D[i][0] * latticeVelocity2D[i][0] + + latticeVelocity2D[i][1] * latticeVelocity2D[i][1] < + 1.1f) { + t[i] = 1.0f / 9.0f; + } else { + t[i] = 1.0f / 36.0f; + } + } + t[0] = 4.0f / 9.0f; + } + + if constexpr (DIM == 3) { + for (int i = 0; i < Q; ++i) { + if (latticeVelocity2D[i][0] * latticeVelocity2D[i][0] + + latticeVelocity2D[i][1] * latticeVelocity2D[i][1] + + latticeVelocity2D[i][2] * latticeVelocity2D[i][2] < + 1.1f) { + t[i] = 2.0f / 36.0f; + } else { + t[i] = 1.0f / 36.0f; + } + } + t[0] = 1.0f / 3.0f; + } + } + float t[Q]; +}; + + template inline void exportVTI(const int t, Field& field) { @@ -47,6 +129,7 @@ inline void exportVTI(const int t, Field& field) }); }*/ + template NEON_CUDA_HOST_DEVICE T computeOmega(T omega0, int level, int num_levels) { @@ -55,31 +138,103 @@ NEON_CUDA_HOST_DEVICE T computeOmega(T omega0, int level, int num_levels) return 2 * omega0 / (scalbln(1.0, ilevel + 1) + (1. - scalbln(1.0, ilevel)) * omega0); } -template -Neon::set::Container velocity(Neon::domain::mGrid& grid, int level, Neon::domain::mGrid::Field& fin) +template +NEON_CUDA_HOST_DEVICE Neon::Vec_3d velocity(const T* fin, + const T rho) +{ + Neon::Vec_3d vel(0, 0, 0); + if constexpr (DIM == 2) { + for (int i = 0; i < Q; ++i) { + const T f = fin[i]; + for (int d = 0; d < DIM; ++d) { + vel.v[d] += f * latticeVelocity2D[i][d]; + } + } + } + + if constexpr (DIM == 3) { + for (int i = 0; i < Q; ++i) { + const T f = fin[i]; + for (int d = 0; d < DIM; ++d) { + vel.v[d] += f * latticeVelocity3D[i][d]; + } + } + } + + for (int d = 0; d < DIM; ++d) { + vel.v[d] /= rho; + } + return vel; +} + +template +Neon::set::Container collide(Neon::domain::mGrid& grid, + T omega0, + int level, + int max_level, + const Neon::domain::mGrid::Field& fin, + Neon::domain::mGrid::Field& fout) { return grid.getContainer( - "SetOmega" + std::to_string(level), level, + "Collide" + std::to_string(level), level, [=](Neon::set::Loader& loader) { - auto& flocal = fin.load(loader, level, Neon::MultiResCompute::MAP); - return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable {}; + const auto& in = fin.load(loader, level, Neon::MultiResCompute::MAP); + auto out = fout.load(loader, level, Neon::MultiResCompute::MAP); + const T omega = computeOmega(omega0, level, max_level); + + return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { + constexpr auto t = latticeWeight(); + + //fin + T ins[Q]; + for (int i = 0; i < Q; ++i) { + ins[i] = in(cell, i); + } + + //density + T rho = 0; + for (int i = 0; i < Q; ++i) { + rho += ins[i]; + } + + //velocity + const Neon::Vec_3d vel = velocity(ins, rho); + + + const T usqr = (3.0 / 2.0) * (vel.x * vel.x + vel.y * vel.y + vel.z * vel.z); + for (int i = 0; i < Q; ++i) { + T cu = 0; + for (int d = 0; d < DIM; ++d) { + cu += latticeVelocity2D[i][d] * vel.v[d]; + } + //equilibrium + T feq = rho * t.t[i] * (1. + cu + 0.5 * cu * cu - usqr); + + //collide + out(cell, i) = ins[i] - omega * (ins[i] - feq); + } + }; }); } -template -void nonUniformTimestepRecursive(Neon::domain::mGrid& grid, - T omega0, - int level, - int max_level, - Neon::domain::mGrid::Field& fin, - Neon::domain::mGrid::Field& fout, - std::vector& containers) + +template +void nonUniformTimestepRecursive(Neon::domain::mGrid& grid, + const T omega0, + const int level, + const int max_level, + const Neon::domain::mGrid::Field& fin, + Neon::domain::mGrid::Field& fout, + std::vector& containers) { + // 1) collision for all voxels at level L=ilevel + containers.push_back(collide(grid, omega0, level, max_level, fin, fout)); } int main(int argc, char** argv) { Neon::init(); + if (Neon::sys::globalSpace::gpuSysObjStorage.numDevs() > 0) { using T = double; @@ -89,7 +244,7 @@ int main(int argc, char** argv) Neon::Backend backend(gpu_ids, runtime); constexpr int DIM = 2; - constexpr int COMP = (DIM == 2) ? 9 : 19; + constexpr int Q = (DIM == 2) ? 9 : 19; const int dim_x = 4; const int dim_y = 4; @@ -111,7 +266,7 @@ int main(int argc, char** argv) [&](const Neon::index_3d&) -> bool { return true; }}, - create_stencil(), descriptor); + create_stencil(), descriptor); //LBM problem @@ -123,8 +278,8 @@ int main(int argc, char** argv) const T smagorinskyConstant = 0.02; const T omega = 1.0 / (3. * visclb + 0.5); - auto fin = grid.newField("fin", COMP, 0); - auto fout = grid.newField("fout", COMP, 0); + auto fin = grid.newField("fin", Q, 0); + auto fout = grid.newField("fout", Q, 0); //TODO init fin and fout @@ -133,7 +288,11 @@ int main(int argc, char** argv) std::vector containers; - nonUniformTimestepRecursive(grid, omega, 0, descriptor.getDepth(), fin, fout, containers); + nonUniformTimestepRecursive(grid, + omega, + descriptor.getDepth() - 1, + descriptor.getDepth(), + fin, fout, containers); Neon::skeleton::Skeleton skl(grid.getBackend()); skl.sequence(containers, "MultiResLBM"); From d3cfef2fdabcd361fe7f2e11077e3af54e3d45f2 Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Thu, 12 Jan 2023 19:09:56 -0500 Subject: [PATCH 03/24] steps --- apps/lbmMultiRes/lbmMultiRes.cu | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index 13ca1f45..bbf8f698 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -221,14 +221,40 @@ Neon::set::Container collide(Neon::domain::mGrid& grid, template void nonUniformTimestepRecursive(Neon::domain::mGrid& grid, const T omega0, - const int level, + const int ilevel, const int max_level, const Neon::domain::mGrid::Field& fin, Neon::domain::mGrid::Field& fout, std::vector& containers) { // 1) collision for all voxels at level L=ilevel - containers.push_back(collide(grid, omega0, level, max_level, fin, fout)); + containers.push_back(collide(grid, omega0, ilevel, max_level, fin, fout)); + + // 2) Storing fine(ilevel) data for later "coalescence" pulled by the coarse(ilevel) + + // 3) recurse down + if (ilevel != 0) { + nonUniformTimestepRecursive(grid, omega0, ilevel - 1, max_level, fin, fout, containers); + } + + // 4) Streaming step that also performs the necessary "explosion" and "coalescence" steps. + + // 5) stop + if (ilevel == max_level - 1) { + return; + } + + // 6) collision for all voxels at level L = ilevel + containers.push_back(collide(grid, omega0, ilevel, max_level, fin, fout)); + + // 7) Storing fine(ilevel) data for later "coalescence" pulled by the coarse(ilevel) + + // 8) recurse down + if (ilevel != 0) { + nonUniformTimestepRecursive(grid, omega0, ilevel - 1, max_level, fin, fout, containers); + } + + // 9) Streaming step. } int main(int argc, char** argv) From 8a83a62fd55af626ee6a45b7bbe29d8d47a901f6 Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Thu, 12 Jan 2023 19:49:03 -0500 Subject: [PATCH 04/24] streaming --- apps/lbmMultiRes/lbmMultiRes.cu | 153 ++++++++++++++++++++++++++------ 1 file changed, 124 insertions(+), 29 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index bbf8f698..5972b220 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -119,16 +119,6 @@ inline void exportVTI(const int t, Field& field) field.ioToVtk(fname, "field"); } -/*Neon::set::Container setOmega(Neon::domain::mGrid& grid, int level) -{ - return grid.getContainer( - "SetOmega" + std::to_string(level), level, - [=](Neon::set::Loader& loader) { - //auto& local = field.load(loader, level, Neon::MultiResCompute::MAP); - return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable {}; - }); -}*/ - template NEON_CUDA_HOST_DEVICE T computeOmega(T omega0, int level, int num_levels) @@ -217,44 +207,149 @@ Neon::set::Container collide(Neon::domain::mGrid& grid, }); } +template +Neon::set::Container stream(Neon::domain::mGrid& grid, + int level, + const Neon::domain::mGrid::Field& fpop_postcollision, + Neon::domain::mGrid::Field& fpop_poststreaming) +{ + //TODO + //regular Streaming of the normal voxels at level L which are not interfaced with L+1 and L-1 levels. + + return grid.getContainer( + "Stream" + std::to_string(level), level, + [=](Neon::set::Loader& loader) { + const auto& fpost_col = fpop_postcollision.load(loader, level, Neon::MultiResCompute::STENCIL); + auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::MAP); + + return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { + for (int i = 0; i < Q; ++i) { + + //TODO filter out cells that interface with L+1 or L-1 + //TODO figure out the right direction to push/pull the data in the i-direction + fpost_stm(cell, i) = fpost_col(cell, i); + } + }; + }); +} + +template +Neon::set::Container explosionPull(Neon::domain::mGrid& grid, + int level, + const Neon::domain::mGrid::Field& fpop_postcollision, + Neon::domain::mGrid::Field& fpop_poststreaming) +{ + //TODO + // Initiated by the fine level (hence "pull"), this function performs a coarse (level+1) to + // fine (level) communication or "explosion" by simply distributing copies of coarse grid onto the fine grid. + // In other words, this function updates the "halo" cells of the fine level by making copies of the coarse cell + // values. + + + return grid.getContainer( + "explosionPull" + std::to_string(level), level, + [=](Neon::set::Loader& loader) { + const auto& fpost_col = fpop_postcollision.load(loader, level, Neon::MultiResCompute::STENCIL_UP); + auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::MAP); + + return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { + for (int i = 0; i < Q; ++i) { + } + }; + }); +} + + +template +Neon::set::Container coalescencePull(Neon::domain::mGrid& grid, + int level, + Neon::domain::mGrid::Field& fpop_poststreaming) +{ + //TODO + // Initiated by the coarse level (hence "pull"), this function performs fine (level-1) to coarse + // (level) communication or "coalescence" by simply averaging the fine data stored in self.fpop_halo + + return grid.getContainer( + "explosionPull" + std::to_string(level), level, + [=](Neon::set::Loader& loader) { + auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::STENCIL_DOWN); + + return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { + for (int i = 0; i < Q; ++i) { + } + }; + }); +} + +template +void stream(Neon::domain::mGrid& grid, + int level, + const int max_level, + const Neon::domain::mGrid::Field& fpop_postcollision, + Neon::domain::mGrid::Field& fpop_poststreaming, + std::vector& containers) +{ + containers.push_back(stream(grid, level, fpop_postcollision, fpop_poststreaming)); + + /* + * Streaming for interface voxels that have + * (i) coarser or (ii) finer neighbors at level+1 and level-1 and hence require + * (i) "explosion" or (ii) coalescence + */ + if (level != max_level - 1) { + /* Explosion: pull missing populations from coarser neighbors by copying coarse (level+1) to fine (level) + * neighbors, initiated by the fine level ("Pull"). + */ + containers.push_back(explosionPull(grid, level, fpop_postcollision, fpop_poststreaming)); + } + + if (level != 0) { + /* Coalescence: pull missing populations from finer neighbors by "smart" averaging fine (level-1) + * to coarse (level) communication, initiated by the coarse level ("Pull"). + */ + containers.push_back(coalescencePull(grid, level, fpop_poststreaming)); + } +} template -void nonUniformTimestepRecursive(Neon::domain::mGrid& grid, - const T omega0, - const int ilevel, - const int max_level, - const Neon::domain::mGrid::Field& fin, - Neon::domain::mGrid::Field& fout, - std::vector& containers) +void nonUniformTimestepRecursive(Neon::domain::mGrid& grid, + const T omega0, + const int level, + const int max_level, + Neon::domain::mGrid::Field& fin, + Neon::domain::mGrid::Field& fout, + std::vector& containers) { - // 1) collision for all voxels at level L=ilevel - containers.push_back(collide(grid, omega0, ilevel, max_level, fin, fout)); + // 1) collision for all voxels at level L=level + containers.push_back(collide(grid, omega0, level, max_level, fin, fout)); - // 2) Storing fine(ilevel) data for later "coalescence" pulled by the coarse(ilevel) + // 2) Storing fine(level) data for later "coalescence" pulled by the coarse(level) // 3) recurse down - if (ilevel != 0) { - nonUniformTimestepRecursive(grid, omega0, ilevel - 1, max_level, fin, fout, containers); + if (level != 0) { + nonUniformTimestepRecursive(grid, omega0, level - 1, max_level, fin, fout, containers); } // 4) Streaming step that also performs the necessary "explosion" and "coalescence" steps. + stream(grid, level, max_level, fout, fin, containers); // 5) stop - if (ilevel == max_level - 1) { + if (level == max_level - 1) { return; } - // 6) collision for all voxels at level L = ilevel - containers.push_back(collide(grid, omega0, ilevel, max_level, fin, fout)); + // 6) collision for all voxels at level L = level + containers.push_back(collide(grid, omega0, level, max_level, fin, fout)); - // 7) Storing fine(ilevel) data for later "coalescence" pulled by the coarse(ilevel) + // 7) Storing fine(level) data for later "coalescence" pulled by the coarse(level) // 8) recurse down - if (ilevel != 0) { - nonUniformTimestepRecursive(grid, omega0, ilevel - 1, max_level, fin, fout, containers); + if (level != 0) { + nonUniformTimestepRecursive(grid, omega0, level - 1, max_level, fin, fout, containers); } - // 9) Streaming step. + // 9) Streaming step + stream(grid, level, max_level, fout, fin, containers); } int main(int argc, char** argv) From e48ab8ec56ad619c9625c054ce2a86ad825591bc Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Mon, 16 Jan 2023 13:12:23 -0500 Subject: [PATCH 05/24] WIP streaming --- apps/lbmMultiRes/lbmMultiRes.cu | 13 +++- .../Neon/domain/internal/bGrid/bCell.h | 2 +- .../Neon/domain/internal/bGrid/bCell_imp.h | 10 +++ .../Neon/domain/internal/bGrid/bGrid.h | 2 +- .../Neon/domain/internal/bGrid/bPartition.h | 5 +- .../domain/internal/bGrid/bPartition_imp.h | 25 +++--- .../Neon/domain/internal/mGrid/mField_imp.h | 2 + .../Neon/domain/internal/mGrid/mPartition.h | 37 +++++++-- .../domain/internal/mGrid/mPartition_imp.h | 77 ++++++++++++++----- .../src/domain/internal/bGrid/bGrid.cpp | 4 +- .../unit/sUt_multiRes/src/MultiResParent.h | 6 +- .../unit/sUt_multiRes/src/MultiResSkeleton.h | 6 +- 12 files changed, 138 insertions(+), 51 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index 5972b220..c222d4ab 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -223,11 +223,20 @@ Neon::set::Container stream(Neon::domain::mGrid& grid, auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::MAP); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { - for (int i = 0; i < Q; ++i) { + auto parent = fpost_stm.getParent(cell); + bool hasChildren = fpost_stm.hasChildren(cell); + for (int i = 0; i < Q; ++i) { //TODO filter out cells that interface with L+1 or L-1 //TODO figure out the right direction to push/pull the data in the i-direction - fpost_stm(cell, i) = fpost_col(cell, i); + // fpost_stm(cell, i) = fpost_col(cell, i); + + //if (parent.isActive()) { + // auto uncle = fpost_stm.setNghCell() + //} + + if (hasChildren) { + } } }; }); diff --git a/libNeonDomain/include/Neon/domain/internal/bGrid/bCell.h b/libNeonDomain/include/Neon/domain/internal/bGrid/bCell.h index 96e09f68..4e5d2bdf 100644 --- a/libNeonDomain/include/Neon/domain/internal/bGrid/bCell.h +++ b/libNeonDomain/include/Neon/domain/internal/bGrid/bCell.h @@ -30,7 +30,7 @@ class bCell //i.e., each entry in the mask array store the state of 32 voxels static constexpr uint32_t sMaskSize = 32; - bCell() = default; + bCell(); virtual ~bCell() = default; NEON_CUDA_HOST_DEVICE inline auto isActive() const -> bool; diff --git a/libNeonDomain/include/Neon/domain/internal/bGrid/bCell_imp.h b/libNeonDomain/include/Neon/domain/internal/bGrid/bCell_imp.h index b5539ea0..d3c14d5f 100644 --- a/libNeonDomain/include/Neon/domain/internal/bGrid/bCell_imp.h +++ b/libNeonDomain/include/Neon/domain/internal/bGrid/bCell_imp.h @@ -2,6 +2,16 @@ namespace Neon::domain::internal::bGrid { +NEON_CUDA_HOST_DEVICE inline bCell::bCell() + : mLocation(std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max()), + mBlockID(std::numeric_limits::max()), + mIsActive(false), + mBlockSize(-1) +{ +} + NEON_CUDA_HOST_DEVICE inline bCell::bCell(const Location& location) { mLocation = location; diff --git a/libNeonDomain/include/Neon/domain/internal/bGrid/bGrid.h b/libNeonDomain/include/Neon/domain/internal/bGrid/bGrid.h index 76e8ca29..2761ec90 100644 --- a/libNeonDomain/include/Neon/domain/internal/bGrid/bGrid.h +++ b/libNeonDomain/include/Neon/domain/internal/bGrid/bGrid.h @@ -94,7 +94,7 @@ class bGrid : public Neon::domain::interface::GridBaseTemplate auto getOrigins() const -> const Neon::set::MemSet_t&; - auto getNeighbourBlocks() const -> const Neon::set::MemSet_t&; + auto getNeighbourBlocks() const -> Neon::set::MemSet_t&; auto getActiveMask() const -> Neon::set::MemSet_t&; auto getBlockOriginTo1D() const -> Neon::domain::tool::PointHashTable&; diff --git a/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition.h b/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition.h index 2080cf42..755d5f20 100644 --- a/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition.h +++ b/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition.h @@ -65,8 +65,9 @@ class bPartition protected: inline NEON_CUDA_HOST_DEVICE auto pitch(const Cell& cell, int card) const -> uint32_t; - inline NEON_CUDA_HOST_DEVICE auto setNghCell(const Cell& cell, const nghIdx_t& offset) const -> Cell; - inline NEON_CUDA_HOST_DEVICE auto shmemPitch(Cell cell, const int card) const -> Cell::Location::Integer; + inline NEON_CUDA_HOST_DEVICE auto setNghCell(const Cell& cell, const nghIdx_t& offset, const uint32_t* neighbourBlocks) const -> Cell; + inline NEON_CUDA_HOST_DEVICE auto shmemPitch(Cell cell, const int card) const -> Cell::Location::Integer; + inline NEON_CUDA_HOST_DEVICE auto getneighbourBlocksPtr(const Cell& cell) const -> const uint32_t*; Neon::DataView mDataView; T* mMem; diff --git a/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition_imp.h b/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition_imp.h index 1bd87452..04e56645 100644 --- a/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition_imp.h +++ b/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition_imp.h @@ -110,9 +110,21 @@ inline NEON_CUDA_HOST_DEVICE auto bPartition::pitch(const Cell& cell, int cell.pitch(card); } +template +NEON_CUDA_HOST_DEVICE inline auto bPartition::getneighbourBlocksPtr(const Cell& cell) const -> const uint32_t* +{ + if (mSharedNeighbourBlocks != nullptr) { + return mSharedNeighbourBlocks; + } else { + return mNeighbourBlocks + (26 * cell.mBlockID); + } +} + + template NEON_CUDA_HOST_DEVICE inline auto bPartition::setNghCell(const Cell& cell, - const nghIdx_t& offset) const -> Cell + const nghIdx_t& offset, + const uint32_t* neighbourBlocks) const -> Cell { Cell ngh_cell(cell.mLocation.x + offset.x, cell.mLocation.y + offset.y, @@ -151,11 +163,7 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition::setNghCell(const Cell& c ngh_cell.mLocation.z -= cell.mBlockSize; } - if (mSharedNeighbourBlocks != nullptr) { - ngh_cell.mBlockID = mSharedNeighbourBlocks[Cell::getNeighbourBlockID(block_offset)]; - } else { - ngh_cell.mBlockID = mNeighbourBlocks[26 * cell.mBlockID + Cell::getNeighbourBlockID(block_offset)]; - } + ngh_cell.mBlockID = neighbourBlocks[Cell::getNeighbourBlockID(block_offset)]; } else { ngh_cell.mBlockID = cell.mBlockID; @@ -191,7 +199,7 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition::nghVal(const Cell& cell, Cell swirl_cell = cell.toSwirl(); swirl_cell.mBlockSize = cell.mBlockSize; - Cell ngh_cell = setNghCell(swirl_cell, offset); + Cell ngh_cell = setNghCell(swirl_cell, offset, getneighbourBlocksPtr(swirl_cell)); ngh_cell.mBlockSize = cell.mBlockSize; if (ngh_cell.mBlockID != std::numeric_limits::max()) { //TODO maybe ngh_cell should be mapped to its memory layout @@ -207,8 +215,7 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition::nghVal(const Cell& cell, } } else { - Cell ngh_cell = setNghCell(cell, offset); - ngh_cell.mBlockSize = cell.mBlockSize; + Cell ngh_cell = setNghCell(cell, offset, getneighbourBlocksPtr(cell)); if (ngh_cell.mBlockID != std::numeric_limits::max()) { ret.isValid = ngh_cell.computeIsActive(mMask); if (ret.isValid) { diff --git a/libNeonDomain/include/Neon/domain/internal/mGrid/mField_imp.h b/libNeonDomain/include/Neon/domain/internal/mGrid/mField_imp.h index 91777e06..9f0d9936 100644 --- a/libNeonDomain/include/Neon/domain/internal/mGrid/mField_imp.h +++ b/libNeonDomain/include/Neon/domain/internal/mGrid/mField_imp.h @@ -65,6 +65,7 @@ mField::mField(const std::string& name, active_mask.rawMem(gpuID, Neon::DeviceType::CPU), (l == 0) ? nullptr : mData->grid->operator()(l - 1).getActiveMask().rawMem(gpuID, Neon::DeviceType::CPU), //lower-level mask (l == 0) ? nullptr : childBlockID.rawMem(gpuID, Neon::DeviceType::CPU), + (l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).getNeighbourBlocks().rawMem(gpuID, Neon::DeviceType::CPU), //parent neighbor outsideVal, stencil_ngh.rawMem(gpuID, Neon::DeviceType::CPU), refFactorSet.rawMem(gpuID, Neon::DeviceType::CPU), @@ -85,6 +86,7 @@ mField::mField(const std::string& name, active_mask.rawMem(gpuID, Neon::DeviceType::CUDA), (l == 0) ? nullptr : mData->grid->operator()(l - 1).getActiveMask().rawMem(gpuID, Neon::DeviceType::CUDA), //lower-level mask (l == 0) ? nullptr : childBlockID.rawMem(gpuID, Neon::DeviceType::CUDA), + (l == int(descriptor.getDepth()) - 1) ? nullptr : mData->grid->operator()(l + 1).getNeighbourBlocks().rawMem(gpuID, Neon::DeviceType::CUDA), //parent neighbor outsideVal, stencil_ngh.rawMem(gpuID, Neon::DeviceType::CUDA), refFactorSet.rawMem(gpuID, Neon::DeviceType::CUDA), diff --git a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition.h b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition.h index b9b65141..7ba3709c 100644 --- a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition.h +++ b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition.h @@ -37,6 +37,7 @@ class mPartition : public Neon::domain::internal::bGrid::bPartition uint32_t* mask, uint32_t* maskLowerLevel, uint32_t* childBlockID, + uint32_t* parentNeighbourBlocks, T defaultValue, nghIdx_t* stencilNghIndex, int* refFactors, @@ -63,6 +64,7 @@ class mPartition : public Neon::domain::internal::bGrid::bPartition NEON_CUDA_HOST_DEVICE inline auto getChild(const Cell& parent_cell, Neon::int8_3d child) const -> Cell; + /** * Given a child cell (as returned by getChild), return the value of this child * @param childCell the child cell as returned by getChild @@ -79,13 +81,35 @@ class mPartition : public Neon::domain::internal::bGrid::bPartition NEON_CUDA_HOST_DEVICE inline auto childVal(const Cell& childCell, int card = 0) const -> const T&; + /** + * Check if the cell is refined i.e., has children + * @param cell the cell i.e., parent at which the children are checked + * @return + */ + NEON_CUDA_HOST_DEVICE inline auto hasChildren(const Cell& cell) const -> bool; + + + /** + * Get a cell that represents the parent of a given cell + * @param cell the child cell for which the parent is queried + */ + NEON_CUDA_HOST_DEVICE inline auto getParent(const Cell& cell) const -> Cell; + /** * Given a cell (child), return the value of the parent * @param eId the cell * @param card the cardinality in case of vector-valued data */ - NEON_CUDA_HOST_DEVICE inline auto parent(const Cell& eId, - int card) -> T&; + NEON_CUDA_HOST_DEVICE inline auto parentVal(const Cell& eId, + int card) -> T&; + + /** + * Given a cell (child), return the value of the parent + * @param eId the cell + * @param card the cardinality in case of vector-valued data + */ + NEON_CUDA_HOST_DEVICE inline auto parentVal(const Cell& eId, + int card) const -> const T&; /** * check if the cell has a parent as defined by the user during the construction of the mGrid @@ -93,12 +117,8 @@ class mPartition : public Neon::domain::internal::bGrid::bPartition */ NEON_CUDA_HOST_DEVICE inline auto hasParent(const Cell& cell) const -> bool; - /** - * Check if the cell is refined i.e., has children - * @param cell the cell i.e., parent at which the children are checked - * @return - */ - NEON_CUDA_HOST_DEVICE inline auto hasChildren(const Cell& cell) const -> bool; + NEON_CUDA_HOST_DEVICE inline auto getUncle(const Cell& cell, + Neon::int8_3d direction) const -> Cell; /** * Get the refinement factor i.e., number of children at each dimension @@ -127,6 +147,7 @@ class mPartition : public Neon::domain::internal::bGrid::bPartition Cell::Location* mParentLocalID; uint32_t* mMaskLowerLevel; uint32_t* mChildBlockID; + uint32_t* mParentNeighbourBlocks; int* mRefFactors; int* mSpacing; }; diff --git a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h index 50a7eee3..73fcf4ff 100644 --- a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h +++ b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h @@ -31,6 +31,7 @@ mPartition::mPartition(Neon::DataView dataView, uint32_t* mask, uint32_t* maskLowerLevel, uint32_t* childBlockID, + uint32_t* parentNeighbourBlocks, T outsideValue, nghIdx_t* stencilNghIndex, int* refFactors, @@ -43,6 +44,7 @@ mPartition::mPartition(Neon::DataView dataView, mParentLocalID(parentLocalID), mMaskLowerLevel(maskLowerLevel), mChildBlockID(childBlockID), + mParentNeighbourBlocks(parentNeighbourBlocks), mRefFactors(refFactors), mSpacing(spacing) { @@ -101,29 +103,23 @@ inline NEON_CUDA_HOST_DEVICE auto mPartition::childID(const Cell& cell) co return mChildBlockID[childPitch]; } -template -NEON_CUDA_HOST_DEVICE inline auto mPartition::hasParent(const Cell& cell) const -> bool -{ - if (mMemParent) { - return true; - } - return false; -} - template NEON_CUDA_HOST_DEVICE inline auto mPartition::getChild(const Cell& parent_cell, Neon::int8_3d child) const -> Cell { Cell childCell; - childCell.mBlockID = childID(parent_cell); - childCell.mBlockSize = mRefFactors[mLevel - 1]; - childCell.mLocation.x = child.x; - childCell.mLocation.y = child.y; - childCell.mLocation.z = child.z; - childCell.mIsActive = childCell.computeIsActive(mMaskLowerLevel); + if (hasChildren(parent_cell)) { + childCell.mBlockID = childID(parent_cell); + childCell.mBlockSize = mRefFactors[mLevel - 1]; + childCell.mLocation.x = child.x; + childCell.mLocation.y = child.y; + childCell.mLocation.z = child.z; + childCell.mIsActive = childCell.computeIsActive(mMaskLowerLevel); + } return childCell; } + template NEON_CUDA_HOST_DEVICE inline auto mPartition::childVal(const Cell& childCell, int card) -> T& @@ -176,17 +172,58 @@ NEON_CUDA_HOST_DEVICE inline auto mPartition::hasChildren(const Cell& cell } template -NEON_CUDA_HOST_DEVICE inline auto mPartition::parent(const Cell& eId, - int card) -> T& +NEON_CUDA_HOST_DEVICE inline auto mPartition::getParent(const Cell& cell) const -> Cell { + Cell parentCell; if (mMemParent != nullptr) { - Cell parentCell; - parentCell.mBlockID = mParentBlockID[eId.mBlockID]; - parentCell.mLocation = mParentLocalID[eId.mBlockID]; + parentCell.mBlockID = mParentBlockID[cell.mBlockID]; + parentCell.mLocation = mParentLocalID[cell.mBlockID]; parentCell.mBlockSize = mRefFactors[mLevel + 1]; + parentCell.mIsActive = true; + } + return parentCell; +} + +template +NEON_CUDA_HOST_DEVICE inline auto mPartition::parentVal(const Cell& eId, + int card) -> T& +{ + auto parentCell = getParent(eId); + if (parentCell.isActive()) { + return mMemParent[this->pitch(parentCell, card)]; + } +} + +template +NEON_CUDA_HOST_DEVICE inline auto mPartition::parentVal(const Cell& eId, + int card) const -> const T& +{ + auto parentCell = getParent(eId); + if (parentCell.isActive()) { return mMemParent[this->pitch(parentCell, card)]; } } +template +NEON_CUDA_HOST_DEVICE inline auto mPartition::hasParent(const Cell& cell) const -> bool +{ + if (mMemParent) { + return true; + } + return false; +} + +template +NEON_CUDA_HOST_DEVICE inline auto mPartition::getUncle(const Cell& cell, + Neon::int8_3d direction) const -> Cell +{ + Cell uncle = getParent(cell); + if (uncle.isActive()) { + uncle = this->setNghCell(uncle, direction, mParentNeighbourBlocks); + uncle.mIsActive = ngh_cell.mBlockID != std::numeric_limits::max(); + } + return uncle; +} + } // namespace Neon::domain::internal::mGrid \ No newline at end of file diff --git a/libNeonDomain/src/domain/internal/bGrid/bGrid.cpp b/libNeonDomain/src/domain/internal/bGrid/bGrid.cpp index 1f616ac4..0692f6a9 100644 --- a/libNeonDomain/src/domain/internal/bGrid/bGrid.cpp +++ b/libNeonDomain/src/domain/internal/bGrid/bGrid.cpp @@ -40,7 +40,7 @@ auto bGrid::isInsideDomain(const Neon::index_3d& idx) const -> bool Cell cell(static_cast((idx.x / mData->voxelSpacing) % mData->blockSize), static_cast((idx.y / mData->voxelSpacing) % mData->blockSize), - static_cast((idx.z / mData->voxelSpacing) % mData->blockSize)); + static_cast((idx.z / mData->voxelSpacing) % mData->blockSize)); cell.mBlockID = *itr; cell.mBlockSize = mData->blockSize; @@ -123,7 +123,7 @@ auto bGrid::getStencilNghIndex() const -> const Neon::set::MemSet_t& } -auto bGrid::getNeighbourBlocks() const -> const Neon::set::MemSet_t& +auto bGrid::getNeighbourBlocks() const -> Neon::set::MemSet_t& { return mData->mNeighbourBlocks; } diff --git a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResParent.h b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResParent.h index 59ce7e25..c631ee76 100644 --- a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResParent.h +++ b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResParent.h @@ -70,7 +70,7 @@ void MultiResParent() return [=] NEON_CUDA_HOST_DEVICE(const Neon::domain::mGrid::Cell& cell) mutable { if (xLocal.hasParent(cell)) { hasParentLocal(cell, 0) = 1; - xLocal(cell, 0) = xLocal.parent(cell, 0); + xLocal(cell, 0) = xLocal.parentVal(cell, 0); } else { hasParentLocal(cell, 0) = -1; } @@ -184,11 +184,11 @@ void MultiResAtomicAddParent() if (xLocal.hasParent(cell)) { #ifdef NEON_PLACE_CUDA_DEVICE - atomicAdd(&xLocal.parent(cell, 0), xLocal(cell, 0)); + atomicAdd(&xLocal.parentVal(cell, 0), xLocal(cell, 0)); #else #pragma omp atomic - xLocal.parent(cell, 0) += xLocal(cell, 0); + xLocal.parentVal(cell, 0) += xLocal(cell, 0); #endif } }; diff --git a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResSkeleton.h b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResSkeleton.h index f4584800..324a7577 100644 --- a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResSkeleton.h +++ b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResSkeleton.h @@ -77,9 +77,9 @@ void MultiResSkeleton() return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { assert(local.hasParent(cell)); - local(cell, 0) = local.parent(cell, 0); - local(cell, 1) = local.parent(cell, 1); - local(cell, 2) = local.parent(cell, 2); + local(cell, 0) = local.parentVal(cell, 0); + local(cell, 1) = local.parentVal(cell, 1); + local(cell, 2) = local.parentVal(cell, 2); }; })); } From 544c9d388fa7139d6026edfebbd657ad3222651f Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Tue, 17 Jan 2023 12:29:07 -0500 Subject: [PATCH 06/24] same-level stream filtering --- apps/lbmMultiRes/lbmMultiRes.cu | 48 ++++++++++++------- .../Neon/domain/internal/mGrid/mPartition.h | 8 ++++ .../domain/internal/mGrid/mPartition_imp.h | 16 ++++++- 3 files changed, 53 insertions(+), 19 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index c222d4ab..0b84b5ae 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -213,8 +213,8 @@ Neon::set::Container stream(Neon::domain::mGrid& grid, const Neon::domain::mGrid::Field& fpop_postcollision, Neon::domain::mGrid::Field& fpop_poststreaming) { - //TODO //regular Streaming of the normal voxels at level L which are not interfaced with L+1 and L-1 levels. + //This is "pull" stream return grid.getContainer( "Stream" + std::to_string(level), level, @@ -224,18 +224,29 @@ Neon::set::Container stream(Neon::domain::mGrid& grid, return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { auto parent = fpost_stm.getParent(cell); - bool hasChildren = fpost_stm.hasChildren(cell); - for (int i = 0; i < Q; ++i) { - //TODO filter out cells that interface with L+1 or L-1 - //TODO figure out the right direction to push/pull the data in the i-direction - // fpost_stm(cell, i) = fpost_col(cell, i); - //if (parent.isActive()) { - // auto uncle = fpost_stm.setNghCell() - //} + //If this cell has children i.e., it is been refined, that we should not work on it + //because this cell is only there to allow query and not to operate on + if (!fpost_stm.hasChildren(cell)) { + + for (int8_t q = 0; q < Q; ++q) { + Neon::int8_3d dir; + if constexpr (DIM == 2) { + dir = Neon::int8_3d(-latticeVelocity2D[q][0], -latticeVelocity2D[q][1], 0); + } + if constexpr (DIM == 3) { + dir = Neon::int8_3d(-latticeVelocity3D[q][0], -latticeVelocity3D[q][1], -latticeVelocity3D[q][2]); + } + + auto neighbor = fpost_col.nghVal(cell, dir, q, T(0)); - if (hasChildren) { + if (neighbor.isValid) { + //if the neighbor cell has children, then this 'cell' is interfacing with L-1 along q direction + if (!fpost_stm.hasChildren(cell, dir)) { + fpost_stm(cell, q) = neighbor.value; + } + } } } }; @@ -262,7 +273,7 @@ Neon::set::Container explosionPull(Neon::domain::mGrid& grid, auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::MAP); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { - for (int i = 0; i < Q; ++i) { + for (int8_t q = 0; q < Q; ++q) { } }; }); @@ -369,16 +380,16 @@ int main(int argc, char** argv) using T = double; //Neon grid - auto runtime = Neon::Runtime::stream; + auto runtime = Neon::Runtime::openmp; std::vector gpu_ids{0}; Neon::Backend backend(gpu_ids, runtime); constexpr int DIM = 2; constexpr int Q = (DIM == 2) ? 9 : 19; - const int dim_x = 4; - const int dim_y = 4; - const int dim_z = (DIM < 3) ? 1 : 4; + const int dim_x = 12; + const int dim_y = 12; + const int dim_z = (DIM < 3) ? 4 : 4; const Neon::index_3d grid_dim(dim_x, dim_y, dim_z); @@ -388,16 +399,17 @@ int main(int argc, char** argv) Neon::domain::mGrid grid( backend, grid_dim, {[&](const Neon::index_3d id) -> bool { - return true; + return id.x > 7; }, - [&](const Neon::index_3d&) -> bool { - return true; + [&](const Neon::index_3d& id) -> bool { + return id.x > 3; }, [&](const Neon::index_3d&) -> bool { return true; }}, create_stencil(), descriptor); + //grid.topologyToVTK("lbm.vtk", false); //LBM problem const int max_iter = 300; diff --git a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition.h b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition.h index 7ba3709c..8995401c 100644 --- a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition.h +++ b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition.h @@ -88,6 +88,14 @@ class mPartition : public Neon::domain::internal::bGrid::bPartition */ NEON_CUDA_HOST_DEVICE inline auto hasChildren(const Cell& cell) const -> bool; + /** + * Check if a neighbor to 'cell' has children + * @param cell + * @param cell + * @return + */ + NEON_CUDA_HOST_DEVICE inline auto hasChildren(const Cell& cell, const Neon::int8_3d nghDir) const -> bool; + /** * Get a cell that represents the parent of a given cell diff --git a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h index 73fcf4ff..986abe29 100644 --- a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h +++ b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h @@ -171,6 +171,20 @@ NEON_CUDA_HOST_DEVICE inline auto mPartition::hasChildren(const Cell& cell return true; } +template +NEON_CUDA_HOST_DEVICE inline auto mPartition::hasChildren(const Cell& cell, const Neon::int8_3d nghDir) const -> bool +{ + if (mMemChild == nullptr || mMaskLowerLevel == nullptr || mLevel == 0) { + return false; + } + + Cell nghCell = this->setNghCell(cell, nghDir, this->getneighbourBlocksPtr(cell)); + if (!nghCell.isActive()) { + return false; + } + return hasChildren(nghCell); +} + template NEON_CUDA_HOST_DEVICE inline auto mPartition::getParent(const Cell& cell) const -> Cell { @@ -220,7 +234,7 @@ NEON_CUDA_HOST_DEVICE inline auto mPartition::getUncle(const Cell& cell, Cell uncle = getParent(cell); if (uncle.isActive()) { uncle = this->setNghCell(uncle, direction, mParentNeighbourBlocks); - uncle.mIsActive = ngh_cell.mBlockID != std::numeric_limits::max(); + uncle.mIsActive = uncle.mBlockID != std::numeric_limits::max(); } return uncle; } From fe52e7898363bfa19db6734e3bf9222310d42770 Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Tue, 17 Jan 2023 19:47:44 -0500 Subject: [PATCH 07/24] Explosion Pull --- apps/lbmMultiRes/lbmMultiRes.cu | 72 ++++++++++++++++--- .../Neon/domain/internal/bGrid/bPartition.h | 3 +- .../domain/internal/bGrid/bPartition_imp.h | 10 ++- .../Neon/domain/internal/mGrid/mPartition.h | 5 ++ .../domain/internal/mGrid/mPartition_imp.h | 19 +++++ 5 files changed, 95 insertions(+), 14 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index 0b84b5ae..dcf7ed64 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -120,6 +120,27 @@ inline void exportVTI(const int t, Field& field) } +NEON_CUDA_HOST_DEVICE inline Neon::int8_3d explosionUnlceOffset(const Neon::domain::bGrid::Cell& cell, const Neon::int8_3d& q) +{ + //given a local index within a cell and a population direction (q) + //find the uncle's (the parent neighbor) offset from which the desired population (q) should be read + //this offset is wrt the cell containing the localID (i.e., the parent of localID) + + auto off = [](const int8_t i, const int8_t j) { + //0, -1 --> -1 + //1, -1 --> 0 + //0, 1 --> 0 + //0, 0 --> 0 + //1, 1 --> 1 + const int8_t s = i + j; + return (s <= 0) ? s : s - 1; + }; + + Neon::int8_3d offset(off(cell.mLocation.x, q.x), off(cell.mLocation.y, q.y), off(cell.mLocation.z, q.z)); + return offset; +} + + template NEON_CUDA_HOST_DEVICE T computeOmega(T omega0, int level, int num_levels) { @@ -223,9 +244,6 @@ Neon::set::Container stream(Neon::domain::mGrid& grid, auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::MAP); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { - auto parent = fpost_stm.getParent(cell); - - //If this cell has children i.e., it is been refined, that we should not work on it //because this cell is only there to allow query and not to operate on if (!fpost_stm.hasChildren(cell)) { @@ -238,12 +256,10 @@ Neon::set::Container stream(Neon::domain::mGrid& grid, if constexpr (DIM == 3) { dir = Neon::int8_3d(-latticeVelocity3D[q][0], -latticeVelocity3D[q][1], -latticeVelocity3D[q][2]); } - - auto neighbor = fpost_col.nghVal(cell, dir, q, T(0)); - - if (neighbor.isValid) { - //if the neighbor cell has children, then this 'cell' is interfacing with L-1 along q direction - if (!fpost_stm.hasChildren(cell, dir)) { + //if the neighbor cell has children, then this 'cell' is interfacing with L-1 (fine) along q direction + if (!fpost_stm.hasChildren(cell, dir)) { + auto neighbor = fpost_col.nghVal(cell, dir, q, T(0)); + if (neighbor.isValid) { fpost_stm(cell, q) = neighbor.value; } } @@ -259,7 +275,6 @@ Neon::set::Container explosionPull(Neon::domain::mGrid& grid, const Neon::domain::mGrid::Field& fpop_postcollision, Neon::domain::mGrid::Field& fpop_poststreaming) { - //TODO // Initiated by the fine level (hence "pull"), this function performs a coarse (level+1) to // fine (level) communication or "explosion" by simply distributing copies of coarse grid onto the fine grid. // In other words, this function updates the "halo" cells of the fine level by making copies of the coarse cell @@ -274,6 +289,39 @@ Neon::set::Container explosionPull(Neon::domain::mGrid& grid, return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { for (int8_t q = 0; q < Q; ++q) { + //If this cell has children i.e., it is been refined, that we should not work on it + //because this cell is only there to allow query and not to operate on + if (!fpost_stm.hasChildren(cell)) { + Neon::int8_3d dir; + if constexpr (DIM == 2) { + dir = Neon::int8_3d(-latticeVelocity2D[q][0], -latticeVelocity2D[q][1], 0); + } + if constexpr (DIM == 3) { + dir = Neon::int8_3d(-latticeVelocity3D[q][0], -latticeVelocity3D[q][1], -latticeVelocity3D[q][2]); + } + + //if the neighbor cell has children, then this 'cell' is interfacing with L-1 (fine) along q direction + //we want to only work on cells that interface with L+1 (coarse) cell along q + if (!fpost_stm.hasChildren(cell, dir)) { + + //try to query the cell along this direction (opposite of the population direction) as we do + //in 'normal' streaming + auto neighborCell = fpost_col.setNghCell(cell, dir); + if (!neighborCell.isActive()) { + //only if we can not do normal streaming, then we may have a coarser neighbor from which + //we can read this pop + + //get the uncle direction/offset i.e., the neighbor of the cell's parent + //this direction/offset is wrt to the cell's parent + Neon::int8_3d uncleDir = explosionUnlceOffset(cell, dir); + + auto uncle = fpost_col.uncleVal(cell, uncleDir, q, T(0)); + if (uncle.isValid) { + fpost_stm(cell, q) = uncle.value; + } + } + } + } } }; }); @@ -290,7 +338,7 @@ Neon::set::Container coalescencePull(Neon::domain::mGrid& grid, // (level) communication or "coalescence" by simply averaging the fine data stored in self.fpop_halo return grid.getContainer( - "explosionPull" + std::to_string(level), level, + "coalescencePull" + std::to_string(level), level, [=](Neon::set::Loader& loader) { auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::STENCIL_DOWN); @@ -372,6 +420,7 @@ void nonUniformTimestepRecursive(Neon::domain::mGrid& grid, stream(grid, level, max_level, fout, fin, containers); } + int main(int argc, char** argv) { Neon::init(); @@ -411,6 +460,7 @@ int main(int argc, char** argv) //grid.topologyToVTK("lbm.vtk", false); + //LBM problem const int max_iter = 300; const T ulb = 0.01; diff --git a/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition.h b/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition.h index 755d5f20..b2b29642 100644 --- a/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition.h +++ b/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition.h @@ -62,9 +62,10 @@ class bPartition NEON_CUDA_HOST_DEVICE inline Neon::index_3d mapToGlobal(const Cell& cell) const; + inline NEON_CUDA_HOST_DEVICE auto setNghCell(const Cell& cell, const nghIdx_t& offset) const -> Cell; protected: - inline NEON_CUDA_HOST_DEVICE auto pitch(const Cell& cell, int card) const -> uint32_t; + inline NEON_CUDA_HOST_DEVICE auto pitch(const Cell& cell, int card) const -> uint32_t; inline NEON_CUDA_HOST_DEVICE auto setNghCell(const Cell& cell, const nghIdx_t& offset, const uint32_t* neighbourBlocks) const -> Cell; inline NEON_CUDA_HOST_DEVICE auto shmemPitch(Cell cell, const int card) const -> Cell::Location::Integer; inline NEON_CUDA_HOST_DEVICE auto getneighbourBlocksPtr(const Cell& cell) const -> const uint32_t*; diff --git a/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition_imp.h b/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition_imp.h index 04e56645..c6681374 100644 --- a/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition_imp.h +++ b/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition_imp.h @@ -120,6 +120,11 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition::getneighbourBlocksPtr(const } } +template +NEON_CUDA_HOST_DEVICE inline auto bPartition::setNghCell(const Cell& cell, const nghIdx_t& offset) const -> Cell +{ + return setNghCell(cell, offset, getneighbourBlocksPtr(cell)); +} template NEON_CUDA_HOST_DEVICE inline auto bPartition::setNghCell(const Cell& cell, @@ -168,6 +173,7 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition::setNghCell(const Cell& c } else { ngh_cell.mBlockID = cell.mBlockID; } + ngh_cell.mIsActive = (ngh_cell.mBlockID != std::numeric_limits::max()); return ngh_cell; } @@ -201,7 +207,7 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition::nghVal(const Cell& cell, Cell ngh_cell = setNghCell(swirl_cell, offset, getneighbourBlocksPtr(swirl_cell)); ngh_cell.mBlockSize = cell.mBlockSize; - if (ngh_cell.mBlockID != std::numeric_limits::max()) { + if (ngh_cell.isActive()) { //TODO maybe ngh_cell should be mapped to its memory layout ret.isValid = ngh_cell.computeIsActive(mMask); if (ret.isValid) { @@ -216,7 +222,7 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition::nghVal(const Cell& cell, } else { Cell ngh_cell = setNghCell(cell, offset, getneighbourBlocksPtr(cell)); - if (ngh_cell.mBlockID != std::numeric_limits::max()) { + if (ngh_cell.isActive()) { ret.isValid = ngh_cell.computeIsActive(mMask); if (ret.isValid) { if (mIsInSharedMem) { diff --git a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition.h b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition.h index 8995401c..ab9d3042 100644 --- a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition.h +++ b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition.h @@ -128,6 +128,11 @@ class mPartition : public Neon::domain::internal::bGrid::bPartition NEON_CUDA_HOST_DEVICE inline auto getUncle(const Cell& cell, Neon::int8_3d direction) const -> Cell; + NEON_CUDA_HOST_DEVICE inline auto uncleVal(const Cell& cell, + Neon::int8_3d direction, + int card, + const T& alternativeVal) const -> NghInfo; + /** * Get the refinement factor i.e., number of children at each dimension * @param level at which the refinement factor is queried diff --git a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h index 986abe29..9e427b7b 100644 --- a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h +++ b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h @@ -234,10 +234,29 @@ NEON_CUDA_HOST_DEVICE inline auto mPartition::getUncle(const Cell& cell, Cell uncle = getParent(cell); if (uncle.isActive()) { uncle = this->setNghCell(uncle, direction, mParentNeighbourBlocks); + uncle.mBlockSize = mRefFactors[mLevel + 1]; uncle.mIsActive = uncle.mBlockID != std::numeric_limits::max(); } return uncle; } +template +NEON_CUDA_HOST_DEVICE inline auto mPartition::uncleVal(const Cell& cell, + Neon::int8_3d direction, + int card, + const T& alternativeVal) const -> NghInfo +{ + NghInfo ret; + ret.value = alternativeVal; + ret.isValid = false; + + Cell uncle = getUncle(cell, direction); + ret.isValid = uncle.isActive(); + if (ret.isValid) { + ret.value = mMemParent[this->pitch(uncle, card)]; + } + return ret; +} + } // namespace Neon::domain::internal::mGrid \ No newline at end of file From d0c132ad12465f61e028ff78ffb48bd829c716a8 Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Tue, 24 Jan 2023 15:11:36 -0500 Subject: [PATCH 08/24] store() and coalescencePull() --- apps/lbmMultiRes/lbmMultiRes.cu | 236 ++++++++++++++---- .../Neon/core/types/vec/vec3d_integer.tdecl.h | 2 + .../Neon/core/types/vec/vec3d_integer.timp.h | 7 + 3 files changed, 190 insertions(+), 55 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index dcf7ed64..0cfb6efe 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -69,6 +69,18 @@ NEON_CUDA_DEVICE_ONLY static constexpr char latticeVelocity3D[27][3] = { }; + +template +NEON_CUDA_HOST_DEVICE Neon::int8_3d getDir(const int8_t q) +{ + if constexpr (DIM == 2) { + return Neon::int8_3d(latticeVelocity2D[q][0], latticeVelocity2D[q][1], 0); + } + if constexpr (DIM == 3) { + return Neon::int8_3d(latticeVelocity3D[q][0], latticeVelocity3D[q][1], latticeVelocity3D[q][2]); + } +} + template struct latticeWeight { @@ -119,8 +131,8 @@ inline void exportVTI(const int t, Field& field) field.ioToVtk(fname, "field"); } - -NEON_CUDA_HOST_DEVICE inline Neon::int8_3d explosionUnlceOffset(const Neon::domain::bGrid::Cell& cell, const Neon::int8_3d& q) +template +NEON_CUDA_HOST_DEVICE inline Neon::int8_3d unlceOffset(const T& cell, const Neon::int8_3d& q) { //given a local index within a cell and a population direction (q) //find the uncle's (the parent neighbor) offset from which the desired population (q) should be read @@ -129,14 +141,14 @@ NEON_CUDA_HOST_DEVICE inline Neon::int8_3d explosionUnlceOffset(const Neon::doma auto off = [](const int8_t i, const int8_t j) { //0, -1 --> -1 //1, -1 --> 0 - //0, 1 --> 0 //0, 0 --> 0 + //0, 1 --> 0 //1, 1 --> 1 const int8_t s = i + j; return (s <= 0) ? s : s - 1; }; - Neon::int8_3d offset(off(cell.mLocation.x, q.x), off(cell.mLocation.y, q.y), off(cell.mLocation.z, q.z)); + Neon::int8_3d offset(off(cell.x, q.x), off(cell.y, q.y), off(cell.z, q.z)); return offset; } @@ -194,35 +206,38 @@ Neon::set::Container collide(Neon::domain::mGrid& grid, const T omega = computeOmega(omega0, level, max_level); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { - constexpr auto t = latticeWeight(); + if (!in.hasChildren(cell)) { - //fin - T ins[Q]; - for (int i = 0; i < Q; ++i) { - ins[i] = in(cell, i); - } + constexpr auto t = latticeWeight(); - //density - T rho = 0; - for (int i = 0; i < Q; ++i) { - rho += ins[i]; - } + //fin + T ins[Q]; + for (int i = 0; i < Q; ++i) { + ins[i] = in(cell, i); + } - //velocity - const Neon::Vec_3d vel = velocity(ins, rho); + //density + T rho = 0; + for (int i = 0; i < Q; ++i) { + rho += ins[i]; + } + //velocity + const Neon::Vec_3d vel = velocity(ins, rho); - const T usqr = (3.0 / 2.0) * (vel.x * vel.x + vel.y * vel.y + vel.z * vel.z); - for (int i = 0; i < Q; ++i) { - T cu = 0; - for (int d = 0; d < DIM; ++d) { - cu += latticeVelocity2D[i][d] * vel.v[d]; - } - //equilibrium - T feq = rho * t.t[i] * (1. + cu + 0.5 * cu * cu - usqr); - //collide - out(cell, i) = ins[i] - omega * (ins[i] - feq); + const T usqr = (3.0 / 2.0) * (vel.x * vel.x + vel.y * vel.y + vel.z * vel.z); + for (int i = 0; i < Q; ++i) { + T cu = 0; + for (int d = 0; d < DIM; ++d) { + cu += latticeVelocity2D[i][d] * vel.v[d]; + } + //equilibrium + T feq = rho * t.t[i] * (1. + cu + 0.5 * cu * cu - usqr); + + //collide + out(cell, i) = ins[i] - omega * (ins[i] - feq); + } } }; }); @@ -244,18 +259,13 @@ Neon::set::Container stream(Neon::domain::mGrid& grid, auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::MAP); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { - //If this cell has children i.e., it is been refined, that we should not work on it + //If this cell has children i.e., it is been refined, than we should not work on it //because this cell is only there to allow query and not to operate on if (!fpost_stm.hasChildren(cell)) { for (int8_t q = 0; q < Q; ++q) { - Neon::int8_3d dir; - if constexpr (DIM == 2) { - dir = Neon::int8_3d(-latticeVelocity2D[q][0], -latticeVelocity2D[q][1], 0); - } - if constexpr (DIM == 3) { - dir = Neon::int8_3d(-latticeVelocity3D[q][0], -latticeVelocity3D[q][1], -latticeVelocity3D[q][2]); - } + const Neon::int8_3d dir = -getDir(q); + //if the neighbor cell has children, then this 'cell' is interfacing with L-1 (fine) along q direction if (!fpost_stm.hasChildren(cell, dir)) { auto neighbor = fpost_col.nghVal(cell, dir, q, T(0)); @@ -288,20 +298,15 @@ Neon::set::Container explosionPull(Neon::domain::mGrid& grid, auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::MAP); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { - for (int8_t q = 0; q < Q; ++q) { - //If this cell has children i.e., it is been refined, that we should not work on it - //because this cell is only there to allow query and not to operate on - if (!fpost_stm.hasChildren(cell)) { - Neon::int8_3d dir; - if constexpr (DIM == 2) { - dir = Neon::int8_3d(-latticeVelocity2D[q][0], -latticeVelocity2D[q][1], 0); - } - if constexpr (DIM == 3) { - dir = Neon::int8_3d(-latticeVelocity3D[q][0], -latticeVelocity3D[q][1], -latticeVelocity3D[q][2]); - } + //If this cell has children i.e., it is been refined, that we should not work on it + //because this cell is only there to allow query and not to operate on + if (!fpost_stm.hasChildren(cell)) { + for (int8_t q = 0; q < Q; ++q) { + + const Neon::int8_3d dir = -getDir(q); //if the neighbor cell has children, then this 'cell' is interfacing with L-1 (fine) along q direction - //we want to only work on cells that interface with L+1 (coarse) cell along q + //we want to only work on cells that interface with L+1 (coarse) cell along q if (!fpost_stm.hasChildren(cell, dir)) { //try to query the cell along this direction (opposite of the population direction) as we do @@ -311,9 +316,9 @@ Neon::set::Container explosionPull(Neon::domain::mGrid& grid, //only if we can not do normal streaming, then we may have a coarser neighbor from which //we can read this pop - //get the uncle direction/offset i.e., the neighbor of the cell's parent - //this direction/offset is wrt to the cell's parent - Neon::int8_3d uncleDir = explosionUnlceOffset(cell, dir); + //get the uncle direction/offset i.e., the neighbor of the cell's parent + //this direction/offset is wrt to the cell's parent + Neon::int8_3d uncleDir = unlceOffset(cell.mLocation, dir); auto uncle = fpost_col.uncleVal(cell, uncleDir, q, T(0)); if (uncle.isValid) { @@ -333,22 +338,124 @@ Neon::set::Container coalescencePull(Neon::domain::mGrid& grid, int level, Neon::domain::mGrid::Field& fpop_poststreaming) { - //TODO - // Initiated by the coarse level (hence "pull"), this function performs fine (level-1) to coarse - // (level) communication or "coalescence" by simply averaging the fine data stored in self.fpop_halo + // Initiated by the coarse level (hence "pull"), this function simply read the missing population + // across the interface between coarse<->fine boundary by reading the population prepare during the store() return grid.getContainer( "coalescencePull" + std::to_string(level), level, + [=](Neon::set::Loader& loader) { + auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::STENCIL); + + return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { + //If this cell has children i.e., it is been refined, than we should not work on it + //because this cell is only there to allow query and not to operate on + if (!fpost_stm.hasChildren(cell)) { + + for (int q = 0; q < Q; ++q) { + const Neon::int8_3d dir = -getDir(q); + //if we have a neighbor at the same level that has been refined, then cell is on + //the interface and this is where we should do the coalescence + if (fpost_stm.hasChildren(cell, dir)) { + auto neighbor = fpost_stm.nghVal(cell, dir, q, T(0)); + if (neighbor.isValid) { + fpost_stm(cell, q) = neighbor.value; + } + } + } + } + }; + }); +} + + +template +Neon::set::Container store(Neon::domain::mGrid& grid, + int level, + Neon::domain::mGrid::Field& fpop_poststreaming) +{ + //Initiated by the coarse level (level), this function prepares and stores the fine (level - 1) + // information for further pulling initiated by the coarse (this) level invoked by coalescence_pull + // + //Where a coarse cell stores its information? at itself i.e., pull + //Where a coarse cell reads the needed info? from its children and neighbor cell's children (level -1) + //This function only operates on a coarse cell that has children. + //For such cell, we check its neighbor cells at the same level. If any of these neighbor has NO + //children, then we need to prepare something for them to be read during coalescence. What + //we prepare is some sort of averaged the data from the children (the cell's children and/or + //its neighbor's children) + + return grid.getContainer( + "store" + std::to_string(level), level, [=](Neon::set::Loader& loader) { auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::STENCIL_DOWN); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { - for (int i = 0; i < Q; ++i) { + //if the cell is refined, we might need to store something in it for its neighbor + if (fpost_stm.hasChildren(cell)) { + + const int refFactor = fpost_stm.getRefFactor(level); + + bool should_accumelate = ((int(fpost_stm(cell, 0)) % refFactor) != 0); + + fpost_stm(cell, 0) += 1; + + + //for each direction aka for each neighbor + //we skip the center here + for (int8_t q = 1; q < Q; ++q) { + const Neon::int8_3d q_dir = getDir(q); + + //check if the neighbor in this direction has children + if (!fpost_stm.hasChildren(cell, q_dir)) { + //now, we know that there is actually something we need to store for this neighbor + //in cell along q (q_dir) direction + int num = 0; + T sum = 0; + + + //for every neighbor cell including the center cell (i.e., cell) + for (int8_t p = 0; p < Q; ++p) { + const Neon::int8_3d p_dir = getDir(p); + + //relative direction of q w.r.t p + //i.e., in which direction we should move starting from p to land on q + const Neon::int8_3d r_dir = q_dir - p_dir; + + //if this neighbor is refined + if (fpost_stm.hasChildren(cell, p_dir)) { + + //for each children of p + for (int8_t i = 0; i < refFactor; ++i) { + for (int8_t j = 0; j < refFactor; ++j) { + for (int8_t k = 0; k < refFactor; ++k) { + const Neon::int8_3d c(i, j, k); + + //cq is coarse neighbor (i.e., uncle) that we need to go in order to read q + //for c (this is what we do for explosion but here we do this just for the check) + const Neon::int8_3d cq = unlceOffset(c, q_dir); + if (cq == r_dir) { + num++; + sum += fpost_stm.childVal(cell, c, q, 0).value; + } + } + } + } + } + } + + if (should_accumelate) { + fpost_stm(cell, q) += sum / static_cast(num * refFactor); + } else { + fpost_stm(cell, q) = sum / static_cast(num * refFactor); + } + } + } } }; }); } + template void stream(Neon::domain::mGrid& grid, int level, @@ -391,7 +498,11 @@ void nonUniformTimestepRecursive(Neon::domain::mGrid& grid, // 1) collision for all voxels at level L=level containers.push_back(collide(grid, omega0, level, max_level, fin, fout)); - // 2) Storing fine(level) data for later "coalescence" pulled by the coarse(level) + // 2) Storing fine (level - 1) data for later "coalescence" pulled by the coarse (level) + if (level != max_level) { + store(grid, level + 1, fout); + } + // 3) recurse down if (level != 0) { @@ -410,6 +521,9 @@ void nonUniformTimestepRecursive(Neon::domain::mGrid& grid, containers.push_back(collide(grid, omega0, level, max_level, fin, fout)); // 7) Storing fine(level) data for later "coalescence" pulled by the coarse(level) + if (level != max_level) { + store(grid, level + 1, fout); + } // 8) recurse down if (level != 0) { @@ -474,6 +588,18 @@ int main(int argc, char** argv) auto fout = grid.newField("fout", Q, 0); //TODO init fin and fout + for (int l = 0; l < descriptor.getDepth(); ++l) { + fin.forEachActiveCell( + l, + [&](const Neon::int32_3d, const int, T& val) { + val = 0; + }); + fout.forEachActiveCell( + l, + [&](const Neon::int32_3d, const int, T& val) { + val = 0; + }); + } fin.updateCompute(); fout.updateCompute(); diff --git a/libNeonCore/include/Neon/core/types/vec/vec3d_integer.tdecl.h b/libNeonCore/include/Neon/core/types/vec/vec3d_integer.tdecl.h index 358b086f..03a2c28b 100644 --- a/libNeonCore/include/Neon/core/types/vec/vec3d_integer.tdecl.h +++ b/libNeonCore/include/Neon/core/types/vec/vec3d_integer.tdecl.h @@ -391,6 +391,8 @@ class Vec_3d NEON_CUDA_HOST_DEVICE inline bool operator!=(const Integer other[self_t::num_axis]) const; + NEON_CUDA_HOST_DEVICE inline self_t operator-() const; + /** Returns the most north-est-hi point buildable with A and B coordinates" * C = A >> B is: C.v[i] = A.v[i] > B.v[i] ? A.v[i] : B.v[i] * @param[in] B: second point for the operation. diff --git a/libNeonCore/include/Neon/core/types/vec/vec3d_integer.timp.h b/libNeonCore/include/Neon/core/types/vec/vec3d_integer.timp.h index 7118708b..df9d4d94 100644 --- a/libNeonCore/include/Neon/core/types/vec/vec3d_integer.timp.h +++ b/libNeonCore/include/Neon/core/types/vec/vec3d_integer.timp.h @@ -486,6 +486,13 @@ NEON_CUDA_HOST_DEVICE inline Vec_3d Vec_3d +NEON_CUDA_HOST_DEVICE inline Vec_3d Vec_3d::operator-() const +{ + Vec_3d res(-x, -y, -z); + return res; +} + template NEON_CUDA_HOST_DEVICE inline bool Vec_3d::operator>(const Vec_3d& B) const { From 238e444ab7d5031c57595c340de49219c9b055c1 Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Thu, 26 Jan 2023 11:28:13 -0500 Subject: [PATCH 09/24] debugging --- apps/lbmMultiRes/lbmMultiRes.cu | 114 ++++++++---------- .../Neon/domain/internal/bGrid/bPartition.h | 10 +- .../domain/internal/bGrid/bPartition_imp.h | 10 +- .../domain/internal/mGrid/mPartition_imp.h | 4 +- 4 files changed, 65 insertions(+), 73 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index 0cfb6efe..45a62864 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -119,18 +119,6 @@ struct latticeWeight }; -template -inline void exportVTI(const int t, Field& field) -{ - printf("\n Exporting Frame =%d", t); - int precision = 4; - std::ostringstream oss; - oss << std::setw(precision) << std::setfill('0') << t; - std::string prefix = "lbm" + std::to_string(field.getCardinality()) + "D_"; - std::string fname = prefix + oss.str(); - field.ioToVtk(fname, "field"); -} - template NEON_CUDA_HOST_DEVICE inline Neon::int8_3d unlceOffset(const T& cell, const Neon::int8_3d& q) { @@ -301,7 +289,7 @@ Neon::set::Container explosionPull(Neon::domain::mGrid& grid, //If this cell has children i.e., it is been refined, that we should not work on it //because this cell is only there to allow query and not to operate on if (!fpost_stm.hasChildren(cell)) { - for (int8_t q = 0; q < Q; ++q) { + for (int8_t q = 1; q < Q; ++q) { const Neon::int8_3d dir = -getDir(q); @@ -311,7 +299,7 @@ Neon::set::Container explosionPull(Neon::domain::mGrid& grid, //try to query the cell along this direction (opposite of the population direction) as we do //in 'normal' streaming - auto neighborCell = fpost_col.setNghCell(cell, dir); + auto neighborCell = fpost_col.getNghCell(cell, dir); if (!neighborCell.isActive()) { //only if we can not do normal streaming, then we may have a coarser neighbor from which //we can read this pop @@ -344,14 +332,14 @@ Neon::set::Container coalescencePull(Neon::domain::mGrid& grid, return grid.getContainer( "coalescencePull" + std::to_string(level), level, [=](Neon::set::Loader& loader) { - auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::STENCIL); + auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::STENCIL_DOWN); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { //If this cell has children i.e., it is been refined, than we should not work on it //because this cell is only there to allow query and not to operate on if (!fpost_stm.hasChildren(cell)) { - for (int q = 0; q < Q; ++q) { + for (int q = 1; q < Q; ++q) { const Neon::int8_3d dir = -getDir(q); //if we have a neighbor at the same level that has been refined, then cell is on //the interface and this is where we should do the coalescence @@ -395,7 +383,7 @@ Neon::set::Container store(Neon::domain::mGrid& grid, const int refFactor = fpost_stm.getRefFactor(level); - bool should_accumelate = ((int(fpost_stm(cell, 0)) % refFactor) != 0); + bool should_accumelate = ((int(fpost_stm(cell, 0)) % refFactor) != 0); fpost_stm(cell, 0) += 1; @@ -406,47 +394,51 @@ Neon::set::Container store(Neon::domain::mGrid& grid, const Neon::int8_3d q_dir = getDir(q); //check if the neighbor in this direction has children - if (!fpost_stm.hasChildren(cell, q_dir)) { - //now, we know that there is actually something we need to store for this neighbor - //in cell along q (q_dir) direction - int num = 0; - T sum = 0; - - - //for every neighbor cell including the center cell (i.e., cell) - for (int8_t p = 0; p < Q; ++p) { - const Neon::int8_3d p_dir = getDir(p); - - //relative direction of q w.r.t p - //i.e., in which direction we should move starting from p to land on q - const Neon::int8_3d r_dir = q_dir - p_dir; - - //if this neighbor is refined - if (fpost_stm.hasChildren(cell, p_dir)) { - - //for each children of p - for (int8_t i = 0; i < refFactor; ++i) { - for (int8_t j = 0; j < refFactor; ++j) { - for (int8_t k = 0; k < refFactor; ++k) { - const Neon::int8_3d c(i, j, k); - - //cq is coarse neighbor (i.e., uncle) that we need to go in order to read q - //for c (this is what we do for explosion but here we do this just for the check) - const Neon::int8_3d cq = unlceOffset(c, q_dir); - if (cq == r_dir) { - num++; - sum += fpost_stm.childVal(cell, c, q, 0).value; + auto neighborCell = fpost_stm.getNghCell(cell, q_dir); + if (neighborCell.isActive()) { + + if (!fpost_stm.hasChildren(neighborCell)) { + //now, we know that there is actually something we need to store for this neighbor + //in cell along q (q_dir) direction + int num = 0; + T sum = 0; + + + //for every neighbor cell including the center cell (i.e., cell) + for (int8_t p = 0; p < Q; ++p) { + const Neon::int8_3d p_dir = getDir(p); + + //relative direction of q w.r.t p + //i.e., in which direction we should move starting from p to land on q + const Neon::int8_3d r_dir = q_dir - p_dir; + + //if this neighbor is refined + if (fpost_stm.hasChildren(cell, p_dir)) { + + //for each children of p + for (int8_t i = 0; i < refFactor; ++i) { + for (int8_t j = 0; j < refFactor; ++j) { + for (int8_t k = 0; k < refFactor; ++k) { + const Neon::int8_3d c(i, j, k); + + //cq is coarse neighbor (i.e., uncle) that we need to go in order to read q + //for c (this is what we do for explosion but here we do this just for the check) + const Neon::int8_3d cq = unlceOffset(c, q_dir); + if (cq == r_dir) { + num++; + sum += fpost_stm.childVal(cell, c, q, 0).value; + } } } } } } - } - if (should_accumelate) { - fpost_stm(cell, q) += sum / static_cast(num * refFactor); - } else { - fpost_stm(cell, q) = sum / static_cast(num * refFactor); + if (should_accumelate) { + fpost_stm(cell, q) += sum / static_cast(num * refFactor); + } else { + fpost_stm(cell, q) = sum / static_cast(num * refFactor); + } } } } @@ -550,29 +542,29 @@ int main(int argc, char** argv) constexpr int DIM = 2; constexpr int Q = (DIM == 2) ? 9 : 19; - const int dim_x = 12; - const int dim_y = 12; - const int dim_z = (DIM < 3) ? 4 : 4; + const int dim_x = 6; + const int dim_y = 6; + const int dim_z = 4; const Neon::index_3d grid_dim(dim_x, dim_y, dim_z); - const Neon::domain::mGridDescriptor descriptor({1, 1, 1}); + const Neon::domain::mGridDescriptor descriptor({1, 1}); Neon::domain::mGrid grid( backend, grid_dim, {[&](const Neon::index_3d id) -> bool { - return id.x > 7; + return ((id.x == 2 && id.y == 2) || + (id.x == 3 && id.y == 2) || + (id.x == 2 && id.y == 3) || + (id.x == 3 && id.y == 3)); }, [&](const Neon::index_3d& id) -> bool { - return id.x > 3; - }, - [&](const Neon::index_3d&) -> bool { return true; }}, create_stencil(), descriptor); - //grid.topologyToVTK("lbm.vtk", false); + grid.topologyToVTK("lbm.vtk", false); //LBM problem diff --git a/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition.h b/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition.h index b2b29642..f907dfc2 100644 --- a/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition.h +++ b/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition.h @@ -49,7 +49,7 @@ class bPartition NEON_CUDA_HOST_DEVICE inline auto nghVal(const Cell& eId, uint8_t nghID, int card, - const T& alternativeVal) const -> NghInfo; + const T& alternativeVal) const -> NghInfo; NEON_CUDA_HOST_DEVICE inline void loadInSharedMemory(const Cell& cell, @@ -62,12 +62,12 @@ class bPartition NEON_CUDA_HOST_DEVICE inline Neon::index_3d mapToGlobal(const Cell& cell) const; - inline NEON_CUDA_HOST_DEVICE auto setNghCell(const Cell& cell, const nghIdx_t& offset) const -> Cell; + inline NEON_CUDA_HOST_DEVICE auto getNghCell(const Cell& cell, const nghIdx_t& offset) const -> Cell; protected: - inline NEON_CUDA_HOST_DEVICE auto pitch(const Cell& cell, int card) const -> uint32_t; - inline NEON_CUDA_HOST_DEVICE auto setNghCell(const Cell& cell, const nghIdx_t& offset, const uint32_t* neighbourBlocks) const -> Cell; - inline NEON_CUDA_HOST_DEVICE auto shmemPitch(Cell cell, const int card) const -> Cell::Location::Integer; + inline NEON_CUDA_HOST_DEVICE auto pitch(const Cell& cell, int card) const -> uint32_t; + inline NEON_CUDA_HOST_DEVICE auto getNghCell(const Cell& cell, const nghIdx_t& offset, const uint32_t* neighbourBlocks) const -> Cell; + inline NEON_CUDA_HOST_DEVICE auto shmemPitch(Cell cell, const int card) const -> Cell::Location::Integer; inline NEON_CUDA_HOST_DEVICE auto getneighbourBlocksPtr(const Cell& cell) const -> const uint32_t*; Neon::DataView mDataView; diff --git a/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition_imp.h b/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition_imp.h index c6681374..8b31e6a8 100644 --- a/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition_imp.h +++ b/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition_imp.h @@ -121,13 +121,13 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition::getneighbourBlocksPtr(const } template -NEON_CUDA_HOST_DEVICE inline auto bPartition::setNghCell(const Cell& cell, const nghIdx_t& offset) const -> Cell +NEON_CUDA_HOST_DEVICE inline auto bPartition::getNghCell(const Cell& cell, const nghIdx_t& offset) const -> Cell { - return setNghCell(cell, offset, getneighbourBlocksPtr(cell)); + return getNghCell(cell, offset, getneighbourBlocksPtr(cell)); } template -NEON_CUDA_HOST_DEVICE inline auto bPartition::setNghCell(const Cell& cell, +NEON_CUDA_HOST_DEVICE inline auto bPartition::getNghCell(const Cell& cell, const nghIdx_t& offset, const uint32_t* neighbourBlocks) const -> Cell { @@ -205,7 +205,7 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition::nghVal(const Cell& cell, Cell swirl_cell = cell.toSwirl(); swirl_cell.mBlockSize = cell.mBlockSize; - Cell ngh_cell = setNghCell(swirl_cell, offset, getneighbourBlocksPtr(swirl_cell)); + Cell ngh_cell = getNghCell(swirl_cell, offset, getneighbourBlocksPtr(swirl_cell)); ngh_cell.mBlockSize = cell.mBlockSize; if (ngh_cell.isActive()) { //TODO maybe ngh_cell should be mapped to its memory layout @@ -221,7 +221,7 @@ NEON_CUDA_HOST_DEVICE inline auto bPartition::nghVal(const Cell& cell, } } else { - Cell ngh_cell = setNghCell(cell, offset, getneighbourBlocksPtr(cell)); + Cell ngh_cell = getNghCell(cell, offset, getneighbourBlocksPtr(cell)); if (ngh_cell.isActive()) { ret.isValid = ngh_cell.computeIsActive(mMask); if (ret.isValid) { diff --git a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h index 9e427b7b..77abe808 100644 --- a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h +++ b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h @@ -178,7 +178,7 @@ NEON_CUDA_HOST_DEVICE inline auto mPartition::hasChildren(const Cell& cell return false; } - Cell nghCell = this->setNghCell(cell, nghDir, this->getneighbourBlocksPtr(cell)); + Cell nghCell = this->getNghCell(cell, nghDir, this->getneighbourBlocksPtr(cell)); if (!nghCell.isActive()) { return false; } @@ -233,7 +233,7 @@ NEON_CUDA_HOST_DEVICE inline auto mPartition::getUncle(const Cell& cell, { Cell uncle = getParent(cell); if (uncle.isActive()) { - uncle = this->setNghCell(uncle, direction, mParentNeighbourBlocks); + uncle = this->getNghCell(uncle, direction, mParentNeighbourBlocks); uncle.mBlockSize = mRefFactors[mLevel + 1]; uncle.mIsActive = uncle.mBlockID != std::numeric_limits::max(); } From 1fa1d187325e4991150453dd45aa474bfd097a0c Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Thu, 26 Jan 2023 15:38:41 -0500 Subject: [PATCH 10/24] problem setup --- apps/lbmMultiRes/lbmMultiRes.cu | 103 +++++++++++------- .../domain/internal/mGrid/mGridDescriptor.h | 15 ++- 2 files changed, 76 insertions(+), 42 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index 45a62864..77cffefe 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -182,16 +182,16 @@ template Neon::set::Container collide(Neon::domain::mGrid& grid, T omega0, int level, - int max_level, + int num_levels, const Neon::domain::mGrid::Field& fin, Neon::domain::mGrid::Field& fout) { return grid.getContainer( - "Collide" + std::to_string(level), level, - [=](Neon::set::Loader& loader) { + "collide_" + std::to_string(level), level, + [&, level, omega0, num_levels](Neon::set::Loader& loader) { const auto& in = fin.load(loader, level, Neon::MultiResCompute::MAP); auto out = fout.load(loader, level, Neon::MultiResCompute::MAP); - const T omega = computeOmega(omega0, level, max_level); + const T omega = computeOmega(omega0, level, num_levels); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { if (!in.hasChildren(cell)) { @@ -241,8 +241,8 @@ Neon::set::Container stream(Neon::domain::mGrid& grid, //This is "pull" stream return grid.getContainer( - "Stream" + std::to_string(level), level, - [=](Neon::set::Loader& loader) { + "stream_" + std::to_string(level), level, + [&, level](Neon::set::Loader& loader) { const auto& fpost_col = fpop_postcollision.load(loader, level, Neon::MultiResCompute::STENCIL); auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::MAP); @@ -280,8 +280,8 @@ Neon::set::Container explosionPull(Neon::domain::mGrid& grid, return grid.getContainer( - "explosionPull" + std::to_string(level), level, - [=](Neon::set::Loader& loader) { + "Explosion_" + std::to_string(level), level, + [&, level](Neon::set::Loader& loader) { const auto& fpost_col = fpop_postcollision.load(loader, level, Neon::MultiResCompute::STENCIL_UP); auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::MAP); @@ -330,9 +330,9 @@ Neon::set::Container coalescencePull(Neon::domain::mGrid& grid, // across the interface between coarse<->fine boundary by reading the population prepare during the store() return grid.getContainer( - "coalescencePull" + std::to_string(level), level, - [=](Neon::set::Loader& loader) { - auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::STENCIL_DOWN); + "Coalescence" + std::to_string(level), level, + [&, level](Neon::set::Loader& loader) { + auto& fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::STENCIL_DOWN); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { //If this cell has children i.e., it is been refined, than we should not work on it @@ -373,9 +373,9 @@ Neon::set::Container store(Neon::domain::mGrid& grid, //its neighbor's children) return grid.getContainer( - "store" + std::to_string(level), level, - [=](Neon::set::Loader& loader) { - auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::STENCIL_DOWN); + "store_" + std::to_string(level), level, + [&, level](Neon::set::Loader& loader) { + auto& fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::STENCIL_DOWN); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { //if the cell is refined, we might need to store something in it for its neighbor @@ -451,7 +451,7 @@ Neon::set::Container store(Neon::domain::mGrid& grid, template void stream(Neon::domain::mGrid& grid, int level, - const int max_level, + const int num_levels, const Neon::domain::mGrid::Field& fpop_postcollision, Neon::domain::mGrid::Field& fpop_poststreaming, std::vector& containers) @@ -463,7 +463,7 @@ void stream(Neon::domain::mGrid& grid, * (i) coarser or (ii) finer neighbors at level+1 and level-1 and hence require * (i) "explosion" or (ii) coalescence */ - if (level != max_level - 1) { + if (level != num_levels - 1) { /* Explosion: pull missing populations from coarser neighbors by copying coarse (level+1) to fine (level) * neighbors, initiated by the fine level ("Pull"). */ @@ -482,50 +482,64 @@ template void nonUniformTimestepRecursive(Neon::domain::mGrid& grid, const T omega0, const int level, - const int max_level, + const int num_levels, Neon::domain::mGrid::Field& fin, Neon::domain::mGrid::Field& fout, std::vector& containers) { // 1) collision for all voxels at level L=level - containers.push_back(collide(grid, omega0, level, max_level, fin, fout)); + containers.push_back(collide(grid, omega0, level, num_levels, fin, fout)); // 2) Storing fine (level - 1) data for later "coalescence" pulled by the coarse (level) - if (level != max_level) { - store(grid, level + 1, fout); + if (level != num_levels - 1) { + containers.push_back(store(grid, level + 1, fout)); } // 3) recurse down if (level != 0) { - nonUniformTimestepRecursive(grid, omega0, level - 1, max_level, fin, fout, containers); + nonUniformTimestepRecursive(grid, omega0, level - 1, num_levels, fin, fout, containers); } // 4) Streaming step that also performs the necessary "explosion" and "coalescence" steps. - stream(grid, level, max_level, fout, fin, containers); + stream(grid, level, num_levels, fout, fin, containers); // 5) stop - if (level == max_level - 1) { + if (level == num_levels - 1) { return; } // 6) collision for all voxels at level L = level - containers.push_back(collide(grid, omega0, level, max_level, fin, fout)); + containers.push_back(collide(grid, omega0, level, num_levels, fin, fout)); // 7) Storing fine(level) data for later "coalescence" pulled by the coarse(level) - if (level != max_level) { - store(grid, level + 1, fout); + if (level != num_levels - 1) { + containers.push_back(store(grid, level + 1, fout)); } // 8) recurse down if (level != 0) { - nonUniformTimestepRecursive(grid, omega0, level - 1, max_level, fin, fout, containers); + nonUniformTimestepRecursive(grid, omega0, level - 1, num_levels, fin, fout, containers); } // 9) Streaming step - stream(grid, level, max_level, fout, fin, containers); + stream(grid, level, num_levels, fout, fin, containers); } +Neon::float_3d mapToCube(Neon::index_3d p, Neon::index_3d dim) +{ + //map p to an axis-aligned cube from -1 to 1 + Neon::float_3d half_dim = dim.newType() * 0.5; + Neon::float_3d ret = (p.newType() - half_dim) / half_dim; + return ret; +} +inline float sdfCube(Neon::float_3d p, float b = 1.0) +{ + Neon::float_3d d(std::abs(p.x) - b, std::abs(p.y) - b, std::abs(p.z) - b); + Neon::float_3d d_max(std::max(d.x, 0.f), std::max(d.y, 0.f), std::max(d.z, 0.f)); + float len = std::sqrt(d_max.x * d_max.x + d_max.y * d_max.y + d_max.z * d_max.z); + return std::min(std::max(d.x, std::max(d.y, d.z)), 0.f) + len; +} int main(int argc, char** argv) { @@ -541,30 +555,37 @@ int main(int argc, char** argv) constexpr int DIM = 2; constexpr int Q = (DIM == 2) ? 9 : 19; + constexpr int depth = 3; - const int dim_x = 6; - const int dim_y = 6; - const int dim_z = 4; + const Neon::index_3d grid_dim(24, 24, 24); - const Neon::index_3d grid_dim(dim_x, dim_y, dim_z); - const Neon::domain::mGridDescriptor descriptor({1, 1}); + const Neon::domain::mGridDescriptor descriptor(depth); + + float levelSDF[depth + 1]; + levelSDF[0] = 0; + levelSDF[1] = -0.3; + levelSDF[2] = -0.6; + levelSDF[3] = -1.0; Neon::domain::mGrid grid( backend, grid_dim, {[&](const Neon::index_3d id) -> bool { - return ((id.x == 2 && id.y == 2) || - (id.x == 3 && id.y == 2) || - (id.x == 2 && id.y == 3) || - (id.x == 3 && id.y == 3)); + return sdfCube(mapToCube(id, grid_dim)) <= levelSDF[0] && + sdfCube(mapToCube(id, grid_dim)) > levelSDF[1]; + }, + [&](const Neon::index_3d& id) -> bool { + return sdfCube(mapToCube(id, grid_dim)) <= levelSDF[1] && + sdfCube(mapToCube(id, grid_dim)) > levelSDF[2]; }, [&](const Neon::index_3d& id) -> bool { - return true; + return sdfCube(mapToCube(id, grid_dim)) <= levelSDF[2] && + sdfCube(mapToCube(id, grid_dim)) > levelSDF[3]; }}, create_stencil(), descriptor); - grid.topologyToVTK("lbm.vtk", false); + //grid.topologyToVTK("lbm.vtk", false); //LBM problem @@ -593,6 +614,8 @@ int main(int argc, char** argv) }); } + fin.ioToVtk("sdf", "f"); + fin.updateCompute(); fout.updateCompute(); @@ -606,7 +629,7 @@ int main(int argc, char** argv) Neon::skeleton::Skeleton skl(grid.getBackend()); skl.sequence(containers, "MultiResLBM"); - //skl.ioToDot("MultiRes"); + //skl.ioToDot("MultiResLBM", "", true); skl.run(); diff --git a/libNeonDomain/include/Neon/domain/internal/mGrid/mGridDescriptor.h b/libNeonDomain/include/Neon/domain/internal/mGrid/mGridDescriptor.h index 0def37f6..71097d50 100644 --- a/libNeonDomain/include/Neon/domain/internal/mGrid/mGridDescriptor.h +++ b/libNeonDomain/include/Neon/domain/internal/mGrid/mGridDescriptor.h @@ -20,7 +20,7 @@ struct mGridDescriptor * @param levels as described above defaulted to 3 levels octree */ mGridDescriptor(std::initializer_list log2RefFactors) - : mLog2RefFactors(log2RefFactors), mSpacing(log2RefFactors) + : mLog2RefFactors(log2RefFactors) { computeSpacing(); } @@ -30,8 +30,19 @@ struct mGridDescriptor * by 8^3 blocks i.e., block data structure */ mGridDescriptor() - : mLog2RefFactors({3}), mSpacing({2 * 2 * 2}) { + mLog2RefFactors.resize(3, 1); + computeSpacing(); + } + + /** + * This constructor can be use for the default mGrid descriptor that defines a grid of single depth partitioned + * by 8^3 blocks i.e., block data structure + */ + mGridDescriptor(int depth) + { + mLog2RefFactors.resize(depth, 1); + computeSpacing(); } From e54a0cb1871b8ce689b253b5b44e195949ca220e Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Thu, 26 Jan 2023 17:06:47 -0500 Subject: [PATCH 11/24] problem setup --- apps/lbmMultiRes/lbmMultiRes.cu | 124 ++++++++++++++++++++++++-------- 1 file changed, 96 insertions(+), 28 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index 77cffefe..2251dd5f 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -218,7 +218,11 @@ Neon::set::Container collide(Neon::domain::mGrid& grid, for (int i = 0; i < Q; ++i) { T cu = 0; for (int d = 0; d < DIM; ++d) { - cu += latticeVelocity2D[i][d] * vel.v[d]; + if constexpr (DIM == 2) { + cu += latticeVelocity2D[i][d] * vel.v[d]; + } else { + cu += latticeVelocity3D[i][d] * vel.v[d]; + } } //equilibrium T feq = rho * t.t[i] * (1. + cu + 0.5 * cu * cu - usqr); @@ -526,21 +530,53 @@ void nonUniformTimestepRecursive(Neon::domain::mGrid& grid, stream(grid, level, num_levels, fout, fin, containers); } -Neon::float_3d mapToCube(Neon::index_3d p, Neon::index_3d dim) -{ - //map p to an axis-aligned cube from -1 to 1 - Neon::float_3d half_dim = dim.newType() * 0.5; - Neon::float_3d ret = (p.newType() - half_dim) / half_dim; - return ret; -} -inline float sdfCube(Neon::float_3d p, float b = 1.0) + +inline float sdfCube(Neon::index_3d id, Neon::index_3d dim, float b = 1.0) { + auto mapToCube = [&](Neon::index_3d id) { + //map p to an axis-aligned cube from -1 to 1 + Neon::float_3d half_dim = dim.newType() * 0.5; + Neon::float_3d ret = (id.newType() - half_dim) / half_dim; + return ret; + }; + Neon::float_3d p = mapToCube(id); + Neon::float_3d d(std::abs(p.x) - b, std::abs(p.y) - b, std::abs(p.z) - b); Neon::float_3d d_max(std::max(d.x, 0.f), std::max(d.y, 0.f), std::max(d.z, 0.f)); float len = std::sqrt(d_max.x * d_max.x + d_max.y * d_max.y + d_max.z * d_max.z); return std::min(std::max(d.x, std::max(d.y, d.z)), 0.f) + len; } +template +void postProcess(Neon::domain::mGrid& grid, + const int num_levels, + const Neon::domain::mGrid::Field& fpop, + Neon::domain::mGrid::Field& vel, + Neon::domain::mGrid::Field& rho) +{ + fpop.updateIO(); + + for (int level = 0; level < num_levels; ++level) { + grid.getContainer( + "postProcess_" + std::to_string(level), level, + [&, level](Neon::set::Loader& loader) { + const auto& pop = fpop.load(loader, level, Neon::MultiResCompute::STENCIL); + auto u = vel.load(loader, level, Neon::MultiResCompute::MAP); + auto rh = rho.load(loader, level, Neon::MultiResCompute::MAP); + + return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { + + }; + }); + } + + vel.updateIO(); + rho.updateIO(); + + vel.ioToVtk("vel_", "vel"); + rho.ioToVtk("rho_", "rho"); +} + int main(int argc, char** argv) { Neon::init(); @@ -572,16 +608,16 @@ int main(int argc, char** argv) Neon::domain::mGrid grid( backend, grid_dim, {[&](const Neon::index_3d id) -> bool { - return sdfCube(mapToCube(id, grid_dim)) <= levelSDF[0] && - sdfCube(mapToCube(id, grid_dim)) > levelSDF[1]; + return sdfCube(id, grid_dim) <= levelSDF[0] && + sdfCube(id, grid_dim) > levelSDF[1]; }, [&](const Neon::index_3d& id) -> bool { - return sdfCube(mapToCube(id, grid_dim)) <= levelSDF[1] && - sdfCube(mapToCube(id, grid_dim)) > levelSDF[2]; + return sdfCube(id, grid_dim) <= levelSDF[1] && + sdfCube(id, grid_dim) > levelSDF[2]; }, [&](const Neon::index_3d& id) -> bool { - return sdfCube(mapToCube(id, grid_dim)) <= levelSDF[2] && - sdfCube(mapToCube(id, grid_dim)) > levelSDF[3]; + return sdfCube(id, grid_dim) <= levelSDF[2] && + sdfCube(id, grid_dim) > levelSDF[3]; }}, create_stencil(), descriptor); @@ -589,32 +625,62 @@ int main(int argc, char** argv) //LBM problem - const int max_iter = 300; - const T ulb = 0.01; - const T Re = 20; - const T clength = grid_dim.x; - const T visclb = ulb * clength / Re; - const T smagorinskyConstant = 0.02; - const T omega = 1.0 / (3. * visclb + 0.5); + const int max_iter = 300; + const T ulb = 0.02; + const T Re = 100; + const T clength = grid_dim.x; + const T visclb = ulb * clength / Re; + const T smagorinskyConstant = 0.02; + const T omega = 1.0 / (3. * visclb + 0.5); + const Neon::double_3d ulid(1.0, 0., 0.); auto fin = grid.newField("fin", Q, 0); auto fout = grid.newField("fout", Q, 0); + auto vel = grid.newField("vel", 3, 0); + auto rho = grid.newField("rho", 1, 0); + //TODO init fin and fout + constexpr auto t = latticeWeight(); + + auto init = [&](const Neon::int32_3d idx, const int q) { + T ret = t.t[q]; + + if (idx.x == 0 || idx.x == grid_dim.x - 1 || + idx.y == 0 || idx.y == grid_dim.y - 1 || + idx.z == 0 || idx.z == grid_dim.z - 1) { + + if (idx.x == grid_dim.x - 1) { + ret = 0; + for (int d = 0; d < DIM; ++d) { + if (DIM == 2) { + ret += latticeVelocity2D[q][d] * ulid.v[d]; + } else { + ret += latticeVelocity3D[q][d] * ulid.v[d]; + } + } + ret *= -6. * t.t[q] * ulb; + } else { + ret = 0; + } + } + return ret; + }; + for (int l = 0; l < descriptor.getDepth(); ++l) { fin.forEachActiveCell( l, - [&](const Neon::int32_3d, const int, T& val) { - val = 0; + [&](const Neon::int32_3d idx, const int q, T& val) { + val = init(idx, q); }); fout.forEachActiveCell( l, - [&](const Neon::int32_3d, const int, T& val) { - val = 0; + [&](const Neon::int32_3d idx, const int q, T& val) { + val = init(idx, q); }); } - fin.ioToVtk("sdf", "f"); + //fin.ioToVtk("sdf", "f"); fin.updateCompute(); fout.updateCompute(); @@ -631,7 +697,9 @@ int main(int argc, char** argv) skl.sequence(containers, "MultiResLBM"); //skl.ioToDot("MultiResLBM", "", true); - skl.run(); + for (int t = 0; t < max_iter; ++t) { + skl.run(); + } grid.getBackend().syncAll(); fin.updateIO(); From 31d7e97365c3d8492a35fb5b1b3c1a69a86bf267 Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Tue, 31 Jan 2023 11:11:21 -0500 Subject: [PATCH 12/24] init pop, export velocity and density, and boundary treatment --- apps/lbmMultiRes/lbmMultiRes.cu | 426 +++++++++++------- .../domain/internal/bGrid/bPartition_imp.h | 2 +- 2 files changed, 272 insertions(+), 156 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index 2251dd5f..44b8667d 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -2,6 +2,14 @@ #include "Neon/domain/mGrid.h" #include "Neon/skeleton/Skeleton.h" +enum CellType : int +{ + bounceBack = 0, + movingWall = 1, + bulk = 2, + undefined = 3, +}; + template Neon::domain::Stencil create_stencil(); @@ -21,54 +29,55 @@ Neon::domain::Stencil create_stencil<2, 9>() template <> Neon::domain::Stencil create_stencil<3, 19>() { - // filterCenterOut = false; return Neon::domain::Stencil::s19_t(false); } NEON_CUDA_DEVICE_ONLY static constexpr char latticeVelocity2D[9][2] = { - {0, 0}, - {0, -1}, - {0, 1}, - {-1, 0}, - {-1, -1}, - {-1, 1}, - {1, 0}, - {1, -1}, - {1, 1}}; + {0, 0}, //0 -> 0 || 0000 -> 0000 + {0, -1}, //1 -> 2 || 0001 -> 0010 + {0, 1}, //2 -> 1 || 0010 -> 0001 + {-1, 0}, //3 -> 6 || 0100 -> 0110 + {-1, -1}, //4 -> 8 || 0100 -> 1000 + {-1, 1}, //5 -> 7 || 0101 -> 0111 + {1, 0}, //6 -> 3 || 0110 -> 0011 + {1, -1}, //7 -> 5 || 0111 -> 0101 + {1, 1}}; //8 -> 4 || 1000 -> 0100 NEON_CUDA_DEVICE_ONLY static constexpr char latticeVelocity3D[27][3] = { - {0, 0, 0}, - {0, -1, 0}, - {0, 1, 0}, - {-1, 0, 0}, - {-1, -1, 0}, - {-1, 1, 0}, - {1, 0, 0}, - {1, -1, 0}, - {1, 1, 0}, - - {0, 0, -1}, - {0, -1, -1}, - {0, 1, -1}, - {-1, 0, -1}, - {-1, -1, -1}, - {-1, 1, -1}, - {1, 0, -1}, - {1, -1, -1}, - {1, 1, -1}, - - {0, 0, 1}, - {0, -1, 1}, - {0, 1, 1}, - {-1, 0, 1}, - {-1, -1, 1}, - {-1, 1, 1}, - {1, 0, 1}, - {1, -1, 1}, - {1, 1, 1} + {0, 0, 0}, //0 -> 0 || 00000 -> 00000 + {0, -1, 0}, //1 -> 2 || 00001 -> 00010 + {0, 1, 0}, //2 -> 1 || 00010 -> 00001 + {-1, 0, 0}, //3 -> 6 || 00100 -> 00110 + {-1, -1, 0}, //4 -> 8 || 00100 -> 01000 + {-1, 1, 0}, //5 -> 7 || 00101 -> 00111 + {1, 0, 0}, //6 -> 3 || 00110 -> 00011 + {1, -1, 0}, //7 -> 5 || 00111 -> 00101 + {1, 1, 0}, //8 -> 4 || 01000 -> 00100 + + {0, 0, -1}, //9 -> 18 || 01000 -> 10010 + {0, -1, -1}, //10 -> 20 || 01010 -> 10100 + {0, 1, -1}, //11 -> 19 || 01011 -> 10011 + {-1, 0, -1}, //12 -> 24 || 01100 -> 11000 + {-1, -1, -1}, //13 -> 26 || 01101 -> 11010 + {-1, 1, -1}, //14 -> 25 || 01110 -> 11001 + {1, 0, -1}, //15 -> 21 || 01111 -> 10101 + {1, -1, -1}, //16 -> 23 || 10000 -> 10111 + {1, 1, -1}, //17 -> 22 || 10001 -> 10110 + + {0, 0, 1}, //18 -> 9 || 10010 -> 01001 + {0, -1, 1}, //19 -> 11 || 10011 -> 01011 + {0, 1, 1}, //20 -> 10 || 10100 -> 01010 + {-1, 0, 1}, //21 -> 15 || 10101 -> 01111 + {-1, -1, 1}, //22 -> 17 || 10110 -> 10001 + {-1, 1, 1}, //23 -> 16 || 10111 -> 10000 + {1, 0, 1}, //24 -> 12 || 11000 -> 01100 + {1, -1, 1}, //25 -> 14 || 11001 -> 01110 + {1, 1, 1} //26 -> 13 || 11010 -> 01101 }; - +NEON_CUDA_DEVICE_ONLY static constexpr char latticeOppositeID[27] = { + //opposite q for 2d is a subset of what is in 3d so only use one + 0, 2, 1, 6, 8, 7, 3, 5, 4, 18, 20, 19, 24, 26, 25, 21, 23, 22, 9, 11, 10, 15, 17, 16, 12, 14, 13}; template NEON_CUDA_HOST_DEVICE Neon::int8_3d getDir(const int8_t q) @@ -142,9 +151,9 @@ NEON_CUDA_HOST_DEVICE inline Neon::int8_3d unlceOffset(const T& cell, const Neon template -NEON_CUDA_HOST_DEVICE T computeOmega(T omega0, int level, int num_levels) +NEON_CUDA_HOST_DEVICE T computeOmega(T omega0, int level, int numLevels) { - int ilevel = num_levels - level - 1; + int ilevel = numLevels - level - 1; // scalbln(1.0, x) = 2^x return 2 * omega0 / (scalbln(1.0, ilevel + 1) + (1. - scalbln(1.0, ilevel)) * omega0); } @@ -179,56 +188,61 @@ NEON_CUDA_HOST_DEVICE Neon::Vec_3d velocity(const T* fin, } template -Neon::set::Container collide(Neon::domain::mGrid& grid, - T omega0, - int level, - int num_levels, - const Neon::domain::mGrid::Field& fin, - Neon::domain::mGrid::Field& fout) +Neon::set::Container collide(Neon::domain::mGrid& grid, + T omega0, + int level, + int numLevels, + const Neon::domain::mGrid::Field& cellType, + const Neon::domain::mGrid::Field& fin, + Neon::domain::mGrid::Field& fout) { return grid.getContainer( "collide_" + std::to_string(level), level, - [&, level, omega0, num_levels](Neon::set::Loader& loader) { + [&, level, omega0, numLevels](Neon::set::Loader& loader) { + const auto& type = cellType.load(loader, level, Neon::MultiResCompute::MAP); const auto& in = fin.load(loader, level, Neon::MultiResCompute::MAP); auto out = fout.load(loader, level, Neon::MultiResCompute::MAP); - const T omega = computeOmega(omega0, level, num_levels); + const T omega = computeOmega(omega0, level, numLevels); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { - if (!in.hasChildren(cell)) { + if (type(cell, 0) == CellType::bulk) { - constexpr auto t = latticeWeight(); + if (!in.hasChildren(cell)) { - //fin - T ins[Q]; - for (int i = 0; i < Q; ++i) { - ins[i] = in(cell, i); - } + constexpr auto t = latticeWeight(); - //density - T rho = 0; - for (int i = 0; i < Q; ++i) { - rho += ins[i]; - } + //fin + T ins[Q]; + for (int i = 0; i < Q; ++i) { + ins[i] = in(cell, i); + } - //velocity - const Neon::Vec_3d vel = velocity(ins, rho); + //density + T rho = 0; + for (int i = 0; i < Q; ++i) { + rho += ins[i]; + } + //velocity + const Neon::Vec_3d vel = velocity(ins, rho); - const T usqr = (3.0 / 2.0) * (vel.x * vel.x + vel.y * vel.y + vel.z * vel.z); - for (int i = 0; i < Q; ++i) { - T cu = 0; - for (int d = 0; d < DIM; ++d) { - if constexpr (DIM == 2) { - cu += latticeVelocity2D[i][d] * vel.v[d]; - } else { - cu += latticeVelocity3D[i][d] * vel.v[d]; + + const T usqr = (3.0 / 2.0) * (vel.x * vel.x + vel.y * vel.y + vel.z * vel.z); + for (int i = 0; i < Q; ++i) { + T cu = 0; + for (int d = 0; d < DIM; ++d) { + if constexpr (DIM == 2) { + cu += latticeVelocity2D[i][d] * vel.v[d]; + } else { + cu += latticeVelocity3D[i][d] * vel.v[d]; + } } - } - //equilibrium - T feq = rho * t.t[i] * (1. + cu + 0.5 * cu * cu - usqr); + //equilibrium + T feq = rho * t.t[i] * (1. + cu + 0.5 * cu * cu - usqr); - //collide - out(cell, i) = ins[i] - omega * (ins[i] - feq); + //collide + out(cell, i) = ins[i] - omega * (ins[i] - feq); + } } } }; @@ -236,10 +250,11 @@ Neon::set::Container collide(Neon::domain::mGrid& grid, } template -Neon::set::Container stream(Neon::domain::mGrid& grid, - int level, - const Neon::domain::mGrid::Field& fpop_postcollision, - Neon::domain::mGrid::Field& fpop_poststreaming) +Neon::set::Container stream(Neon::domain::mGrid& grid, + int level, + const Neon::domain::mGrid::Field& cellType, + const Neon::domain::mGrid::Field& postCollision, + Neon::domain::mGrid::Field& postStreaming) { //regular Streaming of the normal voxels at level L which are not interfaced with L+1 and L-1 levels. //This is "pull" stream @@ -247,22 +262,28 @@ Neon::set::Container stream(Neon::domain::mGrid& grid, return grid.getContainer( "stream_" + std::to_string(level), level, [&, level](Neon::set::Loader& loader) { - const auto& fpost_col = fpop_postcollision.load(loader, level, Neon::MultiResCompute::STENCIL); - auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::MAP); + const auto& type = cellType.load(loader, level, Neon::MultiResCompute::STENCIL); + const auto& fpost_col = postCollision.load(loader, level, Neon::MultiResCompute::STENCIL); + auto fpost_stm = postStreaming.load(loader, level, Neon::MultiResCompute::MAP); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { - //If this cell has children i.e., it is been refined, than we should not work on it - //because this cell is only there to allow query and not to operate on - if (!fpost_stm.hasChildren(cell)) { + if (type(cell, 0) == CellType::bulk) { + //If this cell has children i.e., it is been refined, than we should not work on it + //because this cell is only there to allow query and not to operate on + if (!fpost_stm.hasChildren(cell)) { - for (int8_t q = 0; q < Q; ++q) { - const Neon::int8_3d dir = -getDir(q); + for (int8_t q = 0; q < Q; ++q) { + const Neon::int8_3d dir = -getDir(q); - //if the neighbor cell has children, then this 'cell' is interfacing with L-1 (fine) along q direction - if (!fpost_stm.hasChildren(cell, dir)) { - auto neighbor = fpost_col.nghVal(cell, dir, q, T(0)); - if (neighbor.isValid) { - fpost_stm(cell, q) = neighbor.value; + //if the neighbor cell has children, then this 'cell' is interfacing with L-1 (fine) along q direction + if (!fpost_stm.hasChildren(cell, dir)) { + + if (type.nghVal(cell, dir, 0, CellType::undefined).value == CellType::bulk) { + fpost_stm(cell, q) = fpost_col.nghVal(cell, dir, q, T(0)).value; + } else { + const int8_t opposte_q = latticeOppositeID[q]; + fpost_stm(cell, q) = fpost_col(cell, opposte_q) + fpost_col.nghVal(cell, dir, opposte_q, T(0)).value; + } } } } @@ -274,8 +295,8 @@ Neon::set::Container stream(Neon::domain::mGrid& grid, template Neon::set::Container explosionPull(Neon::domain::mGrid& grid, int level, - const Neon::domain::mGrid::Field& fpop_postcollision, - Neon::domain::mGrid::Field& fpop_poststreaming) + const Neon::domain::mGrid::Field& postCollision, + Neon::domain::mGrid::Field& postStreaming) { // Initiated by the fine level (hence "pull"), this function performs a coarse (level+1) to // fine (level) communication or "explosion" by simply distributing copies of coarse grid onto the fine grid. @@ -286,8 +307,8 @@ Neon::set::Container explosionPull(Neon::domain::mGrid& grid, return grid.getContainer( "Explosion_" + std::to_string(level), level, [&, level](Neon::set::Loader& loader) { - const auto& fpost_col = fpop_postcollision.load(loader, level, Neon::MultiResCompute::STENCIL_UP); - auto fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::MAP); + const auto& fpost_col = postCollision.load(loader, level, Neon::MultiResCompute::STENCIL_UP); + auto fpost_stm = postStreaming.load(loader, level, Neon::MultiResCompute::MAP); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { //If this cell has children i.e., it is been refined, that we should not work on it @@ -328,7 +349,7 @@ Neon::set::Container explosionPull(Neon::domain::mGrid& grid, template Neon::set::Container coalescencePull(Neon::domain::mGrid& grid, int level, - Neon::domain::mGrid::Field& fpop_poststreaming) + Neon::domain::mGrid::Field& postStreaming) { // Initiated by the coarse level (hence "pull"), this function simply read the missing population // across the interface between coarse<->fine boundary by reading the population prepare during the store() @@ -336,7 +357,7 @@ Neon::set::Container coalescencePull(Neon::domain::mGrid& grid, return grid.getContainer( "Coalescence" + std::to_string(level), level, [&, level](Neon::set::Loader& loader) { - auto& fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::STENCIL_DOWN); + auto& fpost_stm = postStreaming.load(loader, level, Neon::MultiResCompute::STENCIL_DOWN); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { //If this cell has children i.e., it is been refined, than we should not work on it @@ -363,7 +384,7 @@ Neon::set::Container coalescencePull(Neon::domain::mGrid& grid, template Neon::set::Container store(Neon::domain::mGrid& grid, int level, - Neon::domain::mGrid::Field& fpop_poststreaming) + Neon::domain::mGrid::Field& postStreaming) { //Initiated by the coarse level (level), this function prepares and stores the fine (level - 1) // information for further pulling initiated by the coarse (this) level invoked by coalescence_pull @@ -379,7 +400,7 @@ Neon::set::Container store(Neon::domain::mGrid& grid, return grid.getContainer( "store_" + std::to_string(level), level, [&, level](Neon::set::Loader& loader) { - auto& fpost_stm = fpop_poststreaming.load(loader, level, Neon::MultiResCompute::STENCIL_DOWN); + auto& fpost_stm = postStreaming.load(loader, level, Neon::MultiResCompute::STENCIL_DOWN); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { //if the cell is refined, we might need to store something in it for its neighbor @@ -453,81 +474,83 @@ Neon::set::Container store(Neon::domain::mGrid& grid, template -void stream(Neon::domain::mGrid& grid, - int level, - const int num_levels, - const Neon::domain::mGrid::Field& fpop_postcollision, - Neon::domain::mGrid::Field& fpop_poststreaming, - std::vector& containers) +void stream(Neon::domain::mGrid& grid, + int level, + const int numLevels, + const Neon::domain::mGrid::Field& cellType, + const Neon::domain::mGrid::Field& postCollision, + Neon::domain::mGrid::Field& postStreaming, + std::vector& containers) { - containers.push_back(stream(grid, level, fpop_postcollision, fpop_poststreaming)); + containers.push_back(stream(grid, level, cellType, postCollision, postStreaming)); /* * Streaming for interface voxels that have * (i) coarser or (ii) finer neighbors at level+1 and level-1 and hence require * (i) "explosion" or (ii) coalescence */ - if (level != num_levels - 1) { + if (level != numLevels - 1) { /* Explosion: pull missing populations from coarser neighbors by copying coarse (level+1) to fine (level) * neighbors, initiated by the fine level ("Pull"). */ - containers.push_back(explosionPull(grid, level, fpop_postcollision, fpop_poststreaming)); + containers.push_back(explosionPull(grid, level, postCollision, postStreaming)); } if (level != 0) { /* Coalescence: pull missing populations from finer neighbors by "smart" averaging fine (level-1) * to coarse (level) communication, initiated by the coarse level ("Pull"). */ - containers.push_back(coalescencePull(grid, level, fpop_poststreaming)); + containers.push_back(coalescencePull(grid, level, postStreaming)); } } template -void nonUniformTimestepRecursive(Neon::domain::mGrid& grid, - const T omega0, - const int level, - const int num_levels, - Neon::domain::mGrid::Field& fin, - Neon::domain::mGrid::Field& fout, - std::vector& containers) +void nonUniformTimestepRecursive(Neon::domain::mGrid& grid, + const T omega0, + const int level, + const int numLevels, + const Neon::domain::mGrid::Field& cellType, + Neon::domain::mGrid::Field& fin, + Neon::domain::mGrid::Field& fout, + std::vector& containers) { // 1) collision for all voxels at level L=level - containers.push_back(collide(grid, omega0, level, num_levels, fin, fout)); + containers.push_back(collide(grid, omega0, level, numLevels, cellType, fin, fout)); // 2) Storing fine (level - 1) data for later "coalescence" pulled by the coarse (level) - if (level != num_levels - 1) { + if (level != numLevels - 1) { containers.push_back(store(grid, level + 1, fout)); } // 3) recurse down if (level != 0) { - nonUniformTimestepRecursive(grid, omega0, level - 1, num_levels, fin, fout, containers); + nonUniformTimestepRecursive(grid, omega0, level - 1, numLevels, cellType, fin, fout, containers); } // 4) Streaming step that also performs the necessary "explosion" and "coalescence" steps. - stream(grid, level, num_levels, fout, fin, containers); + stream(grid, level, numLevels, cellType, fout, fin, containers); // 5) stop - if (level == num_levels - 1) { + if (level == numLevels - 1) { return; } // 6) collision for all voxels at level L = level - containers.push_back(collide(grid, omega0, level, num_levels, fin, fout)); + containers.push_back(collide(grid, omega0, level, numLevels, cellType, fin, fout)); // 7) Storing fine(level) data for later "coalescence" pulled by the coarse(level) - if (level != num_levels - 1) { + if (level != numLevels - 1) { containers.push_back(store(grid, level + 1, fout)); } // 8) recurse down if (level != 0) { - nonUniformTimestepRecursive(grid, omega0, level - 1, num_levels, fin, fout, containers); + nonUniformTimestepRecursive(grid, omega0, level - 1, numLevels, cellType, fin, fout, containers); } // 9) Streaming step - stream(grid, level, num_levels, fout, fin, containers); + stream(grid, level, numLevels, cellType, fout, fin, containers); } @@ -548,30 +571,94 @@ inline float sdfCube(Neon::index_3d id, Neon::index_3d dim, float b = 1.0) } template -void postProcess(Neon::domain::mGrid& grid, - const int num_levels, - const Neon::domain::mGrid::Field& fpop, - Neon::domain::mGrid::Field& vel, - Neon::domain::mGrid::Field& rho) +void postProcess(Neon::domain::mGrid& grid, + const int numLevels, + const Neon::domain::mGrid::Field& fpop, + const Neon::domain::mGrid::Field& cellType, + Neon::domain::mGrid::Field& vel, + Neon::domain::mGrid::Field& rho) { - fpop.updateIO(); + //fpop.updateIO(); + + for (int level = 0; level < numLevels; ++level) { + auto container = + grid.getContainer( + "postProcess_" + std::to_string(level), level, + [&, level](Neon::set::Loader& loader) { + const auto& pop = fpop.load(loader, level, Neon::MultiResCompute::STENCIL); + const auto& type = cellType.load(loader, level, Neon::MultiResCompute::MAP); + auto& u = vel.load(loader, level, Neon::MultiResCompute::MAP); + auto& rh = rho.load(loader, level, Neon::MultiResCompute::MAP); - for (int level = 0; level < num_levels; ++level) { - grid.getContainer( - "postProcess_" + std::to_string(level), level, - [&, level](Neon::set::Loader& loader) { - const auto& pop = fpop.load(loader, level, Neon::MultiResCompute::STENCIL); - auto u = vel.load(loader, level, Neon::MultiResCompute::MAP); - auto rh = rho.load(loader, level, Neon::MultiResCompute::MAP); - return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { + return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { + constexpr auto t = latticeWeight(); - }; - }); + if (type(cell, 0) == CellType::bulk) { + + //fin + T ins[Q]; + for (int i = 0; i < Q; ++i) { + ins[i] = pop(cell, i); + } + + //density + T r = 0; + for (int i = 0; i < Q; ++i) { + r += ins[i]; + } + rh(cell, 0) = r; + + //velocity + const Neon::Vec_3d vel = velocity(ins, r); + + u(cell, 0) = vel.v[0]; + u(cell, 1) = vel.v[1]; + u(cell, 2) = vel.v[2]; + + + /*T st[Q]; + T r = 0; + if (!pop.hasChildren(cell)) { + assert(!pop.hasChildren(cell)); + for (int8_t q = 0; q < Q; ++q) { + const Neon::int8_3d dir = -getDir(q); + if (!pop.hasChildren(cell, dir)) { + auto neighbor = pop.nghVal(cell, dir, q, T(0)); + if (neighbor.isValid) { + st[q] = neighbor.value; + r += st[q]; + } + } + } + + rh(cell, 0) = r; + + const Neon::Vec_3d vel = velocity(ins, rho); + + for (int d = 0; d < DIM; ++d) { + u(cell, d) = vel.v[d]; + } + }*/ + } + if (type(cell, 0) == CellType::movingWall) { + rh(cell, 0) = 1.0; + + for (int d = 0; d < DIM; ++d) { + int i = (d == 0) ? 3 : ((d == 1) ? 1 : 9); + u(cell, d) = pop(cell, i) / (6.0 * 1.0 / 18.0); + } + } + }; + }); + + container.run(0); } - vel.updateIO(); - rho.updateIO(); + grid.getBackend().syncAll(); + + //vel.updateIO(); + //rho.updateIO(); vel.ioToVtk("vel_", "vel"); rho.ioToVtk("rho_", "rho"); @@ -593,7 +680,7 @@ int main(int argc, char** argv) constexpr int Q = (DIM == 2) ? 9 : 19; constexpr int depth = 3; - const Neon::index_3d grid_dim(24, 24, 24); + const Neon::index_3d grid_dim(24, 24, 4); const Neon::domain::mGridDescriptor descriptor(depth); @@ -632,15 +719,38 @@ int main(int argc, char** argv) const T visclb = ulb * clength / Re; const T smagorinskyConstant = 0.02; const T omega = 1.0 / (3. * visclb + 0.5); - const Neon::double_3d ulid(1.0, 0., 0.); + const Neon::double_3d ulid(ulb, 0., 0.); + //alloc fields auto fin = grid.newField("fin", Q, 0); auto fout = grid.newField("fout", Q, 0); + auto cellType = grid.newField("CellType", 1, CellType::bulk); auto vel = grid.newField("vel", 3, 0); auto rho = grid.newField("rho", 1, 0); - //TODO init fin and fout + //classify voxels + for (int l = 0; l < descriptor.getDepth(); ++l) { + cellType.forEachActiveCell( + l, + [&](const Neon::int32_3d idx, const int q, CellType& val) { + val = CellType::bulk; + if (idx.x == 0 || idx.x == grid_dim.x - 1 || + idx.y == 0 || idx.y == grid_dim.y - 1 || + idx.z == 0 || idx.z == grid_dim.z - 1) { + + val = CellType::bounceBack; + + if (idx.y == grid_dim.y - 1) { + val = CellType::movingWall; + } + } + }); + } + cellType.updateCompute(); + + + // init fin and fout constexpr auto t = latticeWeight(); auto init = [&](const Neon::int32_3d idx, const int q) { @@ -650,7 +760,7 @@ int main(int argc, char** argv) idx.y == 0 || idx.y == grid_dim.y - 1 || idx.z == 0 || idx.z == grid_dim.z - 1) { - if (idx.x == grid_dim.x - 1) { + if (idx.y == grid_dim.y - 1) { ret = 0; for (int d = 0; d < DIM; ++d) { if (DIM == 2) { @@ -679,30 +789,36 @@ int main(int argc, char** argv) val = init(idx, q); }); } - - //fin.ioToVtk("sdf", "f"); - fin.updateCompute(); fout.updateCompute(); - std::vector containers; + //fin.ioToVtk("fin", "f"); + postProcess(grid, descriptor.getDepth(), fout, cellType, vel, rho); + + //skeleton + std::vector containers; nonUniformTimestepRecursive(grid, omega, descriptor.getDepth() - 1, descriptor.getDepth(), + cellType, fin, fout, containers); Neon::skeleton::Skeleton skl(grid.getBackend()); skl.sequence(containers, "MultiResLBM"); //skl.ioToDot("MultiResLBM", "", true); + //execution for (int t = 0; t < max_iter; ++t) { skl.run(); + if (t % 10 == 0) { + postProcess(grid, descriptor.getDepth(), fout, cellType, vel, rho); + } } grid.getBackend().syncAll(); - fin.updateIO(); - fout.updateIO(); + //fin.updateIO(); + //fout.updateIO(); } } \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition_imp.h b/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition_imp.h index 8b31e6a8..338617c2 100644 --- a/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition_imp.h +++ b/libNeonDomain/include/Neon/domain/internal/bGrid/bPartition_imp.h @@ -13,7 +13,7 @@ bPartition::bPartition() mNeighbourBlocks(nullptr), mOrigin(nullptr), mMask(nullptr), - mOutsideValue(0), + mOutsideValue(T()), mStencilNghIndex(nullptr), mIsInSharedMem(false), mMemSharedMem(nullptr), From 0294cbd8766fa21f53f19f91b2722abc2b8332b8 Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Tue, 31 Jan 2023 18:01:52 -0500 Subject: [PATCH 13/24] problem setup --- apps/lbmMultiRes/lbmMultiRes.cu | 75 +++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 31 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index 44b8667d..7ee8de08 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -27,9 +27,9 @@ Neon::domain::Stencil create_stencil<2, 9>() } template <> -Neon::domain::Stencil create_stencil<3, 19>() +Neon::domain::Stencil create_stencil<3, 27>() { - return Neon::domain::Stencil::s19_t(false); + return Neon::domain::Stencil::s27_t(false); } NEON_CUDA_DEVICE_ONLY static constexpr char latticeVelocity2D[9][2] = { @@ -112,9 +112,9 @@ struct latticeWeight if constexpr (DIM == 3) { for (int i = 0; i < Q; ++i) { - if (latticeVelocity2D[i][0] * latticeVelocity2D[i][0] + - latticeVelocity2D[i][1] * latticeVelocity2D[i][1] + - latticeVelocity2D[i][2] * latticeVelocity2D[i][2] < + if (latticeVelocity3D[i][0] * latticeVelocity3D[i][0] + + latticeVelocity3D[i][1] * latticeVelocity3D[i][1] + + latticeVelocity3D[i][2] * latticeVelocity3D[i][2] < 1.1f) { t[i] = 2.0f / 36.0f; } else { @@ -554,7 +554,7 @@ void nonUniformTimestepRecursive(Neon::domain::mGrid& gri } -inline float sdfCube(Neon::index_3d id, Neon::index_3d dim, float b = 1.0) +inline float sdfCube(Neon::index_3d id, Neon::index_3d dim, Neon::float_3d b = {1.0, 1.0, 1.0}) { auto mapToCube = [&](Neon::index_3d id) { //map p to an axis-aligned cube from -1 to 1 @@ -564,10 +564,12 @@ inline float sdfCube(Neon::index_3d id, Neon::index_3d dim, float b = 1.0) }; Neon::float_3d p = mapToCube(id); - Neon::float_3d d(std::abs(p.x) - b, std::abs(p.y) - b, std::abs(p.z) - b); + Neon::float_3d d(std::abs(p.x) - b.x, std::abs(p.y) - b.y, std::abs(p.z) - b.z); + Neon::float_3d d_max(std::max(d.x, 0.f), std::max(d.y, 0.f), std::max(d.z, 0.f)); float len = std::sqrt(d_max.x * d_max.x + d_max.y * d_max.y + d_max.z * d_max.z); - return std::min(std::max(d.x, std::max(d.y, d.z)), 0.f) + len; + float val = std::min(std::max(d.x, std::max(d.y, d.z)), 0.f) + len; + return val; } template @@ -575,6 +577,7 @@ void postProcess(Neon::domain::mGrid& grid, const int numLevels, const Neon::domain::mGrid::Field& fpop, const Neon::domain::mGrid::Field& cellType, + const int iteration, Neon::domain::mGrid::Field& vel, Neon::domain::mGrid::Field& rho) { @@ -660,8 +663,8 @@ void postProcess(Neon::domain::mGrid& grid, //vel.updateIO(); //rho.updateIO(); - vel.ioToVtk("vel_", "vel"); - rho.ioToVtk("rho_", "rho"); + vel.ioToVtk("vel_" + std::to_string(iteration), "vel"); + rho.ioToVtk("rho_" + std::to_string(iteration), "rho"); } int main(int argc, char** argv) @@ -676,35 +679,41 @@ int main(int argc, char** argv) std::vector gpu_ids{0}; Neon::Backend backend(gpu_ids, runtime); - constexpr int DIM = 2; - constexpr int Q = (DIM == 2) ? 9 : 19; + constexpr int DIM = 3; + constexpr int Q = (DIM == 2) ? 9 : 27; constexpr int depth = 3; - const Neon::index_3d grid_dim(24, 24, 4); + const Neon::domain::mGridDescriptor descriptor(depth); + //const Neon::index_3d grid_dim(144, 144, 144); + //float levelSDF[depth + 1]; + //levelSDF[0] = 0; + //levelSDF[1] = -28.0 / 144.0; + //levelSDF[2] = -56.0 / 144.0; + //levelSDF[3] = -1.0; - const Neon::domain::mGridDescriptor descriptor(depth); - float levelSDF[depth + 1]; + const Neon::index_3d grid_dim(24, 24, 24); + float levelSDF[depth + 1]; levelSDF[0] = 0; - levelSDF[1] = -0.3; - levelSDF[2] = -0.6; + levelSDF[1] = -8 / 24.0; + levelSDF[2] = -16 / 24.0; levelSDF[3] = -1.0; Neon::domain::mGrid grid( backend, grid_dim, {[&](const Neon::index_3d id) -> bool { - return sdfCube(id, grid_dim) <= levelSDF[0] && - sdfCube(id, grid_dim) > levelSDF[1]; + return sdfCube(id, grid_dim - 1) <= levelSDF[0] && + sdfCube(id, grid_dim - 1) > levelSDF[1]; }, [&](const Neon::index_3d& id) -> bool { - return sdfCube(id, grid_dim) <= levelSDF[1] && - sdfCube(id, grid_dim) > levelSDF[2]; + return sdfCube(id, grid_dim - 1) <= levelSDF[1] && + sdfCube(id, grid_dim - 1) > levelSDF[2]; }, [&](const Neon::index_3d& id) -> bool { - return sdfCube(id, grid_dim) <= levelSDF[2] && - sdfCube(id, grid_dim) > levelSDF[3]; + return sdfCube(id, grid_dim - 1) <= levelSDF[2] && + sdfCube(id, grid_dim - 1) > levelSDF[3]; }}, create_stencil(), descriptor); @@ -712,12 +721,11 @@ int main(int argc, char** argv) //LBM problem - const int max_iter = 300; - const T ulb = 0.02; - const T Re = 100; + const int max_iter = 3000; + const T ulb = 0.04; + const T Re = 1000; const T clength = grid_dim.x; const T visclb = ulb * clength / Re; - const T smagorinskyConstant = 0.02; const T omega = 1.0 / (3. * visclb + 0.5); const Neon::double_3d ulid(ulb, 0., 0.); @@ -788,14 +796,18 @@ int main(int argc, char** argv) [&](const Neon::int32_3d idx, const int q, T& val) { val = init(idx, q); }); + vel.forEachActiveCell(l, [&](const Neon::int32_3d, const int, T& val) { + val = 0; + }); + rho.forEachActiveCell(l, [&](const Neon::int32_3d, const int, T& val) { + val = 0; + }); } fin.updateCompute(); fout.updateCompute(); //fin.ioToVtk("fin", "f"); - postProcess(grid, descriptor.getDepth(), fout, cellType, vel, rho); - //skeleton std::vector containers; nonUniformTimestepRecursive(grid, @@ -812,9 +824,10 @@ int main(int argc, char** argv) //execution for (int t = 0; t < max_iter; ++t) { skl.run(); - if (t % 10 == 0) { - postProcess(grid, descriptor.getDepth(), fout, cellType, vel, rho); + if (t % 100 == 0) { + postProcess(grid, descriptor.getDepth(), fout, cellType, t, vel, rho); } + } grid.getBackend().syncAll(); From 6c7b0db83adac7b408545697b723fca355924ffa Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Tue, 31 Jan 2023 19:05:25 -0500 Subject: [PATCH 14/24] remove 2d-related bits --- apps/lbmMultiRes/lbmMultiRes.cu | 237 ++++++++++++++------------------ 1 file changed, 106 insertions(+), 131 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index 7ee8de08..47036ad7 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -10,29 +10,7 @@ enum CellType : int undefined = 3, }; -template -Neon::domain::Stencil create_stencil(); - -template <> -Neon::domain::Stencil create_stencil<2, 9>() -{ - std::vector stencil; - stencil.reserve(9); - for (int x = -1; x <= 1; ++x) { - for (int y = -1; y <= 1; ++y) { - stencil.emplace_back(Neon::index_3d(x, y, 0)); - } - } - return Neon::domain::Stencil(stencil); -} - -template <> -Neon::domain::Stencil create_stencil<3, 27>() -{ - return Neon::domain::Stencil::s27_t(false); -} - -NEON_CUDA_DEVICE_ONLY static constexpr char latticeVelocity2D[9][2] = { +/*NEON_CUDA_DEVICE_ONLY static constexpr char latticeVelocity2D[9][2] = { {0, 0}, //0 -> 0 || 0000 -> 0000 {0, -1}, //1 -> 2 || 0001 -> 0010 {0, 1}, //2 -> 1 || 0010 -> 0001 @@ -41,9 +19,9 @@ NEON_CUDA_DEVICE_ONLY static constexpr char latticeVelocity2D[9][2] = { {-1, 1}, //5 -> 7 || 0101 -> 0111 {1, 0}, //6 -> 3 || 0110 -> 0011 {1, -1}, //7 -> 5 || 0111 -> 0101 - {1, 1}}; //8 -> 4 || 1000 -> 0100 + {1, 1}}; //8 -> 4 || 1000 -> 0100*/ -NEON_CUDA_DEVICE_ONLY static constexpr char latticeVelocity3D[27][3] = { +/*NEON_CUDA_DEVICE_ONLY static constexpr char latticeVelocity3D[27][3] = { {0, 0, 0}, //0 -> 0 || 00000 -> 00000 {0, -1, 0}, //1 -> 2 || 00001 -> 00010 {0, 1, 0}, //2 -> 1 || 00010 -> 00001 @@ -75,53 +53,70 @@ NEON_CUDA_DEVICE_ONLY static constexpr char latticeVelocity3D[27][3] = { {1, 1, 1} //26 -> 13 || 11010 -> 01101 }; + NEON_CUDA_DEVICE_ONLY static constexpr char latticeOppositeID[27] = { //opposite q for 2d is a subset of what is in 3d so only use one - 0, 2, 1, 6, 8, 7, 3, 5, 4, 18, 20, 19, 24, 26, 25, 21, 23, 22, 9, 11, 10, 15, 17, 16, 12, 14, 13}; + 0, 2, 1, 6, 8, 7, 3, 5, 4, 18, 20, 19, 24, 26, 25, 21, 23, 22, 9, 11, 10, 15, 17, 16, 12, 14, 13};*/ + +NEON_CUDA_DEVICE_ONLY static constexpr char latticeVelocity3D[19][3] = { + {0, 0, 0}, //0 -> 0 || 00000 -> 00000 + {0, -1, 0}, //1 -> 2 || 00001 -> 00010 + {0, 1, 0}, //2 -> 1 || 00010 -> 00001 + {-1, 0, 0}, //3 -> 6 || 00100 -> 00110 + {-1, -1, 0}, //4 -> 8 || 00100 -> 01000 + {-1, 1, 0}, //5 -> 7 || 00101 -> 00111 + {1, 0, 0}, //6 -> 3 || 00110 -> 00011 + {1, -1, 0}, //7 -> 5 || 00111 -> 00101 + {1, 1, 0}, //8 -> 4 || 01000 -> 00100 + + {0, 0, -1}, //9 -> 18 || 01000 -> 10010 + {0, -1, -1}, //10 -> 20 || 01010 -> 10100 + {0, 1, -1}, //11 -> 19 || 01011 -> 10011 + {-1, 0, -1}, //12 -> 24 || 01100 -> 11000 + //{-1, -1, -1},//13 -> 26 || 01101 -> 11010 + //{-1, 1, -1}, //14 -> 25 || 01110 -> 11001 + {1, 0, -1}, //15 -> 21 || 01111 -> 10101 + //{1, -1, -1}, //16 -> 23 || 10000 -> 10111 + //{1, 1, -1}, //17 -> 22 || 10001 -> 10110 + + + {0, 0, 1}, //18 -> 9 || 10010 -> 01001 + {0, -1, 1}, //19 -> 11 || 10011 -> 01011 + {0, 1, 1}, //20 -> 10 || 10100 -> 01010 + {-1, 0, 1}, //21 -> 15 || 10101 -> 01111 + //{-1, -1, 1},//22 -> 17 || 10110 -> 10001 + //{-1, 1, 1}, //23 -> 16 || 10111 -> 10000 + {1, 0, 1}, //24 -> 12 || 11000 -> 01100 + //{1, -1, 1}, //25 -> 14 || 11001 -> 01110 + //{1, 1, 1} //26 -> 13 || 11010 -> 01101 + +}; +NEON_CUDA_DEVICE_ONLY static constexpr char latticeOppositeID[19] = { + //opposite q for 2d is a subset of what is in 3d so only use one + 0, 2, 1, 6, 8, 7, 3, 5, 4, 18, 20, 19, 24, 21, 9, 11, 10, 15, 12}; + -template NEON_CUDA_HOST_DEVICE Neon::int8_3d getDir(const int8_t q) { - if constexpr (DIM == 2) { - return Neon::int8_3d(latticeVelocity2D[q][0], latticeVelocity2D[q][1], 0); - } - if constexpr (DIM == 3) { - return Neon::int8_3d(latticeVelocity3D[q][0], latticeVelocity3D[q][1], latticeVelocity3D[q][2]); - } + return Neon::int8_3d(latticeVelocity3D[q][0], latticeVelocity3D[q][1], latticeVelocity3D[q][2]); } -template +template struct latticeWeight { NEON_CUDA_HOST_DEVICE __inline__ constexpr latticeWeight() : t() { - if constexpr (DIM == 2) { - - for (int i = 0; i < Q; ++i) { - if (latticeVelocity2D[i][0] * latticeVelocity2D[i][0] + - latticeVelocity2D[i][1] * latticeVelocity2D[i][1] < - 1.1f) { - t[i] = 1.0f / 9.0f; - } else { - t[i] = 1.0f / 36.0f; - } + t[0] = 1.0f / 3.0f; + for (int i = 1; i < Q; ++i) { + if (latticeVelocity3D[i][0] * latticeVelocity3D[i][0] + + latticeVelocity3D[i][1] * latticeVelocity3D[i][1] + + latticeVelocity3D[i][2] * latticeVelocity3D[i][2] < + 1.1f) { + t[i] = 2.0f / 36.0f; + } else { + t[i] = 1.0f / 36.0f; } - t[0] = 4.0f / 9.0f; - } - - if constexpr (DIM == 3) { - for (int i = 0; i < Q; ++i) { - if (latticeVelocity3D[i][0] * latticeVelocity3D[i][0] + - latticeVelocity3D[i][1] * latticeVelocity3D[i][1] + - latticeVelocity3D[i][2] * latticeVelocity3D[i][2] < - 1.1f) { - t[i] = 2.0f / 36.0f; - } else { - t[i] = 1.0f / 36.0f; - } - } - t[0] = 1.0f / 3.0f; } } float t[Q]; @@ -158,36 +153,26 @@ NEON_CUDA_HOST_DEVICE T computeOmega(T omega0, int level, int numLevels) return 2 * omega0 / (scalbln(1.0, ilevel + 1) + (1. - scalbln(1.0, ilevel)) * omega0); } -template +template NEON_CUDA_HOST_DEVICE Neon::Vec_3d velocity(const T* fin, const T rho) { Neon::Vec_3d vel(0, 0, 0); - if constexpr (DIM == 2) { - for (int i = 0; i < Q; ++i) { - const T f = fin[i]; - for (int d = 0; d < DIM; ++d) { - vel.v[d] += f * latticeVelocity2D[i][d]; - } - } - } - if constexpr (DIM == 3) { - for (int i = 0; i < Q; ++i) { - const T f = fin[i]; - for (int d = 0; d < DIM; ++d) { - vel.v[d] += f * latticeVelocity3D[i][d]; - } + for (int i = 0; i < Q; ++i) { + const T f = fin[i]; + for (int d = 0; d < 3; ++d) { + vel.v[d] += f * latticeVelocity3D[i][d]; } } - for (int d = 0; d < DIM; ++d) { + for (int d = 0; d < 3; ++d) { vel.v[d] /= rho; } return vel; } -template +template Neon::set::Container collide(Neon::domain::mGrid& grid, T omega0, int level, @@ -209,7 +194,7 @@ Neon::set::Container collide(Neon::domain::mGrid& grid, if (!in.hasChildren(cell)) { - constexpr auto t = latticeWeight(); + constexpr auto t = latticeWeight(); //fin T ins[Q]; @@ -224,18 +209,14 @@ Neon::set::Container collide(Neon::domain::mGrid& grid, } //velocity - const Neon::Vec_3d vel = velocity(ins, rho); + const Neon::Vec_3d vel = velocity(ins, rho); const T usqr = (3.0 / 2.0) * (vel.x * vel.x + vel.y * vel.y + vel.z * vel.z); for (int i = 0; i < Q; ++i) { T cu = 0; - for (int d = 0; d < DIM; ++d) { - if constexpr (DIM == 2) { - cu += latticeVelocity2D[i][d] * vel.v[d]; - } else { - cu += latticeVelocity3D[i][d] * vel.v[d]; - } + for (int d = 0; d < 3; ++d) { + cu += latticeVelocity3D[i][d] * vel.v[d]; } //equilibrium T feq = rho * t.t[i] * (1. + cu + 0.5 * cu * cu - usqr); @@ -249,7 +230,7 @@ Neon::set::Container collide(Neon::domain::mGrid& grid, }); } -template +template Neon::set::Container stream(Neon::domain::mGrid& grid, int level, const Neon::domain::mGrid::Field& cellType, @@ -273,7 +254,7 @@ Neon::set::Container stream(Neon::domain::mGrid& grid, if (!fpost_stm.hasChildren(cell)) { for (int8_t q = 0; q < Q; ++q) { - const Neon::int8_3d dir = -getDir(q); + const Neon::int8_3d dir = -getDir(q); //if the neighbor cell has children, then this 'cell' is interfacing with L-1 (fine) along q direction if (!fpost_stm.hasChildren(cell, dir)) { @@ -292,7 +273,7 @@ Neon::set::Container stream(Neon::domain::mGrid& grid, }); } -template +template Neon::set::Container explosionPull(Neon::domain::mGrid& grid, int level, const Neon::domain::mGrid::Field& postCollision, @@ -316,7 +297,7 @@ Neon::set::Container explosionPull(Neon::domain::mGrid& grid, if (!fpost_stm.hasChildren(cell)) { for (int8_t q = 1; q < Q; ++q) { - const Neon::int8_3d dir = -getDir(q); + const Neon::int8_3d dir = -getDir(q); //if the neighbor cell has children, then this 'cell' is interfacing with L-1 (fine) along q direction //we want to only work on cells that interface with L+1 (coarse) cell along q @@ -346,7 +327,7 @@ Neon::set::Container explosionPull(Neon::domain::mGrid& grid, } -template +template Neon::set::Container coalescencePull(Neon::domain::mGrid& grid, int level, Neon::domain::mGrid::Field& postStreaming) @@ -365,7 +346,7 @@ Neon::set::Container coalescencePull(Neon::domain::mGrid& grid, if (!fpost_stm.hasChildren(cell)) { for (int q = 1; q < Q; ++q) { - const Neon::int8_3d dir = -getDir(q); + const Neon::int8_3d dir = -getDir(q); //if we have a neighbor at the same level that has been refined, then cell is on //the interface and this is where we should do the coalescence if (fpost_stm.hasChildren(cell, dir)) { @@ -381,7 +362,7 @@ Neon::set::Container coalescencePull(Neon::domain::mGrid& grid, } -template +template Neon::set::Container store(Neon::domain::mGrid& grid, int level, Neon::domain::mGrid::Field& postStreaming) @@ -416,7 +397,7 @@ Neon::set::Container store(Neon::domain::mGrid& grid, //for each direction aka for each neighbor //we skip the center here for (int8_t q = 1; q < Q; ++q) { - const Neon::int8_3d q_dir = getDir(q); + const Neon::int8_3d q_dir = getDir(q); //check if the neighbor in this direction has children auto neighborCell = fpost_stm.getNghCell(cell, q_dir); @@ -431,7 +412,7 @@ Neon::set::Container store(Neon::domain::mGrid& grid, //for every neighbor cell including the center cell (i.e., cell) for (int8_t p = 0; p < Q; ++p) { - const Neon::int8_3d p_dir = getDir(p); + const Neon::int8_3d p_dir = getDir(p); //relative direction of q w.r.t p //i.e., in which direction we should move starting from p to land on q @@ -473,7 +454,7 @@ Neon::set::Container store(Neon::domain::mGrid& grid, } -template +template void stream(Neon::domain::mGrid& grid, int level, const int numLevels, @@ -482,7 +463,7 @@ void stream(Neon::domain::mGrid& grid, Neon::domain::mGrid::Field& postStreaming, std::vector& containers) { - containers.push_back(stream(grid, level, cellType, postCollision, postStreaming)); + containers.push_back(stream(grid, level, cellType, postCollision, postStreaming)); /* * Streaming for interface voxels that have @@ -493,18 +474,18 @@ void stream(Neon::domain::mGrid& grid, /* Explosion: pull missing populations from coarser neighbors by copying coarse (level+1) to fine (level) * neighbors, initiated by the fine level ("Pull"). */ - containers.push_back(explosionPull(grid, level, postCollision, postStreaming)); + containers.push_back(explosionPull(grid, level, postCollision, postStreaming)); } if (level != 0) { /* Coalescence: pull missing populations from finer neighbors by "smart" averaging fine (level-1) * to coarse (level) communication, initiated by the coarse level ("Pull"). */ - containers.push_back(coalescencePull(grid, level, postStreaming)); + containers.push_back(coalescencePull(grid, level, postStreaming)); } } -template +template void nonUniformTimestepRecursive(Neon::domain::mGrid& grid, const T omega0, const int level, @@ -515,21 +496,21 @@ void nonUniformTimestepRecursive(Neon::domain::mGrid& gri std::vector& containers) { // 1) collision for all voxels at level L=level - containers.push_back(collide(grid, omega0, level, numLevels, cellType, fin, fout)); + containers.push_back(collide(grid, omega0, level, numLevels, cellType, fin, fout)); // 2) Storing fine (level - 1) data for later "coalescence" pulled by the coarse (level) if (level != numLevels - 1) { - containers.push_back(store(grid, level + 1, fout)); + containers.push_back(store(grid, level + 1, fout)); } // 3) recurse down if (level != 0) { - nonUniformTimestepRecursive(grid, omega0, level - 1, numLevels, cellType, fin, fout, containers); + nonUniformTimestepRecursive(grid, omega0, level - 1, numLevels, cellType, fin, fout, containers); } // 4) Streaming step that also performs the necessary "explosion" and "coalescence" steps. - stream(grid, level, numLevels, cellType, fout, fin, containers); + stream(grid, level, numLevels, cellType, fout, fin, containers); // 5) stop if (level == numLevels - 1) { @@ -537,20 +518,20 @@ void nonUniformTimestepRecursive(Neon::domain::mGrid& gri } // 6) collision for all voxels at level L = level - containers.push_back(collide(grid, omega0, level, numLevels, cellType, fin, fout)); + containers.push_back(collide(grid, omega0, level, numLevels, cellType, fin, fout)); // 7) Storing fine(level) data for later "coalescence" pulled by the coarse(level) if (level != numLevels - 1) { - containers.push_back(store(grid, level + 1, fout)); + containers.push_back(store(grid, level + 1, fout)); } // 8) recurse down if (level != 0) { - nonUniformTimestepRecursive(grid, omega0, level - 1, numLevels, cellType, fin, fout, containers); + nonUniformTimestepRecursive(grid, omega0, level - 1, numLevels, cellType, fin, fout, containers); } // 9) Streaming step - stream(grid, level, numLevels, cellType, fout, fin, containers); + stream(grid, level, numLevels, cellType, fout, fin, containers); } @@ -572,7 +553,7 @@ inline float sdfCube(Neon::index_3d id, Neon::index_3d dim, Neon::float_3d b = { return val; } -template +template void postProcess(Neon::domain::mGrid& grid, const int numLevels, const Neon::domain::mGrid::Field& fpop, @@ -595,7 +576,7 @@ void postProcess(Neon::domain::mGrid& grid, return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { - constexpr auto t = latticeWeight(); + constexpr auto t = latticeWeight(); if (type(cell, 0) == CellType::bulk) { @@ -613,7 +594,7 @@ void postProcess(Neon::domain::mGrid& grid, rh(cell, 0) = r; //velocity - const Neon::Vec_3d vel = velocity(ins, r); + const Neon::Vec_3d vel = velocity(ins, r); u(cell, 0) = vel.v[0]; u(cell, 1) = vel.v[1]; @@ -625,7 +606,7 @@ void postProcess(Neon::domain::mGrid& grid, if (!pop.hasChildren(cell)) { assert(!pop.hasChildren(cell)); for (int8_t q = 0; q < Q; ++q) { - const Neon::int8_3d dir = -getDir(q); + const Neon::int8_3d dir = -getDir(q); if (!pop.hasChildren(cell, dir)) { auto neighbor = pop.nghVal(cell, dir, q, T(0)); if (neighbor.isValid) { @@ -637,9 +618,9 @@ void postProcess(Neon::domain::mGrid& grid, rh(cell, 0) = r; - const Neon::Vec_3d vel = velocity(ins, rho); + const Neon::Vec_3d vel = velocity(ins, rho); - for (int d = 0; d < DIM; ++d) { + for (int d = 0; d < 3; ++d) { u(cell, d) = vel.v[d]; } }*/ @@ -647,7 +628,7 @@ void postProcess(Neon::domain::mGrid& grid, if (type(cell, 0) == CellType::movingWall) { rh(cell, 0) = 1.0; - for (int d = 0; d < DIM; ++d) { + for (int d = 0; d < 3; ++d) { int i = (d == 0) ? 3 : ((d == 1) ? 1 : 9); u(cell, d) = pop(cell, i) / (6.0 * 1.0 / 18.0); } @@ -679,8 +660,7 @@ int main(int argc, char** argv) std::vector gpu_ids{0}; Neon::Backend backend(gpu_ids, runtime); - constexpr int DIM = 3; - constexpr int Q = (DIM == 2) ? 9 : 27; + constexpr int Q = 19; constexpr int depth = 3; const Neon::domain::mGridDescriptor descriptor(depth); @@ -715,7 +695,7 @@ int main(int argc, char** argv) return sdfCube(id, grid_dim - 1) <= levelSDF[2] && sdfCube(id, grid_dim - 1) > levelSDF[3]; }}, - create_stencil(), descriptor); + Neon::domain::Stencil::s19_t(false), descriptor); //grid.topologyToVTK("lbm.vtk", false); @@ -759,7 +739,7 @@ int main(int argc, char** argv) // init fin and fout - constexpr auto t = latticeWeight(); + constexpr auto t = latticeWeight(); auto init = [&](const Neon::int32_3d idx, const int q) { T ret = t.t[q]; @@ -770,12 +750,8 @@ int main(int argc, char** argv) if (idx.y == grid_dim.y - 1) { ret = 0; - for (int d = 0; d < DIM; ++d) { - if (DIM == 2) { - ret += latticeVelocity2D[q][d] * ulid.v[d]; - } else { - ret += latticeVelocity3D[q][d] * ulid.v[d]; - } + for (int d = 0; d < 3; ++d) { + ret += latticeVelocity3D[q][d] * ulid.v[d]; } ret *= -6. * t.t[q] * ulb; } else { @@ -810,12 +786,12 @@ int main(int argc, char** argv) //skeleton std::vector containers; - nonUniformTimestepRecursive(grid, - omega, - descriptor.getDepth() - 1, - descriptor.getDepth(), - cellType, - fin, fout, containers); + nonUniformTimestepRecursive(grid, + omega, + descriptor.getDepth() - 1, + descriptor.getDepth(), + cellType, + fin, fout, containers); Neon::skeleton::Skeleton skl(grid.getBackend()); skl.sequence(containers, "MultiResLBM"); @@ -825,9 +801,8 @@ int main(int argc, char** argv) for (int t = 0; t < max_iter; ++t) { skl.run(); if (t % 100 == 0) { - postProcess(grid, descriptor.getDepth(), fout, cellType, t, vel, rho); + postProcess(grid, descriptor.getDepth(), fout, cellType, t, vel, rho); } - } grid.getBackend().syncAll(); From 4e1e083b0209cce029836eea425744ab3a2da7df Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Tue, 31 Jan 2023 20:59:51 -0500 Subject: [PATCH 15/24] fix initialization and stream bugs --- apps/lbmMultiRes/lbmMultiRes.cu | 187 ++++++++++++++------------------ 1 file changed, 80 insertions(+), 107 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index 47036ad7..385400b6 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -59,41 +59,32 @@ NEON_CUDA_DEVICE_ONLY static constexpr char latticeOppositeID[27] = { 0, 2, 1, 6, 8, 7, 3, 5, 4, 18, 20, 19, 24, 26, 25, 21, 23, 22, 9, 11, 10, 15, 17, 16, 12, 14, 13};*/ NEON_CUDA_DEVICE_ONLY static constexpr char latticeVelocity3D[19][3] = { - {0, 0, 0}, //0 -> 0 || 00000 -> 00000 - {0, -1, 0}, //1 -> 2 || 00001 -> 00010 - {0, 1, 0}, //2 -> 1 || 00010 -> 00001 - {-1, 0, 0}, //3 -> 6 || 00100 -> 00110 - {-1, -1, 0}, //4 -> 8 || 00100 -> 01000 - {-1, 1, 0}, //5 -> 7 || 00101 -> 00111 - {1, 0, 0}, //6 -> 3 || 00110 -> 00011 - {1, -1, 0}, //7 -> 5 || 00111 -> 00101 - {1, 1, 0}, //8 -> 4 || 01000 -> 00100 - - {0, 0, -1}, //9 -> 18 || 01000 -> 10010 - {0, -1, -1}, //10 -> 20 || 01010 -> 10100 - {0, 1, -1}, //11 -> 19 || 01011 -> 10011 - {-1, 0, -1}, //12 -> 24 || 01100 -> 11000 - //{-1, -1, -1},//13 -> 26 || 01101 -> 11010 - //{-1, 1, -1}, //14 -> 25 || 01110 -> 11001 - {1, 0, -1}, //15 -> 21 || 01111 -> 10101 - //{1, -1, -1}, //16 -> 23 || 10000 -> 10111 - //{1, 1, -1}, //17 -> 22 || 10001 -> 10110 - - - {0, 0, 1}, //18 -> 9 || 10010 -> 01001 - {0, -1, 1}, //19 -> 11 || 10011 -> 01011 - {0, 1, 1}, //20 -> 10 || 10100 -> 01010 - {-1, 0, 1}, //21 -> 15 || 10101 -> 01111 - //{-1, -1, 1},//22 -> 17 || 10110 -> 10001 - //{-1, 1, 1}, //23 -> 16 || 10111 -> 10000 - {1, 0, 1}, //24 -> 12 || 11000 -> 01100 - //{1, -1, 1}, //25 -> 14 || 11001 -> 01110 - //{1, 1, 1} //26 -> 13 || 11010 -> 01101 + {0, 0, 0}, //0 -> 0 + {0, -1, 0}, //1 -> 2 + {0, 1, 0}, //2 -> 1 + {-1, 0, 0}, //3 -> 6 + {-1, -1, 0}, //4 -> 8 + {-1, 1, 0}, //5 -> 7 + {1, 0, 0}, //6 -> 3 + {1, -1, 0}, //7 -> 5 + {1, 1, 0}, //8 -> 4 + + {0, 0, -1}, //9 -> 14 + {0, -1, -1}, //10 -> 16 + {0, 1, -1}, //11 -> 15 + {-1, 0, -1}, //12 -> 18 + {1, 0, -1}, //13 -> 17 + + {0, 0, 1}, //14 -> 9 + {0, -1, 1}, //15 -> 11 + {0, 1, 1}, //16 -> 10 + {-1, 0, 1}, //17 -> 13 + {1, 0, 1}, //18 -> 12 }; NEON_CUDA_DEVICE_ONLY static constexpr char latticeOppositeID[19] = { //opposite q for 2d is a subset of what is in 3d so only use one - 0, 2, 1, 6, 8, 7, 3, 5, 4, 18, 20, 19, 24, 21, 9, 11, 10, 15, 12}; + 0, 2, 1, 6, 8, 7, 3, 5, 4, 14, 16, 15, 18, 17, 9, 11, 10, 13, 12}; NEON_CUDA_HOST_DEVICE Neon::int8_3d getDir(const int8_t q) @@ -258,12 +249,15 @@ Neon::set::Container stream(Neon::domain::mGrid& grid, //if the neighbor cell has children, then this 'cell' is interfacing with L-1 (fine) along q direction if (!fpost_stm.hasChildren(cell, dir)) { - - if (type.nghVal(cell, dir, 0, CellType::undefined).value == CellType::bulk) { - fpost_stm(cell, q) = fpost_col.nghVal(cell, dir, q, T(0)).value; - } else { - const int8_t opposte_q = latticeOppositeID[q]; - fpost_stm(cell, q) = fpost_col(cell, opposte_q) + fpost_col.nghVal(cell, dir, opposte_q, T(0)).value; + auto nghType = type.nghVal(cell, dir, 0, CellType::undefined); + + if (nghType.isValid) { + if (nghType.value == CellType::bulk) { + fpost_stm(cell, q) = fpost_col.nghVal(cell, dir, q, T(0)).value; + } else { + const int8_t opposte_q = latticeOppositeID[q]; + fpost_stm(cell, q) = fpost_col(cell, opposte_q) + fpost_col.nghVal(cell, dir, opposte_q, T(0)).value; + } } } } @@ -578,59 +572,36 @@ void postProcess(Neon::domain::mGrid& grid, return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { constexpr auto t = latticeWeight(); - if (type(cell, 0) == CellType::bulk) { + if (!pop.hasChildren(cell)) { + if (type(cell, 0) == CellType::bulk) { - //fin - T ins[Q]; - for (int i = 0; i < Q; ++i) { - ins[i] = pop(cell, i); - } - - //density - T r = 0; - for (int i = 0; i < Q; ++i) { - r += ins[i]; - } - rh(cell, 0) = r; - - //velocity - const Neon::Vec_3d vel = velocity(ins, r); - - u(cell, 0) = vel.v[0]; - u(cell, 1) = vel.v[1]; - u(cell, 2) = vel.v[2]; - - - /*T st[Q]; - T r = 0; - if (!pop.hasChildren(cell)) { - assert(!pop.hasChildren(cell)); - for (int8_t q = 0; q < Q; ++q) { - const Neon::int8_3d dir = -getDir(q); - if (!pop.hasChildren(cell, dir)) { - auto neighbor = pop.nghVal(cell, dir, q, T(0)); - if (neighbor.isValid) { - st[q] = neighbor.value; - r += st[q]; - } - } + //fin + T ins[Q]; + for (int i = 0; i < Q; ++i) { + ins[i] = pop(cell, i); } + //density + T r = 0; + for (int i = 0; i < Q; ++i) { + r += ins[i]; + } rh(cell, 0) = r; - const Neon::Vec_3d vel = velocity(ins, rho); + //velocity + const Neon::Vec_3d vel = velocity(ins, r); + + u(cell, 0) = vel.v[0]; + u(cell, 1) = vel.v[1]; + u(cell, 2) = vel.v[2]; + } + if (type(cell, 0) == CellType::movingWall) { + rh(cell, 0) = 1.0; for (int d = 0; d < 3; ++d) { - u(cell, d) = vel.v[d]; + int i = (d == 0) ? 3 : ((d == 1) ? 1 : 9); + u(cell, d) = pop(cell, i) / (6.0 * 1.0 / 18.0); } - }*/ - } - if (type(cell, 0) == CellType::movingWall) { - rh(cell, 0) = 1.0; - - for (int d = 0; d < 3; ++d) { - int i = (d == 0) ? 3 : ((d == 1) ? 1 : 9); - u(cell, d) = pop(cell, i) / (6.0 * 1.0 / 18.0); } } }; @@ -665,22 +636,22 @@ int main(int argc, char** argv) const Neon::domain::mGridDescriptor descriptor(depth); - //const Neon::index_3d grid_dim(144, 144, 144); - //float levelSDF[depth + 1]; - //levelSDF[0] = 0; - //levelSDF[1] = -28.0 / 144.0; - //levelSDF[2] = -56.0 / 144.0; - //levelSDF[3] = -1.0; - - - const Neon::index_3d grid_dim(24, 24, 24); + const Neon::index_3d grid_dim(144, 144, 144); float levelSDF[depth + 1]; levelSDF[0] = 0; - levelSDF[1] = -8 / 24.0; - levelSDF[2] = -16 / 24.0; + levelSDF[1] = -28.0 / 144.0; + levelSDF[2] = -56.0 / 144.0; levelSDF[3] = -1.0; + //const Neon::index_3d grid_dim(24, 24, 24); + //float levelSDF[depth + 1]; + //levelSDF[0] = 0; + //levelSDF[1] = -8 / 24.0; + //levelSDF[2] = -16 / 24.0; + //levelSDF[3] = -1.0; + + Neon::domain::mGrid grid( backend, grid_dim, {[&](const Neon::index_3d id) -> bool { @@ -741,21 +712,23 @@ int main(int argc, char** argv) // init fin and fout constexpr auto t = latticeWeight(); - auto init = [&](const Neon::int32_3d idx, const int q) { + auto init = [&](const Neon::int32_3d idx, const int q, const int l) { T ret = t.t[q]; - if (idx.x == 0 || idx.x == grid_dim.x - 1 || - idx.y == 0 || idx.y == grid_dim.y - 1 || - idx.z == 0 || idx.z == grid_dim.z - 1) { + if (l == 0) { + if (idx.x == 0 || idx.x == grid_dim.x - 1 || + idx.y == 0 || idx.y == grid_dim.y - 1 || + idx.z == 0 || idx.z == grid_dim.z - 1) { - if (idx.y == grid_dim.y - 1) { - ret = 0; - for (int d = 0; d < 3; ++d) { - ret += latticeVelocity3D[q][d] * ulid.v[d]; + if (idx.y == grid_dim.y - 1) { + ret = 0; + for (int d = 0; d < 3; ++d) { + ret += latticeVelocity3D[q][d] * ulid.v[d]; + } + ret *= -6. * t.t[q] * ulb; + } else { + ret = 0; } - ret *= -6. * t.t[q] * ulb; - } else { - ret = 0; } } return ret; @@ -765,12 +738,12 @@ int main(int argc, char** argv) fin.forEachActiveCell( l, [&](const Neon::int32_3d idx, const int q, T& val) { - val = init(idx, q); + val = init(idx, q, l); }); fout.forEachActiveCell( l, [&](const Neon::int32_3d idx, const int q, T& val) { - val = init(idx, q); + val = init(idx, q, l); }); vel.forEachActiveCell(l, [&](const Neon::int32_3d, const int, T& val) { val = 0; @@ -795,7 +768,7 @@ int main(int argc, char** argv) Neon::skeleton::Skeleton skl(grid.getBackend()); skl.sequence(containers, "MultiResLBM"); - //skl.ioToDot("MultiResLBM", "", true); + // skl.ioToDot("MultiResLBM", "", true); //execution for (int t = 0; t < max_iter; ++t) { From 999efd4295b114b8c014f11b9198c0b8e91a88ed Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Thu, 2 Feb 2023 11:18:03 -0500 Subject: [PATCH 16/24] better multi-res ioToVTK and minor fixes to multi-res LBM --- apps/lbmMultiRes/lbmMultiRes.cu | 63 +++--- .../Neon/domain/internal/mGrid/mField.h | 8 +- .../Neon/domain/internal/mGrid/mField_imp.h | 179 +++++++++++++++--- .../Neon/domain/internal/mGrid/mGrid.h | 1 - .../src/domain/internal/mGrid/mGrid.cpp | 144 -------------- .../tests/unit/gUt_mGrid/src/gUt_mGrid.cpp | 4 +- .../unit/sUt_multiRes/src/MultiResChild.h | 5 +- .../unit/sUt_multiRes/src/MultiResDemo.h | 6 +- .../tests/unit/sUt_multiRes/src/MultiResMap.h | 5 +- .../unit/sUt_multiRes/src/MultiResParent.h | 5 +- .../unit/sUt_multiRes/src/MultiResSkeleton.h | 5 +- .../unit/sUt_multiRes/src/MultiResStencil.h | 5 +- 12 files changed, 207 insertions(+), 223 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index 385400b6..49463060 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -2,6 +2,7 @@ #include "Neon/domain/mGrid.h" #include "Neon/skeleton/Skeleton.h" + enum CellType : int { bounceBack = 0, @@ -209,6 +210,8 @@ Neon::set::Container collide(Neon::domain::mGrid& grid, for (int d = 0; d < 3; ++d) { cu += latticeVelocity3D[i][d] * vel.v[d]; } + cu *= 3.0; + //equilibrium T feq = rho * t.t[i] * (1. + cu + 0.5 * cu * cu - usqr); @@ -556,7 +559,7 @@ void postProcess(Neon::domain::mGrid& grid, Neon::domain::mGrid::Field& vel, Neon::domain::mGrid::Field& rho) { - //fpop.updateIO(); + grid.getBackend().syncAll(); for (int level = 0; level < numLevels; ++level) { auto container = @@ -612,11 +615,17 @@ void postProcess(Neon::domain::mGrid& grid, grid.getBackend().syncAll(); - //vel.updateIO(); - //rho.updateIO(); + if (grid.getBackend().runtime() == Neon::Runtime::stream) { + vel.updateIO(); + rho.updateIO(); + } + + int precision = 4; + std::ostringstream suffix; + suffix << std::setw(precision) << std::setfill('0') << iteration; - vel.ioToVtk("vel_" + std::to_string(iteration), "vel"); - rho.ioToVtk("rho_" + std::to_string(iteration), "rho"); + vel.ioToVtk("Velocity_" + suffix.str()); + rho.ioToVtk("Density_" + suffix.str()); } int main(int argc, char** argv) @@ -636,22 +645,22 @@ int main(int argc, char** argv) const Neon::domain::mGridDescriptor descriptor(depth); - const Neon::index_3d grid_dim(144, 144, 144); - float levelSDF[depth + 1]; - levelSDF[0] = 0; - levelSDF[1] = -28.0 / 144.0; - levelSDF[2] = -56.0 / 144.0; - levelSDF[3] = -1.0; - - - //const Neon::index_3d grid_dim(24, 24, 24); + //const Neon::index_3d grid_dim(144, 144, 144); //float levelSDF[depth + 1]; //levelSDF[0] = 0; - //levelSDF[1] = -8 / 24.0; - //levelSDF[2] = -16 / 24.0; + //levelSDF[1] = -28.0 / 144.0; + //levelSDF[2] = -56.0 / 144.0; //levelSDF[3] = -1.0; + const Neon::index_3d grid_dim(24, 24, 24); + float levelSDF[depth + 1]; + levelSDF[0] = 0; + levelSDF[1] = -8 / 24.0; + levelSDF[2] = -16 / 24.0; + levelSDF[3] = -1.0; + + Neon::domain::mGrid grid( backend, grid_dim, {[&](const Neon::index_3d id) -> bool { @@ -668,14 +677,11 @@ int main(int argc, char** argv) }}, Neon::domain::Stencil::s19_t(false), descriptor); - //grid.topologyToVTK("lbm.vtk", false); - - //LBM problem const int max_iter = 3000; const T ulb = 0.04; - const T Re = 1000; - const T clength = grid_dim.x; + const T Re = 50; + const T clength = grid_dim.x * std::pow(2, -(descriptor.getDepth() - 1)); const T visclb = ulb * clength / Re; const T omega = 1.0 / (3. * visclb + 0.5); const Neon::double_3d ulid(ulb, 0., 0.); @@ -685,8 +691,9 @@ int main(int argc, char** argv) auto fout = grid.newField("fout", Q, 0); auto cellType = grid.newField("CellType", 1, CellType::bulk); - auto vel = grid.newField("vel", 3, 0); - auto rho = grid.newField("rho", 1, 0); + auto vel = grid.newField("vel", 3, -50); + auto rho = grid.newField("rho", 1, -50); + //classify voxels for (int l = 0; l < descriptor.getDepth(); ++l) { @@ -725,7 +732,7 @@ int main(int argc, char** argv) for (int d = 0; d < 3; ++d) { ret += latticeVelocity3D[q][d] * ulid.v[d]; } - ret *= -6. * t.t[q] * ulb; + ret *= -6. * t.t[q]; } else { ret = 0; } @@ -755,8 +762,6 @@ int main(int argc, char** argv) fin.updateCompute(); fout.updateCompute(); - //fin.ioToVtk("fin", "f"); - //skeleton std::vector containers; nonUniformTimestepRecursive(grid, @@ -773,13 +778,11 @@ int main(int argc, char** argv) //execution for (int t = 0; t < max_iter; ++t) { skl.run(); - if (t % 100 == 0) { + if (t % 10 == 0) { postProcess(grid, descriptor.getDepth(), fout, cellType, t, vel, rho); } } - grid.getBackend().syncAll(); - //fin.updateIO(); - //fout.updateIO(); + postProcess(grid, descriptor.getDepth(), fout, cellType, max_iter, vel, rho); } } \ No newline at end of file diff --git a/libNeonDomain/include/Neon/domain/internal/mGrid/mField.h b/libNeonDomain/include/Neon/domain/internal/mGrid/mField.h index 4458b1e0..0c0405bd 100644 --- a/libNeonDomain/include/Neon/domain/internal/mGrid/mField.h +++ b/libNeonDomain/include/Neon/domain/internal/mGrid/mField.h @@ -88,9 +88,11 @@ class mField Neon::computeMode_t::computeMode_e mode = Neon::computeMode_t::computeMode_e::par) -> void; - auto ioToVtk(const std::string& fileName, - const std::string& FieldName, - Neon::IoFileType ioFileType = Neon::IoFileType::ASCII) const -> void; + auto ioToVtk(std::string fileName, + bool outputLevels = true, + bool outputBlockID = true, + bool outputVoxelID = true, + bool filterOverlaps = true) const -> void; auto load(Neon::set::Loader loader, int level, Neon::MultiResCompute compute) -> typename xField::Partition&; diff --git a/libNeonDomain/include/Neon/domain/internal/mGrid/mField_imp.h b/libNeonDomain/include/Neon/domain/internal/mGrid/mField_imp.h index 9f0d9936..1df303fd 100644 --- a/libNeonDomain/include/Neon/domain/internal/mGrid/mField_imp.h +++ b/libNeonDomain/include/Neon/domain/internal/mGrid/mField_imp.h @@ -262,39 +262,172 @@ auto mField::load(Neon::set::Loader loader, template -auto mField::ioToVtk(const std::string& fileName, - const std::string& fieldName, - Neon::IoFileType ioFileType) const -> void +auto mField::ioToVtk(std::string fileName, + bool outputLevels, + bool outputBlockID, + bool outputVoxelID, + bool filterOverlaps) const -> void { - const auto& descriptor = mData->grid->getDescriptor(); + auto l0Dim = mData->grid->getDimension(0); + + std::ofstream file(fileName + ".vtk"); + file << "# vtk DataFile Version 2.0\n"; + file << "mGrid\n"; + file << "ASCII\n"; + file << "DATASET UNSTRUCTURED_GRID\n"; + file << "POINTS " << (l0Dim.rMax() + 1) * (l0Dim.rMax() + 1) * (l0Dim.rMax() + 1) << " float \n"; + for (int z = 0; z < l0Dim.rMax() + 1; ++z) { + for (int y = 0; y < l0Dim.rMax() + 1; ++y) { + for (int x = 0; x < l0Dim.rMax() + 1; ++x) { + file << x << " " << y << " " << z << "\n"; + } + } + } - for (int l = 0; l < descriptor.getDepth(); ++l) { + uint64_t num_cells = 0; + + auto mapTo1D = [&](int x, int y, int z) { + return x + + y * (l0Dim.rMax() + 1) + + z * (l0Dim.rMax() + 1) * (l0Dim.rMax() + 1); + }; + + enum class Op : int + { + Count = 0, + OutputTopology = 1, + OutputLevels = 2, + OutputBlockID = 3, + OutputVoxelID = 4, + OutputData = 5, + }; + + auto desc = mData->grid->getDescriptor(); + auto card = (*this)(0).getCardinality(); + + auto loopOverActiveBlocks = [&](const Op op) { + for (int l = 0; l < desc.getDepth(); ++l) { + const int refFactor = desc.getRefFactor(l); + const int childSpacing = desc.getSpacing(l - 1); + + (*(mData->grid))(l).getBlockOriginTo1D().forEach([&](const Neon::int32_3d blockOrigin, const uint32_t blockIdx) { + // TODO need to figure out which device owns this block + SetIdx devID(0); + + + for (int z = 0; z < refFactor; z++) { + for (int y = 0; y < refFactor; y++) { + for (int x = 0; x < refFactor; x++) { + Cell cell(static_cast(x), + static_cast(y), + static_cast(z)); + cell.mBlockID = blockIdx; + cell.mBlockSize = refFactor; + + if (cell.computeIsActive( + (*(mData->grid))(l).getActiveMask().rawMem(devID, Neon::DeviceType::CPU))) { + + Neon::int32_3d corner(blockOrigin.x + x * childSpacing, + blockOrigin.y + y * childSpacing, + blockOrigin.z + z * childSpacing); + + bool draw = true; + if (filterOverlaps && l != 0) { + auto cornerIDIter = (*(mData->grid))(l - 1).getBlockOriginTo1D().getMetadata(corner); + if (cornerIDIter) { + draw = false; + } + } + + if (draw) { + if (op == Op::Count) { + num_cells++; + } else if (op == Op::OutputTopology) { + + file << "8 "; + //x,y,z + file << mapTo1D(corner.x, corner.y, corner.z) << " "; + //+x,y,z + file << mapTo1D(corner.x + childSpacing, corner.y, corner.z) << " "; + + //x,+y,z + file << mapTo1D(corner.x, corner.y + childSpacing, corner.z) << " "; + + //+x,+y,z + file << mapTo1D(corner.x + childSpacing, corner.y + childSpacing, corner.z) << " "; + + //x,y,+z + file << mapTo1D(corner.x, corner.y, corner.z + childSpacing) << " "; + + //+x,y,+z + file << mapTo1D(corner.x + childSpacing, corner.y, corner.z + childSpacing) << " "; + + //x,+y,+z + file << mapTo1D(corner.x, corner.y + childSpacing, corner.z + childSpacing) << " "; + + //+x,+y,+z + file << mapTo1D(corner.x + childSpacing, corner.y + childSpacing, corner.z + childSpacing) << " "; + file << "\n"; + } else if (op == Op::OutputLevels) { + file << l << "\n"; + } else if (op == Op::OutputBlockID) { + file << blockIdx << "\n"; + } else if (op == Op::OutputVoxelID) { + file << x + y * refFactor + z * refFactor * refFactor + << "\n"; + } else if (op == Op::OutputData) { + for (int c = 0; c < card; ++c) { + file << (*this)(l).getPartition(Neon::Execution::host, devID, Neon::DataView::STANDARD)(cell, c)<< "\n"; + } + } + } + } + } + } + } + }); + } + }; - double spacing = double(descriptor.getSpacing(l - 1)); + loopOverActiveBlocks(Op::Count); - auto iovtk = IoToVTK(fileName + "_level" + std::to_string(l), - mData->grid->getDimension(l) + 1, - {spacing, spacing, spacing}, - {0, 0, 0}, - ioFileType); + file << "CELLS " << num_cells << " " << num_cells * 9 << " \n"; + loopOverActiveBlocks(Op::OutputTopology); - iovtk.addField( - [&](Neon::index_3d idx, int card) -> T { - idx = descriptor.toBaseIndexSpace(idx, l); + file << "CELL_TYPES " << num_cells << " \n"; + for (uint64_t i = 0; i < num_cells; ++i) { + file << 11 << "\n"; + } - if (mData->grid->isInsideDomain(idx, l)) { - return getReference(idx, card, l); - } else { - return mData->fields[l].getOutsideValue(); - } - }, - mData->fields[l].getCardinality(), fieldName, ioToVTKns::VtiDataType_e::voxel); + file << "CELL_DATA " << num_cells << " \n"; - iovtk.flushAndClear(); + //data + file << "SCALARS " << (*this)(0).getName() << " float " << card << "\n"; + file << "LOOKUP_TABLE default \n"; + loopOverActiveBlocks(Op::OutputData); + + if (outputLevels) { + file << "SCALARS Level int 1 \n"; + file << "LOOKUP_TABLE default \n"; + loopOverActiveBlocks(Op::OutputLevels); + } + + if (outputBlockID) { + file << "SCALARS BlockID int 1 \n"; + file << "LOOKUP_TABLE default \n"; + loopOverActiveBlocks(Op::OutputBlockID); } -} + if (outputVoxelID) { + file << "SCALARS VoxelID int 1 \n"; + file << "LOOKUP_TABLE default \n"; + loopOverActiveBlocks(Op::OutputVoxelID); + } + + + file.close(); +} template auto mField::getSharedMemoryBytes(const int32_t stencilRadius, int level) const -> size_t diff --git a/libNeonDomain/include/Neon/domain/internal/mGrid/mGrid.h b/libNeonDomain/include/Neon/domain/internal/mGrid/mGrid.h index 982b0040..72b50d14 100644 --- a/libNeonDomain/include/Neon/domain/internal/mGrid/mGrid.h +++ b/libNeonDomain/include/Neon/domain/internal/mGrid/mGrid.h @@ -175,7 +175,6 @@ class mGrid auto getDescriptor() const -> const mGridDescriptor&; auto getRefFactors() const -> const Neon::set::MemSet_t&; auto getLevelSpacing() const -> const Neon::set::MemSet_t&; - void topologyToVTK(std::string fileName, bool filterOverlaps) const; auto getBackend() const -> const Backend&; auto getBackend() -> Backend&; diff --git a/libNeonDomain/src/domain/internal/mGrid/mGrid.cpp b/libNeonDomain/src/domain/internal/mGrid/mGrid.cpp index 5848e3f2..285b92e0 100644 --- a/libNeonDomain/src/domain/internal/mGrid/mGrid.cpp +++ b/libNeonDomain/src/domain/internal/mGrid/mGrid.cpp @@ -596,148 +596,4 @@ auto mGrid::getBackend() -> Backend& return mData->backend; } -void mGrid::topologyToVTK(std::string fileName, bool filterOverlaps) const -{ - - std::ofstream file(fileName); - file << "# vtk DataFile Version 2.0\n"; - file << "mGrid\n"; - file << "ASCII\n"; - file << "DATASET UNSTRUCTURED_GRID\n"; - file << "POINTS " << (getDimension(0).rMax() + 1) * (getDimension(0).rMax() + 1) * (getDimension(0).rMax() + 1) << " float \n"; - for (int z = 0; z < getDimension(0).rMax() + 1; ++z) { - for (int y = 0; y < getDimension(0).rMax() + 1; ++y) { - for (int x = 0; x < getDimension(0).rMax() + 1; ++x) { - file << x << " " << y << " " << z << "\n"; - } - } - } - - uint64_t num_cells = 0; - - auto mapTo1D = [&](int x, int y, int z) { - return x + - y * (getDimension(0).rMax() + 1) + - z * (getDimension(0).rMax() + 1) * (getDimension(0).rMax() + 1); - }; - - enum class Op : int - { - Count = 0, - OutputTopology = 1, - OutputLevels = 2, - OutputBlockID = 3, - OutputVoxelID = 4, - }; - - auto loopOverActiveBlocks = [&](const Op op) { - for (int l = 0; l < mData->mDescriptor.getDepth(); ++l) { - const int refFactor = mData->mDescriptor.getRefFactor(l); - const int childSpacing = mData->mDescriptor.getSpacing(l - 1); - - mData->grids[l].getBlockOriginTo1D().forEach([&](const Neon::int32_3d blockOrigin, const uint32_t blockIdx) { - // TODO need to figure out which device owns this block - SetIdx devID(0); - - - for (int z = 0; z < refFactor; z++) { - for (int y = 0; y < refFactor; y++) { - for (int x = 0; x < refFactor; x++) { - Cell cell(static_cast(x), - static_cast(y), - static_cast(z)); - cell.mBlockID = blockIdx; - cell.mBlockSize = refFactor; - - if (cell.computeIsActive( - mData->grids[l].getActiveMask().rawMem(devID, Neon::DeviceType::CPU))) { - - Neon::int32_3d corner(blockOrigin.x + x * childSpacing, - blockOrigin.y + y * childSpacing, - blockOrigin.z + z * childSpacing); - - bool draw = true; - if (filterOverlaps && l != 0) { - auto cornerIDIter = mData->grids[l - 1].getBlockOriginTo1D().getMetadata(corner); - if (cornerIDIter) { - draw = false; - } - } - - if (draw) { - if (op == Op::Count) { - num_cells++; - } else if (op == Op::OutputTopology) { - - file << "8 "; - //x,y,z - file << mapTo1D(corner.x, corner.y, corner.z) << " "; - //+x,y,z - file << mapTo1D(corner.x + childSpacing, corner.y, corner.z) << " "; - - //x,+y,z - file << mapTo1D(corner.x, corner.y + childSpacing, corner.z) << " "; - - //+x,+y,z - file << mapTo1D(corner.x + childSpacing, corner.y + childSpacing, corner.z) << " "; - - //x,y,+z - file << mapTo1D(corner.x, corner.y, corner.z + childSpacing) << " "; - - //+x,y,+z - file << mapTo1D(corner.x + childSpacing, corner.y, corner.z + childSpacing) << " "; - - //x,+y,+z - file << mapTo1D(corner.x, corner.y + childSpacing, corner.z + childSpacing) << " "; - - //+x,+y,+z - file << mapTo1D(corner.x + childSpacing, corner.y + childSpacing, corner.z + childSpacing) << " "; - file << "\n"; - } else if (op == Op::OutputLevels) { - file << l << "\n"; - } else if (op == Op::OutputBlockID) { - file << blockIdx << "\n"; - } else if (op == Op::OutputVoxelID) { - file << blockIdx << "\n"; - } - } - } - } - } - } - }); - } - }; - - loopOverActiveBlocks(Op::Count); - - file << "CELLS " << num_cells << " " << num_cells * 9 << " \n"; - - loopOverActiveBlocks(Op::OutputTopology); - - file << "CELL_TYPES " << num_cells << " \n"; - for (uint64_t i = 0; i < num_cells; ++i) { - file << 11 << "\n"; - } - - file << "CELL_DATA " << num_cells << " \n"; - - file << "SCALARS Level int 1 \n"; - file << "LOOKUP_TABLE default \n"; - loopOverActiveBlocks(Op::OutputLevels); - - - file << "SCALARS BlockID int 1 \n"; - file << "LOOKUP_TABLE default \n"; - loopOverActiveBlocks(Op::OutputBlockID); - - //file << "SCALARS VoxelID int 1 \n"; - //file << "LOOKUP_TABLE default \n"; - //loopOverActiveBlocks(Op::OutputVoxelID); - - - file.close(); -} - - } // namespace Neon::domain::internal::mGrid \ No newline at end of file diff --git a/libNeonDomain/tests/unit/gUt_mGrid/src/gUt_mGrid.cpp b/libNeonDomain/tests/unit/gUt_mGrid/src/gUt_mGrid.cpp index bbf3af07..f854bf15 100644 --- a/libNeonDomain/tests/unit/gUt_mGrid/src/gUt_mGrid.cpp +++ b/libNeonDomain/tests/unit/gUt_mGrid/src/gUt_mGrid.cpp @@ -29,8 +29,6 @@ TEST(mGrid, multiRes) Neon::domain::Stencil::s7_Laplace_t(), descriptor); - grid.topologyToVTK("bGrid112.vtk", false); - auto field = grid.newField("myField", 1, 0); for (int l = 0; l < descriptor.getDepth(); ++l) { @@ -42,7 +40,7 @@ TEST(mGrid, multiRes) Neon::computeMode_t::computeMode_e::seq); } - field.ioToVtk("f", "f"); + field.ioToVtk("f"); } } diff --git a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResChild.h b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResChild.h index efec28db..abbec0f8 100644 --- a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResChild.h +++ b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResChild.h @@ -32,8 +32,7 @@ void MultiResChild() }}, Neon::domain::Stencil::s7_Laplace_t(), descriptor); - //grid.topologyToVTK("grid112.vtk", false); - + auto XField = grid.newField("XField", 1, -1); auto isRefinedField = grid.newField("isRefined", 1, -1); @@ -56,7 +55,7 @@ void MultiResChild() XField.updateCompute(); isRefinedField.updateCompute(); } - //XField.ioToVtk("f", "f"); + //XField.ioToVtk("f"); for (int level = 0; level < descriptor.getDepth(); ++level) { diff --git a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResDemo.h b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResDemo.h index a23b979b..a9f40076 100644 --- a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResDemo.h +++ b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResDemo.h @@ -128,9 +128,7 @@ void MultiResDemo() for (int i = 0; i < descriptor.getDepth(); ++i) { s << descriptor.getLog2RefFactor(i); } - - grid.topologyToVTK(s.str() + ".vtk", false); - + auto field = grid.newField("myField", 1, -10000); for (int l = 0; l < descriptor.getDepth(); ++l) { @@ -184,7 +182,7 @@ void MultiResDemo() field.updateIO(); - field.ioToVtk(s.str(), "f"); + field.ioToVtk(s.str()); } diff --git a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResMap.h b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResMap.h index 46070ea8..009b31dd 100644 --- a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResMap.h +++ b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResMap.h @@ -30,8 +30,7 @@ void MultiResSingleMap() }}, Neon::domain::Stencil::s7_Laplace_t(), descriptor); - //grid.topologyToVTK("grid111.vtk", false); - + auto XField = grid.newField("XField", 1, -1); auto YField = grid.newField("YField", 1, -1); @@ -54,7 +53,7 @@ void MultiResSingleMap() XField.updateCompute(); YField.updateCompute(); } - //XField.ioToVtk("f", "f"); + //XField.ioToVtk("f"); for (int level = 0; level < descriptor.getDepth(); ++level) { diff --git a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResParent.h b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResParent.h index c631ee76..0066e316 100644 --- a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResParent.h +++ b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResParent.h @@ -154,8 +154,7 @@ void MultiResAtomicAddParent() }}, Neon::domain::Stencil::s7_Laplace_t(), descriptor); - //grid.topologyToVTK("grid111.vtk", false); - + auto XField = grid.newField("XField", 1, -1); @@ -171,7 +170,7 @@ void MultiResAtomicAddParent() if (bk.runtime() == Neon::Runtime::stream) { XField.updateCompute(); } - //XField.ioToVtk("f", "f"); + //XField.ioToVtk("f"); for (int level = 0; level < descriptor.getDepth(); ++level) { diff --git a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResSkeleton.h b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResSkeleton.h index 324a7577..9b978977 100644 --- a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResSkeleton.h +++ b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResSkeleton.h @@ -29,8 +29,7 @@ void MultiResSkeleton() }}, Neon::domain::Stencil::s7_Laplace_t(), descriptor); - //grid.topologyToVTK("grid111.vtk", false); - + auto field = grid.newField("field", 3, -1); @@ -46,7 +45,7 @@ void MultiResSkeleton() if (bk.runtime() == Neon::Runtime::stream) { field.updateCompute(); } - //field.ioToVtk("f", "f"); + //field.ioToVtk("f"); std::vector containers; diff --git a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResStencil.h b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResStencil.h index e4a1f72e..3000d6bf 100644 --- a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResStencil.h +++ b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResStencil.h @@ -29,8 +29,7 @@ void MultiResSameLevelStencil() }}, Neon::domain::Stencil::s7_Laplace_t(), descriptor); - //grid.topologyToVTK("grid111.vtk", false); - + auto XField = grid.newField("XField", 1, -1); auto YField = grid.newField("YField", 1, -1); @@ -53,7 +52,7 @@ void MultiResSameLevelStencil() XField.updateCompute(); YField.updateCompute(); } - //XField.ioToVtk("f", "f"); + //XField.ioToVtk("f"); for (int level = 0; level < descriptor.getDepth(); ++level) { From b09c79c4a4b598274f00d73a3f1c0d4b7f01eb6b Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Thu, 2 Feb 2023 15:23:26 -0500 Subject: [PATCH 17/24] matching single resolution results --- apps/lbmMultiRes/lbmMultiRes.cu | 32 ++++++------------- .../src/domain/internal/bGrid/bGrid.cpp | 10 +++--- 2 files changed, 15 insertions(+), 27 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index 49463060..7e0a0743 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -11,17 +11,6 @@ enum CellType : int undefined = 3, }; -/*NEON_CUDA_DEVICE_ONLY static constexpr char latticeVelocity2D[9][2] = { - {0, 0}, //0 -> 0 || 0000 -> 0000 - {0, -1}, //1 -> 2 || 0001 -> 0010 - {0, 1}, //2 -> 1 || 0010 -> 0001 - {-1, 0}, //3 -> 6 || 0100 -> 0110 - {-1, -1}, //4 -> 8 || 0100 -> 1000 - {-1, 1}, //5 -> 7 || 0101 -> 0111 - {1, 0}, //6 -> 3 || 0110 -> 0011 - {1, -1}, //7 -> 5 || 0111 -> 0101 - {1, 1}}; //8 -> 4 || 1000 -> 0100*/ - /*NEON_CUDA_DEVICE_ONLY static constexpr char latticeVelocity3D[27][3] = { {0, 0, 0}, //0 -> 0 || 00000 -> 00000 {0, -1, 0}, //1 -> 2 || 00001 -> 00010 @@ -617,7 +606,7 @@ void postProcess(Neon::domain::mGrid& grid, if (grid.getBackend().runtime() == Neon::Runtime::stream) { vel.updateIO(); - rho.updateIO(); + //rho.updateIO(); } int precision = 4; @@ -625,7 +614,7 @@ void postProcess(Neon::domain::mGrid& grid, suffix << std::setw(precision) << std::setfill('0') << iteration; vel.ioToVtk("Velocity_" + suffix.str()); - rho.ioToVtk("Density_" + suffix.str()); + //rho.ioToVtk("Density_" + suffix.str()); } int main(int argc, char** argv) @@ -636,7 +625,7 @@ int main(int argc, char** argv) using T = double; //Neon grid - auto runtime = Neon::Runtime::openmp; + auto runtime = Neon::Runtime::stream; std::vector gpu_ids{0}; Neon::Backend backend(gpu_ids, runtime); @@ -652,7 +641,6 @@ int main(int argc, char** argv) //levelSDF[2] = -56.0 / 144.0; //levelSDF[3] = -1.0; - const Neon::index_3d grid_dim(24, 24, 24); float levelSDF[depth + 1]; levelSDF[0] = 0; @@ -678,10 +666,10 @@ int main(int argc, char** argv) Neon::domain::Stencil::s19_t(false), descriptor); //LBM problem - const int max_iter = 3000; - const T ulb = 0.04; - const T Re = 50; - const T clength = grid_dim.x * std::pow(2, -(descriptor.getDepth() - 1)); + const int max_iter = 5000; + const T ulb = 0.02; + const T Re = 100; + const T clength = (grid_dim.x * std::pow(2, -(descriptor.getDepth() - 1)) - 1); const T visclb = ulb * clength / Re; const T omega = 1.0 / (3. * visclb + 0.5); const Neon::double_3d ulid(ulb, 0., 0.); @@ -691,8 +679,8 @@ int main(int argc, char** argv) auto fout = grid.newField("fout", Q, 0); auto cellType = grid.newField("CellType", 1, CellType::bulk); - auto vel = grid.newField("vel", 3, -50); - auto rho = grid.newField("rho", 1, -50); + auto vel = grid.newField("vel", 3, 0); + auto rho = grid.newField("rho", 1, 0); //classify voxels @@ -778,7 +766,7 @@ int main(int argc, char** argv) //execution for (int t = 0; t < max_iter; ++t) { skl.run(); - if (t % 10 == 0) { + if (t % 100 == 0) { postProcess(grid, descriptor.getDepth(), fout, cellType, t, vel, rho); } } diff --git a/libNeonDomain/src/domain/internal/bGrid/bGrid.cpp b/libNeonDomain/src/domain/internal/bGrid/bGrid.cpp index 0692f6a9..69479186 100644 --- a/libNeonDomain/src/domain/internal/bGrid/bGrid.cpp +++ b/libNeonDomain/src/domain/internal/bGrid/bGrid.cpp @@ -72,14 +72,14 @@ auto bGrid::setReduceEngine(Neon::sys::patterns::Engine eng) -> void } } -auto bGrid::getLaunchParameters(Neon::DataView dataView, +auto bGrid::getLaunchParameters([[maybe_unused]] Neon::DataView dataView, [[maybe_unused]] const Neon::index_3d& blockSize, const size_t& sharedMem) const -> Neon::set::LaunchParameters { - if (dataView != Neon::DataView::STANDARD) { - NEON_WARNING("Requesting LaunchParameters on {} data view but bGrid only supports Standard data view on a single GPU", - Neon::DataViewUtil::toString(dataView)); - } + //if (dataView != Neon::DataView::STANDARD) { + // NEON_WARNING("Requesting LaunchParameters on {} data view but bGrid only supports Standard data view on a single GPU", + // Neon::DataViewUtil::toString(dataView)); + //} const Neon::int32_3d cuda_block(mData->blockSize, mData->blockSize, From 99e1869fc8e690ba37b84665c22a2b14f77fbb8d Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Fri, 3 Feb 2023 19:09:43 -0500 Subject: [PATCH 18/24] fix dependency for stencil up/down --- .../Neon/domain/internal/mGrid/mField_imp.h | 14 +++++++++----- .../tests/unit/sUt_multiRes/src/MultiResDemo.h | 1 + 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/libNeonDomain/include/Neon/domain/internal/mGrid/mField_imp.h b/libNeonDomain/include/Neon/domain/internal/mGrid/mField_imp.h index 1df303fd..563b0b89 100644 --- a/libNeonDomain/include/Neon/domain/internal/mGrid/mField_imp.h +++ b/libNeonDomain/include/Neon/domain/internal/mGrid/mField_imp.h @@ -216,12 +216,14 @@ auto mField::load(Neon::set::Loader loader, break; } case Neon::MultiResCompute::STENCIL_UP: { - loader.load(operator()(level + 1), Neon::Compute::MAP); + const auto& parent = operator()(level + 1); + loader.load(parent, Neon::Compute::STENCIL); return loader.load(operator()(level), Neon::Compute::MAP); break; } case Neon::MultiResCompute::STENCIL_DOWN: { - loader.load(operator()(level - 1), Neon::Compute::MAP); + const auto& child = operator()(level - 1); + loader.load(child, Neon::Compute::STENCIL); return loader.load(operator()(level), Neon::Compute::MAP); break; } @@ -246,12 +248,14 @@ auto mField::load(Neon::set::Loader loader, break; } case Neon::MultiResCompute::STENCIL_UP: { - loader.load(operator()(level + 1), Neon::Compute::MAP); + const auto& parent = operator()(level + 1); + loader.load(parent, Neon::Compute::STENCIL); return loader.load(operator()(level), Neon::Compute::MAP); break; } case Neon::MultiResCompute::STENCIL_DOWN: { - loader.load(operator()(level - 1), Neon::Compute::MAP); + const auto& child = operator()(level - 1); + loader.load(child, Neon::Compute::STENCIL); return loader.load(operator()(level), Neon::Compute::MAP); break; } @@ -377,7 +381,7 @@ auto mField::ioToVtk(std::string fileName, << "\n"; } else if (op == Op::OutputData) { for (int c = 0; c < card; ++c) { - file << (*this)(l).getPartition(Neon::Execution::host, devID, Neon::DataView::STANDARD)(cell, c)<< "\n"; + file << (*this)(l).getPartition(Neon::Execution::host, devID, Neon::DataView::STANDARD)(cell, c) << "\n"; } } } diff --git a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResDemo.h b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResDemo.h index a9f40076..21b65ed1 100644 --- a/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResDemo.h +++ b/libNeonSkeleton/tests/unit/sUt_multiRes/src/MultiResDemo.h @@ -39,6 +39,7 @@ inline float sdfDodecahedron(Neon::float_3d p, float r = 1.0) inline float sdfMenger(Neon::float_3d p) { + //https://github.com/tovacinni/sdf-explorer/blob/master/data-files/sdf/Fractal/Menger.glsl auto mod = [](float x, float y) { return x - y * floor(x / y); }; auto length = [&](const Neon::float_3d& x) -> float { From 743eeff082787a3b599275033f13a9f29de747d0 Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Mon, 6 Feb 2023 19:18:18 -0500 Subject: [PATCH 19/24] multi-res lbm minor fixes --- apps/lbmMultiRes/lbmMultiRes.cu | 118 ++++++++++++++++++++++++++------ 1 file changed, 98 insertions(+), 20 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index 7e0a0743..a9d58fe4 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -314,9 +314,10 @@ Neon::set::Container explosionPull(Neon::domain::mGrid& grid, template -Neon::set::Container coalescencePull(Neon::domain::mGrid& grid, - int level, - Neon::domain::mGrid::Field& postStreaming) +Neon::set::Container coalescencePull(Neon::domain::mGrid& grid, + int level, + const Neon::domain::mGrid::Field& postCollision, + Neon::domain::mGrid::Field& postStreaming) { // Initiated by the coarse level (hence "pull"), this function simply read the missing population // across the interface between coarse<->fine boundary by reading the population prepare during the store() @@ -324,7 +325,8 @@ Neon::set::Container coalescencePull(Neon::domain::mGrid& grid, return grid.getContainer( "Coalescence" + std::to_string(level), level, [&, level](Neon::set::Loader& loader) { - auto& fpost_stm = postStreaming.load(loader, level, Neon::MultiResCompute::STENCIL_DOWN); + const auto& fpost_col = postCollision.load(loader, level, Neon::MultiResCompute::STENCIL); + auto& fpost_stm = postStreaming.load(loader, level, Neon::MultiResCompute::MAP); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { //If this cell has children i.e., it is been refined, than we should not work on it @@ -336,7 +338,7 @@ Neon::set::Container coalescencePull(Neon::domain::mGrid& grid, //if we have a neighbor at the same level that has been refined, then cell is on //the interface and this is where we should do the coalescence if (fpost_stm.hasChildren(cell, dir)) { - auto neighbor = fpost_stm.nghVal(cell, dir, q, T(0)); + auto neighbor = fpost_col.nghVal(cell, dir, q, T(0)); if (neighbor.isValid) { fpost_stm(cell, q) = neighbor.value; } @@ -351,7 +353,7 @@ Neon::set::Container coalescencePull(Neon::domain::mGrid& grid, template Neon::set::Container store(Neon::domain::mGrid& grid, int level, - Neon::domain::mGrid::Field& postStreaming) + Neon::domain::mGrid::Field& postCollision) { //Initiated by the coarse level (level), this function prepares and stores the fine (level - 1) // information for further pulling initiated by the coarse (this) level invoked by coalescence_pull @@ -367,17 +369,18 @@ Neon::set::Container store(Neon::domain::mGrid& grid, return grid.getContainer( "store_" + std::to_string(level), level, [&, level](Neon::set::Loader& loader) { - auto& fpost_stm = postStreaming.load(loader, level, Neon::MultiResCompute::STENCIL_DOWN); + auto& fpost_col = postCollision.load(loader, level, Neon::MultiResCompute::STENCIL_DOWN); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { //if the cell is refined, we might need to store something in it for its neighbor - if (fpost_stm.hasChildren(cell)) { + if (fpost_col.hasChildren(cell)) { - const int refFactor = fpost_stm.getRefFactor(level); + const int refFactor = fpost_col.getRefFactor(level); - bool should_accumelate = ((int(fpost_stm(cell, 0)) % refFactor) != 0); + bool should_accumelate = ((int(fpost_col(cell, 0)) % refFactor) != 0); - fpost_stm(cell, 0) += 1; + //fpost_col(cell, 0) += 1; + fpost_col(cell, 0) = (int(fpost_col(cell, 0)) + 1) % refFactor; //for each direction aka for each neighbor @@ -386,10 +389,10 @@ Neon::set::Container store(Neon::domain::mGrid& grid, const Neon::int8_3d q_dir = getDir(q); //check if the neighbor in this direction has children - auto neighborCell = fpost_stm.getNghCell(cell, q_dir); + auto neighborCell = fpost_col.getNghCell(cell, q_dir); if (neighborCell.isActive()) { - if (!fpost_stm.hasChildren(neighborCell)) { + if (!fpost_col.hasChildren(neighborCell)) { //now, we know that there is actually something we need to store for this neighbor //in cell along q (q_dir) direction int num = 0; @@ -405,7 +408,7 @@ Neon::set::Container store(Neon::domain::mGrid& grid, const Neon::int8_3d r_dir = q_dir - p_dir; //if this neighbor is refined - if (fpost_stm.hasChildren(cell, p_dir)) { + if (fpost_col.hasChildren(cell, p_dir)) { //for each children of p for (int8_t i = 0; i < refFactor; ++i) { @@ -418,7 +421,7 @@ Neon::set::Container store(Neon::domain::mGrid& grid, const Neon::int8_3d cq = unlceOffset(c, q_dir); if (cq == r_dir) { num++; - sum += fpost_stm.childVal(cell, c, q, 0).value; + sum += fpost_col.childVal(cell, c, q, 0).value; } } } @@ -427,9 +430,9 @@ Neon::set::Container store(Neon::domain::mGrid& grid, } if (should_accumelate) { - fpost_stm(cell, q) += sum / static_cast(num * refFactor); + fpost_col(cell, q) += sum / static_cast(num * refFactor); } else { - fpost_stm(cell, q) = sum / static_cast(num * refFactor); + fpost_col(cell, q) = sum / static_cast(num * refFactor); } } } @@ -467,7 +470,7 @@ void stream(Neon::domain::mGrid& grid, /* Coalescence: pull missing populations from finer neighbors by "smart" averaging fine (level-1) * to coarse (level) communication, initiated by the coarse level ("Pull"). */ - containers.push_back(coalescencePull(grid, level, postStreaming)); + containers.push_back(coalescencePull(grid, level, postCollision, postStreaming)); } } @@ -641,6 +644,7 @@ int main(int argc, char** argv) //levelSDF[2] = -56.0 / 144.0; //levelSDF[3] = -1.0; + //const Neon::index_3d grid_dim(48, 48, 48); const Neon::index_3d grid_dim(24, 24, 24); float levelSDF[depth + 1]; levelSDF[0] = 0; @@ -666,7 +670,7 @@ int main(int argc, char** argv) Neon::domain::Stencil::s19_t(false), descriptor); //LBM problem - const int max_iter = 5000; + const int max_iter = 20000; const T ulb = 0.02; const T Re = 100; const T clength = (grid_dim.x * std::pow(2, -(descriptor.getDepth() - 1)) - 1); @@ -750,6 +754,79 @@ int main(int argc, char** argv) fin.updateCompute(); fout.updateCompute(); + //init fields + for (int level = 0; level < descriptor.getDepth(); ++level) { + auto container = + grid.getContainer( + "Init_" + std::to_string(level), level, + [&fin, &fout, &cellType, &vel, &rho, level, grid_dim, ulid, Q](Neon::set::Loader& loader) { + auto& in = fin.load(loader, level, Neon::MultiResCompute::MAP); + auto& out = fout.load(loader, level, Neon::MultiResCompute::MAP); + auto& type = cellType.load(loader, level, Neon::MultiResCompute::MAP); + auto& u = vel.load(loader, level, Neon::MultiResCompute::MAP); + auto& rh = rho.load(loader, level, Neon::MultiResCompute::MAP); + + return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { + constexpr auto t = latticeWeight(); + + //velocity and density + /*u(cell, 0) = 0; + u(cell, 1) = 0; + u(cell, 2) = 0; + rh(cell, 0) = 0; + type(cell, 0) = CellType::bulk;*/ + + if (!in.hasChildren(cell)) { + /*const Neon::index_3d idx = in.mapToGlobal(cell); + + //pop + for (int q = 0; q < Q; ++q) { + T pop_init_val = t.t[q]; + + if (level == 0) { + if (idx.x == 0 || idx.x == grid_dim.x - 1 || + idx.y == 0 || idx.y == grid_dim.y - 1 || + idx.z == 0 || idx.z == grid_dim.z - 1) { + type(cell, 0) = CellType::bounceBack; + + if (idx.y == grid_dim.y - 1) { + type(cell, 0) = CellType::movingWall; + pop_init_val = 0; + for (int d = 0; d < 3; ++d) { + pop_init_val += latticeVelocity3D[q][d] * ulid.v[d]; + } + pop_init_val *= -6. * t.t[q]; + } else { + pop_init_val = 0; + } + } + } + + out(cell, q) = pop_init_val; + in(cell, q) = pop_init_val; + }*/ + + + } else { + in(cell, 0) = 0; + out(cell, 0) = 0; + + /*for (int q = 0; q < Q; ++q) { + in(cell, q) = 0; + out(cell, q) = 0; + }*/ + } + }; + }); + + container.run(0); + } + + //rho.updateIO(); + //vel.updateIO(); + //fin.updateIO(); + //fout.updateIO(); + //skeleton std::vector containers; nonUniformTimestepRecursive(grid, @@ -765,8 +842,9 @@ int main(int argc, char** argv) //execution for (int t = 0; t < max_iter; ++t) { + printf("\n Iteration = %d", t); skl.run(); - if (t % 100 == 0) { + if (t % 20 == 0) { postProcess(grid, descriptor.getDepth(), fout, cellType, t, vel, rho); } } From b80e4e6f05a2141aa304362d54edd89e04103d5b Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Tue, 7 Feb 2023 19:03:26 -0500 Subject: [PATCH 20/24] debugging multi-res lbm --- apps/lbmMultiRes/lbmMultiRes.cu | 107 +++--------------- .../domain/internal/mGrid/mPartition_imp.h | 2 +- 2 files changed, 17 insertions(+), 92 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index a9d58fe4..3e897dde 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -105,7 +105,7 @@ struct latticeWeight template -NEON_CUDA_HOST_DEVICE inline Neon::int8_3d unlceOffset(const T& cell, const Neon::int8_3d& q) +NEON_CUDA_HOST_DEVICE inline Neon::int8_3d uncleOffset(const T& cell, const Neon::int8_3d& q) { //given a local index within a cell and a population direction (q) //find the uncle's (the parent neighbor) offset from which the desired population (q) should be read @@ -298,7 +298,9 @@ Neon::set::Container explosionPull(Neon::domain::mGrid& grid, //get the uncle direction/offset i.e., the neighbor of the cell's parent //this direction/offset is wrt to the cell's parent - Neon::int8_3d uncleDir = unlceOffset(cell.mLocation, dir); + Neon::int8_3d uncleDir = uncleOffset(cell.mLocation, dir); + + auto uncleLoc = fpost_col.getUncle(cell, uncleDir); auto uncle = fpost_col.uncleVal(cell, uncleDir, q, T(0)); if (uncle.isValid) { @@ -418,7 +420,7 @@ Neon::set::Container store(Neon::domain::mGrid& grid, //cq is coarse neighbor (i.e., uncle) that we need to go in order to read q //for c (this is what we do for explosion but here we do this just for the check) - const Neon::int8_3d cq = unlceOffset(c, q_dir); + const Neon::int8_3d cq = uncleOffset(c, q_dir); if (cq == r_dir) { num++; sum += fpost_col.childVal(cell, c, q, 0).value; @@ -607,10 +609,10 @@ void postProcess(Neon::domain::mGrid& grid, grid.getBackend().syncAll(); - if (grid.getBackend().runtime() == Neon::Runtime::stream) { - vel.updateIO(); - //rho.updateIO(); - } + + vel.updateIO(); + //rho.updateIO(); + int precision = 4; std::ostringstream suffix; @@ -644,8 +646,7 @@ int main(int argc, char** argv) //levelSDF[2] = -56.0 / 144.0; //levelSDF[3] = -1.0; - //const Neon::index_3d grid_dim(48, 48, 48); - const Neon::index_3d grid_dim(24, 24, 24); + const Neon::index_3d grid_dim(48, 48, 48); float levelSDF[depth + 1]; levelSDF[0] = 0; levelSDF[1] = -8 / 24.0; @@ -687,73 +688,6 @@ int main(int argc, char** argv) auto rho = grid.newField("rho", 1, 0); - //classify voxels - for (int l = 0; l < descriptor.getDepth(); ++l) { - cellType.forEachActiveCell( - l, - [&](const Neon::int32_3d idx, const int q, CellType& val) { - val = CellType::bulk; - if (idx.x == 0 || idx.x == grid_dim.x - 1 || - idx.y == 0 || idx.y == grid_dim.y - 1 || - idx.z == 0 || idx.z == grid_dim.z - 1) { - - val = CellType::bounceBack; - - if (idx.y == grid_dim.y - 1) { - val = CellType::movingWall; - } - } - }); - } - cellType.updateCompute(); - - - // init fin and fout - constexpr auto t = latticeWeight(); - - auto init = [&](const Neon::int32_3d idx, const int q, const int l) { - T ret = t.t[q]; - - if (l == 0) { - if (idx.x == 0 || idx.x == grid_dim.x - 1 || - idx.y == 0 || idx.y == grid_dim.y - 1 || - idx.z == 0 || idx.z == grid_dim.z - 1) { - - if (idx.y == grid_dim.y - 1) { - ret = 0; - for (int d = 0; d < 3; ++d) { - ret += latticeVelocity3D[q][d] * ulid.v[d]; - } - ret *= -6. * t.t[q]; - } else { - ret = 0; - } - } - } - return ret; - }; - - for (int l = 0; l < descriptor.getDepth(); ++l) { - fin.forEachActiveCell( - l, - [&](const Neon::int32_3d idx, const int q, T& val) { - val = init(idx, q, l); - }); - fout.forEachActiveCell( - l, - [&](const Neon::int32_3d idx, const int q, T& val) { - val = init(idx, q, l); - }); - vel.forEachActiveCell(l, [&](const Neon::int32_3d, const int, T& val) { - val = 0; - }); - rho.forEachActiveCell(l, [&](const Neon::int32_3d, const int, T& val) { - val = 0; - }); - } - fin.updateCompute(); - fout.updateCompute(); - //init fields for (int level = 0; level < descriptor.getDepth(); ++level) { auto container = @@ -770,14 +704,14 @@ int main(int argc, char** argv) constexpr auto t = latticeWeight(); //velocity and density - /*u(cell, 0) = 0; + u(cell, 0) = 0; u(cell, 1) = 0; u(cell, 2) = 0; rh(cell, 0) = 0; - type(cell, 0) = CellType::bulk;*/ + type(cell, 0) = CellType::bulk; if (!in.hasChildren(cell)) { - /*const Neon::index_3d idx = in.mapToGlobal(cell); + const Neon::index_3d idx = in.mapToGlobal(cell); //pop for (int q = 0; q < Q; ++q) { @@ -804,17 +738,10 @@ int main(int argc, char** argv) out(cell, q) = pop_init_val; in(cell, q) = pop_init_val; - }*/ - - + } } else { in(cell, 0) = 0; out(cell, 0) = 0; - - /*for (int q = 0; q < Q; ++q) { - in(cell, q) = 0; - out(cell, q) = 0; - }*/ } }; }); @@ -822,10 +749,8 @@ int main(int argc, char** argv) container.run(0); } - //rho.updateIO(); - //vel.updateIO(); - //fin.updateIO(); - //fout.updateIO(); + grid.getBackend().syncAll(); + //skeleton std::vector containers; diff --git a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h index 77abe808..fd264720 100644 --- a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h +++ b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition_imp.h @@ -233,7 +233,7 @@ NEON_CUDA_HOST_DEVICE inline auto mPartition::getUncle(const Cell& cell, { Cell uncle = getParent(cell); if (uncle.isActive()) { - uncle = this->getNghCell(uncle, direction, mParentNeighbourBlocks); + uncle = this->getNghCell(uncle, direction, (mParentNeighbourBlocks + (26 * uncle.mBlockID))); uncle.mBlockSize = mRefFactors[mLevel + 1]; uncle.mIsActive = uncle.mBlockID != std::numeric_limits::max(); } From 42c8a752bdd9c36f137b5f509a8640506906b536 Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Wed, 8 Feb 2023 07:50:39 -0500 Subject: [PATCH 21/24] fix windows compilation error --- apps/lbmMultiRes/lbmMultiRes.cu | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index 3e897dde..e2edfcf9 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -639,20 +639,20 @@ int main(int argc, char** argv) const Neon::domain::mGridDescriptor descriptor(depth); - //const Neon::index_3d grid_dim(144, 144, 144); - //float levelSDF[depth + 1]; - //levelSDF[0] = 0; - //levelSDF[1] = -28.0 / 144.0; - //levelSDF[2] = -56.0 / 144.0; - //levelSDF[3] = -1.0; - - const Neon::index_3d grid_dim(48, 48, 48); + const Neon::index_3d grid_dim(144, 144, 144); float levelSDF[depth + 1]; levelSDF[0] = 0; - levelSDF[1] = -8 / 24.0; - levelSDF[2] = -16 / 24.0; + levelSDF[1] = -28.0 / 144.0; + levelSDF[2] = -56.0 / 144.0; levelSDF[3] = -1.0; + //const Neon::index_3d grid_dim(48, 48, 48); + //float levelSDF[depth + 1]; + //levelSDF[0] = 0; + //levelSDF[1] = -8 / 24.0; + //levelSDF[2] = -16 / 24.0; + //levelSDF[3] = -1.0; + Neon::domain::mGrid grid( backend, grid_dim, @@ -690,10 +690,12 @@ int main(int argc, char** argv) //init fields for (int level = 0; level < descriptor.getDepth(); ++level) { + constexpr auto t = latticeWeight(); + auto container = grid.getContainer( "Init_" + std::to_string(level), level, - [&fin, &fout, &cellType, &vel, &rho, level, grid_dim, ulid, Q](Neon::set::Loader& loader) { + [&fin, &fout, &cellType, &vel, &rho, level, grid_dim, ulid, Q, t](Neon::set::Loader& loader) { auto& in = fin.load(loader, level, Neon::MultiResCompute::MAP); auto& out = fout.load(loader, level, Neon::MultiResCompute::MAP); auto& type = cellType.load(loader, level, Neon::MultiResCompute::MAP); @@ -701,8 +703,6 @@ int main(int argc, char** argv) auto& rh = rho.load(loader, level, Neon::MultiResCompute::MAP); return [=] NEON_CUDA_HOST_DEVICE(const typename Neon::domain::bGrid::Cell& cell) mutable { - constexpr auto t = latticeWeight(); - //velocity and density u(cell, 0) = 0; u(cell, 1) = 0; @@ -769,7 +769,7 @@ int main(int argc, char** argv) for (int t = 0; t < max_iter; ++t) { printf("\n Iteration = %d", t); skl.run(); - if (t % 20 == 0) { + if (t % 100 == 0) { postProcess(grid, descriptor.getDepth(), fout, cellType, t, vel, rho); } } From da517f8eb77eb6716535222b23c9cd681655abc8 Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Thu, 9 Feb 2023 12:00:55 -0500 Subject: [PATCH 22/24] bug fix in store() --- apps/lbmMultiRes/lbmMultiRes.cu | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index e2edfcf9..e4a8634a 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -405,6 +405,7 @@ Neon::set::Container store(Neon::domain::mGrid& grid, for (int8_t p = 0; p < Q; ++p) { const Neon::int8_3d p_dir = getDir(p); + const auto p_cell = fpost_col.getNghCell(cell, p_dir); //relative direction of q w.r.t p //i.e., in which direction we should move starting from p to land on q const Neon::int8_3d r_dir = q_dir - p_dir; @@ -422,8 +423,11 @@ Neon::set::Container store(Neon::domain::mGrid& grid, //for c (this is what we do for explosion but here we do this just for the check) const Neon::int8_3d cq = uncleOffset(c, q_dir); if (cq == r_dir) { - num++; - sum += fpost_col.childVal(cell, c, q, 0).value; + auto childVal = fpost_col.childVal(p_cell, c, q, 0); + if (childVal.isValid) { + num++; + sum += childVal.value; + } } } } @@ -647,31 +651,35 @@ int main(int argc, char** argv) levelSDF[3] = -1.0; //const Neon::index_3d grid_dim(48, 48, 48); - //float levelSDF[depth + 1]; + //float levelSDF[depth + 1]; //levelSDF[0] = 0; //levelSDF[1] = -8 / 24.0; //levelSDF[2] = -16 / 24.0; //levelSDF[3] = -1.0; + //const Neon::index_3d grid_dim(12, 12, 4); Neon::domain::mGrid grid( backend, grid_dim, {[&](const Neon::index_3d id) -> bool { return sdfCube(id, grid_dim - 1) <= levelSDF[0] && sdfCube(id, grid_dim - 1) > levelSDF[1]; + //return id.x < 4 && id.z == 0; }, [&](const Neon::index_3d& id) -> bool { return sdfCube(id, grid_dim - 1) <= levelSDF[1] && sdfCube(id, grid_dim - 1) > levelSDF[2]; + //return id.x >= 4 && id.x < 8 && id.z == 0; }, [&](const Neon::index_3d& id) -> bool { return sdfCube(id, grid_dim - 1) <= levelSDF[2] && sdfCube(id, grid_dim - 1) > levelSDF[3]; + //return id.z == 0; }}, Neon::domain::Stencil::s19_t(false), descriptor); //LBM problem - const int max_iter = 20000; + const int max_iter = 200000; const T ulb = 0.02; const T Re = 100; const T clength = (grid_dim.x * std::pow(2, -(descriptor.getDepth() - 1)) - 1); @@ -752,6 +760,8 @@ int main(int argc, char** argv) grid.getBackend().syncAll(); + //vel.ioToVtk("test"); + //skeleton std::vector containers; nonUniformTimestepRecursive(grid, @@ -769,7 +779,7 @@ int main(int argc, char** argv) for (int t = 0; t < max_iter; ++t) { printf("\n Iteration = %d", t); skl.run(); - if (t % 100 == 0) { + if (t % 200 == 0) { postProcess(grid, descriptor.getDepth(), fout, cellType, t, vel, rho); } } From 7623d74765343b785d37dacba759f3ed594eb7f6 Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Thu, 9 Feb 2023 15:57:52 -0500 Subject: [PATCH 23/24] parameter tuning --- apps/lbmMultiRes/lbmMultiRes.cu | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/apps/lbmMultiRes/lbmMultiRes.cu b/apps/lbmMultiRes/lbmMultiRes.cu index e4a8634a..099d8f1a 100644 --- a/apps/lbmMultiRes/lbmMultiRes.cu +++ b/apps/lbmMultiRes/lbmMultiRes.cu @@ -643,11 +643,11 @@ int main(int argc, char** argv) const Neon::domain::mGridDescriptor descriptor(depth); - const Neon::index_3d grid_dim(144, 144, 144); + const Neon::index_3d grid_dim(160, 160, 160); float levelSDF[depth + 1]; levelSDF[0] = 0; - levelSDF[1] = -28.0 / 144.0; - levelSDF[2] = -56.0 / 144.0; + levelSDF[1] = -31.0 / 160.0; + levelSDF[2] = -64 / 160.0; levelSDF[3] = -1.0; //const Neon::index_3d grid_dim(48, 48, 48); @@ -657,32 +657,27 @@ int main(int argc, char** argv) //levelSDF[2] = -16 / 24.0; //levelSDF[3] = -1.0; - //const Neon::index_3d grid_dim(12, 12, 4); - Neon::domain::mGrid grid( backend, grid_dim, {[&](const Neon::index_3d id) -> bool { return sdfCube(id, grid_dim - 1) <= levelSDF[0] && sdfCube(id, grid_dim - 1) > levelSDF[1]; - //return id.x < 4 && id.z == 0; }, [&](const Neon::index_3d& id) -> bool { return sdfCube(id, grid_dim - 1) <= levelSDF[1] && sdfCube(id, grid_dim - 1) > levelSDF[2]; - //return id.x >= 4 && id.x < 8 && id.z == 0; }, [&](const Neon::index_3d& id) -> bool { return sdfCube(id, grid_dim - 1) <= levelSDF[2] && sdfCube(id, grid_dim - 1) > levelSDF[3]; - //return id.z == 0; }}, Neon::domain::Stencil::s19_t(false), descriptor); //LBM problem const int max_iter = 200000; - const T ulb = 0.02; - const T Re = 100; - const T clength = (grid_dim.x * std::pow(2, -(descriptor.getDepth() - 1)) - 1); + const T ulb = 0.04; + const T Re = 1000; + const T clength = T(grid.getDimension(descriptor.getDepth() - 1).x); const T visclb = ulb * clength / Re; const T omega = 1.0 / (3. * visclb + 0.5); const Neon::double_3d ulid(ulb, 0., 0.); @@ -694,8 +689,7 @@ int main(int argc, char** argv) auto vel = grid.newField("vel", 3, 0); auto rho = grid.newField("rho", 1, 0); - - + //init fields for (int level = 0; level < descriptor.getDepth(); ++level) { constexpr auto t = latticeWeight(); @@ -760,8 +754,6 @@ int main(int argc, char** argv) grid.getBackend().syncAll(); - //vel.ioToVtk("test"); - //skeleton std::vector containers; nonUniformTimestepRecursive(grid, From 5631a0cbd532ae26c962b23920f6bbb21a99bdf7 Mon Sep 17 00:00:00 2001 From: Ahmed Mahmoud Date: Tue, 14 Feb 2023 08:09:46 -0500 Subject: [PATCH 24/24] docs --- .../Neon/domain/internal/mGrid/mPartition.h | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition.h b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition.h index ab9d3042..5921d36c 100644 --- a/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition.h +++ b/libNeonDomain/include/Neon/domain/internal/mGrid/mPartition.h @@ -83,16 +83,14 @@ class mPartition : public Neon::domain::internal::bGrid::bPartition /** * Check if the cell is refined i.e., has children - * @param cell the cell i.e., parent at which the children are checked - * @return + * @param cell the cell i.e., parent at which the children are checked */ NEON_CUDA_HOST_DEVICE inline auto hasChildren(const Cell& cell) const -> bool; /** * Check if a neighbor to 'cell' has children - * @param cell - * @param cell - * @return + * @param cell the main cell + * @param nghDir the direction relative to cell */ NEON_CUDA_HOST_DEVICE inline auto hasChildren(const Cell& cell, const Neon::int8_3d nghDir) const -> bool; @@ -125,9 +123,23 @@ class mPartition : public Neon::domain::internal::bGrid::bPartition */ NEON_CUDA_HOST_DEVICE inline auto hasParent(const Cell& cell) const -> bool; + /** + * The uncle of a cell at level L is a cell at level L+1 and is a neighbor to the cell's parent. + * This function returns the uncle of a given cell in a certain direction w.r.t the cell's parent + * @param cell the main cell at level L + * @param direction the direction w.r.t the parent of cell + */ NEON_CUDA_HOST_DEVICE inline auto getUncle(const Cell& cell, Neon::int8_3d direction) const -> Cell; + /** + * The uncle of a cell at level L is a cell at level L+1 and is a neighbor to the cell's parent. + * This function returns the value of a give cell in a certain direction w.r.t the cell's parent along a certain cardinality. + * @param cell the main cell at level L + * @param direction the direction w.r.t the parent of cell + * @param card the cardinality + * @param alternativeVal alternative value in case the uncle does not exist. + */ NEON_CUDA_HOST_DEVICE inline auto uncleVal(const Cell& cell, Neon::int8_3d direction, int card,