Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

external force density in first integration step #3144

Merged
merged 12 commits into from
Sep 11, 2019
2 changes: 1 addition & 1 deletion samples/billiard.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def main():
mass = np.array([0.17])

num_types = len(wca_sig)

def mix_eps(eps1, eps2, rule='LB'):
return math.sqrt(eps1 * eps2)

Expand Down
113 changes: 57 additions & 56 deletions src/core/grid_based_algorithms/lb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,45 @@ static double fluidstep = 0.0;
#include "grid.hpp"

/********************** The Main LB Part *************************************/
std::vector<LB_FluidNode>
lb_get_initialized_fields(LB_Parameters const &lb_parameters,
Lattice const &lb_lattice) {
std::vector<LB_FluidNode> fields(lb_lattice.halo_grid_volume);
for (auto &field : fields) {
field.force_density = lb_parameters.ext_force_density;
#ifdef LB_BOUNDARIES
field.boundary = false;
#endif // LB_BOUNDARIES
}
return fields;
}

/** (Re-)allocate memory for the fluid and initialize pointers. */
void lb_realloc_fluid(LB_FluidData &lb_fluid_a, LB_FluidData &lb_fluid_b,
const Lattice::index_t halo_grid_volume,
LB_Fluid &lb_fluid, LB_Fluid &lb_fluid_post) {
const std::array<int, 2> size = {{D3Q19::n_vel, halo_grid_volume}};

lb_fluid_a.resize(size);
lb_fluid_b.resize(size);

using Utils::Span;
for (int i = 0; i < size[0]; i++) {
lb_fluid[i] = Span<double>(lb_fluid_a[i].origin(), size[1]);
lb_fluid_post[i] = Span<double>(lb_fluid_b[i].origin(), size[1]);
}
}

void lb_set_equilibrium_populations(const Lattice &lb_lattice,
const LB_Parameters &lb_parameters) {
for (Lattice::index_t index = 0; index < lb_lattice.halo_grid_volume;
++index) {
lb_set_population_from_density_momentum_density_stress(
index, lb_parameters.density, Utils::Vector3d{} /*momentum density*/,
Utils::Vector6d{} /*stress*/);
}
}

void lb_init(const LB_Parameters &lb_parameters) {
if (lb_parameters.agrid <= 0.0) {
runtimeErrorMsg()
Expand All @@ -183,7 +222,6 @@ void lb_init(const LB_Parameters &lb_parameters) {
return;

/* initialize the local lattice domain */

try {
lblattice = Lattice(lb_parameters.agrid, 0.5 /*offset*/, 1 /*halo size*/,
local_geo.length(), local_geo.my_right(),
Expand All @@ -195,40 +233,28 @@ void lb_init(const LB_Parameters &lb_parameters) {

/* allocate memory for data structures */
lb_realloc_fluid(lbfluid_a, lbfluid_b, lblattice.halo_grid_volume, lbfluid,
lbfluid_post, lbfields);
lbfluid_post);

lbfields = lb_get_initialized_fields(lbpar, lblattice);

/* prepare the halo communication */
lb_prepare_communication(update_halo_comm, lblattice);

/* initialize derived parameters */
lb_reinit_parameters(lbpar);

/* setup the initial populations */
lb_reinit_fluid(lbfields, lblattice, lbpar);
}

void lb_reinit_fluid(std::vector<LB_FluidNode> &lb_fields,
const Lattice &lb_lattice,
const LB_Parameters &lb_parameters) {
std::fill(lb_fields.begin(), lb_fields.end(), LB_FluidNode());
/* default values for fields in lattice units */
Utils::Vector3d momentum_density{};
Utils::Vector6d stress{};

for (Lattice::index_t index = 0; index < lb_lattice.halo_grid_volume;
++index) {
// sets equilibrium distribution
lb_set_population_from_density_momentum_density_stress(
index, lb_parameters.density, momentum_density, stress);

#ifdef LB_BOUNDARIES
lb_fields[index].boundary = 0;
#endif // LB_BOUNDARIES
}
lb_set_equilibrium_populations(lblattice, lbpar);

#ifdef LB_BOUNDARIES
LBBoundaries::lb_init_boundaries();
#endif // LB_BOUNDARIES
#endif
}

void lb_reinit_fluid(std::vector<LB_FluidNode> &lb_fields,
Lattice const &lb_lattice,
LB_Parameters const &lb_parameters) {
lb_set_equilibrium_populations(lb_lattice, lb_parameters);
lb_fields = lb_get_initialized_fields(lb_parameters, lb_lattice);
}

void lb_reinit_parameters(LB_Parameters &lb_parameters) {
Expand Down Expand Up @@ -589,25 +615,6 @@ void lb_fluid_set_rng_state(uint64_t counter) {

/***********************************************************************/

/** (Re-)allocate memory for the fluid and initialize pointers. */
void lb_realloc_fluid(LB_FluidData &lb_fluid_a, LB_FluidData &lb_fluid_b,
const Lattice::index_t halo_grid_volume,
LB_Fluid &lb_fluid, LB_Fluid &lb_fluid_post,
std::vector<LB_FluidNode> &lb_fields) {
const std::array<int, 2> size = {{D3Q19::n_vel, halo_grid_volume}};

lb_fluid_a.resize(size);
lb_fluid_b.resize(size);

using Utils::Span;
for (int i = 0; i < size[0]; i++) {
lb_fluid[i] = Span<double>(lb_fluid_a[i].origin(), size[1]);
lb_fluid_post[i] = Span<double>(lb_fluid_b[i].origin(), size[1]);
}

lb_fields.resize(halo_grid_volume);
}

/** Set up the structures for exchange of the halo regions.
* See also \ref halo.cpp
*/
Expand Down Expand Up @@ -940,18 +947,6 @@ inline void lb_collide_stream() {
}
#endif // LB_BOUNDARIES

#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS
// Safeguard the node forces so that we can later use them for the IBM
// particle update
// In the following loop the lbfields[XX].force are reset to zero
// Safeguard the node forces so that we can later use them for the IBM
// particle update In the following loop the lbfields[XX].force are reset to
// zero
for (int i = 0; i < lblattice.halo_grid_volume; ++i) {
lbfields[i].force_density_buf = lbfields[i].force_density;
}
#endif

Lattice::index_t index = lblattice.halo_offset;
for (int z = 1; z <= lblattice.grid[2]; z++) {
for (int y = 1; y <= lblattice.grid[1]; y++) {
Expand All @@ -976,6 +971,12 @@ inline void lb_collide_stream() {
auto const modes_with_forces =
lb_apply_forces(index, thermalized_modes, lbpar, lbfields);

#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS
// Safeguard the node forces so that we can later use them for the IBM
// particle update
lbfields[index].force_density_buf = lbfields[index].force_density;
#endif

/* reset the force density */
lbfields[index].force_density = lbpar.ext_force_density;

Expand Down
10 changes: 3 additions & 7 deletions src/core/grid_based_algorithms/lb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,6 @@ extern Lattice lblattice;

extern HaloCommunicator update_halo_comm;

void lb_realloc_fluid(boost::multi_array<double, 2> &lb_fluid_a,
boost::multi_array<double, 2> &lb_fluid_b,
Lattice::index_t halo_grid_volume,
std::array<Utils::Span<double>, 19> &lb_fluid,
std::array<Utils::Span<double>, 19> &lb_fluid_post,
std::vector<LB_FluidNode> &lb_fields);

void lb_init(const LB_Parameters &lb_parameters);

void lb_reinit_fluid(std::vector<LB_FluidNode> &lb_fields,
Expand Down Expand Up @@ -287,6 +280,9 @@ void lb_calc_fluid_momentum(double *result, const LB_Parameters &lb_parameters,
const std::vector<LB_FluidNode> &lb_fields,
const Lattice &lb_lattice);
void lb_collect_boundary_forces(double *result);
std::vector<LB_FluidNode>
lb_get_initialized_fields(LB_Parameters const &lb_parameters,
Lattice const &lb_lattice);

/*@}*/

Expand Down
13 changes: 13 additions & 0 deletions src/core/grid_based_algorithms/lb_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,17 @@ void mpi_lb_set_population(Utils::Vector3i const &index,

REGISTER_CALLBACK(mpi_lb_set_population)

void mpi_lb_set_force_density(Utils::Vector3i const &index,
Utils::Vector3d const &force_density) {
lb_set(index, [&](auto index) {
auto const linear_index =
get_linear_index(lblattice.local_index(index), lblattice.halo_grid);
lbfields[linear_index].force_density = force_density;
});
}

REGISTER_CALLBACK(mpi_lb_set_force_density)

auto mpi_lb_get_momentum_density(Utils::Vector3i const &index) {
return lb_calc_fluid_kernel(index, [&](auto modes, auto force_density) {
return lb_calc_momentum_density(modes, force_density);
Expand Down Expand Up @@ -1265,6 +1276,7 @@ void lb_lbnode_set_velocity(const Utils::Vector3i &ind,
lb_get_population_from_density_momentum_density_stress(
density, momentum_density, stress);
mpi_call_all(mpi_lb_set_population, ind, population);
mpi_call_all(mpi_lb_set_force_density, ind, Utils::Vector3d{});
}
}

Expand Down Expand Up @@ -1308,6 +1320,7 @@ void lb_lbfluid_on_lb_params_change(LBParam field) {
break;
case LBParam::VISCOSITY:
case LBParam::EXT_FORCE_DENSITY:
lbfields = lb_get_initialized_fields(lbpar, lblattice);
case LBParam::BULKVISC:
case LBParam::KT:
case LBParam::GAMMA_ODD:
Expand Down
4 changes: 0 additions & 4 deletions src/core/grid_based_algorithms/lbgpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,10 +213,6 @@ void lb_init_boundaries_GPU(int n_lb_boundaries, int number_of_boundnodes,
int *host_boundary_index_list,
float *lb_bounday_velocity);
#endif
void lb_init_extern_nodeforcedensities_GPU(
int n_extern_node_force_densities,
LB_extern_nodeforcedensity_gpu *host_extern_node_force_densities,
LB_parameters_gpu *lbpar_gpu);

void lb_set_agrid_gpu(double agrid);

Expand Down
107 changes: 35 additions & 72 deletions src/core/grid_based_algorithms/lbgpu_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1070,7 +1070,7 @@ __device__ void apply_forces(unsigned int index, Utils::Array<float, 19> &mode,
* @param[in] node_f Local node force
* @param[in] index Node index / thread index
* @param[in] print_index Node index / thread index
* TODO: code duplication with \ref calc_values_from_m_in_LB_units
* TODO: code duplication with \ref calc_values_from_m
*/
__device__ void
calc_values_in_LB_units(LB_nodes_gpu n_a, Utils::Array<float, 19> &mode,
Expand Down Expand Up @@ -1188,10 +1188,9 @@ calc_values_in_LB_units(LB_nodes_gpu n_a, Utils::Array<float, 19> &mode,
* @param[out] j_out Momentum
* @param[out] pi_out Pressure tensor
*/
__device__ void
calc_values_from_m_in_LB_units(Utils::Array<float, 19> &mode_single,
LB_rho_v_gpu *d_v_single, float *rho_out,
float *j_out, float *pi_out) {
__device__ void calc_values_from_m(Utils::Array<float, 19> &mode_single,
LB_rho_v_gpu *d_v_single, float *rho_out,
float *j_out, float *pi_out) {
Utils::Array<float, 6> modes_from_pi_eq{};
Utils::Array<float, 6> j{};
float Rho;
Expand Down Expand Up @@ -1832,6 +1831,21 @@ __global__ void calc_n_from_rho_j_pi(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v,
}
}

__global__ void set_force_density(int single_nodeindex, float *force_density,
LB_node_force_density_gpu node_f) {
unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
blockDim.x * blockIdx.x + threadIdx.x;

if (index == 0) {
node_f.force_density[0 * para->number_of_nodes + single_nodeindex] =
force_density[0];
node_f.force_density[1 * para->number_of_nodes + single_nodeindex] =
force_density[1];
node_f.force_density[2 * para->number_of_nodes + single_nodeindex] =
force_density[2];
}
}

/** Kernel to calculate local populations from hydrodynamic fields
* from given flow field velocities. The mapping is given in terms of
* the equilibrium distribution.
Expand Down Expand Up @@ -1875,8 +1889,8 @@ __global__ void set_u_from_rho_v_pi(LB_nodes_gpu n_a, int single_nodeindex,
// Calculate the density, velocity, and pressure tensor
// in LB unit for this node

calc_values_from_m_in_LB_units(mode_for_pi, &d_v[single_nodeindex],
&rho_from_m, j_from_m, pi_from_m);
calc_values_from_m(mode_for_pi, &d_v[single_nodeindex], &rho_from_m,
j_from_m, pi_from_m);

// Take LB component density and calculate the equilibrium part
local_rho = rho_from_m;
Expand Down Expand Up @@ -2014,34 +2028,12 @@ __global__ void reinit_node_force(LB_node_force_density_gpu node_f) {
blockDim.x * blockIdx.x + threadIdx.x;

if (index < para->number_of_nodes) {
node_f.force_density[0 * para->number_of_nodes + index] = 0.0f;
node_f.force_density[1 * para->number_of_nodes + index] = 0.0f;
node_f.force_density[2 * para->number_of_nodes + index] = 0.0f;
}
}

/** Set external force on single nodes kernel
* @param[in] n_extern_node_force_densities Number of nodes
* @param[in] extern_node_force_densities External node force array
* @param[out] node_f Node force struct
*/
__global__ void init_extern_node_force_densities(
int n_extern_node_force_densities,
LB_extern_nodeforcedensity_gpu *extern_node_force_densities,
LB_node_force_density_gpu node_f) {
unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
blockDim.x * blockIdx.x + threadIdx.x;
float factor = powf(para->agrid, 2) * para->tau * para->tau;
if (index < n_extern_node_force_densities) {
node_f.force_density[0 * para->number_of_nodes +
extern_node_force_densities[index].index] =
extern_node_force_densities[index].force_density[0] * factor;
node_f.force_density[1 * para->number_of_nodes +
extern_node_force_densities[index].index] =
extern_node_force_densities[index].force_density[1] * factor;
node_f.force_density[2 * para->number_of_nodes +
extern_node_force_densities[index].index] =
extern_node_force_densities[index].force_density[2] * factor;
node_f.force_density[0 * para->number_of_nodes + index] =
para->ext_force_density[0];
node_f.force_density[1 * para->number_of_nodes + index] =
para->ext_force_density[1];
node_f.force_density[2 * para->number_of_nodes + index] =
para->ext_force_density[2];
}
}

Expand Down Expand Up @@ -2583,42 +2575,6 @@ void lb_reinit_extern_nodeforce_GPU(LB_parameters_gpu *lbpar_gpu) {

KERNELCALL(reinit_node_force, dim_grid, threads_per_block, node_f);
}
/** Setup and call extern single node force initialization from the host
* @param n_extern_node_force_densities Number of nodes on which the
* external force has to be applied
* @param host_extern_node_force_densities Host extern node forces
* @param lbpar_gpu Host parameter struct
*/
void lb_init_extern_nodeforcedensities_GPU(
int n_extern_node_force_densities,
LB_extern_nodeforcedensity_gpu *host_extern_node_force_densities,
LB_parameters_gpu *lbpar_gpu) {

size_of_extern_node_force_densities =
n_extern_node_force_densities * sizeof(LB_extern_nodeforcedensity_gpu);
cuda_safe_mem(cudaMalloc((void **)&extern_node_force_densities,
size_of_extern_node_force_densities));
cuda_safe_mem(
cudaMemcpy(extern_node_force_densities, host_extern_node_force_densities,
size_of_extern_node_force_densities, cudaMemcpyHostToDevice));

cuda_safe_mem(cudaMemcpyToSymbol(HIP_SYMBOL(para), lbpar_gpu,
sizeof(LB_parameters_gpu)));

int threads_per_block_exf = 64;
int blocks_per_grid_exf_y = 4;
int blocks_per_grid_exf_x =
(n_extern_node_force_densities +
threads_per_block_exf * blocks_per_grid_exf_y - 1) /
(threads_per_block_exf * blocks_per_grid_exf_y);
dim3 dim_grid_exf =
make_uint3(blocks_per_grid_exf_x, blocks_per_grid_exf_y, 1);

KERNELCALL(init_extern_node_force_densities, dim_grid_exf,
threads_per_block_exf, n_extern_node_force_densities,
extern_node_force_densities, node_f);
cudaFree(extern_node_force_densities);
}

/** Setup and call particle kernel from the host
* @tparam no_of_neighbours The number of neighbours to consider for
Expand Down Expand Up @@ -2856,8 +2812,15 @@ void lb_set_node_velocity_GPU(int single_nodeindex, float *host_velocity) {
KERNELCALL(set_u_from_rho_v_pi, dim_grid_flag, threads_per_block_flag,
*current_nodes, single_nodeindex, device_velocity, device_rho_v,
node_f);

float force_density[3] = {0.0f, 0.0f, 0.0f};
float *device_force_density;
cuda_safe_mem(cudaMalloc((void **)&device_force_density, 3 * sizeof(float)));
cuda_safe_mem(cudaMemcpy(device_force_density, force_density,
3 * sizeof(float), cudaMemcpyHostToDevice));
KERNELCALL(set_force_density, dim_grid_flag, threads_per_block_flag,
single_nodeindex, device_force_density, node_f);
cudaFree(device_velocity);
cudaFree(device_force_density);
}

/** Reinitialize parameters
Expand Down
Loading