espressomd · bors · Sep 11, 2019 · Sep 9, 2019 · Sep 9, 2019 · Sep 9, 2019
diff --git a/samples/billiard.py b/samples/billiard.py
@@ -186,7 +186,7 @@ def main():
     mass = np.array([0.17])
 
     num_types = len(wca_sig)
-    
+
     def mix_eps(eps1, eps2, rule='LB'):
         return math.sqrt(eps1 * eps2)
 

diff --git a/src/core/grid_based_algorithms/lb.cpp b/src/core/grid_based_algorithms/lb.cpp
@@ -174,6 +174,45 @@ static double fluidstep = 0.0;
 #include "grid.hpp"
 
 /********************** The Main LB Part *************************************/
+std::vector<LB_FluidNode>
+lb_get_initialized_fields(LB_Parameters const &lb_parameters,
+                          Lattice const &lb_lattice) {
+  std::vector<LB_FluidNode> fields(lb_lattice.halo_grid_volume);
+  for (auto &field : fields) {
+    field.force_density = lb_parameters.ext_force_density;
+#ifdef LB_BOUNDARIES
+    field.boundary = false;
+#endif // LB_BOUNDARIES
+  }
+  return fields;
+}
+
+/** (Re-)allocate memory for the fluid and initialize pointers. */
+void lb_realloc_fluid(LB_FluidData &lb_fluid_a, LB_FluidData &lb_fluid_b,
+                      const Lattice::index_t halo_grid_volume,
+                      LB_Fluid &lb_fluid, LB_Fluid &lb_fluid_post) {
+  const std::array<int, 2> size = {{D3Q19::n_vel, halo_grid_volume}};
+
+  lb_fluid_a.resize(size);
+  lb_fluid_b.resize(size);
+
+  using Utils::Span;
+  for (int i = 0; i < size[0]; i++) {
+    lb_fluid[i] = Span<double>(lb_fluid_a[i].origin(), size[1]);
+    lb_fluid_post[i] = Span<double>(lb_fluid_b[i].origin(), size[1]);
+  }
+}
+
+void lb_set_equilibrium_populations(const Lattice &lb_lattice,
+                                    const LB_Parameters &lb_parameters) {
+  for (Lattice::index_t index = 0; index < lb_lattice.halo_grid_volume;
+       ++index) {
+    lb_set_population_from_density_momentum_density_stress(
+        index, lb_parameters.density, Utils::Vector3d{} /*momentum density*/,
+        Utils::Vector6d{} /*stress*/);
+  }
+}
+
 void lb_init(const LB_Parameters &lb_parameters) {
   if (lb_parameters.agrid <= 0.0) {
     runtimeErrorMsg()
@@ -183,7 +222,6 @@ void lb_init(const LB_Parameters &lb_parameters) {
     return;
 
   /* initialize the local lattice domain */
-
   try {
     lblattice = Lattice(lb_parameters.agrid, 0.5 /*offset*/, 1 /*halo size*/,
                         local_geo.length(), local_geo.my_right(),
@@ -195,40 +233,28 @@ void lb_init(const LB_Parameters &lb_parameters) {
 
   /* allocate memory for data structures */
   lb_realloc_fluid(lbfluid_a, lbfluid_b, lblattice.halo_grid_volume, lbfluid,
-                   lbfluid_post, lbfields);
+                   lbfluid_post);
+
+  lbfields = lb_get_initialized_fields(lbpar, lblattice);
 
   /* prepare the halo communication */
   lb_prepare_communication(update_halo_comm, lblattice);
 
   /* initialize derived parameters */
   lb_reinit_parameters(lbpar);
 
-  /* setup the initial populations */
-  lb_reinit_fluid(lbfields, lblattice, lbpar);
-}
-
-void lb_reinit_fluid(std::vector<LB_FluidNode> &lb_fields,
-                     const Lattice &lb_lattice,
-                     const LB_Parameters &lb_parameters) {
-  std::fill(lb_fields.begin(), lb_fields.end(), LB_FluidNode());
-  /* default values for fields in lattice units */
-  Utils::Vector3d momentum_density{};
-  Utils::Vector6d stress{};
-
-  for (Lattice::index_t index = 0; index < lb_lattice.halo_grid_volume;
-       ++index) {
-    // sets equilibrium distribution
-    lb_set_population_from_density_momentum_density_stress(
-        index, lb_parameters.density, momentum_density, stress);
-
-#ifdef LB_BOUNDARIES
-    lb_fields[index].boundary = 0;
-#endif // LB_BOUNDARIES
-  }
+  lb_set_equilibrium_populations(lblattice, lbpar);
 
 #ifdef LB_BOUNDARIES
   LBBoundaries::lb_init_boundaries();
-#endif // LB_BOUNDARIES
+#endif
+}
+
+void lb_reinit_fluid(std::vector<LB_FluidNode> &lb_fields,
+                     Lattice const &lb_lattice,
+                     LB_Parameters const &lb_parameters) {
+  lb_set_equilibrium_populations(lb_lattice, lb_parameters);
+  lb_fields = lb_get_initialized_fields(lb_parameters, lb_lattice);
 }
 
 void lb_reinit_parameters(LB_Parameters &lb_parameters) {
@@ -589,25 +615,6 @@ void lb_fluid_set_rng_state(uint64_t counter) {
 
 /***********************************************************************/
 
-/** (Re-)allocate memory for the fluid and initialize pointers. */
-void lb_realloc_fluid(LB_FluidData &lb_fluid_a, LB_FluidData &lb_fluid_b,
-                      const Lattice::index_t halo_grid_volume,
-                      LB_Fluid &lb_fluid, LB_Fluid &lb_fluid_post,
-                      std::vector<LB_FluidNode> &lb_fields) {
-  const std::array<int, 2> size = {{D3Q19::n_vel, halo_grid_volume}};
-
-  lb_fluid_a.resize(size);
-  lb_fluid_b.resize(size);
-
-  using Utils::Span;
-  for (int i = 0; i < size[0]; i++) {
-    lb_fluid[i] = Span<double>(lb_fluid_a[i].origin(), size[1]);
-    lb_fluid_post[i] = Span<double>(lb_fluid_b[i].origin(), size[1]);
-  }
-
-  lb_fields.resize(halo_grid_volume);
-}
-
 /** Set up the structures for exchange of the halo regions.
  *  See also \ref halo.cpp
  */
@@ -940,18 +947,6 @@ inline void lb_collide_stream() {
   }
 #endif // LB_BOUNDARIES
 
-#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS
-  // Safeguard the node forces so that we can later use them for the IBM
-  // particle update
-  // In the following loop the lbfields[XX].force are reset to zero
-  // Safeguard the node forces so that we can later use them for the IBM
-  // particle update In the following loop the lbfields[XX].force are reset to
-  // zero
-  for (int i = 0; i < lblattice.halo_grid_volume; ++i) {
-    lbfields[i].force_density_buf = lbfields[i].force_density;
-  }
-#endif
-
   Lattice::index_t index = lblattice.halo_offset;
   for (int z = 1; z <= lblattice.grid[2]; z++) {
     for (int y = 1; y <= lblattice.grid[1]; y++) {
@@ -976,6 +971,12 @@ inline void lb_collide_stream() {
           auto const modes_with_forces =
               lb_apply_forces(index, thermalized_modes, lbpar, lbfields);
 
+#ifdef VIRTUAL_SITES_INERTIALESS_TRACERS
+          // Safeguard the node forces so that we can later use them for the IBM
+          // particle update
+          lbfields[index].force_density_buf = lbfields[index].force_density;
+#endif
+
           /* reset the force density */
           lbfields[index].force_density = lbpar.ext_force_density;
 

diff --git a/src/core/grid_based_algorithms/lb.hpp b/src/core/grid_based_algorithms/lb.hpp
@@ -143,13 +143,6 @@ extern Lattice lblattice;
 
 extern HaloCommunicator update_halo_comm;
 
-void lb_realloc_fluid(boost::multi_array<double, 2> &lb_fluid_a,
-                      boost::multi_array<double, 2> &lb_fluid_b,
-                      Lattice::index_t halo_grid_volume,
-                      std::array<Utils::Span<double>, 19> &lb_fluid,
-                      std::array<Utils::Span<double>, 19> &lb_fluid_post,
-                      std::vector<LB_FluidNode> &lb_fields);
-
 void lb_init(const LB_Parameters &lb_parameters);
 
 void lb_reinit_fluid(std::vector<LB_FluidNode> &lb_fields,
@@ -287,6 +280,9 @@ void lb_calc_fluid_momentum(double *result, const LB_Parameters &lb_parameters,
                             const std::vector<LB_FluidNode> &lb_fields,
                             const Lattice &lb_lattice);
 void lb_collect_boundary_forces(double *result);
+std::vector<LB_FluidNode>
+lb_get_initialized_fields(LB_Parameters const &lb_parameters,
+                          Lattice const &lb_lattice);
 
 /*@}*/
 

diff --git a/src/core/grid_based_algorithms/lb_interface.cpp b/src/core/grid_based_algorithms/lb_interface.cpp
@@ -89,6 +89,17 @@ void mpi_lb_set_population(Utils::Vector3i const &index,
 
 REGISTER_CALLBACK(mpi_lb_set_population)
 
+void mpi_lb_set_force_density(Utils::Vector3i const &index,
+                              Utils::Vector3d const &force_density) {
+  lb_set(index, [&](auto index) {
+    auto const linear_index =
+        get_linear_index(lblattice.local_index(index), lblattice.halo_grid);
+    lbfields[linear_index].force_density = force_density;
+  });
+}
+
+REGISTER_CALLBACK(mpi_lb_set_force_density)
+
 auto mpi_lb_get_momentum_density(Utils::Vector3i const &index) {
   return lb_calc_fluid_kernel(index, [&](auto modes, auto force_density) {
     return lb_calc_momentum_density(modes, force_density);
@@ -1265,6 +1276,7 @@ void lb_lbnode_set_velocity(const Utils::Vector3i &ind,
         lb_get_population_from_density_momentum_density_stress(
             density, momentum_density, stress);
     mpi_call_all(mpi_lb_set_population, ind, population);
+    mpi_call_all(mpi_lb_set_force_density, ind, Utils::Vector3d{});
   }
 }
 
@@ -1308,6 +1320,7 @@ void lb_lbfluid_on_lb_params_change(LBParam field) {
     break;
   case LBParam::VISCOSITY:
   case LBParam::EXT_FORCE_DENSITY:
+    lbfields = lb_get_initialized_fields(lbpar, lblattice);
   case LBParam::BULKVISC:
   case LBParam::KT:
   case LBParam::GAMMA_ODD:

diff --git a/src/core/grid_based_algorithms/lbgpu.hpp b/src/core/grid_based_algorithms/lbgpu.hpp
@@ -213,10 +213,6 @@ void lb_init_boundaries_GPU(int n_lb_boundaries, int number_of_boundnodes,
                             int *host_boundary_index_list,
                             float *lb_bounday_velocity);
 #endif
-void lb_init_extern_nodeforcedensities_GPU(
-    int n_extern_node_force_densities,
-    LB_extern_nodeforcedensity_gpu *host_extern_node_force_densities,
-    LB_parameters_gpu *lbpar_gpu);
 
 void lb_set_agrid_gpu(double agrid);
 

diff --git a/src/core/grid_based_algorithms/lbgpu_cuda.cu b/src/core/grid_based_algorithms/lbgpu_cuda.cu
@@ -1070,7 +1070,7 @@ __device__ void apply_forces(unsigned int index, Utils::Array<float, 19> &mode,
  *  @param[in]  node_f  Local node force
  *  @param[in]  index   Node index / thread index
  *  @param[in]  print_index  Node index / thread index
- *  TODO: code duplication with \ref calc_values_from_m_in_LB_units
+ *  TODO: code duplication with \ref calc_values_from_m
  */
 __device__ void
 calc_values_in_LB_units(LB_nodes_gpu n_a, Utils::Array<float, 19> &mode,
@@ -1188,10 +1188,9 @@ calc_values_in_LB_units(LB_nodes_gpu n_a, Utils::Array<float, 19> &mode,
  *  @param[out] j_out         Momentum
  *  @param[out] pi_out        Pressure tensor
  */
-__device__ void
-calc_values_from_m_in_LB_units(Utils::Array<float, 19> &mode_single,
-                               LB_rho_v_gpu *d_v_single, float *rho_out,
-                               float *j_out, float *pi_out) {
+__device__ void calc_values_from_m(Utils::Array<float, 19> &mode_single,
+                                   LB_rho_v_gpu *d_v_single, float *rho_out,
+                                   float *j_out, float *pi_out) {
   Utils::Array<float, 6> modes_from_pi_eq{};
   Utils::Array<float, 6> j{};
   float Rho;
@@ -1832,6 +1831,21 @@ __global__ void calc_n_from_rho_j_pi(LB_nodes_gpu n_a, LB_rho_v_gpu *d_v,
   }
 }
 
+__global__ void set_force_density(int single_nodeindex, float *force_density,
+                                  LB_node_force_density_gpu node_f) {
+  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
+                       blockDim.x * blockIdx.x + threadIdx.x;
+
+  if (index == 0) {
+    node_f.force_density[0 * para->number_of_nodes + single_nodeindex] =
+        force_density[0];
+    node_f.force_density[1 * para->number_of_nodes + single_nodeindex] =
+        force_density[1];
+    node_f.force_density[2 * para->number_of_nodes + single_nodeindex] =
+        force_density[2];
+  }
+}
+
 /** Kernel to calculate local populations from hydrodynamic fields
  *  from given flow field velocities. The mapping is given in terms of
  *  the equilibrium distribution.
@@ -1875,8 +1889,8 @@ __global__ void set_u_from_rho_v_pi(LB_nodes_gpu n_a, int single_nodeindex,
     // Calculate the density, velocity, and pressure tensor
     // in LB unit for this node
 
-    calc_values_from_m_in_LB_units(mode_for_pi, &d_v[single_nodeindex],
-                                   &rho_from_m, j_from_m, pi_from_m);
+    calc_values_from_m(mode_for_pi, &d_v[single_nodeindex], &rho_from_m,
+                       j_from_m, pi_from_m);
 
     // Take LB component density and calculate the equilibrium part
     local_rho = rho_from_m;
@@ -2014,34 +2028,12 @@ __global__ void reinit_node_force(LB_node_force_density_gpu node_f) {
                        blockDim.x * blockIdx.x + threadIdx.x;
 
   if (index < para->number_of_nodes) {
-    node_f.force_density[0 * para->number_of_nodes + index] = 0.0f;
-    node_f.force_density[1 * para->number_of_nodes + index] = 0.0f;
-    node_f.force_density[2 * para->number_of_nodes + index] = 0.0f;
-  }
-}
-
-/** Set external force on single nodes kernel
- *  @param[in]  n_extern_node_force_densities  Number of nodes
- *  @param[in]  extern_node_force_densities    External node force array
- *  @param[out] node_f                         Node force struct
- */
-__global__ void init_extern_node_force_densities(
-    int n_extern_node_force_densities,
-    LB_extern_nodeforcedensity_gpu *extern_node_force_densities,
-    LB_node_force_density_gpu node_f) {
-  unsigned int index = blockIdx.y * gridDim.x * blockDim.x +
-                       blockDim.x * blockIdx.x + threadIdx.x;
-  float factor = powf(para->agrid, 2) * para->tau * para->tau;
-  if (index < n_extern_node_force_densities) {
-    node_f.force_density[0 * para->number_of_nodes +
-                         extern_node_force_densities[index].index] =
-        extern_node_force_densities[index].force_density[0] * factor;
-    node_f.force_density[1 * para->number_of_nodes +
-                         extern_node_force_densities[index].index] =
-        extern_node_force_densities[index].force_density[1] * factor;
-    node_f.force_density[2 * para->number_of_nodes +
-                         extern_node_force_densities[index].index] =
-        extern_node_force_densities[index].force_density[2] * factor;
+    node_f.force_density[0 * para->number_of_nodes + index] =
+        para->ext_force_density[0];
+    node_f.force_density[1 * para->number_of_nodes + index] =
+        para->ext_force_density[1];
+    node_f.force_density[2 * para->number_of_nodes + index] =
+        para->ext_force_density[2];
   }
 }
 
@@ -2583,42 +2575,6 @@ void lb_reinit_extern_nodeforce_GPU(LB_parameters_gpu *lbpar_gpu) {
 
   KERNELCALL(reinit_node_force, dim_grid, threads_per_block, node_f);
 }
-/** Setup and call extern single node force initialization from the host
- *  @param n_extern_node_force_densities     Number of nodes on which the
- *                                           external force has to be applied
- *  @param host_extern_node_force_densities  Host extern node forces
- *  @param lbpar_gpu                         Host parameter struct
- */
-void lb_init_extern_nodeforcedensities_GPU(
-    int n_extern_node_force_densities,
-    LB_extern_nodeforcedensity_gpu *host_extern_node_force_densities,
-    LB_parameters_gpu *lbpar_gpu) {
-
-  size_of_extern_node_force_densities =
-      n_extern_node_force_densities * sizeof(LB_extern_nodeforcedensity_gpu);
-  cuda_safe_mem(cudaMalloc((void **)&extern_node_force_densities,
-                           size_of_extern_node_force_densities));
-  cuda_safe_mem(
-      cudaMemcpy(extern_node_force_densities, host_extern_node_force_densities,
-                 size_of_extern_node_force_densities, cudaMemcpyHostToDevice));
-
-  cuda_safe_mem(cudaMemcpyToSymbol(HIP_SYMBOL(para), lbpar_gpu,
-                                   sizeof(LB_parameters_gpu)));
-
-  int threads_per_block_exf = 64;
-  int blocks_per_grid_exf_y = 4;
-  int blocks_per_grid_exf_x =
-      (n_extern_node_force_densities +
-       threads_per_block_exf * blocks_per_grid_exf_y - 1) /
-      (threads_per_block_exf * blocks_per_grid_exf_y);
-  dim3 dim_grid_exf =
-      make_uint3(blocks_per_grid_exf_x, blocks_per_grid_exf_y, 1);
-
-  KERNELCALL(init_extern_node_force_densities, dim_grid_exf,
-             threads_per_block_exf, n_extern_node_force_densities,
-             extern_node_force_densities, node_f);
-  cudaFree(extern_node_force_densities);
-}
 
 /** Setup and call particle kernel from the host
  *  @tparam no_of_neighbours       The number of neighbours to consider for
@@ -2856,8 +2812,15 @@ void lb_set_node_velocity_GPU(int single_nodeindex, float *host_velocity) {
   KERNELCALL(set_u_from_rho_v_pi, dim_grid_flag, threads_per_block_flag,
              *current_nodes, single_nodeindex, device_velocity, device_rho_v,
              node_f);
-
+  float force_density[3] = {0.0f, 0.0f, 0.0f};
+  float *device_force_density;
+  cuda_safe_mem(cudaMalloc((void **)&device_force_density, 3 * sizeof(float)));
+  cuda_safe_mem(cudaMemcpy(device_force_density, force_density,
+                           3 * sizeof(float), cudaMemcpyHostToDevice));
+  KERNELCALL(set_force_density, dim_grid_flag, threads_per_block_flag,
+             single_nodeindex, device_force_density, node_f);
   cudaFree(device_velocity);
+  cudaFree(device_force_density);
 }
 
 /** Reinitialize parameters