Skip to content

Commit

Permalink
core: Fix broken Utils::Mpi::gather_buffer()
Browse files Browse the repository at this point in the history
The function must move the first n_elem of the buffer on rank root
to the correct place, otherwise the first n_elem of the buffer on
rank 0 will overwrite them, and a hole of size n_elem can be found
somewhere in the final buffer. This bug doesn't affect code where
root is 0.
  • Loading branch information
jngrad committed Dec 26, 2020
1 parent 1e66474 commit 049b5f2
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 10 deletions.
40 changes: 30 additions & 10 deletions src/utils/include/utils/mpi/gather_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,16 @@ namespace Mpi {
*
* Gathers buffers with different lengths from all nodes to root.
* The buffer is assumed to be large enough to hold the data from
* all the nodes and is owned by the caller. On the root node no
* data is copied, and the first n_elem elements of buffer are not
* touched. This combines a common combination of MPI_Gather and
* MPI_{Send,Recv}.
* all the nodes and is owned by the caller. On the @p root node,
* the first @p n_elem elements of @p buffer are moved, if need
* be. On the other nodes, @p buffer is not touched.
*
* This encapsulates a common combination of <tt>MPI_Gather()</tt>
* and <tt>MPI_{Send,Recv}()</tt>.
*
* @param buffer On the master the target buffer that has to be
large enough to hold all elements and has the local
part in the beginning. On the slaves the local buffer.
* large enough to hold all elements and has the local
* part in the beginning. On the slaves the local buffer.
* @param n_elem The number of elements in the local buffer.
* @param comm The MPI communicator.
* @param root The rank where the data should be gathered.
Expand All @@ -63,11 +65,19 @@ int gather_buffer(T *buffer, int n_elem, boost::mpi::communicator comm,
auto const total_size =
detail::size_and_offset<T>(sizes, displ, n_elem, comm, root);

/* Move the original data to its new location */
if (sizes[root] && displ[root]) {
for (int i = sizes[root]; i >= 0; --i) {
buffer[i + displ[root]] = buffer[i];
}
}

/* Gather data */
gatherv(comm, buffer, 0, buffer, sizes.data(), displ.data(), root);

return total_size;
}
/* Send local size */
detail::size_and_offset(n_elem, comm, root);
/* Send data */
gatherv(comm, buffer, n_elem, static_cast<T *>(nullptr), nullptr, nullptr,
Expand All @@ -80,12 +90,15 @@ int gather_buffer(T *buffer, int n_elem, boost::mpi::communicator comm,
* @brief Gather buffer with different size on each node.
*
* Gathers buffers with different lengths from all nodes to root.
* The buffer is resized to the total size. On the root node no
* data is copied, and the first n_elem elements of buffer are not
* touched. On the slaves, the buffer is not touched.
* The buffer is resized to the total size. On the @p root node,
* the first @p n_elem elements of @p buffer are moved, if need
* be. On the other nodes, @p buffer is not touched.
*
* This encapsulates a common combination of <tt>MPI_Gather()</tt>
* and <tt>MPI_{Send,Recv}()</tt>.
*
* @param buffer On the master the target buffer that has the local
part in the beginning. On the slaves the local buffer.
* part in the beginning. On the slaves the local buffer.
* @param comm The MPI communicator.
* @param root The rank where the data should be gathered.
*/
Expand All @@ -104,6 +117,13 @@ void gather_buffer(std::vector<T, Allocator> &buffer,
/* Resize the buffer */
buffer.resize(tot_size);

/* Move the original data to its new location */
if (sizes[root] && displ[root]) {
for (int i = sizes[root]; i >= 0; --i) {
buffer[i + displ[root]] = buffer[i];
}
}

/* Gather data */
gatherv(comm, buffer.data(), buffer.size(), buffer.data(), sizes.data(),
displ.data(), root);
Expand Down
12 changes: 12 additions & 0 deletions src/utils/tests/gather_buffer_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,12 @@ BOOST_AUTO_TEST_CASE(pointer) {
check_pointer(world, 0);
}

BOOST_AUTO_TEST_CASE(pointer_overlap) {
mpi::communicator world;
if (world.size() >= 2)
check_pointer(world, 1);
}

BOOST_AUTO_TEST_CASE(pointer_root) {
mpi::communicator world;

Expand All @@ -161,6 +167,12 @@ BOOST_AUTO_TEST_CASE(vector) {
check_vector(world, 0);
}

BOOST_AUTO_TEST_CASE(vector_overlap) {
mpi::communicator world;
if (world.size() >= 2)
check_vector(world, 1);
}

BOOST_AUTO_TEST_CASE(vector_root) {
mpi::communicator world;

Expand Down

0 comments on commit 049b5f2

Please sign in to comment.