Skip to content

Commit

Permalink
fix(fm): parallel aligned prefix sum
Browse files Browse the repository at this point in the history
  • Loading branch information
DanielSeemaier committed Sep 30, 2024
1 parent b05d656 commit 6acd93e
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 30 deletions.
39 changes: 13 additions & 26 deletions kaminpar-common/parallel/aligned_prefix_sum.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

#include "kaminpar-common/assert.h"
#include "kaminpar-common/math.h"
#include "kaminpar-common/parallel/algorithm.h"
#include "kaminpar-common/parallel/loops.h"

namespace kaminpar::parallel {
Expand All @@ -24,7 +23,6 @@ std::size_t aligned_prefix_sum_seq(OutputIt begin, OutputIt end, AlignedValueLam
return 0;
}

*begin = 0;
--n;
if (n == 0) {
return 0;
Expand All @@ -33,14 +31,17 @@ std::size_t aligned_prefix_sum_seq(OutputIt begin, OutputIt end, AlignedValueLam
for (std::size_t i = 0; i < n; ++i) {
const auto [alignment, value] = l(i);

if (alignment > 0) {
if (i > 0 && alignment > 0) {
*(begin + i) += (alignment - (*(begin + i) % alignment)) % alignment;
KASSERT(*(begin + i) % alignment == 0);
}

*(begin + i + 1) = *(begin + i) + value;
*(begin + i + 1) = (i > 0 ? *(begin + i) : 0) + value;
}

const auto [last_alignment, last_value] = l(n);
*(begin + n) += (last_alignment - (*(begin + n) % last_alignment)) % last_alignment;

return *(begin + n);
}

Expand All @@ -64,19 +65,10 @@ std::size_t aligned_prefix_sum(OutputIt begin, OutputIt end, AlignedValueLambda
const int ncpus = parallel::deterministic_for<std::size_t>(
0,
n,
[&](const std::size_t from, const std::size_t to, const int cpu) {
for (std::size_t i = from; i < to; ++i) {
const auto [alignment, value] = l(i);

if (alignment > 0) {
*(begin + i) += compute_alignment_offset(alignment, *(begin + i));
KASSERT(*(begin + i) % alignment == 0);
}

if (i + 1 < to) {
*(begin + i + 1) = *(begin + i) + value;
}
}
[&](const std::size_t from, const std::size_t to, int) {
aligned_prefix_sum_seq(begin + from, begin + to + 1, [&](const std::size_t i) {
return l(from + i);
});
}
);

Expand All @@ -88,26 +80,21 @@ std::size_t aligned_prefix_sum(OutputIt begin, OutputIt end, AlignedValueLambda
continue;
}

const auto [alignment, value] = l(to - 1);
const std::size_t last_offset = (*(begin + to - 1) += value);

prefix_sums[cpu] = prefix_sums[cpu - 1] + last_offset +
compute_alignment_offset(alignment, prefix_sums[cpu - 1] + last_offset);
const auto value = prefix_sums[cpu - 1] + *(begin + from);
prefix_sums[cpu] = value + compute_alignment_offset(8, value);
}

parallel::deterministic_for<std::size_t>(
0,
n,
[&](const std::size_t from, const std::size_t to, const int cpu) {
for (std::size_t i = from; i < to; ++i) {
for (std::size_t i = from + 1; i < to + 1; ++i) {
*(begin + i) += prefix_sums[cpu];
}
}
);

*(begin + n) += l(n - 1).second;

return *(begin + n);
return *(begin + n) + l(n).second;
}

} // namespace kaminpar::parallel
13 changes: 9 additions & 4 deletions tests/common/parallel/aligned_prefix_sum_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <vector>

#include <gmock/gmock.h>
#include <tbb/global_control.h>

#include "kaminpar-common/parallel/aligned_prefix_sum.h"

Expand Down Expand Up @@ -36,10 +37,10 @@ TEST(ParallelAlignedPrefixSumTest, works_with_aligned_values) {
return std::make_pair(2, 2);
});

EXPECT_EQ(result, 20);
for (std::size_t i = 0; i < 10; ++i) {
EXPECT_EQ(storage[i], 2 * i);
EXPECT_EQ(storage[i] % 2, 0);
}
EXPECT_GE(result, storage.back() + 2);
}

TEST(ParallelAlignedPrefixSumTest, works_with_unaligned_values_4) {
Expand All @@ -52,6 +53,7 @@ TEST(ParallelAlignedPrefixSumTest, works_with_unaligned_values_4) {
for (std::size_t i = 0; i < 10; ++i) {
EXPECT_EQ(storage[i] % 4, 0);
}
EXPECT_GE(result, storage.back() + storage.size() - 1);
}

TEST(ParallelAlignedPrefixSumTest, works_with_unaligned_values_8) {
Expand All @@ -64,18 +66,21 @@ TEST(ParallelAlignedPrefixSumTest, works_with_unaligned_values_8) {
for (std::size_t i = 0; i < 20; ++i) {
EXPECT_EQ(storage[i] % 8, 0);
}
EXPECT_GE(result, storage.back() + storage.size() - 1);
}

TEST(ParallelAlignedPrefixSumTest, works_with_multiple_alignments) {
std::vector<int> storage(20);
const std::size_t result =
parallel::aligned_prefix_sum(storage.begin(), storage.end(), [](const std::size_t i) {
return std::make_pair(2 * i, i);
return std::make_pair(1 << (i % 4), i);
});

for (std::size_t i = 0; i < 20; ++i) {
EXPECT_EQ(storage[i] % (2 * i), 0);
EXPECT_EQ(storage[i] % (1 << (i % 4)), 0);
}

EXPECT_GE(result, storage.back() + storage.size() - 1);
}

} // namespace

0 comments on commit 6acd93e

Please sign in to comment.