Skip to content

Commit

Permalink
test and fix qselect
Browse files Browse the repository at this point in the history
  • Loading branch information
Dominik Rosch committed Oct 10, 2024
1 parent f3d1a8e commit 2b5397b
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 25 deletions.
72 changes: 47 additions & 25 deletions kaminpar-shm/coarsening/sparsification/sparsification_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,44 +44,50 @@ inline void parallel_for_downward_edges(const CSRGraph &g, Lambda function) {
});
}

template <typename T, typename Iterator> T medians_of_medians(Iterator begin, Iterator end);

/**
 * Returns the k-th smallest element (1-based) of the range [begin, end).
 *
 * The input range is left untouched: the elements are copied into a scratch buffer and the
 * selection is performed on that copy.
 *
 * @tparam T Element type; must be copyable and comparable with `<`.
 * @tparam Iterator Iterator over elements convertible to T.
 * @param k Rank of the requested element. The caller must ensure 1 <= k <= distance(begin, end);
 *          other values are not checked and lead to an out-of-bounds access.
 * @param begin Start of the input range.
 * @param end End of the input range.
 * @return A copy of the element that would be at position k - 1 if the range were sorted.
 */
template <typename T, typename Iterator>
T sortselect_k_smallest(size_t k, Iterator begin, Iterator end) {
  std::vector<T> candidates(begin, end);

  // Only the element at rank k is needed, so a partial ordering is sufficient.
  const auto kth =
      candidates.begin() + static_cast<typename std::vector<T>::difference_type>(k - 1);
  std::nth_element(candidates.begin(), kth, candidates.end());
  return *kth;
}

size_t size = begin - end;
if (size == 1)
return *begin;
T pivot = medians_of_medians(begin, end);
template <typename T, typename Iterator>
T quickselect_k_smallest(size_t k, Iterator begin, Iterator end) {
size_t size = std::distance(begin, end);
if (size <= 5)
return sortselect_k_smallest<T, Iterator>(k, begin, end);
T pivot = medians_of_medians<T, Iterator>(begin, end);
tbb::concurrent_vector<T> less = {}, greater = {};
tbb::parallel_for(begin, end, [&](auto x) {
tbb::parallel_for(0ul, size, [&](size_t i) {
T x = begin[i];
if (x <= pivot)
less.push_back(x);
else
greater.push_back(x);
});

if (k < less.size())
return select_k_smallest(k, less.begin(), less.end());
if (k <= less.size())
return quickselect_k_smallest<T, typename tbb::concurrent_vector<T>::iterator>(
k, less.begin(), less.end()
);
else
return select_k_smallest(k - less.size(), greater.begin(), greater.end());
return quickselect_k_smallest<T, typename tbb::concurrent_vector<T>::iterator>(
k - less.size(), greater.begin(), greater.end()
);
}

// Picks a pivot for quickselect_k_smallest(): the range is cut into sections of up to five
// elements, median() is evaluated on every section, and a median of those section medians is
// returned.
// NOTE(review): this looks like the pre-change copy of the function; another definition with the
// same signature follows further down in this listing -- confirm which one is meant to stay.
template <typename T, typename Iterator> T medians_of_medians(Iterator begin, Iterator end) {
// NOTE(review): the operands look swapped -- `begin - end` is the negated length of a forward
// range and wraps around when stored in a size_t.
size_t size = begin - end;
// Five or fewer elements: a single median() call is enough.
if (size <= 5)
return median(begin, end);

// ceil(size / 5) sections; the last one may hold fewer than five elements.
size_t number_of_sections = (size + 4) / 5;
StaticArray<T> medians(number_of_sections);
// NOTE(review): the bounds are an int literal and a size_t; check that parallel_for accepts the
// mixed index types.
tbb::parallel_for(0, number_of_sections, [&](auto i) {
medians[i] = median(begin + 5 * i, begin + std::min(5 * (i + 1), size));
});

// NOTE(review): `Iterator` is passed explicitly although the arguments are iterators of
// `medians` -- verify that the two types agree for every caller.
return quickselect_k_smallest<T, Iterator>(number_of_sections / 2, medians.begin(), medians.end());
}
template <typename T, typename Iterator> T median(Iterator begin, Iterator end) {
size_t size = begin - end;
StaticArray<T> sorted(size);
for (auto i = 0; i != size; i++) {sorted[i] = begin[i];}
size_t size = std::distance(begin, end);
std::vector<T> sorted(size);
for (auto i = 0; i != size; i++) {
sorted[i] = begin[i];
}
std::sort(begin, end);
if (size % 2 == 1) { // odd size
return sorted[size / 2];
Expand All @@ -90,6 +96,22 @@ template <typename T, typename Iterator> T median(Iterator begin, Iterator end)
}
}

/**
 * Computes a pivot for quickselect_k_smallest() following the median-of-medians scheme.
 *
 * The range is cut into consecutive sections of five elements (the last section may be shorter),
 * median() is evaluated on every section in parallel, and the median of these section medians is
 * selected via quickselect_k_smallest().
 *
 * Note that the sections are handed to median() as sub-ranges of the input, so whatever median()
 * does to its argument range also happens to [begin, end).
 *
 * @tparam T Element type.
 * @tparam Iterator Random access iterator over the input range.
 * @param begin Start of the input range.
 * @param end End of the input range.
 * @return The median of the section medians; for ranges of at most five elements, the result of
 *         median() on the whole range.
 */
template <typename T, typename Iterator> T medians_of_medians(Iterator begin, Iterator end) {
  const size_t size = std::distance(begin, end);
  if (size <= 5) {
    return median<T, Iterator>(begin, end);
  }

  const size_t number_of_sections = (size + 4) / 5; // ceil(size / 5)
  StaticArray<T> medians(number_of_sections);
  // Both bounds have type size_t so that the index type of parallel_for is deduced unambiguously.
  tbb::parallel_for(static_cast<size_t>(0), number_of_sections, [&](const size_t section) {
    const size_t first = 5 * section;
    const size_t last = std::min(first + 5, size);
    medians[section] = median<T, Iterator>(begin + first, begin + last);
  });

  // Ranks are 1-based, hence the middle of n values has rank ceil(n / 2); plain n / 2 would pick
  // the minimum out of three medians.
  return quickselect_k_smallest<T, typename StaticArray<T>::iterator>(
      (number_of_sections + 1) / 2, medians.begin(), medians.end()
  );
}

template <typename WeightIterator>
StaticArray<size_t>
sample_k_without_replacement(WeightIterator weights_begin, WeightIterator weights_end, size_t k) {
Expand Down
3 changes: 3 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ kaminpar_add_shm_test(test_shm_partition_utils shm/graphutils/partition_utils_te
# KaMinPar -> Coarsening
kaminpar_add_shm_test(test_shm_graph_utils shm/coarsening/cluster_contraction_test.cc)

# KaMinPar -> Coarsening -> Sparsification
kaminpar_add_shm_test(test_shm_sparsification_utils shm/coarsening/sparsification/sparsfication_utils_test.cc)

# KaMinPar -> Data structures
kaminpar_add_shm_test(test_shm_graph shm/datastructures/graph_test.cc)
kaminpar_add_shm_test(test_shm_delta_partitioned_graph shm/datastructures/delta_partitioned_graph_test.cc)
Expand Down
42 changes: 42 additions & 0 deletions tests/shm/coarsening/sparsification/sparsfication_utils_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#include <gmock/internal/gmock-internal-utils.h>
#include <gtest/gtest.h>

#include "kaminpar-shm/coarsening/sparsification/sparsification_utils.h"
namespace kaminpar::shm::testing {
// Selecting rank k (1-based) from a permutation of 1..10 must return k itself.
TEST(SparsificationUtils, QselctOnPermutation) {
  std::vector<int> permutation_of_1_to_10 = {8, 1, 3, 5, 7, 9, 2, 10, 6, 4};
  // Unsigned literal: size() is a size_t, comparing it to an int triggers sign-compare warnings.
  ASSERT_EQ(permutation_of_1_to_10.size(), 10u);
  ASSERT_EQ(std::distance(permutation_of_1_to_10.begin(), permutation_of_1_to_10.end()), 10);

  for (size_t k = 1; k <= permutation_of_1_to_10.size(); k++) {
    // The selected element is an int; compare it against k in the same type.
    ASSERT_EQ(
        sparsification::utils::quickselect_k_smallest<int>(
            k, permutation_of_1_to_10.begin(), permutation_of_1_to_10.end()
        ),
        static_cast<int>(k)
    );
  }
}

// Cross-checks quickselect against a fully sorted copy of the same random input for a handful of
// randomly drawn ranks.
TEST(SparsificationUtils, QselectOnRandomNumbers) {
  const size_t size = 1024;
  StaticArray<double> numbers(size);
  StaticArray<double> sorted_numbers(size);
  for (size_t i = 0; i != size; i++) {
    const double x = Random::instance().random_double();
    sorted_numbers[i] = x;
    numbers[i] = x;
  }
  std::sort(sorted_numbers.begin(), sorted_numbers.end());

  // Draw all ranks up front, then verify each of them.
  const size_t number_of_ks = 42;
  std::vector<size_t> ks(number_of_ks);
  for (size_t i = 0; i != number_of_ks; i++) {
    ks[i] = Random::instance().random_index(1, size + 1);
  }

  for (const size_t k : ks) {
    // Both sides are copies of the very same doubles, so exact equality is intended here.
    ASSERT_EQ(
        sparsification::utils::quickselect_k_smallest<double>(k, numbers.begin(), numbers.end()),
        sorted_numbers[k - 1]
    );
  }
}
} // namespace kaminpar::shm::testing

0 comments on commit 2b5397b

Please sign in to comment.