Skip to content

Commit

Permalink
Merge branch 'dsalwasser/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
DanielSeemaier committed Jul 31, 2024
2 parents e942b97 + b2b066c commit 716c742
Show file tree
Hide file tree
Showing 49 changed files with 3,362 additions and 2,499 deletions.
26 changes: 8 additions & 18 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,21 +73,18 @@ option(KAMINPAR_COMPRESSION_EDGE_WEIGHTS "Whether to compress edge weights." ON)
option(KAMINPAR_COMPRESSION_HIGH_DEGREE_ENCODING "Use high-degree encoding for the compressed graph." ON)
option(KAMINPAR_COMPRESSION_INTERVAL_ENCODING "Use interval encoding for the compressed graph." ON)
option(KAMINPAR_COMPRESSION_RUN_LENGTH_ENCODING "Use run-length encoding for the compressed graph." OFF)
option(KAMINPAR_COMPRESSION_STREAM_ENCODING "Use stream encoding for the compressed graph." OFF)
option(KAMINPAR_COMPRESSION_STREAMVBYTE_ENCODING "Use StreamVByte encoding for the compressed graph." OFF)
option(KAMINPAR_COMPRESSION_FAST_DECODING "Use fast decoding for the compressed graph." OFF)
option(KAMINPAR_COMPRESSION_ISOLATED_NODES_SEPARATION "Whether all isolated nodes are the last nodes of the input graph" OFF)

if (KAMINPAR_COMPRESSION_RUN_LENGTH_ENCODING AND KAMINPAR_COMPRESSION_STREAM_ENCODING)
message(FATAL_ERROR "Either run-length or stream encoding can be used for varints but not both.")
message(FATAL_ERROR "Either run-length or StreamVByte encoding can be used for varints but not both.")
endif ()

if (KAMINPAR_64BIT_NODE_IDS AND KAMINPAR_COMPRESSION_STREAM_ENCODING)
message(FATAL_ERROR "Stream encoding cannot be used with 64-bit NodeIDs.")
message(FATAL_ERROR "StreamVByte encoding cannot be used with 64-bit NodeIDs.")
endif ()

if (KAMINPAR_COMPRESSION_EDGE_WEIGHTS AND KAMINPAR_COMPRESSION_STREAM_ENCODING)
message(FATAL_ERROR "Stream encoding cannot be used together with compressed edge weights.")
elseif (KAMINPAR_COMPRESSION_EDGE_WEIGHTS AND KAMINPAR_COMPRESSION_RUN_LENGTH_ENCODING)
if (KAMINPAR_COMPRESSION_EDGE_WEIGHTS AND KAMINPAR_COMPRESSION_RUN_LENGTH_ENCODING)
message(FATAL_ERROR "Run-length encoding cannot be used together with compressed edge weights.")
endif ()

Expand Down Expand Up @@ -245,11 +242,11 @@ else ()
message(" Run-length encoding: disabled")
endif ()

if (KAMINPAR_COMPRESSION_STREAM_ENCODING)
list(APPEND KAMINPAR_DEFINITIONS "-DKAMINPAR_COMPRESSION_STREAM_ENCODING")
message(" Stream encoding: enabled")
if (KAMINPAR_COMPRESSION_STREAMVBYTE_ENCODING)
list(APPEND KAMINPAR_DEFINITIONS "-DKAMINPAR_COMPRESSION_STREAMVBYTE_ENCODING")
message(" StreamVByte encoding: enabled")
else ()
message(" Stream encoding: disabled")
message(" StreamVByte encoding: disabled")
endif ()

if (KAMINPAR_COMPRESSION_FAST_DECODING)
Expand All @@ -260,13 +257,6 @@ else ()
message(" Fast decoding: disabled")
endif ()

if (KAMINPAR_COMPRESSION_ISOLATED_NODES_SEPARATION)
list(APPEND KAMINPAR_DEFINITIONS "-DKAMINPAR_COMPRESSION_ISOLATED_NODES_SEPARATION")
message(" Isolated nodes separation: enabled")
else ()
message(" Isolated nodes separation: disabled")
endif ()

if (KAMINPAR_64BIT_NODE_IDS OR KAMINPAR_64BIT_IDS)
list(APPEND KAMINPAR_DEFINITIONS "-DKAMINPAR_64BIT_NODE_IDS")
set(KAMINPAR_SHM_NODE_ID_STR "std::uint64_t")
Expand Down
22 changes: 11 additions & 11 deletions apps/benchmarks/shm_variable_length_codec_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
#include "kaminpar-cli/CLI11.h"

#include "kaminpar-common/console_io.h"
#include "kaminpar-common/graph-compression/varint_codec.h"
#include "kaminpar-common/graph-compression/varint_run_length_codec.h"
#include "kaminpar-common/graph-compression/varint_stream_codec.h"
#include "kaminpar-common/graph-compression/streamvbyte.h"
#include "kaminpar-common/graph-compression/varint.h"
#include "kaminpar-common/graph-compression/varint_rle.h"
#include "kaminpar-common/logger.h"
#include "kaminpar-common/timer.h"

Expand Down Expand Up @@ -112,7 +112,7 @@ sv_encode_values(std::string_view name, const std::size_t count, Lambda &&l) {
auto encoded_values = std::make_unique<std::uint8_t[]>(count * sizeof(Int) + count);

TIMED_SCOPE(name) {
VarIntStreamEncoder<Int> encoder(encoded_values.get(), count);
streamvbyte::StreamVByteEncoder<Int> encoder(count, encoded_values.get());

for (std::size_t i = 0; i < count; ++i) {
const std::size_t bytes_written = encoder.add(l(i));
Expand Down Expand Up @@ -218,9 +218,7 @@ void benchmark(
SCOPED_TIMER(name);

for (std::size_t i = 0; i < count; ++i) {
const auto [value, bytes_decoded] = l(values_ptr);
values_ptr += bytes_decoded;

const auto value = l(&values_ptr);
do_not_optimize(value);
}
}
Expand All @@ -229,15 +227,15 @@ template <typename Int>
void benchmark_rle(std::string_view name, const std::size_t count, const std::uint8_t *values_ptr) {
SCOPED_TIMER(name);

VarIntRunLengthDecoder<Int> decoder(values_ptr, count);
VarIntRunLengthDecoder<Int> decoder(count, values_ptr);
decoder.decode([](const Int value) { do_not_optimize(value); });
}

template <typename Int>
void benchmark_sve(std::string_view name, const std::size_t count, const std::uint8_t *values_ptr) {
SCOPED_TIMER(name);

VarIntStreamDecoder<Int> decoder(values_ptr, count);
streamvbyte::StreamVByteDecoder<Int> decoder(count, values_ptr);
decoder.decode([](const Int value) { do_not_optimize(value); });
}

Expand Down Expand Up @@ -299,7 +297,7 @@ template <typename Int> void run_benchmark(std::size_t count) {
encoded_zero_values.get(),
encoded_max_values.get(),
encoded_random_values.get(),
[](const std::uint8_t *ptr) { return varint_decode_general<Int>(ptr); }
[](const std::uint8_t **ptr) { return varint_decode_loop<Int>(ptr); }
);

benchmark(
Expand All @@ -308,9 +306,10 @@ template <typename Int> void run_benchmark(std::size_t count) {
encoded_zero_values.get(),
encoded_max_values.get(),
encoded_random_values.get(),
[](const std::uint8_t *ptr) { return varint_decode<Int>(ptr); }
[](const std::uint8_t **ptr) { return varint_decode_pext_unrolled<Int>(ptr); }
);

/*
std::vector<std::make_signed_t<Int>> random_signed_values =
generate_random_values<std::make_signed_t<Int>>(count);
Expand All @@ -336,6 +335,7 @@ template <typename Int> void run_benchmark(std::size_t count) {
encoded_random_signed_values.get(),
[](const std::uint8_t *ptr) { return signed_varint_decode<std::make_signed_t<Int>>(ptr); }
);
*/

const auto [rl_encoded_zero_values, rl_encoded_max_values, rl_encoded_random_values] =
rl_encode_values<Int>(count, random_values);
Expand Down
49 changes: 18 additions & 31 deletions apps/io/shm_compressed_graph_binary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@ struct CompressedBinaryHeader {
bool use_high_degree_encoding;
bool use_interval_encoding;
bool use_run_length_encoding;
bool use_stream_vbyte_encoding;
bool use_isolated_nodes_separation;
bool use_streamvbyte_encoding;

std::uint64_t high_degree_threshold;
std::uint64_t high_degree_part_length;
Expand Down Expand Up @@ -66,8 +65,7 @@ CompressedBinaryHeader create_header(const CompressedGraph &graph) {
CompressedGraph::kHighDegreeEncoding,
CompressedGraph::kIntervalEncoding,
CompressedGraph::kRunLengthEncoding,
CompressedGraph::kStreamEncoding,
CompressedGraph::kIsolatedNodesSeparation,
CompressedGraph::kStreamVByteEncoding,

CompressedGraph::kHighDegreeThreshold,
CompressedGraph::kHighDegreePartLength,
Expand All @@ -91,12 +89,12 @@ template <typename T> static void write_int(std::ofstream &out, const T id) {

static void write_header(std::ofstream &out, const CompressedBinaryHeader header) {
const std::uint16_t boolean_values =
(header.use_isolated_nodes_separation << 12) | (header.use_stream_vbyte_encoding << 11) |
(header.use_run_length_encoding << 10) | (header.use_interval_encoding << 9) |
(header.use_high_degree_encoding << 8) | (header.compress_edge_weights << 7) |
(header.use_degree_bucket_order << 6) | (header.has_64_bit_edge_weight << 5) |
(header.has_64_bit_node_weight << 4) | (header.has_64_bit_edge_id << 3) |
(header.has_64_bit_node_id << 2) | (header.has_edge_weights << 1) | (header.has_node_weights);
(header.use_streamvbyte_encoding << 11) | (header.use_run_length_encoding << 10) |
(header.use_interval_encoding << 9) | (header.use_high_degree_encoding << 8) |
(header.compress_edge_weights << 7) | (header.use_degree_bucket_order << 6) |
(header.has_64_bit_edge_weight << 5) | (header.has_64_bit_node_weight << 4) |
(header.has_64_bit_edge_id << 3) | (header.has_64_bit_node_id << 2) |
(header.has_edge_weights << 1) | (header.has_node_weights);
write_int(out, boolean_values);

write_int(out, header.high_degree_threshold);
Expand Down Expand Up @@ -155,14 +153,14 @@ template <typename T> static T read_int(std::ifstream &in) {
CompressedBinaryHeader read_header(std::ifstream &in) {
const auto boolean_values = read_int<std::uint16_t>(in);
return {
(boolean_values & 1) != 0, (boolean_values & 2) != 0, (boolean_values & 4) != 0,
(boolean_values & 8) != 0, (boolean_values & 16) != 0, (boolean_values & 32) != 0,
(boolean_values & 64) != 0, (boolean_values & 128) != 0, (boolean_values & 256) != 0,
(boolean_values & 512) != 0, (boolean_values & 1024) != 0, (boolean_values & 2048) != 0,
(boolean_values & 4096) != 0, read_int<std::uint64_t>(in), read_int<std::uint64_t>(in),
read_int<std::uint64_t>(in), read_int<std::uint64_t>(in), read_int<std::uint64_t>(in),
read_int<std::int64_t>(in), read_int<std::uint64_t>(in), read_int<std::uint64_t>(in),
read_int<std::uint64_t>(in), read_int<std::uint64_t>(in),
(boolean_values & 1) != 0, (boolean_values & 2) != 0, (boolean_values & 4) != 0,
(boolean_values & 8) != 0, (boolean_values & 16) != 0, (boolean_values & 32) != 0,
(boolean_values & 64) != 0, (boolean_values & 128) != 0, (boolean_values & 256) != 0,
(boolean_values & 512) != 0, (boolean_values & 1024) != 0, (boolean_values & 2048) != 0,
read_int<std::uint64_t>(in), read_int<std::uint64_t>(in), read_int<std::uint64_t>(in),
read_int<std::uint64_t>(in), read_int<std::uint64_t>(in), read_int<std::int64_t>(in),
read_int<std::uint64_t>(in), read_int<std::uint64_t>(in), read_int<std::uint64_t>(in),
read_int<std::uint64_t>(in),
};
}

Expand Down Expand Up @@ -263,26 +261,15 @@ void verify_header(const CompressedBinaryHeader header) {
std::exit(1);
}

if (header.use_stream_vbyte_encoding != CompressedGraph::kStreamEncoding) {
if (header.use_stream_vbyte_encoding) {
if (header.use_streamvbyte_encoding != CompressedGraph::kStreamVByteEncoding) {
if (header.use_streamvbyte_encoding) {
LOG_ERROR << "The stored compressed graph uses stream encoding but this build does not.";
} else {
LOG_ERROR << "The stored compressed graph does not use stream encoding but this build does.";
}
std::exit(1);
}

if (header.use_isolated_nodes_separation != CompressedGraph::kIsolatedNodesSeparation) {
if (header.use_isolated_nodes_separation) {
LOG_ERROR
<< "The stored compressed graph uses isolated nodes separation but this build does not.";
} else {
LOG_ERROR << "The stored compressed graph does not use isolated nodes separation but this "
"build does.";
}
std::exit(1);
}

if (header.high_degree_threshold != CompressedGraph::kHighDegreeThreshold) {
LOG_ERROR << "The stored compressed graph uses " << header.high_degree_threshold
<< " as the high degree threshold but this build uses "
Expand Down
30 changes: 13 additions & 17 deletions apps/io/shm_parhip_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

#include <cstddef>
#include <cstdint>
#include <fstream>
#include <functional>

#include <tbb/parallel_for.h>
Expand Down Expand Up @@ -105,7 +104,7 @@ class ParHIPHeader {
(has_node_weights ? num_nodes * _node_weight_width : 0);
}

[[nodiscard]] NodeID map_edge_offset(const EdgeID edge_offset) const {
[[nodiscard]] EdgeID map_edge_offset(const EdgeID edge_offset) const {
return (edge_offset - _nodes_offset_base) / _node_id_width;
}

Expand All @@ -120,13 +119,13 @@ class ParHIPHeader {
std::exit(1);
}

if (has_64_bit_node_weight && sizeof(NodeWeight) == 4) {
if (has_node_weights && has_64_bit_node_weight && sizeof(NodeWeight) == 4) {
LOG_ERROR
<< "The stored graph uses 64-Bit node weights but this build uses 32-Bit node weights.";
std::exit(1);
}

if (has_64_bit_edge_weight && sizeof(EdgeWeight) == 4) {
if (has_edge_weights && has_64_bit_edge_weight && sizeof(EdgeWeight) == 4) {
LOG_ERROR
<< "The stored graph uses 64-Bit edge weights but this build uses 32-Bit edge weights.";
std::exit(1);
Expand Down Expand Up @@ -351,32 +350,29 @@ CompressedGraph compressed_read_parallel(const std::string &filename, const Node

const bool sort_by_degree_bucket = ordering == NodeOrdering::DEGREE_BUCKETS;
if (sort_by_degree_bucket) {
RECORD("degrees") StaticArray<NodeID> degrees(header.num_nodes, static_array::noinit);
TIMED_SCOPE("Read degrees") {
tbb::parallel_for(tbb::blocked_range<NodeID>(0, header.num_nodes), [&](const auto &r) {
for (NodeID u = r.begin(); u != r.end(); ++u) {
degrees[u] = header.map_edge_offset(node(u + 1)) - header.map_edge_offset(node(u));
}
});
const auto degree = [&](const NodeID u) {
return static_cast<NodeID>(
header.map_edge_offset(node(u + 1)) - header.map_edge_offset(node(u))
);
};
const auto [perm, inv_perm] =
graph::sort_by_degree_buckets(header.num_nodes, [&](const NodeID u) {
return degrees[u];
});

return parallel_compress(
auto [perm, inv_perm] = graph::sort_by_degree_buckets(header.num_nodes, degree);
CompressedGraph compressed_graph = parallel_compress(
header.num_nodes,
header.num_edges,
header.has_node_weights,
header.has_edge_weights,
true,
[&](const NodeID u) { return inv_perm[u]; },
[&](const NodeID u) { return degrees[u]; },
degree,
[&](const NodeID u) { return header.map_edge_offset(node(u)); },
[&](const EdgeID e) { return perm[edge(e)]; },
[&](const NodeID u) { return node_weight(u); },
[&](const EdgeID e) { return edge_weight(e); }
);

compressed_graph.set_permutation(std::move(perm));
return compressed_graph;
} else {
return parallel_compress(
header.num_nodes,
Expand Down
Loading

0 comments on commit 716c742

Please sign in to comment.