Merge branch 'dsalwasser/main'

KaHIP · Jul 31, 2024 · 716c742 · 716c742
2 parents e942b97 + b2b066c
commit 716c742
Show file tree

Hide file tree

Showing 49 changed files with 3,362 additions and 2,499 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -73,21 +73,18 @@ option(KAMINPAR_COMPRESSION_EDGE_WEIGHTS "Whether to compress edge weights." ON)
 option(KAMINPAR_COMPRESSION_HIGH_DEGREE_ENCODING "Use high-degree encoding for the compressed graph." ON)
 option(KAMINPAR_COMPRESSION_INTERVAL_ENCODING "Use interval encoding for the compressed graph." ON)
 option(KAMINPAR_COMPRESSION_RUN_LENGTH_ENCODING "Use run-length encoding for the compressed graph." OFF)
-option(KAMINPAR_COMPRESSION_STREAM_ENCODING "Use stream encoding for the compressed graph." OFF)
+option(KAMINPAR_COMPRESSION_STREAMVBYTE_ENCODING "Use StreamVByte encoding for the compressed graph." OFF)
 option(KAMINPAR_COMPRESSION_FAST_DECODING "Use fast decoding for the compressed graph." OFF)
-option(KAMINPAR_COMPRESSION_ISOLATED_NODES_SEPARATION "Whether all isolated nodes are the last nodes of the input graph" OFF)
 
 if (KAMINPAR_COMPRESSION_RUN_LENGTH_ENCODING AND KAMINPAR_COMPRESSION_STREAM_ENCODING)
-    message(FATAL_ERROR "Either run-length or stream encoding can be used for varints but not both.")
+    message(FATAL_ERROR "Either run-length or StreamVByte encoding can be used for varints but not both.")
 endif ()
 
 if (KAMINPAR_64BIT_NODE_IDS AND KAMINPAR_COMPRESSION_STREAM_ENCODING)
-    message(FATAL_ERROR "Stream encoding cannot be used with 64-bit NodeIDs.")
+    message(FATAL_ERROR "StreamVByte encoding cannot be used with 64-bit NodeIDs.")
 endif ()
 
-if (KAMINPAR_COMPRESSION_EDGE_WEIGHTS AND KAMINPAR_COMPRESSION_STREAM_ENCODING)
-    message(FATAL_ERROR "Stream encoding cannot be used together with compressed edge weights.")
-elseif (KAMINPAR_COMPRESSION_EDGE_WEIGHTS AND KAMINPAR_COMPRESSION_RUN_LENGTH_ENCODING)
+if (KAMINPAR_COMPRESSION_EDGE_WEIGHTS AND KAMINPAR_COMPRESSION_RUN_LENGTH_ENCODING)
     message(FATAL_ERROR "Run-length encoding cannot be used together with compressed edge weights.")
 endif ()
 
@@ -245,11 +242,11 @@ else ()
     message("  Run-length encoding: disabled")
 endif ()
 
-if (KAMINPAR_COMPRESSION_STREAM_ENCODING)
-    list(APPEND KAMINPAR_DEFINITIONS "-DKAMINPAR_COMPRESSION_STREAM_ENCODING")
-    message("  Stream encoding: enabled")
+if (KAMINPAR_COMPRESSION_STREAMVBYTE_ENCODING)
+    list(APPEND KAMINPAR_DEFINITIONS "-DKAMINPAR_COMPRESSION_STREAMVBYTE_ENCODING")
+    message("  StreamVByte encoding: enabled")
 else ()
-    message("  Stream encoding: disabled")
+    message("  StreamVByte encoding: disabled")
 endif ()
 
 if (KAMINPAR_COMPRESSION_FAST_DECODING)
@@ -260,13 +257,6 @@ else ()
     message("  Fast decoding: disabled")
 endif ()
 
-if (KAMINPAR_COMPRESSION_ISOLATED_NODES_SEPARATION)
-    list(APPEND KAMINPAR_DEFINITIONS "-DKAMINPAR_COMPRESSION_ISOLATED_NODES_SEPARATION")
-    message("  Isolated nodes separation: enabled")
-else ()
-    message("  Isolated nodes separation: disabled")
-endif ()
-
 if (KAMINPAR_64BIT_NODE_IDS OR KAMINPAR_64BIT_IDS)
     list(APPEND KAMINPAR_DEFINITIONS "-DKAMINPAR_64BIT_NODE_IDS")
     set(KAMINPAR_SHM_NODE_ID_STR "std::uint64_t")

diff --git a/apps/benchmarks/shm_variable_length_codec_benchmark.cc b/apps/benchmarks/shm_variable_length_codec_benchmark.cc
@@ -13,9 +13,9 @@
 #include "kaminpar-cli/CLI11.h"
 
 #include "kaminpar-common/console_io.h"
-#include "kaminpar-common/graph-compression/varint_codec.h"
-#include "kaminpar-common/graph-compression/varint_run_length_codec.h"
-#include "kaminpar-common/graph-compression/varint_stream_codec.h"
+#include "kaminpar-common/graph-compression/streamvbyte.h"
+#include "kaminpar-common/graph-compression/varint.h"
+#include "kaminpar-common/graph-compression/varint_rle.h"
 #include "kaminpar-common/logger.h"
 #include "kaminpar-common/timer.h"
 
@@ -112,7 +112,7 @@ sv_encode_values(std::string_view name, const std::size_t count, Lambda &&l) {
   auto encoded_values = std::make_unique<std::uint8_t[]>(count * sizeof(Int) + count);
 
   TIMED_SCOPE(name) {
-    VarIntStreamEncoder<Int> encoder(encoded_values.get(), count);
+    streamvbyte::StreamVByteEncoder<Int> encoder(count, encoded_values.get());
 
     for (std::size_t i = 0; i < count; ++i) {
       const std::size_t bytes_written = encoder.add(l(i));
@@ -218,9 +218,7 @@ void benchmark(
   SCOPED_TIMER(name);
 
   for (std::size_t i = 0; i < count; ++i) {
-    const auto [value, bytes_decoded] = l(values_ptr);
-    values_ptr += bytes_decoded;
-
+    const auto value = l(&values_ptr);
     do_not_optimize(value);
   }
 }
@@ -229,15 +227,15 @@ template <typename Int>
 void benchmark_rle(std::string_view name, const std::size_t count, const std::uint8_t *values_ptr) {
   SCOPED_TIMER(name);
 
-  VarIntRunLengthDecoder<Int> decoder(values_ptr, count);
+  VarIntRunLengthDecoder<Int> decoder(count, values_ptr);
   decoder.decode([](const Int value) { do_not_optimize(value); });
 }
 
 template <typename Int>
 void benchmark_sve(std::string_view name, const std::size_t count, const std::uint8_t *values_ptr) {
   SCOPED_TIMER(name);
 
-  VarIntStreamDecoder<Int> decoder(values_ptr, count);
+  streamvbyte::StreamVByteDecoder<Int> decoder(count, values_ptr);
   decoder.decode([](const Int value) { do_not_optimize(value); });
 }
 
@@ -299,7 +297,7 @@ template <typename Int> void run_benchmark(std::size_t count) {
       encoded_zero_values.get(),
       encoded_max_values.get(),
       encoded_random_values.get(),
-      [](const std::uint8_t *ptr) { return varint_decode_general<Int>(ptr); }
+      [](const std::uint8_t **ptr) { return varint_decode_loop<Int>(ptr); }
   );
 
   benchmark(
@@ -308,9 +306,10 @@ template <typename Int> void run_benchmark(std::size_t count) {
       encoded_zero_values.get(),
       encoded_max_values.get(),
       encoded_random_values.get(),
-      [](const std::uint8_t *ptr) { return varint_decode<Int>(ptr); }
+      [](const std::uint8_t **ptr) { return varint_decode_pext_unrolled<Int>(ptr); }
   );
 
+  /*
   std::vector<std::make_signed_t<Int>> random_signed_values =
       generate_random_values<std::make_signed_t<Int>>(count);
 
@@ -336,6 +335,7 @@ template <typename Int> void run_benchmark(std::size_t count) {
       encoded_random_signed_values.get(),
       [](const std::uint8_t *ptr) { return signed_varint_decode<std::make_signed_t<Int>>(ptr); }
   );
+  */
 
   const auto [rl_encoded_zero_values, rl_encoded_max_values, rl_encoded_random_values] =
       rl_encode_values<Int>(count, random_values);

diff --git a/apps/io/shm_compressed_graph_binary.cc b/apps/io/shm_compressed_graph_binary.cc
@@ -31,8 +31,7 @@ struct CompressedBinaryHeader {
   bool use_high_degree_encoding;
   bool use_interval_encoding;
   bool use_run_length_encoding;
-  bool use_stream_vbyte_encoding;
-  bool use_isolated_nodes_separation;
+  bool use_streamvbyte_encoding;
 
   std::uint64_t high_degree_threshold;
   std::uint64_t high_degree_part_length;
@@ -66,8 +65,7 @@ CompressedBinaryHeader create_header(const CompressedGraph &graph) {
       CompressedGraph::kHighDegreeEncoding,
       CompressedGraph::kIntervalEncoding,
       CompressedGraph::kRunLengthEncoding,
-      CompressedGraph::kStreamEncoding,
-      CompressedGraph::kIsolatedNodesSeparation,
+      CompressedGraph::kStreamVByteEncoding,
 
       CompressedGraph::kHighDegreeThreshold,
       CompressedGraph::kHighDegreePartLength,
@@ -91,12 +89,12 @@ template <typename T> static void write_int(std::ofstream &out, const T id) {
 
 static void write_header(std::ofstream &out, const CompressedBinaryHeader header) {
   const std::uint16_t boolean_values =
-      (header.use_isolated_nodes_separation << 12) | (header.use_stream_vbyte_encoding << 11) |
-      (header.use_run_length_encoding << 10) | (header.use_interval_encoding << 9) |
-      (header.use_high_degree_encoding << 8) | (header.compress_edge_weights << 7) |
-      (header.use_degree_bucket_order << 6) | (header.has_64_bit_edge_weight << 5) |
-      (header.has_64_bit_node_weight << 4) | (header.has_64_bit_edge_id << 3) |
-      (header.has_64_bit_node_id << 2) | (header.has_edge_weights << 1) | (header.has_node_weights);
+      (header.use_streamvbyte_encoding << 11) | (header.use_run_length_encoding << 10) |
+      (header.use_interval_encoding << 9) | (header.use_high_degree_encoding << 8) |
+      (header.compress_edge_weights << 7) | (header.use_degree_bucket_order << 6) |
+      (header.has_64_bit_edge_weight << 5) | (header.has_64_bit_node_weight << 4) |
+      (header.has_64_bit_edge_id << 3) | (header.has_64_bit_node_id << 2) |
+      (header.has_edge_weights << 1) | (header.has_node_weights);
   write_int(out, boolean_values);
 
   write_int(out, header.high_degree_threshold);
@@ -155,14 +153,14 @@ template <typename T> static T read_int(std::ifstream &in) {
 CompressedBinaryHeader read_header(std::ifstream &in) {
   const auto boolean_values = read_int<std::uint16_t>(in);
   return {
-      (boolean_values & 1) != 0,    (boolean_values & 2) != 0,    (boolean_values & 4) != 0,
-      (boolean_values & 8) != 0,    (boolean_values & 16) != 0,   (boolean_values & 32) != 0,
-      (boolean_values & 64) != 0,   (boolean_values & 128) != 0,  (boolean_values & 256) != 0,
-      (boolean_values & 512) != 0,  (boolean_values & 1024) != 0, (boolean_values & 2048) != 0,
-      (boolean_values & 4096) != 0, read_int<std::uint64_t>(in),  read_int<std::uint64_t>(in),
-      read_int<std::uint64_t>(in),  read_int<std::uint64_t>(in),  read_int<std::uint64_t>(in),
-      read_int<std::int64_t>(in),   read_int<std::uint64_t>(in),  read_int<std::uint64_t>(in),
-      read_int<std::uint64_t>(in),  read_int<std::uint64_t>(in),
+      (boolean_values & 1) != 0,   (boolean_values & 2) != 0,    (boolean_values & 4) != 0,
+      (boolean_values & 8) != 0,   (boolean_values & 16) != 0,   (boolean_values & 32) != 0,
+      (boolean_values & 64) != 0,  (boolean_values & 128) != 0,  (boolean_values & 256) != 0,
+      (boolean_values & 512) != 0, (boolean_values & 1024) != 0, (boolean_values & 2048) != 0,
+      read_int<std::uint64_t>(in), read_int<std::uint64_t>(in),  read_int<std::uint64_t>(in),
+      read_int<std::uint64_t>(in), read_int<std::uint64_t>(in),  read_int<std::int64_t>(in),
+      read_int<std::uint64_t>(in), read_int<std::uint64_t>(in),  read_int<std::uint64_t>(in),
+      read_int<std::uint64_t>(in),
   };
 }
 
@@ -263,26 +261,15 @@ void verify_header(const CompressedBinaryHeader header) {
     std::exit(1);
   }
 
-  if (header.use_stream_vbyte_encoding != CompressedGraph::kStreamEncoding) {
-    if (header.use_stream_vbyte_encoding) {
+  if (header.use_streamvbyte_encoding != CompressedGraph::kStreamVByteEncoding) {
+    if (header.use_streamvbyte_encoding) {
       LOG_ERROR << "The stored compressed graph uses stream encoding but this build does not.";
     } else {
       LOG_ERROR << "The stored compressed graph does not use stream encoding but this build does.";
     }
     std::exit(1);
   }
 
-  if (header.use_isolated_nodes_separation != CompressedGraph::kIsolatedNodesSeparation) {
-    if (header.use_isolated_nodes_separation) {
-      LOG_ERROR
-          << "The stored compressed graph uses isolated nodes separation but this build does not.";
-    } else {
-      LOG_ERROR << "The stored compressed graph does not use isolated nodes separation but this "
-                   "build does.";
-    }
-    std::exit(1);
-  }
-
   if (header.high_degree_threshold != CompressedGraph::kHighDegreeThreshold) {
     LOG_ERROR << "The stored compressed graph uses " << header.high_degree_threshold
               << " as the high degree threshold but this build uses "

diff --git a/apps/io/shm_parhip_parser.cc b/apps/io/shm_parhip_parser.cc
@@ -9,7 +9,6 @@
 
 #include <cstddef>
 #include <cstdint>
-#include <fstream>
 #include <functional>
 
 #include <tbb/parallel_for.h>
@@ -105,7 +104,7 @@ class ParHIPHeader {
            (has_node_weights ? num_nodes * _node_weight_width : 0);
   }
 
-  [[nodiscard]] NodeID map_edge_offset(const EdgeID edge_offset) const {
+  [[nodiscard]] EdgeID map_edge_offset(const EdgeID edge_offset) const {
     return (edge_offset - _nodes_offset_base) / _node_id_width;
   }
 
@@ -120,13 +119,13 @@ class ParHIPHeader {
       std::exit(1);
     }
 
-    if (has_64_bit_node_weight && sizeof(NodeWeight) == 4) {
+    if (has_node_weights && has_64_bit_node_weight && sizeof(NodeWeight) == 4) {
       LOG_ERROR
           << "The stored graph uses 64-Bit node weights but this build uses 32-Bit node weights.";
       std::exit(1);
     }
 
-    if (has_64_bit_edge_weight && sizeof(EdgeWeight) == 4) {
+    if (has_edge_weights && has_64_bit_edge_weight && sizeof(EdgeWeight) == 4) {
       LOG_ERROR
           << "The stored graph uses 64-Bit edge weights but this build uses 32-Bit edge weights.";
       std::exit(1);
@@ -351,32 +350,29 @@ CompressedGraph compressed_read_parallel(const std::string &filename, const Node
 
     const bool sort_by_degree_bucket = ordering == NodeOrdering::DEGREE_BUCKETS;
     if (sort_by_degree_bucket) {
-      RECORD("degrees") StaticArray<NodeID> degrees(header.num_nodes, static_array::noinit);
-      TIMED_SCOPE("Read degrees") {
-        tbb::parallel_for(tbb::blocked_range<NodeID>(0, header.num_nodes), [&](const auto &r) {
-          for (NodeID u = r.begin(); u != r.end(); ++u) {
-            degrees[u] = header.map_edge_offset(node(u + 1)) - header.map_edge_offset(node(u));
-          }
-        });
+      const auto degree = [&](const NodeID u) {
+        return static_cast<NodeID>(
+            header.map_edge_offset(node(u + 1)) - header.map_edge_offset(node(u))
+        );
       };
-      const auto [perm, inv_perm] =
-          graph::sort_by_degree_buckets(header.num_nodes, [&](const NodeID u) {
-            return degrees[u];
-          });
 
-      return parallel_compress(
+      auto [perm, inv_perm] = graph::sort_by_degree_buckets(header.num_nodes, degree);
+      CompressedGraph compressed_graph = parallel_compress(
           header.num_nodes,
           header.num_edges,
           header.has_node_weights,
           header.has_edge_weights,
           true,
           [&](const NodeID u) { return inv_perm[u]; },
-          [&](const NodeID u) { return degrees[u]; },
+          degree,
           [&](const NodeID u) { return header.map_edge_offset(node(u)); },
           [&](const EdgeID e) { return perm[edge(e)]; },
           [&](const NodeID u) { return node_weight(u); },
           [&](const EdgeID e) { return edge_weight(e); }
       );
+
+      compressed_graph.set_permutation(std::move(perm));
+      return compressed_graph;
     } else {
       return parallel_compress(
           header.num_nodes,