Skip to content

Commit

Permalink
Merged refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
vigna committed Feb 3, 2024
2 parents a1ebc35 + 2c96b46 commit fd9142b
Show file tree
Hide file tree
Showing 89 changed files with 5,997 additions and 4,929 deletions.
44 changes: 22 additions & 22 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,44 +11,44 @@ categories = ["compression", "data-structures"]


[features]
default = ["std"]
std = ["anyhow/std"]
alloc = []
default = []
slow_tests = []
skips = []
be_bins = []
le_bins = []
fuzz = ["dep:arbitrary", "dep:zip", "dsi-bitstream/fuzz"]

[dependencies]
anyhow = { version = "1.0.70", features=["backtrace"]}
java-properties = "1.4.1"
mmap-rs = "0.5.0"
anyhow = { version = "1.0.79", features=["backtrace"]}
java-properties = "2.0.0"
mmap-rs = "0.6.1"
bitvec = { version = "1.0.1", features = ["atomic"] }
num_cpus = "1.15.0"
num_cpus = "1.16.0"
epserde = "0.3.1"
sux = "0.1"
dsi-bitstream = "=0.1.3"
# Bin dependencies
clap = { version = "4.1.6", features = ["derive"] }
dsi-progress-logger = "0.2.0"
log = "0.4.17"
sux = "0.1.3"
dsi-bitstream = "0.2.0"
clap = { version = "4.4.18", features = ["derive"] }
dsi-progress-logger = "0.2.2"
log = "0.4.20"
stderrlog = "0.5.4"
rand = { version = "0.8.5", features = ["small_rng"] }
rayon = "1.7.0"
rayon = "1.8.1"
tempfile = "3.5.0"
#itertools = "0.10.5"
bytemuck = "1.13.1"
arbitrary = { version = "1", features = ["derive"], optional = true }
bytemuck = "1.14.0"
arbitrary = { version = "1.3.2", features = ["derive"], optional = true }
zip = {version="0.6.6", optional=true}
libc = "0.2.147"
itertools = "0.11.0"
nougat = "0.2.4"
lender = "0.2.7"
itertools = "0.12.0"
lender = "0.2.9"
common_traits = "0.10.0"
impl-tools = "0.10.0"
bitflags = "2.4.2"
dary_heap = "0.3.6"
rdst = { version ="0.20.12", features = ["multi-threaded"] }
sealed = "0.5.0"

[dev-dependencies]
rand = { version = "0.8.5", features = ["small_rng"] }
env_logger = "0.9.0"
env_logger = "0.11.0"

[profile.release] # Used for the examples
opt-level = 3 # like --release
Expand Down
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
# `webgraph`
# WebGraph

[![downloads](https://img.shields.io/crates/d/webgraph)](https://crates.io/crates/webgraph)
[![dependents](https://img.shields.io/librariesio/dependents/cargo/webgraph)](https://crates.io/crates/webgraph/reverse_dependencies)
![GitHub CI](https://github.com/vigna/webgraph-rs/actions/workflows/rust.yml/badge.svg)
![license](https://img.shields.io/crates/l/webgraph)
[![](https://tokei.rs/b1/github/vigna/webgraph-rs)](https://github.com/vigna/webgraph-rs).

A pure Rust implementation of the [WebGraph framework](https://webgraph.di.unimi.it/) for graph compression.

# Acknowledgments
## Acknowledgments

This software has been partially supported by project SERICS (PE00000014) under the NRRP MUR program funded by the EU - NGEU,
and by project ANR COREGRAPHIE, grant ANR-20-CE23-0002 of the French Agence Nationale de la Recherche.
111 changes: 111 additions & 0 deletions examples/bench_sort_pairs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* SPDX-FileCopyrightText: 2023 Inria
* SPDX-FileCopyrightText: 2023 Sebastiano Vigna
*
* SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
*/

use std::hint::black_box;

use anyhow::Result;
use clap::Parser;
use dsi_bitstream::traits::BitRead;
use dsi_bitstream::traits::BitWrite;
use dsi_bitstream::traits::Endianness;
use dsi_progress_logger::*;
use rand::rngs::SmallRng;
use rand::RngCore;
use rand::SeedableRng;
use tempfile::Builder;
use webgraph::prelude::*;
// Command-line arguments of the benchmark. The doc comments on the fields
// double as the per-argument help text generated by clap.
#[derive(Parser, Debug)]
#[command(about = "Tests the merge speed of SortPairs", long_about = None)]
struct Args {
    /// The number of random pairs to push and then read back.
    n: usize,
    /// The batch size passed to SortPairs.
    batch: usize,
    /// Use 128-bit labels that are neither read nor written.
    #[arg(short = 'l', long)]
    labeled: bool,
}

/// No-op serializer/deserializer (as we want to check the merge speed).
///
/// The label type on both sides is `u128`; neither implementation touches
/// the bitstream it is given.
#[derive(Debug, Clone)]
struct Mock();
/// Serializing half of [`Mock`]: accepts `u128` labels and discards them.
impl<E, W> BitSerializer<E, W> for Mock
where
    E: Endianness,
    W: BitWrite<E>,
{
    type SerType = u128;

    /// Ignores both the label and the bitstream and always returns `Ok(0)`.
    fn serialize(
        &self,
        _label: &Self::SerType,
        _writer: &mut W,
    ) -> Result<usize, <W as BitWrite<E>>::Error> {
        Ok(0)
    }
}
impl<E: Endianness, W: BitRead<E>> BitDeserializer<E, W> for Mock {
type DeserType = u128;

fn deserialize(&self, _bitstream: &mut W) -> Result<Self::DeserType, <W as BitRead<E>>::Error> {
Ok(0)
}
}

/// Benchmarks `SortPairs`: pushes `n` pseudorandom pairs, then reads `n`
/// items back from the sorted iterator, logging the progress of both phases.
///
/// With `--labeled`, pairs are pushed with a `u128` label handled by the
/// no-op [`Mock`] (de)serializer; otherwise plain pairs are used.
///
/// # Errors
///
/// Returns an error if the logger cannot be initialized, if the temporary
/// directory cannot be created, or if any `SortPairs` operation fails.
///
/// # Panics
///
/// Panics if the sorted iterator yields fewer than `n` items.
pub fn main() -> Result<()> {
    let args = Args::parse();

    // Propagate a logger-initialization failure instead of panicking.
    stderrlog::new()
        .verbosity(2)
        .timestamp(stderrlog::Timestamp::Second)
        .init()?;

    // Temporary directory handed to `SortPairs`.
    let dir = Builder::new().prefix("bench_sort_pairs").tempdir()?;

    if args.labeled {
        let mut sp = SortPairs::<Mock, Mock>::new_labeled(args.batch, dir.path(), Mock(), Mock())?;

        // Fixed seed, so every run pushes the same sequence of pairs.
        let mut r = SmallRng::seed_from_u64(0);

        let mut pl = ProgressLogger::default();

        pl.start("Writing...");
        for _ in 0..args.n {
            sp.push_labeled(r.next_u64() as usize, r.next_u64() as usize, 0)?;
            pl.light_update();
        }
        pl.done();

        let mut iter = sp.iter()?;

        pl.start("Reading...");
        for _ in 0..args.n {
            // `black_box` keeps the optimizer from discarding the read.
            black_box(iter.next().unwrap());
            pl.light_update();
        }
        pl.done();
    } else {
        let mut sp = SortPairs::new(args.batch, dir.path())?;

        // Fixed seed, so every run pushes the same sequence of pairs.
        let mut r = SmallRng::seed_from_u64(0);

        let mut pl = ProgressLogger::default();

        pl.start("Writing...");
        for _ in 0..args.n {
            sp.push(r.next_u64() as usize, r.next_u64() as usize)?;
            pl.light_update();
        }
        pl.done();

        let mut iter = sp.iter()?;

        pl.start("Reading...");
        for _ in 0..args.n {
            // `black_box` keeps the optimizer from discarding the read.
            black_box(iter.next().unwrap());
            pl.light_update();
        }
        pl.done();
    }
    Ok(())
}
2 changes: 1 addition & 1 deletion examples/bench_swh_labels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use clap::Parser;
use dsi_progress_logger::*;
use lender::*;
use std::hint::black_box;
use webgraph::label::swh_labels::SwhLabels;
use webgraph::labels::swh_labels::SwhLabels;
use webgraph::prelude::*;
#[derive(Parser, Debug)]
#[command(about = "Breadth-first visits a graph.", long_about = None)]
Expand Down
55 changes: 41 additions & 14 deletions examples/bench_unit_graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
* SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
*/

mod bench_sort_pairs;

use anyhow::Result;
use clap::Parser;
use dsi_bitstream::prelude::*;
use dsi_progress_logger::*;
use lender::*;
use std::hint::black_box;
Expand All @@ -18,20 +21,21 @@ struct Args {
basename: String,
}

pub fn main() -> Result<()> {
let args = Args::parse();

stderrlog::new()
.verbosity(2)
.timestamp(stderrlog::Timestamp::Second)
.init()
.unwrap();

let graph = webgraph::graph::bvgraph::load(&args.basename)?;
fn bench_impl<E: Endianness + 'static>(args: Args) -> Result<()>
where
for<'a> BufBitReader<E, MemWordReader<u32, &'a [u32]>>: CodeRead<E> + BitSeek,
{
let graph = BVGraph::with_basename(&args.basename)
.endianness::<E>()
.load()?;
let unit = UnitLabelGraph(&graph);
let labelled = Zip(
webgraph::graph::bvgraph::load(&args.basename)?,
webgraph::graph::bvgraph::load(&args.basename)?,
let labeled = Zip(
BVGraph::with_basename(&args.basename)
.endianness::<E>()
.load()?,
BVGraph::with_basename(&args.basename)
.endianness::<E>()
.load()?,
);
for _ in 0..10 {
let mut pl = ProgressLogger::default();
Expand Down Expand Up @@ -77,7 +81,7 @@ pub fn main() -> Result<()> {
pl.start("Zipped-projected graph successors...");
for x in 0..unit.num_nodes() {
black_box(x);
for (i, _) in labelled.successors(x) {
for (i, _) in labeled.successors(x) {
black_box(i);
}
}
Expand All @@ -86,3 +90,26 @@ pub fn main() -> Result<()> {

Ok(())
}

pub fn main() -> Result<()> {
let args = Args::parse();

stderrlog::new()
.verbosity(2)
.timestamp(stderrlog::Timestamp::Second)
.init()?;

match get_endianess(&args.basename)?.as_str() {
#[cfg(any(
feature = "be_bins",
not(any(feature = "be_bins", feature = "le_bins"))
))]
BE::NAME => bench_impl::<BE>(args),
#[cfg(any(
feature = "le_bins",
not(any(feature = "be_bins", feature = "le_bins"))
))]
LE::NAME => bench_impl::<LE>(args),
e => panic!("Unknown endianness: {}", e),
}
}
69 changes: 51 additions & 18 deletions examples/bench_unit_transpose.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@ use std::hint::black_box;

use anyhow::Result;
use clap::Parser;
use dsi_bitstream::prelude::*;
use dsi_progress_logger::*;
use lender::prelude::*;
use webgraph::graph::arc_list_graph;
use webgraph::utils::proj::Left;
use webgraph::{algorithms, prelude::*};
use tempfile::Builder;
use webgraph::graphs::arc_list_graph::{self, ArcListGraph};
use webgraph::labels::Left;
use webgraph::utils::sort_pairs::{BatchIterator, KMergeIters};
use webgraph::{algo, prelude::*};
#[derive(Parser, Debug)]
#[command(about = "Benchmark direct transposition and labelled transposition on a unit graph.", long_about = None)]
#[command(about = "Benchmark direct transposition and labeled transposition on a unit graph.", long_about = None)]
struct Args {
/// The basename of the graph.
basename: String,
Expand All @@ -25,12 +28,20 @@ pub fn transpose(
graph: &impl SequentialGraph,
batch_size: usize,
) -> Result<
arc_list_graph::ArcListGraph<
std::iter::Map<KMergeIters<BatchIterator>, fn((usize, usize, ())) -> (usize, usize)>,
Left<
ArcListGraph<
std::iter::Map<
std::iter::Map<
KMergeIters<BatchIterator>,
fn((usize, usize, ())) -> (usize, usize),
>,
fn((usize, usize)) -> (usize, usize, ()),
>,
>,
>,
> {
let dir = tempfile::tempdir()?;
let mut sorted = SortPairs::new(batch_size, dir.into_path())?;
let dir = Builder::new().prefix("bench_unit_transpose").tempdir()?;
let mut sorted = SortPairs::new(batch_size, dir.path())?;

let mut pl = ProgressLogger::default();
pl.item_name("node")
Expand All @@ -48,18 +59,17 @@ pub fn transpose(
let sorted = arc_list_graph::ArcListGraph::new(graph.num_nodes(), sorted.iter()?.map(map));
pl.done();

Ok(sorted)
Ok(Left(sorted))
}
pub fn main() -> Result<()> {
let args = Args::parse();

stderrlog::new()
.verbosity(2)
.timestamp(stderrlog::Timestamp::Second)
.init()
.unwrap();
fn bench_impl<E: Endianness + 'static>(args: Args) -> Result<()>
where
for<'a> BufBitReader<E, MemWordReader<u32, &'a [u32]>>: CodeRead<E> + BitSeek,
{
let graph = webgraph::graphs::bvgraph::sequential::BVGraphSeq::with_basename(args.basename)
.endianness::<E>()
.load()?;

let graph = webgraph::graph::bvgraph::load(&args.basename)?;
let unit = UnitLabelGraph(&graph);

for _ in 0..10 {
Expand All @@ -76,7 +86,7 @@ pub fn main() -> Result<()> {
pl.done_with_count(graph.num_nodes());

pl.start("Transposing unit graph...");
let mut iter = Left(algorithms::transpose_labelled(&unit, 10_000_000, (), ())?).iter();
let mut iter = Left(algo::transpose_labeled(&unit, 10_000_000, (), ())?).iter();
while let Some((x, s)) = iter.next() {
black_box(x);
for i in s {
Expand All @@ -88,3 +98,26 @@ pub fn main() -> Result<()> {

Ok(())
}

pub fn main() -> Result<()> {
let args = Args::parse();

stderrlog::new()
.verbosity(2)
.timestamp(stderrlog::Timestamp::Second)
.init()?;

match get_endianess(&args.basename)?.as_str() {
#[cfg(any(
feature = "be_bins",
not(any(feature = "be_bins", feature = "le_bins"))
))]
BE::NAME => bench_impl::<BE>(args),
#[cfg(any(
feature = "le_bins",
not(any(feature = "be_bins", feature = "le_bins"))
))]
LE::NAME => bench_impl::<LE>(args),
e => panic!("Unknown endianness: {}", e),
}
}
Loading

0 comments on commit fd9142b

Please sign in to comment.