Skip to content

Commit

Permalink
Merge pull request #18 from andyquinterom/int_symbols
Browse files Browse the repository at this point in the history
Performance Improvements
  • Loading branch information
andyquinterom authored Jun 21, 2024
2 parents 9dd7984 + d5df7b4 commit 399fcab
Show file tree
Hide file tree
Showing 16 changed files with 760 additions and 375 deletions.
2 changes: 1 addition & 1 deletion benches/directed_graph.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use orbweaver::directed::{acyclic::DirectedAcyclicGraph, DirectedGraphBuilder};
use orbweaver::prelude::*;

const MEDIUM_TXT_PATH: &str = "assets/medium.txt";

Expand Down
26 changes: 14 additions & 12 deletions src/directed/acyclic/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
use crate::{directed::DirectedGraph, prelude::*};
use crate::{
directed::DirectedGraph,
prelude::*,
utils::{node_set::NodeVec, sym::Sym},
};
use std::ops::Deref;
mod topological_sort;
#[cfg(feature = "serde")]
Expand Down Expand Up @@ -41,26 +45,24 @@ impl DirectedAcyclicGraph {
&self,
from: impl AsRef<str>,
to: impl AsRef<str>,
) -> GraphInteractionResult<Vec<Vec<&str>>> {
const PATH_DELIM: u32 = 0;

) -> GraphInteractionResult<Vec<NodeVec>> {
// Helper function to perform DFS
#[inline]
fn dfs(
graph: &DirectedAcyclicGraph,
current: u32,
goal_id: u32,
current_path: &mut Vec<u32>,
all_paths: &mut Vec<u32>,
children_buffer: &mut Vec<u32>,
current: Sym,
goal_id: Sym,
current_path: &mut Vec<Sym>,
all_paths: &mut Vec<Sym>,
children_buffer: &mut Vec<Sym>,
) {
// Add current node to path
current_path.push(current);

// Check if the current node is the goal
if current == goal_id {
all_paths.extend_from_slice(current_path);
all_paths.push(PATH_DELIM);
all_paths.push(Sym::RESERVED);
} else {
let children_start_index_local = children_buffer.len();
graph.children_u32(&[current], children_buffer);
Expand Down Expand Up @@ -94,7 +96,7 @@ impl DirectedAcyclicGraph {
dfs(self, from, to, current_path, all_paths, children);

Ok(all_paths
.split(|&n| n == PATH_DELIM)
.split(|&n| n.is_reserved())
.filter(|p| !p.is_empty())
.map(|path| self.resolve_mul_slice(path))
.collect())
Expand Down Expand Up @@ -181,10 +183,10 @@ mod tests {
assert_eq!(
paths,
vec![
vec!["0", "4"],
vec!["0", "999", "4"],
vec!["0", "111", "222", "333", "444", "4"],
vec!["0", "1", "2", "3", "4"],
vec!["0", "4"],
]
);
}
Expand Down
7 changes: 5 additions & 2 deletions src/directed/acyclic/topological_sort.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use crate::{directed::LazySet, prelude::*};
use crate::{
prelude::*,
utils::{node_map::LazySet, sym::Sym},
};

pub fn topological_sort(dg: &DirectedGraph) -> Result<Vec<u32>, GraphHasCycle> {
pub fn topological_sort(dg: &DirectedGraph) -> Result<Vec<Sym>, GraphHasCycle> {
let mut dg = dg.clone();
let mut res = Vec::new();
let mut no_deps = dg.leaves.clone();
Expand Down
135 changes: 135 additions & 0 deletions src/directed/builder.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
use std::rc::Rc;

use crate::utils::{interner::InternerBuilder, node_map::NodeMap, sym::Sym};
use rayon::prelude::*;

use super::{DirectedAcyclicGraph, DirectedGraph, GraphHasCycle};

/// Accumulates edges (as interned symbols) from which a `DirectedGraph` or
/// `DirectedAcyclicGraph` is later built.
///
/// Edges are stored as two parallel vectors: `add_edge` pushes one entry onto
/// each, so the `i`-th recorded edge goes from `parents[i]` to `children[i]`.
#[derive(Clone)]
pub struct DirectedGraphBuilder {
// Source ("from") symbol of every recorded edge, in insertion order.
pub(crate) parents: Vec<Sym>,
// Target ("to") symbol of every recorded edge, in insertion order.
pub(crate) children: Vec<Sym>,
// Interner that turns node names into the `Sym` values stored above.
pub(crate) interner: InternerBuilder,
}

/// Collects every symbol of `children` that never occurs in `parents`.
///
/// Membership is tested with a binary search, so `parents` is expected to be
/// sorted (`build_directed` passes sorted, de-duplicated slices). The result
/// is sorted and free of duplicates.
fn find_leaves(parents: &[Sym], children: &[Sym]) -> Vec<Sym> {
    let mut childless: Vec<Sym> = children
        .par_iter()
        .copied()
        .filter(|candidate| parents.binary_search(candidate).is_err())
        .collect();
    // The parallel collect is followed by an explicit sort so `dedup` can
    // remove repeated symbols.
    childless.sort_unstable();
    childless.dedup();
    childless
}

/// Collects every symbol of `parents` that never occurs in `children`.
///
/// Membership is tested with a binary search, so `children` is expected to be
/// sorted (`build_directed` passes sorted, de-duplicated slices). The result
/// is sorted and free of duplicates.
fn find_roots(parents: &[Sym], children: &[Sym]) -> Vec<Sym> {
    let mut parentless: Vec<Sym> = parents
        .par_iter()
        .copied()
        .filter(|candidate| children.binary_search(candidate).is_err())
        .collect();
    // The parallel collect is followed by an explicit sort so `dedup` can
    // remove repeated symbols.
    parentless.sort_unstable();
    parentless.dedup();
    parentless
}

impl DirectedGraphBuilder {
    /// Creates an empty builder: no edges recorded and a fresh interner.
    pub fn new() -> Self {
        DirectedGraphBuilder {
            interner: InternerBuilder::new(),
            children: Vec::new(),
            parents: Vec::new(),
        }
    }

    /// Interns `val` through the builder's interner and returns its symbol.
    #[inline(always)]
    pub(crate) fn get_or_intern(&mut self, val: impl AsRef<str>) -> Sym {
        self.interner.get_or_intern(val)
    }

    /// Records a directed edge `from -> to`.
    ///
    /// Both endpoints are interned. Duplicate edges are stored as-is here and
    /// are collapsed when the graph is built. Returns `self` for chaining.
    pub fn add_edge(&mut self, from: impl AsRef<str>, to: impl AsRef<str>) -> &mut Self {
        let from = self.get_or_intern(from);
        let to = self.get_or_intern(to);
        self.parents.push(from);
        self.children.push(to);
        self
    }

    /// Records one edge for every pair of consecutive nodes in `path`.
    ///
    /// A path with fewer than two nodes adds nothing. Returns `self` for
    /// chaining.
    pub fn add_path(&mut self, path: impl IntoIterator<Item = impl AsRef<str>>) -> &mut Self {
        let mut path = path.into_iter().peekable();
        // `peek` leaves `to` in the iterator so it becomes the next `from`.
        while let (Some(from), Some(to)) = (path.next(), path.peek()) {
            self.add_edge(from.as_ref(), to.as_ref());
        }
        self
    }

    /// Consumes the builder and produces a [`DirectedGraph`].
    ///
    /// The recorded edges are turned into parent->children and
    /// child->parents maps, and the sorted, de-duplicated node, root and leaf
    /// lists are computed. `n_edges` counts distinct edges only.
    pub fn build_directed(self) -> DirectedGraph {
        let interner = Rc::new(self.interner.build());

        // Maps parents to their children, and children to their parents.
        let mut children_map = NodeMap::new(interner.len());
        let mut parent_map = NodeMap::new(interner.len());
        let mut n_edges = 0;

        // `parents` and `children` are parallel vectors: walk them in
        // lockstep once and fill both maps.
        for (&parent, &child) in self.parents.iter().zip(&self.children) {
            let was_added = children_map.get_mut(parent).or_init().insert(child);
            // Only a newly inserted child is a new distinct edge.
            if was_added {
                n_edges += 1;
            }
            parent_map.get_mut(child).or_init().insert(parent);
        }

        // The raw edge lists are no longer needed in insertion order, so they
        // are sorted and de-duplicated in place instead of being cloned.
        let mut unique_parents = self.parents;
        unique_parents.sort_unstable();
        unique_parents.dedup();
        unique_parents.shrink_to_fit();

        let mut unique_children = self.children;
        unique_children.sort_unstable();
        unique_children.dedup();
        // Previously this shrank `unique_parents` a second time by mistake.
        unique_children.shrink_to_fit();

        let mut nodes = Vec::with_capacity(unique_parents.len() + unique_children.len());
        nodes.extend_from_slice(&unique_parents);
        nodes.extend_from_slice(&unique_children);
        nodes.sort_unstable();
        nodes.dedup();
        nodes.shrink_to_fit();

        // Both helpers binary-search their "other" slice, which is sorted above.
        let leaves = find_leaves(&unique_parents, &unique_children);
        let roots = find_roots(&unique_parents, &unique_children);

        DirectedGraph {
            interner,
            leaves,
            roots,
            nodes,
            children_map,
            parent_map,
            n_edges,
            buf: Default::default(),
        }
    }

    /// Consumes the builder and produces a [`DirectedAcyclicGraph`].
    ///
    /// # Errors
    ///
    /// Returns [`GraphHasCycle`] when `DirectedAcyclicGraph::build` rejects
    /// the directed graph built from the recorded edges.
    pub fn build_acyclic(self) -> Result<DirectedAcyclicGraph, GraphHasCycle> {
        DirectedAcyclicGraph::build(self.build_directed())
    }
}

impl Default for DirectedGraphBuilder {
fn default() -> Self {
Self::new()
}
}
152 changes: 152 additions & 0 deletions src/directed/debug.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
use super::{DirectedGraph, LazySet};

const DEFAULT_MAX_PRINT_SIZE: usize = 15;

/// Computes the column width used by the `Debug` table of a graph.
///
/// Starts at `DEFAULT_MAX_PRINT_SIZE` and widens to the longest parent or
/// child name among the first ten edges visited in `children_map`.
fn get_max_str_length(graph: &DirectedGraph) -> usize {
    let mut widest = DEFAULT_MAX_PRINT_SIZE;
    let mut edges_seen = 0;
    for (parent, children) in graph.children_map.iter() {
        // Entries whose child set was never initialized contribute no edges.
        let set = match children {
            LazySet::Initialized(set) => set,
            LazySet::Uninitialized => continue,
        };
        for &child in set.iter() {
            edges_seen += 1;
            let parent_len = graph.resolve(parent).len();
            let child_len = graph.resolve(child).len();
            widest = widest.max(parent_len).max(child_len);
            // Stop after the tenth edge has been measured.
            if edges_seen == 10 {
                return widest;
            }
        }
    }
    widest
}

/// Human-readable summary: four count lines, a blank line, then a
/// two-column parent/child table limited to the first ten edges.
impl std::fmt::Debug for DirectedGraph {
    #[inline]
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        let col_width = get_max_str_length(self);
        let n_nodes = self.children_map.len();
        let n_edges = self.n_edges;
        let n_roots = self.roots.len();
        let n_leaves = self.leaves.len();

        // Summary counters.
        writeln!(f, "# of nodes: {n_nodes}")?;
        writeln!(f, "# of edges: {n_edges}")?;
        writeln!(f, "# of roots: {n_roots}")?;
        writeln!(f, "# of leaves: {n_leaves}")?;
        writeln!(f)?;

        // Table header followed by a dashed separator of the same width.
        writeln!(
            f,
            "| {:^width$} | {:^width$} |",
            "Parent",
            "Child",
            width = col_width
        )?;
        writeln!(
            f,
            "| {:-<width$} | {:-<width$} |",
            "",
            "",
            width = col_width
        )?;

        // One row per edge; names are padded and truncated to the column width.
        let mut rows_written = 0;
        'rows: for (parent, children) in self.children_map.iter() {
            if let LazySet::Initialized(set) = children {
                for &child in set.iter() {
                    rows_written += 1;
                    writeln!(
                        f,
                        "| {:width$.width$} | {:width$.width$} |",
                        self.resolve(parent),
                        self.resolve(child),
                        width = col_width
                    )?;
                    if rows_written == 10 {
                        break 'rows;
                    }
                }
            }
        }

        // NOTE(review): the footer is based on the node count although the
        // table rows are edges; confirm this is the intended figure.
        if n_nodes > 10 {
            writeln!(f, "Omitted {} nodes", n_nodes - 10)?;
        }

        Ok(())
    }
}

//#[cfg(test)]
//mod tests {
// use crate::directed::DirectedGraphBuilder;
//
// #[test]
// fn test_debug_printing() {
// let mut builder = DirectedGraphBuilder::new();
// builder.add_path(["0", "111", "222", "333", "444", "4"]);
// builder.add_path(["0", "999", "4"]);
// builder.add_path(["0", "1", "2", "3", "4"]);
// builder.add_path(["0", "4"]);
// let graph = builder.build_acyclic().unwrap();
//
// assert_eq!(
// format!("{:?}", graph),
// r#"# of nodes: 10
//# of edges: 12
//# of roots: 1
//# of leaves: 1
//
//| Parent | Child |
//| --------------- | --------------- |
//| 0 | 1 |
//| 0 | 111 |
//| 0 | 999 |
//| 0 | 4 |
//| 111 | 222 |
//| 222 | 333 |
//| 333 | 444 |
//| 444 | 4 |
//| 999 | 4 |
//| 1 | 2 |
//Omitted 1 nodes
//"#,
// );
// }
//
// #[test]
// fn test_debug_printing_longer_than_15() {
// let mut builder = DirectedGraphBuilder::new();
// builder.add_edge("AAAAAAAAAAAAAAAAAAAAA", "B");
// builder.add_edge("C", "AAAAAAAAAAAAAAAAAAAAA");
// let graph = builder.build_acyclic().unwrap();
//
// panic!("{:?}", graph);
//
// assert_eq!(
// format!("{:?}", graph),
// r#"# of nodes: 11
//# of edges: 12
//# of roots: 1
//# of leaves: 1
//
//| Parent | Child |
//| --------------- | --------------- |
//| 0 | 1 |
//| 0 | 111 |
//| 0 | 999 |
//| 0 | 4 |
//| 111 | 222 |
//| 222 | 333 |
//| 333 | 444 |
//| 444 | 4 |
//| 999 | 4 |
//| 1 | 2 |
//Omitted 1 nodes
//"#,
// );
// }
//}
4 changes: 3 additions & 1 deletion src/directed/get_rel2_on_rel1.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
use crate::utils::sym::Sym;

use super::{LazySet, NodeMap};

/// Gets the equivalent values in `rel2` to a set in
/// `rel1`.
#[inline]
pub(crate) fn get_values_on_rel_map(ids: &[u32], map: &NodeMap, out: &mut Vec<u32>) {
pub(crate) fn get_values_on_rel_map(ids: &[Sym], map: &NodeMap, out: &mut Vec<Sym>) {
ids.iter().for_each(|&id| {
if let LazySet::Initialized(values) = map.get(id) {
out.extend(values.iter().copied());
Expand Down
Loading

0 comments on commit 399fcab

Please sign in to comment.