Skip to content

Commit

Permalink
Optimise data transfer in TopologicalSorter
Browse files Browse the repository at this point in the history
This optimises the creation of the `list` objects returned from
`TopologicalSorter.get_ready` (by avoiding a temporary `Vec`) and allows
`done` to accept a single index, avoiding the need for the user to
allocate many small lists in several real-world uses.
  • Loading branch information
jakelishman committed Apr 5, 2024
1 parent 9216f86 commit 2979762
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 62 deletions.
7 changes: 7 additions & 0 deletions releasenotes/notes/toposort-done-single-5d5db2c654e5b498.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
features:
- |
:meth:`.TopologicalSorter.done` now accepts single integers, in addition to lists of integers.
This can be a sizeable performance improvement for algorithms that iterate through nodes, and
only conditionally mark them as `done`; there is no longer a need to allocate a temporary Python
array.
2 changes: 1 addition & 1 deletion rustworkx/rustworkx.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ class TopologicalSorter:
) -> None: ...
def is_active(self) -> bool: ...
def get_ready(self) -> list[int]: ...
def done(self, nodes: Sequence[int]) -> None: ...
def done(self, nodes: int | Sequence[int]) -> None: ...

# isomorpism

Expand Down
135 changes: 74 additions & 61 deletions src/toposort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use hashbrown::HashMap;

use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::types::PyList;
use pyo3::Python;

use petgraph::stable_graph::NodeIndex;
Expand Down Expand Up @@ -84,7 +85,8 @@ pub struct TopologicalSorter {
node2state: HashMap<NodeIndex, NodeState>,
num_passed_out: usize,
num_finished: usize,
reverse: bool,
in_dir: petgraph::Direction,
out_dir: petgraph::Direction,
}

#[pymethods]
Expand All @@ -105,7 +107,7 @@ impl TopologicalSorter {
}
}

let (in_dir, _) = traversal_directions(reverse);
let (in_dir, out_dir) = traversal_directions(reverse);
let mut predecessor_count = HashMap::new();
let ready_nodes = if let Some(initial) = initial {
let dag = &dag.borrow(py);
Expand Down Expand Up @@ -145,7 +147,8 @@ impl TopologicalSorter {
node2state: HashMap::new(),
num_passed_out: 0,
num_finished: 0,
reverse,
in_dir,
out_dir,
})
}

Expand All @@ -167,24 +170,21 @@ impl TopologicalSorter {
///
/// :returns: A list of node indices of all the ready nodes.
/// :rtype: List
fn get_ready(&mut self) -> Vec<usize> {
let mut out = Vec::with_capacity(self.ready_nodes.len());
for nx in &self.ready_nodes {
out.push(nx.index());
self.node2state.insert(*nx, NodeState::Ready);
}

self.ready_nodes.clear();
self.num_passed_out += out.len();
out
fn get_ready<'py>(&mut self, py: Python<'py>) -> Bound<'py, PyList> {
self.num_passed_out += self.ready_nodes.len();
PyList::new_bound(py, self.ready_nodes.drain(..).map(|nx| {
self.node2state.insert(nx, NodeState::Ready);
nx.index()
}))
}

/// Marks a set of nodes returned by "get_ready" as processed.
///
/// This method unblocks any successor of each node in *nodes* for being returned
/// in the future by a call to "get_ready".
///
/// :param list nodes: A list of node indices to mark as done.
/// :param nodes: A node index or list of node indices to mark as done.
/// :type nodes: int | list[int]
///
/// :raises `ValueError`: If any node in *nodes* has already been marked as
/// processed by a previous call to this method or node has not yet been returned
Expand All @@ -193,60 +193,73 @@ impl TopologicalSorter {
/// of the nodes given to :meth:`done`. This can only happen if the ``initial`` nodes had
/// even a partial topological ordering amongst themselves, which is not a valid
/// starting input.
fn done(&mut self, py: Python, nodes: Vec<usize>) -> PyResult<()> {
let dag = &self.dag.borrow(py);
let (in_dir, out_dir) = traversal_directions(self.reverse);
for node in nodes {
let node = NodeIndex::new(node);
match self.node2state.get_mut(&node) {
None => {
return Err(PyValueError::new_err(format!(
"node {} was not passed out (still not ready).",
node.index()
)));
}
Some(NodeState::Done) => {
return Err(PyValueError::new_err(format!(
"node {} was already marked done.",
node.index()
)));
}
Some(state) => {
debug_assert_eq!(*state, NodeState::Ready);
*state = NodeState::Done;
}
fn done(&mut self, nodes: &Bound<PyAny>) -> PyResult<()> {
if let Ok(node) = nodes.extract::<usize>() {
self.done_single(nodes.py(), NodeIndex::new(node))
} else if let Ok(nodes) = nodes.downcast::<PyList>() {
for node in nodes {
self.done_single(nodes.py(), NodeIndex::new(node.extract()?))?
}
Ok(())
} else {
for node in nodes.iter()? {
self.done_single(nodes.py(), NodeIndex::new(node?.extract()?))?
}
Ok(())
}
}
}

impl TopologicalSorter {
#[inline(always)]
fn done_single(&mut self, py: Python, node: NodeIndex) -> PyResult<()> {
let dag = self.dag.borrow(py);
match self.node2state.get_mut(&node) {
None => {
return Err(PyValueError::new_err(format!(
"node {} was not passed out (still not ready).",
node.index()
)));
}
Some(NodeState::Done) => {
return Err(PyValueError::new_err(format!(
"node {} was already marked done.",
node.index()
)));
}
Some(state) => {
debug_assert_eq!(*state, NodeState::Ready);
*state = NodeState::Done;
}
}

for succ in dag.graph.neighbors_directed(node, out_dir) {
match self.predecessor_count.entry(succ) {
Entry::Occupied(mut entry) => {
let in_degree = entry.get_mut();
if *in_degree == 0 {
return Err(PyValueError::new_err(
"at least one initial node is reachable from another",
));
} else if *in_degree == 1 {
self.ready_nodes.push(succ);
entry.remove_entry();
} else {
*in_degree -= 1;
}
for succ in dag.graph.neighbors_directed(node, self.out_dir) {
match self.predecessor_count.entry(succ) {
Entry::Occupied(mut entry) => {
let in_degree = entry.get_mut();
if *in_degree == 0 {
return Err(PyValueError::new_err(
"at least one initial node is reachable from another",
));
} else if *in_degree == 1 {
self.ready_nodes.push(succ);
entry.remove_entry();
} else {
*in_degree -= 1;
}
Entry::Vacant(entry) => {
let in_degree = dag.graph.neighbors_directed(succ, in_dir).count() - 1;

if in_degree == 0 {
self.ready_nodes.push(succ);
} else {
entry.insert(in_degree);
}
}
Entry::Vacant(entry) => {
let in_degree = dag.graph.neighbors_directed(succ, self.in_dir).count() - 1;

if in_degree == 0 {
self.ready_nodes.push(succ);
} else {
entry.insert(in_degree);
}
}
}

self.num_finished += 1;
}

self.num_finished += 1;
Ok(())
}
}
22 changes: 22 additions & 0 deletions tests/digraph/test_toposort.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,28 @@ def test_topo_sort(self):
nodes = sorter.get_ready()
self.assertEqual(nodes, [])

def test_topo_sort_single_indices(self):
sorter = rustworkx.TopologicalSorter(self.graph)
nodes = sorter.get_ready()
self.assertEqual(set(nodes), {0, 1})
sorter.done(0)
sorter.done(1)
nodes = sorter.get_ready()
self.assertEqual(set(nodes), {2})
sorter.done(2)
nodes = sorter.get_ready()
self.assertEqual(set(nodes), {3, 4})

# Try not calling 'done' on everything, and going a little out-of-order.
sorter.done(3)
self.assertEqual(set(sorter.get_ready()), {5})
sorter.done(5)
self.assertEqual(set(sorter.get_ready()), set())
self.assertTrue(sorter.is_active())
sorter.done(4)
self.assertEqual(set(sorter.get_ready()), set())
self.assertFalse(sorter.is_active())

def test_topo_sort_do_not_emit_if_node_has_undone_preds(self):
sorter = rustworkx.TopologicalSorter(self.graph)
nodes = sorter.get_ready()
Expand Down

0 comments on commit 2979762

Please sign in to comment.