Skip to content
This repository has been archived by the owner on Jan 21, 2023. It is now read-only.

Commit

Permalink
Add impl Drop for Node to avoid overflowing the stack.
Browse files Browse the repository at this point in the history
The implict drop is correct, but recursive. See the doc-comment for details.
  • Loading branch information
SimonSapin committed Jul 27, 2015
1 parent 58ccd6f commit c05dec7
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 2 deletions.
23 changes: 23 additions & 0 deletions examples/stack-overflow.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
extern crate kuchiki;

fn main() {
let mut depth = 2;
// 20 M nodes is a few GB of memory.
while depth <= 20_000_000 {

let mut node = kuchiki::NodeRef::new_text("");
for _ in 0..depth {
let parent = kuchiki::NodeRef::new_text("");
parent.append(node);
node = parent;
}

println!("Trying to drop {} nodes...", depth);
// Without an explicit `impl Drop for Node`,
// depth = 20_000 causes "thread '<main>' has overflowed its stack"
// on my machine (Linux x86_64).
::std::mem::drop(node);

depth *= 10;
}
}
17 changes: 17 additions & 0 deletions src/cell_option.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,23 @@ impl<T> CellOption<Weak<T>> {
}
}

impl<T> CellOption<Rc<T>> {
/// Return `Some` if this `Rc` is the only strong reference count,
/// even if there are weak references.
#[inline]
pub fn take_if_unique_strong(&self) -> Option<Rc<T>> {
unsafe {
match *self.0.get() {
None => None,
Some(ref rc) if Rc::strong_count(rc) > 1 => None,
// Not borrowing the `Rc<T>` here
// as we would be invalidating that borrow while it is outstanding:
Some(_) => self.take(),
}
}
}
}

impl<T> CellOption<T> where T: WellBehavedClone {
#[inline]
pub fn clone_inner(&self) -> Option<T> {
Expand Down
3 changes: 2 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#![feature(unboxed_closures, plugin, rc_weak)]
#![feature(unboxed_closures, plugin, rc_weak, rc_counts)]
#![plugin(string_cache_plugin)]

extern crate html5ever;
Expand All @@ -10,6 +10,7 @@ extern crate tendril;

pub use parser::{Html, ParseOpts};
pub use select::{Selectors, Select};
pub use tree::NodeRef;

pub mod tree;

Expand Down
56 changes: 55 additions & 1 deletion src/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,60 @@ impl fmt::Debug for Node {
}
}

/// Prevent implicit recursion when dropping nodes to avoid overflowing the stack.
///
/// The implict drop is correct, but recursive.
/// In the worst case (where no node has both a next sibling and a child),
/// a tree of a few tens of thousands of nodes could cause a stack overflow.
///
/// This `Drop` implementations makes sure the recursion does not happen.
/// Instead, at has a explicit `Vec<Rc<Node>>` stack to traverse the subtree,

This comment has been minimized.

Copy link
@Ygg01

Ygg01 Jul 28, 2015

Contributor

Tiny nitpick. Shouldn't Instead, at, be Instead, it. Looks great otherwise :)

This comment has been minimized.

Copy link
@SimonSapin

SimonSapin Jul 28, 2015

Author Collaborator

Fixed, thanks!

/// but only following `Rc<Node>` references that are "unique":
/// that have a strong reference count of 1.
/// Those are the nodes that would have been dropped recursively.
///
/// The stack holds ancestors of the current node rather than previous siblings,
/// on the assumption that large document trees are typically wider than deep.
impl Drop for Node {
fn drop(&mut self) {
let mut stack = Vec::new();
if let Some(rc) = self.first_child.take_if_unique_strong() {
non_recursive_drop_unique_rc(rc, &mut stack);
}
if let Some(rc) = self.next_sibling.take_if_unique_strong() {
non_recursive_drop_unique_rc(rc, &mut stack);
}

fn non_recursive_drop_unique_rc(mut rc: Rc<Node>, stack: &mut Vec<Rc<Node>>) {
loop {
if let Some(child) = rc.first_child.take_if_unique_strong() {
stack.push(rc);
rc = child;
continue
}
if let Some(sibling) = rc.next_sibling.take_if_unique_strong() {
// The previous value of `rc: Rc<Node>` is dropped here.
// Since it was unique, the corresponding `Node` is dropped as well.
// `<Node as Drop>::drop` does not call `drop_rc`
// as both the first child and next sibling were already taken.
// Weak reference counts decremented here for `CellOption` that are `Some`:
// * `rc.parent`: still has a strong reference in `stack` or elsewhere
// * `rc.last_child`: this is the last weak ref. Deallocated now.
// * `rc.previous_sibling`: this is the last weak ref. Deallocated now.
rc = sibling;
continue
}
if let Some(parent) = stack.pop() {
// Same as in the above comment.
rc = parent;
continue
}
return
}
}
}
}

impl NodeRef {
/// Create a new node from its associated data.
pub fn new(data: NodeData) -> NodeRef {
Expand Down Expand Up @@ -665,4 +719,4 @@ impl<T: fmt::Debug> fmt::Debug for NodeDataRef<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
fmt::Debug::fmt(&**self, f)
}
}
}

0 comments on commit c05dec7

Please sign in to comment.