Skip to content

Commit

Permalink
Add NewUniqueNodeIterator() to skip shared nodes
Browse files Browse the repository at this point in the history
NewUniqueNodeIterator() can be used to optimize node iteration for
forest.  It skips shared sub-tries that were visited and only iterates
unique nodes.
  • Loading branch information
fxamacker committed Feb 1, 2022
1 parent 45d23da commit 04e5c08
Show file tree
Hide file tree
Showing 2 changed files with 236 additions and 5 deletions.
50 changes: 45 additions & 5 deletions ledger/complete/mtrie/flattener/iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ type NodeIterator struct {
// no children, it can be recalled without restriction.
// * When popping node `n` from the stack, its parent `p` (if it exists) is now the
// head of the stack.
// - If `p` has only one child, this child is must be `n`.
// - If `p` has only one child, this child must be `n`.
// Therefore, by recalling `n`, we have recalled all ancestors of `p`.
// - If `n` is the right child, we have already searched through all of `p`'s
// descendants (as the `p.LeftChild` must have been searched before)
Expand All @@ -53,6 +53,15 @@ type NodeIterator struct {
// This has the advantage, that we gracefully handle tries whose root node is nil.
unprocessedRoot *node.Node
stack []*node.Node
// visitedNodes are nodes that were visited and can be skipped during
// traversal through dig(). visitedNodes is used to optimize node traversal
// IN FOREST by skipping nodes in shared sub-tries after they are visited,
// because sub-tries are shared between tries (original MTrie before register updates
// and updated MTrie after register writes).
// NodeIterator only uses visitedNodes for read operation.
// No special handling is needed if visitedNodes is nil.
// WARNING: visitedNodes is not safe for concurrent use.
visitedNodes map[*node.Node]uint64
}

// NewNodeIterator returns a node NodeIterator, which iterates through all nodes
Expand All @@ -75,6 +84,30 @@ func NewNodeIterator(mTrie *trie.MTrie) *NodeIterator {
return i
}

// NewUniqueNodeIterator returns a NodeIterator over the nodes of mTrie that
// skips every node found in visitedNodes. It is intended for iterating the
// tries of a forest one after another, so that sub-tries shared between tries
// are not traversed repeatedly.
//
// The iterator only reads visitedNodes; it never inserts into it. Callers that
// want nodes to be skipped by later iterations must record them in the map
// themselves. Passing a nil map is allowed and results in no node being skipped.
//
// The iterator guarantees a DESCENDANTS-FIRST-RELATIONSHIP in the sequence of
// nodes it generates:
//   * Consider the sequence of nodes, in the order they are generated by
//     NodeIterator. Let `node[k]` denote the node with index `k` in this sequence.
//   * Descendants-First-Relationship means that for any `node[k]`, all of its
//     descendants that are emitted have indices strictly smaller than k in the
//     iterator's sequence.
// The Descendants-First-Relationship has the following important property:
// when re-building the trie from the sequence of nodes, one can build the trie
// on the fly, as for each node, the children have been previously encountered.
//
// WARNING: visitedNodes is not safe for concurrent use.
func NewUniqueNodeIterator(mTrie *trie.MTrie, visitedNodes map[*node.Node]uint64) *NodeIterator {
	// The longest path in a trie of height H (counted in edges) has H+1
	// vertices, so this capacity is enough for the deepest traversal.
	maxPathLen := ledger.NodeMaxHeight + 1

	return &NodeIterator{
		unprocessedRoot: mTrie.RootNode(),
		stack:           make([]*node.Node, 0, maxPathLen),
		visitedNodes:    visitedNodes,
	}
}

func (i *NodeIterator) Next() bool {
if i.unprocessedRoot != nil {
// initial call to Next() for a non-empty trie
Expand Down Expand Up @@ -125,15 +158,22 @@ func (i *NodeIterator) dig(n *node.Node) {
if n == nil {
return
}
if _, found := i.visitedNodes[n]; found {
return
}
for {
i.stack = append(i.stack, n)
if lChild := n.LeftChild(); lChild != nil {
n = lChild
continue
if _, found := i.visitedNodes[lChild]; !found {
n = lChild
continue
}
}
if rChild := n.RightChild(); rChild != nil {
n = rChild
continue
if _, found := i.visitedNodes[rChild]; !found {
n = rChild
continue
}
}
return
}
Expand Down
191 changes: 191 additions & 0 deletions ledger/complete/mtrie/flattener/iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/onflow/flow-go/ledger"
"github.com/onflow/flow-go/ledger/common/utils"
"github.com/onflow/flow-go/ledger/complete/mtrie/flattener"
"github.com/onflow/flow-go/ledger/complete/mtrie/node"
"github.com/onflow/flow-go/ledger/complete/mtrie/trie"
)

Expand Down Expand Up @@ -73,3 +74,193 @@ func TestPopulatedTrie(t *testing.T) {
require.False(t, itr.Next())
require.True(t, nil == itr.Value())
}

// TestUniqueNodeIterator exercises flattener.NewUniqueNodeIterator on
//   - an empty trie (with both a nil and an empty visitedNodes map),
//   - a single populated trie (where visitedNodes has no effect), and
//   - a forest of three tries sharing sub-tries, where visitedNodes must cause
//     every shared node to be emitted exactly once, in descendants-first order.
func TestUniqueNodeIterator(t *testing.T) {
	t.Run("empty trie", func(t *testing.T) {
		emptyTrie := trie.NewEmptyMTrie()

		// visitedNodes is nil
		itr := flattener.NewUniqueNodeIterator(emptyTrie, nil)
		require.False(t, itr.Next())
		require.Nil(t, itr.Value()) // initial iterator should return nil

		// visitedNodes is empty map
		visitedNodes := make(map[*node.Node]uint64)
		itr = flattener.NewUniqueNodeIterator(emptyTrie, visitedNodes)
		require.False(t, itr.Next())
		require.Nil(t, itr.Value()) // initial iterator should return nil
	})

	t.Run("trie", func(t *testing.T) {
		emptyTrie := trie.NewEmptyMTrie()

		// key: 0000...
		p1 := utils.PathByUint8(1)
		v1 := utils.LightPayload8('A', 'a')

		// key: 0100....
		p2 := utils.PathByUint8(64)
		v2 := utils.LightPayload8('B', 'b')

		paths := []ledger.Path{p1, p2}
		payloads := []ledger.Payload{*v1, *v2}

		updatedTrie, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, true)
		require.NoError(t, err)

		//              n4
		//             /
		//            /
		//          n3
		//        /    \
		//      /        \
		//   n1 (p1/v1)   n2 (p2/v2)
		//

		expectedNodes := []*node.Node{
			updatedTrie.RootNode().LeftChild().LeftChild(),  // n1
			updatedTrie.RootNode().LeftChild().RightChild(), // n2
			updatedTrie.RootNode().LeftChild(),              // n3
			updatedTrie.RootNode(),                          // n4
		}

		// visitedNodes is nil
		i := 0
		for itr := flattener.NewUniqueNodeIterator(updatedTrie, nil); itr.Next(); {
			n := itr.Value()
			// Guard the index before using it, so that an iterator emitting
			// too many nodes fails the test instead of panicking.
			require.True(t, i < len(expectedNodes))
			require.Equal(t, expectedNodes[i], n)
			i++
		}
		require.Equal(t, len(expectedNodes), i)

		// visitedNodes is not nil, but it's pointless for iterating a single trie because
		// there isn't any shared sub-trie.
		visitedNodes := make(map[*node.Node]uint64)
		i = 0
		for itr := flattener.NewUniqueNodeIterator(updatedTrie, visitedNodes); itr.Next(); {
			n := itr.Value()
			// The iterator only reads visitedNodes; the caller records visits.
			visitedNodes[n] = uint64(i)

			require.True(t, i < len(expectedNodes))
			require.Equal(t, expectedNodes[i], n)
			i++
		}
		require.Equal(t, len(expectedNodes), i)
	})

	t.Run("forest", func(t *testing.T) {

		// Forest is a slice of mtries to guarantee order.
		forest := make([]*trie.MTrie, 0, 3)

		emptyTrie := trie.NewEmptyMTrie()

		// key: 0000...
		p1 := utils.PathByUint8(1)
		v1 := utils.LightPayload8('A', 'a')

		// key: 0100....
		p2 := utils.PathByUint8(64)
		v2 := utils.LightPayload8('B', 'b')

		paths := []ledger.Path{p1, p2}
		payloads := []ledger.Payload{*v1, *v2}

		trie1, err := trie.NewTrieWithUpdatedRegisters(emptyTrie, paths, payloads, true)
		require.NoError(t, err)

		forest = append(forest, trie1)

		//              n4
		//             /
		//            /
		//          n3
		//        /    \
		//      /        \
		//   n1 (p1/v1)   n2 (p2/v2)
		//

		// New trie reuses its parent's left sub-trie.

		// key: 1000...
		p3 := utils.PathByUint8(128)
		v3 := utils.LightPayload8('C', 'c')

		// key: 1100....
		p4 := utils.PathByUint8(192)
		v4 := utils.LightPayload8('D', 'd')

		paths = []ledger.Path{p3, p4}
		payloads = []ledger.Payload{*v3, *v4}

		trie2, err := trie.NewTrieWithUpdatedRegisters(trie1, paths, payloads, true)
		require.NoError(t, err)

		forest = append(forest, trie2)

		//              n8
		//            /    \
		//          /        \
		//       n3            n7
		//    (shared)        /   \
		//                  /       \
		//                n5         n6
		//             (p3/v3)    (p4/v4)

		// New trie reuses its parent's right sub-trie, and left sub-trie's leaf node.

		// key: 0000...
		v5 := utils.LightPayload8('E', 'e')

		paths = []ledger.Path{p1}
		payloads = []ledger.Payload{*v5}

		trie3, err := trie.NewTrieWithUpdatedRegisters(trie2, paths, payloads, true)
		require.NoError(t, err)

		forest = append(forest, trie3)

		//               n11
		//             /     \
		//           /         \
		//        n10            n7
		//       /   \        (shared)
		//     /       \
		//   n9         n2
		// (p1/v5)   (shared)

		expectedNodes := []*node.Node{
			// unique nodes from trie1
			trie1.RootNode().LeftChild().LeftChild(),  // n1
			trie1.RootNode().LeftChild().RightChild(), // n2
			trie1.RootNode().LeftChild(),              // n3
			trie1.RootNode(),                          // n4
			// unique nodes from trie2
			trie2.RootNode().RightChild().LeftChild(),  // n5
			trie2.RootNode().RightChild().RightChild(), // n6
			trie2.RootNode().RightChild(),              // n7
			trie2.RootNode(),                           // n8
			// unique nodes from trie3
			trie3.RootNode().LeftChild().LeftChild(), // n9
			trie3.RootNode().LeftChild(),             // n10
			trie3.RootNode(),                         // n11
		}

		// Use visitedNodes to prevent revisiting shared sub-tries.
		visitedNodes := make(map[*node.Node]uint64)
		i := 0
		// The loop variable is deliberately not named `trie`, which would
		// shadow the imported trie package inside the loop body.
		for _, mtrie := range forest {
			for itr := flattener.NewUniqueNodeIterator(mtrie, visitedNodes); itr.Next(); {
				n := itr.Value()
				// Record the visit so later tries skip this (possibly shared) node.
				visitedNodes[n] = uint64(i)

				require.True(t, i < len(expectedNodes))
				require.Equal(t, expectedNodes[i], n)
				i++
			}
		}
		require.Equal(t, len(expectedNodes), i)
	})
}

0 comments on commit 04e5c08

Please sign in to comment.