diff --git a/internal/trie/node/subvalue.go b/internal/trie/node/subvalue.go new file mode 100644 index 0000000000..d00aab788f --- /dev/null +++ b/internal/trie/node/subvalue.go @@ -0,0 +1,17 @@ +// Copyright 2022 ChainSafe Systems (ON) +// SPDX-License-Identifier: LGPL-3.0-only + +package node + +import "bytes" + +// SubValueEqual returns true if the node subvalue is equal to the +// subvalue given as argument. In particular, it returns false +// if one subvalue is nil and the other subvalue is the empty slice. +func (n Node) SubValueEqual(subValue []byte) (equal bool) { + if len(subValue) == 0 && len(n.SubValue) == 0 { + return (subValue == nil && n.SubValue == nil) || + (subValue != nil && n.SubValue != nil) + } + return bytes.Equal(n.SubValue, subValue) +} diff --git a/internal/trie/node/subvalue_test.go b/internal/trie/node/subvalue_test.go new file mode 100644 index 0000000000..3d190acc63 --- /dev/null +++ b/internal/trie/node/subvalue_test.go @@ -0,0 +1,57 @@ +// Copyright 2022 ChainSafe Systems (ON) +// SPDX-License-Identifier: LGPL-3.0-only + +package node + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func Test_Node_SubValueEqual(t *testing.T) { + t.Parallel() + + testCases := map[string]struct { + node Node + subValue []byte + equal bool + }{ + "nil node subvalue and nil subvalue": { + equal: true, + }, + "empty node subvalue and empty subvalue": { + node: Node{SubValue: []byte{}}, + subValue: []byte{}, + equal: true, + }, + "nil node subvalue and empty subvalue": { + subValue: []byte{}, + }, + "empty node subvalue and nil subvalue": { + node: Node{SubValue: []byte{}}, + }, + "equal non empty values": { + node: Node{SubValue: []byte{1, 2}}, + subValue: []byte{1, 2}, + equal: true, + }, + "not equal non empty values": { + node: Node{SubValue: []byte{1, 2}}, + subValue: []byte{1, 3}, + }, + } + + for name, testCase := range testCases { + testCase := testCase + t.Run(name, func(t *testing.T) { + t.Parallel() + + node := testCase.node + + equal := node.SubValueEqual(testCase.subValue) + + assert.Equal(t, testCase.equal, equal) + }) + } +} diff --git a/lib/trie/trie.go b/lib/trie/trie.go index 96e9c9192e..74d46eeb7e 100644 --- a/lib/trie/trie.go +++ b/lib/trie/trie.go @@ -18,9 +18,13 @@ var EmptyHash, _ = NewEmptyTrie().Hash() // Trie is a base 16 modified Merkle Patricia trie. type Trie struct { - generation uint64 - root *Node - childTries map[common.Hash]*Trie + generation uint64 + root *Node + childTries map[common.Hash]*Trie + // deletedMerkleValues are the node Merkle values that were deleted + // from this trie since the last snapshot. These are used by the online + // pruner to detect with database keys (trie node Merkle values) can + // be deleted. deletedMerkleValues map[string]struct{} } @@ -319,20 +323,22 @@ func findNextKeyChild(children []*Node, startIndex byte, // key specified in little Endian format. func (t *Trie) Put(keyLE, value []byte) { nibblesKey := codec.KeyLEToNibbles(keyLE) - t.root, _ = t.insert(t.root, nibblesKey, value) + t.root, _, _ = t.insert(t.root, nibblesKey, value) } // insert inserts a value in the trie at the key specified. // It may create one or more new nodes or update an existing node. -func (t *Trie) insert(parent *Node, key, value []byte) (newParent *Node, nodesCreated uint32) { +func (t *Trie) insert(parent *Node, key, value []byte) ( + newParent *Node, mutated bool, nodesCreated uint32) { if parent == nil { - const nodesCreated = 1 + mutated = true + nodesCreated = 1 return &Node{ Key: key, SubValue: value, Generation: t.generation, Dirty: true, - }, nodesCreated + }, mutated, nodesCreated } // TODO ensure all values have dirty set to true @@ -344,23 +350,26 @@ func (t *Trie) insert(parent *Node, key, value []byte) (newParent *Node, nodesCr } func (t *Trie) insertInLeaf(parentLeaf *Node, key, value []byte) ( - newParent *Node, nodesCreated uint32) { + newParent *Node, mutated bool, nodesCreated uint32) { if bytes.Equal(parentLeaf.Key, key) { nodesCreated = 0 - if bytes.Equal(value, parentLeaf.SubValue) { - return parentLeaf, nodesCreated + if parentLeaf.SubValueEqual(value) { + mutated = false + return parentLeaf, mutated, nodesCreated } copySettings := node.DefaultCopySettings copySettings.CopyValue = false parentLeaf = t.prepLeafForMutation(parentLeaf, copySettings) parentLeaf.SubValue = value - return parentLeaf, nodesCreated + mutated = true + return parentLeaf, mutated, nodesCreated } commonPrefixLength := lenCommonPrefix(key, parentLeaf.Key) // Convert the current leaf parent into a branch parent + mutated = true newBranchParent := &Node{ Key: key[:commonPrefixLength], Generation: t.generation, @@ -376,15 +385,18 @@ func (t *Trie) insertInLeaf(parentLeaf *Node, key, value []byte) ( if len(key) < len(parentLeafKey) { // Move the current leaf parent as a child to the new branch. copySettings := node.DefaultCopySettings - parentLeaf = t.prepLeafForMutation(parentLeaf, copySettings) childIndex := parentLeafKey[commonPrefixLength] - parentLeaf.Key = parentLeaf.Key[commonPrefixLength+1:] + newParentLeafKey := parentLeaf.Key[commonPrefixLength+1:] + if !bytes.Equal(parentLeaf.Key, newParentLeafKey) { + parentLeaf = t.prepLeafForMutation(parentLeaf, copySettings) + parentLeaf.Key = newParentLeafKey + } newBranchParent.Children[childIndex] = parentLeaf newBranchParent.Descendants++ nodesCreated++ } - return newBranchParent, nodesCreated + return newBranchParent, mutated, nodesCreated } if len(parentLeaf.Key) == commonPrefixLength { @@ -393,9 +405,12 @@ func (t *Trie) insertInLeaf(parentLeaf *Node, key, value []byte) ( } else { // make the leaf a child of the new branch copySettings := node.DefaultCopySettings - parentLeaf = t.prepLeafForMutation(parentLeaf, copySettings) childIndex := parentLeafKey[commonPrefixLength] - parentLeaf.Key = parentLeaf.Key[commonPrefixLength+1:] + newParentLeafKey := parentLeaf.Key[commonPrefixLength+1:] + if !bytes.Equal(parentLeaf.Key, newParentLeafKey) { + parentLeaf = t.prepLeafForMutation(parentLeaf, copySettings) + parentLeaf.Key = newParentLeafKey + } newBranchParent.Children[childIndex] = parentLeaf newBranchParent.Descendants++ nodesCreated++ @@ -410,17 +425,22 @@ func (t *Trie) insertInLeaf(parentLeaf *Node, key, value []byte) ( newBranchParent.Descendants++ nodesCreated++ - return newBranchParent, nodesCreated + return newBranchParent, mutated, nodesCreated } func (t *Trie) insertInBranch(parentBranch *Node, key, value []byte) ( - newParent *Node, nodesCreated uint32) { + newParent *Node, mutated bool, nodesCreated uint32) { copySettings := node.DefaultCopySettings - parentBranch = t.prepBranchForMutation(parentBranch, copySettings) if bytes.Equal(key, parentBranch.Key) { + if parentBranch.SubValueEqual(value) { + mutated = false + return parentBranch, mutated, 0 + } + parentBranch = t.prepBranchForMutation(parentBranch, copySettings) parentBranch.SubValue = value - return parentBranch, 0 + mutated = true + return parentBranch, mutated, 0 } if bytes.HasPrefix(key, parentBranch.Key) { @@ -438,17 +458,27 @@ func (t *Trie) insertInBranch(parentBranch *Node, key, value []byte) ( Dirty: true, } nodesCreated = 1 - } else { - child, nodesCreated = t.insert(child, remainingKey, value) + parentBranch = t.prepBranchForMutation(parentBranch, copySettings) + parentBranch.Children[childIndex] = child + parentBranch.Descendants += nodesCreated + mutated = true + return parentBranch, mutated, nodesCreated + } + + child, mutated, nodesCreated = t.insert(child, remainingKey, value) + if !mutated { + return parentBranch, mutated, 0 } + parentBranch = t.prepBranchForMutation(parentBranch, copySettings) parentBranch.Children[childIndex] = child parentBranch.Descendants += nodesCreated - return parentBranch, nodesCreated + return parentBranch, mutated, nodesCreated } // we need to branch out at the point where the keys diverge // update partial keys, new branch has key up to matching length + mutated = true nodesCreated = 1 commonPrefixLength := lenCommonPrefix(key, parentBranch.Key) newParentBranch := &Node{ @@ -461,6 +491,8 @@ func (t *Trie) insertInBranch(parentBranch *Node, key, value []byte) ( oldParentIndex := parentBranch.Key[commonPrefixLength] remainingOldParentKey := parentBranch.Key[commonPrefixLength+1:] + // Note: parentBranch.Key != remainingOldParentKey + parentBranch = t.prepBranchForMutation(parentBranch, copySettings) parentBranch.Key = remainingOldParentKey newParentBranch.Children[oldParentIndex] = parentBranch newParentBranch.Descendants += 1 + parentBranch.Descendants @@ -471,12 +503,12 @@ func (t *Trie) insertInBranch(parentBranch *Node, key, value []byte) ( childIndex := key[commonPrefixLength] remainingKey := key[commonPrefixLength+1:] var additionalNodesCreated uint32 - newParentBranch.Children[childIndex], additionalNodesCreated = t.insert(nil, remainingKey, value) + newParentBranch.Children[childIndex], _, additionalNodesCreated = t.insert(nil, remainingKey, value) nodesCreated += additionalNodesCreated newParentBranch.Descendants += additionalNodesCreated } - return newParentBranch, nodesCreated + return newParentBranch, mutated, nodesCreated } // LoadFromMap loads the given data mapping of key to value into the trie. @@ -792,7 +824,11 @@ func (t *Trie) deleteNodesLimit(parent *Node, limit uint32) ( copySettings := node.DefaultCopySettings branch = t.prepBranchForMutation(branch, copySettings) + branch.Children[i], newDeleted, newNodesRemoved = t.deleteNodesLimit(child, limit) + // Note: newDeleted can never be zero here since the limit isn't zero + // and the child is not nil. Therefore it is safe to prepare the branch + // for mutation right before this call. if branch.Children[i] == nil { nilChildren++ } diff --git a/lib/trie/trie_test.go b/lib/trie/trie_test.go index c68dd23433..34157ea80e 100644 --- a/lib/trie/trie_test.go +++ b/lib/trie/trie_test.go @@ -1063,6 +1063,7 @@ func Test_Trie_insert(t *testing.T) { key []byte value []byte newNode *Node + mutated bool nodesCreated uint32 }{ "nil parent": { @@ -1077,6 +1078,7 @@ func Test_Trie_insert(t *testing.T) { Generation: 1, Dirty: true, }, + mutated: true, nodesCreated: 1, }, "branch parent": { @@ -1110,6 +1112,7 @@ func Test_Trie_insert(t *testing.T) { {Key: []byte{2}, SubValue: []byte{1}}, }), }, + mutated: true, nodesCreated: 1, }, "override leaf parent": { @@ -1128,6 +1131,7 @@ func Test_Trie_insert(t *testing.T) { Generation: 1, Dirty: true, }, + mutated: true, }, "write same leaf value to leaf parent": { trie: Trie{ @@ -1169,6 +1173,7 @@ func Test_Trie_insert(t *testing.T) { }, }), }, + mutated: true, nodesCreated: 1, }, "write leaf as divergent child next to parent leaf": { @@ -1202,9 +1207,10 @@ func Test_Trie_insert(t *testing.T) { }, }), }, + mutated: true, nodesCreated: 2, }, - "write leaf into nil value leaf": { + "override leaf value": { trie: Trie{ generation: 1, }, @@ -1220,8 +1226,9 @@ func Test_Trie_insert(t *testing.T) { Dirty: true, Generation: 1, }, + mutated: true, }, - "write leaf as child to nil value leaf": { + "write leaf as child to leaf": { trie: Trie{ generation: 1, }, @@ -1247,6 +1254,7 @@ func Test_Trie_insert(t *testing.T) { }, }), }, + mutated: true, nodesCreated: 1, }, } @@ -1259,9 +1267,10 @@ func Test_Trie_insert(t *testing.T) { trie := testCase.trie expectedTrie := *trie.DeepCopy() - newNode, nodesCreated := trie.insert(testCase.parent, testCase.key, testCase.value) + newNode, mutated, nodesCreated := trie.insert(testCase.parent, testCase.key, testCase.value) assert.Equal(t, testCase.newNode, newNode) + assert.Equal(t, testCase.mutated, mutated) assert.Equal(t, testCase.nodesCreated, nodesCreated) assert.Equal(t, expectedTrie, trie) }) @@ -1276,8 +1285,29 @@ func Test_Trie_insertInBranch(t *testing.T) { key []byte value []byte newNode *Node + mutated bool nodesCreated uint32 }{ + "insert existing value to branch": { + parent: &Node{ + Key: []byte{2}, + SubValue: []byte("same"), + Descendants: 1, + Children: padRightChildren([]*Node{ + {Key: []byte{1}, SubValue: []byte{1}}, + }), + }, + key: []byte{2}, + value: []byte("same"), + newNode: &Node{ + Key: []byte{2}, + SubValue: []byte("same"), + Descendants: 1, + Children: padRightChildren([]*Node{ + {Key: []byte{1}, SubValue: []byte{1}}, + }), + }, + }, "update with branch": { parent: &Node{ Key: []byte{2}, @@ -1298,6 +1328,7 @@ func Test_Trie_insertInBranch(t *testing.T) { {Key: []byte{1}, SubValue: []byte{1}}, }), }, + mutated: true, }, "update with leaf": { parent: &Node{ @@ -1319,6 +1350,7 @@ func Test_Trie_insertInBranch(t *testing.T) { {Key: []byte{1}, SubValue: []byte{1}}, }), }, + mutated: true, }, "add leaf as direct child": { parent: &Node{ @@ -1346,8 +1378,29 @@ func Test_Trie_insertInBranch(t *testing.T) { }, }), }, + mutated: true, nodesCreated: 1, }, + "insert same leaf as existing direct child leaf": { + parent: &Node{ + Key: []byte{2}, + SubValue: []byte{5}, + Descendants: 1, + Children: padRightChildren([]*Node{ + {Key: []byte{1}, SubValue: []byte{1}}, + }), + }, + key: []byte{2, 0, 1}, + value: []byte{1}, + newNode: &Node{ + Key: []byte{2}, + SubValue: []byte{5}, + Descendants: 1, + Children: padRightChildren([]*Node{ + {Key: []byte{1}, SubValue: []byte{1}}, + }), + }, + }, "add leaf as nested child": { parent: &Node{ Key: []byte{2}, @@ -1389,6 +1442,7 @@ func Test_Trie_insertInBranch(t *testing.T) { }, }), }, + mutated: true, nodesCreated: 1, }, "split branch for longer key": { @@ -1424,6 +1478,7 @@ func Test_Trie_insertInBranch(t *testing.T) { }, }), }, + mutated: true, nodesCreated: 2, }, "split root branch": { @@ -1459,6 +1514,7 @@ func Test_Trie_insertInBranch(t *testing.T) { }, }), }, + mutated: true, nodesCreated: 2, }, "update with leaf at empty key": { @@ -1490,6 +1546,7 @@ func Test_Trie_insertInBranch(t *testing.T) { }, }), }, + mutated: true, nodesCreated: 1, }, } @@ -1501,9 +1558,10 @@ func Test_Trie_insertInBranch(t *testing.T) { trie := new(Trie) - newNode, nodesCreated := trie.insertInBranch(testCase.parent, testCase.key, testCase.value) + newNode, mutated, nodesCreated := trie.insertInBranch(testCase.parent, testCase.key, testCase.value) assert.Equal(t, testCase.newNode, newNode) + assert.Equal(t, testCase.mutated, mutated) assert.Equal(t, testCase.nodesCreated, nodesCreated) assert.Equal(t, new(Trie), trie) // check no mutation })