Revert "trie: remove internal nodes between shortNode and child in pa…
Browse files Browse the repository at this point in the history
…th mode (ethereum#28163)"

This reverts commit f881c71.
devopsbo3 authored Nov 10, 2023
1 parent c40d791 commit dffc613
Showing 3 changed files with 40 additions and 238 deletions.
core/rawdb/accessors_trie.go (0 additions, 20 deletions)
@@ -89,16 +89,6 @@ func HasAccountTrieNode(db ethdb.KeyValueReader, path []byte, hash common.Hash)
     return h.hash(data) == hash
 }
 
-// ExistsAccountTrieNode checks the presence of the account trie node with the
-// specified node path, regardless of the node hash.
-func ExistsAccountTrieNode(db ethdb.KeyValueReader, path []byte) bool {
-    has, err := db.Has(accountTrieNodeKey(path))
-    if err != nil {
-        return false
-    }
-    return has
-}
-
 // WriteAccountTrieNode writes the provided account trie node into database.
 func WriteAccountTrieNode(db ethdb.KeyValueWriter, path []byte, node []byte) {
     if err := db.Put(accountTrieNodeKey(path), node); err != nil {
@@ -137,16 +127,6 @@ func HasStorageTrieNode(db ethdb.KeyValueReader, accountHash common.Hash, path [
     return h.hash(data) == hash
 }
 
-// ExistsStorageTrieNode checks the presence of the storage trie node with the
-// specified account hash and node path, regardless of the node hash.
-func ExistsStorageTrieNode(db ethdb.KeyValueReader, accountHash common.Hash, path []byte) bool {
-    has, err := db.Has(storageTrieNodeKey(accountHash, path))
-    if err != nil {
-        return false
-    }
-    return has
-}
-
 // WriteStorageTrieNode writes the provided storage trie node into database.
 func WriteStorageTrieNode(db ethdb.KeyValueWriter, accountHash common.Hash, path []byte, node []byte) {
     if err := db.Put(storageTrieNodeKey(accountHash, path), node); err != nil {
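For orientation, the removed Exists* helpers differed from the Has* checks kept above: Has* verifies the stored node against an expected hash, while Exists* only asked whether any node sat at the path at all. A minimal sketch of the contrast, assuming the pre-revert rawdb API (i.e. with ethereum#28163 applied); nodeState is a hypothetical helper, not part of the codebase:

package sketch

import (
    "github.com/ethereum/go-ethereum/common"
    "github.com/ethereum/go-ethereum/core/rawdb"
    "github.com/ethereum/go-ethereum/ethdb"
)

// nodeState contrasts the two lookups: ExistsAccountTrieNode (removed by
// this revert) reports whether anything is stored at the path, while
// HasAccountTrieNode additionally checks that the stored node hashes to want.
func nodeState(db ethdb.KeyValueReader, path []byte, want common.Hash) (present, matches bool) {
    present = rawdb.ExistsAccountTrieNode(db, path)    // path-only presence
    matches = rawdb.HasAccountTrieNode(db, path, want) // path + hash check
    return present, matches
}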
trie/sync.go (16 additions, 79 deletions)
@@ -27,7 +27,6 @@ import (
     "github.com/ethereum/go-ethereum/core/types"
     "github.com/ethereum/go-ethereum/ethdb"
     "github.com/ethereum/go-ethereum/log"
-    "github.com/ethereum/go-ethereum/metrics"
 )
 
 // ErrNotRequested is returned by the trie sync when it's requested to process a
@@ -43,16 +42,6 @@ var ErrAlreadyProcessed = errors.New("already processed")
 // memory if the node was configured with a significant number of peers.
 const maxFetchesPerDepth = 16384
 
-var (
-    // deletionGauge is the metric to track how many trie node deletions
-    // are performed in total during the sync process.
-    deletionGauge = metrics.NewRegisteredGauge("trie/sync/delete", nil)
-
-    // lookupGauge is the metric to track how many trie node lookups are
-    // performed to determine if node needs to be deleted.
-    lookupGauge = metrics.NewRegisteredGauge("trie/sync/lookup", nil)
-)
-
 // SyncPath is a path tuple identifying a particular trie node either in a single
 // trie (account) or a layered trie (account -> storage).
 //
@@ -104,10 +93,9 @@ type LeafCallback func(keys [][]byte, path []byte, leaf []byte, parent common.Ha
 
 // nodeRequest represents a scheduled or already in-flight trie node retrieval request.
 type nodeRequest struct {
-    hash    common.Hash // Hash of the trie node to retrieve
-    path    []byte      // Merkle path leading to this node for prioritization
-    data    []byte      // Data content of the node, cached until all subtrees complete
-    deletes [][]byte    // List of internal path segments for trie nodes to delete
+    hash common.Hash // Hash of the trie node to retrieve
+    path []byte      // Merkle path leading to this node for prioritization
+    data []byte      // Data content of the node, cached until all subtrees complete
 
     parent   *nodeRequest // Parent state node referencing this entry
     deps     int          // Number of dependencies before allowed to commit this node
@@ -137,20 +125,18 @@ type CodeSyncResult struct {
 // syncMemBatch is an in-memory buffer of successfully downloaded but not yet
 // persisted data items.
 type syncMemBatch struct {
-    nodes   map[string][]byte      // In-memory membatch of recently completed nodes
-    hashes  map[string]common.Hash // Hashes of recently completed nodes
-    deletes map[string]struct{}    // List of paths for trie node to delete
-    codes   map[common.Hash][]byte // In-memory membatch of recently completed codes
-    size    uint64                 // Estimated batch-size of in-memory data.
+    nodes  map[string][]byte      // In-memory membatch of recently completed nodes
+    hashes map[string]common.Hash // Hashes of recently completed nodes
+    codes  map[common.Hash][]byte // In-memory membatch of recently completed codes
+    size   uint64                 // Estimated batch-size of in-memory data.
 }
 
 // newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes.
 func newSyncMemBatch() *syncMemBatch {
     return &syncMemBatch{
-        nodes:   make(map[string][]byte),
-        hashes:  make(map[string]common.Hash),
-        deletes: make(map[string]struct{}),
-        codes:   make(map[common.Hash][]byte),
+        nodes:  make(map[string][]byte),
+        hashes: make(map[string]common.Hash),
+        codes:  make(map[common.Hash][]byte),
     }
 }
 
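Both the old and new versions key these maps by string(path), since a []byte cannot be a Go map key; paths are converted back with []byte(path) at flush time (see Commit below). A tiny self-contained illustration of the convention, using a plain map rather than the real syncMemBatch:

package sketch

// pathKeyed shows the string(path) keying convention used by the membatch
// maps: converting the byte slice to a string yields an immutable, hashable
// key, so at most one entry can exist per trie path.
func pathKeyed(paths [][]byte) map[string][]byte {
    m := make(map[string][]byte, len(paths))
    for _, p := range paths {
        m[string(p)] = p // duplicate paths collapse onto one key
    }
    return m
}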
@@ -361,23 +347,16 @@ func (s *Sync) ProcessNode(result NodeSyncResult) error {
 // Commit flushes the data stored in the internal membatch out to persistent
 // storage, returning any occurred error.
 func (s *Sync) Commit(dbw ethdb.Batch) error {
-    // Flush the pending node writes into database batch.
+    // Dump the membatch into a database dbw
     for path, value := range s.membatch.nodes {
         owner, inner := ResolvePath([]byte(path))
         rawdb.WriteTrieNode(dbw, owner, inner, s.membatch.hashes[path], value, s.scheme)
     }
-    // Flush the pending node deletes into the database batch.
-    // Please note that each written and deleted node has a
-    // unique path, ensuring no duplication occurs.
-    for path := range s.membatch.deletes {
-        owner, inner := ResolvePath([]byte(path))
-        rawdb.DeleteTrieNode(dbw, owner, inner, common.Hash{} /* unused */, s.scheme)
-    }
-    // Flush the pending code writes into database batch.
     for hash, value := range s.membatch.codes {
         rawdb.WriteCode(dbw, hash, value)
     }
-    s.membatch = newSyncMemBatch() // reset the batch
+    // Drop the membatch data and return
+    s.membatch = newSyncMemBatch()
     return nil
 }
 
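The removed delete-flush loop rested on the invariant its comment states: no path is ever both written and deleted within one Commit, so the two loops could run in either order. A minimal sketch of that invariant as a standalone check (pathsDisjoint is a hypothetical helper over plain maps, not the real syncMemBatch):

package sketch

// pathsDisjoint mirrors the uniqueness claim in the removed comment: the
// written paths and the deleted paths never overlap, so flushing writes
// before deletes (or vice versa) cannot clobber a freshly written node.
func pathsDisjoint(nodes map[string][]byte, deletes map[string]struct{}) bool {
    for path := range deletes {
        if _, ok := nodes[path]; ok {
            return false // a path scheduled for both write and delete
        }
    }
    return true
}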
@@ -446,39 +425,6 @@ func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) {
             node: node.Val,
             path: append(append([]byte(nil), req.path...), key...),
         }}
-        // Mark all internal nodes between shortNode and its **in disk**
-        // child as invalid. This is essential in the case of path mode
-        // scheme; otherwise, state healing might overwrite existing child
-        // nodes silently while leaving a dangling parent node within the
-        // range of this internal path on disk. This would break the
-        // guarantee for state healing.
-        //
-        // While it's possible for this shortNode to overwrite a previously
-        // existing full node, the other branches of the fullNode can be
-        // retained as they remain untouched and complete.
-        //
-        // This step is only necessary for path mode, as there is no deletion
-        // in hash mode at all.
-        if _, ok := node.Val.(hashNode); ok && s.scheme == rawdb.PathScheme {
-            owner, inner := ResolvePath(req.path)
-            for i := 1; i < len(key); i++ {
-                // While checking for a non-existent item in Pebble can be less efficient
-                // without a bloom filter, the relatively low frequency of lookups makes
-                // the performance impact negligible.
-                var exists bool
-                if owner == (common.Hash{}) {
-                    exists = rawdb.ExistsAccountTrieNode(s.database, append(inner, key[:i]...))
-                } else {
-                    exists = rawdb.ExistsStorageTrieNode(s.database, owner, append(inner, key[:i]...))
-                }
-                if exists {
-                    req.deletes = append(req.deletes, key[:i])
-                    deletionGauge.Inc(1)
-                    log.Debug("Detected dangling node", "owner", owner, "path", append(inner, key[:i]...))
-                }
-            }
-            lookupGauge.Inc(int64(len(key) - 1))
-        }
     case *fullNode:
         for i := 0; i < 17; i++ {
             if node.Children[i] != nil {
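The removed block probes every strictly internal path between a shortNode and its on-disk child: for a key of length n it checks the n-1 prefixes key[:1] through key[:n-1], never the child path itself. A minimal sketch of just that enumeration (internalPaths is a hypothetical helper; the real code probed disk directly via rawdb.ExistsAccountTrieNode / rawdb.ExistsStorageTrieNode):

package sketch

// internalPaths lists the paths the removed loop probed: for a shortNode
// at parent with key, these are parent+key[:1] .. parent+key[:n-1],
// matching the loop bounds "for i := 1; i < len(key); i++".
func internalPaths(parent, key []byte) [][]byte {
    var paths [][]byte
    for i := 1; i < len(key); i++ {
        p := append(append([]byte(nil), parent...), key[:i]...)
        paths = append(paths, p)
    }
    return paths
}

For a shortNode at path {0x1, 0x2} with key {0xa, 0xb, 0xc}, the probed paths are {0x1, 0x2, 0xa} and {0x1, 0x2, 0xa, 0xb}; any stale node found at one of them was queued in req.deletes.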
@@ -563,19 +509,10 @@ func (s *Sync) commitNodeRequest(req *nodeRequest) error {
     // Write the node content to the membatch
     s.membatch.nodes[string(req.path)] = req.data
     s.membatch.hashes[string(req.path)] = req.hash
-
     // The size tracking refers to the db-batch, not the in-memory data.
-    if s.scheme == rawdb.PathScheme {
-        s.membatch.size += uint64(len(req.path) + len(req.data))
-    } else {
-        s.membatch.size += common.HashLength + uint64(len(req.data))
-    }
-    // Delete the internal nodes which are marked as invalid
-    for _, segment := range req.deletes {
-        path := append(req.path, segment...)
-        s.membatch.deletes[string(path)] = struct{}{}
-        s.membatch.size += uint64(len(path))
-    }
+    // Therefore, we ignore the req.path, and account only for the hash+data
+    // which eventually is written to db.
+    s.membatch.size += common.HashLength + uint64(len(req.data))
     delete(s.nodeReqs, string(req.path))
     s.fetches[len(req.path)]--
 
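The size-accounting change above is the arithmetic difference between the two schemes: ethereum#28163 charged path+data bytes under the path scheme, whereas the reverted code always charges hash+data. A small sketch of the two rules side by side (batchSizeEstimate is a hypothetical helper; hashLength stands in for common.HashLength):

package sketch

const hashLength = 32 // stand-in for common.HashLength

// batchSizeEstimate contrasts the two accounting rules: the pre-revert
// code charged path+data under the path scheme, while the restored code
// charges hash+data unconditionally, ignoring the path length.
func batchSizeEstimate(pathScheme bool, path, data []byte) uint64 {
    if pathScheme {
        return uint64(len(path) + len(data)) // ethereum#28163 accounting
    }
    return hashLength + uint64(len(data)) // accounting after this revert
}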