From 4c2c35fa86c71c14829c0df0960b6dee65af798c Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Fri, 5 Mar 2021 14:15:15 +0800 Subject: [PATCH 01/75] eth/protocols: persist received state segments --- eth/protocols/snap/sync.go | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 2924fa080208..f192fa88d059 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -21,6 +21,7 @@ import ( "encoding/json" "errors" "fmt" + "github.com/ethereum/go-ethereum/core/state/snapshot" "math/big" "math/rand" "sync" @@ -435,9 +436,8 @@ type Syncer struct { bytecodeHealDups uint64 // Number of bytecodes already processed bytecodeHealNops uint64 // Number of bytecodes not requested - startTime time.Time // Time instance when snapshot sync started - startAcc common.Hash // Account hash where sync started from - logTime time.Time // Time instance when status was last reported + startTime time.Time // Time instance when snapshot sync started + logTime time.Time // Time instance when status was last reported pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown lock sync.RWMutex // Protects fields that can change outside of sync (peers, reqs, root) @@ -1694,7 +1694,7 @@ func (s *Syncer) processBytecodeResponse(res *bytecodeResponse) { // processStorageResponse integrates an already validated storage response // into the account tasks. func (s *Syncer) processStorageResponse(res *storageResponse) { - // Switch the suntask from pending to idle + // Switch the subtask from pending to idle if res.subTask != nil { res.subTask.req = nil } @@ -1826,6 +1826,13 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { nodes++ } it.Release() + + // Persist the received storage segments. These flat states may be + // outdated during the sync, but they can be fixed later during the + // snapshot generation. 
+ for j := 0; j < len(res.hashes[i]); j++ { + rawdb.WriteStorageSnapshot(batch, account, res.hashes[i][j], res.slots[i][j]) + } } if err := batch.Write(); err != nil { log.Crit("Failed to persist storage slots", "err", err) @@ -1983,6 +1990,16 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { } it.Release() + // Persist the received account segments. These flat states may be + // outdated during the sync, but they can be fixed later during the + // snapshot generation. + for i, hash := range res.hashes { + if task.needCode[i] || task.needState[i] { + break + } + blob := snapshot.SlimAccountRLP(res.accounts[i].Nonce, res.accounts[i].Balance, res.accounts[i].Root, res.accounts[i].CodeHash) + rawdb.WriteAccountSnapshot(batch, hash, blob) + } if err := batch.Write(); err != nil { log.Crit("Failed to persist accounts", "err", err) } From 4a7650f57ae3fdfbc9feab86addab2014fc43d0f Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Mon, 8 Mar 2021 20:12:09 +0800 Subject: [PATCH 02/75] core: initial implementation --- core/rawdb/accessors_snapshot.go | 14 +- core/state/snapshot/generate.go | 400 ++++++++++++++++++--------- core/state/snapshot/generate_test.go | 50 +++- core/state/snapshot/journal.go | 15 +- core/state/snapshot/snapshot.go | 11 +- core/state/snapshot/wipe.go | 20 +- 6 files changed, 339 insertions(+), 171 deletions(-) diff --git a/core/rawdb/accessors_snapshot.go b/core/rawdb/accessors_snapshot.go index c3616ba3aaa5..6b72378f76ae 100644 --- a/core/rawdb/accessors_snapshot.go +++ b/core/rawdb/accessors_snapshot.go @@ -72,6 +72,12 @@ func DeleteAccountSnapshot(db ethdb.KeyValueWriter, hash common.Hash) { } } +// IterateAccountSnapshots returns an iterator for walking the account snapshots +// with the specified start position. +func IterateAccountSnapshots(db ethdb.Iteratee, start common.Hash) ethdb.Iterator { + return db.NewIterator(SnapshotAccountPrefix, start.Bytes()) +} + // ReadStorageSnapshot retrieves the snapshot entry of an storage trie leaf. 
func ReadStorageSnapshot(db ethdb.KeyValueReader, accountHash, storageHash common.Hash) []byte { data, _ := db.Get(storageSnapshotKey(accountHash, storageHash)) @@ -92,10 +98,10 @@ func DeleteStorageSnapshot(db ethdb.KeyValueWriter, accountHash, storageHash com } } -// IterateStorageSnapshots returns an iterator for walking the entire storage -// space of a specific account. -func IterateStorageSnapshots(db ethdb.Iteratee, accountHash common.Hash) ethdb.Iterator { - return db.NewIterator(storageSnapshotsKey(accountHash), nil) +// IterateStorageSnapshots returns an iterator for walking the storage space of +// a specific account with specified start position. +func IterateStorageSnapshots(db ethdb.Iteratee, accountHash common.Hash, start common.Hash) ethdb.Iterator { + return db.NewIterator(storageSnapshotsKey(accountHash), start.Bytes()) } // ReadSnapshotJournal retrieves the serialized in-memory diff layers saved at diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 2b41dd551376..515f214a00e4 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -19,6 +19,7 @@ package snapshot import ( "bytes" "encoding/binary" + "errors" "fmt" "math/big" "time" @@ -40,17 +41,28 @@ var ( // emptyCode is the known hash of the empty EVM bytecode. emptyCode = crypto.Keccak256Hash(nil) + + // accountCheckRange is the upper limit of the number of accounts involved in + // each range check. This is a value estimated based on experience. If this + // value is too large, the failure rate of range prove will increase. Otherwise + // the the value is too small, the efficiency of the state recovery will decrease. + accountCheckRange = 100 + + // storageCheckRange is the upper limit of the number of storage slots involved + // in each range check. This is a value estimated based on experience. If this + // value is too large, the failure rate of range prove will increase. 
Otherwise + // the the value is too small, the efficiency of the state recovery will decrease. + storageCheckRange = 100 ) // generatorStats is a collection of statistics gathered by the snapshot generator // for logging purposes. type generatorStats struct { - wiping chan struct{} // Notification channel if wiping is in progress origin uint64 // Origin prefix where generation started start time.Time // Timestamp when generation started - accounts uint64 // Number of accounts indexed - slots uint64 // Number of storage slots indexed - storage common.StorageSize // Account and storage slot size + accounts uint64 // Number of accounts indexed(generated or recovered) + slots uint64 // Number of storage slots indexed(generated or recovered) + storage common.StorageSize // Total account and storage slot size(generation or recovery) } // Log creates an contextual log with the given message and the context pulled @@ -94,15 +106,10 @@ func (gs *generatorStats) Log(msg string, root common.Hash, marker []byte) { // generateSnapshot regenerates a brand new snapshot based on an existing state // database and head block asynchronously. The snapshot is returned immediately // and generation is continued in the background until done. -func generateSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, wiper chan struct{}) *diskLayer { - // Wipe any previously existing snapshot from the database if no wiper is - // currently in progress. - if wiper == nil { - wiper = wipeSnapshot(diskdb, true) - } +func generateSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash) *diskLayer { // Create a new disk layer with an initialized state marker at zero var ( - stats = &generatorStats{wiping: wiper, start: time.Now()} + stats = &generatorStats{start: time.Now()} batch = diskdb.NewBatch() genMarker = []byte{} // Initialized but empty! 
) @@ -135,7 +142,6 @@ func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorSta Marker: marker, } if stats != nil { - entry.Wiping = (stats.wiping != nil) entry.Accounts = stats.accounts entry.Slots = stats.slots entry.Storage = uint64(stats.storage) @@ -159,169 +165,277 @@ func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorSta rawdb.WriteSnapshotGenerator(db, blob) } -// generate is a background thread that iterates over the state and storage tries, -// constructing the state snapshot. All the arguments are purely for statistics -// gathering and logging, since the method surfs the blocks as they arrive, often -// being restarted. -func (dl *diskLayer) generate(stats *generatorStats) { - // If a database wipe is in operation, wait until it's done - if stats.wiping != nil { - stats.Log("Wiper running, state snapshotting paused", common.Hash{}, dl.genMarker) - select { - // If wiper is done, resume normal mode of operation - case <-stats.wiping: - stats.wiping = nil - stats.start = time.Now() +// proveRange proves the state segment with particular prefix is "valid". +// The iteration start point will be assigned if the iterator is restored from +// the last interruption. Max will be assigned in order to limit the maximum +// amount of data involved in each iteration. 
+func (dl *diskLayer) proveRange(root common.Hash, tr *trie.SecureTrie, prefix []byte, kind string, origin []byte, max int, onValue func([]byte) ([]byte, error)) ([][]byte, [][]byte, []byte, bool, error) { + var ( + keys [][]byte + vals [][]byte + count int + last []byte + proof = rawdb.NewMemoryDatabase() + iter = dl.diskdb.NewIterator(prefix, origin) + ) + for iter.Next() && count < max { + key := iter.Key() + if len(key) != len(prefix)+common.HashLength { + continue + } + if !bytes.HasPrefix(key, prefix) { + continue + } + last = common.CopyBytes(key[len(prefix):]) + keys = append(keys, common.CopyBytes(key[len(prefix):])) - // If generator was aborted during wipe, return - case abort := <-dl.genAbort: - abort <- stats - return + if onValue == nil { + vals = append(vals, common.CopyBytes(iter.Value())) + } else { + converted, err := onValue(common.CopyBytes(iter.Value())) + if err != nil { + log.Debug("Failed to convert the flat state", "kind", kind, "key", common.BytesToHash(key[len(prefix):]), "error", err) + return nil, nil, last, false, err + } + vals = append(vals, converted) + } + count += 1 + } + // Generate the Merkle proofs for the first and last element + if origin == nil { + origin = common.Hash{}.Bytes() + } + if err := tr.Prove(origin, 0, proof); err != nil { + log.Debug("Failed to prove range", "kind", kind, "origin", origin, "err", err) + return nil, nil, last, false, err + } + if last != nil { + if err := tr.Prove(last, 0, proof); err != nil { + log.Debug("Failed to prove range", "kind", kind, "last", last, "err", err) + return nil, nil, last, false, err } } - // Create an account and state iterator pointing to the current generator marker - accTrie, err := trie.NewSecure(dl.root, dl.triedb) + // Verify the state segment with range prover, ensure that all flat states + // in this range correspond to merkle trie. 
+ _, _, _, cont, err := trie.VerifyRangeProof(root, origin, last, keys, vals, proof) + if err != nil { + return nil, nil, last, false, err + } + // Range prover says the trie still has some elements on the right side but + // the database is exhausted, then data loss is detected. + if cont && count < max { + return nil, nil, last, false, errors.New("data loss in the state range") + } + return keys, vals, last, !cont, nil +} + +// genRange generates the state segment with particular prefix. Generation can +// either verify the correctness of existing state through rangeproof and skip +// generation, or iterate trie to regenerate state on demand. +func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, stats *generatorStats, onState func(key []byte, val []byte, batch ethdb.Batch, regen bool) error, onValue func([]byte) ([]byte, error)) (bool, []byte, error) { + tr, err := trie.NewSecure(root, dl.triedb) if err != nil { // The account trie is missing (GC), surf the chain until one becomes available - stats.Log("Trie missing, state snapshotting paused", dl.root, dl.genMarker) + stats.Log("Trie missing, state snapshotting paused", root, dl.genMarker) abort := <-dl.genAbort abort <- stats - return + return false, nil, errors.New("trie is missing") + } + // Use range prover to check the validity of the flat state in the range + batch := dl.diskdb.NewBatch() + keys, vals, last, exhausted, err := dl.proveRange(root, tr, prefix, kind, origin, max, onValue) + if err == nil { + // The verification is passed, process each state with the given + // callback function. 
If this state represents a contract, the + // corresponding storage check will be performed in the callback + for i := 0; i < len(keys); i++ { + if err := onState(keys[i], vals[i], batch, false); err != nil { + return false, nil, err + } + } + if batch.ValueSize() > 0 { + batch.Write() + batch.Reset() + } + return exhausted, last, nil + } + // The verifcation is failed, the flat state in this range cannot match the + // merkle trie. Alternatively, use the fallback generation mechanism to regenerate + // the correct flat state by iterating trie. But wiping the existent outdated flat + // data in this range first. + // + // Note if the returned last is nil(no more flat state can be found in the database), + // then all the entries under the given prefix will be wiped totally. + if err := wipeKeyRange(dl.diskdb, kind, prefix, origin, last, len(prefix)+common.HashLength); err != nil { + return false, nil, err + } + trIter := trie.NewIterator(tr.NodeIterator(origin)) + for trIter.Next() { + if last != nil && bytes.Compare(trIter.Key, last) > 0 { + return false, last, nil // Apparently the trie is not exhausted + } + if err := onState(trIter.Key, trIter.Value, batch, true); err != nil { + return false, nil, err + } + } + if trIter.Err != nil { + return false, nil, trIter.Err } - stats.Log("Resuming state snapshot generation", dl.root, dl.genMarker) + if batch.ValueSize() > 0 { + batch.Write() + batch.Reset() + } + return true, nil, nil // The entire trie is exhausted +} +// generate is a background thread that iterates over the state and storage tries, +// constructing the state snapshot. All the arguments are purely for statistics +// gathering and logging, since the method surfs the blocks as they arrive, often +// being restarted. 
+func (dl *diskLayer) generate(stats *generatorStats) { var accMarker []byte if len(dl.genMarker) > 0 { // []byte{} is the start, use nil for that accMarker = dl.genMarker[:common.HashLength] } - accIt := trie.NewIterator(accTrie.NodeIterator(accMarker)) - batch := dl.diskdb.NewBatch() - - // Iterate from the previous marker and continue generating the state snapshot logged := time.Now() - for accIt.Next() { - // Retrieve the current account and flatten it into the internal format - accountHash := common.BytesToHash(accIt.Key) - var acc struct { - Nonce uint64 - Balance *big.Int - Root common.Hash - CodeHash []byte - } - if err := rlp.DecodeBytes(accIt.Value, &acc); err != nil { - log.Crit("Invalid account encountered during snapshot creation", "err", err) - } - data := SlimAccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash) - - // If the account is not yet in-progress, write it out - if accMarker == nil || !bytes.Equal(accountHash[:], accMarker) { - rawdb.WriteAccountSnapshot(batch, accountHash, data) - stats.storage += common.StorageSize(1 + common.HashLength + len(data)) - stats.accounts++ - } - // If we've exceeded our batch allowance or termination was requested, flush to disk - var abort chan *generatorStats - select { - case abort = <-dl.genAbort: - default: - } - if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { - // Only write and set the marker if we actually did something useful - if batch.ValueSize() > 0 { - // Ensure the generator entry is in sync with the data - marker := accountHash[:] - journalProgress(batch, marker, stats) - - batch.Write() - batch.Reset() + accOrigin := common.CopyBytes(accMarker) + for { + exhausted, last, err := dl.genRange(dl.root, rawdb.SnapshotAccountPrefix, "account", accOrigin, accountCheckRange, stats, func(key []byte, val []byte, batch ethdb.Batch, regen bool) error { + // Retrieve the current account and flatten it into the internal format + accountHash := common.BytesToHash(key) - dl.lock.Lock() - 
dl.genMarker = marker - dl.lock.Unlock() + var acc struct { + Nonce uint64 + Balance *big.Int + Root common.Hash + CodeHash []byte } - if abort != nil { - stats.Log("Aborting state snapshot generation", dl.root, accountHash[:]) - abort <- stats - return + if err := rlp.DecodeBytes(val, &acc); err != nil { + log.Crit("Invalid account encountered during snapshot creation", "err", err) } - } - // If the account is in-progress, continue where we left off (otherwise iterate all) - if acc.Root != emptyRoot { - storeTrie, err := trie.NewSecure(acc.Root, dl.triedb) - if err != nil { - log.Error("Generator failed to access storage trie", "root", dl.root, "account", accountHash, "stroot", acc.Root, "err", err) - abort := <-dl.genAbort - abort <- stats - return + data := SlimAccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash) + + // If the account is not yet in-progress, write it out + if accMarker == nil || !bytes.Equal(accountHash[:], accMarker) { + if regen { + rawdb.WriteAccountSnapshot(batch, accountHash, data) + } + stats.storage += common.StorageSize(1 + common.HashLength + len(data)) + stats.accounts++ } - var storeMarker []byte - if accMarker != nil && bytes.Equal(accountHash[:], accMarker) && len(dl.genMarker) > common.HashLength { - storeMarker = dl.genMarker[common.HashLength:] + // If we've exceeded our batch allowance or termination was requested, flush to disk + var abort chan *generatorStats + select { + case abort = <-dl.genAbort: + default: } - storeIt := trie.NewIterator(storeTrie.NodeIterator(storeMarker)) - for storeIt.Next() { - rawdb.WriteStorageSnapshot(batch, accountHash, common.BytesToHash(storeIt.Key), storeIt.Value) - stats.storage += common.StorageSize(1 + 2*common.HashLength + len(storeIt.Value)) - stats.slots++ + if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { + // Only write and set the marker if we actually did something useful + if batch.ValueSize() > 0 { + // Ensure the generator entry is in sync with the data + marker 
:= accountHash[:] + journalProgress(batch, marker, stats) - // If we've exceeded our batch allowance or termination was requested, flush to disk - var abort chan *generatorStats - select { - case abort = <-dl.genAbort: - default: + batch.Write() + batch.Reset() + + dl.lock.Lock() + dl.genMarker = marker + dl.lock.Unlock() + } + if abort != nil { + stats.Log("Aborting state snapshot generation", dl.root, accountHash[:]) + abort <- stats + return errors.New("aborted") } - if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { - // Only write and set the marker if we actually did something useful - if batch.ValueSize() > 0 { - // Ensure the generator entry is in sync with the data - marker := append(accountHash[:], storeIt.Key...) - journalProgress(batch, marker, stats) + } + // If the iterated account is the contract, create a further loop to + // verify or regenerate the contract storage. + if acc.Root != emptyRoot { + var storeMarker []byte + if accMarker != nil && bytes.Equal(accountHash[:], accMarker) && len(dl.genMarker) > common.HashLength { + storeMarker = dl.genMarker[common.HashLength:] + } + var storeOrigin = common.CopyBytes(storeMarker) + for { + exhausted, last, err := dl.genRange(acc.Root, rawdb.SnapshotStoragePrefix, "storage", storeOrigin, storageCheckRange, stats, func(key []byte, val []byte, db ethdb.Batch, regen bool) error { + if regen { + rawdb.WriteStorageSnapshot(batch, accountHash, common.BytesToHash(key), val) + } + stats.storage += common.StorageSize(1 + 2*common.HashLength + len(val)) + stats.slots++ - batch.Write() - batch.Reset() + // If we've exceeded our batch allowance or termination was requested, flush to disk + var abort chan *generatorStats + select { + case abort = <-dl.genAbort: + default: + } + if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { + // Only write and set the marker if we actually did something useful + if batch.ValueSize() > 0 { + // Ensure the generator entry is in sync with the data + marker := 
append(accountHash[:], key...) + journalProgress(batch, marker, stats) - dl.lock.Lock() - dl.genMarker = marker - dl.lock.Unlock() + batch.Write() + batch.Reset() + + dl.lock.Lock() + dl.genMarker = marker + dl.lock.Unlock() + } + if abort != nil { + stats.Log("Aborting state snapshot generation", dl.root, append(accountHash[:], key...)) + abort <- stats + return errors.New("aborted") + } + if time.Since(logged) > 8*time.Second { + stats.Log("Generating state snapshot", dl.root, append(accountHash[:], key...)) + logged = time.Now() + } + } + return nil + }, nil) + if err != nil { + return err } - if abort != nil { - stats.Log("Aborting state snapshot generation", dl.root, append(accountHash[:], storeIt.Key...)) - abort <- stats - return + if exhausted { + return nil } - if time.Since(logged) > 8*time.Second { - stats.Log("Generating state snapshot", dl.root, append(accountHash[:], storeIt.Key...)) - logged = time.Now() + storeOrigin = increseKey(last) + if bytes.Equal(storeOrigin, common.Hash{}.Bytes()) { + return nil // special case, the last is 0xffffffff...fff } } } - if err := storeIt.Err; err != nil { - log.Error("Generator failed to iterate storage trie", "accroot", dl.root, "acchash", common.BytesToHash(accIt.Key), "stroot", acc.Root, "err", err) - abort := <-dl.genAbort - abort <- stats - return + if time.Since(logged) > 8*time.Second { + stats.Log("Generating state snapshot", dl.root, key) + logged = time.Now() } + // Some account processed, unmark the marker + accMarker = nil + return nil + }, FullAccountRLP) + if err != nil { + abort := <-dl.genAbort + abort <- nil + return } - if time.Since(logged) > 8*time.Second { - stats.Log("Generating state snapshot", dl.root, accIt.Key) - logged = time.Now() + if exhausted { + break + } + accOrigin = increseKey(last) + if bytes.Equal(accOrigin, common.Hash{}.Bytes()) { + break // special case, the last is 0xffffffff...fff } - // Some account processed, unmark the marker - accMarker = nil - } - if err := accIt.Err; 
err != nil { - log.Error("Generator failed to iterate account trie", "root", dl.root, "err", err) - abort := <-dl.genAbort - abort <- stats - return } // Snapshot fully generated, set the marker to nil. // Note even there is nothing to commit, persist the // generator anyway to mark the snapshot is complete. - journalProgress(batch, nil, stats) - batch.Write() + journalProgress(dl.diskdb, nil, stats) log.Info("Generated state snapshot", "accounts", stats.accounts, "slots", stats.slots, "storage", stats.storage, "elapsed", common.PrettyDuration(time.Since(stats.start))) @@ -335,3 +449,13 @@ func (dl *diskLayer) generate(stats *generatorStats) { abort := <-dl.genAbort abort <- nil } + +func increseKey(key []byte) []byte { + for i := len(key) - 1; i >= 0; i-- { + key[i]++ + if key[i] != 0x0 { + break + } + } + return key +} diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 03263f3976a6..b0569da6dfc9 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -27,6 +27,50 @@ import ( "github.com/ethereum/go-ethereum/trie" ) +// Tests snapshot generation from an empty database. +func TestGeneration(t *testing.T) { + // We can't use statedb to make a test trie (circular dependency), so make + // a fake one manually. We're going with a small account trie of 3 accounts, + // two of which also have the same 3-slot storage trie attached. 
+ var ( + diskdb = memorydb.New() + triedb = trie.NewDatabase(diskdb) + ) + stTrie, _ := trie.NewSecure(common.Hash{}, triedb) + stTrie.Update([]byte("key-1"), []byte("val-1")) // 0x1314700b81afc49f94db3623ef1df38f3ed18b73a1b7ea2f6c095118cf6118a0 + stTrie.Update([]byte("key-2"), []byte("val-2")) // 0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371 + stTrie.Update([]byte("key-3"), []byte("val-3")) // 0x51c71a47af0695957647fb68766d0becee77e953df17c29b3c2f25436f055c78 + stTrie.Commit(nil) // Root: 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 + + accTrie, _ := trie.NewSecure(common.Hash{}, triedb) + acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + + acc = &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 + + acc = &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-3"), val) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 + accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd + triedb.Commit(common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), false, nil) + + snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd")) + select { + case <-snap.genPending: + // Snapshot generation succeeded + + case <-time.After(250 * time.Millisecond): + t.Errorf("Snapshot generation failed") + } + // Signal abortion to the generator and wait for it to tear down + stop := make(chan *generatorStats) + 
snap.genAbort <- stop + <-stop +} + // Tests that snapshot generation errors out correctly in case of a missing trie // node in the account trie. func TestGenerateCorruptAccountTrie(t *testing.T) { @@ -55,7 +99,7 @@ func TestGenerateCorruptAccountTrie(t *testing.T) { triedb.Commit(common.HexToHash("0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978"), false, nil) diskdb.Delete(common.HexToHash("0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7").Bytes()) - snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978"), nil) + snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978")) select { case <-snap.genPending: // Snapshot generation succeeded @@ -115,7 +159,7 @@ func TestGenerateMissingStorageTrie(t *testing.T) { // Delete a storage trie root and ensure the generator chokes diskdb.Delete(common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67").Bytes()) - snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), nil) + snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd")) select { case <-snap.genPending: // Snapshot generation succeeded @@ -174,7 +218,7 @@ func TestGenerateCorruptStorageTrie(t *testing.T) { // Delete a storage trie leaf and ensure the generator chokes diskdb.Delete(common.HexToHash("0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371").Bytes()) - snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), nil) + snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd")) select { case <-snap.genPending: // Snapshot generation 
succeeded diff --git a/core/state/snapshot/journal.go b/core/state/snapshot/journal.go index d7e454ccebd2..b31e921ca9f7 100644 --- a/core/state/snapshot/journal.go +++ b/core/state/snapshot/journal.go @@ -37,7 +37,10 @@ const journalVersion uint64 = 0 // journalGenerator is a disk layer entry containing the generator progress marker. type journalGenerator struct { - Wiping bool // Whether the database was in progress of being wiped + // Indicates whether the database was in progress of being wiped. + // It's deprecated but kept here for backward compatibility. + Wiping bool + Done bool // Whether the generator finished creating the snapshot Marker []byte Accounts uint64 @@ -193,14 +196,6 @@ func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, } // Everything loaded correctly, resume any suspended operations if !generator.Done { - // If the generator was still wiping, restart one from scratch (fine for - // now as it's rare and the wiper deletes the stuff it touches anyway, so - // restarting won't incur a lot of extra database hops. 
- var wiper chan struct{} - if generator.Wiping { - log.Info("Resuming previous snapshot wipe") - wiper = wipeSnapshot(diskdb, false) - } // Whether or not wiping was in progress, load any generator progress too base.genMarker = generator.Marker if base.genMarker == nil { @@ -214,7 +209,6 @@ func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, origin = binary.BigEndian.Uint64(generator.Marker) } go base.generate(&generatorStats{ - wiping: wiper, origin: origin, start: time.Now(), accounts: generator.Accounts, @@ -381,7 +375,6 @@ func (dl *diskLayer) LegacyJournal(buffer *bytes.Buffer) (common.Hash, error) { Marker: dl.genMarker, } if stats != nil { - entry.Wiping = (stats.wiping != nil) entry.Accounts = stats.accounts entry.Slots = stats.slots entry.Storage = uint64(stats.storage) diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index eccf377264a5..0466f16a58dd 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -479,7 +479,7 @@ func diffToDisk(bottom *diffLayer) *diskLayer { rawdb.DeleteAccountSnapshot(batch, hash) base.cache.Set(hash[:], nil) - it := rawdb.IterateStorageSnapshots(base.diskdb, hash) + it := rawdb.IterateStorageSnapshots(base.diskdb, hash, common.Hash{}) for it.Next() { if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator batch.Delete(key) @@ -656,9 +656,6 @@ func (t *Tree) Rebuild(root common.Hash) { // building a brand new snapshot. 
rawdb.DeleteSnapshotRecoveryNumber(t.diskdb) - // Track whether there's a wipe currently running and keep it alive if so - var wiper chan struct{} - // Iterate over and mark all layers stale for _, layer := range t.layers { switch layer := layer.(type) { @@ -667,10 +664,6 @@ func (t *Tree) Rebuild(root common.Hash) { if layer.genAbort != nil { abort := make(chan *generatorStats) layer.genAbort <- abort - - if stats := <-abort; stats != nil { - wiper = stats.wiping - } } // Layer should be inactive now, mark it as stale layer.lock.Lock() @@ -691,7 +684,7 @@ func (t *Tree) Rebuild(root common.Hash) { // generator will run a wiper first if there's not one running right now. log.Info("Rebuilding state snapshot") t.layers = map[common.Hash]snapshot{ - root: generateSnapshot(t.diskdb, t.triedb, t.cache, root, wiper), + root: generateSnapshot(t.diskdb, t.triedb, t.cache, root), } } diff --git a/core/state/snapshot/wipe.go b/core/state/snapshot/wipe.go index 14b63031a5b9..6997c7688ed1 100644 --- a/core/state/snapshot/wipe.go +++ b/core/state/snapshot/wipe.go @@ -27,7 +27,7 @@ import ( ) // wipeSnapshot starts a goroutine to iterate over the entire key-value database -// and delete all the data associated with the snapshot (accounts, storage, +// and delete all the data associated with the snapshot (accounts, storage, // metadata). After all is done, the snapshot range of the database is compacted // to free up unused data blocks. func wipeSnapshot(db ethdb.KeyValueStore, full bool) chan struct{} { @@ -53,10 +53,10 @@ func wipeSnapshot(db ethdb.KeyValueStore, full bool) chan struct{} { // removed in sync to avoid data races. After all is done, the snapshot range of // the database is compacted to free up unused data blocks. 
func wipeContent(db ethdb.KeyValueStore) error { - if err := wipeKeyRange(db, "accounts", rawdb.SnapshotAccountPrefix, len(rawdb.SnapshotAccountPrefix)+common.HashLength); err != nil { + if err := wipeKeyRange(db, "accounts", rawdb.SnapshotAccountPrefix, nil, nil, len(rawdb.SnapshotAccountPrefix)+common.HashLength); err != nil { return err } - if err := wipeKeyRange(db, "storage", rawdb.SnapshotStoragePrefix, len(rawdb.SnapshotStoragePrefix)+2*common.HashLength); err != nil { + if err := wipeKeyRange(db, "storage", rawdb.SnapshotStoragePrefix, nil, nil, len(rawdb.SnapshotStoragePrefix)+2*common.HashLength); err != nil { return err } // Compact the snapshot section of the database to get rid of unused space @@ -82,8 +82,9 @@ func wipeContent(db ethdb.KeyValueStore) error { } // wipeKeyRange deletes a range of keys from the database starting with prefix -// and having a specific total key length. -func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, keylen int) error { +// and having a specific total key length. The start and limit is optional for +// specifying a particular key range for deletion. +func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, origin []byte, limit []byte, keylen int) error { // Batch deletions together to avoid holding an iterator for too long var ( batch = db.NewBatch() @@ -92,7 +93,11 @@ func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, keylen int // Iterate over the key-range and delete all of them start, logged := time.Now(), time.Now() - it := db.NewIterator(prefix, nil) + it := db.NewIterator(prefix, origin) + var stop []byte + if limit != nil { + stop = append(prefix, limit...) 
+ } for it.Next() { // Skip any keys with the correct prefix but wrong length (trie nodes) key := it.Key() @@ -102,6 +107,9 @@ func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, keylen int if len(key) != keylen { continue } + if stop != nil && bytes.Compare(key, stop) >= 0 { + break + } // Delete the key and periodically recreate the batch and iterator batch.Delete(key) items++ From 2b6523395127b718e4de376d5fc05adfa117feed Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Tue, 9 Mar 2021 14:32:09 +0800 Subject: [PATCH 03/75] core/state/snapshot: add tests --- core/state/snapshot/generate.go | 4 +- core/state/snapshot/generate_test.go | 66 ++++++++++++++++++++++++++++ eth/protocols/snap/sync.go | 2 +- 3 files changed, 69 insertions(+), 3 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 515f214a00e4..4ff85d756466 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -52,7 +52,7 @@ var ( // in each range check. This is a value estimated based on experience. If this // value is too large, the failure rate of range prove will increase. Otherwise // the the value is too small, the efficiency of the state recovery will decrease. 
- storageCheckRange = 100 + storageCheckRange = 1024 ) // generatorStats is a collection of statistics gathered by the snapshot generator @@ -360,7 +360,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { } var storeOrigin = common.CopyBytes(storeMarker) for { - exhausted, last, err := dl.genRange(acc.Root, rawdb.SnapshotStoragePrefix, "storage", storeOrigin, storageCheckRange, stats, func(key []byte, val []byte, db ethdb.Batch, regen bool) error { + exhausted, last, err := dl.genRange(acc.Root, append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...), "storage", storeOrigin, storageCheckRange, stats, func(key []byte, val []byte, db ethdb.Batch, regen bool) error { if regen { rawdb.WriteStorageSnapshot(batch, accountHash, common.BytesToHash(key), val) } diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index b0569da6dfc9..1a1ea06b5d76 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -22,9 +22,11 @@ import ( "time" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb/memorydb" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" + "golang.org/x/crypto/sha3" ) // Tests that snapshot generation from an empty database. @@ -71,6 +73,70 @@ func TestGeneration(t *testing.T) { <-stop } +func hashData(input []byte) common.Hash { + var hasher = sha3.NewLegacyKeccak256() + var hash common.Hash + hasher.Reset() + hasher.Write(input) + hasher.Sum(hash[:0]) + return hash +} + +// Tests that snapshot generation with existent flat state. +func TestGenerateExistentState(t *testing.T) { + // We can't use statedb to make a test trie (circular dependency), so make + // a fake one manually. We're going with a small account trie of 3 accounts, + // two of which also has the same 3-slot storage trie attached. 
+ var ( + diskdb = memorydb.New() + triedb = trie.NewDatabase(diskdb) + ) + stTrie, _ := trie.NewSecure(common.Hash{}, triedb) + stTrie.Update([]byte("key-1"), []byte("val-1")) // 0x1314700b81afc49f94db3623ef1df38f3ed18b73a1b7ea2f6c095118cf6118a0 + stTrie.Update([]byte("key-2"), []byte("val-2")) // 0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371 + stTrie.Update([]byte("key-3"), []byte("val-3")) // 0x51c71a47af0695957647fb68766d0becee77e953df17c29b3c2f25436f055c78 + stTrie.Commit(nil) // Root: 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 + + accTrie, _ := trie.NewSecure(common.Hash{}, triedb) + acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-1")), val) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-1")), []byte("val-1")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-2")), []byte("val-2")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-3")), []byte("val-3")) + + acc = &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 + diskdb.Put(hashData([]byte("acc-2")).Bytes(), val) + rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-2")), val) + + acc = &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-3"), val) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 + rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-3")), val) + rawdb.WriteStorageSnapshot(diskdb, 
hashData([]byte("acc-3")), hashData([]byte("key-1")), []byte("val-1")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-2")), []byte("val-2")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-3")), []byte("val-3")) + + accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd + triedb.Commit(common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), false, nil) + + snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd")) + select { + case <-snap.genPending: + // Snapshot generation succeeded + + case <-time.After(250 * time.Millisecond): + t.Errorf("Snapshot generation failed") + } + // Signal abortion to the generator and wait for it to tear down + stop := make(chan *generatorStats) + snap.genAbort <- stop + <-stop +} + // Tests that snapshot generation errors out correctly in case of a missing trie // node in the account trie. func TestGenerateCorruptAccountTrie(t *testing.T) { diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index f192fa88d059..a7f214d7eb4e 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -52,7 +52,7 @@ const ( // maxRequestSize is the maximum number of bytes to request from a remote peer. maxRequestSize = 512 * 1024 - // maxStorageSetRequestCountis th maximum number of contracts to request the + // maxStorageSetRequestCount is the maximum number of contracts to request the // storage of in a single query. If this number is too low, we're not filling // responses fully and waste round trip times. If it's too high, we're capping // responses and waste bandwidth. 
From d74a7f11d03354e0e0d524d8c2fa726cc8679fb6 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Tue, 9 Mar 2021 15:19:16 +0800 Subject: [PATCH 04/75] core, eth: updates --- core/rawdb/accessors_snapshot.go | 14 ++++---------- core/state/snapshot/snapshot.go | 2 +- eth/protocols/snap/sync.go | 2 +- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/core/rawdb/accessors_snapshot.go b/core/rawdb/accessors_snapshot.go index 6b72378f76ae..c3616ba3aaa5 100644 --- a/core/rawdb/accessors_snapshot.go +++ b/core/rawdb/accessors_snapshot.go @@ -72,12 +72,6 @@ func DeleteAccountSnapshot(db ethdb.KeyValueWriter, hash common.Hash) { } } -// IterateAccountSnapshots returns an iterator for walking the account snapshots -// with the specified start position. -func IterateAccountSnapshots(db ethdb.Iteratee, start common.Hash) ethdb.Iterator { - return db.NewIterator(SnapshotAccountPrefix, start.Bytes()) -} - // ReadStorageSnapshot retrieves the snapshot entry of an storage trie leaf. func ReadStorageSnapshot(db ethdb.KeyValueReader, accountHash, storageHash common.Hash) []byte { data, _ := db.Get(storageSnapshotKey(accountHash, storageHash)) @@ -98,10 +92,10 @@ func DeleteStorageSnapshot(db ethdb.KeyValueWriter, accountHash, storageHash com } } -// IterateStorageSnapshots returns an iterator for walking the storage space of -// a specific account with specified start position. -func IterateStorageSnapshots(db ethdb.Iteratee, accountHash common.Hash, start common.Hash) ethdb.Iterator { - return db.NewIterator(storageSnapshotsKey(accountHash), start.Bytes()) +// IterateStorageSnapshots returns an iterator for walking the entire storage +// space of a specific account. 
+func IterateStorageSnapshots(db ethdb.Iteratee, accountHash common.Hash) ethdb.Iterator { + return db.NewIterator(storageSnapshotsKey(accountHash), nil) } // ReadSnapshotJournal retrieves the serialized in-memory diff layers saved at diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index 0466f16a58dd..725ae8e329c7 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -479,7 +479,7 @@ func diffToDisk(bottom *diffLayer) *diskLayer { rawdb.DeleteAccountSnapshot(batch, hash) base.cache.Set(hash[:], nil) - it := rawdb.IterateStorageSnapshots(base.diskdb, hash, common.Hash{}) + it := rawdb.IterateStorageSnapshots(base.diskdb, hash) for it.Next() { if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator batch.Delete(key) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index a7f214d7eb4e..f9f42c37578c 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -21,7 +21,6 @@ import ( "encoding/json" "errors" "fmt" - "github.com/ethereum/go-ethereum/core/state/snapshot" "math/big" "math/rand" "sync" @@ -30,6 +29,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state" + "github.com/ethereum/go-ethereum/core/state/snapshot" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/event" From 5a26f7f92bfdd2c73002be4c14a36505458872a7 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Tue, 9 Mar 2021 15:34:48 +0800 Subject: [PATCH 05/75] eth/protocols/snapshot: count flat state size --- eth/protocols/snap/sync.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index f9f42c37578c..6177532045de 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -1832,6 +1832,7 @@ func (s *Syncer) processStorageResponse(res 
*storageResponse) { // snapshot generation. for j := 0; j < len(res.hashes[i]); j++ { rawdb.WriteStorageSnapshot(batch, account, res.hashes[i][j], res.slots[i][j]) + bytes += common.StorageSize(1 + 2*common.HashLength + len(res.slots[i][j])) } } if err := batch.Write(); err != nil { @@ -1999,6 +2000,7 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { } blob := snapshot.SlimAccountRLP(res.accounts[i].Nonce, res.accounts[i].Balance, res.accounts[i].Root, res.accounts[i].CodeHash) rawdb.WriteAccountSnapshot(batch, hash, blob) + bytes += common.StorageSize(1 + common.HashLength + len(blob)) } if err := batch.Write(); err != nil { log.Crit("Failed to persist accounts", "err", err) From d21e5a2370f0f1fe008a98597169f86da064f1cb Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Tue, 9 Mar 2021 15:51:12 +0800 Subject: [PATCH 06/75] core/state: add metrics --- core/state/snapshot/generate.go | 27 ++++++++++++++++++++++++++- core/state/snapshot/wipe.go | 10 +++++++--- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 4ff85d756466..be8b604d160b 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -31,6 +31,7 @@ import ( "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" ) @@ -55,6 +56,18 @@ var ( storageCheckRange = 1024 ) +// Metrics in generation +var ( + snapGeneratedAccountMeter = metrics.NewRegisteredMeter("state/snapshot/generation/account/generated", nil) + snapRecoveredAccountMeter = metrics.NewRegisteredMeter("state/snapshot/generation/account/recovered", nil) + snapWipedAccountMeter = metrics.NewRegisteredMeter("state/snapshot/generation/account/wiped", nil) + snapGeneratedStorageMeter = 
metrics.NewRegisteredMeter("state/snapshot/generation/storage/generated", nil) + snapRecoveredStorageMeter = metrics.NewRegisteredMeter("state/snapshot/generation/storage/recovered", nil) + snapWipedStorageMeter = metrics.NewRegisteredMeter("state/snapshot/generation/storage/wiped", nil) + snapSuccessfulRangeProofMeter = metrics.NewRegisteredMeter("state/snapshot/generation/proof/success", nil) + snapFailedRangeProofMeter = metrics.NewRegisteredMeter("state/snapshot/generation/proof/failure", nil) +) + // generatorStats is a collection of statistics gathered by the snapshot generator // for logging purposes. type generatorStats struct { @@ -246,6 +259,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig batch := dl.diskdb.NewBatch() keys, vals, last, exhausted, err := dl.proveRange(root, tr, prefix, kind, origin, max, onValue) if err == nil { + snapSuccessfulRangeProofMeter.Mark(1) // The verification is passed, process each state with the given // callback function. If this state represents a contract, the // corresponding storage check will be performed in the callback @@ -260,6 +274,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig } return exhausted, last, nil } + snapFailedRangeProofMeter.Mark(1) // The verifcation is failed, the flat state in this range cannot match the // merkle trie. Alternatively, use the fallback generation mechanism to regenerate // the correct flat state by iterating trie. But wiping the existent outdated flat @@ -267,7 +282,11 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig // // Note if the returned last is nil(no more flat state can be found in the database), // then all the entries under the given prefix will be wiped totally. 
- if err := wipeKeyRange(dl.diskdb, kind, prefix, origin, last, len(prefix)+common.HashLength); err != nil { + wipedMeter := snapWipedAccountMeter + if kind == "storage" { + wipedMeter = snapWipedStorageMeter + } + if err := wipeKeyRange(dl.diskdb, kind, prefix, origin, last, len(prefix)+common.HashLength, wipedMeter); err != nil { return false, nil, err } trIter := trie.NewIterator(tr.NodeIterator(origin)) @@ -321,6 +340,9 @@ func (dl *diskLayer) generate(stats *generatorStats) { if accMarker == nil || !bytes.Equal(accountHash[:], accMarker) { if regen { rawdb.WriteAccountSnapshot(batch, accountHash, data) + snapGeneratedAccountMeter.Mark(1) + } else { + snapRecoveredAccountMeter.Mark(1) } stats.storage += common.StorageSize(1 + common.HashLength + len(data)) stats.accounts++ @@ -363,6 +385,9 @@ func (dl *diskLayer) generate(stats *generatorStats) { exhausted, last, err := dl.genRange(acc.Root, append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...), "storage", storeOrigin, storageCheckRange, stats, func(key []byte, val []byte, db ethdb.Batch, regen bool) error { if regen { rawdb.WriteStorageSnapshot(batch, accountHash, common.BytesToHash(key), val) + snapGeneratedStorageMeter.Mark(1) + } else { + snapRecoveredStorageMeter.Mark(1) } stats.storage += common.StorageSize(1 + 2*common.HashLength + len(val)) stats.slots++ diff --git a/core/state/snapshot/wipe.go b/core/state/snapshot/wipe.go index 6997c7688ed1..b67d442443cc 100644 --- a/core/state/snapshot/wipe.go +++ b/core/state/snapshot/wipe.go @@ -24,6 +24,7 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" ) // wipeSnapshot starts a goroutine to iterate over the entire key-value database @@ -53,10 +54,10 @@ func wipeSnapshot(db ethdb.KeyValueStore, full bool) chan struct{} { // removed in sync to avoid data races. 
After all is done, the snapshot range of // the database is compacted to free up unused data blocks. func wipeContent(db ethdb.KeyValueStore) error { - if err := wipeKeyRange(db, "accounts", rawdb.SnapshotAccountPrefix, nil, nil, len(rawdb.SnapshotAccountPrefix)+common.HashLength); err != nil { + if err := wipeKeyRange(db, "accounts", rawdb.SnapshotAccountPrefix, nil, nil, len(rawdb.SnapshotAccountPrefix)+common.HashLength, snapWipedAccountMeter); err != nil { return err } - if err := wipeKeyRange(db, "storage", rawdb.SnapshotStoragePrefix, nil, nil, len(rawdb.SnapshotStoragePrefix)+2*common.HashLength); err != nil { + if err := wipeKeyRange(db, "storage", rawdb.SnapshotStoragePrefix, nil, nil, len(rawdb.SnapshotStoragePrefix)+2*common.HashLength, snapWipedStorageMeter); err != nil { return err } // Compact the snapshot section of the database to get rid of unused space @@ -84,7 +85,7 @@ func wipeContent(db ethdb.KeyValueStore) error { // wipeKeyRange deletes a range of keys from the database starting with prefix // and having a specific total key length. The start and limit is optional for // specifying a particular key range for deletion. 
-func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, origin []byte, limit []byte, keylen int) error { +func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, origin []byte, limit []byte, keylen int, meter metrics.Meter) error { // Batch deletions together to avoid holding an iterator for too long var ( batch = db.NewBatch() @@ -134,6 +135,9 @@ func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, origin []b if err := batch.Write(); err != nil { return err } + if meter != nil { + meter.Mark(int64(items)) + } log.Info("Deleted state snapshot leftovers", "kind", kind, "wiped", items, "elapsed", common.PrettyDuration(time.Since(start))) return nil } From 65cee53990ecdc5f1f29c53b1df2c4bb1db89ef9 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Tue, 9 Mar 2021 16:16:03 +0800 Subject: [PATCH 07/75] core/state/snapshot: skip unnecessary deletion --- core/state/snapshot/generate.go | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index be8b604d160b..377d24e43667 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -275,19 +275,21 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig return exhausted, last, nil } snapFailedRangeProofMeter.Mark(1) + // The verifcation is failed, the flat state in this range cannot match the // merkle trie. Alternatively, use the fallback generation mechanism to regenerate // the correct flat state by iterating trie. But wiping the existent outdated flat // data in this range first. - // - // Note if the returned last is nil(no more flat state can be found in the database), - // then all the entries under the given prefix will be wiped totally. 
- wipedMeter := snapWipedAccountMeter - if kind == "storage" { - wipedMeter = snapWipedStorageMeter - } - if err := wipeKeyRange(dl.diskdb, kind, prefix, origin, last, len(prefix)+common.HashLength, wipedMeter); err != nil { - return false, nil, err + if last != nil { + // Note if the returned last is nil(no more flat state can be found in the database), + // the wiping can be skipped. + wipedMeter := snapWipedAccountMeter + if kind == "storage" { + wipedMeter = snapWipedStorageMeter + } + if err := wipeKeyRange(dl.diskdb, kind, prefix, origin, last, len(prefix)+common.HashLength, wipedMeter); err != nil { + return false, nil, err + } } trIter := trie.NewIterator(tr.NodeIterator(origin)) for trIter.Next() { From 557b81c92100cd22d806bc22b10bceae3ab06af0 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Tue, 9 Mar 2021 16:29:58 +0800 Subject: [PATCH 08/75] core/state/snapshot: rename --- core/state/snapshot/generate.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 377d24e43667..121a56cfb882 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -291,17 +291,17 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig return false, nil, err } } - trIter := trie.NewIterator(tr.NodeIterator(origin)) - for trIter.Next() { - if last != nil && bytes.Compare(trIter.Key, last) > 0 { + iter := trie.NewIterator(tr.NodeIterator(origin)) + for iter.Next() { + if last != nil && bytes.Compare(iter.Key, last) > 0 { return false, last, nil // Apparently the trie is not exhausted } - if err := onState(trIter.Key, trIter.Value, batch, true); err != nil { + if err := onState(iter.Key, iter.Value, batch, true); err != nil { return false, nil, err } } - if trIter.Err != nil { - return false, nil, trIter.Err + if iter.Err != nil { + return false, nil, iter.Err } if batch.ValueSize() > 0 { batch.Write() From 
45fead814d9222c8b4a85c11958001a0f88ef1be Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Tue, 9 Mar 2021 20:36:07 +0800 Subject: [PATCH 09/75] core/state/snapshot: use the global batch --- core/state/snapshot/generate.go | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 121a56cfb882..29465d588eab 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -245,7 +245,7 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.SecureTrie, prefix [] // genRange generates the state segment with particular prefix. Generation can // either verify the correctness of existing state through rangeproof and skip // generation, or iterate trie to regenerate state on demand. -func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, stats *generatorStats, onState func(key []byte, val []byte, batch ethdb.Batch, regen bool) error, onValue func([]byte) ([]byte, error)) (bool, []byte, error) { +func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, stats *generatorStats, onState func(key []byte, val []byte, regen bool) error, onValue func([]byte) ([]byte, error)) (bool, []byte, error) { tr, err := trie.NewSecure(root, dl.triedb) if err != nil { // The account trie is missing (GC), surf the chain until one becomes available @@ -256,7 +256,6 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig return false, nil, errors.New("trie is missing") } // Use range prover to check the validity of the flat state in the range - batch := dl.diskdb.NewBatch() keys, vals, last, exhausted, err := dl.proveRange(root, tr, prefix, kind, origin, max, onValue) if err == nil { snapSuccessfulRangeProofMeter.Mark(1) @@ -264,14 +263,10 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig // callback function. 
If this state represents a contract, the // corresponding storage check will be performed in the callback for i := 0; i < len(keys); i++ { - if err := onState(keys[i], vals[i], batch, false); err != nil { + if err := onState(keys[i], vals[i], false); err != nil { return false, nil, err } } - if batch.ValueSize() > 0 { - batch.Write() - batch.Reset() - } return exhausted, last, nil } snapFailedRangeProofMeter.Mark(1) @@ -296,17 +291,13 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig if last != nil && bytes.Compare(iter.Key, last) > 0 { return false, last, nil // Apparently the trie is not exhausted } - if err := onState(iter.Key, iter.Value, batch, true); err != nil { + if err := onState(iter.Key, iter.Value, true); err != nil { return false, nil, err } } if iter.Err != nil { return false, nil, iter.Err } - if batch.ValueSize() > 0 { - batch.Write() - batch.Reset() - } return true, nil, nil // The entire trie is exhausted } @@ -319,11 +310,13 @@ func (dl *diskLayer) generate(stats *generatorStats) { if len(dl.genMarker) > 0 { // []byte{} is the start, use nil for that accMarker = dl.genMarker[:common.HashLength] } - logged := time.Now() - - accOrigin := common.CopyBytes(accMarker) + var ( + batch = dl.diskdb.NewBatch() + logged = time.Now() + accOrigin = common.CopyBytes(accMarker) + ) for { - exhausted, last, err := dl.genRange(dl.root, rawdb.SnapshotAccountPrefix, "account", accOrigin, accountCheckRange, stats, func(key []byte, val []byte, batch ethdb.Batch, regen bool) error { + exhausted, last, err := dl.genRange(dl.root, rawdb.SnapshotAccountPrefix, "account", accOrigin, accountCheckRange, stats, func(key []byte, val []byte, regen bool) error { // Retrieve the current account and flatten it into the internal format accountHash := common.BytesToHash(key) @@ -384,7 +377,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { } var storeOrigin = common.CopyBytes(storeMarker) for { - exhausted, last, err := 
dl.genRange(acc.Root, append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...), "storage", storeOrigin, storageCheckRange, stats, func(key []byte, val []byte, db ethdb.Batch, regen bool) error { + exhausted, last, err := dl.genRange(acc.Root, append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...), "storage", storeOrigin, storageCheckRange, stats, func(key []byte, val []byte, regen bool) error { if regen { rawdb.WriteStorageSnapshot(batch, accountHash, common.BytesToHash(key), val) snapGeneratedStorageMeter.Mark(1) @@ -462,7 +455,9 @@ func (dl *diskLayer) generate(stats *generatorStats) { // Snapshot fully generated, set the marker to nil. // Note even there is nothing to commit, persist the // generator anyway to mark the snapshot is complete. - journalProgress(dl.diskdb, nil, stats) + journalProgress(batch, nil, stats) + batch.Write() + batch.Reset() log.Info("Generated state snapshot", "accounts", stats.accounts, "slots", stats.slots, "storage", stats.storage, "elapsed", common.PrettyDuration(time.Since(stats.start))) From a9bea12adf91c9b893d623898482e49dc895ce38 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 10 Mar 2021 11:36:07 +0800 Subject: [PATCH 10/75] core/state/snapshot: add logs and fix wiping --- core/state/snapshot/generate.go | 10 +++++++++- core/state/snapshot/wipe.go | 12 +++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 29465d588eab..209db5b5a5d4 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -259,6 +259,8 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig keys, vals, last, exhausted, err := dl.proveRange(root, tr, prefix, kind, origin, max, onValue) if err == nil { snapSuccessfulRangeProofMeter.Mark(1) + log.Debug("Proved state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last) + // The verification is passed, process each state with the given 
// callback function. If this state represents a contract, the // corresponding storage check will be performed in the callback @@ -267,9 +269,11 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig return false, nil, err } } + log.Debug("Recovered state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last) return exhausted, last, nil } snapFailedRangeProofMeter.Mark(1) + log.Debug("Detected outdated state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last) // The verifcation is failed, the flat state in this range cannot match the // merkle trie. Alternatively, use the fallback generation mechanism to regenerate @@ -282,13 +286,16 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig if kind == "storage" { wipedMeter = snapWipedStorageMeter } - if err := wipeKeyRange(dl.diskdb, kind, prefix, origin, last, len(prefix)+common.HashLength, wipedMeter); err != nil { + limit := increseKey(common.CopyBytes(last)) + if err := wipeKeyRange(dl.diskdb, kind, prefix, origin, limit, len(prefix)+common.HashLength, wipedMeter, false); err != nil { return false, nil, err } + log.Debug("Wiped currupted state range", "kind", kind, "prefix", prefix, "origin", origin, "limit", limit) } iter := trie.NewIterator(tr.NodeIterator(origin)) for iter.Next() { if last != nil && bytes.Compare(iter.Key, last) > 0 { + log.Debug("Regenerated state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last) return false, last, nil // Apparently the trie is not exhausted } if err := onState(iter.Key, iter.Value, true); err != nil { @@ -298,6 +305,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig if iter.Err != nil { return false, nil, iter.Err } + log.Debug("Regenerated state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last) return true, nil, nil // The entire trie is exhausted } diff --git a/core/state/snapshot/wipe.go 
b/core/state/snapshot/wipe.go index b67d442443cc..521f7b16fbf5 100644 --- a/core/state/snapshot/wipe.go +++ b/core/state/snapshot/wipe.go @@ -54,10 +54,10 @@ func wipeSnapshot(db ethdb.KeyValueStore, full bool) chan struct{} { // removed in sync to avoid data races. After all is done, the snapshot range of // the database is compacted to free up unused data blocks. func wipeContent(db ethdb.KeyValueStore) error { - if err := wipeKeyRange(db, "accounts", rawdb.SnapshotAccountPrefix, nil, nil, len(rawdb.SnapshotAccountPrefix)+common.HashLength, snapWipedAccountMeter); err != nil { + if err := wipeKeyRange(db, "accounts", rawdb.SnapshotAccountPrefix, nil, nil, len(rawdb.SnapshotAccountPrefix)+common.HashLength, snapWipedAccountMeter, true); err != nil { return err } - if err := wipeKeyRange(db, "storage", rawdb.SnapshotStoragePrefix, nil, nil, len(rawdb.SnapshotStoragePrefix)+2*common.HashLength, snapWipedStorageMeter); err != nil { + if err := wipeKeyRange(db, "storage", rawdb.SnapshotStoragePrefix, nil, nil, len(rawdb.SnapshotStoragePrefix)+2*common.HashLength, snapWipedStorageMeter, true); err != nil { return err } // Compact the snapshot section of the database to get rid of unused space @@ -85,7 +85,7 @@ func wipeContent(db ethdb.KeyValueStore) error { // wipeKeyRange deletes a range of keys from the database starting with prefix // and having a specific total key length. The start and limit is optional for // specifying a particular key range for deletion. 
-func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, origin []byte, limit []byte, keylen int, meter metrics.Meter) error { +func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, origin []byte, limit []byte, keylen int, meter metrics.Meter, report bool) error { // Batch deletions together to avoid holding an iterator for too long var ( batch = db.NewBatch() @@ -125,7 +125,7 @@ func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, origin []b seekPos := key[len(prefix):] it = db.NewIterator(prefix, seekPos) - if time.Since(logged) > 8*time.Second { + if time.Since(logged) > 8*time.Second && report { log.Info("Deleting state snapshot leftovers", "kind", kind, "wiped", items, "elapsed", common.PrettyDuration(time.Since(start))) logged = time.Now() } @@ -138,6 +138,8 @@ func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, origin []b if meter != nil { meter.Mark(int64(items)) } - log.Info("Deleted state snapshot leftovers", "kind", kind, "wiped", items, "elapsed", common.PrettyDuration(time.Since(start))) + if report { + log.Info("Deleted state snapshot leftovers", "kind", kind, "wiped", items, "elapsed", common.PrettyDuration(time.Since(start))) + } return nil } From 310db7c237326e50ef3945d16eeb55b2682e0160 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 10 Mar 2021 11:43:15 +0800 Subject: [PATCH 11/75] core/state/snapshot: fix --- core/state/snapshot/generate.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 209db5b5a5d4..8da377ff0beb 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -434,7 +434,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { return nil } storeOrigin = increseKey(last) - if bytes.Equal(storeOrigin, common.Hash{}.Bytes()) { + if storeOrigin == nil { return nil // special case, the last is 0xffffffff...fff } } @@ -456,7 +456,7 @@ func 
(dl *diskLayer) generate(stats *generatorStats) { break } accOrigin = increseKey(last) - if bytes.Equal(accOrigin, common.Hash{}.Bytes()) { + if accOrigin == nil { break // special case, the last is 0xffffffff...fff } } @@ -480,12 +480,14 @@ func (dl *diskLayer) generate(stats *generatorStats) { abort <- nil } +// increseKey increase the input key by one bit. Return nil if the entire +// addition operation overflows, func increseKey(key []byte) []byte { for i := len(key) - 1; i >= 0; i-- { key[i]++ if key[i] != 0x0 { - break + return key } } - return key + return nil } From 61020443d31bfd3079e51fc7053bc81b74f7363f Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 10 Mar 2021 13:08:25 +0800 Subject: [PATCH 12/75] core/state/snapshot: save generation progress even if the batch is empty --- core/state/snapshot/generate.go | 54 +++++++++++++++++---------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 8da377ff0beb..131836d9edec 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -323,6 +323,8 @@ func (dl *diskLayer) generate(stats *generatorStats) { logged = time.Now() accOrigin = common.CopyBytes(accMarker) ) + stats.Log("Resuming state snapshot generation", dl.root, dl.genMarker) + for { exhausted, last, err := dl.genRange(dl.root, rawdb.SnapshotAccountPrefix, "account", accOrigin, accountCheckRange, stats, func(key []byte, val []byte, regen bool) error { // Retrieve the current account and flatten it into the internal format @@ -357,19 +359,19 @@ func (dl *diskLayer) generate(stats *generatorStats) { default: } if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { - // Only write and set the marker if we actually did something useful - if batch.ValueSize() > 0 { - // Ensure the generator entry is in sync with the data - marker := accountHash[:] - journalProgress(batch, marker, stats) - - batch.Write() - batch.Reset() - - 
dl.lock.Lock() - dl.genMarker = marker - dl.lock.Unlock() - } + // Flush out the batch anyway no matter it's empty or not. + // It's possible that all the states are recovered and the + // generation indeed makes progress. + marker := accountHash[:] + journalProgress(batch, marker, stats) + + batch.Write() + batch.Reset() + + dl.lock.Lock() + dl.genMarker = marker + dl.lock.Unlock() + if abort != nil { stats.Log("Aborting state snapshot generation", dl.root, accountHash[:]) abort <- stats @@ -402,19 +404,19 @@ func (dl *diskLayer) generate(stats *generatorStats) { default: } if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { - // Only write and set the marker if we actually did something useful - if batch.ValueSize() > 0 { - // Ensure the generator entry is in sync with the data - marker := append(accountHash[:], key...) - journalProgress(batch, marker, stats) - - batch.Write() - batch.Reset() - - dl.lock.Lock() - dl.genMarker = marker - dl.lock.Unlock() - } + // Flush out the batch anyway no matter it's empty or not. + // It's possible that all the states are recovered and the + // generation indeed makes progress. + marker := append(accountHash[:], key...) + journalProgress(batch, marker, stats) + + batch.Write() + batch.Reset() + + dl.lock.Lock() + dl.genMarker = marker + dl.lock.Unlock() + if abort != nil { stats.Log("Aborting state snapshot generation", dl.root, append(accountHash[:], key...)) abort <- stats From dd4ced3716eb7f32ff97cd7039bc833a5e9cd82d Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 10 Mar 2021 15:03:58 +0800 Subject: [PATCH 13/75] core/state/snapshot: fixes --- core/state/snapshot/generate.go | 43 ++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 131836d9edec..056f452e7aa8 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -47,7 +47,7 @@ var ( // each range check. 
This is a value estimated based on experience. If this // value is too large, the failure rate of range prove will increase. Otherwise // the the value is too small, the efficiency of the state recovery will decrease. - accountCheckRange = 100 + accountCheckRange = 128 // storageCheckRange is the upper limit of the number of storage slots involved // in each range check. This is a value estimated based on experience. If this @@ -205,12 +205,12 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.SecureTrie, prefix [] if onValue == nil { vals = append(vals, common.CopyBytes(iter.Value())) } else { - converted, err := onValue(common.CopyBytes(iter.Value())) + val, err := onValue(common.CopyBytes(iter.Value())) if err != nil { log.Debug("Failed to convert the flat state", "kind", kind, "key", common.BytesToHash(key[len(prefix):]), "error", err) return nil, nil, last, false, err } - vals = append(vals, converted) + vals = append(vals, val) } count += 1 } @@ -248,11 +248,7 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.SecureTrie, prefix [] func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, stats *generatorStats, onState func(key []byte, val []byte, regen bool) error, onValue func([]byte) ([]byte, error)) (bool, []byte, error) { tr, err := trie.NewSecure(root, dl.triedb) if err != nil { - // The account trie is missing (GC), surf the chain until one becomes available stats.Log("Trie missing, state snapshotting paused", root, dl.genMarker) - - abort := <-dl.genAbort - abort <- stats return false, nil, errors.New("trie is missing") } // Use range prover to check the validity of the flat state in the range @@ -322,6 +318,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { batch = dl.diskdb.NewBatch() logged = time.Now() accOrigin = common.CopyBytes(accMarker) + abort chan *generatorStats ) stats.Log("Resuming state snapshot generation", dl.root, dl.genMarker) @@ -353,7 +350,6 @@ func (dl 
*diskLayer) generate(stats *generatorStats) { stats.accounts++ } // If we've exceeded our batch allowance or termination was requested, flush to disk - var abort chan *generatorStats select { case abort = <-dl.genAbort: default: @@ -365,7 +361,9 @@ func (dl *diskLayer) generate(stats *generatorStats) { marker := accountHash[:] journalProgress(batch, marker, stats) - batch.Write() + if err := batch.Write(); err != nil { + return err + } batch.Reset() dl.lock.Lock() @@ -374,7 +372,6 @@ func (dl *diskLayer) generate(stats *generatorStats) { if abort != nil { stats.Log("Aborting state snapshot generation", dl.root, accountHash[:]) - abort <- stats return errors.New("aborted") } } @@ -398,7 +395,6 @@ func (dl *diskLayer) generate(stats *generatorStats) { stats.slots++ // If we've exceeded our batch allowance or termination was requested, flush to disk - var abort chan *generatorStats select { case abort = <-dl.genAbort: default: @@ -410,7 +406,9 @@ func (dl *diskLayer) generate(stats *generatorStats) { marker := append(accountHash[:], key...) 
journalProgress(batch, marker, stats) - batch.Write() + if err := batch.Write(); err != nil { + return err + } batch.Reset() dl.lock.Lock() @@ -419,7 +417,6 @@ func (dl *diskLayer) generate(stats *generatorStats) { if abort != nil { stats.Log("Aborting state snapshot generation", dl.root, append(accountHash[:], key...)) - abort <- stats return errors.New("aborted") } if time.Since(logged) > 8*time.Second { @@ -429,6 +426,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { } return nil }, nil) + if err != nil { return err } @@ -449,11 +447,16 @@ func (dl *diskLayer) generate(stats *generatorStats) { accMarker = nil return nil }, FullAccountRLP) + + // The procedure it aborted, either by external signal or internal error if err != nil { - abort := <-dl.genAbort - abort <- nil + if abort == nil { // aborted by internal error, wait the signal + abort = <-dl.genAbort + } + abort <- stats return } + // Abort the procedure if the entire snapshot is generated if exhausted { break } @@ -466,7 +469,13 @@ func (dl *diskLayer) generate(stats *generatorStats) { // Note even there is nothing to commit, persist the // generator anyway to mark the snapshot is complete. 
journalProgress(batch, nil, stats) - batch.Write() + if err := batch.Write(); err != nil { + log.Error("Failed to flush batch", "error", err) + + abort := <-dl.genAbort + abort <- stats + return + } batch.Reset() log.Info("Generated state snapshot", "accounts", stats.accounts, "slots", stats.slots, @@ -478,7 +487,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { dl.lock.Unlock() // Someone will be looking for us, wait it out - abort := <-dl.genAbort + abort = <-dl.genAbort abort <- nil } From 5eb07d486d60cb8cd97210ab28cfa6de961c73f3 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 10 Mar 2021 15:54:59 +0800 Subject: [PATCH 14/75] core/state/snapshot: fix initial account range length --- core/state/snapshot/generate.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 056f452e7aa8..34d52f31779f 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -310,10 +310,16 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig // gathering and logging, since the method surfs the blocks as they arrive, often // being restarted. 
func (dl *diskLayer) generate(stats *generatorStats) { - var accMarker []byte + var ( + accMarker []byte + accountRange = accountCheckRange + ) if len(dl.genMarker) > 0 { // []byte{} is the start, use nil for that accMarker = dl.genMarker[:common.HashLength] } + if len(dl.genMarker) == 2*common.HashLength { + accountRange = 1 // We already fall into the storage generation last time, only pick one account + } var ( batch = dl.diskdb.NewBatch() logged = time.Now() @@ -323,7 +329,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { stats.Log("Resuming state snapshot generation", dl.root, dl.genMarker) for { - exhausted, last, err := dl.genRange(dl.root, rawdb.SnapshotAccountPrefix, "account", accOrigin, accountCheckRange, stats, func(key []byte, val []byte, regen bool) error { + exhausted, last, err := dl.genRange(dl.root, rawdb.SnapshotAccountPrefix, "account", accOrigin, accountRange, stats, func(key []byte, val []byte, regen bool) error { // Retrieve the current account and flatten it into the internal format accountHash := common.BytesToHash(key) @@ -464,6 +470,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { if accOrigin == nil { break // special case, the last is 0xffffffff...fff } + accountRange = accountCheckRange } // Snapshot fully generated, set the marker to nil. 
// Note even there is nothing to commit, persist the From 62eb8561b3ea2a78b0027fcea3efba7fe221f33e Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Thu, 11 Mar 2021 15:35:44 +0800 Subject: [PATCH 15/75] core/state/snapshot: fix initial account range --- core/state/snapshot/generate.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 34d52f31779f..04df0523f6b5 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -316,9 +316,10 @@ func (dl *diskLayer) generate(stats *generatorStats) { ) if len(dl.genMarker) > 0 { // []byte{} is the start, use nil for that accMarker = dl.genMarker[:common.HashLength] - } - if len(dl.genMarker) == 2*common.HashLength { - accountRange = 1 // We already fall into the storage generation last time, only pick one account + + // Always reset the initial account range as 1 whenever recover + // from the interruption. + accountRange = 1 } var ( batch = dl.diskdb.NewBatch() From b2dbe3bf81a5fc2da78ebcba0a2873f107fa6786 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Thu, 11 Mar 2021 20:16:01 +0800 Subject: [PATCH 16/75] eth/protocols/snap: store flat states during the healing --- eth/protocols/snap/sync.go | 52 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 6177532045de..7eddf7c46e7c 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -1853,6 +1853,55 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { // task assigners to pick up and fill. } +// stateWriter is a database batch replayer that takes a batch of write operations +// and extract the flat states from them. The target flat states will be persisted +// blindly and can be fixed by the generator later. 
+type stateWriter struct { + res *trienodeHealResponse + syncer *Syncer + accountMarker map[common.Hash]int + storageMarker map[common.Hash]int +} + +func newStateWriter(res *trienodeHealResponse, syncer *Syncer) *stateWriter { + var ( + accountMarker = make(map[common.Hash]int) + storageMarker = make(map[common.Hash]int) + ) + for i, path := range res.paths { + if len(path) == 1 && len(path[0]) == common.HashLength { + accountMarker[res.hashes[i]] = i + } + if len(path) == 2 && len(path[1]) == common.HashLength { + storageMarker[res.hashes[i]] = i + } + } + return &stateWriter{ + res: res, + syncer: syncer, + accountMarker: accountMarker, + storageMarker: storageMarker, + } +} + +// Put reacts to database writes and implements flat state persistence. +func (w *stateWriter) Put(key []byte, val []byte) error { + hash := common.BytesToHash(key) + if index, ok := w.accountMarker[hash]; ok { + rawdb.WriteAccountSnapshot(w.syncer.db, common.BytesToHash(w.res.paths[index][0]), w.res.nodes[index]) + w.syncer.bytecodeHealBytes += common.StorageSize(1 + common.HashLength + len(w.res.nodes[index])) + } + if index, ok := w.storageMarker[hash]; ok { + rawdb.WriteStorageSnapshot(w.syncer.db, common.BytesToHash(w.res.paths[index][0]), common.BytesToHash(w.res.paths[index][1]), w.res.nodes[index]) + w.syncer.bytecodeHealBytes += common.StorageSize(1 + 2*common.HashLength + len(w.res.nodes[index])) + } + return nil +} + +func (w *stateWriter) Delete(key []byte) error { + panic("not implemented") +} + // processTrienodeHealResponse integrates an already validated trienode response // into the healer tasks. 
func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { @@ -1886,6 +1935,9 @@ func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { if err := batch.Write(); err != nil { log.Crit("Failed to persist healing data", "err", err) } + if err := batch.Replay(newStateWriter(res, s)); err != nil { + log.Crit("Failed to replay the committed batch", "err", err) + } log.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize())) } From 529147f8f88025645c20ee14f39d9eafa25b45f4 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Fri, 12 Mar 2021 11:04:07 +0800 Subject: [PATCH 17/75] eth/protocols/snap: print logs --- eth/protocols/snap/sync.go | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 7eddf7c46e7c..3b7523e55205 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -1857,13 +1857,18 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { // and extract the flat states from them. The target flat states will be persisted // blindly and can be fixed by the generator later. 
type stateWriter struct { + db ethdb.KeyValueWriter res *trienodeHealResponse - syncer *Syncer accountMarker map[common.Hash]int storageMarker map[common.Hash]int + + accountSynced uint64 // Number of accounts persisted + accountBytes common.StorageSize // Number of account bytes persisted to disk + storageSynced uint64 // Number of storage slots persisted + storageBytes common.StorageSize // Number of storage bytes persisted to disk } -func newStateWriter(res *trienodeHealResponse, syncer *Syncer) *stateWriter { +func newStateWriter(db ethdb.KeyValueWriter, res *trienodeHealResponse) *stateWriter { var ( accountMarker = make(map[common.Hash]int) storageMarker = make(map[common.Hash]int) @@ -1877,8 +1882,8 @@ func newStateWriter(res *trienodeHealResponse, syncer *Syncer) *stateWriter { } } return &stateWriter{ + db: db, res: res, - syncer: syncer, accountMarker: accountMarker, storageMarker: storageMarker, } @@ -1888,12 +1893,14 @@ func newStateWriter(res *trienodeHealResponse, syncer *Syncer) *stateWriter { func (w *stateWriter) Put(key []byte, val []byte) error { hash := common.BytesToHash(key) if index, ok := w.accountMarker[hash]; ok { - rawdb.WriteAccountSnapshot(w.syncer.db, common.BytesToHash(w.res.paths[index][0]), w.res.nodes[index]) - w.syncer.bytecodeHealBytes += common.StorageSize(1 + common.HashLength + len(w.res.nodes[index])) + rawdb.WriteAccountSnapshot(w.db, common.BytesToHash(w.res.paths[index][0]), w.res.nodes[index]) + w.accountSynced += 1 + w.accountBytes += common.StorageSize(1 + common.HashLength + len(w.res.nodes[index])) } if index, ok := w.storageMarker[hash]; ok { - rawdb.WriteStorageSnapshot(w.syncer.db, common.BytesToHash(w.res.paths[index][0]), common.BytesToHash(w.res.paths[index][1]), w.res.nodes[index]) - w.syncer.bytecodeHealBytes += common.StorageSize(1 + 2*common.HashLength + len(w.res.nodes[index])) + rawdb.WriteStorageSnapshot(w.db, common.BytesToHash(w.res.paths[index][0]), common.BytesToHash(w.res.paths[index][1]), 
w.res.nodes[index]) + w.storageSynced += 1 + w.storageBytes += common.StorageSize(1 + 2*common.HashLength + len(w.res.nodes[index])) } return nil } @@ -1902,6 +1909,16 @@ func (w *stateWriter) Delete(key []byte) error { panic("not implemented") } +func (w *stateWriter) log() (ret []interface{}) { + if w.accountSynced > 0 { + ret = append(ret, "accounts", w.accountSynced, "bytes", w.accountBytes) + } + if w.storageSynced > 0 { + ret = append(ret, "storages", w.storageSynced, "bytes", w.storageSynced) + } + return ret +} + // processTrienodeHealResponse integrates an already validated trienode response // into the healer tasks. func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { @@ -1935,10 +1952,11 @@ func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { if err := batch.Write(); err != nil { log.Crit("Failed to persist healing data", "err", err) } - if err := batch.Replay(newStateWriter(res, s)); err != nil { + stateWriter := newStateWriter(s.db, res) + if err := batch.Replay(stateWriter); err != nil { log.Crit("Failed to replay the committed batch", "err", err) } - log.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize())) + log.Info("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize()), stateWriter.log()) } // processBytecodeHealResponse integrates an already validated bytecode response From da8f26bd1335db43f026df9c4d6829a2149a83c8 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Fri, 12 Mar 2021 04:54:28 +0100 Subject: [PATCH 18/75] core/state/snapshot: refactor (#4) * core/state/snapshot: refactor * core/state/snapshot: tiny fix and polish Co-authored-by: rjl493456442 --- core/state/snapshot/generate.go | 217 ++++++++++++++------------------ 1 file changed, 97 insertions(+), 120 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 04df0523f6b5..5a6790bd0e5d 100644 --- 
a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -315,11 +315,9 @@ func (dl *diskLayer) generate(stats *generatorStats) { accountRange = accountCheckRange ) if len(dl.genMarker) > 0 { // []byte{} is the start, use nil for that - accMarker = dl.genMarker[:common.HashLength] - - // Always reset the initial account range as 1 whenever recover - // from the interruption. - accountRange = 1 + // Always reset the initial account range as 1 + // whenever recover from the interruption. + accMarker, accountRange = dl.genMarker[:common.HashLength], 1 } var ( batch = dl.diskdb.NewBatch() @@ -329,132 +327,111 @@ func (dl *diskLayer) generate(stats *generatorStats) { ) stats.Log("Resuming state snapshot generation", dl.root, dl.genMarker) - for { - exhausted, last, err := dl.genRange(dl.root, rawdb.SnapshotAccountPrefix, "account", accOrigin, accountRange, stats, func(key []byte, val []byte, regen bool) error { - // Retrieve the current account and flatten it into the internal format - accountHash := common.BytesToHash(key) - - var acc struct { - Nonce uint64 - Balance *big.Int - Root common.Hash - CodeHash []byte + checkAndFlush := func(currentLocation []byte) error { + select { + case abort = <-dl.genAbort: + default: + } + if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { + // Flush out the batch anyway no matter it's empty or not. + // It's possible that all the states are recovered and the + // generation indeed makes progress. 
+ marker := currentLocation + journalProgress(batch, marker, stats) + + if err := batch.Write(); err != nil { + return err } - if err := rlp.DecodeBytes(val, &acc); err != nil { - log.Crit("Invalid account encountered during snapshot creation", "err", err) + batch.Reset() + + dl.lock.Lock() + dl.genMarker = marker + dl.lock.Unlock() + + if abort != nil { + stats.Log("Aborting state snapshot generation", dl.root, marker) + return errors.New("aborted") } - data := SlimAccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash) + } + if time.Since(logged) > 8*time.Second { + stats.Log("Generating state snapshot", dl.root, marker) + logged = time.Now() + } + return nil + } - // If the account is not yet in-progress, write it out - if accMarker == nil || !bytes.Equal(accountHash[:], accMarker) { + onAccount := func(key []byte, val []byte, regen bool) error { + // Retrieve the current account and flatten it into the internal format + accountHash := common.BytesToHash(key) + var acc struct { + Nonce uint64 + Balance *big.Int + Root common.Hash + CodeHash []byte + } + if err := rlp.DecodeBytes(val, &acc); err != nil { + log.Crit("Invalid account encountered during snapshot creation", "err", err) + } + data := SlimAccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash) + + // If the account is not yet in-progress, write it out + if accMarker == nil || !bytes.Equal(accountHash[:], accMarker) { + if regen { + rawdb.WriteAccountSnapshot(batch, accountHash, data) + snapGeneratedAccountMeter.Mark(1) + } else { + snapRecoveredAccountMeter.Mark(1) + } + stats.storage += common.StorageSize(1 + common.HashLength + len(data)) + stats.accounts++ + } + // If we've exceeded our batch allowance or termination was requested, flush to disk + if err := checkAndFlush(accountHash[:]); err != nil { + return err + } + // If the iterated account is the contract, create a further loop to + // verify or regenerate the contract storage. 
+ if acc.Root != emptyRoot { + var storeMarker []byte + if accMarker != nil && bytes.Equal(accountHash[:], accMarker) && len(dl.genMarker) > common.HashLength { + storeMarker = dl.genMarker[common.HashLength:] + } + onStorage := func(key []byte, val []byte, regen bool) error { if regen { - rawdb.WriteAccountSnapshot(batch, accountHash, data) - snapGeneratedAccountMeter.Mark(1) + rawdb.WriteStorageSnapshot(batch, accountHash, common.BytesToHash(key), val) + snapGeneratedStorageMeter.Mark(1) } else { - snapRecoveredAccountMeter.Mark(1) + snapRecoveredStorageMeter.Mark(1) } - stats.storage += common.StorageSize(1 + common.HashLength + len(data)) - stats.accounts++ - } - // If we've exceeded our batch allowance or termination was requested, flush to disk - select { - case abort = <-dl.genAbort: - default: - } - if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { - // Flush out the batch anyway no matter it's empty or not. - // It's possible that all the states are recovered and the - // generation indeed makes progress. - marker := accountHash[:] - journalProgress(batch, marker, stats) - - if err := batch.Write(); err != nil { + stats.storage += common.StorageSize(1 + 2*common.HashLength + len(val)) + stats.slots++ + // If we've exceeded our batch allowance or termination was requested, flush to disk + if err := checkAndFlush(append(accountHash[:], key...)); err != nil { return err } - batch.Reset() - - dl.lock.Lock() - dl.genMarker = marker - dl.lock.Unlock() - - if abort != nil { - stats.Log("Aborting state snapshot generation", dl.root, accountHash[:]) - return errors.New("aborted") - } + return nil } - // If the iterated account is the contract, create a further loop to - // verify or regenerate the contract storage. 
- if acc.Root != emptyRoot { - var storeMarker []byte - if accMarker != nil && bytes.Equal(accountHash[:], accMarker) && len(dl.genMarker) > common.HashLength { - storeMarker = dl.genMarker[common.HashLength:] + var storeOrigin = common.CopyBytes(storeMarker) + for { + exhausted, last, err := dl.genRange(acc.Root, append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...), "storage", storeOrigin, storageCheckRange, stats, onStorage, nil) + if err != nil { + return err } - var storeOrigin = common.CopyBytes(storeMarker) - for { - exhausted, last, err := dl.genRange(acc.Root, append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...), "storage", storeOrigin, storageCheckRange, stats, func(key []byte, val []byte, regen bool) error { - if regen { - rawdb.WriteStorageSnapshot(batch, accountHash, common.BytesToHash(key), val) - snapGeneratedStorageMeter.Mark(1) - } else { - snapRecoveredStorageMeter.Mark(1) - } - stats.storage += common.StorageSize(1 + 2*common.HashLength + len(val)) - stats.slots++ - - // If we've exceeded our batch allowance or termination was requested, flush to disk - select { - case abort = <-dl.genAbort: - default: - } - if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { - // Flush out the batch anyway no matter it's empty or not. - // It's possible that all the states are recovered and the - // generation indeed makes progress. - marker := append(accountHash[:], key...) 
- journalProgress(batch, marker, stats) - - if err := batch.Write(); err != nil { - return err - } - batch.Reset() - - dl.lock.Lock() - dl.genMarker = marker - dl.lock.Unlock() - - if abort != nil { - stats.Log("Aborting state snapshot generation", dl.root, append(accountHash[:], key...)) - return errors.New("aborted") - } - if time.Since(logged) > 8*time.Second { - stats.Log("Generating state snapshot", dl.root, append(accountHash[:], key...)) - logged = time.Now() - } - } - return nil - }, nil) - - if err != nil { - return err - } - if exhausted { - return nil - } - storeOrigin = increseKey(last) - if storeOrigin == nil { - return nil // special case, the last is 0xffffffff...fff - } + if exhausted { + return nil + } + storeOrigin = increseKey(last) + if storeOrigin == nil { + return nil // special case, the last is 0xffffffff...fff } } - if time.Since(logged) > 8*time.Second { - stats.Log("Generating state snapshot", dl.root, key) - logged = time.Now() - } - // Some account processed, unmark the marker - accMarker = nil - return nil - }, FullAccountRLP) - + } + // Some account processed, unmark the marker + accMarker = nil + return nil + } + for { + exhausted, last, err := dl.genRange(dl.root, rawdb.SnapshotAccountPrefix, "account", accOrigin, accountRange, stats, onAccount, FullAccountRLP) // The procedure it aborted, either by external signal or internal error if err != nil { if abort == nil { // aborted by internal error, wait the signal @@ -480,7 +457,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { if err := batch.Write(); err != nil { log.Error("Failed to flush batch", "error", err) - abort := <-dl.genAbort + abort = <-dl.genAbort abort <- stats return } From 74331513246c233f1eb07faef75f528dc39994c2 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Fri, 12 Mar 2021 12:12:39 +0800 Subject: [PATCH 19/75] core, eth: fixes --- core/state/snapshot/generate.go | 9 ++++----- eth/protocols/snap/sync.go | 14 ++++++++++++-- 2 files changed, 16 
insertions(+), 7 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 5a6790bd0e5d..1f2549a68d94 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -336,8 +336,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { // Flush out the batch anyway no matter it's empty or not. // It's possible that all the states are recovered and the // generation indeed makes progress. - marker := currentLocation - journalProgress(batch, marker, stats) + journalProgress(batch, currentLocation, stats) if err := batch.Write(); err != nil { return err @@ -345,16 +344,16 @@ func (dl *diskLayer) generate(stats *generatorStats) { batch.Reset() dl.lock.Lock() - dl.genMarker = marker + dl.genMarker = currentLocation dl.lock.Unlock() if abort != nil { - stats.Log("Aborting state snapshot generation", dl.root, marker) + stats.Log("Aborting state snapshot generation", dl.root, currentLocation) return errors.New("aborted") } } if time.Since(logged) > 8*time.Second { - stats.Log("Generating state snapshot", dl.root, marker) + stats.Log("Generating state snapshot", dl.root, currentLocation) logged = time.Now() } return nil diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 3b7523e55205..75d2191c5eaa 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -1891,6 +1891,9 @@ func newStateWriter(db ethdb.KeyValueWriter, res *trienodeHealResponse) *stateWr // Put reacts to database writes and implements flat state persistence. 
func (w *stateWriter) Put(key []byte, val []byte) error { + if len(key) != common.HashLength { + return nil + } hash := common.BytesToHash(key) if index, ok := w.accountMarker[hash]; ok { rawdb.WriteAccountSnapshot(w.db, common.BytesToHash(w.res.paths[index][0]), w.res.nodes[index]) @@ -1914,7 +1917,7 @@ func (w *stateWriter) log() (ret []interface{}) { ret = append(ret, "accounts", w.accountSynced, "bytes", w.accountBytes) } if w.storageSynced > 0 { - ret = append(ret, "storages", w.storageSynced, "bytes", w.storageSynced) + ret = append(ret, "storages", w.storageSynced, "bytes", w.storageBytes) } return ret } @@ -1956,7 +1959,14 @@ func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { if err := batch.Replay(stateWriter); err != nil { log.Crit("Failed to replay the committed batch", "err", err) } - log.Info("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize()), stateWriter.log()) + ctx := []interface{}{ + "type", "trienodes", + "bytes", common.StorageSize(batch.ValueSize()), + } + if logs := stateWriter.log(); logs != nil { + ctx = append(ctx, logs...) + } + log.Info("Persisted set of healing data", ctx...) } // processBytecodeHealResponse integrates an already validated bytecode response From 70734578983b073a61807a43c5b9d274cc910fe6 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Fri, 12 Mar 2021 14:20:48 +0800 Subject: [PATCH 20/75] core, eth: fix healing writer --- core/state/sync.go | 22 +++++-- core/state/sync_test.go | 12 ++-- eth/downloader/statesync.go | 2 +- eth/protocols/snap/sync.go | 123 ++++++++++++------------------------ 4 files changed, 65 insertions(+), 94 deletions(-) diff --git a/core/state/sync.go b/core/state/sync.go index 1018b78e5ecb..73e025ea9719 100644 --- a/core/state/sync.go +++ b/core/state/sync.go @@ -26,17 +26,31 @@ import ( ) // NewStateSync create a new state trie download scheduler. 
-func NewStateSync(root common.Hash, database ethdb.KeyValueReader, bloom *trie.SyncBloom) *trie.Sync { +func NewStateSync(root common.Hash, database ethdb.KeyValueReader, bloom *trie.SyncBloom, onLeaf func(path []byte, leaf []byte) error) *trie.Sync { + // Register the storage slot callback if the external callback is specified. + var onSlot func(path []byte, leaf []byte, parent common.Hash) error + if onLeaf != nil { + onSlot = func(path []byte, leaf []byte, parent common.Hash) error { + return onLeaf(path, leaf) + } + } + // Register the account callback to connect the state trie and the storage + // trie belongs to the contract. var syncer *trie.Sync - callback := func(path []byte, leaf []byte, parent common.Hash) error { + onAccount := func(path []byte, leaf []byte, parent common.Hash) error { + if onLeaf != nil { + if err := onLeaf(path, leaf); err != nil { + return err + } + } var obj Account if err := rlp.Decode(bytes.NewReader(leaf), &obj); err != nil { return err } - syncer.AddSubTrie(obj.Root, path, parent, nil) + syncer.AddSubTrie(obj.Root, path, parent, onSlot) syncer.AddCodeEntry(common.BytesToHash(obj.CodeHash), path, parent) return nil } - syncer = trie.NewSync(root, database, callback, bloom) + syncer = trie.NewSync(root, database, onAccount, bloom) return syncer } diff --git a/core/state/sync_test.go b/core/state/sync_test.go index 9c4867093d1e..a13fcf56a376 100644 --- a/core/state/sync_test.go +++ b/core/state/sync_test.go @@ -133,7 +133,7 @@ func checkStateConsistency(db ethdb.Database, root common.Hash) error { // Tests that an empty state is not scheduled for syncing. 
func TestEmptyStateSync(t *testing.T) { empty := common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421") - sync := NewStateSync(empty, rawdb.NewMemoryDatabase(), trie.NewSyncBloom(1, memorydb.New())) + sync := NewStateSync(empty, rawdb.NewMemoryDatabase(), trie.NewSyncBloom(1, memorydb.New()), nil) if nodes, paths, codes := sync.Missing(1); len(nodes) != 0 || len(paths) != 0 || len(codes) != 0 { t.Errorf(" content requested for empty state: %v, %v, %v", nodes, paths, codes) } @@ -170,7 +170,7 @@ func testIterativeStateSync(t *testing.T, count int, commit bool, bypath bool) { // Create a destination state and sync with the scheduler dstDb := rawdb.NewMemoryDatabase() - sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb)) + sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil) nodes, paths, codes := sched.Missing(count) var ( @@ -249,7 +249,7 @@ func TestIterativeDelayedStateSync(t *testing.T) { // Create a destination state and sync with the scheduler dstDb := rawdb.NewMemoryDatabase() - sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb)) + sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil) nodes, _, codes := sched.Missing(0) queue := append(append([]common.Hash{}, nodes...), codes...) 
@@ -297,7 +297,7 @@ func testIterativeRandomStateSync(t *testing.T, count int) { // Create a destination state and sync with the scheduler dstDb := rawdb.NewMemoryDatabase() - sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb)) + sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil) queue := make(map[common.Hash]struct{}) nodes, _, codes := sched.Missing(count) @@ -347,7 +347,7 @@ func TestIterativeRandomDelayedStateSync(t *testing.T) { // Create a destination state and sync with the scheduler dstDb := rawdb.NewMemoryDatabase() - sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb)) + sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil) queue := make(map[common.Hash]struct{}) nodes, _, codes := sched.Missing(0) @@ -414,7 +414,7 @@ func TestIncompleteStateSync(t *testing.T) { // Create a destination state and sync with the scheduler dstDb := rawdb.NewMemoryDatabase() - sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb)) + sched := NewStateSync(srcRoot, dstDb, trie.NewSyncBloom(1, dstDb), nil) var added []common.Hash diff --git a/eth/downloader/statesync.go b/eth/downloader/statesync.go index 6231588ad286..ff84a3a8f015 100644 --- a/eth/downloader/statesync.go +++ b/eth/downloader/statesync.go @@ -298,7 +298,7 @@ func newStateSync(d *Downloader, root common.Hash) *stateSync { return &stateSync{ d: d, root: root, - sched: state.NewStateSync(root, d.stateDB, d.stateBloom), + sched: state.NewStateSync(root, d.stateDB, d.stateBloom, nil), keccak: sha3.NewLegacyKeccak256().(crypto.KeccakState), trieTasks: make(map[common.Hash]*trieTask), codeTasks: make(map[common.Hash]*codeTask), diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 75d2191c5eaa..7ed9ac33c3a2 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -436,6 +436,12 @@ type Syncer struct { bytecodeHealDups uint64 // Number of bytecodes already processed bytecodeHealNops uint64 // Number of 
bytecodes not requested + stateWriter ethdb.Batch // Shared batch writer used for persisting raw states + accountHealed uint64 // Number of accounts downloaded during the healing stage + accountHealedBytes common.StorageSize // Number of raw account bytes persisted to disk during the healing stage + storageHealed uint64 // Number of storage slots downloaded during the healing stage + storageHealedBytes common.StorageSize // Number of raw storage bytes persisted to disk during the healing stage + startTime time.Time // Time instance when snapshot sync started logTime time.Time // Time instance when status was last reported @@ -477,6 +483,7 @@ func NewSyncer(db ethdb.KeyValueStore) *Syncer { bytecodeHealReqFails: make(chan *bytecodeHealRequest), trienodeHealResps: make(chan *trienodeHealResponse), bytecodeHealResps: make(chan *bytecodeHealResponse), + stateWriter: db.NewBatch(), } } @@ -544,7 +551,7 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { s.lock.Lock() s.root = root s.healer = &healTask{ - scheduler: state.NewStateSync(root, s.db, nil), + scheduler: state.NewStateSync(root, s.db, nil, s.onHealState), trieTasks: make(map[common.Hash]trie.SyncPath), codeTasks: make(map[common.Hash]struct{}), } @@ -569,6 +576,14 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { }() log.Debug("Starting snapshot sync cycle", "root", root) + + // Flush out the last committed raw states + defer func() { + if s.stateWriter.ValueSize() > 0 { + s.stateWriter.Write() + s.stateWriter.Reset() + } + }() defer s.report(true) // Whether sync completed or not, disregard any future packets @@ -1853,75 +1868,6 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { // task assigners to pick up and fill. } -// stateWriter is a database batch replayer that takes a batch of write operations -// and extract the flat states from them. The target flat states will be persisted -// blindly and can be fixed by the generator later. 
-type stateWriter struct { - db ethdb.KeyValueWriter - res *trienodeHealResponse - accountMarker map[common.Hash]int - storageMarker map[common.Hash]int - - accountSynced uint64 // Number of accounts persisted - accountBytes common.StorageSize // Number of account bytes persisted to disk - storageSynced uint64 // Number of storage slots persisted - storageBytes common.StorageSize // Number of storage bytes persisted to disk -} - -func newStateWriter(db ethdb.KeyValueWriter, res *trienodeHealResponse) *stateWriter { - var ( - accountMarker = make(map[common.Hash]int) - storageMarker = make(map[common.Hash]int) - ) - for i, path := range res.paths { - if len(path) == 1 && len(path[0]) == common.HashLength { - accountMarker[res.hashes[i]] = i - } - if len(path) == 2 && len(path[1]) == common.HashLength { - storageMarker[res.hashes[i]] = i - } - } - return &stateWriter{ - db: db, - res: res, - accountMarker: accountMarker, - storageMarker: storageMarker, - } -} - -// Put reacts to database writes and implements flat state persistence. 
-func (w *stateWriter) Put(key []byte, val []byte) error { - if len(key) != common.HashLength { - return nil - } - hash := common.BytesToHash(key) - if index, ok := w.accountMarker[hash]; ok { - rawdb.WriteAccountSnapshot(w.db, common.BytesToHash(w.res.paths[index][0]), w.res.nodes[index]) - w.accountSynced += 1 - w.accountBytes += common.StorageSize(1 + common.HashLength + len(w.res.nodes[index])) - } - if index, ok := w.storageMarker[hash]; ok { - rawdb.WriteStorageSnapshot(w.db, common.BytesToHash(w.res.paths[index][0]), common.BytesToHash(w.res.paths[index][1]), w.res.nodes[index]) - w.storageSynced += 1 - w.storageBytes += common.StorageSize(1 + 2*common.HashLength + len(w.res.nodes[index])) - } - return nil -} - -func (w *stateWriter) Delete(key []byte) error { - panic("not implemented") -} - -func (w *stateWriter) log() (ret []interface{}) { - if w.accountSynced > 0 { - ret = append(ret, "accounts", w.accountSynced, "bytes", w.accountBytes) - } - if w.storageSynced > 0 { - ret = append(ret, "storages", w.storageSynced, "bytes", w.storageBytes) - } - return ret -} - // processTrienodeHealResponse integrates an already validated trienode response // into the healer tasks. func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { @@ -1955,18 +1901,7 @@ func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { if err := batch.Write(); err != nil { log.Crit("Failed to persist healing data", "err", err) } - stateWriter := newStateWriter(s.db, res) - if err := batch.Replay(stateWriter); err != nil { - log.Crit("Failed to replay the committed batch", "err", err) - } - ctx := []interface{}{ - "type", "trienodes", - "bytes", common.StorageSize(batch.ValueSize()), - } - if logs := stateWriter.log(); logs != nil { - ctx = append(ctx, logs...) - } - log.Info("Persisted set of healing data", ctx...) 
+ log.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize())) } // processBytecodeHealResponse integrates an already validated bytecode response @@ -2668,6 +2603,28 @@ func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) e return nil } +// onHealState is a callback method to invoke when a flat state(account +// or storage slot) is downloded during the healing stage. The flat states +// can be persisted blindly and can be fixed later in the generation stage. +// Note it's not concurrent safe, please handle the concurrent issue outside. +func (s *Syncer) onHealState(path []byte, value []byte) error { + if len(path) == common.HashLength { + rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(path), value) + s.accountHealed += 1 + s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(value)) + } + if len(path) == 2*common.HashLength { + rawdb.WriteStorageSnapshot(s.stateWriter, common.BytesToHash(path[:common.HashLength]), common.BytesToHash(path[common.HashLength:]), value) + s.storageHealed += 1 + s.storageHealedBytes += common.StorageSize(1 + 2*common.HashLength + len(value)) + } + if s.stateWriter.ValueSize() > ethdb.IdealBatchSize { + s.stateWriter.Write() // It's fine to ignore the error here + s.stateWriter.Reset() + } + return nil +} + // hashSpace is the total size of the 256 bit hash space for accounts. 
var hashSpace = new(big.Int).Exp(common.Big2, common.Big256, nil) @@ -2733,5 +2690,5 @@ func (s *Syncer) reportHealProgress(force bool) { bytecode = fmt.Sprintf("%d@%v", s.bytecodeHealSynced, s.bytecodeHealBytes.TerminalString()) ) log.Info("State heal in progress", "nodes", trienode, "codes", bytecode, - "pending", s.healer.scheduler.Pending()) + "accounts", s.accountHealed, "bytes", s.accountHealedBytes, "storages", s.storageHealed, "bytes", s.storageHealedBytes, "pending", s.healer.scheduler.Pending()) } From 70c9b327743af18b8f6fb3f9f95759ddd0f06ce4 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Fri, 12 Mar 2021 15:33:37 +0800 Subject: [PATCH 21/75] core, trie, eth: fix paths --- core/state/statedb.go | 2 +- core/state/sync.go | 16 ++++++++-------- eth/protocols/snap/sync.go | 10 +++++----- trie/committer.go | 4 ++-- trie/sync.go | 9 ++++++++- trie/trie.go | 17 ++++++++++++++--- 6 files changed, 38 insertions(+), 20 deletions(-) diff --git a/core/state/statedb.go b/core/state/statedb.go index 2e5d6e47c83c..90f4709bfc46 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -948,7 +948,7 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { // The onleaf func is called _serially_, so we can reuse the same account // for unmarshalling every time. var account Account - root, err := s.trie.Commit(func(path []byte, leaf []byte, parent common.Hash) error { + root, err := s.trie.Commit(func(_ [][]byte, _ []byte, leaf []byte, parent common.Hash) error { if err := rlp.DecodeBytes(leaf, &account); err != nil { return nil } diff --git a/core/state/sync.go b/core/state/sync.go index 73e025ea9719..7a5852fb1945 100644 --- a/core/state/sync.go +++ b/core/state/sync.go @@ -26,20 +26,20 @@ import ( ) // NewStateSync create a new state trie download scheduler. 
-func NewStateSync(root common.Hash, database ethdb.KeyValueReader, bloom *trie.SyncBloom, onLeaf func(path []byte, leaf []byte) error) *trie.Sync { +func NewStateSync(root common.Hash, database ethdb.KeyValueReader, bloom *trie.SyncBloom, onLeaf func(paths [][]byte, leaf []byte) error) *trie.Sync { // Register the storage slot callback if the external callback is specified. - var onSlot func(path []byte, leaf []byte, parent common.Hash) error + var onSlot func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash) error if onLeaf != nil { - onSlot = func(path []byte, leaf []byte, parent common.Hash) error { - return onLeaf(path, leaf) + onSlot = func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash) error { + return onLeaf(paths, leaf) } } // Register the account callback to connect the state trie and the storage // trie belongs to the contract. var syncer *trie.Sync - onAccount := func(path []byte, leaf []byte, parent common.Hash) error { + onAccount := func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash) error { if onLeaf != nil { - if err := onLeaf(path, leaf); err != nil { + if err := onLeaf(paths, leaf); err != nil { return err } } @@ -47,8 +47,8 @@ func NewStateSync(root common.Hash, database ethdb.KeyValueReader, bloom *trie.S if err := rlp.Decode(bytes.NewReader(leaf), &obj); err != nil { return err } - syncer.AddSubTrie(obj.Root, path, parent, onSlot) - syncer.AddCodeEntry(common.BytesToHash(obj.CodeHash), path, parent) + syncer.AddSubTrie(obj.Root, hexpath, parent, onSlot) + syncer.AddCodeEntry(common.BytesToHash(obj.CodeHash), hexpath, parent) return nil } syncer = trie.NewSync(root, database, onAccount, bloom) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 7ed9ac33c3a2..6572b64f8bfe 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -2607,14 +2607,14 @@ func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) e // or storage slot) is 
downloded during the healing stage. The flat states // can be persisted blindly and can be fixed later in the generation stage. // Note it's not concurrent safe, please handle the concurrent issue outside. -func (s *Syncer) onHealState(path []byte, value []byte) error { - if len(path) == common.HashLength { - rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(path), value) +func (s *Syncer) onHealState(paths [][]byte, value []byte) error { + if len(paths) == 1 { + rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(paths[0]), value) s.accountHealed += 1 s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(value)) } - if len(path) == 2*common.HashLength { - rawdb.WriteStorageSnapshot(s.stateWriter, common.BytesToHash(path[:common.HashLength]), common.BytesToHash(path[common.HashLength:]), value) + if len(paths) == 2 { + rawdb.WriteStorageSnapshot(s.stateWriter, common.BytesToHash(paths[0]), common.BytesToHash(paths[1]), value) s.storageHealed += 1 s.storageHealedBytes += common.StorageSize(1 + 2*common.HashLength + len(value)) } diff --git a/trie/committer.go b/trie/committer.go index 33fd9e982339..ce4065f5fd12 100644 --- a/trie/committer.go +++ b/trie/committer.go @@ -220,13 +220,13 @@ func (c *committer) commitLoop(db *Database) { switch n := n.(type) { case *shortNode: if child, ok := n.Val.(valueNode); ok { - c.onleaf(nil, child, hash) + c.onleaf(nil, nil, child, hash) } case *fullNode: // For children in range [0, 15], it's impossible // to contain valuenode. Only check the 17th child. 
if n.Children[16] != nil { - c.onleaf(nil, n.Children[16].(valueNode), hash) + c.onleaf(nil, nil, n.Children[16].(valueNode), hash) } } } diff --git a/trie/sync.go b/trie/sync.go index dd8279b66528..3a6076ff8f7a 100644 --- a/trie/sync.go +++ b/trie/sync.go @@ -398,7 +398,14 @@ func (s *Sync) children(req *request, object node) ([]*request, error) { // Notify any external watcher of a new key/value node if req.callback != nil { if node, ok := (child.node).(valueNode); ok { - if err := req.callback(child.path, node, req.hash); err != nil { + var paths [][]byte + if len(child.path) == 2*common.HashLength { + paths = append(paths, hexToKeybytes(child.path)) + } else if len(child.path) == 4*common.HashLength { + paths = append(paths, hexToKeybytes(child.path[:2*common.HashLength])) + paths = append(paths, hexToKeybytes(child.path[2*common.HashLength:])) + } + if err := req.callback(paths, child.path, node, req.hash); err != nil { return nil, err } } diff --git a/trie/trie.go b/trie/trie.go index 87b72ecf17f8..7ed235fa8ac6 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -37,9 +37,20 @@ var ( ) // LeafCallback is a callback type invoked when a trie operation reaches a leaf -// node. It's used by state sync and commit to allow handling external references -// between account and storage tries. -type LeafCallback func(path []byte, leaf []byte, parent common.Hash) error +// node. +// +// The paths is a path tuple identifying a particular trie node either in a single +// trie (account) or a layered trie (account -> storage). Each path in the tuple +// is in the raw format(32 bytes). +// +// The hexpath is a composite hexary path identifying the trie node. All the key +// bytes are converted to the hexary nibbles and composited with the parent path +// if the trie node is in a layered trie. +// +// It's used by state sync and commit to allow handling external references +// between account and storage tries. 
And also it's used in the state healing +// for extracting the raw states(leaf nodes) with corresponding paths. +type LeafCallback func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash) error // Trie is a Merkle Patricia Trie. // The zero value is an empty trie with no database. From d865c47f09a7e0cac3fbda0061c3fb7e18cfaefb Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Fri, 12 Mar 2021 16:14:55 +0800 Subject: [PATCH 22/75] eth/protocols/snap: fix encoding --- eth/protocols/snap/sync.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 6572b64f8bfe..725ed8bb6617 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -2609,9 +2609,14 @@ func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) e // Note it's not concurrent safe, please handle the concurrent issue outside. func (s *Syncer) onHealState(paths [][]byte, value []byte) error { if len(paths) == 1 { - rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(paths[0]), value) + var account state.Account + if err := rlp.DecodeBytes(value, &account); err != nil { + return nil + } + blob := snapshot.SlimAccountRLP(account.Nonce, account.Balance, account.Root, account.CodeHash) + rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(paths[0]), blob) s.accountHealed += 1 - s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(value)) + s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(blob)) } if len(paths) == 2 { rawdb.WriteStorageSnapshot(s.stateWriter, common.BytesToHash(paths[0]), common.BytesToHash(paths[1]), value) From 6e94768f2097953591c380f3a0260d40a08c3690 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Mon, 15 Mar 2021 10:29:08 +0800 Subject: [PATCH 23/75] eth, core: add debug log --- core/state/snapshot/generate.go | 7 +++++-- eth/protocols/snap/sync.go | 4 ++++ 2 files changed, 9 insertions(+), 2 
deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 1f2549a68d94..f7bbfa998c38 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -189,8 +189,11 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.SecureTrie, prefix [] count int last []byte proof = rawdb.NewMemoryDatabase() - iter = dl.diskdb.NewIterator(prefix, origin) ) + + iter := dl.diskdb.NewIterator(prefix, origin) + defer iter.Release() + for iter.Next() && count < max { key := iter.Key() if len(key) != len(prefix)+common.HashLength { @@ -269,7 +272,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig return exhausted, last, nil } snapFailedRangeProofMeter.Mark(1) - log.Debug("Detected outdated state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last) + log.Debug("Detected outdated state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last, "error", err) // The verifcation is failed, the flat state in this range cannot match the // merkle trie. 
Alternatively, use the fallback generation mechanism to regenerate diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 725ed8bb6617..50aa931229d1 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -2611,21 +2611,25 @@ func (s *Syncer) onHealState(paths [][]byte, value []byte) error { if len(paths) == 1 { var account state.Account if err := rlp.DecodeBytes(value, &account); err != nil { + log.Info("Failed to decode account", "error", err) // DEBUG LOG, REMOVE IT return nil } blob := snapshot.SlimAccountRLP(account.Nonce, account.Balance, account.Root, account.CodeHash) rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(paths[0]), blob) s.accountHealed += 1 s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(blob)) + log.Info("Heal state account", "hash", paths[0]) // DEBUG LOG, REMOVE IT } if len(paths) == 2 { rawdb.WriteStorageSnapshot(s.stateWriter, common.BytesToHash(paths[0]), common.BytesToHash(paths[1]), value) s.storageHealed += 1 s.storageHealedBytes += common.StorageSize(1 + 2*common.HashLength + len(value)) + log.Info("Heal state storage", "account", paths[0], "hash", paths[1]) // DEBUG LOG, REMOVE IT } if s.stateWriter.ValueSize() > ethdb.IdealBatchSize { s.stateWriter.Write() // It's fine to ignore the error here s.stateWriter.Reset() + log.Info("Flush state heal writer") // DEBUG LOG, REMOVE IT } return nil } From afecd67faf1c22f7dbfa2294fe970d56c79a499d Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 15 Mar 2021 04:17:33 +0100 Subject: [PATCH 24/75] core/state/generate: release iterator asap (#5) core/state/snapshot: less copy core/state/snapshot: revert split loop core/state/snapshot: handle storage becoming empty, improve test robustness core/state: test modified codehash core/state/snapshot: polish --- core/state/snapshot/generate.go | 50 +++++++----- core/state/snapshot/generate_test.go | 114 +++++++++++++++++++++++++-- 2 files changed, 137 insertions(+), 27 
deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index f7bbfa998c38..6c637ce0bde5 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -182,40 +182,37 @@ func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorSta // The iteration start point will be assigned if the iterator is restored from // the last interruption. Max will be assigned in order to limit the maximum // amount of data involved in each iteration. -func (dl *diskLayer) proveRange(root common.Hash, tr *trie.SecureTrie, prefix []byte, kind string, origin []byte, max int, onValue func([]byte) ([]byte, error)) ([][]byte, [][]byte, []byte, bool, error) { +func (dl *diskLayer) proveRange(root common.Hash, tr *trie.SecureTrie, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) ([][]byte, [][]byte, []byte, bool, error) { var ( keys [][]byte vals [][]byte - count int - last []byte proof = rawdb.NewMemoryDatabase() ) - iter := dl.diskdb.NewIterator(prefix, origin) defer iter.Release() - for iter.Next() && count < max { + for iter.Next() && len(keys) < max { key := iter.Key() if len(key) != len(prefix)+common.HashLength { continue } - if !bytes.HasPrefix(key, prefix) { - continue - } - last = common.CopyBytes(key[len(prefix):]) keys = append(keys, common.CopyBytes(key[len(prefix):])) - if onValue == nil { + if valueConvertFn == nil { vals = append(vals, common.CopyBytes(iter.Value())) - } else { - val, err := onValue(common.CopyBytes(iter.Value())) - if err != nil { - log.Debug("Failed to convert the flat state", "kind", kind, "key", common.BytesToHash(key[len(prefix):]), "error", err) - return nil, nil, last, false, err - } - vals = append(vals, val) + continue } - count += 1 + val, err := valueConvertFn(iter.Value()) + if err != nil { + log.Debug("Failed to convert the flat state", "kind", kind, "key", common.BytesToHash(key[len(prefix):]), "error", err) + return nil, 
nil, keys[len(keys)-1], false, err + } + vals = append(vals, val) + } + // Find out the key of last iterated element. + var last []byte + if len(keys) > 0 { + last = keys[len(keys)-1] } // Generate the Merkle proofs for the first and last element if origin == nil { @@ -239,7 +236,7 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.SecureTrie, prefix [] } // Range prover says the trie still has some elements on the right side but // the database is exhausted, then data loss is detected. - if cont && count < max { + if cont && len(keys) < max { return nil, nil, last, false, errors.New("data loss in the state range") } return keys, vals, last, !cont, nil @@ -248,14 +245,14 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.SecureTrie, prefix [] // genRange generates the state segment with particular prefix. Generation can // either verify the correctness of existing state through rangeproof and skip // generation, or iterate trie to regenerate state on demand. -func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, stats *generatorStats, onState func(key []byte, val []byte, regen bool) error, onValue func([]byte) ([]byte, error)) (bool, []byte, error) { +func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, stats *generatorStats, onState func(key []byte, val []byte, regen bool) error, valueConvertFn func([]byte) ([]byte, error)) (bool, []byte, error) { tr, err := trie.NewSecure(root, dl.triedb) if err != nil { stats.Log("Trie missing, state snapshotting paused", root, dl.genMarker) return false, nil, errors.New("trie is missing") } // Use range prover to check the validity of the flat state in the range - keys, vals, last, exhausted, err := dl.proveRange(root, tr, prefix, kind, origin, max, onValue) + keys, vals, last, exhausted, err := dl.proveRange(root, tr, prefix, kind, origin, max, valueConvertFn) if err == nil { snapSuccessfulRangeProofMeter.Mark(1) 
log.Debug("Proved state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last) @@ -427,6 +424,17 @@ func (dl *diskLayer) generate(stats *generatorStats) { return nil // special case, the last is 0xffffffff...fff } } + } else { + // If the root is empty, we still need to ensure that any previous snapshot + // storage values are cleared + // TODO: investigate if this can be avoided, this will be very costly since it + // affects every single EOA account + // - Perhaps we can avoid if where codeHash is emptyCode + prefix := append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...) + keyLen := len(rawdb.SnapshotStoragePrefix) + 2*common.HashLength + if err := wipeKeyRange(dl.diskdb, "storage", prefix, nil, nil, keyLen, snapWipedStorageMeter, false); err != nil { + return err + } } // Some account processed, unmark the marker accMarker = nil diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 1a1ea06b5d76..ccf817cfe930 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -23,6 +23,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/ethdb/memorydb" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" @@ -56,10 +57,13 @@ func TestGeneration(t *testing.T) { acc = &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} val, _ = rlp.EncodeToBytes(acc) accTrie.Update([]byte("acc-3"), val) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 - accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd - triedb.Commit(common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), false, nil) + root, _ := accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd + 
triedb.Commit(root, false, nil) - snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd")) + if have, want := root, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"); have != want { + t.Fatalf("have %#x want %#x", have, want) + } + snap := generateSnapshot(diskdb, triedb, 16, root) select { case <-snap.genPending: // Snapshot generation succeeded @@ -67,6 +71,7 @@ func TestGeneration(t *testing.T) { case <-time.After(250 * time.Millisecond): t.Errorf("Snapshot generation failed") } + checkSnapRoot(t, snap, root) // Signal abortion to the generator and wait for it to tear down stop := make(chan *generatorStats) snap.genAbort <- stop @@ -120,10 +125,106 @@ func TestGenerateExistentState(t *testing.T) { rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-2")), []byte("val-2")) rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-3")), []byte("val-3")) - accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd - triedb.Commit(common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), false, nil) + root, _ := accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd + triedb.Commit(root, false, nil) - snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd")) + snap := generateSnapshot(diskdb, triedb, 16, root) + select { + case <-snap.genPending: + // Snapshot generation succeeded + + case <-time.After(250 * time.Millisecond): + t.Errorf("Snapshot generation failed") + } + checkSnapRoot(t, snap, root) + // Signal abortion to the generator and wait for it to tear down + stop := make(chan *generatorStats) + snap.genAbort <- stop + <-stop +} + +func checkSnapRoot(t *testing.T, snap *diskLayer, trieRoot common.Hash) { + 
t.Helper() + accIt := snap.AccountIterator(common.Hash{}) + defer accIt.Release() + snapRoot, err := generateTrieRoot(nil, accIt, common.Hash{}, stackTrieGenerate, + func(db ethdb.KeyValueWriter, accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) { + storageIt, _ := snap.StorageIterator(accountHash, common.Hash{}) + defer storageIt.Release() + + hash, err := generateTrieRoot(nil, storageIt, accountHash, stackTrieGenerate, nil, stat, false) + if err != nil { + return common.Hash{}, err + } + return hash, nil + }, newGenerateStats(), true) + + if err != nil { + t.Fatal(err) + } + if snapRoot != trieRoot { + t.Fatalf("snaproot: %#x != trieroot #%x", snapRoot, trieRoot) + } +} + +// Tests that snapshot generation with existent flat state, where the flat state contains +// some errors +func TestGenerateExistentStateWithExtraStorage(t *testing.T) { + //log.Root().SetHandler(log.LvlFilterHandler(log.LvlInfo, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + + // We can't use statedb to make a test trie (circular dependency), so make + // a fake one manually. We're going with a small account trie of 3 accounts, + // two of which also has the same 3-slot storage trie attached. 
+ var ( + diskdb = memorydb.New() + triedb = trie.NewDatabase(diskdb) + ) + stTrie, _ := trie.NewSecure(common.Hash{}, triedb) + stTrie.Update([]byte("key-1"), []byte("val-1")) + stTrie.Update([]byte("key-2"), []byte("val-2")) + stTrie.Update([]byte("key-3"), []byte("val-3")) + stTrie.Commit(nil) + + accTrie, _ := trie.NewSecure(common.Hash{}, triedb) + + { // Account one + acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-1")), val) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-1")), []byte("val-1")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-2")), []byte("val-2")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-3")), []byte("val-3")) + } + { // Account two + // The storage root is emptyHash, but the flat db has some storage values. 
This can happen + // if the storage was unset during sync + acc := &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 + diskdb.Put(hashData([]byte("acc-2")).Bytes(), val) + rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-2")), val) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("key-1")), []byte("val-1")) + } + + { // Account three + // This account changed codehash + acc := &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-3"), val) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 + acc.CodeHash = hashData([]byte("codez")).Bytes() + val, _ = rlp.EncodeToBytes(acc) + rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-3")), val) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-1")), []byte("val-1")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-2")), []byte("val-2")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-3")), []byte("val-3")) + } + + root, _ := accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd + t.Logf("Root: %#x\n", root) + triedb.Commit(root, false, nil) + + snap := generateSnapshot(diskdb, triedb, 16, root) select { case <-snap.genPending: // Snapshot generation succeeded @@ -131,6 +232,7 @@ func TestGenerateExistentState(t *testing.T) { case <-time.After(250 * time.Millisecond): t.Errorf("Snapshot generation failed") } + checkSnapRoot(t, snap, root) // Signal abortion to the generator and wait for it to tear down stop := make(chan *generatorStats) snap.genAbort <- stop From 9cf7677a181b00230c964d207aaac38a71dcbee8 Mon Sep 17 00:00:00 2001 From: 
rjl493456442 Date: Mon, 15 Mar 2021 13:42:43 +0800 Subject: [PATCH 25/75] core/state/snapshot: optimize stats counter --- core/state/snapshot/generate.go | 18 ++++++++++++------ core/state/snapshot/snapshot.go | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 6c637ce0bde5..7e51920fd1f5 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -265,7 +265,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig return false, nil, err } } - log.Debug("Recovered state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last) + log.Debug("Recovered state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last, "count", len(keys)) return exhausted, last, nil } snapFailedRangeProofMeter.Mark(1) @@ -288,20 +288,24 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig } log.Debug("Wiped currupted state range", "kind", kind, "prefix", prefix, "origin", origin, "limit", limit) } - iter := trie.NewIterator(tr.NodeIterator(origin)) + var ( + count int + iter = trie.NewIterator(tr.NodeIterator(origin)) + ) for iter.Next() { if last != nil && bytes.Compare(iter.Key, last) > 0 { - log.Debug("Regenerated state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last) + log.Debug("Regenerated state range", "kind", kind, "prefix", prefix, "root", root, "origin", origin, "last", last, "count", count) return false, last, nil // Apparently the trie is not exhausted } if err := onState(iter.Key, iter.Value, true); err != nil { return false, nil, err } + count += 1 } if iter.Err != nil { return false, nil, iter.Err } - log.Debug("Regenerated state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last) + log.Debug("Regenerated state range", "kind", kind, "prefix", prefix, "root", root, "origin", origin, "last", last, "count", count) return true, nil, nil 
// The entire trie is exhausted } @@ -417,11 +421,11 @@ func (dl *diskLayer) generate(stats *generatorStats) { return err } if exhausted { - return nil + break } storeOrigin = increseKey(last) if storeOrigin == nil { - return nil // special case, the last is 0xffffffff...fff + break // special case, the last is 0xffffffff...fff } } } else { @@ -440,6 +444,8 @@ func (dl *diskLayer) generate(stats *generatorStats) { accMarker = nil return nil } + + // Global loop for regerating the entire state trie + all layered storage tries. for { exhausted, last, err := dl.genRange(dl.root, rawdb.SnapshotAccountPrefix, "account", accOrigin, accountRange, stats, onAccount, FullAccountRLP) // The procedure it aborted, either by external signal or internal error diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index 725ae8e329c7..842946399481 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -662,7 +662,7 @@ func (t *Tree) Rebuild(root common.Hash) { case *diskLayer: // If the base layer is generating, abort it and save if layer.genAbort != nil { - abort := make(chan *generatorStats) + abort := make(chan *generatorStats, 1) // Discard the stats layer.genAbort <- abort } // Layer should be inactive now, mark it as stale From 13d4b9b8ac91b5799b8823864a86a5b7e1a38d31 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Mon, 15 Mar 2021 15:51:29 +0800 Subject: [PATCH 26/75] core, eth: add metric --- core/state/snapshot/generate.go | 14 ++++++++++++++ eth/protocols/snap/sync.go | 4 ---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 7e51920fd1f5..ca39d645de9c 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -61,9 +61,11 @@ var ( snapGeneratedAccountMeter = metrics.NewRegisteredMeter("state/snapshot/generation/account/generated", nil) snapRecoveredAccountMeter = 
metrics.NewRegisteredMeter("state/snapshot/generation/account/recovered", nil) snapWipedAccountMeter = metrics.NewRegisteredMeter("state/snapshot/generation/account/wiped", nil) + snapMissallAccountMeter = metrics.NewRegisteredMeter("state/snapshot/generation/account/missall", nil) snapGeneratedStorageMeter = metrics.NewRegisteredMeter("state/snapshot/generation/storage/generated", nil) snapRecoveredStorageMeter = metrics.NewRegisteredMeter("state/snapshot/generation/storage/recovered", nil) snapWipedStorageMeter = metrics.NewRegisteredMeter("state/snapshot/generation/storage/wiped", nil) + snapMissallStorageMeter = metrics.NewRegisteredMeter("state/snapshot/generation/storage/missall", nil) snapSuccessfulRangeProofMeter = metrics.NewRegisteredMeter("state/snapshot/generation/proof/success", nil) snapFailedRangeProofMeter = metrics.NewRegisteredMeter("state/snapshot/generation/proof/failure", nil) ) @@ -271,6 +273,18 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig snapFailedRangeProofMeter.Mark(1) log.Debug("Detected outdated state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last, "error", err) + // Special case, the entire trie is missing. In the original trie scheme, + // all the duplicated subtries will be filter out(only one copy of data + // will be stored). While in the snapshot model, all the storage tries + // belong to different contracts will be kept even they are duplicated. + // Track it to a certain extent remove the noise data used for statistics. + if origin == nil && last == nil { + meter := snapMissallAccountMeter + if kind == "storage" { + meter = snapMissallStorageMeter + } + meter.Mark(1) + } // The verifcation is failed, the flat state in this range cannot match the // merkle trie. Alternatively, use the fallback generation mechanism to regenerate // the correct flat state by iterating trie. 
But wiping the existent outdated flat diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 50aa931229d1..725ed8bb6617 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -2611,25 +2611,21 @@ func (s *Syncer) onHealState(paths [][]byte, value []byte) error { if len(paths) == 1 { var account state.Account if err := rlp.DecodeBytes(value, &account); err != nil { - log.Info("Failed to decode account", "error", err) // DEBUG LOG, REMOVE IT return nil } blob := snapshot.SlimAccountRLP(account.Nonce, account.Balance, account.Root, account.CodeHash) rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(paths[0]), blob) s.accountHealed += 1 s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(blob)) - log.Info("Heal state account", "hash", paths[0]) // DEBUG LOG, REMOVE IT } if len(paths) == 2 { rawdb.WriteStorageSnapshot(s.stateWriter, common.BytesToHash(paths[0]), common.BytesToHash(paths[1]), value) s.storageHealed += 1 s.storageHealedBytes += common.StorageSize(1 + 2*common.HashLength + len(value)) - log.Info("Heal state storage", "account", paths[0], "hash", paths[1]) // DEBUG LOG, REMOVE IT } if s.stateWriter.ValueSize() > ethdb.IdealBatchSize { s.stateWriter.Write() // It's fine to ignore the error here s.stateWriter.Reset() - log.Info("Flush state heal writer") // DEBUG LOG, REMOVE IT } return nil } From 0cceb3c1a555e3bd96072b339818ab3d8e23e4db Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Mon, 15 Mar 2021 15:56:39 +0800 Subject: [PATCH 27/75] core/state/snapshot: update comments --- core/state/snapshot/wipe.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/state/snapshot/wipe.go b/core/state/snapshot/wipe.go index 521f7b16fbf5..2cab57393bef 100644 --- a/core/state/snapshot/wipe.go +++ b/core/state/snapshot/wipe.go @@ -85,6 +85,8 @@ func wipeContent(db ethdb.KeyValueStore) error { // wipeKeyRange deletes a range of keys from the database starting with prefix // and having a specific 
total key length. The start and limit is optional for // specifying a particular key range for deletion. +// +// Origin is included for wiping and limit is excluded if they are specified. func wipeKeyRange(db ethdb.KeyValueStore, kind string, prefix []byte, origin []byte, limit []byte, keylen int, meter metrics.Meter, report bool) error { // Batch deletions together to avoid holding an iterator for too long var ( From 1f0590862b965b390e3c469d82c5fbb0853dddc8 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Mon, 15 Mar 2021 16:36:00 +0800 Subject: [PATCH 28/75] core/state/snapshot: improve tests --- core/state/snapshot/generate_test.go | 54 +++++++++++++++++++--------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index ccf817cfe930..aabf998d4f7e 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -168,8 +168,11 @@ func checkSnapRoot(t *testing.T, snap *diskLayer, trieRoot common.Hash) { } // Tests that snapshot generation with existent flat state, where the flat state contains -// some errors -func TestGenerateExistentStateWithExtraStorage(t *testing.T) { +// some errors: +// - the contract with empty storage root but has storage entries in the disk +// - the contract(non-empty storage) misses some storage slots +// - the contract(non-empty storage) has wrong storage slots +func TestGenerateExistentStateWithWrongStorage(t *testing.T) { //log.Root().SetHandler(log.LvlFilterHandler(log.LvlInfo, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) // We can't use statedb to make a test trie (circular dependency), so make @@ -187,37 +190,56 @@ func TestGenerateExistentStateWithExtraStorage(t *testing.T) { accTrie, _ := trie.NewSecure(common.Hash{}, triedb) - { // Account one + { // Account one, miss storage slots in the end(key-3) acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} 
val, _ := rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + accTrie.Update([]byte("acc-1"), val) rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-1")), val) rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-1")), []byte("val-1")) rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-2")), []byte("val-2")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-3")), []byte("val-3")) } - { // Account two + { // Account two, miss storage slots in the beginning(key-1) + acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-2"), val) + rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-2")), val) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("key-2")), []byte("val-2")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("key-3")), []byte("val-3")) + } + { // Account three // The storage root is emptyHash, but the flat db has some storage values. 
This can happen // if the storage was unset during sync acc := &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} val, _ := rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 - diskdb.Put(hashData([]byte("acc-2")).Bytes(), val) - rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-2")), val) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("key-1")), []byte("val-1")) + accTrie.Update([]byte("acc-3"), val) + diskdb.Put(hashData([]byte("acc-3")).Bytes(), val) + rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-3")), val) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-1")), []byte("val-1")) } - { // Account three + { // Account four // This account changed codehash acc := &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} val, _ := rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-3"), val) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 + accTrie.Update([]byte("acc-4"), val) acc.CodeHash = hashData([]byte("codez")).Bytes() val, _ = rlp.EncodeToBytes(acc) - rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-3")), val) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-1")), []byte("val-1")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-2")), []byte("val-2")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-3")), []byte("val-3")) + rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-4")), val) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-4")), hashData([]byte("key-1")), []byte("val-1")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-4")), hashData([]byte("key-2")), []byte("val-2")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-4")), hashData([]byte("key-3")), 
[]byte("val-3")) + } + + { // Account five + // This account has the wrong storage slot + acc := &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-5"), val) + val, _ = rlp.EncodeToBytes(acc) + rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-5")), val) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-5")), hashData([]byte("key-1")), []byte("badval-1")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-5")), hashData([]byte("key-2")), []byte("badval-2")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-5")), hashData([]byte("key-3")), []byte("badval-3")) } root, _ := accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd From 07e35759e06ef6bc109305207386e44a0cd93060 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Tue, 16 Mar 2021 11:00:10 +0800 Subject: [PATCH 29/75] core/state/snapshot: replace secure trie with standard trie --- core/state/snapshot/generate.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index ca39d645de9c..78657e1d0400 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -184,7 +184,7 @@ func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorSta // The iteration start point will be assigned if the iterator is restored from // the last interruption. Max will be assigned in order to limit the maximum // amount of data involved in each iteration. 
-func (dl *diskLayer) proveRange(root common.Hash, tr *trie.SecureTrie, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) ([][]byte, [][]byte, []byte, bool, error) { +func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) ([][]byte, [][]byte, []byte, bool, error) { var ( keys [][]byte vals [][]byte @@ -248,7 +248,7 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.SecureTrie, prefix [] // either verify the correctness of existing state through rangeproof and skip // generation, or iterate trie to regenerate state on demand. func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, stats *generatorStats, onState func(key []byte, val []byte, regen bool) error, valueConvertFn func([]byte) ([]byte, error)) (bool, []byte, error) { - tr, err := trie.NewSecure(root, dl.triedb) + tr, err := trie.New(root, dl.triedb) if err != nil { stats.Log("Trie missing, state snapshotting paused", root, dl.genMarker) return false, nil, errors.New("trie is missing") From 1d3517e332dbe7a09e624306d9f6675832cf119b Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Tue, 16 Mar 2021 13:53:10 +0800 Subject: [PATCH 30/75] core/state/snapshot: wrap return as the struct --- core/state/snapshot/generate.go | 113 +++++++++++++++++++++++++++----- 1 file changed, 95 insertions(+), 18 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 78657e1d0400..d0fab6835b84 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -180,11 +180,59 @@ func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorSta rawdb.WriteSnapshotGenerator(db, blob) } +// proofResult contains the output of range proving which can be used +// for further processing no matter it's successful or not. 
+type proofResult struct { + keys [][]byte // The key set of all elements being iterated, even proving is failed + vals [][]byte // The val set of all elements being iterated, even proving is failed + cont bool // Indicator if there exists more elements in the range, only meaningful when proving is successful + err error // Error occurs in the proving +} + +// valid returns the indicator that range proof is successful or not. +func (result *proofResult) valid() bool { + return result.err == nil +} + +// last returns the last verified element key no matter the range proof is +// successful or not. Nil is returned if nothing involved in the proving. +func (result *proofResult) last() []byte { + var last []byte + if len(result.keys) > 0 { + last = result.keys[len(result.keys)-1] + } + return last +} + +// kvset constructs the set for all touched entries in the map format. +func (result *proofResult) kvset() map[string][]byte { + ret := make(map[string][]byte) + for i := 0; i < len(result.keys); i++ { + ret[string(result.keys[i])] = result.vals[i] + } + return ret +} + +// forEach iterates all the visited elements and applies the given callback on them. +// The iteration is aborted if the callback returns non-nil error. +func (result *proofResult) forEach(callback func(key []byte, val []byte) error) error { + if callback == nil { + return nil + } + for i := 0; i < len(result.keys); i++ { + key, val := result.keys[i], result.vals[i] + if err := callback(key, val); err != nil { + return err + } + } + return nil +} + // proveRange proves the state segment with particular prefix is "valid". // The iteration start point will be assigned if the iterator is restored from // the last interruption. Max will be assigned in order to limit the maximum // amount of data involved in each iteration. 
-func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) ([][]byte, [][]byte, []byte, bool, error) { +func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) *proofResult { var ( keys [][]byte vals [][]byte @@ -207,11 +255,17 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, val, err := valueConvertFn(iter.Value()) if err != nil { log.Debug("Failed to convert the flat state", "kind", kind, "key", common.BytesToHash(key[len(prefix):]), "error", err) - return nil, nil, keys[len(keys)-1], false, err + return &proofResult{keys: keys, vals: vals, cont: false, err: err} } vals = append(vals, val) } - // Find out the key of last iterated element. + // The snap state is exhausted, pass the entire key/val set for verification + if origin == nil && len(keys) == max { + _, _, _, _, err := trie.VerifyRangeProof(root, nil, nil, keys, vals, nil) + return &proofResult{keys: keys, vals: vals, cont: false, err: err} + } + // Snap state is chunked, generate edge proofs for verification. + // Firstly find out the key of last iterated element. 
var last []byte if len(keys) > 0 { last = keys[len(keys)-1] @@ -222,26 +276,26 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, } if err := tr.Prove(origin, 0, proof); err != nil { log.Debug("Failed to prove range", "kind", kind, "origin", origin, "err", err) - return nil, nil, last, false, err + return &proofResult{keys: keys, vals: vals, cont: false, err: err} } if last != nil { if err := tr.Prove(last, 0, proof); err != nil { log.Debug("Failed to prove range", "kind", kind, "last", last, "err", err) - return nil, nil, last, false, err + return &proofResult{keys: keys, vals: vals, cont: false, err: err} } } // Verify the state segment with range prover, ensure that all flat states // in this range correspond to merkle trie. _, _, _, cont, err := trie.VerifyRangeProof(root, origin, last, keys, vals, proof) if err != nil { - return nil, nil, last, false, err + return &proofResult{keys: keys, vals: vals, cont: false, err: err} } // Range prover says the trie still has some elements on the right side but // the database is exhausted, then data loss is detected. if cont && len(keys) < max { - return nil, nil, last, false, errors.New("data loss in the state range") + return &proofResult{keys: keys, vals: vals, cont: false, err: errors.New("data loss in the state range")} } - return keys, vals, last, !cont, nil + return &proofResult{keys: keys, vals: vals, cont: cont, err: nil} } // genRange generates the state segment with particular prefix. 
Generation can @@ -251,26 +305,27 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig tr, err := trie.New(root, dl.triedb) if err != nil { stats.Log("Trie missing, state snapshotting paused", root, dl.genMarker) - return false, nil, errors.New("trie is missing") + return false, nil, err } // Use range prover to check the validity of the flat state in the range - keys, vals, last, exhausted, err := dl.proveRange(root, tr, prefix, kind, origin, max, valueConvertFn) - if err == nil { + result := dl.proveRange(root, tr, prefix, kind, origin, max, valueConvertFn) + if result.valid() { + last := result.last() snapSuccessfulRangeProofMeter.Mark(1) log.Debug("Proved state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last) // The verification is passed, process each state with the given // callback function. If this state represents a contract, the // corresponding storage check will be performed in the callback - for i := 0; i < len(keys); i++ { - if err := onState(keys[i], vals[i], false); err != nil { - return false, nil, err - } + if err := result.forEach(func(key []byte, val []byte) error { return onState(key, val, false) }); err != nil { + return false, nil, err } - log.Debug("Recovered state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last, "count", len(keys)) - return exhausted, last, nil + log.Debug("Recovered state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last, "count", len(result.keys)) + return !result.cont, result.last(), nil } snapFailedRangeProofMeter.Mark(1) + + last := result.last() log.Debug("Detected outdated state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last, "error", err) // Special case, the entire trie is missing. 
In the original trie scheme, @@ -297,9 +352,31 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig wipedMeter = snapWipedStorageMeter } limit := increseKey(common.CopyBytes(last)) - if err := wipeKeyRange(dl.diskdb, kind, prefix, origin, limit, len(prefix)+common.HashLength, wipedMeter, false); err != nil { + + // Batch deletions together to avoid holding an iterator for too long + var batch = dl.diskdb.NewBatch() + + // Iterate over the key-range and delete all of them + if err := result.forEach(func(key []byte, val []byte) error { + if err := batch.Delete(append(prefix, key...)); err != nil { + return err + } + if batch.ValueSize() > ethdb.IdealBatchSize { + if err := batch.Write(); err != nil { + return err + } + batch.Reset() + } + return nil + }); err != nil { return false, nil, err } + if batch.ValueSize() > 0 { + if err := batch.Write(); err != nil { + return false, nil, err + } + } + wipedMeter.Mark(int64(len(result.keys))) log.Debug("Wiped currupted state range", "kind", kind, "prefix", prefix, "origin", origin, "limit", limit) } var ( From 7c8e43ce112b82cdb4135f3b4546bc8dcb274b1e Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Tue, 16 Mar 2021 16:07:53 +0800 Subject: [PATCH 31/75] core/state/snapshot: skip wiping correct states --- core/state/snapshot/generate.go | 163 +++++++++++++++++--------------- 1 file changed, 89 insertions(+), 74 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index d0fab6835b84..af4d8cdbe8e8 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -183,15 +183,15 @@ func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorSta // proofResult contains the output of range proving which can be used // for further processing no matter it's successful or not. 
type proofResult struct { - keys [][]byte // The key set of all elements being iterated, even proving is failed - vals [][]byte // The val set of all elements being iterated, even proving is failed - cont bool // Indicator if there exists more elements in the range, only meaningful when proving is successful - err error // Error occurs in the proving + keys [][]byte // The key set of all elements being iterated, even proving is failed + vals [][]byte // The val set of all elements being iterated, even proving is failed + cont bool // Indicator if there exists more elements in the range, only meaningful when proving is successful + proofErr error // Indicator whether the given state range is valid or not } // valid returns the indicator that range proof is successful or not. func (result *proofResult) valid() bool { - return result.err == nil + return result.proofErr == nil } // last returns the last verified element key no matter the range proof is @@ -204,15 +204,6 @@ func (result *proofResult) last() []byte { return last } -// kvset constructs the set for all touched entries in the map format. -func (result *proofResult) kvset() map[string][]byte { - ret := make(map[string][]byte) - for i := 0; i < len(result.keys); i++ { - ret[string(result.keys[i])] = result.vals[i] - } - return ret -} - // forEach iterates all the visited elements and applies the given callback on them. // The iteration is aborted if the callback returns non-nil error. func (result *proofResult) forEach(callback func(key []byte, val []byte) error) error { @@ -232,7 +223,10 @@ func (result *proofResult) forEach(callback func(key []byte, val []byte) error) // The iteration start point will be assigned if the iterator is restored from // the last interruption. Max will be assigned in order to limit the maximum // amount of data involved in each iteration. 
-func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) *proofResult { +// +// The proof result will be returned if the range proving is finished, otherwise +// the error will be returned to abort the entire procedure. +func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) (*proofResult, error) { var ( keys [][]byte vals [][]byte @@ -255,14 +249,14 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, val, err := valueConvertFn(iter.Value()) if err != nil { log.Debug("Failed to convert the flat state", "kind", kind, "key", common.BytesToHash(key[len(prefix):]), "error", err) - return &proofResult{keys: keys, vals: vals, cont: false, err: err} + return nil, err } vals = append(vals, val) } // The snap state is exhausted, pass the entire key/val set for verification if origin == nil && len(keys) == max { _, _, _, _, err := trie.VerifyRangeProof(root, nil, nil, keys, vals, nil) - return &proofResult{keys: keys, vals: vals, cont: false, err: err} + return &proofResult{keys: keys, vals: vals, cont: false, proofErr: err}, nil } // Snap state is chunked, generate edge proofs for verification. // Firstly find out the key of last iterated element. 
@@ -276,39 +270,42 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, } if err := tr.Prove(origin, 0, proof); err != nil { log.Debug("Failed to prove range", "kind", kind, "origin", origin, "err", err) - return &proofResult{keys: keys, vals: vals, cont: false, err: err} + return &proofResult{keys: keys, vals: vals, cont: false, proofErr: err}, nil } if last != nil { if err := tr.Prove(last, 0, proof); err != nil { log.Debug("Failed to prove range", "kind", kind, "last", last, "err", err) - return &proofResult{keys: keys, vals: vals, cont: false, err: err} + return &proofResult{keys: keys, vals: vals, cont: false, proofErr: err}, nil } } // Verify the state segment with range prover, ensure that all flat states // in this range correspond to merkle trie. _, _, _, cont, err := trie.VerifyRangeProof(root, origin, last, keys, vals, proof) if err != nil { - return &proofResult{keys: keys, vals: vals, cont: false, err: err} + return &proofResult{keys: keys, vals: vals, cont: false, proofErr: err}, nil } // Range prover says the trie still has some elements on the right side but // the database is exhausted, then data loss is detected. if cont && len(keys) < max { - return &proofResult{keys: keys, vals: vals, cont: false, err: errors.New("data loss in the state range")} + return &proofResult{keys: keys, vals: vals, cont: false, proofErr: errors.New("data loss in the state range")}, nil } - return &proofResult{keys: keys, vals: vals, cont: cont, err: nil} + return &proofResult{keys: keys, vals: vals, cont: cont, proofErr: nil}, nil } // genRange generates the state segment with particular prefix. Generation can // either verify the correctness of existing state through rangeproof and skip // generation, or iterate trie to regenerate state on demand. 
-func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, stats *generatorStats, onState func(key []byte, val []byte, regen bool) error, valueConvertFn func([]byte) ([]byte, error)) (bool, []byte, error) { +func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, stats *generatorStats, onState func(key []byte, val []byte, write bool, delete bool) error, valueConvertFn func([]byte) ([]byte, error)) (bool, []byte, error) { tr, err := trie.New(root, dl.triedb) if err != nil { stats.Log("Trie missing, state snapshotting paused", root, dl.genMarker) return false, nil, err } // Use range prover to check the validity of the flat state in the range - result := dl.proveRange(root, tr, prefix, kind, origin, max, valueConvertFn) + result, err := dl.proveRange(root, tr, prefix, kind, origin, max, valueConvertFn) + if err != nil { + return false, nil, err + } if result.valid() { last := result.last() snapSuccessfulRangeProofMeter.Mark(1) @@ -317,7 +314,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig // The verification is passed, process each state with the given // callback function. If this state represents a contract, the // corresponding storage check will be performed in the callback - if err := result.forEach(func(key []byte, val []byte) error { return onState(key, val, false) }); err != nil { + if err := result.forEach(func(key []byte, val []byte) error { return onState(key, val, false, false) }); err != nil { return false, nil, err } log.Debug("Recovered state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last, "count", len(result.keys)) @@ -340,64 +337,72 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig } meter.Mark(1) } - // The verifcation is failed, the flat state in this range cannot match the - // merkle trie. 
Alternatively, use the fallback generation mechanism to regenerate - // the correct flat state by iterating trie. But wiping the existent outdated flat - // data in this range first. - if last != nil { - // Note if the returned last is nil(no more flat state can be found in the database), - // the wiping can be skipped. - wipedMeter := snapWipedAccountMeter - if kind == "storage" { - wipedMeter = snapWipedStorageMeter + var ( + aborted bool + iter = trie.NewIterator(tr.NodeIterator(origin)) + kvkeys, kvvals = result.keys, result.vals + + // counters + count = 0 // number of states delivered by iterator + created = 0 // states created from the trie + updated = 0 // states updated from the trie + deleted = 0 // states not in trie, but were in snapshot + untouched = 0 // states already correct + ) + for iter.Next() { + if last != nil && bytes.Compare(iter.Key, last) > 0 { + aborted = true + break } - limit := increseKey(common.CopyBytes(last)) - - // Batch deletions together to avoid holding an iterator for too long - var batch = dl.diskdb.NewBatch() + count += 1 - // Iterate over the key-range and delete all of them - if err := result.forEach(func(key []byte, val []byte) error { - if err := batch.Delete(append(prefix, key...)); err != nil { - return err + // Delete all stale snapshot states in the front + var cmp int + for len(kvkeys) > 0 { + cmp := bytes.Compare(kvkeys[0], iter.Key) + if cmp >= 0 { + break } - if batch.ValueSize() > ethdb.IdealBatchSize { - if err := batch.Write(); err != nil { - return err - } - batch.Reset() + if err := onState(kvkeys[0], kvvals[0], false, true); err != nil { + return false, nil, err } - return nil - }); err != nil { - return false, nil, err + kvkeys = kvkeys[1:] + kvvals = kvvals[1:] + deleted += 1 } - if batch.ValueSize() > 0 { - if err := batch.Write(); err != nil { + // Create the missing snapshot states by trie + if len(kvkeys) == 0 || cmp > 0 { + created += 1 + if err := onState(iter.Key, iter.Value, true, false); err != nil 
{ return false, nil, err } + continue } - wipedMeter.Mark(int64(len(result.keys))) - log.Debug("Wiped currupted state range", "kind", kind, "prefix", prefix, "origin", origin, "limit", limit) - } - var ( - count int - iter = trie.NewIterator(tr.NodeIterator(origin)) - ) - for iter.Next() { - if last != nil && bytes.Compare(iter.Key, last) > 0 { - log.Debug("Regenerated state range", "kind", kind, "prefix", prefix, "root", root, "origin", origin, "last", last, "count", count) - return false, last, nil // Apparently the trie is not exhausted - } - if err := onState(iter.Key, iter.Value, true); err != nil { - return false, nil, err + // Update the stale states by trie + if !bytes.Equal(kvvals[0], iter.Value) { + updated += 1 + if err := onState(iter.Key, iter.Value, true, false); err != nil { + return false, nil, err + } + } else { + // The "stale state" is actually not stale, skip it + untouched += 1 } - count += 1 + kvkeys = kvkeys[1:] + kvvals = kvvals[1:] } if iter.Err != nil { return false, nil, iter.Err } + // Delete all stale snapshot states behind + for i := 0; i < len(kvkeys); i++ { + if err := onState(kvkeys[i], kvvals[i], false, true); err != nil { + return false, nil, err + } + deleted += 1 + } log.Debug("Regenerated state range", "kind", kind, "prefix", prefix, "root", root, "origin", origin, "last", last, "count", count) - return true, nil, nil // The entire trie is exhausted + return !aborted, nil, nil // The entire trie is exhausted } // generate is a background thread that iterates over the state and storage tries, @@ -454,9 +459,13 @@ func (dl *diskLayer) generate(stats *generatorStats) { return nil } - onAccount := func(key []byte, val []byte, regen bool) error { - // Retrieve the current account and flatten it into the internal format + onAccount := func(key []byte, val []byte, write bool, delete bool) error { accountHash := common.BytesToHash(key) + if delete { + rawdb.DeleteAccountSnapshot(batch, accountHash) + return nil + } + // Retrieve the 
current account and flatten it into the internal format var acc struct { Nonce uint64 Balance *big.Int @@ -470,7 +479,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { // If the account is not yet in-progress, write it out if accMarker == nil || !bytes.Equal(accountHash[:], accMarker) { - if regen { + if write { rawdb.WriteAccountSnapshot(batch, accountHash, data) snapGeneratedAccountMeter.Mark(1) } else { @@ -490,8 +499,13 @@ func (dl *diskLayer) generate(stats *generatorStats) { if accMarker != nil && bytes.Equal(accountHash[:], accMarker) && len(dl.genMarker) > common.HashLength { storeMarker = dl.genMarker[common.HashLength:] } - onStorage := func(key []byte, val []byte, regen bool) error { - if regen { + onStorage := func(key []byte, val []byte, write bool, delete bool) error { + if delete { + rawdb.DeleteStorageSnapshot(batch, accountHash, common.BytesToHash(key)) + snapWipedStorageMeter.Mark(1) + return nil + } + if write { rawdb.WriteStorageSnapshot(batch, accountHash, common.BytesToHash(key), val) snapGeneratedStorageMeter.Mark(1) } else { @@ -499,6 +513,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { } stats.storage += common.StorageSize(1 + 2*common.HashLength + len(val)) stats.slots++ + // If we've exceeded our batch allowance or termination was requested, flush to disk if err := checkAndFlush(append(accountHash[:], key...)); err != nil { return err From 4539a8ff7ecb7138ea4d64b77192b6a80c1b4353 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Tue, 16 Mar 2021 21:02:40 +0800 Subject: [PATCH 32/75] core/state/snapshot: updates --- core/state/snapshot/generate.go | 21 +++++++++++---------- core/state/snapshot/generate_test.go | 1 - 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index af4d8cdbe8e8..07c7c8203e46 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -26,6 +26,7 @@ import ( 
"github.com/VictoriaMetrics/fastcache" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethereum/go-ethereum/common/math" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/crypto" @@ -306,10 +307,12 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig if err != nil { return false, nil, err } + last := result.last() + + // The range prover says the range is correct, skip trie iteration if result.valid() { - last := result.last() snapSuccessfulRangeProofMeter.Mark(1) - log.Debug("Proved state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last) + log.Debug("Proved state range", "kind", kind, "prefix", hexutil.Encode(prefix), "origin", hexutil.Encode(origin), "last", hexutil.Encode(last)) // The verification is passed, process each state with the given // callback function. If this state represents a contract, the @@ -317,13 +320,11 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig if err := result.forEach(func(key []byte, val []byte) error { return onState(key, val, false, false) }); err != nil { return false, nil, err } - log.Debug("Recovered state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last, "count", len(result.keys)) - return !result.cont, result.last(), nil + log.Debug("Recovered state range", "kind", kind, "prefix", hexutil.Encode(prefix), "origin", hexutil.Encode(origin), "last", hexutil.Encode(last), "count", len(result.keys)) + return !result.cont, last, nil } snapFailedRangeProofMeter.Mark(1) - - last := result.last() - log.Debug("Detected outdated state range", "kind", kind, "prefix", prefix, "origin", origin, "last", last, "error", err) + log.Debug("Detected outdated state range", "kind", kind, "prefix", hexutil.Encode(prefix), "origin", hexutil.Encode(origin), "last", hexutil.Encode(last), "error", err) // Special case, the entire trie is missing. 
In the original trie scheme, // all the duplicated subtries will be filter out(only one copy of data @@ -359,7 +360,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig // Delete all stale snapshot states in the front var cmp int for len(kvkeys) > 0 { - cmp := bytes.Compare(kvkeys[0], iter.Key) + cmp = bytes.Compare(kvkeys[0], iter.Key) if cmp >= 0 { break } @@ -401,8 +402,8 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig } deleted += 1 } - log.Debug("Regenerated state range", "kind", kind, "prefix", prefix, "root", root, "origin", origin, "last", last, "count", count) - return !aborted, nil, nil // The entire trie is exhausted + log.Debug("Regenerated state range", "kind", kind, "prefix", hexutil.Encode(prefix), "root", root, "origin", hexutil.Encode(origin), "last", hexutil.Encode(last), "count", count) + return !aborted, last, nil } // generate is a background thread that iterates over the state and storage tries, diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index aabf998d4f7e..5ac0164cda2a 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -235,7 +235,6 @@ func TestGenerateExistentStateWithWrongStorage(t *testing.T) { acc := &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} val, _ := rlp.EncodeToBytes(acc) accTrie.Update([]byte("acc-5"), val) - val, _ = rlp.EncodeToBytes(acc) rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-5")), val) rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-5")), hashData([]byte("key-1")), []byte("badval-1")) rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-5")), hashData([]byte("key-2")), []byte("badval-2")) From 240e9d655f9370b006511f1082afdf245a65df3f Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Tue, 16 Mar 2021 21:38:02 +0800 Subject: [PATCH 33/75] core/state/snapshot: fixes --- core/state/snapshot/generate.go 
| 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 07c7c8203e46..e998e9d82c93 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -388,6 +388,9 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig } else { // The "stale state" is actually not stale, skip it untouched += 1 + if err := onState(iter.Key, iter.Value, false, false); err != nil { + return false, nil, err + } } kvkeys = kvkeys[1:] kvvals = kvvals[1:] From 2fd4ce2c569111c83878d658b210d493864b0dcb Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 16 Mar 2021 15:18:18 +0100 Subject: [PATCH 34/75] core/state/snapshot: fix panic due to reference flaw in closure --- core/state/snapshot/conversion.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/state/snapshot/conversion.go b/core/state/snapshot/conversion.go index bb87ecddf189..2f27eac03230 100644 --- a/core/state/snapshot/conversion.go +++ b/core/state/snapshot/conversion.go @@ -322,7 +322,7 @@ func generateTrieRoot(db ethdb.KeyValueWriter, it Iterator, account common.Hash, return } if !bytes.Equal(account.Root, subroot.Bytes()) { - results <- fmt.Errorf("invalid subroot(%x), want %x, got %x", it.Hash(), account.Root, subroot) + results <- fmt.Errorf("invalid subroot(%x), want %x, got %x", hash, account.Root, subroot) return } results <- nil From b221f1a950a0cf2e0af70ff23da693993b16f40b Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 16 Mar 2021 15:20:28 +0100 Subject: [PATCH 35/75] core/state/snapshot: fix errors in state generation logic + fix log output --- core/state/snapshot/generate.go | 75 +++++++++++++--------------- core/state/snapshot/generate_test.go | 13 +++-- 2 files changed, 43 insertions(+), 45 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index e998e9d82c93..4219920a372d 100644 --- 
a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -310,9 +310,14 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig last := result.last() // The range prover says the range is correct, skip trie iteration + logCtx := []interface{}{"kind", kind, "prefix", hexutil.Encode(prefix)} + if len(origin) > 0 { + logCtx = append(logCtx, "origin", hexutil.Encode(origin)) + } + logger := log.New(logCtx...) if result.valid() { snapSuccessfulRangeProofMeter.Mark(1) - log.Debug("Proved state range", "kind", kind, "prefix", hexutil.Encode(prefix), "origin", hexutil.Encode(origin), "last", hexutil.Encode(last)) + logger.Debug("Proved state range", "last", hexutil.Encode(last)) // The verification is passed, process each state with the given // callback function. If this state represents a contract, the @@ -320,11 +325,10 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig if err := result.forEach(func(key []byte, val []byte) error { return onState(key, val, false, false) }); err != nil { return false, nil, err } - log.Debug("Recovered state range", "kind", kind, "prefix", hexutil.Encode(prefix), "origin", hexutil.Encode(origin), "last", hexutil.Encode(last), "count", len(result.keys)) return !result.cont, last, nil } + logger.Debug("Detected outdated state range", "last", hexutil.Encode(last), "error", err) snapFailedRangeProofMeter.Mark(1) - log.Debug("Detected outdated state range", "kind", kind, "prefix", hexutil.Encode(prefix), "origin", hexutil.Encode(origin), "last", hexutil.Encode(last), "error", err) // Special case, the entire trie is missing. 
In the original trie scheme, // all the duplicated subtries will be filter out(only one copy of data @@ -351,62 +355,53 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig untouched = 0 // states already correct ) for iter.Next() { - if last != nil && bytes.Compare(iter.Key, last) > 0 { + if count == max { + // Don't keep iterating indefinitely aborted = true break } count += 1 - + write := true // Delete all stale snapshot states in the front - var cmp int for len(kvkeys) > 0 { - cmp = bytes.Compare(kvkeys[0], iter.Key) - if cmp >= 0 { - break - } - if err := onState(kvkeys[0], kvvals[0], false, true); err != nil { - return false, nil, err - } - kvkeys = kvkeys[1:] - kvvals = kvvals[1:] - deleted += 1 - } - // Create the missing snapshot states by trie - if len(kvkeys) == 0 || cmp > 0 { - created += 1 - if err := onState(iter.Key, iter.Value, true, false); err != nil { - return false, nil, err + if cmp := bytes.Compare(kvkeys[0], iter.Key); cmp < 0 { + // delete the key + if err := onState(kvkeys[0], kvvals[0], false, true); err != nil { + return false, nil, err + } + kvkeys = kvkeys[1:] + kvvals = kvvals[1:] + deleted += 1 + } else if cmp == 0 { + // the snapshot key can be overwritten + if write = !bytes.Equal(kvvals[0], iter.Value); write { + updated++ + } else { + untouched++ + } + kvkeys = kvkeys[1:] + kvvals = kvvals[1:] } - continue + break } - // Update the stale states by trie - if !bytes.Equal(kvvals[0], iter.Value) { - updated += 1 - if err := onState(iter.Key, iter.Value, true, false); err != nil { - return false, nil, err - } - } else { - // The "stale state" is actually not stale, skip it - untouched += 1 - if err := onState(iter.Key, iter.Value, false, false); err != nil { - return false, nil, err - } + if err := onState(iter.Key, iter.Value, write, false); err != nil { + return false, nil, err } - kvkeys = kvkeys[1:] - kvvals = kvvals[1:] + last = common.CopyBytes(iter.Key) } if iter.Err != nil { return false, nil, 
iter.Err } - // Delete all stale snapshot states behind + // Delete all stale snapshot states remaining for i := 0; i < len(kvkeys); i++ { if err := onState(kvkeys[i], kvvals[i], false, true); err != nil { return false, nil, err } deleted += 1 } - log.Debug("Regenerated state range", "kind", kind, "prefix", hexutil.Encode(prefix), "root", root, "origin", hexutil.Encode(origin), "last", hexutil.Encode(last), "count", count) - return !aborted, last, nil + logger.Debug("Regenerated state range", "root", root, "last", hexutil.Encode(last), + "count", count, "created", created, "updated", updated, "deleted", deleted, "untouched", untouched) + return !aborted, last, nil // The entire trie is exhausted } // generate is a background thread that iterates over the state and storage tries, diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 5ac0164cda2a..bea0d4e6ee96 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -17,7 +17,9 @@ package snapshot import ( + "github.com/ethereum/go-ethereum/log" "math/big" + "os" "testing" "time" @@ -173,7 +175,7 @@ func checkSnapRoot(t *testing.T, snap *diskLayer, trieRoot common.Hash) { // - the contract(non-empty storage) misses some storage slots // - the contract(non-empty storage) has wrong storage slots func TestGenerateExistentStateWithWrongStorage(t *testing.T) { - //log.Root().SetHandler(log.LvlFilterHandler(log.LvlInfo, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) // We can't use statedb to make a test trie (circular dependency), so make // a fake one manually. 
We're going with a small account trie of 3 accounts, @@ -231,14 +233,15 @@ func TestGenerateExistentStateWithWrongStorage(t *testing.T) { } { // Account five - // This account has the wrong storage slot + // This account has the wrong storage slot - they've been rotated. + // This test that the update-or-replace check works acc := &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} val, _ := rlp.EncodeToBytes(acc) accTrie.Update([]byte("acc-5"), val) rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-5")), val) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-5")), hashData([]byte("key-1")), []byte("badval-1")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-5")), hashData([]byte("key-2")), []byte("badval-2")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-5")), hashData([]byte("key-3")), []byte("badval-3")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-5")), hashData([]byte("key-1")), []byte("val-2")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-5")), hashData([]byte("key-2")), []byte("val-3")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-5")), hashData([]byte("key-3")), []byte("val-1")) } root, _ := accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd From 6c93d5a638d770c0fe0c206d99e5cacdc3a714ae Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 16 Mar 2021 16:58:05 +0100 Subject: [PATCH 36/75] core/state/snapshot: remove an error case --- core/state/snapshot/generate.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 4219920a372d..f83e7e228a9b 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -287,9 +287,10 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, } // Range prover says the trie still has some elements on the right side but // 
the database is exhausted, then data loss is detected. - if cont && len(keys) < max { - return &proofResult{keys: keys, vals: vals, cont: false, proofErr: errors.New("data loss in the state range")}, nil - } + // TODO: Investigate if this is needed (the assumption is that it's not needed) + //if cont && len(keys) < max { + //return &proofResult{keys: keys, vals: vals, cont: true, proofErr: nil}, nil + //} return &proofResult{keys: keys, vals: vals, cont: cont, proofErr: nil}, nil } From 2cf335c1822fd8ea3174d1bbd26a6f684f77a4eb Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 16 Mar 2021 18:39:56 +0100 Subject: [PATCH 37/75] core/state/snapshot: fix condition-check for exhausted snap state --- core/state/snapshot/generate.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index f83e7e228a9b..eb126dd052ef 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -229,17 +229,21 @@ func (result *proofResult) forEach(callback func(key []byte, val []byte) error) // the error will be returned to abort the entire procedure. 
func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) (*proofResult, error) { var ( - keys [][]byte - vals [][]byte - proof = rawdb.NewMemoryDatabase() + keys [][]byte + vals [][]byte + proof = rawdb.NewMemoryDatabase() + aborted = false ) iter := dl.diskdb.NewIterator(prefix, origin) defer iter.Release() - for iter.Next() && len(keys) < max { + for iter.Next() { key := iter.Key() if len(key) != len(prefix)+common.HashLength { - continue + panic("remove this panic later on") + } + if len(keys) == max { + aborted = true } keys = append(keys, common.CopyBytes(key[len(prefix):])) @@ -255,7 +259,7 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, vals = append(vals, val) } // The snap state is exhausted, pass the entire key/val set for verification - if origin == nil && len(keys) == max { + if origin == nil && !aborted { _, _, _, _, err := trie.VerifyRangeProof(root, nil, nil, keys, vals, nil) return &proofResult{keys: keys, vals: vals, cont: false, proofErr: err}, nil } From f25852ef7cdc52b468d00a3f12e858ffa851a383 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 16 Mar 2021 18:45:05 +0100 Subject: [PATCH 38/75] core/state/snapshot: use stackTrie for small tries --- core/state/snapshot/generate.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index eb126dd052ef..c91d268196d5 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -260,8 +260,14 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, } // The snap state is exhausted, pass the entire key/val set for verification if origin == nil && !aborted { - _, _, _, _, err := trie.VerifyRangeProof(root, nil, nil, keys, vals, nil) - return &proofResult{keys: keys, vals: vals, cont: false, proofErr: err}, nil + 
stackTr := trie.NewStackTrie(nil) + for i, key := range keys { + stackTr.TryUpdate(key, common.CopyBytes(vals[i])) + } + if gotRoot := stackTr.Hash(); gotRoot != root { + return &proofResult{keys: keys, vals: vals, cont: false, proofErr: errors.New("wrong root")}, nil + } + return &proofResult{keys: keys, vals: vals, cont: false, proofErr: nil}, nil } // Snap state is chunked, generate edge proofs for verification. // Firstly find out the key of last iterated element. From dab43b34a148944986f9e65cf25398180d4bff36 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 16 Mar 2021 18:56:11 +0100 Subject: [PATCH 39/75] core/state/snapshot: don't resolve small storage tries in vain --- core/state/snapshot/generate.go | 39 ++++++++++++++++------------ core/state/snapshot/generate_test.go | 4 ++- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index c91d268196d5..ea295779a855 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -184,10 +184,11 @@ func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorSta // proofResult contains the output of range proving which can be used // for further processing no matter it's successful or not. 
type proofResult struct { - keys [][]byte // The key set of all elements being iterated, even proving is failed - vals [][]byte // The val set of all elements being iterated, even proving is failed - cont bool // Indicator if there exists more elements in the range, only meaningful when proving is successful - proofErr error // Indicator whether the given state range is valid or not + keys [][]byte // The key set of all elements being iterated, even proving is failed + vals [][]byte // The val set of all elements being iterated, even proving is failed + cont bool // Indicator if there exists more elements in the range, only meaningful when proving is successful + proofErr error // Indicator whether the given state range is valid or not + tr *trie.Trie // The trie, in case the trie was resolved by the prover (may be nil) } // valid returns the indicator that range proof is successful or not. @@ -227,7 +228,7 @@ func (result *proofResult) forEach(callback func(key []byte, val []byte) error) // // The proof result will be returned if the range proving is finished, otherwise // the error will be returned to abort the entire procedure. 
-func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) (*proofResult, error) { +func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) (*proofResult, error) { var ( keys [][]byte vals [][]byte @@ -236,7 +237,6 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, ) iter := dl.diskdb.NewIterator(prefix, origin) defer iter.Release() - for iter.Next() { key := iter.Key() if len(key) != len(prefix)+common.HashLength { @@ -269,6 +269,11 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, } return &proofResult{keys: keys, vals: vals, cont: false, proofErr: nil}, nil } + tr, err := trie.New(root, dl.triedb) + if err != nil { + log.Error("Missing trie", "root", root, "err", err) + return nil, err + } // Snap state is chunked, generate edge proofs for verification. // Firstly find out the key of last iterated element. var last []byte @@ -281,19 +286,19 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, } if err := tr.Prove(origin, 0, proof); err != nil { log.Debug("Failed to prove range", "kind", kind, "origin", origin, "err", err) - return &proofResult{keys: keys, vals: vals, cont: false, proofErr: err}, nil + return &proofResult{keys: keys, vals: vals, cont: false, proofErr: err, tr: tr}, nil } if last != nil { if err := tr.Prove(last, 0, proof); err != nil { log.Debug("Failed to prove range", "kind", kind, "last", last, "err", err) - return &proofResult{keys: keys, vals: vals, cont: false, proofErr: err}, nil + return &proofResult{keys: keys, vals: vals, cont: false, proofErr: err, tr: tr}, nil } } // Verify the state segment with range prover, ensure that all flat states // in this range correspond to merkle trie. 
_, _, _, cont, err := trie.VerifyRangeProof(root, origin, last, keys, vals, proof) if err != nil { - return &proofResult{keys: keys, vals: vals, cont: false, proofErr: err}, nil + return &proofResult{keys: keys, vals: vals, cont: false, proofErr: err, tr: tr}, nil } // Range prover says the trie still has some elements on the right side but // the database is exhausted, then data loss is detected. @@ -301,20 +306,15 @@ func (dl *diskLayer) proveRange(root common.Hash, tr *trie.Trie, prefix []byte, //if cont && len(keys) < max { //return &proofResult{keys: keys, vals: vals, cont: true, proofErr: nil}, nil //} - return &proofResult{keys: keys, vals: vals, cont: cont, proofErr: nil}, nil + return &proofResult{keys: keys, vals: vals, cont: cont, proofErr: nil, tr: tr}, nil } // genRange generates the state segment with particular prefix. Generation can // either verify the correctness of existing state through rangeproof and skip // generation, or iterate trie to regenerate state on demand. 
func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, stats *generatorStats, onState func(key []byte, val []byte, write bool, delete bool) error, valueConvertFn func([]byte) ([]byte, error)) (bool, []byte, error) { - tr, err := trie.New(root, dl.triedb) - if err != nil { - stats.Log("Trie missing, state snapshotting paused", root, dl.genMarker) - return false, nil, err - } // Use range prover to check the validity of the flat state in the range - result, err := dl.proveRange(root, tr, prefix, kind, origin, max, valueConvertFn) + result, err := dl.proveRange(root, prefix, kind, origin, max, valueConvertFn) if err != nil { return false, nil, err } @@ -353,6 +353,13 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig } meter.Mark(1) } + tr := result.tr + if tr == nil { + tr, err = trie.New(root, dl.triedb) + if err != nil { + return false, nil, err + } + } var ( aborted bool iter = trie.NewIterator(tr.NodeIterator(origin)) diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index bea0d4e6ee96..10c3d4f5e655 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -175,7 +175,9 @@ func checkSnapRoot(t *testing.T, snap *diskLayer, trieRoot common.Hash) { // - the contract(non-empty storage) misses some storage slots // - the contract(non-empty storage) has wrong storage slots func TestGenerateExistentStateWithWrongStorage(t *testing.T) { - log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + if false { + log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + } // We can't use statedb to make a test trie (circular dependency), so make // a fake one manually. 
We're going with a small account trie of 3 accounts, From 2fce60d39cb8fd375a269f626a5a975653123e58 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 16 Mar 2021 19:29:32 +0100 Subject: [PATCH 40/75] core/state/snapshot: properly clean up storage of deleted accounts --- core/state/snapshot/generate.go | 13 ++++-- core/state/snapshot/generate_test.go | 62 ++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 3 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index ea295779a855..28326c3e79a6 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -240,6 +240,7 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or for iter.Next() { key := iter.Key() if len(key) != len(prefix)+common.HashLength { + // TODO! Why is this check neeed? panic("remove this panic later on") } if len(keys) == max { @@ -384,7 +385,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig for len(kvkeys) > 0 { if cmp := bytes.Compare(kvkeys[0], iter.Key); cmp < 0 { // delete the key - if err := onState(kvkeys[0], kvvals[0], false, true); err != nil { + if err := onState(kvkeys[0], nil, false, true); err != nil { return false, nil, err } kvkeys = kvkeys[1:] @@ -411,8 +412,8 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig return false, nil, iter.Err } // Delete all stale snapshot states remaining - for i := 0; i < len(kvkeys); i++ { - if err := onState(kvkeys[i], kvvals[i], false, true); err != nil { + for _, key := range kvkeys { + if err := onState(key, nil, false, true); err != nil { return false, nil, err } deleted += 1 @@ -480,6 +481,12 @@ func (dl *diskLayer) generate(stats *generatorStats) { accountHash := common.BytesToHash(key) if delete { rawdb.DeleteAccountSnapshot(batch, accountHash) + // We also need to ensure that any previous snapshot + prefix := append(rawdb.SnapshotStoragePrefix, 
accountHash.Bytes()...) + keyLen := len(rawdb.SnapshotStoragePrefix) + 2*common.HashLength + if err := wipeKeyRange(dl.diskdb, "storage", prefix, nil, nil, keyLen, snapWipedStorageMeter, false); err != nil { + return err + } return nil } // Retrieve the current account and flatten it into the internal format diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 10c3d4f5e655..edb487dedd71 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -17,6 +17,7 @@ package snapshot import ( + "fmt" "github.com/ethereum/go-ethereum/log" "math/big" "os" @@ -426,3 +427,64 @@ func TestGenerateCorruptStorageTrie(t *testing.T) { snap.genAbort <- stop <-stop } + +func getStorageTrie(n int, triedb *trie.Database) *trie.SecureTrie { + stTrie, _ := trie.NewSecure(common.Hash{}, triedb) + for i := 0; i < n; i++ { + k := fmt.Sprintf("key-%d", i) + v := fmt.Sprintf("val-%d", i) + stTrie.Update([]byte(k), []byte(v)) + } + stTrie.Commit(nil) + return stTrie +} + +// Tests that snapshot generation when an extra account with storage exists in the snap state. 
+func TestGenerateWithExtraAccounts(t *testing.T) { + + var ( + diskdb = memorydb.New() + triedb = trie.NewDatabase(diskdb) + stTrie = getStorageTrie(5, triedb) + ) + accTrie, _ := trie.NewSecure(common.Hash{}, triedb) + { // Account one in the trie + acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + // Identical in the snap + rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-1")), val) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-1")), []byte("val-1")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-2")), []byte("val-2")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-3")), []byte("val-3")) + } + { // Account two exists only in the snapshot + acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-2")), val) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-1")), []byte("b-val-1")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-2")), []byte("b-val-2")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-3")), []byte("b-val-3")) + } + root, _ := accTrie.Commit(nil) + t.Logf("root: %x", root) + triedb.Commit(root, false, nil) + + snap := generateSnapshot(diskdb, triedb, 16, root) + select { + case <-snap.genPending: + // Snapshot generation succeeded + + case <-time.After(250 * time.Millisecond): + t.Errorf("Snapshot generation failed") + } + checkSnapRoot(t, snap, root) + // Signal abortion to the generator and wait for it to tear down + stop := make(chan *generatorStats) + snap.genAbort <- stop + <-stop + // If 
we now inspect the snap db, there should exist no extraneous storage items + if data := rawdb.ReadStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data != nil { + t.Fatalf("expected slot to be removed, got %v", string(data)) + } +} From 148bde6619556acf93494245fedbe84c9283c2a4 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 16 Mar 2021 19:52:48 +0100 Subject: [PATCH 41/75] core/state/snapshot: avoid RLP-encoding in some cases + minor nitpicks --- core/state/snapshot/generate.go | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 28326c3e79a6..b74f58d84451 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -499,17 +499,18 @@ func (dl *diskLayer) generate(stats *generatorStats) { if err := rlp.DecodeBytes(val, &acc); err != nil { log.Crit("Invalid account encountered during snapshot creation", "err", err) } - data := SlimAccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash) - // If the account is not yet in-progress, write it out if accMarker == nil || !bytes.Equal(accountHash[:], accMarker) { - if write { + dataLen := len(val) // Approximate size, saves us a round of RLP-encoding + if !write { + snapRecoveredAccountMeter.Mark(1) + } else { + data := SlimAccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash) + dataLen = len(data) rawdb.WriteAccountSnapshot(batch, accountHash, data) snapGeneratedAccountMeter.Mark(1) - } else { - snapRecoveredAccountMeter.Mark(1) } - stats.storage += common.StorageSize(1 + common.HashLength + len(data)) + stats.storage += common.StorageSize(1 + common.HashLength + dataLen) stats.accounts++ } // If we've exceeded our batch allowance or termination was requested, flush to disk @@ -553,8 +554,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { if exhausted { break } - storeOrigin = increseKey(last) - if storeOrigin == nil { + if 
storeOrigin = increaseKey(last); storeOrigin == nil { break // special case, the last is 0xffffffff...fff } } @@ -590,8 +590,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { if exhausted { break } - accOrigin = increseKey(last) - if accOrigin == nil { + if accOrigin = increaseKey(last); accOrigin == nil { break // special case, the last is 0xffffffff...fff } accountRange = accountCheckRange @@ -622,9 +621,9 @@ func (dl *diskLayer) generate(stats *generatorStats) { abort <- nil } -// increseKey increase the input key by one bit. Return nil if the entire +// increaseKey increase the input key by one bit. Return nil if the entire // addition operation overflows, -func increseKey(key []byte) []byte { +func increaseKey(key []byte) []byte { for i := len(key) - 1; i >= 0; i-- { key[i]++ if key[i] != 0x0 { From c24ef635132fda1ebcd9093773ada60865dc5eba Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 16 Mar 2021 19:58:50 +0100 Subject: [PATCH 42/75] core/state/snapshot: fix error (+testcase) --- core/state/snapshot/generate.go | 1 + core/state/snapshot/generate_test.go | 50 ++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index b74f58d84451..cda7c7c06b61 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -391,6 +391,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig kvkeys = kvkeys[1:] kvvals = kvvals[1:] deleted += 1 + continue } else if cmp == 0 { // the snapshot key can be overwritten if write = !bytes.Equal(kvvals[0], iter.Value); write { diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index edb487dedd71..42756116c44d 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -488,3 +488,53 @@ func TestGenerateWithExtraAccounts(t *testing.T) { t.Fatalf("expected slot to be removed, got %v", string(data)) } } + 
+// Tests that snapshot generation when an extra account with storage exists in the snap state. +func TestGenerateWithManyExtraAccounts(t *testing.T) { + + var ( + diskdb = memorydb.New() + triedb = trie.NewDatabase(diskdb) + stTrie = getStorageTrie(5, triedb) + ) + accTrie, _ := trie.NewSecure(common.Hash{}, triedb) + { // Account one in the trie + acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + // Identical in the snap + rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-1")), val) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-1")), []byte("val-1")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-2")), []byte("val-2")) + rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-3")), []byte("val-3")) + } + { // 100 accounts exist only in snapshot + for i := 0; i < 100; i++ { + acc := &Account{Balance: big.NewInt(int64(i)), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + key := hashData([]byte(fmt.Sprintf("acc-%d", i))) + rawdb.WriteAccountSnapshot(diskdb, key, val) + } + } + root, _ := accTrie.Commit(nil) + t.Logf("root: %x", root) + triedb.Commit(root, false, nil) + + snap := generateSnapshot(diskdb, triedb, 16, root) + select { + case <-snap.genPending: + // Snapshot generation succeeded + + case <-time.After(250 * time.Millisecond): + t.Errorf("Snapshot generation failed") + } + checkSnapRoot(t, snap, root) + // Signal abortion to the generator and wait for it to tear down + stop := make(chan *generatorStats) + snap.genAbort <- stop + <-stop + // If we now inspect the snap db, there should exist no extraneous storage items + if data := rawdb.ReadStorageSnapshot(diskdb, hashData([]byte("acc-2")), 
hashData([]byte("b-key-1"))); data != nil { + t.Fatalf("expected slot to be removed, got %v", string(data)) + } +} From 2625274bb1d2a04185f5a8d4505968c2bf23d2a2 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 16 Mar 2021 20:06:11 +0100 Subject: [PATCH 43/75] core/state/snapshot: clean up tests a bit --- core/state/snapshot/generate_test.go | 31 ++++++++++++++++------------ 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 42756116c44d..9ea7104ccf49 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -453,18 +453,22 @@ func TestGenerateWithExtraAccounts(t *testing.T) { val, _ := rlp.EncodeToBytes(acc) accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e // Identical in the snap - rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-1")), val) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-1")), []byte("val-1")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-2")), []byte("val-2")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-3")), []byte("val-3")) + key := hashData([]byte("acc-1")) + rawdb.WriteAccountSnapshot(diskdb, key, val) + rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-1")), []byte("val-1")) + rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-2")), []byte("val-2")) + rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-3")), []byte("val-3")) + rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-4")), []byte("val-4")) + rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-5")), []byte("val-5")) } { // Account two exists only in the snapshot acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} val, _ := rlp.EncodeToBytes(acc) - 
rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-2")), val) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-1")), []byte("b-val-1")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-2")), []byte("b-val-2")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-3")), []byte("b-val-3")) + key := hashData([]byte("acc-2")) + rawdb.WriteAccountSnapshot(diskdb, key, val) + rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("b-key-1")), []byte("b-val-1")) + rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("b-key-2")), []byte("b-val-2")) + rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("b-key-3")), []byte("b-val-3")) } root, _ := accTrie.Commit(nil) t.Logf("root: %x", root) @@ -495,7 +499,7 @@ func TestGenerateWithManyExtraAccounts(t *testing.T) { var ( diskdb = memorydb.New() triedb = trie.NewDatabase(diskdb) - stTrie = getStorageTrie(5, triedb) + stTrie = getStorageTrie(3, triedb) ) accTrie, _ := trie.NewSecure(common.Hash{}, triedb) { // Account one in the trie @@ -503,10 +507,11 @@ func TestGenerateWithManyExtraAccounts(t *testing.T) { val, _ := rlp.EncodeToBytes(acc) accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e // Identical in the snap - rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-1")), val) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-1")), []byte("val-1")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-2")), []byte("val-2")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-3")), []byte("val-3")) + key := hashData([]byte("acc-1")) + rawdb.WriteAccountSnapshot(diskdb, key, val) + rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-1")), []byte("val-1")) + rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-2")), []byte("val-2")) + 
rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-3")), []byte("val-3")) } { // 100 accounts exist only in snapshot for i := 0; i < 100; i++ { From ac645a12ee52b59c1612e3c993e9814a030f6b7d Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 16 Mar 2021 22:48:09 +0100 Subject: [PATCH 44/75] core/state/snapshot: work in progress on better tests --- core/state/snapshot/generate_test.go | 164 +++++++++++++++------------ 1 file changed, 91 insertions(+), 73 deletions(-) diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 9ea7104ccf49..400cce7052cc 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -18,9 +18,7 @@ package snapshot import ( "fmt" - "github.com/ethereum/go-ethereum/log" "math/big" - "os" "testing" "time" @@ -170,88 +168,108 @@ func checkSnapRoot(t *testing.T, snap *diskLayer, trieRoot common.Hash) { } } -// Tests that snapshot generation with existent flat state, where the flat state contains -// some errors: -// - the contract with empty storage root but has storage entries in the disk -// - the contract(non-empty storage) misses some storage slots -// - the contract(non-empty storage) has wrong storage slots -func TestGenerateExistentStateWithWrongStorage(t *testing.T) { - if false { - log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) - } - - // We can't use statedb to make a test trie (circular dependency), so make - // a fake one manually. We're going with a small account trie of 3 accounts, - // two of which also has the same 3-slot storage trie attached. 
- var ( - diskdb = memorydb.New() - triedb = trie.NewDatabase(diskdb) - ) - stTrie, _ := trie.NewSecure(common.Hash{}, triedb) - stTrie.Update([]byte("key-1"), []byte("val-1")) - stTrie.Update([]byte("key-2"), []byte("val-2")) - stTrie.Update([]byte("key-3"), []byte("val-3")) - stTrie.Commit(nil) +type testHelper struct { + diskdb *memorydb.Database + triedb *trie.Database + accTrie *trie.SecureTrie +} +func newHelper() *testHelper { + diskdb := memorydb.New() + triedb := trie.NewDatabase(diskdb) accTrie, _ := trie.NewSecure(common.Hash{}, triedb) - - { // Account one, miss storage slots in the end(key-3) - acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} - val, _ := rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-1"), val) - rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-1")), val) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-1")), []byte("val-1")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-1")), hashData([]byte("key-2")), []byte("val-2")) + return &testHelper{ + diskdb: diskdb, + triedb: triedb, + accTrie: accTrie, } - { // Account two, miss storage slots in the beginning(key-1) - acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} - val, _ := rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-2"), val) - rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-2")), val) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("key-2")), []byte("val-2")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("key-3")), []byte("val-3")) +} + +// addAccount adds an account to the trie and snapshot, and return t +func (t *testHelper) addAccount(accPreKey string, acc *Account) { + val, _ := rlp.EncodeToBytes(acc) + // Add to trie + t.accTrie.Update([]byte(accPreKey), val) + key := hashData([]byte(accPreKey)) + // Add account to snapshot + 
rawdb.WriteAccountSnapshot(t.diskdb, key, val) +} + +func (t *testHelper) addSnapStorage(accPreKey string, slotKeys []string, slotVals []string) { + key := hashData([]byte(accPreKey)) + // Add any storage slots + for i, sKey := range slotKeys { + sVal := []byte(slotVals[i]) + rawdb.WriteStorageSnapshot(t.diskdb, key, hashData([]byte(sKey)), sVal) } - { // Account three - // The storage root is emptyHash, but the flat db has some storage values. This can happen - // if the storage was unset during sync - acc := &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} - val, _ := rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-3"), val) - diskdb.Put(hashData([]byte("acc-3")).Bytes(), val) - rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-3")), val) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-3")), hashData([]byte("key-1")), []byte("val-1")) +} + +func (t *testHelper) writeSnapAccount(accPreKey string, acc *Account) { + val, _ := rlp.EncodeToBytes(acc) + key := hashData([]byte(accPreKey)) + rawdb.WriteAccountSnapshot(t.diskdb, key, val) +} + +func (t *testHelper) makeStorageTrie(keys []string, values []string) []byte { + stTrie, _ := trie.NewSecure(common.Hash{}, t.triedb) + for i, k := range keys { + stTrie.Update([]byte(k), []byte(values[i])) + } + root, _ := stTrie.Commit(nil) + return root.Bytes() +} - { // Account four - // This account changed codehash - acc := &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} - val, _ := rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-4"), val) +func (t *testHelper) Generate() (common.Hash, *diskLayer) { + root, _ := t.accTrie.Commit(nil) + t.triedb.Commit(root, false, nil) + snap := generateSnapshot(t.diskdb, t.triedb, 16, root) + return root, snap +} + +// Tests that snapshot generation with existent flat state, where the flat state contains +// some errors: +// - the contract with empty storage root but has storage entries in 
the disk +// - the contract(non-empty storage) misses some storage slots +// - the contract(non-empty storage) has wrong storage slots +func TestGenerateExistentStateWithWrongStorage(t *testing.T) { + //log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + + helper := newHelper() + stRoot := helper.makeStorageTrie([]string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + + // Account one, miss storage slots in the end(key-3) + helper.addAccount("acc-1", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-1", []string{"key-1", "key-2"}, []string{"val-1", "val-2"}) + + // Account two, miss storage slots in the beginning(key-1) + helper.addAccount("acc-2", &Account{Balance: big.NewInt(2), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-2", []string{"key-2", "key-3"}, []string{"val-2", "val-3"}) + + // Account three + // The storage root is emptyHash, but the flat db has some storage values. 
This can happen + // if the storage was unset during sync + helper.addAccount("acc-3", &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-3", []string{"key-1"}, []string{"val-1"}) + + // Account four has a modified codehash + { + acc := &Account{Balance: big.NewInt(3), Root: stRoot, CodeHash: emptyCode.Bytes()} + helper.addAccount("acc-4", acc) + helper.addSnapStorage("acc-4", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + // Overwrite the codehash in the snapdata acc.CodeHash = hashData([]byte("codez")).Bytes() - val, _ = rlp.EncodeToBytes(acc) - rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-4")), val) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-4")), hashData([]byte("key-1")), []byte("val-1")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-4")), hashData([]byte("key-2")), []byte("val-2")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-4")), hashData([]byte("key-3")), []byte("val-3")) + helper.writeSnapAccount("acc-4", acc) } - { // Account five - // This account has the wrong storage slot - they've been rotated. - // This test that the update-or-replace check works - acc := &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} - val, _ := rlp.EncodeToBytes(acc) - accTrie.Update([]byte("acc-5"), val) - rawdb.WriteAccountSnapshot(diskdb, hashData([]byte("acc-5")), val) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-5")), hashData([]byte("key-1")), []byte("val-2")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-5")), hashData([]byte("key-2")), []byte("val-3")) - rawdb.WriteStorageSnapshot(diskdb, hashData([]byte("acc-5")), hashData([]byte("key-3")), []byte("val-1")) - } + // Account 5 has wrong storage slot values - they've been rotated. 
+ // This test that the update-or-replace check works + helper.addAccount("acc-5", &Account{Balance: big.NewInt(3), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-5", []string{"key-1", "key-2", "key-3"}, []string{"val-2", "val-3", "val-1"}) - root, _ := accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd - t.Logf("Root: %#x\n", root) - triedb.Commit(root, false, nil) + root, snap := helper.Generate() + t.Logf("Root: %#x\n", root) // Root: 0x3a97ece15e2539ab3524783c37ca153a62e28faba76a752e826da24a9020d44f - snap := generateSnapshot(diskdb, triedb, 16, root) select { case <-snap.genPending: // Snapshot generation succeeded From 823d0c5fe359ad7610a2528362ebaa66a9334428 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 17 Mar 2021 10:50:39 +0800 Subject: [PATCH 45/75] core/state/snapshot: polish code --- core/state/snapshot/generate.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index cda7c7c06b61..811cb91016b6 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -254,13 +254,14 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or } val, err := valueConvertFn(iter.Value()) if err != nil { + // TODO! 
The corrupted slim state should somehow be recovered log.Debug("Failed to convert the flat state", "kind", kind, "key", common.BytesToHash(key[len(prefix):]), "error", err) return nil, err } vals = append(vals, val) } // The snap state is exhausted, pass the entire key/val set for verification - if origin == nil && !aborted { + if origin == nil && aborted { stackTr := trie.NewStackTrie(nil) for i, key := range keys { stackTr.TryUpdate(key, common.CopyBytes(vals[i])) @@ -301,12 +302,6 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or if err != nil { return &proofResult{keys: keys, vals: vals, cont: false, proofErr: err, tr: tr}, nil } - // Range prover says the trie still has some elements on the right side but - // the database is exhausted, then data loss is detected. - // TODO: Investigate if this is needed (the assumption is that it's not needed) - //if cont && len(keys) < max { - //return &proofResult{keys: keys, vals: vals, cont: true, proofErr: nil}, nil - //} return &proofResult{keys: keys, vals: vals, cont: cont, proofErr: nil, tr: tr}, nil } @@ -321,12 +316,14 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig } last := result.last() - // The range prover says the range is correct, skip trie iteration + // Construct contextual logger logCtx := []interface{}{"kind", kind, "prefix", hexutil.Encode(prefix)} if len(origin) > 0 { logCtx = append(logCtx, "origin", hexutil.Encode(origin)) } logger := log.New(logCtx...) 
+ + // The range prover says the range is correct, skip trie iteration if result.valid() { snapSuccessfulRangeProofMeter.Mark(1) logger.Debug("Proved state range", "last", hexutil.Encode(last)) @@ -380,8 +377,8 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig break } count += 1 + write := true - // Delete all stale snapshot states in the front for len(kvkeys) > 0 { if cmp := bytes.Compare(kvkeys[0], iter.Key); cmp < 0 { // delete the key @@ -482,7 +479,9 @@ func (dl *diskLayer) generate(stats *generatorStats) { accountHash := common.BytesToHash(key) if delete { rawdb.DeleteAccountSnapshot(batch, accountHash) - // We also need to ensure that any previous snapshot + snapWipedAccountMeter.Mark(1) + + // Ensure that any previous snapshot storage values are cleared prefix := append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...) keyLen := len(rawdb.SnapshotStoragePrefix) + 2*common.HashLength if err := wipeKeyRange(dl.diskdb, "storage", prefix, nil, nil, keyLen, snapWipedStorageMeter, false); err != nil { From 5a36506439138f8a36f31f7e0c2f0de07b1f14e0 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 17 Mar 2021 12:44:59 +0800 Subject: [PATCH 46/75] core/state/snapshot: fix trie iteration abortion trigger --- core/state/snapshot/generate.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 811cb91016b6..c2cffc1b508c 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -371,8 +371,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig untouched = 0 // states already correct ) for iter.Next() { - if count == max { - // Don't keep iterating indefinitely + if last != nil && bytes.Compare(iter.Key, last) > 0 { aborted = true break } @@ -404,7 +403,6 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig if err := onState(iter.Key, iter.Value, write, 
false); err != nil { return false, nil, err } - last = common.CopyBytes(iter.Key) } if iter.Err != nil { return false, nil, iter.Err From 664615fc8dc7fe5b78610497bd499b14a5d7f3a8 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 17 Mar 2021 13:16:26 +0800 Subject: [PATCH 47/75] core/state/snapshot: fixes flaws --- core/state/snapshot/generate.go | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index c2cffc1b508c..678295ad0bf1 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -237,6 +237,7 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or ) iter := dl.diskdb.NewIterator(prefix, origin) defer iter.Release() + for iter.Next() { key := iter.Key() if len(key) != len(prefix)+common.HashLength { @@ -254,14 +255,19 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or } val, err := valueConvertFn(iter.Value()) if err != nil { - // TODO! The corrupted slim state should somehow be recovered - log.Debug("Failed to convert the flat state", "kind", kind, "key", common.BytesToHash(key[len(prefix):]), "error", err) - return nil, err + // Sepcial case, the state data is corrupted(invalid slim-format account), + // don't abort the entire procedure directly. Instead, let the fallback + // generation to heal the invalid data. + // + // Here append the original value to ensure that the number of key and + // value are the same. 
+ vals = append(vals, common.CopyBytes(iter.Value())) + continue } vals = append(vals, val) } // The snap state is exhausted, pass the entire key/val set for verification - if origin == nil && aborted { + if origin == nil && !aborted { stackTr := trie.NewStackTrie(nil) for i, key := range keys { stackTr.TryUpdate(key, common.CopyBytes(vals[i])) @@ -271,12 +277,12 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or } return &proofResult{keys: keys, vals: vals, cont: false, proofErr: nil}, nil } + // Snap state is chunked, generate edge proofs for verification. tr, err := trie.New(root, dl.triedb) if err != nil { log.Error("Missing trie", "root", root, "err", err) return nil, err } - // Snap state is chunked, generate edge proofs for verification. // Firstly find out the key of last iterated element. var last []byte if len(keys) > 0 { From 95118e7d9871dd690b10e01496e978fbebbdc29f Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 17 Mar 2021 13:36:47 +0800 Subject: [PATCH 48/75] core/state/snapshot: remove panic --- core/state/snapshot/generate.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 678295ad0bf1..7fada2eb6898 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -241,8 +241,7 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or for iter.Next() { key := iter.Key() if len(key) != len(prefix)+common.HashLength { - // TODO! Why is this check neeed? 
- panic("remove this panic later on") + continue } if len(keys) == max { aborted = true @@ -422,7 +421,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig } logger.Debug("Regenerated state range", "root", root, "last", hexutil.Encode(last), "count", count, "created", created, "updated", updated, "deleted", deleted, "untouched", untouched) - return !aborted, last, nil // The entire trie is exhausted + return !aborted, last, nil } // generate is a background thread that iterates over the state and storage tries, From bba0f1c60b7d20f608c5a65ee1ed7431319b7b3c Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 17 Mar 2021 14:32:06 +0800 Subject: [PATCH 49/75] core/state/snapshot: fix abort --- core/state/snapshot/generate.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 7fada2eb6898..4e65e7fb0d60 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -245,6 +245,7 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or } if len(keys) == max { aborted = true + break } keys = append(keys, common.CopyBytes(key[len(prefix):])) @@ -341,7 +342,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig } return !result.cont, last, nil } - logger.Debug("Detected outdated state range", "last", hexutil.Encode(last), "error", err) + logger.Debug("Detected outdated state range", "last", hexutil.Encode(last), "error", result.proofErr) snapFailedRangeProofMeter.Mark(1) // Special case, the entire trie is missing. 
In the original trie scheme, @@ -420,7 +421,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig deleted += 1 } logger.Debug("Regenerated state range", "root", root, "last", hexutil.Encode(last), - "count", count, "created", created, "updated", updated, "deleted", deleted, "untouched", untouched) + "count", count, "created", created, "updated", updated, "untouched", untouched, "deleted", deleted) return !aborted, last, nil } From 3c643895524692a4e13742ff28360523fa33035a Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 17 Mar 2021 08:40:27 +0100 Subject: [PATCH 50/75] core/state/snapshot: more tests (plus failing testcase) --- core/state/snapshot/generate_test.go | 71 ++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 4 deletions(-) diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 400cce7052cc..5ea1beedd17e 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -19,6 +19,7 @@ package snapshot import ( "fmt" "math/big" + "os" "testing" "time" @@ -26,6 +27,7 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/ethdb/memorydb" + "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" "golang.org/x/crypto/sha3" @@ -233,7 +235,6 @@ func (t *testHelper) Generate() (common.Hash, *diskLayer) { // - the contract(non-empty storage) misses some storage slots // - the contract(non-empty storage) has wrong storage slots func TestGenerateExistentStateWithWrongStorage(t *testing.T) { - //log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) helper := newHelper() stRoot := helper.makeStorageTrie([]string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) @@ -511,9 +512,15 @@ func TestGenerateWithExtraAccounts(t *testing.T) { } } 
+func enableLogging() { + log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) +} + // Tests that snapshot generation when an extra account with storage exists in the snap state. func TestGenerateWithManyExtraAccounts(t *testing.T) { - + if true { + enableLogging() + } var ( diskdb = memorydb.New() triedb = trie.NewDatabase(diskdb) @@ -532,8 +539,9 @@ func TestGenerateWithManyExtraAccounts(t *testing.T) { rawdb.WriteStorageSnapshot(diskdb, key, hashData([]byte("key-3")), []byte("val-3")) } { // 100 accounts exist only in snapshot - for i := 0; i < 100; i++ { - acc := &Account{Balance: big.NewInt(int64(i)), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + for i := 0; i < 1000; i++ { + //acc := &Account{Balance: big.NewInt(int64(i)), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + acc := &Account{Balance: big.NewInt(int64(i)), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} val, _ := rlp.EncodeToBytes(acc) key := hashData([]byte(fmt.Sprintf("acc-%d", i))) rawdb.WriteAccountSnapshot(diskdb, key, val) @@ -561,3 +569,58 @@ func TestGenerateWithManyExtraAccounts(t *testing.T) { t.Fatalf("expected slot to be removed, got %v", string(data)) } } + +// Tests this case +// maxAccountRange 3 +// snapshot-accounts: 01, 02, 03, 04, 05, 06, 07 +// trie-accounts: 03, 07 +// +// We iterate three snapshot storage slots (max = 3) from the database. They are 0x01, 0x02, 0x03. +// The trie has a lot of deletions. +// So in trie, we iterate 2 entries 0x03, 0x07. We create the 0x07 in the database and abort the procedure, because the trie is exhausted. +// But in the database, we still have the stale storage slots 0x04, 0x05. They are not iterated yet, but the procedure is finished. 
+func TestGenerateWithExtraBeforeAndAfter(t *testing.T) { + accountCheckRange = 3 + log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + var ( + diskdb = memorydb.New() + triedb = trie.NewDatabase(diskdb) + ) + accTrie, _ := trie.New(common.Hash{}, triedb) + { + acc := &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + accTrie.Update(common.HexToHash("0x03").Bytes(), val) + accTrie.Update(common.HexToHash("0x07").Bytes(), val) + + rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x01"), val) + rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x02"), val) + rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x03"), val) + rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x04"), val) + rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x05"), val) + rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x06"), val) + rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x07"), val) + } + + root, _ := accTrie.Commit(nil) + t.Logf("root: %x", root) + triedb.Commit(root, false, nil) + + snap := generateSnapshot(diskdb, triedb, 16, root) + select { + case <-snap.genPending: + // Snapshot generation succeeded + + case <-time.After(250 * time.Millisecond): + t.Errorf("Snapshot generation failed") + } + checkSnapRoot(t, snap, root) + // Signal abortion to the generator and wait for it to tear down + stop := make(chan *generatorStats) + snap.genAbort <- stop + <-stop + // If we now inspect the snap db, there should exist no extraneous storage items + if data := rawdb.ReadStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data != nil { + t.Fatalf("expected slot to be removed, got %v", string(data)) + } +} From 47bc0ef512c95c8a9697acf27e2b644c624fb8c2 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 17 Mar 2021 09:02:45 +0100 Subject: [PATCH 51/75] core/state/snapshot: more testcases + 
fix for failing test --- core/state/snapshot/generate.go | 36 ++++++++++++++++------------ core/state/snapshot/generate_test.go | 2 +- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 4e65e7fb0d60..67de363a69a2 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -184,11 +184,11 @@ func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorSta // proofResult contains the output of range proving which can be used // for further processing no matter it's successful or not. type proofResult struct { - keys [][]byte // The key set of all elements being iterated, even proving is failed - vals [][]byte // The val set of all elements being iterated, even proving is failed - cont bool // Indicator if there exists more elements in the range, only meaningful when proving is successful - proofErr error // Indicator whether the given state range is valid or not - tr *trie.Trie // The trie, in case the trie was resolved by the prover (may be nil) + keys [][]byte // The key set of all elements being iterated, even proving is failed + vals [][]byte // The val set of all elements being iterated, even proving is failed + hasMoreElems bool // Set if the db iteration was aborted on max elements. + proofErr error // Indicator whether the given state range is valid or not + tr *trie.Trie // The trie, in case the trie was resolved by the prover (may be nil) } // valid returns the indicator that range proof is successful or not. 
@@ -273,9 +273,9 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or stackTr.TryUpdate(key, common.CopyBytes(vals[i])) } if gotRoot := stackTr.Hash(); gotRoot != root { - return &proofResult{keys: keys, vals: vals, cont: false, proofErr: errors.New("wrong root")}, nil + return &proofResult{keys: keys, vals: vals, hasMoreElems: aborted, proofErr: errors.New("wrong root")}, nil } - return &proofResult{keys: keys, vals: vals, cont: false, proofErr: nil}, nil + return &proofResult{keys: keys, vals: vals, hasMoreElems: aborted, proofErr: nil}, nil } // Snap state is chunked, generate edge proofs for verification. tr, err := trie.New(root, dl.triedb) @@ -294,21 +294,24 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or } if err := tr.Prove(origin, 0, proof); err != nil { log.Debug("Failed to prove range", "kind", kind, "origin", origin, "err", err) - return &proofResult{keys: keys, vals: vals, cont: false, proofErr: err, tr: tr}, nil + return &proofResult{keys: keys, vals: vals, hasMoreElems: aborted, proofErr: err, tr: tr}, nil } if last != nil { if err := tr.Prove(last, 0, proof); err != nil { log.Debug("Failed to prove range", "kind", kind, "last", last, "err", err) - return &proofResult{keys: keys, vals: vals, cont: false, proofErr: err, tr: tr}, nil + return &proofResult{keys: keys, vals: vals, hasMoreElems: aborted, proofErr: err, tr: tr}, nil } } // Verify the state segment with range prover, ensure that all flat states // in this range correspond to merkle trie. - _, _, _, cont, err := trie.VerifyRangeProof(root, origin, last, keys, vals, proof) - if err != nil { - return &proofResult{keys: keys, vals: vals, cont: false, proofErr: err, tr: tr}, nil - } - return &proofResult{keys: keys, vals: vals, cont: cont, proofErr: nil, tr: tr}, nil + _, _, _, _, err = trie.VerifyRangeProof(root, origin, last, keys, vals, proof) + // Previously, we took the 'cont' from trie.VerifyRangeProof and used below. 
+ // Is that needed? If so, make a testcase to show it + // TODO + //if err != nil { + // return &proofResult{keys: keys, vals: vals, hasMoreElems: cont || aborted, proofErr: err, tr: tr}, nil + //} + return &proofResult{keys: keys, vals: vals, hasMoreElems: aborted, proofErr: err, tr: tr}, nil } // genRange generates the state segment with particular prefix. Generation can @@ -340,7 +343,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig if err := result.forEach(func(key []byte, val []byte) error { return onState(key, val, false, false) }); err != nil { return false, nil, err } - return !result.cont, last, nil + return !result.hasMoreElems, last, nil } logger.Debug("Detected outdated state range", "last", hexutil.Encode(last), "error", result.proofErr) snapFailedRangeProofMeter.Mark(1) @@ -420,6 +423,9 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig } deleted += 1 } + // If there are either more trie items, or there are more snap items + // (in the next segment), then we need to keep working + aborted = aborted || result.hasMoreElems logger.Debug("Regenerated state range", "root", root, "last", hexutil.Encode(last), "count", count, "created", created, "updated", updated, "untouched", untouched, "deleted", deleted) return !aborted, last, nil diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 5ea1beedd17e..b126712442a2 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -518,7 +518,7 @@ func enableLogging() { // Tests that snapshot generation when an extra account with storage exists in the snap state. 
func TestGenerateWithManyExtraAccounts(t *testing.T) { - if true { + if false { enableLogging() } var ( From 71ea5ad38fe410a8907f30eb81435eafb54df4ad Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 17 Mar 2021 09:20:11 +0100 Subject: [PATCH 52/75] core/state/snapshot: testcase for malformed data --- core/state/snapshot/generate.go | 5 +-- core/state/snapshot/generate_test.go | 46 ++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 67de363a69a2..129d8b656248 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -209,9 +209,6 @@ func (result *proofResult) last() []byte { // forEach iterates all the visited elements and applies the given callback on them. // The iteration is aborted if the callback returns non-nil error. func (result *proofResult) forEach(callback func(key []byte, val []byte) error) error { - if callback == nil { - return nil - } for i := 0; i < len(result.keys); i++ { key, val := result.keys[i], result.vals[i] if err := callback(key, val); err != nil { @@ -255,7 +252,7 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or } val, err := valueConvertFn(iter.Value()) if err != nil { - // Sepcial case, the state data is corrupted(invalid slim-format account), + // Special case, the state data is corrupted (invalid slim-format account), // don't abort the entire procedure directly. Instead, let the fallback // generation to heal the invalid data. 
// diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index b126712442a2..35d16410f5a8 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -624,3 +624,49 @@ func TestGenerateWithExtraBeforeAndAfter(t *testing.T) { t.Fatalf("expected slot to be removed, got %v", string(data)) } } + +// TestGenerateWithMalformedSnapdata tests what happes if we have some junk +// in the snapshot database, which cannot be parsed back to an account +func TestGenerateWithMalformedSnapdata(t *testing.T) { + accountCheckRange = 3 + log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + var ( + diskdb = memorydb.New() + triedb = trie.NewDatabase(diskdb) + ) + accTrie, _ := trie.New(common.Hash{}, triedb) + { + acc := &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + accTrie.Update(common.HexToHash("0x03").Bytes(), val) + + junk := make([]byte, 100) + copy(junk, []byte{0xde, 0xad}) + rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x02"), junk) + rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x03"), junk) + rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x04"), junk) + rawdb.WriteAccountSnapshot(diskdb, common.HexToHash("0x05"), junk) + } + + root, _ := accTrie.Commit(nil) + t.Logf("root: %x", root) + triedb.Commit(root, false, nil) + + snap := generateSnapshot(diskdb, triedb, 16, root) + select { + case <-snap.genPending: + // Snapshot generation succeeded + + case <-time.After(250 * time.Millisecond): + t.Errorf("Snapshot generation failed") + } + checkSnapRoot(t, snap, root) + // Signal abortion to the generator and wait for it to tear down + stop := make(chan *generatorStats) + snap.genAbort <- stop + <-stop + // If we now inspect the snap db, there should exist no extraneous storage items + if data := rawdb.ReadStorageSnapshot(diskdb, 
hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data != nil { + t.Fatalf("expected slot to be removed, got %v", string(data)) + } +} From 63f4998826c7ca3f696640c8a6b5fd5271bf63a0 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 17 Mar 2021 12:39:34 +0100 Subject: [PATCH 53/75] core/state/snapshot: some test nitpicks --- core/state/snapshot/generate_test.go | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 35d16410f5a8..77d9a9fc84f2 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -492,6 +492,10 @@ func TestGenerateWithExtraAccounts(t *testing.T) { root, _ := accTrie.Commit(nil) t.Logf("root: %x", root) triedb.Commit(root, false, nil) + // To verify the test: If we now inspect the snap db, there should exist extraneous storage items + if data := rawdb.ReadStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data == nil { + t.Fatalf("expected snap storage to exist") + } snap := generateSnapshot(diskdb, triedb, 16, root) select { @@ -564,10 +568,6 @@ func TestGenerateWithManyExtraAccounts(t *testing.T) { stop := make(chan *generatorStats) snap.genAbort <- stop <-stop - // If we now inspect the snap db, there should exist no extraneous storage items - if data := rawdb.ReadStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data != nil { - t.Fatalf("expected slot to be removed, got %v", string(data)) - } } // Tests this case @@ -619,10 +619,6 @@ func TestGenerateWithExtraBeforeAndAfter(t *testing.T) { stop := make(chan *generatorStats) snap.genAbort <- stop <-stop - // If we now inspect the snap db, there should exist no extraneous storage items - if data := rawdb.ReadStorageSnapshot(diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data != nil { - t.Fatalf("expected slot to be removed, got %v", string(data)) - } } // 
TestGenerateWithMalformedSnapdata tests what happes if we have some junk From ac8ebf3097268e5151f09d24a50fcd0e7b949cdc Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 17 Mar 2021 14:32:48 +0100 Subject: [PATCH 54/75] core/state/snapshot: improvements to logging --- core/state/snapshot/generate.go | 6 +++-- core/state/snapshot/generate_test.go | 35 ++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 129d8b656248..1577574442a6 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -332,7 +332,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig // The range prover says the range is correct, skip trie iteration if result.valid() { snapSuccessfulRangeProofMeter.Mark(1) - logger.Debug("Proved state range", "last", hexutil.Encode(last)) + logger.Trace("Proved state range", "last", hexutil.Encode(last)) // The verification is passed, process each state with the given // callback function. If this state represents a contract, the @@ -342,7 +342,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig } return !result.hasMoreElems, last, nil } - logger.Debug("Detected outdated state range", "last", hexutil.Encode(last), "error", result.proofErr) + logger.Trace("Detected outdated state range", "last", hexutil.Encode(last), "error", result.proofErr) snapFailedRangeProofMeter.Mark(1) // Special case, the entire trie is missing. 
In the original trie scheme, @@ -384,6 +384,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig count += 1 write := true + created++ for len(kvkeys) > 0 { if cmp := bytes.Compare(kvkeys[0], iter.Key); cmp < 0 { // delete the key @@ -396,6 +397,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig continue } else if cmp == 0 { // the snapshot key can be overwritten + created-- if write = !bytes.Equal(kvvals[0], iter.Value); write { updated++ } else { diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 77d9a9fc84f2..365cc9e4b333 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -187,6 +187,13 @@ func newHelper() *testHelper { } } +// addAccount adds an account to the trie and snapshot, and return t +func (t *testHelper) addTrieAccount(accPreKey string, acc *Account) { + val, _ := rlp.EncodeToBytes(acc) + // Add to trie + t.accTrie.Update([]byte(accPreKey), val) +} + // addAccount adds an account to the trie and snapshot, and return t func (t *testHelper) addAccount(accPreKey string, acc *Account) { val, _ := rlp.EncodeToBytes(acc) @@ -666,3 +673,31 @@ func TestGenerateWithMalformedSnapdata(t *testing.T) { t.Fatalf("expected slot to be removed, got %v", string(data)) } } + +func TestGenerateFromEmptySnap(t *testing.T) { + //enableLogging() + accountCheckRange = 10 + storageCheckRange = 20 + helper := newHelper() + stRoot := helper.makeStorageTrie([]string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + // Add 1K accounts to the trie + for i := 0; i < 400; i++ { + helper.addTrieAccount(fmt.Sprintf("acc-%d", i), + &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + } + root, snap := helper.Generate() + t.Logf("Root: %#x\n", root) // Root: 0x3a97ece15e2539ab3524783c37ca153a62e28faba76a752e826da24a9020d44f + + select { + case <-snap.genPending: + // Snapshot generation 
succeeded + + case <-time.After(1 * time.Second): + t.Errorf("Snapshot generation failed") + } + checkSnapRoot(t, snap, root) + // Signal abortion to the generator and wait for it to tear down + stop := make(chan *generatorStats) + snap.genAbort <- stop + <-stop +} From fda1c2adff180eb6f54e55cbe1eae764dd5c1fb0 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Thu, 18 Mar 2021 09:17:28 +0100 Subject: [PATCH 55/75] core/state/snapshot: testcase to demo error in abortion --- core/state/snapshot/generate_test.go | 48 +++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 365cc9e4b333..0813f77e29c8 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -223,7 +223,6 @@ func (t *testHelper) makeStorageTrie(keys []string, values []string) []byte { stTrie, _ := trie.NewSecure(common.Hash{}, t.triedb) for i, k := range keys { stTrie.Update([]byte(k), []byte(values[i])) - } root, _ := stTrie.Commit(nil) return root.Bytes() @@ -701,3 +700,50 @@ func TestGenerateFromEmptySnap(t *testing.T) { snap.genAbort <- stop <-stop } + +// Tests that snapshot generation with existent flat state, where the flat state +// storage is correct, but incomplete. +// The incomplete part is on the second range +// snap: [ 0x01, 0x02, 0x03, 0x04] , [ 0x05, 0x06, 0x07, {missing}] (with storageCheck = 4) +// trie: 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 +// This hits a case where the snap verification passes, but there are more elements in the trie +// which we must also add. +func TestGenerateWithIncompleteStorage(t *testing.T) { + storageCheckRange = 4 + helper := newHelper() + stKeys := []string{"1", "2", "3", "4", "5", "6", "7", "8"} + stVals := []string{"v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8"} + stRoot := helper.makeStorageTrie(stKeys, stVals) + // We add 8 accounts, each one is missing exactly one of the storage slots. 
This means + // we don't have to order the keys and figure out exactly which hash-key winds up + // on the sensitive spots at the boundaries + for i := 0; i < 8; i++ { + accKey := fmt.Sprintf("acc-%d", i) + helper.addAccount(accKey, &Account{Balance: big.NewInt(int64(i)), Root: stRoot, CodeHash: emptyCode.Bytes()}) + var moddedKeys []string + var moddedVals []string + for ii := 0; ii < 8; ii++ { + if ii != i { + moddedKeys = append(moddedKeys, stKeys[ii]) + moddedVals = append(moddedVals, stVals[ii]) + } + } + helper.addSnapStorage(accKey, moddedKeys, moddedVals) + } + + root, snap := helper.Generate() + t.Logf("Root: %#x\n", root) // Root: 0x3a97ece15e2539ab3524783c37ca153a62e28faba76a752e826da24a9020d44f + + select { + case <-snap.genPending: + // Snapshot generation succeeded + + case <-time.After(250 * time.Millisecond): + t.Errorf("Snapshot generation failed") + } + checkSnapRoot(t, snap, root) + // Signal abortion to the generator and wait for it to tear down + stop := make(chan *generatorStats) + snap.genAbort <- stop + <-stop +} From f58f3c72985cba314cbfd1ff580789c3bd6eb5cb Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Thu, 18 Mar 2021 14:32:47 +0800 Subject: [PATCH 56/75] core/state/snapshot: fix abortion --- core/state/snapshot/generate.go | 88 ++++++++++++++++----------------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 1577574442a6..f3ea0b8ee70e 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -184,11 +184,12 @@ func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorSta // proofResult contains the output of range proving which can be used // for further processing no matter it's successful or not. 
type proofResult struct { - keys [][]byte // The key set of all elements being iterated, even proving is failed - vals [][]byte // The val set of all elements being iterated, even proving is failed - hasMoreElems bool // Set if the db iteration was aborted on max elements. - proofErr error // Indicator whether the given state range is valid or not - tr *trie.Trie // The trie, in case the trie was resolved by the prover (may be nil) + keys [][]byte // The key set of all elements being iterated, even proving is failed + vals [][]byte // The val set of all elements being iterated, even proving is failed + diskMore bool // Set when the database has extra snapshot states since last iteration + trieMore bool // Set when the trie has extra snapshot states(only meaningful for successful proving) + proofErr error // Indicator whether the given state range is valid or not + tr *trie.Trie // The trie, in case the trie was resolved by the prover (may be nil) } // valid returns the indicator that range proof is successful or not. @@ -227,10 +228,10 @@ func (result *proofResult) forEach(callback func(key []byte, val []byte) error) // the error will be returned to abort the entire procedure. 
func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) (*proofResult, error) { var ( - keys [][]byte - vals [][]byte - proof = rawdb.NewMemoryDatabase() - aborted = false + keys [][]byte + vals [][]byte + proof = rawdb.NewMemoryDatabase() + diskMore = false ) iter := dl.diskdb.NewIterator(prefix, origin) defer iter.Release() @@ -240,39 +241,41 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or if len(key) != len(prefix)+common.HashLength { continue } - if len(keys) == max { - aborted = true - break - } keys = append(keys, common.CopyBytes(key[len(prefix):])) if valueConvertFn == nil { vals = append(vals, common.CopyBytes(iter.Value())) - continue + } else { + val, err := valueConvertFn(iter.Value()) + if err != nil { + // Special case, the state data is corrupted (invalid slim-format account), + // don't abort the entire procedure directly. Instead, let the fallback + // generation to heal the invalid data. + // + // Here append the original value to ensure that the number of key and + // value are the same. + vals = append(vals, common.CopyBytes(iter.Value())) + } else { + vals = append(vals, val) + } } - val, err := valueConvertFn(iter.Value()) - if err != nil { - // Special case, the state data is corrupted (invalid slim-format account), - // don't abort the entire procedure directly. Instead, let the fallback - // generation to heal the invalid data. - // - // Here append the original value to ensure that the number of key and - // value are the same. - vals = append(vals, common.CopyBytes(iter.Value())) - continue + // A trick is applied here, whenever the maximum items are reached, + // also check the database iterator is exhausted or not. 
+ if len(keys) == max { + diskMore = iter.Next() + break } - vals = append(vals, val) } // The snap state is exhausted, pass the entire key/val set for verification - if origin == nil && !aborted { + if origin == nil && !diskMore { stackTr := trie.NewStackTrie(nil) for i, key := range keys { stackTr.TryUpdate(key, common.CopyBytes(vals[i])) } if gotRoot := stackTr.Hash(); gotRoot != root { - return &proofResult{keys: keys, vals: vals, hasMoreElems: aborted, proofErr: errors.New("wrong root")}, nil + return &proofResult{keys: keys, vals: vals, diskMore: false, trieMore: false, proofErr: errors.New("wrong root")}, nil } - return &proofResult{keys: keys, vals: vals, hasMoreElems: aborted, proofErr: nil}, nil + return &proofResult{keys: keys, vals: vals, diskMore: false, trieMore: false, proofErr: nil}, nil } // Snap state is chunked, generate edge proofs for verification. tr, err := trie.New(root, dl.triedb) @@ -291,24 +294,18 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or } if err := tr.Prove(origin, 0, proof); err != nil { log.Debug("Failed to prove range", "kind", kind, "origin", origin, "err", err) - return &proofResult{keys: keys, vals: vals, hasMoreElems: aborted, proofErr: err, tr: tr}, nil + return &proofResult{keys: keys, vals: vals, diskMore: diskMore, trieMore: false, proofErr: err, tr: tr}, nil } if last != nil { if err := tr.Prove(last, 0, proof); err != nil { log.Debug("Failed to prove range", "kind", kind, "last", last, "err", err) - return &proofResult{keys: keys, vals: vals, hasMoreElems: aborted, proofErr: err, tr: tr}, nil + return &proofResult{keys: keys, vals: vals, diskMore: diskMore, trieMore: false, proofErr: err, tr: tr}, nil } } // Verify the state segment with range prover, ensure that all flat states // in this range correspond to merkle trie. - _, _, _, _, err = trie.VerifyRangeProof(root, origin, last, keys, vals, proof) - // Previously, we took the 'cont' from trie.VerifyRangeProof and used below. 
- // Is that needed? If so, make a testcase to show it - // TODO - //if err != nil { - // return &proofResult{keys: keys, vals: vals, hasMoreElems: cont || aborted, proofErr: err, tr: tr}, nil - //} - return &proofResult{keys: keys, vals: vals, hasMoreElems: aborted, proofErr: err, tr: tr}, nil + _, _, _, cont, err := trie.VerifyRangeProof(root, origin, last, keys, vals, proof) + return &proofResult{keys: keys, vals: vals, diskMore: diskMore, trieMore: cont, proofErr: err, tr: tr}, nil } // genRange generates the state segment with particular prefix. Generation can @@ -340,7 +337,8 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig if err := result.forEach(func(key []byte, val []byte) error { return onState(key, val, false, false) }); err != nil { return false, nil, err } - return !result.hasMoreElems, last, nil + // Only abort the iteration when both database and trie are exhausted + return !result.diskMore && !result.trieMore, last, nil } logger.Trace("Detected outdated state range", "last", hexutil.Encode(last), "error", result.proofErr) snapFailedRangeProofMeter.Mark(1) @@ -365,7 +363,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig } } var ( - aborted bool + trieMore bool iter = trie.NewIterator(tr.NodeIterator(origin)) kvkeys, kvvals = result.keys, result.vals @@ -378,7 +376,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig ) for iter.Next() { if last != nil && bytes.Compare(iter.Key, last) > 0 { - aborted = true + trieMore = true break } count += 1 @@ -422,12 +420,12 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig } deleted += 1 } - // If there are either more trie items, or there are more snap items - // (in the next segment), then we need to keep working - aborted = aborted || result.hasMoreElems logger.Debug("Regenerated state range", "root", root, "last", hexutil.Encode(last), "count", count, "created", created, 
"updated", updated, "untouched", untouched, "deleted", deleted) - return !aborted, last, nil + + // If there are either more trie items, or there are more snap items + // (in the next segment), then we need to keep working + return !trieMore && !result.diskMore, last, nil } // generate is a background thread that iterates over the state and storage tries, From ea540aa6af6da99cd7dd58475ec1a3c11dc9b1c0 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Thu, 18 Mar 2021 09:57:20 +0100 Subject: [PATCH 57/75] cmd/geth: make verify-state report the root --- cmd/geth/snapshot.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/geth/snapshot.go b/cmd/geth/snapshot.go index e8f6a354387c..686101a08c80 100644 --- a/cmd/geth/snapshot.go +++ b/cmd/geth/snapshot.go @@ -205,10 +205,10 @@ func verifyState(ctx *cli.Context) error { } } if err := snaptree.Verify(root); err != nil { - log.Error("Failed to verfiy state", "error", err) + log.Error("Failed to verfiy state", "root", root, "error", err) return err } - log.Info("Verified the state") + log.Info("Verified the state", "root", root) return nil } From 37b342615ca713032a007d49d3ee6d9035b8d477 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Thu, 18 Mar 2021 11:56:26 +0100 Subject: [PATCH 58/75] trie: fix failing test --- trie/trie_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trie/trie_test.go b/trie/trie_test.go index 3aa4098d1421..d6930fdee729 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -569,7 +569,7 @@ func BenchmarkCommitAfterHash(b *testing.B) { benchmarkCommitAfterHash(b, nil) }) var a account - onleaf := func(path []byte, leaf []byte, parent common.Hash) error { + onleaf := func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash) error { rlp.DecodeBytes(leaf, &a) return nil } From d172f808d9af0621512ef9351e684fe707831359 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Thu, 18 Mar 2021 19:28:09 +0800 Subject: [PATCH 59/75] 
core/state/snapshot: add timer metrics --- core/state/snapshot/generate.go | 77 ++++++++++++++++++++++++++++++++- 1 file changed, 76 insertions(+), 1 deletion(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index f3ea0b8ee70e..b8f546f48f8b 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -69,6 +69,27 @@ var ( snapMissallStorageMeter = metrics.NewRegisteredMeter("state/snapshot/generation/storage/missall", nil) snapSuccessfulRangeProofMeter = metrics.NewRegisteredMeter("state/snapshot/generation/proof/success", nil) snapFailedRangeProofMeter = metrics.NewRegisteredMeter("state/snapshot/generation/proof/failure", nil) + + snapAccountProveTimer = metrics.NewRegisteredGauge("state/snapshot/generation/duration/account/prove", nil) + snapAccountTrieReadTimer = metrics.NewRegisteredGauge("state/snapshot/generation/duration/account/trieread", nil) + snapAccountSnapReadTimer = metrics.NewRegisteredGauge("state/snapshot/generation/duration/account/snapread", nil) + snapAccountWriteTimer = metrics.NewRegisteredGauge("state/snapshot/generation/duration/account/write", nil) + snapStorageProveTimer = metrics.NewRegisteredGauge("state/snapshot/generation/duration/storage/prove", nil) + snapStorageTrieReadTimer = metrics.NewRegisteredGauge("state/snapshot/generation/duration/storage/trieread", nil) + snapStorageSnapReadTimer = metrics.NewRegisteredGauge("state/snapshot/generation/duration/storage/snapread", nil) + snapStorageWriteTimer = metrics.NewRegisteredGauge("state/snapshot/generation/duration/storage/write", nil) +) + +// Global timer for metrics +var ( + accountProving time.Duration // The total time spent on the account proving + accountTrieRead time.Duration // The total time spent on the account trie iteration + accountSnapRead time.Duration // The total time spent on the snapshot account iteration + accountWrite time.Duration // The total time spent on writing/updating/deleting accounts + 
storageProving time.Duration // The total time spent on the storage proving + storageTrieRead time.Duration // The total time spent on the storage trie iteration + storageSnapRead time.Duration // The total time spent on the snapshot storage iteration + storageWrite time.Duration // The total time spent on writing/updating/deleting storages ) // generatorStats is a collection of statistics gathered by the snapshot generator @@ -236,6 +257,7 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or iter := dl.diskdb.NewIterator(prefix, origin) defer iter.Release() + var start = time.Now() for iter.Next() { key := iter.Key() if len(key) != len(prefix)+common.HashLength { @@ -266,6 +288,24 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or break } } + // Update metrics for database iteration and merkle proving + if kind == "storage" { + storageSnapRead += time.Since(start) + snapStorageSnapReadTimer.Update(int64(storageSnapRead)) + } else { + accountSnapRead += time.Since(start) + snapAccountSnapReadTimer.Update(int64(accountSnapRead)) + } + defer func(start time.Time) { + if kind == "storage" { + storageProving += time.Since(start) + snapStorageProveTimer.Update(int64(storageProving)) + } else { + accountProving += time.Since(start) + snapAccountProveTimer.Update(int64(accountProving)) + } + }(time.Now()) + // The snap state is exhausted, pass the entire key/val set for verification if origin == nil && !diskMore { stackTr := trie.NewStackTrie(nil) @@ -373,6 +413,11 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig updated = 0 // states updated from the trie deleted = 0 // states not in trie, but were in snapshot untouched = 0 // states already correct + + // timers + start = time.Now() + istart time.Time + internal time.Duration ) for iter.Next() { if last != nil && bytes.Compare(iter.Key, last) > 0 { @@ -386,12 +431,14 @@ func (dl *diskLayer) genRange(root common.Hash, 
prefix []byte, kind string, orig for len(kvkeys) > 0 { if cmp := bytes.Compare(kvkeys[0], iter.Key); cmp < 0 { // delete the key + istart = time.Now() if err := onState(kvkeys[0], nil, false, true); err != nil { return false, nil, err } kvkeys = kvkeys[1:] kvvals = kvvals[1:] deleted += 1 + internal += time.Since(istart) continue } else if cmp == 0 { // the snapshot key can be overwritten @@ -406,20 +453,33 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig } break } + istart = time.Now() if err := onState(iter.Key, iter.Value, write, false); err != nil { return false, nil, err } + internal += time.Since(istart) } if iter.Err != nil { return false, nil, iter.Err } // Delete all stale snapshot states remaining + istart = time.Now() for _, key := range kvkeys { if err := onState(key, nil, false, true); err != nil { return false, nil, err } deleted += 1 } + internal += time.Since(istart) + + // Update metrics for counting trie iteration + if kind == "storage" { + storageTrieRead += time.Since(start) - internal + snapStorageSnapReadTimer.Update(int64(storageTrieRead)) + } else { + accountTrieRead += time.Since(start) - internal + snapAccountSnapReadTimer.Update(int64(accountTrieRead)) + } logger.Debug("Regenerated state range", "root", root, "last", hexutil.Encode(last), "count", count, "created", created, "updated", updated, "untouched", untouched, "deleted", deleted) @@ -483,7 +543,10 @@ func (dl *diskLayer) generate(stats *generatorStats) { } onAccount := func(key []byte, val []byte, write bool, delete bool) error { - accountHash := common.BytesToHash(key) + var ( + start = time.Now() + accountHash = common.BytesToHash(key) + ) if delete { rawdb.DeleteAccountSnapshot(batch, accountHash) snapWipedAccountMeter.Mark(1) @@ -494,6 +557,8 @@ func (dl *diskLayer) generate(stats *generatorStats) { if err := wipeKeyRange(dl.diskdb, "storage", prefix, nil, nil, keyLen, snapWipedStorageMeter, false); err != nil { return err } + accountWrite += 
time.Since(start) + snapAccountWriteTimer.Update(int64(accountWrite)) return nil } // Retrieve the current account and flatten it into the internal format @@ -527,11 +592,19 @@ func (dl *diskLayer) generate(stats *generatorStats) { // If the iterated account is the contract, create a further loop to // verify or regenerate the contract storage. if acc.Root != emptyRoot { + accountWrite += time.Since(start) + snapAccountWriteTimer.Update(int64(accountWrite)) + var storeMarker []byte if accMarker != nil && bytes.Equal(accountHash[:], accMarker) && len(dl.genMarker) > common.HashLength { storeMarker = dl.genMarker[common.HashLength:] } onStorage := func(key []byte, val []byte, write bool, delete bool) error { + defer func(start time.Time) { + storageWrite += time.Since(start) + snapStorageWriteTimer.Update(int64(storageWrite)) + }(time.Now()) + if delete { rawdb.DeleteStorageSnapshot(batch, accountHash, common.BytesToHash(key)) snapWipedStorageMeter.Mark(1) @@ -576,6 +649,8 @@ func (dl *diskLayer) generate(stats *generatorStats) { if err := wipeKeyRange(dl.diskdb, "storage", prefix, nil, nil, keyLen, snapWipedStorageMeter, false); err != nil { return err } + accountWrite += time.Since(start) + snapAccountWriteTimer.Update(int64(accountWrite)) } // Some account processed, unmark the marker accMarker = nil From 3c9073c8612c833ccb42798afc1a24c314b82ab6 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Fri, 19 Mar 2021 11:04:14 +0800 Subject: [PATCH 60/75] core/state/snapshot: fix metrics --- core/state/snapshot/generate.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index b8f546f48f8b..f303ccde7103 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -475,10 +475,10 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig // Update metrics for counting trie iteration if kind == "storage" { storageTrieRead += 
time.Since(start) - internal - snapStorageSnapReadTimer.Update(int64(storageTrieRead)) + snapStorageTrieReadTimer.Update(int64(storageTrieRead)) } else { accountTrieRead += time.Since(start) - internal - snapAccountSnapReadTimer.Update(int64(accountTrieRead)) + snapAccountTrieReadTimer.Update(int64(accountTrieRead)) } logger.Debug("Regenerated state range", "root", root, "last", hexutil.Encode(last), "count", count, "created", created, "updated", updated, "untouched", untouched, "deleted", deleted) From 1e42318c9c0d8df0983c8092e63a3301fa868d5d Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Fri, 19 Mar 2021 13:57:20 +0800 Subject: [PATCH 61/75] core/state/snapshot: udpate tests --- core/state/snapshot/generate_test.go | 184 +++++++++++++++++++-------- 1 file changed, 132 insertions(+), 52 deletions(-) diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 0813f77e29c8..95724e9164c7 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -187,42 +187,33 @@ func newHelper() *testHelper { } } -// addAccount adds an account to the trie and snapshot, and return t -func (t *testHelper) addTrieAccount(accPreKey string, acc *Account) { +func (t *testHelper) addTrieAccount(acckey string, acc *Account) { val, _ := rlp.EncodeToBytes(acc) - // Add to trie - t.accTrie.Update([]byte(accPreKey), val) + t.accTrie.Update([]byte(acckey), val) } -// addAccount adds an account to the trie and snapshot, and return t -func (t *testHelper) addAccount(accPreKey string, acc *Account) { +func (t *testHelper) addSnapAccount(acckey string, acc *Account) { val, _ := rlp.EncodeToBytes(acc) - // Add to trie - t.accTrie.Update([]byte(accPreKey), val) - key := hashData([]byte(accPreKey)) - // Add account to snapshot + key := hashData([]byte(acckey)) rawdb.WriteAccountSnapshot(t.diskdb, key, val) } -func (t *testHelper) addSnapStorage(accPreKey string, slotKeys []string, slotVals []string) { - key := 
hashData([]byte(accPreKey)) - // Add any storage slots - for i, sKey := range slotKeys { - sVal := []byte(slotVals[i]) - rawdb.WriteStorageSnapshot(t.diskdb, key, hashData([]byte(sKey)), sVal) - } +func (t *testHelper) addAccount(acckey string, acc *Account) { + t.addTrieAccount(acckey, acc) + t.addSnapAccount(acckey, acc) } -func (t *testHelper) writeSnapAccount(accPreKey string, acc *Account) { - val, _ := rlp.EncodeToBytes(acc) - key := hashData([]byte(accPreKey)) - rawdb.WriteAccountSnapshot(t.diskdb, key, val) +func (t *testHelper) addSnapStorage(accKey string, keys []string, vals []string) { + accHash := hashData([]byte(accKey)) + for i, key := range keys { + rawdb.WriteStorageSnapshot(t.diskdb, accHash, hashData([]byte(key)), []byte(vals[i])) + } } -func (t *testHelper) makeStorageTrie(keys []string, values []string) []byte { +func (t *testHelper) makeStorageTrie(keys []string, vals []string) []byte { stTrie, _ := trie.NewSecure(common.Hash{}, t.triedb) for i, k := range keys { - stTrie.Update([]byte(k), []byte(values[i])) + stTrie.Update([]byte(k), []byte(vals[i])) } root, _ := stTrie.Commit(nil) return root.Bytes() @@ -235,47 +226,136 @@ func (t *testHelper) Generate() (common.Hash, *diskLayer) { return root, snap } -// Tests that snapshot generation with existent flat state, where the flat state contains -// some errors: +// Tests that snapshot generation with existent flat state, where the flat state +// contains some errors: // - the contract with empty storage root but has storage entries in the disk +// - the contract with non empty storage root but empty storage slots // - the contract(non-empty storage) misses some storage slots +// - miss in the beginning +// - miss in the middle +// - miss in the end // - the contract(non-empty storage) has wrong storage slots +// - wrong slots in the beginning +// - wrong slots in the middle +// - wrong slots in the end +// - the contract(non-empty storage) has extra storage slots +// - extra slots in the 
beginning +// - extra slots in the middle +// - extra slots in the end func TestGenerateExistentStateWithWrongStorage(t *testing.T) { - helper := newHelper() stRoot := helper.makeStorageTrie([]string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) - // Account one, miss storage slots in the end(key-3) - helper.addAccount("acc-1", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) - helper.addSnapStorage("acc-1", []string{"key-1", "key-2"}, []string{"val-1", "val-2"}) + // Account one, empty root but non-empty database + helper.addAccount("acc-1", &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + + // Account two, non empty root but empty database + helper.addAccount("acc-2", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + + // Miss slots + { + // Account three, non empty root but misses slots in the beginning + helper.addAccount("acc-3", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-3", []string{"key-2", "key-3"}, []string{"val-2", "val-3"}) + + // Account four, non empty root but misses slots in the middle + helper.addAccount("acc-4", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-4", []string{"key-1", "key-3"}, []string{"val-1", "val-3"}) + + // Account five, non empty root but misses slots in the end + helper.addAccount("acc-5", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-5", []string{"key-1", "key-2"}, []string{"val-1", "val-2"}) + } + + // Wrong storage slots + { + // Account six, non empty root but wrong slots in the beginning + helper.addAccount("acc-6", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-6", 
[]string{"key-1", "key-2", "key-3"}, []string{"badval-1", "val-2", "val-3"}) + + // Account seven, non empty root but wrong slots in the middle + helper.addAccount("acc-7", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-7", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "badval-2", "val-3"}) + + // Account eight, non empty root but wrong slots in the end + helper.addAccount("acc-8", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-8", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "badval-3"}) + + // Account 9, non empty root but rotated slots + helper.addAccount("acc-9", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-9", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-3", "val-2"}) + } + + // Extra storage slots + { + // Account 10, non empty root but extra slots in the beginning + helper.addAccount("acc-10", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-10", []string{"key-0", "key-1", "key-2", "key-3"}, []string{"val-0", "val-1", "val-2", "val-3"}) + + // Account 11, non empty root but extra slots in the middle + helper.addAccount("acc-11", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-11", []string{"key-1", "key-2", "key-2-1", "key-3"}, []string{"val-1", "val-2", "val-2-1", "val-3"}) + + // Account 12, non empty root but extra slots in the end + helper.addAccount("acc-12", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapStorage("acc-12", []string{"key-1", "key-2", "key-3", "key-4"}, []string{"val-1", "val-2", "val-3", "val-4"}) + } + + root, snap := helper.Generate() + t.Logf("Root: %#x\n", root) // Root = 0x8746cce9fd9c658b2cfd639878ed6584b7a2b3e73bb40f607fcfa156002429a0 + + select { + case 
<-snap.genPending: + // Snapshot generation succeeded - // Account two, miss storage slots in the beginning(key-1) - helper.addAccount("acc-2", &Account{Balance: big.NewInt(2), Root: stRoot, CodeHash: emptyCode.Bytes()}) - helper.addSnapStorage("acc-2", []string{"key-2", "key-3"}, []string{"val-2", "val-3"}) + case <-time.After(250 * time.Millisecond): + t.Errorf("Snapshot generation failed") + } + checkSnapRoot(t, snap, root) + // Signal abortion to the generator and wait for it to tear down + stop := make(chan *generatorStats) + snap.genAbort <- stop + <-stop +} - // Account three - // The storage root is emptyHash, but the flat db has some storage values. This can happen - // if the storage was unset during sync - helper.addAccount("acc-3", &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}) - helper.addSnapStorage("acc-3", []string{"key-1"}, []string{"val-1"}) +// Tests that snapshot generation with existent flat state, where the flat state +// contains some errors: +// - miss accounts +// - wrong accounts +// - extra accounts +func TestGenerateExistentStateWithWrongAccounts(t *testing.T) { + helper := newHelper() + stRoot := helper.makeStorageTrie([]string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + + // Trie accounts [acc-1, acc-2, acc-3, acc-4, acc-6] + // Extra accounts [acc-0, acc-5, acc-7] + + // Missing accounts, only in the trie + { + helper.addTrieAccount("acc-1", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) // Beginning + helper.addTrieAccount("acc-4", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) // Middle + helper.addTrieAccount("acc-6", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) // End + } - // Account four has a modified codehash + // Wrong accounts { - acc := &Account{Balance: big.NewInt(3), Root: stRoot, CodeHash: emptyCode.Bytes()} - helper.addAccount("acc-4", acc) - 
helper.addSnapStorage("acc-4", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) - // Overwrite the codehash in the snapdata - acc.CodeHash = hashData([]byte("codez")).Bytes() - helper.writeSnapAccount("acc-4", acc) + helper.addTrieAccount("acc-2", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapAccount("acc-2", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: common.Hex2Bytes("0x1234")}) + + helper.addTrieAccount("acc-3", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) + helper.addSnapAccount("acc-3", &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()}) } - // Account 5 has wrong storage slot values - they've been rotated. - // This test that the update-or-replace check works - helper.addAccount("acc-5", &Account{Balance: big.NewInt(3), Root: stRoot, CodeHash: emptyCode.Bytes()}) - helper.addSnapStorage("acc-5", []string{"key-1", "key-2", "key-3"}, []string{"val-2", "val-3", "val-1"}) + // Extra accounts, only in the snap + { + helper.addSnapAccount("acc-0", &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyRoot.Bytes()}) // before the beginning + helper.addSnapAccount("acc-5", &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: common.Hex2Bytes("0x1234")}) // Middle + helper.addSnapAccount("acc-7", &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyRoot.Bytes()}) // after the end + } root, snap := helper.Generate() - t.Logf("Root: %#x\n", root) // Root: 0x3a97ece15e2539ab3524783c37ca153a62e28faba76a752e826da24a9020d44f + t.Logf("Root: %#x\n", root) // Root = 0x825891472281463511e7ebcc7f109e4f9200c20fa384754e11fd605cd98464e8 select { case <-snap.genPending: @@ -285,6 +365,7 @@ func TestGenerateExistentStateWithWrongStorage(t *testing.T) { t.Errorf("Snapshot generation failed") } checkSnapRoot(t, snap, root) + // Signal abortion to the generator and wait for it to tear down 
stop := make(chan *generatorStats) snap.genAbort <- stop @@ -466,7 +547,6 @@ func getStorageTrie(n int, triedb *trie.Database) *trie.SecureTrie { // Tests that snapshot generation when an extra account with storage exists in the snap state. func TestGenerateWithExtraAccounts(t *testing.T) { - var ( diskdb = memorydb.New() triedb = trie.NewDatabase(diskdb) @@ -685,7 +765,7 @@ func TestGenerateFromEmptySnap(t *testing.T) { &Account{Balance: big.NewInt(1), Root: stRoot, CodeHash: emptyCode.Bytes()}) } root, snap := helper.Generate() - t.Logf("Root: %#x\n", root) // Root: 0x3a97ece15e2539ab3524783c37ca153a62e28faba76a752e826da24a9020d44f + t.Logf("Root: %#x\n", root) // Root: 0x6f7af6d2e1a1bf2b84a3beb3f8b64388465fbc1e274ca5d5d3fc787ca78f59e4 select { case <-snap.genPending: @@ -732,7 +812,7 @@ func TestGenerateWithIncompleteStorage(t *testing.T) { } root, snap := helper.Generate() - t.Logf("Root: %#x\n", root) // Root: 0x3a97ece15e2539ab3524783c37ca153a62e28faba76a752e826da24a9020d44f + t.Logf("Root: %#x\n", root) // Root: 0xca73f6f05ba4ca3024ef340ef3dfca8fdabc1b677ff13f5a9571fd49c16e67ff select { case <-snap.genPending: From 4563da20791c5b681e13ad5ab63e1d21b5f21f71 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Fri, 26 Mar 2021 11:54:27 +0100 Subject: [PATCH 62/75] eth/protocols/snap: write snapshot account even if code or state is needed --- eth/protocols/snap/sync.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 725ed8bb6617..b703f5d7c908 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -2010,9 +2010,6 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { // outdated during the sync, but it can be fixed later during the // snapshot generation. 
for i, hash := range res.hashes { - if task.needCode[i] || task.needState[i] { - break - } blob := snapshot.SlimAccountRLP(res.accounts[i].Nonce, res.accounts[i].Balance, res.accounts[i].Root, res.accounts[i].CodeHash) rawdb.WriteAccountSnapshot(batch, hash, blob) bytes += common.StorageSize(1 + common.HashLength + len(blob)) From c78f9f43cdd988b86038eb3f6d4b114c469e6ba4 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Fri, 26 Mar 2021 11:59:55 +0100 Subject: [PATCH 63/75] core/state/snapshot: fix diskmore check --- core/state/snapshot/generate.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index f303ccde7103..878cb088b8cf 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -263,6 +263,12 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or if len(key) != len(prefix)+common.HashLength { continue } + if len(keys) == max { + // Break if we've reached the max size, and signal that we're not + // done yet. + diskMore = true + break + } keys = append(keys, common.CopyBytes(key[len(prefix):])) if valueConvertFn == nil { @@ -281,12 +287,6 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or vals = append(vals, val) } } - // A trick is applied here, whenever the maximum items are reached, - // also check the database iterator is exhausted or not. 
- if len(keys) == max { - diskMore = iter.Next() - break - } } // Update metrics for database iteration and merkle proving if kind == "storage" { From c27ea4f363dfd152f6dc60f8a6433caad282f011 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Fri, 26 Mar 2021 12:19:22 +0100 Subject: [PATCH 64/75] core/state/snapshot: review fixes --- core/state/snapshot/generate.go | 57 ++++++++++++++++++---------- core/state/snapshot/generate_test.go | 8 +++- 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 878cb088b8cf..6ff89ff067a5 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -153,7 +153,7 @@ func generateSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache i rawdb.WriteSnapshotRoot(batch, root) journalProgress(batch, genMarker, stats) if err := batch.Write(); err != nil { - log.Crit("Failed to write initialized state marker", "error", err) + log.Crit("Failed to write initialized state marker", "err", err) } base := &diskLayer{ diskdb: diskdb, @@ -348,10 +348,19 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or return &proofResult{keys: keys, vals: vals, diskMore: diskMore, trieMore: cont, proofErr: err, tr: tr}, nil } -// genRange generates the state segment with particular prefix. Generation can +// onStateCallback is a function that is called by generateRange, when processing a range of +// accounts or storage slots. For each element, the callback is invoked. +// If 'delete' is true, then this element (and potential slots) needs to be deleted from the snapshot. +// If 'write' is true, then this element needs to be updated with the 'val'. +// If 'write' is false, then this element is already correct, and needs no update. However, +// for accounts, the storage trie of the account needs to be checked. 
+// The 'val' is the canonical encoding of the value (not the slim format for accounts) +type onStateCallback func(key []byte, val []byte, write bool, delete bool) error + +// generateRange generates the state segment with particular prefix. Generation can // either verify the correctness of existing state through rangeproof and skip // generation, or iterate trie to regenerate state on demand. -func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, stats *generatorStats, onState func(key []byte, val []byte, write bool, delete bool) error, valueConvertFn func([]byte) ([]byte, error)) (bool, []byte, error) { +func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, stats *generatorStats, onState onStateCallback, valueConvertFn func([]byte) ([]byte, error)) (bool, []byte, error) { // Use range prover to check the validity of the flat state in the range result, err := dl.proveRange(root, prefix, kind, origin, max, valueConvertFn) if err != nil { @@ -380,7 +389,7 @@ func (dl *diskLayer) genRange(root common.Hash, prefix []byte, kind string, orig // Only abort the iteration when both database and trie are exhausted return !result.diskMore && !result.trieMore, last, nil } - logger.Trace("Detected outdated state range", "last", hexutil.Encode(last), "error", result.proofErr) + logger.Trace("Detected outdated state range", "last", hexutil.Encode(last), "err", result.proofErr) snapFailedRangeProofMeter.Mark(1) // Special case, the entire trie is missing. 
In the original trie scheme, @@ -575,6 +584,12 @@ func (dl *diskLayer) generate(stats *generatorStats) { if accMarker == nil || !bytes.Equal(accountHash[:], accMarker) { dataLen := len(val) // Approximate size, saves us a round of RLP-encoding if !write { + if bytes.Equal(acc.CodeHash, emptyCode[:]) { + dataLen -= 32 + } + if acc.Root == emptyRoot { + dataLen -= 32 + } snapRecoveredAccountMeter.Mark(1) } else { data := SlimAccountRLP(acc.Nonce, acc.Balance, acc.Root, acc.CodeHash) @@ -591,7 +606,20 @@ func (dl *diskLayer) generate(stats *generatorStats) { } // If the iterated account is the contract, create a further loop to // verify or regenerate the contract storage. - if acc.Root != emptyRoot { + if acc.Root == emptyRoot { + // If the root is empty, we still need to ensure that any previous snapshot + // storage values are cleared + // TODO: investigate if this can be avoided, this will be very costly since it + // affects every single EOA account + // - Perhaps we can avoid if where codeHash is emptyCode + prefix := append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...) 
+ keyLen := len(rawdb.SnapshotStoragePrefix) + 2*common.HashLength + if err := wipeKeyRange(dl.diskdb, "storage", prefix, nil, nil, keyLen, snapWipedStorageMeter, false); err != nil { + return err + } + accountWrite += time.Since(start) + snapAccountWriteTimer.Update(int64(accountWrite)) + } else { accountWrite += time.Since(start) snapAccountWriteTimer.Update(int64(accountWrite)) @@ -627,7 +655,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { } var storeOrigin = common.CopyBytes(storeMarker) for { - exhausted, last, err := dl.genRange(acc.Root, append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...), "storage", storeOrigin, storageCheckRange, stats, onStorage, nil) + exhausted, last, err := dl.generateRange(acc.Root, append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...), "storage", storeOrigin, storageCheckRange, stats, onStorage, nil) if err != nil { return err } @@ -638,19 +666,6 @@ func (dl *diskLayer) generate(stats *generatorStats) { break // special case, the last is 0xffffffff...fff } } - } else { - // If the root is empty, we still need to ensure that any previous snapshot - // storage values are cleared - // TODO: investigate if this can be avoided, this will be very costly since it - // affects every single EOA account - // - Perhaps we can avoid if where codeHash is emptyCode - prefix := append(rawdb.SnapshotStoragePrefix, accountHash.Bytes()...) - keyLen := len(rawdb.SnapshotStoragePrefix) + 2*common.HashLength - if err := wipeKeyRange(dl.diskdb, "storage", prefix, nil, nil, keyLen, snapWipedStorageMeter, false); err != nil { - return err - } - accountWrite += time.Since(start) - snapAccountWriteTimer.Update(int64(accountWrite)) } // Some account processed, unmark the marker accMarker = nil @@ -659,7 +674,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { // Global loop for regerating the entire state trie + all layered storage tries. 
for { - exhausted, last, err := dl.genRange(dl.root, rawdb.SnapshotAccountPrefix, "account", accOrigin, accountRange, stats, onAccount, FullAccountRLP) + exhausted, last, err := dl.generateRange(dl.root, rawdb.SnapshotAccountPrefix, "account", accOrigin, accountRange, stats, onAccount, FullAccountRLP) // The procedure it aborted, either by external signal or internal error if err != nil { if abort == nil { // aborted by internal error, wait the signal @@ -682,7 +697,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { // generator anyway to mark the snapshot is complete. journalProgress(batch, nil, stats) if err := batch.Write(); err != nil { - log.Error("Failed to flush batch", "error", err) + log.Error("Failed to flush batch", "err", err) abort = <-dl.genAbort abort <- stats diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go index 95724e9164c7..3a669085f748 100644 --- a/core/state/snapshot/generate_test.go +++ b/core/state/snapshot/generate_test.go @@ -667,7 +667,9 @@ func TestGenerateWithManyExtraAccounts(t *testing.T) { // But in the database, we still have the stale storage slots 0x04, 0x05. They are not iterated yet, but the procedure is finished. 
func TestGenerateWithExtraBeforeAndAfter(t *testing.T) { accountCheckRange = 3 - log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + if false { + enableLogging() + } var ( diskdb = memorydb.New() triedb = trie.NewDatabase(diskdb) @@ -711,7 +713,9 @@ func TestGenerateWithExtraBeforeAndAfter(t *testing.T) { // in the snapshot database, which cannot be parsed back to an account func TestGenerateWithMalformedSnapdata(t *testing.T) { accountCheckRange = 3 - log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + if false { + enableLogging() + } var ( diskdb = memorydb.New() triedb = trie.NewDatabase(diskdb) From 46fc218c04747fcecd214ea9dfbe8f505a6a9626 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 31 Mar 2021 15:52:50 +0800 Subject: [PATCH 65/75] core/state/snapshot: improve error message --- core/state/snapshot/generate.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 6ff89ff067a5..2853899f4420 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -55,6 +55,10 @@ var ( // value is too large, the failure rate of range prove will increase. Otherwise // the the value is too small, the efficiency of the state recovery will decrease. storageCheckRange = 1024 + + // errMissingTrie is returned if the target trie is missing while the generation + // is running. In this case the generation is aborted and wait the new signal. + errMissingTrie = errors.New("missing trie") ) // Metrics in generation @@ -247,7 +251,7 @@ func (result *proofResult) forEach(callback func(key []byte, val []byte) error) // // The proof result will be returned if the range proving is finished, otherwise // the error will be returned to abort the entire procedure. 
-func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) (*proofResult, error) { +func (dl *diskLayer) proveRange(stats *generatorStats, root common.Hash, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) (*proofResult, error) { var ( keys [][]byte vals [][]byte @@ -320,8 +324,8 @@ func (dl *diskLayer) proveRange(root common.Hash, prefix []byte, kind string, or // Snap state is chunked, generate edge proofs for verification. tr, err := trie.New(root, dl.triedb) if err != nil { - log.Error("Missing trie", "root", root, "err", err) - return nil, err + stats.Log("Trie missing, state snapshotting paused", dl.root, dl.genMarker) + return nil, errMissingTrie } // Firstly find out the key of last iterated element. var last []byte @@ -362,7 +366,7 @@ type onStateCallback func(key []byte, val []byte, write bool, delete bool) error // generation, or iterate trie to regenerate state on demand. 
func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, origin []byte, max int, stats *generatorStats, onState onStateCallback, valueConvertFn func([]byte) ([]byte, error)) (bool, []byte, error) { // Use range prover to check the validity of the flat state in the range - result, err := dl.proveRange(root, prefix, kind, origin, max, valueConvertFn) + result, err := dl.proveRange(stats, root, prefix, kind, origin, max, valueConvertFn) if err != nil { return false, nil, err } @@ -408,7 +412,8 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, if tr == nil { tr, err = trie.New(root, dl.triedb) if err != nil { - return false, nil, err + stats.Log("Trie missing, state snapshotting paused", dl.root, dl.genMarker) + return false, nil, errMissingTrie } } var ( From bb149d949337a1986a31d62dc2ad221f646d4cee Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 14 Apr 2021 11:41:14 +0200 Subject: [PATCH 66/75] cmd/geth: rename 'error' to 'err' in logs --- cmd/geth/snapshot.go | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/cmd/geth/snapshot.go b/cmd/geth/snapshot.go index 686101a08c80..1af458af207e 100644 --- a/cmd/geth/snapshot.go +++ b/cmd/geth/snapshot.go @@ -155,7 +155,7 @@ func pruneState(ctx *cli.Context) error { chaindb := utils.MakeChainDatabase(ctx, stack, false) pruner, err := pruner.NewPruner(chaindb, stack.ResolvePath(""), stack.ResolvePath(config.Eth.TrieCleanCacheJournal), ctx.GlobalUint64(utils.BloomFilterSizeFlag.Name)) if err != nil { - log.Error("Failed to open snapshot tree", "error", err) + log.Error("Failed to open snapshot tree", "err", err) return err } if ctx.NArg() > 1 { @@ -166,12 +166,12 @@ func pruneState(ctx *cli.Context) error { if ctx.NArg() == 1 { targetRoot, err = parseRoot(ctx.Args()[0]) if err != nil { - log.Error("Failed to resolve state root", "error", err) + log.Error("Failed to resolve state root", "err", err) return 
err } } if err = pruner.Prune(targetRoot); err != nil { - log.Error("Failed to prune state", "error", err) + log.Error("Failed to prune state", "err", err) return err } return nil @@ -189,7 +189,7 @@ func verifyState(ctx *cli.Context) error { } snaptree, err := snapshot.New(chaindb, trie.NewDatabase(chaindb), 256, headBlock.Root(), false, false, false) if err != nil { - log.Error("Failed to open snapshot tree", "error", err) + log.Error("Failed to open snapshot tree", "err", err) return err } if ctx.NArg() > 1 { @@ -200,12 +200,12 @@ func verifyState(ctx *cli.Context) error { if ctx.NArg() == 1 { root, err = parseRoot(ctx.Args()[0]) if err != nil { - log.Error("Failed to resolve state root", "error", err) + log.Error("Failed to resolve state root", "err", err) return err } } if err := snaptree.Verify(root); err != nil { - log.Error("Failed to verfiy state", "root", root, "error", err) + log.Error("Failed to verfiy state", "root", root, "err", err) return err } log.Info("Verified the state", "root", root) @@ -236,7 +236,7 @@ func traverseState(ctx *cli.Context) error { if ctx.NArg() == 1 { root, err = parseRoot(ctx.Args()[0]) if err != nil { - log.Error("Failed to resolve state root", "error", err) + log.Error("Failed to resolve state root", "err", err) return err } log.Info("Start traversing the state", "root", root) @@ -247,7 +247,7 @@ func traverseState(ctx *cli.Context) error { triedb := trie.NewDatabase(chaindb) t, err := trie.NewSecure(root, triedb) if err != nil { - log.Error("Failed to open trie", "root", root, "error", err) + log.Error("Failed to open trie", "root", root, "err", err) return err } var ( @@ -262,13 +262,13 @@ func traverseState(ctx *cli.Context) error { accounts += 1 var acc state.Account if err := rlp.DecodeBytes(accIter.Value, &acc); err != nil { - log.Error("Invalid account encountered during traversal", "error", err) + log.Error("Invalid account encountered during traversal", "err", err) return err } if acc.Root != emptyRoot { 
storageTrie, err := trie.NewSecure(acc.Root, triedb) if err != nil { - log.Error("Failed to open storage trie", "root", acc.Root, "error", err) + log.Error("Failed to open storage trie", "root", acc.Root, "err", err) return err } storageIter := trie.NewIterator(storageTrie.NodeIterator(nil)) @@ -276,7 +276,7 @@ func traverseState(ctx *cli.Context) error { slots += 1 } if storageIter.Err != nil { - log.Error("Failed to traverse storage trie", "root", acc.Root, "error", storageIter.Err) + log.Error("Failed to traverse storage trie", "root", acc.Root, "err", storageIter.Err) return storageIter.Err } } @@ -294,7 +294,7 @@ func traverseState(ctx *cli.Context) error { } } if accIter.Err != nil { - log.Error("Failed to traverse state trie", "root", root, "error", accIter.Err) + log.Error("Failed to traverse state trie", "root", root, "err", accIter.Err) return accIter.Err } log.Info("State is complete", "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start))) @@ -326,7 +326,7 @@ func traverseRawState(ctx *cli.Context) error { if ctx.NArg() == 1 { root, err = parseRoot(ctx.Args()[0]) if err != nil { - log.Error("Failed to resolve state root", "error", err) + log.Error("Failed to resolve state root", "err", err) return err } log.Info("Start traversing the state", "root", root) @@ -337,7 +337,7 @@ func traverseRawState(ctx *cli.Context) error { triedb := trie.NewDatabase(chaindb) t, err := trie.NewSecure(root, triedb) if err != nil { - log.Error("Failed to open trie", "root", root, "error", err) + log.Error("Failed to open trie", "root", root, "err", err) return err } var ( @@ -368,13 +368,13 @@ func traverseRawState(ctx *cli.Context) error { accounts += 1 var acc state.Account if err := rlp.DecodeBytes(accIter.LeafBlob(), &acc); err != nil { - log.Error("Invalid account encountered during traversal", "error", err) + log.Error("Invalid account encountered during traversal", "err", err) return errors.New("invalid account") } 
if acc.Root != emptyRoot { storageTrie, err := trie.NewSecure(acc.Root, triedb) if err != nil { - log.Error("Failed to open storage trie", "root", acc.Root, "error", err) + log.Error("Failed to open storage trie", "root", acc.Root, "err", err) return errors.New("missing storage trie") } storageIter := storageTrie.NodeIterator(nil) @@ -397,7 +397,7 @@ func traverseRawState(ctx *cli.Context) error { } } if storageIter.Error() != nil { - log.Error("Failed to traverse storage trie", "root", acc.Root, "error", storageIter.Error()) + log.Error("Failed to traverse storage trie", "root", acc.Root, "err", storageIter.Error()) return storageIter.Error() } } @@ -416,7 +416,7 @@ func traverseRawState(ctx *cli.Context) error { } } if accIter.Error() != nil { - log.Error("Failed to traverse state trie", "root", root, "error", accIter.Error()) + log.Error("Failed to traverse state trie", "root", root, "err", accIter.Error()) return accIter.Error() } log.Info("State is complete", "nodes", nodes, "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start))) From 78cdadd2585b722fa30a91ab2cc8b62339414a4d Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 14 Apr 2021 11:45:10 +0200 Subject: [PATCH 67/75] core/state/snapshot: fix some review concerns --- core/state/snapshot/conversion.go | 2 +- core/state/snapshot/generate.go | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/state/snapshot/conversion.go b/core/state/snapshot/conversion.go index 2f27eac03230..f70cbf1e686b 100644 --- a/core/state/snapshot/conversion.go +++ b/core/state/snapshot/conversion.go @@ -322,7 +322,7 @@ func generateTrieRoot(db ethdb.KeyValueWriter, it Iterator, account common.Hash, return } if !bytes.Equal(account.Root, subroot.Bytes()) { - results <- fmt.Errorf("invalid subroot(%x), want %x, got %x", hash, account.Root, subroot) + results <- fmt.Errorf("invalid subroot(path %x), want %x, have %x", hash, account.Root, subroot) 
return } results <- nil diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 2853899f4420..57c35e091289 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -207,7 +207,7 @@ func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorSta } // proofResult contains the output of range proving which can be used -// for further processing no matter it's successful or not. +// for further processing regardless if it is successful or not. type proofResult struct { keys [][]byte // The key set of all elements being iterated, even proving is failed vals [][]byte // The val set of all elements being iterated, even proving is failed @@ -222,7 +222,7 @@ func (result *proofResult) valid() bool { return result.proofErr == nil } -// last returns the last verified element key no matter the range proof is +// last returns the last verified element key regardless of whether the range proof is // successful or not. Nil is returned if nothing involved in the proving. func (result *proofResult) last() []byte { var last []byte @@ -244,7 +244,7 @@ func (result *proofResult) forEach(callback func(key []byte, val []byte) error) return nil } -// proveRange proves the state segment with particular prefix is "valid". +// proveRange proves the snapshot segment with particular prefix is "valid". // The iteration start point will be assigned if the iterator is restored from // the last interruption. Max will be assigned in order to limit the maximum // amount of data involved in each iteration. @@ -346,7 +346,7 @@ func (dl *diskLayer) proveRange(stats *generatorStats, root common.Hash, prefix return &proofResult{keys: keys, vals: vals, diskMore: diskMore, trieMore: false, proofErr: err, tr: tr}, nil } } - // Verify the state segment with range prover, ensure that all flat states + // Verify the snapshot segment with range prover, ensure that all flat states // in this range correspond to merkle trie. 
_, _, _, cont, err := trie.VerifyRangeProof(root, origin, last, keys, vals, proof) return &proofResult{keys: keys, vals: vals, diskMore: diskMore, trieMore: cont, proofErr: err, tr: tr}, nil From 1693ca3269de77b921c47585144a551bc798e16e Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 14 Apr 2021 13:39:08 +0200 Subject: [PATCH 68/75] core/state/snapshot, eth/protocols/snap: clear snapshot marker when starting/resuming snap sync --- core/state/snapshot/generate.go | 10 ++++++++++ eth/protocols/snap/sync.go | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 57c35e091289..a71a31500723 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -144,6 +144,16 @@ func (gs *generatorStats) Log(msg string, root common.Hash, marker []byte) { log.Info(msg, ctx...) } +// ClearSnapshotMarker sets the snapshot marker to zero, meaning that snapshots +// are not usable. +func ClearSnapshotMarker(diskdb ethdb.KeyValueStore) { + batch := diskdb.NewBatch() + journalProgress(batch, []byte{}, nil) + if err := batch.Write(); err != nil { + log.Crit("Failed to write initialized state marker", "err", err) + } +} + // generateSnapshot regenerates a brand new snapshot based on an existing state // database and head block asynchronously. The snapshot is returned immediately // and generation is continued in the background until done. diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index b703f5d7c908..f4a5efa26cbd 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -567,6 +567,11 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { log.Debug("Snapshot sync already completed") return nil } + // If sync is still not finished, we need to ensure that any marker is wiped. + // Otherwise, it may happen that requests for e.g. 
genesis-data is delivered + // from the snapshot data, instead of from the trie + snapshot.ClearSnapshotMarker(s.db) + defer func() { // Persist any progress, independent of failure for _, task := range s.tasks { s.forwardAccountTask(task) From c2e79524fcb0e742c0ba93235a1f3ed776b05c17 Mon Sep 17 00:00:00 2001 From: rjl493456442 Date: Wed, 14 Apr 2021 19:44:54 +0800 Subject: [PATCH 69/75] core: add error log --- core/state/snapshot/generate.go | 1 + 1 file changed, 1 insertion(+) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index a71a31500723..31fa36ff6edd 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -297,6 +297,7 @@ func (dl *diskLayer) proveRange(stats *generatorStats, root common.Hash, prefix // Here append the original value to ensure that the number of key and // value are the same. vals = append(vals, common.CopyBytes(iter.Value())) + log.Error("Failed to convert account state data", "err", err) } else { vals = append(vals, val) } From 1cc3ca3bb7a430897a3a1e1fe9698c48ba353200 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 14 Apr 2021 14:03:05 +0200 Subject: [PATCH 70/75] core/state/snapshot: use proper timers for metrics collection --- core/state/snapshot/generate.go | 66 +++++++++++++-------------------- 1 file changed, 26 insertions(+), 40 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 31fa36ff6edd..26e3d24da0ef 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -74,26 +74,22 @@ var ( snapSuccessfulRangeProofMeter = metrics.NewRegisteredMeter("state/snapshot/generation/proof/success", nil) snapFailedRangeProofMeter = metrics.NewRegisteredMeter("state/snapshot/generation/proof/failure", nil) - snapAccountProveTimer = metrics.NewRegisteredGauge("state/snapshot/generation/duration/account/prove", nil) - snapAccountTrieReadTimer = 
metrics.NewRegisteredGauge("state/snapshot/generation/duration/account/trieread", nil) - snapAccountSnapReadTimer = metrics.NewRegisteredGauge("state/snapshot/generation/duration/account/snapread", nil) - snapAccountWriteTimer = metrics.NewRegisteredGauge("state/snapshot/generation/duration/account/write", nil) - snapStorageProveTimer = metrics.NewRegisteredGauge("state/snapshot/generation/duration/storage/prove", nil) - snapStorageTrieReadTimer = metrics.NewRegisteredGauge("state/snapshot/generation/duration/storage/trieread", nil) - snapStorageSnapReadTimer = metrics.NewRegisteredGauge("state/snapshot/generation/duration/storage/snapread", nil) - snapStorageWriteTimer = metrics.NewRegisteredGauge("state/snapshot/generation/duration/storage/write", nil) -) - -// Global timer for metrics -var ( - accountProving time.Duration // The total time spent on the account proving - accountTrieRead time.Duration // The total time spent on the account trie iteration - accountSnapRead time.Duration // The total time spent on the snapshot account iteration - accountWrite time.Duration // The total time spent on writing/updating/deleting accounts - storageProving time.Duration // The total time spent on the storage proving - storageTrieRead time.Duration // The total time spent on the storage trie iteration - storageSnapRead time.Duration // The total time spent on the snapshot storage iteration - storageWrite time.Duration // The total time spent on writing/updating/deleting storages + // snapAccountProveTimer measures time spent on the account proving + snapAccountProveTimer = metrics.NewRegisteredTimer("state/snapshot/generation/duration/account/prove", nil) + // snapAccountTrieReadTimer measures time spent on the account trie iteration + snapAccountTrieReadTimer = metrics.NewRegisteredTimer("state/snapshot/generation/duration/account/trieread", nil) + // snapAccountSnapReadTimer measues time spent on the snapshot account iteration + snapAccountSnapReadTimer = 
metrics.NewRegisteredTimer("state/snapshot/generation/duration/account/snapread", nil) + // snapAccountWriteTimer measures time spent on writing/updating/deleting accounts + snapAccountWriteTimer = metrics.NewRegisteredTimer("state/snapshot/generation/duration/account/write", nil) + // snapStorageProveTimer measures time spent on storage proving + snapStorageProveTimer = metrics.NewRegisteredTimer("state/snapshot/generation/duration/storage/prove", nil) + // snapStorageTrieReadTimer measures time spent on the storage trie iteration + snapStorageTrieReadTimer = metrics.NewRegisteredTimer("state/snapshot/generation/duration/storage/trieread", nil) + // snapStorageSnapReadTimer measures time spent on the snapshot storage iteration + snapStorageSnapReadTimer = metrics.NewRegisteredTimer("state/snapshot/generation/duration/storage/snapread", nil) + // snapStorageWriteTimer measures time spent on writing/updating/deleting storages + snapStorageWriteTimer = metrics.NewRegisteredTimer("state/snapshot/generation/duration/storage/write", nil) ) // generatorStats is a collection of statistics gathered by the snapshot generator @@ -305,19 +301,15 @@ func (dl *diskLayer) proveRange(stats *generatorStats, root common.Hash, prefix } // Update metrics for database iteration and merkle proving if kind == "storage" { - storageSnapRead += time.Since(start) - snapStorageSnapReadTimer.Update(int64(storageSnapRead)) + snapStorageSnapReadTimer.Update(time.Since(start)) } else { - accountSnapRead += time.Since(start) - snapAccountSnapReadTimer.Update(int64(accountSnapRead)) + snapAccountSnapReadTimer.Update(time.Since(start)) } defer func(start time.Time) { if kind == "storage" { - storageProving += time.Since(start) - snapStorageProveTimer.Update(int64(storageProving)) + snapStorageProveTimer.Update(time.Since(start)) } else { - accountProving += time.Since(start) - snapAccountProveTimer.Update(int64(accountProving)) + snapAccountProveTimer.Update(time.Since(start)) } }(time.Now()) @@ 
-499,11 +491,9 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, // Update metrics for counting trie iteration if kind == "storage" { - storageTrieRead += time.Since(start) - internal - snapStorageTrieReadTimer.Update(int64(storageTrieRead)) + snapStorageTrieReadTimer.Update(time.Since(start) - internal) } else { - accountTrieRead += time.Since(start) - internal - snapAccountTrieReadTimer.Update(int64(accountTrieRead)) + snapAccountTrieReadTimer.Update(time.Since(start) - internal) } logger.Debug("Regenerated state range", "root", root, "last", hexutil.Encode(last), "count", count, "created", created, "updated", updated, "untouched", untouched, "deleted", deleted) @@ -582,8 +572,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { if err := wipeKeyRange(dl.diskdb, "storage", prefix, nil, nil, keyLen, snapWipedStorageMeter, false); err != nil { return err } - accountWrite += time.Since(start) - snapAccountWriteTimer.Update(int64(accountWrite)) + snapAccountWriteTimer.Update(time.Since(start)) return nil } // Retrieve the current account and flatten it into the internal format @@ -633,11 +622,9 @@ func (dl *diskLayer) generate(stats *generatorStats) { if err := wipeKeyRange(dl.diskdb, "storage", prefix, nil, nil, keyLen, snapWipedStorageMeter, false); err != nil { return err } - accountWrite += time.Since(start) - snapAccountWriteTimer.Update(int64(accountWrite)) + snapAccountWriteTimer.Update(time.Since(start)) } else { - accountWrite += time.Since(start) - snapAccountWriteTimer.Update(int64(accountWrite)) + snapAccountWriteTimer.Update(time.Since(start)) var storeMarker []byte if accMarker != nil && bytes.Equal(accountHash[:], accMarker) && len(dl.genMarker) > common.HashLength { @@ -645,8 +632,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { } onStorage := func(key []byte, val []byte, write bool, delete bool) error { defer func(start time.Time) { - storageWrite += time.Since(start) - 
snapStorageWriteTimer.Update(int64(storageWrite)) + snapStorageWriteTimer.Update(time.Since(start)) }(time.Now()) if delete { From 1a6143f6803856c510c8eda505ed33e980d12acd Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 14 Apr 2021 14:51:27 +0200 Subject: [PATCH 71/75] core/state/snapshot: address some review concerns --- core/state/snapshot/generate.go | 45 ++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 26e3d24da0ef..fcf8cfa883d7 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -320,9 +320,13 @@ func (dl *diskLayer) proveRange(stats *generatorStats, root common.Hash, prefix stackTr.TryUpdate(key, common.CopyBytes(vals[i])) } if gotRoot := stackTr.Hash(); gotRoot != root { - return &proofResult{keys: keys, vals: vals, diskMore: false, trieMore: false, proofErr: errors.New("wrong root")}, nil + return &proofResult{ + keys: keys, + vals: vals, + proofErr: fmt.Errorf("wrong root: have %#x want %#x", gotRoot, root), + }, nil } - return &proofResult{keys: keys, vals: vals, diskMore: false, trieMore: false, proofErr: nil}, nil + return &proofResult{keys: keys, vals: vals}, nil } // Snap state is chunked, generate edge proofs for verification. 
tr, err := trie.New(root, dl.triedb) @@ -341,18 +345,37 @@ func (dl *diskLayer) proveRange(stats *generatorStats, root common.Hash, prefix } if err := tr.Prove(origin, 0, proof); err != nil { log.Debug("Failed to prove range", "kind", kind, "origin", origin, "err", err) - return &proofResult{keys: keys, vals: vals, diskMore: diskMore, trieMore: false, proofErr: err, tr: tr}, nil + return &proofResult{ + keys: keys, + vals: vals, + diskMore: diskMore, + proofErr: err, + tr: tr, + }, nil } if last != nil { if err := tr.Prove(last, 0, proof); err != nil { log.Debug("Failed to prove range", "kind", kind, "last", last, "err", err) - return &proofResult{keys: keys, vals: vals, diskMore: diskMore, trieMore: false, proofErr: err, tr: tr}, nil + return &proofResult{ + keys: keys, + vals: vals, + diskMore: diskMore, + proofErr: err, + tr: tr, + }, nil } } // Verify the snapshot segment with range prover, ensure that all flat states // in this range correspond to merkle trie. _, _, _, cont, err := trie.VerifyRangeProof(root, origin, last, keys, vals, proof) - return &proofResult{keys: keys, vals: vals, diskMore: diskMore, trieMore: cont, proofErr: err, tr: tr}, nil + return &proofResult{ + keys: keys, + vals: vals, + diskMore: diskMore, + trieMore: cont, + proofErr: err, + tr: tr}, + nil } // onStateCallback is a function that is called by generateRange, when processing a range of @@ -433,7 +456,6 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, // timers start = time.Now() - istart time.Time internal time.Duration ) for iter.Next() { @@ -441,20 +463,19 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, trieMore = true break } - count += 1 - + count++ write := true created++ for len(kvkeys) > 0 { if cmp := bytes.Compare(kvkeys[0], iter.Key); cmp < 0 { // delete the key - istart = time.Now() + istart := time.Now() if err := onState(kvkeys[0], nil, false, true); err != nil { return false, nil, err } kvkeys = 
kvkeys[1:] kvvals = kvvals[1:] - deleted += 1 + deleted++ internal += time.Since(istart) continue } else if cmp == 0 { @@ -470,7 +491,7 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, } break } - istart = time.Now() + istart := time.Now() if err := onState(iter.Key, iter.Value, write, false); err != nil { return false, nil, err } @@ -480,7 +501,7 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, return false, nil, iter.Err } // Delete all stale snapshot states remaining - istart = time.Now() + istart := time.Now() for _, key := range kvkeys { if err := onState(key, nil, false, true); err != nil { return false, nil, err From 920e2ebce4fe5d6ac17d3a18968b0ea41cb0e471 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 14 Apr 2021 14:57:36 +0200 Subject: [PATCH 72/75] eth/protocols/snap: improved log message --- eth/protocols/snap/sync.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index f4a5efa26cbd..88c8e99e24c2 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -2697,5 +2697,7 @@ func (s *Syncer) reportHealProgress(force bool) { bytecode = fmt.Sprintf("%d@%v", s.bytecodeHealSynced, s.bytecodeHealBytes.TerminalString()) ) log.Info("State heal in progress", "nodes", trienode, "codes", bytecode, - "accounts", s.accountHealed, "bytes", s.accountHealedBytes, "storages", s.storageHealed, "bytes", s.storageHealedBytes, "pending", s.healer.scheduler.Pending()) + "accounts", s.accountHealed, "account size", s.accountHealedBytes, + "storages", s.storageHealed, "storage size", s.storageHealedBytes, + "pending", s.healer.scheduler.Pending()) } From fa8b4679f92f05d2818153ecb05f30bef3b52fe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Wed, 14 Apr 2021 16:23:05 +0300 Subject: [PATCH 73/75] eth/protocols/snap: fix heal logs to condense infos --- 
eth/protocols/snap/sync.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 88c8e99e24c2..22b0c8604dd0 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -2695,9 +2695,9 @@ func (s *Syncer) reportHealProgress(force bool) { var ( trienode = fmt.Sprintf("%d@%v", s.trienodeHealSynced, s.trienodeHealBytes.TerminalString()) bytecode = fmt.Sprintf("%d@%v", s.bytecodeHealSynced, s.bytecodeHealBytes.TerminalString()) + accounts = fmt.Sprintf("%d@%v", s.accountHealed, s.accountHealedBytes.TerminalString()) + storage = fmt.Sprintf("%d@%v", s.storageHealed, s.storageHealedBytes.TerminalString()) ) - log.Info("State heal in progress", "nodes", trienode, "codes", bytecode, - "accounts", s.accountHealed, "account size", s.accountHealedBytes, - "storages", s.storageHealed, "storage size", s.storageHealedBytes, - "pending", s.healer.scheduler.Pending()) + log.Info("State heal in progress", "accounts", accounts, "slots", storage, + "codes", bytecode, "nodes", trienode, "pending", s.healer.scheduler.Pending()) } From 3a548307ffc279c52a03899da15441bebe286ed9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Wed, 14 Apr 2021 21:35:17 +0300 Subject: [PATCH 74/75] core/state/snapshot: wait for generator termination before restarting --- core/state/snapshot/snapshot.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index 842946399481..710ba4d4c227 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -662,8 +662,9 @@ func (t *Tree) Rebuild(root common.Hash) { case *diskLayer: // If the base layer is generating, abort it and save if layer.genAbort != nil { - abort := make(chan *generatorStats, 1) // Discard the stats + abort := make(chan *generatorStats) layer.genAbort <- abort + <-abort } // Layer should be inactive now, mark it as 
stale layer.lock.Lock() From 12a1a850b7002b69721861978470c45789873960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Wed, 14 Apr 2021 21:42:10 +0300 Subject: [PATCH 75/75] core/state/snapshot: revert timers to counters to track total time --- core/state/snapshot/generate.go | 52 ++++++++++++++++----------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index fcf8cfa883d7..98c8d42a1a6c 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -74,22 +74,22 @@ var ( snapSuccessfulRangeProofMeter = metrics.NewRegisteredMeter("state/snapshot/generation/proof/success", nil) snapFailedRangeProofMeter = metrics.NewRegisteredMeter("state/snapshot/generation/proof/failure", nil) - // snapAccountProveTimer measures time spent on the account proving - snapAccountProveTimer = metrics.NewRegisteredTimer("state/snapshot/generation/duration/account/prove", nil) - // snapAccountTrieReadTimer measures time spent on the account trie iteration - snapAccountTrieReadTimer = metrics.NewRegisteredTimer("state/snapshot/generation/duration/account/trieread", nil) - // snapAccountSnapReadTimer measues time spent on the snapshot account iteration - snapAccountSnapReadTimer = metrics.NewRegisteredTimer("state/snapshot/generation/duration/account/snapread", nil) - // snapAccountWriteTimer measures time spent on writing/updating/deleting accounts - snapAccountWriteTimer = metrics.NewRegisteredTimer("state/snapshot/generation/duration/account/write", nil) - // snapStorageProveTimer measures time spent on storage proving - snapStorageProveTimer = metrics.NewRegisteredTimer("state/snapshot/generation/duration/storage/prove", nil) - // snapStorageTrieReadTimer measures time spent on the storage trie iteration - snapStorageTrieReadTimer = metrics.NewRegisteredTimer("state/snapshot/generation/duration/storage/trieread", nil) - // snapStorageSnapReadTimer 
measures time spent on the snapshot storage iteration - snapStorageSnapReadTimer = metrics.NewRegisteredTimer("state/snapshot/generation/duration/storage/snapread", nil) - // snapStorageWriteTimer measures time spent on writing/updating/deleting storages - snapStorageWriteTimer = metrics.NewRegisteredTimer("state/snapshot/generation/duration/storage/write", nil) + // snapAccountProveCounter measures time spent on the account proving + snapAccountProveCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/account/prove", nil) + // snapAccountTrieReadCounter measures time spent on the account trie iteration + snapAccountTrieReadCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/account/trieread", nil) + // snapAccountSnapReadCounter measues time spent on the snapshot account iteration + snapAccountSnapReadCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/account/snapread", nil) + // snapAccountWriteCounter measures time spent on writing/updating/deleting accounts + snapAccountWriteCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/account/write", nil) + // snapStorageProveCounter measures time spent on storage proving + snapStorageProveCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/storage/prove", nil) + // snapStorageTrieReadCounter measures time spent on the storage trie iteration + snapStorageTrieReadCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/storage/trieread", nil) + // snapStorageSnapReadCounter measures time spent on the snapshot storage iteration + snapStorageSnapReadCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/storage/snapread", nil) + // snapStorageWriteCounter measures time spent on writing/updating/deleting storages + snapStorageWriteCounter = metrics.NewRegisteredCounter("state/snapshot/generation/duration/storage/write", nil) ) // generatorStats is a collection of statistics 
gathered by the snapshot generator @@ -301,15 +301,15 @@ func (dl *diskLayer) proveRange(stats *generatorStats, root common.Hash, prefix } // Update metrics for database iteration and merkle proving if kind == "storage" { - snapStorageSnapReadTimer.Update(time.Since(start)) + snapStorageSnapReadCounter.Inc(time.Since(start).Nanoseconds()) } else { - snapAccountSnapReadTimer.Update(time.Since(start)) + snapAccountSnapReadCounter.Inc(time.Since(start).Nanoseconds()) } defer func(start time.Time) { if kind == "storage" { - snapStorageProveTimer.Update(time.Since(start)) + snapStorageProveCounter.Inc(time.Since(start).Nanoseconds()) } else { - snapAccountProveTimer.Update(time.Since(start)) + snapAccountProveCounter.Inc(time.Since(start).Nanoseconds()) } }(time.Now()) @@ -512,9 +512,9 @@ func (dl *diskLayer) generateRange(root common.Hash, prefix []byte, kind string, // Update metrics for counting trie iteration if kind == "storage" { - snapStorageTrieReadTimer.Update(time.Since(start) - internal) + snapStorageTrieReadCounter.Inc((time.Since(start) - internal).Nanoseconds()) } else { - snapAccountTrieReadTimer.Update(time.Since(start) - internal) + snapAccountTrieReadCounter.Inc((time.Since(start) - internal).Nanoseconds()) } logger.Debug("Regenerated state range", "root", root, "last", hexutil.Encode(last), "count", count, "created", created, "updated", updated, "untouched", untouched, "deleted", deleted) @@ -593,7 +593,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { if err := wipeKeyRange(dl.diskdb, "storage", prefix, nil, nil, keyLen, snapWipedStorageMeter, false); err != nil { return err } - snapAccountWriteTimer.Update(time.Since(start)) + snapAccountWriteCounter.Inc(time.Since(start).Nanoseconds()) return nil } // Retrieve the current account and flatten it into the internal format @@ -643,9 +643,9 @@ func (dl *diskLayer) generate(stats *generatorStats) { if err := wipeKeyRange(dl.diskdb, "storage", prefix, nil, nil, keyLen, snapWipedStorageMeter, 
false); err != nil { return err } - snapAccountWriteTimer.Update(time.Since(start)) + snapAccountWriteCounter.Inc(time.Since(start).Nanoseconds()) } else { - snapAccountWriteTimer.Update(time.Since(start)) + snapAccountWriteCounter.Inc(time.Since(start).Nanoseconds()) var storeMarker []byte if accMarker != nil && bytes.Equal(accountHash[:], accMarker) && len(dl.genMarker) > common.HashLength { @@ -653,7 +653,7 @@ func (dl *diskLayer) generate(stats *generatorStats) { } onStorage := func(key []byte, val []byte, write bool, delete bool) error { defer func(start time.Time) { - snapStorageWriteTimer.Update(time.Since(start)) + snapStorageWriteCounter.Inc(time.Since(start).Nanoseconds()) }(time.Now()) if delete {