From 77a4294a5fa5d530133a4513336f3d312c52cc1b Mon Sep 17 00:00:00 2001
From: Gaston Ponti
Date: Wed, 5 May 2021 19:26:27 -0300
Subject: [PATCH] Stabilize Snapshots (#1529)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cherry-pick bug fixes from upstream for snapshots, which will enable higher
transaction throughput. This change also enables snapshots by default (one of
the commits pulled from upstream).

Upstream commits included:

68754f393 cmd/utils: grant snapshot cache to trie if disabled (#21416)
3ee91b9f2 core/state/snapshot: reduce disk layer depth during generation
a15d71a25 core/state/snapshot: stop generator if it hits missing trie nodes (#21649)
43c278cdf core/state: disable snapshot iteration if it's not fully constructed (#21682)
b63e3c37a core: improve snapshot journal recovery (#21594)
e6402677c core/state/snapshot: fix journal recovery from generating old journal (#21775)
7b7b327ff core/state/snapshot: update generator marker in sync with flushes
167ff563d core/state/snapshot: gethring -> gathering typo (#22104)
d2e1b17f1 snapshot, trie: fixed typos, mostly in snapshot pkg (#22133)
c4deebbf1 core/state/snapshot: add generation logs to storage too
5e9f5ca5d core/state/snapshot: write snapshot generator in batch (#22163)
18145adf0 core/state: maintain one more diff layer (#21730)
04a72260c snapshot: merge loops for better performance (#22160)
994cdc69c cmd/utils: enable snapshots by default
9ec332989 core/state/snapshot: ensure Cap retains a min number of layers
52e5c38aa core/state: copy the snap when copying the state (#22340)
a31f6d54d core/state/snapshot: fix panic on missing parent
61ff3e86b core/state/snapshot, ethdb: track deletions more accurately (#22582)
c79fc209c core/state/snapshot: fix data race in diff layer (#22540)

Other changes

Commit f9b5530 (not from upstream) fixes an incorrect default DatabaseCache
value introduced by an earlier bad merge.

Tested

- Automated tests
- Testing on a private testnet

Backwards compatibility

Enabling snapshots by default is a breaking change in terms of CLI flags, but
does not break compatibility between this node and other nodes on the network.
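For reviewers, here is a minimal sketch of the recovery check that NewBlockChain
now performs when it reopens the snapshot tree, written against the rawdb
accessor added in this change. The helper name needsSnapshotRecovery and the
standalone packaging are illustrative assumptions, not code from this diff; the
import paths assume the celo-blockchain module path used elsewhere in the patch.

package example

import (
	"github.com/celo-org/celo-blockchain/core/rawdb"
	"github.com/celo-org/celo-blockchain/ethdb"
)

// needsSnapshotRecovery (hypothetical helper) reports whether a persisted
// snapshot recovery number is ahead of the current chain head. If it is, the
// chain was rewound past the snapshot's persistent disk layer, so the snapshot
// must be loaded in recovery mode rather than invalidated on a head mismatch.
func needsSnapshotRecovery(db ethdb.KeyValueReader, headNumber uint64) bool {
	layer := rawdb.ReadSnapshotRecoveryNumber(db)
	return layer != nil && *layer > headNumber
}

Operators who want the previous behaviour can still pass --snapshot=false (the
flag is now a BoolTFlag, i.e. enabled by default); when snapshots are disabled,
their cache allowance is folded back into the trie clean cache, as the
cmd/utils/flags.go hunk below shows.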
Co-authored-by: Péter Szilágyi Co-authored-by: gary rong Co-authored-by: Melvin Junhee Woo Co-authored-by: Martin Holst Swende Co-authored-by: Edgar Aroutiounian --- cmd/utils/flags.go | 9 +- core/blockchain.go | 97 ++- core/blockchain_repair_test.go | 243 +++++- core/blockchain_sethead_test.go | 249 +++++- core/blockchain_snapshot_test.go | 1024 ++++++++++++++++++++++++ core/rawdb/accessors_snapshot.go | 57 ++ core/rawdb/schema.go | 6 + core/state/snapshot/conversion.go | 2 +- core/state/snapshot/difflayer.go | 38 +- core/state/snapshot/difflayer_test.go | 2 +- core/state/snapshot/disklayer.go | 2 +- core/state/snapshot/disklayer_test.go | 76 ++ core/state/snapshot/generate.go | 115 ++- core/state/snapshot/generate_test.go | 190 +++++ core/state/snapshot/iterator.go | 4 +- core/state/snapshot/iterator_binary.go | 8 +- core/state/snapshot/iterator_fast.go | 6 +- core/state/snapshot/journal.go | 222 ++++- core/state/snapshot/snapshot.go | 234 ++++-- core/state/snapshot/snapshot_test.go | 53 +- core/state/statedb.go | 33 +- eth/config.go | 6 +- ethdb/leveldb/leveldb.go | 2 +- ethdb/memorydb/memorydb.go | 2 +- tests/state_test_util.go | 2 +- trie/database.go | 2 +- 26 files changed, 2402 insertions(+), 282 deletions(-) create mode 100644 core/blockchain_snapshot_test.go create mode 100644 core/state/snapshot/generate_test.go diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index 86eb440ce676..9adfbf64ff9a 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -214,9 +214,9 @@ var ( Usage: `Blockchain garbage collection mode ("full", "archive")`, Value: "full", } - SnapshotFlag = cli.BoolFlag{ + SnapshotFlag = cli.BoolTFlag{ Name: "snapshot", - Usage: `Enables snapshot-database mode -- experimental work in progress feature`, + Usage: `Enables snapshot-database mode (default = enable)`, } TxLookupLimitFlag = cli.Int64Flag{ Name: "txlookuplimit", @@ -1695,7 +1695,8 @@ func SetEthConfig(ctx *cli.Context, stack *node.Node, cfg *eth.Config) { if ctx.GlobalIsSet(CacheFlag.Name) || ctx.GlobalIsSet(CacheSnapshotFlag.Name) { cfg.SnapshotCache = ctx.GlobalInt(CacheFlag.Name) * ctx.GlobalInt(CacheSnapshotFlag.Name) / 100 } - if !ctx.GlobalIsSet(SnapshotFlag.Name) { + if !ctx.GlobalBool(SnapshotFlag.Name) { + cfg.TrieCleanCache += cfg.SnapshotCache cfg.SnapshotCache = 0 // Disabled } if ctx.GlobalIsSet(DocRootFlag.Name) { @@ -1946,7 +1947,7 @@ func MakeChain(ctx *cli.Context, stack *node.Node, readOnly bool) (chain *core.B TrieTimeLimit: eth.DefaultConfig.TrieTimeout, SnapshotLimit: eth.DefaultConfig.SnapshotCache, } - if !ctx.GlobalIsSet(SnapshotFlag.Name) { + if !ctx.GlobalBool(SnapshotFlag.Name) { cache.SnapshotLimit = 0 // Disabled } if ctx.GlobalIsSet(CacheFlag.Name) || ctx.GlobalIsSet(CacheTrieFlag.Name) { diff --git a/core/blockchain.go b/core/blockchain.go index 8c3863957653..bf09157d5ce6 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -203,9 +203,10 @@ type BlockChain struct { processor Processor // Block transaction processor interface vmConfig vm.Config - badBlocks *lru.Cache // Bad block cache - shouldPreserve func(*types.Block) bool // Function used to determine whether should preserve the given block. - terminateInsert func(common.Hash, uint64) bool // Testing hook used to terminate ancient receipt chain insertion. + badBlocks *lru.Cache // Bad block cache + shouldPreserve func(*types.Block) bool // Function used to determine whether should preserve the given block. 
+ terminateInsert func(common.Hash, uint64) bool // Testing hook used to terminate ancient receipt chain insertion. + writeLegacyJournal bool // Testing flag used to flush the snapshot journal in legacy format. } // NewBlockChain returns a fully initialised block chain using information @@ -277,9 +278,29 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par // Make sure the state associated with the block is available head := bc.CurrentBlock() if _, err := state.New(head.Root(), bc.stateCache, bc.snaps); err != nil { - log.Warn("Head state missing, repairing", "number", head.Number(), "hash", head.Hash()) - if err := bc.SetHead(head.NumberU64()); err != nil { - return nil, err + // Head state is missing, before the state recovery, find out the + // disk layer point of snapshot(if it's enabled). Make sure the + // rewound point is lower than disk layer. + var diskRoot common.Hash + if bc.cacheConfig.SnapshotLimit > 0 { + diskRoot = rawdb.ReadSnapshotRoot(bc.db) + } + if diskRoot != (common.Hash{}) { + log.Warn("Head state missing, repairing", "number", head.Number(), "hash", head.Hash(), "snaproot", diskRoot) + + snapDisk, err := bc.SetHeadBeyondRoot(head.NumberU64(), diskRoot) + if err != nil { + return nil, err + } + // Chain rewound, persist old snapshot number to indicate recovery procedure + if snapDisk != 0 { + rawdb.WriteSnapshotRecoveryNumber(bc.db, snapDisk) + } + } else { + log.Warn("Head state missing, repairing", "number", head.Number(), "hash", head.Hash()) + if err := bc.SetHead(head.NumberU64()); err != nil { + return nil, err + } } } // Ensure that a previous crash in SetHead doesn't leave extra ancients @@ -335,7 +356,18 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par } // Load any existing snapshot, regenerating it if loading failed if bc.cacheConfig.SnapshotLimit > 0 { - bc.snaps = snapshot.New(bc.db, bc.stateCache.TrieDB(), bc.cacheConfig.SnapshotLimit, bc.CurrentBlock().Root(), !bc.cacheConfig.SnapshotWait) + // If the chain was rewound past the snapshot persistent layer (causing + // a recovery block number to be persisted to disk), check if we're still + // in recovery mode and in that case, don't invalidate the snapshot on a + // head mismatch. + var recover bool + + head := bc.CurrentBlock() + if layer := rawdb.ReadSnapshotRecoveryNumber(bc.db); layer != nil && *layer > head.NumberU64() { + log.Warn("Enabling snapshot recovery", "chainhead", head.NumberU64(), "diskbase", *layer) + recover = true + } + bc.snaps = snapshot.New(bc.db, bc.stateCache.TrieDB(), bc.cacheConfig.SnapshotLimit, head.Root(), !bc.cacheConfig.SnapshotWait, recover) } // Take ownership of this particular state go bc.update() @@ -435,9 +467,25 @@ func (bc *BlockChain) loadLastState() error { // was fast synced or full synced and in which state, the method will try to // delete minimal data from disk whilst retaining chain consistency. func (bc *BlockChain) SetHead(head uint64) error { + _, err := bc.SetHeadBeyondRoot(head, common.Hash{}) + return err +} + +// SetHeadBeyondRoot rewinds the local chain to a new head with the extra condition +// that the rewind must pass the specified state root. This method is meant to be +// used when rewiding with snapshots enabled to ensure that we go back further than +// persistent disk layer. Depending on whether the node was fast synced or full, and +// in which state, the method will try to delete minimal data from disk whilst +// retaining chain consistency. 
+// +// The method returns the block number where the requested root cap was found. +func (bc *BlockChain) SetHeadBeyondRoot(head uint64, root common.Hash) (uint64, error) { bc.chainmu.Lock() defer bc.chainmu.Unlock() + // Track the block number of the requested root hash + var rootNumber uint64 // (no root == always 0) + // Retrieve the last pivot block to short circuit rollbacks beyond it and the // current freezer limit to start nuking id underflown pivot := rawdb.ReadLastPivotNumber(bc.db) @@ -453,8 +501,16 @@ func (bc *BlockChain) SetHead(head uint64) error { log.Error("Gap in the chain, rewinding to genesis", "number", header.Number, "hash", header.Hash()) newHeadBlock = bc.genesisBlock } else { - // Block exists, keep rewinding until we find one with state + // Block exists, keep rewinding until we find one with state, + // keeping rewinding until we exceed the optional threshold + // root hash + beyondRoot := (root == common.Hash{}) // Flag whether we're beyond the requested root (no root, always true) + for { + // If a root threshold was requested but not yet crossed, check + if root != (common.Hash{}) && !beyondRoot && newHeadBlock.Root() == root { + beyondRoot, rootNumber = true, newHeadBlock.NumberU64() + } if _, err := state.New(newHeadBlock.Root(), bc.stateCache, bc.snaps); err != nil { log.Info("Block state missing, rewinding further", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash()) if pivot == nil || newHeadBlock.NumberU64() > *pivot { @@ -465,8 +521,12 @@ func (bc *BlockChain) SetHead(head uint64) error { newHeadBlock = bc.genesisBlock } } - log.Info("Rewound to block with state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash()) - break + if beyondRoot || newHeadBlock.NumberU64() == 0 { + log.Info("Rewound to block with state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash()) + break + } + log.Info("Skipping block with threshold state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash(), "root", newHeadBlock.Root()) + newHeadBlock = bc.GetBlock(newHeadBlock.ParentHash(), newHeadBlock.NumberU64()-1) // Keep rewinding } } rawdb.WriteHeadBlockHash(db, newHeadBlock.Hash()) @@ -547,7 +607,7 @@ func (bc *BlockChain) SetHead(head uint64) error { bc.txLookupCache.Purge() bc.futureBlocks.Purge() - return bc.loadLastState() + return rootNumber, bc.loadLastState() } // FastSyncCommitHead sets the current head block to the one defined by the hash @@ -590,6 +650,11 @@ func (bc *BlockChain) Snapshot() *snapshot.Tree { return bc.snaps } +// Add this to solve conflicts due to cherry-picking +func (bc *BlockChain) Snapshots() *snapshot.Tree { + return bc.Snapshot() +} + // CurrentFastBlock retrieves the current fast-sync head block of the canonical // chain. The block is retrieved from the blockchain's internal cache. func (bc *BlockChain) CurrentFastBlock() *types.Block { @@ -899,8 +964,14 @@ func (bc *BlockChain) Stop() { var snapBase common.Hash if bc.snaps != nil { var err error - if snapBase, err = bc.snaps.Journal(bc.CurrentBlock().Root()); err != nil { - log.Error("Failed to journal state snapshot", "err", err) + if bc.writeLegacyJournal { + if snapBase, err = bc.snaps.LegacyJournal(bc.CurrentBlock().Root()); err != nil { + log.Error("Failed to journal state snapshot", "err", err) + } + } else { + if snapBase, err = bc.snaps.Journal(bc.CurrentBlock().Root()); err != nil { + log.Error("Failed to journal state snapshot", "err", err) + } } } // Ensure the state of a recent block is also stored to disk before exiting. 
diff --git a/core/blockchain_repair_test.go b/core/blockchain_repair_test.go index f104a93061aa..478248070344 100644 --- a/core/blockchain_repair_test.go +++ b/core/blockchain_repair_test.go @@ -25,6 +25,7 @@ import ( "io/ioutil" "os" "testing" + "time" "github.com/celo-org/celo-blockchain/common" mockEngine "github.com/celo-org/celo-blockchain/consensus/consensustest" @@ -38,7 +39,10 @@ import ( // committed to disk and then the process crashed. In this case we expect the full // chain to be rolled back to the committed block, but the chain data itself left // in the database for replaying. -func TestShortRepair(t *testing.T) { +func TestShortRepair(t *testing.T) { testShortRepair(t, false) } +func TestShortRepairWithSnapshots(t *testing.T) { testShortRepair(t, true) } + +func testShortRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // @@ -68,14 +72,17 @@ func TestShortRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a short canonical chain where the fast sync pivot point was // already committed, after which the process crashed. In this case we expect the full // chain to be rolled back to the committed block, but the chain data itself left in // the database for replaying. -func TestShortFastSyncedRepair(t *testing.T) { +func TestShortFastSyncedRepair(t *testing.T) { testShortFastSyncedRepair(t, false) } +func TestShortFastSyncedRepairWithSnapshots(t *testing.T) { testShortFastSyncedRepair(t, true) } + +func testShortFastSyncedRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // @@ -105,14 +112,17 @@ func TestShortFastSyncedRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a short canonical chain where the fast sync pivot point was // not yet committed, but the process crashed. In this case we expect the chain to // detect that it was fast syncing and not delete anything, since we can just pick // up directly where we left off. -func TestShortFastSyncingRepair(t *testing.T) { +func TestShortFastSyncingRepair(t *testing.T) { testShortFastSyncingRepair(t, false) } +func TestShortFastSyncingRepairWithSnapshots(t *testing.T) { testShortFastSyncingRepair(t, true) } + +func testShortFastSyncingRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // @@ -142,7 +152,7 @@ func TestShortFastSyncingRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 0, - }) + }, snapshots) } // Tests a recovery for a short canonical chain and a shorter side chain, where a @@ -150,7 +160,10 @@ func TestShortFastSyncingRepair(t *testing.T) { // test scenario the side chain is below the committed block. In this case we expect // the canonical chain to be rolled back to the committed block, but the chain data // itself left in the database for replaying. 
-func TestShortOldForkedRepair(t *testing.T) { +func TestShortOldForkedRepair(t *testing.T) { testShortOldForkedRepair(t, false) } +func TestShortOldForkedRepairWithSnapshots(t *testing.T) { testShortOldForkedRepair(t, true) } + +func testShortOldForkedRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3 @@ -182,7 +195,7 @@ func TestShortOldForkedRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a short canonical chain and a shorter side chain, where @@ -191,6 +204,13 @@ func TestShortOldForkedRepair(t *testing.T) { // this case we expect the canonical chain to be rolled back to the committed block, // but the chain data itself left in the database for replaying. func TestShortOldForkedFastSyncedRepair(t *testing.T) { + testShortOldForkedFastSyncedRepair(t, false) +} +func TestShortOldForkedFastSyncedRepairWithSnapshots(t *testing.T) { + testShortOldForkedFastSyncedRepair(t, true) +} + +func testShortOldForkedFastSyncedRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3 @@ -222,7 +242,7 @@ func TestShortOldForkedFastSyncedRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a short canonical chain and a shorter side chain, where @@ -231,6 +251,13 @@ func TestShortOldForkedFastSyncedRepair(t *testing.T) { // the chain to detect that it was fast syncing and not delete anything, since we // can just pick up directly where we left off. func TestShortOldForkedFastSyncingRepair(t *testing.T) { + testShortOldForkedFastSyncingRepair(t, false) +} +func TestShortOldForkedFastSyncingRepairWithSnapshots(t *testing.T) { + testShortOldForkedFastSyncingRepair(t, true) +} + +func testShortOldForkedFastSyncingRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3 @@ -262,7 +289,7 @@ func TestShortOldForkedFastSyncingRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 0, - }) + }, snapshots) } // Tests a recovery for a short canonical chain and a shorter side chain, where a @@ -270,7 +297,10 @@ func TestShortOldForkedFastSyncingRepair(t *testing.T) { // test scenario the side chain reaches above the committed block. In this case we // expect the canonical chain to be rolled back to the committed block, but the // chain data itself left in the database for replaying. -func TestShortNewlyForkedRepair(t *testing.T) { +func TestShortNewlyForkedRepair(t *testing.T) { testShortNewlyForkedRepair(t, false) } +func TestShortNewlyForkedRepairWithSnapshots(t *testing.T) { testShortNewlyForkedRepair(t, true) } + +func testShortNewlyForkedRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3->S4->S5->S6 @@ -302,7 +332,7 @@ func TestShortNewlyForkedRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a short canonical chain and a shorter side chain, where @@ -311,6 +341,13 @@ func TestShortNewlyForkedRepair(t *testing.T) { // In this case we expect the canonical chain to be rolled back to the committed // block, but the chain data itself left in the database for replaying. 
func TestShortNewlyForkedFastSyncedRepair(t *testing.T) { + testShortNewlyForkedFastSyncedRepair(t, false) +} +func TestShortNewlyForkedFastSyncedRepairWithSnapshots(t *testing.T) { + testShortNewlyForkedFastSyncedRepair(t, true) +} + +func testShortNewlyForkedFastSyncedRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3->S4->S5->S6 @@ -342,7 +379,7 @@ func TestShortNewlyForkedFastSyncedRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a short canonical chain and a shorter side chain, where @@ -351,6 +388,13 @@ func TestShortNewlyForkedFastSyncedRepair(t *testing.T) { // case we expect the chain to detect that it was fast syncing and not delete // anything, since we can just pick up directly where we left off. func TestShortNewlyForkedFastSyncingRepair(t *testing.T) { + testShortNewlyForkedFastSyncingRepair(t, false) +} +func TestShortNewlyForkedFastSyncingRepairWithSnapshots(t *testing.T) { + testShortNewlyForkedFastSyncingRepair(t, true) +} + +func testShortNewlyForkedFastSyncingRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3->S4->S5->S6 @@ -382,14 +426,17 @@ func TestShortNewlyForkedFastSyncingRepair(t *testing.T) { expHeadHeader: 8, expHeadFastBlock: 8, expHeadBlock: 0, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks where a recent // block - newer than the ancient limit - was already committed to disk and then // the process crashed. In this case we expect the chain to be rolled back to the // committed block, with everything afterwads kept as fast sync data. -func TestLongShallowRepair(t *testing.T) { +func TestLongShallowRepair(t *testing.T) { testLongShallowRepair(t, false) } +func TestLongShallowRepairWithSnapshots(t *testing.T) { testLongShallowRepair(t, true) } + +func testLongShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // @@ -424,14 +471,17 @@ func TestLongShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks where a recent // block - older than the ancient limit - was already committed to disk and then // the process crashed. In this case we expect the chain to be rolled back to the // committed block, with everything afterwads deleted. -func TestLongDeepRepair(t *testing.T) { +func TestLongDeepRepair(t *testing.T) { testLongDeepRepair(t, false) } +func TestLongDeepRepairWithSnapshots(t *testing.T) { testLongDeepRepair(t, true) } + +func testLongDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // @@ -465,7 +515,7 @@ func TestLongDeepRepair(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks where the fast @@ -473,6 +523,13 @@ func TestLongDeepRepair(t *testing.T) { // which the process crashed. In this case we expect the chain to be rolled back // to the committed block, with everything afterwads kept as fast sync data. 
func TestLongFastSyncedShallowRepair(t *testing.T) { + testLongFastSyncedShallowRepair(t, false) +} +func TestLongFastSyncedShallowRepairWithSnapshots(t *testing.T) { + testLongFastSyncedShallowRepair(t, true) +} + +func testLongFastSyncedShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // @@ -507,14 +564,17 @@ func TestLongFastSyncedShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks where the fast // sync pivot point - older than the ancient limit - was already committed, after // which the process crashed. In this case we expect the chain to be rolled back // to the committed block, with everything afterwads deleted. -func TestLongFastSyncedDeepRepair(t *testing.T) { +func TestLongFastSyncedDeepRepair(t *testing.T) { testLongFastSyncedDeepRepair(t, false) } +func TestLongFastSyncedDeepRepairWithSnapshots(t *testing.T) { testLongFastSyncedDeepRepair(t, true) } + +func testLongFastSyncedDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // @@ -548,7 +608,7 @@ func TestLongFastSyncedDeepRepair(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks where the fast @@ -557,6 +617,13 @@ func TestLongFastSyncedDeepRepair(t *testing.T) { // syncing and not delete anything, since we can just pick up directly where we // left off. func TestLongFastSyncingShallowRepair(t *testing.T) { + testLongFastSyncingShallowRepair(t, false) +} +func TestLongFastSyncingShallowRepairWithSnapshots(t *testing.T) { + testLongFastSyncingShallowRepair(t, true) +} + +func testLongFastSyncingShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // @@ -591,7 +658,7 @@ func TestLongFastSyncingShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 0, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks where the fast @@ -599,7 +666,10 @@ func TestLongFastSyncingShallowRepair(t *testing.T) { // process crashed. In this case we expect the chain to detect that it was fast // syncing and not delete anything, since we can just pick up directly where we // left off. -func TestLongFastSyncingDeepRepair(t *testing.T) { +func TestLongFastSyncingDeepRepair(t *testing.T) { testLongFastSyncingDeepRepair(t, false) } +func TestLongFastSyncingDeepRepairWithSnapshots(t *testing.T) { testLongFastSyncingDeepRepair(t, true) } + +func testLongFastSyncingDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // @@ -634,7 +704,7 @@ func TestLongFastSyncingDeepRepair(t *testing.T) { expHeadHeader: 24, expHeadFastBlock: 24, expHeadBlock: 0, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -644,6 +714,13 @@ func TestLongFastSyncingDeepRepair(t *testing.T) { // rolled back to the committed block, with everything afterwads kept as fast // sync data; the side chain completely nuked by the freezer. 
func TestLongOldForkedShallowRepair(t *testing.T) { + testLongOldForkedShallowRepair(t, false) +} +func TestLongOldForkedShallowRepairWithSnapshots(t *testing.T) { + testLongOldForkedShallowRepair(t, true) +} + +func testLongOldForkedShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3 @@ -679,7 +756,7 @@ func TestLongOldForkedShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -688,7 +765,10 @@ func TestLongOldForkedShallowRepair(t *testing.T) { // chain is below the committed block. In this case we expect the canonical chain // to be rolled back to the committed block, with everything afterwads deleted; // the side chain completely nuked by the freezer. -func TestLongOldForkedDeepRepair(t *testing.T) { +func TestLongOldForkedDeepRepair(t *testing.T) { testLongOldForkedDeepRepair(t, false) } +func TestLongOldForkedDeepRepairWithSnapshots(t *testing.T) { testLongOldForkedDeepRepair(t, true) } + +func testLongOldForkedDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3 @@ -723,7 +803,7 @@ func TestLongOldForkedDeepRepair(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -733,6 +813,13 @@ func TestLongOldForkedDeepRepair(t *testing.T) { // to be rolled back to the committed block, with everything afterwads kept as // fast sync data; the side chain completely nuked by the freezer. func TestLongOldForkedFastSyncedShallowRepair(t *testing.T) { + testLongOldForkedFastSyncedShallowRepair(t, false) +} +func TestLongOldForkedFastSyncedShallowRepairWithSnapshots(t *testing.T) { + testLongOldForkedFastSyncedShallowRepair(t, true) +} + +func testLongOldForkedFastSyncedShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3 @@ -768,7 +855,7 @@ func TestLongOldForkedFastSyncedShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -778,6 +865,13 @@ func TestLongOldForkedFastSyncedShallowRepair(t *testing.T) { // chain to be rolled back to the committed block, with everything afterwads deleted; // the side chain completely nuked by the freezer. 
func TestLongOldForkedFastSyncedDeepRepair(t *testing.T) { + testLongOldForkedFastSyncedDeepRepair(t, false) +} +func TestLongOldForkedFastSyncedDeepRepairWithSnapshots(t *testing.T) { + testLongOldForkedFastSyncedDeepRepair(t, true) +} + +func testLongOldForkedFastSyncedDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3 @@ -812,7 +906,7 @@ func TestLongOldForkedFastSyncedDeepRepair(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -822,6 +916,13 @@ func TestLongOldForkedFastSyncedDeepRepair(t *testing.T) { // that it was fast syncing and not delete anything. The side chain is completely // nuked by the freezer. func TestLongOldForkedFastSyncingShallowRepair(t *testing.T) { + testLongOldForkedFastSyncingShallowRepair(t, false) +} +func TestLongOldForkedFastSyncingShallowRepairWithSnapshots(t *testing.T) { + testLongOldForkedFastSyncingShallowRepair(t, true) +} + +func testLongOldForkedFastSyncingShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3 @@ -857,7 +958,7 @@ func TestLongOldForkedFastSyncingShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 0, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -867,6 +968,13 @@ func TestLongOldForkedFastSyncingShallowRepair(t *testing.T) { // that it was fast syncing and not delete anything. The side chain is completely // nuked by the freezer. func TestLongOldForkedFastSyncingDeepRepair(t *testing.T) { + testLongOldForkedFastSyncingDeepRepair(t, false) +} +func TestLongOldForkedFastSyncingDeepRepairWithSnapshots(t *testing.T) { + testLongOldForkedFastSyncingDeepRepair(t, true) +} + +func testLongOldForkedFastSyncingDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3 @@ -902,7 +1010,7 @@ func TestLongOldForkedFastSyncingDeepRepair(t *testing.T) { expHeadHeader: 24, expHeadFastBlock: 24, expHeadBlock: 0, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -912,6 +1020,13 @@ func TestLongOldForkedFastSyncingDeepRepair(t *testing.T) { // rolled back to the committed block, with everything afterwads kept as fast // sync data; the side chain completely nuked by the freezer. func TestLongNewerForkedShallowRepair(t *testing.T) { + testLongNewerForkedShallowRepair(t, false) +} +func TestLongNewerForkedShallowRepairWithSnapshots(t *testing.T) { + testLongNewerForkedShallowRepair(t, true) +} + +func testLongNewerForkedShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -947,7 +1062,7 @@ func TestLongNewerForkedShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -956,7 +1071,10 @@ func TestLongNewerForkedShallowRepair(t *testing.T) { // chain is above the committed block. 
In this case we expect the canonical chain // to be rolled back to the committed block, with everything afterwads deleted; // the side chain completely nuked by the freezer. -func TestLongNewerForkedDeepRepair(t *testing.T) { +func TestLongNewerForkedDeepRepair(t *testing.T) { testLongNewerForkedDeepRepair(t, false) } +func TestLongNewerForkedDeepRepairWithSnapshots(t *testing.T) { testLongNewerForkedDeepRepair(t, true) } + +func testLongNewerForkedDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -991,7 +1109,7 @@ func TestLongNewerForkedDeepRepair(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -1001,6 +1119,13 @@ func TestLongNewerForkedDeepRepair(t *testing.T) { // to be rolled back to the committed block, with everything afterwads kept as fast // sync data; the side chain completely nuked by the freezer. func TestLongNewerForkedFastSyncedShallowRepair(t *testing.T) { + testLongNewerForkedFastSyncedShallowRepair(t, false) +} +func TestLongNewerForkedFastSyncedShallowRepairWithSnapshots(t *testing.T) { + testLongNewerForkedFastSyncedShallowRepair(t, true) +} + +func testLongNewerForkedFastSyncedShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1036,7 +1161,7 @@ func TestLongNewerForkedFastSyncedShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -1046,6 +1171,13 @@ func TestLongNewerForkedFastSyncedShallowRepair(t *testing.T) { // chain to be rolled back to the committed block, with everything afterwads deleted; // the side chain completely nuked by the freezer. func TestLongNewerForkedFastSyncedDeepRepair(t *testing.T) { + testLongNewerForkedFastSyncedDeepRepair(t, false) +} +func TestLongNewerForkedFastSyncedDeepRepairWithSnapshots(t *testing.T) { + testLongNewerForkedFastSyncedDeepRepair(t, true) +} + +func testLongNewerForkedFastSyncedDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1080,7 +1212,7 @@ func TestLongNewerForkedFastSyncedDeepRepair(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -1090,6 +1222,13 @@ func TestLongNewerForkedFastSyncedDeepRepair(t *testing.T) { // that it was fast syncing and not delete anything. The side chain is completely // nuked by the freezer. 
func TestLongNewerForkedFastSyncingShallowRepair(t *testing.T) { + testLongNewerForkedFastSyncingShallowRepair(t, false) +} +func TestLongNewerForkedFastSyncingShallowRepairWithSnapshots(t *testing.T) { + testLongNewerForkedFastSyncingShallowRepair(t, true) +} + +func testLongNewerForkedFastSyncingShallowRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1125,7 +1264,7 @@ func TestLongNewerForkedFastSyncingShallowRepair(t *testing.T) { expHeadHeader: 18, expHeadFastBlock: 18, expHeadBlock: 0, - }) + }, snapshots) } // Tests a recovery for a long canonical chain with frozen blocks and a shorter @@ -1135,6 +1274,13 @@ func TestLongNewerForkedFastSyncingShallowRepair(t *testing.T) { // that it was fast syncing and not delete anything. The side chain is completely // nuked by the freezer. func TestLongNewerForkedFastSyncingDeepRepair(t *testing.T) { + testLongNewerForkedFastSyncingDeepRepair(t, false) +} +func TestLongNewerForkedFastSyncingDeepRepairWithSnapshots(t *testing.T) { + testLongNewerForkedFastSyncingDeepRepair(t, true) +} + +func testLongNewerForkedFastSyncingDeepRepair(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1170,13 +1316,13 @@ func TestLongNewerForkedFastSyncingDeepRepair(t *testing.T) { expHeadHeader: 24, expHeadFastBlock: 24, expHeadBlock: 0, - }) + }, snapshots) } -func testRepair(t *testing.T, tt *rewindTest) { +func testRepair(t *testing.T, tt *rewindTest, snapshots bool) { // It's hard to follow the test case, visualize the input //log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) - //fmt.Println(tt.dump(true)) + // fmt.Println(tt.dump(true)) // Create a temporary persistent database datadir, err := ioutil.TempDir("", "") @@ -1195,8 +1341,18 @@ func testRepair(t *testing.T, tt *rewindTest) { var ( genesis = new(Genesis).MustCommit(db) engine = mockEngine.NewFaker() + config = &CacheConfig{ + TrieCleanLimit: 256, + TrieDirtyLimit: 256, + TrieTimeLimit: 5 * time.Minute, + SnapshotLimit: 0, // Disable snapshot by default + } ) - chain, err := NewBlockChain(db, nil, params.IstanbulTestChainConfig, engine, vm.Config{}, nil, nil) + if snapshots { + config.SnapshotLimit = 256 + config.SnapshotWait = true + } + chain, err := NewBlockChain(db, config, params.IstanbulTestChainConfig, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create chain: %v", err) } @@ -1219,6 +1375,11 @@ func testRepair(t *testing.T, tt *rewindTest) { } if tt.commitBlock > 0 { chain.stateCache.TrieDB().Commit(canonblocks[tt.commitBlock-1].Root(), true) + if snapshots { + if err := chain.snaps.Cap(canonblocks[tt.commitBlock-1].Root(), 0); err != nil { + t.Fatalf("Failed to flatten snapshots: %v", err) + } + } } if _, err := chain.InsertChain(canonblocks[tt.commitBlock:]); err != nil { t.Fatalf("Failed to import canonical chain tail: %v", err) diff --git a/core/blockchain_sethead_test.go b/core/blockchain_sethead_test.go index e912941a3b25..911924a5d052 100644 --- a/core/blockchain_sethead_test.go +++ b/core/blockchain_sethead_test.go @@ -25,6 +25,7 @@ import ( "os" "strings" "testing" + "time" "github.com/celo-org/celo-blockchain/common" mockEngine 
"github.com/celo-org/celo-blockchain/consensus/consensustest" @@ -149,7 +150,10 @@ func (tt *rewindTest) Dump(crash bool) string { // chain to be rolled back to the committed block. Everything above the sethead // point should be deleted. In between the committed block and the requested head // the data can remain as "fast sync" data to avoid redownloading it. -func TestShortSetHead(t *testing.T) { +func TestShortSetHead(t *testing.T) { testShortSetHead(t, false) } +func TestShortSetHeadWithSnapshots(t *testing.T) { testShortSetHead(t, true) } + +func testShortSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // @@ -180,7 +184,7 @@ func TestShortSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a short canonical chain where the fast sync pivot point was @@ -189,7 +193,10 @@ func TestShortSetHead(t *testing.T) { // Everything above the sethead point should be deleted. In between the committed // block and the requested head the data can remain as "fast sync" data to avoid // redownloading it. -func TestShortFastSyncedSetHead(t *testing.T) { +func TestShortFastSyncedSetHead(t *testing.T) { testShortFastSyncedSetHead(t, false) } +func TestShortFastSyncedSetHeadWithSnapshots(t *testing.T) { testShortFastSyncedSetHead(t, true) } + +func testShortFastSyncedSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // @@ -220,7 +227,7 @@ func TestShortFastSyncedSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a short canonical chain where the fast sync pivot point was @@ -228,7 +235,10 @@ func TestShortFastSyncedSetHead(t *testing.T) { // detect that it was fast syncing and delete everything from the new head, since // we can just pick up fast syncing from there. The head full block should be set // to the genesis. -func TestShortFastSyncingSetHead(t *testing.T) { +func TestShortFastSyncingSetHead(t *testing.T) { testShortFastSyncingSetHead(t, false) } +func TestShortFastSyncingSetHeadWithSnapshots(t *testing.T) { testShortFastSyncingSetHead(t, true) } + +func testShortFastSyncingSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // @@ -259,7 +269,7 @@ func TestShortFastSyncingSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 0, - }) + }, snapshots) } // Tests a sethead for a short canonical chain and a shorter side chain, where a @@ -269,7 +279,10 @@ func TestShortFastSyncingSetHead(t *testing.T) { // above the sethead point should be deleted. In between the committed block and // the requested head the data can remain as "fast sync" data to avoid redownloading // it. The side chain should be left alone as it was shorter. 
-func TestShortOldForkedSetHead(t *testing.T) { +func TestShortOldForkedSetHead(t *testing.T) { testShortOldForkedSetHead(t, false) } +func TestShortOldForkedSetHeadWithSnapshots(t *testing.T) { testShortOldForkedSetHead(t, true) } + +func testShortOldForkedSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3 @@ -302,7 +315,7 @@ func TestShortOldForkedSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a short canonical chain and a shorter side chain, where @@ -313,6 +326,13 @@ func TestShortOldForkedSetHead(t *testing.T) { // committed block and the requested head the data can remain as "fast sync" data // to avoid redownloading it. The side chain should be left alone as it was shorter. func TestShortOldForkedFastSyncedSetHead(t *testing.T) { + testShortOldForkedFastSyncedSetHead(t, false) +} +func TestShortOldForkedFastSyncedSetHeadWithSnapshots(t *testing.T) { + testShortOldForkedFastSyncedSetHead(t, true) +} + +func testShortOldForkedFastSyncedSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3 @@ -345,7 +365,7 @@ func TestShortOldForkedFastSyncedSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a short canonical chain and a shorter side chain, where @@ -355,6 +375,13 @@ func TestShortOldForkedFastSyncedSetHead(t *testing.T) { // head, since we can just pick up fast syncing from there. The head full block // should be set to the genesis. func TestShortOldForkedFastSyncingSetHead(t *testing.T) { + testShortOldForkedFastSyncingSetHead(t, false) +} +func TestShortOldForkedFastSyncingSetHeadWithSnapshots(t *testing.T) { + testShortOldForkedFastSyncingSetHead(t, true) +} + +func testShortOldForkedFastSyncingSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) // └->S1->S2->S3 @@ -387,7 +414,7 @@ func TestShortOldForkedFastSyncingSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 0, - }) + }, snapshots) } // Tests a sethead for a short canonical chain and a shorter side chain, where a @@ -401,7 +428,10 @@ func TestShortOldForkedFastSyncingSetHead(t *testing.T) { // The side chain could be left to be if the fork point was before the new head // we are deleting to, but it would be exceedingly hard to detect that case and // properly handle it, so we'll trade extra work in exchange for simpler code. -func TestShortNewlyForkedSetHead(t *testing.T) { +func TestShortNewlyForkedSetHead(t *testing.T) { testShortNewlyForkedSetHead(t, false) } +func TestShortNewlyForkedSetHeadWithSnapshots(t *testing.T) { testShortNewlyForkedSetHead(t, true) } + +func testShortNewlyForkedSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8 @@ -434,7 +464,7 @@ func TestShortNewlyForkedSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a short canonical chain and a shorter side chain, where @@ -448,6 +478,13 @@ func TestShortNewlyForkedSetHead(t *testing.T) { // we are deleting to, but it would be exceedingly hard to detect that case and // properly handle it, so we'll trade extra work in exchange for simpler code. 
func TestShortNewlyForkedFastSyncedSetHead(t *testing.T) { + testShortNewlyForkedFastSyncedSetHead(t, false) +} +func TestShortNewlyForkedFastSyncedSetHeadWithSnapshots(t *testing.T) { + testShortNewlyForkedFastSyncedSetHead(t, true) +} + +func testShortNewlyForkedFastSyncedSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8 @@ -480,7 +517,7 @@ func TestShortNewlyForkedFastSyncedSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a short canonical chain and a shorter side chain, where @@ -494,6 +531,13 @@ func TestShortNewlyForkedFastSyncedSetHead(t *testing.T) { // we are deleting to, but it would be exceedingly hard to detect that case and // properly handle it, so we'll trade extra work in exchange for simpler code. func TestShortNewlyForkedFastSyncingSetHead(t *testing.T) { + testShortNewlyForkedFastSyncingSetHead(t, false) +} +func TestShortNewlyForkedFastSyncingSetHeadWithSnapshots(t *testing.T) { + testShortNewlyForkedFastSyncingSetHead(t, true) +} + +func testShortNewlyForkedFastSyncingSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8 @@ -526,7 +570,7 @@ func TestShortNewlyForkedFastSyncingSetHead(t *testing.T) { expHeadHeader: 7, expHeadFastBlock: 7, expHeadBlock: 0, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks where a recent @@ -535,7 +579,10 @@ func TestShortNewlyForkedFastSyncingSetHead(t *testing.T) { // to the committed block. Everything above the sethead point should be deleted. // In between the committed block and the requested head the data can remain as // "fast sync" data to avoid redownloading it. -func TestLongShallowSetHead(t *testing.T) { +func TestLongShallowSetHead(t *testing.T) { testLongShallowSetHead(t, false) } +func TestLongShallowSetHeadWithSnapshots(t *testing.T) { testLongShallowSetHead(t, true) } + +func testLongShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // @@ -571,7 +618,7 @@ func TestLongShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks where a recent @@ -579,7 +626,10 @@ func TestLongShallowSetHead(t *testing.T) { // sethead was called. In this case we expect the full chain to be rolled back // to the committed block. Since the ancient limit was underflown, everything // needs to be deleted onwards to avoid creating a gap. -func TestLongDeepSetHead(t *testing.T) { +func TestLongDeepSetHead(t *testing.T) { testLongDeepSetHead(t, false) } +func TestLongDeepSetHeadWithSnapshots(t *testing.T) { testLongDeepSetHead(t, true) } + +func testLongDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // @@ -614,7 +664,7 @@ func TestLongDeepSetHead(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks where the fast @@ -624,6 +674,13 @@ func TestLongDeepSetHead(t *testing.T) { // deleted. In between the committed block and the requested head the data can // remain as "fast sync" data to avoid redownloading it. 
func TestLongFastSyncedShallowSetHead(t *testing.T) { + testLongFastSyncedShallowSetHead(t, false) +} +func TestLongFastSyncedShallowSetHeadWithSnapshots(t *testing.T) { + testLongFastSyncedShallowSetHead(t, true) +} + +func testLongFastSyncedShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // @@ -659,7 +716,7 @@ func TestLongFastSyncedShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks where the fast @@ -667,7 +724,10 @@ func TestLongFastSyncedShallowSetHead(t *testing.T) { // which sethead was called. In this case we expect the full chain to be rolled // back to the committed block. Since the ancient limit was underflown, everything // needs to be deleted onwards to avoid creating a gap. -func TestLongFastSyncedDeepSetHead(t *testing.T) { +func TestLongFastSyncedDeepSetHead(t *testing.T) { testLongFastSyncedDeepSetHead(t, false) } +func TestLongFastSyncedDeepSetHeadWithSnapshots(t *testing.T) { testLongFastSyncedDeepSetHead(t, true) } + +func testLongFastSyncedDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // @@ -702,7 +762,7 @@ func TestLongFastSyncedDeepSetHead(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks where the fast @@ -711,6 +771,13 @@ func TestLongFastSyncedDeepSetHead(t *testing.T) { // syncing and delete everything from the new head, since we can just pick up fast // syncing from there. func TestLongFastSyncingShallowSetHead(t *testing.T) { + testLongFastSyncingShallowSetHead(t, false) +} +func TestLongFastSyncingShallowSetHeadWithSnapshots(t *testing.T) { + testLongFastSyncingShallowSetHead(t, true) +} + +func testLongFastSyncingShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // @@ -746,7 +813,7 @@ func TestLongFastSyncingShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 0, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks where the fast @@ -755,6 +822,13 @@ func TestLongFastSyncingShallowSetHead(t *testing.T) { // syncing and delete everything from the new head, since we can just pick up fast // syncing from there. func TestLongFastSyncingDeepSetHead(t *testing.T) { + testLongFastSyncingDeepSetHead(t, false) +} +func TestLongFastSyncingDeepSetHeadWithSnapshots(t *testing.T) { + testLongFastSyncingDeepSetHead(t, true) +} + +func testLongFastSyncingDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // @@ -789,7 +863,7 @@ func TestLongFastSyncingDeepSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 0, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter side @@ -800,6 +874,13 @@ func TestLongFastSyncingDeepSetHead(t *testing.T) { // can remain as "fast sync" data to avoid redownloading it. The side chain is nuked // by the freezer. 
func TestLongOldForkedShallowSetHead(t *testing.T) { + testLongOldForkedShallowSetHead(t, false) +} +func TestLongOldForkedShallowSetHeadWithSnapshots(t *testing.T) { + testLongOldForkedShallowSetHead(t, true) +} + +func testLongOldForkedShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3 @@ -836,7 +917,7 @@ func TestLongOldForkedShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter side @@ -845,7 +926,10 @@ func TestLongOldForkedShallowSetHead(t *testing.T) { // chain to be rolled back to the committed block. Since the ancient limit was // underflown, everything needs to be deleted onwards to avoid creating a gap. The // side chain is nuked by the freezer. -func TestLongOldForkedDeepSetHead(t *testing.T) { +func TestLongOldForkedDeepSetHead(t *testing.T) { testLongOldForkedDeepSetHead(t, false) } +func TestLongOldForkedDeepSetHeadWithSnapshots(t *testing.T) { testLongOldForkedDeepSetHead(t, true) } + +func testLongOldForkedDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3 @@ -881,7 +965,7 @@ func TestLongOldForkedDeepSetHead(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -893,6 +977,13 @@ func TestLongOldForkedDeepSetHead(t *testing.T) { // requested head the data can remain as "fast sync" data to avoid redownloading // it. The side chain is nuked by the freezer. func TestLongOldForkedFastSyncedShallowSetHead(t *testing.T) { + testLongOldForkedFastSyncedShallowSetHead(t, false) +} +func TestLongOldForkedFastSyncedShallowSetHeadWithSnapshots(t *testing.T) { + testLongOldForkedFastSyncedShallowSetHead(t, true) +} + +func testLongOldForkedFastSyncedShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3 @@ -929,7 +1020,7 @@ func TestLongOldForkedFastSyncedShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -940,6 +1031,13 @@ func TestLongOldForkedFastSyncedShallowSetHead(t *testing.T) { // underflown, everything needs to be deleted onwards to avoid creating a gap. The // side chain is nuked by the freezer. func TestLongOldForkedFastSyncedDeepSetHead(t *testing.T) { + testLongOldForkedFastSyncedDeepSetHead(t, false) +} +func TestLongOldForkedFastSyncedDeepSetHeadWithSnapshots(t *testing.T) { + testLongOldForkedFastSyncedDeepSetHead(t, true) +} + +func testLongOldForkedFastSyncedDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3 @@ -975,7 +1073,7 @@ func TestLongOldForkedFastSyncedDeepSetHead(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -986,6 +1084,13 @@ func TestLongOldForkedFastSyncedDeepSetHead(t *testing.T) { // just pick up fast syncing from there. 
The side chain is completely nuked by the // freezer. func TestLongOldForkedFastSyncingShallowSetHead(t *testing.T) { + testLongOldForkedFastSyncingShallowSetHead(t, false) +} +func TestLongOldForkedFastSyncingShallowSetHeadWithSnapshots(t *testing.T) { + testLongOldForkedFastSyncingShallowSetHead(t, true) +} + +func testLongOldForkedFastSyncingShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3 @@ -1022,7 +1127,7 @@ func TestLongOldForkedFastSyncingShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 0, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -1033,6 +1138,13 @@ func TestLongOldForkedFastSyncingShallowSetHead(t *testing.T) { // just pick up fast syncing from there. The side chain is completely nuked by the // freezer. func TestLongOldForkedFastSyncingDeepSetHead(t *testing.T) { + testLongOldForkedFastSyncingDeepSetHead(t, false) +} +func TestLongOldForkedFastSyncingDeepSetHeadWithSnapshots(t *testing.T) { + testLongOldForkedFastSyncingDeepSetHead(t, true) +} + +func testLongOldForkedFastSyncingDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3 @@ -1068,7 +1180,7 @@ func TestLongOldForkedFastSyncingDeepSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 0, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -1077,6 +1189,13 @@ func TestLongOldForkedFastSyncingDeepSetHead(t *testing.T) { // chain is above the committed block. In this case the freezer will delete the // sidechain since it's dangling, reverting to TestLongShallowSetHead. func TestLongNewerForkedShallowSetHead(t *testing.T) { + testLongNewerForkedShallowSetHead(t, false) +} +func TestLongNewerForkedShallowSetHeadWithSnapshots(t *testing.T) { + testLongNewerForkedShallowSetHead(t, true) +} + +func testLongNewerForkedShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1113,7 +1232,7 @@ func TestLongNewerForkedShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -1122,6 +1241,13 @@ func TestLongNewerForkedShallowSetHead(t *testing.T) { // chain is above the committed block. In this case the freezer will delete the // sidechain since it's dangling, reverting to TestLongDeepSetHead. 
func TestLongNewerForkedDeepSetHead(t *testing.T) { + testLongNewerForkedDeepSetHead(t, false) +} +func TestLongNewerForkedDeepSetHeadWithSnapshots(t *testing.T) { + testLongNewerForkedDeepSetHead(t, true) +} + +func testLongNewerForkedDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1157,7 +1283,7 @@ func TestLongNewerForkedDeepSetHead(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -1166,6 +1292,13 @@ func TestLongNewerForkedDeepSetHead(t *testing.T) { // the side chain is above the committed block. In this case the freezer will delete // the sidechain since it's dangling, reverting to TestLongFastSyncedShallowSetHead. func TestLongNewerForkedFastSyncedShallowSetHead(t *testing.T) { + testLongNewerForkedFastSyncedShallowSetHead(t, false) +} +func TestLongNewerForkedFastSyncedShallowSetHeadWithSnapshots(t *testing.T) { + testLongNewerForkedFastSyncedShallowSetHead(t, true) +} + +func testLongNewerForkedFastSyncedShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1202,7 +1335,7 @@ func TestLongNewerForkedFastSyncedShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -1211,6 +1344,13 @@ func TestLongNewerForkedFastSyncedShallowSetHead(t *testing.T) { // the side chain is above the committed block. In this case the freezer will delete // the sidechain since it's dangling, reverting to TestLongFastSyncedDeepSetHead. func TestLongNewerForkedFastSyncedDeepSetHead(t *testing.T) { + testLongNewerForkedFastSyncedDeepSetHead(t, false) +} +func TestLongNewerForkedFastSyncedDeepSetHeadWithSnapshots(t *testing.T) { + testLongNewerForkedFastSyncedDeepSetHead(t, true) +} + +func testLongNewerForkedFastSyncedDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1246,7 +1386,7 @@ func TestLongNewerForkedFastSyncedDeepSetHead(t *testing.T) { expHeadHeader: 4, expHeadFastBlock: 4, expHeadBlock: 4, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -1255,6 +1395,13 @@ func TestLongNewerForkedFastSyncedDeepSetHead(t *testing.T) { // chain is above the committed block. In this case the freezer will delete the // sidechain since it's dangling, reverting to TestLongFastSyncinghallowSetHead. 
func TestLongNewerForkedFastSyncingShallowSetHead(t *testing.T) { + testLongNewerForkedFastSyncingShallowSetHead(t, false) +} +func TestLongNewerForkedFastSyncingShallowSetHeadWithSnapshots(t *testing.T) { + testLongNewerForkedFastSyncingShallowSetHead(t, true) +} + +func testLongNewerForkedFastSyncingShallowSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1291,7 +1438,7 @@ func TestLongNewerForkedFastSyncingShallowSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 0, - }) + }, snapshots) } // Tests a sethead for a long canonical chain with frozen blocks and a shorter @@ -1300,6 +1447,13 @@ func TestLongNewerForkedFastSyncingShallowSetHead(t *testing.T) { // chain is above the committed block. In this case the freezer will delete the // sidechain since it's dangling, reverting to TestLongFastSyncingDeepSetHead. func TestLongNewerForkedFastSyncingDeepSetHead(t *testing.T) { + testLongNewerForkedFastSyncingDeepSetHead(t, false) +} +func TestLongNewerForkedFastSyncingDeepSetHeadWithSnapshots(t *testing.T) { + testLongNewerForkedFastSyncingDeepSetHead(t, true) +} + +func testLongNewerForkedFastSyncingDeepSetHead(t *testing.T, snapshots bool) { // Chain: // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10->C11->C12->C13->C14->C15->C16->C17->C18->C19->C20->C21->C22->C23->C24 (HEAD) // └->S1->S2->S3->S4->S5->S6->S7->S8->S9->S10->S11->S12 @@ -1335,13 +1489,13 @@ func TestLongNewerForkedFastSyncingDeepSetHead(t *testing.T) { expHeadHeader: 6, expHeadFastBlock: 6, expHeadBlock: 0, - }) + }, snapshots) } -func testSetHead(t *testing.T, tt *rewindTest) { +func testSetHead(t *testing.T, tt *rewindTest, snapshots bool) { // It's hard to follow the test case, visualize the input - //log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) - //fmt.Println(tt.dump(false)) + // log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + // fmt.Println(tt.dump(false)) // Create a temporary persistent database datadir, err := ioutil.TempDir("", "") @@ -1360,8 +1514,18 @@ func testSetHead(t *testing.T, tt *rewindTest) { var ( genesis = new(Genesis).MustCommit(db) engine = mockEngine.NewFaker() + config = &CacheConfig{ + TrieCleanLimit: 256, + TrieDirtyLimit: 256, + TrieTimeLimit: 5 * time.Minute, + SnapshotLimit: 0, // Disable snapshot + } ) - chain, err := NewBlockChain(db, nil, params.IstanbulTestChainConfig, engine, vm.Config{}, nil, nil) + if snapshots { + config.SnapshotLimit = 256 + config.SnapshotWait = true + } + chain, err := NewBlockChain(db, config, params.IstanbulTestChainConfig, engine, vm.Config{}, nil, nil) if err != nil { t.Fatalf("Failed to create chain: %v", err) } @@ -1384,6 +1548,11 @@ func testSetHead(t *testing.T, tt *rewindTest) { } if tt.commitBlock > 0 { chain.stateCache.TrieDB().Commit(canonblocks[tt.commitBlock-1].Root(), true) + if snapshots { + if err := chain.snaps.Cap(canonblocks[tt.commitBlock-1].Root(), 0); err != nil { + t.Fatalf("Failed to flatten snapshots: %v", err) + } + } } if _, err := chain.InsertChain(canonblocks[tt.commitBlock:]); err != nil { t.Fatalf("Failed to import canonical chain tail: %v", err) diff --git a/core/blockchain_snapshot_test.go b/core/blockchain_snapshot_test.go new file mode 100644 index 000000000000..93548f98141e --- /dev/null +++ b/core/blockchain_snapshot_test.go 
@@ -0,0 +1,1024 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +// Tests that abnormal program termination (i.e.crash) and restart can recovery +// the snapshot properly if the snapshot is enabled. + +package core + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "strings" + "testing" + "time" + + "github.com/celo-org/celo-blockchain/consensus" + mockEngine "github.com/celo-org/celo-blockchain/consensus/consensustest" + "github.com/celo-org/celo-blockchain/core/rawdb" + "github.com/celo-org/celo-blockchain/core/state/snapshot" + "github.com/celo-org/celo-blockchain/core/types" + "github.com/celo-org/celo-blockchain/core/vm" + "github.com/celo-org/celo-blockchain/ethdb" + "github.com/celo-org/celo-blockchain/params" +) + +// snapshotTestBasic wraps the common testing fields in the snapshot tests. +type snapshotTestBasic struct { + legacy bool // Wether write the snapshot journal in legacy format + chainBlocks int // Number of blocks to generate for the canonical chain + snapshotBlock uint64 // Block number of the relevant snapshot disk layer + commitBlock uint64 // Block number for which to commit the state to disk + + expCanonicalBlocks int // Number of canonical blocks expected to remain in the database (excl. genesis) + expHeadHeader uint64 // Block number of the expected head header + expHeadFastBlock uint64 // Block number of the expected head fast sync block + expHeadBlock uint64 // Block number of the expected head full block + expSnapshotBottom uint64 // The block height corresponding to the snapshot disk layer + + // share fields, set in runtime + datadir string + db ethdb.Database + gendb ethdb.Database + engine consensus.Engine +} + +func (basic *snapshotTestBasic) prepare(t *testing.T) (*BlockChain, []*types.Block) { + // Create a temporary persistent database + datadir, err := ioutil.TempDir("", "") + if err != nil { + t.Fatalf("Failed to create temporary datadir: %v", err) + } + os.RemoveAll(datadir) + + db, err := rawdb.NewLevelDBDatabaseWithFreezer(datadir, 0, 0, datadir, "") + if err != nil { + t.Fatalf("Failed to create persistent database: %v", err) + } + // Initialize a fresh chain + var ( + genesis = new(Genesis).MustCommit(db) + engine = mockEngine.NewFaker() + gendb = rawdb.NewMemoryDatabase() + + // Snapshot is enabled, the first snapshot is created from the Genesis. + // The snapshot memory allowance is 256MB, it means no snapshot flush + // will happen during the block insertion. 
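+		// For reference only (not part of this diff): defaultCacheConfig is
+		// declared in core/blockchain.go and is assumed to enable snapshots
+		// with roughly these values:
+		//
+		//	&CacheConfig{
+		//		TrieCleanLimit: 256,
+		//		TrieDirtyLimit: 256,
+		//		TrieTimeLimit:  5 * time.Minute,
+		//		SnapshotLimit:  256,
+		//		SnapshotWait:   true,
+		//	}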
+ cacheConfig = defaultCacheConfig + ) + chain, err := NewBlockChain(db, cacheConfig, params.IstanbulTestChainConfig, engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to create chain: %v", err) + } + blocks, _ := GenerateChain(params.TestChainConfig, genesis, engine, gendb, basic.chainBlocks, func(i int, b *BlockGen) {}) + + // Insert the blocks with configured settings. + var breakpoints []uint64 + if basic.commitBlock > basic.snapshotBlock { + breakpoints = append(breakpoints, basic.snapshotBlock, basic.commitBlock) + } else { + breakpoints = append(breakpoints, basic.commitBlock, basic.snapshotBlock) + } + var startPoint uint64 + for _, point := range breakpoints { + if _, err := chain.InsertChain(blocks[startPoint:point]); err != nil { + t.Fatalf("Failed to import canonical chain start: %v", err) + } + startPoint = point + + if basic.commitBlock > 0 && basic.commitBlock == point { + chain.stateCache.TrieDB().Commit(blocks[point-1].Root(), true) + } + if basic.snapshotBlock > 0 && basic.snapshotBlock == point { + if basic.legacy { + // Here we commit the snapshot disk root to simulate + // committing the legacy snapshot. + rawdb.WriteSnapshotRoot(db, blocks[point-1].Root()) + } else { + // Flushing the entire snap tree into the disk, the + // relavant (a) snapshot root and (b) snapshot generator + // will be persisted atomically. + chain.snaps.Cap(blocks[point-1].Root(), 0) + diskRoot, blockRoot := chain.snaps.DiskRoot(), blocks[point-1].Root() + if !bytes.Equal(diskRoot.Bytes(), blockRoot.Bytes()) { + t.Fatalf("Failed to flush disk layer change, want %x, got %x", blockRoot, diskRoot) + } + } + } + } + if _, err := chain.InsertChain(blocks[startPoint:]); err != nil { + t.Fatalf("Failed to import canonical chain tail: %v", err) + } + + // Set runtime fields + basic.datadir = datadir + basic.db = db + basic.gendb = gendb + basic.engine = engine + + // Ugly hack, notify the chain to flush the journal in legacy format + // if it's requested. 
+ if basic.legacy { + chain.writeLegacyJournal = true + } + return chain, blocks +} + +func (basic *snapshotTestBasic) verify(t *testing.T, chain *BlockChain, blocks []*types.Block) { + // Iterate over all the remaining blocks and ensure there are no gaps + verifyNoGaps(t, chain, true, blocks) + verifyCutoff(t, chain, true, blocks, basic.expCanonicalBlocks) + + if head := chain.CurrentHeader(); head.Number.Uint64() != basic.expHeadHeader { + t.Errorf("Head header mismatch: have %d, want %d", head.Number, basic.expHeadHeader) + } + if head := chain.CurrentFastBlock(); head.NumberU64() != basic.expHeadFastBlock { + t.Errorf("Head fast block mismatch: have %d, want %d", head.NumberU64(), basic.expHeadFastBlock) + } + if head := chain.CurrentBlock(); head.NumberU64() != basic.expHeadBlock { + t.Errorf("Head block mismatch: have %d, want %d", head.NumberU64(), basic.expHeadBlock) + } + + // Check the disk layer, ensure they are matched + block := chain.GetBlockByNumber(basic.expSnapshotBottom) + if block == nil { + t.Errorf("The correspnding block[%d] of snapshot disk layer is missing", basic.expSnapshotBottom) + } else if !bytes.Equal(chain.snaps.DiskRoot().Bytes(), block.Root().Bytes()) { + t.Errorf("The snapshot disk layer root is incorrect, want %x, get %x", block.Root(), chain.snaps.DiskRoot()) + } + + // Check the snapshot, ensure it's integrated + if err := snapshot.VerifyState(chain.snaps, block.Root()); err != nil { + t.Errorf("The disk layer is not integrated %v", err) + } +} + +func (basic *snapshotTestBasic) Dump() string { + buffer := new(strings.Builder) + + fmt.Fprint(buffer, "Chain:\n G") + for i := 0; i < basic.chainBlocks; i++ { + fmt.Fprintf(buffer, "->C%d", i+1) + } + fmt.Fprint(buffer, " (HEAD)\n\n") + + fmt.Fprintf(buffer, "Commit: G") + if basic.commitBlock > 0 { + fmt.Fprintf(buffer, ", C%d", basic.commitBlock) + } + fmt.Fprint(buffer, "\n") + + fmt.Fprintf(buffer, "Snapshot: G") + if basic.snapshotBlock > 0 { + fmt.Fprintf(buffer, ", C%d", basic.snapshotBlock) + } + fmt.Fprint(buffer, "\n") + + //if crash { + // fmt.Fprintf(buffer, "\nCRASH\n\n") + //} else { + // fmt.Fprintf(buffer, "\nSetHead(%d)\n\n", basic.setHead) + //} + fmt.Fprintf(buffer, "------------------------------\n\n") + + fmt.Fprint(buffer, "Expected in leveldb:\n G") + for i := 0; i < basic.expCanonicalBlocks; i++ { + fmt.Fprintf(buffer, "->C%d", i+1) + } + fmt.Fprintf(buffer, "\n\n") + fmt.Fprintf(buffer, "Expected head header : C%d\n", basic.expHeadHeader) + fmt.Fprintf(buffer, "Expected head fast block: C%d\n", basic.expHeadFastBlock) + if basic.expHeadBlock == 0 { + fmt.Fprintf(buffer, "Expected head block : G\n") + } else { + fmt.Fprintf(buffer, "Expected head block : C%d\n", basic.expHeadBlock) + } + if basic.expSnapshotBottom == 0 { + fmt.Fprintf(buffer, "Expected snapshot disk : G\n") + } else { + fmt.Fprintf(buffer, "Expected snapshot disk : C%d\n", basic.expSnapshotBottom) + } + return buffer.String() +} + +func (basic *snapshotTestBasic) teardown() { + basic.db.Close() + basic.gendb.Close() + os.RemoveAll(basic.datadir) +} + +// snapshotTest is a test case type for normal snapshot recovery. +// It can be used for testing that restart Geth normally. 
+type snapshotTest struct { + snapshotTestBasic +} + +func (snaptest *snapshotTest) test(t *testing.T) { + // It's hard to follow the test case, visualize the input + // log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + // fmt.Println(tt.dump()) + chain, blocks := snaptest.prepare(t) + + // Restart the chain normally + chain.Stop() + newchain, err := NewBlockChain(snaptest.db, nil, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + defer newchain.Stop() + + snaptest.verify(t, newchain, blocks) +} + +// crashSnapshotTest is a test case type for innormal snapshot recovery. +// It can be used for testing that restart Geth after the crash. +type crashSnapshotTest struct { + snapshotTestBasic +} + +func (snaptest *crashSnapshotTest) test(t *testing.T) { + // It's hard to follow the test case, visualize the input + // log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + // fmt.Println(tt.dump()) + chain, blocks := snaptest.prepare(t) + + // Pull the plug on the database, simulating a hard crash + db := chain.db + db.Close() + + // Start a new blockchain back up and see where the repair leads us + newdb, err := rawdb.NewLevelDBDatabaseWithFreezer(snaptest.datadir, 0, 0, snaptest.datadir, "") + if err != nil { + t.Fatalf("Failed to reopen persistent database: %v", err) + } + defer newdb.Close() + + // The interesting thing is: instead of starting the blockchain after + // the crash, we do restart twice here: one after the crash and one + // after the normal stop. It's used to ensure the broken snapshot + // can be detected all the time. + newchain, err := NewBlockChain(newdb, nil, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + newchain.Stop() + + newchain, err = NewBlockChain(newdb, nil, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + defer newchain.Stop() + + snaptest.verify(t, newchain, blocks) +} + +// gappedSnapshotTest is a test type used to test this scenario: +// - have a complete snapshot +// - restart without enabling the snapshot +// - insert a few blocks +// - restart with enabling the snapshot again +type gappedSnapshotTest struct { + snapshotTestBasic + gapped int // Number of blocks to insert without enabling snapshot +} + +func (snaptest *gappedSnapshotTest) test(t *testing.T) { + // It's hard to follow the test case, visualize the input + // log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + // fmt.Println(tt.dump()) + chain, blocks := snaptest.prepare(t) + + // Insert blocks without enabling snapshot if gapping is required. 
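+	// Note: snaptest.gendb still holds the in-memory state produced while
+	// generating the original chain, so GenerateChain below can extend
+	// blocks[len(blocks)-1] directly without replaying anything.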
+ chain.Stop() + gappedBlocks, _ := GenerateChain(params.TestChainConfig, blocks[len(blocks)-1], snaptest.engine, snaptest.gendb, snaptest.gapped, func(i int, b *BlockGen) {}) + + // Insert a few more blocks without enabling snapshot + var cacheConfig = &CacheConfig{ + TrieCleanLimit: 256, + TrieDirtyLimit: 256, + TrieTimeLimit: 5 * time.Minute, + SnapshotLimit: 0, + } + newchain, err := NewBlockChain(snaptest.db, cacheConfig, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + newchain.InsertChain(gappedBlocks) + newchain.Stop() + + // Restart the chain with enabling the snapshot + newchain, err = NewBlockChain(snaptest.db, nil, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + defer newchain.Stop() + + snaptest.verify(t, newchain, blocks) +} + +// setHeadSnapshotTest is the test type used to test this scenario: +// - have a complete snapshot +// - set the head to a lower point +// - restart +type setHeadSnapshotTest struct { + snapshotTestBasic + setHead uint64 // Block number to set head back to +} + +func (snaptest *setHeadSnapshotTest) test(t *testing.T) { + // It's hard to follow the test case, visualize the input + // log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + // fmt.Println(tt.dump()) + chain, blocks := snaptest.prepare(t) + + // Rewind the chain if setHead operation is required. + chain.SetHead(snaptest.setHead) + chain.Stop() + + newchain, err := NewBlockChain(snaptest.db, nil, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + defer newchain.Stop() + + snaptest.verify(t, newchain, blocks) +} + +// restartCrashSnapshotTest is the test type used to test this scenario: +// - have a complete snapshot +// - restart chain +// - insert more blocks with enabling the snapshot +// - commit the snapshot +// - crash +// - restart again +type restartCrashSnapshotTest struct { + snapshotTestBasic + newBlocks int +} + +func (snaptest *restartCrashSnapshotTest) test(t *testing.T) { + // It's hard to follow the test case, visualize the input + // log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + // fmt.Println(tt.dump()) + chain, blocks := snaptest.prepare(t) + + // Firstly, stop the chain properly, with all snapshot journal + // and state committed. + chain.Stop() + + newchain, err := NewBlockChain(snaptest.db, nil, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + newBlocks, _ := GenerateChain(params.TestChainConfig, blocks[len(blocks)-1], snaptest.engine, snaptest.gendb, snaptest.newBlocks, func(i int, b *BlockGen) {}) + newchain.InsertChain(newBlocks) + + // Commit the entire snapshot into the disk if requested. Note only + // (a) snapshot root and (b) snapshot generator will be committed, + // the diff journal is not. 
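+	// Cap with layers == 0 flattens every in-memory diff layer into the disk
+	// layer; that flattening is what persists the snapshot root and generator
+	// here, since the simulated crash below never writes a journal.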
+ newchain.Snapshots().Cap(newBlocks[len(newBlocks)-1].Root(), 0) + + // Simulate the blockchain crash + // Don't call chain.Stop here, so that no snapshot + // journal and latest state will be committed + + // Restart the chain after the crash + newchain, err = NewBlockChain(snaptest.db, nil, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + defer newchain.Stop() + + snaptest.verify(t, newchain, blocks) +} + +// wipeCrashSnapshotTest is the test type used to test this scenario: +// - have a complete snapshot +// - restart, insert more blocks without enabling the snapshot +// - restart again with enabling the snapshot +// - crash +type wipeCrashSnapshotTest struct { + snapshotTestBasic + newBlocks int +} + +func (snaptest *wipeCrashSnapshotTest) test(t *testing.T) { + // It's hard to follow the test case, visualize the input + // log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) + // fmt.Println(tt.dump()) + chain, blocks := snaptest.prepare(t) + + // Firstly, stop the chain properly, with all snapshot journal + // and state committed. + chain.Stop() + + config := &CacheConfig{ + TrieCleanLimit: 256, + TrieDirtyLimit: 256, + TrieTimeLimit: 5 * time.Minute, + SnapshotLimit: 0, + } + newchain, err := NewBlockChain(snaptest.db, config, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + newBlocks, _ := GenerateChain(params.TestChainConfig, blocks[len(blocks)-1], snaptest.engine, snaptest.gendb, snaptest.newBlocks, func(i int, b *BlockGen) {}) + newchain.InsertChain(newBlocks) + newchain.Stop() + + // Restart the chain, the wiper should starts working + config = &CacheConfig{ + TrieCleanLimit: 256, + TrieDirtyLimit: 256, + TrieTimeLimit: 5 * time.Minute, + SnapshotLimit: 256, + SnapshotWait: false, // Don't wait rebuild + } + _, err = NewBlockChain(snaptest.db, config, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + // Simulate the blockchain crash. + + newchain, err = NewBlockChain(snaptest.db, nil, params.IstanbulTestChainConfig, snaptest.engine, vm.Config{}, nil, nil) + if err != nil { + t.Fatalf("Failed to recreate chain: %v", err) + } + snaptest.verify(t, newchain, blocks) +} + +// Tests a Geth restart with valid snapshot. Before the shutdown, all snapshot +// journal will be persisted correctly. In this case no snapshot recovery is +// required. +func TestRestartWithNewSnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G + // + // SetHead(0) + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8 + // + // Expected head header : C8 + // Expected head fast block: C8 + // Expected head block : C8 + // Expected snapshot disk : G + test := &snapshotTest{ + snapshotTestBasic{ + legacy: false, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 8, + expSnapshotBottom: 0, // Initial disk layer built from genesis + }, + } + test.test(t) + test.teardown() +} + +// Tests a Geth restart with valid but "legacy" snapshot. Before the shutdown, +// all snapshot journal will be persisted correctly. In this case no snapshot +// recovery is required. 
+func TestRestartWithLegacySnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G + // + // SetHead(0) + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8 + // + // Expected head header : C8 + // Expected head fast block: C8 + // Expected head block : C8 + // Expected snapshot disk : G + t.Skip("Legacy format testing is not supported") + test := &snapshotTest{ + snapshotTestBasic{ + legacy: true, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 8, + expSnapshotBottom: 0, // Initial disk layer built from genesis + }, + } + test.test(t) + test.teardown() +} + +// Tests a Geth was crashed and restarts with a broken snapshot. In this case the +// chain head should be rewound to the point with available state. And also the +// new head should must be lower than disk layer. But there is no committed point +// so the chain should be rewound to genesis and the disk layer should be left +// for recovery. +func TestNoCommitCrashWithNewSnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G, C4 + // + // CRASH + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8 + // + // Expected head header : C8 + // Expected head fast block: C8 + // Expected head block : G + // Expected snapshot disk : C4 + test := &crashSnapshotTest{ + snapshotTestBasic{ + legacy: false, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 0, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 0, + expSnapshotBottom: 4, // Last committed disk layer, wait recovery + }, + } + test.test(t) + test.teardown() +} + +// Tests a Geth was crashed and restarts with a broken snapshot. In this case the +// chain head should be rewound to the point with available state. And also the +// new head should must be lower than disk layer. But there is only a low committed +// point so the chain should be rewound to committed point and the disk layer +// should be left for recovery. +func TestLowCommitCrashWithNewSnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G, C2 + // Snapshot: G, C4 + // + // CRASH + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8 + // + // Expected head header : C8 + // Expected head fast block: C8 + // Expected head block : C2 + // Expected snapshot disk : C4 + test := &crashSnapshotTest{ + snapshotTestBasic{ + legacy: false, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 2, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 2, + expSnapshotBottom: 4, // Last committed disk layer, wait recovery + }, + } + test.test(t) + test.teardown() +} + +// Tests a Geth was crashed and restarts with a broken snapshot. In this case +// the chain head should be rewound to the point with available state. And also +// the new head should must be lower than disk layer. But there is only a high +// committed point so the chain should be rewound to genesis and the disk layer +// should be left for recovery. 
+func TestHighCommitCrashWithNewSnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G, C6 + // Snapshot: G, C4 + // + // CRASH + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8 + // + // Expected head header : C8 + // Expected head fast block: C8 + // Expected head block : G + // Expected snapshot disk : C4 + test := &crashSnapshotTest{ + snapshotTestBasic{ + legacy: false, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 6, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 0, + expSnapshotBottom: 4, // Last committed disk layer, wait recovery + }, + } + test.test(t) + test.teardown() +} + +// Tests a Geth was crashed and restarts with a broken and "legacy format" +// snapshot. In this case the entire legacy snapshot should be discared +// and rebuild from the new chain head. The new head here refers to the +// genesis because there is no committed point. +func TestNoCommitCrashWithLegacySnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G, C4 + // + // CRASH + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8 + // + // Expected head header : C8 + // Expected head fast block: C8 + // Expected head block : G + // Expected snapshot disk : G + t.Skip("Legacy format testing is not supported") + test := &crashSnapshotTest{ + snapshotTestBasic{ + legacy: true, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 0, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 0, + expSnapshotBottom: 0, // Rebuilt snapshot from the latest HEAD(genesis) + }, + } + test.test(t) + test.teardown() +} + +// Tests a Geth was crashed and restarts with a broken and "legacy format" +// snapshot. In this case the entire legacy snapshot should be discared +// and rebuild from the new chain head. The new head here refers to the +// block-2 because it's committed into the disk. +func TestLowCommitCrashWithLegacySnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G, C2 + // Snapshot: G, C4 + // + // CRASH + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8 + // + // Expected head header : C8 + // Expected head fast block: C8 + // Expected head block : C2 + // Expected snapshot disk : C2 + t.Skip("Legacy format testing is not supported") + test := &crashSnapshotTest{ + snapshotTestBasic{ + legacy: true, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 2, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 2, + expSnapshotBottom: 2, // Rebuilt snapshot from the latest HEAD + }, + } + test.test(t) + test.teardown() +} + +// Tests a Geth was crashed and restarts with a broken and "legacy format" +// snapshot. In this case the entire legacy snapshot should be discared +// and rebuild from the new chain head. 
+// +// The new head here refers to the the genesis, the reason is: +// - the state of block-6 is committed into the disk +// - the legacy disk layer of block-4 is committed into the disk +// - the head is rewound the genesis in order to find an available +// state lower than disk layer +func TestHighCommitCrashWithLegacySnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G, C6 + // Snapshot: G, C4 + // + // CRASH + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8 + // + // Expected head header : C8 + // Expected head fast block: C8 + // Expected head block : G + // Expected snapshot disk : G + t.Skip("Legacy format testing is not supported") + test := &crashSnapshotTest{ + snapshotTestBasic{ + legacy: true, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 6, + expCanonicalBlocks: 8, + expHeadHeader: 8, + expHeadFastBlock: 8, + expHeadBlock: 0, + expSnapshotBottom: 0, // Rebuilt snapshot from the latest HEAD(genesis) + }, + } + test.test(t) + test.teardown() +} + +// Tests a Geth was running with snapshot enabled. Then restarts without +// enabling snapshot and after that re-enable the snapshot again. In this +// case the snapshot should be rebuilt with latest chain head. +func TestGappedNewSnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G + // + // SetHead(0) + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10 + // + // Expected head header : C10 + // Expected head fast block: C10 + // Expected head block : C10 + // Expected snapshot disk : C10 + test := &gappedSnapshotTest{ + snapshotTestBasic: snapshotTestBasic{ + legacy: false, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 10, + expHeadHeader: 10, + expHeadFastBlock: 10, + expHeadBlock: 10, + expSnapshotBottom: 10, // Rebuilt snapshot from the latest HEAD + }, + gapped: 2, + } + test.test(t) + test.teardown() +} + +// Tests a Geth was running with leagcy snapshot enabled. Then restarts +// without enabling snapshot and after that re-enable the snapshot again. +// In this case the snapshot should be rebuilt with latest chain head. +func TestGappedLegacySnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G + // + // SetHead(0) + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10 + // + // Expected head header : C10 + // Expected head fast block: C10 + // Expected head block : C10 + // Expected snapshot disk : C10 + t.Skip("Legacy format testing is not supported") + test := &gappedSnapshotTest{ + snapshotTestBasic: snapshotTestBasic{ + legacy: true, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 10, + expHeadHeader: 10, + expHeadFastBlock: 10, + expHeadBlock: 10, + expSnapshotBottom: 10, // Rebuilt snapshot from the latest HEAD + }, + gapped: 2, + } + test.test(t) + test.teardown() +} + +// Tests the Geth was running with snapshot enabled and resetHead is applied. +// In this case the head is rewound to the target(with state available). After +// that the chain is restarted and the original disk layer is kept. 
+func TestSetHeadWithNewSnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G + // + // SetHead(4) + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4 + // + // Expected head header : C4 + // Expected head fast block: C4 + // Expected head block : C4 + // Expected snapshot disk : G + test := &setHeadSnapshotTest{ + snapshotTestBasic: snapshotTestBasic{ + legacy: false, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 4, + expHeadHeader: 4, + expHeadFastBlock: 4, + expHeadBlock: 4, + expSnapshotBottom: 0, // The initial disk layer is built from the genesis + }, + setHead: 4, + } + test.test(t) + test.teardown() +} + +// Tests the Geth was running with snapshot(legacy-format) enabled and resetHead +// is applied. In this case the head is rewound to the target(with state available). +// After that the chain is restarted and the original disk layer is kept. +func TestSetHeadWithLegacySnapshot(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G + // + // SetHead(4) + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4 + // + // Expected head header : C4 + // Expected head fast block: C4 + // Expected head block : C4 + // Expected snapshot disk : G + t.Skip("Legacy format testing is not supported") + test := &setHeadSnapshotTest{ + snapshotTestBasic: snapshotTestBasic{ + legacy: true, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 4, + expHeadHeader: 4, + expHeadFastBlock: 4, + expHeadBlock: 4, + expSnapshotBottom: 0, // The initial disk layer is built from the genesis + }, + setHead: 4, + } + test.test(t) + test.teardown() +} + +// Tests the Geth was running with snapshot(legacy-format) enabled and upgrades +// the disk layer journal(journal generator) to latest format. After that the Geth +// is restarted from a crash. In this case Geth will find the new-format disk layer +// journal but with legacy-format diff journal(the new-format is never committed), +// and the invalid diff journal is expected to be dropped. +func TestRecoverSnapshotFromCrashWithLegacyDiffJournal(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G + // + // SetHead(0) + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10 + // + // Expected head header : C10 + // Expected head fast block: C10 + // Expected head block : C8 + // Expected snapshot disk : C10 + t.Skip("Legacy format testing is not supported") + test := &restartCrashSnapshotTest{ + snapshotTestBasic: snapshotTestBasic{ + legacy: true, + chainBlocks: 8, + snapshotBlock: 0, + commitBlock: 0, + expCanonicalBlocks: 10, + expHeadHeader: 10, + expHeadFastBlock: 10, + expHeadBlock: 8, // The persisted state in the first running + expSnapshotBottom: 10, // The persisted disk layer in the second running + }, + newBlocks: 2, + } + test.test(t) + test.teardown() +} + +// Tests the Geth was running with a complete snapshot and then imports a few +// more new blocks on top without enabling the snapshot. After the restart, +// crash happens. Check everything is ok after the restart. 
+func TestRecoverSnapshotFromWipingCrash(t *testing.T) { + // Chain: + // G->C1->C2->C3->C4->C5->C6->C7->C8 (HEAD) + // + // Commit: G + // Snapshot: G + // + // SetHead(0) + // + // ------------------------------ + // + // Expected in leveldb: + // G->C1->C2->C3->C4->C5->C6->C7->C8->C9->C10 + // + // Expected head header : C10 + // Expected head fast block: C10 + // Expected head block : C8 + // Expected snapshot disk : C10 + test := &wipeCrashSnapshotTest{ + snapshotTestBasic: snapshotTestBasic{ + legacy: false, + chainBlocks: 8, + snapshotBlock: 4, + commitBlock: 0, + expCanonicalBlocks: 10, + expHeadHeader: 10, + expHeadFastBlock: 10, + expHeadBlock: 10, + expSnapshotBottom: 10, + }, + newBlocks: 2, + } + test.test(t) + test.teardown() +} diff --git a/core/rawdb/accessors_snapshot.go b/core/rawdb/accessors_snapshot.go index e5aefbe17dab..eef8acc01852 100644 --- a/core/rawdb/accessors_snapshot.go +++ b/core/rawdb/accessors_snapshot.go @@ -17,6 +17,8 @@ package rawdb import ( + "encoding/binary" + "github.com/celo-org/celo-blockchain/common" "github.com/celo-org/celo-blockchain/ethdb" "github.com/celo-org/celo-blockchain/log" @@ -118,3 +120,58 @@ func DeleteSnapshotJournal(db ethdb.KeyValueWriter) { log.Crit("Failed to remove snapshot journal", "err", err) } } + +// ReadSnapshotGenerator retrieves the serialized snapshot generator saved at +// the last shutdown. +func ReadSnapshotGenerator(db ethdb.KeyValueReader) []byte { + data, _ := db.Get(snapshotGeneratorKey) + return data +} + +// WriteSnapshotGenerator stores the serialized snapshot generator to save at +// shutdown. +func WriteSnapshotGenerator(db ethdb.KeyValueWriter, generator []byte) { + if err := db.Put(snapshotGeneratorKey, generator); err != nil { + log.Crit("Failed to store snapshot generator", "err", err) + } +} + +// DeleteSnapshotGenerator deletes the serialized snapshot generator saved at +// the last shutdown +func DeleteSnapshotGenerator(db ethdb.KeyValueWriter) { + if err := db.Delete(snapshotGeneratorKey); err != nil { + log.Crit("Failed to remove snapshot generator", "err", err) + } +} + +// ReadSnapshotRecoveryNumber retrieves the block number of the last persisted +// snapshot layer. +func ReadSnapshotRecoveryNumber(db ethdb.KeyValueReader) *uint64 { + data, _ := db.Get(snapshotRecoveryKey) + if len(data) == 0 { + return nil + } + if len(data) != 8 { + return nil + } + number := binary.BigEndian.Uint64(data) + return &number +} + +// WriteSnapshotRecoveryNumber stores the block number of the last persisted +// snapshot layer. +func WriteSnapshotRecoveryNumber(db ethdb.KeyValueWriter, number uint64) { + var buf [8]byte + binary.BigEndian.PutUint64(buf[:], number) + if err := db.Put(snapshotRecoveryKey, buf[:]); err != nil { + log.Crit("Failed to store snapshot recovery number", "err", err) + } +} + +// DeleteSnapshotRecoveryNumber deletes the block number of the last persisted +// snapshot layer. +func DeleteSnapshotRecoveryNumber(db ethdb.KeyValueWriter) { + if err := db.Delete(snapshotRecoveryKey); err != nil { + log.Crit("Failed to remove snapshot recovery number", "err", err) + } +} diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index 628e428b5b96..d827220b79ec 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -50,6 +50,12 @@ var ( // snapshotJournalKey tracks the in-memory diff layers across restarts. snapshotJournalKey = []byte("SnapshotJournal") + // snapshotGeneratorKey tracks the snapshot generation marker across restarts. 
+ snapshotGeneratorKey = []byte("SnapshotGenerator") + + // snapshotRecoveryKey tracks the snapshot recovery marker across restarts. + snapshotRecoveryKey = []byte("SnapshotRecovery") + // txIndexTailKey tracks the oldest block whose transactions have been indexed. txIndexTailKey = []byte("TransactionIndexTail") diff --git a/core/state/snapshot/conversion.go b/core/state/snapshot/conversion.go index dd1741ce6e23..b464c28dca8e 100644 --- a/core/state/snapshot/conversion.go +++ b/core/state/snapshot/conversion.go @@ -240,7 +240,7 @@ func generateTrieRoot(it Iterator, account common.Hash, generatorFn trieGenerato } in <- leaf - // Accumulate the generaation statistic if it's required. + // Accumulate the generation statistic if it's required. processed++ if time.Since(logged) > 3*time.Second && stats != nil { if account == (common.Hash{}) { diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go index 900b34e9d7d6..aed58f17a281 100644 --- a/core/state/snapshot/difflayer.go +++ b/core/state/snapshot/difflayer.go @@ -44,7 +44,7 @@ var ( // aggregatorItemLimit is an approximate number of items that will end up // in the agregator layer before it's flushed out to disk. A plain account // weighs around 14B (+hash), a storage slot 32B (+hash), a deleted slot - // 0B (+hash). Slots are mostly set/unset in lockstep, so thet average at + // 0B (+hash). Slots are mostly set/unset in lockstep, so that average at // 16B (+hash). All in all, the average entry seems to be 15+32=47B. Use a // smaller number to be on the safe side. aggregatorItemLimit = aggregatorMemoryLimit / 42 @@ -114,9 +114,9 @@ type diffLayer struct { // deleted, all data in other set belongs to the "new" A. destructSet map[common.Hash]struct{} // Keyed markers for deleted (and potentially) recreated accounts accountList []common.Hash // List of account for iteration. If it exists, it's sorted, otherwise it's nil - accountData map[common.Hash][]byte // Keyed accounts for direct retrival (nil means deleted) + accountData map[common.Hash][]byte // Keyed accounts for direct retrieval (nil means deleted) storageList map[common.Hash][]common.Hash // List of storage slots for iterated retrievals, one per account. Any existing lists are sorted if non-nil - storageData map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrival. one per account (nil means deleted) + storageData map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrieval. 
one per account (nil means deleted) diffed *bloomfilter.Filter // Bloom filter tracking all the diffed items up to the disk layer @@ -191,19 +191,15 @@ func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]s if blob == nil { panic(fmt.Sprintf("account %#x nil", accountHash)) } + // Determine memory size and track the dirty writes + dl.memory += uint64(common.HashLength + len(blob)) + snapshotDirtyAccountWriteMeter.Mark(int64(len(blob))) } for accountHash, slots := range storage { if slots == nil { panic(fmt.Sprintf("storage %#x nil", accountHash)) } - } - // Determine memory size and track the dirty writes - for _, data := range accounts { - dl.memory += uint64(common.HashLength + len(data)) - snapshotDirtyAccountWriteMeter.Mark(int64(len(data))) - } - // Determine memory size and track the dirty writes - for _, slots := range storage { + // Determine memory size and track the dirty writes for _, data := range slots { dl.memory += uint64(common.HashLength + len(data)) snapshotDirtyStorageWriteMeter.Mark(int64(len(data))) @@ -300,13 +296,17 @@ func (dl *diffLayer) AccountRLP(hash common.Hash) ([]byte, error) { if !hit { hit = dl.diffed.Contains(destructBloomHasher(hash)) } + var origin *diskLayer + if !hit { + origin = dl.origin // extract origin while holding the lock + } dl.lock.RUnlock() // If the bloom filter misses, don't even bother with traversing the memory // diff layers, reach straight into the bottom persistent disk layer - if !hit { + if origin != nil { snapshotBloomAccountMissMeter.Mark(1) - return dl.origin.AccountRLP(hash) + return origin.AccountRLP(hash) } // The bloom filter hit, start poking in the internal maps return dl.accountRLP(hash, 0) @@ -362,13 +362,17 @@ func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) ([]byte, erro if !hit { hit = dl.diffed.Contains(destructBloomHasher(accountHash)) } + var origin *diskLayer + if !hit { + origin = dl.origin // extract origin while holding the lock + } dl.lock.RUnlock() // If the bloom filter misses, don't even bother with traversing the memory // diff layers, reach straight into the bottom persistent disk layer - if !hit { + if origin != nil { snapshotBloomStorageMissMeter.Mark(1) - return dl.origin.Storage(accountHash, storageHash) + return origin.Storage(accountHash, storageHash) } // The bloom filter hit, start poking in the internal maps return dl.storage(accountHash, storageHash, 0) @@ -482,7 +486,7 @@ func (dl *diffLayer) flatten() snapshot { } } -// AccountList returns a sorted list of all accounts in this difflayer, including +// AccountList returns a sorted list of all accounts in this diffLayer, including // the deleted ones. // // Note, the returned slice is not a copy, so do not modify it. @@ -513,7 +517,7 @@ func (dl *diffLayer) AccountList() []common.Hash { return dl.accountList } -// StorageList returns a sorted list of all storage slot hashes in this difflayer +// StorageList returns a sorted list of all storage slot hashes in this diffLayer // for the given account. If the whole storage is destructed in this layer, then // an additional flag *destructed = true* will be returned, otherwise the flag is // false. Besides, the returned list will include the hash of deleted storage slot. 
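The AccountRLP and Storage hunks above apply one and the same pattern: the bottom disk layer is read into a local variable while the read lock is still held, instead of dereferencing dl.origin after RUnlock, where a concurrent Cap (which repoints origin when the bottom diff layer is flattened into disk) could otherwise race with the read. A condensed sketch of the shape both methods now follow (the helper name lookupAccountSketch is illustrative only; the fields and types come from the code above):

	// lookupAccountSketch mirrors the capture-then-unlock pattern of the patched
	// AccountRLP: decide inside the lock whether the bloom filter hit, and if it
	// missed, also grab the origin pointer before releasing the lock.
	func (dl *diffLayer) lookupAccountSketch(hash common.Hash) ([]byte, error) {
		dl.lock.RLock()
		hit := dl.diffed.Contains(accountBloomHasher(hash))
		if !hit {
			hit = dl.diffed.Contains(destructBloomHasher(hash))
		}
		var origin *diskLayer
		if !hit {
			origin = dl.origin // read the parent pointer while locked
		}
		dl.lock.RUnlock()

		if origin != nil {
			// Bloom miss: skip the in-memory diff layers entirely.
			return origin.AccountRLP(hash)
		}
		// Bloom hit (or a false positive): resolve through the diff layers.
		return dl.accountRLP(hash, 0)
	}

Storage follows the same shape, only keyed on storageBloomHasher(accountHash, storageHash) and falling back to origin.Storage.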
diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go index 2c4282f2a730..cc48c84747cc 100644 --- a/core/state/snapshot/difflayer_test.go +++ b/core/state/snapshot/difflayer_test.go @@ -314,7 +314,7 @@ func BenchmarkSearchSlot(b *testing.B) { // With accountList and sorting // BenchmarkFlatten-6 50 29890856 ns/op // -// Without sorting and tracking accountlist +// Without sorting and tracking accountList // BenchmarkFlatten-6 300 5511511 ns/op func BenchmarkFlatten(b *testing.B) { fill := func(parent snapshot) *diffLayer { diff --git a/core/state/snapshot/disklayer.go b/core/state/snapshot/disklayer.go index d6195d6e6f6f..833cabee1c03 100644 --- a/core/state/snapshot/disklayer.go +++ b/core/state/snapshot/disklayer.go @@ -31,7 +31,7 @@ import ( // diskLayer is a low level persistent snapshot built on top of a key-value store. type diskLayer struct { diskdb ethdb.KeyValueStore // Key-value store containing the base snapshot - triedb *trie.Database // Trie node cache for reconstuction purposes + triedb *trie.Database // Trie node cache for reconstruction purposes cache *fastcache.Cache // Cache to avoid hitting the disk for direct access root common.Hash // Root hash of the base snapshot diff --git a/core/state/snapshot/disklayer_test.go b/core/state/snapshot/disklayer_test.go index 79d015b8a544..364bdcae4a9a 100644 --- a/core/state/snapshot/disklayer_test.go +++ b/core/state/snapshot/disklayer_test.go @@ -28,6 +28,7 @@ import ( "github.com/celo-org/celo-blockchain/ethdb" "github.com/celo-org/celo-blockchain/ethdb/leveldb" "github.com/celo-org/celo-blockchain/ethdb/memorydb" + "github.com/celo-org/celo-blockchain/rlp" ) // reverse reverses the contents of a byte slice. It's used to update random accs @@ -429,6 +430,81 @@ func TestDiskPartialMerge(t *testing.T) { } } +// Tests that when the bottom-most diff layer is merged into the disk +// layer whether the corresponding generator is persisted correctly. +func TestDiskGeneratorPersistence(t *testing.T) { + var ( + accOne = randomHash() + accTwo = randomHash() + accOneSlotOne = randomHash() + accOneSlotTwo = randomHash() + + accThree = randomHash() + accThreeSlot = randomHash() + baseRoot = randomHash() + diffRoot = randomHash() + diffTwoRoot = randomHash() + genMarker = append(randomHash().Bytes(), randomHash().Bytes()...) + ) + // Testing scenario 1, the disk layer is still under the construction. 
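+	// A non-nil genMarker marks the disk layer as still generating; the 64-byte
+	// value above stands in for an account hash plus a storage-slot hash.
+	// Scenario 1 expects the flattened generator journal to keep this
+	// in-progress marker, while scenario 2 below nils the marker out first and
+	// expects the persisted generator to report completion.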
+ db := rawdb.NewMemoryDatabase() + + rawdb.WriteAccountSnapshot(db, accOne, accOne[:]) + rawdb.WriteStorageSnapshot(db, accOne, accOneSlotOne, accOneSlotOne[:]) + rawdb.WriteStorageSnapshot(db, accOne, accOneSlotTwo, accOneSlotTwo[:]) + rawdb.WriteSnapshotRoot(db, baseRoot) + + // Create a disk layer based on all above updates + snaps := &Tree{ + layers: map[common.Hash]snapshot{ + baseRoot: &diskLayer{ + diskdb: db, + cache: fastcache.New(500 * 1024), + root: baseRoot, + genMarker: genMarker, + }, + }, + } + // Modify or delete some accounts, flatten everything onto disk + if err := snaps.Update(diffRoot, baseRoot, nil, map[common.Hash][]byte{ + accTwo: accTwo[:], + }, nil); err != nil { + t.Fatalf("failed to update snapshot tree: %v", err) + } + if err := snaps.Cap(diffRoot, 0); err != nil { + t.Fatalf("failed to flatten snapshot tree: %v", err) + } + blob := rawdb.ReadSnapshotGenerator(db) + var generator journalGenerator + if err := rlp.DecodeBytes(blob, &generator); err != nil { + t.Fatalf("Failed to decode snapshot generator %v", err) + } + if !bytes.Equal(generator.Marker, genMarker) { + t.Fatalf("Generator marker is not matched") + } + // Test scenario 2, the disk layer is fully generated + // Modify or delete some accounts, flatten everything onto disk + if err := snaps.Update(diffTwoRoot, diffRoot, nil, map[common.Hash][]byte{ + accThree: accThree.Bytes(), + }, map[common.Hash]map[common.Hash][]byte{ + accThree: {accThreeSlot: accThreeSlot.Bytes()}, + }); err != nil { + t.Fatalf("failed to update snapshot tree: %v", err) + } + diskLayer := snaps.layers[snaps.diskRoot()].(*diskLayer) + diskLayer.genMarker = nil // Construction finished + if err := snaps.Cap(diffTwoRoot, 0); err != nil { + t.Fatalf("failed to flatten snapshot tree: %v", err) + } + blob = rawdb.ReadSnapshotGenerator(db) + if err := rlp.DecodeBytes(blob, &generator); err != nil { + t.Fatalf("Failed to decode snapshot generator %v", err) + } + if len(generator.Marker) != 0 { + t.Fatalf("Failed to update snapshot generator") + } +} + // Tests that merging something into a disk layer persists it into the database // and invalidates any previously written and cached values, discarding anything // after the in-progress generation marker. diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go index 6f3e198a1dfd..1f0ed4ec192b 100644 --- a/core/state/snapshot/generate.go +++ b/core/state/snapshot/generate.go @@ -19,6 +19,7 @@ package snapshot import ( "bytes" "encoding/binary" + "fmt" "math/big" "time" @@ -54,9 +55,11 @@ type generatorStats struct { // Log creates an contextual log with the given message and the context pulled // from the internally maintained statistics. -func (gs *generatorStats) Log(msg string, marker []byte) { +func (gs *generatorStats) Log(msg string, root common.Hash, marker []byte) { var ctx []interface{} - + if root != (common.Hash{}) { + ctx = append(ctx, []interface{}{"root", root}...) + } // Figure out whether we're after or within an account switch len(marker) { case common.HashLength: @@ -98,36 +101,79 @@ func generateSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache i wiper = wipeSnapshot(diskdb, true) } // Create a new disk layer with an initialized state marker at zero - rawdb.WriteSnapshotRoot(diskdb, root) - + var ( + stats = &generatorStats{wiping: wiper, start: time.Now()} + batch = diskdb.NewBatch() + genMarker = []byte{} // Initialized but empty! 
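+		// An empty but non-nil marker means generation has started with nothing
+		// flushed yet; a nil marker is reserved for a fully generated snapshot.
+		// journalProgress (added below) encodes exactly this distinction: Done is
+		// true only for a nil marker.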
+ ) + rawdb.WriteSnapshotRoot(batch, root) + journalProgress(batch, genMarker, stats) + if err := batch.Write(); err != nil { + log.Crit("Failed to write initialized state marker", "error", err) + } base := &diskLayer{ diskdb: diskdb, triedb: triedb, root: root, cache: fastcache.New(cache * 1024 * 1024), - genMarker: []byte{}, // Initialized but empty! + genMarker: genMarker, genPending: make(chan struct{}), genAbort: make(chan chan *generatorStats), } - go base.generate(&generatorStats{wiping: wiper, start: time.Now()}) + go base.generate(stats) + log.Debug("Start snapshot generation", "root", root) return base } +// journalProgress persists the generator stats into the database to resume later. +func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorStats) { + // Write out the generator marker. Note it's a standalone disk layer generator + // which is not mixed with journal. It's ok if the generator is persisted while + // journal is not. + entry := journalGenerator{ + Done: marker == nil, + Marker: marker, + } + if stats != nil { + entry.Wiping = (stats.wiping != nil) + entry.Accounts = stats.accounts + entry.Slots = stats.slots + entry.Storage = uint64(stats.storage) + } + blob, err := rlp.EncodeToBytes(entry) + if err != nil { + panic(err) // Cannot happen, here to catch dev errors + } + var logstr string + switch { + case marker == nil: + logstr = "done" + case bytes.Equal(marker, []byte{}): + logstr = "empty" + case len(marker) == common.HashLength: + logstr = fmt.Sprintf("%#x", marker) + default: + logstr = fmt.Sprintf("%#x:%#x", marker[:common.HashLength], marker[common.HashLength:]) + } + log.Debug("Journalled generator progress", "progress", logstr) + rawdb.WriteSnapshotGenerator(db, blob) +} + // generate is a background thread that iterates over the state and storage tries, // constructing the state snapshot. All the arguments are purely for statistics -// gethering and logging, since the method surfs the blocks as they arrive, often +// gathering and logging, since the method surfs the blocks as they arrive, often // being restarted. 
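+// Crash-safety note: every intermediate flush below journals the updated
+// generator marker (journalProgress) into the same write batch as the snapshot
+// data it covers, so an unclean shutdown can never leave a persisted marker
+// that points past data which was not actually written.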
func (dl *diskLayer) generate(stats *generatorStats) { // If a database wipe is in operation, wait until it's done if stats.wiping != nil { - stats.Log("Wiper running, state snapshotting paused", dl.genMarker) + stats.Log("Wiper running, state snapshotting paused", common.Hash{}, dl.genMarker) select { // If wiper is done, resume normal mode of operation case <-stats.wiping: stats.wiping = nil stats.start = time.Now() - // If generator was aboted during wipe, return + // If generator was aborted during wipe, return case abort := <-dl.genAbort: abort <- stats return @@ -137,13 +183,13 @@ func (dl *diskLayer) generate(stats *generatorStats) { accTrie, err := trie.NewSecure(dl.root, dl.triedb) if err != nil { // The account trie is missing (GC), surf the chain until one becomes available - stats.Log("Trie missing, state snapshotting paused", dl.genMarker) + stats.Log("Trie missing, state snapshotting paused", dl.root, dl.genMarker) abort := <-dl.genAbort abort <- stats return } - stats.Log("Resuming state snapshot generation", dl.genMarker) + stats.Log("Resuming state snapshot generation", dl.root, dl.genMarker) var accMarker []byte if len(dl.genMarker) > 0 { // []byte{} is the start, use nil for that @@ -184,15 +230,19 @@ func (dl *diskLayer) generate(stats *generatorStats) { if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { // Only write and set the marker if we actually did something useful if batch.ValueSize() > 0 { + // Ensure the generator entry is in sync with the data + marker := accountHash[:] + journalProgress(batch, marker, stats) + batch.Write() batch.Reset() dl.lock.Lock() - dl.genMarker = accountHash[:] + dl.genMarker = marker dl.lock.Unlock() } if abort != nil { - stats.Log("Aborting state snapshot generation", accountHash[:]) + stats.Log("Aborting state snapshot generation", dl.root, accountHash[:]) abort <- stats return } @@ -201,7 +251,10 @@ func (dl *diskLayer) generate(stats *generatorStats) { if acc.Root != emptyRoot { storeTrie, err := trie.NewSecure(acc.Root, dl.triedb) if err != nil { - log.Crit("Storage trie inaccessible for snapshot generation", "err", err) + log.Error("Generator failed to access storage trie", "accroot", dl.root, "acchash", common.BytesToHash(accIt.Key), "stroot", acc.Root, "err", err) + abort := <-dl.genAbort + abort <- stats + return } var storeMarker []byte if accMarker != nil && bytes.Equal(accountHash[:], accMarker) && len(dl.genMarker) > common.HashLength { @@ -222,32 +275,54 @@ func (dl *diskLayer) generate(stats *generatorStats) { if batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { // Only write and set the marker if we actually did something useful if batch.ValueSize() > 0 { + // Ensure the generator entry is in sync with the data + marker := append(accountHash[:], storeIt.Key...) + journalProgress(batch, marker, stats) + batch.Write() batch.Reset() dl.lock.Lock() - dl.genMarker = append(accountHash[:], storeIt.Key...) 
+ dl.genMarker = marker dl.lock.Unlock() } if abort != nil { - stats.Log("Aborting state snapshot generation", append(accountHash[:], storeIt.Key...)) + stats.Log("Aborting state snapshot generation", dl.root, append(accountHash[:], storeIt.Key...)) abort <- stats return } + if time.Since(logged) > 8*time.Second { + stats.Log("Generating state snapshot", dl.root, append(accountHash[:], storeIt.Key...)) + logged = time.Now() + } } } + if err := storeIt.Err; err != nil { + log.Error("Generator failed to iterate storage trie", "accroot", dl.root, "acchash", common.BytesToHash(accIt.Key), "stroot", acc.Root, "err", err) + abort := <-dl.genAbort + abort <- stats + return + } } if time.Since(logged) > 8*time.Second { - stats.Log("Generating state snapshot", accIt.Key) + stats.Log("Generating state snapshot", dl.root, accIt.Key) logged = time.Now() } // Some account processed, unmark the marker accMarker = nil } - // Snapshot fully generated, set the marker to nil - if batch.ValueSize() > 0 { - batch.Write() + if err := accIt.Err; err != nil { + log.Error("Generator failed to iterate account trie", "root", dl.root, "err", err) + abort := <-dl.genAbort + abort <- stats + return } + // Snapshot fully generated, set the marker to nil. + // Note even there is nothing to commit, persist the + // generator anyway to mark the snapshot is complete. + journalProgress(batch, nil, stats) + batch.Write() + log.Info("Generated state snapshot", "accounts", stats.accounts, "slots", stats.slots, "storage", stats.storage, "elapsed", common.PrettyDuration(time.Since(stats.start))) diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go new file mode 100644 index 000000000000..648325f00798 --- /dev/null +++ b/core/state/snapshot/generate_test.go @@ -0,0 +1,190 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package snapshot + +import ( + "math/big" + "testing" + "time" + + "github.com/celo-org/celo-blockchain/common" + "github.com/celo-org/celo-blockchain/ethdb/memorydb" + "github.com/celo-org/celo-blockchain/rlp" + "github.com/celo-org/celo-blockchain/trie" +) + +// Tests that snapshot generation errors out correctly in case of a missing trie +// node in the account trie. +func TestGenerateCorruptAccountTrie(t *testing.T) { + // We can't use statedb to make a test trie (circular dependency), so make + // a fake one manually. We're going with a small account trie of 3 accounts, + // without any storage slots to keep the test smaller. 
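The generate() changes above (before the new tests) flush the progress marker in the same write batch as the snapshot data itself, so a crash between flushes can never leave persisted data newer than the recorded marker. A rough stand-in for that pattern, with a toy in-memory batch instead of ethdb; the names here are illustrative, not the real API:

package main

import "fmt"

// memBatch is a toy stand-in for an ethdb write batch: it buffers writes and
// commits them in one step.
type memBatch struct {
	pending map[string][]byte
	store   map[string][]byte
}

func newMemBatch(store map[string][]byte) *memBatch {
	return &memBatch{pending: make(map[string][]byte), store: store}
}

func (b *memBatch) Put(key string, val []byte) { b.pending[key] = val }

func (b *memBatch) Write() {
	for k, v := range b.pending {
		b.store[k] = v
	}
	b.pending = make(map[string][]byte)
}

// flushWithMarker commits buffered snapshot entries together with the progress
// marker, mirroring journalProgress(batch, marker, stats) followed by batch.Write().
func flushWithMarker(b *memBatch, marker []byte) {
	b.Put("snapshot-generator", marker) // progress record goes into the same batch
	b.Write()                           // data and marker become visible atomically
}

func main() {
	store := make(map[string][]byte)
	batch := newMemBatch(store)

	batch.Put("account-0xaa", []byte("slim account RLP"))
	flushWithMarker(batch, []byte{0xaa})

	fmt.Printf("resume point after crash: %x\n", store["snapshot-generator"])
}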
+ var ( + diskdb = memorydb.New() + triedb = trie.NewDatabase(diskdb) + ) + tr, _ := trie.NewSecure(common.Hash{}, triedb) + acc := &Account{Balance: big.NewInt(1), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + tr.Update([]byte("acc-1"), val) // 0xc7a30f39aff471c95d8a837497ad0e49b65be475cc0953540f80cfcdbdcd9074 + + acc = &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + tr.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 + + acc = &Account{Balance: big.NewInt(3), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + tr.Update([]byte("acc-3"), val) // 0x19ead688e907b0fab07176120dceec244a72aff2f0aa51e8b827584e378772f4 + tr.Commit(nil) // Root: 0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978 + + // Delete an account trie leaf and ensure the generator chokes + triedb.Commit(common.HexToHash("0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978"), false) + diskdb.Delete(common.HexToHash("0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7").Bytes()) + + snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978"), nil) + select { + case <-snap.genPending: + // Snapshot generation succeeded + t.Errorf("Snapshot generated against corrupt account trie") + + case <-time.After(250 * time.Millisecond): + // Not generated fast enough, hopefully blocked inside on missing trie node fail + } + // Signal abortion to the generator and wait for it to tear down + stop := make(chan *generatorStats) + snap.genAbort <- stop + <-stop +} + +// Tests that snapshot generation errors out correctly in case of a missing root +// trie node for a storage trie. It's similar to internal corruption but it is +// handled differently inside the generator. +func TestGenerateMissingStorageTrie(t *testing.T) { + // We can't use statedb to make a test trie (circular dependency), so make + // a fake one manually. We're going with a small account trie of 3 accounts, + // two of which also has the same 3-slot storage trie attached. 
+ var ( + diskdb = memorydb.New() + triedb = trie.NewDatabase(diskdb) + ) + stTrie, _ := trie.NewSecure(common.Hash{}, triedb) + stTrie.Update([]byte("key-1"), []byte("val-1")) // 0x1314700b81afc49f94db3623ef1df38f3ed18b73a1b7ea2f6c095118cf6118a0 + stTrie.Update([]byte("key-2"), []byte("val-2")) // 0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371 + stTrie.Update([]byte("key-3"), []byte("val-3")) // 0x51c71a47af0695957647fb68766d0becee77e953df17c29b3c2f25436f055c78 + stTrie.Commit(nil) // Root: 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 + + accTrie, _ := trie.NewSecure(common.Hash{}, triedb) + acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + + acc = &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 + + acc = &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-3"), val) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 + accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd + + // We can only corrupt the disk database, so flush the tries out + triedb.Reference( + common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67"), + common.HexToHash("0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e"), + ) + triedb.Reference( + common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67"), + common.HexToHash("0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2"), + ) + triedb.Commit(common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), false) + + // Delete a storage trie root and ensure the generator chokes + diskdb.Delete(common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67").Bytes()) + + snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), nil) + select { + case <-snap.genPending: + // Snapshot generation succeeded + t.Errorf("Snapshot generated against corrupt storage trie") + + case <-time.After(250 * time.Millisecond): + // Not generated fast enough, hopefully blocked inside on missing trie node fail + } + // Signal abortion to the generator and wait for it to tear down + stop := make(chan *generatorStats) + snap.genAbort <- stop + <-stop +} + +// Tests that snapshot generation errors out correctly in case of a missing trie +// node in a storage trie. +func TestGenerateCorruptStorageTrie(t *testing.T) { + // We can't use statedb to make a test trie (circular dependency), so make + // a fake one manually. We're going with a small account trie of 3 accounts, + // two of which also has the same 3-slot storage trie attached. 
+ var ( + diskdb = memorydb.New() + triedb = trie.NewDatabase(diskdb) + ) + stTrie, _ := trie.NewSecure(common.Hash{}, triedb) + stTrie.Update([]byte("key-1"), []byte("val-1")) // 0x1314700b81afc49f94db3623ef1df38f3ed18b73a1b7ea2f6c095118cf6118a0 + stTrie.Update([]byte("key-2"), []byte("val-2")) // 0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371 + stTrie.Update([]byte("key-3"), []byte("val-3")) // 0x51c71a47af0695957647fb68766d0becee77e953df17c29b3c2f25436f055c78 + stTrie.Commit(nil) // Root: 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 + + accTrie, _ := trie.NewSecure(common.Hash{}, triedb) + acc := &Account{Balance: big.NewInt(1), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-1"), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + + acc = &Account{Balance: big.NewInt(2), Root: emptyRoot.Bytes(), CodeHash: emptyCode.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-2"), val) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 + + acc = &Account{Balance: big.NewInt(3), Root: stTrie.Hash().Bytes(), CodeHash: emptyCode.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + accTrie.Update([]byte("acc-3"), val) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 + accTrie.Commit(nil) // Root: 0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd + + // We can only corrupt the disk database, so flush the tries out + triedb.Reference( + common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67"), + common.HexToHash("0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e"), + ) + triedb.Reference( + common.HexToHash("0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67"), + common.HexToHash("0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2"), + ) + triedb.Commit(common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), false) + + // Delete a storage trie leaf and ensure the generator chokes + diskdb.Delete(common.HexToHash("0x18a0f4d79cff4459642dd7604f303886ad9d77c30cf3d7d7cedb3a693ab6d371").Bytes()) + + snap := generateSnapshot(diskdb, triedb, 16, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"), nil) + select { + case <-snap.genPending: + // Snapshot generation succeeded + t.Errorf("Snapshot generated against corrupt storage trie") + + case <-time.After(250 * time.Millisecond): + // Not generated fast enough, hopefully blocked inside on missing trie node fail + } + // Signal abortion to the generator and wait for it to tear down + stop := make(chan *generatorStats) + snap.genAbort <- stop + <-stop +} diff --git a/core/state/snapshot/iterator.go b/core/state/snapshot/iterator.go index d0576cb7fb3d..b30acfd4bd33 100644 --- a/core/state/snapshot/iterator.go +++ b/core/state/snapshot/iterator.go @@ -133,7 +133,7 @@ func (it *diffAccountIterator) Hash() common.Hash { // Account returns the RLP encoded slim account the iterator is currently at. // This method may _fail_, if the underlying layer has been flattened between -// the call to Next and Acccount. That type of error will set it.Err. +// the call to Next and Account. That type of error will set it.Err. // This method assumes that flattening does not delete elements from // the accountdata mapping (writing nil into it is fine though), and will panic // if elements have been deleted. 
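The tests above rely on the generator's abort handshake: the caller sends a fresh channel down genAbort, and the generator replies on that channel with its stats before exiting. A minimal reproduction of the channel-of-channels pattern with a generic worker, not the real diskLayer:

package main

import (
	"fmt"
	"time"
)

type stats struct{ processed int }

// worker loops until told to stop; the abort message carries the channel on
// which the final stats must be returned, so the caller can block until the
// goroutine has fully torn down.
func worker(abort chan chan *stats) {
	s := &stats{}
	for {
		select {
		case reply := <-abort:
			reply <- s // hand the stats back and exit
			return
		default:
			s.processed++
			time.Sleep(10 * time.Millisecond)
		}
	}
}

func main() {
	abort := make(chan chan *stats)
	go worker(abort)

	time.Sleep(100 * time.Millisecond)

	// Same shape as the tests: stop := make(chan *generatorStats); snap.genAbort <- stop; <-stop
	stop := make(chan *stats)
	abort <- stop
	final := <-stop
	fmt.Println("processed before abort:", final.processed)
}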
@@ -243,7 +243,7 @@ type diffStorageIterator struct { } // StorageIterator creates a storage iterator over a single diff layer. -// Execept the storage iterator is returned, there is an additional flag +// Except the storage iterator is returned, there is an additional flag // "destructed" returned. If it's true then it means the whole storage is // destructed in this layer(maybe recreated too), don't bother deeper layer // for storage retrieval. diff --git a/core/state/snapshot/iterator_binary.go b/core/state/snapshot/iterator_binary.go index 6e51aed1413f..4af1dccd58a8 100644 --- a/core/state/snapshot/iterator_binary.go +++ b/core/state/snapshot/iterator_binary.go @@ -37,7 +37,7 @@ type binaryIterator struct { } // initBinaryAccountIterator creates a simplistic iterator to step over all the -// accounts in a slow, but eaily verifiable way. Note this function is used for +// accounts in a slow, but easily verifiable way. Note this function is used for // initialization, use `newBinaryAccountIterator` as the API. func (dl *diffLayer) initBinaryAccountIterator() Iterator { parent, ok := dl.parent.(*diffLayer) @@ -62,7 +62,7 @@ func (dl *diffLayer) initBinaryAccountIterator() Iterator { } // initBinaryStorageIterator creates a simplistic iterator to step over all the -// storage slots in a slow, but eaily verifiable way. Note this function is used +// storage slots in a slow, but easily verifiable way. Note this function is used // for initialization, use `newBinaryStorageIterator` as the API. func (dl *diffLayer) initBinaryStorageIterator(account common.Hash) Iterator { parent, ok := dl.parent.(*diffLayer) @@ -199,14 +199,14 @@ func (it *binaryIterator) Release() { } // newBinaryAccountIterator creates a simplistic account iterator to step over -// all the accounts in a slow, but eaily verifiable way. +// all the accounts in a slow, but easily verifiable way. func (dl *diffLayer) newBinaryAccountIterator() AccountIterator { iter := dl.initBinaryAccountIterator() return iter.(AccountIterator) } // newBinaryStorageIterator creates a simplistic account iterator to step over -// all the storage slots in a slow, but eaily verifiable way. +// all the storage slots in a slow, but easily verifiable way. func (dl *diffLayer) newBinaryStorageIterator(account common.Hash) StorageIterator { iter := dl.initBinaryStorageIterator(account) return iter.(StorageIterator) diff --git a/core/state/snapshot/iterator_fast.go b/core/state/snapshot/iterator_fast.go index 9ef1ee3887f2..ad6410997228 100644 --- a/core/state/snapshot/iterator_fast.go +++ b/core/state/snapshot/iterator_fast.go @@ -75,7 +75,7 @@ type fastIterator struct { fail error } -// newFastIterator creates a new hierarhical account or storage iterator with one +// newFastIterator creates a new hierarchical account or storage iterator with one // element per diff layer. The returned combo iterator can be used to walk over // the entire snapshot diff stack simultaneously. func newFastIterator(tree *Tree, root common.Hash, account common.Hash, seek common.Hash, accountIterator bool) (*fastIterator, error) { @@ -335,14 +335,14 @@ func (fi *fastIterator) Debug() { fmt.Println() } -// newFastAccountIterator creates a new hierarhical account iterator with one +// newFastAccountIterator creates a new hierarchical account iterator with one // element per diff layer. The returned combo iterator can be used to walk over // the entire snapshot diff stack simultaneously. 
func newFastAccountIterator(tree *Tree, root common.Hash, seek common.Hash) (AccountIterator, error) { return newFastIterator(tree, root, common.Hash{}, seek, true) } -// newFastStorageIterator creates a new hierarhical storage iterator with one +// newFastStorageIterator creates a new hierarchical storage iterator with one // element per diff layer. The returned combo iterator can be used to walk over // the entire snapshot diff stack simultaneously. func newFastStorageIterator(tree *Tree, root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) { diff --git a/core/state/snapshot/journal.go b/core/state/snapshot/journal.go index 6bddc4b90a45..332fccd556e3 100644 --- a/core/state/snapshot/journal.go +++ b/core/state/snapshot/journal.go @@ -33,6 +33,8 @@ import ( "github.com/celo-org/celo-blockchain/trie" ) +const journalVersion uint64 = 0 + // journalGenerator is a disk layer entry containing the generator progress marker. type journalGenerator struct { Wiping bool // Whether the database was in progress of being wiped @@ -61,8 +63,91 @@ type journalStorage struct { Vals [][]byte } +// loadAndParseLegacyJournal tries to parse the snapshot journal in legacy format. +func loadAndParseLegacyJournal(db ethdb.KeyValueStore, base *diskLayer) (snapshot, journalGenerator, error) { + // Retrieve the journal, for legacy journal it must exist since even for + // 0 layer it stores whether we've already generated the snapshot or are + // in progress only. + journal := rawdb.ReadSnapshotJournal(db) + if len(journal) == 0 { + return nil, journalGenerator{}, errors.New("missing or corrupted snapshot journal") + } + r := rlp.NewStream(bytes.NewReader(journal), 0) + + // Read the snapshot generation progress for the disk layer + var generator journalGenerator + if err := r.Decode(&generator); err != nil { + return nil, journalGenerator{}, fmt.Errorf("failed to load snapshot progress marker: %v", err) + } + // Load all the snapshot diffs from the journal + snapshot, err := loadDiffLayer(base, r) + if err != nil { + return nil, generator, err + } + return snapshot, generator, nil +} + +// loadAndParseJournal tries to parse the snapshot journal in latest format. +func loadAndParseJournal(db ethdb.KeyValueStore, base *diskLayer) (snapshot, journalGenerator, error) { + // Retrieve the disk layer generator. It must exist, no matter the + // snapshot is fully generated or not. Otherwise the entire disk + // layer is invalid. + generatorBlob := rawdb.ReadSnapshotGenerator(db) + if len(generatorBlob) == 0 { + return nil, journalGenerator{}, errors.New("missing snapshot generator") + } + var generator journalGenerator + if err := rlp.DecodeBytes(generatorBlob, &generator); err != nil { + return nil, journalGenerator{}, fmt.Errorf("failed to decode snapshot generator: %v", err) + } + // Retrieve the diff layer journal. It's possible that the journal is + // not existent, e.g. the disk layer is generating while that the Geth + // crashes without persisting the diff journal. + // So if there is no journal, or the journal is invalid(e.g. the journal + // is not matched with disk layer; or the it's the legacy-format journal, + // etc.), we just discard all diffs and try to recover them later. 
+ journal := rawdb.ReadSnapshotJournal(db) + if len(journal) == 0 { + log.Warn("Loaded snapshot journal", "diskroot", base.root, "diffs", "missing") + return base, generator, nil + } + r := rlp.NewStream(bytes.NewReader(journal), 0) + + // Firstly, resolve the first element as the journal version + version, err := r.Uint() + if err != nil { + log.Warn("Failed to resolve the journal version", "error", err) + return base, generator, nil + } + if version != journalVersion { + log.Warn("Discarded the snapshot journal with wrong version", "required", journalVersion, "got", version) + return base, generator, nil + } + // Secondly, resolve the disk layer root, ensure it's continuous + // with disk layer. Note now we can ensure it's the snapshot journal + // correct version, so we expect everything can be resolved properly. + var root common.Hash + if err := r.Decode(&root); err != nil { + return nil, journalGenerator{}, errors.New("missing disk layer root") + } + // The diff journal is not matched with disk, discard them. + // It can happen that Geth crashes without persisting the latest + // diff journal. + if !bytes.Equal(root.Bytes(), base.root.Bytes()) { + log.Warn("Loaded snapshot journal", "diskroot", base.root, "diffs", "unmatched") + return base, generator, nil + } + // Load all the snapshot diffs from the journal + snapshot, err := loadDiffLayer(base, r) + if err != nil { + return nil, journalGenerator{}, err + } + log.Debug("Loaded snapshot journal", "diskroot", base.root, "diffhead", snapshot.Root()) + return snapshot, generator, nil +} + // loadSnapshot loads a pre-existing state snapshot backed by a key-value store. -func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash) (snapshot, error) { +func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, recovery bool) (snapshot, error) { // Retrieve the block number and hash of the snapshot, failing if no snapshot // is present in the database (or crashed mid-update). baseRoot := rawdb.ReadSnapshotRoot(diskdb) @@ -75,28 +160,36 @@ func loadSnapshot(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, cache: fastcache.New(cache * 1024 * 1024), root: baseRoot, } - // Retrieve the journal, it must exist since even for 0 layer it stores whether - // we've already generated the snapshot or are in progress only - journal := rawdb.ReadSnapshotJournal(diskdb) - if len(journal) == 0 { - return nil, errors.New("missing or corrupted snapshot journal") - } - r := rlp.NewStream(bytes.NewReader(journal), 0) - - // Read the snapshot generation progress for the disk layer - var generator journalGenerator - if err := r.Decode(&generator); err != nil { - return nil, fmt.Errorf("failed to load snapshot progress marker: %v", err) + var legacy bool + snapshot, generator, err := loadAndParseJournal(diskdb, base) + if err != nil { + log.Warn("Failed to load new-format journal", "error", err) + snapshot, generator, err = loadAndParseLegacyJournal(diskdb, base) + legacy = true } - // Load all the snapshot diffs from the journal - snapshot, err := loadDiffLayer(base, r) if err != nil { return nil, err } - // Entire snapshot journal loaded, sanity check the head and return - // Journal doesn't exist, don't worry if it's not supposed to + // Entire snapshot journal loaded, sanity check the head. If the loaded + // snapshot is not matched with current state root, print a warning log + // or discard the entire snapshot it's legacy snapshot. 
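loadAndParseJournal above only trusts the diff journal after validating a small header: the journal version followed by the disk-layer root the diffs were built on. The real journal is RLP encoded; the sketch below uses a fixed-width binary header purely to stay dependency free, but the checks are the same:

package main

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
)

const journalVersion = 0

// readJournalHeader checks the two header fields the loader validates before
// decoding any diff layers: the journal version and the disk-layer root.
func readJournalHeader(r *bytes.Reader, diskRoot [32]byte) error {
	var version uint64
	if err := binary.Read(r, binary.BigEndian, &version); err != nil {
		return err
	}
	if version != journalVersion {
		return fmt.Errorf("wrong journal version: have %d, want %d", version, journalVersion)
	}
	var root [32]byte
	if err := binary.Read(r, binary.BigEndian, &root); err != nil {
		return errors.New("missing disk layer root")
	}
	if root != diskRoot {
		return errors.New("diff journal not continuous with disk layer")
	}
	return nil // safe to decode the diff layers that follow
}

func main() {
	var diskRoot [32]byte
	diskRoot[0] = 0x01

	buf := new(bytes.Buffer)
	binary.Write(buf, binary.BigEndian, uint64(journalVersion))
	binary.Write(buf, binary.BigEndian, diskRoot)

	// Any failure here is non-fatal in the loader: the diffs are simply
	// discarded and the disk layer is used on its own.
	fmt.Println("header check:", readJournalHeader(bytes.NewReader(buf.Bytes()), diskRoot))
}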
+ // + // Possible scenario: Geth was crashed without persisting journal and then + // restart, the head is rewound to the point with available state(trie) + // which is below the snapshot. In this case the snapshot can be recovered + // by re-executing blocks but right now it's unavailable. if head := snapshot.Root(); head != root { - return nil, fmt.Errorf("head doesn't match snapshot: have %#x, want %#x", head, root) + // If it's legacy snapshot, or it's new-format snapshot but + // it's not in recovery mode, returns the error here for + // rebuilding the entire snapshot forcibly. + if legacy || !recovery { + return nil, fmt.Errorf("head doesn't match snapshot: have %#x, want %#x", head, root) + } + // It's in snapshot recovery, the assumption is held that + // the disk layer is always higher than chain head. It can + // be eventually recovered when the chain head beyonds the + // disk layer. + log.Warn("Snapshot is not continuous with chain", "snaproot", head, "chainroot", root) } // Everything loaded correctly, resume any suspended operations if !generator.Done { @@ -183,8 +276,8 @@ func loadDiffLayer(parent snapshot, r *rlp.Stream) (snapshot, error) { return loadDiffLayer(newDiffLayer(parent, root, destructSet, accountData, storageData), r) } -// Journal writes the persistent layer generator stats into a buffer to be stored -// in the database as the snapshot journal. +// Journal terminates any in-progress snapshot generation, also implicitly pushing +// the progress into the database. func (dl *diskLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { // If the snapshot is currently being generated, abort it var stats *generatorStats @@ -193,7 +286,86 @@ func (dl *diskLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { dl.genAbort <- abort if stats = <-abort; stats != nil { - stats.Log("Journalling in-progress snapshot", dl.genMarker) + stats.Log("Journalling in-progress snapshot", dl.root, dl.genMarker) + } + } + // Ensure the layer didn't get stale + dl.lock.RLock() + defer dl.lock.RUnlock() + + if dl.stale { + return common.Hash{}, ErrSnapshotStale + } + // Ensure the generator stats is written even if none was ran this cycle + journalProgress(dl.diskdb, dl.genMarker, stats) + + log.Debug("Journalled disk layer", "root", dl.root) + return dl.root, nil +} + +// Journal writes the memory layer contents into a buffer to be stored in the +// database as the snapshot journal. 
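The head-mismatch handling in loadSnapshot above boils down to a small decision: a legacy journal, or a new-format journal loaded outside recovery mode, forces a full rebuild; in recovery mode the (higher) disk layer is kept and the chain is expected to catch up to it. A sketch of just that decision, assuming the three booleans have already been computed:

package main

import "fmt"

// shouldRebuildSnapshot captures the policy in loadSnapshot: only a new-format
// journal loaded in recovery mode may survive a head/root mismatch; everything
// else triggers a full regeneration.
func shouldRebuildSnapshot(headMatches, legacy, recovery bool) bool {
	if headMatches {
		return false // journal is consistent with the current chain head
	}
	return legacy || !recovery
}

func main() {
	fmt.Println(shouldRebuildSnapshot(true, false, false))  // false: roots match
	fmt.Println(shouldRebuildSnapshot(false, true, true))   // true: legacy journal always rebuilds
	fmt.Println(shouldRebuildSnapshot(false, false, false)) // true: mismatch outside recovery
	fmt.Println(shouldRebuildSnapshot(false, false, true))  // false: recovery keeps the disk layer
}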
+func (dl *diffLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { + // Journal the parent first + base, err := dl.parent.Journal(buffer) + if err != nil { + return common.Hash{}, err + } + // Ensure the layer didn't get stale + dl.lock.RLock() + defer dl.lock.RUnlock() + + if dl.Stale() { + return common.Hash{}, ErrSnapshotStale + } + // Everything below was journalled, persist this layer too + if err := rlp.Encode(buffer, dl.root); err != nil { + return common.Hash{}, err + } + destructs := make([]journalDestruct, 0, len(dl.destructSet)) + for hash := range dl.destructSet { + destructs = append(destructs, journalDestruct{Hash: hash}) + } + if err := rlp.Encode(buffer, destructs); err != nil { + return common.Hash{}, err + } + accounts := make([]journalAccount, 0, len(dl.accountData)) + for hash, blob := range dl.accountData { + accounts = append(accounts, journalAccount{Hash: hash, Blob: blob}) + } + if err := rlp.Encode(buffer, accounts); err != nil { + return common.Hash{}, err + } + storage := make([]journalStorage, 0, len(dl.storageData)) + for hash, slots := range dl.storageData { + keys := make([]common.Hash, 0, len(slots)) + vals := make([][]byte, 0, len(slots)) + for key, val := range slots { + keys = append(keys, key) + vals = append(vals, val) + } + storage = append(storage, journalStorage{Hash: hash, Keys: keys, Vals: vals}) + } + if err := rlp.Encode(buffer, storage); err != nil { + return common.Hash{}, err + } + log.Debug("Journalled diff layer", "root", dl.root, "parent", dl.parent.Root()) + return base, nil +} + +// LegacyJournal writes the persistent layer generator stats into a buffer +// to be stored in the database as the snapshot journal. +// +// Note it's the legacy version which is only used in testing right now. +func (dl *diskLayer) LegacyJournal(buffer *bytes.Buffer) (common.Hash, error) { + // If the snapshot is currently being generated, abort it + var stats *generatorStats + if dl.genAbort != nil { + abort := make(chan *generatorStats) + dl.genAbort <- abort + + if stats = <-abort; stats != nil { + stats.Log("Journalling in-progress snapshot", dl.root, dl.genMarker) } } // Ensure the layer didn't get stale @@ -214,6 +386,7 @@ func (dl *diskLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { entry.Slots = stats.slots entry.Storage = uint64(stats.storage) } + log.Debug("Legacy journalled disk layer", "root", dl.root) if err := rlp.Encode(buffer, entry); err != nil { return common.Hash{}, err } @@ -222,9 +395,11 @@ func (dl *diskLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { // Journal writes the memory layer contents into a buffer to be stored in the // database as the snapshot journal. -func (dl *diffLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { +// +// Note it's the legacy version which is only used in testing right now. 
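diffLayer.Journal above journals its parent before encoding itself, so the buffer always holds layers bottom-up and loadDiffLayer can replay them in order. A rough illustration of that recursion with gob instead of RLP and string-keyed maps instead of hashes; the types here are toys, not the snapshot package's:

package main

import (
	"bytes"
	"encoding/gob"
	"fmt"
)

// layer is a toy diff layer: a root, its write set, and a parent (a nil parent
// plays the role of the disk layer in this sketch).
type layer struct {
	root     string
	accounts map[string][]byte
	parent   *layer
}

type journalEntry struct {
	Root     string
	Accounts map[string][]byte
}

// journal encodes the parent first and this layer second, mirroring the
// "journal the parent first" recursion above; it returns the bottom-most root.
func (l *layer) journal(enc *gob.Encoder) (string, error) {
	if l.parent == nil {
		return l.root, nil // disk layer: nothing to journal in this sketch
	}
	base, err := l.parent.journal(enc)
	if err != nil {
		return "", err
	}
	if err := enc.Encode(journalEntry{Root: l.root, Accounts: l.accounts}); err != nil {
		return "", err
	}
	return base, nil
}

func main() {
	disk := &layer{root: "0x01"}
	mid := &layer{root: "0x02", accounts: map[string][]byte{"a": {1}}, parent: disk}
	head := &layer{root: "0x03", accounts: map[string][]byte{"b": {2}}, parent: mid}

	buf := new(bytes.Buffer)
	base, err := head.journal(gob.NewEncoder(buf))
	fmt.Println("journalled on top of disk root", base, "bytes:", buf.Len(), "err:", err)
}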
+func (dl *diffLayer) LegacyJournal(buffer *bytes.Buffer) (common.Hash, error) { // Journal the parent first - base, err := dl.parent.Journal(buffer) + base, err := dl.parent.LegacyJournal(buffer) if err != nil { return common.Hash{}, err } @@ -266,5 +441,6 @@ func (dl *diffLayer) Journal(buffer *bytes.Buffer) (common.Hash, error) { if err := rlp.Encode(buffer, storage); err != nil { return common.Hash{}, err } + log.Debug("Legacy journalled diff layer", "root", dl.root, "parent", dl.parent.Root()) return base, nil } diff --git a/core/state/snapshot/snapshot.go b/core/state/snapshot/snapshot.go index f575a092dcc7..c46d8ed8ffcb 100644 --- a/core/state/snapshot/snapshot.go +++ b/core/state/snapshot/snapshot.go @@ -29,6 +29,7 @@ import ( "github.com/celo-org/celo-blockchain/ethdb" "github.com/celo-org/celo-blockchain/log" "github.com/celo-org/celo-blockchain/metrics" + "github.com/celo-org/celo-blockchain/rlp" "github.com/celo-org/celo-blockchain/trie" ) @@ -86,6 +87,10 @@ var ( // range of accounts covered. ErrNotCoveredYet = errors.New("not covered yet") + // ErrNotConstructed is returned if the callers want to iterate the snapshot + // while the generation is not finished yet. + ErrNotConstructed = errors.New("snapshot is not constructed") + // errSnapshotCycle is returned if a snapshot is attempted to be inserted // that forms a cycle in the snapshot tree. errSnapshotCycle = errors.New("snapshot cycle") @@ -132,6 +137,10 @@ type snapshot interface { // flattening everything down (bad for reorgs). Journal(buffer *bytes.Buffer) (common.Hash, error) + // LegacyJournal is basically identical to Journal. it's the legacy version for + // flushing legacy journal. Now the only purpose of this function is for testing. + LegacyJournal(buffer *bytes.Buffer) (common.Hash, error) + // Stale return whether this layer has become stale (was flattened across) or // if it's still live. Stale() bool @@ -164,10 +173,12 @@ type Tree struct { // store (with a number of memory layers from a journal), ensuring that the head // of the snapshot matches the expected one. // -// If the snapshot is missing or inconsistent, the entirety is deleted and will -// be reconstructed from scratch based on the tries in the key-value store, on a -// background thread. -func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool) *Tree { +// If the snapshot is missing or the disk layer is broken, the entire is deleted +// and will be reconstructed from scratch based on the tries in the key-value +// store, on a background thread. If the memory layers from the journal is not +// continuous with disk layer or the journal is missing, all diffs will be discarded +// iff it's in "recovery" mode, otherwise rebuild is mandatory. +func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root common.Hash, async bool, recovery bool) *Tree { // Create a new, empty snapshot tree snap := &Tree{ diskdb: diskdb, @@ -179,7 +190,7 @@ func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root comm defer snap.waitBuild() } // Attempt to load a previously persisted snapshot and rebuild one if failed - head, err := loadSnapshot(diskdb, triedb, cache, root) + head, err := loadSnapshot(diskdb, triedb, cache, root, recovery) if err != nil { log.Warn("Failed to load snapshot, regenerating", "err", err) snap.Rebuild(root) @@ -194,7 +205,7 @@ func New(diskdb ethdb.KeyValueStore, triedb *trie.Database, cache int, root comm } // waitBuild blocks until the snapshot finishes rebuilding. 
This method is meant -// to be used by tests to ensure we're testing what we believe we are. +// to be used by tests to ensure we're testing what we believe we are. func (t *Tree) waitBuild() { // Find the rebuild termination channel var done chan struct{} @@ -236,11 +247,11 @@ func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, destructs m return errSnapshotCycle } // Generate a new snapshot on top of the parent - parent := t.Snapshot(parentRoot).(snapshot) + parent := t.Snapshot(parentRoot) if parent == nil { return fmt.Errorf("parent [%#x] snapshot missing", parentRoot) } - snap := parent.Update(blockRoot, destructs, accounts, storage) + snap := parent.(snapshot).Update(blockRoot, destructs, accounts, storage) // Save the new snapshot for later t.lock.Lock() @@ -253,6 +264,12 @@ func (t *Tree) Update(blockRoot common.Hash, parentRoot common.Hash, destructs m // Cap traverses downwards the snapshot tree from a head block hash until the // number of allowed layers are crossed. All layers beyond the permitted number // are flattened downwards. +// +// Note, the final diff layer count in general will be one more than the amount +// requested. This happens because the bottom-most diff layer is the accumulator +// which may or may not overflow and cascade to disk. Since this last layer's +// survival is only known *after* capping, we need to omit it from the count if +// we want to ensure that *at least* the requested number of diff layers remain. func (t *Tree) Cap(root common.Hash, layers int) error { // Retrieve the head snapshot to cap from snap := t.Snapshot(root) @@ -263,6 +280,13 @@ func (t *Tree) Cap(root common.Hash, layers int) error { if !ok { return fmt.Errorf("snapshot [%#x] is disk layer", root) } + // If the generator is still running, use a more aggressive cap + diff.origin.lock.RLock() + if diff.origin.genMarker != nil && layers > 8 { + layers = 8 + } + diff.origin.lock.RUnlock() + // Run the internal capping and discard all stale layers t.lock.Lock() defer t.lock.Unlock() @@ -270,10 +294,7 @@ func (t *Tree) Cap(root common.Hash, layers int) error { // Flattening the bottom-most diff layer requires special casing since there's // no child to rewire to the grandparent. In that case we can fake a temporary // child for the capping and then remove it. 
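Two details of Cap above are easy to miss: the bottom-most accumulator means callers effectively retain one more diff layer than they asked for, and while the disk layer is still generating, the cap is forced down to 8 so the unflushed diff stack stays small. A small sketch of that effective-limit computation (the constant mirrors the hard-coded 8 above, everything else is illustrative):

package main

import "fmt"

const generatingCap = 8 // aggressive limit used while the base layer is still generating

// effectiveCap reproduces the adjustment at the top of Tree.Cap: clamp the
// requested layer count while generation is in progress. Because of the
// bottom-most accumulator, roughly requested+1 diff layers survive in practice.
func effectiveCap(requested int, generating bool) int {
	if generating && requested > generatingCap {
		return generatingCap
	}
	return requested
}

func main() {
	fmt.Println(effectiveCap(128, false)) // 128: normal operation
	fmt.Println(effectiveCap(128, true))  // 8: generator still running
	fmt.Println(effectiveCap(4, true))    // 4: already below the aggressive limit
}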
- var persisted *diskLayer - - switch layers { - case 0: + if layers == 0 { // If full commit was requested, flatten the diffs and merge onto disk diff.lock.RLock() base := diffToDisk(diff.flatten().(*diffLayer)) @@ -282,33 +303,9 @@ func (t *Tree) Cap(root common.Hash, layers int) error { // Replace the entire snapshot tree with the flat base t.layers = map[common.Hash]snapshot{base.root: base} return nil - - case 1: - // If full flattening was requested, flatten the diffs but only merge if the - // memory limit was reached - var ( - bottom *diffLayer - base *diskLayer - ) - diff.lock.RLock() - bottom = diff.flatten().(*diffLayer) - if bottom.memory >= aggregatorMemoryLimit { - base = diffToDisk(bottom) - } - diff.lock.RUnlock() - - // If all diff layers were removed, replace the entire snapshot tree - if base != nil { - t.layers = map[common.Hash]snapshot{base.root: base} - return nil - } - // Merge the new aggregated layer into the snapshot tree, clean stales below - t.layers[bottom.root] = bottom - - default: - // Many layers requested to be retained, cap normally - persisted = t.cap(diff, layers) } + persisted := t.cap(diff, layers) + // Remove any layer that is stale or links into a stale layer children := make(map[common.Hash][]common.Hash) for root, snap := range t.layers { @@ -350,10 +347,16 @@ func (t *Tree) Cap(root common.Hash, layers int) error { // crossed. All diffs beyond the permitted number are flattened downwards. If the // layer limit is reached, memory cap is also enforced (but not before). // -// The method returns the new disk layer if diffs were persistend into it. +// The method returns the new disk layer if diffs were persisted into it. +// +// Note, the final diff layer count in general will be one more than the amount +// requested. This happens because the bottom-most diff layer is the accumulator +// which may or may not overflow and cascade to disk. Since this last layer's +// survival is only known *after* capping, we need to omit it from the count if +// we want to ensure that *at least* the requested number of diff layers remain. func (t *Tree) cap(diff *diffLayer, layers int) *diskLayer { // Dive until we run out of layers or reach the persistent database - for ; layers > 2; layers-- { + for i := 0; i < layers-1; i++ { // If we still have diff layers below, continue down if parent, ok := diff.parent.(*diffLayer); ok { diff = parent @@ -404,6 +407,9 @@ func (t *Tree) cap(diff *diffLayer, layers int) *diskLayer { // diffToDisk merges a bottom-most diff into the persistent disk layer underneath // it. The method will panic if called onto a non-bottom-most diff layer. +// +// The disk layer persistence should be operated in an atomic way. All updates should +// be discarded if the whole transition if not finished. func diffToDisk(bottom *diffLayer) *diskLayer { var ( base = bottom.parent.(*diskLayer) @@ -416,8 +422,7 @@ func diffToDisk(bottom *diffLayer) *diskLayer { base.genAbort <- abort stats = <-abort } - // Start by temporarily deleting the current snapshot block marker. This - // ensures that in the case of a crash, the entire snapshot is invalidated. + // Put the deletion in the batch writer, flush all updates in the final step. 
rawdb.DeleteSnapshotRoot(batch) // Mark the original base as stale as we're going to create a new wrapper @@ -443,8 +448,17 @@ func diffToDisk(bottom *diffLayer) *diskLayer { if key := it.Key(); len(key) == 65 { // TODO(karalabe): Yuck, we should move this into the iterator batch.Delete(key) base.cache.Del(key[1:]) - snapshotFlushStorageItemMeter.Mark(1) + + // Ensure we don't delete too much data blindly (contract can be + // huge). It's ok to flush, the root will go missing in case of a + // crash and we'll detect and regenerate the snapshot. + if batch.ValueSize() > ethdb.IdealBatchSize { + if err := batch.Write(); err != nil { + log.Crit("Failed to write storage deletions", "err", err) + } + batch.Reset() + } } } it.Release() @@ -460,14 +474,18 @@ func diffToDisk(bottom *diffLayer) *diskLayer { base.cache.Set(hash[:], data) snapshotCleanAccountWriteMeter.Mark(int64(len(data))) + snapshotFlushAccountItemMeter.Mark(1) + snapshotFlushAccountSizeMeter.Mark(int64(len(data))) + + // Ensure we don't write too much data blindly. It's ok to flush, the + // root will go missing in case of a crash and we'll detect and regen + // the snapshot. if batch.ValueSize() > ethdb.IdealBatchSize { if err := batch.Write(); err != nil { - log.Crit("Failed to write account snapshot", "err", err) + log.Crit("Failed to write storage deletions", "err", err) } batch.Reset() } - snapshotFlushAccountItemMeter.Mark(1) - snapshotFlushAccountSizeMeter.Mark(int64(len(data))) } // Push all the storage slots into the database for accountHash, storage := range bottom.storageData { @@ -494,18 +512,19 @@ func diffToDisk(bottom *diffLayer) *diskLayer { snapshotFlushStorageItemMeter.Mark(1) snapshotFlushStorageSizeMeter.Mark(int64(len(data))) } - if batch.ValueSize() > ethdb.IdealBatchSize { - if err := batch.Write(); err != nil { - log.Crit("Failed to write storage snapshot", "err", err) - } - batch.Reset() - } } // Update the snapshot block marker and write any remainder data rawdb.WriteSnapshotRoot(batch, bottom.root) + + // Write out the generator progress marker and report + journalProgress(batch, base.genMarker, stats) + + // Flush all the updates in the single db operation. Ensure the + // disk layer transition is atomic. if err := batch.Write(); err != nil { log.Crit("Failed to write leftover snapshot", "err", err) } + log.Debug("Journalled disk layer", "root", bottom.root, "complete", base.genMarker == nil) res := &diskLayer{ root: bottom.root, cache: base.cache, @@ -543,7 +562,21 @@ func (t *Tree) Journal(root common.Hash) (common.Hash, error) { t.lock.Lock() defer t.lock.Unlock() + // Firstly write out the metadata of journal journal := new(bytes.Buffer) + if err := rlp.Encode(journal, journalVersion); err != nil { + return common.Hash{}, err + } + diskroot := t.diskRoot() + if diskroot == (common.Hash{}) { + return common.Hash{}, errors.New("invalid disk root") + } + // Secondly write out the disk layer root, ensure the + // diff journal is continuous with disk. + if err := rlp.Encode(journal, diskroot); err != nil { + return common.Hash{}, err + } + // Finally write out the journal of each layer in reverse order. base, err := snap.(snapshot).Journal(journal) if err != nil { return common.Hash{}, err @@ -553,6 +586,29 @@ func (t *Tree) Journal(root common.Hash) (common.Hash, error) { return base, nil } +// LegacyJournal is basically identical to Journal. it's the legacy +// version for flushing legacy journal. Now the only purpose of this +// function is for testing. 
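Tree.Journal above now prefixes the layer journals with the journal version and the disk-layer root, which is exactly what the loader checks before trusting the diffs. Below is the writer side of the simplified header format used in the earlier loader sketch; the real code encodes both fields with RLP:

package main

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
)

const journalVersion = 0

// writeJournalHeader emits the two header fields before any diff layer is
// journalled: the format version and the root of the disk layer the diffs
// build on. A zero disk root means there is nothing consistent to journal.
func writeJournalHeader(buf *bytes.Buffer, diskRoot [32]byte) error {
	if diskRoot == ([32]byte{}) {
		return errors.New("invalid disk root")
	}
	if err := binary.Write(buf, binary.BigEndian, uint64(journalVersion)); err != nil {
		return err
	}
	return binary.Write(buf, binary.BigEndian, diskRoot)
}

func main() {
	var diskRoot [32]byte
	diskRoot[0] = 0x01

	journal := new(bytes.Buffer)
	if err := writeJournalHeader(journal, diskRoot); err != nil {
		fmt.Println("journal skipped:", err)
		return
	}
	// ...diff layers would be appended here, bottom-most first...
	fmt.Println("header bytes written:", journal.Len()) // 8 + 32
}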
+func (t *Tree) LegacyJournal(root common.Hash) (common.Hash, error) { + // Retrieve the head snapshot to journal from var snap snapshot + snap := t.Snapshot(root) + if snap == nil { + return common.Hash{}, fmt.Errorf("snapshot [%#x] missing", root) + } + // Run the journaling + t.lock.Lock() + defer t.lock.Unlock() + + journal := new(bytes.Buffer) + base, err := snap.(snapshot).LegacyJournal(journal) + if err != nil { + return common.Hash{}, err + } + // Store the journal into the database and return + rawdb.WriteSnapshotJournal(t.diskdb, journal.Bytes()) + return base, nil +} + // Rebuild wipes all available snapshot data from the persistent database and // discard all caches and diff layers. Afterwards, it starts a new snapshot // generator with the given root hash. @@ -560,6 +616,10 @@ func (t *Tree) Rebuild(root common.Hash) { t.lock.Lock() defer t.lock.Unlock() + // Firstly delete any recovery flag in the database. Because now we are + // building a brand new snapshot. + rawdb.DeleteSnapshotRecoveryNumber(t.diskdb) + // Track whether there's a wipe currently running and keep it alive if so var wiper chan struct{} @@ -591,7 +651,7 @@ func (t *Tree) Rebuild(root common.Hash) { panic(fmt.Sprintf("unknown layer type: %T", layer)) } } - // Start generating a new snapshot from scratch on a backgroung thread. The + // Start generating a new snapshot from scratch on a background thread. The // generator will run a wiper first if there's not one running right now. log.Info("Rebuilding state snapshot") t.layers = map[common.Hash]snapshot{ @@ -602,11 +662,79 @@ func (t *Tree) Rebuild(root common.Hash) { // AccountIterator creates a new account iterator for the specified root hash and // seeks to a starting account hash. func (t *Tree) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) { + ok, err := t.generating() + if err != nil { + return nil, err + } + if ok { + return nil, ErrNotConstructed + } return newFastAccountIterator(t, root, seek) } // StorageIterator creates a new storage iterator for the specified root hash and // account. The iterator will be move to the specific start position. func (t *Tree) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) { + ok, err := t.generating() + if err != nil { + return nil, err + } + if ok { + return nil, ErrNotConstructed + } return newFastStorageIterator(t, root, account, seek) } + +// disklayer is an internal helper function to return the disk layer. +// The lock of snapTree is assumed to be held already. +func (t *Tree) disklayer() *diskLayer { + var snap snapshot + for _, s := range t.layers { + snap = s + break + } + if snap == nil { + return nil + } + switch layer := snap.(type) { + case *diskLayer: + return layer + case *diffLayer: + return layer.origin + default: + panic(fmt.Sprintf("%T: undefined layer", snap)) + } +} + +// diskRoot is a internal helper function to return the disk layer root. +// The lock of snapTree is assumed to be held already. +func (t *Tree) diskRoot() common.Hash { + disklayer := t.disklayer() + if disklayer == nil { + return common.Hash{} + } + return disklayer.Root() +} + +// generating is an internal helper function which reports whether the snapshot +// is still under the construction. 
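The iterator constructors above refuse to run while the disk layer still carries a generation marker, since a half-built snapshot would yield an incomplete account set. A compact version of that guard over a toy tree; the error value and field names are stand-ins for ErrNotConstructed and genMarker:

package main

import (
	"errors"
	"fmt"
	"sync"
)

var errNotConstructed = errors.New("snapshot is not constructed")

// tree is a toy stand-in holding only what the guard needs: the disk layer's
// generation marker, protected by a lock as in the real code.
type tree struct {
	lock      sync.RWMutex
	genMarker []byte // nil once generation has finished
}

func (t *tree) generating() bool {
	t.lock.RLock()
	defer t.lock.RUnlock()
	return t.genMarker != nil
}

// accountIterator mirrors the new precondition: bail out with a sentinel error
// instead of iterating over a partially generated snapshot.
func (t *tree) accountIterator() (func() bool, error) {
	if t.generating() {
		return nil, errNotConstructed
	}
	return func() bool { return false }, nil // empty iterator for the sketch
}

func main() {
	t := &tree{genMarker: []byte{0xaa}}
	if _, err := t.accountIterator(); err != nil {
		fmt.Println("while generating:", err)
	}
	t.genMarker = nil
	_, err := t.accountIterator()
	fmt.Println("after generation:", err)
}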
+func (t *Tree) generating() (bool, error) { + t.lock.Lock() + defer t.lock.Unlock() + + layer := t.disklayer() + if layer == nil { + return false, errors.New("disk layer is missing") + } + layer.lock.RLock() + defer layer.lock.RUnlock() + return layer.genMarker != nil, nil +} + +// diskRoot is a external helper function to return the disk layer root. +func (t *Tree) DiskRoot() common.Hash { + t.lock.Lock() + defer t.lock.Unlock() + + return t.diskRoot() +} diff --git a/core/state/snapshot/snapshot_test.go b/core/state/snapshot/snapshot_test.go index 9bdc4933b574..88a0e0cd4e44 100644 --- a/core/state/snapshot/snapshot_test.go +++ b/core/state/snapshot/snapshot_test.go @@ -161,57 +161,10 @@ func TestDiskLayerExternalInvalidationPartialFlatten(t *testing.T) { defer func(memcap uint64) { aggregatorMemoryLimit = memcap }(aggregatorMemoryLimit) aggregatorMemoryLimit = 0 - if err := snaps.Cap(common.HexToHash("0x03"), 2); err != nil { - t.Fatalf("failed to merge diff layer onto disk: %v", err) - } - // Since the base layer was modified, ensure that data retrievald on the external reference fail - if acc, err := ref.Account(common.HexToHash("0x01")); err != ErrSnapshotStale { - t.Errorf("stale reference returned account: %#x (err: %v)", acc, err) - } - if slot, err := ref.Storage(common.HexToHash("0xa1"), common.HexToHash("0xb1")); err != ErrSnapshotStale { - t.Errorf("stale reference returned storage slot: %#x (err: %v)", slot, err) - } - if n := len(snaps.layers); n != 2 { - t.Errorf("post-cap layer count mismatch: have %d, want %d", n, 2) - fmt.Println(snaps.layers) - } -} - -// Tests that if a diff layer becomes stale, no active external references will -// be returned with junk data. This version of the test flattens every diff layer -// to check internal corner case around the bottom-most memory accumulator. 
-func TestDiffLayerExternalInvalidationFullFlatten(t *testing.T) { - // Create an empty base layer and a snapshot tree out of it - base := &diskLayer{ - diskdb: rawdb.NewMemoryDatabase(), - root: common.HexToHash("0x01"), - cache: fastcache.New(1024 * 500), - } - snaps := &Tree{ - layers: map[common.Hash]snapshot{ - base.root: base, - }, - } - // Commit two diffs on top and retrieve a reference to the bottommost - accounts := map[common.Hash][]byte{ - common.HexToHash("0xa1"): randomAccount(), - } - if err := snaps.Update(common.HexToHash("0x02"), common.HexToHash("0x01"), nil, accounts, nil); err != nil { - t.Fatalf("failed to create a diff layer: %v", err) - } - if err := snaps.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), nil, accounts, nil); err != nil { - t.Fatalf("failed to create a diff layer: %v", err) - } - if n := len(snaps.layers); n != 3 { - t.Errorf("pre-cap layer count mismatch: have %d, want %d", n, 3) - } - ref := snaps.Snapshot(common.HexToHash("0x02")) - - // Flatten the diff layer into the bottom accumulator if err := snaps.Cap(common.HexToHash("0x03"), 1); err != nil { - t.Fatalf("failed to flatten diff layer into accumulator: %v", err) + t.Fatalf("failed to merge accumulator onto disk: %v", err) } - // Since the accumulator diff layer was modified, ensure that data retrievald on the external reference fail + // Since the base layer was modified, ensure that data retrievald on the external reference fail if acc, err := ref.Account(common.HexToHash("0x01")); err != ErrSnapshotStale { t.Errorf("stale reference returned account: %#x (err: %v)", acc, err) } @@ -266,7 +219,7 @@ func TestDiffLayerExternalInvalidationPartialFlatten(t *testing.T) { t.Errorf("layers modified, got %d exp %d", got, exp) } // Flatten the diff layer into the bottom accumulator - if err := snaps.Cap(common.HexToHash("0x04"), 2); err != nil { + if err := snaps.Cap(common.HexToHash("0x04"), 1); err != nil { t.Fatalf("failed to flatten diff layer into accumulator: %v", err) } // Since the accumulator diff layer was modified, ensure that data retrievald on the external reference fail diff --git a/core/state/statedb.go b/core/state/statedb.go index d2835a9c4032..40805d2e823b 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -691,6 +691,31 @@ func (s *StateDB) Copy() *StateDB { for hash, preimage := range s.preimages { state.preimages[hash] = preimage } + if s.snaps != nil { + // In order for the miner to be able to use and make additions + // to the snapshot tree, we need to copy that aswell. 
+ // Otherwise, any block mined by ourselves will cause gaps in the tree, + // and force the miner to operate trie-backed only + state.snaps = s.snaps + state.snap = s.snap + // deep copy needed + state.snapDestructs = make(map[common.Hash]struct{}) + for k, v := range s.snapDestructs { + state.snapDestructs[k] = v + } + state.snapAccounts = make(map[common.Hash][]byte) + for k, v := range s.snapAccounts { + state.snapAccounts[k] = v + } + state.snapStorage = make(map[common.Hash]map[common.Hash][]byte) + for k, v := range s.snapStorage { + temp := make(map[common.Hash][]byte) + for kk, vv := range v { + temp[kk] = vv + } + state.snapStorage[k] = temp + } + } return state } @@ -861,8 +886,12 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { if err := s.snaps.Update(root, parent, s.snapDestructs, s.snapAccounts, s.snapStorage); err != nil { log.Warn("Failed to update snapshot tree", "from", parent, "to", root, "err", err) } - if err := s.snaps.Cap(root, 127); err != nil { // Persistent layer is 128th, the last available trie - log.Warn("Failed to cap snapshot tree", "root", root, "layers", 127, "err", err) + // Keep 128 diff layers in the memory, persistent layer is 129th. + // - head layer is paired with HEAD state + // - head-1 layer is paired with HEAD-1 state + // - head-127 layer(bottom-most diff layer) is paired with HEAD-127 state + if err := s.snaps.Cap(root, 128); err != nil { + log.Warn("Failed to cap snapshot tree", "root", root, "layers", 128, "err", err) } } s.snap, s.snapDestructs, s.snapAccounts, s.snapStorage = nil, nil, nil, nil diff --git a/eth/config.go b/eth/config.go index aa48cd04968e..601c64fc6386 100644 --- a/eth/config.go +++ b/eth/config.go @@ -35,11 +35,11 @@ var DefaultConfig = Config{ LightPeers: 100, LightServ: 0, UltraLightFraction: 75, - DatabaseCache: 768, - TrieCleanCache: 256, + DatabaseCache: 512, + TrieCleanCache: 154, TrieDirtyCache: 256, TrieTimeout: 60 * time.Minute, - SnapshotCache: 256, + SnapshotCache: 102, Miner: miner.Config{ GasFloor: 8000000, GasCeil: 8000000, diff --git a/ethdb/leveldb/leveldb.go b/ethdb/leveldb/leveldb.go index c3834c4b543d..e92bac2d2f92 100644 --- a/ethdb/leveldb/leveldb.go +++ b/ethdb/leveldb/leveldb.go @@ -428,7 +428,7 @@ func (b *batch) Put(key, value []byte) error { // Delete inserts the a key removal into the batch for later committing. func (b *batch) Delete(key []byte) error { b.b.Delete(key) - b.size++ + b.size += len(key) return nil } diff --git a/ethdb/memorydb/memorydb.go b/ethdb/memorydb/memorydb.go index 8e9cf3c70b58..338dd1aeb579 100644 --- a/ethdb/memorydb/memorydb.go +++ b/ethdb/memorydb/memorydb.go @@ -211,7 +211,7 @@ func (b *batch) Put(key, value []byte) error { // Delete inserts the a key removal into the batch for later committing. 
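The StateDB.Copy change above deep-copies the snapshot write sets, including the two-level storage map, so the copy and the original can diverge without sharing mutable state. The nested-map copy in isolation, using plain Go maps rather than the StateDB types:

package main

import "fmt"

// copyStorage clones a two-level map of the same shape as snapStorage:
// account hash -> slot hash -> value. Both levels must be rebuilt, otherwise
// the copy would still alias the inner maps of the original.
func copyStorage(src map[string]map[string][]byte) map[string]map[string][]byte {
	dst := make(map[string]map[string][]byte, len(src))
	for account, slots := range src {
		inner := make(map[string][]byte, len(slots))
		for slot, val := range slots {
			inner[slot] = val // values are treated as immutable, as in the snapshot code
		}
		dst[account] = inner
	}
	return dst
}

func main() {
	orig := map[string]map[string][]byte{
		"acc-1": {"slot-1": []byte{0x01}},
	}
	cp := copyStorage(orig)
	cp["acc-1"]["slot-2"] = []byte{0x02} // does not leak back into orig

	fmt.Println(len(orig["acc-1"]), len(cp["acc-1"])) // 1 2
}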
func (b *batch) Delete(key []byte) error { b.writes = append(b.writes, keyvalue{common.CopyBytes(key), nil, true}) - b.size += 1 + b.size += len(key) return nil } diff --git a/tests/state_test_util.go b/tests/state_test_util.go index 1c04719a367d..4abb8dc72e47 100644 --- a/tests/state_test_util.go +++ b/tests/state_test_util.go @@ -220,7 +220,7 @@ func MakePreState(db ethdb.Database, accounts core.GenesisAlloc, snapshotter boo var snaps *snapshot.Tree if snapshotter { - snaps = snapshot.New(db, sdb.TrieDB(), 1, root, false) + snaps = snapshot.New(db, sdb.TrieDB(), 1, root, false, false) } statedb, _ = state.New(root, sdb, snaps) return snaps, statedb diff --git a/trie/database.go b/trie/database.go index fae5cf491e55..321c62cc35d8 100644 --- a/trie/database.go +++ b/trie/database.go @@ -747,7 +747,7 @@ func (db *Database) Commit(node common.Hash, report bool) error { batch.Replay(uncacher) batch.Reset() - // Reset the storage counters and bumpd metrics + // Reset the storage counters and bumped metrics db.preimages = make(map[common.Hash][]byte) db.preimagesSize = 0
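The leveldb and memorydb batches above now grow their size by the key length on Delete instead of a flat 1, so the IdealBatchSize-based flushes in diffToDisk see deletion-heavy batches for what they are. A toy batch with the same accounting; the threshold value is illustrative, not the real ethdb constant:

package main

import "fmt"

const idealBatchSize = 100 * 1024 // illustrative flush threshold

// batch tracks pending operations plus an approximate byte size; Delete now
// contributes len(key) rather than a constant 1.
type batch struct {
	ops  int
	size int
}

func (b *batch) Put(key, value []byte) {
	b.ops++
	b.size += len(value)
}

func (b *batch) Delete(key []byte) {
	b.ops++
	b.size += len(key) // was b.size++ before the fix
}

func (b *batch) ShouldFlush() bool { return b.size > idealBatchSize }

func main() {
	b := new(batch)
	for i := 0; i < 2000; i++ {
		b.Delete(make([]byte, 65)) // snapshot storage keys are 65 bytes (prefix + two hashes)
	}
	fmt.Println(b.ops, b.size, b.ShouldFlush()) // 2000 130000 true
}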