Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core/state/snapshot: implement storage iterator #20971

Merged
merged 6 commits into from
Apr 29, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 78 additions & 19 deletions core/state/snapshot/conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package snapshot

import (
"fmt"
"sync"
"time"

Expand All @@ -37,15 +38,21 @@ type conversionAccount struct {
CodeHash []byte
}

// SlimToFull converts data on the 'slim RLP' format into the full RLP-format
func SlimToFull(data []byte) ([]byte, error) {
// SlimToFull converts data from the 'slim RLP' format into the full RLP format.
// It also accepts a second parameter, "subRoot". If subRoot is not the empty
// hash, it replaces the storage root of the account. The subRoot is usually
// specified when we want to verify the whole state or re-generate the state
// root with a different trie algorithm.
func SlimToFull(data []byte, subRoot common.Hash) ([]byte, error) {
acc := &conversionAccount{}
if err := rlp.DecodeBytes(data, acc); err != nil {
return nil, err
}
if len(acc.Root) == 0 {
acc.Root = emptyRoot[:]
}
if subRoot != (common.Hash{}) {
acc.Root = subRoot.Bytes()
}
rjl493456442 marked this conversation as resolved.
Show resolved Hide resolved
if len(acc.CodeHash) == 0 {
acc.CodeHash = emptyCode[:]
}
Expand All @@ -62,14 +69,49 @@ type trieKV struct {
value []byte
}

type trieGeneratorFn func(in chan (trieKV), out chan (common.Hash))
type (
// trieGeneratorFn is the signature of a trie generation routine, which can
// be implemented by different trie algorithms. generateTrieRoot runs it in
// its own goroutine, feeds it leaves (key/value pairs) through the `in`
// channel, closes `in` once all leaves are sent and then reads a single
// root hash from the `out` channel.
trieGeneratorFn func(in chan (trieKV), out chan (common.Hash))

// leafCallbackFn is the callback invoked at the leaves of the trie. It is
// called with the hash of the current leaf and returns the subtrie root
// for that identifier. For account leaves the returned hash is handed to
// SlimToFull as the subRoot, where a zero hash means "keep the original
// storage root".
leafCallbackFn func(hash common.Hash) common.Hash
)

// GenerateAccountTrieRoot consumes the given account iterator and re-computes
// the root hash of the account trie from the leaves it yields. The standard
// trie generator is used, no leaf callback is installed and progress is logged.
func GenerateAccountTrieRoot(it AccountIterator) common.Hash {
	const (
		isAccountIterator = true // leaves are slim-RLP accounts
		reportProgress    = true // emit progress logs
	)
	return generateTrieRoot(it, isAccountIterator, stdGenerate, nil, reportProgress)
}

// GenerateTrieRoot takes an account iterator and reproduces the root hash.
func GenerateTrieRoot(it AccountIterator) common.Hash {
return generateTrieRoot(it, stdGenerate)
// GenerateStorageTrieRoot consumes the given storage iterator and re-computes
// the root hash of the storage trie from the slots it yields. The standard
// trie generator is used, no leaf callback is installed and progress is logged.
func GenerateStorageTrieRoot(it StorageIterator) common.Hash {
	const (
		isAccountIterator = false // leaves are raw storage slots
		reportProgress    = true  // emit progress logs
	)
	return generateTrieRoot(it, isAccountIterator, stdGenerate, nil, reportProgress)
}

func generateTrieRoot(it AccountIterator, generatorFn trieGeneratorFn) common.Hash {
// VerifyState takes the whole snapshot tree as the input, traverses all the
// accounts as well as the corresponding storages and compares the re-computed
// hash with the original one (the state root, which covers the storage roots).
//
// For every account the storage root is re-generated from the account's
// storage iterator and substituted into the account before it is hashed, so a
// storage mismatch surfaces as a state root mismatch.
//
// It returns an error if the account iterator or any storage iterator cannot
// be created, or if the re-computed state root differs from root.
func VerifyState(snaptree *Tree, root common.Hash) error {
	acctIt, err := snaptree.AccountIterator(root, common.Hash{})
	if err != nil {
		return err
	}
	// The leaf callback has no way to return an error, and a zero hash is
	// interpreted by SlimToFull as "keep the stored storage root". Without
	// tracking the failure here, an account whose storage iterator could not
	// be opened would silently pass verification. The callback is invoked from
	// generateTrieRoot's feeding loop, so a plain captured variable suffices.
	var storageErr error
	got := generateTrieRoot(acctIt, true, stdGenerate, func(account common.Hash) common.Hash {
		if storageErr != nil {
			// Verification has already failed, don't waste time hashing
			// the remaining storage tries.
			return common.Hash{}
		}
		storageIt, err := snaptree.StorageIterator(root, account, common.Hash{})
		if err != nil {
			storageErr = fmt.Errorf("storage iterator for account %x: %v", account, err)
			return common.Hash{}
		}
		return generateTrieRoot(storageIt, false, stdGenerate, nil, false)
	}, true)

	if storageErr != nil {
		return storageErr
	}
	if got != root {
		return fmt.Errorf("State root hash mismatch, got %x, want %x", got, root)
	}
	return nil
}

func generateTrieRoot(it Iterator, accountIterator bool, generatorFn trieGeneratorFn, leafCallback leafCallbackFn, report bool) common.Hash {
var (
in = make(chan trieKV) // chan to pass leaves
out = make(chan common.Hash) // chan to collect result
Expand All @@ -80,26 +122,43 @@ func generateTrieRoot(it AccountIterator, generatorFn trieGeneratorFn) common.Ha
generatorFn(in, out)
wg.Done()
}()
// Feed leaves
start := time.Now()
logged := time.Now()
accounts := 0

var (
start = time.Now()
logged = time.Now()
entries = 0
)
// Start to feed leaves
for it.Next() {
slimData := it.Account()
fullData, _ := SlimToFull(slimData)
l := trieKV{it.Hash(), fullData}
// Apply the leaf callback first. Normally the callback is used
// to traverse the storage trie and re-generate the subtrie root.
// If the callback is specified, then replace the original storage
// root hash with new one.
var subRoot common.Hash
if leafCallback != nil {
subRoot = leafCallback(it.Hash())
}
var l trieKV
if accountIterator {
fullData, _ := SlimToFull(it.(AccountIterator).Account(), subRoot)
rjl493456442 marked this conversation as resolved.
Show resolved Hide resolved
l = trieKV{it.Hash(), fullData}
} else {
l = trieKV{it.Hash(), it.(StorageIterator).Slot()}
}
in <- l
if time.Since(logged) > 8*time.Second {
log.Info("Generating trie hash from snapshot",
"at", l.key, "accounts", accounts, "elapsed", time.Since(start))
if time.Since(logged) > 8*time.Second && report {
log.Info("Generating trie hash from snapshot", "at", l.key, "entries", entries, "elapsed", time.Since(start))
logged = time.Now()
}
accounts++
entries++
rjl493456442 marked this conversation as resolved.
Show resolved Hide resolved
}
close(in)
result := <-out
log.Info("Generated trie hash from snapshot", "accounts", accounts, "elapsed", time.Since(start))
wg.Wait()

if report {
log.Info("Generated trie hash from snapshot", "entries", entries, "elapsed", time.Since(start))
}
return result
}

Expand Down
41 changes: 28 additions & 13 deletions core/state/snapshot/difflayer.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,13 @@ type diffLayer struct {
root common.Hash // Root hash to which this snapshot diff belongs to
stale uint32 // Signals that the layer became stale (state progressed)

// destructSet is a very special helper marker. If an account is marked as
// deleted, then it's recorded in this set. However, an account may be
// included here and still be present in other sets (e.g. storageData). The
// reason is that the diff layer includes all the changes in a *block*. It can
// happen that account A is self-destructed in tx_1 and recreated in tx_2.
// We still need this marker to indicate that the "old" A is deleted and that
// all data in the other sets belongs to the "new" A.
destructSet map[common.Hash]struct{} // Keyed markers for deleted (and potentially) recreated accounts
accountList []common.Hash // List of account for iteration. If it exists, it's sorted, otherwise it's nil
accountData map[common.Hash][]byte // Keyed accounts for direct retrieval (nil means deleted)
Expand Down Expand Up @@ -169,6 +176,7 @@ func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]s
destructSet: destructs,
accountData: accounts,
storageData: storage,
storageList: make(map[common.Hash][]common.Hash),
}
switch parent := parent.(type) {
case *diskLayer:
Expand All @@ -194,19 +202,14 @@ func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]s
dl.memory += uint64(common.HashLength + len(data))
snapshotDirtyAccountWriteMeter.Mark(int64(len(data)))
}
// Fill the storage hashes and sort them for the iterator
dl.storageList = make(map[common.Hash][]common.Hash)
for accountHash := range destructs {
dl.storageList[accountHash] = nil
}
// Determine memory size and track the dirty writes
for _, slots := range storage {
for _, data := range slots {
dl.memory += uint64(common.HashLength + len(data))
snapshotDirtyStorageWriteMeter.Mark(int64(len(data)))
}
}
dl.memory += uint64(len(dl.storageList) * common.HashLength)
dl.memory += uint64(len(destructs) * common.HashLength)
return dl
}

Expand Down Expand Up @@ -287,6 +290,8 @@ func (dl *diffLayer) Account(hash common.Hash) (*Account, error) {

// AccountRLP directly retrieves the account RLP associated with a particular
// hash in the snapshot slim data format.
//
// Note the returned account is not a copy, please don't modify it.
func (dl *diffLayer) AccountRLP(hash common.Hash) ([]byte, error) {
// Check the bloom filter first whether there's even a point in reaching into
// all the maps in all the layers below
Expand Down Expand Up @@ -347,6 +352,8 @@ func (dl *diffLayer) accountRLP(hash common.Hash, depth int) ([]byte, error) {
// Storage directly retrieves the storage data associated with a particular hash,
// within a particular account. If the slot is unknown to this diff, it's parent
// is consulted.
//
// Note the returned slot is not a copy, please don't modify it.
func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) {
// Check the bloom filter first whether there's even a point in reaching into
// all the maps in all the layers below
Expand Down Expand Up @@ -502,22 +509,29 @@ func (dl *diffLayer) AccountList() []common.Hash {
}
}
sort.Sort(hashes(dl.accountList))
dl.memory += uint64(len(dl.accountList) * common.HashLength)
return dl.accountList
}

// StorageList returns a sorted list of all storage slot hashes in this difflayer
// for the given account.
// for the given account. If the whole storage is destructed in this layer, then
// an additional flag *destructed = true* will be returned, otherwise the flag is
// false. Besides, the returned list will include the hashes of deleted storage
// slots. Note that a special case is an account that is deleted in a prior tx
// but recreated in a following tx with some storage slots set. In this case
// the returned list is not empty but the flag is true.
//
// Note, the returned slice is not a copy, so do not modify it.
func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash {
func (dl *diffLayer) StorageList(accountHash common.Hash) ([]common.Hash, bool) {
// If an old list already exists, return it
dl.lock.RLock()
list := dl.storageList[accountHash]
_, destructed := dl.destructSet[accountHash]
if list, exist := dl.storageList[accountHash]; exist {
dl.lock.RUnlock()
return list, destructed // The list might be nil
}
dl.lock.RUnlock()

if list != nil {
return list
}
// No old sorted storage list exists, generate a new one
dl.lock.Lock()
defer dl.lock.Unlock()
Expand All @@ -529,5 +543,6 @@ func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash {
}
sort.Sort(hashes(storageList))
dl.storageList[accountHash] = storageList
return storageList
dl.memory += uint64(len(dl.storageList)*common.HashLength + common.HashLength)
return storageList, destructed
}
4 changes: 3 additions & 1 deletion core/state/snapshot/difflayer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ func TestMergeBasics(t *testing.T) {
accounts[h] = data
if rand.Intn(4) == 0 {
destructs[h] = struct{}{}
delete(destructs, h)
}
if rand.Intn(2) == 0 {
accStorage := make(map[common.Hash][]byte)
Expand Down Expand Up @@ -109,7 +110,8 @@ func TestMergeBasics(t *testing.T) {
if have, want := len(merged.storageList), i; have != want {
t.Errorf("[1] storageList wrong: have %v, want %v", have, want)
}
if have, want := len(merged.StorageList(aHash)), len(sMap); have != want {
list, _ := merged.StorageList(aHash)
if have, want := len(list), len(sMap); have != want {
t.Errorf("[2] StorageList() wrong: have %v, want %v", have, want)
}
if have, want := len(merged.storageList[aHash]), len(sMap); have != want {
Expand Down
Loading