core/state/snapshot: implement storage iterator
rjl493456442 committed Apr 26, 2020
1 parent 1aa8329 commit 72d04cb
Showing 8 changed files with 826 additions and 131 deletions.
37 changes: 24 additions & 13 deletions core/state/snapshot/difflayer.go
@@ -169,6 +169,7 @@ func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]s
destructSet: destructs,
accountData: accounts,
storageData: storage,
storageList: make(map[common.Hash][]common.Hash),
}
switch parent := parent.(type) {
case *diskLayer:
@@ -194,19 +195,14 @@ func newDiffLayer(parent snapshot, root common.Hash, destructs map[common.Hash]s
dl.memory += uint64(common.HashLength + len(data))
snapshotDirtyAccountWriteMeter.Mark(int64(len(data)))
}
// Fill the storage hashes and sort them for the iterator
dl.storageList = make(map[common.Hash][]common.Hash)
for accountHash := range destructs {
dl.storageList[accountHash] = nil
}
// Determine memory size and track the dirty writes
for _, slots := range storage {
for _, data := range slots {
dl.memory += uint64(common.HashLength + len(data))
snapshotDirtyStorageWriteMeter.Mark(int64(len(data)))
}
}
dl.memory += uint64(len(dl.storageList) * common.HashLength)
dl.memory += uint64(len(destructs) * common.HashLength)
return dl
}

@@ -287,6 +283,8 @@ func (dl *diffLayer) Account(hash common.Hash) (*Account, error) {

// AccountRLP directly retrieves the account RLP associated with a particular
// hash in the snapshot slim data format.
//
// Note the returned account is not a copy, please don't modify it.
func (dl *diffLayer) AccountRLP(hash common.Hash) ([]byte, error) {
// Check the bloom filter first whether there's even a point in reaching into
// all the maps in all the layers below
@@ -347,6 +345,8 @@ func (dl *diffLayer) accountRLP(hash common.Hash, depth int) ([]byte, error) {
// Storage directly retrieves the storage data associated with a particular hash,
// within a particular account. If the slot is unknown to this diff, its parent
// is consulted.
//
// Note the returned slot is not a copy, please don't modify it.
func (dl *diffLayer) Storage(accountHash, storageHash common.Hash) ([]byte, error) {
// Check the bloom filter first whether there's even a point in reaching into
// all the maps in all the layers below
@@ -502,32 +502,43 @@ func (dl *diffLayer) AccountList() []common.Hash {
}
}
sort.Sort(hashes(dl.accountList))
dl.memory += uint64(len(dl.accountList) * common.HashLength)
return dl.accountList
}

// StorageList returns a sorted list of all storage slot hashes in this difflayer
// for the given account.
// for the given account. If the whole storage is destructed in this layer, then
// an additional flag *destructed = true* will be returned; otherwise the flag is
// false. Besides, the returned list also includes the hashes of deleted storage slots.
//
// Note, the returned slice is not a copy, so do not modify it.
func (dl *diffLayer) StorageList(accountHash common.Hash) []common.Hash {
func (dl *diffLayer) StorageList(accountHash common.Hash) ([]common.Hash, bool) {
// If an old list already exists, return it
dl.lock.RLock()
list := dl.storageList[accountHash]
if _, exist := dl.destructSet[accountHash]; exist {
dl.lock.RUnlock()
return nil, true
}
if list, exist := dl.storageList[accountHash]; exist {
dl.lock.RUnlock()
return list, false // all cached lists are still alive, even if they are empty.
}
dl.lock.RUnlock()

if list != nil {
return list
}
// No old sorted account list exists, generate a new one
dl.lock.Lock()
defer dl.lock.Unlock()

// Otherwise allocate the sorted storage and return. Note even if there are zero
// storage changes included in this layer, the returned slice is not **nil**.
// A nil slice represents that the whole storage is removed.
storageMap := dl.storageData[accountHash]
storageList := make([]common.Hash, 0, len(storageMap))
for k := range storageMap {
storageList = append(storageList, k)
}
sort.Sort(hashes(storageList))
dl.storageList[accountHash] = storageList
return storageList
dl.memory += uint64(len(dl.storageList) * common.HashLength)
return storageList, false
}
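
The two-value form of StorageList changes how callers separate "no storage changes in this layer" from "the whole storage was destructed". A minimal in-package sketch of consuming the new API (collectSlots is a hypothetical helper, not part of this commit):

// collectSlots is an illustrative helper: it returns the values of all slots
// touched by this layer, or a true flag if the account's storage was wiped.
func collectSlots(dl *diffLayer, accountHash common.Hash) ([][]byte, bool) {
	slots, destructed := dl.StorageList(accountHash)
	if destructed {
		// The account's entire storage was deleted in this layer; deeper
		// layers must not be consulted for it.
		return nil, true
	}
	values := make([][]byte, 0, len(slots))
	for _, slotHash := range slots {
		// The list also contains hashes of deleted slots, whose data is nil.
		data, err := dl.Storage(accountHash, slotHash)
		if err != nil {
			break // e.g. the layer became stale underneath us
		}
		values = append(values, data)
	}
	return values, false
}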
4 changes: 3 additions & 1 deletion core/state/snapshot/difflayer_test.go
@@ -69,6 +69,7 @@ func TestMergeBasics(t *testing.T) {
accounts[h] = data
if rand.Intn(4) == 0 {
destructs[h] = struct{}{}
delete(destructs, h)
}
if rand.Intn(2) == 0 {
accStorage := make(map[common.Hash][]byte)
@@ -109,7 +110,8 @@ func TestMergeBasics(t *testing.T) {
if have, want := len(merged.storageList), i; have != want {
t.Errorf("[1] storageList wrong: have %v, want %v", have, want)
}
if have, want := len(merged.StorageList(aHash)), len(sMap); have != want {
list, _ := merged.StorageList(aHash)
if have, want := len(list), len(sMap); have != want {
t.Errorf("[2] StorageList() wrong: have %v, want %v", have, want)
}
if have, want := len(merged.storageList[aHash]), len(sMap); have != want {
216 changes: 205 additions & 11 deletions core/state/snapshot/iterator.go
Expand Up @@ -26,9 +26,9 @@ import (
"github.com/ethereum/go-ethereum/ethdb"
)

// AccountIterator is an iterator to step over all the accounts in a snapshot,
// which may or may not be composed of multiple layers.
type AccountIterator interface {
// Iterator is an iterator to step over all the accounts or the specific
// storage in a snapshot which may or may not be composed of multiple layers.
type Iterator interface {
// Next steps the iterator forward one element, returning false if exhausted,
// or an error if iteration failed for some reason (e.g. root being iterated
// becomes stale and garbage collected).
@@ -38,18 +38,35 @@ type AccountIterator interface {
// caused a premature iteration exit (e.g. snapshot stack becoming stale).
Error() error

// Hash returns the hash of the account the iterator is currently at.
// Hash returns the hash of the account or storage slot the iterator is
// currently at.
Hash() common.Hash

// Account returns the RLP encoded slim account the iterator is currently at.
// An error will be returned if the iterator becomes invalid (e.g. snaph
Account() []byte

// Release releases associated resources. Release should always succeed and
// can be called multiple times without causing error.
Release()
}

// AccountIterator is an iterator to step over all the accounts in a snapshot,
// which may or may not be composed of multiple layers.
type AccountIterator interface {
Iterator

// Account returns the RLP encoded slim account the iterator is currently at.
// An error will be returned if the iterator becomes invalid
Account() []byte
}

// StorageIterator is an iterator to step over the specific storage in a snapshot,
// which may or may not be composed of multiple layers.
type StorageIterator interface {
Iterator

// Slot returns the storage slot the iterator is currently at. An error will
// be returned if the iterator becomes invalid
Slot() []byte
}

// diffAccountIterator is an account iterator that steps over the accounts (both
// live and deleted) contained within a single diff layer. Higher order iterators
// will use the deleted accounts to skip deeper iterators.
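
Splitting the shared Iterator interface out of AccountIterator and adding StorageIterator lets generic code drive either kind through the common Next/Error/Hash/Release methods and only branch when the payload is needed. A rough in-package sketch of such a consumer (drainIterator is hypothetical, not part of this commit):

// drainIterator is an illustrative consumer written against the shared
// Iterator interface; it only type-switches to fetch the payload.
func drainIterator(it Iterator) error {
	defer it.Release()
	for it.Next() {
		switch typed := it.(type) {
		case AccountIterator:
			_ = typed.Account() // RLP encoded account in the slim format
		case StorageIterator:
			_ = typed.Slot() // raw storage slot value
		}
	}
	return it.Error()
}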
@@ -134,7 +151,7 @@ func (it *diffAccountIterator) Account() []byte {
if it.layer.Stale() {
it.fail, it.keys = ErrSnapshotStale, nil
}
return blob
return common.CopyBytes(blob)
}

// Release is a noop for diff account iterators as there are no held resources.
@@ -182,17 +199,20 @@ func (it *diskAccountIterator) Next() bool {
// The iterator is released once it is exhausted, in which case no error is
// returned; otherwise any failure of the underlying database iterator is forwarded.
func (it *diskAccountIterator) Error() error {
if it.it == nil {
return nil // Iterator is exhausted and released
}
return it.it.Error()
}

// Hash returns the hash of the account the iterator is currently at.
func (it *diskAccountIterator) Hash() common.Hash {
return common.BytesToHash(it.it.Key())
return common.BytesToHash(it.it.Key()) // The prefix will be truncated
}

// Account returns the RLP encoded slim account the iterator is currently at.
func (it *diskAccountIterator) Account() []byte {
return it.it.Value()
return common.CopyBytes(it.it.Value())
}

// Release releases the database snapshot held during iteration.
Expand All @@ -203,3 +223,177 @@ func (it *diskAccountIterator) Release() {
it.it = nil
}
}

// diffStorageIterator is a storage iterator that steps over the specific storage
// (both live and deleted) contained within a single diff layer. Higher order
// iterators will use the deleted slots to skip deeper iterators.
type diffStorageIterator struct {
// curHash is the current hash the iterator is positioned on. The field is
// explicitly tracked since the referenced diff layer might go stale after
// the iterator was positioned and we don't want to fail accessing the old
// hash as long as the iterator is not touched any more.
curHash common.Hash
account common.Hash

layer *diffLayer // Live layer to retrieve values from
keys []common.Hash // Keys left in the layer to iterate
fail error // Any failures encountered (stale)
}

// StorageIterator creates a storage iterator over a single diff layer.
// Besides the storage iterator, an additional flag "destructed" is also
// returned. If it is true, then the whole storage of the account is
// destructed in this layer.
func (dl *diffLayer) StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool) {
// If the storage is destructed, return a nil iterator.
hashes, destructed := dl.StorageList(account)
if destructed {
return nil, true
}
// Otherwise, create the storage iterator even if there are
// zero storage changes included (i.e. an exhausted iterator).
index := sort.Search(len(hashes), func(i int) bool {
return bytes.Compare(seek[:], hashes[i][:]) <= 0
})
// Assemble and return the already-seeked iterator
return &diffStorageIterator{
layer: dl,
account: account,
keys: hashes[index:],
}, false
}

// Next steps the iterator forward one element, returning false if exhausted.
func (it *diffStorageIterator) Next() bool {
// If the iterator was already stale, consider it a programmer error. Although
// we could just return false here, triggering this path would probably mean
// somebody forgot to check for Error, so let's blow up instead of undefined
// behavior that's hard to debug.
if it.fail != nil {
panic(fmt.Sprintf("called Next of failed iterator: %v", it.fail))
}
// Stop iterating if all keys were exhausted
if len(it.keys) == 0 {
return false
}
if it.layer.Stale() {
it.fail, it.keys = ErrSnapshotStale, nil
return false
}
// Iterator seems to be still alive, retrieve and cache the live hash
it.curHash = it.keys[0]
// key cached, shift the iterator and notify the user of success
it.keys = it.keys[1:]
return true
}

// Error returns any failure that occurred during iteration, which might have
// caused a premature iteration exit (e.g. snapshot stack becoming stale).
func (it *diffStorageIterator) Error() error {
return it.fail
}

// Hash returns the hash of the storage slot the iterator is currently at.
func (it *diffStorageIterator) Hash() common.Hash {
return it.curHash
}

// Slot returns the raw storage slot value the iterator is currently at.
// This method may _fail_, if the underlying layer has been flattened between
// the call to Next and Slot. That type of error will set it.fail.
// This method assumes that flattening does not delete elements from
// the storage mapping (writing nil into it is fine though), and will panic
// if elements have been deleted.
func (it *diffStorageIterator) Slot() []byte {
it.layer.lock.RLock()
storage, ok := it.layer.storageData[it.account]
if !ok {
panic(fmt.Sprintf("iterator referenced non-existent account storage: %x", it.account))
}
// Storage slot might be nil (deleted), but it must exist
blob, ok := storage[it.curHash]
if !ok {
panic(fmt.Sprintf("iterator referenced non-existent storage slot: %x", it.curHash))
}
it.layer.lock.RUnlock()
if it.layer.Stale() {
it.fail, it.keys = ErrSnapshotStale, nil
}
return common.CopyBytes(blob)
}

// Release is a noop for diff storage iterators as there are no held resources.
func (it *diffStorageIterator) Release() {}

// diskStorageIterator is a storage iterator that steps over the live storage
// contained within a disk layer.
type diskStorageIterator struct {
layer *diskLayer
account common.Hash
it ethdb.Iterator
}

// StorageIterator creates a storage iterator over a disk layer.
// If the whole storage is destructed, then all entries in the disk
// layer are deleted already. So the "destructed" flag returned here
// is always false.
func (dl *diskLayer) StorageIterator(account common.Hash, seek common.Hash) (StorageIterator, bool) {
pos := common.TrimRightZeroes(seek[:])
return &diskStorageIterator{
layer: dl,
account: account,
it: dl.diskdb.NewIterator(append(rawdb.SnapshotStoragePrefix, account.Bytes()...), pos),
}, false
}

// Next steps the iterator forward one element, returning false if exhausted.
func (it *diskStorageIterator) Next() bool {
// If the iterator was already exhausted, don't bother
if it.it == nil {
return false
}
// Try to advance the iterator and release it if we reached the end
prefix := append(rawdb.SnapshotStoragePrefix, it.account.Bytes()...)
for {
if !it.it.Next() || !bytes.HasPrefix(it.it.Key(), prefix) {
it.it.Release()
it.it = nil
return false
}
if len(it.it.Key()) == len(rawdb.SnapshotStoragePrefix)+common.HashLength+common.HashLength {
break
}
}
return true
}

// Error returns any failure that occurred during iteration, which might have
// caused a premature iteration exit (e.g. snapshot stack becoming stale).
//
// The iterator is released once it is exhausted, in which case no error is
// returned; otherwise any failure of the underlying database iterator is forwarded.
func (it *diskStorageIterator) Error() error {
if it.it == nil {
return nil // Iterator is exhausted and released
}
return it.it.Error()
}

// Hash returns the hash of the storage slot the iterator is currently at.
func (it *diskStorageIterator) Hash() common.Hash {
return common.BytesToHash(it.it.Key()) // The prefix will be truncated
}

// Slot returns the raw storage slot content the iterator is currently at.
func (it *diskStorageIterator) Slot() []byte {
return common.CopyBytes(it.it.Value())
}

// Release releases the database snapshot held during iteration.
func (it *diskStorageIterator) Release() {
// The iterator is auto-released on exhaustion, so make sure it's still alive
if it.it != nil {
it.it.Release()
it.it = nil
}
}
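
Putting the new pieces together, a caller could walk a single diff layer's view of one account's storage roughly as follows (an in-package sketch; printStorage and the output format are illustrative only, not part of this commit):

// printStorage is an illustrative helper that iterates one account's storage
// in a single diff layer, honouring the destructed flag and releasing the
// iterator when done.
func printStorage(dl *diffLayer, account common.Hash) error {
	it, destructed := dl.StorageIterator(account, common.Hash{})
	if destructed {
		// The whole storage of the account was deleted in this layer.
		return nil
	}
	defer it.Release()
	for it.Next() {
		// Slot may return nil for slots that were deleted in this layer.
		fmt.Printf("%x => %x\n", it.Hash(), it.Slot())
	}
	return it.Error()
}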