Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(cmd): Add offline pruning of state trie. #1564

Merged
merged 18 commits into from
May 20, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions cmd/gossamer/bloom.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package main

import (
"encoding/binary"
"errors"

"github.com/ChainSafe/gossamer/lib/common"
log "github.com/ChainSafe/log15"
bloomfilter "github.com/holiman/bloomfilter/v2"
)

// ErrKeySize is returned when the length of a key does not match the expected
// hash length (see stateBloom.put).
var ErrKeySize = errors.New("invalid key size")

// stateBloomHasher wraps a raw key so that it can be handed to the bloom
// filter's Add and Contains calls. Only Size and Sum64 are implemented; every
// other method panics with "not implemented".
type stateBloomHasher []byte

// Write is not supported and always panics.
func (h stateBloomHasher) Write(p []byte) (n int, err error) {
	panic("not implemented")
}

// Sum is not supported and always panics.
func (h stateBloomHasher) Sum(b []byte) []byte {
	panic("not implemented")
}

// Reset is not supported and always panics.
func (h stateBloomHasher) Reset() {
	panic("not implemented")
}

// BlockSize is not supported and always panics.
func (h stateBloomHasher) BlockSize() int {
	panic("not implemented")
}

// Size reports the number of bytes consumed by Sum64, which is always 8.
func (h stateBloomHasher) Size() int {
	return 8
}

// Sum64 decodes the first eight bytes of the key as a big-endian uint64.
// It panics if the key holds fewer than eight bytes.
func (h stateBloomHasher) Sum64() uint64 {
	raw := []byte(h)
	return binary.BigEndian.Uint64(raw)
}

// stateBloom is a wrapper around a bloom filter.
// The keys of all generated entries are recorded here so that, in the pruning
// stage, entries that belong to the specific version can be excluded from
// deletion.
type stateBloom struct {
// bloom is the underlying filter that put and contain operate on.
bloom *bloomfilter.Filter
}

// newStateBloomWithSize builds an empty state bloom for state generation.
// size is multiplied by 1024*1024*8 to obtain the filter dimension handed to
// bloomfilter.New, together with a fixed second argument of 4. Any error from
// bloomfilter.New is returned unchanged. The filter's M() value divided by 8
// is logged as "size" once the filter has been created.
func newStateBloomWithSize(size uint64) (*stateBloom, error) {
	// factor applied to size: 1024*1024 (mega) times 8 (bits per byte)
	const bitsPerMegabyte = 1024 * 1024 * 8

	filter, err := bloomfilter.New(size*bitsPerMegabyte, 4)
	if err != nil {
		return nil, err
	}

	sizeInBytes := float64(filter.M() / 8)
	log.Info("initialised state bloom", "size", sizeInBytes)

	return &stateBloom{bloom: filter}, nil
}

// put records key in the bloom filter.
// It returns ErrKeySize, without touching the filter, when the key is not
// exactly common.HashLength bytes long.
func (sb *stateBloom) put(key []byte) error {
	if keyLen := len(key); keyLen != common.HashLength {
		return ErrKeySize
	}

	hasher := stateBloomHasher(key)
	sb.bloom.Add(hasher)
	return nil
}

// contain reports whether key may have been recorded in the bloom filter by
// delegating to the underlying filter's Contains.
// - true means the key may be contained
// - false means the key is definitely not contained.
// The key is wrapped in a stateBloomHasher, whose Sum64 reads the first eight
// bytes, so key must be at least eight bytes long.
func (sb *stateBloom) contain(key []byte) bool {
	hasher := stateBloomHasher(key)
	return sb.bloom.Contains(hasher)
}
30 changes: 30 additions & 0 deletions cmd/gossamer/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,29 @@ var (
}
)

// State Prune flags
var (
// BloomFilterSizeFlag sets the amount of memory, in megabytes, allocated to the
// bloom filter used for pruning (default 2048). Valid for use with the
// prune-state subcommand.
BloomFilterSizeFlag = cli.IntFlag{
Name: "bloom-size",
Usage: "Megabytes of memory allocated to bloom-filter for pruning",
Value: 2048,
}

// DBPathFlag sets the data directory of the output DB. Valid for use with the
// prune-state subcommand; it has no default value.
DBPathFlag = cli.StringFlag{
Name: "badger-path",
Usage: "Data directory for the output DB",
}

// RetainBlockNumberFlag is the number of blocks, counted back from the latest block, to retain while pruning; valid for use with the prune-state subcommand
RetainBlockNumberFlag = cli.IntFlag{
Name: "retain-block",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe name retain-blocks otherwise it could be interpreted as what block number to retain

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Usage: "Retain number of block from latest block while pruning",
Value: 256,
}
)

// flag sets that are shared by multiple commands
var (
// GlobalFlags are flags that are valid for use with the root command and all subcommands
Expand Down Expand Up @@ -354,6 +377,13 @@ var (
HeaderFlag,
FirstSlotFlag,
}

// PruningFlags are flags that are valid for use with the prune-state subcommand
PruningFlags = []cli.Flag{
BasePathFlag,
BloomFilterSizeFlag,
DBPathFlag,
RetainBlockNumberFlag,
}
)

// FixFlagOrder allow us to use various flag order formats (ie, `gossamer init
Expand Down
51 changes: 51 additions & 0 deletions cmd/gossamer/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ const (
buildSpecCommandName = "build-spec"
importRuntimeCommandName = "import-runtime"
importStateCommandName = "import-state"
pruningStateCommandName = "prune-state"
)

// app is the cli application
Expand Down Expand Up @@ -115,6 +116,18 @@ var (
"Input can be generated by using the RPC function state_getPairs.\n" +
"\tUsage: gossamer import-state --state state.json --header header.json --first-slot <first slot of network>\n",
}

// pruningCommand defines the prune-state subcommand. It runs pruneState
// (wrapped in FixFlagOrder) and accepts the flags listed in PruningFlags.
// NOTE(review): ArgsUsage advertises a <root> argument and the Description
// mentions <retain-block>, but pruneState reads its inputs from flags only —
// confirm whether these usage strings are still accurate.
pruningCommand = cli.Command{
Action: FixFlagOrder(pruneState),
Name: pruningStateCommandName,
Usage: "Prune state will prune the state trie",
ArgsUsage: "<root>",
Flags: PruningFlags,
Description: `prune-state <retain-block> will prune historical state data.
All trie nodes that do not belong to the specified version state will be deleted from the database.

The default pruning target is the HEAD-256 state`,
}
)

// init initialises the cli application
Expand All @@ -132,6 +145,7 @@ func init() {
buildSpecCommand,
importRuntimeCommand,
importStateCommand,
pruningCommand,
}
app.Flags = RootFlags
}
Expand Down Expand Up @@ -411,3 +425,40 @@ func buildSpecAction(ctx *cli.Context) error {

return nil
}

func pruneState(ctx *cli.Context) error {
inputDBPath := ctx.GlobalString(BasePathFlag.Name)
if inputDBPath == "" {
inputDBPath = dot.GssmrConfig().Global.BasePath
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the user should be able to specify the chain with --chain and then it should use the basepath based on the chain

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


bloomSize := ctx.Uint64(BloomFilterSizeFlag.Name)
retainBlocks := ctx.Int64(RetainBlockNumberFlag.Name)

pruner, err := newPruner(inputDBPath, bloomSize, retainBlocks)
if err != nil {
return err
}

logger.Info("Pruner initialised")

err = pruner.setBloomFilter()
if err != nil {
return fmt.Errorf("failed to set keys into bloom filter %w", err)
}

// close the input DB so that it can be reopened for streaming
_ = pruner.inputDB.Close()

prunedDBPath := ctx.String(DBPathFlag.Name)
if prunedDBPath == "" {
return fmt.Errorf("path not specified for badger db")
}

err = pruner.prune(inputDBPath, prunedDBPath)
if err != nil {
return fmt.Errorf("failed to prune %w", err)
}

return nil
}
81 changes: 81 additions & 0 deletions cmd/gossamer/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,17 @@ import (
"os"
"os/exec"
"strconv"
"strings"
"sync"
"syscall"
"testing"
"text/template"
"time"

"github.com/ChainSafe/gossamer/dot"
"github.com/ChainSafe/gossamer/dot/state"
"github.com/ChainSafe/gossamer/lib/utils"
"github.com/dgraph-io/badger/v2"
"github.com/docker/docker/pkg/reexec"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -349,3 +352,81 @@ func TestBuildSpecCommandWithOutput(t *testing.T) {
// TODO: TestInitCommand test "gossamer init" does not error

// TODO: TestAccountCommand test "gossamer account" does not error

// TestPruneState runs the prune-state subcommand against an existing chain
// database and inspects the pruned output database. It checks that:
//   - every key of the input DB that does not carry state.StoragePrefix is
//     still present in the output DB, and
//   - every key of the output DB that carries state.StoragePrefix is reported
//     as contained by the pruner's bloom filter.
func TestPruneState(t *testing.T) {
	const (
		bloomSize      = 256
		retainBlockNum = 5
	)

	// NOTE(review): the test relies on this directory already holding a chain
	// DB (presumably produced by an earlier test run) — confirm it is set up.
	chainDBPath := "/tmp/TestSync_ProduceBlocks/alice"
	opts := badger.DefaultOptions(chainDBPath)
	currDB, err := badger.Open(opts)
	require.NoError(t, err)

	txn := currDB.NewTransaction(false)
	itr := txn.NewIterator(badger.DefaultIteratorOptions)

	// collect every key of the input DB that does not carry the storage prefix
	keyMap := make(map[string]interface{})
	for itr.Rewind(); itr.Valid(); itr.Next() {
		key := string(itr.Item().Key())

		if !strings.HasPrefix(key, state.StoragePrefix) {
			keyMap[key] = nil
		}
	}

	// release the iterator and its read-only transaction before closing the DB
	itr.Close()
	txn.Discard()

	t.Log("Total keys in old DB", len(keyMap))
	require.NoError(t, currDB.Close())

	pruner, err := newPruner(chainDBPath, bloomSize, retainBlockNum)
	require.NoError(t, err)

	// keys with storage prefix of the last retainBlockNum blocks
	err = pruner.setBloomFilter()
	require.NoError(t, err)

	// close pruner inputDB so it can be used again
	require.NoError(t, pruner.inputDB.Close())

	newBadgerDBPath := fmt.Sprintf("%s/%s", t.TempDir(), "badger")
	_ = runTestGossamer(t,
		"prune-state",
		"--basepath", chainDBPath,
		"--badger-path", newBadgerDBPath,
		"--bloom-size", "256",
		"--retain-block", "5")

	// NOTE(review): presumably gives the spawned command time to finish — confirm
	time.Sleep(10 * time.Second)

	t.Logf("new badger DB path %s", newBadgerDBPath)

	prunedDB, err := badger.Open(badger.DefaultOptions(newBadgerDBPath))
	require.NoError(t, err)

	txn = prunedDB.NewTransaction(false)
	itr = txn.NewIterator(badger.DefaultIteratorOptions)

	storageKeyMap := make(map[string]interface{})
	otherKeyMap := make(map[string]interface{})

	// split the keys of the pruned DB into storage keys (prefix removed) and all others
	for itr.Rewind(); itr.Valid(); itr.Next() {
		key := string(itr.Item().Key())
		if strings.HasPrefix(key, state.StoragePrefix) {
			key = strings.TrimPrefix(key, state.StoragePrefix)
			storageKeyMap[key] = nil
			continue
		}
		otherKeyMap[key] = nil
	}

	// all keys have been copied into the maps; release the pruned DB resources
	itr.Close()
	txn.Discard()
	require.NoError(t, prunedDB.Close())

	// every non-storage key of the input DB must have survived pruning
	for k := range keyMap {
		_, ok := otherKeyMap[k]
		require.True(t, ok)
	}

	// every storage key left in the pruned DB must be known to the bloom filter
	for k := range storageKeyMap {
		ok := pruner.bloom.contain([]byte(k))
		require.True(t, ok)
	}
}
Loading