diff --git a/chain/gen/gen.go b/chain/gen/gen.go
index 8e749095cf3..1ad8dad6d54 100644
--- a/chain/gen/gen.go
+++ b/chain/gen/gen.go
@@ -14,7 +14,6 @@ import (
 	"github.com/filecoin-project/go-state-types/big"
 	"github.com/filecoin-project/go-state-types/crypto"
 	"github.com/google/uuid"
-	block "github.com/ipfs/go-block-format"
 	"github.com/ipfs/go-blockservice"
 	"github.com/ipfs/go-cid"
 	offline "github.com/ipfs/go-ipfs-exchange-offline"
@@ -85,19 +84,6 @@ type ChainGen struct {
 	lr repo.LockedRepo
 }

-type mybs struct {
-	blockstore.Blockstore
-}
-
-func (m mybs) Get(c cid.Cid) (block.Block, error) {
-	b, err := m.Blockstore.Get(c)
-	if err != nil {
-		return nil, err
-	}
-
-	return b, nil
-}
-
 var rootkeyMultisig = genesis.MultisigMeta{
 	Signers:   []address.Address{remAccTestKey},
 	Threshold: 1,
@@ -152,8 +138,6 @@ func NewGeneratorWithSectors(numSectors int) (*ChainGen, error) {
 		}
 	}()

-	bs = mybs{bs}
-
 	ks, err := lr.KeyStore()
 	if err != nil {
 		return nil, xerrors.Errorf("getting repo keystore failed: %w", err)
@@ -465,7 +449,12 @@ func (cg *ChainGen) NextTipSetFromMinersWithMessages(base *types.TipSet, miners
 		}
 	}

-	return store.NewFullTipSet(blks), nil
+	fts := store.NewFullTipSet(blks)
+	if err := cg.cs.PutTipSet(context.TODO(), fts.TipSet()); err != nil {
+		return nil, err
+	}
+
+	return fts, nil
 }

 func (cg *ChainGen) makeBlock(parents *types.TipSet, m address.Address, vrfticket *types.Ticket,
diff --git a/chain/stmgr/forks.go b/chain/stmgr/forks.go
index e09744a67c1..b36f2c0bd5c 100644
--- a/chain/stmgr/forks.go
+++ b/chain/stmgr/forks.go
@@ -4,6 +4,10 @@ import (
 	"bytes"
 	"context"
 	"encoding/binary"
+	"runtime"
+	"sort"
+	"sync"
+	"time"

 	"github.com/filecoin-project/go-state-types/rt"

@@ -36,20 +40,70 @@ import (
 	"golang.org/x/xerrors"
 )

-// UpgradeFunc is a migration function run at every upgrade.
+// MigrationCache can be used to cache information used by a migration. This is primarily useful to
+// "pre-compute" some migration state ahead of time, and make it accessible in the migration itself.
+type MigrationCache interface {
+	Write(key string, value cid.Cid) error
+	Read(key string) (bool, cid.Cid, error)
+	Load(key string, loadFunc func() (cid.Cid, error)) (cid.Cid, error)
+}
+
+// MigrationFunc is a migration function run at every upgrade.
 //
+// - The cache is a per-upgrade cache, pre-populated by pre-migrations.
 // - The oldState is the state produced by the upgrade epoch.
 // - The returned newState is the new state that will be used by the next epoch.
 // - The height is the upgrade epoch height (already executed).
 // - The tipset is the tipset for the last non-null block before the upgrade. Do
 //   not assume that ts.Height() is the upgrade height.
-type UpgradeFunc func(ctx context.Context, sm *StateManager, cb ExecCallback, oldState cid.Cid, height abi.ChainEpoch, ts *types.TipSet) (newState cid.Cid, err error)
+type MigrationFunc func(
+	ctx context.Context,
+	sm *StateManager, cache MigrationCache,
+	cb ExecCallback, oldState cid.Cid,
+	height abi.ChainEpoch, ts *types.TipSet,
+) (newState cid.Cid, err error)
+
+// PreMigrationFunc is a function run _before_ a network upgrade to pre-compute part of the network
+// upgrade and speed it up.
+type PreMigrationFunc func(
+	ctx context.Context,
+	sm *StateManager, cache MigrationCache,
+	oldState cid.Cid,
+	height abi.ChainEpoch, ts *types.TipSet,
+) error
+
+// PreMigration describes a pre-migration step to prepare for a network state upgrade. Pre-migrations
+// are optimizations, are not guaranteed to run, and may be canceled and/or run multiple times.
+type PreMigration struct {
+	// PreMigration is the pre-migration function to run at the specified time. This function is
+	// run asynchronously and must abort promptly when canceled.
+	PreMigration PreMigrationFunc
+
+	// StartWithin specifies that this pre-migration should be started at most StartWithin
+	// epochs before the upgrade.
+	StartWithin abi.ChainEpoch
+
+	// DontStartWithin specifies that this pre-migration should not be started DontStartWithin
+	// epochs before the final upgrade epoch.
+	//
+	// This should be set such that the pre-migration is likely to complete before StopWithin.
+	DontStartWithin abi.ChainEpoch
+
+	// StopWithin specifies that this pre-migration should be stopped StopWithin epochs of the
+	// final upgrade epoch.
+	StopWithin abi.ChainEpoch
+}
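For context, a minimal sketch of how a MigrationFunc can take advantage of this cache (illustrative only, assuming it sits in package stmgr next to the types above; exampleMigration and expensiveMigration are hypothetical names, not part of this change). Load only invokes the supplied function on a cache miss, so work already done by a pre-migration is reused at the upgrade epoch:

func exampleMigration(ctx context.Context, sm *StateManager, cache MigrationCache,
	cb ExecCallback, oldState cid.Cid, height abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
	// Key the cached result by the input state so a re-run after a revert, with a
	// different parent state, can never pick up a stale entry.
	return cache.Load("migrated-"+oldState.String(), func() (cid.Cid, error) {
		// Only runs on a cache miss, i.e. when no pre-migration got here first.
		return expensiveMigration(ctx, sm, oldState) // hypothetical helper
	})
}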
 
 type Upgrade struct {
 	Height    abi.ChainEpoch
 	Network   network.Version
 	Expensive bool
-	Migration UpgradeFunc
+	Migration MigrationFunc
+
+	// PreMigrations specifies a set of pre-migration functions to run at the indicated epochs.
+	// These functions should fill the given cache with information that can speed up the
+	// eventual full migration at the upgrade epoch.
+	PreMigrations []PreMigration
 }

 type UpgradeSchedule []Upgrade
@@ -121,6 +175,17 @@ func DefaultUpgradeSchedule() UpgradeSchedule {
 		Height:    build.UpgradeActorsV3Height,
 		Network:   network.Version10,
 		Migration: UpgradeActorsV3,
+		PreMigrations: []PreMigration{{
+			PreMigration:    PreUpgradeActorsV3,
+			StartWithin:     120,
+			DontStartWithin: 60,
+			StopWithin:      35,
+		}, {
+			PreMigration:    PreUpgradeActorsV3,
+			StartWithin:     30,
+			DontStartWithin: 15,
+			StopWithin:      5,
+		}},
 		Expensive: true,
 	}}

@@ -135,14 +200,43 @@ func DefaultUpgradeSchedule() UpgradeSchedule {
 }

 func (us UpgradeSchedule) Validate() error {
-	// Make sure we're not trying to upgrade to version 0.
+	// Make sure each upgrade is valid.
 	for _, u := range us {
 		if u.Network <= 0 {
 			return xerrors.Errorf("cannot upgrade to version <= 0: %d", u.Network)
 		}
+
+		for _, m := range u.PreMigrations {
+			if m.StartWithin <= 0 {
+				return xerrors.Errorf("pre-migration must specify a positive start-within epoch")
+			}
+
+			if m.DontStartWithin < 0 || m.StopWithin < 0 {
+				return xerrors.Errorf("pre-migration must specify non-negative epochs")
+			}
+
+			if m.StartWithin <= m.StopWithin {
+				return xerrors.Errorf("pre-migration start-within must come before stop-within")
+			}
+
+			// If we have a dont-start-within.
+			if m.DontStartWithin != 0 {
+				if m.DontStartWithin < m.StopWithin {
+					return xerrors.Errorf("pre-migration dont-start-within must come before stop-within")
+				}
+				if m.StartWithin <= m.DontStartWithin {
+					return xerrors.Errorf("pre-migration start-within must come after dont-start-within")
+				}
+			}
+		}
+		if !sort.SliceIsSorted(u.PreMigrations, func(i, j int) bool {
+			return u.PreMigrations[i].StartWithin > u.PreMigrations[j].StartWithin //nolint:scopelint,gosec
+		}) {
+			return xerrors.Errorf("pre-migrations must be sorted by start epoch")
+		}
 	}

-	// Make sure all the upgrades make sense.
+	// Make sure the upgrade order makes sense.
 	for i := 1; i < len(us); i++ {
 		prev := &us[i-1]
 		curr := &us[i]
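To make the three window parameters concrete, this is the arithmetic that the scheduling worker added later in this patch derives from them (illustrative helper only, not how the code is factored; the worker additionally clamps noStart to stay below stop):

// For one PreMigration entry and an upgrade at epoch `upgrade`:
//   start:   earliest epoch at which the pre-migration may be launched
//   noStart: once the head reaches this epoch, it is too late to launch it
//   stop:    a still-running pre-migration is canceled at this epoch
func preMigrationWindow(upgrade abi.ChainEpoch, p PreMigration) (start, noStart, stop abi.ChainEpoch) {
	return upgrade - p.StartWithin, upgrade - p.DontStartWithin, upgrade - p.StopWithin
}

With UpgradeActorsV3 at epoch U, the first entry above may start in [U-120, U-60) and is canceled at U-35; the second entry gets the later, shorter window [U-30, U-15), canceled at U-5, so a node that comes online shortly before the upgrade still gets a chance to warm the cache.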
@@ -164,12 +258,26 @@ func (us UpgradeSchedule) Validate() error {
 func (sm *StateManager) handleStateForks(ctx context.Context, root cid.Cid, height abi.ChainEpoch, cb ExecCallback, ts *types.TipSet) (cid.Cid, error) {
 	retCid := root
 	var err error
-	f, ok := sm.stateMigrations[height]
-	if ok {
-		retCid, err = f(ctx, sm, cb, root, height, ts)
+	u := sm.stateMigrations[height]
+	if u != nil && u.upgrade != nil {
+		startTime := time.Now()
+		log.Warnw("STARTING migration", "height", height)
+		// Yes, we clone the cache, even for the final upgrade epoch. Why? Reverts. We may
+		// have to migrate multiple times.
+		tmpCache := u.cache.Clone()
+		retCid, err = u.upgrade(ctx, sm, tmpCache, cb, root, height, ts)
 		if err != nil {
+			log.Errorw("FAILED migration", "height", height, "error", err)
 			return cid.Undef, err
 		}
+		// Yes, we update the cache, even for the final upgrade epoch. Why? Reverts. This
+		// can save us a _lot_ of time because very few actors will have changed if we
+		// do a small revert then need to re-run the migration.
+		u.cache.Update(tmpCache)
+		log.Warnw("COMPLETED migration",
+			"height", height,
+			"duration", time.Since(startTime),
+		)
 	}

 	return retCid, nil
@@ -180,6 +288,109 @@ func (sm *StateManager) hasExpensiveFork(ctx context.Context, height abi.ChainEp
 	return ok
 }

+func runPreMigration(ctx context.Context, sm *StateManager, fn PreMigrationFunc, cache *nv10.MemMigrationCache, ts *types.TipSet) {
+	height := ts.Height()
+	parent := ts.ParentState()
+
+	startTime := time.Now()
+
+	log.Warn("STARTING pre-migration")
+	// Clone the cache so we don't actually _update_ it
+	// till we're done. Otherwise, if we fail, the next
+	// migration to use the cache may assume that
+	// certain blocks exist, even if they don't.
+	tmpCache := cache.Clone()
+	err := fn(ctx, sm, tmpCache, parent, height, ts)
+	if err != nil {
+		log.Errorw("FAILED pre-migration", "error", err)
+		return
+	}
+	// Finally, if everything worked, update the cache.
+	cache.Update(tmpCache)
+	log.Warnw("COMPLETED pre-migration", "duration", time.Since(startTime))
+}
+
+func (sm *StateManager) preMigrationWorker(ctx context.Context) {
+	defer close(sm.shutdown)
+
+	ctx, cancel := context.WithCancel(ctx)
+	defer cancel()
+
+	type op struct {
+		after    abi.ChainEpoch
+		notAfter abi.ChainEpoch
+		run      func(ts *types.TipSet)
+	}
+
+	var wg sync.WaitGroup
+	defer wg.Wait()
+
+	// Turn each pre-migration into an operation in a schedule.
+	var schedule []op
+	for upgradeEpoch, migration := range sm.stateMigrations {
+		cache := migration.cache
+		for _, prem := range migration.preMigrations {
+			preCtx, preCancel := context.WithCancel(ctx)
+			migrationFunc := prem.PreMigration
+
+			afterEpoch := upgradeEpoch - prem.StartWithin
+			notAfterEpoch := upgradeEpoch - prem.DontStartWithin
+			stopEpoch := upgradeEpoch - prem.StopWithin
+			// We can't start after we stop.
+			if notAfterEpoch > stopEpoch {
+				notAfterEpoch = stopEpoch - 1
+			}
+
+			// Add an op to start a pre-migration.
+			schedule = append(schedule, op{
+				after:    afterEpoch,
+				notAfter: notAfterEpoch,
+
+				// TODO: are these values correct?
+				run: func(ts *types.TipSet) {
+					wg.Add(1)
+					go func() {
+						defer wg.Done()
+						runPreMigration(preCtx, sm, migrationFunc, cache, ts)
+					}()
+				},
+			})
+
+			// Add an op to cancel the pre-migration if it's still running.
+			schedule = append(schedule, op{
+				after:    stopEpoch,
+				notAfter: -1,
+				run:      func(ts *types.TipSet) { preCancel() },
+			})
+		}
+	}
+
+	// Then sort by epoch.
+	sort.Slice(schedule, func(i, j int) bool {
+		return schedule[i].after < schedule[j].after
+	})
+
+	// Finally, when the head changes, see if there's anything we need to do.
+	//
+	// We're intentionally ignoring reorgs as they don't matter for our purposes.
+	for change := range sm.cs.SubHeadChanges(ctx) {
+		for _, head := range change {
+			for len(schedule) > 0 {
+				op := &schedule[0]
+				if head.Val.Height() < op.after {
+					break
+				}
+
+				// If we haven't passed the pre-migration height...
+				if op.notAfter < 0 || head.Val.Height() < op.notAfter {
+					op.run(head.Val)
+				}
+				schedule = schedule[1:]
+			}
+		}
+	}
+}
+
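Both call sites above (the final migration and the pre-migration runner) follow the same merge-on-success discipline; condensed into one helper for clarity (illustrative only; withScratchCache is not part of this change and assumes the nv10.MemMigrationCache semantics of Clone copying and Update merging):

func withScratchCache(cache *nv10.MemMigrationCache, work func(MigrationCache) error) error {
	tmp := cache.Clone() // private working copy
	if err := work(tmp); err != nil {
		// On failure (or cancellation) the shared cache is untouched, so a later
		// migration can never trust entries whose blocks were never written.
		return err
	}
	cache.Update(tmp) // only a fully successful run becomes visible to other runs
	return nil
}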
 func doTransfer(tree types.StateTree, from, to address.Address, amt abi.TokenAmount, cb func(trace types.ExecutionTrace)) error {
 	fromAct, err := tree.GetActor(from)
 	if err != nil {
@@ -233,7 +444,7 @@ func doTransfer(tree types.StateTree, from, to address.Address, amt abi.TokenAmo
 	return nil
 }

-func UpgradeFaucetBurnRecovery(ctx context.Context, sm *StateManager, cb ExecCallback, root cid.Cid, epoch abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
+func UpgradeFaucetBurnRecovery(ctx context.Context, sm *StateManager, _ MigrationCache, cb ExecCallback, root cid.Cid, epoch abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
 	// Some initial parameters
 	FundsForMiners := types.FromFil(1_000_000)
 	LookbackEpoch := abi.ChainEpoch(32000)
@@ -519,7 +730,7 @@ func UpgradeFaucetBurnRecovery(ctx context.Context, sm *StateManager, cb ExecCal
 	return tree.Flush(ctx)
 }

-func UpgradeIgnition(ctx context.Context, sm *StateManager, cb ExecCallback, root cid.Cid, epoch abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
+func UpgradeIgnition(ctx context.Context, sm *StateManager, _ MigrationCache, cb ExecCallback, root cid.Cid, epoch abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
 	store := sm.cs.Store(ctx)

 	if build.UpgradeLiftoffHeight <= epoch {
@@ -574,7 +785,7 @@ func UpgradeIgnition(ctx context.Context, sm *StateManager, cb ExecCallback, roo
 	return tree.Flush(ctx)
 }

-func UpgradeRefuel(ctx context.Context, sm *StateManager, cb ExecCallback, root cid.Cid, epoch abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
+func UpgradeRefuel(ctx context.Context, sm *StateManager, _ MigrationCache, cb ExecCallback, root cid.Cid, epoch abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
 	store := sm.cs.Store(ctx)

 	tree, err := sm.StateTree(root)
@@ -600,7 +811,7 @@ func UpgradeRefuel(ctx context.Context, sm *StateManager, cb ExecCallback, root
 	return tree.Flush(ctx)
 }

-func UpgradeActorsV2(ctx context.Context, sm *StateManager, cb ExecCallback, root cid.Cid, epoch abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
+func UpgradeActorsV2(ctx context.Context, sm *StateManager, _ MigrationCache, cb ExecCallback, root cid.Cid, epoch abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
 	buf := bufbstore.NewTieredBstore(sm.cs.Blockstore(), bstore.NewTemporarySync())
 	store := store.ActorStore(ctx, buf)

@@ -646,7 +857,7 @@ func UpgradeActorsV2(ctx context.Context, sm *StateManager, cb ExecCallback, roo
 	return newRoot, nil
 }

-func UpgradeLiftoff(ctx context.Context, sm *StateManager, cb ExecCallback, root cid.Cid, epoch abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
+func UpgradeLiftoff(ctx context.Context, sm *StateManager, _ MigrationCache, cb ExecCallback, root cid.Cid, epoch abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
 	tree, err := sm.StateTree(root)
 	if err != nil {
 		return cid.Undef, xerrors.Errorf("getting state tree: %w", err)
 	}
@@ -660,7 +871,7 @@ func UpgradeLiftoff(ctx context.Context, sm *StateManager, cb ExecCallback, root
 	return tree.Flush(ctx)
 }

-func UpgradeCalico(ctx context.Context, sm *StateManager, cb ExecCallback, root cid.Cid, epoch abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
+func UpgradeCalico(ctx context.Context, sm *StateManager, _ MigrationCache, cb ExecCallback, root cid.Cid, epoch abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
 	store := sm.cs.Store(ctx)
 	var stateRoot types.StateRoot
 	if err := store.Get(ctx, root, &stateRoot); err != nil {
@@ -702,12 +913,56 @@ func UpgradeCalico(ctx context.Context, sm *StateManager, cb ExecCallback, root
 	return newRoot, nil
 }

-func UpgradeActorsV3(ctx context.Context, sm *StateManager, cb ExecCallback, root cid.Cid, epoch abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
+func UpgradeActorsV3(ctx context.Context, sm *StateManager, cache MigrationCache, cb ExecCallback, root cid.Cid, epoch abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
+	// Use all the CPUs except 3.
+	workerCount := runtime.NumCPU() - 3
+	if workerCount <= 0 {
+		workerCount = 1
+	}
+
+	config := nv10.Config{MaxWorkers: uint(workerCount)}
+	newRoot, err := upgradeActorsV3Common(ctx, sm, cache, root, epoch, ts, config)
+	if err != nil {
+		return cid.Undef, xerrors.Errorf("migrating actors v3 state: %w", err)
+	}
+
+	// perform some basic sanity checks to make sure everything still works.
+	store := store.ActorStore(ctx, sm.ChainStore().Blockstore())
+	if newSm, err := state.LoadStateTree(store, newRoot); err != nil {
+		return cid.Undef, xerrors.Errorf("state tree sanity load failed: %w", err)
+	} else if newRoot2, err := newSm.Flush(ctx); err != nil {
+		return cid.Undef, xerrors.Errorf("state tree sanity flush failed: %w", err)
+	} else if newRoot2 != newRoot {
+		return cid.Undef, xerrors.Errorf("state-root mismatch: %s != %s", newRoot, newRoot2)
+	} else if _, err := newSm.GetActor(init_.Address); err != nil {
+		return cid.Undef, xerrors.Errorf("failed to load init actor after upgrade: %w", err)
+	}
+
+	return newRoot, nil
+}
+
+func PreUpgradeActorsV3(ctx context.Context, sm *StateManager, cache MigrationCache, root cid.Cid, epoch abi.ChainEpoch, ts *types.TipSet) error {
+	// Use half the CPUs for pre-migration, but leave at least 3.
+	workerCount := runtime.NumCPU()
+	if workerCount <= 4 {
+		workerCount = 1
+	} else {
+		workerCount /= 2
+	}
+	config := nv10.Config{MaxWorkers: uint(workerCount)}
+	_, err := upgradeActorsV3Common(ctx, sm, cache, root, epoch, ts, config)
+	return err
+}
+
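The two worker budgets above are easier to compare side by side; a condensed restatement of the same heuristics (illustrative only, not how the code is factored):

// Full migration at the upgrade epoch: latency is critical, so use nearly every core.
//   NumCPU = 2 -> 1 worker, 4 -> 1, 8 -> 5, 16 -> 13
// Background pre-migration: the node keeps validating, so take roughly half.
//   NumCPU = 2 -> 1 worker, 4 -> 1, 8 -> 4, 16 -> 8
func migrationWorkers(preMigration bool) uint {
	n := runtime.NumCPU()
	if preMigration {
		if n <= 4 {
			return 1
		}
		return uint(n / 2)
	}
	if n <= 3 {
		return 1
	}
	return uint(n - 3)
}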
+func upgradeActorsV3Common(
+	ctx context.Context, sm *StateManager, cache MigrationCache,
+	root cid.Cid, epoch abi.ChainEpoch, ts *types.TipSet,
+	config nv10.Config,
+) (cid.Cid, error) {
 	buf := bufbstore.NewTieredBstore(sm.cs.Blockstore(), bstore.NewTemporarySync())
 	store := store.ActorStore(ctx, buf)

 	// Load the state root.
-
 	var stateRoot types.StateRoot
 	if err := store.Get(ctx, root, &stateRoot); err != nil {
 		return cid.Undef, xerrors.Errorf("failed to decode state root: %w", err)
@@ -721,18 +976,12 @@ func UpgradeActorsV3(ctx context.Context, sm *StateManager, cb ExecCallback, roo
 	}

 	// Perform the migration
-
-	// TODO: store this somewhere and pre-migrate
-	cache := nv10.NewMemMigrationCache()
-	// TODO: tune this.
-	config := nv10.Config{MaxWorkers: 1}
 	newHamtRoot, err := nv10.MigrateStateTree(ctx, store, stateRoot.Actors, epoch, config, migrationLogger{}, cache)
 	if err != nil {
 		return cid.Undef, xerrors.Errorf("upgrading to actors v2: %w", err)
 	}

 	// Persist the result.
-
 	newRoot, err := store.Put(ctx, &types.StateRoot{
 		Version: types.StateTreeVersion2,
 		Actors:  newHamtRoot,
@@ -742,19 +991,6 @@ func UpgradeActorsV3(ctx context.Context, sm *StateManager, cb ExecCallback, roo
 		return cid.Undef, xerrors.Errorf("failed to persist new state root: %w", err)
 	}

-	// Check the result.
-
-	// perform some basic sanity checks to make sure everything still works.
-	if newSm, err := state.LoadStateTree(store, newRoot); err != nil {
-		return cid.Undef, xerrors.Errorf("state tree sanity load failed: %w", err)
-	} else if newRoot2, err := newSm.Flush(ctx); err != nil {
-		return cid.Undef, xerrors.Errorf("state tree sanity flush failed: %w", err)
-	} else if newRoot2 != newRoot {
-		return cid.Undef, xerrors.Errorf("state-root mismatch: %s != %s", newRoot, newRoot2)
-	} else if _, err := newSm.GetActor(init_.Address); err != nil {
-		return cid.Undef, xerrors.Errorf("failed to load init actor after upgrade: %w", err)
-	}
-
 	// Persist the new tree.

 	{
diff --git a/chain/stmgr/forks_test.go b/chain/stmgr/forks_test.go
index a2b7a179fba..95e7ef69900 100644
--- a/chain/stmgr/forks_test.go
+++ b/chain/stmgr/forks_test.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"io"
+	"sync"
 	"testing"

 	"github.com/ipfs/go-cid"
@@ -122,7 +123,7 @@ func TestForkHeightTriggers(t *testing.T) {
 		cg.ChainStore(), UpgradeSchedule{{
 			Network: 1,
 			Height:  testForkHeight,
-			Migration: func(ctx context.Context, sm *StateManager, cb ExecCallback,
+			Migration: func(ctx context.Context, sm *StateManager, cache MigrationCache, cb ExecCallback,
 				root cid.Cid, height abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
 				cst := ipldcbor.NewCborStore(sm.ChainStore().Blockstore())

@@ -252,7 +253,7 @@ func TestForkRefuseCall(t *testing.T) {
 			Network:   1,
 			Expensive: true,
 			Height:    testForkHeight,
-			Migration: func(ctx context.Context, sm *StateManager, cb ExecCallback,
+			Migration: func(ctx context.Context, sm *StateManager, cache MigrationCache, cb ExecCallback,
 				root cid.Cid, height abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {
 				return root, nil
 			}}})
@@ -317,3 +318,166 @@ func TestForkRefuseCall(t *testing.T) {
 		}
 	}
 }
+
+func TestForkPreMigration(t *testing.T) {
+	logging.SetAllLoggers(logging.LevelInfo)
+
+	cg, err := gen.NewGenerator()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	fooCid, err := abi.CidBuilder.Sum([]byte("foo"))
+	require.NoError(t, err)
+
+	barCid, err := abi.CidBuilder.Sum([]byte("bar"))
+	require.NoError(t, err)
+
+	failCid, err := abi.CidBuilder.Sum([]byte("fail"))
+	require.NoError(t, err)
+
+	var wait20 sync.WaitGroup
+	wait20.Add(3)
+
+	wasCanceled := make(chan struct{})
+
+	checkCache := func(t *testing.T, cache MigrationCache) {
+		found, value, err := cache.Read("foo")
+		require.NoError(t, err)
+		require.True(t, found)
+		require.Equal(t, fooCid, value)
+
+		found, value, err = cache.Read("bar")
+		require.NoError(t, err)
+		require.True(t, found)
+		require.Equal(t, barCid, value)
+
+		found, _, err = cache.Read("fail")
+		require.NoError(t, err)
+		require.False(t, found)
+	}
+
+	counter := make(chan struct{}, 10)
+
+	sm, err := NewStateManagerWithUpgradeSchedule(
+		cg.ChainStore(), UpgradeSchedule{{
+			Network: 1,
+			Height:  testForkHeight,
+			Migration: func(ctx context.Context, sm *StateManager, cache MigrationCache, cb ExecCallback,
+				root cid.Cid, height abi.ChainEpoch, ts *types.TipSet) (cid.Cid, error) {

+				// Make sure the test that should be canceled, is canceled.
+				select {
+				case <-wasCanceled:
+				case <-ctx.Done():
+					return cid.Undef, ctx.Err()
+				}

+				// the cache should be setup correctly.
+				checkCache(t, cache)

+				counter <- struct{}{}

+				return root, nil
+			},
+			PreMigrations: []PreMigration{{
+				StartWithin: 20,
+				PreMigration: func(ctx context.Context, _ *StateManager, cache MigrationCache,
+					_ cid.Cid, _ abi.ChainEpoch, _ *types.TipSet) error {
+					wait20.Done()
+					wait20.Wait()

+					err := cache.Write("foo", fooCid)
+					require.NoError(t, err)

+					counter <- struct{}{}

+					return nil
+				},
+			}, {
+				StartWithin: 20,
+				PreMigration: func(ctx context.Context, _ *StateManager, cache MigrationCache,
+					_ cid.Cid, _ abi.ChainEpoch, _ *types.TipSet) error {
+					wait20.Done()
+					wait20.Wait()

+					err := cache.Write("bar", barCid)
+					require.NoError(t, err)

+					counter <- struct{}{}

+					return nil
+				},
+			}, {
+				StartWithin: 20,
+				PreMigration: func(ctx context.Context, _ *StateManager, cache MigrationCache,
+					_ cid.Cid, _ abi.ChainEpoch, _ *types.TipSet) error {
+					wait20.Done()
+					wait20.Wait()

+					err := cache.Write("fail", failCid)
+					require.NoError(t, err)

+					counter <- struct{}{}

+					// Fail this migration. The cached entry should not be persisted.
+					return fmt.Errorf("failed")
+				},
+			}, {
+				StartWithin: 15,
+				StopWithin:  5,
+				PreMigration: func(ctx context.Context, _ *StateManager, cache MigrationCache,
+					_ cid.Cid, _ abi.ChainEpoch, _ *types.TipSet) error {

+					<-ctx.Done()
+					close(wasCanceled)

+					counter <- struct{}{}

+					return nil
+				},
+			}, {
+				StartWithin: 10,
+				PreMigration: func(ctx context.Context, _ *StateManager, cache MigrationCache,
+					_ cid.Cid, _ abi.ChainEpoch, _ *types.TipSet) error {

+					checkCache(t, cache)

+					counter <- struct{}{}

+					return nil
+				},
+			}}},
+		})
+	if err != nil {
+		t.Fatal(err)
+	}
+	require.NoError(t, sm.Start(context.Background()))
+	defer func() {
+		require.NoError(t, sm.Stop(context.Background()))
+	}()
+
+	inv := vm.NewActorRegistry()
+	inv.Register(nil, testActor{})
+
+	sm.SetVMConstructor(func(ctx context.Context, vmopt *vm.VMOpts) (*vm.VM, error) {
+		nvm, err := vm.NewVM(ctx, vmopt)
+		if err != nil {
+			return nil, err
+		}
+		nvm.SetInvoker(inv)
+		return nvm, nil
+	})
+
+	cg.SetStateManager(sm)
+
+	for i := 0; i < 50; i++ {
+		_, err := cg.NextTipSet()
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+	// We have 5 pre-migration steps, and the migration. They should all have written something
+	// to this channel.
+	require.Equal(t, 6, len(counter))
+}
diff --git a/chain/stmgr/stmgr.go b/chain/stmgr/stmgr.go
index 84e9e1744f0..d58d7302800 100644
--- a/chain/stmgr/stmgr.go
+++ b/chain/stmgr/stmgr.go
@@ -20,6 +20,7 @@ import (

 	// Used for genesis.
 	msig0 "github.com/filecoin-project/specs-actors/actors/builtin/multisig"
+	"github.com/filecoin-project/specs-actors/v3/actors/migration/nv10"

 	// we use the same adt for all receipts
 	blockadt "github.com/filecoin-project/specs-actors/actors/util/adt"
@@ -62,15 +63,24 @@ type versionSpec struct {
 	atOrBelow abi.ChainEpoch
 }

+type migration struct {
+	upgrade       MigrationFunc
+	preMigrations []PreMigration
+	cache         *nv10.MemMigrationCache
+}
+
 type StateManager struct {
 	cs *store.ChainStore

+	cancel   context.CancelFunc
+	shutdown chan struct{}
+
 	// Determines the network version at any given epoch.
 	networkVersions []versionSpec
 	latestVersion   network.Version

-	// Maps chain epochs to upgrade functions.
-	stateMigrations map[abi.ChainEpoch]UpgradeFunc
+	// Maps chain epochs to migrations.
+	stateMigrations map[abi.ChainEpoch]*migration
+
 	// A set of potentially expensive/time consuming upgrades. Explicit
 	// calls for, e.g., gas estimation fail against this epoch with
 	// ErrExpensiveFork.
@@ -103,7 +113,7 @@ func NewStateManagerWithUpgradeSchedule(cs *store.ChainStore, us UpgradeSchedule
 		return nil, err
 	}

-	stateMigrations := make(map[abi.ChainEpoch]UpgradeFunc, len(us))
+	stateMigrations := make(map[abi.ChainEpoch]*migration, len(us))
 	expensiveUpgrades := make(map[abi.ChainEpoch]struct{}, len(us))
 	var networkVersions []versionSpec
 	lastVersion := network.Version0
@@ -111,8 +121,13 @@ func NewStateManagerWithUpgradeSchedule(cs *store.ChainStore, us UpgradeSchedule
 		// If we have any upgrades, process them and create a version
 		// schedule.
 		for _, upgrade := range us {
-			if upgrade.Migration != nil {
-				stateMigrations[upgrade.Height] = upgrade.Migration
+			if upgrade.Migration != nil || upgrade.PreMigrations != nil {
+				migration := &migration{
+					upgrade:       upgrade.Migration,
+					preMigrations: upgrade.PreMigrations,
+					cache:         nv10.NewMemMigrationCache(),
+				}
+				stateMigrations[upgrade.Height] = migration
 			}
 			if upgrade.Expensive {
 				expensiveUpgrades[upgrade.Height] = struct{}{}
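Putting the pieces together from a caller's point of view, a hypothetical schedule entry pairing a migration with one pre-migration might look like this (myMigration and myPreMigration are stand-ins, not part of this change; the constructor above allocates a fresh MemMigrationCache for each such height, so caches of different upgrades never mix):

schedule := UpgradeSchedule{{
	Height:    build.UpgradeActorsV3Height,
	Network:   network.Version10,
	Migration: myMigration, // MigrationFunc
	PreMigrations: []PreMigration{{
		PreMigration: myPreMigration, // PreMigrationFunc
		StartWithin:  60,
		StopWithin:   5, // DontStartWithin may be left at 0
	}},
	Expensive: true,
}}
if err := schedule.Validate(); err != nil {
	panic(err) // e.g. a window with StartWithin <= StopWithin is rejected up front
}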
@@ -148,6 +163,33 @@ func cidsToKey(cids []cid.Cid) string {
 	return out
 }

+// Start starts the state manager's optional background processes. At the moment, this schedules
+// pre-migration functions to run ahead of network upgrades.
+//
+// This method is not safe to invoke from multiple threads or concurrently with Stop.
+func (sm *StateManager) Start(context.Context) error {
+	var ctx context.Context
+	ctx, sm.cancel = context.WithCancel(context.Background())
+	sm.shutdown = make(chan struct{})
+	go sm.preMigrationWorker(ctx)
+	return nil
+}
+
+// Stop stops the state manager's background processes.
+//
+// This method is not safe to invoke concurrently with Start.
+func (sm *StateManager) Stop(ctx context.Context) error {
+	if sm.cancel != nil {
+		sm.cancel()
+		select {
+		case <-sm.shutdown:
+		case <-ctx.Done():
+			return ctx.Err()
+		}
+	}
+	return nil
+}
+
 func (sm *StateManager) TipSetState(ctx context.Context, ts *types.TipSet) (st cid.Cid, rec cid.Cid, err error) {
 	ctx, span := trace.StartSpan(ctx, "tipSetState")
 	defer span.End()
diff --git a/node/builder.go b/node/builder.go
index 8ee9b367440..1dd60ee1b32 100644
--- a/node/builder.go
+++ b/node/builder.go
@@ -269,7 +269,7 @@ func Online() Option {
 			Override(new(vm.SyscallBuilder), vm.Syscalls),
 			Override(new(*store.ChainStore), modules.ChainStore),
 			Override(new(stmgr.UpgradeSchedule), stmgr.DefaultUpgradeSchedule()),
-			Override(new(*stmgr.StateManager), stmgr.NewStateManagerWithUpgradeSchedule),
+			Override(new(*stmgr.StateManager), modules.StateManager),
 			Override(new(*wallet.LocalWallet), wallet.NewWallet),
 			Override(new(wallet.Default), From(new(*wallet.LocalWallet))),
 			Override(new(api.WalletAPI), From(new(wallet.MultiWallet))),
diff --git a/node/modules/stmgr.go b/node/modules/stmgr.go
new file mode 100644
index 00000000000..9d3917b856f
--- /dev/null
+++ b/node/modules/stmgr.go
@@ -0,0 +1,20 @@
+package modules
+
+import (
+	"go.uber.org/fx"
+
+	"github.com/filecoin-project/lotus/chain/stmgr"
+	"github.com/filecoin-project/lotus/chain/store"
+)
+
+func StateManager(lc fx.Lifecycle, cs *store.ChainStore, us stmgr.UpgradeSchedule) (*stmgr.StateManager, error) {
+	sm, err := stmgr.NewStateManagerWithUpgradeSchedule(cs, us)
+	if err != nil {
+		return nil, err
+	}
+	lc.Append(fx.Hook{
+		OnStart: sm.Start,
+		OnStop:  sm.Stop,
+	})
+	return sm, nil
+}
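For callers that drive the StateManager outside the node builder (tests and tools), a minimal lifecycle sketch; the chain store variable cs is assumed to exist and the timeout value is arbitrary:

sm, err := stmgr.NewStateManagerWithUpgradeSchedule(cs, stmgr.DefaultUpgradeSchedule())
if err != nil {
	return err
}
if err := sm.Start(context.Background()); err != nil { // launches preMigrationWorker
	return err
}
defer func() {
	// Bound the shutdown so a slow pre-migration cannot hang the caller forever;
	// Stop returns ctx.Err() if the worker does not exit in time.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	_ = sm.Stop(ctx)
}()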