Skip to content

Commit

Permalink
chore: more frequent migration progress logs (#12732)
Browse files Browse the repository at this point in the history
  • Loading branch information
virajbhartiya authored Dec 12, 2024
1 parent ae5b845 commit 4f29e2e
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

- Add JSON output of tipsets to `lotus chain list`. ([filecoin-project/lotus#12691](https://github.com/filecoin-project/lotus/pull/12691))
- Remove IPNI advertisement relay over pubsub via Lotus node as it now has been deprecated. ([filecoin-project/lotus#12768](https://github.com/filecoin-project/lotus/pull/12768))
- During a network upgrade, log migration progress every 2 seconds so that the progress logs are more helpful and informative. The `LOTUS_MIGRATE_PROGRESS_LOG_SECONDS` environment variable can be used to change this interval if needed. ([filecoin-project/lotus#12732](https://github.com/filecoin-project/lotus/pull/12732))

# UNRELEASED v.1.32.0

Expand Down
31 changes: 29 additions & 2 deletions chain/consensus/filcns/upgrades.go
Original file line number Diff line number Diff line change
Expand Up @@ -2754,9 +2754,14 @@ func PreUpgradeActorsV16(ctx context.Context, sm *stmgr.StateManager, cache stmg
return xerrors.Errorf("error getting lookback ts for premigration: %w", err)
}

logPeriod, err := getMigrationProgressLogPeriod()
if err != nil {
return xerrors.Errorf("error getting progress log period: %w", err)
}

config := migration.Config{
MaxWorkers: uint(workerCount),
ProgressLogPeriod: time.Minute * 5,
ProgressLogPeriod: logPeriod,
}

_, err = upgradeActorsV16Common(ctx, sm, cache, lbRoot, epoch, lbts, config)
Expand All @@ -2770,11 +2775,17 @@ func UpgradeActorsV16(ctx context.Context, sm *stmgr.StateManager, cache stmgr.M
if workerCount <= 0 {
workerCount = 1
}

logPeriod, err := getMigrationProgressLogPeriod()
if err != nil {
return cid.Undef, xerrors.Errorf("error getting progress log period: %w", err)
}

config := migration.Config{
MaxWorkers: uint(workerCount),
JobQueueSize: 1000,
ResultQueueSize: 100,
ProgressLogPeriod: 10 * time.Second,
ProgressLogPeriod: logPeriod,
}
newRoot, err := upgradeActorsV16Common(ctx, sm, cache, root, epoch, ts, config)
if err != nil {
Expand Down Expand Up @@ -3005,3 +3016,19 @@ func (ml migrationLogger) Log(level rt.LogLevel, msg string, args ...interface{}
log.Errorf(msg, args...)
}
}

// getMigrationProgressLogPeriod returns the interval at which migration
// progress should be logged.
//
// The default is 2 seconds. If the LOTUS_MIGRATE_PROGRESS_LOG_SECONDS
// environment variable is set to a non-empty value, that value is parsed as a
// whole number of seconds and used instead. An error is returned when the
// value is not an integer, is not positive, or is too large to be represented
// as a time.Duration.
func getMigrationProgressLogPeriod() (time.Duration, error) {
	const (
		defaultPeriod = 2 * time.Second
		// Largest number of whole seconds that fits in a time.Duration
		// (an int64 count of nanoseconds).
		maxSeconds = int64(1<<63-1) / int64(time.Second)
	)

	period := os.Getenv("LOTUS_MIGRATE_PROGRESS_LOG_SECONDS")
	if period == "" {
		return defaultPeriod, nil
	}

	seconds, err := strconv.Atoi(period)
	if err != nil {
		return 0, xerrors.Errorf("LOTUS_MIGRATE_PROGRESS_LOG_SECONDS must be an integer: %w", err)
	}
	if seconds <= 0 {
		return 0, xerrors.Errorf("LOTUS_MIGRATE_PROGRESS_LOG_SECONDS must be positive")
	}
	// Converting to nanoseconds wraps silently on overflow and could yield a
	// zero or negative period despite the positivity check above, so reject
	// values that cannot be represented.
	if int64(seconds) > maxSeconds {
		return 0, xerrors.Errorf("LOTUS_MIGRATE_PROGRESS_LOG_SECONDS must be at most %d", maxSeconds)
	}

	return time.Duration(seconds) * time.Second, nil
}
15 changes: 13 additions & 2 deletions documentation/misc/Building_a_network_skeleton.md
Original file line number Diff line number Diff line change
Expand Up @@ -456,9 +456,14 @@ Typically it's safe to not upgrade filecoin-ffi's version of go-state-types. Th
return xerrors.Errorf("error getting lookback ts for premigration: %w", err)
}
logPeriod, err := getMigrationProgressLogPeriod()
if err != nil {
return xerrors.Errorf("error getting progress log period: %w", err)
}
config := migration.Config{
MaxWorkers: uint(workerCount),
ProgressLogPeriod: time.Minute * 5,
ProgressLogPeriod: logPeriod,
}
_, err = upgradeActorsV(XX+1)Common(ctx, sm, cache, lbRoot, epoch, lbts, config)
Expand All @@ -472,11 +477,17 @@ Typically it's safe to not upgrade filecoin-ffi's version of go-state-types. Th
if workerCount <= 0 {
workerCount = 1
}
logPeriod, err := getMigrationProgressLogPeriod()
if err != nil {
return cid.Undef, xerrors.Errorf("error getting progress log period: %w", err)
}
config := migration.Config{
MaxWorkers: uint(workerCount),
JobQueueSize: 1000,
ResultQueueSize: 100,
ProgressLogPeriod: 10 * time.Second,
ProgressLogPeriod: logPeriod,
}
newRoot, err := upgradeActorsV(XX+1)Common(ctx, sm, cache, root, epoch, ts, config)
if err != nil {
Expand Down

0 comments on commit 4f29e2e

Please sign in to comment.