Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OnlineDDL: fix nil 'completed_timestamp' for cancelled migrations #13928

Merged
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,9 @@ func TestSchemaChange(t *testing.T) {
t.Run("summary: validate sequential migration IDs", func(t *testing.T) {
onlineddl.ValidateSequentialMigrationIDs(t, &vtParams, shards)
})
t.Run("summary: validate completed_timestamp", func(t *testing.T) {
onlineddl.ValidateCompletedTimestamp(t, &vtParams)
})
}

func testScheduler(t *testing.T) {
Expand Down
3 changes: 3 additions & 0 deletions go/test/endtoend/onlineddl/vrepl/onlineddl_vrepl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -850,6 +850,9 @@ func TestSchemaChange(t *testing.T) {
t.Run("summary: validate sequential migration IDs", func(t *testing.T) {
onlineddl.ValidateSequentialMigrationIDs(t, &vtParams, shards)
})
t.Run("summary: validate completed_timestamp", func(t *testing.T) {
onlineddl.ValidateCompletedTimestamp(t, &vtParams)
})
}

func insertRow(t *testing.T) {
Expand Down
36 changes: 36 additions & 0 deletions go/test/endtoend/onlineddl/vtgate_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ const (
ThrottledAppsTimeout = 60 * time.Second
)

var (
testsStartupTime time.Time
)

func init() {
testsStartupTime = time.Now()
}

// VtgateExecQuery runs a query on VTGate using given query params
func VtgateExecQuery(t *testing.T, vtParams *mysql.ConnParams, query string, expectError string) *sqltypes.Result {
t.Helper()
Expand Down Expand Up @@ -405,3 +413,31 @@ func ValidateSequentialMigrationIDs(t *testing.T, vtParams *mysql.ConnParams, sh
assert.Equalf(t, count, shardMax[shard]-shardMin[shard]+1, "mismatch: shared=%v, count=%v, min=%v, max=%v", shard, count, shardMin[shard], shardMax[shard])
}
}

// ValidateCompletedTimestamp ensures that any migration in `cancelled`, `completed`, `failed` statuses
// has a non-nil and valid `completed_timestamp` value.
func ValidateCompletedTimestamp(t *testing.T, vtParams *mysql.ConnParams) {
require.False(t, testsStartupTime.IsZero())
r := VtgateExecQuery(t, vtParams, "show vitess_migrations", "")

completedTimestampNumValidations := 0
for _, row := range r.Named().Rows {
migrationStatus := row.AsString("migration_status", "")
require.NotEmpty(t, migrationStatus)
switch migrationStatus {
case string(schema.OnlineDDLStatusComplete),
string(schema.OnlineDDLStatusFailed),
string(schema.OnlineDDLStatusCancelled):
{
assert.False(t, row["completed_timestamp"].IsNull())
// Also make sure the timestamp is "real", and that it is recent.
timestamp := row.AsString("completed_timestamp", "")
completedTime, err := time.Parse(sqltypes.TimestampFormat, timestamp)
assert.NoError(t, err)
assert.Greater(t, completedTime.Unix(), testsStartupTime.Unix())
completedTimestampNumValidations++
}
}
}
assert.NotZero(t, completedTimestampNumValidations)
}
19 changes: 19 additions & 0 deletions go/vt/vttablet/onlineddl/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,14 @@ var (
ErrMigrationNotFound = errors.New("migration not found")
)

var (
// fixCompletedTimestampDone fixes a nil `completed_tiemstamp` columns, see
// https://github.com/vitessio/vitess/issues/13927
// The fix is in release-18.0
// TODO: remove in release-19.0
fixCompletedTimestampDone bool
)

var emptyResult = &sqltypes.Result{}
var acceptableDropTableIfExistsErrorCodes = []sqlerror.ErrorCode{sqlerror.ERCantFindFile, sqlerror.ERNoSuchTable}
var copyAlgorithm = sqlparser.AlgorithmValue(sqlparser.CopyStr)
Expand Down Expand Up @@ -3740,6 +3748,17 @@ func (e *Executor) gcArtifacts(ctx context.Context) error {
e.migrationMutex.Lock()
defer e.migrationMutex.Unlock()

// v18 fix. Remove in v19
if !fixCompletedTimestampDone {
if _, err := e.execQuery(ctx, sqlFixCompletedTimestamp); err != nil {
// This query fixes a bug where stale migrations were marked as 'cancelled' or 'failed' without updating 'completed_timestamp'
// Running this query retroactively sets completed_timestamp
// This fix is created in v18 and can be removed in v19
return err
}
fixCompletedTimestampDone = true
}

query, err := sqlparser.ParseAndBind(sqlSelectUncollectedArtifacts,
sqltypes.Int64BindVariable(int64((retainOnlineDDLTables).Seconds())),
)
Expand Down
5 changes: 3 additions & 2 deletions go/vt/vttablet/onlineddl/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ const (
migration_uuid=%a
`
sqlUpdateMigrationStatusFailedOrCancelled = `UPDATE _vt.schema_migrations
SET migration_status=IF(cancelled_timestamp IS NULL, 'failed', 'cancelled')
SET migration_status=IF(cancelled_timestamp IS NULL, 'failed', 'cancelled'),
completed_timestamp=NOW(6)
WHERE
migration_uuid=%a
`
Expand Down Expand Up @@ -356,7 +357,7 @@ const (
SET
completed_timestamp=NOW(6)
WHERE
migration_status='failed'
migration_status IN ('cancelled', 'failed')
AND cleanup_timestamp IS NULL
AND completed_timestamp IS NULL
`
Expand Down