Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing multiple issues related to onlineddl/lifecycle #8500

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions go/vt/schema/tablegc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestIsGCTableName(t *testing.T) {
Expand Down Expand Up @@ -152,3 +153,16 @@ func TestParseGCLifecycle(t *testing.T) {
})
}
}

func TestGenerateRenameStatementWithUUID(t *testing.T) {
uuid := "997342e3_e91d_11eb_aaae_0a43f95f28a3"
tableName := "mytbl"
countIterations := 5
toTableNames := map[string]bool{}
for i := 0; i < countIterations; i++ {
_, toTableName, err := GenerateRenameStatementWithUUID(tableName, PurgeTableGCState, OnlineDDLToGCUUID(uuid), time.Now().Add(time.Duration(i)*time.Second).UTC())
require.NoError(t, err)
toTableNames[toTableName] = true
}
assert.Equal(t, countIterations, len(toTableNames))
}
41 changes: 23 additions & 18 deletions go/vt/vttablet/onlineddl/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -420,15 +420,9 @@ func (e *Executor) dropOnlineDDLUser(ctx context.Context) error {

// tableExists checks if a given table exists.
func (e *Executor) tableExists(ctx context.Context, tableName string) (bool, error) {
conn, err := e.pool.Get(ctx)
if err != nil {
return false, err
}
defer conn.Recycle()

tableName = strings.ReplaceAll(tableName, `_`, `\_`)
parsed := sqlparser.BuildParsedQuery(sqlShowTablesLike, tableName)
rs, err := conn.Exec(ctx, parsed.Query, 1, true)
rs, err := e.execQuery(ctx, parsed.Query)
if err != nil {
return false, err
}
Expand Down Expand Up @@ -2362,6 +2356,10 @@ func (e *Executor) reviewStaleMigrations(ctx context.Context) error {
if err := e.updateMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusFailed); err != nil {
return err
}
_ = e.updateMigrationStartedTimestamp(ctx, uuid)
if err := e.updateMigrationTimestamp(ctx, "completed_timestamp", uuid); err != nil {
return err
}
_ = e.updateMigrationMessage(ctx, onlineDDL.UUID, "stale migration")
}

Expand All @@ -2376,7 +2374,7 @@ func (e *Executor) retryTabletFailureMigrations(ctx context.Context) error {
}

// gcArtifactTable garbage-collects a single table
func (e *Executor) gcArtifactTable(ctx context.Context, artifactTable, uuid string) error {
func (e *Executor) gcArtifactTable(ctx context.Context, artifactTable, uuid string, t time.Time) error {
tableExists, err := e.tableExists(ctx, artifactTable)
if err != nil {
return err
Expand All @@ -2386,17 +2384,11 @@ func (e *Executor) gcArtifactTable(ctx context.Context, artifactTable, uuid stri
}
// We've already concluded in gcArtifacts() that this table was held for long enough.
// We therefore move it into PURGE state.
renameStatement, _, err := schema.GenerateRenameStatementWithUUID(artifactTable, schema.PurgeTableGCState, schema.OnlineDDLToGCUUID(uuid), time.Now().UTC())
if err != nil {
return err
}
conn, err := e.pool.Get(ctx)
renameStatement, _, err := schema.GenerateRenameStatementWithUUID(artifactTable, schema.PurgeTableGCState, schema.OnlineDDLToGCUUID(uuid), t)
if err != nil {
return err
}
defer conn.Recycle()

_, err = conn.Exec(ctx, renameStatement, 1, true)
_, err = e.execQuery(ctx, renameStatement)
return err
}

Expand All @@ -2405,6 +2397,13 @@ func (e *Executor) gcArtifacts(ctx context.Context) error {
e.migrationMutex.Lock()
defer e.migrationMutex.Unlock()

if _, err := e.execQuery(ctx, sqlFixCompletedTimestamp); err != nil {
// This query fixes a bug where stale migrations were marked as 'failed' without updating 'completed_timestamp'
// see https://github.com/vitessio/vitess/issues/8499
// Running this query retroactively sets completed_timestamp
// This 'if' clause can be removed in version v13
return err
}
query, err := sqlparser.ParseAndBind(sqlSelectUncollectedArtifacts,
sqltypes.Int64BindVariable(int64((*retainOnlineDDLTables).Seconds())),
)
Expand All @@ -2420,8 +2419,14 @@ func (e *Executor) gcArtifacts(ctx context.Context) error {
artifacts := row["artifacts"].ToString()

artifactTables := textutil.SplitDelimitedList(artifacts)
for _, artifactTable := range artifactTables {
if err := e.gcArtifactTable(ctx, artifactTable, uuid); err != nil {

timeNow := time.Now()
for i, artifactTable := range artifactTables {
// We wish to generate distinct timestamp values for each table in this UUID,
// because all tables will be renamed as _something_UUID_timestamp. Since UUID
// is shared for all artifacts in this loop, we differentiate via timestamp
t := timeNow.Add(time.Duration(i) * time.Second).UTC()
if err := e.gcArtifactTable(ctx, artifactTable, uuid, t); err != nil {
return err
}
log.Infof("Executor.gcArtifacts: renamed away artifact %s", artifactTable)
Expand Down
15 changes: 12 additions & 3 deletions go/vt/vttablet/onlineddl/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,9 @@ const (
WHERE
migration_uuid=%a
`
sqlUpdateMigrationStartedTimestamp = `UPDATE _vt.schema_migrations
SET started_timestamp=IFNULL(started_timestamp, NOW())
sqlUpdateMigrationStartedTimestamp = `UPDATE _vt.schema_migrations SET
started_timestamp =IFNULL(started_timestamp, NOW()),
liveness_timestamp=IFNULL(liveness_timestamp, NOW())
WHERE
migration_uuid=%a
`
Expand All @@ -130,7 +131,7 @@ const (
migration_uuid=%a
`
sqlUpdateArtifacts = `UPDATE _vt.schema_migrations
SET artifacts=concat(%a, ',', artifacts)
SET artifacts=concat(%a, ',', artifacts), cleanup_timestamp=NULL
WHERE
migration_uuid=%a
`
Expand Down Expand Up @@ -270,6 +271,14 @@ const (
AND cleanup_timestamp IS NULL
AND completed_timestamp <= NOW() - INTERVAL %a SECOND
`
sqlFixCompletedTimestamp = `UPDATE _vt.schema_migrations
SET
completed_timestamp=NOW()
WHERE
migration_status='failed'
AND cleanup_timestamp IS NULL
AND completed_timestamp IS NULL
`
sqlSelectMigration = `SELECT
id,
migration_uuid,
Expand Down