Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql: ensure graceful rollbacks if the ADD REGION async job fails #60762

Merged
merged 1 commit into from
Feb 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions pkg/sql/multiregion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,3 +204,62 @@ CREATE TABLE db.t(k INT) LOCALITY REGIONAL BY TABLE IN PRIMARY REGION;
t.Fatal("expected crdb_internal_region to be present in system.namespace")
}
}

// TODO(arul): Currently, database level zone configurations aren't rolled back.
// When we fix this limitation we should also update this test to check for that.
func TestRollbackDuringAddRegion(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

// Decrease the adopt loop interval so that retries happen quickly.
defer sqltestutils.SetTestJobsAdoptInterval()()

numServers := 2
serverArgs := make(map[int]base.TestServerArgs)

regionNames := make([]string, numServers)
for i := 0; i < numServers; i++ {
// "us-east1", "us-east2"...
regionNames[i] = fmt.Sprintf("us-east%d", i+1)
}

for i := 0; i < numServers; i++ {
serverArgs[i] = base.TestServerArgs{
Knobs: base.TestingKnobs{
SQLTypeSchemaChanger: &sql.TypeSchemaChangerTestingKnobs{
RunBeforeExec: func() error {
return errors.New("boom")
},
},
},
Locality: roachpb.Locality{
Tiers: []roachpb.Tier{{Key: "region", Value: regionNames[i]}},
},
}
}

tc := serverutils.StartNewTestCluster(t, numServers, base.TestClusterArgs{
ServerArgsPerNode: serverArgs,
})

ctx := context.Background()
defer tc.Stopper().Stop(ctx)

sqlDB := tc.ServerConn(0)

_, err := sqlDB.Exec(`CREATE DATABASE db WITH PRIMARY REGION "us-east1"`)
require.NoError(t, err)
if err != nil {
t.Error(err)
}

_, err = sqlDB.Exec(`ALTER DATABASE db ADD REGION "us-east2"`)
require.Error(t, err, "boom")

var jobStatus string
var jobErr string
row := sqlDB.QueryRow("SELECT status, error FROM [SHOW JOBS] WHERE job_type = 'TYPEDESC SCHEMA CHANGE'")
require.NoError(t, row.Scan(&jobStatus, &jobErr))
require.Regexp(t, "boom", jobErr)
require.Regexp(t, "failed", jobStatus)
}
67 changes: 54 additions & 13 deletions pkg/sql/type_change.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,30 +363,71 @@ func (t *typeSchemaChanger) cleanupEnumValues(ctx context.Context) error {
if !enumHasNonPublic(&typeDesc.Immutable) {
return nil
}
// First, deal with all members that we initially hoped to remove but
// now need to be promoted back to writable.
for i := range typeDesc.EnumMembers {
member := &typeDesc.EnumMembers[i]
if t.isTransitioningInCurrentJob(member) && enumMemberIsRemoving(member) {
member.Capability = descpb.TypeDescriptor_EnumMember_ALL
member.Direction = descpb.TypeDescriptor_EnumMember_NONE

if typeDesc.Kind == descpb.TypeDescriptor_MULTIREGION_ENUM {
// First, we deal with cleanup specific to the multi-region enum.
// TODO(arul): This can go away once we stop copying regions on the dataabse
// descriptor.
if typeDesc.Kind == descpb.TypeDescriptor_MULTIREGION_ENUM {
for _, member := range typeDesc.EnumMembers {
if t.isTransitioningInCurrentJob(&member) {
_, dbDesc, err := descsCol.GetMutableDatabaseByID(
ctx, txn, typeDesc.ParentID, tree.DatabaseLookupFlags{Required: true})
if err != nil {
return err
}
err = addRegionToRegionConfig(dbDesc, descpb.RegionName(member.LogicalRepresentation))
if err != nil {
return err

// If the enum member was being removed, we need to add it back to the
// database region config.
if enumMemberIsRemoving(&member) {
err = addRegionToRegionConfig(dbDesc, descpb.RegionName(member.LogicalRepresentation))
if err != nil {
return err
}
if err := descsCol.WriteDescToBatch(ctx, true /* kvTrace */, dbDesc, b); err != nil {
return err
}
}
if err := descsCol.WriteDescToBatch(ctx, true /* kvTrace */, dbDesc, b); err != nil {
return err

// If the enum member was being added, we must remove it from the
// database region config.
if enumMemberIsAdding(&member) {
idx := 0
found := false
for i, region := range dbDesc.RegionConfig.Regions {
if region.Name == descpb.RegionName(member.LogicalRepresentation) {
idx = i
found = true
break
}
}

if !found {
// This shouldn't happen and is simply a sanity check to ensure the
// database descriptor regions and multi-region enum regions are
// in sync.
return errors.AssertionFailedf(
"expected to find region on database %d during cleanup", dbDesc.GetID(),
)
}
dbDesc.RegionConfig.Regions = append(dbDesc.RegionConfig.Regions[:idx],
dbDesc.RegionConfig.Regions[idx+1:]...)
if err := descsCol.WriteDescToBatch(ctx, true /* kvTrace */, dbDesc, b); err != nil {
return err
}
}
}
}
}

// Next, deal with all members that we initially hoped to remove but
// now need to be promoted back to writable.
for i := range typeDesc.EnumMembers {
member := &typeDesc.EnumMembers[i]
if t.isTransitioningInCurrentJob(member) && enumMemberIsRemoving(member) {
member.Capability = descpb.TypeDescriptor_EnumMember_ALL
member.Direction = descpb.TypeDescriptor_EnumMember_NONE
}
}
// Now deal with all members that we initially hoped to add but now need
// to be removed from the descriptor.
applyFilterOnEnumMembers(typeDesc, func(member *descpb.TypeDescriptor_EnumMember) bool {
Expand Down