Skip to content

Commit

Permalink
br: reset cloud_admin and root after ebs restoration (#39986)
Browse files Browse the repository at this point in the history
close #40418
  • Loading branch information
3pointer authored Jan 10, 2023
1 parent 3f223b3 commit be31b6c
Show file tree
Hide file tree
Showing 5 changed files with 193 additions and 15 deletions.
1 change: 1 addition & 0 deletions br/pkg/restore/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ go_test(
"search_test.go",
"split_test.go",
"stream_metas_test.go",
"systable_restore_test.go",
"util_test.go",
],
embed = [":restore"],
Expand Down
103 changes: 90 additions & 13 deletions br/pkg/restore/systable_restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@ import (
"go.uber.org/zap"
)

const sysUserTableName = "user"
const (
rootUser = "root"
sysUserTableName = "user"
cloudAdminUser = "cloud_admin"
)

var statsTables = map[string]struct{}{
"stats_buckets": {},
Expand Down Expand Up @@ -51,14 +55,14 @@ var unRecoverableTable = map[string]struct{}{
// skip clearing or restoring 'cloud_admin'@'%' which is a special
// user on TiDB Cloud
var sysPrivilegeTableMap = map[string]string{
"user": "not (user = 'cloud_admin' and host = '%')", // since v1.0.0
"db": "not (user = 'cloud_admin' and host = '%')", // since v1.0.0
"tables_priv": "not (user = 'cloud_admin' and host = '%')", // since v1.0.0
"columns_priv": "not (user = 'cloud_admin' and host = '%')", // since v1.0.0
"default_roles": "not (user = 'cloud_admin' and host = '%')", // since v3.0.0
"role_edges": "not (to_user = 'cloud_admin' and to_host = '%')", // since v3.0.0
"global_priv": "not (user = 'cloud_admin' and host = '%')", // since v3.0.8
"global_grants": "not (user = 'cloud_admin' and host = '%')", // since v5.0.3
"user": "(user = '%s' and host = '%%')", // since v1.0.0
"db": "(user = '%s' and host = '%%')", // since v1.0.0
"tables_priv": "(user = '%s' and host = '%%')", // since v1.0.0
"columns_priv": "(user = '%s' and host = '%%')", // since v1.0.0
"default_roles": "(user = '%s' and host = '%%')", // since v3.0.0
"role_edges": "(to_user = '%s' and to_host = '%%')", // since v3.0.0
"global_priv": "(user = '%s' and host = '%%')", // since v3.0.8
"global_grants": "(user = '%s' and host = '%%')", // since v5.0.3
}

func isUnrecoverableTable(tableName string) bool {
Expand All @@ -71,6 +75,78 @@ func isStatsTable(tableName string) bool {
return ok
}

func generateResetSQLs(db *database, resetUsers []string) []string {
if db.Name.L != mysql.SystemDB {
return nil
}
sqls := make([]string, 0, 10)
// we only need reset root password once
rootReset := false
for tableName := range db.ExistingTables {
if sysPrivilegeTableMap[tableName] != "" {
for _, name := range resetUsers {
if strings.ToLower(name) == rootUser {
if !rootReset {
updateSQL := fmt.Sprintf("UPDATE %s.%s SET authentication_string='',"+
" Shutdown_priv='Y',"+
" Config_priv='Y'"+
" WHERE USER='root' AND Host='%%';",
db.Name.L, sysUserTableName)
sqls = append(sqls, updateSQL)
rootReset = true
} else {
continue
}
} else {
/* #nosec G202: SQL string concatenation */
whereClause := fmt.Sprintf("WHERE "+sysPrivilegeTableMap[tableName], name)
deleteSQL := fmt.Sprintf("DELETE FROM %s %s;",
utils.EncloseDBAndTable(db.Name.L, tableName), whereClause)
sqls = append(sqls, deleteSQL)
}
}
}
}
return sqls
}

// ClearSystemUsers is used for volume-snapshot restoration.
// because we can not support restore user in some scenarios, for example in cloud.
// we'd better use this function to drop cloud_admin user after volume-snapshot restore.
func (rc *Client) ClearSystemUsers(ctx context.Context, resetUsers []string) error {
sysDB := mysql.SystemDB
db, ok := rc.getDatabaseByName(sysDB)
if !ok {
log.Warn("target database not exist, aborting", zap.String("database", sysDB))
return nil
}
execSQL := func(sql string) error {
// SQLs here only contain table name and database name, seems it is no need to redact them.
if err := rc.db.se.Execute(ctx, sql); err != nil {
log.Warn("failed to clear system users",
zap.Stringer("database", db.Name),
zap.String("sql", sql),
zap.Error(err),
)
return berrors.ErrUnknown.Wrap(err).GenWithStack("failed to execute %s", sql)
}
log.Info("successfully clear system users after restoration",
zap.Stringer("database", db.Name),
zap.String("sql", sql),
)
return nil
}

sqls := generateResetSQLs(db, resetUsers)
for _, sql := range sqls {
log.Info("reset system user for cloud", zap.String("sql", sql))
if err := execSQL(sql); err != nil {
return err
}
}
return nil
}

// RestoreSystemSchemas restores the system schema(i.e. the `mysql` schema).
// Detail see https://github.com/pingcap/br/issues/679#issuecomment-762592254.
func (rc *Client) RestoreSystemSchemas(ctx context.Context, f filter.Filter) {
Expand Down Expand Up @@ -203,14 +279,15 @@ func (rc *Client) replaceTemporaryTableToSystable(ctx context.Context, ti *model
}

if db.ExistingTables[tableName] != nil {
whereClause := ""
whereNotClause := ""
if rc.fullClusterRestore && sysPrivilegeTableMap[tableName] != "" {
// cloud_admin is a special user on tidb cloud, need to skip it.
whereClause = fmt.Sprintf("WHERE %s", sysPrivilegeTableMap[tableName])
/* #nosec G202: SQL string concatenation */
whereNotClause = fmt.Sprintf("WHERE NOT "+sysPrivilegeTableMap[tableName], cloudAdminUser)
log.Info("full cluster restore, delete existing data",
zap.String("table", tableName), zap.Stringer("schema", db.Name))
deleteSQL := fmt.Sprintf("DELETE FROM %s %s;",
utils.EncloseDBAndTable(db.Name.L, tableName), whereClause)
utils.EncloseDBAndTable(db.Name.L, tableName), whereNotClause)
if err := execSQL(deleteSQL); err != nil {
return err
}
Expand All @@ -228,7 +305,7 @@ func (rc *Client) replaceTemporaryTableToSystable(ctx context.Context, ti *model
utils.EncloseDBAndTable(db.Name.L, tableName),
colListStr, colListStr,
utils.EncloseDBAndTable(db.TemporaryName.L, tableName),
whereClause)
whereNotClause)
return execSQL(replaceIntoSQL)
}

Expand Down
72 changes: 72 additions & 0 deletions br/pkg/restore/systable_restore_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package restore

import (
"regexp"
"testing"

"github.com/pingcap/tidb/br/pkg/utils"
"github.com/pingcap/tidb/parser/model"
"github.com/stretchr/testify/require"
)

func testTableInfo(name string) *model.TableInfo {
return &model.TableInfo{
Name: model.NewCIStr(name),
}
}

func TestGenerateResetSQL(t *testing.T) {
// case #1: ignore non-mysql databases
mockDB := &database{
ExistingTables: map[string]*model.TableInfo{},
Name: model.NewCIStr("non-mysql"),
TemporaryName: utils.TemporaryDBName("non-mysql"),
}
for name := range sysPrivilegeTableMap {
mockDB.ExistingTables[name] = testTableInfo(name)
}
resetUsers := []string{"cloud_admin", "root"}
require.Equal(t, 0, len(generateResetSQLs(mockDB, resetUsers)))

// case #2: ignore non expected table
mockDB = &database{
ExistingTables: map[string]*model.TableInfo{},
Name: model.NewCIStr("mysql"),
TemporaryName: utils.TemporaryDBName("mysql"),
}
for name := range sysPrivilegeTableMap {
name += "non_available"
mockDB.ExistingTables[name] = testTableInfo(name)
}
resetUsers = []string{"cloud_admin", "root"}
require.Equal(t, 0, len(generateResetSQLs(mockDB, resetUsers)))

// case #3: only reset cloud admin account
for name := range sysPrivilegeTableMap {
mockDB.ExistingTables[name] = testTableInfo(name)
}
resetUsers = []string{"cloud_admin"}
sqls := generateResetSQLs(mockDB, resetUsers)
require.Equal(t, 8, len(sqls))
for _, sql := range sqls {
// for cloud_admin we only generate DELETE sql
require.Regexp(t, regexp.MustCompile("DELETE*"), sql)
}

// case #4: reset cloud admin/other account
resetUsers = []string{"cloud_admin", "cloud_other"}
sqls = generateResetSQLs(mockDB, resetUsers)
require.Equal(t, 16, len(sqls))
for _, sql := range sqls {
// for cloud_admin/cloud_other we only generate DELETE sql
require.Regexp(t, regexp.MustCompile("DELETE*"), sql)
}

// case #5: reset cloud admin && root account
resetUsers = []string{"cloud_admin", "root"}
sqls = generateResetSQLs(mockDB, resetUsers)
// 8 DELETE sqls for cloud admin and 1 UPDATE sql for root
require.Equal(t, 9, len(sqls))
}
14 changes: 12 additions & 2 deletions br/pkg/task/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ const (
FlagPiTRBatchSize = "pitr-batch-size"
FlagPiTRConcurrency = "pitr-concurrency"

FlagResetSysUsers = "reset-sys-users"

defaultPiTRBatchCount = 8
defaultPiTRBatchSize = 16 * 1024 * 1024
defaultRestoreConcurrency = 128
Expand Down Expand Up @@ -93,6 +95,8 @@ type RestoreCommonConfig struct {

// determines whether enable restore sys table on default, see fullClusterRestore in restore/client.go
WithSysTable bool `json:"with-sys-table" toml:"with-sys-table"`

ResetSysUsers []string `json:"reset-sys-users" toml:"reset-sys-users"`
}

// adjust adjusts the abnormal config value in the current config.
Expand All @@ -118,10 +122,12 @@ func DefineRestoreCommonFlags(flags *pflag.FlagSet) {
flags.Uint(FlagPDConcurrency, defaultPDConcurrency,
"concurrency pd-relative operations like split & scatter.")
flags.Duration(FlagBatchFlushInterval, defaultBatchFlushInterval,
"after how long a restore batch would be auto sended.")
"after how long a restore batch would be auto sent.")
flags.Uint(FlagDdlBatchSize, defaultFlagDdlBatchSize,
"batch size for ddl to create a batch of tabes once.")
"batch size for ddl to create a batch of tables once.")
flags.Bool(flagWithSysTable, false, "whether restore system privilege tables on default setting")
flags.StringArrayP(FlagResetSysUsers, "", []string{"cloud_admin", "root"}, "whether reset these users after restoration")
_ = flags.MarkHidden(FlagResetSysUsers)
_ = flags.MarkHidden(FlagMergeRegionSizeBytes)
_ = flags.MarkHidden(FlagMergeRegionKeyCount)
_ = flags.MarkHidden(FlagPDConcurrency)
Expand Down Expand Up @@ -150,6 +156,10 @@ func (cfg *RestoreCommonConfig) ParseFromFlags(flags *pflag.FlagSet) error {
return errors.Trace(err)
}
}
cfg.ResetSysUsers, err = flags.GetStringArray(FlagResetSysUsers)
if err != nil {
return errors.Trace(err)
}
return errors.Trace(err)
}

Expand Down
18 changes: 18 additions & 0 deletions br/pkg/task/restore_data.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/pingcap/tidb/br/pkg/storage"
"github.com/pingcap/tidb/br/pkg/summary"
"github.com/pingcap/tidb/br/pkg/utils"
tidbconfig "github.com/pingcap/tidb/config"
"go.uber.org/zap"
)

Expand Down Expand Up @@ -71,6 +72,11 @@ func RunResolveKvData(c context.Context, g glue.Glue, cmdName string, cfg *Resto
defer mgr.Close()

keepaliveCfg.PermitWithoutStream = true
tc := tidbconfig.GetGlobalConfig()
tc.SkipRegisterToDashboard = true
tc.EnableGlobalKill = false
tidbconfig.StoreGlobalConfig(tc)

client := restore.NewRestoreClient(mgr.GetPDClient(), mgr.GetTLSConfig(), keepaliveCfg, false)

restoreTS, err := client.GetTS(ctx)
Expand Down Expand Up @@ -153,6 +159,18 @@ func RunResolveKvData(c context.Context, g glue.Glue, cmdName string, cfg *Resto

//TODO: restore volume type into origin type
//ModifyVolume(*ec2.ModifyVolumeInput) (*ec2.ModifyVolumeOutput, error) by backupmeta
// this is used for cloud restoration
err = client.Init(g, mgr.GetStorage())
if err != nil {
return errors.Trace(err)
}
defer client.Close()
log.Info("start to clear system user for cloud")
err = client.ClearSystemUsers(ctx, cfg.ResetSysUsers)

if err != nil {
return errors.Trace(err)
}

// since we cannot reset tiflash automaticlly. so we should start it manually
if err = client.ResetTiFlashReplicas(ctx, g, mgr.GetStorage()); err != nil {
Expand Down

0 comments on commit be31b6c

Please sign in to comment.