Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

br: reset cloud_admin and root after ebs restoration (#39986) #40448

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions br/pkg/restore/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ go_test(
"search_test.go",
"split_test.go",
"stream_metas_test.go",
"systable_restore_test.go",
"util_test.go",
],
embed = [":restore"],
Expand Down
103 changes: 90 additions & 13 deletions br/pkg/restore/systable_restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@ import (
"go.uber.org/zap"
)

const sysUserTableName = "user"
const (
rootUser = "root"
sysUserTableName = "user"
cloudAdminUser = "cloud_admin"
)

var statsTables = map[string]struct{}{
"stats_buckets": {},
Expand Down Expand Up @@ -51,14 +55,14 @@ var unRecoverableTable = map[string]struct{}{
// skip clearing or restoring 'cloud_admin'@'%' which is a special
// user on TiDB Cloud
var sysPrivilegeTableMap = map[string]string{
"user": "not (user = 'cloud_admin' and host = '%')", // since v1.0.0
"db": "not (user = 'cloud_admin' and host = '%')", // since v1.0.0
"tables_priv": "not (user = 'cloud_admin' and host = '%')", // since v1.0.0
"columns_priv": "not (user = 'cloud_admin' and host = '%')", // since v1.0.0
"default_roles": "not (user = 'cloud_admin' and host = '%')", // since v3.0.0
"role_edges": "not (to_user = 'cloud_admin' and to_host = '%')", // since v3.0.0
"global_priv": "not (user = 'cloud_admin' and host = '%')", // since v3.0.8
"global_grants": "not (user = 'cloud_admin' and host = '%')", // since v5.0.3
"user": "(user = '%s' and host = '%%')", // since v1.0.0
"db": "(user = '%s' and host = '%%')", // since v1.0.0
"tables_priv": "(user = '%s' and host = '%%')", // since v1.0.0
"columns_priv": "(user = '%s' and host = '%%')", // since v1.0.0
"default_roles": "(user = '%s' and host = '%%')", // since v3.0.0
"role_edges": "(to_user = '%s' and to_host = '%%')", // since v3.0.0
"global_priv": "(user = '%s' and host = '%%')", // since v3.0.8
"global_grants": "(user = '%s' and host = '%%')", // since v5.0.3
}

func isUnrecoverableTable(tableName string) bool {
Expand All @@ -71,6 +75,78 @@ func isStatsTable(tableName string) bool {
return ok
}

func generateResetSQLs(db *database, resetUsers []string) []string {
if db.Name.L != mysql.SystemDB {
return nil
}
sqls := make([]string, 0, 10)
// we only need reset root password once
rootReset := false
for tableName := range db.ExistingTables {
if sysPrivilegeTableMap[tableName] != "" {
for _, name := range resetUsers {
if strings.ToLower(name) == rootUser {
if !rootReset {
updateSQL := fmt.Sprintf("UPDATE %s.%s SET authentication_string='',"+
" Shutdown_priv='Y',"+
" Config_priv='Y'"+
" WHERE USER='root' AND Host='%%';",
db.Name.L, sysUserTableName)
sqls = append(sqls, updateSQL)
rootReset = true
} else {
continue
}
} else {
/* #nosec G202: SQL string concatenation */
whereClause := fmt.Sprintf("WHERE "+sysPrivilegeTableMap[tableName], name)
deleteSQL := fmt.Sprintf("DELETE FROM %s %s;",
utils.EncloseDBAndTable(db.Name.L, tableName), whereClause)
sqls = append(sqls, deleteSQL)
}
}
}
}
return sqls
}

// ClearSystemUsers is used for volume-snapshot restoration.
// because we can not support restore user in some scenarios, for example in cloud.
// we'd better use this function to drop cloud_admin user after volume-snapshot restore.
func (rc *Client) ClearSystemUsers(ctx context.Context, resetUsers []string) error {
sysDB := mysql.SystemDB
db, ok := rc.getDatabaseByName(sysDB)
if !ok {
log.Warn("target database not exist, aborting", zap.String("database", sysDB))
return nil
}
execSQL := func(sql string) error {
// SQLs here only contain table name and database name, seems it is no need to redact them.
if err := rc.db.se.Execute(ctx, sql); err != nil {
log.Warn("failed to clear system users",
zap.Stringer("database", db.Name),
zap.String("sql", sql),
zap.Error(err),
)
return berrors.ErrUnknown.Wrap(err).GenWithStack("failed to execute %s", sql)
}
log.Info("successfully clear system users after restoration",
zap.Stringer("database", db.Name),
zap.String("sql", sql),
)
return nil
}

sqls := generateResetSQLs(db, resetUsers)
for _, sql := range sqls {
log.Info("reset system user for cloud", zap.String("sql", sql))
if err := execSQL(sql); err != nil {
return err
}
}
return nil
}

// RestoreSystemSchemas restores the system schema(i.e. the `mysql` schema).
// Detail see https://github.com/pingcap/br/issues/679#issuecomment-762592254.
func (rc *Client) RestoreSystemSchemas(ctx context.Context, f filter.Filter) {
Expand Down Expand Up @@ -203,14 +279,15 @@ func (rc *Client) replaceTemporaryTableToSystable(ctx context.Context, ti *model
}

if db.ExistingTables[tableName] != nil {
whereClause := ""
whereNotClause := ""
if rc.fullClusterRestore && sysPrivilegeTableMap[tableName] != "" {
// cloud_admin is a special user on tidb cloud, need to skip it.
whereClause = fmt.Sprintf("WHERE %s", sysPrivilegeTableMap[tableName])
/* #nosec G202: SQL string concatenation */
whereNotClause = fmt.Sprintf("WHERE NOT "+sysPrivilegeTableMap[tableName], cloudAdminUser)
log.Info("full cluster restore, delete existing data",
zap.String("table", tableName), zap.Stringer("schema", db.Name))
deleteSQL := fmt.Sprintf("DELETE FROM %s %s;",
utils.EncloseDBAndTable(db.Name.L, tableName), whereClause)
utils.EncloseDBAndTable(db.Name.L, tableName), whereNotClause)
if err := execSQL(deleteSQL); err != nil {
return err
}
Expand All @@ -228,7 +305,7 @@ func (rc *Client) replaceTemporaryTableToSystable(ctx context.Context, ti *model
utils.EncloseDBAndTable(db.Name.L, tableName),
colListStr, colListStr,
utils.EncloseDBAndTable(db.TemporaryName.L, tableName),
whereClause)
whereNotClause)
return execSQL(replaceIntoSQL)
}

Expand Down
72 changes: 72 additions & 0 deletions br/pkg/restore/systable_restore_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package restore

import (
"regexp"
"testing"

"github.com/pingcap/tidb/br/pkg/utils"
"github.com/pingcap/tidb/parser/model"
"github.com/stretchr/testify/require"
)

func testTableInfo(name string) *model.TableInfo {
return &model.TableInfo{
Name: model.NewCIStr(name),
}
}

func TestGenerateResetSQL(t *testing.T) {
// case #1: ignore non-mysql databases
mockDB := &database{
ExistingTables: map[string]*model.TableInfo{},
Name: model.NewCIStr("non-mysql"),
TemporaryName: utils.TemporaryDBName("non-mysql"),
}
for name := range sysPrivilegeTableMap {
mockDB.ExistingTables[name] = testTableInfo(name)
}
resetUsers := []string{"cloud_admin", "root"}
require.Equal(t, 0, len(generateResetSQLs(mockDB, resetUsers)))

// case #2: ignore non expected table
mockDB = &database{
ExistingTables: map[string]*model.TableInfo{},
Name: model.NewCIStr("mysql"),
TemporaryName: utils.TemporaryDBName("mysql"),
}
for name := range sysPrivilegeTableMap {
name += "non_available"
mockDB.ExistingTables[name] = testTableInfo(name)
}
resetUsers = []string{"cloud_admin", "root"}
require.Equal(t, 0, len(generateResetSQLs(mockDB, resetUsers)))

// case #3: only reset cloud admin account
for name := range sysPrivilegeTableMap {
mockDB.ExistingTables[name] = testTableInfo(name)
}
resetUsers = []string{"cloud_admin"}
sqls := generateResetSQLs(mockDB, resetUsers)
require.Equal(t, 8, len(sqls))
for _, sql := range sqls {
// for cloud_admin we only generate DELETE sql
require.Regexp(t, regexp.MustCompile("DELETE*"), sql)
}

// case #4: reset cloud admin/other account
resetUsers = []string{"cloud_admin", "cloud_other"}
sqls = generateResetSQLs(mockDB, resetUsers)
require.Equal(t, 16, len(sqls))
for _, sql := range sqls {
// for cloud_admin/cloud_other we only generate DELETE sql
require.Regexp(t, regexp.MustCompile("DELETE*"), sql)
}

// case #5: reset cloud admin && root account
resetUsers = []string{"cloud_admin", "root"}
sqls = generateResetSQLs(mockDB, resetUsers)
// 8 DELETE sqls for cloud admin and 1 UPDATE sql for root
require.Equal(t, 9, len(sqls))
}
14 changes: 12 additions & 2 deletions br/pkg/task/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ const (
FlagPiTRBatchSize = "pitr-batch-size"
FlagPiTRConcurrency = "pitr-concurrency"

FlagResetSysUsers = "reset-sys-users"

defaultPiTRBatchCount = 8
defaultPiTRBatchSize = 16 * 1024 * 1024
defaultRestoreConcurrency = 128
Expand Down Expand Up @@ -93,6 +95,8 @@ type RestoreCommonConfig struct {

// determines whether enable restore sys table on default, see fullClusterRestore in restore/client.go
WithSysTable bool `json:"with-sys-table" toml:"with-sys-table"`

ResetSysUsers []string `json:"reset-sys-users" toml:"reset-sys-users"`
}

// adjust adjusts the abnormal config value in the current config.
Expand All @@ -118,10 +122,12 @@ func DefineRestoreCommonFlags(flags *pflag.FlagSet) {
flags.Uint(FlagPDConcurrency, defaultPDConcurrency,
"concurrency pd-relative operations like split & scatter.")
flags.Duration(FlagBatchFlushInterval, defaultBatchFlushInterval,
"after how long a restore batch would be auto sended.")
"after how long a restore batch would be auto sent.")
flags.Uint(FlagDdlBatchSize, defaultFlagDdlBatchSize,
"batch size for ddl to create a batch of tabes once.")
"batch size for ddl to create a batch of tables once.")
flags.Bool(flagWithSysTable, false, "whether restore system privilege tables on default setting")
flags.StringArrayP(FlagResetSysUsers, "", []string{"cloud_admin", "root"}, "whether reset these users after restoration")
_ = flags.MarkHidden(FlagResetSysUsers)
_ = flags.MarkHidden(FlagMergeRegionSizeBytes)
_ = flags.MarkHidden(FlagMergeRegionKeyCount)
_ = flags.MarkHidden(FlagPDConcurrency)
Expand Down Expand Up @@ -150,6 +156,10 @@ func (cfg *RestoreCommonConfig) ParseFromFlags(flags *pflag.FlagSet) error {
return errors.Trace(err)
}
}
cfg.ResetSysUsers, err = flags.GetStringArray(FlagResetSysUsers)
if err != nil {
return errors.Trace(err)
}
return errors.Trace(err)
}

Expand Down
18 changes: 18 additions & 0 deletions br/pkg/task/restore_data.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/pingcap/tidb/br/pkg/storage"
"github.com/pingcap/tidb/br/pkg/summary"
"github.com/pingcap/tidb/br/pkg/utils"
tidbconfig "github.com/pingcap/tidb/config"
"go.uber.org/zap"
)

Expand Down Expand Up @@ -71,6 +72,11 @@ func RunResolveKvData(c context.Context, g glue.Glue, cmdName string, cfg *Resto
defer mgr.Close()

keepaliveCfg.PermitWithoutStream = true
tc := tidbconfig.GetGlobalConfig()
tc.SkipRegisterToDashboard = true
tc.EnableGlobalKill = false
tidbconfig.StoreGlobalConfig(tc)

client := restore.NewRestoreClient(mgr.GetPDClient(), mgr.GetTLSConfig(), keepaliveCfg, false)

restoreTS, err := client.GetTS(ctx)
Expand Down Expand Up @@ -153,6 +159,18 @@ func RunResolveKvData(c context.Context, g glue.Glue, cmdName string, cfg *Resto

//TODO: restore volume type into origin type
//ModifyVolume(*ec2.ModifyVolumeInput) (*ec2.ModifyVolumeOutput, error) by backupmeta
// this is used for cloud restoration
err = client.Init(g, mgr.GetStorage())
if err != nil {
return errors.Trace(err)
}
defer client.Close()
log.Info("start to clear system user for cloud")
err = client.ClearSystemUsers(ctx, cfg.ResetSysUsers)

if err != nil {
return errors.Trace(err)
}

progress.Close()
summary.CollectDuration("restore duration", time.Since(startAll))
Expand Down