restore: remove tiflash replica before restore #194
Changes from 3 commits
@@ -84,13 +84,13 @@ func (db *DB) CreateDatabase(ctx context.Context, schema *model.DBInfo) error {
 // CreateTable executes a CREATE TABLE SQL.
 func (db *DB) CreateTable(ctx context.Context, table *utils.Table) error {
-    schema := table.Info
-    createSQL, err := db.se.ShowCreateTable(schema, newIDAllocator(schema.AutoIncID))
+    tableInfo := table.Info
+    createSQL, err := db.se.ShowCreateTable(tableInfo, newIDAllocator(tableInfo.AutoIncID))
     if err != nil {
         log.Error(
             "build create table SQL failed",
             zap.Stringer("db", table.Db.Name),
-            zap.Stringer("table", schema.Name),
+            zap.Stringer("table", tableInfo.Name),
             zap.Error(err))
         return errors.Trace(err)
     }
@@ -119,8 +119,8 @@ func (db *DB) CreateTable(ctx context.Context, table *utils.Table) error {
     }
     alterAutoIncIDSQL := fmt.Sprintf(
         "alter table %s auto_increment = %d",
-        utils.EncloseName(schema.Name.O),
-        schema.AutoIncID)
+        utils.EncloseName(tableInfo.Name.O),
+        tableInfo.AutoIncID)
     err = db.se.Execute(ctx, alterAutoIncIDSQL)
     if err != nil {
         log.Error("alter AutoIncID failed",
@@ -129,6 +129,21 @@ func (db *DB) CreateTable(ctx context.Context, table *utils.Table) error {
             zap.Stringer("table", table.Info.Name),
             zap.Error(err))
     }
+
+    // TODO: remove this after tiflash supports restore
+    removeTiFlashSQL := fmt.Sprintf(
+        "alter table %s set tiflash replica 0",
+        utils.EncloseName(tableInfo.Name.O),
+    )
+    err = db.se.Execute(ctx, removeTiFlashSQL)
+    if err != nil {
+        log.Error("remove tiflash replica failed",
+            zap.String("query", removeTiFlashSQL),
+            zap.Stringer("db", table.Db.Name),
+            zap.Stringer("table", table.Info.Name),
+            zap.Error(err))
+    }
+
     return errors.Trace(err)
 }

Review thread on `err = db.se.Execute(ctx, removeTiFlashSQL)`:

Reviewer: What if we ignore this error when TiDB does not support `set tiflash replica`?

Author: It would fail. Do we need to support the versions of TiDB that don't support TiFlash?
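The thread above leaves open how to behave on TiDB versions that predate TiFlash. A minimal sketch of the discussed fallback, with `execFunc` standing in for `db.se.Execute`; the matched error substring is an assumption, not a confirmed TiDB error message:

```go
package restore

import (
	"context"
	"strings"

	"github.com/pingcap/log"
	"go.uber.org/zap"
)

// execFunc abstracts db.se.Execute so the sketch stays self-contained.
type execFunc func(ctx context.Context, sql string) error

// removeTiFlashReplicaBestEffort runs the SET TIFLASH REPLICA 0 statement but
// downgrades "statement not supported" failures to a warning, so restores
// against older TiDB versions can proceed.
func removeTiFlashReplicaBestEffort(ctx context.Context, run execFunc, query string) error {
	err := run(ctx, query)
	if err == nil {
		return nil
	}
	// Hypothetical check: an old TiDB rejects the unknown syntax with a parse
	// error, which we treat here as "nothing to remove".
	if strings.Contains(err.Error(), "You have an error in your SQL syntax") {
		log.Warn("tidb does not support tiflash replica, skipping",
			zap.String("query", query), zap.Error(err))
		return nil
	}
	return err
}
```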
@@ -32,8 +32,11 @@ const (
     ScatterWaitMaxRetryTimes = 64
     ScatterWaitInterval      = 50 * time.Millisecond
     ScatterMaxWaitInterval   = time.Second

     ScatterWaitUpperInterval = 180 * time.Second

+    RejectStoreCheckRetryTimes  = 64
+    RejectStoreCheckInterval    = 100 * time.Millisecond
+    RejectStoreMaxCheckInterval = 2 * time.Second
 )

 // RegionSplitter is a executor of region split by rules.
@@ -60,16 +63,17 @@ func (rs *RegionSplitter) Split(
     ctx context.Context,
     ranges []rtree.Range,
     rewriteRules *RewriteRules,
+    rejectStores []uint64,
     onSplit OnSplitFunc,
 ) error {
     if len(ranges) == 0 {
         return nil
     }
     startTime := time.Now()
     // Sort the range for getting the min and max key of the ranges
-    sortedRanges, err := sortRanges(ranges, rewriteRules)
-    if err != nil {
-        return errors.Trace(err)
+    sortedRanges, errSplit := sortRanges(ranges, rewriteRules)
+    if errSplit != nil {
+        return errors.Trace(errSplit)
     }
     minKey := codec.EncodeBytes([]byte{}, sortedRanges[0].StartKey)
     maxKey := codec.EncodeBytes([]byte{}, sortedRanges[len(sortedRanges)-1].EndKey)
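A hypothetical caller of the new signature (the function name and `tiflashStoreIDs` are illustrative, not part of this PR; it assumes the package's `RegionSplitter`, `RewriteRules`, and `OnSplitFunc` types):

```go
// splitWithTiFlashRejected passes the TiFlash store IDs as rejectStores so
// that Split blocks until no region keeps a peer on those stores.
func splitWithTiFlashRejected(
	ctx context.Context,
	rs *RegionSplitter,
	ranges []rtree.Range,
	rules *RewriteRules,
	tiflashStoreIDs []uint64,
	onSplit OnSplitFunc,
) error {
	// An empty rejectStores slice keeps the old behaviour: split and scatter
	// without waiting for any store to be evacuated.
	return rs.Split(ctx, ranges, rules, tiflashStoreIDs, onSplit)
}
```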
@@ -91,12 +95,14 @@ func (rs *RegionSplitter) Split(
     }
     interval := SplitRetryInterval
     scatterRegions := make([]*RegionInfo, 0)
+    allRegions := make([]*RegionInfo, 0)
 SplitRegions:
     for i := 0; i < SplitRetryTimes; i++ {
-        regions, err1 := paginateScanRegion(ctx, rs.client, minKey, maxKey, scanRegionPaginationLimit)
-        if err1 != nil {
-            return errors.Trace(err1)
+        regions, errScan := paginateScanRegion(ctx, rs.client, minKey, maxKey, scanRegionPaginationLimit)
+        if errScan != nil {
+            return errors.Trace(errScan)
         }
+        allRegions = append(allRegions, regions...)
         if len(regions) == 0 {
             log.Warn("cannot scan any region")
             return nil
@@ -109,24 +115,24 @@ SplitRegions:
         for regionID, keys := range splitKeyMap {
             var newRegions []*RegionInfo
             region := regionMap[regionID]
-            newRegions, err = rs.splitAndScatterRegions(ctx, region, keys)
-            if err != nil {
-                if strings.Contains(err.Error(), "no valid key") {
+            newRegions, errSplit = rs.splitAndScatterRegions(ctx, region, keys)
+            if errSplit != nil {
+                if strings.Contains(errSplit.Error(), "no valid key") {
                     for _, key := range keys {
                         log.Error("no valid key",
                             zap.Binary("startKey", region.Region.StartKey),
                             zap.Binary("endKey", region.Region.EndKey),
                             zap.Binary("key", codec.EncodeBytes([]byte{}, key)))
                     }
-                    return errors.Trace(err)
+                    return errors.Trace(errSplit)
                 }
                 interval = 2 * interval
                 if interval > SplitMaxRetryInterval {
                     interval = SplitMaxRetryInterval
                 }
                 time.Sleep(interval)
                 if i > 3 {
-                    log.Warn("splitting regions failed, retry it", zap.Error(err), zap.ByteStrings("keys", keys))
+                    log.Warn("splitting regions failed, retry it", zap.Error(errSplit), zap.ByteStrings("keys", keys))
                 }
                 continue SplitRegions
             }
@@ -136,10 +142,27 @@ SplitRegions:
         }
         break
     }
-    if err != nil {
-        return errors.Trace(err)
+    if errSplit != nil {
+        return errors.Trace(errSplit)
     }
-    log.Info("splitting regions done, wait for scattering regions",
+    if len(rejectStores) > 0 {
+        startTime = time.Now()
+        log.Info("start to wait for removing rejected stores", zap.Uint64s("rejectStores", rejectStores))
+        storeMap := make(map[uint64]bool)
+        for _, storeID := range rejectStores {
+            storeMap[storeID] = true
+        }
+        for _, region := range allRegions {
+            if !rs.waitForRemoveRejectStores(ctx, region, storeMap) {
+                log.Error("waiting for removing rejected stores failed",
+                    zap.Stringer("region", region.Region))
+                return errors.New("waiting for removing rejected stores failed")
+            }
+        }
+        log.Info("waiting for removing rejected stores done",
+            zap.Int("regions", len(allRegions)), zap.Duration("take", time.Since(startTime)))
+    }
+    log.Info("start to wait for scattering regions",
         zap.Int("regions", len(scatterRegions)), zap.Duration("take", time.Since(startTime)))
     startTime = time.Now()
     scatterCount := 0
@@ -192,6 +215,30 @@ func (rs *RegionSplitter) isScatterRegionFinished(ctx context.Context, regionID
     return ok, nil
 }

+func (rs *RegionSplitter) hasRejectStorePeer(
+    ctx context.Context,
+    regionID uint64,
+    rejectStores map[uint64]bool,
+) (bool, error) {
+    regionInfo, err := rs.client.GetRegionByID(ctx, regionID)
+    if err != nil {
+        return false, err
+    }
+    if regionInfo == nil {
+        return false, nil
+    }
+    for _, peer := range regionInfo.Region.GetPeers() {
+        if rejectStores[peer.GetStoreId()] {
+            return true, nil
+        }
+    }
+    retryTimes := ctx.Value(retryTimes).(int)
+    if retryTimes > 10 {
+        log.Warn("get region info", zap.Stringer("region", regionInfo.Region))
+    }
+    return false, nil
+}
+
 func (rs *RegionSplitter) waitForSplit(ctx context.Context, regionID uint64) {
     interval := SplitCheckInterval
     for i := 0; i < SplitCheckMaxRetryTimes; i++ {

Review thread on the `retryTimes` block (lines +231 to +234):

Reviewer: Could you handle the retry outside of the method?

Author: No, the region info would be printed here.
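The reviewer's alternative, sketched: pass the attempt count as an explicit parameter instead of threading it through `context.Value`. The author's debug logging stays inside the method, and the `interface{}` type assertion disappears (`hasRejectStorePeerAttempt` is an illustrative name; the types are the package's own):

```go
func (rs *RegionSplitter) hasRejectStorePeerAttempt(
	ctx context.Context,
	regionID uint64,
	rejectStores map[uint64]bool,
	attempt int,
) (bool, error) {
	regionInfo, err := rs.client.GetRegionByID(ctx, regionID)
	if err != nil {
		return false, err
	}
	if regionInfo == nil {
		return false, nil
	}
	for _, peer := range regionInfo.Region.GetPeers() {
		if rejectStores[peer.GetStoreId()] {
			// A peer still lives on a rejected store.
			return true, nil
		}
	}
	// After enough attempts, surface the region state for debugging, as the
	// original does via the context value.
	if attempt > 10 {
		log.Warn("get region info", zap.Stringer("region", regionInfo.Region))
	}
	return false, nil
}
```

The wait loop would then call `rs.hasRejectStorePeerAttempt(ctx, regionID, rejectStores, i)` directly and drop the `context.WithValue` wrapper.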
@@ -237,6 +284,36 @@ func (rs *RegionSplitter) waitForScatterRegion(ctx context.Context, regionInfo *
     }
 }

+func (rs *RegionSplitter) waitForRemoveRejectStores(
+    ctx context.Context,
+    regionInfo *RegionInfo,
+    rejectStores map[uint64]bool,
+) bool {
+    interval := RejectStoreCheckInterval
+    regionID := regionInfo.Region.GetId()
+    for i := 0; i < RejectStoreCheckRetryTimes; i++ {
+        ctx1 := context.WithValue(ctx, retryTimes, i)
+        ok, err := rs.hasRejectStorePeer(ctx1, regionID, rejectStores)
+        if err != nil {
+            log.Warn("wait for rejecting store failed",
+                zap.Stringer("region", regionInfo.Region),
+                zap.Error(err))
+            return false
+        }
+        // Do not have any peer in the rejected store, return true
+        if !ok {
+            return true
+        }
+        interval = 2 * interval
+        if interval > RejectStoreMaxCheckInterval {
+            interval = RejectStoreMaxCheckInterval
+        }
+        time.Sleep(interval)
+    }
+
+    return false
+}
+
 func (rs *RegionSplitter) splitAndScatterRegions(
     ctx context.Context, regionInfo *RegionInfo, keys [][]byte,
 ) ([]*RegionInfo, error) {
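The split, scatter, and reject-store loops all repeat the same doubling, capped backoff. A small helper expressing that pattern (a sketch, not part of the PR):

```go
import "time"

// backoff doubles the wait interval up to a fixed cap: the retry pattern
// repeated by the loops in this file.
func backoff(interval, max time.Duration) time.Duration {
	interval *= 2
	if interval > max {
		interval = max
	}
	return interval
}
```

With it, the loop body above reduces to `interval = backoff(interval, RejectStoreMaxCheckInterval)` followed by `time.Sleep(interval)`.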
Review comment:

Reviewer: Could you add an option to `GetAllTiKVStores` to return TiFlash stores only?
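One shape the requested option could take, assuming TiFlash stores carry the `engine=tiflash` store label in PD (the function and parameter names are illustrative, not an existing BR API):

```go
import "github.com/pingcap/kvproto/pkg/metapb"

// filterStores keeps only TiFlash stores (tiflashOnly == true) or only
// non-TiFlash stores (tiflashOnly == false), assuming PD attaches the
// engine=tiflash label to every TiFlash store.
func filterStores(stores []*metapb.Store, tiflashOnly bool) []*metapb.Store {
	filtered := make([]*metapb.Store, 0, len(stores))
	for _, store := range stores {
		isTiFlash := false
		for _, label := range store.GetLabels() {
			if label.GetKey() == "engine" && label.GetValue() == "tiflash" {
				isTiFlash = true
				break
			}
		}
		if isTiFlash == tiflashOnly {
			filtered = append(filtered, store)
		}
	}
	return filtered
}
```

The TiFlash-only variant is what Split's new rejectStores argument would be fed from, taking the IDs via `store.GetId()`.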