Skip to content

Commit

Permalink
check: remove orphan peer only when the peers is greater than the rul…
Browse files Browse the repository at this point in the history
…e count (#7581)

close #7584

The healthy orphan peer should be the last one to be removed only if there are extra peers to keep the high availablility.

Signed-off-by: bufferflies <1045931706@qq.com>

Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com>
  • Loading branch information
bufferflies and ti-chi-bot[bot] authored Dec 20, 2023
1 parent cfbc9b9 commit 48dbce1
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 1 deletion.
4 changes: 3 additions & 1 deletion pkg/schedule/checker/rule_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,7 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg
}
}

extra := fit.ExtraCount()
// If hasUnhealthyFit is true, try to remove unhealthy orphan peers only if number of OrphanPeers is >= 2.
// Ref https://github.com/tikv/pd/issues/4045
if len(fit.OrphanPeers) >= 2 {
Expand All @@ -576,7 +577,8 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg
ruleCheckerRemoveOrphanPeerCounter.Inc()
return operator.CreateRemovePeerOperator("remove-unhealthy-orphan-peer", c.cluster, 0, region, orphanPeer.StoreId)
}
if hasHealthPeer {
// The healthy orphan peer can be removed to keep the high availability only if the peer count is greater than the rule requirement.
if hasHealthPeer && extra > 0 {
// there already exists a healthy orphan peer, so we can remove other orphan Peers.
ruleCheckerRemoveOrphanPeerCounter.Inc()
// if there exists a disconnected orphan peer, we will pick it to remove firstly.
Expand Down
36 changes: 36 additions & 0 deletions pkg/schedule/checker/rule_checker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2029,3 +2029,39 @@ func (suite *ruleCheckerTestAdvancedSuite) TestReplaceAnExistingPeerCases() {
suite.ruleManager.DeleteGroupBundle(groupName, false)
}
}

func (suite *ruleCheckerTestSuite) TestRemoveOrphanPeer() {
suite.cluster.AddLabelsStore(1, 1, map[string]string{"zone": "z1", "host": "h1"})
suite.cluster.AddLabelsStore(2, 1, map[string]string{"zone": "z1", "host": "h1"})
suite.cluster.AddLabelsStore(3, 1, map[string]string{"zone": "z1", "host": "h1"})
suite.cluster.AddLabelsStore(4, 1, map[string]string{"zone": "z2", "host": "h1"})
suite.cluster.AddLabelsStore(5, 1, map[string]string{"zone": "z2", "host": "h2"})
suite.cluster.AddLabelsStore(6, 1, map[string]string{"zone": "z2", "host": "h2"})
rule := &placement.Rule{
GroupID: "pd",
ID: "test2",
Role: placement.Voter,
Count: 3,
LabelConstraints: []placement.LabelConstraint{
{
Key: "zone",
Op: placement.In,
Values: []string{"z2"},
},
},
}
suite.ruleManager.SetRule(rule)
suite.ruleManager.DeleteRule("pd", "default")

// case1: regionA has 3 peers but not extra peer can be removed, so it needs to add peer first
suite.cluster.AddLeaderRegionWithRange(1, "200", "300", 1, 2, 3)
op := suite.rc.Check(suite.cluster.GetRegion(1))
suite.NotNil(op)
suite.Equal("add-rule-peer", op.Desc())

// case2: regionB has 4 peers and one extra peer can be removed, so it needs to remove extra peer first
suite.cluster.AddLeaderRegionWithRange(2, "300", "400", 1, 2, 3, 4)
op = suite.rc.Check(suite.cluster.GetRegion(2))
suite.NotNil(op)
suite.Equal("remove-orphan-peer", op.Desc())
}
9 changes: 9 additions & 0 deletions pkg/schedule/placement/fit.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,15 @@ func (f *RegionFit) IsSatisfied() bool {
return len(f.OrphanPeers) == 0
}

// ExtraCount return the extra count.
func (f *RegionFit) ExtraCount() int {
desired := 0
for _, r := range f.RuleFits {
desired += r.Rule.Count
}
return len(f.regionStores) - desired
}

// GetRuleFit returns the RuleFit that contains the peer.
func (f *RegionFit) GetRuleFit(peerID uint64) *RuleFit {
for _, rf := range f.RuleFits {
Expand Down

0 comments on commit 48dbce1

Please sign in to comment.