Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Commit

Permalink
Merge pull request #247 from github/promotion-rule-prefer-not
Browse files Browse the repository at this point in the history
supporting prefer_not promotion rule
  • Loading branch information
Shlomi Noach authored Aug 9, 2017
2 parents 92aed4a + 2f51863 commit 3b3ed84
Show file tree
Hide file tree
Showing 7 changed files with 137 additions and 19 deletions.
6 changes: 3 additions & 3 deletions go/cmd/orchestrator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func main() {
config.RuntimeCLIFlags.BinlogFile = flag.String("binlog", "", "Binary log file name")
config.RuntimeCLIFlags.Statement = flag.String("statement", "", "Statement/hint")
config.RuntimeCLIFlags.GrabElection = flag.Bool("grab-election", false, "Grab leadership (only applies to continuous mode)")
config.RuntimeCLIFlags.PromotionRule = flag.String("promotion-rule", "prefer", "Promotion rule for register-andidate (prefer|neutral|must_not)")
config.RuntimeCLIFlags.PromotionRule = flag.String("promotion-rule", "prefer", "Promotion rule for register-andidate (prefer|neutral|prefer_not|must_not)")
config.RuntimeCLIFlags.Version = flag.Bool("version", false, "Print version and exit")
config.RuntimeCLIFlags.SkipContinuousRegistration = flag.Bool("skip-continuous-registration", false, "Skip cli commands performaing continuous registration (to reduce orchestratrator backend db load")
config.RuntimeCLIFlags.EnableDatabaseUpdate = flag.Bool("enable-database-update", false, "Enable database update, overrides SkipOrchestratorDatabaseUpdate")
Expand All @@ -68,13 +68,13 @@ func main() {
log.Fatalf("-s and -d are synonyms, yet both were specified. You're probably doing the wrong thing.")
}
switch *config.RuntimeCLIFlags.PromotionRule {
case "prefer", "neutral", "must_not":
case "prefer", "neutral", "prefer_not", "must_not":
{
// OK
}
default:
{
log.Fatalf("-promotion-rule only supports prefer|neutral|must_not")
log.Fatalf("-promotion-rule only supports prefer|neutral|prefer_not|must_not")
}
}
if *destination == "" {
Expand Down
3 changes: 0 additions & 3 deletions go/db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -459,9 +459,6 @@ var generateSQLBase = []string{
`
DROP INDEX hostname_port_active_period_uidx ON topology_failure_detection
`,
`
CREATE UNIQUE INDEX hostname_port_active_period_uidx_topology_failure_detection ON topology_failure_detection (hostname, port, in_active_period, end_active_period_unixtime)
`,
`
DROP INDEX in_active_start_period_idx ON topology_failure_detection
`,
Expand Down
14 changes: 14 additions & 0 deletions go/inst/instance_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -1261,6 +1261,20 @@ func ReadClusterCandidateInstances(clusterName string) ([](*Instance), error) {
return readInstancesByCondition(condition, sqlutils.Args(clusterName), "")
}

// ReadClusterNeutralPromotionRuleInstances reads cluster instances whose promotion-rule is marked as 'neutral'
func ReadClusterNeutralPromotionRuleInstances(clusterName string) (neutralInstances [](*Instance), err error) {
instances, err := ReadClusterInstances(clusterName)
if err != nil {
return neutralInstances, err
}
for _, instance := range instances {
if instance.PromotionRule == NeutralPromoteRule {
neutralInstances = append(neutralInstances, instance)
}
}
return neutralInstances, nil
}

// filterOSCInstances will filter the given list such that only replicas fit for OSC control remain.
func filterOSCInstances(instances [](*Instance)) [](*Instance) {
result := [](*Instance){}
Expand Down
63 changes: 60 additions & 3 deletions go/inst/instance_topology_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,20 @@ func TestIsGenerallyValidAsCandidateReplica(t *testing.T) {
}

func TestIsBannedFromBeingCandidateReplica(t *testing.T) {
instances, _ := generateTestInstances()
for _, instance := range instances {
test.S(t).ExpectFalse(IsBannedFromBeingCandidateReplica(instance))
{
instances, _ := generateTestInstances()
for _, instance := range instances {
test.S(t).ExpectFalse(IsBannedFromBeingCandidateReplica(instance))
}
}
{
instances, _ := generateTestInstances()
for _, instance := range instances {
instance.PromotionRule = MustNotPromoteRule
}
for _, instance := range instances {
test.S(t).ExpectTrue(IsBannedFromBeingCandidateReplica(instance))
}
}
}

Expand Down Expand Up @@ -378,3 +389,49 @@ func TestChooseCandidateReplicaPriorityBinlogFormatRowOverrides(t *testing.T) {
test.S(t).ExpectEquals(len(laterReplicas), 3)
test.S(t).ExpectEquals(len(cannotReplicateReplicas), 2)
}

func TestChooseCandidateReplicaMustNotPromoteRule(t *testing.T) {
instances, instancesMap := generateTestInstances()
applyGeneralGoodToGoReplicationParams(instances)
instancesMap[i830Key.StringCode()].PromotionRule = MustNotPromoteRule
instances = sortedReplicas(instances, NoStopReplication)
candidate, aheadReplicas, equalReplicas, laterReplicas, cannotReplicateReplicas, err := chooseCandidateReplica(instances)
test.S(t).ExpectNil(err)
test.S(t).ExpectEquals(candidate.Key, i820Key)
test.S(t).ExpectEquals(len(aheadReplicas), 1)
test.S(t).ExpectEquals(len(equalReplicas), 0)
test.S(t).ExpectEquals(len(laterReplicas), 4)
test.S(t).ExpectEquals(len(cannotReplicateReplicas), 0)
}

func TestChooseCandidateReplicaPreferNotPromoteRule(t *testing.T) {
instances, instancesMap := generateTestInstances()
applyGeneralGoodToGoReplicationParams(instances)
instancesMap[i830Key.StringCode()].PromotionRule = MustNotPromoteRule
instancesMap[i820Key.StringCode()].PromotionRule = PreferNotPromoteRule
instances = sortedReplicas(instances, NoStopReplication)
candidate, aheadReplicas, equalReplicas, laterReplicas, cannotReplicateReplicas, err := chooseCandidateReplica(instances)
test.S(t).ExpectNil(err)
test.S(t).ExpectEquals(candidate.Key, i820Key)
test.S(t).ExpectEquals(len(aheadReplicas), 1)
test.S(t).ExpectEquals(len(equalReplicas), 0)
test.S(t).ExpectEquals(len(laterReplicas), 4)
test.S(t).ExpectEquals(len(cannotReplicateReplicas), 0)
}

func TestChooseCandidateReplicaPreferNotPromoteRule2(t *testing.T) {
instances, instancesMap := generateTestInstances()
applyGeneralGoodToGoReplicationParams(instances)
for _, instance := range instances {
instance.PromotionRule = PreferNotPromoteRule
}
instancesMap[i830Key.StringCode()].PromotionRule = MustNotPromoteRule
instances = sortedReplicas(instances, NoStopReplication)
candidate, aheadReplicas, equalReplicas, laterReplicas, cannotReplicateReplicas, err := chooseCandidateReplica(instances)
test.S(t).ExpectNil(err)
test.S(t).ExpectEquals(candidate.Key, i820Key)
test.S(t).ExpectEquals(len(aheadReplicas), 1)
test.S(t).ExpectEquals(len(equalReplicas), 0)
test.S(t).ExpectEquals(len(laterReplicas), 4)
test.S(t).ExpectEquals(len(cannotReplicateReplicas), 0)
}
4 changes: 2 additions & 2 deletions go/inst/promotion_rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ const (
// It returns an error if there is no known rule by the given name.
func ParseCandidatePromotionRule(ruleName string) (CandidatePromotionRule, error) {
switch ruleName {
case "prefer", "neutral", "must_not":
case "prefer", "neutral", "prefer_not", "must_not":
return CandidatePromotionRule(ruleName), nil
case "must", "prefer_not":
case "must":
return CandidatePromotionRule(""), fmt.Errorf("CandidatePromotionRule: %v not supported yet", ruleName)
default:
return CandidatePromotionRule(""), fmt.Errorf("Invalid CandidatePromotionRule: %v", ruleName)
Expand Down
63 changes: 55 additions & 8 deletions go/logic/topology_recovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -612,9 +612,11 @@ func replacePromotedReplicaWithCandidate(topologyRecovery *TopologyRecovery, dea
// The current logic is:
// - 1. we prefer to promote a "is_candidate" which is in the same DC & env as the dead intermediate master (or do nothing if the promtoed replica is such one)
// - 2. we prefer to promote a "is_candidate" which is in the same DC & env as the promoted replica (or do nothing if the promtoed replica is such one)
// - 3. keep to current choice
// - 3. we prefer to promote a "neutral" server over a "prefer_not"
// - 4. keep to current choice
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("checking if should replace promoted replica with a better candidate"))
if candidateInstanceKey == nil {
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("+ checking if promoted replica is the ideal candidate"))
if deadInstance, _, err := inst.ReadInstance(deadInstanceKey); err == nil && deadInstance != nil {
for _, candidateReplica := range candidateReplicas {
if promotedReplica.Key.Equals(&candidateReplica.Key) &&
Expand All @@ -630,20 +632,22 @@ func replacePromotedReplicaWithCandidate(topologyRecovery *TopologyRecovery, dea
// We didn't pick the ideal candidate; let's see if we can replace with a candidate from same DC and ENV
if candidateInstanceKey == nil {
// Try a candidate replica that is in same DC & env as the dead instance
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("+ searching for an ideal candidate"))
if deadInstance, _, err := inst.ReadInstance(deadInstanceKey); err == nil && deadInstance != nil {
for _, candidateReplica := range candidateReplicas {
if candidateReplica.DataCenter == deadInstance.DataCenter &&
candidateReplica.PhysicalEnvironment == deadInstance.PhysicalEnvironment &&
candidateReplica.MasterKey.Equals(&promotedReplica.Key) {
if canTakeOverPromotedServerAsMaster(candidateReplica, promotedReplica) &&
candidateReplica.DataCenter == deadInstance.DataCenter &&
candidateReplica.PhysicalEnvironment == deadInstance.PhysicalEnvironment {
// This would make a great candidate
candidateInstanceKey = &candidateReplica.Key
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("no candidate was offered for %+v but orchestrator picks %+v as candidate replacement, based on being in same DC & env as failed instance", promotedReplica.Key, candidateReplica.Key))
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("no candidate was offered for %+v but orchestrator picks %+v as candidate replacement, based on being in same DC & env as failed instance", *deadInstanceKey, candidateReplica.Key))
}
}
}
}
if candidateInstanceKey == nil {
// We cannot find a candidate in same DC and ENV as dead master
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("+ checking if promoted replica is an OK candidate"))
for _, candidateReplica := range candidateReplicas {
if promotedReplica.Key.Equals(&candidateReplica.Key) {
// Seems like we promoted a candidate replica (though not in same DC and ENV as dead master). Good enough.
Expand All @@ -656,24 +660,54 @@ func replacePromotedReplicaWithCandidate(topologyRecovery *TopologyRecovery, dea
// Still nothing?
if candidateInstanceKey == nil {
// Try a candidate replica that is in same DC & env as the promoted replica (our promoted replica is not an "is_candidate")
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("+ searching for a candidate"))
for _, candidateReplica := range candidateReplicas {
if promotedReplica.DataCenter == candidateReplica.DataCenter &&
promotedReplica.PhysicalEnvironment == candidateReplica.PhysicalEnvironment &&
candidateReplica.MasterKey.Equals(&promotedReplica.Key) {
if canTakeOverPromotedServerAsMaster(candidateReplica, promotedReplica) &&
promotedReplica.DataCenter == candidateReplica.DataCenter &&
promotedReplica.PhysicalEnvironment == candidateReplica.PhysicalEnvironment {
// OK, better than nothing
candidateInstanceKey = &candidateReplica.Key
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("no candidate was offered for %+v but orchestrator picks %+v as candidate replacement, based on being in same DC & env as promoted instance", promotedReplica.Key, candidateReplica.Key))
}
}
}

if promotedReplica.PromotionRule == inst.PreferNotPromoteRule {
neutralReplicas, _ := inst.ReadClusterNeutralPromotionRuleInstances(promotedReplica.ClusterName)

if candidateInstanceKey == nil {
// Still nothing? Then we didn't find a replica marked as "candidate". OK, further down the stream we have:
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("+ searching for a neutral server to replace a prefer_not, in same DC and env"))
for _, neutralReplica := range neutralReplicas {
if canTakeOverPromotedServerAsMaster(neutralReplica, promotedReplica) &&
promotedReplica.DataCenter == neutralReplica.DataCenter &&
promotedReplica.PhysicalEnvironment == neutralReplica.PhysicalEnvironment {
candidateInstanceKey = &neutralReplica.Key
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("no candidate was offered for %+v but orchestrator picks %+v as candidate replacement, based on being in same DC & env as promoted instance that has prefer_not promotion rule", promotedReplica.Key, neutralReplica.Key))
}
}
}
if candidateInstanceKey == nil {
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("+ searching for a neutral server to replace a prefer_not"))
for _, neutralReplica := range neutralReplicas {
if canTakeOverPromotedServerAsMaster(neutralReplica, promotedReplica) {
// OK, better than nothing
candidateInstanceKey = &neutralReplica.Key
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("no candidate was offered for %+v but orchestrator picks %+v as candidate replacement, based on promoted instance having prefer_not promotion rule", promotedReplica.Key, neutralReplica.Key))
}
}
}
}

// So do we have a candidate?
if candidateInstanceKey == nil {
// Found nothing. Stick with promoted replica
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("+ found no server to promote on top promoted replica"))
return promotedReplica, nil
}
if promotedReplica.Key.Equals(candidateInstanceKey) {
// Sanity. It IS the candidate, nothing to promote...
AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("+ sanity check: found our very own server to promote; doing nothing"))
return promotedReplica, nil
}

Expand Down Expand Up @@ -827,6 +861,19 @@ func isGenerallyValidAsWouldBeMaster(replica *inst.Instance, requireLogSlaveUpda
return true
}

func canTakeOverPromotedServerAsMaster(wantToTakeOver *inst.Instance, toBeTakenOver *inst.Instance) bool {
if !isGenerallyValidAsWouldBeMaster(wantToTakeOver, true) {
return false
}
if !wantToTakeOver.MasterKey.Equals(&toBeTakenOver.Key) {
return false
}
if canReplicate, _ := toBeTakenOver.CanReplicateFrom(wantToTakeOver); !canReplicate {
return false
}
return true
}

func GetBestMasterReplacementFromAmongItsReplicas(topologyRecovery *TopologyRecovery, master *inst.Instance, replicas [](*inst.Instance), requireLogSlaveUpdates bool) (replacement *inst.Instance, err error) {
// Sanity:
for _, replica := range replicas {
Expand Down
3 changes: 3 additions & 0 deletions resources/public/js/orchestrator.js
Original file line number Diff line number Diff line change
Expand Up @@ -819,6 +819,9 @@ function renderInstanceElement(popoverElement, instance, renderType) {
if (instance.IsCandidate) {
popoverElement.find("h3 div.pull-right").prepend('<span class="glyphicon glyphicon-heart" title="Candidate"></span> ');
}
if (instance.PromotionRule == "prefer_not") {
popoverElement.find("h3 div.pull-right").prepend('<span class="glyphicon glyphicon-thumbs-down" title="Prefer not promote"></span> ');
}
if (instance.PromotionRule == "must_not") {
popoverElement.find("h3 div.pull-right").prepend('<span class="glyphicon glyphicon-ban-circle" title="Must not promote"></span> ');
}
Expand Down

0 comments on commit 3b3ed84

Please sign in to comment.