From 13191fa15cba13cda72b0ddcc0496e6d85e185de Mon Sep 17 00:00:00 2001
From: Shlomi Noach <shlomi-noach@github.com>
Date: Wed, 7 Jun 2017 16:58:49 +0300
Subject: [PATCH 1/6] support for force-master-failover command

---
 go/app/cli.go                 |  9 +++++++++
 go/logic/topology_recovery.go | 27 +++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/go/app/cli.go b/go/app/cli.go
index 5f6037ee4..755b37b8d 100644
--- a/go/app/cli.go
+++ b/go/app/cli.go
@@ -1384,6 +1384,15 @@ func Cli(command string, strict bool, instance string, destination string, owner
 				fmt.Println(promotedInstanceKey.DisplayString())
 			}
 		}
+	case registerCliCommand("force-master-failover", "Recovery", `Forcibly discard master and initiate a failover, even if orchestrator doesn't see a problem. This command lets orchestrator choose the replacement master`):
+		{
+			clusterName := getClusterName(clusterAlias, instanceKey)
+			topologyRecovery, err := logic.ForceMasterFailover(clusterName)
+			if err != nil {
+				log.Fatale(err)
+			}
+			fmt.Println(topologyRecovery.SuccessorKey.DisplayString())
+		}
 	case registerCliCommand("force-master-takeover", "Recovery", `Forcibly discard master and promote another (direct child) instance instead, even if everything is running well`):
 		{
 			clusterName := getClusterName(clusterAlias, instanceKey)
diff --git a/go/logic/topology_recovery.go b/go/logic/topology_recovery.go
index 7737e5810..ffd6d38f9 100644
--- a/go/logic/topology_recovery.go
+++ b/go/logic/topology_recovery.go
@@ -1403,6 +1403,33 @@ func ForceExecuteRecovery(clusterName string, analysisCode inst.AnalysisCode, fa
 	return executeCheckAndRecoverFunction(analysisEntry, candidateInstanceKey, true, skipProcesses)
 }
 
+// ForceMasterFailover *trusts* master of given cluster is dead and initiates a failover. It returns an error unless exactly one master is read for clusterName and the recovery yields a non-nil SuccessorKey.
+func ForceMasterFailover(clusterName string) (topologyRecovery *TopologyRecovery, err error) {
+	clusterMasters, err := inst.ReadClusterWriteableMaster(clusterName)
+	if err != nil {
+		return nil, fmt.Errorf("Cannot deduce cluster master for %+v: %+v", clusterName, err)
+	}
+	if len(clusterMasters) != 1 {
+		return nil, fmt.Errorf("Cannot deduce cluster master for %+v: found %d writeable masters", clusterName, len(clusterMasters))
+	}
+	clusterMaster := clusterMasters[0]
+	// Run a forced recovery with the DeadMaster analysis code against that single master.
+	recoveryAttempted, topologyRecovery, err := ForceExecuteRecovery(clusterName, inst.DeadMaster, &clusterMaster.Key, nil, false)
+	if err != nil {
+		return nil, err
+	}
+	if !recoveryAttempted {
+		return nil, fmt.Errorf("Unexpected error: recovery not attempted. This should not happen")
+	}
+	if topologyRecovery == nil {
+		return nil, fmt.Errorf("Recovery attempted but with no results. This should not happen")
+	}
+	if topologyRecovery.SuccessorKey == nil {
+		return nil, fmt.Errorf("Recovery attempted yet no replica promoted")
+	}
+	return topologyRecovery, nil
+}
+
 // ForceMasterTakeover *trusts* master of given cluster is dead and fails over to designated instance,
 // which has to be its direct child.
 func ForceMasterTakeover(clusterName string, destination *inst.Instance) (topologyRecovery *TopologyRecovery, err error) {

From 20ec58910aa14fa8142d9f4403f73eea4628dd51 Mon Sep 17 00:00:00 2001
From: Shlomi Noach <shlomi-noach@github.com>
Date: Thu, 8 Jun 2017 09:12:19 +0300
Subject: [PATCH 2/6] /api/force-master-failover added

---
 go/http/api.go | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/go/http/api.go b/go/http/api.go
index 83f504d84..14d62c0fa 100644
--- a/go/http/api.go
+++ b/go/http/api.go
@@ -2186,6 +2186,37 @@ func (this *HttpAPI) Recover(params martini.Params, r render.Render, req *http.R
 	}
 }
 
+// ForceMasterFailover fails over the master of the cluster that the host/port instance belongs to (even if there's no particular problem with the master). The outcome is always rendered as an APIResponse JSON body.
+func (this *HttpAPI) ForceMasterFailover(params martini.Params, r render.Render, req *http.Request, user auth.User) {
+	if !isAuthorizedForAction(req, user) {
+		r.JSON(200, &APIResponse{Code: ERROR, Message: "Unauthorized"})
+		return
+	}
+	instanceKey, err := this.getInstanceKey(params["host"], params["port"])
+	if err != nil {
+		r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()})
+		return
+	}
+	instance, found, err := inst.ReadInstance(&instanceKey)
+	if (!found) || (err != nil) {
+		r.JSON(200, &APIResponse{Code: ERROR, Message: fmt.Sprintf("Cannot read instance: %+v", instanceKey)})
+		return
+	}
+
+	topologyRecovery, err := logic.ForceMasterFailover(instance.ClusterName)
+	if err != nil {
+		r.JSON(200, &APIResponse{Code: ERROR, Message: err.Error()})
+		return
+	}
+
+	// SuccessorKey is only used after this nil check; a recovery without a successor is reported as ERROR, not OK.
+	if topologyRecovery.SuccessorKey != nil {
+		r.JSON(200, &APIResponse{Code: OK, Message: "Master failed over", Details: topologyRecovery})
+	} else {
+		r.JSON(200, &APIResponse{Code: ERROR, Message: "Master not failed over", Details: topologyRecovery})
+	}
+}
+
 // Registers promotion preference for given instance
 func (this *HttpAPI) RegisterCandidate(params martini.Params, r render.Render, req *http.Request, user auth.User) {
 	if !isAuthorizedForAction(req, user) {
@@ -2574,6 +2605,7 @@ func (this *HttpAPI) RegisterRequests(m *martini.ClassicMartini) {
 	this.registerRequest(m, "recover/:host/:port/:candidateHost/:candidatePort", this.Recover)
 	this.registerRequest(m, "recover-lite/:host/:port", this.RecoverLite)
 	this.registerRequest(m, "recover-lite/:host/:port/:candidateHost/:candidatePort", this.RecoverLite)
+	this.registerRequest(m, "force-master-failover/:host/:port", this.ForceMasterFailover)
 	this.registerRequest(m, "register-candidate/:host/:port/:promotionRule", this.RegisterCandidate)
 	this.registerRequest(m, "automated-recovery-filters", this.AutomatedRecoveryFilters)
 	this.registerRequest(m, "audit-failure-detection", this.AuditFailureDetection)

From 58e8922d42d9026270fcc933901140b7f4e8bcf0 Mon Sep 17 00:00:00 2001
From: Shlomi Noach <shlomi-noach@github.com>
Date: Thu, 8 Jun 2017 09:12:30 +0300
Subject: [PATCH 3/6] added web interface for failing over master

---
 resources/public/js/cluster.js | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/resources/public/js/cluster.js b/resources/public/js/cluster.js
index ba6d938bc..6c32967b3 100644
--- a/resources/public/js/cluster.js
+++ b/resources/public/js/cluster.js
@@ -23,6 +23,10 @@ function Cluster() {
       apiCommand("/api/recover-lite/" + _instancesMap[e.draggedNodeId].Key.Hostname + "/" + _instancesMap[e.draggedNodeId].Key.Port);
       return true;
     },
+    "force-master-failover": function(e) {
+      apiCommand("/api/force-master-failover/" + _instancesMap[e.draggedNodeId].Key.Hostname + "/" + _instancesMap[e.draggedNodeId].Key.Port);
+      return true;
+    },
     "match-up-replicas": function(e) {
       apiCommand("/api/match-up-replicas/" + _instancesMap[e.draggedNodeId].Key.Hostname + "/" + _instancesMap[e.draggedNodeId].Key.Port);
       return true;
@@ -1366,6 +1370,10 @@ function Cluster() {
     recoveryListing.append('<li><a href="#" data-btn="auto-lite" data-command="recover-auto-lite">Auto (do not execute hooks/processes)</a></li>');
     recoveryListing.append('<li role="separator" class="divider"></li>');
 
+    if (instance.isMaster) {
+      recoveryListing.append('<li><a href="#" data-btn="force-master-failover" data-command="force-master-failover"><div class="glyphicon glyphicon-exclamation-sign text-danger"></div> <span class="text-danger">Force fail over <strong>now</strong> (even if normal handling would not fail over)</span></a></li>');
+      recoveryListing.append('<li role="separator" class="divider"></li>');
+    }
     if (!instance.isMaster) {
       recoveryListing.append('<li><a href="#" data-btn="match-up-replicas" data-command="match-up-replicas">Match up replicas to <code>' + instance.masterTitle + '</code></a></li>');
     }

From 0970018991f23b6148d1941f50fc0a53289dd65c Mon Sep 17 00:00:00 2001
From: Shlomi Noach <shlomi-noach@github.com>
Date: Thu, 8 Jun 2017 10:45:47 +0300
Subject: [PATCH 4/6] fixing listener on recover listing

---
 resources/public/js/cluster.js | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/resources/public/js/cluster.js b/resources/public/js/cluster.js
index 6c32967b3..9f5cfa419 100644
--- a/resources/public/js/cluster.js
+++ b/resources/public/js/cluster.js
@@ -285,8 +285,9 @@ function Cluster() {
       openNodeModal(_instancesMap[draggedNodeId]);
       return false;
     });
+
     $("body").on("click", ".instance a[data-command], .instance button[data-command]", function(e) {
-      var target = $(e.target);
+      var target = $(e.target).closest("a");
       var instanceEl = target.closest(".instance");
       e.draggedNodeId = instanceEl.attr("data-nodeid");
 

From 34ee413229c869a0c6afff4b2ce57a0b8e4382c9 Mon Sep 17 00:00:00 2001
From: Shlomi Noach <shlomi-noach@github.com>
Date: Thu, 8 Jun 2017 16:43:09 +0300
Subject: [PATCH 5/6] documenting force-master-failover

---
 go/app/prompt.go | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/go/app/prompt.go b/go/app/prompt.go
index 070a22e17..05506a229 100644
--- a/go/app/prompt.go
+++ b/go/app/prompt.go
@@ -624,7 +624,7 @@ Cheatsheet:
 
 				which-lost-in-recovery
 						List instances marked as downtimed for being lost in a recovery process. The output of this command lists
-            "lost" instances that probabaly should be recycled. 
+            "lost" instances that probably should be recycled.
 						The topology recovery process injects a magic hint when downtiming lost instances, that is picked up
 						by this command. Examples:
 
@@ -771,6 +771,14 @@ Cheatsheet:
 
             orchestrator -c recover-lite -i dead.instance.com --debug
 
+				force-master-failover
+            Forcibly begin a master failover process, even if orchestrator does not see anything wrong
+            in particular with the master.
+            - This will not work in a master-master configuration
+            - Orchestrator just treats this command as a DeadMaster failover scenario
+            - Orchestrator will issue all relevant pre-failover and post-failover external processes.
+            - Orchestrator will not attempt to recover/reconnect the old master
+
 				force-master-takeover
 						Forcibly discard master and promote another (direct child) instance instead, even if everything is running well.
 						This allows for planned switchover.

From 99c2bc5b6624e582858af691a6bf391c579f66d7 Mon Sep 17 00:00:00 2001
From: Shlomi Noach <shlomi-noach@github.com>
Date: Thu, 8 Jun 2017 21:47:05 +0300
Subject: [PATCH 6/6] simplified recovery options on 'recover' button. For
 masters: just fail over, or attempt to fail over onto specific instances. No
 more 'lite' recoveries. People really just want to fail over. For
 intermediate masters: 'relocate-replicas' instead of match* commands; no
 'lite' option

---
 resources/public/js/cluster.js | 42 ++++++++++++----------------------
 1 file changed, 14 insertions(+), 28 deletions(-)

diff --git a/resources/public/js/cluster.js b/resources/public/js/cluster.js
index 9f5cfa419..9f2ba3e7b 100644
--- a/resources/public/js/cluster.js
+++ b/resources/public/js/cluster.js
@@ -27,24 +27,16 @@ function Cluster() {
       apiCommand("/api/force-master-failover/" + _instancesMap[e.draggedNodeId].Key.Hostname + "/" + _instancesMap[e.draggedNodeId].Key.Port);
       return true;
     },
-    "match-up-replicas": function(e) {
-      apiCommand("/api/match-up-replicas/" + _instancesMap[e.draggedNodeId].Key.Hostname + "/" + _instancesMap[e.draggedNodeId].Key.Port);
-      return true;
-    },
-    "regroup-replicas": function(e) {
-      apiCommand("/api/regroup-replicas/" + _instancesMap[e.draggedNodeId].Key.Hostname + "/" + _instancesMap[e.draggedNodeId].Key.Port);
-      return true;
-    },
     "recover-suggested-successor": function(e) {
-      var suggestedSuccessorHost = $(e.target).attr("data-suggested-successor-host");
-      var suggestedSuccessorPort = $(e.target).attr("data-suggested-successor-port");
+      var suggestedSuccessorHost = $(e.target).attr("data-successor-host");
+      var suggestedSuccessorPort = $(e.target).attr("data-successor-port");
       apiCommand("/api/recover/" + _instancesMap[e.draggedNodeId].Key.Hostname + "/" + _instancesMap[e.draggedNodeId].Key.Port + "/" + suggestedSuccessorHost + "/" + suggestedSuccessorPort);
       return true;
     },
-    "match-replicas": function(e) {
-      var belowHost = $(e.target).attr("data-below-host");
-      var belowPort = $(e.target).attr("data-below-port");
-      apiCommand("/api/match-replicas/" + _instancesMap[e.draggedNodeId].Key.Hostname + "/" + _instancesMap[e.draggedNodeId].Key.Port + "/" + belowHost + "/" + belowPort);
+    "relocate-replicas": function(e) {
+      var belowHost = $(e.target).attr("data-successor-host");
+      var belowPort = $(e.target).attr("data-successor-port");
+      apiCommand("/api/relocate-replicas/" + _instancesMap[e.draggedNodeId].Key.Hostname + "/" + _instancesMap[e.draggedNodeId].Key.Port + "/" + belowHost + "/" + belowPort);
       return true;
     },
     "make-master": function(e) {
@@ -1367,21 +1359,11 @@ function Cluster() {
     popoverElement.find(".popover-footer .dropdown").append('<button type="button" class="btn btn-xs btn-default dropdown-toggle" id="recover_dropdown_' + instance.id + '" data-toggle="dropdown" aria-haspopup="true" aria-expanded="true"><span class="glyphicon glyphicon-heart text-danger"></span> Recover <span class="caret"></span></button><ul class="dropdown-menu" aria-labelledby="recover_dropdown_' + instance.id + '"></ul>');
     popoverElement.find(".popover-footer .dropdown").append('<ul class="dropdown-menu" aria-labelledby="recover_dropdown_' + instance.id + '"></ul>');
     var recoveryListing = popoverElement.find(".dropdown ul");
-    recoveryListing.append('<li><a href="#" data-btn="auto" data-command="recover-auto">Auto (implies running external hooks/processes)</a></li>');
-    recoveryListing.append('<li><a href="#" data-btn="auto-lite" data-command="recover-auto-lite">Auto (do not execute hooks/processes)</a></li>');
-    recoveryListing.append('<li role="separator" class="divider"></li>');
 
     if (instance.isMaster) {
       recoveryListing.append('<li><a href="#" data-btn="force-master-failover" data-command="force-master-failover"><div class="glyphicon glyphicon-exclamation-sign text-danger"></div> <span class="text-danger">Force fail over <strong>now</strong> (even if normal handling would not fail over)</span></a></li>');
       recoveryListing.append('<li role="separator" class="divider"></li>');
-    }
-    if (!instance.isMaster) {
-      recoveryListing.append('<li><a href="#" data-btn="match-up-replicas" data-command="match-up-replicas">Match up replicas to <code>' + instance.masterTitle + '</code></a></li>');
-    }
-    if (instance.children && instance.children.length > 1) {
-      recoveryListing.append('<li><a href="#" data-btn="regroup-replicas" data-command="regroup-replicas">Regroup replicas (auto pick best replica, only heals topology, no external processes)</a></li>');
-    }
-    if (instance.isMaster) {
+
       // Suggest successor
       instance.children.forEach(function(replica) {
         if (!replica.LogBinEnabled) {
@@ -1400,9 +1382,14 @@ function Cluster() {
           return
         }
         recoveryListing.append(
-          '<li><a href="#" data-btn="recover-suggested-successor" data-command="recover-suggested-successor" data-suggested-successor-host="' + replica.Key.Hostname + '" data-suggested-successor-port="' + replica.Key.Port + '">Regroup replicas, try to promote <code>' + replica.title + '</code></a></li>');
+          '<li><a href="#" data-btn="recover-suggested-successor" data-command="recover-suggested-successor" data-successor-host="' + replica.Key.Hostname + '" data-successor-port="' + replica.Key.Port + '">Recover, try to promote <code>' + replica.title + '</code></a></li>');
       });
     }
+    if (!instance.isMaster) {
+      recoveryListing.append('<li><a href="#" data-btn="auto" data-command="recover-auto">Auto (implies running external hooks/processes)</a></li>');
+      recoveryListing.append('<li role="separator" class="divider"></li>');
+      recoveryListing.append('<li><a href="#" data-btn="relocate-replicas" data-command="relocate-replicas" data-successor-host="' + instance.MasterKey.Hostname + '" data-successor-port="' + instance.MasterKey.Port + '">Relocate replicas to <code>' + instance.masterTitle + '</code></a></li>');
+    }
     if (instance.masterNode) {
       // Intermediate master; suggest successor
       instance.masterNode.children.forEach(function(sibling) {
@@ -1422,12 +1409,11 @@ function Cluster() {
           return
         }
         recoveryListing.append(
-          '<li><a href="#" data-btn="match-replicas" data-command="match-replicas" data-below-host="' + sibling.Key.Hostname + '" data-below-port="' + sibling.Key.Port + '">Match all replicas below <code>' + sibling.title + '</code></a></li>');
+          '<li><a href="#" data-btn="relocate-replicas" data-command="relocate-replicas" data-successor-host="' + sibling.Key.Hostname + '" data-successor-port="' + sibling.Key.Port + '">Relocate replicas to <code>' + sibling.title + '</code></a></li>');
       });
     }
   }
 
-
   function reviewReplicationAnalysis(replicationAnalysis) {
     var instancesMap = _instancesMap;
     var clusterHasReplicationAnalysisIssue = false;