diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 6368228eff..5e7dcb00ba 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -5246,6 +5246,13 @@ void clusterSetPrimary(clusterNode *n, int closeSlots) { replicationSetPrimary(n->ip, getNodeDefaultReplicationPort(n)); removeAllNotOwnedShardChannelSubscriptions(); resetManualFailover(); + + if (server.cluster->failover_auth_time) { + /* Since we have changed to a new primary node, the previously set + * failover_auth_time should no longer be used, whether it is in + * progress or timed out. */ + server.cluster->failover_auth_time = 0; + } } /* ----------------------------------------------------------------------------- diff --git a/tests/support/util.tcl b/tests/support/util.tcl index a8948338ea..2e2c70f205 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -176,7 +176,16 @@ proc verify_log_message {srv_idx pattern from_line} { incr from_line set result [exec tail -n +$from_line < [srv $srv_idx stdout]] if {![string match $pattern $result]} { - error "assertion:expected message not found in log file: $pattern" + fail "expected message not found in log file: $pattern" + } +} + +# verify pattern does not exist in server's stdout after a certain line number +proc verify_no_log_message {srv_idx pattern from_line} { + incr from_line + set result [exec tail -n +$from_line < [srv $srv_idx stdout]] + if {[string match $pattern $result]} { + fail "expected message found in log file: $pattern" } } diff --git a/tests/unit/cluster/failover2.tcl b/tests/unit/cluster/failover2.tcl new file mode 100644 index 0000000000..7bc6a05e95 --- /dev/null +++ b/tests/unit/cluster/failover2.tcl @@ -0,0 +1,66 @@ +# Check the basic monitoring and failover capabilities. 
+ +start_cluster 3 4 {tags {external:skip cluster} overrides {cluster-ping-interval 1000 cluster-node-timeout 5000}} { + + test "Cluster is up" { + wait_for_cluster_state ok + } + + test "Cluster is writable" { + cluster_write_test [srv 0 port] + } + + set paused_pid [srv 0 pid] + test "Killing one primary node" { + pause_process $paused_pid + } + + test "Wait for failover" { + wait_for_condition 1000 50 { + [s -3 role] == "master" || [s -6 role] == "master" + } else { + fail "No failover detected" + } + } + + test "Killing the new primary node" { + if {[s -3 role] == "master"} { + set replica_to_be_primary -6 + set paused_pid2 [srv -3 pid] + } else { + set replica_to_be_primary -3 + set paused_pid2 [srv -6 pid] + } + pause_process $paused_pid2 + } + + test "Cluster should eventually be up again" { + for {set j 0} {$j < [llength $::servers]} {incr j} { + if {[process_is_paused [srv -$j pid]]} continue + wait_for_condition 1000 50 { + [CI $j cluster_state] eq "ok" + } else { + fail "Cluster node $j cluster_state:[CI $j cluster_state]" + } + } + } + + test "wait for new failover" { + wait_for_condition 1000 50 { + [s $replica_to_be_primary role] == "master" + } else { + fail "No failover detected" + } + } + + test "Restarting the previously killed primary nodes" { + resume_process $paused_pid + resume_process $paused_pid2 + } + + test "Make sure there is no failover timeout" { + verify_no_log_message -3 "*Failover attempt expired*" 0 + verify_no_log_message -6 "*Failover attempt expired*" 0 + } + +} ;# start_cluster