Handle Nomad leadership flapping (attempt 2) #6977

Merged · 6 commits · Jan 28, 2020
Changes from 2 commits
77 changes: 50 additions & 27 deletions nomad/leader.go
@@ -59,37 +59,60 @@ var defaultSchedulerConfig = &structs.SchedulerConfiguration{
func (s *Server) monitorLeadership() {
var weAreLeaderCh chan struct{}
var leaderLoop sync.WaitGroup
for {
select {
case isLeader := <-s.leaderCh:
switch {
case isLeader:
if weAreLeaderCh != nil {
s.logger.Error("attempted to start the leader loop while running")
continue
}

weAreLeaderCh = make(chan struct{})
leaderLoop.Add(1)
go func(ch chan struct{}) {
defer leaderLoop.Done()
s.leaderLoop(ch)
}(weAreLeaderCh)
s.logger.Info("cluster leadership acquired")

default:
if weAreLeaderCh == nil {
s.logger.Error("attempted to stop the leader loop while not running")
continue
}
leaderStep := func(isLeader bool) {
switch {
Member:
Now that this has been pulled out of the select, could this switch be an if isLeader with an early return?
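
For illustration, the early-return shape suggested here might look roughly like the following inside monitorLeadership. This is a sketch assembled from the same statements shown in the diff, not necessarily what was merged:

```go
leaderStep := func(isLeader bool) {
	if isLeader {
		if weAreLeaderCh != nil {
			s.logger.Error("attempted to start the leader loop while running")
			return
		}

		weAreLeaderCh = make(chan struct{})
		leaderLoop.Add(1)
		go func(ch chan struct{}) {
			defer leaderLoop.Done()
			s.leaderLoop(ch)
		}(weAreLeaderCh)
		s.logger.Info("cluster leadership acquired")
		return
	}

	if weAreLeaderCh == nil {
		s.logger.Error("attempted to stop the leader loop while not running")
		return
	}

	s.logger.Debug("shutting down leader loop")
	close(weAreLeaderCh)
	leaderLoop.Wait()
	weAreLeaderCh = nil
	s.logger.Info("cluster leadership lost")
}
```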

case isLeader:
if weAreLeaderCh != nil {
s.logger.Error("attempted to start the leader loop while running")
return
}

s.logger.Debug("shutting down leader loop")
close(weAreLeaderCh)
leaderLoop.Wait()
weAreLeaderCh = nil
s.logger.Info("cluster leadership lost")
weAreLeaderCh = make(chan struct{})
leaderLoop.Add(1)
go func(ch chan struct{}) {
defer leaderLoop.Done()
s.leaderLoop(ch)
}(weAreLeaderCh)
s.logger.Info("cluster leadership acquired")

default:
if weAreLeaderCh == nil {
s.logger.Error("attempted to stop the leader loop while not running")
return
}

s.logger.Debug("shutting down leader loop")
close(weAreLeaderCh)
leaderLoop.Wait()
weAreLeaderCh = nil
s.logger.Info("cluster leadership lost")
}
}

wasLeader := false
for {
select {
case isLeader := <-s.leaderCh:
if wasLeader != isLeader {
wasLeader = isLeader
// normal case where we went through a transition
leaderStep(isLeader)
} else if wasLeader && isLeader {
// Server lost but then gained leadership immediately.
// During this time, this server may have received
// Raft transitions that haven't been applied to the FSM
// yet.
// Ensure that that FSM caught up and eval queues are refreshed
s.logger.Error("cluster leadership flapped, lost and gained leadership immediately. Leadership flaps indicate a cluster wide problems (e.g. networking).")
Member:
Let's make this Warn since there's no directly actionable error.


leaderStep(false)
leaderStep(true)
} else {
// Server gained but lost leadership immediately
// before it reacted; nothing to do, move on
s.logger.Error("cluster leadership flapped, gained and lost leadership immediately. Leadership flaps indicate a cluster wide problems (e.g. networking).")
Member:
I think this might be more accurate? I'm not sure gaining-and-losing-before-establishing leadership indicates a clusterwide problem.

Suggested change
s.logger.Error("cluster leadership flapped, gained and lost leadership immediately. Leadership flaps indicate a cluster wide problems (e.g. networking).")
s.logger.Warn("cluster leadership gained and lost. Could indicate network issues, memory paging, or high CPU load.")

AFAICT we only use the defaults for leadership election (a 1s heartbeat timeout triggers an election, and elections themselves have a 1s timeout) and don't expose them for customization. It seems like clusters without demanding scheduler throughput or latency may prefer higher timeouts to reduce elections (and therefore flapping).

If we allowed Raft to be configurable we could at least point users toward those docs when these cases are encountered?

Contributor Author:
re: configuration - agreed - we should consider porting Consul's raft_multiplier [1] and studying their defaults. In some Consul testing, they increased their default leadership timeout to 5 seconds to account for low-powered instances (e.g. t2.medium).

[1] https://www.consul.io/docs/install/performance.html#minimum-server-requirements
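
Not part of this PR, but to make the idea concrete: a minimal sketch of a Consul-style multiplier applied to the hashicorp/raft defaults. The raftMultiplier knob is hypothetical here; the raft.Config fields and their 1s/1s/500ms defaults come from the library:

```go
package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/raft"
)

// scaleRaftTiming multiplies the config's timing values by a small integer,
// in the spirit of Consul's raft_multiplier performance knob (shown here
// applied to raft.DefaultConfig).
func scaleRaftTiming(cfg *raft.Config, raftMultiplier int) {
	m := time.Duration(raftMultiplier)
	cfg.HeartbeatTimeout *= m   // default 1s: time without leader contact before starting an election
	cfg.ElectionTimeout *= m    // default 1s: time in candidate state before retrying the election
	cfg.LeaderLeaseTimeout *= m // default 500ms: how long leadership is held without quorum contact
}

func main() {
	cfg := raft.DefaultConfig()
	scaleRaftTiming(cfg, 5) // the kind of value Consul suggests for low-powered instances
	fmt.Println(cfg.HeartbeatTimeout, cfg.ElectionTimeout, cfg.LeaderLeaseTimeout)
	// prints: 5s 5s 2.5s
}
```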

}
case <-s.shutdownCh:
return
}
4 changes: 2 additions & 2 deletions nomad/server.go
@@ -1234,10 +1234,10 @@ func (s *Server) setupRaft() error {
}
}

// Setup the leader channel
// Setup the leader channel; that keeps the latest leadership alone
leaderCh := make(chan bool, 1)
s.config.RaftConfig.NotifyCh = leaderCh
s.leaderCh = leaderCh
s.leaderCh = dropButLastChannel(leaderCh, s.shutdownCh)

// Setup the Raft store
s.raft, err = raft.NewRaft(s.config.RaftConfig, s.fsm, log, stable, snap, trans)
65 changes: 65 additions & 0 deletions nomad/util.go
@@ -301,3 +301,68 @@ func getAlloc(state AllocGetter, allocID string) (*structs.Allocation, error) {

return alloc, nil
}

// dropButLastChannel returns a channel that drops all but last value from sourceCh.
//
// Useful for aggressively consuming sourceCh when intermediate values aren't relevant.
//
// This function propagates values to result quickly and drops intermediate messages
// in best effort basis. Golang scheduler may delay delivery or result in extra
// deliveries.
//
// Consider this function for example:
//
// ```
// src := make(chan bool)
// dst := dropButLastChannel(src, nil)
//
// go func() {
// src <- true
// src <- false
// }()
//
// // v can be `true` here but is very unlikely
// v := <-dst
// ```
//
func dropButLastChannel(sourceCh <-chan bool, shutdownCh <-chan struct{}) chan bool {
// buffer the most recent result
dst := make(chan bool)

go func() {
lv := false

DEQUE_SOURCE:
// wait for first message
select {
case lv = <-sourceCh:
goto ENQUEUE_DST
Member:
Unnecessary.

Suggested change: remove the goto ENQUEUE_DST (fall through to the label).

Contributor Author:
It's unnecessary indeed. I'd like to keep it, though, just because I find it easier to see all the state machine transitions as explicit goto statements.

case <-shutdownCh:
return
}

ENQUEUE_DST:
// prioritize draining source first dequeue without blocking
for {
select {
case lv = <-sourceCh:
default:
break ENQUEUE_DST
}
}

// attempt to enqueue but keep monitoring source channel
select {
case lv = <-sourceCh:
goto ENQUEUE_DST
case dst <- lv:
// enqueued value; back to dequeing from source
goto DEQUE_SOURCE
case <-shutdownCh:
return
}
}()

return dst

}
72 changes: 72 additions & 0 deletions nomad/util_test.go
@@ -4,6 +4,7 @@ import (
"net"
"reflect"
"testing"
"time"

version "github.com/hashicorp/go-version"
"github.com/hashicorp/nomad/helper/uuid"
@@ -258,3 +259,74 @@ func TestMaxUint64(t *testing.T) {
t.Fatalf("bad")
}
}

func TestDropButLastChannelDropsValues(t *testing.T) {
sourceCh := make(chan bool)
shutdownCh := make(chan struct{})

dstCh := dropButLastChannel(sourceCh, shutdownCh)

// timeout duration for any channel propagation delay
timeoutDuration := 5 * time.Millisecond

// test that dstCh doesn't emit anything initially
select {
case <-dstCh:
require.Fail(t, "received a message unexpectedly")
case <-time.After(timeoutDuration):
// yay no message - it could have been a default: but
// checking for goroutine effect
}

sourceCh <- false
select {
case v := <-dstCh:
require.False(t, v, "unexpected value from dstCh Ch")
case <-time.After(timeoutDuration):
require.Fail(t, "timed out waiting for source->dstCh propagation")
}

// channel is drained now
select {
case v := <-dstCh:
require.Failf(t, "received a message unexpectedly", "value: %v", v)
case <-time.After(timeoutDuration):
// yay no message - it could have been a default: but
// checking for goroutine effect
}

// now enqueue many messages and ensure only last one is received
// enqueueing should be fast!
sourceCh <- false
sourceCh <- false
sourceCh <- false
sourceCh <- false
sourceCh <- true

// I suspect that dstCh may contain a stale (i.e. `false`) value if golang executes
// this select before the implementation goroutine dequeues last value.
//
// However, never got it to fail in test - so leaving it now to see if it ever fails;
// and if/when test fails, we can learn of how much of an issue it is and adjust
Member:
We can fail it pretty trivially if we push the values into the channel concurrently with the test thread, but I'm not sure that tells us anything other than that we didn't get a chance to consume everything on the channel. If we pull all the values off, we're fine:

package main

import (
	"fmt"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
)

func TestDropButLastChannel(t *testing.T) {

	testFunc := func(t *testing.T) {
		t.Parallel()
		shutdownCh := make(chan struct{})

		src := make(chan bool)
		dst := dropButLastChannel(src, shutdownCh)

		timeoutDuration := 1 * time.Millisecond

		go func() {
			src <- false
			src <- false
			src <- false
			src <- false
			src <- false
			src <- false
			src <- true
			src <- false
			src <- true
		}()

		var v bool
	BREAK:
		for {
			select {
			case v = <-dst:
				fmt.Println("ok")
			case <-time.After(timeoutDuration):
				break BREAK
			}
		}

		assert.True(t, v)
		close(shutdownCh)
	}

	for i := 0; i < 1000; i++ {
		t.Run(fmt.Sprintf("test-%d", i), testFunc)
	}
}

Contributor Author:
Correct - if it's running on a different goroutine, we have no guarantees of delivery. This feels like another test to add.

Here, I wanted to test that intermediate messages get sent but get dropped when no receive is happening on the channel - so I made the sends happen in the same goroutine. Though, in its current form, we still cannot 100% guarantee that the first message we receive is the last sent message; this hasn't happened in practice yet, hence my comment.

Your test is good to have, in that we should check that, ultimately, we always send the last message last.

Contributor Author:
BTW - seeing your test, I realized I didn't support closing the source channel (raft doesn't attempt to close the notify channel); I updated the function to handle the close signal by always attempting to deliver the last known value, in case someone adopts the function for something else in the future.
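
For reference, a tiny standalone sketch of the close-detection idiom that handling presumably relies on (Go's two-value receive); it is illustrative only, not the updated dropButLastChannel itself:

```go
package main

import "fmt"

func main() {
	src := make(chan bool, 2)
	src <- false
	src <- true
	close(src)

	// The two-value receive reports ok == false once src is closed and
	// drained, so the last value seen can still be delivered afterwards.
	last := false
	for {
		v, ok := <-src
		if !ok {
			break
		}
		last = v
	}
	fmt.Println("last value before close:", last) // true
}
```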

select {
case v := <-dstCh:
require.True(t, v, "unexpected value from dstCh Ch")
case <-time.After(timeoutDuration):
require.Fail(t, "timed out waiting for source->dstCh propagation")
}

sourceCh <- true
sourceCh <- true
sourceCh <- true
sourceCh <- true
sourceCh <- true
sourceCh <- false
select {
case v := <-dstCh:
require.False(t, v, "unexpected value from dstCh Ch")
case <-time.After(timeoutDuration):
require.Fail(t, "timed out waiting for source->dstCh propagation")
}

close(shutdownCh)
}