From 96707c5b7ef9f490d4bcfe8cb8ecc593109ecbc8 Mon Sep 17 00:00:00 2001 From: Luiz Aoqui Date: Tue, 30 Aug 2022 15:13:39 -0400 Subject: [PATCH] ci: fix TestNomad_BootstrapExpect_NonVoter test PR #12130 refactored the test to use the `wantPeers` helper, but this function only returns the number of voting peers, which in this test should be equal to 2. I think the tests were passing back them because of a bug in Raft (https://github.com/hashicorp/raft/pull/483) where a non-voting server was able to transition to candidate state. One possible evidence of this is that a successful test run would have the following log line: ``` raft@v1.3.5/raft.go:1058: nomad.raft: updating configuration: command=AddVoter server-id=127.0.0.1:9101 server-addr=127.0.0.1:9101 servers="[{Suffrage:Voter ID:127.0.0.1:9107 Address:127.0.0.1:9107} {Suffrage:Voter ID:127.0.0.1:9105 Address:127.0.0.1:9105} {Suffrage:Voter ID:127.0.0.1:9103 Address:127.0.0.1:9103} {Suffrage:Voter ID:127.0.0.1:9101 Address:127.0.0.1:9101}]" ``` This commit reverts the test logic to check for peer count, regardless of voting status. --- nomad/serf_test.go | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/nomad/serf_test.go b/nomad/serf_test.go index f5d2f0dcb1aa..d36a4df47d0b 100644 --- a/nomad/serf_test.go +++ b/nomad/serf_test.go @@ -11,7 +11,6 @@ import ( "github.com/hashicorp/nomad/testutil" "github.com/hashicorp/raft" "github.com/hashicorp/serf/serf" - "github.com/hashicorp/serf/testutil/retry" "github.com/stretchr/testify/require" ) @@ -327,16 +326,35 @@ func TestNomad_BootstrapExpect_NonVoter(t *testing.T) { }) defer cleanupS4() + servers := []*Server{s1, s2, s3, s4} + // Join with fourth server (now have quorum) // Start with 4th server for higher chance of success - TestJoin(t, s4, s3, s2, s1) + TestJoin(t, servers...) // Assert leadership with 4 peers - servers := []*Server{s1, s2, s3, s4} - for _, s := range servers { - testutil.WaitForLeader(t, s.RPC) - retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 4)) }) - } + expect := len(servers) + testutil.WaitForLeader(t, servers[0].RPC) + testutil.WaitForResult(func() (bool, error) { + // Retry the join to decrease flakiness + TestJoin(t, servers...) + for _, s := range servers { + peers, err := s.numPeers() + if err != nil { + return false, fmt.Errorf("failed to get number of peers: %v", err) + } + if peers != expect { + return false, fmt.Errorf("expected %d peers, got %d", expect, peers) + } + if len(s.localPeers) != expect { + return false, fmt.Errorf("expected %d local peers, got %d: %#v", expect, len(s.localPeers), s.localPeers) + } + + } + return true, nil + }, func(err error) { + require.NoError(t, err) + }) } func TestNomad_BadExpect(t *testing.T) {