From b85f0923ca7cc001be8af25be16e906aaaaf9d9b Mon Sep 17 00:00:00 2001 From: Peter Wilson Date: Mon, 3 Apr 2023 15:58:57 +0000 Subject: [PATCH] backport of commit 2054ffcbfa519e72fe5b025fcd3f22bb05a23c50 --- changelog/19472.txt | 3 + go.mod | 2 +- go.sum | 4 +- helper/testhelpers/testhelpers_oss.go | 31 ++ physical/raft/raft_autopilot.go | 19 +- .../raft/raft_autopilot_test.go | 265 ++++++++++++++---- vault/raft.go | 5 + 7 files changed, 270 insertions(+), 59 deletions(-) create mode 100644 changelog/19472.txt diff --git a/changelog/19472.txt b/changelog/19472.txt new file mode 100644 index 000000000000..db9ec7276550 --- /dev/null +++ b/changelog/19472.txt @@ -0,0 +1,3 @@ +```release-note:improvement +autopilot: Update version to v0.2.0 to add better support for respecting min quorum +``` diff --git a/go.mod b/go.mod index 094da0654c9f..4a48c4957c46 100644 --- a/go.mod +++ b/go.mod @@ -107,7 +107,7 @@ require ( github.com/hashicorp/hcp-sdk-go v0.23.0 github.com/hashicorp/nomad/api v0.0.0-20220707195938-75f4c2237b28 github.com/hashicorp/raft v1.3.10 - github.com/hashicorp/raft-autopilot v0.1.6 + github.com/hashicorp/raft-autopilot v0.2.0 github.com/hashicorp/raft-boltdb/v2 v2.0.0-20210421194847-a7e34179d62c github.com/hashicorp/raft-snapshot v1.0.4 github.com/hashicorp/vault-plugin-auth-alicloud v0.14.0 diff --git a/go.sum b/go.sum index 3b2a285e60de..90b0d94109ea 100644 --- a/go.sum +++ b/go.sum @@ -1129,8 +1129,8 @@ github.com/hashicorp/raft v1.1.2-0.20191002163536-9c6bd3e3eb17/go.mod h1:vPAJM8A github.com/hashicorp/raft v1.2.0/go.mod h1:vPAJM8Asw6u8LxC3eJCUZmRP/E4QmUGE1R7g7k8sG/8= github.com/hashicorp/raft v1.3.10 h1:LR5QZX1VQd0DFWZfeCwWawyeKfpS/Tm1yjnJIY5X4Tw= github.com/hashicorp/raft v1.3.10/go.mod h1:J8naEwc6XaaCfts7+28whSeRvCqTd6e20BlCU3LtEO4= -github.com/hashicorp/raft-autopilot v0.1.6 h1:C1q3RNF2FfXNZfHWbvVAu0QixaQK8K5pX4O5lh+9z4I= -github.com/hashicorp/raft-autopilot v0.1.6/go.mod h1:Af4jZBwaNOI+tXfIqIdbcAnh/UyyqIMj/pOISIfhArw= +github.com/hashicorp/raft-autopilot v0.2.0 h1:2/R2RPgamgRKgNWGQioULZvjeKXQZmDuw5Ty+6c+H7Y= +github.com/hashicorp/raft-autopilot v0.2.0/go.mod h1:q6tZ8UAZ5xio2gv2JvjgmtOlh80M6ic8xQYBe2Egkg8= github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea/go.mod h1:pNv7Wc3ycL6F5oOWn+tPGo2gWD4a5X+yp/ntwdKLjRk= github.com/hashicorp/raft-boltdb/v2 v2.0.0-20210421194847-a7e34179d62c h1:oiKun9QlrOz5yQxMZJ3tf1kWtFYuKSJzxzEDxDPevj4= github.com/hashicorp/raft-boltdb/v2 v2.0.0-20210421194847-a7e34179d62c/go.mod h1:kiPs9g148eLShc2TYagUAyKDnD+dH9U+CQKsXzlY9xo= diff --git a/helper/testhelpers/testhelpers_oss.go b/helper/testhelpers/testhelpers_oss.go index 912d50fdec3b..5f8241934959 100644 --- a/helper/testhelpers/testhelpers_oss.go +++ b/helper/testhelpers/testhelpers_oss.go @@ -17,3 +17,34 @@ func WaitForActiveNodeAndStandbys(t testing.T, cluster *vault.TestCluster) { } } } + +// WaitForNodesExcludingSelectedStandbys is variation on WaitForActiveNodeAndStandbys. +// It waits for the active node before waiting for standby nodes, however +// it will not wait for cores with indexes that match those specified as arguments. +// Whilst you could specify index 0 which is likely to be the leader node, the function +// checks for the leader first regardless of the indexes to skip, so it would be redundant to do so. +// The intention/use case for this function is to allow a cluster to start and become active with one +// or more nodes not joined, so that we can test scenarios where a node joins later. +// e.g. 
4 nodes in the cluster, only 3 nodes in cluster 'active', 1 node can be joined later in tests. +func WaitForNodesExcludingSelectedStandbys(t testing.T, cluster *vault.TestCluster, indexesToSkip ...int) { + WaitForActiveNode(t, cluster) + + contains := func(elems []int, e int) bool { + for _, v := range elems { + if v == e { + return true + } + } + + return false + } + for i, core := range cluster.Cores { + if contains(indexesToSkip, i) { + continue + } + + if standby, _ := core.Core.Standby(); standby { + WaitForStandbyNode(t, core) + } + } +} diff --git a/physical/raft/raft_autopilot.go b/physical/raft/raft_autopilot.go index 1c000dc04824..e298e8847e17 100644 --- a/physical/raft/raft_autopilot.go +++ b/physical/raft/raft_autopilot.go @@ -289,7 +289,7 @@ type Delegate struct { emptyVersionLogs map[raft.ServerID]struct{} } -func newDelegate(b *RaftBackend) *Delegate { +func NewDelegate(b *RaftBackend) *Delegate { return &Delegate{ RaftBackend: b, inflightRemovals: make(map[raft.ServerID]bool), @@ -382,6 +382,7 @@ func (d *Delegate) KnownServers() map[raft.ServerID]*autopilot.Server { return nil } + apServerStates := d.autopilot.GetState().Servers servers := future.Configuration().Servers serverIDs := make([]string, 0, len(servers)) for _, server := range servers { @@ -425,6 +426,19 @@ func (d *Delegate) KnownServers() map[raft.ServerID]*autopilot.Server { Ext: d.autopilotServerExt(state), } + // As KnownServers is a delegate called by autopilot let's check if we already + // had this data in the correct format and use it. If we don't (which sounds a + // bit sad, unless this ISN'T a voter) then as a fail-safe, let's try what we've + // done elsewhere in code to check the desired suffrage and manually set NodeType + // based on whether that's a voter or not. If we don't do either of these + // things, NodeType isn't set which means technically it's not a voter. + // It shouldn't be a voter and end up in this state. + if apServerState, found := apServerStates[raft.ServerID(id)]; found && apServerState.Server.NodeType != "" { + server.NodeType = apServerState.Server.NodeType + } else if state.DesiredSuffrage == "voter" { + server.NodeType = autopilot.NodeVoter + } + switch state.IsDead.Load() { case true: d.logger.Debug("informing autopilot that the node left", "id", id) @@ -442,6 +456,7 @@ func (d *Delegate) KnownServers() map[raft.ServerID]*autopilot.Server { Name: d.localID, RaftVersion: raft.ProtocolVersionMax, NodeStatus: autopilot.NodeAlive, + NodeType: autopilot.NodeVoter, // The leader must be a voter Meta: d.meta(&FollowerState{ UpgradeVersion: d.EffectiveVersion(), RedundancyZone: d.RedundancyZone(), @@ -817,7 +832,7 @@ func (b *RaftBackend) SetupAutopilot(ctx context.Context, storageConfig *Autopil if b.autopilotUpdateInterval != 0 { options = append(options, autopilot.WithUpdateInterval(b.autopilotUpdateInterval)) } - b.autopilot = autopilot.New(b.raft, newDelegate(b), options...) + b.autopilot = autopilot.New(b.raft, NewDelegate(b), options...) 
b.followerStates = followerStates b.followerHeartbeatTicker = time.NewTicker(1 * time.Second) diff --git a/vault/external_tests/raft/raft_autopilot_test.go b/vault/external_tests/raft/raft_autopilot_test.go index c37bcdd8b9c5..37a14fe33414 100644 --- a/vault/external_tests/raft/raft_autopilot_test.go +++ b/vault/external_tests/raft/raft_autopilot_test.go @@ -2,9 +2,11 @@ package rafttests import ( "context" + "encoding/json" "fmt" "math" "reflect" + "sync/atomic" "testing" "time" @@ -255,8 +257,8 @@ func TestRaft_Autopilot_Stabilization_Delay(t *testing.T) { } } - join(t, cluster.Cores[1], client, cluster) - join(t, cluster.Cores[2], client, cluster) + joinAndUnseal(t, cluster.Cores[1], cluster, false, false) + joinAndUnseal(t, cluster.Cores[2], cluster, false, false) core2shouldBeHealthyAt := time.Now().Add(timeToHealthyCore2) @@ -317,8 +319,8 @@ func TestRaft_AutoPilot_Peersets_Equivalent(t *testing.T) { }) require.NoError(t, err) - join(t, cluster.Cores[1], client, cluster) - join(t, cluster.Cores[2], client, cluster) + joinAsVoterAndUnseal(t, cluster.Cores[1], cluster) + joinAsVoterAndUnseal(t, cluster.Cores[2], cluster) deadline := time.Now().Add(10 * time.Second) var core0Peers, core1Peers, core2Peers []raft.Peer @@ -346,6 +348,145 @@ func TestRaft_AutoPilot_Peersets_Equivalent(t *testing.T) { require.Equal(t, core1Peers, core2Peers) } +// TestRaft_VotersStayVoters ensures that autopilot doesn't demote a node just +// because it hasn't been heard from in some time. +func TestRaft_VotersStayVoters(t *testing.T) { + cluster, _ := raftCluster(t, &RaftClusterOpts{ + DisableFollowerJoins: true, + InmemCluster: true, + EnableAutopilot: true, + PhysicalFactoryConfig: map[string]interface{}{ + "performance_multiplier": "5", + "autopilot_reconcile_interval": "300ms", + "autopilot_update_interval": "100ms", + }, + VersionMap: map[int]string{ + 0: version.Version, + 1: version.Version, + 2: version.Version, + }, + }) + defer cluster.Cleanup() + testhelpers.WaitForActiveNode(t, cluster) + + client := cluster.Cores[0].Client + + config, err := client.Sys().RaftAutopilotConfiguration() + require.NoError(t, err) + joinAndStabilizeAndPromote(t, cluster.Cores[1], client, cluster, config, "core-1", 2) + joinAndStabilizeAndPromote(t, cluster.Cores[2], client, cluster, config, "core-2", 3) + + errIfNonVotersExist := func() error { + t.Helper() + resp, err := client.Sys().RaftAutopilotState() + if err != nil { + t.Fatal(err) + } + for k, v := range resp.Servers { + if v.Status == "non-voter" { + return fmt.Errorf("node %q is a non-voter", k) + } + } + return nil + } + testhelpers.RetryUntil(t, 10*time.Second, errIfNonVotersExist) + + // Core0 is the leader, sealing it will both cause an election - and the + // new leader won't have seen any heartbeats initially - and create a "down" + // node that won't be sending heartbeats. + testhelpers.EnsureCoreSealed(t, cluster.Cores[0]) + time.Sleep(config.ServerStabilizationTime + 2*time.Second) + client = cluster.Cores[1].Client + err = errIfNonVotersExist() + require.NoError(t, err) +} + +// TestRaft_Autopilot_DeadServerCleanup tests that dead servers are correctly +// removed by Vault and autopilot when a node stops and a replacement node joins. +// The expected behavior is that removing a node from a 3 node cluster wouldn't +// remove it from Raft until a replacement voter had joined and stabilized/been promoted. 
+func TestRaft_Autopilot_DeadServerCleanup(t *testing.T) { + conf, opts := teststorage.ClusterSetup(nil, nil, teststorage.RaftBackendSetup) + conf.DisableAutopilot = false + opts.NumCores = 4 + opts.SetupFunc = nil + opts.PhysicalFactoryConfig = map[string]interface{}{ + "autopilot_reconcile_interval": "300ms", + "autopilot_update_interval": "100ms", + } + + cluster := vault.NewTestCluster(t, conf, opts) + cluster.Start() + defer cluster.Cleanup() + leader, addressProvider := setupLeaderAndUnseal(t, cluster) + + // Join 2 extra nodes manually, store the 3rd for later + core1 := cluster.Cores[1] + core2 := cluster.Cores[2] + core3 := cluster.Cores[3] + core1.UnderlyingRawStorage.(*raft.RaftBackend).SetServerAddressProvider(addressProvider) + core2.UnderlyingRawStorage.(*raft.RaftBackend).SetServerAddressProvider(addressProvider) + core3.UnderlyingRawStorage.(*raft.RaftBackend).SetServerAddressProvider(addressProvider) + joinAsVoterAndUnseal(t, core1, cluster) + joinAsVoterAndUnseal(t, core2, cluster) + // Do not join node 3 + testhelpers.WaitForNodesExcludingSelectedStandbys(t, cluster, 3) + + config, err := leader.Client.Sys().RaftAutopilotConfiguration() + require.NoError(t, err) + require.True(t, isHealthyAfterStabilization(t, leader, config.ServerStabilizationTime)) + + // Ensure Autopilot has the aggressive settings + config.CleanupDeadServers = true + config.ServerStabilizationTime = 5 * time.Second + config.DeadServerLastContactThreshold = 10 * time.Second + config.MaxTrailingLogs = 10 + config.LastContactThreshold = 10 * time.Second + config.MinQuorum = 3 + + // We can't use Client.Sys().PutRaftAutopilotConfiguration(config) in OSS as disable_upgrade_migration isn't in OSS + b, err := json.Marshal(&config) + require.NoError(t, err) + var m map[string]interface{} + err = json.Unmarshal(b, &m) + require.NoError(t, err) + delete(m, "disable_upgrade_migration") + _, err = leader.Client.Logical().Write("sys/storage/raft/autopilot/configuration", m) + require.NoError(t, err) + + // Observe for healthy state + state, err := leader.Client.Sys().RaftAutopilotState() + require.NoError(t, err) + require.True(t, state.Healthy) + + // Kill a node (core-2) + cluster.StopCore(t, 2) + // Wait for just over the dead server threshold to ensure the core is classed as 'dead' + time.Sleep(config.DeadServerLastContactThreshold + 2*time.Second) + + // Observe for an unhealthy state (but we still have 3 voters according to Raft) + state, err = leader.Client.Sys().RaftAutopilotState() + require.NoError(t, err) + require.False(t, state.Healthy) + require.Len(t, state.Voters, 3) + + // Join node 3 now + joinAsVoterAndUnseal(t, core3, cluster) + + // Stabilization time + require.True(t, isHealthyAfterStabilization(t, leader, config.ServerStabilizationTime)) + + // Observe for healthy and contains 3 correct voters + state, err = leader.Client.Sys().RaftAutopilotState() + require.NoError(t, err) + require.True(t, state.Healthy) + require.Len(t, state.Voters, 3) + require.Contains(t, state.Voters, "core-0") + require.Contains(t, state.Voters, "core-1") + require.NotContains(t, state.Voters, "core-2") + require.Contains(t, state.Voters, "core-3") +} + func joinAndStabilizeAndPromote(t *testing.T, core *vault.TestClusterCore, client *api.Client, cluster *vault.TestCluster, config *api.AutopilotConfig, nodeID string, numServers int) { joinAndStabilize(t, core, client, cluster, config, nodeID, numServers) @@ -373,7 +514,7 @@ func joinAndStabilizeAndPromote(t *testing.T, core *vault.TestClusterCore, clien func 
joinAndStabilize(t *testing.T, core *vault.TestClusterCore, client *api.Client, cluster *vault.TestCluster, config *api.AutopilotConfig, nodeID string, numServers int) { t.Helper() - join(t, core, client, cluster) + joinAndUnseal(t, core, cluster, false, false) time.Sleep(2 * time.Second) state, err := client.Sys().RaftAutopilotState() @@ -400,68 +541,84 @@ func joinAndStabilize(t *testing.T, core *vault.TestClusterCore, client *api.Cli } } -func join(t *testing.T, core *vault.TestClusterCore, client *api.Client, cluster *vault.TestCluster) { - t.Helper() +// joinAsVoterAndUnseal joins the specified core to the specified cluster as a voter and unseals it. +// It will wait (up to a timeout) for the core to be fully unsealed before returning +func joinAsVoterAndUnseal(t *testing.T, core *vault.TestClusterCore, cluster *vault.TestCluster) { + joinAndUnseal(t, core, cluster, false, true) +} + +// joinAndUnseal joins the specified core to the specified cluster and unseals it. +// You can specify if the core should be joined as a voter/non-voter, +// and whether to wait (up to a timeout) for the core to be unsealed before returning. +func joinAndUnseal(t *testing.T, core *vault.TestClusterCore, cluster *vault.TestCluster, nonVoter bool, waitForUnseal bool) { + leader, leaderAddr := clusterLeader(t, cluster) _, err := core.JoinRaftCluster(namespace.RootContext(context.Background()), []*raft.LeaderJoinInfo{ { - LeaderAPIAddr: client.Address(), - TLSConfig: cluster.Cores[0].TLSConfig(), + LeaderAPIAddr: leaderAddr, + TLSConfig: leader.TLSConfig(), Retry: true, }, - }, false) + }, nonVoter) require.NoError(t, err) + time.Sleep(1 * time.Second) cluster.UnsealCore(t, core) + if waitForUnseal { + waitForCoreUnseal(t, core) + } } -// TestRaft_VotersStayVoters ensures that autopilot doesn't demote a node just -// because it hasn't been heard from in some time. -func TestRaft_VotersStayVoters(t *testing.T) { - cluster, _ := raftCluster(t, &RaftClusterOpts{ - DisableFollowerJoins: true, - InmemCluster: true, - EnableAutopilot: true, - PhysicalFactoryConfig: map[string]interface{}{ - "performance_multiplier": "5", - "autopilot_reconcile_interval": "300ms", - "autopilot_update_interval": "100ms", - }, - VersionMap: map[int]string{ - 0: version.Version, - 1: version.Version, - 2: version.Version, - }, - }) - defer cluster.Cleanup() - testhelpers.WaitForActiveNode(t, cluster) +// clusterLeader gets the leader node and its address from the specified cluster +func clusterLeader(t *testing.T, cluster *vault.TestCluster) (*vault.TestClusterCore, string) { + for _, core := range cluster.Cores { + isLeader, addr, _, err := core.Leader() + require.NoError(t, err) + if isLeader { + return core, addr + } + } - client := cluster.Cores[0].Client + t.Fatal("unable to find leader") + return nil, "" +} - config, err := client.Sys().RaftAutopilotConfiguration() - require.NoError(t, err) - joinAndStabilizeAndPromote(t, cluster.Cores[1], client, cluster, config, "core-1", 2) - joinAndStabilizeAndPromote(t, cluster.Cores[2], client, cluster, config, "core-2", 3) +// setupLeaderAndUnseal configures and unseals the leader node. +// It will wait until the node is active before returning the core and the address of the leader. 
+func setupLeaderAndUnseal(t *testing.T, cluster *vault.TestCluster) (*vault.TestClusterCore, *testhelpers.TestRaftServerAddressProvider) { + leader, _ := clusterLeader(t, cluster) - errIfNonVotersExist := func() error { - t.Helper() - resp, err := client.Sys().RaftAutopilotState() - if err != nil { - t.Fatal(err) - } - for k, v := range resp.Servers { - if v.Status == "non-voter" { - return fmt.Errorf("node %q is a non-voter", k) - } + // Lots of tests seem to do this when they deal with a TestRaftServerAddressProvider, it makes the test work rather than error out. + atomic.StoreUint32(&vault.TestingUpdateClusterAddr, 1) + + addressProvider := &testhelpers.TestRaftServerAddressProvider{Cluster: cluster} + testhelpers.EnsureCoreSealed(t, leader) + leader.UnderlyingRawStorage.(*raft.RaftBackend).SetServerAddressProvider(addressProvider) + cluster.UnsealCore(t, leader) + vault.TestWaitActive(t, leader.Core) + + return leader, addressProvider +} + +// waitForCoreUnseal waits until the specified core is unsealed. +// It fails the calling test if the deadline has elapsed and the core is still sealed. +func waitForCoreUnseal(t *testing.T, core *vault.TestClusterCore) { + deadline := time.Now().Add(30 * time.Second) + for time.Now().Before(deadline) { + if !core.Sealed() { + return } - return nil + time.Sleep(time.Second) } - testhelpers.RetryUntil(t, 10*time.Second, errIfNonVotersExist) + t.Fatalf("expected core %v to unseal before deadline but it has not", core.NodeID) +} - // Core0 is the leader, sealing it will both cause an election - and the - // new leader won't have seen any heartbeats initially - and create a "down" - // node that won't be sending heartbeats. - testhelpers.EnsureCoreSealed(t, cluster.Cores[0]) - time.Sleep(30 * time.Second) - client = cluster.Cores[1].Client - errIfNonVotersExist() +// isHealthyAfterStabilization will use the supplied leader core to query the +// health of Raft Autopilot just after the specified deadline. +func isHealthyAfterStabilization(t *testing.T, leaderCore *vault.TestClusterCore, stabilizationTime time.Duration) bool { + timeoutGrace := 2 * time.Second + time.Sleep(stabilizationTime + timeoutGrace) + state, err := leaderCore.Client.Sys().RaftAutopilotState() + require.NoError(t, err) + require.NotNil(t, state) + return state.Healthy } diff --git a/vault/raft.go b/vault/raft.go index 8c68c58491fc..785a44564b4a 100644 --- a/vault/raft.go +++ b/vault/raft.go @@ -1236,6 +1236,11 @@ func (c *Core) raftLeaderInfo(leaderInfo *raft.LeaderJoinInfo, disco *discover.D return ret, nil } +// NewDelegateForCore creates a raft.Delegate for the specified core using its backend. +func NewDelegateForCore(c *Core) *raft.Delegate { + return raft.NewDelegate(c.getRaftBackend()) +} + // getRaftBackend returns the RaftBackend from the HA or physical backend, // in that order of preference, or nil if not of type RaftBackend. func (c *Core) getRaftBackend() *raft.RaftBackend {
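
The behavioral core of this patch is the NodeType fallback added to `Delegate.KnownServers` in `physical/raft/raft_autopilot.go`: prefer the `NodeType` that autopilot already tracks for a server, otherwise derive it from the follower's desired suffrage, and leave it unset (effectively non-voter) in every other case. The snippet below is not part of the backport; it is a minimal standalone sketch of that decision, where `resolveNodeType` is a hypothetical helper name and the real code works against autopilot's server state and `FollowerState` rather than bare strings.

```go
package main

import (
	"fmt"

	"github.com/hashicorp/raft-autopilot"
)

// resolveNodeType mirrors the fallback in Delegate.KnownServers: use the
// NodeType autopilot already reports for the server when present, otherwise
// fall back to the desired suffrage, otherwise leave the type unset so the
// node is treated as a non-voter.
func resolveNodeType(fromAutopilotState autopilot.NodeType, desiredSuffrage string) autopilot.NodeType {
	if fromAutopilotState != "" {
		return fromAutopilotState
	}
	if desiredSuffrage == "voter" {
		return autopilot.NodeVoter
	}
	return ""
}

func main() {
	fmt.Println(resolveNodeType("", "voter"))                      // voter: derived from desired suffrage
	fmt.Println(resolveNodeType(autopilot.NodeVoter, "non-voter")) // voter: autopilot's own state wins
	fmt.Println(resolveNodeType("", "non-voter") == "")            // true: left unset, treated as non-voter
}
```

Together with the leader always reporting `NodeType: autopilot.NodeVoter`, this keeps voters from being silently treated as non-voters by the upgraded raft-autopilot v0.2.0 min-quorum logic, which is what `TestRaft_VotersStayVoters` and `TestRaft_Autopilot_DeadServerCleanup` exercise above.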