From 3d43031fbf972cdea59f7d9c618c2ef8b2daba85 Mon Sep 17 00:00:00 2001 From: James Phillips Date: Thu, 4 May 2017 13:27:33 -0700 Subject: [PATCH 1/2] Updates vendored Raft library. This pulls in https://github.com/hashicorp/raft/pull/207 to get support for the new-style peers.json recovery file. --- vendor/github.com/hashicorp/raft/peersjson.go | 52 +++++++++++++++++++ vendor/vendor.json | 6 +-- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/vendor/github.com/hashicorp/raft/peersjson.go b/vendor/github.com/hashicorp/raft/peersjson.go index c55fdbb43dde..38ca2a8b845a 100644 --- a/vendor/github.com/hashicorp/raft/peersjson.go +++ b/vendor/github.com/hashicorp/raft/peersjson.go @@ -44,3 +44,55 @@ func ReadPeersJSON(path string) (Configuration, error) { } return configuration, nil } + +// configEntry is used when decoding a new-style peers.json. +type configEntry struct { + // ID is the ID of the server (a UUID, usually). + ID ServerID `json:"id"` + + // Address is the host:port of the server. + Address ServerAddress `json:"address"` + + // NonVoter controls the suffrage. We choose this sense so people + // can leave this out and get a Voter by default. + NonVoter bool `json:"non_voter"` +} + +// ReadConfigJSON reads a new-style peers.json and returns a configuration +// structure. This can be used to perform manual recovery when running protocol +// versions that use server IDs. +func ReadConfigJSON(path string) (Configuration, error) { + // Read in the file. + buf, err := ioutil.ReadFile(path) + if err != nil { + return Configuration{}, err + } + + // Parse it as JSON. + var peers []configEntry + dec := json.NewDecoder(bytes.NewReader(buf)) + if err := dec.Decode(&peers); err != nil { + return Configuration{}, err + } + + // Map it into the new-style configuration structure. 
+ var configuration Configuration + for _, peer := range peers { + suffrage := Voter + if peer.NonVoter { + suffrage = Nonvoter + } + server := Server{ + Suffrage: suffrage, + ID: peer.ID, + Address: peer.Address, + } + configuration.Servers = append(configuration.Servers, server) + } + + // We should only ingest valid configurations. + if err := checkConfiguration(configuration); err != nil { + return Configuration{}, err + } + return configuration, nil +} diff --git a/vendor/vendor.json b/vendor/vendor.json index 63874447a076..38d331001170 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -612,10 +612,10 @@ "revisionTime": "2015-11-16T02:03:38Z" }, { - "checksumSHA1": "NvFexY/rs9sPfve+ny/rkMkCL5M=", + "checksumSHA1": "8Na6qG9taUXHDunMYecGxbHbJKE=", "path": "github.com/hashicorp/raft", - "revision": "6b063a18bfe6e0da3fdc2b9bf6256be9c0a4849a", - "revisionTime": "2017-03-16T02:42:32Z", + "revision": "939ebd2103731c2f38c7964d8dd24af0e1b26dc3", + "revisionTime": "2017-05-04T20:16:11Z", "version": "library-v2-stage-one", "versionExact": "library-v2-stage-one" }, From 5d370865069792d40e053cf394fd478311b6c2b2 Mon Sep 17 00:00:00 2001 From: James Phillips Date: Thu, 4 May 2017 14:15:59 -0700 Subject: [PATCH 2/2] Adds support for new-style peers.json recovery for newer Raft protocol versions. 
--- consul/server.go | 62 +++++++++++++++-- .../source/docs/guides/outage.html.markdown | 67 +++++++++++++++---- 2 files changed, 110 insertions(+), 19 deletions(-) diff --git a/consul/server.go b/consul/server.go index 56df71fc4da2..a879d02afb26 100644 --- a/consul/server.go +++ b/consul/server.go @@ -518,10 +518,17 @@ func (s *Server) setupRaft() error { } } else if _, err := os.Stat(peersFile); err == nil { s.logger.Printf("[INFO] consul: found peers.json file, recovering Raft configuration...") - configuration, err := raft.ReadPeersJSON(peersFile) + + var configuration raft.Configuration + if s.config.RaftConfig.ProtocolVersion < 3 { + configuration, err = raft.ReadPeersJSON(peersFile) + } else { + configuration, err = raft.ReadConfigJSON(peersFile) + } if err != nil { return fmt.Errorf("recovery failed to parse peers.json: %v", err) } + tmpFsm, err := NewFSM(s.tombstoneGC, s.config.LogOutput) if err != nil { return fmt.Errorf("recovery failed to make temp FSM: %v", err) @@ -530,6 +537,7 @@ func (s *Server) setupRaft() error { log, stable, snap, trans, configuration); err != nil { return fmt.Errorf("recovery failed: %v", err) } + if err := os.Remove(peersFile); err != nil { return fmt.Errorf("recovery failed to delete peers.json, please delete manually (see peers.info for details): %v", err) } @@ -965,10 +973,54 @@ func (s *Server) GetWANCoordinate() (*coordinate.Coordinate, error) { // location. const peersInfoContent = ` As of Consul 0.7.0, the peers.json file is only used for recovery -after an outage. It should be formatted as a JSON array containing the address -and port of each Consul server in the cluster, like this: - -["10.1.0.1:8300","10.1.0.2:8300","10.1.0.3:8300"] +after an outage. The format of this file depends on what the server has +configured for its Raft protocol version. Please see the agent configuration +page at https://www.consul.io/docs/agent/options.html#_raft_protocol for more +details about this parameter. 
+ +For Raft protocol version 2 and earlier, this should be formatted as a JSON +array containing the address and port of each Consul server in the cluster, like +this: + +[ + "10.1.0.1:8300", + "10.1.0.2:8300", + "10.1.0.3:8300" +] + +For Raft protocol version 3 and later, this should be formatted as a JSON +array containing the node ID, address:port, and suffrage information of each +Consul server in the cluster, like this: + +[ + { + "id": "adf4238a-882b-9ddc-4a9d-5b6758e4159e", + "address": "10.1.0.1:8300", + "non_voter": false + }, + { + "id": "8b6dda82-3103-11e7-93ae-92361f002671", + "address": "10.1.0.2:8300", + "non_voter": false + }, + { + "id": "97e17742-3103-11e7-93ae-92361f002671", + "address": "10.1.0.3:8300", + "non_voter": false + } +] + +The "id" field is the node ID of the server. This can be found in the logs when +the server starts up, or in the "node-id" file inside the server's data +directory. + +The "address" field is the address and port of the server. + +The "non_voter" field controls whether the server is a non-voter, which is used +in some advanced Autopilot configurations, please see +https://www.consul.io/docs/guides/autopilot.html for more information. If +"non_voter" is omitted it will default to false, which is typical for most +clusters. Under normal operation, the peers.json file will not be present. diff --git a/website/source/docs/guides/outage.html.markdown b/website/source/docs/guides/outage.html.markdown index 0e16bbe18b61..d5054e330c26 100644 --- a/website/source/docs/guides/outage.html.markdown +++ b/website/source/docs/guides/outage.html.markdown @@ -124,21 +124,60 @@ periodic basis. The next step is to go to the [`-data-dir`](/docs/agent/options.html#_data_dir) of each Consul server. Inside that directory, there will be a `raft/` -sub-directory. We need to create a `raft/peers.json` file. It should look -something like: +sub-directory. We need to create a `raft/peers.json` file. 
The format of this file +depends on what the server has configured for its +[Raft protocol](/docs/agent/options.html#_raft_protocol) version. -```javascript +For Raft protocol version 2 and earlier, this should be formatted as a JSON +array containing the address and port of each Consul server in the cluster, like +this: + +```json +[ + "10.1.0.1:8300", + "10.1.0.2:8300", + "10.1.0.3:8300" +] +``` + +For Raft protocol version 3 and later, this should be formatted as a JSON +array containing the node ID, address:port, and suffrage information of each +Consul server in the cluster, like this: + +```json [ -"10.0.1.8:8300", -"10.0.1.6:8300", -"10.0.1.7:8300" + { + "id": "adf4238a-882b-9ddc-4a9d-5b6758e4159e", + "address": "10.1.0.1:8300", + "non_voter": false + }, + { + "id": "8b6dda82-3103-11e7-93ae-92361f002671", + "address": "10.1.0.2:8300", + "non_voter": false + }, + { + "id": "97e17742-3103-11e7-93ae-92361f002671", + "address": "10.1.0.3:8300", + "non_voter": false + } ] ``` -Simply create entries for all remaining servers. You must confirm -that servers you do not include here have indeed failed and will not later -rejoin the cluster. Ensure that this file is the same across all remaining -server nodes. +- `id` `(string: <required>)` - Specifies the [node ID](/docs/agent/options.html#_node_id) + of the server. This can be found in the logs when the server starts up if it was auto-generated, + and it can also be found inside the `node-id` file in the server's data directory. + +- `address` `(string: <required>)` - Specifies the IP and port of the server. The port is the + server's RPC port used for cluster communications. + +- `non_voter` `(bool: <false>)` - This controls whether the server is a non-voter, which is used + in some advanced [Autopilot](/docs/guides/autopilot.html) configurations. If omitted, it will + default to false, which is typical for most clusters. + +Simply create entries for all servers.
You must confirm that servers you do not include here have +indeed failed and will not later rejoin the cluster. Ensure that this file is the same across all +remaining server nodes. At this point, you can restart all the remaining servers. In Consul 0.7 and later you will see them ingest recovery file: @@ -177,8 +216,8 @@ command to inspect the Raft configuration: ``` $ consul operator raft -list-peers -Node ID Address State Voter -alice 10.0.1.8:8300 10.0.1.8:8300 follower true -bob 10.0.1.6:8300 10.0.1.6:8300 leader true -carol 10.0.1.7:8300 10.0.1.7:8300 follower true +Node ID Address State Voter RaftProtocol +alice 10.0.1.8:8300 10.0.1.8:8300 follower true 2 +bob 10.0.1.6:8300 10.0.1.6:8300 leader true 2 +carol 10.0.1.7:8300 10.0.1.7:8300 follower true 2 ```