From 7b749579a20d98712e2d36ed32bced49a0194892 Mon Sep 17 00:00:00 2001
From: Chelsea Holland Komlo
Date: Mon, 20 Nov 2017 10:38:46 -0500
Subject: [PATCH 01/24] add ability to upgrade/downgrade nomad agents tls
 configurations via sighup

---
 client/client.go            |  20 ++++
 client/client_test.go       | 105 +++++++++++++++++
 command/agent/agent.go      |  76 ++++++++----
 command/agent/agent_test.go | 194 ++++++++++++++++++++++++++++
 command/agent/command.go    |  45 ++++++++-
 command/agent/http.go       |   5 +-
 command/agent/http_test.go  |  71 +++++++++++++
 nomad/pool.go               |  15 ++-
 nomad/raft_rpc.go           |  18 +++-
 nomad/rpc.go                |  30 ++++--
 nomad/server.go             |  74 ++++++++++++--
 nomad/server_test.go        |  99 ++++++++++++++
 nomad/structs/config/tls.go |  33 ++++++
 13 files changed, 744 insertions(+), 41 deletions(-)

diff --git a/client/client.go b/client/client.go
index 1dca496ed921..6a647c2bf2c8 100644
--- a/client/client.go
+++ b/client/client.go
@@ -30,6 +30,7 @@ import (
 	"github.com/hashicorp/nomad/helper/uuid"
 	"github.com/hashicorp/nomad/nomad"
 	"github.com/hashicorp/nomad/nomad/structs"
+	nconfig "github.com/hashicorp/nomad/nomad/structs/config"
 	vaultapi "github.com/hashicorp/vault/api"
 	"github.com/mitchellh/hashstructure"
 	"github.com/shirou/gopsutil/host"
@@ -364,6 +365,25 @@ func (c *Client) init() error {
 	return nil
 }
 
+// ReloadTLSConnectoins allows a client to reload RPC connections if the
+// client's TLS configuration changes from plaintext to TLS
+func (c *Client) ReloadTLSConnections(newConfig *nconfig.TLSConfig) error {
+	c.configLock.Lock()
+	defer c.configLock.Unlock()
+
+	c.config.TLSConfig = newConfig
+
+	if c.config.TLSConfig.EnableRPC {
+		tw, err := c.config.TLSConfiguration().OutgoingTLSWrapper()
+		if err != nil {
+			return err
+		}
+		c.connPool.ReloadTLS(tw)
+	}
+
+	return nil
+}
+
 // Leave is used to prepare the client to leave the cluster
 func (c *Client) Leave() error {
 	// TODO
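Aside: the OutgoingTLSWrapper handed to the connection pool above is a function that upgrades a freshly dialed outbound connection to TLS for a target region. A minimal sketch of that shape (type and helper names here are illustrative assumptions, not the tlsutil implementation):

package tlsexample

import (
	"crypto/tls"
	"net"
)

// RegionWrapper mirrors the wrapper type the conn pool stores: given a
// destination region and a plain TCP conn, return a TLS-wrapped conn.
type RegionWrapper func(region string, conn net.Conn) (net.Conn, error)

// newOutgoingWrapper is a sketch: it captures the current *tls.Config once,
// so swapping the wrapper (as ConnPool.ReloadTLS does) swaps the config.
func newOutgoingWrapper(base *tls.Config) RegionWrapper {
	return func(region string, conn net.Conn) (net.Conn, error) {
		conf := base.Clone()
		// Nomad servers present a region-scoped identity, e.g.
		// "server.<region>.nomad", so verification is per region.
		conf.ServerName = "server." + region + ".nomad"
		return tls.Client(conn, conf), nil
	}
}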
diff --git a/client/client_test.go b/client/client_test.go
index 3492557f7f9f..4550ebcdc30a 100644
--- a/client/client_test.go
+++ b/client/client_test.go
@@ -1001,3 +1001,108 @@ func TestClient_ValidateMigrateToken_ACLDisabled(t *testing.T) {
 
 	assert.Equal(c.ValidateMigrateToken("", ""), true)
 }
+
+func TestClient_ReloadTLS_UpgradePlaintextToTLS(t *testing.T) {
+	t.Parallel()
+	assert := assert.New(t)
+
+	s1, addr := testServer(t, func(c *nomad.Config) {
+		c.Region = "dc1"
+	})
+	defer s1.Shutdown()
+	testutil.WaitForLeader(t, s1.RPC)
+
+	const (
+		cafile  = "../helper/tlsutil/testdata/ca.pem"
+		foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
+		fookey  = "../helper/tlsutil/testdata/nomad-foo-key.pem"
+	)
+
+	c1 := testClient(t, func(c *config.Config) {
+		c.Servers = []string{addr}
+	})
+	defer c1.Shutdown()
+
+	newConfig := &nconfig.TLSConfig{
+		EnableHTTP:           true,
+		EnableRPC:            true,
+		VerifyServerHostname: true,
+		CAFile:               cafile,
+		CertFile:             foocert,
+		KeyFile:              fookey,
+	}
+
+	err := c1.ReloadTLSConnections(newConfig)
+	assert.Nil(err)
+
+	req := structs.NodeSpecificRequest{
+		NodeID:       c1.Node().ID,
+		QueryOptions: structs.QueryOptions{Region: "dc1"},
+	}
+	var out structs.SingleNodeResponse
+	testutil.AssertUntil(100*time.Millisecond,
+		func() (bool, error) {
+			err := c1.RPC("Node.GetNode", &req, &out)
+			if err == nil {
+				return false, fmt.Errorf("client RPC succeeded when it should have failed:\n%+v", err)
+			}
+			return true, nil
+		},
+		func(err error) {
+			t.Fatalf(err.Error())
+		},
+	)
+}
+
+func TestClient_ReloadTLS_DowngradeTLSToPlaintext(t *testing.T) {
+	t.Parallel()
+	assert := assert.New(t)
+
+	s1, addr := testServer(t, func(c *nomad.Config) {
+		c.Region = "dc1"
+	})
+	defer s1.Shutdown()
+	testutil.WaitForLeader(t, s1.RPC)
+
+	const (
+		cafile  = "../helper/tlsutil/testdata/ca.pem"
+		foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
+		fookey  = "../helper/tlsutil/testdata/nomad-foo-key.pem"
+	)
+
+	c1 := testClient(t, func(c *config.Config) {
+		c.Servers = []string{addr}
+		c.TLSConfig = &nconfig.TLSConfig{
+			EnableHTTP:           true,
+			EnableRPC:            true,
+			VerifyServerHostname: true,
+			CAFile:               cafile,
+			CertFile:             foocert,
+			KeyFile:              fookey,
+		}
+	})
+	defer c1.Shutdown()
+
+	newConfig := &nconfig.TLSConfig{}
+
+	err := c1.ReloadTLSConnections(newConfig)
+	assert.Nil(err)
+
+	req := structs.NodeSpecificRequest{
+		NodeID:       c1.Node().ID,
+		QueryOptions: structs.QueryOptions{Region: "dc1"},
+	}
+	var out structs.SingleNodeResponse
+	testutil.AssertUntil(100*time.Millisecond,
+		func() (bool, error) {
+			err := c1.RPC("Node.GetNode", &req, &out)
+			if err != nil {
+				return false, fmt.Errorf("client RPC succeeded when it should have failed:\n%+v", err)
+			}
+			return true, nil
+		},
+		func(err error) {
+			t.Fatalf(err.Error())
+		},
+	)
+}

diff --git a/command/agent/agent.go b/command/agent/agent.go
index de05400fa3d5..788085a50424 100644
--- a/command/agent/agent.go
+++ b/command/agent/agent.go
@@ -730,29 +730,71 @@ func (a *Agent) Stats() map[string]map[string]string {
 	return stats
 }
 
+// ShouldReload determines if we should reload the configuration and agent
+// connections. If the TLS Configuration has not changed, we shouldn't reload.
+func (a *Agent) ShouldReload(newConfig *Config) (bool, func(*Config) error) {
+	if a.config.TLSConfig.Equals(newConfig.TLSConfig) {
+		return false, nil
+	}
+
+	return true, a.Reload
+}
+
 // Reload handles configuration changes for the agent. Provides a method that
 // is easier to unit test, as this action is invoked via SIGHUP.
 func (a *Agent) Reload(newConfig *Config) error {
 	a.configLock.Lock()
 	defer a.configLock.Unlock()
 
-	if newConfig.TLSConfig != nil {
-
-		// TODO(chelseakomlo) In a later PR, we will introduce the ability to reload
-		// TLS configuration if the agent is not running with TLS enabled.
-		if a.config.TLSConfig != nil {
-			// Reload the certificates on the keyloader and on success store the
-			// updated TLS config. It is important to reuse the same keyloader
-			// as this allows us to dynamically reload configurations not only
-			// on the Agent but on the Server and Client too (they are
-			// referencing the same keyloader).
-			keyloader := a.config.TLSConfig.GetKeyLoader()
-			_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile)
-			if err != nil {
-				return err
-			}
-			a.config.TLSConfig = newConfig.TLSConfig
-			a.config.TLSConfig.KeyLoader = keyloader
+	if newConfig == nil || newConfig.TLSConfig == nil {
+		return fmt.Errorf("cannot reload agent with nil configuration")
+	}
+
+	// This is just a TLS configuration reload, we don't need to refresh
+	// existing network connections
+	if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() {
+
+		// Reload the certificates on the keyloader and on success store the
+		// updated TLS config. It is important to reuse the same keyloader
+		// as this allows us to dynamically reload configurations not only
+		// on the Agent but on the Server and Client too (they are
+		// referencing the same keyloader).
+		keyloader := a.config.TLSConfig.GetKeyLoader()
+		_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile)
+		if err != nil {
+			return err
+		}
+		a.config.TLSConfig = newConfig.TLSConfig
+		a.config.TLSConfig.KeyLoader = keyloader
+		return nil
+	}
+
+	// Completely reload the agent's TLS configuration (moving from non-TLS to
+	// TLS, or vice versa)
+	// This does not handle errors in loading the new TLS configuration
+	a.config.TLSConfig = newConfig.TLSConfig.Copy()
+
+	if newConfig.TLSConfig.IsEmpty() {
+		a.logger.Println("[WARN] Downgrading agent's existing TLS configuration to plaintext")
+	} else {
+		a.logger.Println("[INFO] Upgrading from plaintext configuration to TLS")
+	}
+
+	// Reload the TLS configuration for the client or server, depending on how
+	// the agent is configured to run.
+	if s := a.Server(); s != nil {
+		err := s.ReloadTLSConnections(a.config.TLSConfig)
+		if err != nil {
+			a.logger.Printf("[WARN] agent: Issue reloading the server's TLS Configuration, consider a full system restart: %v", err.Error())
+			return err
+		}
+	}
+	if c := a.Client(); c != nil {
+
+		err := c.ReloadTLSConnections(a.config.TLSConfig)
+		if err != nil {
+			a.logger.Printf("[ERR] agent: Issue reloading the client's TLS Configuration, consider a full system restart: %v", err.Error())
+			return err
 		}
 	}
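Aside: the comment above leans on a shared keyloader so one LoadKeyPair call refreshes agent, client, and server at once. A self-contained sketch of that idea (an assumed shape, not Nomad's keyloader implementation):

package tlsexample

import (
	"crypto/tls"
	"sync"
)

// KeyLoader holds the most recently loaded certificate behind a mutex so
// every component sharing the loader observes reloads immediately.
type KeyLoader struct {
	mu   sync.Mutex
	cert *tls.Certificate
}

// LoadKeyPair parses the key pair from disk and swaps it in atomically.
func (k *KeyLoader) LoadKeyPair(certFile, keyFile string) (*tls.Certificate, error) {
	cert, err := tls.LoadX509KeyPair(certFile, keyFile)
	if err != nil {
		return nil, err
	}
	k.mu.Lock()
	defer k.mu.Unlock()
	k.cert = &cert
	return k.cert, nil
}

// GetOutgoingCertificate can back tls.Config.GetCertificate, so each new
// handshake sees the most recently loaded key pair.
func (k *KeyLoader) GetOutgoingCertificate(*tls.ClientHelloInfo) (*tls.Certificate, error) {
	k.mu.Lock()
	defer k.mu.Unlock()
	return k.cert, nil
}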
diff --git a/command/agent/agent_test.go b/command/agent/agent_test.go
index 5dc8d8ce737e..023060401c9f 100644
--- a/command/agent/agent_test.go
+++ b/command/agent/agent_test.go
@@ -746,3 +746,197 @@ func Test_GetConfig(t *testing.T) {
 	actualAgentConfig := agent.GetConfig()
 	assert.Equal(actualAgentConfig, agentConfig)
 }
+
+func TestServer_Reload_TLS_WithNilConfiguration(t *testing.T) {
+	t.Parallel()
+	assert := assert.New(t)
+
+	logger := log.New(ioutil.Discard, "", 0)
+
+	agent := &Agent{
+		logger: logger,
+		config: &Config{},
+	}
+
+	err := agent.Reload(nil)
+	assert.NotNil(err)
+	assert.Equal(err.Error(), "cannot reload agent with nil configuration")
+}
+
+func TestServer_Reload_TLS_UpgradeToTLS(t *testing.T) {
+	t.Parallel()
+	assert := assert.New(t)
+
+	const (
+		cafile  = "../../helper/tlsutil/testdata/ca.pem"
+		foocert = "../../helper/tlsutil/testdata/nomad-foo.pem"
+		fookey  = "../../helper/tlsutil/testdata/nomad-foo-key.pem"
+	)
+	dir := tmpDir(t)
+	defer os.RemoveAll(dir)
+
+	logger := log.New(ioutil.Discard, "", 0)
+
+	agentConfig := &Config{
+		TLSConfig: &sconfig.TLSConfig{},
+	}
+
+	agent := &Agent{
+		logger: logger,
+		config: agentConfig,
+	}
+
+	newConfig := &Config{
+		TLSConfig: &sconfig.TLSConfig{
+			EnableHTTP:           true,
+			EnableRPC:            true,
+			VerifyServerHostname: true,
+			CAFile:               cafile,
+			CertFile:             foocert,
+			KeyFile:              fookey,
+		},
+	}
+
+	err := agent.Reload(newConfig)
+	assert.Nil(err)
+
+	assert.Equal(agent.config.TLSConfig.CAFile, newConfig.TLSConfig.CAFile)
+	assert.Equal(agent.config.TLSConfig.CertFile, newConfig.TLSConfig.CertFile)
+	assert.Equal(agent.config.TLSConfig.KeyFile, newConfig.TLSConfig.KeyFile)
+}
+
+func TestServer_Reload_TLS_DowngradeFromTLS(t *testing.T) {
+	t.Parallel()
+	assert := assert.New(t)
+
+	const (
+		cafile  = "../../helper/tlsutil/testdata/ca.pem"
+		foocert = "../../helper/tlsutil/testdata/nomad-foo.pem"
+		fookey  = "../../helper/tlsutil/testdata/nomad-foo-key.pem"
+	)
+	dir := tmpDir(t)
+	defer os.RemoveAll(dir)
+
+	logger := log.New(ioutil.Discard, "", 0)
+
+	agentConfig := &Config{
+		TLSConfig: &sconfig.TLSConfig{
+			EnableHTTP:           true,
+			EnableRPC:            true,
+			VerifyServerHostname: true,
+			CAFile:               cafile,
+			CertFile:             foocert,
+			KeyFile:              fookey,
+		},
+	}
+
+	agent := &Agent{
+		logger: logger,
+		config: agentConfig,
+	}
+
+	newConfig := &Config{
+		TLSConfig: &sconfig.TLSConfig{},
+	}
+
+	assert.False(agentConfig.TLSConfig.IsEmpty())
+
+	err := agent.Reload(newConfig)
+	assert.Nil(err)
+
+	assert.True(agentConfig.TLSConfig.IsEmpty())
+}
+
+func TestServer_ShouldReload_ReturnFalseForNoChanges(t *testing.T) {
+	t.Parallel()
+	assert := assert.New(t)
+
+	const (
+		cafile  = "../../helper/tlsutil/testdata/ca.pem"
+		foocert = "../../helper/tlsutil/testdata/nomad-foo.pem"
+		fookey  = "../../helper/tlsutil/testdata/nomad-foo-key.pem"
+	)
+	dir := tmpDir(t)
+	defer os.RemoveAll(dir)
+
+	logger := log.New(ioutil.Discard, "", 0)
+
+	agentConfig := &Config{
+		TLSConfig: &sconfig.TLSConfig{
+			EnableHTTP:           true,
+			EnableRPC:            true,
+			VerifyServerHostname: true,
+			CAFile:               cafile,
+			CertFile:             foocert,
+			KeyFile:              fookey,
+		},
+	}
+
+	sameAgentConfig := &Config{
+		TLSConfig: &sconfig.TLSConfig{
+			EnableHTTP:           true,
+			EnableRPC:            true,
+			VerifyServerHostname: true,
+			CAFile:               cafile,
+			CertFile:             foocert,
+			KeyFile:              fookey,
+		},
+	}
+
+	agent := &Agent{
+		logger: logger,
+		config: agentConfig,
+	}
+
+	shouldReload, reloadFunc := agent.ShouldReload(sameAgentConfig)
+	assert.False(shouldReload)
+	assert.Nil(reloadFunc)
+}
+
+func TestServer_ShouldReload_ReturnTrueForConfigChanges(t *testing.T) {
+	t.Parallel()
+	assert := assert.New(t)
+
+	const (
+		cafile   = "../../helper/tlsutil/testdata/ca.pem"
+		foocert  = "../../helper/tlsutil/testdata/nomad-foo.pem"
+		fookey   = "../../helper/tlsutil/testdata/nomad-foo-key.pem"
+		foocert2 = "any_cert_path"
+		fookey2  = "any_key_path"
+	)
+	dir := tmpDir(t)
+	defer os.RemoveAll(dir)
+
+	logger := log.New(ioutil.Discard, "", 0)
+
+	agentConfig := &Config{
+		TLSConfig: &sconfig.TLSConfig{
+			EnableHTTP:           true,
+			EnableRPC:            true,
+			VerifyServerHostname: true,
+			CAFile:               cafile,
+			CertFile:             foocert,
+			KeyFile:              fookey,
+		},
+	}
+
+	newConfig := &Config{
+		TLSConfig: &sconfig.TLSConfig{
+			EnableHTTP:           true,
+			EnableRPC:            true,
+			VerifyServerHostname: true,
+			CAFile:               cafile,
+			CertFile:             foocert2,
+			KeyFile:              fookey2,
+		},
+	}
+
+	agent := &Agent{
+		logger: logger,
+		config: agentConfig,
+	}
+
+	shouldReload, reloadFunc := agent.ShouldReload(newConfig)
+	assert.True(shouldReload)
+	assert.NotNil(reloadFunc)
+}

diff --git a/command/agent/command.go b/command/agent/command.go
index c1be2286e050..78d7c838b51a 100644
--- a/command/agent/command.go
+++ b/command/agent/command.go
@@ -598,6 +598,24 @@ WAIT:
 	}
 }
 
+func (c *Command) reloadHTTPServerOnConfigChange(newConfig *Config) error {
+	c.agent.logger.Println("[INFO] agent: Reloading HTTP server with new TLS configuration")
+	err := c.httpServer.Shutdown()
+	if err != nil {
+		return err
+	}
+
+	// Wait some time to ensure a clean shutdown
+	time.Sleep(5 * time.Second)
+	http, err := NewHTTPServer(c.agent, c.agent.config)
+	if err != nil {
+		return err
+	}
+	c.httpServer = http
+
+	return nil
+}
+
 // handleReload is invoked when we should reload our configs, e.g. SIGHUP
 func (c *Command) handleReload() {
 	c.Ui.Output("Reloading configuration...")
@@ -620,10 +638,29 @@ func (c *Command) handleReload() {
 		newConf.LogLevel = c.agent.GetConfig().LogLevel
 	}
 
-	// Reloads configuration for an agent running in both client and server mode
-	err := c.agent.Reload(newConf)
-	if err != nil {
-		c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err)
+	shouldReload, reloadFunc := c.agent.ShouldReload(newConf)
+	if shouldReload && reloadFunc != nil {
+		// Reloads configuration for an agent running in both client and server mode
+		err := reloadFunc(newConf)
+		if err != nil {
+			c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err)
+		}
+
+		err = c.httpServer.Shutdown()
+		if err != nil {
+			c.agent.logger.Printf("[ERR] agent: failed to stop HTTP server: %v", err)
+			return
+		}
+
+		// Wait some time to ensure a clean shutdown
+		time.Sleep(5 * time.Second)
+		http, err := NewHTTPServer(c.agent, c.agent.config)
+		if err != nil {
+			c.agent.logger.Printf("[ERR] agent: failed to reload http server: %v", err)
+			return
+		}
+		c.agent.logger.Println("[INFO] agent: successfully restarted the HTTP server")
+		c.httpServer = http
 	}
 
 	if s := c.agent.Server(); s != nil {
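Aside: handleReload above is driven by a signal loop elsewhere in this file. A minimal, self-contained sketch of that wiring (hypothetical names, not the patch's code):

package main

import (
	"log"
	"os"
	"os/signal"
	"syscall"
)

func main() {
	signalCh := make(chan os.Signal, 4)
	signal.Notify(signalCh, syscall.SIGHUP)
	for range signalCh {
		// On SIGHUP, re-parse the config files and apply them, mirroring
		// Command.handleReload: decide, then reload agent and HTTP server.
		log.Println("[INFO] agent: caught SIGHUP, reloading")
		// newConf := readConfigFromFiles(...)      // assumed helper
		// if ok, reload := agent.ShouldReload(newConf); ok { reload(newConf) }
	}
}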
diff --git a/command/agent/http.go b/command/agent/http.go
index 4146cffd4f62..2c8fa27ff386 100644
--- a/command/agent/http.go
+++ b/command/agent/http.go
@@ -126,11 +126,12 @@ func (ln tcpKeepAliveListener) Accept() (c net.Conn, err error) {
 }
 
 // Shutdown is used to shutdown the HTTP server
-func (s *HTTPServer) Shutdown() {
+func (s *HTTPServer) Shutdown() error {
 	if s != nil {
 		s.logger.Printf("[DEBUG] http: Shutting down http server")
-		s.listener.Close()
+		return s.listener.Close()
 	}
+	return nil
 }
 
 // registerHandlers is used to attach our handlers to the mux

diff --git a/command/agent/http_test.go b/command/agent/http_test.go
index 5d4004c18e13..b145d43571f9 100644
--- a/command/agent/http_test.go
+++ b/command/agent/http_test.go
@@ -673,3 +673,74 @@ func encodeReq(obj interface{}) io.ReadCloser {
 	enc.Encode(obj)
 	return ioutil.NopCloser(buf)
 }
+
+func TestHTTP_VerifyHTTPSClientUpgrade_AfterConfigReload(t *testing.T) {
+	t.Parallel()
+	assert := assert.New(t)
+
+	const (
+		cafile  = "../../helper/tlsutil/testdata/ca.pem"
+		foocert = "../../helper/tlsutil/testdata/nomad-foo.pem"
+		fookey  = "../../helper/tlsutil/testdata/nomad-foo-key.pem"
+	)
+
+	newConfig := &Config{
+		TLSConfig: &config.TLSConfig{
+			EnableHTTP:        true,
+			VerifyHTTPSClient: true,
+			CAFile:            cafile,
+			CertFile:          foocert,
+			KeyFile:           fookey,
+		},
+	}
+
+	s := makeHTTPServer(t, func(c *Config) {
+		c.TLSConfig = newConfig.TLSConfig
+	})
+	defer s.Shutdown()
+
+	// HTTP plaintext request should succeed
+	reqURL := fmt.Sprintf("http://%s/v1/agent/self", s.Agent.config.AdvertiseAddrs.HTTP)
+
+	// First test with a plaintext request
+	transport := &http.Transport{}
+	client := &http.Client{Transport: transport}
+	_, err := http.NewRequest("GET", reqURL, nil)
+	assert.Nil(err)
+
+	// Next, reload the TLS configuration
+	err = s.Agent.Reload(newConfig)
+	assert.Nil(err)
+
+	// PASS: Requests that specify a valid hostname, CA cert, and client
+	// certificate succeed.
+	tlsConf := &tls.Config{
+		ServerName: "client.regionFoo.nomad",
+		RootCAs:    x509.NewCertPool(),
+		GetClientCertificate: func(*tls.CertificateRequestInfo) (*tls.Certificate, error) {
+			c, err := tls.LoadX509KeyPair(foocert, fookey)
+			if err != nil {
+				return nil, err
+			}
+			return &c, nil
+		},
+	}
+
+	// HTTPS request should succeed
+	httpsReqURL := fmt.Sprintf("https://%s/v1/agent/self", s.Agent.config.AdvertiseAddrs.HTTP)
+
+	cacertBytes, err := ioutil.ReadFile(cafile)
+	assert.Nil(err)
+	tlsConf.RootCAs.AppendCertsFromPEM(cacertBytes)
+
+	transport = &http.Transport{TLSClientConfig: tlsConf}
+	client = &http.Client{Transport: transport}
+	req, err := http.NewRequest("GET", httpsReqURL, nil)
+	assert.Nil(err)
+
+	resp, err := client.Do(req)
+	assert.Nil(err)
+
+	resp.Body.Close()
+	assert.Equal(resp.StatusCode, 200)
+}

diff --git a/nomad/pool.go b/nomad/pool.go
index 320e7a3200d4..017621c99e83 100644
--- a/nomad/pool.go
+++ b/nomad/pool.go
@@ -10,7 +10,7 @@ import (
 	"sync/atomic"
 	"time"
 
-	"github.com/hashicorp/net-rpc-msgpackrpc"
+	msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc"
 	"github.com/hashicorp/nomad/helper/tlsutil"
 	"github.com/hashicorp/yamux"
 )
@@ -175,6 +175,19 @@ func (p *ConnPool) Shutdown() error {
 	return nil
 }
 
+// ReloadTLS reloads TLS configuration on the fly
+func (p *ConnPool) ReloadTLS(tlsWrap tlsutil.RegionWrapper) {
+	p.Lock()
+	defer p.Unlock()
+
+	oldPool := p.pool
+	for _, conn := range oldPool {
+		conn.Close()
+	}
+	p.pool = make(map[string]*Conn)
+	p.tlsWrap = tlsWrap
+}
+
 // Acquire is used to get a connection that is
 // pooled or to return a new connection
 func (p *ConnPool) acquire(region string, addr net.Addr, version int) (*Conn, error) {
diff --git a/nomad/raft_rpc.go b/nomad/raft_rpc.go
index 31ac4d0a7d83..e769b9998635 100644
--- a/nomad/raft_rpc.go
+++ b/nomad/raft_rpc.go
@@ -1,6 +1,7 @@
 package nomad
 
 import (
+	"context"
 	"fmt"
 	"net"
 	"sync"
@@ -43,12 +44,14 @@ func NewRaftLayer(addr net.Addr, tlsWrap tlsutil.Wrapper) *RaftLayer {
 
 // Handoff is used to hand off a connection to the
 // RaftLayer. This allows it to be Accept()'ed
-func (l *RaftLayer) Handoff(c net.Conn) error {
+func (l *RaftLayer) Handoff(c net.Conn, ctx context.Context) error {
 	select {
 	case l.connCh <- c:
 		return nil
 	case <-l.closeCh:
 		return fmt.Errorf("Raft RPC layer closed")
+	case <-ctx.Done():
+		return fmt.Errorf("[INFO] nomad.rpc: Closing server RPC connection")
 	}
 }
 
@@ -110,3 +113,16 @@ func (l *RaftLayer) Dial(address raft.ServerAddress, timeout time.Duration) (net
 	}
 	return conn, err
 }
+
+// ReloadTLS will re-initialize the TLS wrapper on the fly
+func (l *RaftLayer) ReloadTLS(tlsWrap tlsutil.Wrapper) {
+	l.closeLock.Lock()
+	defer l.closeLock.Unlock()
+
+	if !l.closed {
+		close(l.closeCh)
+	}
+
+	l.tlsWrap = tlsWrap
+	l.closeCh = make(chan struct{})
+}

diff --git a/nomad/rpc.go b/nomad/rpc.go
index 828ee0c94c0a..8deedd19416f 100644
--- a/nomad/rpc.go
+++ b/nomad/rpc.go
@@ -68,8 +68,15 @@ func NewServerCodec(conn io.ReadWriteCloser) rpc.ServerCodec {
 }
 
 // listen is used to listen for incoming RPC connections
-func (s *Server) listen() {
+func (s *Server) listen(ctx context.Context) {
 	for {
+		select {
+		case <-ctx.Done():
+			s.logger.Println("[INFO] nomad.rpc: Closing server RPC connection")
+			return
+		default:
+		}
+
 		// Accept a connection
 		conn, err := s.rpcListener.Accept()
 		if err != nil {
@@ -80,14 +87,14 @@ func (s *Server) listen() {
 			continue
 		}
 
-		go s.handleConn(conn, false)
+		go s.handleConn(conn, false, ctx)
 		metrics.IncrCounter([]string{"nomad", "rpc", "accept_conn"}, 1)
 	}
 }
 
 // handleConn is used to determine if this is a Raft or
 // Nomad type RPC connection and invoke the correct handler
-func (s *Server) handleConn(conn net.Conn, isTLS bool) {
+func (s *Server) handleConn(conn net.Conn, isTLS bool, ctx context.Context) {
 	// Read a single byte
 	buf := make([]byte, 1)
 	if _, err := conn.Read(buf); err != nil {
@@ -110,14 +117,14 @@ func (s *Server) handleConn(conn net.Conn, isTLS bool) {
 	// Switch on the byte
 	switch RPCType(buf[0]) {
 	case rpcNomad:
-		s.handleNomadConn(conn)
+		s.handleNomadConn(conn, ctx)
 
 	case rpcRaft:
 		metrics.IncrCounter([]string{"nomad", "rpc", "raft_handoff"}, 1)
-		s.raftLayer.Handoff(conn)
+		s.raftLayer.Handoff(conn, ctx)
 
 	case rpcMultiplex:
-		s.handleMultiplex(conn)
+		s.handleMultiplex(conn, ctx)
 
 	case rpcTLS:
 		if s.rpcTLS == nil {
@@ -126,7 +133,7 @@ func (s *Server) handleConn(conn net.Conn, isTLS bool) {
 			return
 		}
 		conn = tls.Server(conn, s.rpcTLS)
-		s.handleConn(conn, true)
+		s.handleConn(conn, true, ctx)
 
 	default:
 		s.logger.Printf("[ERR] nomad.rpc: unrecognized RPC byte: %v", buf[0])
@@ -137,7 +144,7 @@ func (s *Server) handleConn(conn net.Conn, isTLS bool) {
 
 // handleMultiplex is used to multiplex a single incoming connection
 // using the Yamux multiplexer
-func (s *Server) handleMultiplex(conn net.Conn) {
+func (s *Server) handleMultiplex(conn net.Conn, ctx context.Context) {
 	defer conn.Close()
 	conf := yamux.DefaultConfig()
 	conf.LogOutput = s.config.LogOutput
@@ -150,16 +157,19 @@ func (s *Server) handleMultiplex(conn net.Conn) {
 		}
 		return
 	}
-		go s.handleNomadConn(sub)
+		go s.handleNomadConn(sub, ctx)
 	}
 }
 
 // handleNomadConn is used to service a single Nomad RPC connection
-func (s *Server) handleNomadConn(conn net.Conn) {
+func (s *Server) handleNomadConn(conn net.Conn, ctx context.Context) {
 	defer conn.Close()
 	rpcCodec := NewServerCodec(conn)
 	for {
 		select {
+		case <-ctx.Done():
+			s.logger.Println("[INFO] nomad.rpc: Closing server RPC connection")
+			return
 		case <-s.shutdownCh:
 			return
 		default:
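Aside: the context threading above exists so a TLS reload can tear down the accept loop without shutting the whole server down. A standalone sketch of the same pattern (illustrative, not Nomad code):

package main

import (
	"context"
	"log"
	"net"
)

// serve accepts connections until ctx is canceled. A TLS reload cancels the
// context and closes the listener, then starts a fresh serve goroutine with
// the new TLS settings, which is the pattern the patch threads into listen.
func serve(ctx context.Context, ln net.Listener, handle func(net.Conn)) {
	for {
		select {
		case <-ctx.Done():
			log.Println("accept loop stopping: context canceled")
			return
		default:
		}

		conn, err := ln.Accept()
		if err != nil {
			// Closing the listener during a reload surfaces here as an
			// error; only log when we are not being torn down on purpose.
			select {
			case <-ctx.Done():
				return
			default:
			}
			log.Printf("accept error: %v", err)
			continue
		}
		go handle(conn)
	}
}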
diff --git a/nomad/server.go b/nomad/server.go
index 7648c74360f3..dad73148a4fc 100644
--- a/nomad/server.go
+++ b/nomad/server.go
@@ -1,6 +1,7 @@
 package nomad
 
 import (
+	"context"
 	"crypto/tls"
 	"errors"
 	"fmt"
@@ -26,6 +27,7 @@ import (
 	"github.com/hashicorp/nomad/nomad/deploymentwatcher"
 	"github.com/hashicorp/nomad/nomad/state"
 	"github.com/hashicorp/nomad/nomad/structs"
+	"github.com/hashicorp/nomad/nomad/structs/config"
 	"github.com/hashicorp/raft"
 	raftboltdb "github.com/hashicorp/raft-boltdb"
 	"github.com/hashicorp/serf/serf"
@@ -109,7 +111,8 @@ type Server struct {
 	rpcAdvertise net.Addr
 
 	// rpcTLS is the TLS config for incoming TLS requests
-	rpcTLS *tls.Config
+	rpcTLS    *tls.Config
+	rpcCancel context.CancelFunc
 
 	// peers is used to track the known Nomad servers. This is
 	// used for region forwarding and clustering.
@@ -329,7 +332,9 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, logger *log.Logg
 	go s.serfEventHandler()
 
 	// Start the RPC listeners
-	go s.listen()
+	ctx, cancel := context.WithCancel(context.Background())
+	s.rpcCancel = cancel
+	go s.listen(ctx)
 
 	// Emit metrics for the eval broker
 	go evalBroker.EmitStats(time.Second, s.shutdownCh)
@@ -353,6 +358,62 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, logger *log.Logg
 	return s, nil
 }
 
+// ReloadTLSConnections updates a server's TLS configuration and reloads RPC
+// connections
+func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error {
+	s.logger.Printf("[INFO] nomad: reloading server connections due to configuration changes")
+
+	s.config.TLSConfig = newTLSConfig
+
+	var tlsWrap tlsutil.RegionWrapper
+	var incomingTLS *tls.Config
+	if s.config.TLSConfig.EnableRPC {
+		tlsConf := s.config.tlsConfig()
+		tw, err := tlsConf.OutgoingTLSWrapper()
+		if err != nil {
+			return err
+		}
+		tlsWrap = tw
+
+		itls, err := tlsConf.IncomingTLSConfig()
+		if err != nil {
+			return err
+		}
+		incomingTLS = itls
+	}
+
+	if s.rpcCancel == nil {
+		s.logger.Printf("[ERR] nomad: No TLS Context to reset")
+		return fmt.Errorf("Unable to reset tls context")
+	}
+
+	s.rpcTLS = incomingTLS
+
+	s.rpcCancel()
+	s.connPool.ReloadTLS(tlsWrap)
+
+	// reinitialize our rpc listener
+	s.rpcListener.Close()
+	time.Sleep(500 * time.Millisecond)
+	list, err := net.ListenTCP("tcp", s.config.RPCAddr)
+	if err != nil || list == nil {
+		s.logger.Printf("[ERR] nomad: No TLS listener to reload")
+		return err
+	}
+	s.rpcListener = list
+
+	// reinitialize the cancel context
+	ctx, cancel := context.WithCancel(context.Background())
+	s.rpcCancel = cancel
+	go s.listen(ctx)
+
+	wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap)
+	s.raftLayer.ReloadTLS(wrapper)
+
+	s.logger.Printf("[INFO] nomad: finished reloading server connections")
+	return nil
+}
+
 // Shutdown is used to shutdown the server
 func (s *Server) Shutdown() error {
 	s.logger.Printf("[INFO] nomad: shutting down server")
@@ -497,9 +558,10 @@ func (s *Server) Leave() error {
 	return nil
 }
 
-// Reload handles a config reload. Not all config fields can handle a reload.
-func (s *Server) Reload(config *Config) error {
-	if config == nil {
+// Reload handles a config reload specific to server-only configuration. Not
+// all config fields can handle a reload.
+func (s *Server) Reload(newConfig *Config) error {
+	if newConfig == nil {
 		return fmt.Errorf("Reload given a nil config")
 	}
 
 	var mErr multierror.Error
 	// Handle the Vault reload. Vault should never be nil but just guard.
 	if s.vault != nil {
-		if err := s.vault.SetConfig(config.VaultConfig); err != nil {
+		if err := s.vault.SetConfig(newConfig.VaultConfig); err != nil {
 			multierror.Append(&mErr, err)
 		}
 	}

diff --git a/nomad/server_test.go b/nomad/server_test.go
index 04175a2900ac..bc7eee0a1a42 100644
--- a/nomad/server_test.go
+++ b/nomad/server_test.go
@@ -13,12 +13,14 @@ import (
 	"time"
 
 	"github.com/hashicorp/consul/lib/freeport"
+	msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc"
 	"github.com/hashicorp/nomad/command/agent/consul"
 	"github.com/hashicorp/nomad/helper/uuid"
 	"github.com/hashicorp/nomad/nomad/mock"
 	"github.com/hashicorp/nomad/nomad/structs"
 	"github.com/hashicorp/nomad/nomad/structs/config"
 	"github.com/hashicorp/nomad/testutil"
+	"github.com/stretchr/testify/assert"
 )
 
 var (
@@ -276,3 +278,100 @@ func TestServer_Reload_Vault(t *testing.T) {
 		t.Fatalf("Vault client should be running")
 	}
 }
+
+// Tests that the server will successfully reload its network connections,
+// upgrading from plaintext to TLS if the server's TLS configuration changes.
+func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) {
+	t.Parallel()
+	assert := assert.New(t)
+
+	const (
+		cafile  = "../helper/tlsutil/testdata/ca.pem"
+		foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
+		fookey  = "../helper/tlsutil/testdata/nomad-foo-key.pem"
+	)
+	dir := tmpDir(t)
+	defer os.RemoveAll(dir)
+	s1 := testServer(t, func(c *Config) {
+		c.DataDir = path.Join(dir, "nodeA")
+	})
+	defer s1.Shutdown()
+
+	// assert that the server started in plaintext mode
+	assert.Equal(s1.config.TLSConfig.CertFile, "")
+
+	newTLSConfig := &config.TLSConfig{
+		EnableHTTP:           true,
+		EnableRPC:            true,
+		VerifyServerHostname: true,
+		CAFile:               cafile,
+		CertFile:             foocert,
+		KeyFile:              fookey,
+	}
+
+	err := s1.ReloadTLSConnections(newTLSConfig)
+	assert.Nil(err)
+
+	assert.True(s1.config.TLSConfig.Equals(newTLSConfig))
+
+	time.Sleep(10 * time.Second)
+	codec := rpcClient(t, s1)
+
+	node := mock.Node()
+	req := &structs.NodeRegisterRequest{
+		Node:         node,
+		WriteRequest: structs.WriteRequest{Region: "global"},
+	}
+
+	var resp structs.GenericResponse
+	err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp)
+	assert.NotNil(err)
+}
+
+// Tests that the server will successfully reload its network connections,
+// downgrading from TLS to plaintext if the server's TLS configuration changes.
+func TestServer_Reload_TLSConnections_TLSToPlaintext(t *testing.T) {
+	t.Parallel()
+	assert := assert.New(t)
+
+	const (
+		cafile  = "../helper/tlsutil/testdata/ca.pem"
+		foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
+		fookey  = "../helper/tlsutil/testdata/nomad-foo-key.pem"
+	)
+
+	dir := tmpDir(t)
+	defer os.RemoveAll(dir)
+	s1 := testServer(t, func(c *Config) {
+		c.DataDir = path.Join(dir, "nodeB")
+		c.TLSConfig = &config.TLSConfig{
+			EnableHTTP:           true,
+			EnableRPC:            true,
+			VerifyServerHostname: true,
+			CAFile:               cafile,
+			CertFile:             foocert,
+			KeyFile:              fookey,
+		}
+	})
+	defer s1.Shutdown()
+
+	newTLSConfig := &config.TLSConfig{}
+
+	err := s1.ReloadTLSConnections(newTLSConfig)
+	assert.Nil(err)
+	assert.True(s1.config.TLSConfig.Equals(newTLSConfig))
+
+	time.Sleep(10 * time.Second)
+
+	codec := rpcClient(t, s1)
+
+	node := mock.Node()
+	req := &structs.NodeRegisterRequest{
+		Node:         node,
+		WriteRequest: structs.WriteRequest{Region: "global"},
+	}
+
+	var resp structs.GenericResponse
+	err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp)
+	assert.Nil(err)
+}

diff --git a/nomad/structs/config/tls.go b/nomad/structs/config/tls.go
index 65178168523a..c93946e34c85 100644
--- a/nomad/structs/config/tls.go
+++ b/nomad/structs/config/tls.go
@@ -141,6 +141,20 @@ func (t *TLSConfig) Copy() *TLSConfig {
 	return new
 }
 
+func (t *TLSConfig) IsEmpty() bool {
+	if t == nil {
+		return true
+	}
+
+	return t.EnableHTTP == false &&
+		t.EnableRPC == false &&
+		t.VerifyServerHostname == false &&
+		t.CAFile == "" &&
+		t.CertFile == "" &&
+		t.KeyFile == "" &&
+		t.VerifyHTTPSClient == false
+}
+
 // Merge is used to merge two TLS configs together
 func (t *TLSConfig) Merge(b *TLSConfig) *TLSConfig {
 	result := t.Copy()
@@ -171,3 +185,22 @@ func (t *TLSConfig) Merge(b *TLSConfig) *TLSConfig {
 	}
 	return result
 }
+
+// Equals compares the fields of two TLS configuration objects, returning a
+// boolean indicating if they are the same.
+func (t *TLSConfig) Equals(newConfig *TLSConfig) bool {
+	if t == nil && newConfig == nil {
+		return true
+	}
+
+	if t != nil && newConfig == nil {
+		return false
+	}
+
+	return t.EnableRPC == newConfig.EnableRPC &&
+		t.CAFile == newConfig.CAFile &&
+		t.CertFile == newConfig.CertFile &&
+		t.KeyFile == newConfig.KeyFile &&
+		t.RPCUpgradeMode == newConfig.RPCUpgradeMode &&
+		t.VerifyHTTPSClient == newConfig.VerifyHTTPSClient
+}
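Aside: note which fields Equals does and does not compare. EnableHTTP and VerifyServerHostname are not part of the comparison above, so a change to only those fields will not, by itself, mark the configs as different. A small self-contained demonstration (hypothetical snippet, not part of the patch):

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/nomad/structs/config"
)

func main() {
	a := &config.TLSConfig{EnableRPC: true, EnableHTTP: true}
	b := &config.TLSConfig{EnableRPC: true, EnableHTTP: false}
	// Prints true: EnableHTTP is ignored by Equals as defined in this patch,
	// so ShouldReload would report no change for an HTTP-only toggle.
	fmt.Println(a.Equals(b))
}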
From b1f87727dd95be6fe4fdbef07f066a8ff159162d Mon Sep 17 00:00:00 2001
From: Chelsea Holland Komlo
Date: Tue, 21 Nov 2017 13:21:29 -0500
Subject: [PATCH 02/24] fix up downgrading client to plaintext

add locks around changing server configuration
---
 client/client.go      | 7 +++++--
 client/client_test.go | 2 +-
 nomad/server.go       | 7 +++++--
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/client/client.go b/client/client.go
index 6a647c2bf2c8..9346a60c7018 100644
--- a/client/client.go
+++ b/client/client.go
@@ -369,9 +369,8 @@ func (c *Client) init() error {
 // client's TLS configuration changes from plaintext to TLS
 func (c *Client) ReloadTLSConnections(newConfig *nconfig.TLSConfig) error {
 	c.configLock.Lock()
-	defer c.configLock.Unlock()
-
 	c.config.TLSConfig = newConfig
+	c.configLock.Unlock()
 
 	if c.config.TLSConfig.EnableRPC {
 		tw, err := c.config.TLSConfiguration().OutgoingTLSWrapper()
@@ -379,8 +378,12 @@ func (c *Client) ReloadTLSConnections(newConfig *nconfig.TLSConfig) error {
 			return err
 		}
 		c.connPool.ReloadTLS(tw)
+	} else {
+		c.connPool.ReloadTLS(nil)
 	}
 
+	time.Sleep(3 * time.Second)
+
 	return nil
 }
 
diff --git a/client/client_test.go b/client/client_test.go
index 4550ebcdc30a..92b2995955cd 100644
--- a/client/client_test.go
+++ b/client/client_test.go
@@ -1097,7 +1097,7 @@ func TestClient_ReloadTLS_DowngradeTLSToPlaintext(t *testing.T) {
 		func() (bool, error) {
 			err := c1.RPC("Node.GetNode", &req, &out)
 			if err != nil {
-				return false, fmt.Errorf("client RPC succeeded when it should have failed:\n%+v", err)
+				return false, fmt.Errorf("client RPC failed when it should have succeeded:\n%+v", err)
 			}
 			return true, nil
 		},
diff --git a/nomad/server.go b/nomad/server.go
index dad73148a4fc..0905612421ce 100644
--- a/nomad/server.go
+++ b/nomad/server.go
@@ -84,8 +84,9 @@ const (
 // Server is Nomad server which manages the job queues,
 // schedulers, and notification bus for agents.
 type Server struct {
-	config *Config
-	logger *log.Logger
+	config     *Config
+	configLock sync.RWMutex
+	logger     *log.Logger
 
 	// Connection pool to other Nomad servers
 	connPool *ConnPool
@@ -363,7 +364,9 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, logger *log.Logg
 func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error {
 	s.logger.Printf("[INFO] nomad: reloading server connections due to configuration changes")
 
+	s.configLock.Lock()
 	s.config.TLSConfig = newTLSConfig
+	s.configLock.Unlock()
 
 	var tlsWrap tlsutil.RegionWrapper
 	var incomingTLS *tls.Config

From 156297c5ee77d73b47a476b0933944e646eaabfa Mon Sep 17 00:00:00 2001
From: Chelsea Holland Komlo
Date: Tue, 21 Nov 2017 14:45:50 -0500
Subject: [PATCH 03/24] close raft long-lived connections

---
 nomad/raft_rpc.go                                 |  2 +-
 nomad/server.go                                   |  8 +++++---
 vendor/github.com/hashicorp/raft/net_transport.go | 13 +++++++++++++
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/nomad/raft_rpc.go b/nomad/raft_rpc.go
index e769b9998635..5c5508f167de 100644
--- a/nomad/raft_rpc.go
+++ b/nomad/raft_rpc.go
@@ -51,7 +51,7 @@ func (l *RaftLayer) Handoff(c net.Conn, ctx context.Context) error {
 	case <-l.closeCh:
 		return fmt.Errorf("Raft RPC layer closed")
 	case <-ctx.Done():
-		return fmt.Errorf("[INFO] nomad.rpc: Closing server RPC connection")
+		return fmt.Errorf("[INFO] nomad.rpc: Closing raft RPC connection")
 	}
 }
 
diff --git a/nomad/server.go b/nomad/server.go
index 0905612421ce..a9dbef0a13bf 100644
--- a/nomad/server.go
+++ b/nomad/server.go
@@ -405,14 +405,16 @@ func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error {
 	}
 	s.rpcListener = list
 
+	wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap)
+	s.raftLayer.ReloadTLS(wrapper)
+	s.raftTransport.Reload()
+	time.Sleep(500 * time.Millisecond)
+
 	// reinitialize the cancel context
 	ctx, cancel := context.WithCancel(context.Background())
 	s.rpcCancel = cancel
 	go s.listen(ctx)
 
-	wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap)
-	s.raftLayer.ReloadTLS(wrapper)
-
 	s.logger.Printf("[INFO] nomad: finished reloading server connections")
 	return nil
 }
diff --git a/vendor/github.com/hashicorp/raft/net_transport.go b/vendor/github.com/hashicorp/raft/net_transport.go
index 7c55ac5371ff..c4d81016dab6 100644
--- a/vendor/github.com/hashicorp/raft/net_transport.go
+++ b/vendor/github.com/hashicorp/raft/net_transport.go
@@ -177,6 +177,19 @@ func (n *NetworkTransport) Close() error {
 	return nil
 }
 
+func (n *NetworkTransport) Reload() {
+	n.shutdownLock.Lock()
+	defer n.shutdownLock.Unlock()
+
+	if !n.shutdown {
+		close(n.shutdownCh)
+		n.shutdown = true
+	}
+
+	time.Sleep(3 * time.Second)
+	n.shutdownCh = make(chan struct{})
+}
+
 // Consumer implements the Transport interface.
 func (n *NetworkTransport) Consumer() <-chan RPC {
 	return n.consumeCh
From d4754d541bca532d192cc47dbff0b06cf906e5b0 Mon Sep 17 00:00:00 2001
From: Chelsea Holland Komlo
Date: Mon, 27 Nov 2017 11:42:52 -0500
Subject: [PATCH 04/24] fixups from code review

Revert "close raft long-lived connections"

This reverts commit 3ffda28206fcb3d63ad117fd1d27ae6f832b6625.

reload raft connections on changing tls
---
 client/client.go                    |  2 +-
 command/agent/command.go            | 17 ++++++-----------
 nomad/raft_rpc.go                   |  4 +++-
 nomad/server.go                     | 19 +++++++++++++------
 .../hashicorp/raft/net_transport.go | 13 -------------
 5 files changed, 23 insertions(+), 32 deletions(-)

diff --git a/client/client.go b/client/client.go
index 9346a60c7018..eb87e34843f6 100644
--- a/client/client.go
+++ b/client/client.go
@@ -365,7 +365,7 @@ func (c *Client) init() error {
 	return nil
 }
 
-// ReloadTLSConnectoins allows a client to reload RPC connections if the
+// ReloadTLSConnections allows a client to reload RPC connections if the
 // client's TLS configuration changes from plaintext to TLS
 func (c *Client) ReloadTLSConnections(newConfig *nconfig.TLSConfig) error {
 	c.configLock.Lock()
diff --git a/command/agent/command.go b/command/agent/command.go
index 78d7c838b51a..6928ee677431 100644
--- a/command/agent/command.go
+++ b/command/agent/command.go
@@ -12,6 +12,7 @@ import (
 	"sort"
 	"strconv"
 	"strings"
+	"sync"
 	"syscall"
 	"time"
 
@@ -46,6 +47,7 @@ type Command struct {
 	args           []string
 	agent          *Agent
 	httpServer     *HTTPServer
+	httpServerLock sync.Mutex
 	logFilter      *logutils.LevelFilter
 	logOutput      io.Writer
 	retryJoinErrCh chan struct{}
@@ -611,6 +613,8 @@ func (c *Command) reloadHTTPServerOnConfigChange(newConfig *Config) error {
 	if err != nil {
 		return err
 	}
+	c.httpServerLock.Lock()
+	defer c.httpServerLock.Unlock()
 	c.httpServer = http
 
 	return nil
@@ -644,23 +648,14 @@ func (c *Command) handleReload() {
 		err := reloadFunc(newConf)
 		if err != nil {
 			c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err)
-		}
-
-		err = c.httpServer.Shutdown()
-		if err != nil {
-			c.agent.logger.Printf("[ERR] agent: failed to stop HTTP server: %v", err)
 			return
 		}
 
-		// Wait some time to ensure a clean shutdown
-		time.Sleep(5 * time.Second)
-		http, err := NewHTTPServer(c.agent, c.agent.config)
+		err = c.reloadHTTPServerOnConfigChange(newConf)
 		if err != nil {
-			c.agent.logger.Printf("[ERR] agent: failed to reload http server: %v", err)
+			c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err)
 			return
 		}
-		c.agent.logger.Println("[INFO] agent: successfully restarted the HTTP server")
-		c.httpServer = http
 	}
 
 	if s := c.agent.Server(); s != nil {
diff --git a/nomad/raft_rpc.go b/nomad/raft_rpc.go
index 5c5508f167de..2d45ae87bffb 100644
--- a/nomad/raft_rpc.go
+++ b/nomad/raft_rpc.go
@@ -51,7 +51,7 @@ func (l *RaftLayer) Handoff(c net.Conn, ctx context.Context) error {
 	case <-l.closeCh:
 		return fmt.Errorf("Raft RPC layer closed")
 	case <-ctx.Done():
-		return fmt.Errorf("[INFO] nomad.rpc: Closing raft RPC connection")
+		return fmt.Errorf("[INFO] nomad.rpc: Closing server RPC connection")
 	}
 }
 
@@ -120,9 +120,11 @@ func (l *RaftLayer) ReloadTLS(tlsWrap tlsutil.Wrapper) {
 	defer l.closeLock.Unlock()
 
 	if !l.closed {
+		l.closed = true
 		close(l.closeCh)
 	}
 
 	l.tlsWrap = tlsWrap
 	l.closeCh = make(chan struct{})
+	l.closed = false
 }
diff --git a/nomad/server.go b/nomad/server.go
index a9dbef0a13bf..99730f4751cd 100644
--- a/nomad/server.go
+++ b/nomad/server.go
@@ -365,8 +365,9 @@ func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error {
 	s.logger.Printf("[INFO] nomad: reloading server connections due to configuration changes")
 
 	s.configLock.Lock()
+	defer s.configLock.Unlock()
+
 	s.config.TLSConfig = newTLSConfig
-	s.configLock.Unlock()
 
 	var tlsWrap tlsutil.RegionWrapper
 	var incomingTLS *tls.Config
@@ -393,6 +394,9 @@ func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error {
 	s.rpcTLS = incomingTLS
 
 	s.rpcCancel()
+	s.raftTransport.Close()
+	s.raftLayer.Close()
+
 	s.connPool.ReloadTLS(tlsWrap)
 
 	// reinitialize our rpc listener
@@ -405,16 +409,19 @@ func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error {
 	}
 	s.rpcListener = list
 
-	wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap)
-	s.raftLayer.ReloadTLS(wrapper)
-	s.raftTransport.Reload()
-	time.Sleep(500 * time.Millisecond)
-
 	// reinitialize the cancel context
 	ctx, cancel := context.WithCancel(context.Background())
 	s.rpcCancel = cancel
 	go s.listen(ctx)
 
+	wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap)
+	s.raftLayer.ReloadTLS(wrapper)
+
+	// re-initialize the network transport with a re-initialized stream layer
+	trans := raft.NewNetworkTransport(s.raftLayer, 3, s.config.RaftTimeout,
+		s.config.LogOutput)
+	s.raftTransport = trans
+
 	s.logger.Printf("[INFO] nomad: finished reloading server connections")
 	return nil
 }
diff --git a/vendor/github.com/hashicorp/raft/net_transport.go b/vendor/github.com/hashicorp/raft/net_transport.go
index c4d81016dab6..7c55ac5371ff 100644
--- a/vendor/github.com/hashicorp/raft/net_transport.go
+++ b/vendor/github.com/hashicorp/raft/net_transport.go
@@ -177,19 +177,6 @@ func (n *NetworkTransport) Close() error {
 	return nil
 }
 
-func (n *NetworkTransport) Reload() {
-	n.shutdownLock.Lock()
-	defer n.shutdownLock.Unlock()
-
-	if !n.shutdown {
-		close(n.shutdownCh)
-		n.shutdown = true
-	}
-
-	time.Sleep(3 * time.Second)
-	n.shutdownCh = make(chan struct{})
-}
-
 // Consumer implements the Transport interface.
 func (n *NetworkTransport) Consumer() <-chan RPC {
 	return n.consumeCh
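Aside: the revert above trades the in-place NetworkTransport.Reload for rebuilding the transport over a fresh stream layer. A condensed sketch of the approach patches 04 and 05 settle on (field names assumed; raft.NewNetworkTransport is the real constructor from hashicorp/raft):

// rebuildRaftTransport closes the old raft stream layer and builds a new
// layer and transport that pick up the new TLS wrapper.
func rebuildRaftTransport(s *Server, wrap tlsutil.Wrapper) {
	s.raftLayer.Close()
	s.raftLayer = NewRaftLayer(s.rpcAdvertise, wrap)
	s.raftTransport = raft.NewNetworkTransport(
		s.raftLayer,          // stream layer raft dials and accepts through
		3,                    // max pooled connections per peer
		s.config.RaftTimeout, // I/O timeout applied to raft RPCs
		s.config.LogOutput,   // where transport logs are written
	)
}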
From 184fbfe5bdf4016a8e4db33f6dd44795df2ba71e Mon Sep 17 00:00:00 2001
From: Chelsea Holland Komlo
Date: Tue, 28 Nov 2017 12:33:46 -0500
Subject: [PATCH 05/24] prevent races when reloading, fully shut down raft

---
 command/agent/agent.go |  3 +++
 nomad/raft_rpc.go      | 15 ---------------
 nomad/server.go        | 38 +++++++++++++++++++++++++++-----------
 3 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/command/agent/agent.go b/command/agent/agent.go
index 788085a50424..de6259639734 100644
--- a/command/agent/agent.go
+++ b/command/agent/agent.go
@@ -803,6 +803,9 @@ func (a *Agent) Reload(newConfig *Config) error {
 
 // GetConfigCopy creates a replica of the agent's config, excluding locks
 func (a *Agent) GetConfig() *Config {
+	a.configLock.Lock()
+	defer a.configLock.Unlock()
+
 	return a.config
 }
 
diff --git a/nomad/raft_rpc.go b/nomad/raft_rpc.go
index 2d45ae87bffb..4cfcca4009c4 100644
--- a/nomad/raft_rpc.go
+++ b/nomad/raft_rpc.go
@@ -113,18 +113,3 @@ func (l *RaftLayer) Dial(address raft.ServerAddress, timeout time.Duration) (net
 	}
 	return conn, err
 }
-
-// ReloadTLS will re-initialize the TLS wrapper on the fly
-func (l *RaftLayer) ReloadTLS(tlsWrap tlsutil.Wrapper) {
-	l.closeLock.Lock()
-	defer l.closeLock.Unlock()
-
-	if !l.closed {
-		l.closed = true
-		close(l.closeCh)
-	}
-
-	l.tlsWrap = tlsWrap
-	l.closeCh = make(chan struct{})
-	l.closed = false
-}
diff --git a/nomad/server.go b/nomad/server.go
index 99730f4751cd..599b441bd090 100644
--- a/nomad/server.go
+++ b/nomad/server.go
@@ -99,21 +99,28 @@ type Server struct {
 	leaderCh      <-chan bool
 	raft          *raft.Raft
 	raftLayer     *RaftLayer
-	raftStore     *raftboltdb.BoltStore
-	raftInmem     *raft.InmemStore
-	raftTransport *raft.NetworkTransport
+	raftLayerLock sync.Mutex
+
+	raftStore *raftboltdb.BoltStore
+	raftInmem *raft.InmemStore
+
+	raftTransport     *raft.NetworkTransport
+	raftTransportLock sync.Mutex
 
 	// fsm is the state machine used with Raft
 	fsm *nomadFSM
 
 	// rpcListener is used to listen for incoming connections
-	rpcListener  net.Listener
+	rpcListener     net.Listener
+	rpcListenerLock sync.Mutex
+
 	rpcServer    *rpc.Server
 	rpcAdvertise net.Addr
 
 	// rpcTLS is the TLS config for incoming TLS requests
-	rpcTLS    *tls.Config
-	rpcCancel context.CancelFunc
+	rpcTLS     *tls.Config
+	rpcCancel  context.CancelFunc
+	rpcTLSLock sync.Mutex
 
 	// peers is used to track the known Nomad servers. This is
 	// used for region forwarding and clustering.
@@ -365,9 +372,8 @@ func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error {
 	s.logger.Printf("[INFO] nomad: reloading server connections due to configuration changes")
 
 	s.configLock.Lock()
-	defer s.configLock.Unlock()
-
 	s.config.TLSConfig = newTLSConfig
+	s.configLock.Unlock()
 
 	var tlsWrap tlsutil.RegionWrapper
 	var incomingTLS *tls.Config
@@ -390,16 +396,20 @@ func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error {
 		s.logger.Printf("[ERR] nomad: No TLS Context to reset")
 		return fmt.Errorf("Unable to reset tls context")
 	}
+
 	s.rpcCancel()
 
+	s.rpcTLSLock.Lock()
 	s.rpcTLS = incomingTLS
+	s.rpcTLSLock.Unlock()
 
-	s.rpcCancel()
+	s.raftTransportLock.Lock()
+	defer s.raftTransportLock.Unlock()
 	s.raftTransport.Close()
-	s.raftLayer.Close()
 
 	s.connPool.ReloadTLS(tlsWrap)
 
 	// reinitialize our rpc listener
+	s.rpcListenerLock.Lock()
 	s.rpcListener.Close()
 	time.Sleep(500 * time.Millisecond)
 	list, err := net.ListenTCP("tcp", s.config.RPCAddr)
@@ -407,15 +417,21 @@ func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error {
 		s.logger.Printf("[ERR] nomad: No TLS listener to reload")
 		return err
 	}
+
 	s.rpcListener = list
 
 	// reinitialize the cancel context
 	ctx, cancel := context.WithCancel(context.Background())
 	s.rpcCancel = cancel
+	s.rpcListenerLock.Unlock()
+
 	go s.listen(ctx)
 
+	s.raftLayerLock.Lock()
+	s.raftLayer.Close()
 	wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap)
-	s.raftLayer.ReloadTLS(wrapper)
+	s.raftLayer = NewRaftLayer(s.rpcAdvertise, wrapper)
+	s.raftLayerLock.Unlock()
 
 	// re-initialize the network transport with a re-initialized stream layer
 	trans := raft.NewNetworkTransport(s.raftLayer, 3, s.config.RaftTimeout,
 		s.config.LogOutput)
 	s.raftTransport = trans
From c65857c4319a9c4d08246889af7bde547ef174bf Mon Sep 17 00:00:00 2001
From: Chelsea Holland Komlo
Date: Tue, 28 Nov 2017 17:25:16 -0500
Subject: [PATCH 06/24] remove code duplication

---
 nomad/server.go | 49 ++++++++++++++++++++-----------------------------
 1 file changed, 20 insertions(+), 29 deletions(-)

diff --git a/nomad/server.go b/nomad/server.go
index 599b441bd090..d5f4de3e730c 100644
--- a/nomad/server.go
+++ b/nomad/server.go
@@ -237,22 +237,8 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, logger *log.Logg
 	}
 
 	// Configure TLS
-	var tlsWrap tlsutil.RegionWrapper
-	var incomingTLS *tls.Config
-	if config.TLSConfig.EnableRPC {
-		tlsConf := config.tlsConfig()
-		tw, err := tlsConf.OutgoingTLSWrapper()
-		if err != nil {
-			return nil, err
-		}
-		tlsWrap = tw
-
-		itls, err := tlsConf.IncomingTLSConfig()
-		if err != nil {
-			return nil, err
-		}
-		incomingTLS = itls
-	}
+	tlsConf := config.tlsConfig()
+	incomingTLS, tlsWrap, err := getTLSConf(config.TLSConfig.EnableRPC, tlsConf)
 
 	// Create the ACL object cache
 	aclCache, err := lru.New2Q(aclCacheSize)
@@ -366,31 +352,36 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, logger *log.Logg
 	return s, nil
 }
 
-// ReloadTLSConnections updates a server's TLS configuration and reloads RPC
-// connections
-func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error {
-	s.logger.Printf("[INFO] nomad: reloading server connections due to configuration changes")
-
-	s.configLock.Lock()
-	s.config.TLSConfig = newTLSConfig
-	s.configLock.Unlock()
-
+func getTLSConf(enableRPC bool, tlsConf *tlsutil.Config) (*tls.Config, tlsutil.RegionWrapper, error) {
 	var tlsWrap tlsutil.RegionWrapper
 	var incomingTLS *tls.Config
-	if s.config.TLSConfig.EnableRPC {
-		tlsConf := s.config.tlsConfig()
+	if enableRPC {
 		tw, err := tlsConf.OutgoingTLSWrapper()
 		if err != nil {
-			return err
+			return nil, nil, err
 		}
 		tlsWrap = tw
 
 		itls, err := tlsConf.IncomingTLSConfig()
 		if err != nil {
-			return err
+			return nil, nil, err
 		}
 		incomingTLS = itls
 	}
+	return incomingTLS, tlsWrap, nil
+}
+
+// ReloadTLSConnections updates a server's TLS configuration and reloads RPC
+// connections
+func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error {
+	s.logger.Printf("[INFO] nomad: reloading server connections due to configuration changes")
+
+	s.configLock.Lock()
+	s.config.TLSConfig = newTLSConfig
+	s.configLock.Unlock()
+
+	tlsConf := s.config.tlsConfig()
+	incomingTLS, tlsWrap, err := getTLSConf(s.config.TLSConfig.EnableRPC, tlsConf)
 
 	if s.rpcCancel == nil {
 		s.logger.Printf("[ERR] nomad: No TLS Context to reset")
 		return fmt.Errorf("Unable to reset tls context")
 	}

From bee883c4e017ebe7f7e563fef143e910d31421c1 Mon Sep 17 00:00:00 2001
From: Chelsea Holland Komlo
Date: Wed, 29 Nov 2017 12:54:05 -0500
Subject: [PATCH 07/24] check error on generating tls context

---
 nomad/server.go | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/nomad/server.go b/nomad/server.go
index d5f4de3e730c..bad4873651ae 100644
--- a/nomad/server.go
+++ b/nomad/server.go
@@ -239,6 +239,9 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, logger *log.Logg
 	// Configure TLS
 	tlsConf := config.tlsConfig()
 	incomingTLS, tlsWrap, err := getTLSConf(config.TLSConfig.EnableRPC, tlsConf)
+	if err != nil {
+		return nil, err
+	}
 
 	// Create the ACL object cache
 	aclCache, err := lru.New2Q(aclCacheSize)
@@ -382,11 +385,16 @@ func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error {
 
 	tlsConf := s.config.tlsConfig()
 	incomingTLS, tlsWrap, err := getTLSConf(s.config.TLSConfig.EnableRPC, tlsConf)
+	if err != nil {
+		s.logger.Printf("[ERR] nomad: unable to reset TLS context")
+		return err
+	}
 
 	if s.rpcCancel == nil {
 		s.logger.Printf("[ERR] nomad: No TLS Context to reset")
 		return fmt.Errorf("Unable to reset tls context")
 	}
+
 	s.rpcCancel()
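Aside: with getTLSConf factored out, both the constructor and the reload path derive the incoming listener config and the outgoing wrapper in one place. An illustrative usage sketch (helper name applyTLS is hypothetical; the surrounding fields are assumed):

// applyTLS derives both halves of the TLS setup from a candidate
// tlsutil.Config, mutating server state only when they build cleanly.
func (s *Server) applyTLS(enableRPC bool, tlsConf *tlsutil.Config) error {
	incomingTLS, tlsWrap, err := getTLSConf(enableRPC, tlsConf)
	if err != nil {
		return err // nothing has been swapped yet
	}
	s.rpcTLS = incomingTLS        // handshake config for inbound connections
	s.connPool.ReloadTLS(tlsWrap) // dial wrapper for outbound connections
	return nil
}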
From 18fdd31570054f7d9a22f3d7ee0ce3c9c4789aa9 Mon Sep 17 00:00:00 2001
From: Chelsea Holland Komlo
Date: Wed, 29 Nov 2017 17:22:21 -0500
Subject: [PATCH 08/24] reloading tls config should be atomic for
 clients/servers

---
 client/client.go        | 18 +++++++++---------
 client/config/config.go | 23 +++++++++++++++--------
 nomad/config.go         | 17 ++++++++++++-----
 nomad/server.go         | 12 ++++++------
 4 files changed, 42 insertions(+), 28 deletions(-)

diff --git a/client/client.go b/client/client.go
index 9346a60c7018..2b4f4456d014 100644
--- a/client/client.go
+++ b/client/client.go
@@ -368,21 +368,21 @@ func (c *Client) init() error {
 // ReloadTLSConnections allows a client to reload RPC connections if the
 // client's TLS configuration changes from plaintext to TLS
 func (c *Client) ReloadTLSConnections(newConfig *nconfig.TLSConfig) error {
-	c.configLock.Lock()
-	c.config.TLSConfig = newConfig
-	c.configLock.Unlock()
+	var tlsWrap tlsutil.RegionWrapper
+	if newConfig.EnableRPC {
+		tw, err := c.config.NewTLSConfiguration(newConfig).OutgoingTLSWrapper()
+		if err != nil {
+			return err
+		}
+		tlsWrap = tw
+	}
 
-	if c.config.TLSConfig.EnableRPC {
-		tw, err := c.config.TLSConfiguration().OutgoingTLSWrapper()
-		if err != nil {
-			return err
-		}
-		c.connPool.ReloadTLS(tw)
-	} else {
-		c.connPool.ReloadTLS(nil)
-	}
+	c.connPool.ReloadTLS(tlsWrap)
 
-	time.Sleep(3 * time.Second)
+	c.configLock.Lock()
+	c.config.TLSConfig = newConfig
+	c.configLock.Unlock()
 
 	return nil
 }
diff --git a/client/config/config.go b/client/config/config.go
index 1f89e4033c6b..89820edbef39 100644
--- a/client/config/config.go
+++ b/client/config/config.go
@@ -347,16 +347,23 @@ func (c *Config) ReadStringListToMapDefault(key, defaultValue string) map[string
 	return list
}
 
-// TLSConfig returns a TLSUtil Config based on the client configuration
+// TLSConfiguration returns a TLSUtil Config based on the existing client
+// configuration
 func (c *Config) TLSConfiguration() *tlsutil.Config {
-	tlsConf := &tlsutil.Config{
+	return c.NewTLSConfiguration(c.TLSConfig)
+}
+
+// NewTLSConfiguration returns a TLSUtil Config for a new TLS config object
+// This allows a TLSConfig object to be created without first explicitly
+// setting it
+func (c *Config) NewTLSConfiguration(tlsConfig *config.TLSConfig) *tlsutil.Config {
+	return &tlsutil.Config{
 		VerifyIncoming:       true,
 		VerifyOutgoing:       true,
-		VerifyServerHostname: c.TLSConfig.VerifyServerHostname,
-		CAFile:               c.TLSConfig.CAFile,
-		CertFile:             c.TLSConfig.CertFile,
-		KeyFile:              c.TLSConfig.KeyFile,
-		KeyLoader:            c.TLSConfig.GetKeyLoader(),
+		VerifyServerHostname: tlsConfig.VerifyServerHostname,
+		CAFile:               tlsConfig.CAFile,
+		CertFile:             tlsConfig.CertFile,
+		KeyFile:              tlsConfig.KeyFile,
+		KeyLoader:            tlsConfig.GetKeyLoader(),
 	}
-	return tlsConf
 }
diff --git a/nomad/config.go b/nomad/config.go
index 4a918f393c45..bdc8cba3b007 100644
--- a/nomad/config.go
+++ b/nomad/config.go
@@ -353,13 +353,20 @@ func DefaultConfig() *Config {
 
 // tlsConfig returns a TLSUtil Config based on the server configuration
 func (c *Config) tlsConfig() *tlsutil.Config {
+	return c.newTLSConfig(c.TLSConfig)
+}
+
+// newTLSConfig returns a TLSUtil Config based on the server configuration
+// This is useful for creating a TLSConfig object without explicitly setting
+// it on the config.
+func (c *Config) newTLSConfig(newConf *config.TLSConfig) *tlsutil.Config {
 	return &tlsutil.Config{
 		VerifyIncoming:       true,
 		VerifyOutgoing:       true,
-		VerifyServerHostname: c.TLSConfig.VerifyServerHostname,
-		CAFile:               c.TLSConfig.CAFile,
-		CertFile:             c.TLSConfig.CertFile,
-		KeyFile:              c.TLSConfig.KeyFile,
-		KeyLoader:            c.TLSConfig.GetKeyLoader(),
+		VerifyServerHostname: newConf.VerifyServerHostname,
+		CAFile:               newConf.CAFile,
+		CertFile:             newConf.CertFile,
+		KeyFile:              newConf.KeyFile,
+		KeyLoader:            newConf.GetKeyLoader(),
 	}
 }
diff --git a/nomad/server.go b/nomad/server.go
index bad4873651ae..82ad6a168f16 100644
--- a/nomad/server.go
+++ b/nomad/server.go
@@ -379,12 +379,8 @@ func getTLSConf(enableRPC bool, tlsConf *tlsutil.Config) (*tls.Config, tlsutil.R
 func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error {
 	s.logger.Printf("[INFO] nomad: reloading server connections due to configuration changes")
 
-	s.configLock.Lock()
-	s.config.TLSConfig = newTLSConfig
-	s.configLock.Unlock()
-
-	tlsConf := s.config.tlsConfig()
-	incomingTLS, tlsWrap, err := getTLSConf(s.config.TLSConfig.EnableRPC, tlsConf)
+	tlsConf := s.config.newTLSConfig(newTLSConfig)
+	incomingTLS, tlsWrap, err := getTLSConf(newTLSConfig.EnableRPC, tlsConf)
 	if err != nil {
 		s.logger.Printf("[ERR] nomad: unable to reset TLS context")
 		return err
@@ -397,6 +393,10 @@ func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error {
 
 	s.rpcCancel()
 
+	s.configLock.Lock()
+	s.config.TLSConfig = newTLSConfig
+	s.configLock.Unlock()
+
 	s.rpcTLSLock.Lock()
 	s.rpcTLS = incomingTLS
 	s.rpcTLSLock.Unlock()

From 359358d24002c88609a984d9ae200e617696370b Mon Sep 17 00:00:00 2001
From: Chelsea Holland Komlo
Date: Thu, 30 Nov 2017 10:50:43 -0500
Subject: [PATCH 09/24] code review fixups

---
 command/agent/agent.go      | 18 ++++++++++--------
 command/agent/agent_test.go |  6 ++----
 command/agent/command.go    | 24 ++++++++++--------------
 command/agent/http.go       | 33 +++++++++++++++++++--------------
 nomad/raft_rpc.go           |  2 +-
 nomad/rpc.go                | 22 ++++++++++++++--------
 nomad/server.go             |  2 +-
 nomad/structs/config/tls.go |  8 +++++---
 8 files changed, 62 insertions(+), 53 deletions(-)

diff --git a/command/agent/agent.go b/command/agent/agent.go
index de6259639734..59bd4244a76c 100644
--- a/command/agent/agent.go
+++ b/command/agent/agent.go
@@ -732,12 +732,14 @@ func (a *Agent) Stats() map[string]map[string]string {
 
 // ShouldReload determines if we should reload the configuration and agent
 // connections. If the TLS Configuration has not changed, we shouldn't reload.
-func (a *Agent) ShouldReload(newConfig *Config) (bool, func(*Config) error) {
+func (a *Agent) ShouldReload(newConfig *Config) bool {
+	a.configLock.Lock()
+	defer a.configLock.Unlock()
 	if a.config.TLSConfig.Equals(newConfig.TLSConfig) {
-		return false, nil
+		return false
 	}
 
-	return true, a.Reload
+	return true
 }
 
 // Reload handles configuration changes for the agent. Provides a method that
Provides a method that @@ -775,9 +777,9 @@ func (a *Agent) Reload(newConfig *Config) error { a.config.TLSConfig = newConfig.TLSConfig.Copy() if newConfig.TLSConfig.IsEmpty() { - a.logger.Println("[WARN] Downgrading agent's existing TLS configuration to plaintext") + a.logger.Println("[WARN] agent: Downgrading agent's existing TLS configuration to plaintext") } else { - a.logger.Println("[INFO] Upgrading from plaintext configuration to TLS") + a.logger.Println("[INFO] agent: Upgrading from plaintext configuration to TLS") } // Reload the TLS configuration for the client or server, depending on how @@ -785,7 +787,7 @@ func (a *Agent) Reload(newConfig *Config) error { if s := a.Server(); s != nil { err := s.ReloadTLSConnections(a.config.TLSConfig) if err != nil { - a.logger.Printf("[WARN] agent: Issue reloading the server's TLS Configuration, consider a full system restart: %v", err.Error()) + a.logger.Printf("[WARN] agent: error reloading the server's TLS configuration: %v", err.Error()) return err } } @@ -793,7 +795,7 @@ func (a *Agent) Reload(newConfig *Config) error { err := c.ReloadTLSConnections(a.config.TLSConfig) if err != nil { - a.logger.Printf("[ERR] agent: Issue reloading the client's TLS Configuration, consider a full system restart: %v", err.Error()) + a.logger.Printf("[WARN] agent: error reloading the server's TLS configuration: %v", err.Error()) return err } } @@ -801,7 +803,7 @@ func (a *Agent) Reload(newConfig *Config) error { return nil } -// GetConfigCopy creates a replica of the agent's config, excluding locks +// GetConfig creates a locked reference to the agent's config func (a *Agent) GetConfig() *Config { a.configLock.Lock() defer a.configLock.Unlock() diff --git a/command/agent/agent_test.go b/command/agent/agent_test.go index 023060401c9f..6042d6902f6b 100644 --- a/command/agent/agent_test.go +++ b/command/agent/agent_test.go @@ -888,9 +888,8 @@ func TestServer_ShouldReload_ReturnFalseForNoChanges(t *testing.T) { config: agentConfig, } - shouldReload, reloadFunc := agent.ShouldReload(sameAgentConfig) + shouldReload := agent.ShouldReload(sameAgentConfig) assert.False(shouldReload) - assert.Nil(reloadFunc) } func TestServer_ShouldReload_ReturnTrueForConfigChanges(t *testing.T) { @@ -936,7 +935,6 @@ func TestServer_ShouldReload_ReturnTrueForConfigChanges(t *testing.T) { config: agentConfig, } - shouldReload, reloadFunc := agent.ShouldReload(newConfig) + shouldReload := agent.ShouldReload(newConfig) assert.True(shouldReload) - assert.NotNil(reloadFunc) } diff --git a/command/agent/command.go b/command/agent/command.go index 6928ee677431..4fffbff80894 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -600,21 +600,17 @@ WAIT: } } -func (c *Command) reloadHTTPServerOnConfigChange(newConfig *Config) error { +func (c *Command) reloadHTTPServer(newConfig *Config) error { c.agent.logger.Println("[INFO] agent: Reloading HTTP server with new TLS configuration") - err := c.httpServer.Shutdown() - if err != nil { - return err - } + c.httpServerLock.Lock() + defer c.httpServerLock.Unlock() + + c.httpServer.Shutdown() - // Wait some time to ensure a clean shutdown - time.Sleep(5 * time.Second) http, err := NewHTTPServer(c.agent, c.agent.config) if err != nil { return err } - c.httpServerLock.Lock() - defer c.httpServerLock.Unlock() c.httpServer = http return nil @@ -642,18 +638,18 @@ func (c *Command) handleReload() { newConf.LogLevel = c.agent.GetConfig().LogLevel } - shouldReload, reloadFunc := c.agent.ShouldReload(newConf) - if shouldReload && reloadFunc != 
nil { + shouldReload := c.agent.ShouldReload(newConf) + if shouldReload { // Reloads configuration for an agent running in both client and server mode - err := reloadFunc(newConf) + err := c.agent.Reload(newConf) if err != nil { c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err) return } - err = c.reloadHTTPServerOnConfigChange(newConf) + c.reloadHTTPServer(newConf) if err != nil { - c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err) + c.agent.logger.Printf("[ERR] http: failed to reload the config: %v", err) return } } diff --git a/command/agent/http.go b/command/agent/http.go index 2c8fa27ff386..b38aac7db594 100644 --- a/command/agent/http.go +++ b/command/agent/http.go @@ -47,11 +47,12 @@ var ( // HTTPServer is used to wrap an Agent and expose it over an HTTP interface type HTTPServer struct { - agent *Agent - mux *http.ServeMux - listener net.Listener - logger *log.Logger - Addr string + agent *Agent + mux *http.ServeMux + listener net.Listener + listenerCh chan struct{} + logger *log.Logger + Addr string } // NewHTTPServer starts new HTTP server over the agent @@ -89,11 +90,12 @@ func NewHTTPServer(agent *Agent, config *Config) (*HTTPServer, error) { // Create the server srv := &HTTPServer{ - agent: agent, - mux: mux, - listener: ln, - logger: agent.logger, - Addr: ln.Addr().String(), + agent: agent, + mux: mux, + listener: ln, + listenerCh: make(chan struct{}), + logger: agent.logger, + Addr: ln.Addr().String(), } srv.registerHandlers(config.EnableDebug) @@ -103,7 +105,10 @@ func NewHTTPServer(agent *Agent, config *Config) (*HTTPServer, error) { return nil, err } - go http.Serve(ln, gzip(mux)) + go func() { + defer close(srv.listenerCh) + http.Serve(ln, gzip(mux)) + }() return srv, nil } @@ -126,12 +131,12 @@ func (ln tcpKeepAliveListener) Accept() (c net.Conn, err error) { } // Shutdown is used to shutdown the HTTP server -func (s *HTTPServer) Shutdown() error { +func (s *HTTPServer) Shutdown() { if s != nil { s.logger.Printf("[DEBUG] http: Shutting down http server") - return s.listener.Close() + s.listener.Close() + <-s.listenerCh // block until http.Serve has returned. 
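+		// At this point the Serve goroutine has exited, so a replacement
+		// HTTPServer can re-bind the same address without racing the old one.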
} - return nil } // registerHandlers is used to attach our handlers to the mux diff --git a/nomad/raft_rpc.go b/nomad/raft_rpc.go index 4cfcca4009c4..cce0c0e5243a 100644 --- a/nomad/raft_rpc.go +++ b/nomad/raft_rpc.go @@ -51,7 +51,7 @@ func (l *RaftLayer) Handoff(c net.Conn, ctx context.Context) error { case <-l.closeCh: return fmt.Errorf("Raft RPC layer closed") case <-ctx.Done(): - return fmt.Errorf("[INFO] nomad.rpc: Closing server RPC connection") + return nil } } diff --git a/nomad/rpc.go b/nomad/rpc.go index 8deedd19416f..cab276bf8e35 100644 --- a/nomad/rpc.go +++ b/nomad/rpc.go @@ -83,18 +83,24 @@ func (s *Server) listen(ctx context.Context) { if s.shutdown { return } + + select { + case <-ctx.Done(): + return + } + s.logger.Printf("[ERR] nomad.rpc: failed to accept RPC conn: %v", err) continue } - go s.handleConn(conn, false, ctx) + go s.handleConn(ctx, conn, false) metrics.IncrCounter([]string{"nomad", "rpc", "accept_conn"}, 1) } } // handleConn is used to determine if this is a Raft or // Nomad type RPC connection and invoke the correct handler -func (s *Server) handleConn(conn net.Conn, isTLS bool, ctx context.Context) { +func (s *Server) handleConn(ctx context.Context, conn net.Conn, isTLS bool) { // Read a single byte buf := make([]byte, 1) if _, err := conn.Read(buf); err != nil { @@ -117,14 +123,14 @@ func (s *Server) handleConn(conn net.Conn, isTLS bool, ctx context.Context) { // Switch on the byte switch RPCType(buf[0]) { case rpcNomad: - s.handleNomadConn(conn, ctx) + s.handleNomadConn(ctx, conn) case rpcRaft: metrics.IncrCounter([]string{"nomad", "rpc", "raft_handoff"}, 1) s.raftLayer.Handoff(conn, ctx) case rpcMultiplex: - s.handleMultiplex(conn, ctx) + s.handleMultiplex(ctx, conn) case rpcTLS: if s.rpcTLS == nil { @@ -133,7 +139,7 @@ func (s *Server) handleConn(conn net.Conn, isTLS bool, ctx context.Context) { return } conn = tls.Server(conn, s.rpcTLS) - s.handleConn(conn, true, ctx) + s.handleConn(ctx, conn, true) default: s.logger.Printf("[ERR] nomad.rpc: unrecognized RPC byte: %v", buf[0]) @@ -144,7 +150,7 @@ func (s *Server) handleConn(conn net.Conn, isTLS bool, ctx context.Context) { // handleMultiplex is used to multiplex a single incoming connection // using the Yamux multiplexer -func (s *Server) handleMultiplex(conn net.Conn, ctx context.Context) { +func (s *Server) handleMultiplex(ctx context.Context, conn net.Conn) { defer conn.Close() conf := yamux.DefaultConfig() conf.LogOutput = s.config.LogOutput @@ -157,12 +163,12 @@ func (s *Server) handleMultiplex(conn net.Conn, ctx context.Context) { } return } - go s.handleNomadConn(sub, ctx) + go s.handleNomadConn(ctx, sub) } } // handleNomadConn is used to service a single Nomad RPC connection -func (s *Server) handleNomadConn(conn net.Conn, ctx context.Context) { +func (s *Server) handleNomadConn(ctx context.Context, conn net.Conn) { defer conn.Close() rpcCodec := NewServerCodec(conn) for { diff --git a/nomad/server.go b/nomad/server.go index 82ad6a168f16..3ac7b9df6f98 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -437,7 +437,7 @@ func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error { s.config.LogOutput) s.raftTransport = trans - s.logger.Printf("[INFO] nomad: finished reloading server connections") + s.logger.Printf("[DEBUG] nomad: finished reloading server connections") return nil } diff --git a/nomad/structs/config/tls.go b/nomad/structs/config/tls.go index c93946e34c85..7cfd1fcd3aea 100644 --- a/nomad/structs/config/tls.go +++ b/nomad/structs/config/tls.go @@ -188,12 +188,14 @@ 
func (t *TLSConfig) Merge(b *TLSConfig) *TLSConfig { // Equals compares the fields of two TLS configuration objects, returning a // boolean indicating if they are the same. +// NewConfig should never be nil- calling code is responsible for walways +// passing a valid TLSConfig object func (t *TLSConfig) Equals(newConfig *TLSConfig) bool { - if t == nil && newConfig == nil { - return true + if t == nil { + return false } - if t != nil && newConfig == nil { + if t == nil && newConfig != nil { return false } From 9ba0e6a6e3bd157d9949866d3d83d439699cc392 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Thu, 30 Nov 2017 14:52:13 -0500 Subject: [PATCH 10/24] refactor rpc listener methods, wait for proper shutdown --- nomad/server.go | 53 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/nomad/server.go b/nomad/server.go index 3ac7b9df6f98..51f6e5bbaa11 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -113,6 +113,7 @@ type Server struct { // rpcListener is used to listen for incoming connections rpcListener net.Listener rpcListenerLock sync.Mutex + listenerCh chan struct{} rpcServer *rpc.Server rpcAdvertise net.Addr @@ -328,10 +329,7 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, logger *log.Logg // Start ingesting events for Serf go s.serfEventHandler() - // Start the RPC listeners - ctx, cancel := context.WithCancel(context.Background()) - s.rpcCancel = cancel - go s.listen(ctx) + s.startRPCListener() // Emit metrics for the eval broker go evalBroker.EmitStats(time.Second, s.shutdownCh) @@ -355,6 +353,33 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, logger *log.Logg return s, nil } +// Start the RPC listeners +func (s *Server) startRPCListener() { + s.rpcListenerLock.Lock() + defer s.rpcListenerLock.Unlock() + ctx, cancel := context.WithCancel(context.Background()) + s.rpcCancel = cancel + go func() { + defer close(s.listenerCh) + s.listen(ctx) + }() +} + +func (s *Server) createRPCListener() error { + s.rpcListenerLock.Lock() + defer s.rpcListenerLock.Unlock() + + s.listenerCh = make(chan struct{}) + list, err := net.ListenTCP("tcp", s.config.RPCAddr) + if err != nil || list == nil { + s.logger.Printf("[ERR] nomad: No TLS listener to reload") + return err + } + + s.rpcListener = list + return nil +} + func getTLSConf(enableRPC bool, tlsConf *tlsutil.Config) (*tls.Config, tlsutil.RegionWrapper, error) { var tlsWrap tlsutil.RegionWrapper var incomingTLS *tls.Config @@ -410,21 +435,14 @@ func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error { // reinitialize our rpc listener s.rpcListenerLock.Lock() s.rpcListener.Close() - time.Sleep(500 * time.Millisecond) - list, err := net.ListenTCP("tcp", s.config.RPCAddr) - if err != nil || list == nil { - s.logger.Printf("[ERR] nomad: No TLS listener to reload") - return err - } - - s.rpcListener = list - - // reinitialize the cancel context - ctx, cancel := context.WithCancel(context.Background()) - s.rpcCancel = cancel + <-s.listenerCh s.rpcListenerLock.Unlock() - go s.listen(ctx) + err = s.createRPCListener() + if err != nil { + return err + } + s.startRPCListener() s.raftLayerLock.Lock() s.raftLayer.Close() @@ -865,6 +883,7 @@ func (s *Server) setupRPC(tlsWrap tlsutil.RegionWrapper) error { return err } s.rpcListener = list + s.listenerCh = make(chan struct{}) if s.config.RPCAdvertise != nil { s.rpcAdvertise = s.config.RPCAdvertise From a4af400fd18da93195ae5c973865cca7eb37c422 Mon Sep 17 00:00:00 2001 From: Chelsea 
Holland Komlo Date: Thu, 30 Nov 2017 16:58:39 -0500 Subject: [PATCH 11/24] don't ignore error in http reloading code review feedback --- command/agent/command.go | 2 +- nomad/raft_rpc.go | 2 +- nomad/rpc.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/command/agent/command.go b/command/agent/command.go index 4fffbff80894..06369773ffb0 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -647,7 +647,7 @@ func (c *Command) handleReload() { return } - c.reloadHTTPServer(newConf) + err = c.reloadHTTPServer(newConf) if err != nil { c.agent.logger.Printf("[ERR] http: failed to reload the config: %v", err) return diff --git a/nomad/raft_rpc.go b/nomad/raft_rpc.go index cce0c0e5243a..4cad9e734a64 100644 --- a/nomad/raft_rpc.go +++ b/nomad/raft_rpc.go @@ -44,7 +44,7 @@ func NewRaftLayer(addr net.Addr, tlsWrap tlsutil.Wrapper) *RaftLayer { // Handoff is used to hand off a connection to the // RaftLayer. This allows it to be Accept()'ed -func (l *RaftLayer) Handoff(c net.Conn, ctx context.Context) error { +func (l *RaftLayer) Handoff(ctx context.Context, c net.Conn) error { select { case l.connCh <- c: return nil diff --git a/nomad/rpc.go b/nomad/rpc.go index cab276bf8e35..df4627eca801 100644 --- a/nomad/rpc.go +++ b/nomad/rpc.go @@ -127,7 +127,7 @@ func (s *Server) handleConn(ctx context.Context, conn net.Conn, isTLS bool) { case rpcRaft: metrics.IncrCounter([]string{"nomad", "rpc", "raft_handoff"}, 1) - s.raftLayer.Handoff(conn, ctx) + s.raftLayer.Handoff(ctx, conn) case rpcMultiplex: s.handleMultiplex(ctx, conn) From e3cc0982b226cb2011e693ea9ef7c90ccb93d56d Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Mon, 4 Dec 2017 11:21:37 -0500 Subject: [PATCH 12/24] remove unnecessary nil checks; default case add tests for TLSConfig object --- nomad/rpc.go | 1 + nomad/structs/config/tls.go | 8 ------- nomad/structs/config/tls_test.go | 36 ++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/nomad/rpc.go b/nomad/rpc.go index df4627eca801..1b7e8bccab88 100644 --- a/nomad/rpc.go +++ b/nomad/rpc.go @@ -87,6 +87,7 @@ func (s *Server) listen(ctx context.Context) { select { case <-ctx.Done(): return + default: } s.logger.Printf("[ERR] nomad.rpc: failed to accept RPC conn: %v", err) diff --git a/nomad/structs/config/tls.go b/nomad/structs/config/tls.go index 7cfd1fcd3aea..e309093e3768 100644 --- a/nomad/structs/config/tls.go +++ b/nomad/structs/config/tls.go @@ -191,14 +191,6 @@ func (t *TLSConfig) Merge(b *TLSConfig) *TLSConfig { // NewConfig should never be nil- calling code is responsible for walways // passing a valid TLSConfig object func (t *TLSConfig) Equals(newConfig *TLSConfig) bool { - if t == nil { - return false - } - - if t == nil && newConfig != nil { - return false - } - return t.EnableRPC == newConfig.EnableRPC && t.CAFile == newConfig.CAFile && t.CertFile == newConfig.CertFile && diff --git a/nomad/structs/config/tls_test.go b/nomad/structs/config/tls_test.go index b76360ba84f3..e6dc36363406 100644 --- a/nomad/structs/config/tls_test.go +++ b/nomad/structs/config/tls_test.go @@ -25,3 +25,39 @@ func TestTLSConfig_Merge(t *testing.T) { new := a.Merge(b) assert.Equal(b, new) } + +func TestTLS_Equals_TrueWhenEmpty(t *testing.T) { + assert := assert.New(t) + a := &TLSConfig{} + b := &TLSConfig{} + assert.True(a.Equals(b)) +} + +func TestTLS_Equals_FalseWhenUnequal(t *testing.T) { + assert := assert.New(t) + a := &TLSConfig{CAFile: "abc", CertFile: "def", KeyFile: "ghi"} + b := &TLSConfig{CAFile: "jkl", 
CertFile: "def", KeyFile: "ghi"} + assert.False(a.Equals(b)) +} + +func TestTLS_Equals_TrueWhenEqual(t *testing.T) { + assert := assert.New(t) + a := &TLSConfig{CAFile: "abc", CertFile: "def", KeyFile: "ghi"} + b := &TLSConfig{CAFile: "abc", CertFile: "def", KeyFile: "ghi"} + assert.True(a.Equals(b)) +} + +func TestTLS_Copy(t *testing.T) { + assert := assert.New(t) + a := &TLSConfig{CAFile: "abc", CertFile: "def", KeyFile: "ghi"} + aCopy := a.Copy() + assert.True(a.Equals(aCopy)) +} + +// GetKeyLoader should always return an initialized KeyLoader for a TLSConfig +// object +func TestTLS_GetKeyloader(t *testing.T) { + assert := assert.New(t) + a := &TLSConfig{} + assert.NotNil(a.GetKeyLoader()) +} From c70702e802ab0b9d5458f01ee9247e3fd25d680f Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Mon, 4 Dec 2017 19:29:43 -0500 Subject: [PATCH 13/24] call reload on agent, client, and server separately --- client/client.go | 12 +++++++++--- client/client_test.go | 4 ++-- command/agent/agent.go | 18 ------------------ command/agent/command.go | 32 +++++++++++++++++++++++++------- nomad/server.go | 10 ++++++++-- nomad/server_test.go | 6 +++--- nomad/structs/config/tls.go | 9 +++++++-- 7 files changed, 54 insertions(+), 37 deletions(-) diff --git a/client/client.go b/client/client.go index 2b4f4456d014..0b73c63bbd5a 100644 --- a/client/client.go +++ b/client/client.go @@ -369,7 +369,7 @@ func (c *Client) init() error { // client's TLS configuration changes from plaintext to TLS func (c *Client) ReloadTLSConnections(newConfig *nconfig.TLSConfig) error { var tlsWrap tlsutil.RegionWrapper - if newConfig.EnableRPC { + if newConfig != nil && newConfig.EnableRPC { tw, err := c.config.NewTLSConfiguration(newConfig).OutgoingTLSWrapper() if err != nil { @@ -378,15 +378,21 @@ func (c *Client) ReloadTLSConnections(newConfig *nconfig.TLSConfig) error { tlsWrap = tw } - c.connPool.ReloadTLS(tlsWrap) - c.configLock.Lock() c.config.TLSConfig = newConfig c.configLock.Unlock() + c.connPool.ReloadTLS(tlsWrap) + return nil } +// Reload allows a client to reload RPC connections if the +// client's TLS configuration changes +func (c *Client) Reload(newConfig *config.Config) error { + return c.reloadTLSConnections(newConfig.TLSConfig) +} + // Leave is used to prepare the client to leave the cluster func (c *Client) Leave() error { // TODO diff --git a/client/client_test.go b/client/client_test.go index 92b2995955cd..ae2f2cdc4fac 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -1032,7 +1032,7 @@ func TestClient_ReloadTLS_UpgradePlaintextToTLS(t *testing.T) { KeyFile: fookey, } - err := c1.ReloadTLSConnections(newConfig) + err := c1.reloadTLSConnections(newConfig) assert.Nil(err) req := structs.NodeSpecificRequest{ @@ -1085,7 +1085,7 @@ func TestClient_ReloadTLS_DowngradeTLSToPlaintext(t *testing.T) { newConfig := &nconfig.TLSConfig{} - err := c1.ReloadTLSConnections(newConfig) + err := c1.reloadTLSConnections(newConfig) assert.Nil(err) req := structs.NodeSpecificRequest{ diff --git a/command/agent/agent.go b/command/agent/agent.go index 59bd4244a76c..1f6b400e7f6e 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -782,24 +782,6 @@ func (a *Agent) Reload(newConfig *Config) error { a.logger.Println("[INFO] agent: Upgrading from plaintext configuration to TLS") } - // Reload the TLS configuration for the client or server, depending on how - // the agent is configured to run. 
- if s := a.Server(); s != nil { - err := s.ReloadTLSConnections(a.config.TLSConfig) - if err != nil { - a.logger.Printf("[WARN] agent: error reloading the server's TLS configuration: %v", err.Error()) - return err - } - } - if c := a.Client(); c != nil { - - err := c.ReloadTLSConnections(a.config.TLSConfig) - if err != nil { - a.logger.Printf("[WARN] agent: error reloading the server's TLS configuration: %v", err.Error()) - return err - } - } - return nil } diff --git a/command/agent/command.go b/command/agent/command.go index 06369773ffb0..8844e78edefd 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -640,18 +640,11 @@ func (c *Command) handleReload() { shouldReload := c.agent.ShouldReload(newConf) if shouldReload { - // Reloads configuration for an agent running in both client and server mode err := c.agent.Reload(newConf) if err != nil { c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err) return } - - err = c.reloadHTTPServer(newConf) - if err != nil { - c.agent.logger.Printf("[ERR] http: failed to reload the config: %v", err) - return - } } if s := c.agent.Server(); s != nil { @@ -661,9 +654,34 @@ func (c *Command) handleReload() { } else { if err := s.Reload(sconf); err != nil { c.agent.logger.Printf("[ERR] agent: reloading server config failed: %v", err) + return } } } + + if s := c.agent.Client(); s != nil { + clientConfig, err := c.agent.clientConfig() + if err != nil { + c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err) + return + } + if err := c.agent.Client().Reload(clientConfig); err != nil { + c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err) + return + } + } + + // reload HTTP server after we have reloaded both client and server, in case + // we error in either of the above cases. For example, reloading the http + // server to a TLS connection could succeed, while reloading the server's rpc + // connections could fail. + if shouldReload { + err := c.reloadHTTPServer(newConf) + if err != nil { + c.agent.logger.Printf("[ERR] http: failed to reload the config: %v", err) + return + } + } } // setupTelemetry is used ot setup the telemetry sub-systems diff --git a/nomad/server.go b/nomad/server.go index 51f6e5bbaa11..4ea115694fdc 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -400,8 +400,8 @@ func getTLSConf(enableRPC bool, tlsConf *tlsutil.Config) (*tls.Config, tlsutil.R } // ReloadTLSConnections updates a server's TLS configuration and reloads RPC -// connections -func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error { +// connections. This will handle both TLS upgrades and downgrades. 
+func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error { s.logger.Printf("[INFO] nomad: reloading server connections due to configuration changes") tlsConf := s.config.newTLSConfig(newTLSConfig) @@ -619,6 +619,12 @@ func (s *Server) Reload(newConfig *Config) error { } } + if !newConfig.TLSConfig.Equals(s.config.TLSConfig) { + if err := s.reloadTLSConnections(newConfig.TLSConfig); err != nil { + multierror.Append(&mErr, err) + } + } + return mErr.ErrorOrNil() } diff --git a/nomad/server_test.go b/nomad/server_test.go index bc7eee0a1a42..89e6810d7410 100644 --- a/nomad/server_test.go +++ b/nomad/server_test.go @@ -309,7 +309,7 @@ func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) { KeyFile: fookey, } - err := s1.ReloadTLSConnections(newTLSConfig) + err := s1.reloadTLSConnections(newTLSConfig) assert.Nil(err) assert.True(s1.config.TLSConfig.Equals(newTLSConfig)) @@ -330,7 +330,7 @@ func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) { // Tests that the server will successfully reload its network connections, // downgrading from TLS to plaintext if the server's TLS configuration changes. -func TestServer_Reload_TLSConnections_TLSToPlaintext(t *testing.T) { +func TestServer_reload_TLSConnections_TLSToPlaintext(t *testing.T) { t.Parallel() assert := assert.New(t) @@ -357,7 +357,7 @@ func TestServer_Reload_TLSConnections_TLSToPlaintext(t *testing.T) { newTLSConfig := &config.TLSConfig{} - err := s1.ReloadTLSConnections(newTLSConfig) + err := s1.reloadTLSConnections(newTLSConfig) assert.Nil(err) assert.True(s1.config.TLSConfig.Equals(newTLSConfig)) diff --git a/nomad/structs/config/tls.go b/nomad/structs/config/tls.go index e309093e3768..be1e8f42a1ba 100644 --- a/nomad/structs/config/tls.go +++ b/nomad/structs/config/tls.go @@ -188,9 +188,14 @@ func (t *TLSConfig) Merge(b *TLSConfig) *TLSConfig { // Equals compares the fields of two TLS configuration objects, returning a // boolean indicating if they are the same. -// NewConfig should never be nil- calling code is responsible for walways -// passing a valid TLSConfig object +// It is possible for either the calling TLSConfig to be nil, or the TLSConfig +// that it is being compared against, so we need to handle both places. See +// server.go Reload for example. 
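+// Two nil configurations compare as equal; exactly one nil compares as
+// unequal.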
func (t *TLSConfig) Equals(newConfig *TLSConfig) bool { + if t == nil || newConfig == nil { + return t == newConfig + } + return t.EnableRPC == newConfig.EnableRPC && t.CAFile == newConfig.CAFile && t.CertFile == newConfig.CertFile && From 11089b23cea2b07f258fc1a1a1f30c547deb15bd Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Thu, 7 Dec 2017 12:07:00 -0500 Subject: [PATCH 14/24] reload raft transport layer fix up linting --- client/client.go | 11 +- client/client_test.go | 6 +- client/config/config.go | 2 +- command/agent/command.go | 8 +- nomad/server.go | 79 ++++---- nomad/server_test.go | 173 +++++++++++++++++- .../hashicorp/raft/net_transport.go | 33 +++- 7 files changed, 253 insertions(+), 59 deletions(-) diff --git a/client/client.go b/client/client.go index 0b73c63bbd5a..c58ff7fa5f29 100644 --- a/client/client.go +++ b/client/client.go @@ -365,9 +365,9 @@ func (c *Client) init() error { return nil } -// ReloadTLSConnections allows a client to reload RPC connections if the -// client's TLS configuration changes from plaintext to TLS -func (c *Client) ReloadTLSConnections(newConfig *nconfig.TLSConfig) error { +// reloadTLSConnections allows a client to reload its TLS configuration on the +// fly +func (c *Client) reloadTLSConnections(newConfig *nconfig.TLSConfig) error { var tlsWrap tlsutil.RegionWrapper if newConfig != nil && newConfig.EnableRPC { tw, err := c.config.NewTLSConfiguration(newConfig).OutgoingTLSWrapper() @@ -378,6 +378,8 @@ func (c *Client) ReloadTLSConnections(newConfig *nconfig.TLSConfig) error { tlsWrap = tw } + // Keep the client configuration up to date as we use configuration values to + // decide on what type of connections to accept c.configLock.Lock() c.config.TLSConfig = newConfig c.configLock.Unlock() @@ -387,8 +389,7 @@ func (c *Client) ReloadTLSConnections(newConfig *nconfig.TLSConfig) error { return nil } -// Reload allows a client to reload RPC connections if the -// client's TLS configuration changes +// Reload allows a client to reload its configuration on the fly func (c *Client) Reload(newConfig *config.Config) error { return c.reloadTLSConnections(newConfig.TLSConfig) } diff --git a/client/client_test.go b/client/client_test.go index ae2f2cdc4fac..d4cba7db5dd2 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -1007,7 +1007,7 @@ func TestClient_ReloadTLS_UpgradePlaintextToTLS(t *testing.T) { assert := assert.New(t) s1, addr := testServer(t, func(c *nomad.Config) { - c.Region = "dc1" + c.Region = "foo" }) defer s1.Shutdown() testutil.WaitForLeader(t, s1.RPC) @@ -1059,7 +1059,7 @@ func TestClient_ReloadTLS_DowngradeTLSToPlaintext(t *testing.T) { assert := assert.New(t) s1, addr := testServer(t, func(c *nomad.Config) { - c.Region = "dc1" + c.Region = "foo" }) defer s1.Shutdown() testutil.WaitForLeader(t, s1.RPC) @@ -1090,7 +1090,7 @@ func TestClient_ReloadTLS_DowngradeTLSToPlaintext(t *testing.T) { req := structs.NodeSpecificRequest{ NodeID: c1.Node().ID, - QueryOptions: structs.QueryOptions{Region: "dc1"}, + QueryOptions: structs.QueryOptions{Region: "foo"}, } var out structs.SingleNodeResponse testutil.AssertUntil(100*time.Millisecond, diff --git a/client/config/config.go b/client/config/config.go index 89820edbef39..e2a271646ee0 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -354,7 +354,7 @@ func (c *Config) TLSConfiguration() *tlsutil.Config { } // NewTLSConfiguration returns a TLSUtil Config for a new TLS config object -// This allows a TLSConfig object to be created without first explicitely +// 
This allows a TLSConfig object to be created without first explicitly // setting it func (c *Config) NewTLSConfiguration(tlsConfig *config.TLSConfig) *tlsutil.Config { return &tlsutil.Config{ diff --git a/command/agent/command.go b/command/agent/command.go index 8844e78edefd..5a7c67831e70 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -12,7 +12,6 @@ import ( "sort" "strconv" "strings" - "sync" "syscall" "time" @@ -47,7 +46,6 @@ type Command struct { args []string agent *Agent httpServer *HTTPServer - httpServerLock sync.Mutex logFilter *logutils.LevelFilter logOutput io.Writer retryJoinErrCh chan struct{} @@ -602,8 +600,6 @@ WAIT: func (c *Command) reloadHTTPServer(newConfig *Config) error { c.agent.logger.Println("[INFO] agent: Reloading HTTP server with new TLS configuration") - c.httpServerLock.Lock() - defer c.httpServerLock.Unlock() c.httpServer.Shutdown() @@ -640,6 +636,7 @@ func (c *Command) handleReload() { shouldReload := c.agent.ShouldReload(newConf) if shouldReload { + c.agent.logger.Printf("[DEBUG] agent: starting reload of agent config") err := c.agent.Reload(newConf) if err != nil { c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err) @@ -649,8 +646,10 @@ func (c *Command) handleReload() { if s := c.agent.Server(); s != nil { sconf, err := convertServerConfig(newConf, c.logOutput) + c.agent.logger.Printf("[DEBUG] agent: starting reload of server config") if err != nil { c.agent.logger.Printf("[ERR] agent: failed to convert server config: %v", err) + return } else { if err := s.Reload(sconf); err != nil { c.agent.logger.Printf("[ERR] agent: reloading server config failed: %v", err) @@ -661,6 +660,7 @@ func (c *Command) handleReload() { if s := c.agent.Client(); s != nil { clientConfig, err := c.agent.clientConfig() + c.agent.logger.Printf("[DEBUG] agent: starting reload of client config") if err != nil { c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err) return diff --git a/nomad/server.go b/nomad/server.go index 4ea115694fdc..14c3ed4a5569 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -85,8 +85,9 @@ const ( // schedulers, and notification bus for agents. type Server struct { config *Config - configLock sync.RWMutex - logger *log.Logger + configLock sync.Mutex + + logger *log.Logger // Connection pool to other Nomad servers connPool *ConnPool @@ -96,32 +97,28 @@ type Server struct { // The raft instance is used among Nomad nodes within the // region to protect operations that require strong consistency - leaderCh <-chan bool - raft *raft.Raft - raftLayer *RaftLayer - raftLayerLock sync.Mutex + leaderCh <-chan bool + raft *raft.Raft + raftLayer *RaftLayer raftStore *raftboltdb.BoltStore raftInmem *raft.InmemStore - raftTransport *raft.NetworkTransport - raftTransportLock sync.Mutex + raftTransport *raft.NetworkTransport // fsm is the state machine used with Raft fsm *nomadFSM // rpcListener is used to listen for incoming connections - rpcListener net.Listener - rpcListenerLock sync.Mutex - listenerCh chan struct{} + rpcListener net.Listener + listenerCh chan struct{} rpcServer *rpc.Server rpcAdvertise net.Addr // rpcTLS is the TLS config for incoming TLS requests - rpcTLS *tls.Config - rpcCancel context.CancelFunc - rpcTLSLock sync.Mutex + rpcTLS *tls.Config + rpcCancel context.CancelFunc // peers is used to track the known Nomad servers. This is // used for region forwarding and clustering. 
@@ -329,6 +326,7 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, logger *log.Logg
 	// Start ingesting events for Serf
 	go s.serfEventHandler()
 
+	// start the RPC listener for the server
 	s.startRPCListener()
 
 	// Emit metrics for the eval broker
@@ -353,10 +351,8 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, logger *log.Logg
 	return s, nil
 }
 
-// Start the RPC listeners
+// startRPCListener starts the server's RPC listener
 func (s *Server) startRPCListener() {
-	s.rpcListenerLock.Lock()
-	defer s.rpcListenerLock.Unlock()
 	ctx, cancel := context.WithCancel(context.Background())
 	s.rpcCancel = cancel
 	go func() {
@@ -365,10 +361,8 @@ func (s *Server) startRPCListener() {
 	}()
 }
 
+// createRPCListener creates the server's RPC listener
 func (s *Server) createRPCListener() error {
-	s.rpcListenerLock.Lock()
-	defer s.rpcListenerLock.Unlock()
-
 	s.listenerCh = make(chan struct{})
 	list, err := net.ListenTCP("tcp", s.config.RPCAddr)
 	if err != nil || list == nil {
@@ -380,6 +374,8 @@ func (s *Server) createRPCListener() error {
 	return nil
 }
 
+// getTLSConf gets the server's TLS configuration based on the config supplied
+// by the operator
 func getTLSConf(enableRPC bool, tlsConf *tlsutil.Config) (*tls.Config, tlsutil.RegionWrapper, error) {
 	var tlsWrap tlsutil.RegionWrapper
 	var incomingTLS *tls.Config
@@ -399,11 +395,13 @@ func getTLSConf(enableRPC bool, tlsConf *tlsutil.R
 	return incomingTLS, tlsWrap, nil
 }
 
-// ReloadTLSConnections updates a server's TLS configuration and reloads RPC
-// connections. This will handle both TLS upgrades and downgrades.
+// reloadTLSConnections updates a server's TLS configuration and reloads RPC
+// connections.
 func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error {
 	s.logger.Printf("[INFO] nomad: reloading server connections due to configuration changes")
 
+	// the server config must be in sync with the latest config changes, due to
+	// testing for TLS configuration settings in rpc.go
 	tlsConf := s.config.newTLSConfig(newTLSConfig)
 	incomingTLS, tlsWrap, err := getTLSConf(newTLSConfig.EnableRPC, tlsConf)
 	if err != nil {
@@ -411,6 +409,13 @@ func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error {
 		return err
 	}
 
+	// Keeping configuration in sync is important for other places that require
+	// access to config information, such as rpc.go, where we decide on what kind
+	// of network connections to accept depending on the server configuration
+	s.configLock.Lock()
+	s.config.TLSConfig = newTLSConfig
+	s.configLock.Unlock()
+
 	if s.rpcCancel == nil {
 		s.logger.Printf("[ERR] nomad: No TLS Context to reset")
 		return fmt.Errorf("Unable to reset tls context")
@@ -422,38 +427,27 @@ func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error {
 	s.config.TLSConfig = newTLSConfig
 	s.configLock.Unlock()
 
-	s.rpcTLSLock.Lock()
 	s.rpcTLS = incomingTLS
-	s.rpcTLSLock.Unlock()
-
-	s.raftTransportLock.Lock()
-	defer s.raftTransportLock.Unlock()
-	s.raftTransport.Close()
-
 	s.connPool.ReloadTLS(tlsWrap)
 
 	// reinitialize our rpc listener
-	s.rpcListenerLock.Lock()
 	s.rpcListener.Close()
 	<-s.listenerCh
-	s.rpcListenerLock.Unlock()
+	s.raftTransport.Pause()
+	s.raftLayer.Close()
 
 	err = s.createRPCListener()
-	if err != nil {
-		return err
-	}
-	s.startRPCListener()
 
-	s.raftLayerLock.Lock()
-	s.raftLayer.Close()
+	// CLose existing streams
 	wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap)
 	s.raftLayer = NewRaftLayer(s.rpcAdvertise, wrapper)
-	s.raftLayerLock.Unlock()
-	
// re-initialize the network transport with a re-initialized stream layer - trans := raft.NewNetworkTransport(s.raftLayer, 3, s.config.RaftTimeout, - s.config.LogOutput) - s.raftTransport = trans + s.startRPCListener() + + time.Sleep(3 * time.Second) + if err != nil { + return err + } s.logger.Printf("[DEBUG] nomad: finished reloading server connections") return nil @@ -621,6 +615,7 @@ func (s *Server) Reload(newConfig *Config) error { if !newConfig.TLSConfig.Equals(s.config.TLSConfig) { if err := s.reloadTLSConnections(newConfig.TLSConfig); err != nil { + s.logger.Printf("[DEBUG] nomad: reloading server TLS configuration") multierror.Append(&mErr, err) } } diff --git a/nomad/server_test.go b/nomad/server_test.go index 89e6810d7410..e7d063b97a40 100644 --- a/nomad/server_test.go +++ b/nomad/server_test.go @@ -13,6 +13,7 @@ import ( "time" "github.com/hashicorp/consul/lib/freeport" + memdb "github.com/hashicorp/go-memdb" msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/nomad/command/agent/consul" "github.com/hashicorp/nomad/helper/uuid" @@ -330,7 +331,7 @@ func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) { // Tests that the server will successfully reload its network connections, // downgrading from TLS to plaintext if the server's TLS configuration changes. -func TestServer_reload_TLSConnections_TLSToPlaintext(t *testing.T) { +func TestServer_Reload_TLSConnections_TLSToPlaintext_RPC(t *testing.T) { t.Parallel() assert := assert.New(t) @@ -375,3 +376,173 @@ func TestServer_reload_TLSConnections_TLSToPlaintext(t *testing.T) { err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp) assert.Nil(err) } + +// Test that Raft connections are reloaded as expected when a Nomad server is +// upgraded from plaintext to TLS +func TestServer_Reload_TLSConnections_Raft(t *testing.T) { + assert := assert.New(t) + t.Parallel() + const ( + cafile = "../../helper/tlsutil/testdata/ca.pem" + foocert = "../../helper/tlsutil/testdata/nomad-foo.pem" + fookey = "../../helper/tlsutil/testdata/nomad-foo-key.pem" + barcert = "../dev/tls_cluster/certs/nomad.pem" + barkey = "../dev/tls_cluster/certs/nomad-key.pem" + ) + dir := tmpDir(t) + defer os.RemoveAll(dir) + s1 := testServer(t, func(c *Config) { + c.BootstrapExpect = 2 + c.DevMode = false + c.DevDisableBootstrap = true + c.DataDir = path.Join(dir, "node1") + c.NodeName = "node1" + c.Region = "regionFoo" + }) + defer s1.Shutdown() + + s2 := testServer(t, func(c *Config) { + c.BootstrapExpect = 2 + c.DevMode = false + c.DevDisableBootstrap = true + c.DataDir = path.Join(dir, "node2") + c.NodeName = "node2" + c.Region = "regionFoo" + }) + defer s2.Shutdown() + + testJoin(t, s1, s2) + + testutil.WaitForResult(func() (bool, error) { + peers, _ := s1.numPeers() + return peers == 2, nil + }, func(err error) { + t.Fatalf("should have 2 peers") + }) + + // the server should be connected to the rest of the cluster + testutil.WaitForLeader(t, s2.RPC) + + { + // assert that a job register request will succeed + codec := rpcClient(t, s2) + job := mock.Job() + req := &structs.JobRegisterRequest{ + Job: job, + WriteRequest: structs.WriteRequest{ + Region: "regionFoo", + Namespace: job.Namespace, + }, + } + + // Fetch the response + var resp structs.JobRegisterResponse + err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp) + assert.Nil(err) + + // Check for the job in the FSM of each server in the cluster + { + state := s2.fsm.State() + ws := memdb.NewWatchSet() + out, err := state.JobByID(ws, job.Namespace, 
job.ID) + assert.Nil(err) + assert.NotNil(out) + assert.Equal(out.CreateIndex, resp.JobModifyIndex) + } + { + state := s1.fsm.State() + ws := memdb.NewWatchSet() + out, err := state.JobByID(ws, job.Namespace, job.ID) + assert.Nil(err) + assert.NotNil(out) // TODO Occasionally is flaky + assert.Equal(out.CreateIndex, resp.JobModifyIndex) + } + } + + newTLSConfig := &config.TLSConfig{ + EnableHTTP: true, + VerifyHTTPSClient: true, + CAFile: cafile, + CertFile: foocert, + KeyFile: fookey, + } + + err := s1.reloadTLSConnections(newTLSConfig) + assert.Nil(err) + + { + // assert that a job register request will fail between servers that + // should not be able to communicate over Raft + codec := rpcClient(t, s2) + job := mock.Job() + req := &structs.JobRegisterRequest{ + Job: job, + WriteRequest: structs.WriteRequest{ + Region: "global", + Namespace: job.Namespace, + }, + } + + var resp structs.JobRegisterResponse + err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp) + assert.NotNil(err) + + // Check that the job was not persisted + state := s2.fsm.State() + ws := memdb.NewWatchSet() + out, _ := state.JobByID(ws, job.Namespace, job.ID) + assert.Nil(out) + } + + secondNewTLSConfig := &config.TLSConfig{ + EnableHTTP: true, + VerifyHTTPSClient: true, + CAFile: cafile, + CertFile: barcert, + KeyFile: barkey, + } + + // Now, transition the other server to TLS, which should restore their + // ability to communicate. + err = s2.reloadTLSConnections(secondNewTLSConfig) + assert.Nil(err) + + // the server should be connected to the rest of the cluster + testutil.WaitForLeader(t, s2.RPC) + + { + // assert that a job register request will succeed + codec := rpcClient(t, s2) + job := mock.Job() + req := &structs.JobRegisterRequest{ + Job: job, + WriteRequest: structs.WriteRequest{ + Region: "regionFoo", + Namespace: job.Namespace, + }, + } + + // Fetch the response + var resp structs.JobRegisterResponse + err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp) + assert.Nil(err) + + // Check for the job in the FSM of each server in the cluster + { + state := s2.fsm.State() + ws := memdb.NewWatchSet() + out, err := state.JobByID(ws, job.Namespace, job.ID) + assert.Nil(err) + assert.NotNil(out) + assert.Equal(out.CreateIndex, resp.JobModifyIndex) + } + { + state := s1.fsm.State() + ws := memdb.NewWatchSet() + out, err := state.JobByID(ws, job.Namespace, job.ID) + assert.Nil(err) + assert.NotNil(out) + assert.Equal(out.CreateIndex, resp.JobModifyIndex) + } + } +} diff --git a/vendor/github.com/hashicorp/raft/net_transport.go b/vendor/github.com/hashicorp/raft/net_transport.go index 7c55ac5371ff..cbfabfc31606 100644 --- a/vendor/github.com/hashicorp/raft/net_transport.go +++ b/vendor/github.com/hashicorp/raft/net_transport.go @@ -2,6 +2,7 @@ package raft import ( "bufio" + "context" "errors" "fmt" "io" @@ -72,7 +73,8 @@ type NetworkTransport struct { shutdownCh chan struct{} shutdownLock sync.Mutex - stream StreamLayer + stream StreamLayer + streamCancel context.CancelFunc timeout time.Duration TimeoutScale int @@ -141,6 +143,7 @@ func NewNetworkTransportWithLogger( if logger == nil { logger = log.New(os.Stderr, "", log.LstdFlags) } + trans := &NetworkTransport{ connPool: make(map[ServerAddress][]*netConn), consumeCh: make(chan RPC), @@ -151,10 +154,26 @@ func NewNetworkTransportWithLogger( timeout: timeout, TimeoutScale: DefaultTimeoutScale, } - go trans.listen() + + ctx, cancel := context.WithCancel(context.Background()) + trans.streamCancel = cancel + go trans.listen(ctx) return 
trans
 }
 
+// Pause closes the current stream for a NetworkTransport instance
+func (n *NetworkTransport) Pause() {
+	n.streamCancel()
+	n.stream.Close()
+}
+
+// Reload creates a new stream layer for a NetworkTransport instance
+func (n *NetworkTransport) Reload(s StreamLayer) {
+	ctx, cancel := context.WithCancel(context.Background())
+	n.streamCancel = cancel
+	n.stream = s // install the new stream layer before restarting the accept loop
+	go n.listen(ctx)
+}
+
 // SetHeartbeatHandler is used to setup a heartbeat handler
 // as a fast-pass. This is to avoid head-of-line blocking from
 // disk IO.
@@ -356,14 +375,22 @@ func (n *NetworkTransport) DecodePeer(buf []byte) ServerAddress {
 }
 
 // listen is used to handling incoming connections.
-func (n *NetworkTransport) listen() {
+func (n *NetworkTransport) listen(ctx context.Context) {
 	for {
+		select {
+		case <-ctx.Done():
+			n.logger.Println("[INFO] raft-net: stream layer is closed")
+			return
+		default:
+		}
+
 		// Accept incoming connections
 		conn, err := n.stream.Accept()
 		if err != nil {
 			if n.IsShutdown() {
 				return
 			}
+			// TODO Getting an error here
 			n.logger.Printf("[ERR] raft-net: Failed to accept connection: %v", err)
 			continue
 		}

From 21c1c1defc22666a4194d0faeebaa346bec11043 Mon Sep 17 00:00:00 2001
From: Chelsea Holland Komlo
Date: Tue, 9 Jan 2018 06:28:07 -0500
Subject: [PATCH 15/24] add documentation

---
 website/source/guides/securing-nomad.html.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/website/source/guides/securing-nomad.html.md b/website/source/guides/securing-nomad.html.md
index d4898f3dd626..4f8e193329c5 100644
--- a/website/source/guides/securing-nomad.html.md
+++ b/website/source/guides/securing-nomad.html.md
@@ -472,6 +472,17 @@ NOTE: Dynamically reloading certificates will _not_ close existing connections.
 If you need to rotate certificates due to a security incident, you will still
 need to completely shutdown and restart the Nomad agent.
 
+## Migrating a cluster to TLS
+
+Nomad supports dynamically reloading a server to move from plaintext to TLS.
+This can be done by editing the server's configuration and reloading the server
+or client via SIGHUP. Note that this will only reload changes within the tls
+configuration block, other configuration changes still require a full restart.
+
+This process will read in new TLS configuration for the agent, configure the
+agent for the new changes, and reload all network connections. This process
+works for both upgrading and downgrading TLS (but we recommend upgrading).
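+
+For example, a server can be migrated to TLS by adding a `tls` stanza like the
+following (the certificate paths here are illustrative) and then sending the
+agent a SIGHUP:
+
+```hcl
+tls {
+  http = true
+  rpc  = true
+
+  ca_file   = "/etc/nomad/ca.pem"
+  cert_file = "/etc/nomad/server.pem"
+  key_file  = "/etc/nomad/server-key.pem"
+
+  verify_server_hostname = true
+}
+```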
+ [cfssl]: https://cfssl.org/ [cfssl.json]: https://raw.githubusercontent.com/hashicorp/nomad/master/demo/vagrant/cfssl.json From bbc56860ac109c373b16adf755c8a5e313fc5be6 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Tue, 16 Jan 2018 07:34:39 -0500 Subject: [PATCH 16/24] adding additional test assertions; differentiate reloading agent and http server --- client/client.go | 1 - client/client_test.go | 96 +++++++++++++++++++++++++++---------- command/agent/agent.go | 6 +-- command/agent/agent_test.go | 10 ++-- command/agent/command.go | 48 +++++++++---------- 5 files changed, 105 insertions(+), 56 deletions(-) diff --git a/client/client.go b/client/client.go index c58ff7fa5f29..11b716600192 100644 --- a/client/client.go +++ b/client/client.go @@ -371,7 +371,6 @@ func (c *Client) reloadTLSConnections(newConfig *nconfig.TLSConfig) error { var tlsWrap tlsutil.RegionWrapper if newConfig != nil && newConfig.EnableRPC { tw, err := c.config.NewTLSConfiguration(newConfig).OutgoingTLSWrapper() - if err != nil { return err } diff --git a/client/client_test.go b/client/client_test.go index d4cba7db5dd2..95ff480d357c 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -1007,7 +1007,7 @@ func TestClient_ReloadTLS_UpgradePlaintextToTLS(t *testing.T) { assert := assert.New(t) s1, addr := testServer(t, func(c *nomad.Config) { - c.Region = "foo" + c.Region = "regionFoo" }) defer s1.Shutdown() testutil.WaitForLeader(t, s1.RPC) @@ -1023,6 +1023,27 @@ func TestClient_ReloadTLS_UpgradePlaintextToTLS(t *testing.T) { }) defer c1.Shutdown() + // Registering a node over plaintext should succeed + { + req := structs.NodeSpecificRequest{ + NodeID: c1.Node().ID, + QueryOptions: structs.QueryOptions{Region: "regionFoo"}, + } + + testutil.WaitForResult(func() (bool, error) { + var out structs.SingleNodeResponse + err := c1.RPC("Node.GetNode", &req, &out) + if err != nil { + return false, fmt.Errorf("client RPC failed when it should have succeeded:\n%+v", err) + } + return true, nil + }, + func(err error) { + t.Fatalf(err.Error()) + }, + ) + } + newConfig := &nconfig.TLSConfig{ EnableHTTP: true, EnableRPC: true, @@ -1035,23 +1056,26 @@ func TestClient_ReloadTLS_UpgradePlaintextToTLS(t *testing.T) { err := c1.reloadTLSConnections(newConfig) assert.Nil(err) - req := structs.NodeSpecificRequest{ - NodeID: c1.Node().ID, - QueryOptions: structs.QueryOptions{Region: "dc1"}, - } - var out structs.SingleNodeResponse - testutil.AssertUntil(100*time.Millisecond, - func() (bool, error) { + // Registering a node over plaintext should fail after the node has upgraded + // to TLS + { + req := structs.NodeSpecificRequest{ + NodeID: c1.Node().ID, + QueryOptions: structs.QueryOptions{Region: "regionFoo"}, + } + testutil.WaitForResult(func() (bool, error) { + var out structs.SingleNodeResponse err := c1.RPC("Node.GetNode", &req, &out) if err == nil { return false, fmt.Errorf("client RPC succeeded when it should have failed:\n%+v", err) } return true, nil }, - func(err error) { - t.Fatalf(err.Error()) - }, - ) + func(err error) { + t.Fatalf(err.Error()) + }, + ) + } } func TestClient_ReloadTLS_DowngradeTLSToPlaintext(t *testing.T) { @@ -1059,7 +1083,7 @@ func TestClient_ReloadTLS_DowngradeTLSToPlaintext(t *testing.T) { assert := assert.New(t) s1, addr := testServer(t, func(c *nomad.Config) { - c.Region = "foo" + c.Region = "regionFoo" }) defer s1.Shutdown() testutil.WaitForLeader(t, s1.RPC) @@ -1083,26 +1107,50 @@ func TestClient_ReloadTLS_DowngradeTLSToPlaintext(t *testing.T) { }) defer c1.Shutdown() + // assert 
that when one node is running in encrypted mode, an RPC request to a
+	// node running in plaintext mode should fail
+	{
+		req := structs.NodeSpecificRequest{
+			NodeID:       c1.Node().ID,
+			QueryOptions: structs.QueryOptions{Region: "regionFoo"},
+		}
+		testutil.WaitForResult(func() (bool, error) {
+			var out structs.SingleNodeResponse
+			err := c1.RPC("Node.GetNode", &req, &out)
+			if err == nil {
+				return false, fmt.Errorf("client RPC succeeded when it should have failed:\n%+v", err)
+			}
+			return true, nil
+		},
+			func(err error) {
+				t.Fatalf(err.Error())
+			},
+		)
+	}
+
 	newConfig := &nconfig.TLSConfig{}
 
 	err := c1.reloadTLSConnections(newConfig)
 	assert.Nil(err)
 
-	req := structs.NodeSpecificRequest{
-		NodeID:       c1.Node().ID,
-		QueryOptions: structs.QueryOptions{Region: "regionFoo"},
-	}
-	var out structs.SingleNodeResponse
-	testutil.AssertUntil(100*time.Millisecond,
-		func() (bool, error) {
+	// assert that when both nodes are in plaintext mode, an RPC request should
+	// succeed
+	{
+		req := structs.NodeSpecificRequest{
+			NodeID:       c1.Node().ID,
+			QueryOptions: structs.QueryOptions{Region: "regionFoo"},
+		}
+		testutil.WaitForResult(func() (bool, error) {
+			var out structs.SingleNodeResponse
 			err := c1.RPC("Node.GetNode", &req, &out)
 			if err != nil {
 				return false, fmt.Errorf("client RPC failed when it should have succeeded:\n%+v", err)
 			}
 			return true, nil
 		},
-		func(err error) {
-			t.Fatalf(err.Error())
-		},
-	)
+			func(err error) {
+				t.Fatalf(err.Error())
+			},
+		)
+	}
 }
diff --git a/command/agent/agent.go b/command/agent/agent.go
index 1f6b400e7f6e..3ec9e00f0b41 100644
--- a/command/agent/agent.go
+++ b/command/agent/agent.go
@@ -732,14 +732,14 @@ func (a *Agent) Stats() map[string]map[string]string {
 
 // ShouldReload determines if we should reload the configuration and agent
 // connections. If the TLS Configuration has not changed, we shouldn't reload.
-func (a *Agent) ShouldReload(newConfig *Config) bool {
+func (a *Agent) ShouldReload(newConfig *Config) (bool, bool) {
 	a.configLock.Lock()
 	defer a.configLock.Unlock()
 	if a.config.TLSConfig.Equals(newConfig.TLSConfig) {
-		return false
+		return false, false
 	}
 
-	return true
+	return true, true // requires a reload of both agent and http server
 }
 
 // Reload handles configuration changes for the agent. 
Provides a method that diff --git a/command/agent/agent_test.go b/command/agent/agent_test.go index 6042d6902f6b..196b00d7759d 100644 --- a/command/agent/agent_test.go +++ b/command/agent/agent_test.go @@ -888,8 +888,9 @@ func TestServer_ShouldReload_ReturnFalseForNoChanges(t *testing.T) { config: agentConfig, } - shouldReload := agent.ShouldReload(sameAgentConfig) - assert.False(shouldReload) + shouldReloadAgent, shouldReloadHTTPServer := agent.ShouldReload(sameAgentConfig) + assert.False(shouldReloadAgent) + assert.False(shouldReloadHTTPServer) } func TestServer_ShouldReload_ReturnTrueForConfigChanges(t *testing.T) { @@ -935,6 +936,7 @@ func TestServer_ShouldReload_ReturnTrueForConfigChanges(t *testing.T) { config: agentConfig, } - shouldReload := agent.ShouldReload(newConfig) - assert.True(shouldReload) + shouldReloadAgent, shouldReloadHTTPServer := agent.ShouldReload(newConfig) + assert.True(shouldReloadAgent) + assert.True(shouldReloadHTTPServer) } diff --git a/command/agent/command.go b/command/agent/command.go index 5a7c67831e70..cac8616c4a0d 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -634,40 +634,40 @@ func (c *Command) handleReload() { newConf.LogLevel = c.agent.GetConfig().LogLevel } - shouldReload := c.agent.ShouldReload(newConf) - if shouldReload { + shouldReloadAgent, shouldReloadHTTPServer := c.agent.ShouldReload(newConf) + if shouldReloadAgent { c.agent.logger.Printf("[DEBUG] agent: starting reload of agent config") err := c.agent.Reload(newConf) if err != nil { c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err) return } - } - if s := c.agent.Server(); s != nil { - sconf, err := convertServerConfig(newConf, c.logOutput) - c.agent.logger.Printf("[DEBUG] agent: starting reload of server config") - if err != nil { - c.agent.logger.Printf("[ERR] agent: failed to convert server config: %v", err) - return - } else { - if err := s.Reload(sconf); err != nil { - c.agent.logger.Printf("[ERR] agent: reloading server config failed: %v", err) + if s := c.agent.Server(); s != nil { + sconf, err := convertServerConfig(newConf, c.logOutput) + c.agent.logger.Printf("[DEBUG] agent: starting reload of server config") + if err != nil { + c.agent.logger.Printf("[ERR] agent: failed to convert server config: %v", err) return + } else { + if err := s.Reload(sconf); err != nil { + c.agent.logger.Printf("[ERR] agent: reloading server config failed: %v", err) + return + } } } - } - if s := c.agent.Client(); s != nil { - clientConfig, err := c.agent.clientConfig() - c.agent.logger.Printf("[DEBUG] agent: starting reload of client config") - if err != nil { - c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err) - return - } - if err := c.agent.Client().Reload(clientConfig); err != nil { - c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err) - return + if s := c.agent.Client(); s != nil { + clientConfig, err := c.agent.clientConfig() + c.agent.logger.Printf("[DEBUG] agent: starting reload of client config") + if err != nil { + c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err) + return + } + if err := c.agent.Client().Reload(clientConfig); err != nil { + c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err) + return + } } } @@ -675,7 +675,7 @@ func (c *Command) handleReload() { // we error in either of the above cases. For example, reloading the http // server to a TLS connection could succeed, while reloading the server's rpc // connections could fail. 
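+	// The HTTP server's TLS settings are fixed when its listener is created,
+	// so applying a TLS change means shutting the server down and rebuilding it.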
- if shouldReload { + if shouldReloadHTTPServer { err := c.reloadHTTPServer(newConf) if err != nil { c.agent.logger.Printf("[ERR] http: failed to reload the config: %v", err) From 8de260f19bfb7af6ce13a72e29869e6ae44ed567 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Tue, 16 Jan 2018 08:02:39 -0500 Subject: [PATCH 17/24] refactor creating a new tls configuration --- client/client.go | 2 +- client/config/config.go | 17 +++++------------ helper/tlsutil/config.go | 12 ++++++++++++ nomad/config.go | 17 +++++------------ nomad/server.go | 2 +- 5 files changed, 24 insertions(+), 26 deletions(-) diff --git a/client/client.go b/client/client.go index 11b716600192..314b9dda4053 100644 --- a/client/client.go +++ b/client/client.go @@ -370,7 +370,7 @@ func (c *Client) init() error { func (c *Client) reloadTLSConnections(newConfig *nconfig.TLSConfig) error { var tlsWrap tlsutil.RegionWrapper if newConfig != nil && newConfig.EnableRPC { - tw, err := c.config.NewTLSConfiguration(newConfig).OutgoingTLSWrapper() + tw, err := tlsutil.NewTLSConfiguration(newConfig).OutgoingTLSWrapper() if err != nil { return err } diff --git a/client/config/config.go b/client/config/config.go index e2a271646ee0..8a16eb8bebec 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -350,20 +350,13 @@ func (c *Config) ReadStringListToMapDefault(key, defaultValue string) map[string // TLSConfiguration returns a TLSUtil Config based on the existing client // configuration func (c *Config) TLSConfiguration() *tlsutil.Config { - return c.NewTLSConfiguration(c.TLSConfig) -} - -// NewTLSConfiguration returns a TLSUtil Config for a new TLS config object -// This allows a TLSConfig object to be created without first explicitly -// setting it -func (c *Config) NewTLSConfiguration(tlsConfig *config.TLSConfig) *tlsutil.Config { return &tlsutil.Config{ VerifyIncoming: true, VerifyOutgoing: true, - VerifyServerHostname: tlsConfig.VerifyServerHostname, - CAFile: tlsConfig.CAFile, - CertFile: tlsConfig.CertFile, - KeyFile: tlsConfig.KeyFile, - KeyLoader: tlsConfig.GetKeyLoader(), + VerifyServerHostname: c.TLSConfig.VerifyServerHostname, + CAFile: c.TLSConfig.CAFile, + CertFile: c.TLSConfig.CertFile, + KeyFile: c.TLSConfig.KeyFile, + KeyLoader: c.TLSConfig.GetKeyLoader(), } } diff --git a/helper/tlsutil/config.go b/helper/tlsutil/config.go index 3de5b925a7f2..8f6b1f01df3e 100644 --- a/helper/tlsutil/config.go +++ b/helper/tlsutil/config.go @@ -67,6 +67,18 @@ type Config struct { KeyLoader *config.KeyLoader } +func NewTLSConfiguration(newConf *config.TLSConfig) *Config { + return &Config{ + VerifyIncoming: true, + VerifyOutgoing: true, + VerifyServerHostname: newConf.VerifyServerHostname, + CAFile: newConf.CAFile, + CertFile: newConf.CertFile, + KeyFile: newConf.KeyFile, + KeyLoader: newConf.GetKeyLoader(), + } +} + // AppendCA opens and parses the CA file and adds the certificates to // the provided CertPool. func (c *Config) AppendCA(pool *x509.CertPool) error { diff --git a/nomad/config.go b/nomad/config.go index bdc8cba3b007..4a918f393c45 100644 --- a/nomad/config.go +++ b/nomad/config.go @@ -353,20 +353,13 @@ func DefaultConfig() *Config { // tlsConfig returns a TLSUtil Config based on the server configuration func (c *Config) tlsConfig() *tlsutil.Config { - return c.newTLSConfig(c.TLSConfig) -} - -// newTLSConfig returns a TLSUtil Config based on the server configuration -// This is useful for creating a TLSConfig object without explictely setting it -// on the config. 
-func (c *Config) newTLSConfig(newConf *config.TLSConfig) *tlsutil.Config { return &tlsutil.Config{ VerifyIncoming: true, VerifyOutgoing: true, - VerifyServerHostname: newConf.VerifyServerHostname, - CAFile: newConf.CAFile, - CertFile: newConf.CertFile, - KeyFile: newConf.KeyFile, - KeyLoader: newConf.GetKeyLoader(), + VerifyServerHostname: c.TLSConfig.VerifyServerHostname, + CAFile: c.TLSConfig.CAFile, + CertFile: c.TLSConfig.CertFile, + KeyFile: c.TLSConfig.KeyFile, + KeyLoader: c.TLSConfig.GetKeyLoader(), } } diff --git a/nomad/server.go b/nomad/server.go index 14c3ed4a5569..45f53267c13b 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -402,7 +402,7 @@ func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error { // the server config must be in sync with the latest config changes, due to // testing for TLS configuration settings in rpc.go - tlsConf := s.config.newTLSConfig(newTLSConfig) + tlsConf := tlsutil.NewTLSConfiguration(newTLSConfig) incomingTLS, tlsWrap, err := getTLSConf(newTLSConfig.EnableRPC, tlsConf) if err != nil { s.logger.Printf("[ERR] nomad: unable to reset TLS context") From d97c91c7330bacdcb833d217d7709162ff36dc32 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Tue, 16 Jan 2018 11:55:11 -0500 Subject: [PATCH 18/24] feedback from code review --- nomad/server.go | 60 +++++++++---------- .../hashicorp/raft/net_transport.go | 1 - website/source/guides/securing-nomad.html.md | 15 ++--- 3 files changed, 38 insertions(+), 38 deletions(-) diff --git a/nomad/server.go b/nomad/server.go index 45f53267c13b..a2523caf1698 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -362,16 +362,16 @@ func (s *Server) startRPCListener() { } // createRPCListener creates the server's RPC listener -func (s *Server) createRPCListener() error { +func (s *Server) createRPCListener() (*net.TCPListener, error) { s.listenerCh = make(chan struct{}) - list, err := net.ListenTCP("tcp", s.config.RPCAddr) - if err != nil || list == nil { - s.logger.Printf("[ERR] nomad: No TLS listener to reload") - return err + listener, err := net.ListenTCP("tcp", s.config.RPCAddr) + if err != nil { + s.logger.Printf("[ERR] nomad: error when initializing TLS listener %s", err) + return listener, err } - s.rpcListener = list - return nil + s.rpcListener = listener + return listener, nil } // getTLSConf gets the server's TLS configuration based on the config supplied @@ -405,24 +405,21 @@ func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error { tlsConf := tlsutil.NewTLSConfiguration(newTLSConfig) incomingTLS, tlsWrap, err := getTLSConf(newTLSConfig.EnableRPC, tlsConf) if err != nil { - s.logger.Printf("[ERR] nomad: unable to reset TLS context") + s.logger.Printf("[ERR] nomad: unable to reset TLS context %s", err) return err } - // Keeping configuration in sync is important for other places that require - // access to config information, such as rpc.go, where we decide on what kind - // of network connections to accept depending on the server configuration - s.configLock.Lock() - s.config.TLSConfig = newTLSConfig - s.configLock.Unlock() - if s.rpcCancel == nil { - s.logger.Printf("[ERR] nomad: No TLS Context to reset") - return fmt.Errorf("Unable to reset tls context") + err = fmt.Errorf("No existing RPC server to reset.") + s.logger.Printf("[ERR] nomad: %s", err) + return err } s.rpcCancel() + // Keeping configuration in sync is important for other places that require + // access to config information, such as rpc.go, where we decide on what kind + // of network 
connections to accept depending on the server configuration s.configLock.Lock() s.config.TLSConfig = newTLSConfig s.configLock.Unlock() @@ -433,21 +430,25 @@ func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error { // reinitialize our rpc listener s.rpcListener.Close() <-s.listenerCh + + // Close existing Raft connections s.raftTransport.Pause() s.raftLayer.Close() - err = s.createRPCListener() + listener, err := s.createRPCListener() + if err != nil { + listener.Close() + return err + } - // CLose existing streams + // Close existing streams wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap) s.raftLayer = NewRaftLayer(s.rpcAdvertise, wrapper) - s.startRPCListener() + // Reload raft connections + s.raftTransport.Reload(s.raftLayer) - time.Sleep(3 * time.Second) - if err != nil { - return err - } + s.startRPCListener() s.logger.Printf("[DEBUG] nomad: finished reloading server connections") return nil @@ -615,7 +616,7 @@ func (s *Server) Reload(newConfig *Config) error { if !newConfig.TLSConfig.Equals(s.config.TLSConfig) { if err := s.reloadTLSConnections(newConfig.TLSConfig); err != nil { - s.logger.Printf("[DEBUG] nomad: reloading server TLS configuration") + s.logger.Printf("[ERR] nomad: error reloading server TLS configuration: %s", err) multierror.Append(&mErr, err) } } @@ -879,12 +880,11 @@ func (s *Server) setupRPC(tlsWrap tlsutil.RegionWrapper) error { s.rpcServer.Register(s.endpoints.Search) s.endpoints.Enterprise.Register(s) - list, err := net.ListenTCP("tcp", s.config.RPCAddr) + listener, err := s.createRPCListener() if err != nil { + listener.Close() return err } - s.rpcListener = list - s.listenerCh = make(chan struct{}) if s.config.RPCAdvertise != nil { s.rpcAdvertise = s.config.RPCAdvertise @@ -895,11 +895,11 @@ func (s *Server) setupRPC(tlsWrap tlsutil.RegionWrapper) error { // Verify that we have a usable advertise address addr, ok := s.rpcAdvertise.(*net.TCPAddr) if !ok { - list.Close() + listener.Close() return fmt.Errorf("RPC advertise address is not a TCP Address: %v", addr) } if addr.IP.IsUnspecified() { - list.Close() + listener.Close() return fmt.Errorf("RPC advertise address is not advertisable: %v", addr) } diff --git a/vendor/github.com/hashicorp/raft/net_transport.go b/vendor/github.com/hashicorp/raft/net_transport.go index cbfabfc31606..a2ab9def108e 100644 --- a/vendor/github.com/hashicorp/raft/net_transport.go +++ b/vendor/github.com/hashicorp/raft/net_transport.go @@ -164,7 +164,6 @@ func NewNetworkTransportWithLogger( // Pause closes the current stream for a NetworkTransport instance func (n *NetworkTransport) Pause() { n.streamCancel() - n.stream.Close() } // Pause creates a new stream for a NetworkTransport instance diff --git a/website/source/guides/securing-nomad.html.md b/website/source/guides/securing-nomad.html.md index 4f8e193329c5..e3a379405fc5 100644 --- a/website/source/guides/securing-nomad.html.md +++ b/website/source/guides/securing-nomad.html.md @@ -474,13 +474,14 @@ need to completely shutdown and restart the Nomad agent. ## Migrating a cluster to TLS -Nomad supports dynamically reloading a server to move from plaintext to TLS. -This can be done by editing the server's configuration and reloading the server -or client via SIGHUP. Note that this will only reload changes within the tls -configuration block, other configuration changes still require a full restart. - -This process will read in new TLS configuration for the agent, configure the -agent for the new changes, and reload all network connections. 
This process
+Nomad supports dynamically reloading its TLS configuration. To reload Nomad's
+configuration, first update the configuration file and then send the Nomad
+agent a SIGHUP signal. Note that this will only reload a subset of the
+configuration file, including the TLS configuration.
+
+When reloading the configuration, if there is a change to the TLS
+configuration, the agent will reload all network connections and, when
+establishing new connections, will use the new configuration. This process
 works for both upgrading and downgrading TLS (but we recommend upgrading).

From 0805c411963afb6b58349dc26d063801be53abe8 Mon Sep 17 00:00:00 2001
From: Chelsea Holland Komlo
Date: Tue, 16 Jan 2018 14:16:35 -0500
Subject: [PATCH 19/24] fixing up raft reload tests

close second goroutine in raft-net
---
 command/agent/command.go                    |  4 +-
 command/agent/http_test.go                  | 71 -------------------
 nomad/server.go                             | 15 ++--
 nomad/server_test.go                        | 23 +++---
 .../hashicorp/raft/net_transport.go         | 21 ++++--
 5 files changed, 37 insertions(+), 97 deletions(-)

diff --git a/command/agent/command.go b/command/agent/command.go
index cac8616c4a0d..98c6fa3a5beb 100644
--- a/command/agent/command.go
+++ b/command/agent/command.go
@@ -598,7 +598,7 @@ WAIT:
 	}
 }
 
-func (c *Command) reloadHTTPServer(newConfig *Config) error {
+func (c *Command) reloadHTTPServer() error {
 	c.agent.logger.Println("[INFO] agent: Reloading HTTP server with new TLS configuration")
 
 	c.httpServer.Shutdown()
@@ -676,7 +676,7 @@ func (c *Command) handleReload() {
 	// server to a TLS connection could succeed, while reloading the server's rpc
 	// connections could fail.
 	if shouldReloadHTTPServer {
-		err := c.reloadHTTPServer(newConf)
+		err := c.reloadHTTPServer()
 		if err != nil {
 			c.agent.logger.Printf("[ERR] http: failed to reload the config: %v", err)
 			return
diff --git a/command/agent/http_test.go b/command/agent/http_test.go
index b145d43571f9..5d4004c18e13 100644
--- a/command/agent/http_test.go
+++ b/command/agent/http_test.go
@@ -673,74 +673,3 @@ func encodeReq(obj interface{}) io.ReadCloser {
 	enc.Encode(obj)
 	return ioutil.NopCloser(buf)
 }
-
-func TestHTTP_VerifyHTTPSClientUpgrade_AfterConfigReload(t *testing.T) {
-	t.Parallel()
-	assert := assert.New(t)
-
-	const (
-		cafile  = "../../helper/tlsutil/testdata/ca.pem"
-		foocert = "../../helper/tlsutil/testdata/nomad-foo.pem"
-		fookey  = "../../helper/tlsutil/testdata/nomad-foo-key.pem"
-	)
-
-	newConfig := &Config{
-		TLSConfig: &config.TLSConfig{
-			EnableHTTP:        true,
-			VerifyHTTPSClient: true,
-			CAFile:            cafile,
-			CertFile:          foocert,
-			KeyFile:           fookey,
-		},
-	}
-
-	s := makeHTTPServer(t, func(c *Config) {
-		c.TLSConfig = newConfig.TLSConfig
-	})
-	defer s.Shutdown()
-
-	// HTTP plaintext request should succeed
-	reqURL := fmt.Sprintf("http://%s/v1/agent/self", s.Agent.config.AdvertiseAddrs.HTTP)
-
-	// First test with a plaintext request
-	transport := &http.Transport{}
-	client := &http.Client{Transport: transport}
-	_, err := http.NewRequest("GET", reqURL, nil)
-	assert.Nil(err)
-
-	// Next, reload the TLS configuration
-	err = s.Agent.Reload(newConfig)
-	assert.Nil(err)
-
-	// PASS: Requests that specify a valid hostname, CA cert, and client
-	// certificate succeed.
- tlsConf := &tls.Config{ - ServerName: "client.regionFoo.nomad", - RootCAs: x509.NewCertPool(), - GetClientCertificate: func(*tls.CertificateRequestInfo) (*tls.Certificate, error) { - c, err := tls.LoadX509KeyPair(foocert, fookey) - if err != nil { - return nil, err - } - return &c, nil - }, - } - - // HTTPS request should succeed - httpsReqURL := fmt.Sprintf("https://%s/v1/agent/self", s.Agent.config.AdvertiseAddrs.HTTP) - - cacertBytes, err := ioutil.ReadFile(cafile) - assert.Nil(err) - tlsConf.RootCAs.AppendCertsFromPEM(cacertBytes) - - transport = &http.Transport{TLSClientConfig: tlsConf} - client = &http.Client{Transport: transport} - req, err := http.NewRequest("GET", httpsReqURL, nil) - assert.Nil(err) - - resp, err := client.Do(req) - assert.Nil(err) - - resp.Body.Close() - assert.Equal(resp.StatusCode, 200) -} diff --git a/nomad/server.go b/nomad/server.go index a2523caf1698..0b3c1fb12748 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -400,8 +400,6 @@ func getTLSConf(enableRPC bool, tlsConf *tlsutil.Config) (*tls.Config, tlsutil.R func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error { s.logger.Printf("[INFO] nomad: reloading server connections due to configuration changes") - // the server config must be in sync with the latest config changes, due to - // testing for TLS configuration settings in rpc.go tlsConf := tlsutil.NewTLSConfiguration(newTLSConfig) incomingTLS, tlsWrap, err := getTLSConf(newTLSConfig.EnableRPC, tlsConf) if err != nil { @@ -430,10 +428,7 @@ func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error { // reinitialize our rpc listener s.rpcListener.Close() <-s.listenerCh - - // Close existing Raft connections - s.raftTransport.Pause() - s.raftLayer.Close() + s.startRPCListener() listener, err := s.createRPCListener() if err != nil { @@ -441,14 +436,14 @@ func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error { return err } - // Close existing streams + // Close and reload existing Raft connections + s.raftTransport.Pause() + s.raftLayer.Close() wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap) s.raftLayer = NewRaftLayer(s.rpcAdvertise, wrapper) - - // Reload raft connections s.raftTransport.Reload(s.raftLayer) - s.startRPCListener() + time.Sleep(3 * time.Second) s.logger.Printf("[DEBUG] nomad: finished reloading server connections") return nil diff --git a/nomad/server_test.go b/nomad/server_test.go index e7d063b97a40..5de6b8c6b7db 100644 --- a/nomad/server_test.go +++ b/nomad/server_test.go @@ -293,6 +293,7 @@ func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) { ) dir := tmpDir(t) defer os.RemoveAll(dir) + s1 := testServer(t, func(c *Config) { c.DataDir = path.Join(dir, "nodeA") }) @@ -312,10 +313,8 @@ func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) { err := s1.reloadTLSConnections(newTLSConfig) assert.Nil(err) - assert.True(s1.config.TLSConfig.Equals(newTLSConfig)) - time.Sleep(10 * time.Second) codec := rpcClient(t, s1) node := mock.Node() @@ -327,6 +326,7 @@ func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) { var resp structs.GenericResponse err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp) assert.NotNil(err) + assert.Contains("rpc error: EOF", err.Error()) } // Tests that the server will successfully reload its network connections, @@ -343,6 +343,7 @@ func TestServer_Reload_TLSConnections_TLSToPlaintext_RPC(t *testing.T) { dir := tmpDir(t) defer os.RemoveAll(dir) + s1 := testServer(t, func(c *Config) { 
c.DataDir = path.Join(dir, "nodeB") c.TLSConfig = &config.TLSConfig{ @@ -362,8 +363,6 @@ func TestServer_Reload_TLSConnections_TLSToPlaintext_RPC(t *testing.T) { assert.Nil(err) assert.True(s1.config.TLSConfig.Equals(newTLSConfig)) - time.Sleep(10 * time.Second) - codec := rpcClient(t, s1) node := mock.Node() @@ -391,6 +390,7 @@ func TestServer_Reload_TLSConnections_Raft(t *testing.T) { ) dir := tmpDir(t) defer os.RemoveAll(dir) + s1 := testServer(t, func(c *Config) { c.BootstrapExpect = 2 c.DevMode = false @@ -420,7 +420,6 @@ func TestServer_Reload_TLSConnections_Raft(t *testing.T) { t.Fatalf("should have 2 peers") }) - // the server should be connected to the rest of the cluster testutil.WaitForLeader(t, s2.RPC) { @@ -439,6 +438,7 @@ func TestServer_Reload_TLSConnections_Raft(t *testing.T) { var resp structs.JobRegisterResponse err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp) assert.Nil(err) + assert.NotEqual(0, resp.Index) // Check for the job in the FSM of each server in the cluster { @@ -454,7 +454,7 @@ func TestServer_Reload_TLSConnections_Raft(t *testing.T) { ws := memdb.NewWatchSet() out, err := state.JobByID(ws, job.Namespace, job.ID) assert.Nil(err) - assert.NotNil(out) // TODO Occasionally is flaky + assert.NotNil(out) assert.Equal(out.CreateIndex, resp.JobModifyIndex) } } @@ -478,17 +478,19 @@ func TestServer_Reload_TLSConnections_Raft(t *testing.T) { req := &structs.JobRegisterRequest{ Job: job, WriteRequest: structs.WriteRequest{ - Region: "global", + Region: "regionFoo", Namespace: job.Namespace, }, } + // TODO(CK) This occasionally is flaky var resp structs.JobRegisterResponse err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp) assert.NotNil(err) + assert.Contains("rpc error: EOF", err.Error()) // Check that the job was not persisted - state := s2.fsm.State() + state := s1.fsm.State() ws := memdb.NewWatchSet() out, _ := state.JobByID(ws, job.Namespace, job.ID) assert.Nil(out) @@ -507,12 +509,12 @@ func TestServer_Reload_TLSConnections_Raft(t *testing.T) { err = s2.reloadTLSConnections(secondNewTLSConfig) assert.Nil(err) - // the server should be connected to the rest of the cluster testutil.WaitForLeader(t, s2.RPC) { // assert that a job register request will succeed codec := rpcClient(t, s2) + job := mock.Job() req := &structs.JobRegisterRequest{ Job: job, @@ -526,6 +528,7 @@ func TestServer_Reload_TLSConnections_Raft(t *testing.T) { var resp structs.JobRegisterResponse err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp) assert.Nil(err) + assert.NotEqual(0, resp.Index) // Check for the job in the FSM of each server in the cluster { @@ -533,7 +536,7 @@ func TestServer_Reload_TLSConnections_Raft(t *testing.T) { ws := memdb.NewWatchSet() out, err := state.JobByID(ws, job.Namespace, job.ID) assert.Nil(err) - assert.NotNil(out) + assert.NotNil(out) // TODO(CK) This occasionally is flaky assert.Equal(out.CreateIndex, resp.JobModifyIndex) } { diff --git a/vendor/github.com/hashicorp/raft/net_transport.go b/vendor/github.com/hashicorp/raft/net_transport.go index a2ab9def108e..b45d03e24022 100644 --- a/vendor/github.com/hashicorp/raft/net_transport.go +++ b/vendor/github.com/hashicorp/raft/net_transport.go @@ -161,13 +161,20 @@ func NewNetworkTransportWithLogger( return trans } -// Pause closes the current stream for a NetworkTransport instance +// Pause closes the current stream and existing connections for a +// NetworkTransport instance func (n *NetworkTransport) Pause() { + for _, e := range n.connPool { + for _, conn := range e { 
+ conn.Release() + } + } n.streamCancel() } // Pause creates a new stream for a NetworkTransport instance func (n *NetworkTransport) Reload(s StreamLayer) { + n.stream = s ctx, cancel := context.WithCancel(context.Background()) n.streamCancel = cancel go n.listen(ctx) @@ -389,19 +396,18 @@ func (n *NetworkTransport) listen(ctx context.Context) { if n.IsShutdown() { return } - // TODO Getting an error here n.logger.Printf("[ERR] raft-net: Failed to accept connection: %v", err) continue } n.logger.Printf("[DEBUG] raft-net: %v accepted connection from: %v", n.LocalAddr(), conn.RemoteAddr()) // Handle the connection in dedicated routine - go n.handleConn(conn) + go n.handleConn(ctx, conn) } } // handleConn is used to handle an inbound connection for its lifespan. -func (n *NetworkTransport) handleConn(conn net.Conn) { +func (n *NetworkTransport) handleConn(ctx context.Context, conn net.Conn) { defer conn.Close() r := bufio.NewReader(conn) w := bufio.NewWriter(conn) @@ -409,6 +415,13 @@ func (n *NetworkTransport) handleConn(conn net.Conn) { enc := codec.NewEncoder(w, &codec.MsgpackHandle{}) for { + select { + case <-ctx.Done(): + n.logger.Println("[INFO] raft-net: stream layer is closed for handleConn") + return + default: + } + if err := n.handleCommand(r, dec, enc); err != nil { if err != io.EOF { n.logger.Printf("[ERR] raft-net: Failed to decode incoming command: %v", err) From 028ba9f423cd924a7d84d5ba0790e2fd3e3243a9 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Wed, 17 Jan 2018 12:02:40 -0500 Subject: [PATCH 20/24] allow for similar error messages for closed connections --- nomad/server.go | 1 - nomad/server_test.go | 9 +++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/nomad/server.go b/nomad/server.go index 0b3c1fb12748..f540d53b7b94 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -100,7 +100,6 @@ type Server struct { leaderCh <-chan bool raft *raft.Raft raftLayer *RaftLayer - raftStore *raftboltdb.BoltStore raftInmem *raft.InmemStore diff --git a/nomad/server_test.go b/nomad/server_test.go index 5de6b8c6b7db..34b8ff11382d 100644 --- a/nomad/server_test.go +++ b/nomad/server_test.go @@ -8,6 +8,7 @@ import ( "net" "os" "path" + "strings" "sync/atomic" "testing" "time" @@ -280,6 +281,10 @@ func TestServer_Reload_Vault(t *testing.T) { } } +func connectionReset(msg string) bool { + return strings.Contains(msg, "EOF") || strings.Contains(msg, "connection reset by peer") +} + // Tests that the server will successfully reload its network connections, // upgrading from plaintext to TLS if the server's TLS configuration changes. 
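The context check added to `handleConn` above is what makes `Pause` effective: a connection handler now notices cancellation between commands instead of serving its connection until the peer hangs up. A standalone sketch of the pattern (names are illustrative, not from the patch):

```go
package example

import (
	"context"
	"net"
)

// handleUntilCancelled mirrors the handleConn loop above: the goroutine owns
// the connection for its lifespan, but polls a shared context between
// commands so that cancelling one context stops every handler.
func handleUntilCancelled(ctx context.Context, conn net.Conn, serveOne func(net.Conn) error) {
	defer conn.Close()
	for {
		select {
		case <-ctx.Done():
			// The stream layer was paused or reloaded; stop serving.
			return
		default:
		}
		if err := serveOne(conn); err != nil {
			return
		}
	}
}
```

One caveat the later patches address: a handler blocked inside a command does not observe cancellation until that command returns, which is why releasing the pooled connections alongside the cancel is also necessary.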
func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) { @@ -326,7 +331,7 @@ func TestServer_Reload_TLSConnections_PlaintextToTLS(t *testing.T) { var resp structs.GenericResponse err = msgpackrpc.CallWithCodec(codec, "Node.Register", req, &resp) assert.NotNil(err) - assert.Contains("rpc error: EOF", err.Error()) + assert.True(connectionReset(err.Error())) } // Tests that the server will successfully reload its network connections, @@ -487,7 +492,7 @@ func TestServer_Reload_TLSConnections_Raft(t *testing.T) { var resp structs.JobRegisterResponse err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp) assert.NotNil(err) - assert.Contains("rpc error: EOF", err.Error()) + assert.True(connectionReset(err.Error())) // Check that the job was not persisted state := s1.fsm.State() From d443098f0c38536a35d1f577ca5d2840ee136422 Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Fri, 19 Jan 2018 17:00:01 -0500 Subject: [PATCH 21/24] vendor raft to master branch --- vendor/github.com/hashicorp/raft/Makefile | 7 +- vendor/github.com/hashicorp/raft/README.md | 20 +- vendor/github.com/hashicorp/raft/api.go | 1 + .../hashicorp/raft/configuration.go | 2 +- .../hashicorp/raft/file_snapshot.go | 22 +- .../hashicorp/raft/inmem_transport.go | 10 +- .../hashicorp/raft/net_transport.go | 200 ++++++++++++------ vendor/github.com/hashicorp/raft/observer.go | 5 + vendor/github.com/hashicorp/raft/raft.go | 8 +- .../github.com/hashicorp/raft/replication.go | 16 +- vendor/github.com/hashicorp/raft/tag.sh | 16 ++ .../hashicorp/raft/tcp_transport.go | 19 +- vendor/github.com/hashicorp/raft/transport.go | 10 +- vendor/vendor.json | 2 +- 14 files changed, 236 insertions(+), 102 deletions(-) create mode 100755 vendor/github.com/hashicorp/raft/tag.sh diff --git a/vendor/github.com/hashicorp/raft/Makefile b/vendor/github.com/hashicorp/raft/Makefile index 0910e2ceb211..75d947f13dfd 100644 --- a/vendor/github.com/hashicorp/raft/Makefile +++ b/vendor/github.com/hashicorp/raft/Makefile @@ -1,11 +1,14 @@ DEPS = $(go list -f '{{range .TestImports}}{{.}} {{end}}' ./...) test: - go test -timeout=60s ./... + go test -timeout=60s . integ: test - INTEG_TESTS=yes go test -timeout=25s -run=Integ ./... + INTEG_TESTS=yes go test -timeout=25s -run=Integ . +fuzz: + go test -timeout=300s ./fuzzy + deps: go get -d -v ./... echo $(DEPS) | xargs -n1 go get -d diff --git a/vendor/github.com/hashicorp/raft/README.md b/vendor/github.com/hashicorp/raft/README.md index 8778b13dc5c3..a70ec8a08b5e 100644 --- a/vendor/github.com/hashicorp/raft/README.md +++ b/vendor/github.com/hashicorp/raft/README.md @@ -3,7 +3,7 @@ raft [![Build Status](https://travis-ci.org/hashicorp/raft.png)](https://travis- raft is a [Go](http://www.golang.org) library that manages a replicated log and can be used with an FSM to manage replicated state machines. It -is library for providing [consensus](http://en.wikipedia.org/wiki/Consensus_(computer_science)). +is a library for providing [consensus](http://en.wikipedia.org/wiki/Consensus_(computer_science)). The use cases for such a library are far-reaching as replicated state machines are a key component of many distributed systems. They enable @@ -32,6 +32,24 @@ A pure Go backend using [BoltDB](https://github.com/boltdb/bolt) is also availab [raft-boltdb](https://github.com/hashicorp/raft-boltdb). It can also be used as a `LogStore` and `StableStore`. +## Tagged Releases + +As of September 2017, Hashicorp will start using tags for this library to clearly indicate +major version updates. 
We recommend you vendor your application's dependency on this library. + +* v0.1.0 is the original stable version of the library that was in master and has been maintained +with no breaking API changes. This was in use by Consul prior to version 0.7.0. + +* v1.0.0 takes the changes that were staged in the library-v2-stage-one branch. This version +manages server identities using a UUID, so introduces some breaking API changes. It also versions +the Raft protocol, and requires some special steps when interoperating with Raft servers running +older versions of the library (see the detailed comment in config.go about version compatibility). +You can reference https://github.com/hashicorp/consul/pull/2222 for an idea of what was required +to port Consul to these new interfaces. + + This version includes some new features as well, including non voting servers, a new address + provider abstraction in the transport layer, and more resilient snapshots. + ## Protocol raft is based on ["Raft: In Search of an Understandable Consensus Algorithm"](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf) diff --git a/vendor/github.com/hashicorp/raft/api.go b/vendor/github.com/hashicorp/raft/api.go index 147cde295c18..73f057c98589 100644 --- a/vendor/github.com/hashicorp/raft/api.go +++ b/vendor/github.com/hashicorp/raft/api.go @@ -492,6 +492,7 @@ func NewRaft(conf *Config, fsm FSM, logs LogStore, stable StableStore, snaps Sna } r.processConfigurationLogEntry(&entry) } + r.logger.Printf("[INFO] raft: Initial configuration (index=%d): %+v", r.configurations.latestIndex, r.configurations.latest.Servers) diff --git a/vendor/github.com/hashicorp/raft/configuration.go b/vendor/github.com/hashicorp/raft/configuration.go index 74508c5e530f..8afc38bd93ed 100644 --- a/vendor/github.com/hashicorp/raft/configuration.go +++ b/vendor/github.com/hashicorp/raft/configuration.go @@ -283,7 +283,7 @@ func encodePeers(configuration Configuration, trans Transport) []byte { var encPeers [][]byte for _, server := range configuration.Servers { if server.Suffrage == Voter { - encPeers = append(encPeers, trans.EncodePeer(server.Address)) + encPeers = append(encPeers, trans.EncodePeer(server.ID, server.Address)) } } diff --git a/vendor/github.com/hashicorp/raft/file_snapshot.go b/vendor/github.com/hashicorp/raft/file_snapshot.go index 119bfd308db3..ffc9414542f0 100644 --- a/vendor/github.com/hashicorp/raft/file_snapshot.go +++ b/vendor/github.com/hashicorp/raft/file_snapshot.go @@ -12,6 +12,7 @@ import ( "log" "os" "path/filepath" + "runtime" "sort" "strings" "time" @@ -406,17 +407,18 @@ func (s *FileSnapshotSink) Close() error { return err } - // fsync the parent directory, to sync directory edits to disk - parentFH, err := os.Open(s.parentDir) - defer parentFH.Close() - if err != nil { - s.logger.Printf("[ERR] snapshot: Failed to open snapshot parent directory %v, error: %v", s.parentDir, err) - return err - } + if runtime.GOOS != "windows" { //skipping fsync for directory entry edits on Windows, only needed for *nix style file systems + parentFH, err := os.Open(s.parentDir) + defer parentFH.Close() + if err != nil { + s.logger.Printf("[ERR] snapshot: Failed to open snapshot parent directory %v, error: %v", s.parentDir, err) + return err + } - if err = parentFH.Sync(); err != nil { - s.logger.Printf("[ERR] snapshot: Failed syncing parent directory %v, error: %v", s.parentDir, err) - return err + if err = parentFH.Sync(); err != nil { + s.logger.Printf("[ERR] snapshot: Failed syncing parent directory %v, error: 
%v", s.parentDir, err) + return err + } } // Reap any old snapshots diff --git a/vendor/github.com/hashicorp/raft/inmem_transport.go b/vendor/github.com/hashicorp/raft/inmem_transport.go index 3693cd5ad1e6..ce37f63aa84c 100644 --- a/vendor/github.com/hashicorp/raft/inmem_transport.go +++ b/vendor/github.com/hashicorp/raft/inmem_transport.go @@ -75,7 +75,7 @@ func (i *InmemTransport) LocalAddr() ServerAddress { // AppendEntriesPipeline returns an interface that can be used to pipeline // AppendEntries requests. -func (i *InmemTransport) AppendEntriesPipeline(target ServerAddress) (AppendPipeline, error) { +func (i *InmemTransport) AppendEntriesPipeline(id ServerID, target ServerAddress) (AppendPipeline, error) { i.RLock() peer, ok := i.peers[target] i.RUnlock() @@ -90,7 +90,7 @@ func (i *InmemTransport) AppendEntriesPipeline(target ServerAddress) (AppendPipe } // AppendEntries implements the Transport interface. -func (i *InmemTransport) AppendEntries(target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error { +func (i *InmemTransport) AppendEntries(id ServerID, target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error { rpcResp, err := i.makeRPC(target, args, nil, i.timeout) if err != nil { return err @@ -103,7 +103,7 @@ func (i *InmemTransport) AppendEntries(target ServerAddress, args *AppendEntries } // RequestVote implements the Transport interface. -func (i *InmemTransport) RequestVote(target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error { +func (i *InmemTransport) RequestVote(id ServerID, target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error { rpcResp, err := i.makeRPC(target, args, nil, i.timeout) if err != nil { return err @@ -116,7 +116,7 @@ func (i *InmemTransport) RequestVote(target ServerAddress, args *RequestVoteRequ } // InstallSnapshot implements the Transport interface. -func (i *InmemTransport) InstallSnapshot(target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error { +func (i *InmemTransport) InstallSnapshot(id ServerID, target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error { rpcResp, err := i.makeRPC(target, args, data, 10*i.timeout) if err != nil { return err @@ -159,7 +159,7 @@ func (i *InmemTransport) makeRPC(target ServerAddress, args interface{}, r io.Re } // EncodePeer implements the Transport interface. -func (i *InmemTransport) EncodePeer(p ServerAddress) []byte { +func (i *InmemTransport) EncodePeer(id ServerID, p ServerAddress) []byte { return []byte(p) } diff --git a/vendor/github.com/hashicorp/raft/net_transport.go b/vendor/github.com/hashicorp/raft/net_transport.go index b45d03e24022..454fc2a577b6 100644 --- a/vendor/github.com/hashicorp/raft/net_transport.go +++ b/vendor/github.com/hashicorp/raft/net_transport.go @@ -69,17 +69,45 @@ type NetworkTransport struct { maxPool int + serverAddressProvider ServerAddressProvider + shutdown bool shutdownCh chan struct{} shutdownLock sync.Mutex - stream StreamLayer - streamCancel context.CancelFunc + stream StreamLayer + + // streamCtx is used to cancel existing connection handlers. + streamCtx context.Context + streamCancel context.CancelFunc + streamCtxLock sync.RWMutex timeout time.Duration TimeoutScale int } +// NetworkTransportConfig encapsulates configuration for the network transport layer. 
+type NetworkTransportConfig struct { + // ServerAddressProvider is used to override the target address when establishing a connection to invoke an RPC + ServerAddressProvider ServerAddressProvider + + Logger *log.Logger + + // Dialer + Stream StreamLayer + + // MaxPool controls how many connections we will pool + MaxPool int + + // Timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply + // the timeout by (SnapshotSize / TimeoutScale). + Timeout time.Duration +} + +type ServerAddressProvider interface { + ServerAddr(id ServerID) (ServerAddress, error) +} + // StreamLayer is used with the NetworkTransport to provide // the low level stream abstraction. type StreamLayer interface { @@ -114,6 +142,32 @@ type netPipeline struct { shutdownLock sync.Mutex } +// NewNetworkTransportWithConfig creates a new network transport with the given config struct +func NewNetworkTransportWithConfig( + config *NetworkTransportConfig, +) *NetworkTransport { + if config.Logger == nil { + config.Logger = log.New(os.Stderr, "", log.LstdFlags) + } + trans := &NetworkTransport{ + connPool: make(map[ServerAddress][]*netConn), + consumeCh: make(chan RPC), + logger: config.Logger, + maxPool: config.MaxPool, + shutdownCh: make(chan struct{}), + stream: config.Stream, + timeout: config.Timeout, + TimeoutScale: DefaultTimeoutScale, + serverAddressProvider: config.ServerAddressProvider, + } + + // Create the connection context and then start our listener. + trans.setupStreamContext() + go trans.listen() + + return trans +} + // NewNetworkTransport creates a new network transport with the given dialer // and listener. The maxPool controls how many connections we will pool. The // timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply @@ -127,10 +181,12 @@ func NewNetworkTransport( if logOutput == nil { logOutput = os.Stderr } - return NewNetworkTransportWithLogger(stream, maxPool, timeout, log.New(logOutput, "", log.LstdFlags)) + logger := log.New(logOutput, "", log.LstdFlags) + config := &NetworkTransportConfig{Stream: stream, MaxPool: maxPool, Timeout: timeout, Logger: logger} + return NewNetworkTransportWithConfig(config) } -// NewNetworkTransportWithLogger creates a new network transport with the given dialer +// NewNetworkTransportWithLogger creates a new network transport with the given logger, dialer // and listener. The maxPool controls how many connections we will pool. The // timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply // the timeout by (SnapshotSize / TimeoutScale). 
@@ -140,44 +196,23 @@ func NewNetworkTransportWithLogger( timeout time.Duration, logger *log.Logger, ) *NetworkTransport { - if logger == nil { - logger = log.New(os.Stderr, "", log.LstdFlags) - } - - trans := &NetworkTransport{ - connPool: make(map[ServerAddress][]*netConn), - consumeCh: make(chan RPC), - logger: logger, - maxPool: maxPool, - shutdownCh: make(chan struct{}), - stream: stream, - timeout: timeout, - TimeoutScale: DefaultTimeoutScale, - } - - ctx, cancel := context.WithCancel(context.Background()) - trans.streamCancel = cancel - go trans.listen(ctx) - return trans -} - -// Pause closes the current stream and existing connections for a -// NetworkTransport instance -func (n *NetworkTransport) Pause() { - for _, e := range n.connPool { - for _, conn := range e { - conn.Release() - } - } - n.streamCancel() + config := &NetworkTransportConfig{Stream: stream, MaxPool: maxPool, Timeout: timeout, Logger: logger} + return NewNetworkTransportWithConfig(config) } -// Pause creates a new stream for a NetworkTransport instance -func (n *NetworkTransport) Reload(s StreamLayer) { - n.stream = s +// setupStreamContext is used to create a new stream context. This should be +// called with the stream lock held. +func (n *NetworkTransport) setupStreamContext() { ctx, cancel := context.WithCancel(context.Background()) + n.streamCtx = ctx n.streamCancel = cancel - go n.listen(ctx) +} + +// getStreamContext is used retrieve the current stream context. +func (n *NetworkTransport) getStreamContext() context.Context { + n.streamCtxLock.RLock() + defer n.streamCtxLock.RUnlock() + return n.streamCtx } // SetHeartbeatHandler is used to setup a heartbeat handler @@ -189,6 +224,31 @@ func (n *NetworkTransport) SetHeartbeatHandler(cb func(rpc RPC)) { n.heartbeatFn = cb } +// CloseStreams closes the current streams. +func (n *NetworkTransport) CloseStreams() { + n.connPoolLock.Lock() + defer n.connPoolLock.Unlock() + + // Close all the connections in the connection pool and then remove their + // entry. + for k, e := range n.connPool { + for _, conn := range e { + conn.Release() + } + + delete(n.connPool, k) + } + + // Cancel the existing connections and create a new context. Both these + // operations must always be done with the lock held otherwise we can create + // connection handlers that are holding a context that will never be + // cancelable. + n.streamCtxLock.Lock() + n.streamCancel() + n.setupStreamContext() + n.streamCtxLock.Unlock() +} + // Close is used to stop the network transport. func (n *NetworkTransport) Close() error { n.shutdownLock.Lock() @@ -239,6 +299,24 @@ func (n *NetworkTransport) getPooledConn(target ServerAddress) *netConn { return conn } +// getConnFromAddressProvider returns a connection from the server address provider if available, or defaults to a connection using the target server address +func (n *NetworkTransport) getConnFromAddressProvider(id ServerID, target ServerAddress) (*netConn, error) { + address := n.getProviderAddressOrFallback(id, target) + return n.getConn(address) +} + +func (n *NetworkTransport) getProviderAddressOrFallback(id ServerID, target ServerAddress) ServerAddress { + if n.serverAddressProvider != nil { + serverAddressOverride, err := n.serverAddressProvider.ServerAddr(id) + if err != nil { + n.logger.Printf("[WARN] Unable to get address for server id %v, using fallback address %v: %v", id, target, err) + } else { + return serverAddressOverride + } + } + return target +} + // getConn is used to get a connection from the pool. 
func (n *NetworkTransport) getConn(target ServerAddress) (*netConn, error) { // Check for a pooled conn @@ -285,9 +363,9 @@ func (n *NetworkTransport) returnConn(conn *netConn) { // AppendEntriesPipeline returns an interface that can be used to pipeline // AppendEntries requests. -func (n *NetworkTransport) AppendEntriesPipeline(target ServerAddress) (AppendPipeline, error) { +func (n *NetworkTransport) AppendEntriesPipeline(id ServerID, target ServerAddress) (AppendPipeline, error) { // Get a connection - conn, err := n.getConn(target) + conn, err := n.getConnFromAddressProvider(id, target) if err != nil { return nil, err } @@ -297,19 +375,19 @@ func (n *NetworkTransport) AppendEntriesPipeline(target ServerAddress) (AppendPi } // AppendEntries implements the Transport interface. -func (n *NetworkTransport) AppendEntries(target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error { - return n.genericRPC(target, rpcAppendEntries, args, resp) +func (n *NetworkTransport) AppendEntries(id ServerID, target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error { + return n.genericRPC(id, target, rpcAppendEntries, args, resp) } // RequestVote implements the Transport interface. -func (n *NetworkTransport) RequestVote(target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error { - return n.genericRPC(target, rpcRequestVote, args, resp) +func (n *NetworkTransport) RequestVote(id ServerID, target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error { + return n.genericRPC(id, target, rpcRequestVote, args, resp) } // genericRPC handles a simple request/response RPC. -func (n *NetworkTransport) genericRPC(target ServerAddress, rpcType uint8, args interface{}, resp interface{}) error { +func (n *NetworkTransport) genericRPC(id ServerID, target ServerAddress, rpcType uint8, args interface{}, resp interface{}) error { // Get a conn - conn, err := n.getConn(target) + conn, err := n.getConnFromAddressProvider(id, target) if err != nil { return err } @@ -333,9 +411,9 @@ func (n *NetworkTransport) genericRPC(target ServerAddress, rpcType uint8, args } // InstallSnapshot implements the Transport interface. -func (n *NetworkTransport) InstallSnapshot(target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error { +func (n *NetworkTransport) InstallSnapshot(id ServerID, target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error { // Get a conn, always close for InstallSnapshot - conn, err := n.getConn(target) + conn, err := n.getConnFromAddressProvider(id, target) if err != nil { return err } @@ -371,8 +449,9 @@ func (n *NetworkTransport) InstallSnapshot(target ServerAddress, args *InstallSn } // EncodePeer implements the Transport interface. -func (n *NetworkTransport) EncodePeer(p ServerAddress) []byte { - return []byte(p) +func (n *NetworkTransport) EncodePeer(id ServerID, p ServerAddress) []byte { + address := n.getProviderAddressOrFallback(id, p) + return []byte(address) } // DecodePeer implements the Transport interface. @@ -381,15 +460,8 @@ func (n *NetworkTransport) DecodePeer(buf []byte) ServerAddress { } // listen is used to handling incoming connections. 
-func (n *NetworkTransport) listen(ctx context.Context) { +func (n *NetworkTransport) listen() { for { - select { - case <-ctx.Done(): - n.logger.Println("[INFO] raft-net: stream layer is closed") - return - default: - } - // Accept incoming connections conn, err := n.stream.Accept() if err != nil { @@ -402,12 +474,14 @@ func (n *NetworkTransport) listen(ctx context.Context) { n.logger.Printf("[DEBUG] raft-net: %v accepted connection from: %v", n.LocalAddr(), conn.RemoteAddr()) // Handle the connection in dedicated routine - go n.handleConn(ctx, conn) + go n.handleConn(n.getStreamContext(), conn) } } -// handleConn is used to handle an inbound connection for its lifespan. -func (n *NetworkTransport) handleConn(ctx context.Context, conn net.Conn) { +// handleConn is used to handle an inbound connection for its lifespan. The +// handler will exit when the passed context is cancelled or the connection is +// closed. +func (n *NetworkTransport) handleConn(connCtx context.Context, conn net.Conn) { defer conn.Close() r := bufio.NewReader(conn) w := bufio.NewWriter(conn) @@ -416,8 +490,8 @@ func (n *NetworkTransport) handleConn(ctx context.Context, conn net.Conn) { for { select { - case <-ctx.Done(): - n.logger.Println("[INFO] raft-net: stream layer is closed for handleConn") + case <-connCtx.Done(): + n.logger.Println("[DEBUG] raft-net: stream layer is closed") return default: } diff --git a/vendor/github.com/hashicorp/raft/observer.go b/vendor/github.com/hashicorp/raft/observer.go index 76c4d555dfa5..bce17ef19a4a 100644 --- a/vendor/github.com/hashicorp/raft/observer.go +++ b/vendor/github.com/hashicorp/raft/observer.go @@ -13,6 +13,11 @@ type Observation struct { Data interface{} } +// LeaderObservation is used for the data when leadership changes. +type LeaderObservation struct { + leader ServerAddress +} + // nextObserverId is used to provide a unique ID for each observer to aid in // deregistration. 
var nextObserverID uint64 diff --git a/vendor/github.com/hashicorp/raft/raft.go b/vendor/github.com/hashicorp/raft/raft.go index 0f89ccaa4d6a..b5cc9ca980c3 100644 --- a/vendor/github.com/hashicorp/raft/raft.go +++ b/vendor/github.com/hashicorp/raft/raft.go @@ -88,8 +88,12 @@ type leaderState struct { // setLeader is used to modify the current leader of the cluster func (r *Raft) setLeader(leader ServerAddress) { r.leaderLock.Lock() + oldLeader := r.leader r.leader = leader r.leaderLock.Unlock() + if oldLeader != leader { + r.observe(LeaderObservation{leader: leader}) + } } // requestConfigChange is a helper for the above functions that make @@ -1379,7 +1383,7 @@ func (r *Raft) electSelf() <-chan *voteResult { req := &RequestVoteRequest{ RPCHeader: r.getRPCHeader(), Term: r.getCurrentTerm(), - Candidate: r.trans.EncodePeer(r.localAddr), + Candidate: r.trans.EncodePeer(r.localID, r.localAddr), LastLogIndex: lastIdx, LastLogTerm: lastTerm, } @@ -1389,7 +1393,7 @@ func (r *Raft) electSelf() <-chan *voteResult { r.goFunc(func() { defer metrics.MeasureSince([]string{"raft", "candidate", "electSelf"}, time.Now()) resp := &voteResult{voterID: peer.ID} - err := r.trans.RequestVote(peer.Address, req, &resp.RequestVoteResponse) + err := r.trans.RequestVote(peer.ID, peer.Address, req, &resp.RequestVoteResponse) if err != nil { r.logger.Printf("[ERR] raft: Failed to make RequestVote RPC to %v: %v", peer, err) resp.Term = req.Term diff --git a/vendor/github.com/hashicorp/raft/replication.go b/vendor/github.com/hashicorp/raft/replication.go index 68392734397e..e631b5a09baf 100644 --- a/vendor/github.com/hashicorp/raft/replication.go +++ b/vendor/github.com/hashicorp/raft/replication.go @@ -157,7 +157,7 @@ PIPELINE: goto RPC } -// replicateTo is a hepler to replicate(), used to replicate the logs up to a +// replicateTo is a helper to replicate(), used to replicate the logs up to a // given last index. // If the follower log is behind, we take care to bring them up to date. 
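The `setLeader` change above publishes a `LeaderObservation` through raft's existing observer machinery whenever leadership changes hands. A sketch of consuming it (note that the `leader` field is unexported in this revision, so an observer can detect the transition but cannot read the new address from the event):

```go
package example

import (
	"log"

	"github.com/hashicorp/raft"
)

// watchLeadership registers an observer whose filter admits only
// leadership-change events.
func watchLeadership(r *raft.Raft) {
	obsCh := make(chan raft.Observation, 8)
	observer := raft.NewObserver(obsCh, false, func(o *raft.Observation) bool {
		_, ok := o.Data.(raft.LeaderObservation)
		return ok
	})
	r.RegisterObserver(observer)

	go func() {
		for range obsCh {
			log.Println("raft: leadership changed")
		}
	}()
}
```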
func (r *Raft) replicateTo(s *followerReplication, lastIndex uint64) (shouldStop bool) { @@ -183,7 +183,7 @@ START: // Make the RPC call start = time.Now() - if err := r.trans.AppendEntries(s.peer.Address, &req, &resp); err != nil { + if err := r.trans.AppendEntries(s.peer.ID, s.peer.Address, &req, &resp); err != nil { r.logger.Printf("[ERR] raft: Failed to AppendEntries to %v: %v", s.peer, err) s.failures++ return @@ -278,7 +278,7 @@ func (r *Raft) sendLatestSnapshot(s *followerReplication) (bool, error) { RPCHeader: r.getRPCHeader(), SnapshotVersion: meta.Version, Term: s.currentTerm, - Leader: r.trans.EncodePeer(r.localAddr), + Leader: r.trans.EncodePeer(r.localID, r.localAddr), LastLogIndex: meta.Index, LastLogTerm: meta.Term, Peers: meta.Peers, @@ -290,7 +290,7 @@ func (r *Raft) sendLatestSnapshot(s *followerReplication) (bool, error) { // Make the call start := time.Now() var resp InstallSnapshotResponse - if err := r.trans.InstallSnapshot(s.peer.Address, &req, &resp, snapshot); err != nil { + if err := r.trans.InstallSnapshot(s.peer.ID, s.peer.Address, &req, &resp, snapshot); err != nil { r.logger.Printf("[ERR] raft: Failed to install snapshot %v: %v", snapID, err) s.failures++ return false, err @@ -332,7 +332,7 @@ func (r *Raft) heartbeat(s *followerReplication, stopCh chan struct{}) { req := AppendEntriesRequest{ RPCHeader: r.getRPCHeader(), Term: s.currentTerm, - Leader: r.trans.EncodePeer(r.localAddr), + Leader: r.trans.EncodePeer(r.localID, r.localAddr), } var resp AppendEntriesResponse for { @@ -345,7 +345,7 @@ func (r *Raft) heartbeat(s *followerReplication, stopCh chan struct{}) { } start := time.Now() - if err := r.trans.AppendEntries(s.peer.Address, &req, &resp); err != nil { + if err := r.trans.AppendEntries(s.peer.ID, s.peer.Address, &req, &resp); err != nil { r.logger.Printf("[ERR] raft: Failed to heartbeat to %v: %v", s.peer.Address, err) failures++ select { @@ -367,7 +367,7 @@ func (r *Raft) heartbeat(s *followerReplication, stopCh chan struct{}) { // back to the standard replication which can handle more complex situations. func (r *Raft) pipelineReplicate(s *followerReplication) error { // Create a new pipeline - pipeline, err := r.trans.AppendEntriesPipeline(s.peer.Address) + pipeline, err := r.trans.AppendEntriesPipeline(s.peer.ID, s.peer.Address) if err != nil { return err } @@ -476,7 +476,7 @@ func (r *Raft) pipelineDecode(s *followerReplication, p AppendPipeline, stopCh, func (r *Raft) setupAppendEntries(s *followerReplication, req *AppendEntriesRequest, nextIndex, lastIndex uint64) error { req.RPCHeader = r.getRPCHeader() req.Term = s.currentTerm - req.Leader = r.trans.EncodePeer(r.localAddr) + req.Leader = r.trans.EncodePeer(r.localID, r.localAddr) req.LeaderCommitIndex = r.getCommitIndex() if err := r.setPreviousLog(req, nextIndex); err != nil { return err diff --git a/vendor/github.com/hashicorp/raft/tag.sh b/vendor/github.com/hashicorp/raft/tag.sh new file mode 100755 index 000000000000..cd16623a70dc --- /dev/null +++ b/vendor/github.com/hashicorp/raft/tag.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -e + +# The version must be supplied from the environment. Do not include the +# leading "v". +if [ -z $VERSION ]; then + echo "Please specify a version." + exit 1 +fi + +# Generate the tag. +echo "==> Tagging version $VERSION..." 
+git commit --allow-empty -a --gpg-sign=348FFC4C -m "Release v$VERSION" +git tag -a -m "Version $VERSION" -s -u 348FFC4C "v${VERSION}" master + +exit 0 diff --git a/vendor/github.com/hashicorp/raft/tcp_transport.go b/vendor/github.com/hashicorp/raft/tcp_transport.go index 9281508a0503..29b2740f6241 100644 --- a/vendor/github.com/hashicorp/raft/tcp_transport.go +++ b/vendor/github.com/hashicorp/raft/tcp_transport.go @@ -28,7 +28,7 @@ func NewTCPTransport( timeout time.Duration, logOutput io.Writer, ) (*NetworkTransport, error) { - return newTCPTransport(bindAddr, advertise, maxPool, timeout, func(stream StreamLayer) *NetworkTransport { + return newTCPTransport(bindAddr, advertise, func(stream StreamLayer) *NetworkTransport { return NewNetworkTransport(stream, maxPool, timeout, logOutput) }) } @@ -42,15 +42,26 @@ func NewTCPTransportWithLogger( timeout time.Duration, logger *log.Logger, ) (*NetworkTransport, error) { - return newTCPTransport(bindAddr, advertise, maxPool, timeout, func(stream StreamLayer) *NetworkTransport { + return newTCPTransport(bindAddr, advertise, func(stream StreamLayer) *NetworkTransport { return NewNetworkTransportWithLogger(stream, maxPool, timeout, logger) }) } +// NewTCPTransportWithLogger returns a NetworkTransport that is built on top of +// a TCP streaming transport layer, using a default logger and the address provider +func NewTCPTransportWithConfig( + bindAddr string, + advertise net.Addr, + config *NetworkTransportConfig, +) (*NetworkTransport, error) { + return newTCPTransport(bindAddr, advertise, func(stream StreamLayer) *NetworkTransport { + config.Stream = stream + return NewNetworkTransportWithConfig(config) + }) +} + func newTCPTransport(bindAddr string, advertise net.Addr, - maxPool int, - timeout time.Duration, transportCreator func(stream StreamLayer) *NetworkTransport) (*NetworkTransport, error) { // Try to bind list, err := net.Listen("tcp", bindAddr) diff --git a/vendor/github.com/hashicorp/raft/transport.go b/vendor/github.com/hashicorp/raft/transport.go index 633f97a8c5cb..85459b221d15 100644 --- a/vendor/github.com/hashicorp/raft/transport.go +++ b/vendor/github.com/hashicorp/raft/transport.go @@ -35,20 +35,20 @@ type Transport interface { // AppendEntriesPipeline returns an interface that can be used to pipeline // AppendEntries requests. - AppendEntriesPipeline(target ServerAddress) (AppendPipeline, error) + AppendEntriesPipeline(id ServerID, target ServerAddress) (AppendPipeline, error) // AppendEntries sends the appropriate RPC to the target node. - AppendEntries(target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error + AppendEntries(id ServerID, target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error // RequestVote sends the appropriate RPC to the target node. - RequestVote(target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error + RequestVote(id ServerID, target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error // InstallSnapshot is used to push a snapshot down to a follower. The data is read from // the ReadCloser and streamed to the client. - InstallSnapshot(target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error + InstallSnapshot(id ServerID, target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error // EncodePeer is used to serialize a peer's address. 
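Threading the `ServerID` through every `Transport` method is what allows the `ServerAddressProvider` introduced earlier in this vendored update to override stale addresses at dial time. A sketch of a deliberately trivial provider (a real consumer would resolve IDs against a membership catalog):

```go
package example

import (
	"fmt"

	"github.com/hashicorp/raft"
)

// staticAddressProvider maps server IDs to fixed addresses. When ServerAddr
// returns an error, the transport logs a warning and falls back to the
// target address it was given.
type staticAddressProvider struct {
	addrs map[raft.ServerID]raft.ServerAddress
}

func (p *staticAddressProvider) ServerAddr(id raft.ServerID) (raft.ServerAddress, error) {
	addr, ok := p.addrs[id]
	if !ok {
		return "", fmt.Errorf("no address known for server %q", id)
	}
	return addr, nil
}
```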
- EncodePeer(ServerAddress) []byte + EncodePeer(id ServerID, addr ServerAddress) []byte // DecodePeer is used to deserialize a peer's address. DecodePeer([]byte) ServerAddress diff --git a/vendor/vendor.json b/vendor/vendor.json index c1f64dda14a2..2af79a48600f 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -162,7 +162,7 @@ {"path":"github.com/hashicorp/logutils","revision":"0dc08b1671f34c4250ce212759ebd880f743d883"}, {"path":"github.com/hashicorp/memberlist","checksumSHA1":"1zk7IeGClUqBo+Phsx89p7fQ/rQ=","revision":"23ad4b7d7b38496cd64c241dfd4c60b7794c254a","revisionTime":"2017-02-08T21:15:06Z"}, {"path":"github.com/hashicorp/net-rpc-msgpackrpc","revision":"a14192a58a694c123d8fe5481d4a4727d6ae82f3"}, - {"path":"github.com/hashicorp/raft","checksumSHA1":"ecpaHOImbL/NaivWrUDUUe5461E=","revision":"3a6f3bdfe4fc69e300c6d122b1a92051af6f0b95","revisionTime":"2017-08-07T22:22:24Z"}, + {"path":"github.com/hashicorp/raft","checksumSHA1":"zkA9uvbj1BdlveyqXpVTh1N6ers=","revision":"077966dbc90f342107eb723ec52fdb0463ec789b","revisionTime":"2018-01-17T20:29:25Z","version":"=master","versionExact":"master"}, {"path":"github.com/hashicorp/raft-boltdb","checksumSHA1":"QAxukkv54/iIvLfsUP6IK4R0m/A=","revision":"d1e82c1ec3f15ee991f7cc7ffd5b67ff6f5bbaee","revisionTime":"2015-02-01T20:08:39Z"}, {"path":"github.com/hashicorp/serf/coordinate","checksumSHA1":"/oss17GO4hXGM7QnUdI3VzcAHzA=","revision":"bbeddf0b3ab3072a60525afbd6b6f47d33839eee","revisionTime":"2017-07-14T18:26:01Z"}, {"path":"github.com/hashicorp/serf/serf","checksumSHA1":"pvLOzocYsZtxuJ9pREHRTxYnoa4=","revision":"bbeddf0b3ab3072a60525afbd6b6f47d33839eee","revisionTime":"2017-07-14T18:26:01Z"}, From 517030157ecdba0e9b437fd4d53b0a87d2741b9d Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Fri, 19 Jan 2018 05:12:14 -0500 Subject: [PATCH 22/24] swap raft layer tls wrapper --- command/agent/command.go | 2 + nomad/raft_rpc.go | 24 +++++++- nomad/server.go | 24 +++----- nomad/server_test.go | 120 +++++---------------------------------- 4 files changed, 45 insertions(+), 125 deletions(-) diff --git a/command/agent/command.go b/command/agent/command.go index 98c6fa3a5beb..74bbf96149e2 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -598,6 +598,8 @@ WAIT: } } +// reloadHTTPServer shuts down the existing HTTP server and restarts it. This +// is helpful when reloading the agent configuration. func (c *Command) reloadHTTPServer() error { c.agent.logger.Println("[INFO] agent: Reloading HTTP server with new TLS configuration") diff --git a/nomad/raft_rpc.go b/nomad/raft_rpc.go index 4cad9e734a64..e7f73357d57c 100644 --- a/nomad/raft_rpc.go +++ b/nomad/raft_rpc.go @@ -21,7 +21,8 @@ type RaftLayer struct { connCh chan net.Conn // TLS wrapper - tlsWrap tlsutil.Wrapper + tlsWrap tlsutil.Wrapper + tlsWrapLock sync.RWMutex // Tracks if we are closed closed bool @@ -78,6 +79,21 @@ func (l *RaftLayer) Close() error { return nil } +// getTLSWrapper is used to retrieve the current TLS wrapper +func (l *RaftLayer) getTLSWrapper() tlsutil.Wrapper { + l.tlsWrapLock.RLock() + defer l.tlsWrapLock.RUnlock() + return l.tlsWrap +} + +// ReloadTLS swaps the TLS wrapper. This is useful when upgrading or +// downgrading TLS connections. 
+func (l *RaftLayer) ReloadTLS(tlsWrap tlsutil.Wrapper) { + l.tlsWrapLock.Lock() + defer l.tlsWrapLock.Unlock() + l.tlsWrap = tlsWrap +} + // Addr is used to return the address of the listener func (l *RaftLayer) Addr() net.Addr { return l.addr @@ -90,8 +106,10 @@ func (l *RaftLayer) Dial(address raft.ServerAddress, timeout time.Duration) (net return nil, err } + tlsWrapper := l.getTLSWrapper() + // Check for tls mode - if l.tlsWrap != nil { + if tlsWrapper != nil { // Switch the connection into TLS mode if _, err := conn.Write([]byte{byte(rpcTLS)}); err != nil { conn.Close() @@ -99,7 +117,7 @@ func (l *RaftLayer) Dial(address raft.ServerAddress, timeout time.Duration) (net } // Wrap the connection in a TLS client - conn, err = l.tlsWrap(conn) + conn, err = tlsWrapper(conn) if err != nil { return nil, err } diff --git a/nomad/server.go b/nomad/server.go index f540d53b7b94..41e58ded0bc2 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -84,8 +84,7 @@ const ( // Server is Nomad server which manages the job queues, // schedulers, and notification bus for agents. type Server struct { - config *Config - configLock sync.Mutex + config *Config logger *log.Logger @@ -97,12 +96,11 @@ type Server struct { // The raft instance is used among Nomad nodes within the // region to protect operations that require strong consistency - leaderCh <-chan bool - raft *raft.Raft - raftLayer *RaftLayer - raftStore *raftboltdb.BoltStore - raftInmem *raft.InmemStore - + leaderCh <-chan bool + raft *raft.Raft + raftLayer *RaftLayer + raftStore *raftboltdb.BoltStore + raftInmem *raft.InmemStore raftTransport *raft.NetworkTransport // fsm is the state machine used with Raft @@ -417,9 +415,7 @@ func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error { // Keeping configuration in sync is important for other places that require // access to config information, such as rpc.go, where we decide on what kind // of network connections to accept depending on the server configuration - s.configLock.Lock() s.config.TLSConfig = newTLSConfig - s.configLock.Unlock() s.rpcTLS = incomingTLS s.connPool.ReloadTLS(tlsWrap) @@ -436,13 +432,9 @@ func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error { } // Close and reload existing Raft connections - s.raftTransport.Pause() - s.raftLayer.Close() wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap) - s.raftLayer = NewRaftLayer(s.rpcAdvertise, wrapper) - s.raftTransport.Reload(s.raftLayer) - - time.Sleep(3 * time.Second) + s.raftLayer.ReloadTLS(wrapper) + s.raftTransport.CloseStreams() s.logger.Printf("[DEBUG] nomad: finished reloading server connections") return nil diff --git a/nomad/server_test.go b/nomad/server_test.go index 34b8ff11382d..bb3381293a12 100644 --- a/nomad/server_test.go +++ b/nomad/server_test.go @@ -14,7 +14,6 @@ import ( "time" "github.com/hashicorp/consul/lib/freeport" - memdb "github.com/hashicorp/go-memdb" msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/nomad/command/agent/consul" "github.com/hashicorp/nomad/helper/uuid" @@ -417,52 +416,9 @@ func TestServer_Reload_TLSConnections_Raft(t *testing.T) { defer s2.Shutdown() testJoin(t, s1, s2) + servers := []*Server{s1, s2} - testutil.WaitForResult(func() (bool, error) { - peers, _ := s1.numPeers() - return peers == 2, nil - }, func(err error) { - t.Fatalf("should have 2 peers") - }) - - testutil.WaitForLeader(t, s2.RPC) - - { - // assert that a job register request will succeed - codec := rpcClient(t, s2) - job := mock.Job() - req := 
&structs.JobRegisterRequest{ - Job: job, - WriteRequest: structs.WriteRequest{ - Region: "regionFoo", - Namespace: job.Namespace, - }, - } - - // Fetch the response - var resp structs.JobRegisterResponse - err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp) - assert.Nil(err) - assert.NotEqual(0, resp.Index) - - // Check for the job in the FSM of each server in the cluster - { - state := s2.fsm.State() - ws := memdb.NewWatchSet() - out, err := state.JobByID(ws, job.Namespace, job.ID) - assert.Nil(err) - assert.NotNil(out) - assert.Equal(out.CreateIndex, resp.JobModifyIndex) - } - { - state := s1.fsm.State() - ws := memdb.NewWatchSet() - out, err := state.JobByID(ws, job.Namespace, job.ID) - assert.Nil(err) - assert.NotNil(out) - assert.Equal(out.CreateIndex, resp.JobModifyIndex) - } - } + testutil.WaitForLeader(t, s1.RPC) newTLSConfig := &config.TLSConfig{ EnableHTTP: true, @@ -476,29 +432,19 @@ func TestServer_Reload_TLSConnections_Raft(t *testing.T) { assert.Nil(err) { - // assert that a job register request will fail between servers that - // should not be able to communicate over Raft - codec := rpcClient(t, s2) - job := mock.Job() - req := &structs.JobRegisterRequest{ - Job: job, - WriteRequest: structs.WriteRequest{ - Region: "regionFoo", - Namespace: job.Namespace, - }, + for _, serv := range servers { + testutil.WaitForResult(func() (bool, error) { + args := &structs.GenericRequest{} + var leader string + err := serv.RPC("Status.Leader", args, &leader) + if leader != "" && err != nil { + return false, fmt.Errorf("Should not have found leader but got %s", leader) + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) } - - // TODO(CK) This occasionally is flaky - var resp structs.JobRegisterResponse - err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp) - assert.NotNil(err) - assert.True(connectionReset(err.Error())) - - // Check that the job was not persisted - state := s1.fsm.State() - ws := memdb.NewWatchSet() - out, _ := state.JobByID(ws, job.Namespace, job.ID) - assert.Nil(out) } secondNewTLSConfig := &config.TLSConfig{ @@ -515,42 +461,4 @@ func TestServer_Reload_TLSConnections_Raft(t *testing.T) { assert.Nil(err) testutil.WaitForLeader(t, s2.RPC) - - { - // assert that a job register request will succeed - codec := rpcClient(t, s2) - - job := mock.Job() - req := &structs.JobRegisterRequest{ - Job: job, - WriteRequest: structs.WriteRequest{ - Region: "regionFoo", - Namespace: job.Namespace, - }, - } - - // Fetch the response - var resp structs.JobRegisterResponse - err := msgpackrpc.CallWithCodec(codec, "Job.Register", req, &resp) - assert.Nil(err) - assert.NotEqual(0, resp.Index) - - // Check for the job in the FSM of each server in the cluster - { - state := s2.fsm.State() - ws := memdb.NewWatchSet() - out, err := state.JobByID(ws, job.Namespace, job.ID) - assert.Nil(err) - assert.NotNil(out) // TODO(CK) This occasionally is flaky - assert.Equal(out.CreateIndex, resp.JobModifyIndex) - } - { - state := s1.fsm.State() - ws := memdb.NewWatchSet() - out, err := state.JobByID(ws, job.Namespace, job.ID) - assert.Nil(err) - assert.NotNil(out) - assert.Equal(out.CreateIndex, resp.JobModifyIndex) - } - } } From cab0830eeaa9bcdc77da400632ca786bf58dd0ef Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Mon, 22 Jan 2018 09:02:26 -0500 Subject: [PATCH 23/24] specified version of raft should be master --- vendor/vendor.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/vendor.json b/vendor/vendor.json 
index 2af79a48600f..672ee1484b04 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -162,7 +162,7 @@ {"path":"github.com/hashicorp/logutils","revision":"0dc08b1671f34c4250ce212759ebd880f743d883"}, {"path":"github.com/hashicorp/memberlist","checksumSHA1":"1zk7IeGClUqBo+Phsx89p7fQ/rQ=","revision":"23ad4b7d7b38496cd64c241dfd4c60b7794c254a","revisionTime":"2017-02-08T21:15:06Z"}, {"path":"github.com/hashicorp/net-rpc-msgpackrpc","revision":"a14192a58a694c123d8fe5481d4a4727d6ae82f3"}, - {"path":"github.com/hashicorp/raft","checksumSHA1":"zkA9uvbj1BdlveyqXpVTh1N6ers=","revision":"077966dbc90f342107eb723ec52fdb0463ec789b","revisionTime":"2018-01-17T20:29:25Z","version":"=master","versionExact":"master"}, + {"path":"github.com/hashicorp/raft","checksumSHA1":"zkA9uvbj1BdlveyqXpVTh1N6ers=","revision":"077966dbc90f342107eb723ec52fdb0463ec789b","revisionTime":"2018-01-17T20:29:25Z","version":"master","versionExact":"master"}, {"path":"github.com/hashicorp/raft-boltdb","checksumSHA1":"QAxukkv54/iIvLfsUP6IK4R0m/A=","revision":"d1e82c1ec3f15ee991f7cc7ffd5b67ff6f5bbaee","revisionTime":"2015-02-01T20:08:39Z"}, {"path":"github.com/hashicorp/serf/coordinate","checksumSHA1":"/oss17GO4hXGM7QnUdI3VzcAHzA=","revision":"bbeddf0b3ab3072a60525afbd6b6f47d33839eee","revisionTime":"2017-07-14T18:26:01Z"}, {"path":"github.com/hashicorp/serf/serf","checksumSHA1":"pvLOzocYsZtxuJ9pREHRTxYnoa4=","revision":"bbeddf0b3ab3072a60525afbd6b6f47d33839eee","revisionTime":"2017-07-14T18:26:01Z"}, From c6eee0128a979a008ecd3eef8ab7dc1aaedbbc9c Mon Sep 17 00:00:00 2001 From: Chelsea Holland Komlo Date: Tue, 23 Jan 2018 05:00:26 -0500 Subject: [PATCH 24/24] add changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e23e1d43de98..f0a4f3066d47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ BUG FIXES: * core: Fix search endpoint forwarding for multi-region clusters [[GH-3680](https://github.com/hashicorp/nomad/issues/3680)] + * core: Allow upgrading/downgrading TLS via SIGHUP on both servers and clients [[GH-3492](https://github.com/hashicorp/nomad/issues/3492)] * core: Fix an issue in which batch jobs with queued placements and lost allocations could result in improper placement counts [[GH-3717](https://github.com/hashicorp/nomad/issues/3717)] * config: Revert minimum CPU limit back to 20 from 100.
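Taken together, the series converges on a reload path that swaps TLS material in place instead of rebuilding the Raft plumbing. Condensed into an in-package sketch (a hypothetical helper, not the real method; the locking, RPC listener recycling, and error logging of `reloadTLSConnections` are elided):

```go
// reloadTLSSketch is illustrative only; it strings together the pieces the
// patches above add: ConnPool.ReloadTLS, RaftLayer.ReloadTLS, and CloseStreams.
func (s *Server) reloadTLSSketch(newTLSConfig *config.TLSConfig) error {
	tlsConf := tlsutil.NewTLSConfiguration(newTLSConfig)
	incomingTLS, tlsWrap, err := getTLSConf(newTLSConfig.EnableRPC, tlsConf)
	if err != nil {
		return err
	}

	// Keep the server config and RPC layer in sync with the new settings.
	s.config.TLSConfig = newTLSConfig
	s.rpcTLS = incomingTLS
	s.connPool.ReloadTLS(tlsWrap)

	// Swap the Raft dial-side wrapper in place and drop pooled streams; the
	// transport keeps running and re-establishes connections with the new TLS.
	wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap)
	s.raftLayer.ReloadTLS(wrapper)
	s.raftTransport.CloseStreams()
	return nil
}
```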