Skip to content

Commit

Permalink
Nomad agent reload TLS configuration on SIGHUP (#3479)
Browse files Browse the repository at this point in the history
* Allow server TLS configuration to be reloaded via SIGHUP

* dynamic tls reloading for nomad agents

* code cleanup and refactoring

* ensure keyloader is initialized, add comments

* allow downgrading from TLS

* initalize keyloader if necessary

* integration test for tls reload

* fix up test to assert success on reloaded TLS configuration

* failure in loading a new TLS config should remain at current

Reload only the config if agent is already using TLS

* reload agent configuration before specific server/client

lock keyloader before loading/caching a new certificate

* introduce a get-or-set method for keyloader

* fixups from code review

* fix up linting errors

* fixups from code review

* add lock for config updates; improve copy of tls config

* GetCertificate only reloads certificates dynamically for the server

* config updates/copies should be on agent

* improve http integration test

* simplify agent reloading storing a local copy of config

* reuse the same keyloader when reloading

* Test that server and client get reloaded but keep keyloader

* Keyloader exposes GetClientCertificate as well for outgoing connections

* Fix spelling

* correct changelog style
  • Loading branch information
chelseakomlo authored and dadgar committed Nov 15, 2017
1 parent e6df21b commit fa9fd44
Show file tree
Hide file tree
Showing 16 changed files with 532 additions and 71 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
__BACKWARDS INCOMPATIBILITIES:__
* config: Nomad no longer parses Atlas configuration stanzas. Atlas has been
deprecated since earlier this year. If you have an Atlas stanza in your
config file it will have to be removed.
config file it will have to be removed.

IMPROVEMENTS:
* core: Allow operators to reload TLS certificate and key files via SIGHUP
[GH-3479]
* core: Allow agents to be run in `rpc_upgrade_mode` when migrating a cluster
to TLS rather than changing `heartbeat_grace`
* api: Allocations now track and return modify time in addition to create time
Expand Down
7 changes: 6 additions & 1 deletion client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import (
"github.com/boltdb/bolt"
consulapi "github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/go-multierror"
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver"
Expand Down Expand Up @@ -369,6 +369,11 @@ func (c *Client) Leave() error {
return nil
}

// GetConfig returns the config of the client for testing purposes only
func (c *Client) GetConfig() *config.Config {
return c.config
}

// Datacenter returns the datacenter for the given client
func (c *Client) Datacenter() string {
return c.config.Node.Datacenter
Expand Down
1 change: 1 addition & 0 deletions client/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ func (c *Config) TLSConfiguration() *tlsutil.Config {
CAFile: c.TLSConfig.CAFile,
CertFile: c.TLSConfig.CertFile,
KeyFile: c.TLSConfig.KeyFile,
KeyLoader: c.TLSConfig.GetKeyLoader(),
}
return tlsConf
}
38 changes: 37 additions & 1 deletion command/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ const (
// scheduled to, and are responsible for interfacing with
// servers to run allocations.
type Agent struct {
config *Config
config *Config
configLock sync.Mutex

logger *log.Logger
logOutput io.Writer

Expand Down Expand Up @@ -724,6 +726,40 @@ func (a *Agent) Stats() map[string]map[string]string {
return stats
}

// Reload handles configuration changes for the agent. Provides a method that
// is easier to unit test, as this action is invoked via SIGHUP.
func (a *Agent) Reload(newConfig *Config) error {
a.configLock.Lock()
defer a.configLock.Unlock()

if newConfig.TLSConfig != nil {

// TODO(chelseakomlo) In a later PR, we will introduce the ability to reload
// TLS configuration if the agent is not running with TLS enabled.
if a.config.TLSConfig != nil {
// Reload the certificates on the keyloader and on success store the
// updated TLS config. It is important to reuse the same keyloader
// as this allows us to dynamically reload configurations not only
// on the Agent but on the Server and Client too (they are
// referencing the same keyloader).
keyloader := a.config.TLSConfig.GetKeyLoader()
_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile)
if err != nil {
return err
}
a.config.TLSConfig = newConfig.TLSConfig
a.config.TLSConfig.KeyLoader = keyloader
}
}

return nil
}

// GetConfigCopy creates a replica of the agent's config, excluding locks
func (a *Agent) GetConfig() *Config {
return a.config
}

// setupConsul creates the Consul client and starts its main Run loop.
func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error {
apiConf, err := consulConfig.ApiConfig()
Expand Down
189 changes: 188 additions & 1 deletion command/agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func tmpDir(t testing.TB) string {
return dir
}

func TestAgent_RPCPing(t *testing.T) {
func TestAgent_RPC_Ping(t *testing.T) {
t.Parallel()
agent := NewTestAgent(t, t.Name(), nil)
defer agent.Shutdown()
Expand Down Expand Up @@ -559,3 +559,190 @@ func TestAgent_HTTPCheckPath(t *testing.T) {
t.Errorf("expected client check path to be %q but found %q", expected, check.Path)
}
}

// This test asserts that the keyloader embedded in the TLS config is shared
// across the Agent, Server, and Client. This is essential for certificate
// reloading to work.
func TestServer_Reload_TLS_Shared_Keyloader(t *testing.T) {
t.Parallel()
assert := assert.New(t)

// We will start out with a bad cert and then reload with a good one.
const (
cafile = "../../helper/tlsutil/testdata/ca.pem"
foocert = "../../helper/tlsutil/testdata/nomad-bad.pem"
fookey = "../../helper/tlsutil/testdata/nomad-bad-key.pem"
foocert2 = "../../helper/tlsutil/testdata/nomad-foo.pem"
fookey2 = "../../helper/tlsutil/testdata/nomad-foo-key.pem"
)

agent := NewTestAgent(t, t.Name(), func(c *Config) {
c.TLSConfig = &sconfig.TLSConfig{
EnableHTTP: true,
EnableRPC: true,
VerifyServerHostname: true,
CAFile: cafile,
CertFile: foocert,
KeyFile: fookey,
}
})
defer agent.Shutdown()

originalKeyloader := agent.Config.TLSConfig.GetKeyLoader()
originalCert, err := originalKeyloader.GetOutgoingCertificate(nil)
assert.NotNil(originalKeyloader)
if assert.Nil(err) {
assert.NotNil(originalCert)
}

// Switch to the correct certificates and reload
newConfig := &Config{
TLSConfig: &sconfig.TLSConfig{
EnableHTTP: true,
EnableRPC: true,
VerifyServerHostname: true,
CAFile: cafile,
CertFile: foocert2,
KeyFile: fookey2,
},
}

assert.Nil(agent.Reload(newConfig))
assert.Equal(agent.Config.TLSConfig.CertFile, newConfig.TLSConfig.CertFile)
assert.Equal(agent.Config.TLSConfig.KeyFile, newConfig.TLSConfig.KeyFile)
assert.Equal(agent.Config.TLSConfig.GetKeyLoader(), originalKeyloader)

// Assert is passed through on the server correctly
if assert.NotNil(agent.server.GetConfig().TLSConfig) {
serverKeyloader := agent.server.GetConfig().TLSConfig.GetKeyLoader()
assert.Equal(serverKeyloader, originalKeyloader)
newCert, err := serverKeyloader.GetOutgoingCertificate(nil)
assert.Nil(err)
assert.NotEqual(originalCert, newCert)
}

// Assert is passed through on the client correctly
if assert.NotNil(agent.client.GetConfig().TLSConfig) {
clientKeyloader := agent.client.GetConfig().TLSConfig.GetKeyLoader()
assert.Equal(clientKeyloader, originalKeyloader)
newCert, err := clientKeyloader.GetOutgoingCertificate(nil)
assert.Nil(err)
assert.NotEqual(originalCert, newCert)
}
}

func TestServer_Reload_TLS_Certificate(t *testing.T) {
t.Parallel()
assert := assert.New(t)

const (
cafile = "../../helper/tlsutil/testdata/ca.pem"
foocert = "../../helper/tlsutil/testdata/nomad-bad.pem"
fookey = "../../helper/tlsutil/testdata/nomad-bad-key.pem"
foocert2 = "../../helper/tlsutil/testdata/nomad-foo.pem"
fookey2 = "../../helper/tlsutil/testdata/nomad-foo-key.pem"
)

agentConfig := &Config{
TLSConfig: &sconfig.TLSConfig{
EnableHTTP: true,
EnableRPC: true,
VerifyServerHostname: true,
CAFile: cafile,
CertFile: foocert,
KeyFile: fookey,
},
}

agent := &Agent{
config: agentConfig,
}

newConfig := &Config{
TLSConfig: &sconfig.TLSConfig{
EnableHTTP: true,
EnableRPC: true,
VerifyServerHostname: true,
CAFile: cafile,
CertFile: foocert2,
KeyFile: fookey2,
},
}

originalKeyloader := agentConfig.TLSConfig.GetKeyLoader()
assert.NotNil(originalKeyloader)

err := agent.Reload(newConfig)
assert.Nil(err)
assert.Equal(agent.config.TLSConfig.CertFile, newConfig.TLSConfig.CertFile)
assert.Equal(agent.config.TLSConfig.KeyFile, newConfig.TLSConfig.KeyFile)
assert.Equal(agent.config.TLSConfig.GetKeyLoader(), originalKeyloader)
}

func TestServer_Reload_TLS_Certificate_Invalid(t *testing.T) {
t.Parallel()
assert := assert.New(t)

const (
cafile = "../../helper/tlsutil/testdata/ca.pem"
foocert = "../../helper/tlsutil/testdata/nomad-bad.pem"
fookey = "../../helper/tlsutil/testdata/nomad-bad-key.pem"
foocert2 = "invalid_cert_path"
fookey2 = "invalid_key_path"
)

agentConfig := &Config{
TLSConfig: &sconfig.TLSConfig{
EnableHTTP: true,
EnableRPC: true,
VerifyServerHostname: true,
CAFile: cafile,
CertFile: foocert,
KeyFile: fookey,
},
}

agent := &Agent{
config: agentConfig,
}

newConfig := &Config{
TLSConfig: &sconfig.TLSConfig{
EnableHTTP: true,
EnableRPC: true,
VerifyServerHostname: true,
CAFile: cafile,
CertFile: foocert2,
KeyFile: fookey2,
},
}

err := agent.Reload(newConfig)
assert.NotNil(err)
assert.NotEqual(agent.config.TLSConfig.CertFile, newConfig.TLSConfig.CertFile)
assert.NotEqual(agent.config.TLSConfig.KeyFile, newConfig.TLSConfig.KeyFile)
}

func Test_GetConfig(t *testing.T) {
assert := assert.New(t)

agentConfig := &Config{
Telemetry: &Telemetry{},
Client: &ClientConfig{},
Server: &ServerConfig{},
ACL: &ACLConfig{},
Ports: &Ports{},
Addresses: &Addresses{},
AdvertiseAddrs: &AdvertiseAddrs{},
Vault: &sconfig.VaultConfig{},
Consul: &sconfig.ConsulConfig{},
Sentinel: &sconfig.SentinelConfig{},
}

agent := &Agent{
config: agentConfig,
}

actualAgentConfig := agent.GetConfig()
assert.Equal(actualAgentConfig, agentConfig)
}
30 changes: 16 additions & 14 deletions command/agent/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ import (
checkpoint "github.com/hashicorp/go-checkpoint"
gsyslog "github.com/hashicorp/go-syslog"
"github.com/hashicorp/logutils"
"github.com/hashicorp/nomad/helper/flag-helpers"
"github.com/hashicorp/nomad/helper/gated-writer"
flaghelper "github.com/hashicorp/nomad/helper/flag-helpers"
gatedwriter "github.com/hashicorp/nomad/helper/gated-writer"
"github.com/hashicorp/nomad/nomad/structs/config"
"github.com/hashicorp/nomad/version"
"github.com/mitchellh/cli"
Expand Down Expand Up @@ -529,11 +529,11 @@ func (c *Command) Run(args []string) int {
go c.retryJoin(config)

// Wait for exit
return c.handleSignals(config)
return c.handleSignals()
}

// handleSignals blocks until we get an exit-causing signal
func (c *Command) handleSignals(config *Config) int {
func (c *Command) handleSignals() int {
signalCh := make(chan os.Signal, 4)
signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGPIPE)

Expand All @@ -557,17 +557,15 @@ WAIT:

// Check if this is a SIGHUP
if sig == syscall.SIGHUP {
if conf := c.handleReload(config); conf != nil {
*config = *conf
}
c.handleReload()
goto WAIT
}

// Check if we should do a graceful leave
graceful := false
if sig == os.Interrupt && config.LeaveOnInt {
if sig == os.Interrupt && c.agent.GetConfig().LeaveOnInt {
graceful = true
} else if sig == syscall.SIGTERM && config.LeaveOnTerm {
} else if sig == syscall.SIGTERM && c.agent.GetConfig().LeaveOnTerm {
graceful = true
}

Expand Down Expand Up @@ -599,12 +597,12 @@ WAIT:
}

// handleReload is invoked when we should reload our configs, e.g. SIGHUP
func (c *Command) handleReload(config *Config) *Config {
func (c *Command) handleReload() {
c.Ui.Output("Reloading configuration...")
newConf := c.readConfig()
if newConf == nil {
c.Ui.Error(fmt.Sprintf("Failed to reload configs"))
return config
return
}

// Change the log level
Expand All @@ -617,7 +615,13 @@ func (c *Command) handleReload(config *Config) *Config {
minLevel, c.logFilter.Levels))

// Keep the current log level
newConf.LogLevel = config.LogLevel
newConf.LogLevel = c.agent.GetConfig().LogLevel
}

// Reloads configuration for an agent running in both client and server mode
err := c.agent.Reload(newConf)
if err != nil {
c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err)
}

if s := c.agent.Server(); s != nil {
Expand All @@ -630,8 +634,6 @@ func (c *Command) handleReload(config *Config) *Config {
}
}
}

return newConf
}

// setupTelemetry is used ot setup the telemetry sub-systems
Expand Down
3 changes: 1 addition & 2 deletions command/agent/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -680,8 +680,7 @@ func (c *Config) Merge(b *Config) *Config {

// Apply the TLS Config
if result.TLSConfig == nil && b.TLSConfig != nil {
tlsConfig := *b.TLSConfig
result.TLSConfig = &tlsConfig
result.TLSConfig = b.TLSConfig.Copy()
} else if b.TLSConfig != nil {
result.TLSConfig = result.TLSConfig.Merge(b.TLSConfig)
}
Expand Down
Loading

0 comments on commit fa9fd44

Please sign in to comment.