Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nomad agent reload TLS configuration on SIGHUP #3479

Merged
merged 24 commits into from
Nov 15, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
25a86ac
Allow server TLS configuration to be reloaded via SIGHUP
chelseakomlo Oct 27, 2017
c65e23b
dynamic tls reloading for nomad agents
chelseakomlo Oct 31, 2017
c9acc5f
code cleanup and refactoring
chelseakomlo Nov 1, 2017
256f5a0
ensure keyloader is initialized, add comments
chelseakomlo Nov 1, 2017
cbd18ae
allow downgrading from TLS
chelseakomlo Nov 1, 2017
50a1336
initalize keyloader if necessary
chelseakomlo Nov 1, 2017
ff1d0dd
integration test for tls reload
chelseakomlo Nov 1, 2017
d7ff9f4
fix up test to assert success on reloaded TLS configuration
chelseakomlo Nov 2, 2017
0be15b0
failure in loading a new TLS config should remain at current
chelseakomlo Nov 2, 2017
0c4336e
reload agent configuration before specific server/client
chelseakomlo Nov 2, 2017
2990eb8
introduce a get-or-set method for keyloader
chelseakomlo Nov 3, 2017
b2ef194
fixups from code review
chelseakomlo Nov 3, 2017
fea24fe
fix up linting errors
chelseakomlo Nov 4, 2017
a40aafb
fixups from code review
chelseakomlo Nov 7, 2017
2cade96
add lock for config updates; improve copy of tls config
chelseakomlo Nov 13, 2017
12cb657
GetCertificate only reloads certificates dynamically for the server
chelseakomlo Nov 14, 2017
885c175
config updates/copies should be on agent
chelseakomlo Nov 14, 2017
d46eb3b
improve http integration test
chelseakomlo Nov 14, 2017
3c7bb25
simplify agent reloading storing a local copy of config
dadgar Nov 15, 2017
ae63199
reuse the same keyloader when reloading
dadgar Nov 15, 2017
d7d25b4
Test that server and client get reloaded but keep keyloader
dadgar Nov 15, 2017
9f954dc
Keyloader exposes GetClientCertificate as well for outgoing connections
dadgar Nov 15, 2017
98d6358
Fix spelling
dadgar Nov 15, 2017
75a0c60
correct changelog style
dadgar Nov 15, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
__BACKWARDS INCOMPATIBILITIES:__
* config: Nomad no longer parses Atlas configuration stanzas. Atlas has been
deprecated since earlier this year. If you have an Atlas stanza in your
config file it will have to be removed.
config file it will have to be removed.

IMPROVEMENTS:
* core: Allow operators to reload TLS certificate and key files via SIGHUP
[GH-3479]
* core: Allow agents to be run in `rpc_upgrade_mode` when migrating a cluster
to TLS rather than changing `heartbeat_grace`
* api: Allocations now track and return modify time in addition to create time
Expand Down
7 changes: 6 additions & 1 deletion client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import (
"github.com/boltdb/bolt"
consulapi "github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/go-multierror"
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/driver"
Expand Down Expand Up @@ -369,6 +369,11 @@ func (c *Client) Leave() error {
return nil
}

// GetConfig returns the config of the client for testing purposes only
func (c *Client) GetConfig() *config.Config {
return c.config
}

// Datacenter returns the datacenter for the given client
func (c *Client) Datacenter() string {
return c.config.Node.Datacenter
Expand Down
1 change: 1 addition & 0 deletions client/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ func (c *Config) TLSConfiguration() *tlsutil.Config {
CAFile: c.TLSConfig.CAFile,
CertFile: c.TLSConfig.CertFile,
KeyFile: c.TLSConfig.KeyFile,
KeyLoader: c.TLSConfig.GetKeyLoader(),
}
return tlsConf
}
38 changes: 37 additions & 1 deletion command/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ const (
// scheduled to, and are responsible for interfacing with
// servers to run allocations.
type Agent struct {
config *Config
config *Config
configLock sync.Mutex

logger *log.Logger
logOutput io.Writer

Expand Down Expand Up @@ -724,6 +726,40 @@ func (a *Agent) Stats() map[string]map[string]string {
return stats
}

// Reload handles configuration changes for the agent. Provides a method that
// is easier to unit test, as this action is invoked via SIGHUP.
func (a *Agent) Reload(newConfig *Config) error {
a.configLock.Lock()
defer a.configLock.Unlock()

if newConfig.TLSConfig != nil {

// TODO(chelseakomlo) In a later PR, we will introduce the ability to reload
// TLS configuration if the agent is not running with TLS enabled.
if a.config.TLSConfig != nil {
// Reload the certificates on the keyloader and on success store the
// updated TLS config. It is important to reuse the same keyloader
// as this allows us to dynamically reload configurations not only
// on the Agent but on the Server and Client too (they are
// referencing the same keyloader).
keyloader := a.config.TLSConfig.GetKeyLoader()
_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile)
if err != nil {
return err
}
a.config.TLSConfig = newConfig.TLSConfig
a.config.TLSConfig.KeyLoader = keyloader
}
}

return nil
}

// GetConfigCopy creates a replica of the agent's config, excluding locks
func (a *Agent) GetConfig() *Config {
return a.config
}

// setupConsul creates the Consul client and starts its main Run loop.
func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error {
apiConf, err := consulConfig.ApiConfig()
Expand Down
189 changes: 188 additions & 1 deletion command/agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func tmpDir(t testing.TB) string {
return dir
}

func TestAgent_RPCPing(t *testing.T) {
func TestAgent_RPC_Ping(t *testing.T) {
t.Parallel()
agent := NewTestAgent(t, t.Name(), nil)
defer agent.Shutdown()
Expand Down Expand Up @@ -559,3 +559,190 @@ func TestAgent_HTTPCheckPath(t *testing.T) {
t.Errorf("expected client check path to be %q but found %q", expected, check.Path)
}
}

// This test asserts that the keyloader embedded in the TLS config is shared
// across the Agent, Server, and Client. This is essential for certificate
// reloading to work.
func TestServer_Reload_TLS_Shared_Keyloader(t *testing.T) {
t.Parallel()
assert := assert.New(t)

// We will start out with a bad cert and then reload with a good one.
const (
cafile = "../../helper/tlsutil/testdata/ca.pem"
foocert = "../../helper/tlsutil/testdata/nomad-bad.pem"
fookey = "../../helper/tlsutil/testdata/nomad-bad-key.pem"
foocert2 = "../../helper/tlsutil/testdata/nomad-foo.pem"
fookey2 = "../../helper/tlsutil/testdata/nomad-foo-key.pem"
)

agent := NewTestAgent(t, t.Name(), func(c *Config) {
c.TLSConfig = &sconfig.TLSConfig{
EnableHTTP: true,
EnableRPC: true,
VerifyServerHostname: true,
CAFile: cafile,
CertFile: foocert,
KeyFile: fookey,
}
})
defer agent.Shutdown()

originalKeyloader := agent.Config.TLSConfig.GetKeyLoader()
originalCert, err := originalKeyloader.GetOutgoingCertificate(nil)
assert.NotNil(originalKeyloader)
if assert.Nil(err) {
assert.NotNil(originalCert)
}

// Switch to the correct certificates and reload
newConfig := &Config{
TLSConfig: &sconfig.TLSConfig{
EnableHTTP: true,
EnableRPC: true,
VerifyServerHostname: true,
CAFile: cafile,
CertFile: foocert2,
KeyFile: fookey2,
},
}

assert.Nil(agent.Reload(newConfig))
assert.Equal(agent.Config.TLSConfig.CertFile, newConfig.TLSConfig.CertFile)
assert.Equal(agent.Config.TLSConfig.KeyFile, newConfig.TLSConfig.KeyFile)
assert.Equal(agent.Config.TLSConfig.GetKeyLoader(), originalKeyloader)

// Assert is passed through on the server correctly
if assert.NotNil(agent.server.GetConfig().TLSConfig) {
serverKeyloader := agent.server.GetConfig().TLSConfig.GetKeyLoader()
assert.Equal(serverKeyloader, originalKeyloader)
newCert, err := serverKeyloader.GetOutgoingCertificate(nil)
assert.Nil(err)
assert.NotEqual(originalCert, newCert)
}

// Assert is passed through on the client correctly
if assert.NotNil(agent.client.GetConfig().TLSConfig) {
clientKeyloader := agent.client.GetConfig().TLSConfig.GetKeyLoader()
assert.Equal(clientKeyloader, originalKeyloader)
newCert, err := clientKeyloader.GetOutgoingCertificate(nil)
assert.Nil(err)
assert.NotEqual(originalCert, newCert)
}
}

func TestServer_Reload_TLS_Certificate(t *testing.T) {
t.Parallel()
assert := assert.New(t)

const (
cafile = "../../helper/tlsutil/testdata/ca.pem"
foocert = "../../helper/tlsutil/testdata/nomad-bad.pem"
fookey = "../../helper/tlsutil/testdata/nomad-bad-key.pem"
foocert2 = "../../helper/tlsutil/testdata/nomad-foo.pem"
fookey2 = "../../helper/tlsutil/testdata/nomad-foo-key.pem"
)

agentConfig := &Config{
TLSConfig: &sconfig.TLSConfig{
EnableHTTP: true,
EnableRPC: true,
VerifyServerHostname: true,
CAFile: cafile,
CertFile: foocert,
KeyFile: fookey,
},
}

agent := &Agent{
config: agentConfig,
}

newConfig := &Config{
TLSConfig: &sconfig.TLSConfig{
EnableHTTP: true,
EnableRPC: true,
VerifyServerHostname: true,
CAFile: cafile,
CertFile: foocert2,
KeyFile: fookey2,
},
}

originalKeyloader := agentConfig.TLSConfig.GetKeyLoader()
assert.NotNil(originalKeyloader)

err := agent.Reload(newConfig)
assert.Nil(err)
assert.Equal(agent.config.TLSConfig.CertFile, newConfig.TLSConfig.CertFile)
assert.Equal(agent.config.TLSConfig.KeyFile, newConfig.TLSConfig.KeyFile)
assert.Equal(agent.config.TLSConfig.GetKeyLoader(), originalKeyloader)
}

func TestServer_Reload_TLS_Certificate_Invalid(t *testing.T) {
t.Parallel()
assert := assert.New(t)

const (
cafile = "../../helper/tlsutil/testdata/ca.pem"
foocert = "../../helper/tlsutil/testdata/nomad-bad.pem"
fookey = "../../helper/tlsutil/testdata/nomad-bad-key.pem"
foocert2 = "invalid_cert_path"
fookey2 = "invalid_key_path"
)

agentConfig := &Config{
TLSConfig: &sconfig.TLSConfig{
EnableHTTP: true,
EnableRPC: true,
VerifyServerHostname: true,
CAFile: cafile,
CertFile: foocert,
KeyFile: fookey,
},
}

agent := &Agent{
config: agentConfig,
}

newConfig := &Config{
TLSConfig: &sconfig.TLSConfig{
EnableHTTP: true,
EnableRPC: true,
VerifyServerHostname: true,
CAFile: cafile,
CertFile: foocert2,
KeyFile: fookey2,
},
}

err := agent.Reload(newConfig)
assert.NotNil(err)
assert.NotEqual(agent.config.TLSConfig.CertFile, newConfig.TLSConfig.CertFile)
assert.NotEqual(agent.config.TLSConfig.KeyFile, newConfig.TLSConfig.KeyFile)
}

func Test_GetConfig(t *testing.T) {
assert := assert.New(t)

agentConfig := &Config{
Telemetry: &Telemetry{},
Client: &ClientConfig{},
Server: &ServerConfig{},
ACL: &ACLConfig{},
Ports: &Ports{},
Addresses: &Addresses{},
AdvertiseAddrs: &AdvertiseAddrs{},
Vault: &sconfig.VaultConfig{},
Consul: &sconfig.ConsulConfig{},
Sentinel: &sconfig.SentinelConfig{},
}

agent := &Agent{
config: agentConfig,
}

actualAgentConfig := agent.GetConfig()
assert.Equal(actualAgentConfig, agentConfig)
}
30 changes: 16 additions & 14 deletions command/agent/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ import (
checkpoint "github.com/hashicorp/go-checkpoint"
gsyslog "github.com/hashicorp/go-syslog"
"github.com/hashicorp/logutils"
"github.com/hashicorp/nomad/helper/flag-helpers"
"github.com/hashicorp/nomad/helper/gated-writer"
flaghelper "github.com/hashicorp/nomad/helper/flag-helpers"
gatedwriter "github.com/hashicorp/nomad/helper/gated-writer"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this named import necessary?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are automatically added from https://godoc.org/golang.org/x/tools/cmd/goimports

"github.com/hashicorp/nomad/nomad/structs/config"
"github.com/hashicorp/nomad/version"
"github.com/mitchellh/cli"
Expand Down Expand Up @@ -529,11 +529,11 @@ func (c *Command) Run(args []string) int {
go c.retryJoin(config)

// Wait for exit
return c.handleSignals(config)
return c.handleSignals()
}

// handleSignals blocks until we get an exit-causing signal
func (c *Command) handleSignals(config *Config) int {
func (c *Command) handleSignals() int {
signalCh := make(chan os.Signal, 4)
signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGPIPE)

Expand All @@ -557,17 +557,15 @@ WAIT:

// Check if this is a SIGHUP
if sig == syscall.SIGHUP {
if conf := c.handleReload(config); conf != nil {
*config = *conf
}
c.handleReload()
goto WAIT
}

// Check if we should do a graceful leave
graceful := false
if sig == os.Interrupt && config.LeaveOnInt {
if sig == os.Interrupt && c.agent.GetConfig().LeaveOnInt {
graceful = true
} else if sig == syscall.SIGTERM && config.LeaveOnTerm {
} else if sig == syscall.SIGTERM && c.agent.GetConfig().LeaveOnTerm {
graceful = true
}

Expand Down Expand Up @@ -599,12 +597,12 @@ WAIT:
}

// handleReload is invoked when we should reload our configs, e.g. SIGHUP
func (c *Command) handleReload(config *Config) *Config {
func (c *Command) handleReload() {
c.Ui.Output("Reloading configuration...")
newConf := c.readConfig()
if newConf == nil {
c.Ui.Error(fmt.Sprintf("Failed to reload configs"))
return config
return
}

// Change the log level
Expand All @@ -617,7 +615,13 @@ func (c *Command) handleReload(config *Config) *Config {
minLevel, c.logFilter.Levels))

// Keep the current log level
newConf.LogLevel = config.LogLevel
newConf.LogLevel = c.agent.GetConfig().LogLevel
}

// Reloads configuration for an agent running in both client and server mode
err := c.agent.Reload(newConf)
if err != nil {
c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err)
}

if s := c.agent.Server(); s != nil {
Expand All @@ -630,8 +634,6 @@ func (c *Command) handleReload(config *Config) *Config {
}
}
}

return newConf
}

// setupTelemetry is used ot setup the telemetry sub-systems
Expand Down
3 changes: 1 addition & 2 deletions command/agent/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -680,8 +680,7 @@ func (c *Config) Merge(b *Config) *Config {

// Apply the TLS Config
if result.TLSConfig == nil && b.TLSConfig != nil {
tlsConfig := *b.TLSConfig
result.TLSConfig = &tlsConfig
result.TLSConfig = b.TLSConfig.Copy()
} else if b.TLSConfig != nil {
result.TLSConfig = result.TLSConfig.Merge(b.TLSConfig)
}
Expand Down
Loading