Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Client/Server TLS dynamic reload #3492

Merged
merged 24 commits into from
Jan 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7b74957
add ability to upgrade/downgrade nomad agents tls configurations via …
chelseakomlo Nov 20, 2017
b1f8772
fix up downgrading client to plaintext
chelseakomlo Nov 21, 2017
156297c
close raft long-lived connections
chelseakomlo Nov 21, 2017
d4754d5
fixups from code review
chelseakomlo Nov 27, 2017
184fbfe
prevent races when reloading, fully shut down raft
chelseakomlo Nov 28, 2017
c65857c
remove code duplication
chelseakomlo Nov 28, 2017
bee883c
check error on generating tls context
chelseakomlo Nov 29, 2017
18fdd31
reloading tls config should be atomic for clients/servers
chelseakomlo Nov 29, 2017
359358d
code review fixups
chelseakomlo Nov 30, 2017
9ba0e6a
refactor rpc listener methods, wait for proper shutdown
chelseakomlo Nov 30, 2017
a4af400
don't ignore error in http reloading
chelseakomlo Nov 30, 2017
e3cc098
remove unnecessary nil checks; default case
chelseakomlo Dec 4, 2017
c70702e
call reload on agent, client, and server separately
chelseakomlo Dec 5, 2017
11089b2
reload raft transport layer
chelseakomlo Dec 7, 2017
21c1c1d
add documentation
chelseakomlo Jan 9, 2018
bbc5686
adding additional test assertions; differentiate reloading agent and …
chelseakomlo Jan 16, 2018
8de260f
refactor creating a new tls configuration
chelseakomlo Jan 16, 2018
d97c91c
feedback from code review
chelseakomlo Jan 16, 2018
0805c41
fixing up raft reload tests
chelseakomlo Jan 16, 2018
028ba9f
allow for similar error messages for closed connections
chelseakomlo Jan 17, 2018
d443098
vendor raft to master branch
chelseakomlo Jan 19, 2018
5170301
swap raft layer tls wrapper
chelseakomlo Jan 19, 2018
cab0830
specified version of raft should be master
chelseakomlo Jan 22, 2018
c6eee01
add changelog
chelseakomlo Jan 23, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

BUG FIXES:
* core: Fix search endpoint forwarding for multi-region clusters [[GH-3680](https://github.com/hashicorp/nomad/issues/3680)]
* core: Allow upgrading/downgrading TLS via SIGHUP on both servers and clients [[GH-3492](https://github.com/hashicorp/nomad/issues/3492)]
* core: Fix an issue in which batch jobs with queued placements and lost
allocations could result in improper placement counts [[GH-3717](https://github.com/hashicorp/nomad/issues/3717)]
* config: Revert minimum CPU limit back to 20 from 100.
Expand Down
29 changes: 29 additions & 0 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad"
"github.com/hashicorp/nomad/nomad/structs"
nconfig "github.com/hashicorp/nomad/nomad/structs/config"
vaultapi "github.com/hashicorp/vault/api"
"github.com/mitchellh/hashstructure"
"github.com/shirou/gopsutil/host"
Expand Down Expand Up @@ -364,6 +365,34 @@ func (c *Client) init() error {
return nil
}

// reloadTLSConnections allows a client to reload its TLS configuration on the
// fly
func (c *Client) reloadTLSConnections(newConfig *nconfig.TLSConfig) error {
var tlsWrap tlsutil.RegionWrapper
if newConfig != nil && newConfig.EnableRPC {
tw, err := tlsutil.NewTLSConfiguration(newConfig).OutgoingTLSWrapper()
if err != nil {
return err
}
tlsWrap = tw
}

// Keep the client configuration up to date as we use configuration values to
// decide on what type of connections to accept
c.configLock.Lock()
c.config.TLSConfig = newConfig
c.configLock.Unlock()

c.connPool.ReloadTLS(tlsWrap)

return nil
}

// Reload allows a client to reload its configuration on the fly
func (c *Client) Reload(newConfig *config.Config) error {
return c.reloadTLSConnections(newConfig.TLSConfig)
}

// Leave is used to prepare the client to leave the cluster
func (c *Client) Leave() error {
// TODO
Expand Down
153 changes: 153 additions & 0 deletions client/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1001,3 +1001,156 @@ func TestClient_ValidateMigrateToken_ACLDisabled(t *testing.T) {

assert.Equal(c.ValidateMigrateToken("", ""), true)
}

func TestClient_ReloadTLS_UpgradePlaintextToTLS(t *testing.T) {
t.Parallel()
assert := assert.New(t)

s1, addr := testServer(t, func(c *nomad.Config) {
c.Region = "regionFoo"
})
defer s1.Shutdown()
testutil.WaitForLeader(t, s1.RPC)

const (
cafile = "../helper/tlsutil/testdata/ca.pem"
foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
fookey = "../helper/tlsutil/testdata/nomad-foo-key.pem"
)

c1 := testClient(t, func(c *config.Config) {
c.Servers = []string{addr}
})
defer c1.Shutdown()

// Registering a node over plaintext should succeed
{
req := structs.NodeSpecificRequest{
NodeID: c1.Node().ID,
QueryOptions: structs.QueryOptions{Region: "regionFoo"},
}

testutil.WaitForResult(func() (bool, error) {
var out structs.SingleNodeResponse
err := c1.RPC("Node.GetNode", &req, &out)
if err != nil {
return false, fmt.Errorf("client RPC failed when it should have succeeded:\n%+v", err)
}
return true, nil
},
func(err error) {
t.Fatalf(err.Error())
},
)
}

newConfig := &nconfig.TLSConfig{
EnableHTTP: true,
EnableRPC: true,
VerifyServerHostname: true,
CAFile: cafile,
CertFile: foocert,
KeyFile: fookey,
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't you want to assert the node registered using plaintext?

err := c1.reloadTLSConnections(newConfig)
assert.Nil(err)

// Registering a node over plaintext should fail after the node has upgraded
// to TLS
{
req := structs.NodeSpecificRequest{
NodeID: c1.Node().ID,
QueryOptions: structs.QueryOptions{Region: "regionFoo"},
}
testutil.WaitForResult(func() (bool, error) {
var out structs.SingleNodeResponse
err := c1.RPC("Node.GetNode", &req, &out)
if err == nil {
return false, fmt.Errorf("client RPC succeeded when it should have failed:\n%+v", err)
}
return true, nil
},
func(err error) {
t.Fatalf(err.Error())
},
)
}
}

func TestClient_ReloadTLS_DowngradeTLSToPlaintext(t *testing.T) {
t.Parallel()
assert := assert.New(t)

s1, addr := testServer(t, func(c *nomad.Config) {
c.Region = "regionFoo"
})
defer s1.Shutdown()
testutil.WaitForLeader(t, s1.RPC)

const (
cafile = "../helper/tlsutil/testdata/ca.pem"
foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
fookey = "../helper/tlsutil/testdata/nomad-foo-key.pem"
)

c1 := testClient(t, func(c *config.Config) {
c.Servers = []string{addr}
c.TLSConfig = &nconfig.TLSConfig{
EnableHTTP: true,
EnableRPC: true,
VerifyServerHostname: true,
CAFile: cafile,
CertFile: foocert,
KeyFile: fookey,
}
})
defer c1.Shutdown()

// assert that when one node is running in encrypted mode, a RPC request to a
// node running in plaintext mode should fail
{
req := structs.NodeSpecificRequest{
NodeID: c1.Node().ID,
QueryOptions: structs.QueryOptions{Region: "regionFoo"},
}
testutil.WaitForResult(func() (bool, error) {
var out structs.SingleNodeResponse
err := c1.RPC("Node.GetNode", &req, &out)
if err == nil {
return false, fmt.Errorf("client RPC succeeded when it should have failed :\n%+v", err)
}
return true, nil
},
func(err error) {
t.Fatalf(err.Error())
},
)
}

newConfig := &nconfig.TLSConfig{}

err := c1.reloadTLSConnections(newConfig)
assert.Nil(err)

// assert that when both nodes are in plaintext mode, a RPC request should
// succeed
{
req := structs.NodeSpecificRequest{
NodeID: c1.Node().ID,
QueryOptions: structs.QueryOptions{Region: "regionFoo"},
}
testutil.WaitForResult(func() (bool, error) {
var out structs.SingleNodeResponse
err := c1.RPC("Node.GetNode", &req, &out)
if err != nil {
return false, fmt.Errorf("client RPC failed when it should have succeeded:\n%+v", err)
}
return true, nil
},
func(err error) {
t.Fatalf(err.Error())
},
)
}
}
6 changes: 3 additions & 3 deletions client/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,9 +347,10 @@ func (c *Config) ReadStringListToMapDefault(key, defaultValue string) map[string
return list
}

// TLSConfig returns a TLSUtil Config based on the client configuration
// TLSConfiguration returns a TLSUtil Config based on the existing client
// configuration
func (c *Config) TLSConfiguration() *tlsutil.Config {
tlsConf := &tlsutil.Config{
return &tlsutil.Config{
VerifyIncoming: true,
VerifyOutgoing: true,
VerifyServerHostname: c.TLSConfig.VerifyServerHostname,
Expand All @@ -358,5 +359,4 @@ func (c *Config) TLSConfiguration() *tlsutil.Config {
KeyFile: c.TLSConfig.KeyFile,
KeyLoader: c.TLSConfig.GetKeyLoader(),
}
return tlsConf
}
65 changes: 47 additions & 18 deletions command/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -730,37 +730,66 @@ func (a *Agent) Stats() map[string]map[string]string {
return stats
}

// ShouldReload determines if we should reload the configuration and agent
// connections. If the TLS Configuration has not changed, we shouldn't reload.
func (a *Agent) ShouldReload(newConfig *Config) (bool, bool) {
a.configLock.Lock()
defer a.configLock.Unlock()
if a.config.TLSConfig.Equals(newConfig.TLSConfig) {
return false, false
}

return true, true // requires a reload of both agent and http server
}

// Reload handles configuration changes for the agent. Provides a method that
// is easier to unit test, as this action is invoked via SIGHUP.
func (a *Agent) Reload(newConfig *Config) error {
a.configLock.Lock()
defer a.configLock.Unlock()

if newConfig.TLSConfig != nil {

// TODO(chelseakomlo) In a later PR, we will introduce the ability to reload
// TLS configuration if the agent is not running with TLS enabled.
if a.config.TLSConfig != nil {
// Reload the certificates on the keyloader and on success store the
// updated TLS config. It is important to reuse the same keyloader
// as this allows us to dynamically reload configurations not only
// on the Agent but on the Server and Client too (they are
// referencing the same keyloader).
keyloader := a.config.TLSConfig.GetKeyLoader()
_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile)
if err != nil {
return err
}
a.config.TLSConfig = newConfig.TLSConfig
a.config.TLSConfig.KeyLoader = keyloader
if newConfig == nil || newConfig.TLSConfig == nil {
return fmt.Errorf("cannot reload agent with nil configuration")
}

// This is just a TLS configuration reload, we don't need to refresh
// existing network connections
if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() {

// Reload the certificates on the keyloader and on success store the
// updated TLS config. It is important to reuse the same keyloader
// as this allows us to dynamically reload configurations not only
// on the Agent but on the Server and Client too (they are
// referencing the same keyloader).
keyloader := a.config.TLSConfig.GetKeyLoader()
_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile)
if err != nil {
return err
}
a.config.TLSConfig = newConfig.TLSConfig
a.config.TLSConfig.KeyLoader = keyloader
return nil
}

// Completely reload the agent's TLS configuration (moving from non-TLS to
// TLS, or vice versa)
// This does not handle errors in loading the new TLS configuration
a.config.TLSConfig = newConfig.TLSConfig.Copy()

if newConfig.TLSConfig.IsEmpty() {
a.logger.Println("[WARN] agent: Downgrading agent's existing TLS configuration to plaintext")
} else {
a.logger.Println("[INFO] agent: Upgrading from plaintext configuration to TLS")
}

return nil
}

// GetConfigCopy creates a replica of the agent's config, excluding locks
// GetConfig creates a locked reference to the agent's config
func (a *Agent) GetConfig() *Config {
a.configLock.Lock()
defer a.configLock.Unlock()

return a.config
}

Expand Down
Loading