Skip to content

Commit

Permalink
Merge pull request #4648 from hashicorp/b-migrate-tls
Browse files Browse the repository at this point in the history
Set hostname when migrating ephemeral disks over TLS
  • Loading branch information
schmichael committed Sep 6, 2018
2 parents 1aec903 + 833dc7b commit 6bd5852
Show file tree
Hide file tree
Showing 11 changed files with 252 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ IMPROVEMENTS:
BUG FIXES:
* core: Reset queued allocation summary to zero when job stopped [[GH-4414](https://github.com/hashicorp/nomad/issues/4414)]
* core: Fix panic due to missing synchronization in delayed evaluations heap [[GH-4632](https://github.com/hashicorp/nomad/issues/4632)]
* client: Fix migrating ephemeral disks when TLS is enabled [[GH-4648](https://github.com/hashicorp/nomad/issues/4648)]
* driver/docker: Fix kill timeout not being respected when timeout is over five
minutes [[GH-4599](https://github.com/hashicorp/nomad/issues/4599)]
* scheduler: Fix nil pointer dereference [[GH-4463](https://github.com/hashicorp/nomad/issues/4463)]
Expand Down
142 changes: 142 additions & 0 deletions client/alloc_watcher_e2e_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
package client_test

import (
"bytes"
"fmt"
"io/ioutil"
"path/filepath"
"testing"

"github.com/hashicorp/nomad/command/agent"
"github.com/hashicorp/nomad/nomad"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/structs/config"
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/require"
)

// TestPrevAlloc_StreamAllocDir_TLS asserts ephemeral disk migrations still
// work when TLS is enabled.
//
// The test starts one server and two client agents, all configured with TLS
// (HTTP and RPC) and server hostname verification. It runs a job with a
// sticky, migrating ephemeral disk constrained to client1, writes a file into
// that allocation's data directory, then re-registers the job constrained to
// client2 and waits for the file to show up under the new allocation's data
// directory on client2.
func TestPrevAlloc_StreamAllocDir_TLS(t *testing.T) {
	const (
		caFn         = "../helper/tlsutil/testdata/global-ca.pem"
		serverCertFn = "../helper/tlsutil/testdata/global-server.pem"
		serverKeyFn  = "../helper/tlsutil/testdata/global-server-key.pem"
		clientCertFn = "../helper/tlsutil/testdata/global-client.pem"
		clientKeyFn  = "../helper/tlsutil/testdata/global-client-key.pem"
	)
	t.Parallel()
	require := require.New(t)

	server := nomad.TestServer(t, func(c *nomad.Config) {
		c.TLSConfig = &config.TLSConfig{
			EnableHTTP:           true,
			EnableRPC:            true,
			VerifyServerHostname: true,
			CAFile:               caFn,
			CertFile:             serverCertFn,
			KeyFile:              serverKeyFn,
		}
	})
	defer server.Shutdown()
	testutil.WaitForLeader(t, server.RPC)

	t.Logf("[TEST] Leader started: %s", server.GetConfig().RPCAddr.String())

	// Both client agents share the same TLS settings and point at the test
	// server; only their names differ.
	agentConfFunc := func(c *agent.Config) {
		c.Region = "global"
		c.TLSConfig = &config.TLSConfig{
			EnableHTTP:           true,
			EnableRPC:            true,
			VerifyServerHostname: true,
			CAFile:               caFn,
			CertFile:             clientCertFn,
			KeyFile:              clientKeyFn,
		}
		c.Client.Enabled = true
		c.Client.Servers = []string{server.GetConfig().RPCAddr.String()}
	}
	client1 := agent.NewTestAgent(t, "client1", agentConfFunc)
	defer client1.Shutdown()

	client2 := agent.NewTestAgent(t, "client2", agentConfFunc)
	defer client2.Shutdown()

	// Pin the job to client1 first so there is a known source node for the
	// migration.
	job := mock.Job()
	job.Constraints[0].LTarget = "${node.unique.name}"
	job.Constraints[0].RTarget = "client1"
	job.TaskGroups[0].Count = 1
	job.TaskGroups[0].EphemeralDisk.Sticky = true
	job.TaskGroups[0].EphemeralDisk.Migrate = true
	job.TaskGroups[0].Tasks[0] = &structs.Task{
		Name:   "migrate_tls",
		Driver: "mock_driver",
		Config: map[string]interface{}{
			"run_for": "1m",
		},
		LogConfig: structs.DefaultLogConfig(),
		Resources: &structs.Resources{
			CPU:      50,
			MemoryMB: 25,
		},
	}
	testutil.WaitForRunning(t, server.RPC, job.Copy())

	allocArgs := &structs.JobSpecificRequest{}
	allocArgs.JobID = job.ID
	allocArgs.QueryOptions.Region = "global"
	var allocReply structs.JobAllocationsResponse
	require.NoError(server.RPC("Job.Allocations", allocArgs, &allocReply))
	require.Len(allocReply.Allocations, 1)
	origAlloc := allocReply.Allocations[0].ID

	// Save a file into alloc dir
	contents := []byte("123\n456")
	allocFn := filepath.Join(client1.DataDir, "alloc", origAlloc, "alloc", "data", "bar")
	require.NoError(ioutil.WriteFile(allocFn, contents, 0666))
	t.Logf("[TEST] Wrote initial file: %s", allocFn)

	// Migrate alloc to other node
	job.Constraints[0].RTarget = "client2"
	testutil.WaitForRunning(t, server.RPC, job.Copy())

	// Wait for new alloc to be running
	var newAlloc *structs.AllocListStub
	testutil.WaitForResult(func() (bool, error) {
		listArgs := &structs.JobSpecificRequest{}
		listArgs.JobID = job.ID
		listArgs.QueryOptions.Region = "global"
		var listReply structs.JobAllocationsResponse
		require.NoError(server.RPC("Job.Allocations", listArgs, &listReply))
		if n := len(listReply.Allocations); n != 2 {
			return false, fmt.Errorf("expected 2 allocs found %d", n)
		}

		// Pick the one that didn't exist before
		if listReply.Allocations[0].ID == origAlloc {
			newAlloc = listReply.Allocations[1]
		} else {
			newAlloc = listReply.Allocations[0]
		}

		// Succeed only once the new alloc is actually running; the error is
		// only reported if the wait ultimately gives up.
		return newAlloc.ClientStatus == structs.AllocClientStatusRunning,
			fmt.Errorf("client status: %v", newAlloc.ClientStatus)
	}, func(err error) {
		t.Fatalf("new alloc not running: %v", err)
	})

	// Wait for file to appear on other client
	allocFn2 := filepath.Join(client2.DataDir, "alloc", newAlloc.ID, "alloc", "data", "bar")
	t.Logf("[TEST] Comparing against file: %s", allocFn2)
	testutil.WaitForResult(func() (bool, error) {
		found, err := ioutil.ReadFile(allocFn2)
		if err != nil {
			return false, err
		}
		return bytes.Equal(contents, found), fmt.Errorf("contents mismatch. expected:\n%s\n\nfound:\n%s\n",
			contents, found)
	}, func(err error) {
		t.Fatalf("file didn't migrate: %v", err)
	})
}
7 changes: 4 additions & 3 deletions client/allocrunner/alloc_watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -427,9 +427,10 @@ func (p *remotePrevAlloc) migrateAllocDir(ctx context.Context, nodeAddr string)
apiConfig := nomadapi.DefaultConfig()
apiConfig.Address = nodeAddr
apiConfig.TLSConfig = &nomadapi.TLSConfig{
CACert: p.config.TLSConfig.CAFile,
ClientCert: p.config.TLSConfig.CertFile,
ClientKey: p.config.TLSConfig.KeyFile,
CACert: p.config.TLSConfig.CAFile,
ClientCert: p.config.TLSConfig.CertFile,
ClientKey: p.config.TLSConfig.KeyFile,
TLSServerName: fmt.Sprintf("client.%s.nomad", p.config.Region),
}
apiClient, err := nomadapi.NewClient(apiConfig)
if err != nil {
Expand Down
4 changes: 4 additions & 0 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -666,13 +666,17 @@ func (c *Client) setServersImpl(in []string, force bool) (int, error) {
addr, err := resolveServer(srv)
if err != nil {
c.logger.Printf("[DEBUG] client: ignoring server %s due to resolution error: %v", srv, err)
mu.Lock()
merr.Errors = append(merr.Errors, err)
mu.Unlock()
return
}

// Try to ping to check if it is a real server
if err := c.Ping(addr); err != nil {
mu.Lock()
merr.Errors = append(merr.Errors, fmt.Errorf("Server at address %s failed ping: %v", addr, err))
mu.Unlock()

// If we are forcing the setting of the servers, inject it to
// the serverlist even if we can't ping immediately.
Expand Down
1 change: 1 addition & 0 deletions helper/tlsutil/testdata/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Using [cfssl 1.2.0](https://github.com/cloudflare/cfssl)
| `ca-key-bad.pem` | CA key for bad region |
| `nomad-bad.pem` | Nomad cert for bad region |
| `nomad-bad-key.pem` | Nomad key for bad region |
| `global-*.pem` | For global region |

## Generating self-signed certs
```sh
Expand Down
13 changes: 13 additions & 0 deletions helper/tlsutil/testdata/global-ca.pem
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-----BEGIN CERTIFICATE-----
MIICATCCAaigAwIBAgIUdyw+oCYCUUrIQ68hGVJVRRCxnjMwCgYIKoZIzj0EAwIw
XzELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1TYW4gRnJhbmNp
c2NvMRMwEQYDVQQLEwpOb21hZCBEZW1vMRYwFAYDVQQDEw1leGFtcGxlLm5vbWFk
MB4XDTE4MDkwNTIzNTQwMFoXDTIzMDkwNDIzNTQwMFowXzELMAkGA1UEBhMCVVMx
CzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1TYW4gRnJhbmNpc2NvMRMwEQYDVQQLEwpO
b21hZCBEZW1vMRYwFAYDVQQDEw1leGFtcGxlLm5vbWFkMFkwEwYHKoZIzj0CAQYI
KoZIzj0DAQcDQgAE6kWmOEIfGJZSh2VHYHuCli+W+dXJOoPN7F01k+bqLcxxuYaS
6ZOT3+J1t7s3zCoF61/m4ITLm/i1GFGcnfzQg6NCMEAwDgYDVR0PAQH/BAQDAgEG
MA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFCEqBD2o3StC6qePPy6WaDknOPh2
MAoGCCqGSM49BAMCA0cAMEQCIFab4iZ4Of3lBztV8PMzorBCBiUDDaqVswACVMhI
xqltAiA/O7LcVvvVYmtcF27NSQLPhh1ibtRjKnTZviBGzwkV3w==
-----END CERTIFICATE-----
5 changes: 5 additions & 0 deletions helper/tlsutil/testdata/global-client-key.pem
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-----BEGIN EC PRIVATE KEY-----
MHcCAQEEICWrWIE3q8UlYau6xKhLz43CO9wg36fxG4Qcy+kBItdeoAoGCCqGSM49
AwEHoUQDQgAEvei5KnuNBvuhGrELae9FL61aJeVvXw0iP0j1XpNvOaYhfMMvq9fY
1q4fVN92D1HQN6FsfLNl/YCvdF+sT4qxnQ==
-----END EC PRIVATE KEY-----
15 changes: 15 additions & 0 deletions helper/tlsutil/testdata/global-client.pem
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-----BEGIN CERTIFICATE-----
MIICSTCCAe+gAwIBAgIUZ+VBej1K6fCm2QSvnyRCIBw1e1cwCgYIKoZIzj0EAwIw
XzELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1TYW4gRnJhbmNp
c2NvMRMwEQYDVQQLEwpOb21hZCBEZW1vMRYwFAYDVQQDEw1leGFtcGxlLm5vbWFk
MB4XDTE4MDkwNTIzNTQwMFoXDTI4MDkwMjIzNTQwMFowRzELMAkGA1UEBhMCVVMx
CzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1TYW4gRnJhbmNpc2NvMRMwEQYDVQQLEwpO
b21hZCBEZW1vMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEvei5KnuNBvuhGrEL
ae9FL61aJeVvXw0iP0j1XpNvOaYhfMMvq9fY1q4fVN92D1HQN6FsfLNl/YCvdF+s
T4qxnaOBoDCBnTAOBgNVHQ8BAf8EBAMCBaAwHQYDVR0lBBYwFAYIKwYBBQUHAwEG
CCsGAQUFBwMCMAwGA1UdEwEB/wQCMAAwHQYDVR0OBBYEFBnFzsZ4hOacg/zVkrVT
ChnNTWKTMB8GA1UdIwQYMBaAFCEqBD2o3StC6qePPy6WaDknOPh2MB4GA1UdEQQX
MBWCE2NsaWVudC5nbG9iYWwubm9tYWQwCgYIKoZIzj0EAwIDSAAwRQIhAMjzKDvs
QPw2OX2GXVUABt7czuaP6ZvJhHXkedRkSoNYAiAuYaS0VxaCdSxSXX96FR03Lcaa
FbRG9S396qK/HSlhcA==
-----END CERTIFICATE-----
5 changes: 5 additions & 0 deletions helper/tlsutil/testdata/global-server-key.pem
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-----BEGIN EC PRIVATE KEY-----
MHcCAQEEINcyDkLfcVur3bsEvdesW2oUbRMFAyVWyvxAYsNVeSNgoAoGCCqGSM49
AwEHoUQDQgAENcwnm0Z/yFL/hb0xUXu4E7fKebTnt/AWQPyeJtDBGa9NAqw8yCOH
XP8GGSomLgGAvrUj/ZOMgenFNSsUhEJKSA==
-----END EC PRIVATE KEY-----
15 changes: 15 additions & 0 deletions helper/tlsutil/testdata/global-server.pem
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-----BEGIN CERTIFICATE-----
MIICSjCCAe+gAwIBAgIUN/zxE9m1ROiJGALka29tm1ThVDUwCgYIKoZIzj0EAwIw
XzELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1TYW4gRnJhbmNp
c2NvMRMwEQYDVQQLEwpOb21hZCBEZW1vMRYwFAYDVQQDEw1leGFtcGxlLm5vbWFk
MB4XDTE4MDkwNTIzNTQwMFoXDTI4MDkwMjIzNTQwMFowRzELMAkGA1UEBhMCVVMx
CzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1TYW4gRnJhbmNpc2NvMRMwEQYDVQQLEwpO
b21hZCBEZW1vMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAENcwnm0Z/yFL/hb0x
UXu4E7fKebTnt/AWQPyeJtDBGa9NAqw8yCOHXP8GGSomLgGAvrUj/ZOMgenFNSsU
hEJKSKOBoDCBnTAOBgNVHQ8BAf8EBAMCBaAwHQYDVR0lBBYwFAYIKwYBBQUHAwEG
CCsGAQUFBwMCMAwGA1UdEwEB/wQCMAAwHQYDVR0OBBYEFHAAhBdKRVqlgjVWEa5V
vyrSwl13MB8GA1UdIwQYMBaAFCEqBD2o3StC6qePPy6WaDknOPh2MB4GA1UdEQQX
MBWCE3NlcnZlci5nbG9iYWwubm9tYWQwCgYIKoZIzj0EAwIDSQAwRgIhAOsmkXXS
mIVm+zEki3IapO+yD9Te6YA6jmmCszEiWYPbAiEA5irkdcc/27jL3i+Woc38kCxa
Den1x+p62mD/LV+76oI=
-----END CERTIFICATE-----
47 changes: 47 additions & 0 deletions testutil/wait.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package testutil

import (
"fmt"
"os"
"time"

Expand Down Expand Up @@ -74,6 +75,7 @@ func IsTravis() bool {

type rpcFn func(string, interface{}, interface{}) error

// WaitForLeader blocks until a leader is elected.
func WaitForLeader(t testing.T, rpc rpcFn) {
WaitForResult(func() (bool, error) {
args := &structs.GenericRequest{}
Expand All @@ -84,3 +86,48 @@ func WaitForLeader(t testing.T, rpc rpcFn) {
t.Fatalf("failed to find leader: %v", err)
})
}

// WaitForRunning runs a job and blocks until it is running.
//
// The job is submitted with the Job.Register RPC against the "global" region.
// A failed registration is retried on the next poll; once a registration
// succeeds it is not repeated. After that, each poll fetches the job summary
// with Job.Summary and succeeds only when the summary has one entry per task
// group in job and every entry reports at least one running allocation.
// If the wait gives up, t.Fatalf is called with the last error observed.
func WaitForRunning(t testing.T, rpc rpcFn, job *structs.Job) {
	registered := false
	WaitForResult(func() (bool, error) {
		if !registered {
			args := &structs.JobRegisterRequest{}
			args.Job = job
			args.WriteRequest.Region = "global"
			var jobResp structs.JobRegisterResponse
			err := rpc("Job.Register", args, &jobResp)
			if err != nil {
				return false, fmt.Errorf("Job.Register error: %v", err)
			}

			// Only register once
			registered = true
		}

		args := &structs.JobSummaryRequest{}
		args.JobID = job.ID
		args.QueryOptions.Region = "global"
		var resp structs.JobSummaryResponse
		err := rpc("Job.Summary", args, &resp)
		if err != nil {
			return false, fmt.Errorf("Job.Summary error: %v", err)
		}

		// Guard against a reply that carries no summary so the poll retries
		// instead of panicking on a nil dereference below.
		if resp.JobSummary == nil {
			return false, fmt.Errorf("Job.Summary returned no summary for job %q", job.ID)
		}

		tgs := len(job.TaskGroups)
		summaries := len(resp.JobSummary.Summary)
		if tgs != summaries {
			return false, fmt.Errorf("task_groups=%d summaries=%d", tgs, summaries)
		}

		for tg, summary := range resp.JobSummary.Summary {
			if summary.Running == 0 {
				return false, fmt.Errorf("task_group=%s %#v", tg, resp.JobSummary.Summary)
			}
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("job not running: %v", err)
	})
}

0 comments on commit 6bd5852

Please sign in to comment.