Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Set hostname when migrating ephemeral disks over TLS #4648

Merged
merged 3 commits into from
Sep 6, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ IMPROVEMENTS:
BUG FIXES:
* core: Reset queued allocation summary to zero when job stopped [[GH-4414](https://github.com/hashicorp/nomad/issues/4414)]
* core: Fix panic due to missing synchronization in delayed evaluations heap [[GH-4632](https://github.com/hashicorp/nomad/issues/4632)]
* client: Fix migrating ephemeral disks when TLS is enabled [[GH-4648](https://github.com/hashicorp/nomad/issues/4648)]
* driver/docker: Fix kill timeout not being respected when timeout is over five
minutes [[GH-4599](https://github.com/hashicorp/nomad/issues/4599)]
* scheduler: Fix nil pointer dereference [[GH-4463](https://github.com/hashicorp/nomad/issues/4463)]
Expand Down
142 changes: 142 additions & 0 deletions client/alloc_watcher_e2e_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
package client_test

import (
"bytes"
"fmt"
"io/ioutil"
"path/filepath"
"testing"

"github.com/hashicorp/nomad/command/agent"
"github.com/hashicorp/nomad/nomad"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/structs/config"
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/require"
)

// TestPrevAlloc_StreamAllocDir_TLS asserts ephemeral disk migrations still
// work when TLS is enabled.
//
// It starts a TLS-enabled server and two TLS-enabled client agents, runs a
// job with a sticky, migrating ephemeral disk constrained to client1, writes a
// file into that allocation's data directory, re-registers the job
// constrained to client2, and finally waits for the same file contents to
// appear under the replacement allocation on client2.
func TestPrevAlloc_StreamAllocDir_TLS(t *testing.T) {
	const (
		caFn         = "../helper/tlsutil/testdata/global-ca.pem"
		serverCertFn = "../helper/tlsutil/testdata/global-server.pem"
		serverKeyFn  = "../helper/tlsutil/testdata/global-server-key.pem"
		clientCertFn = "../helper/tlsutil/testdata/global-client.pem"
		clientKeyFn  = "../helper/tlsutil/testdata/global-client-key.pem"
	)
	t.Parallel()
	require := require.New(t)

	server := nomad.TestServer(t, func(c *nomad.Config) {
		c.TLSConfig = &config.TLSConfig{
			EnableHTTP:           true,
			EnableRPC:            true,
			VerifyServerHostname: true,
			CAFile:               caFn,
			CertFile:             serverCertFn,
			KeyFile:              serverKeyFn,
		}
	})
	defer server.Shutdown()
	testutil.WaitForLeader(t, server.RPC)

	t.Logf("[TEST] Leader started: %s", server.GetConfig().RPCAddr.String())

	// Both client agents share the same configuration; only their names differ.
	agentConfFunc := func(c *agent.Config) {
		c.Region = "global"
		c.TLSConfig = &config.TLSConfig{
			EnableHTTP:           true,
			EnableRPC:            true,
			VerifyServerHostname: true,
			CAFile:               caFn,
			CertFile:             clientCertFn,
			KeyFile:              clientKeyFn,
		}
		c.Client.Enabled = true
		c.Client.Servers = []string{server.GetConfig().RPCAddr.String()}
	}
	client1 := agent.NewTestAgent(t, "client1", agentConfFunc)
	defer client1.Shutdown()

	client2 := agent.NewTestAgent(t, "client2", agentConfFunc)
	defer client2.Shutdown()

	// Pin the job to client1 first so there is a known source node.
	job := mock.Job()
	job.Constraints[0].LTarget = "${node.unique.name}"
	job.Constraints[0].RTarget = "client1"
	job.TaskGroups[0].Count = 1
	job.TaskGroups[0].EphemeralDisk.Sticky = true
	job.TaskGroups[0].EphemeralDisk.Migrate = true
	job.TaskGroups[0].Tasks[0] = &structs.Task{
		Name:   "migrate_tls",
		Driver: "mock_driver",
		Config: map[string]interface{}{
			"run_for": "1m",
		},
		LogConfig: structs.DefaultLogConfig(),
		Resources: &structs.Resources{
			CPU:      50,
			MemoryMB: 25,
		},
	}
	testutil.WaitForRunning(t, server.RPC, job.Copy())

	allocArgs := &structs.JobSpecificRequest{}
	allocArgs.JobID = job.ID
	allocArgs.QueryOptions.Region = "global"
	var allocReply structs.JobAllocationsResponse
	require.NoError(server.RPC("Job.Allocations", allocArgs, &allocReply))
	require.Len(allocReply.Allocations, 1)
	origAlloc := allocReply.Allocations[0].ID

	// Save a file into alloc dir
	contents := []byte("123\n456")
	allocFn := filepath.Join(client1.DataDir, "alloc", origAlloc, "alloc", "data", "bar")
	require.NoError(ioutil.WriteFile(allocFn, contents, 0666))
	t.Logf("[TEST] Wrote initial file: %s", allocFn)

	// Migrate alloc to other node
	job.Constraints[0].RTarget = "client2"
	testutil.WaitForRunning(t, server.RPC, job.Copy())

	// Wait for new alloc to be running
	var newAlloc *structs.AllocListStub
	testutil.WaitForResult(func() (bool, error) {
		allocArgs := &structs.JobSpecificRequest{}
		allocArgs.JobID = job.ID
		allocArgs.QueryOptions.Region = "global"
		var allocReply structs.JobAllocationsResponse
		require.NoError(server.RPC("Job.Allocations", allocArgs, &allocReply))
		if n := len(allocReply.Allocations); n != 2 {
			return false, fmt.Errorf("expected 2 allocs found %d", n)
		}

		// Pick the one that didn't exist before
		if allocReply.Allocations[0].ID == origAlloc {
			newAlloc = allocReply.Allocations[1]
		} else {
			newAlloc = allocReply.Allocations[0]
		}

		// The wait is satisfied only once the replacement alloc reports
		// running; any other status keeps polling and surfaces the status in
		// the error if the wait is abandoned.
		return newAlloc.ClientStatus == structs.AllocClientStatusRunning,
			fmt.Errorf("client status: %v", newAlloc.ClientStatus)
	}, func(err error) {
		t.Fatalf("new alloc not running: %v", err)
	})

	// Wait for file to appear on other client
	allocFn2 := filepath.Join(client2.DataDir, "alloc", newAlloc.ID, "alloc", "data", "bar")
	t.Logf("[TEST] Comparing against file: %s", allocFn2)
	testutil.WaitForResult(func() (bool, error) {
		found, err := ioutil.ReadFile(allocFn2)
		if err != nil {
			return false, err
		}
		return bytes.Equal(contents, found), fmt.Errorf("contents misatch. expected:\n%s\n\nfound:\n%s\n",
			contents, found)
	}, func(err error) {
		t.Fatalf("file didn't migrate: %v", err)
	})
}
7 changes: 4 additions & 3 deletions client/allocrunner/alloc_watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -427,9 +427,10 @@ func (p *remotePrevAlloc) migrateAllocDir(ctx context.Context, nodeAddr string)
apiConfig := nomadapi.DefaultConfig()
apiConfig.Address = nodeAddr
apiConfig.TLSConfig = &nomadapi.TLSConfig{
CACert: p.config.TLSConfig.CAFile,
ClientCert: p.config.TLSConfig.CertFile,
ClientKey: p.config.TLSConfig.KeyFile,
CACert: p.config.TLSConfig.CAFile,
ClientCert: p.config.TLSConfig.CertFile,
ClientKey: p.config.TLSConfig.KeyFile,
TLSServerName: fmt.Sprintf("client.%s.nomad", p.config.Region),
}
apiClient, err := nomadapi.NewClient(apiConfig)
if err != nil {
Expand Down
4 changes: 4 additions & 0 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -666,13 +666,17 @@ func (c *Client) setServersImpl(in []string, force bool) (int, error) {
addr, err := resolveServer(srv)
if err != nil {
c.logger.Printf("[DEBUG] client: ignoring server %s due to resolution error: %v", srv, err)
mu.Lock()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good drive by fix.

merr.Errors = append(merr.Errors, err)
mu.Unlock()
return
}

// Try to ping to check if it is a real server
if err := c.Ping(addr); err != nil {
mu.Lock()
merr.Errors = append(merr.Errors, fmt.Errorf("Server at address %s failed ping: %v", addr, err))
mu.Unlock()

// If we are forcing the setting of the servers, inject it to
// the serverlist even if we can't ping immediately.
Expand Down
1 change: 1 addition & 0 deletions helper/tlsutil/testdata/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Using [cfssl 1.2.0](https://github.com/cloudflare/cfssl)
| `ca-key-bad.pem` | CA key for bad region |
| `nomad-bad.pem` | Nomad cert for bad region |
| `nomad-bad-key.pem` | Nomad key for bad region |
| `global-*.pem` | For global region |

## Generating self-signed certs
```sh
Expand Down
13 changes: 13 additions & 0 deletions helper/tlsutil/testdata/global-ca.pem
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-----BEGIN CERTIFICATE-----
MIICATCCAaigAwIBAgIUdyw+oCYCUUrIQ68hGVJVRRCxnjMwCgYIKoZIzj0EAwIw
XzELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1TYW4gRnJhbmNp
c2NvMRMwEQYDVQQLEwpOb21hZCBEZW1vMRYwFAYDVQQDEw1leGFtcGxlLm5vbWFk
MB4XDTE4MDkwNTIzNTQwMFoXDTIzMDkwNDIzNTQwMFowXzELMAkGA1UEBhMCVVMx
CzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1TYW4gRnJhbmNpc2NvMRMwEQYDVQQLEwpO
b21hZCBEZW1vMRYwFAYDVQQDEw1leGFtcGxlLm5vbWFkMFkwEwYHKoZIzj0CAQYI
KoZIzj0DAQcDQgAE6kWmOEIfGJZSh2VHYHuCli+W+dXJOoPN7F01k+bqLcxxuYaS
6ZOT3+J1t7s3zCoF61/m4ITLm/i1GFGcnfzQg6NCMEAwDgYDVR0PAQH/BAQDAgEG
MA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFCEqBD2o3StC6qePPy6WaDknOPh2
MAoGCCqGSM49BAMCA0cAMEQCIFab4iZ4Of3lBztV8PMzorBCBiUDDaqVswACVMhI
xqltAiA/O7LcVvvVYmtcF27NSQLPhh1ibtRjKnTZviBGzwkV3w==
-----END CERTIFICATE-----
5 changes: 5 additions & 0 deletions helper/tlsutil/testdata/global-client-key.pem
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-----BEGIN EC PRIVATE KEY-----
MHcCAQEEICWrWIE3q8UlYau6xKhLz43CO9wg36fxG4Qcy+kBItdeoAoGCCqGSM49
AwEHoUQDQgAEvei5KnuNBvuhGrELae9FL61aJeVvXw0iP0j1XpNvOaYhfMMvq9fY
1q4fVN92D1HQN6FsfLNl/YCvdF+sT4qxnQ==
-----END EC PRIVATE KEY-----
15 changes: 15 additions & 0 deletions helper/tlsutil/testdata/global-client.pem
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-----BEGIN CERTIFICATE-----
MIICSTCCAe+gAwIBAgIUZ+VBej1K6fCm2QSvnyRCIBw1e1cwCgYIKoZIzj0EAwIw
XzELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1TYW4gRnJhbmNp
c2NvMRMwEQYDVQQLEwpOb21hZCBEZW1vMRYwFAYDVQQDEw1leGFtcGxlLm5vbWFk
MB4XDTE4MDkwNTIzNTQwMFoXDTI4MDkwMjIzNTQwMFowRzELMAkGA1UEBhMCVVMx
CzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1TYW4gRnJhbmNpc2NvMRMwEQYDVQQLEwpO
b21hZCBEZW1vMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEvei5KnuNBvuhGrEL
ae9FL61aJeVvXw0iP0j1XpNvOaYhfMMvq9fY1q4fVN92D1HQN6FsfLNl/YCvdF+s
T4qxnaOBoDCBnTAOBgNVHQ8BAf8EBAMCBaAwHQYDVR0lBBYwFAYIKwYBBQUHAwEG
CCsGAQUFBwMCMAwGA1UdEwEB/wQCMAAwHQYDVR0OBBYEFBnFzsZ4hOacg/zVkrVT
ChnNTWKTMB8GA1UdIwQYMBaAFCEqBD2o3StC6qePPy6WaDknOPh2MB4GA1UdEQQX
MBWCE2NsaWVudC5nbG9iYWwubm9tYWQwCgYIKoZIzj0EAwIDSAAwRQIhAMjzKDvs
QPw2OX2GXVUABt7czuaP6ZvJhHXkedRkSoNYAiAuYaS0VxaCdSxSXX96FR03Lcaa
FbRG9S396qK/HSlhcA==
-----END CERTIFICATE-----
5 changes: 5 additions & 0 deletions helper/tlsutil/testdata/global-server-key.pem
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-----BEGIN EC PRIVATE KEY-----
MHcCAQEEINcyDkLfcVur3bsEvdesW2oUbRMFAyVWyvxAYsNVeSNgoAoGCCqGSM49
AwEHoUQDQgAENcwnm0Z/yFL/hb0xUXu4E7fKebTnt/AWQPyeJtDBGa9NAqw8yCOH
XP8GGSomLgGAvrUj/ZOMgenFNSsUhEJKSA==
-----END EC PRIVATE KEY-----
15 changes: 15 additions & 0 deletions helper/tlsutil/testdata/global-server.pem
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-----BEGIN CERTIFICATE-----
MIICSjCCAe+gAwIBAgIUN/zxE9m1ROiJGALka29tm1ThVDUwCgYIKoZIzj0EAwIw
XzELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1TYW4gRnJhbmNp
c2NvMRMwEQYDVQQLEwpOb21hZCBEZW1vMRYwFAYDVQQDEw1leGFtcGxlLm5vbWFk
MB4XDTE4MDkwNTIzNTQwMFoXDTI4MDkwMjIzNTQwMFowRzELMAkGA1UEBhMCVVMx
CzAJBgNVBAgTAkNBMRYwFAYDVQQHEw1TYW4gRnJhbmNpc2NvMRMwEQYDVQQLEwpO
b21hZCBEZW1vMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAENcwnm0Z/yFL/hb0x
UXu4E7fKebTnt/AWQPyeJtDBGa9NAqw8yCOHXP8GGSomLgGAvrUj/ZOMgenFNSsU
hEJKSKOBoDCBnTAOBgNVHQ8BAf8EBAMCBaAwHQYDVR0lBBYwFAYIKwYBBQUHAwEG
CCsGAQUFBwMCMAwGA1UdEwEB/wQCMAAwHQYDVR0OBBYEFHAAhBdKRVqlgjVWEa5V
vyrSwl13MB8GA1UdIwQYMBaAFCEqBD2o3StC6qePPy6WaDknOPh2MB4GA1UdEQQX
MBWCE3NlcnZlci5nbG9iYWwubm9tYWQwCgYIKoZIzj0EAwIDSQAwRgIhAOsmkXXS
mIVm+zEki3IapO+yD9Te6YA6jmmCszEiWYPbAiEA5irkdcc/27jL3i+Woc38kCxa
Den1x+p62mD/LV+76oI=
-----END CERTIFICATE-----
47 changes: 47 additions & 0 deletions testutil/wait.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package testutil

import (
"fmt"
"os"
"time"

Expand Down Expand Up @@ -74,6 +75,7 @@ func IsTravis() bool {

// rpcFn is the signature of the RPC entry point used by the wait helpers in
// this file. Callers here invoke it with a method name (for example
// "Job.Register"), a request value, and a pointer to the response value.
type rpcFn func(string, interface{}, interface{}) error

// WaitForLeader blocks until a leader is elected.
func WaitForLeader(t testing.T, rpc rpcFn) {
WaitForResult(func() (bool, error) {
args := &structs.GenericRequest{}
Expand All @@ -84,3 +86,48 @@ func WaitForLeader(t testing.T, rpc rpcFn) {
t.Fatalf("failed to find leader: %v", err)
})
}

// WaitForRunning runs a job and blocks until it is running.
//
// The job is submitted through rpc with Job.Register; registration is retried
// until it succeeds once and is never repeated after that. Job.Summary is then
// polled until the summary holds one entry per task group in job and every
// entry reports a non-zero Running count. Both requests target the "global"
// region. If WaitForResult gives up first, the test is failed via t.Fatalf
// with the last error observed.
func WaitForRunning(t testing.T, rpc rpcFn, job *structs.Job) {
	registered := false
	WaitForResult(func() (bool, error) {
		if !registered {
			args := &structs.JobRegisterRequest{}
			args.Job = job
			args.WriteRequest.Region = "global"
			var jobResp structs.JobRegisterResponse
			err := rpc("Job.Register", args, &jobResp)
			if err != nil {
				return false, fmt.Errorf("Job.Register error: %v", err)
			}

			// Only register once
			registered = true
		}

		args := &structs.JobSummaryRequest{}
		args.JobID = job.ID
		args.QueryOptions.Region = "global"
		var resp structs.JobSummaryResponse
		err := rpc("Job.Summary", args, &resp)
		if err != nil {
			return false, fmt.Errorf("Job.Summary error: %v", err)
		}

		// A successful reply without a summary would otherwise panic on the
		// dereference below; treat it as "not ready yet" and keep polling.
		if resp.JobSummary == nil {
			return false, fmt.Errorf("Job.Summary returned no summary for job %s", job.ID)
		}

		tgs := len(job.TaskGroups)
		summaries := len(resp.JobSummary.Summary)
		if tgs != summaries {
			return false, fmt.Errorf("task_groups=%d summaries=%d", tgs, summaries)
		}

		for tg, summary := range resp.JobSummary.Summary {
			if summary.Running == 0 {
				return false, fmt.Errorf("task_group=%s %#v", tg, resp.JobSummary.Summary)
			}
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("job not running: %v", err)
	})
}