Skip to content

Commit

Permalink
Add disable_file parameter to job's vault stanza (#13343)
Browse files Browse the repository at this point in the history
This complements the `env` parameter, so that the operator can author
tasks that don't share their Vault token with the workload when using 
`image` filesystem isolation. As a result, more powerful tokens can be used 
in a job definition, allowing it to use template stanzas to issue all kinds of 
secrets (database secrets, Vault tokens with very specific policies, etc.), 
without sharing that issuing power with the task itself.

This is accomplished by creating a directory called `private` within
the task's working directory, which shares many properties of
the `secrets` directory (tmpfs where possible, not accessible by
`nomad alloc fs` or Nomad's web UI), but isn't mounted into/bound to the
container.

If the `disable_file` parameter is set to `false` (its default), the Vault token
is also written to the NOMAD_SECRETS_DIR, so the default behavior is
backwards compatible. Even if the operator never changes the default,
they will still benefit from the improved behavior of Nomad never reading
the token back in from that - potentially altered - location.
  • Loading branch information
grembo authored Jun 23, 2023
1 parent ed51605 commit 6f04b91
Show file tree
Hide file tree
Showing 23 changed files with 385 additions and 45 deletions.
3 changes: 3 additions & 0 deletions .changelog/13343.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
vault: Add new configuration `disable_file` to prevent access to the Vault token by tasks that use `image` filesystem isolation
```
4 changes: 4 additions & 0 deletions api/tasks.go
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,7 @@ type Vault struct {
Policies []string `hcl:"policies,optional"`
Namespace *string `mapstructure:"namespace" hcl:"namespace,optional"`
Env *bool `hcl:"env,optional"`
DisableFile *bool `mapstructure:"disable_file" hcl:"disable_file,optional"`
ChangeMode *string `mapstructure:"change_mode" hcl:"change_mode,optional"`
ChangeSignal *string `mapstructure:"change_signal" hcl:"change_signal,optional"`
}
Expand All @@ -927,6 +928,9 @@ func (v *Vault) Canonicalize() {
if v.Env == nil {
v.Env = pointerOf(true)
}
if v.DisableFile == nil {
v.DisableFile = pointerOf(false)
}
if v.Namespace == nil {
v.Namespace = pointerOf("")
}
Expand Down
1 change: 1 addition & 0 deletions api/tasks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,7 @@ func TestTask_Canonicalize_Vault(t *testing.T) {
input: &Vault{},
expected: &Vault{
Env: pointerOf(true),
DisableFile: pointerOf(false),
Namespace: pointerOf(""),
ChangeMode: pointerOf("restart"),
ChangeSignal: pointerOf("SIGHUP"),
Expand Down
15 changes: 15 additions & 0 deletions client/allocdir/alloc_dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ var (
// directory
TaskSecrets = "secrets"

// TaskPrivate is the name of the private directory inside each task
// directory
TaskPrivate = "private"

// TaskDirs is the set of directories created in each tasks directory.
TaskDirs = map[string]os.FileMode{TmpDirName: os.ModeSticky | 0777}

Expand Down Expand Up @@ -306,6 +310,13 @@ func (d *AllocDir) UnmountAll() error {
}
}

if pathExists(dir.PrivateDir) {
if err := removeSecretDir(dir.PrivateDir); err != nil {
mErr.Errors = append(mErr.Errors,
fmt.Errorf("failed to remove the private dir %q: %v", dir.PrivateDir, err))
}
}

// Unmount dev/ and proc/ have been mounted.
if err := dir.unmountSpecialDirs(); err != nil {
mErr.Errors = append(mErr.Errors, err)
Expand Down Expand Up @@ -447,6 +458,10 @@ func (d *AllocDir) ReadAt(path string, offset int64) (io.ReadCloser, error) {
d.mu.RUnlock()
return nil, fmt.Errorf("Reading secret file prohibited: %s", path)
}
if filepath.HasPrefix(p, dir.PrivateDir) {
d.mu.RUnlock()
return nil, fmt.Errorf("Reading private file prohibited: %s", path)
}
}
d.mu.RUnlock()

Expand Down
14 changes: 14 additions & 0 deletions client/allocdir/task_dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ type TaskDir struct {
// <task_dir>/secrets/
SecretsDir string

// PrivateDir is the path to private/ directory on the host
// <task_dir>/private/
PrivateDir string

// skip embedding these paths in chroots. Used for avoiding embedding
// client.alloc_dir recursively.
skip map[string]struct{}
Expand Down Expand Up @@ -68,6 +72,7 @@ func newTaskDir(logger hclog.Logger, clientAllocDir, allocDir, taskName string)
SharedTaskDir: filepath.Join(taskDir, SharedAllocName),
LocalDir: filepath.Join(taskDir, TaskLocal),
SecretsDir: filepath.Join(taskDir, TaskSecrets),
PrivateDir: filepath.Join(taskDir, TaskPrivate),
skip: skip,
logger: logger,
}
Expand Down Expand Up @@ -130,6 +135,15 @@ func (t *TaskDir) Build(createChroot bool, chroot map[string]string) error {
return err
}

// Create the private directory
if err := createSecretDir(t.PrivateDir); err != nil {
return err
}

if err := dropDirPermissions(t.PrivateDir, os.ModePerm); err != nil {
return err
}

// Build chroot if chroot filesystem isolation is going to be used
if createChroot {
if err := t.buildChroot(chroot); err != nil {
Expand Down
85 changes: 85 additions & 0 deletions client/allocrunner/taskrunner/task_runner_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package taskrunner

import (
"context"
"os"
"path/filepath"
"syscall"
"testing"
"time"

"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/client/vaultclient"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/shoenig/test/must"
)

func TestTaskRunner_DisableFileForVaultToken_UpgradePath(t *testing.T) {
ci.Parallel(t)
ci.SkipTestWithoutRootAccess(t)

// Create test allocation with a Vault block.
alloc := mock.BatchAlloc()
task := alloc.Job.TaskGroups[0].Tasks[0]
task.Config = map[string]any{
"run_for": "0s",
}
task.Vault = &structs.Vault{
Policies: []string{"default"},
}

conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
defer cleanup()

// Remove private dir and write the Vault token to the secrets dir to
// simulate an old task.
err := conf.TaskDir.Build(false, nil)
must.NoError(t, err)

err = syscall.Unmount(conf.TaskDir.PrivateDir, 0)
must.NoError(t, err)
err = os.Remove(conf.TaskDir.PrivateDir)
must.NoError(t, err)

token := "1234"
tokenPath := filepath.Join(conf.TaskDir.SecretsDir, vaultTokenFile)
err = os.WriteFile(tokenPath, []byte(token), 0666)
must.NoError(t, err)

// Setup a test Vault client.
handler := func(*structs.Allocation, []string) (map[string]string, error) {
return map[string]string{task.Name: token}, nil
}
vaultClient := conf.Vault.(*vaultclient.MockVaultClient)
vaultClient.DeriveTokenFn = handler

// Start task runner and wait for task to finish.
tr, err := NewTaskRunner(conf)
must.NoError(t, err)
defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
go tr.Run()
time.Sleep(500 * time.Millisecond)

testWaitForTaskToDie(t, tr)

// Verify task exited successfully.
finalState := tr.TaskState()
must.Eq(t, structs.TaskStateDead, finalState.State)
must.False(t, finalState.Failed)

// Verfiry token is in secrets dir.
tokenPath = filepath.Join(conf.TaskDir.SecretsDir, vaultTokenFile)
data, err := os.ReadFile(tokenPath)
must.NoError(t, err)
must.Eq(t, token, string(data))

// Varify token is not in private dir since the allocation doesn't have
// this path.
tokenPath = filepath.Join(conf.TaskDir.PrivateDir, vaultTokenFile)
_, err = os.Stat(tokenPath)
must.ErrorIs(t, err, os.ErrNotExist)
}
60 changes: 58 additions & 2 deletions client/allocrunner/taskrunner/task_runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1644,11 +1644,16 @@ func TestTaskRunner_BlockForVaultToken(t *testing.T) {
require.False(t, finalState.Failed)

// Check that the token is on disk
tokenPath := filepath.Join(conf.TaskDir.SecretsDir, vaultTokenFile)
tokenPath := filepath.Join(conf.TaskDir.PrivateDir, vaultTokenFile)
data, err := os.ReadFile(tokenPath)
require.NoError(t, err)
require.Equal(t, token, string(data))

tokenPath = filepath.Join(conf.TaskDir.SecretsDir, vaultTokenFile)
data, err = os.ReadFile(tokenPath)
require.NoError(t, err)
require.Equal(t, token, string(data))

// Kill task runner to trigger stop hooks
tr.Kill(context.Background(), structs.NewTaskEvent("kill"))
select {
Expand All @@ -1672,6 +1677,57 @@ func TestTaskRunner_BlockForVaultToken(t *testing.T) {
})
}

func TestTaskRunner_DisableFileForVaultToken(t *testing.T) {
ci.Parallel(t)

// Create test allocation with a Vault block disabling the token file in
// the secrets dir.
alloc := mock.BatchAlloc()
task := alloc.Job.TaskGroups[0].Tasks[0]
task.Config = map[string]any{
"run_for": "0s",
}
task.Vault = &structs.Vault{
Policies: []string{"default"},
DisableFile: true,
}

conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name)
defer cleanup()

// Setup a test Vault client
token := "1234"
handler := func(*structs.Allocation, []string) (map[string]string, error) {
return map[string]string{task.Name: token}, nil
}
vaultClient := conf.Vault.(*vaultclient.MockVaultClient)
vaultClient.DeriveTokenFn = handler

// Start task runner and wait for it to complete.
tr, err := NewTaskRunner(conf)
must.NoError(t, err)
defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup"))
go tr.Run()

testWaitForTaskToDie(t, tr)

// Verify task exited successfully.
finalState := tr.TaskState()
must.Eq(t, structs.TaskStateDead, finalState.State)
must.False(t, finalState.Failed)

// Verify token is in the private dir.
tokenPath := filepath.Join(conf.TaskDir.PrivateDir, vaultTokenFile)
data, err := os.ReadFile(tokenPath)
must.NoError(t, err)
must.Eq(t, token, string(data))

// Verify token is not in secrets dir.
tokenPath = filepath.Join(conf.TaskDir.SecretsDir, vaultTokenFile)
_, err = os.Stat(tokenPath)
must.ErrorIs(t, err, os.ErrNotExist)
}

// TestTaskRunner_DeriveToken_Retry asserts that if a recoverable error is
// returned when deriving a vault token a task will continue to block while
// it's retried.
Expand Down Expand Up @@ -1721,7 +1777,7 @@ func TestTaskRunner_DeriveToken_Retry(t *testing.T) {
require.Equal(t, 1, count)

// Check that the token is on disk
tokenPath := filepath.Join(conf.TaskDir.SecretsDir, vaultTokenFile)
tokenPath := filepath.Join(conf.TaskDir.PrivateDir, vaultTokenFile)
data, err := os.ReadFile(tokenPath)
require.NoError(t, err)
require.Equal(t, token, string(data))
Expand Down
55 changes: 42 additions & 13 deletions client/allocrunner/taskrunner/vault_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"context"
"fmt"
"os"
"path"
"path/filepath"
"sync"
"time"
Expand Down Expand Up @@ -80,8 +81,13 @@ type vaultHook struct {
ctx context.Context
cancel context.CancelFunc

// tokenPath is the path in which to read and write the token
tokenPath string
// privateDirTokenPath is the path inside the task's private directory where
// the Vault token is read and written.
privateDirTokenPath string

// secretsDirTokenPath is the path inside the task's secret directory where the
// Vault token is written unless disabled by the task.
secretsDirTokenPath string

// alloc is the allocation
alloc *structs.Allocation
Expand Down Expand Up @@ -131,17 +137,24 @@ func (h *vaultHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRe
// Try to recover a token if it was previously written in the secrets
// directory
recoveredToken := ""
h.tokenPath = filepath.Join(req.TaskDir.SecretsDir, vaultTokenFile)
data, err := os.ReadFile(h.tokenPath)
if err != nil {
if !os.IsNotExist(err) {
return fmt.Errorf("failed to recover vault token: %v", err)
}
h.privateDirTokenPath = filepath.Join(req.TaskDir.PrivateDir, vaultTokenFile)
h.secretsDirTokenPath = filepath.Join(req.TaskDir.SecretsDir, vaultTokenFile)

// Handle upgrade path by searching for the previous token in all possible
// paths where the token may be.
for _, path := range []string{h.privateDirTokenPath, h.secretsDirTokenPath} {
data, err := os.ReadFile(path)
if err != nil {
if !os.IsNotExist(err) {
return fmt.Errorf("failed to recover vault token from %s: %v", path, err)
}

// Token file doesn't exist
} else {
// Store the recovered token
recoveredToken = string(data)
// Token file doesn't exist in this path.
} else {
// Store the recovered token
recoveredToken = string(data)
break
}
}

// Launch the token manager
Expand Down Expand Up @@ -345,9 +358,25 @@ func (h *vaultHook) deriveVaultToken() (token string, exit bool) {

// writeToken writes the given token to disk
func (h *vaultHook) writeToken(token string) error {
if err := os.WriteFile(h.tokenPath, []byte(token), 0666); err != nil {
// Handle upgrade path by first checking if the tasks private directory
// exists. If it doesn't, this allocation probably existed before the
// private directory was introduced, so keep using the secret directory to
// prevent unnecessary errors during task recovery.
if _, err := os.Stat(path.Dir(h.privateDirTokenPath)); os.IsNotExist(err) {
if err := os.WriteFile(h.secretsDirTokenPath, []byte(token), 0666); err != nil {
return fmt.Errorf("failed to write vault token to secrets dir: %v", err)
}
return nil
}

if err := os.WriteFile(h.privateDirTokenPath, []byte(token), 0600); err != nil {
return fmt.Errorf("failed to write vault token: %v", err)
}
if !h.vaultBlock.DisableFile {
if err := os.WriteFile(h.secretsDirTokenPath, []byte(token), 0666); err != nil {
return fmt.Errorf("failed to write vault token to secrets dir: %v", err)
}
}

return nil
}
Expand Down
1 change: 1 addition & 0 deletions command/agent/job_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -1265,6 +1265,7 @@ func ApiTaskToStructsTask(job *structs.Job, group *structs.TaskGroup,
Policies: apiTask.Vault.Policies,
Namespace: *apiTask.Vault.Namespace,
Env: *apiTask.Vault.Env,
DisableFile: *apiTask.Vault.DisableFile,
ChangeMode: *apiTask.Vault.ChangeMode,
ChangeSignal: *apiTask.Vault.ChangeSignal,
}
Expand Down
2 changes: 2 additions & 0 deletions command/agent/job_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2785,6 +2785,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
Namespace: pointer.Of("ns1"),
Policies: []string{"a", "b", "c"},
Env: pointer.Of(true),
DisableFile: pointer.Of(false),
ChangeMode: pointer.Of("c"),
ChangeSignal: pointer.Of("sighup"),
},
Expand Down Expand Up @@ -3204,6 +3205,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
Namespace: "ns1",
Policies: []string{"a", "b", "c"},
Env: true,
DisableFile: false,
ChangeMode: "c",
ChangeSignal: "sighup",
},
Expand Down
1 change: 1 addition & 0 deletions drivers/shared/executor/executor_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ etc/
lib/
lib64/
local/
private/
proc/
secrets/
sys/
Expand Down
1 change: 1 addition & 0 deletions jobspec/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,7 @@ func parseVault(result *api.Vault, list *ast.ObjectList) error {
"namespace",
"policies",
"env",
"disable_file",
"change_mode",
"change_signal",
}
Expand Down
Loading

0 comments on commit 6f04b91

Please sign in to comment.