Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

post-release merge for 1.0.3 #9930

Merged
merged 25 commits into from
Jan 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
664b8e0
changelog for release 1.0.2
drewbailey Jan 12, 2021
4c1d4fc
Generate files for 1.0.2 release
hashicorp-nomad Jan 13, 2021
fff533a
Release v1.0.2
hashicorp-nomad Jan 13, 2021
6b73e85
ci: only read/modify `GO_TAGS` field (#9815)
Jan 14, 2021
677353a
Add PID namespacing and e2e test
Jan 25, 2021
75cea8a
Always check that resource constraints were applied
Jan 26, 2021
41681fa
Add unit test for container namespacing
Jan 26, 2021
ea7bab0
Add e2e test for raw exec
Jan 26, 2021
c0f6df7
Add test for alloc exec
Jan 26, 2021
87f80b1
Add a little comment
Jan 26, 2021
109fb53
put exec process in a new IPC namespace
cgbaker Jan 26, 2021
611abc3
modify exec driver test TestExecDriver_DestroyKills all in light of t…
cgbaker Jan 26, 2021
6a067a3
modify exec driver test TestExecDriver_StartWaitStop in light of the …
cgbaker Jan 27, 2021
3eb9cdf
additional e2e utils for multi-task allocs
cgbaker Jan 27, 2021
c990574
e2e: java driver isolation tests
cgbaker Jan 27, 2021
f4fe79a
Backfill unit test for NEWIPC
Jan 27, 2021
bcb78f1
lint some nomad HCL job specs
cgbaker Jan 27, 2021
c191054
attempting to fix flaky tests caused by pid isolation
Jan 27, 2021
2b8d52a
updated changelog for release
cgbaker Jan 28, 2021
08741d9
bump version to 1.0.3
cgbaker Jan 28, 2021
d79b74c
Release v1.0.3
hashicorp-nomad Jan 28, 2021
7f06adf
Merge tag 'v1.0.3' into post-release-1.0.3
cgbaker Jan 29, 2021
460c9a6
version to 1.0.4-dev
cgbaker Jan 29, 2021
352df9c
version-specific upgrade guide for 1.0.3 and 0.12.10
cgbaker Jan 29, 2021
d0462cf
remove generated code
cgbaker Jan 29, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## 1.0.3 (Unreleased)
## 1.0.4 (Unreleased)

FEATURES:
* **Terminating Gateways**: Adds built-in support for running Consul Connect terminating gateways [[GH-9829](https://github.com/hashicorp/nomad/pull/9829)]
Expand All @@ -14,6 +14,11 @@ BUG FIXES:
* scheduler: Fixed a bug where job statuses and summaries were duplicated and miscalculated when registering a job. [[GH-9768](https://github.com/hashicorp/nomad/issues/9768)]
* driver/qemu: Fixed a bug where network namespaces were not supported for QEMU workloads [[GH-9861](https://github.com/hashicorp/nomad/pull/9861)]

## 1.0.3 (January 28, 2021)

SECURITY:
* drivers/exec+java: Modified exec-based drivers to run tasks in private PID/IPC namespaces. CVE-2021-3283 [[GH-9911](https://github.com/hashicorp/nomad/issues/9911)]

## 1.0.2 (January 14, 2021)

IMPROVEMENTS:
Expand Down
188 changes: 94 additions & 94 deletions command/agent/bindata_assetfs.go

Large diffs are not rendered by default.

83 changes: 43 additions & 40 deletions drivers/exec/driver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"io/ioutil"
"os"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
Expand Down Expand Up @@ -270,9 +269,9 @@ func TestExecDriver_StartWaitRecover(t *testing.T) {
require.NoError(harness.DestroyTask(task.ID, true))
}

// TestExecDriver_DestroyKillsAll asserts that when TaskDestroy is called all
// task processes are cleaned up.
func TestExecDriver_DestroyKillsAll(t *testing.T) {
// TestExecDriver_NoOrphans asserts that when the main
// task dies, the orphans in the PID namespaces are killed by the kernel
func TestExecDriver_NoOrphans(t *testing.T) {
t.Parallel()
require := require.New(t)
ctestutils.ExecCompatible(t)
Expand All @@ -294,50 +293,54 @@ func TestExecDriver_DestroyKillsAll(t *testing.T) {

taskConfig := map[string]interface{}{}
taskConfig["command"] = "/bin/sh"
taskConfig["args"] = []string{"-c", fmt.Sprintf(`sleep 3600 & echo "SLEEP_PID=$!"`)}

// start a long-running child in the background, then sleep for 20 seconds to give us a chance to examine processes
taskConfig["args"] = []string{"-c", fmt.Sprintf(`sleep 3600 & sleep 20`)}
require.NoError(task.EncodeConcreteDriverConfig(&taskConfig))

handle, _, err := harness.StartTask(task)
require.NoError(err)
defer harness.DestroyTask(task.ID, true)

ch, err := harness.WaitTask(context.Background(), handle.Config.ID)
waitCh, err := harness.WaitTask(context.Background(), handle.Config.ID)
require.NoError(err)

select {
case result := <-ch:
require.True(result.Successful(), "command failed: %#v", result)
case <-time.After(10 * time.Second):
require.Fail("timeout waiting for task to shutdown")
}

sleepPid := 0
require.NoError(harness.WaitUntilStarted(task.ID, 1*time.Second))

// Ensure that the task is marked as dead, but account
// for WaitTask() closing channel before internal state is updated
var childPids []int
taskState := TaskState{}
testutil.WaitForResult(func() (bool, error) {
stdout, err := ioutil.ReadFile(filepath.Join(task.TaskDir().LogDir, "test.stdout.0"))
if err != nil {
return false, fmt.Errorf("failed to output pid file: %v", err)
}

pidMatch := regexp.MustCompile(`SLEEP_PID=(\d+)`).FindStringSubmatch(string(stdout))
if len(pidMatch) != 2 {
return false, fmt.Errorf("failed to find pid in %s", string(stdout))
require.NoError(handle.GetDriverState(&taskState))
if taskState.Pid == 0 {
return false, fmt.Errorf("task PID is zero")
}

pid, err := strconv.Atoi(pidMatch[1])
children, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/task/%d/children", taskState.Pid, taskState.Pid))
if err != nil {
return false, fmt.Errorf("pid parts aren't int: %s", pidMatch[1])
return false, fmt.Errorf("error reading /proc for children: %v", err)
}
pids := strings.Fields(string(children))
if len(pids) < 2 {
return false, fmt.Errorf("error waiting for two children, currently %d", len(pids))
}
for _, cpid := range pids {
p, err := strconv.Atoi(cpid)
if err != nil {
return false, fmt.Errorf("error parsing child pids from /proc: %s", cpid)
}
childPids = append(childPids, p)
}

sleepPid = pid
return true, nil
}, func(err error) {
require.NoError(err)
})

select {
case result := <-waitCh:
require.True(result.Successful(), "command failed: %#v", result)
case <-time.After(30 * time.Second):
require.Fail("timeout waiting for task to shutdown")
}

// isProcessRunning returns an error if process is not running
isProcessRunning := func(pid int) error {
process, err := os.FindProcess(pid)
Expand All @@ -353,20 +356,20 @@ func TestExecDriver_DestroyKillsAll(t *testing.T) {
return nil
}

require.NoError(isProcessRunning(sleepPid))

require.NoError(harness.DestroyTask(task.ID, true))
// task should be dead
require.Error(isProcessRunning(taskState.Pid))

// all children should eventually be killed by OS
testutil.WaitForResult(func() (bool, error) {
err := isProcessRunning(sleepPid)
if err == nil {
return false, fmt.Errorf("child process is still running")
for _, cpid := range childPids {
err := isProcessRunning(cpid)
if err == nil {
return false, fmt.Errorf("child process %d is still running", cpid)
}
if !strings.Contains(err.Error(), "failed to signal process") {
return false, fmt.Errorf("unexpected error: %v", err)
}
}

if !strings.Contains(err.Error(), "failed to signal process") {
return false, fmt.Errorf("unexpected error: %v", err)
}

return true, nil
}, func(err error) {
require.NoError(err)
Expand Down
12 changes: 6 additions & 6 deletions drivers/exec/driver_unix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@ import (
"testing"
"time"

"github.com/stretchr/testify/require"
"golang.org/x/sys/unix"

ctestutils "github.com/hashicorp/nomad/client/testutil"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/plugins/drivers"
dtestutil "github.com/hashicorp/nomad/plugins/drivers/testutils"
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/require"
"golang.org/x/sys/unix"
)

func TestExecDriver_StartWaitStop(t *testing.T) {
Expand Down Expand Up @@ -44,6 +45,7 @@ func TestExecDriver_StartWaitStop(t *testing.T) {
defer cleanup()

handle, _, err := harness.StartTask(task)
defer harness.DestroyTask(task.ID, true)
require.NoError(err)

ch, err := harness.WaitTask(context.Background(), handle.Config.ID)
Expand All @@ -52,12 +54,12 @@ func TestExecDriver_StartWaitStop(t *testing.T) {
require.NoError(harness.WaitUntilStarted(task.ID, 1*time.Second))

go func() {
harness.StopTask(task.ID, 2*time.Second, "SIGINT")
harness.StopTask(task.ID, 2*time.Second, "SIGKILL")
}()

select {
case result := <-ch:
require.Equal(int(unix.SIGINT), result.Signal)
require.Equal(int(unix.SIGKILL), result.Signal)
case <-time.After(10 * time.Second):
require.Fail("timeout waiting for task to shutdown")
}
Expand All @@ -77,8 +79,6 @@ func TestExecDriver_StartWaitStop(t *testing.T) {
}, func(err error) {
require.NoError(err)
})

require.NoError(harness.DestroyTask(task.ID, true))
}

func TestExec_ExecTaskStreaming(t *testing.T) {
Expand Down
2 changes: 2 additions & 0 deletions drivers/shared/executor/executor_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,8 @@ func configureIsolation(cfg *lconfigs.Config, command *ExecCommand) error {
// launch with mount namespace
cfg.Namespaces = lconfigs.Namespaces{
{Type: lconfigs.NEWNS},
{Type: lconfigs.NEWPID},
{Type: lconfigs.NEWIPC},
}

if command.NetworkIsolation != nil {
Expand Down
55 changes: 33 additions & 22 deletions drivers/shared/executor/executor_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,35 +107,46 @@ func TestExecutor_IsolationAndConstraints(t *testing.T) {
require.NoError(err)
require.NotZero(ps.Pid)

state, err := executor.Wait(context.Background())
estate, err := executor.Wait(context.Background())
require.NoError(err)
require.Zero(state.ExitCode)
require.Zero(estate.ExitCode)

lexec, ok := executor.(*LibcontainerExecutor)
require.True(ok)

// Check if the resource constraints were applied
if lexec, ok := executor.(*LibcontainerExecutor); ok {
state, err := lexec.container.State()
require.NoError(err)
state, err := lexec.container.State()
require.NoError(err)

memLimits := filepath.Join(state.CgroupPaths["memory"], "memory.limit_in_bytes")
data, err := ioutil.ReadFile(memLimits)
require.NoError(err)
memLimits := filepath.Join(state.CgroupPaths["memory"], "memory.limit_in_bytes")
data, err := ioutil.ReadFile(memLimits)
require.NoError(err)

expectedMemLim := strconv.Itoa(int(execCmd.Resources.NomadResources.Memory.MemoryMB * 1024 * 1024))
actualMemLim := strings.TrimSpace(string(data))
require.Equal(actualMemLim, expectedMemLim)
require.NoError(executor.Shutdown("", 0))
executor.Wait(context.Background())
expectedMemLim := strconv.Itoa(int(execCmd.Resources.NomadResources.Memory.MemoryMB * 1024 * 1024))
actualMemLim := strings.TrimSpace(string(data))
require.Equal(actualMemLim, expectedMemLim)

// Check if Nomad has actually removed the cgroups
tu.WaitForResult(func() (bool, error) {
_, err = os.Stat(memLimits)
if err == nil {
return false, fmt.Errorf("expected an error from os.Stat %s", memLimits)
}
return true, nil
}, func(err error) { t.Error(err) })
// Check that namespaces were applied to the container config
config := lexec.container.Config()
require.NoError(err)

require.Contains(config.Namespaces, lconfigs.Namespace{Type: lconfigs.NEWNS})
require.Contains(config.Namespaces, lconfigs.Namespace{Type: lconfigs.NEWPID})
require.Contains(config.Namespaces, lconfigs.Namespace{Type: lconfigs.NEWIPC})

// Shut down executor
require.NoError(executor.Shutdown("", 0))
executor.Wait(context.Background())

// Check if Nomad has actually removed the cgroups
tu.WaitForResult(func() (bool, error) {
_, err = os.Stat(memLimits)
if err == nil {
return false, fmt.Errorf("expected an error from os.Stat %s", memLimits)
}
return true, nil
}, func(err error) { t.Error(err) })

}
expected := `/:
alloc/
bin/
Expand Down
1 change: 1 addition & 0 deletions e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
_ "github.com/hashicorp/nomad/e2e/deployment"
_ "github.com/hashicorp/nomad/e2e/events"
_ "github.com/hashicorp/nomad/e2e/example"
_ "github.com/hashicorp/nomad/e2e/isolation"
_ "github.com/hashicorp/nomad/e2e/lifecycle"
_ "github.com/hashicorp/nomad/e2e/metrics"
_ "github.com/hashicorp/nomad/e2e/namespaces"
Expand Down
9 changes: 9 additions & 0 deletions e2e/e2eutil/allocs.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,15 @@ func AllocLogs(allocID string, logStream LogStream) (string, error) {
return Command(cmd[0], cmd[1:]...)
}

// AllocTaskLogs is a convenience wrapper that runs 'nomad alloc logs' for a
// single task of the given allocation. The '-stderr' flag is added when
// logStream is LogsStdErr; any other stream value runs the command without it.
// The result is whatever Command returns for that invocation.
func AllocTaskLogs(allocID, task string, logStream LogStream) (string, error) {
	args := []string{"alloc", "logs"}
	if logStream == LogsStdErr {
		args = append(args, "-stderr")
	}
	// The allocation ID and task name are positional and go after any flags.
	args = append(args, allocID, task)
	return Command("nomad", args...)
}

// AllocExec is a convenience wrapper that runs 'nomad alloc exec' with the
// passed execCmd via '/bin/sh -c', retrying if the task isn't ready
func AllocExec(allocID, taskID, execCmd, ns string, wc *WaitConfig) (string, error) {
Expand Down
18 changes: 18 additions & 0 deletions e2e/e2eutil/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,24 @@ func WaitForAllocRunning(t *testing.T, nomadClient *api.Client, allocID string)
})
}

// WaitForAllocTaskRunning polls the allocation identified by allocID until the
// named task reports the running state, failing the test via t.Fatalf if the
// retry budget passed to testutil.WaitForResultRetries is exhausted first.
func WaitForAllocTaskRunning(t *testing.T, nomadClient *api.Client, allocID, task string) {
	// taskIsRunning performs one poll: it pauses briefly, fetches the
	// allocation, and reports whether the task's state is "running".
	taskIsRunning := func() (bool, error) {
		time.Sleep(100 * time.Millisecond)

		alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
		if err != nil {
			return false, err
		}

		// "n/a" stands in for a task that has no recorded state yet.
		state := "n/a"
		if ts := alloc.TaskStates[task]; ts != nil {
			state = ts.State
		}

		// The error is always populated; NOTE(review): presumably the
		// retry helper only surfaces it when the bool is false - confirm.
		running := state == structs.AllocClientStatusRunning
		return running, fmt.Errorf("expected status running, but was: %s", state)
	}

	giveUp := func(err error) {
		t.Fatalf("failed to wait on alloc: %v", err)
	}

	testutil.WaitForResultRetries(retries, taskIsRunning, giveUp)
}

func WaitForAllocsRunning(t *testing.T, nomadClient *api.Client, allocIDs []string) {
for _, allocID := range allocIDs {
WaitForAllocRunning(t, nomadClient, allocID)
Expand Down
25 changes: 25 additions & 0 deletions e2e/isolation/input/alloc_exec.nomad
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Job "alloc_exec": a service job with a single group and a single exec-driver
# task that runs /bin/sleep for 30 seconds.
job "alloc_exec" {
datacenters = ["dc1"]
type = "service"

# Restrict placement to nodes whose kernel name attribute is "linux".
constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}

group "main" {
task "main" {
driver = "exec"

# The task simply sleeps; the job exists to provide a live exec-driver task.
config {
command = "/bin/sleep"
args = ["30s"]
}

# Small resource request for the sleep task.
resources {
cpu = 100
memory = 64
}
}
}
}
39 changes: 39 additions & 0 deletions e2e/isolation/input/alloc_exec_java.nomad
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Job "java_sleep": a batch job whose group compiles a small Java class with
# an exec-driver task and then runs that class with the java driver.
job "java_sleep" {
datacenters = ["dc1"]
type = "batch"

group "java" {

# Compiles the templated Sleep.java into the shared alloc directory.
task "build" {
# Non-sidecar prestart hook.
# NOTE(review): presumably runs to completion before the "sleep" task
# starts - confirm against the Nomad lifecycle documentation.
lifecycle {
hook = "prestart"
sidecar = false
}

driver = "exec"
# javac writes the compiled class files to ${NOMAD_ALLOC_DIR} (-d).
config {
command = "javac"
args = ["-d", "${NOMAD_ALLOC_DIR}", "local/Sleep.java"]
}

# Renders the Java source below to local/Sleep.java; the program's main
# method calls Thread.sleep(30000).
template {
destination = "local/Sleep.java"
data = <<EOH
public class Sleep {
public static void main(String... s) throws Exception {
Thread.sleep(30000);
}
}
EOH
}
}

# Runs the compiled Sleep class from the alloc directory via the java driver.
task "sleep" {
driver = "java"
config {
class_path = "${NOMAD_ALLOC_DIR}"
class = "Sleep"
}
}
}
}
Loading