Skip to content

Commit

Permalink
drivers/exec: Don't inherit Nomad oom_score_adj value (#10698)
Browse files Browse the repository at this point in the history
Explicitly set the `oom_score_adj` value for `exec` and `java` tasks.

We recommend that the Nomad service run with a low oom_score_adj value
(e.g. -1000) so that the Nomad agent is not OOM-killed if the node is
oversubscribed.

However, Nomad's workloads should not inherit the Nomad process's
oom_score_adj value, which is what happens by default.

Fixes #10663
  • Loading branch information
Mahmood Ali committed Jun 3, 2021
1 parent 0949271 commit 0be58d7
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 0 deletions.
4 changes: 4 additions & 0 deletions drivers/shared/executor/executor_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -764,6 +764,10 @@ func newLibcontainerConfig(command *ExecCommand) (*lconfigs.Config, error) {

configureCapabilities(cfg, command)

// children should not inherit Nomad agent oom_score_adj value
oomScoreAdj := 0
cfg.OomScoreAdj = &oomScoreAdj

if err := configureIsolation(cfg, command); err != nil {
return nil, err
}
Expand Down
56 changes: 56 additions & 0 deletions drivers/shared/executor/executor_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,62 @@ func TestExecutor_EscapeContainer(t *testing.T) {
require.NoError(err)
}

// TestExecutor_DoesNotInheritOomScoreAdj checks that a command launched via the
// isolated executor reports an oom_score_adj of 0, even though the process
// running the test (standing in for the Nomad agent/executor) has been given a
// different value.
func TestExecutor_DoesNotInheritOomScoreAdj(t *testing.T) {
	t.Parallel()
	testutil.ExecCompatible(t)

	const procOomFile = "/proc/self/oom_score_adj"

	// Remember the current value so it can be written back when the test ends.
	saved, err := os.ReadFile(procOomFile)
	require.NoError(t, err, "reading oom_score_adj")

	// Give this process a non-zero value; a child that simply inherited it
	// would print -100 instead of 0.
	require.NoError(t, os.WriteFile(procOomFile, []byte("-100"), 0644), "setting temporary oom_score_adj")
	defer func() {
		restoreErr := os.WriteFile(procOomFile, saved, 0644)
		require.NoError(t, restoreErr, "restoring oom_score_adj")
	}()

	harness := testExecutorCommandWithChroot(t)
	cmd := harness.command
	taskDir := harness.allocDir
	defer taskDir.Destroy()

	// The task just prints its own oom_score_adj to stdout.
	cmd.ResourceLimits = true
	cmd.Cmd = "/bin/bash"
	cmd.Args = []string{"-c", "cat /proc/self/oom_score_adj"}

	exe := NewExecutorWithIsolation(testlog.HCLogger(t))
	defer exe.Shutdown("SIGKILL", 0)

	_, err = exe.Launch(cmd)
	require.NoError(t, err)

	// Wait for the task to exit on a separate goroutine so the test can bound
	// the wait with a timeout.
	done := make(chan struct{})
	go func() {
		exe.Wait(context.Background())
		close(done)
	}()

	select {
	case <-time.After(5 * time.Second):
		require.Fail(t, "timeout waiting for exec to shutdown")
	case <-done:
		// task exited in time
	}

	const want = "0"

	// Poll the captured stdout until it holds the expected value.
	check := func() (bool, error) {
		got := strings.TrimSpace(harness.stdout.String())
		if got == want {
			return true, nil
		}
		return false, fmt.Errorf("oom_score_adj didn't match: want\n%v\n; got:\n%v\n", want, got)
	}
	onFailure := func(err error) {
		require.NoError(t, err)
	}
	tu.WaitForResult(check, onFailure)
}

func TestExecutor_Capabilities(t *testing.T) {
t.Parallel()
testutil.ExecCompatible(t)
Expand Down

0 comments on commit 0be58d7

Please sign in to comment.