Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rkt: create parent cgroup to enable stats #4188

Merged
merged 3 commits into from
Apr 24, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

IMPROVEMENTS:
* client: Create new process group on process startup. [[GH-3572](https://github.com/hashicorp/nomad/issues/3572)]
* driver/rkt: Enable stats collection for rkt tasks [[GH-4188](https://github.com/hashicorp/nomad/pull/4188)]

BUG FIXES:
* driver/exec: Create process group for Windows process and send Ctrl-Break signal on Shutdown [[GH-2117](https://github.com/hashicorp/nomad/issues/2117)]
Expand Down
3 changes: 1 addition & 2 deletions GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,7 @@ test: ## Run the Nomad test suite and/or the Nomad UI test suite
.PHONY: test-nomad
test-nomad: dev ## Run Nomad test suites
@echo "==> Running Nomad test suites:"
@NOMAD_TEST_RKT=1 \
go test $(if $(VERBOSE),-v) \
@go test $(if $(VERBOSE),-v) \
-cover \
-timeout=900s \
-tags="$(if $(HAS_LXC),lxc)" ./... $(if $(VERBOSE), >test.log ; echo $$? > exit-code)
Expand Down
2 changes: 1 addition & 1 deletion client/driver/exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ func TestExecDriver_HandlerExec(t *testing.T) {
handle := resp.Handle

// Exec a command that should work and dump the environment
out, code, err := handle.Exec(context.Background(), "/bin/sh", []string{"-c", "env | grep NOMAD"})
out, code, err := handle.Exec(context.Background(), "/bin/sh", []string{"-c", "env | grep ^NOMAD"})
if err != nil {
t.Fatalf("error exec'ing stat: %v", err)
}
Expand Down
17 changes: 12 additions & 5 deletions client/driver/rkt.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,10 @@ func rktGetDriverNetwork(uuid string, driverConfigPortMap map[string]string, log
return nil, lastErr
}

// This is a successful landing.
// This is a successful landing; log if its not the first attempt.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a test we can add to ensure stats work?

if try > 1 {
logger.Printf("[DEBUG] driver.rkt: retrieved network info for pod UUID %s on attempt %d", uuid, try)
}
return &cstructs.DriverNetwork{
PortMap: portmap,
IP: status.Networks[0].IP.String(),
Expand All @@ -211,7 +214,7 @@ func rktGetDriverNetwork(uuid string, driverConfigPortMap map[string]string, log
}

waitTime := getJitteredNetworkRetryTime()
logger.Printf("[DEBUG] driver.rkt: getting network info for pod UUID %s failed attempt %d: %v. Sleeping for %v", uuid, try, lastErr, waitTime)
logger.Printf("[DEBUG] driver.rkt: failed getting network info for pod UUID %s attempt %d: %v. Sleeping for %v", uuid, try, lastErr, waitTime)
time.Sleep(waitTime)
}
return nil, fmt.Errorf("timed out, last error: %v", lastErr)
Expand Down Expand Up @@ -665,9 +668,13 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse,
return nil, fmt.Errorf("failed to set executor context: %v", err)
}

// Enable ResourceLimits to place the executor in a parent cgroup of
// the rkt container. This allows stats collection via the executor to
// work just like it does for exec.
execCmd := &executor.ExecCommand{
Cmd: absPath,
Args: runArgs,
Cmd: absPath,
Args: runArgs,
ResourceLimits: true,
}
ps, err := execIntf.LaunchCmd(execCmd)
if err != nil {
Expand Down Expand Up @@ -818,7 +825,7 @@ func (h *rktHandle) Kill() error {
}

func (h *rktHandle) Stats() (*cstructs.TaskResourceUsage, error) {
return nil, DriverStatsNotImplemented
return h.executor.Stats()
}

func (h *rktHandle) run() {
Expand Down
161 changes: 94 additions & 67 deletions client/driver/rkt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,12 @@ import (
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/assert"

ctestutils "github.com/hashicorp/nomad/client/testutil"
ctestutil "github.com/hashicorp/nomad/client/testutil"
)

func TestRktVersionRegex(t *testing.T) {
ctestutil.RktCompatible(t)
t.Parallel()
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("NOMAD_TEST_RKT unset, skipping")
}

inputRkt := "rkt version 0.8.1"
inputAppc := "appc version 1.2.0"
Expand All @@ -47,12 +45,9 @@ func TestRktVersionRegex(t *testing.T) {

// The fingerprinter test should always pass, even if rkt is not installed.
func TestRktDriver_Fingerprint(t *testing.T) {
ctestutil.RktCompatible(t)
t.Parallel()
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}

ctestutils.RktCompatible(t)
ctx := testDriverContexts(t, &structs.Task{Name: "foo", Driver: "rkt"})
d := NewRktDriver(ctx.DriverCtx)
node := &structs.Node{
Expand Down Expand Up @@ -86,15 +81,11 @@ func TestRktDriver_Fingerprint(t *testing.T) {
}

func TestRktDriver_Start_DNS(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}

ctestutils.RktCompatible(t)
// TODO: use test server to load from a fixture
task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Expand Down Expand Up @@ -140,14 +131,11 @@ func TestRktDriver_Start_DNS(t *testing.T) {
}

func TestRktDriver_Start_Wait(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}

ctestutils.RktCompatible(t)
task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Expand Down Expand Up @@ -214,14 +202,11 @@ func TestRktDriver_Start_Wait(t *testing.T) {
}

func TestRktDriver_Start_Wait_Skip_Trust(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}

ctestutils.RktCompatible(t)
task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Expand Down Expand Up @@ -270,14 +255,10 @@ func TestRktDriver_Start_Wait_Skip_Trust(t *testing.T) {
}

func TestRktDriver_Start_Wait_AllocDir(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}

ctestutils.RktCompatible(t)

exp := []byte{'w', 'i', 'n'}
file := "output.txt"
Expand Down Expand Up @@ -347,13 +328,10 @@ func TestRktDriver_Start_Wait_AllocDir(t *testing.T) {
// TestRktDriver_UserGroup asserts tasks may override the user and group of the
// rkt image.
func TestRktDriver_UserGroup(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}
ctestutils.RktCompatible(t)
require := assert.New(t)

task := &structs.Task{
Expand Down Expand Up @@ -408,13 +386,11 @@ func TestRktDriver_UserGroup(t *testing.T) {
}

func TestRktTrustPrefix(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}
ctestutils.RktCompatible(t)

task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Expand Down Expand Up @@ -452,8 +428,9 @@ func TestRktTrustPrefix(t *testing.T) {
}

func TestRktTaskValidate(t *testing.T) {
ctestutil.RktCompatible(t)
t.Parallel()
ctestutils.RktCompatible(t)

task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Expand All @@ -476,16 +453,12 @@ func TestRktTaskValidate(t *testing.T) {
}
}

// TODO: Port Mapping test should be ran with proper ACI image and test the port access.
func TestRktDriver_PortsMapping(t *testing.T) {
func TestRktDriver_PortMapping(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}

ctestutils.RktCompatible(t)
task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Expand Down Expand Up @@ -525,6 +498,7 @@ func TestRktDriver_PortsMapping(t *testing.T) {
if err != nil {
t.Fatalf("err: %v", err)
}
defer resp.Handle.Kill()
if resp.Network == nil {
t.Fatalf("Expected driver to set a DriverNetwork, but it did not!")
}
Expand All @@ -549,14 +523,11 @@ func TestRktDriver_PortsMapping(t *testing.T) {
// TestRktDriver_PortsMapping_Host asserts that port_map isn't required when
// host networking is used.
func TestRktDriver_PortsMapping_Host(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}

ctestutils.RktCompatible(t)
task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Expand Down Expand Up @@ -591,6 +562,7 @@ func TestRktDriver_PortsMapping_Host(t *testing.T) {
if err != nil {
t.Fatalf("err: %v", err)
}
defer resp.Handle.Kill()
if resp.Network != nil {
t.Fatalf("No network should be returned with --net=host but found: %#v", resp.Network)
}
Expand All @@ -613,14 +585,11 @@ func TestRktDriver_PortsMapping_Host(t *testing.T) {
}

func TestRktDriver_HandlerExec(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}

ctestutils.RktCompatible(t)
task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Expand Down Expand Up @@ -650,24 +619,28 @@ func TestRktDriver_HandlerExec(t *testing.T) {
if err != nil {
t.Fatalf("err: %v", err)
}

// Give the pod a second to start
time.Sleep(time.Second)
defer resp.Handle.Kill()

// Exec a command that should work
out, code, err := resp.Handle.Exec(context.TODO(), "/etcd", []string{"--version"})
if err != nil {
t.Fatalf("error exec'ing etcd --version: %v", err)
}
if code != 0 {
t.Fatalf("expected `etcd --version` to succeed but exit code was: %d\n%s", code, string(out))
}
if expected := []byte("etcd version "); !bytes.HasPrefix(out, expected) {
t.Fatalf("expected output to start with %q but found:\n%q", expected, out)
}
testutil.WaitForResult(func() (bool, error) {
out, code, err := resp.Handle.Exec(context.TODO(), "/etcd", []string{"--version"})
if err != nil {
return false, fmt.Errorf("error exec'ing etcd --version: %v", err)
}
if code != 0 {
return false, fmt.Errorf("expected `etcd --version` to succeed but exit code was: %d\n%s", code, string(out))
}
if expected := []byte("etcd version "); !bytes.HasPrefix(out, expected) {
return false, fmt.Errorf("expected output to start with %q but found:\n%q", expected, out)
}

return true, nil
}, func(err error) {
t.Fatalf("err: %v", err)
})

// Exec a command that should fail
out, code, err = resp.Handle.Exec(context.TODO(), "/etcd", []string{"--kaljdshf"})
out, code, err := resp.Handle.Exec(context.TODO(), "/etcd", []string{"--kaljdshf"})
if err != nil {
t.Fatalf("error exec'ing bad command: %v", err)
}
Expand All @@ -683,15 +656,69 @@ func TestRktDriver_HandlerExec(t *testing.T) {
}
}

func TestRktDriver_Remove_Error(t *testing.T) {
func TestRktDriver_Stats(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")

task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Config: map[string]interface{}{
"trust_prefix": "coreos.com/etcd",
"image": "coreos.com/etcd:v2.0.4",
"command": "/etcd",
},
LogConfig: &structs.LogConfig{
MaxFiles: 10,
MaxFileSizeMB: 10,
},
Resources: &structs.Resources{
MemoryMB: 128,
CPU: 100,
},
}

ctestutils.RktCompatible(t)
ctx := testDriverContexts(t, task)
defer ctx.AllocDir.Destroy()
d := NewRktDriver(ctx.DriverCtx)

if _, err := d.Prestart(ctx.ExecCtx, task); err != nil {
t.Fatalf("error in prestart: %v", err)
}
resp, err := d.Start(ctx.ExecCtx, task)
if err != nil {
t.Fatalf("err: %v", err)
}
defer resp.Handle.Kill()

testutil.WaitForResult(func() (bool, error) {
stats, err := resp.Handle.Stats()
if err != nil {
return false, err
}
if stats == nil || stats.ResourceUsage == nil {
return false, fmt.Errorf("stats is nil")
}
if stats.ResourceUsage.CpuStats.TotalTicks == 0 {
return false, fmt.Errorf("cpu ticks unset")
}
if stats.ResourceUsage.MemoryStats.RSS == 0 {
return false, fmt.Errorf("rss stats unset")
}
return true, nil
}, func(err error) {
t.Fatalf("error: %v", err)
})

}

func TestRktDriver_Remove_Error(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}

// Removing a nonexistent pod should return an error
if err := rktRemove("00000000-0000-0000-0000-000000000000"); err == nil {
Expand Down
Loading