Skip to content

Commit

Permalink
Merge pull request #4188 from hashicorp/f-rkt-stats
Browse files Browse the repository at this point in the history
rkt: create parent cgroup to enable stats
  • Loading branch information
schmichael committed Apr 24, 2018
2 parents 8cba531 + a98ec5a commit c79a820
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 80 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
IMPROVEMENTS:
* api: Add /v1/jobs/parse api endpoint for rendering HCL jobs files as JSON [[GH-2782](https://github.com/hashicorp/nomad/issues/2782)]
* client: Create new process group on process startup. [[GH-3572](https://github.com/hashicorp/nomad/issues/3572)]
* driver/rkt: Enable stats collection for rkt tasks [[GH-4188](https://github.com/hashicorp/nomad/pull/4188)]

BUG FIXES:
* driver/exec: Create process group for Windows process and send Ctrl-Break signal on Shutdown [[GH-4153](https://github.com/hashicorp/nomad/pull/4153)]
Expand Down
3 changes: 1 addition & 2 deletions GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,7 @@ test: ## Run the Nomad test suite and/or the Nomad UI test suite
.PHONY: test-nomad
test-nomad: dev ## Run Nomad test suites
@echo "==> Running Nomad test suites:"
@NOMAD_TEST_RKT=1 \
go test $(if $(VERBOSE),-v) \
@go test $(if $(VERBOSE),-v) \
-cover \
-timeout=900s \
-tags="$(if $(HAS_LXC),lxc)" ./... $(if $(VERBOSE), >test.log ; echo $$? > exit-code)
Expand Down
2 changes: 1 addition & 1 deletion client/driver/exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ func TestExecDriver_HandlerExec(t *testing.T) {
handle := resp.Handle

// Exec a command that should work and dump the environment
out, code, err := handle.Exec(context.Background(), "/bin/sh", []string{"-c", "env | grep NOMAD"})
out, code, err := handle.Exec(context.Background(), "/bin/sh", []string{"-c", "env | grep ^NOMAD"})
if err != nil {
t.Fatalf("error exec'ing stat: %v", err)
}
Expand Down
17 changes: 12 additions & 5 deletions client/driver/rkt.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,10 @@ func rktGetDriverNetwork(uuid string, driverConfigPortMap map[string]string, log
return nil, lastErr
}

// This is a successful landing.
// This is a successful landing; log if its not the first attempt.
if try > 1 {
logger.Printf("[DEBUG] driver.rkt: retrieved network info for pod UUID %s on attempt %d", uuid, try)
}
return &cstructs.DriverNetwork{
PortMap: portmap,
IP: status.Networks[0].IP.String(),
Expand All @@ -211,7 +214,7 @@ func rktGetDriverNetwork(uuid string, driverConfigPortMap map[string]string, log
}

waitTime := getJitteredNetworkRetryTime()
logger.Printf("[DEBUG] driver.rkt: getting network info for pod UUID %s failed attempt %d: %v. Sleeping for %v", uuid, try, lastErr, waitTime)
logger.Printf("[DEBUG] driver.rkt: failed getting network info for pod UUID %s attempt %d: %v. Sleeping for %v", uuid, try, lastErr, waitTime)
time.Sleep(waitTime)
}
return nil, fmt.Errorf("timed out, last error: %v", lastErr)
Expand Down Expand Up @@ -665,9 +668,13 @@ func (d *RktDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse,
return nil, fmt.Errorf("failed to set executor context: %v", err)
}

// Enable ResourceLimits to place the executor in a parent cgroup of
// the rkt container. This allows stats collection via the executor to
// work just like it does for exec.
execCmd := &executor.ExecCommand{
Cmd: absPath,
Args: runArgs,
Cmd: absPath,
Args: runArgs,
ResourceLimits: true,
}
ps, err := execIntf.LaunchCmd(execCmd)
if err != nil {
Expand Down Expand Up @@ -818,7 +825,7 @@ func (h *rktHandle) Kill() error {
}

func (h *rktHandle) Stats() (*cstructs.TaskResourceUsage, error) {
return nil, DriverStatsNotImplemented
return h.executor.Stats()
}

func (h *rktHandle) run() {
Expand Down
161 changes: 94 additions & 67 deletions client/driver/rkt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,12 @@ import (
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/assert"

ctestutils "github.com/hashicorp/nomad/client/testutil"
ctestutil "github.com/hashicorp/nomad/client/testutil"
)

func TestRktVersionRegex(t *testing.T) {
ctestutil.RktCompatible(t)
t.Parallel()
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("NOMAD_TEST_RKT unset, skipping")
}

inputRkt := "rkt version 0.8.1"
inputAppc := "appc version 1.2.0"
Expand All @@ -47,12 +45,9 @@ func TestRktVersionRegex(t *testing.T) {

// The fingerprinter test should always pass, even if rkt is not installed.
func TestRktDriver_Fingerprint(t *testing.T) {
ctestutil.RktCompatible(t)
t.Parallel()
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}

ctestutils.RktCompatible(t)
ctx := testDriverContexts(t, &structs.Task{Name: "foo", Driver: "rkt"})
d := NewRktDriver(ctx.DriverCtx)
node := &structs.Node{
Expand Down Expand Up @@ -86,15 +81,11 @@ func TestRktDriver_Fingerprint(t *testing.T) {
}

func TestRktDriver_Start_DNS(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}

ctestutils.RktCompatible(t)
// TODO: use test server to load from a fixture
task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Expand Down Expand Up @@ -140,14 +131,11 @@ func TestRktDriver_Start_DNS(t *testing.T) {
}

func TestRktDriver_Start_Wait(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}

ctestutils.RktCompatible(t)
task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Expand Down Expand Up @@ -214,14 +202,11 @@ func TestRktDriver_Start_Wait(t *testing.T) {
}

func TestRktDriver_Start_Wait_Skip_Trust(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}

ctestutils.RktCompatible(t)
task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Expand Down Expand Up @@ -270,14 +255,10 @@ func TestRktDriver_Start_Wait_Skip_Trust(t *testing.T) {
}

func TestRktDriver_Start_Wait_AllocDir(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}

ctestutils.RktCompatible(t)

exp := []byte{'w', 'i', 'n'}
file := "output.txt"
Expand Down Expand Up @@ -347,13 +328,10 @@ func TestRktDriver_Start_Wait_AllocDir(t *testing.T) {
// TestRktDriver_UserGroup asserts tasks may override the user and group of the
// rkt image.
func TestRktDriver_UserGroup(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}
ctestutils.RktCompatible(t)
require := assert.New(t)

task := &structs.Task{
Expand Down Expand Up @@ -408,13 +386,11 @@ func TestRktDriver_UserGroup(t *testing.T) {
}

func TestRktTrustPrefix(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}
ctestutils.RktCompatible(t)

task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Expand Down Expand Up @@ -452,8 +428,9 @@ func TestRktTrustPrefix(t *testing.T) {
}

func TestRktTaskValidate(t *testing.T) {
ctestutil.RktCompatible(t)
t.Parallel()
ctestutils.RktCompatible(t)

task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Expand All @@ -476,16 +453,12 @@ func TestRktTaskValidate(t *testing.T) {
}
}

// TODO: Port Mapping test should be ran with proper ACI image and test the port access.
func TestRktDriver_PortsMapping(t *testing.T) {
func TestRktDriver_PortMapping(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}

ctestutils.RktCompatible(t)
task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Expand Down Expand Up @@ -525,6 +498,7 @@ func TestRktDriver_PortsMapping(t *testing.T) {
if err != nil {
t.Fatalf("err: %v", err)
}
defer resp.Handle.Kill()
if resp.Network == nil {
t.Fatalf("Expected driver to set a DriverNetwork, but it did not!")
}
Expand All @@ -549,14 +523,11 @@ func TestRktDriver_PortsMapping(t *testing.T) {
// TestRktDriver_PortsMapping_Host asserts that port_map isn't required when
// host networking is used.
func TestRktDriver_PortsMapping_Host(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}

ctestutils.RktCompatible(t)
task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Expand Down Expand Up @@ -591,6 +562,7 @@ func TestRktDriver_PortsMapping_Host(t *testing.T) {
if err != nil {
t.Fatalf("err: %v", err)
}
defer resp.Handle.Kill()
if resp.Network != nil {
t.Fatalf("No network should be returned with --net=host but found: %#v", resp.Network)
}
Expand All @@ -613,14 +585,11 @@ func TestRktDriver_PortsMapping_Host(t *testing.T) {
}

func TestRktDriver_HandlerExec(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")
}

ctestutils.RktCompatible(t)
task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Expand Down Expand Up @@ -650,24 +619,28 @@ func TestRktDriver_HandlerExec(t *testing.T) {
if err != nil {
t.Fatalf("err: %v", err)
}

// Give the pod a second to start
time.Sleep(time.Second)
defer resp.Handle.Kill()

// Exec a command that should work
out, code, err := resp.Handle.Exec(context.TODO(), "/etcd", []string{"--version"})
if err != nil {
t.Fatalf("error exec'ing etcd --version: %v", err)
}
if code != 0 {
t.Fatalf("expected `etcd --version` to succeed but exit code was: %d\n%s", code, string(out))
}
if expected := []byte("etcd version "); !bytes.HasPrefix(out, expected) {
t.Fatalf("expected output to start with %q but found:\n%q", expected, out)
}
testutil.WaitForResult(func() (bool, error) {
out, code, err := resp.Handle.Exec(context.TODO(), "/etcd", []string{"--version"})
if err != nil {
return false, fmt.Errorf("error exec'ing etcd --version: %v", err)
}
if code != 0 {
return false, fmt.Errorf("expected `etcd --version` to succeed but exit code was: %d\n%s", code, string(out))
}
if expected := []byte("etcd version "); !bytes.HasPrefix(out, expected) {
return false, fmt.Errorf("expected output to start with %q but found:\n%q", expected, out)
}

return true, nil
}, func(err error) {
t.Fatalf("err: %v", err)
})

// Exec a command that should fail
out, code, err = resp.Handle.Exec(context.TODO(), "/etcd", []string{"--kaljdshf"})
out, code, err := resp.Handle.Exec(context.TODO(), "/etcd", []string{"--kaljdshf"})
if err != nil {
t.Fatalf("error exec'ing bad command: %v", err)
}
Expand All @@ -683,15 +656,69 @@ func TestRktDriver_HandlerExec(t *testing.T) {
}
}

func TestRktDriver_Remove_Error(t *testing.T) {
func TestRktDriver_Stats(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}
if os.Getenv("NOMAD_TEST_RKT") == "" {
t.Skip("skipping rkt tests")

task := &structs.Task{
Name: "etcd",
Driver: "rkt",
Config: map[string]interface{}{
"trust_prefix": "coreos.com/etcd",
"image": "coreos.com/etcd:v2.0.4",
"command": "/etcd",
},
LogConfig: &structs.LogConfig{
MaxFiles: 10,
MaxFileSizeMB: 10,
},
Resources: &structs.Resources{
MemoryMB: 128,
CPU: 100,
},
}

ctestutils.RktCompatible(t)
ctx := testDriverContexts(t, task)
defer ctx.AllocDir.Destroy()
d := NewRktDriver(ctx.DriverCtx)

if _, err := d.Prestart(ctx.ExecCtx, task); err != nil {
t.Fatalf("error in prestart: %v", err)
}
resp, err := d.Start(ctx.ExecCtx, task)
if err != nil {
t.Fatalf("err: %v", err)
}
defer resp.Handle.Kill()

testutil.WaitForResult(func() (bool, error) {
stats, err := resp.Handle.Stats()
if err != nil {
return false, err
}
if stats == nil || stats.ResourceUsage == nil {
return false, fmt.Errorf("stats is nil")
}
if stats.ResourceUsage.CpuStats.TotalTicks == 0 {
return false, fmt.Errorf("cpu ticks unset")
}
if stats.ResourceUsage.MemoryStats.RSS == 0 {
return false, fmt.Errorf("rss stats unset")
}
return true, nil
}, func(err error) {
t.Fatalf("error: %v", err)
})

}

func TestRktDriver_Remove_Error(t *testing.T) {
ctestutil.RktCompatible(t)
if !testutil.IsTravis() {
t.Parallel()
}

// Removing a nonexistent pod should return an error
if err := rktRemove("00000000-0000-0000-0000-000000000000"); err == nil {
Expand Down
Loading

0 comments on commit c79a820

Please sign in to comment.