diff --git a/CHANGELOG.md b/CHANGELOG.md index e1edcf01a603..fcbc682d9f21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,9 @@ BUG FIXES: * core: Fixed an issue where the leader server could get into a state where it was no longer performing the periodic leader loop duties after a barrier timeout error [GH-3402] + * core: Fixes an issue with jobs that have `auto_revert` set to true, where + reverting to a previously stable job that fails to start up causes an + infinite cycle of reverts [GH-3496] * api: Apply correct memory default when task's do not specify memory explicitly [GH-3520] * cli: Fix passing Consul address via flags [GH-3504] @@ -38,8 +41,7 @@ BUG FIXES: [GH-3502] * client: Fix allocation accounting in GC and trigger GCs on allocation updates [GH-3445] - * core: Fixes an issue with jobs that have `auto_revert` set to true, where reverting - to a previously stable job that fails to start up causes an infinite cycle of reverts [GH-3496] + * driver/rkt: Remove pods on shutdown [GH-3562] * template: Fix issue where multiple environment variable templates would be parsed incorrectly when contents of one have changed after the initial rendering [GH-3529] diff --git a/client/driver/rkt.go b/client/driver/rkt.go index 7e9d99685921..206d5f94fc05 100644 --- a/client/driver/rkt.go +++ b/client/driver/rkt.go @@ -227,6 +227,22 @@ func rktManifestMakePortMap(manifest *appcschema.PodManifest, configPortMap map[ return portMap, nil } +// rktRemove pod after it has exited. +func rktRemove(uuid string) error { + errBuf := &bytes.Buffer{} + cmd := exec.Command(rktCmd, "rm", uuid) + cmd.Stdout = ioutil.Discard + cmd.Stderr = errBuf + if err := cmd.Run(); err != nil { + if msg := errBuf.String(); len(msg) > 0 { + return fmt.Errorf("error removing pod %q: %s", uuid, msg) + } + return err + } + + return nil +} + // NewRktDriver is used to create a new rkt driver func NewRktDriver(ctx *DriverContext) Driver { return &RktDriver{DriverContext: *ctx} @@ -671,9 +687,9 @@ func (d *RktDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error } exec, pluginClient, err := createExecutorWithConfig(pluginConfig, d.config.LogOutput) if err != nil { - d.logger.Println("[ERROR] driver.rkt: error connecting to plugin so destroying plugin pid and user pid") + d.logger.Println("[ERR] driver.rkt: error connecting to plugin so destroying plugin pid and user pid") if e := destroyPlugin(id.PluginConfig.Pid, id.ExecutorPid); e != nil { - d.logger.Printf("[ERROR] driver.rkt: error destroying plugin and executor pid: %v", e) + d.logger.Printf("[ERR] driver.rkt: error destroying plugin and executor pid: %v", e) } return nil, fmt.Errorf("error connecting to plugin: %v", err) } @@ -771,7 +787,7 @@ func (h *rktHandle) run() { close(h.doneCh) if ps.ExitCode == 0 && werr != nil { if e := killProcess(h.executorPid); e != nil { - h.logger.Printf("[ERROR] driver.rkt: error killing user process: %v", e) + h.logger.Printf("[ERR] driver.rkt: error killing user process: %v", e) } } @@ -781,6 +797,13 @@ func (h *rktHandle) run() { } h.pluginClient.Kill() + // Remove the pod + if err := rktRemove(h.uuid); err != nil { + h.logger.Printf("[ERR] driver.rkt: error removing pod %q - must gc manually: %v", h.uuid, err) + } else { + h.logger.Printf("[DEBUG] driver.rkt: removed pod %q", h.uuid) + } + // Send the results h.waitCh <- dstructs.NewWaitResult(ps.ExitCode, 0, werr) close(h.waitCh) diff --git a/client/driver/rkt_test.go b/client/driver/rkt_test.go index 45534d0611dc..c41f749edc51 100644 --- a/client/driver/rkt_test.go +++ b/client/driver/rkt_test.go @@ -8,6 +8,7 @@ import ( "fmt" "io/ioutil" "os" + "os/exec" "path/filepath" "reflect" "strings" @@ -166,16 +167,16 @@ func TestRktDriver_Start_Wait(t *testing.T) { if err != nil { t.Fatalf("err: %v", err) } - defer resp.Handle.Kill() + handle := resp.Handle.(*rktHandle) + defer handle.Kill() // Update should be a no-op - err = resp.Handle.Update(task) - if err != nil { + if err := handle.Update(task); err != nil { t.Fatalf("err: %v", err) } // Signal should be an error - if err = resp.Handle.Signal(syscall.SIGTERM); err == nil { + if err := resp.Handle.Signal(syscall.SIGTERM); err == nil { t.Fatalf("err: %v", err) } @@ -187,6 +188,18 @@ func TestRktDriver_Start_Wait(t *testing.T) { case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): t.Fatalf("timeout") } + + // Make sure pod was removed #3561 + var stderr bytes.Buffer + cmd := exec.Command(rktCmd, "status", handle.uuid) + cmd.Stdout = ioutil.Discard + cmd.Stderr = &stderr + if err := cmd.Run(); err == nil { + t.Fatalf("expected error running 'rkt status %s' on removed container", handle.uuid) + } + if out := stderr.String(); !strings.Contains(out, "no matches found") { + t.Fatalf("expected 'no matches found' but received: %s", out) + } } func TestRktDriver_Start_Wait_Skip_Trust(t *testing.T) { @@ -576,3 +589,23 @@ func TestRktDriver_HandlerExec(t *testing.T) { t.Fatalf("error killing handle: %v", err) } } + +func TestRktDriver_Remove_Error(t *testing.T) { + if !testutil.IsTravis() { + t.Parallel() + } + if os.Getenv("NOMAD_TEST_RKT") == "" { + t.Skip("skipping rkt tests") + } + + ctestutils.RktCompatible(t) + + // Removing a non-existent pod should return an error + if err := rktRemove("00000000-0000-0000-0000-000000000000"); err == nil { + t.Fatalf("expected an error") + } + + if err := rktRemove("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"); err == nil { + t.Fatalf("expected an error") + } +}