diff --git a/CHANGELOG.md b/CHANGELOG.md index e1edcf01a603..fcbc682d9f21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,9 @@ BUG FIXES: * core: Fixed an issue where the leader server could get into a state where it was no longer performing the periodic leader loop duties after a barrier timeout error [GH-3402] + * core: Fixes an issue with jobs that have `auto_revert` set to true, where + reverting to a previously stable job that fails to start up causes an + infinite cycle of reverts [GH-3496] * api: Apply correct memory default when task's do not specify memory explicitly [GH-3520] * cli: Fix passing Consul address via flags [GH-3504] @@ -38,8 +41,7 @@ BUG FIXES: [GH-3502] * client: Fix allocation accounting in GC and trigger GCs on allocation updates [GH-3445] - * core: Fixes an issue with jobs that have `auto_revert` set to true, where reverting - to a previously stable job that fails to start up causes an infinite cycle of reverts [GH-3496] + * driver/rkt: Remove pods on shutdown [GH-3562] * template: Fix issue where multiple environment variable templates would be parsed incorrectly when contents of one have changed after the initial rendering [GH-3529] diff --git a/client/driver/rkt.go b/client/driver/rkt.go index 7e9d99685921..164817715aad 100644 --- a/client/driver/rkt.go +++ b/client/driver/rkt.go @@ -227,6 +227,14 @@ func rktManifestMakePortMap(manifest *appcschema.PodManifest, configPortMap map[ return portMap, nil } +// rktRemove removes the pod with the given UUID after it has exited. 
+func rktRemove(uuid string) error { + cmd := exec.Command(rktCmd, "rm", uuid) + cmd.Stdout = ioutil.Discard + cmd.Stderr = ioutil.Discard + return cmd.Run() +} + // NewRktDriver is used to create a new rkt driver func NewRktDriver(ctx *DriverContext) Driver { return &RktDriver{DriverContext: *ctx} @@ -671,9 +679,9 @@ func (d *RktDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error } exec, pluginClient, err := createExecutorWithConfig(pluginConfig, d.config.LogOutput) if err != nil { - d.logger.Println("[ERROR] driver.rkt: error connecting to plugin so destroying plugin pid and user pid") + d.logger.Println("[ERR] driver.rkt: error connecting to plugin so destroying plugin pid and user pid") if e := destroyPlugin(id.PluginConfig.Pid, id.ExecutorPid); e != nil { - d.logger.Printf("[ERROR] driver.rkt: error destroying plugin and executor pid: %v", e) + d.logger.Printf("[ERR] driver.rkt: error destroying plugin and executor pid: %v", e) } return nil, fmt.Errorf("error connecting to plugin: %v", err) } @@ -771,7 +779,7 @@ func (h *rktHandle) run() { close(h.doneCh) if ps.ExitCode == 0 && werr != nil { if e := killProcess(h.executorPid); e != nil { - h.logger.Printf("[ERROR] driver.rkt: error killing user process: %v", e) + h.logger.Printf("[ERR] driver.rkt: error killing user process: %v", e) } } @@ -781,6 +789,13 @@ func (h *rktHandle) run() { } h.pluginClient.Kill() + // Remove the pod now that it has exited; a failure is only logged because the pod must then be gc'd manually + if err := rktRemove(h.uuid); err != nil { + h.logger.Printf("[ERR] driver.rkt: error removing pod %q - must gc manually: %v", h.uuid, err) + } else { + h.logger.Printf("[DEBUG] driver.rkt: removed pod %q", h.uuid) + } + // Send the results h.waitCh <- dstructs.NewWaitResult(ps.ExitCode, 0, werr) close(h.waitCh) diff --git a/client/driver/rkt_test.go b/client/driver/rkt_test.go index 45534d0611dc..d521b04525e0 100644 --- a/client/driver/rkt_test.go +++ b/client/driver/rkt_test.go @@ -8,6 +8,7 @@ import ( "fmt" "io/ioutil" "os" + "os/exec" "path/filepath" "reflect" "strings" @@ 
-166,16 +167,16 @@ func TestRktDriver_Start_Wait(t *testing.T) { if err != nil { t.Fatalf("err: %v", err) } - defer resp.Handle.Kill() + handle := resp.Handle.(*rktHandle) + defer handle.Kill() // Update should be a no-op - err = resp.Handle.Update(task) - if err != nil { + if err := handle.Update(task); err != nil { t.Fatalf("err: %v", err) } // Signal should be an error - if err = resp.Handle.Signal(syscall.SIGTERM); err == nil { - t.Fatalf("err: %v", err) + if err := handle.Signal(syscall.SIGTERM); err == nil { + t.Fatal("expected an error from Signal, got nil") } @@ -187,6 +188,18 @@ func TestRktDriver_Start_Wait(t *testing.T) { case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): t.Fatalf("timeout") } + + // Make sure the pod was removed (#3561): 'rkt status' on its UUID must fail with "no matches found" + var stderr bytes.Buffer + cmd := exec.Command(rktCmd, "status", handle.uuid) + cmd.Stdout = ioutil.Discard + cmd.Stderr = &stderr + if err := cmd.Run(); err == nil { + t.Fatalf("expected error running 'rkt status %s' on removed container", handle.uuid) + } + if out := stderr.String(); !strings.Contains(out, "no matches found") { + t.Fatalf("expected 'no matches found' but received: %s", out) + } } func TestRktDriver_Start_Wait_Skip_Trust(t *testing.T) {