hashicorp · schmichael · Nov 17, 2017 · Nov 16, 2017 · Nov 16, 2017 · dadgar
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -30,6 +30,9 @@ BUG FIXES:
  * core: Fixed an issue where the leader server could get into a state where it
    was no longer performing the periodic leader loop duties after a barrier
    timeout error [GH-3402]
+ * core: Fixes an issue with jobs that have `auto_revert` set to true, where
+   reverting to a previously stable job that fails to start up causes an
+   infinite cycle of reverts [GH-3496]
  * api: Apply correct memory default when task's do not specify memory
    explicitly [GH-3520]
  * cli: Fix passing Consul address via flags [GH-3504]
@@ -38,8 +41,7 @@ BUG FIXES:
    [GH-3502]
  * client: Fix allocation accounting in GC and trigger GCs on allocation
    updates [GH-3445]
- * core: Fixes an issue with jobs that have `auto_revert` set to true, where reverting 
-   to a previously stable job that fails to start up causes an infinite cycle of reverts [GH-3496]
+ * driver/rkt: Remove pods on shutdown [GH-3562]
  * template: Fix issue where multiple environment variable templates would be
    parsed incorrectly when contents of one have changed after the initial
    rendering [GH-3529]

diff --git a/client/driver/rkt.go b/client/driver/rkt.go
@@ -227,6 +227,22 @@ func rktManifestMakePortMap(manifest *appcschema.PodManifest, configPortMap map[
 	return portMap, nil
 }
 
+// rktRemove pod after it has exited.
+func rktRemove(uuid string) error {
+	errBuf := &bytes.Buffer{}
+	cmd := exec.Command(rktCmd, "rm", uuid)
+	cmd.Stdout = ioutil.Discard
+	cmd.Stderr = errBuf
+	if err := cmd.Run(); err != nil {
+		if msg := errBuf.String(); len(msg) > 0 {
+			return fmt.Errorf("error removing pod %q: %s", uuid, msg)
+		}
+		return err
+	}
+
+	return nil
+}
+
 // NewRktDriver is used to create a new rkt driver
 func NewRktDriver(ctx *DriverContext) Driver {
 	return &RktDriver{DriverContext: *ctx}
@@ -671,9 +687,9 @@ func (d *RktDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error
 	}
 	exec, pluginClient, err := createExecutorWithConfig(pluginConfig, d.config.LogOutput)
 	if err != nil {
-		d.logger.Println("[ERROR] driver.rkt: error connecting to plugin so destroying plugin pid and user pid")
+		d.logger.Println("[ERR] driver.rkt: error connecting to plugin so destroying plugin pid and user pid")
 		if e := destroyPlugin(id.PluginConfig.Pid, id.ExecutorPid); e != nil {
-			d.logger.Printf("[ERROR] driver.rkt: error destroying plugin and executor pid: %v", e)
+			d.logger.Printf("[ERR] driver.rkt: error destroying plugin and executor pid: %v", e)
 		}
 		return nil, fmt.Errorf("error connecting to plugin: %v", err)
 	}
@@ -771,7 +787,7 @@ func (h *rktHandle) run() {
 	close(h.doneCh)
 	if ps.ExitCode == 0 && werr != nil {
 		if e := killProcess(h.executorPid); e != nil {
-			h.logger.Printf("[ERROR] driver.rkt: error killing user process: %v", e)
+			h.logger.Printf("[ERR] driver.rkt: error killing user process: %v", e)
 		}
 	}
 
@@ -781,6 +797,13 @@ func (h *rktHandle) run() {
 	}
 	h.pluginClient.Kill()
 
+	// Remove the pod
+	if err := rktRemove(h.uuid); err != nil {
+		h.logger.Printf("[ERR] driver.rkt: error removing pod %q - must gc manually: %v", h.uuid, err)
+	} else {
+		h.logger.Printf("[DEBUG] driver.rkt: removed pod %q", h.uuid)
+	}
+
 	// Send the results
 	h.waitCh <- dstructs.NewWaitResult(ps.ExitCode, 0, werr)
 	close(h.waitCh)

diff --git a/client/driver/rkt_test.go b/client/driver/rkt_test.go
@@ -8,6 +8,7 @@ import (
 	"fmt"
 	"io/ioutil"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"reflect"
 	"strings"
@@ -166,16 +167,16 @@ func TestRktDriver_Start_Wait(t *testing.T) {
 	if err != nil {
 		t.Fatalf("err: %v", err)
 	}
-	defer resp.Handle.Kill()
+	handle := resp.Handle.(*rktHandle)
+	defer handle.Kill()
 
 	// Update should be a no-op
-	err = resp.Handle.Update(task)
-	if err != nil {
+	if err := handle.Update(task); err != nil {
 		t.Fatalf("err: %v", err)
 	}
 
 	// Signal should be an error
-	if err = resp.Handle.Signal(syscall.SIGTERM); err == nil {
+	if err := resp.Handle.Signal(syscall.SIGTERM); err == nil {
 		t.Fatalf("err: %v", err)
 	}
 
@@ -187,6 +188,18 @@ func TestRktDriver_Start_Wait(t *testing.T) {
 	case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second):
 		t.Fatalf("timeout")
 	}
+
+	// Make sure pod was removed #3561
+	var stderr bytes.Buffer
+	cmd := exec.Command(rktCmd, "status", handle.uuid)
+	cmd.Stdout = ioutil.Discard
+	cmd.Stderr = &stderr
+	if err := cmd.Run(); err == nil {
+		t.Fatalf("expected error running 'rkt status %s' on removed container", handle.uuid)
+	}
+	if out := stderr.String(); !strings.Contains(out, "no matches found") {
+		t.Fatalf("expected 'no matches found' but received: %s", out)
+	}
 }
 
 func TestRktDriver_Start_Wait_Skip_Trust(t *testing.T) {
@@ -576,3 +589,23 @@ func TestRktDriver_HandlerExec(t *testing.T) {
 		t.Fatalf("error killing handle: %v", err)
 	}
 }
+
+func TestRktDriver_Remove_Error(t *testing.T) {
+	if !testutil.IsTravis() {
+		t.Parallel()
+	}
+	if os.Getenv("NOMAD_TEST_RKT") == "" {
+		t.Skip("skipping rkt tests")
+	}
+
+	ctestutils.RktCompatible(t)
+
+	// Removing a non-existent pod should return an error
+	if err := rktRemove("00000000-0000-0000-0000-000000000000"); err == nil {
+		t.Fatalf("expected an error")
+	}
+
+	if err := rktRemove("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"); err == nil {
+		t.Fatalf("expected an error")
+	}
+}