From fb088f99652552bd767f31ba25a586b682e8e3d6 Mon Sep 17 00:00:00 2001
From: Dennis Marttinen <dennis@weave.works>
Date: Tue, 9 Jul 2019 14:03:20 +0300
Subject: [PATCH 1/5] Support removing CNI networking configuration post-run

---
 cmd/ignite-spawn/ignite-spawn.go |  3 +++
 pkg/metadata/vmmd/vm.go          | 25 +++++++++++++++++++++++++
 pkg/operations/start.go          |  2 +-
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/cmd/ignite-spawn/ignite-spawn.go b/cmd/ignite-spawn/ignite-spawn.go
index 3c79655fa..fa091fe3b 100644
--- a/cmd/ignite-spawn/ignite-spawn.go
+++ b/cmd/ignite-spawn/ignite-spawn.go
@@ -61,6 +61,9 @@ func StartVM(co *options) error {
 	// Remove the snapshot overlay post-run, which also removes the detached backing loop devices
 	defer co.vm.RemoveSnapshot()
 
+	// Remove CNI networking post-run
+	defer co.vm.RemoveCNINetworking()
+
 	// Remove the IP addresses post-run
 	defer co.vm.ClearIPAddresses()
 
diff --git a/pkg/metadata/vmmd/vm.go b/pkg/metadata/vmmd/vm.go
index d6738330a..a187fcc97 100644
--- a/pkg/metadata/vmmd/vm.go
+++ b/pkg/metadata/vmmd/vm.go
@@ -2,6 +2,8 @@ package vmmd
 
 import (
 	"fmt"
+	"github.com/weaveworks/ignite/pkg/network/cni"
+	"github.com/weaveworks/ignite/pkg/runtime/docker"
 	"io/ioutil"
 	"math"
 	"net"
@@ -170,6 +172,29 @@ func (md *VM) ClearPortMappings() {
 	md.Spec.Ports = nil
 }
 
+func (md *VM) RemoveCNINetworking() error {
+	// Skip all other network modes
+	if md.Spec.NetworkMode != api.NetworkModeCNI {
+		return nil
+	}
+
+	// TODO: Both the client and networkPlugin variables should be constructed once,
+	// and accessible throughout the program.
+	client, err := docker.GetDockerClient()
+	if err != nil {
+		return err
+	}
+
+	networkPlugin, err := cni.GetCNINetworkPlugin(client)
+	if err != nil {
+		return err
+	}
+
+	// TODO: The Docker hostname is not a robust way to get this, but
+	// Ignite needs to be daemonized to have a proper implementation
+	return networkPlugin.RemoveContainerNetwork(os.Getenv("HOSTNAME"))
+}
+
 // Generate a new SSH keypair for the vm
 func (md *VM) newSSHKeypair() (string, error) {
 	privKeyPath := path.Join(md.ObjectPath(), fmt.Sprintf(constants.VM_SSH_KEY_TEMPLATE, md.GetUID()))
diff --git a/pkg/operations/start.go b/pkg/operations/start.go
index f9e2713e4..5d7063233 100644
--- a/pkg/operations/start.go
+++ b/pkg/operations/start.go
@@ -15,7 +15,7 @@ import (
 )
 
 func StartVM(vm *vmmd.VM, debug bool) error {
-	// Make sure the VM container does not exist. Don't care about the error
+	// Make sure the VM container does not exist. Don't care about the error.
 	RemoveVMContainer(vm.VM)
 
 	// Setup the snapshot overlay filesystem

From 61356f96a6d9beb6eb6739f0f3834553f9cc541f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lucas=20K=C3=A4ldstr=C3=B6m?= <lucas@weave.works>
Date: Tue, 9 Jul 2019 17:18:04 +0300
Subject: [PATCH 2/5] wip

---
 cmd/ignite-spawn/ignite-spawn.go |  3 ---
 pkg/metadata/vmmd/vm.go          | 11 +++++------
 pkg/operations/remove.go         |  3 +++
 pkg/operations/start.go          |  5 ++++-
 4 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/cmd/ignite-spawn/ignite-spawn.go b/cmd/ignite-spawn/ignite-spawn.go
index 5beac4ea3..a957c4b6d 100644
--- a/cmd/ignite-spawn/ignite-spawn.go
+++ b/cmd/ignite-spawn/ignite-spawn.go
@@ -61,9 +61,6 @@ func StartVM(co *options) error {
 	// Remove the snapshot overlay post-run, which also removes the detached backing loop devices
 	defer co.vm.RemoveSnapshot()
 
-	// Remove CNI networking post-run
-	defer co.vm.RemoveCNINetworking()
-
 	// Remove the IP addresses post-run
 	defer co.vm.ClearIPAddresses()
 
diff --git a/pkg/metadata/vmmd/vm.go b/pkg/metadata/vmmd/vm.go
index daaf5d422..f36ea1f58 100644
--- a/pkg/metadata/vmmd/vm.go
+++ b/pkg/metadata/vmmd/vm.go
@@ -169,7 +169,8 @@ func (md *VM) ClearIPAddresses() {
 	md.Status.IPAddresses = nil
 }
 
-func (md *VM) RemoveCNINetworking() error {
+func (md *VM) RemoveCNINetworking(containerID string) error {
+	// TODO: Get containerID from the API object
 	// Skip all other network modes
 	if md.Spec.Network.Mode != api.NetworkModeCNI {
 		return nil
@@ -186,11 +187,9 @@ func (md *VM) RemoveCNINetworking() error {
 	if err != nil {
 		return err
 	}
-
-	// TODO: The Docker hostname is not a robust way to get this, but
-	// Ignite needs to be daemonized to have a proper implementation
-	// Also, the hostname is the shortened container ID which might not work here
-	return networkPlugin.RemoveContainerNetwork(os.Getenv("HOSTNAME"))
+	
+	fmt.Printf("Trying to remove the container with ID %q from the CNI network\n", containerID)
+	return networkPlugin.RemoveContainerNetwork(containerID)
 }
 
 // Generate a new SSH keypair for the vm
diff --git a/pkg/operations/remove.go b/pkg/operations/remove.go
index 8e23d51d0..b9a6516e9 100644
--- a/pkg/operations/remove.go
+++ b/pkg/operations/remove.go
@@ -53,6 +53,9 @@ func RemoveVMContainer(vm meta.Object) error {
 		return fmt.Errorf("failed to remove container for VM %q: %v", vm.GetUID(), err)
 	}
 
+	// TODO: Call vm.RemoveCNINetworking(containerID) here. Need to lookup the container ID
+	// first, or get it from the API object
+
 	return nil
 }
 
diff --git a/pkg/operations/start.go b/pkg/operations/start.go
index f7eab0e4f..64261afc5 100644
--- a/pkg/operations/start.go
+++ b/pkg/operations/start.go
@@ -72,7 +72,10 @@ func StartVM(vm *vmmd.VM, debug bool) error {
 		}
 		log.Printf("Networking is now handled by CNI")
 	}
-	log.Printf("Started Firecracker VM %q in a container with ID %q", vm.GetUID(), containerID)
+
+	// TODO: Patch the API object here with containerID information
+
+	log.Printf("Started Firecracker VM %q in a container with ID %q", vm.GetUID(), containerID[:10])
 	// TODO: Follow-up the container here with a defer, or dedicated goroutine. We should output
 	// if it started successfully or not
 	return nil

From 872dcb8a830fcffe4d12f8493d5abb3e59077f8f Mon Sep 17 00:00:00 2001
From: Dennis Marttinen <dennis@weave.works>
Date: Mon, 22 Jul 2019 17:19:02 +0300
Subject: [PATCH 3/5] Fix cache indexing Objects without UID returned by
 Storage.New() Also add a safety check for ensuring that cached Objects have
 their UID set.

---
 pkg/storage/cache/cache.go | 16 ++++++----------
 pkg/storage/cache/index.go |  6 ++++++
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/pkg/storage/cache/cache.go b/pkg/storage/cache/cache.go
index b9a5ec13a..bd636a579 100644
--- a/pkg/storage/cache/cache.go
+++ b/pkg/storage/cache/cache.go
@@ -37,16 +37,12 @@ func NewCache(backingStorage storage.Storage) Cache {
 	return c
 }
 
-func (c *cache) New(gvk schema.GroupVersionKind) (obj meta.Object, err error) {
-	// Request the storage to create the Object
-	obj, err = c.storage.New(gvk)
-
-	// If no errors occurred, cache it
-	if err == nil {
-		err = c.index.store(obj)
-	}
-
-	return
+func (c *cache) New(gvk schema.GroupVersionKind) (meta.Object, error) {
+	// Request the storage to create the Object. The
+	// newly generated Object has not got an UID which
+	// is required for indexing, so just return it
+	// without storing it into the cache
+	return c.storage.New(gvk)
 }
 
 func (c *cache) Get(gvk schema.GroupVersionKind, uid meta.UID) (obj meta.Object, err error) {
diff --git a/pkg/storage/cache/index.go b/pkg/storage/cache/index.go
index 1523fa6d8..a6b0b0dc8 100644
--- a/pkg/storage/cache/index.go
+++ b/pkg/storage/cache/index.go
@@ -56,6 +56,12 @@ func (i *index) loadAll() ([]meta.Object, error) {
 }
 
 func store(i *index, obj meta.Object, apiType bool) error {
+	// If store is called for an invalid Object lacking an UID,
+	// panic and print the stack trace. This should never happen.
+	if obj.GetUID() == "" {
+		panic("Attempt to cache invalid Object: missing UID")
+	}
+
 	co, err := newCacheObject(i.storage, obj, apiType)
 	if err != nil {
 		return err

From 52aa28d061bec832f23c4de235a27d00f2617d86 Mon Sep 17 00:00:00 2001
From: Dennis Marttinen <dennis@weave.works>
Date: Mon, 22 Jul 2019 17:21:07 +0300
Subject: [PATCH 4/5] Error check VM container removal, support removing CNI
 networking

---
 pkg/operations/remove.go     | 33 +++++++++++++++++++++++++--------
 pkg/operations/start.go      |  6 ++++--
 pkg/runtime/docker/client.go | 13 +++++++++++++
 pkg/runtime/types.go         |  7 +++++++
 4 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/pkg/operations/remove.go b/pkg/operations/remove.go
index 70ebc367e..1550d27ee 100644
--- a/pkg/operations/remove.go
+++ b/pkg/operations/remove.go
@@ -2,8 +2,8 @@ package operations
 
 import (
 	"fmt"
-	"log"
 
+	log "github.com/sirupsen/logrus"
 	api "github.com/weaveworks/ignite/pkg/apis/ignite"
 	meta "github.com/weaveworks/ignite/pkg/apis/meta/v1alpha1"
 	"github.com/weaveworks/ignite/pkg/client"
@@ -29,8 +29,10 @@ func RemoveVM(c *client.Client, vm *api.VM) error {
 		return err
 	}
 
-	// Force-remove the VM container. Don't care about the error.
-	_ = RemoveVMContainer(vm)
+	// Remove the VM container if it exists
+	if err := RemoveVMContainer(vm); err != nil {
+		return err
+	}
 
 	if logs.Quiet {
 		fmt.Println(vm.GetUID())
@@ -42,15 +44,19 @@ func RemoveVM(c *client.Client, vm *api.VM) error {
 }
 
 func RemoveVMContainer(vm meta.Object) error {
+	containerName := util.NewPrefixer().Prefix(vm.GetUID())
+	result, err := providers.Runtime.InspectContainer(containerName)
+	if err != nil {
+		return nil // The container doesn't exist, bail out
+	}
+
 	// Remove the VM container
-	if err := providers.Runtime.RemoveContainer(util.NewPrefixer().Prefix(vm.GetUID())); err != nil {
+	if err := providers.Runtime.RemoveContainer(result.ID); err != nil {
 		return fmt.Errorf("failed to remove container for VM %q: %v", vm.GetUID(), err)
 	}
 
-	// TODO: Call vm.RemoveCNINetworking(containerID) here. Need to lookup the container ID
-	// first, or get it from the API object
-
-	return nil
+	// Remove the CNI networking of the VM
+	return removeCNINetworking(vm.(*api.VM), result.ID)
 }
 
 // StopVM stops or kills a VM
@@ -83,3 +89,14 @@ func StopVM(vm *api.VM, kill, silent bool) error {
 
 	return nil
 }
+
+func removeCNINetworking(vm *api.VM, containerID string) error {
+	// Skip all other network modes
+	if vm.Spec.Network.Mode != api.NetworkModeCNI {
+		return nil
+	}
+
+	// Perform the removal
+	log.Printf("Trying to remove the container with ID %q from the CNI network", containerID)
+	return providers.NetworkPlugin.RemoveContainerNetwork(containerID)
+}
diff --git a/pkg/operations/start.go b/pkg/operations/start.go
index a4eb319ae..4d95af57f 100644
--- a/pkg/operations/start.go
+++ b/pkg/operations/start.go
@@ -22,8 +22,10 @@ import (
 )
 
 func StartVM(vm *api.VM, debug bool) error {
-	// Make sure the VM container does not exist. Don't care about the error.
-	_ = RemoveVMContainer(vm)
+	// Remove the VM container if it exists
+	if err := RemoveVMContainer(vm); err != nil {
+		return err
+	}
 
 	// Setup the snapshot overlay filesystem
 	if err := dmlegacy.ActivateSnapshot(vm); err != nil {
diff --git a/pkg/runtime/docker/client.go b/pkg/runtime/docker/client.go
index 87e526a24..6ba020220 100644
--- a/pkg/runtime/docker/client.go
+++ b/pkg/runtime/docker/client.go
@@ -73,6 +73,19 @@ func (dc *dockerClient) ExportImage(image string) (io.ReadCloser, string, error)
 	return rc, config.ID, err
 }
 
+func (dc *dockerClient) InspectContainer(container string) (*runtime.ContainerInspectResult, error) {
+	res, _, err := dc.client.ContainerInspectWithRaw(context.Background(), container, false)
+	if err != nil {
+		return nil, err
+	}
+
+	return &runtime.ContainerInspectResult{
+		ID:     res.ID,
+		Image:  res.Image,
+		Status: res.State.Status,
+	}, nil
+}
+
 func (dc *dockerClient) AttachContainer(container string) (err error) {
 	// TODO: Rework to perform the attach via the Docker client,
 	// this will require manual TTY and signal emulation/handling.
diff --git a/pkg/runtime/types.go b/pkg/runtime/types.go
index 5321f4fd1..02c160d38 100644
--- a/pkg/runtime/types.go
+++ b/pkg/runtime/types.go
@@ -13,6 +13,12 @@ type ImageInspectResult struct {
 	Size        int64
 }
 
+type ContainerInspectResult struct {
+	ID     string
+	Image  string
+	Status string
+}
+
 type Bind struct {
 	HostPath      string
 	ContainerPath string
@@ -36,6 +42,7 @@ type Interface interface {
 	PullImage(image string) (io.ReadCloser, error)
 	ExportImage(image string) (io.ReadCloser, string, error)
 
+	InspectContainer(container string) (*ContainerInspectResult, error)
 	AttachContainer(container string) error
 	RunContainer(image string, config *ContainerConfig, name string) (string, error)
 	StopContainer(container string, timeout *time.Duration) error

From b58702c23dd7ab1f0125b7bfedcca572397d9dab Mon Sep 17 00:00:00 2001
From: Dennis Marttinen <dennis@weave.works>
Date: Mon, 22 Jul 2019 18:05:37 +0300
Subject: [PATCH 5/5] Make the Docker client's stop, kill and remove operations
 blocking This is to ensure no race conditions occur during e.g. `rm -f`,
 where a VM started with `-d` would still be in the process of being killed
 when remove is issued, leading to a crash.

---
 pkg/runtime/docker/client.go | 54 ++++++++++++++++++++++++++++++++++--
 1 file changed, 51 insertions(+), 3 deletions(-)

diff --git a/pkg/runtime/docker/client.go b/pkg/runtime/docker/client.go
index 6ba020220..6bb8fc680 100644
--- a/pkg/runtime/docker/client.go
+++ b/pkg/runtime/docker/client.go
@@ -8,6 +8,7 @@ import (
 
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types/container"
+	cont "github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/client"
 	"github.com/docker/go-connections/nat"
 	"github.com/weaveworks/ignite/pkg/runtime"
@@ -153,15 +154,40 @@ func (dc *dockerClient) RunContainer(image string, config *runtime.ContainerConf
 }
 
 func (dc *dockerClient) StopContainer(container string, timeout *time.Duration) error {
-	return dc.client.ContainerStop(context.Background(), container, timeout)
+	if err := dc.client.ContainerStop(context.Background(), container, timeout); err != nil {
+		return err
+	}
+
+	// Wait for the container to be stopped
+	return dc.waitForContainer(container, cont.WaitConditionNotRunning, nil)
 }
 
 func (dc *dockerClient) KillContainer(container, signal string) error {
-	return dc.client.ContainerKill(context.Background(), container, signal)
+	if err := dc.client.ContainerKill(context.Background(), container, signal); err != nil {
+		return err
+	}
+
+	// Wait for the container to be killed
+	return dc.waitForContainer(container, cont.WaitConditionNotRunning, nil)
 }
 
 func (dc *dockerClient) RemoveContainer(container string) error {
-	return dc.client.ContainerRemove(context.Background(), container, types.ContainerRemoveOptions{})
+	// Container waiting can fail if the container gets removed before
+	// we reach the waiting fence. Start the waiter in a goroutine before
+	// performing the removal to ensure proper removal detection.
+	errC := make(chan error)
+	readyC := make(chan bool)
+	go func() {
+		errC <- dc.waitForContainer(container, cont.WaitConditionRemoved, &readyC)
+	}()
+
+	<-readyC // The ready channel is used to wait until removal detection has started
+	if err := dc.client.ContainerRemove(context.Background(), container, types.ContainerRemoveOptions{}); err != nil {
+		return err
+	}
+
+	// Wait for the container to be removed
+	return <-errC
 }
 
 func (dc *dockerClient) ContainerLogs(container string) (io.ReadCloser, error) {
@@ -180,6 +206,28 @@ func (dc *dockerClient) ContainerNetNS(container string) (string, error) {
 	return getNetworkNamespace(&r)
 }
 
+func (dc *dockerClient) waitForContainer(container string, condition cont.WaitCondition, readyC *chan bool) error {
+	resultC, errC := dc.client.ContainerWait(context.Background(), container, condition)
+
+	// The ready channel can be used to wait until
+	// the container wait request has been sent to
+	// Docker to ensure proper ordering
+	if readyC != nil {
+		*readyC <- true
+	}
+
+	select {
+	case result := <-resultC:
+		if result.Error != nil {
+			return fmt.Errorf("failed to wait for container %q: %s", container, result.Error.Message)
+		}
+	case err := <-errC:
+		return err
+	}
+
+	return nil
+}
+
 func getNetworkNamespace(c *types.ContainerJSON) (string, error) {
 	if c.State.Pid == 0 {
 		// Docker reports pid 0 for an exited container.