From fb088f99652552bd767f31ba25a586b682e8e3d6 Mon Sep 17 00:00:00 2001 From: Dennis Marttinen <dennis@weave.works> Date: Tue, 9 Jul 2019 14:03:20 +0300 Subject: [PATCH 1/5] Support removing CNI networking configuration post-run --- cmd/ignite-spawn/ignite-spawn.go | 3 +++ pkg/metadata/vmmd/vm.go | 25 +++++++++++++++++++++++++ pkg/operations/start.go | 2 +- 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/cmd/ignite-spawn/ignite-spawn.go b/cmd/ignite-spawn/ignite-spawn.go index 3c79655fa..fa091fe3b 100644 --- a/cmd/ignite-spawn/ignite-spawn.go +++ b/cmd/ignite-spawn/ignite-spawn.go @@ -61,6 +61,9 @@ func StartVM(co *options) error { // Remove the snapshot overlay post-run, which also removes the detached backing loop devices defer co.vm.RemoveSnapshot() + // Remove CNI networking post-run + defer co.vm.RemoveCNINetworking() + // Remove the IP addresses post-run defer co.vm.ClearIPAddresses() diff --git a/pkg/metadata/vmmd/vm.go b/pkg/metadata/vmmd/vm.go index d6738330a..a187fcc97 100644 --- a/pkg/metadata/vmmd/vm.go +++ b/pkg/metadata/vmmd/vm.go @@ -2,6 +2,8 @@ package vmmd import ( "fmt" + "github.com/weaveworks/ignite/pkg/network/cni" + "github.com/weaveworks/ignite/pkg/runtime/docker" "io/ioutil" "math" "net" @@ -170,6 +172,29 @@ func (md *VM) ClearPortMappings() { md.Spec.Ports = nil } +func (md *VM) RemoveCNINetworking() error { + // Skip all other network modes + if md.Spec.NetworkMode != api.NetworkModeCNI { + return nil + } + + // TODO: Both the client and networkPlugin variables should be constructed once, + // and accessible throughout the program. + client, err := docker.GetDockerClient() + if err != nil { + return err + } + + networkPlugin, err := cni.GetCNINetworkPlugin(client) + if err != nil { + return err + } + + // TODO: The Docker hostname is not a robust way to get this, but + // Ignite needs to be daemonized to have a proper implementation + return networkPlugin.RemoveContainerNetwork(os.Getenv("HOSTNAME")) +} + // Generate a new SSH keypair for the vm func (md *VM) newSSHKeypair() (string, error) { privKeyPath := path.Join(md.ObjectPath(), fmt.Sprintf(constants.VM_SSH_KEY_TEMPLATE, md.GetUID())) diff --git a/pkg/operations/start.go b/pkg/operations/start.go index f9e2713e4..5d7063233 100644 --- a/pkg/operations/start.go +++ b/pkg/operations/start.go @@ -15,7 +15,7 @@ import ( ) func StartVM(vm *vmmd.VM, debug bool) error { - // Make sure the VM container does not exist. Don't care about the error + // Make sure the VM container does not exist. Don't care about the error. RemoveVMContainer(vm.VM) // Setup the snapshot overlay filesystem From 61356f96a6d9beb6eb6739f0f3834553f9cc541f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20K=C3=A4ldstr=C3=B6m?= <lucas@weave.works> Date: Tue, 9 Jul 2019 17:18:04 +0300 Subject: [PATCH 2/5] wip --- cmd/ignite-spawn/ignite-spawn.go | 3 --- pkg/metadata/vmmd/vm.go | 11 +++++------ pkg/operations/remove.go | 3 +++ pkg/operations/start.go | 5 ++++- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/cmd/ignite-spawn/ignite-spawn.go b/cmd/ignite-spawn/ignite-spawn.go index 5beac4ea3..a957c4b6d 100644 --- a/cmd/ignite-spawn/ignite-spawn.go +++ b/cmd/ignite-spawn/ignite-spawn.go @@ -61,9 +61,6 @@ func StartVM(co *options) error { // Remove the snapshot overlay post-run, which also removes the detached backing loop devices defer co.vm.RemoveSnapshot() - // Remove CNI networking post-run - defer co.vm.RemoveCNINetworking() - // Remove the IP addresses post-run defer co.vm.ClearIPAddresses() diff --git a/pkg/metadata/vmmd/vm.go b/pkg/metadata/vmmd/vm.go index daaf5d422..f36ea1f58 100644 --- a/pkg/metadata/vmmd/vm.go +++ b/pkg/metadata/vmmd/vm.go @@ -169,7 +169,8 @@ func (md *VM) ClearIPAddresses() { md.Status.IPAddresses = nil } -func (md *VM) RemoveCNINetworking() error { +func (md *VM) RemoveCNINetworking(containerID string) error { + // TODO: Get containerID from the API object // Skip all other network modes if md.Spec.Network.Mode != api.NetworkModeCNI { return nil @@ -186,11 +187,9 @@ func (md *VM) RemoveCNINetworking() error { if err != nil { return err } - - // TODO: The Docker hostname is not a robust way to get this, but - // Ignite needs to be daemonized to have a proper implementation - // Also, the hostname is the shortened container ID which might not work here - return networkPlugin.RemoveContainerNetwork(os.Getenv("HOSTNAME")) + + fmt.Printf("Trying to remove the container with ID %q from the CNI network\n", containerID) + return networkPlugin.RemoveContainerNetwork(containerID) } // Generate a new SSH keypair for the vm diff --git a/pkg/operations/remove.go b/pkg/operations/remove.go index 8e23d51d0..b9a6516e9 100644 --- a/pkg/operations/remove.go +++ b/pkg/operations/remove.go @@ -53,6 +53,9 @@ func RemoveVMContainer(vm meta.Object) error { return fmt.Errorf("failed to remove container for VM %q: %v", vm.GetUID(), err) } + // TODO: Call vm.RemoveCNINetworking(containerID) here. Need to lookup the container ID + // first, or get it from the API object + return nil } diff --git a/pkg/operations/start.go b/pkg/operations/start.go index f7eab0e4f..64261afc5 100644 --- a/pkg/operations/start.go +++ b/pkg/operations/start.go @@ -72,7 +72,10 @@ func StartVM(vm *vmmd.VM, debug bool) error { } log.Printf("Networking is now handled by CNI") } - log.Printf("Started Firecracker VM %q in a container with ID %q", vm.GetUID(), containerID) + + // TODO: Patch the API object here with containerID information + + log.Printf("Started Firecracker VM %q in a container with ID %q", vm.GetUID(), containerID[:10]) // TODO: Follow-up the container here with a defer, or dedicated goroutine. We should output // if it started successfully or not return nil From 872dcb8a830fcffe4d12f8493d5abb3e59077f8f Mon Sep 17 00:00:00 2001 From: Dennis Marttinen <dennis@weave.works> Date: Mon, 22 Jul 2019 17:19:02 +0300 Subject: [PATCH 3/5] Fix cache indexing Objects without UID returned by Storage.New() Also add a safety check for ensuring that cached Objects have their UID set. --- pkg/storage/cache/cache.go | 16 ++++++---------- pkg/storage/cache/index.go | 6 ++++++ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/pkg/storage/cache/cache.go b/pkg/storage/cache/cache.go index b9a5ec13a..bd636a579 100644 --- a/pkg/storage/cache/cache.go +++ b/pkg/storage/cache/cache.go @@ -37,16 +37,12 @@ func NewCache(backingStorage storage.Storage) Cache { return c } -func (c *cache) New(gvk schema.GroupVersionKind) (obj meta.Object, err error) { - // Request the storage to create the Object - obj, err = c.storage.New(gvk) - - // If no errors occurred, cache it - if err == nil { - err = c.index.store(obj) - } - - return +func (c *cache) New(gvk schema.GroupVersionKind) (meta.Object, error) { + // Request the storage to create the Object. The + // newly generated Object has not got an UID which + // is required for indexing, so just return it + // without storing it into the cache + return c.storage.New(gvk) } func (c *cache) Get(gvk schema.GroupVersionKind, uid meta.UID) (obj meta.Object, err error) { diff --git a/pkg/storage/cache/index.go b/pkg/storage/cache/index.go index 1523fa6d8..a6b0b0dc8 100644 --- a/pkg/storage/cache/index.go +++ b/pkg/storage/cache/index.go @@ -56,6 +56,12 @@ func (i *index) loadAll() ([]meta.Object, error) { } func store(i *index, obj meta.Object, apiType bool) error { + // If store is called for an invalid Object lacking an UID, + // panic and print the stack trace. This should never happen. + if obj.GetUID() == "" { + panic("Attempt to cache invalid Object: missing UID") + } + co, err := newCacheObject(i.storage, obj, apiType) if err != nil { return err From 52aa28d061bec832f23c4de235a27d00f2617d86 Mon Sep 17 00:00:00 2001 From: Dennis Marttinen <dennis@weave.works> Date: Mon, 22 Jul 2019 17:21:07 +0300 Subject: [PATCH 4/5] Error check VM container removal, support removing CNI networking --- pkg/operations/remove.go | 33 +++++++++++++++++++++++++-------- pkg/operations/start.go | 6 ++++-- pkg/runtime/docker/client.go | 13 +++++++++++++ pkg/runtime/types.go | 7 +++++++ 4 files changed, 49 insertions(+), 10 deletions(-) diff --git a/pkg/operations/remove.go b/pkg/operations/remove.go index 70ebc367e..1550d27ee 100644 --- a/pkg/operations/remove.go +++ b/pkg/operations/remove.go @@ -2,8 +2,8 @@ package operations import ( "fmt" - "log" + log "github.com/sirupsen/logrus" api "github.com/weaveworks/ignite/pkg/apis/ignite" meta "github.com/weaveworks/ignite/pkg/apis/meta/v1alpha1" "github.com/weaveworks/ignite/pkg/client" @@ -29,8 +29,10 @@ func RemoveVM(c *client.Client, vm *api.VM) error { return err } - // Force-remove the VM container. Don't care about the error. - _ = RemoveVMContainer(vm) + // Remove the VM container if it exists + if err := RemoveVMContainer(vm); err != nil { + return err + } if logs.Quiet { fmt.Println(vm.GetUID()) @@ -42,15 +44,19 @@ func RemoveVM(c *client.Client, vm *api.VM) error { } func RemoveVMContainer(vm meta.Object) error { + containerName := util.NewPrefixer().Prefix(vm.GetUID()) + result, err := providers.Runtime.InspectContainer(containerName) + if err != nil { + return nil // The container doesn't exist, bail out + } + // Remove the VM container - if err := providers.Runtime.RemoveContainer(util.NewPrefixer().Prefix(vm.GetUID())); err != nil { + if err := providers.Runtime.RemoveContainer(result.ID); err != nil { return fmt.Errorf("failed to remove container for VM %q: %v", vm.GetUID(), err) } - // TODO: Call vm.RemoveCNINetworking(containerID) here. Need to lookup the container ID - // first, or get it from the API object - - return nil + // Remove the CNI networking of the VM + return removeCNINetworking(vm.(*api.VM), result.ID) } // StopVM stops or kills a VM @@ -83,3 +89,14 @@ func StopVM(vm *api.VM, kill, silent bool) error { return nil } + +func removeCNINetworking(vm *api.VM, containerID string) error { + // Skip all other network modes + if vm.Spec.Network.Mode != api.NetworkModeCNI { + return nil + } + + // Perform the removal + log.Printf("Trying to remove the container with ID %q from the CNI network", containerID) + return providers.NetworkPlugin.RemoveContainerNetwork(containerID) +} diff --git a/pkg/operations/start.go b/pkg/operations/start.go index a4eb319ae..4d95af57f 100644 --- a/pkg/operations/start.go +++ b/pkg/operations/start.go @@ -22,8 +22,10 @@ import ( ) func StartVM(vm *api.VM, debug bool) error { - // Make sure the VM container does not exist. Don't care about the error. - _ = RemoveVMContainer(vm) + // Remove the VM container if it exists + if err := RemoveVMContainer(vm); err != nil { + return err + } // Setup the snapshot overlay filesystem if err := dmlegacy.ActivateSnapshot(vm); err != nil { diff --git a/pkg/runtime/docker/client.go b/pkg/runtime/docker/client.go index 87e526a24..6ba020220 100644 --- a/pkg/runtime/docker/client.go +++ b/pkg/runtime/docker/client.go @@ -73,6 +73,19 @@ func (dc *dockerClient) ExportImage(image string) (io.ReadCloser, string, error) return rc, config.ID, err } +func (dc *dockerClient) InspectContainer(container string) (*runtime.ContainerInspectResult, error) { + res, _, err := dc.client.ContainerInspectWithRaw(context.Background(), container, false) + if err != nil { + return nil, err + } + + return &runtime.ContainerInspectResult{ + ID: res.ID, + Image: res.Image, + Status: res.State.Status, + }, nil +} + func (dc *dockerClient) AttachContainer(container string) (err error) { // TODO: Rework to perform the attach via the Docker client, // this will require manual TTY and signal emulation/handling. diff --git a/pkg/runtime/types.go b/pkg/runtime/types.go index 5321f4fd1..02c160d38 100644 --- a/pkg/runtime/types.go +++ b/pkg/runtime/types.go @@ -13,6 +13,12 @@ type ImageInspectResult struct { Size int64 } +type ContainerInspectResult struct { + ID string + Image string + Status string +} + type Bind struct { HostPath string ContainerPath string @@ -36,6 +42,7 @@ type Interface interface { PullImage(image string) (io.ReadCloser, error) ExportImage(image string) (io.ReadCloser, string, error) + InspectContainer(container string) (*ContainerInspectResult, error) AttachContainer(container string) error RunContainer(image string, config *ContainerConfig, name string) (string, error) StopContainer(container string, timeout *time.Duration) error From b58702c23dd7ab1f0125b7bfedcca572397d9dab Mon Sep 17 00:00:00 2001 From: Dennis Marttinen <dennis@weave.works> Date: Mon, 22 Jul 2019 18:05:37 +0300 Subject: [PATCH 5/5] Make the Docker client's stop, kill and remove operations blocking This is to ensure no race conditions occur during e.g. `rm -f`, where a VM started with `-d` would still be in the process of being killed when remove is issued, leading to a crash. --- pkg/runtime/docker/client.go | 54 ++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/pkg/runtime/docker/client.go b/pkg/runtime/docker/client.go index 6ba020220..6bb8fc680 100644 --- a/pkg/runtime/docker/client.go +++ b/pkg/runtime/docker/client.go @@ -8,6 +8,7 @@ import ( "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/container" + cont "github.com/docker/docker/api/types/container" "github.com/docker/docker/client" "github.com/docker/go-connections/nat" "github.com/weaveworks/ignite/pkg/runtime" @@ -153,15 +154,40 @@ func (dc *dockerClient) RunContainer(image string, config *runtime.ContainerConf } func (dc *dockerClient) StopContainer(container string, timeout *time.Duration) error { - return dc.client.ContainerStop(context.Background(), container, timeout) + if err := dc.client.ContainerStop(context.Background(), container, timeout); err != nil { + return err + } + + // Wait for the container to be stopped + return dc.waitForContainer(container, cont.WaitConditionNotRunning, nil) } func (dc *dockerClient) KillContainer(container, signal string) error { - return dc.client.ContainerKill(context.Background(), container, signal) + if err := dc.client.ContainerKill(context.Background(), container, signal); err != nil { + return err + } + + // Wait for the container to be killed + return dc.waitForContainer(container, cont.WaitConditionNotRunning, nil) } func (dc *dockerClient) RemoveContainer(container string) error { - return dc.client.ContainerRemove(context.Background(), container, types.ContainerRemoveOptions{}) + // Container waiting can fail if the container gets removed before + // we reach the waiting fence. Start the waiter in a goroutine before + // performing the removal to ensure proper removal detection. + errC := make(chan error) + readyC := make(chan bool) + go func() { + errC <- dc.waitForContainer(container, cont.WaitConditionRemoved, &readyC) + }() + + <-readyC // The ready channel is used to wait until removal detection has started + if err := dc.client.ContainerRemove(context.Background(), container, types.ContainerRemoveOptions{}); err != nil { + return err + } + + // Wait for the container to be removed + return <-errC } func (dc *dockerClient) ContainerLogs(container string) (io.ReadCloser, error) { @@ -180,6 +206,28 @@ func (dc *dockerClient) ContainerNetNS(container string) (string, error) { return getNetworkNamespace(&r) } +func (dc *dockerClient) waitForContainer(container string, condition cont.WaitCondition, readyC *chan bool) error { + resultC, errC := dc.client.ContainerWait(context.Background(), container, condition) + + // The ready channel can be used to wait until + // the container wait request has been sent to + // Docker to ensure proper ordering + if readyC != nil { + *readyC <- true + } + + select { + case result := <-resultC: + if result.Error != nil { + return fmt.Errorf("failed to wait for container %q: %s", container, result.Error.Message) + } + case err := <-errC: + return err + } + + return nil +} + func getNetworkNamespace(c *types.ContainerJSON) (string, error) { if c.State.Pid == 0 { // Docker reports pid 0 for an exited container.