From ddcc9f18b05bf709bab3205af9e13c6d6874eca8 Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Wed, 23 Nov 2022 10:06:37 -0600 Subject: [PATCH] client: manually cleanup leaked iptables rules This PR adds a secondary path for cleaning up iptables created for an allocation when the normal CNI library fails to do so. This typically happens when the state of the pause container is unexpected - e.g. deleted out of band from Nomad. Before, the iptables rules would be leaked which could lead to unexpected nat routing behavior later on (in addition to leaked resources). With this change, we scan for the rules created on behalf of the allocation being GC'd and delete them. Fixes #6385 --- client/allocrunner/networking_cni.go | 94 ++++++++++++++++++++++- client/allocrunner/networking_cni_test.go | 18 ++++- 2 files changed, 109 insertions(+), 3 deletions(-) diff --git a/client/allocrunner/networking_cni.go b/client/allocrunner/networking_cni.go index 02881b87436e..745567c0c786 100644 --- a/client/allocrunner/networking_cni.go +++ b/client/allocrunner/networking_cni.go @@ -12,12 +12,14 @@ import ( "math/rand" "os" "path/filepath" + "regexp" "sort" "strings" "time" cni "github.com/containerd/go-cni" cnilibrary "github.com/containernetworking/cni/libcni" + "github.com/coreos/go-iptables/iptables" log "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/drivers" @@ -226,7 +228,95 @@ func (c *cniNetworkConfigurator) Teardown(ctx context.Context, alloc *structs.Al return err } - return c.cni.Remove(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc, c.ignorePortMappingHostIP))) + if err := c.cni.Remove(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc, c.ignorePortMappingHostIP))); err != nil { + // most likely the pause container was removed from underneath nomad + return c.forceCleanup(alloc.ID) + } + + return nil +} + +var ( + // ipRuleRe is used to parse a postrouting iptables rule created by nomad, e.g. + // -A POSTROUTING -s 172.26.64.191/32 -m comment --comment "name: \"nomad\" id: \"6b235529-8111-4bbe-520b-d639b1d2a94e\"" -j CNI-50e58ea77dc52e0c731e3799 + ipRuleRe = regexp.MustCompile(`-A POSTROUTING -s (\S+) -m comment --comment "name: \\"nomad\\" id: \\"([[:xdigit:]-]+)\\"" -j (CNI-[[:xdigit:]]+)`) +) + +// forceCleanup is the backup plan for removing the iptables rule and chain associated with +// an allocation that was using bridge networking. The cni library refuses to handle a +// dirty state - e.g. the pause container is removed out of band, and so we must cleanup +// iptables ourselves to avoid leaking rules. +func (c *cniNetworkConfigurator) forceCleanup(allocID string) error { + const ( + natTable = "nat" + postRoutingChain = "POSTROUTING" + commentFmt = `--comment "name: \"nomad\" id: \"%s\""` + ) + + // create a handle to iptables + manipulator, err := iptables.New() + if err != nil { + return fmt.Errorf("failed to detect iptables: %w", err) + } + + // list the rules on the POSTROUTING chain of the nat table + rules, err := manipulator.List(natTable, postRoutingChain) + if err != nil { + return fmt.Errorf("failed to list iptables rules: %w", err) + } + + // find the POSTROUTING rule associated with our allocation + matcher := fmt.Sprintf(commentFmt, allocID) + var ruleToPurge string + for _, rule := range rules { + if strings.Contains(rule, matcher) { + ruleToPurge = rule + break + } + } + + // no rule found for our allocation, just give up + if ruleToPurge == "" { + return fmt.Errorf("failed to find postrouting rule for alloc %s", allocID) + } + + // re-create the rule we need to delete, as tokens + subs := ipRuleRe.FindStringSubmatch(ruleToPurge) + if len(subs) != 4 { + return fmt.Errorf("failed to parse postrouting rule for alloc %s", allocID) + } + cidr := subs[1] + id := subs[2] + chainID := subs[3] + toDel := []string{ + `-s`, + cidr, + `-m`, + `comment`, + `--comment`, + `name: "nomad" id: "` + id + `"`, + `-j`, + chainID, + } + + // remove the jump rule + ok := true + if err = manipulator.Delete(natTable, postRoutingChain, toDel...); err != nil { + c.logger.Warn("failed to remove iptables nat.POSTROUTING rule", "alloc_id", allocID, "chain", chainID, "error", err) + ok = false + } + + // remote the associated chain + if err = manipulator.ClearAndDeleteChain(natTable, chainID); err != nil { + c.logger.Warn("failed to remove iptables nat chain", "chain", chainID, "error", err) + ok = false + } + + if !ok { + return fmt.Errorf("failed to cleanup iptables rules for alloc %s", allocID) + } + + return nil } func (c *cniNetworkConfigurator) ensureCNIInitialized() error { @@ -240,7 +330,7 @@ func (c *cniNetworkConfigurator) ensureCNIInitialized() error { // getPortMapping builds a list of portMapping structs that are used as the // portmapping capability arguments for the portmap CNI plugin func getPortMapping(alloc *structs.Allocation, ignoreHostIP bool) []cni.PortMapping { - ports := []cni.PortMapping{} + var ports []cni.PortMapping if len(alloc.AllocatedResources.Shared.Ports) == 0 && len(alloc.AllocatedResources.Shared.Networks) > 0 { for _, network := range alloc.AllocatedResources.Shared.Networks { diff --git a/client/allocrunner/networking_cni_test.go b/client/allocrunner/networking_cni_test.go index bc759272f507..fdcf4b56a435 100644 --- a/client/allocrunner/networking_cni_test.go +++ b/client/allocrunner/networking_cni_test.go @@ -1,19 +1,35 @@ //go:build linux -// +build linux package allocrunner import ( + "fmt" "net" "testing" cni "github.com/containerd/go-cni" + "github.com/coreos/go-iptables/iptables" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/helper/testlog" + "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) +func TestCNI_DeleteRule(t *testing.T) { + // rule := "" + + manipulator, err := iptables.New() + must.NoError(t, err) + + rules, err := manipulator.List("nat", "POSTROUTING") + must.NoError(t, err) + + for i, s := range rules { + fmt.Printf("rule[%d]: %s\n", i, s) + } +} + // TestCNI_cniToAllocNet_Fallback asserts if a CNI plugin result lacks an IP on // its sandbox interface, the first IP found is used. func TestCNI_cniToAllocNet_Fallback(t *testing.T) {