From fa0afdbdd9782543ab154375f096f7e65f69332c Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Mon, 28 Nov 2022 11:32:16 -0600 Subject: [PATCH] client: manually cleanup leaked iptables rules (#15407) This PR adds a secondary path for cleaning up iptables created for an allocation when the normal CNI library fails to do so. This typically happens when the state of the pause container is unexpected - e.g. deleted out of band from Nomad. Before, the iptables rules would be leaked which could lead to unexpected nat routing behavior later on (in addition to leaked resources). With this change, we scan for the rules created on behalf of the allocation being GC'd and delete them. Fixes #6385 --- .changelog/15407.txt | 3 + client/allocrunner/networking_cni.go | 100 ++++++++++++++++++- client/allocrunner/networking_cni_test.go | 114 +++++++++++++++++++++- 3 files changed, 213 insertions(+), 4 deletions(-) create mode 100644 .changelog/15407.txt diff --git a/.changelog/15407.txt b/.changelog/15407.txt new file mode 100644 index 000000000000..65e776a46cdc --- /dev/null +++ b/.changelog/15407.txt @@ -0,0 +1,3 @@ +```release-note:improvement +client: detect and cleanup leaked iptables rules +``` diff --git a/client/allocrunner/networking_cni.go b/client/allocrunner/networking_cni.go index 02881b87436e..9eb581b711e1 100644 --- a/client/allocrunner/networking_cni.go +++ b/client/allocrunner/networking_cni.go @@ -12,12 +12,14 @@ import ( "math/rand" "os" "path/filepath" + "regexp" "sort" "strings" "time" cni "github.com/containerd/go-cni" cnilibrary "github.com/containernetworking/cni/libcni" + "github.com/coreos/go-iptables/iptables" log "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/drivers" @@ -226,7 +228,101 @@ func (c *cniNetworkConfigurator) Teardown(ctx context.Context, alloc *structs.Al return err } - return c.cni.Remove(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc, c.ignorePortMappingHostIP))) + if err := c.cni.Remove(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc, c.ignorePortMappingHostIP))); err != nil { + // create a real handle to iptables + ipt, iptErr := iptables.New() + if iptErr != nil { + return fmt.Errorf("failed to detect iptables: %w", iptErr) + } + // most likely the pause container was removed from underneath nomad + return c.forceCleanup(ipt, alloc.ID) + } + + return nil +} + +// IPTables is a subset of iptables.IPTables +type IPTables interface { + List(table, chain string) ([]string, error) + Delete(table, chain string, rule ...string) error + ClearAndDeleteChain(table, chain string) error +} + +var ( + // ipRuleRe is used to parse a postrouting iptables rule created by nomad, e.g. + // -A POSTROUTING -s 172.26.64.191/32 -m comment --comment "name: \"nomad\" id: \"6b235529-8111-4bbe-520b-d639b1d2a94e\"" -j CNI-50e58ea77dc52e0c731e3799 + ipRuleRe = regexp.MustCompile(`-A POSTROUTING -s (\S+) -m comment --comment "name: \\"nomad\\" id: \\"([[:xdigit:]-]+)\\"" -j (CNI-[[:xdigit:]]+)`) +) + +// forceCleanup is the backup plan for removing the iptables rule and chain associated with +// an allocation that was using bridge networking. The cni library refuses to handle a +// dirty state - e.g. the pause container is removed out of band, and so we must cleanup +// iptables ourselves to avoid leaking rules. +func (c *cniNetworkConfigurator) forceCleanup(ipt IPTables, allocID string) error { + const ( + natTable = "nat" + postRoutingChain = "POSTROUTING" + commentFmt = `--comment "name: \"nomad\" id: \"%s\""` + ) + + // list the rules on the POSTROUTING chain of the nat table + rules, err := ipt.List(natTable, postRoutingChain) + if err != nil { + return fmt.Errorf("failed to list iptables rules: %w", err) + } + + // find the POSTROUTING rule associated with our allocation + matcher := fmt.Sprintf(commentFmt, allocID) + var ruleToPurge string + for _, rule := range rules { + if strings.Contains(rule, matcher) { + ruleToPurge = rule + break + } + } + + // no rule found for our allocation, just give up + if ruleToPurge == "" { + return fmt.Errorf("failed to find postrouting rule for alloc %s", allocID) + } + + // re-create the rule we need to delete, as tokens + subs := ipRuleRe.FindStringSubmatch(ruleToPurge) + if len(subs) != 4 { + return fmt.Errorf("failed to parse postrouting rule for alloc %s", allocID) + } + cidr := subs[1] + id := subs[2] + chainID := subs[3] + toDel := []string{ + `-s`, + cidr, + `-m`, + `comment`, + `--comment`, + `name: "nomad" id: "` + id + `"`, + `-j`, + chainID, + } + + // remove the jump rule + ok := true + if err = ipt.Delete(natTable, postRoutingChain, toDel...); err != nil { + c.logger.Warn("failed to remove iptables nat.POSTROUTING rule", "alloc_id", allocID, "chain", chainID, "error", err) + ok = false + } + + // remote the associated chain + if err = ipt.ClearAndDeleteChain(natTable, chainID); err != nil { + c.logger.Warn("failed to remove iptables nat chain", "chain", chainID, "error", err) + ok = false + } + + if !ok { + return fmt.Errorf("failed to cleanup iptables rules for alloc %s", allocID) + } + + return nil } func (c *cniNetworkConfigurator) ensureCNIInitialized() error { @@ -240,7 +336,7 @@ func (c *cniNetworkConfigurator) ensureCNIInitialized() error { // getPortMapping builds a list of portMapping structs that are used as the // portmapping capability arguments for the portmap CNI plugin func getPortMapping(alloc *structs.Allocation, ignoreHostIP bool) []cni.PortMapping { - ports := []cni.PortMapping{} + var ports []cni.PortMapping if len(alloc.AllocatedResources.Shared.Ports) == 0 && len(alloc.AllocatedResources.Shared.Networks) > 0 { for _, network := range alloc.AllocatedResources.Shared.Networks { diff --git a/client/allocrunner/networking_cni_test.go b/client/allocrunner/networking_cni_test.go index bc759272f507..3bc8f8599089 100644 --- a/client/allocrunner/networking_cni_test.go +++ b/client/allocrunner/networking_cni_test.go @@ -1,19 +1,129 @@ //go:build linux -// +build linux package allocrunner import ( + "errors" "net" "testing" - cni "github.com/containerd/go-cni" + "github.com/containerd/go-cni" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/helper/testlog" + "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) +type mockIPTables struct { + listCall [2]string + listRules []string + listErr error + + deleteCall [2]string + deleteErr error + + clearCall [2]string + clearErr error +} + +func (ipt *mockIPTables) List(table, chain string) ([]string, error) { + ipt.listCall[0], ipt.listCall[1] = table, chain + return ipt.listRules, ipt.listErr +} + +func (ipt *mockIPTables) Delete(table, chain string, rule ...string) error { + ipt.deleteCall[0], ipt.deleteCall[1] = table, chain + return ipt.deleteErr +} + +func (ipt *mockIPTables) ClearAndDeleteChain(table, chain string) error { + ipt.clearCall[0], ipt.clearCall[1] = table, chain + return ipt.clearErr +} + +func (ipt *mockIPTables) assert(t *testing.T, jumpChain string) { + // List assertions + must.Eq(t, "nat", ipt.listCall[0]) + must.Eq(t, "POSTROUTING", ipt.listCall[1]) + + // Delete assertions + must.Eq(t, "nat", ipt.deleteCall[0]) + must.Eq(t, "POSTROUTING", ipt.deleteCall[1]) + + // Clear assertions + must.Eq(t, "nat", ipt.clearCall[0]) + must.Eq(t, jumpChain, ipt.clearCall[1]) +} + +func TestCNI_forceCleanup(t *testing.T) { + t.Run("ok", func(t *testing.T) { + c := cniNetworkConfigurator{logger: testlog.HCLogger(t)} + ipt := &mockIPTables{ + listRules: []string{ + `-A POSTROUTING -m comment --comment "CNI portfwd requiring masquerade" -j CNI-HOSTPORT-MASQ`, + `-A POSTROUTING -s 172.17.0.0/16 ! -o docker0 -j MASQUERADE`, + `-A POSTROUTING -s 172.26.64.216/32 -m comment --comment "name: \"nomad\" id: \"79e8bf2e-a9c8-70ac-8d4e-fa5c4da99fbf\"" -j CNI-f2338c31d4de44472fe99c43`, + `-A POSTROUTING -s 172.26.64.217/32 -m comment --comment "name: \"nomad\" id: \"2dd71cac-2b1e-ff08-167c-735f7f9f4964\"" -j CNI-5d36f286cfbb35c5776509ec`, + `-A POSTROUTING -s 172.26.64.218/32 -m comment --comment "name: \"nomad\" id: \"5ff6deb7-9bc1-1491-f20c-e87b15de501d\"" -j CNI-2fe7686eac2fe43714a7b850`, + `-A POSTROUTING -m mark --mark 0x2000/0x2000 -j MASQUERADE`, + `-A POSTROUTING -m comment --comment "CNI portfwd masquerade mark" -j MARK --set-xmark 0x2000/0x2000`, + }, + } + err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964") + must.NoError(t, err) + ipt.assert(t, "CNI-5d36f286cfbb35c5776509ec") + }) + + t.Run("missing allocation", func(t *testing.T) { + c := cniNetworkConfigurator{logger: testlog.HCLogger(t)} + ipt := &mockIPTables{ + listRules: []string{ + `-A POSTROUTING -m comment --comment "CNI portfwd requiring masquerade" -j CNI-HOSTPORT-MASQ`, + `-A POSTROUTING -s 172.17.0.0/16 ! -o docker0 -j MASQUERADE`, + `-A POSTROUTING -s 172.26.64.216/32 -m comment --comment "name: \"nomad\" id: \"79e8bf2e-a9c8-70ac-8d4e-fa5c4da99fbf\"" -j CNI-f2338c31d4de44472fe99c43`, + `-A POSTROUTING -s 172.26.64.217/32 -m comment --comment "name: \"nomad\" id: \"262d57a7-8f85-f3a4-9c3b-120c00ccbff1\"" -j CNI-5d36f286cfbb35c5776509ec`, + `-A POSTROUTING -s 172.26.64.218/32 -m comment --comment "name: \"nomad\" id: \"5ff6deb7-9bc1-1491-f20c-e87b15de501d\"" -j CNI-2fe7686eac2fe43714a7b850`, + `-A POSTROUTING -m mark --mark 0x2000/0x2000 -j MASQUERADE`, + `-A POSTROUTING -m comment --comment "CNI portfwd masquerade mark" -j MARK --set-xmark 0x2000/0x2000`, + }, + } + err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964") + must.EqError(t, err, "failed to find postrouting rule for alloc 2dd71cac-2b1e-ff08-167c-735f7f9f4964") + }) + + t.Run("list error", func(t *testing.T) { + c := cniNetworkConfigurator{logger: testlog.HCLogger(t)} + ipt := &mockIPTables{listErr: errors.New("list error")} + err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964") + must.EqError(t, err, "failed to list iptables rules: list error") + }) + + t.Run("delete error", func(t *testing.T) { + c := cniNetworkConfigurator{logger: testlog.HCLogger(t)} + ipt := &mockIPTables{ + deleteErr: errors.New("delete error"), + listRules: []string{ + `-A POSTROUTING -s 172.26.64.217/32 -m comment --comment "name: \"nomad\" id: \"2dd71cac-2b1e-ff08-167c-735f7f9f4964\"" -j CNI-5d36f286cfbb35c5776509ec`, + }, + } + err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964") + must.EqError(t, err, "failed to cleanup iptables rules for alloc 2dd71cac-2b1e-ff08-167c-735f7f9f4964") + }) + + t.Run("clear error", func(t *testing.T) { + c := cniNetworkConfigurator{logger: testlog.HCLogger(t)} + ipt := &mockIPTables{ + clearErr: errors.New("clear error"), + listRules: []string{ + `-A POSTROUTING -s 172.26.64.217/32 -m comment --comment "name: \"nomad\" id: \"2dd71cac-2b1e-ff08-167c-735f7f9f4964\"" -j CNI-5d36f286cfbb35c5776509ec`, + }, + } + err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964") + must.EqError(t, err, "failed to cleanup iptables rules for alloc 2dd71cac-2b1e-ff08-167c-735f7f9f4964") + }) +} + // TestCNI_cniToAllocNet_Fallback asserts if a CNI plugin result lacks an IP on // its sandbox interface, the first IP found is used. func TestCNI_cniToAllocNet_Fallback(t *testing.T) {