From c90426596d7bd430e194435bb20a6f3c45628c91 Mon Sep 17 00:00:00 2001 From: hc-github-team-nomad-core <82989552+hc-github-team-nomad-core@users.noreply.github.com> Date: Fri, 7 Jul 2023 17:42:52 -0500 Subject: [PATCH] cni: ensure to setup CNI addresses in deterministic order (#17766) (#17826) * cni: ensure to setup CNI addresses in deterministic order Currently as commented in the code the go-cni library returns an unordered map of interfaces. In cases where there are multiple CNI interfaces being created this creates a problem with service registration and healthchecking because the first address in the map is being used. The use case we have where this is an issue is that we run CNI with the macvlan plugin to isolate workloads, but they still need to be able to access the host on a static address to be able to perform local resolving and hit host services like the Consul agent API. To make this work there are 2 options, you either add a macvlan interface on the host with an assigned address for each VLAN you have or you create an additional veth bridged interface in the container namespace. We chose the latter option through a custom CNI plugin but the ordering issue leaves us with incorrect service registration. * Updates after feedback * First check for the CNIResult interfaces length, if it's zero we don't need to proceed at all. * Use sorted interfaces list for the address fallback scenario as well. * Remove "found" log message logic, when an address isn't found an error is returned stating the allocation could not be configured as an address was missing from the CNIResult. If we still need a Warn message then we can add it to the condition that returns the error if no address could be found instead of using the "found" bool logic. Co-authored-by: Yorick Gersie <6005868+ygersie@users.noreply.github.com> --- .changelog/17766.txt | 3 ++ client/allocrunner/networking_cni.go | 48 +++++++++++++---------- client/allocrunner/networking_cni_test.go | 16 ++++++++ 3 files changed, 46 insertions(+), 21 deletions(-) create mode 100644 .changelog/17766.txt diff --git a/.changelog/17766.txt b/.changelog/17766.txt new file mode 100644 index 000000000000..7e722049b849 --- /dev/null +++ b/.changelog/17766.txt @@ -0,0 +1,3 @@ +```release-note:improvement +cni: Ensure to setup CNI addresses in deterministic order +``` diff --git a/client/allocrunner/networking_cni.go b/client/allocrunner/networking_cni.go index 9eb581b711e1..d537d0892d89 100644 --- a/client/allocrunner/networking_cni.go +++ b/client/allocrunner/networking_cni.go @@ -127,46 +127,52 @@ func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Alloc // cniToAllocNet converts a CNIResult to an AllocNetworkStatus or returns an // error. The first interface and IP with a sandbox and address set are // preferred. Failing that the first interface with an IP is selected. -// -// Unfortunately the go-cni library returns interfaces in an unordered map so -// the results may be nondeterministic depending on CNI plugin output. func (c *cniNetworkConfigurator) cniToAllocNet(res *cni.CNIResult) (*structs.AllocNetworkStatus, error) { + if len(res.Interfaces) == 0 { + return nil, fmt.Errorf("failed to configure network: no interfaces found") + } + netStatus := new(structs.AllocNetworkStatus) + // Unfortunately the go-cni library returns interfaces in an unordered map meaning + // the results may be nondeterministic depending on CNI plugin output so make + // sure we sort them by interface name. + names := make([]string, 0, len(res.Interfaces)) + for k := range res.Interfaces { + names = append(names, k) + } + sort.Strings(names) + // Use the first sandbox interface with an IP address - if len(res.Interfaces) > 0 { - for name, iface := range res.Interfaces { - if iface == nil { - // this should never happen but this value is coming from external - // plugins so we should guard against it - delete(res.Interfaces, name) - } + for _, name := range names { + iface := res.Interfaces[name] + if iface == nil { + // this should never happen but this value is coming from external + // plugins so we should guard against it + delete(res.Interfaces, name) + continue + } - if iface.Sandbox != "" && len(iface.IPConfigs) > 0 { - netStatus.Address = iface.IPConfigs[0].IP.String() - netStatus.InterfaceName = name - break - } + if iface.Sandbox != "" && len(iface.IPConfigs) > 0 { + netStatus.Address = iface.IPConfigs[0].IP.String() + netStatus.InterfaceName = name + break } } // If no IP address was found, use the first interface with an address // found as a fallback if netStatus.Address == "" { - var found bool - for name, iface := range res.Interfaces { + for _, name := range names { + iface := res.Interfaces[name] if len(iface.IPConfigs) > 0 { ip := iface.IPConfigs[0].IP.String() c.logger.Debug("no sandbox interface with an address found CNI result, using first available", "interface", name, "ip", ip) netStatus.Address = ip netStatus.InterfaceName = name - found = true break } } - if !found { - c.logger.Warn("no address could be found from CNI result") - } } // If no IP address could be found, return an error diff --git a/client/allocrunner/networking_cni_test.go b/client/allocrunner/networking_cni_test.go index 3bc8f8599089..f8c5412b0c33 100644 --- a/client/allocrunner/networking_cni_test.go +++ b/client/allocrunner/networking_cni_test.go @@ -124,6 +124,22 @@ func TestCNI_forceCleanup(t *testing.T) { }) } +// TestCNI_cniToAllocNet_NoInterfaces asserts an error is returned if CNIResult +// contains no interfaces. +func TestCNI_cniToAllocNet_NoInterfaces(t *testing.T) { + ci.Parallel(t) + + cniResult := &cni.CNIResult{} + + // Only need a logger + c := &cniNetworkConfigurator{ + logger: testlog.HCLogger(t), + } + allocNet, err := c.cniToAllocNet(cniResult) + require.Error(t, err) + require.Nil(t, allocNet) +} + // TestCNI_cniToAllocNet_Fallback asserts if a CNI plugin result lacks an IP on // its sandbox interface, the first IP found is used. func TestCNI_cniToAllocNet_Fallback(t *testing.T) {