Skip to content

Commit

Permalink
Add host-local IPAM GC on startup
Browse files Browse the repository at this point in the history
During CNIServer reconciliation, we perform host-local IPAM garbage
collection (GC) by comparing the set of IPs allocated to local Pods and
the set of IPs currently reserved by the plugin. We release any IP
reserved by the plugin that is not in-use by a local Pod. The purpose is
to avoid leaking IP addresses when there is a bug in the container
runtime, which has happened in the past.

Two key design choices that were made:
* We do not invoke CNI DEL to release IPs, instead we access the
  host-local data which is persisted on the Node, and modify it as
  needed.
* We do not rely on the interface store (as persisted to OVSDB) to
  determine the set of IPs that may have been leaked. In case of an
  Antrea bug, it could be possible (although unlikely) for an IP to
  still be allocated by host-local but be missing from the interface
  store. Intead, we list all allocated IPs from the host-local data (an
  allocated IP corresponds to one disk file).

This approach is essentially the same as our existing script:
https://github.com/antrea-io/antrea/blob/main/hack/gc-host-local.sh

Fixes #4326

Signed-off-by: Antonin Bas <abas@vmware.com>
  • Loading branch information
antoninbas committed Nov 3, 2023
1 parent ab94f29 commit 29e53b5
Show file tree
Hide file tree
Showing 7 changed files with 203 additions and 5 deletions.
2 changes: 2 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ updates:
- dependency-name: "antrea.io/ofnet"
- dependency-name: "antrea.io/libOpenflow"
- dependency-name: "github.com/ClickHouse/clickhouse-go/v2" # auto-upgrade involves dependency conflicts
- dependency-name: "github.com/alexflint/go-filemutex"
update-types: ["version-update:semver-major"] # ignore major updates only
- package-ecosystem: "github-actions"
# Workflow files stored in the default location of `.github/workflows`
directory: "/"
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ require (
github.com/Microsoft/go-winio v0.6.1
github.com/Microsoft/hcsshim v0.9.10
github.com/TomCodeLV/OVSDB-golang-lib v0.0.0-20200116135253-9bbdfadcd881
github.com/alexflint/go-filemutex v1.2.0
github.com/aws/aws-sdk-go-v2 v1.16.10
github.com/aws/aws-sdk-go-v2/config v1.16.0
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.23
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRF
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
github.com/alessio/shellescape v1.2.2/go.mod h1:PZAiSCk0LJaZkiCSkPv8qIobYglO3FPpyFjDCtHLS30=
github.com/alexflint/go-filemutex v0.0.0-20171022225611-72bdc8eae2ae/go.mod h1:CgnQgUtFrFz9mxFNtED3jI5tLDjKlOM+oUF/sTk6ps0=
github.com/alexflint/go-filemutex v1.2.0 h1:1v0TJPDtlhgpW4nJ+GvxCLSlUDC3+gW0CQQvlmfDR/s=
github.com/alexflint/go-filemutex v1.2.0/go.mod h1:mYyQSWvw9Tx2/H2n9qXPb52tTYfE0pZAWcBq5mK025c=
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8=
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
Expand Down
103 changes: 103 additions & 0 deletions pkg/agent/cniserver/ipam/hostlocal/gc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright 2023 Antrea Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package hostlocal

import (
"fmt"
"net"
"os"
"path/filepath"
"runtime"
"strings"

"github.com/spf13/afero"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
)

const dataDir = "/var/lib/cni/networks"

// This is a hacky approach as we access the internals of the host-local plugin,
// instead of using the CNI interface. However, crafting a CNI DEL request from
// scratch would also be hacky.
func GarbageCollectContainerIPs(network string, desiredIPs sets.Set[string]) error {
dir := filepath.Join(dataDir, network)

lk, err := NewFileLock(dataDir)
if err != nil {
return err
}
defer lk.Close()
lk.Lock()
defer lk.Unlock()

fs := afero.NewOsFs()
return gcContainerIPs(fs, dir, desiredIPs)
}

// Internal version of GarbageCollectContainerIPs which does not acquire the
// file lock and can work with an arbitrary afero filesystem.
func gcContainerIPs(fs afero.Fs, dir string, desiredIPs sets.Set[string]) error {
paths := make([]string, 0)

if err := afero.Walk(fs, dir, func(path string, info os.FileInfo, err error) error {
if err != nil || info.IsDir() {
return nil
}
paths = append(paths, path)
return nil
}); err != nil {
return fmt.Errorf("error when gathering IP filenames in the host-local data directory: %w", err)
}

allocatedIPs := sets.New[string]()
for _, p := range paths {
ip := getIPFromPath(p)
if net.ParseIP(ip) == nil {
// not a valid IP, nothing to do
continue
}
allocatedIPs.Insert(ip)
if desiredIPs.Has(ip) {
// IP is in-use
continue
}
if err := os.Remove(p); err != nil {
klog.ErrorS(err, "Failed to release unused IP from host-local IPAM plugin", "IP", ip)
}
allocatedIPs.Delete(ip)
klog.InfoS("Unused IP was successfully released from host-local IPAM plugin", "IP", ip)
}

if allocatedIPs.Difference(desiredIPs).Len() > 0 {
return fmt.Errorf("not all unused IPs could be released from host-local IPAM plugin, some IPs may be leaked")
}

// Note that it is perfectly possible for some IPs to be in desiredIPs but not in
// allocatedIPs. This can be the case when another IPAM plugin (e.g., AntreaIPAM) is also
// used.

return nil
}

func getIPFromPath(path string) string {
fname := filepath.Base(path)
// need to unespace IPv6 addresses on Windows
// see https://github.com/containernetworking/plugins/blob/38f18d26ecfef550b8bac02656cc11103fd7cff1/plugins/ipam/host-local/backend/disk/backend.go#L197
if runtime.GOOS == "windows" {
fname = strings.ReplaceAll(fname, "_", ":")
}
return fname
}
62 changes: 62 additions & 0 deletions pkg/agent/cniserver/ipam/hostlocal/lock.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2023 Antrea Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package hostlocal

import (
"os"
"path"

"github.com/alexflint/go-filemutex"
)

// This code was copied from https://github.com/containernetworking/plugins/blob/v1.3.0/plugins/ipam/host-local/backend/disk/lock.go

// FileLock wraps os.File to be used as a lock using flock
type FileLock struct {
f *filemutex.FileMutex
}

// NewFileLock opens file/dir at path and returns unlocked FileLock object
func NewFileLock(lockPath string) (*FileLock, error) {
fi, err := os.Stat(lockPath)
if err != nil {
return nil, err
}

if fi.IsDir() {
lockPath = path.Join(lockPath, "lock")
}

f, err := filemutex.New(lockPath)
if err != nil {
return nil, err
}

return &FileLock{f}, nil
}

func (l *FileLock) Close() error {
return l.f.Close()
}

// Lock acquires an exclusive lock
func (l *FileLock) Lock() error {
return l.f.Lock()
}

// Unlock releases the lock
func (l *FileLock) Unlock() error {
return l.f.Unlock()
}
14 changes: 14 additions & 0 deletions pkg/agent/cniserver/ipam/ipam_delegator.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@ import (
"github.com/containernetworking/cni/pkg/invoke"
"github.com/containernetworking/cni/pkg/types"
current "github.com/containernetworking/cni/pkg/types/100"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"

"antrea.io/antrea/pkg/agent/cniserver/ipam/hostlocal"
argtypes "antrea.io/antrea/pkg/agent/cniserver/types"
)

Expand Down Expand Up @@ -86,6 +88,18 @@ func (d *IPAMDelegator) Check(args *invoke.Args, k8sArgs *argtypes.K8sArgs, netw
return true, nil
}

// GarbageCollectContainerIPs will IPs allocated by the delegated IPAM plugin
// that are no longer in-use (if there is any). It should be called on an agent
// restart to provide garbage collection for IPs, and to avoid IP leakage in
// case of missed CNI DEL events. Normally, it is not Antrea's responsibility to
// implement this, as the above layers should ensure that there is always one
// successful CNI DEL for every corresponding CNI ADD. However, we include this
// support to increase robustness in case of a container runtime bug.
// Only the host-local plugin is supported.
func GarbageCollectContainerIPs(network string, desiredIPs sets.Set[string]) error {
return hostlocal.GarbageCollectContainerIPs(network, desiredIPs)
}

var defaultExec invoke.Exec = &invoke.DefaultExec{
RawExec: &invoke.RawExec{Stderr: os.Stderr},
}
Expand Down
24 changes: 19 additions & 5 deletions pkg/agent/cniserver/pod_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,8 @@ func (pc *podConfigurator) reconcile(pods []corev1.Pod, containerAccess *contain
// desiredPods is the set of Pods that should be present, based on the
// current list of Pods got from the Kubernetes API.
desiredPods := sets.New[string]()
// desiredPodIPs is the set of IPs allocated to desiredPods.
desiredPodIPs := sets.New[string]()
// knownInterfaces is the list of interfaces currently in the local cache.
knownInterfaces := pc.ifaceStore.GetInterfacesByType(interfacestore.ContainerInterface)

Expand All @@ -430,11 +432,16 @@ func (pc *podConfigurator) reconcile(pods []corev1.Pod, containerAccess *contain
continue
}
desiredPods.Insert(k8s.NamespacedName(pod.Namespace, pod.Name))
for _, podIP := range pod.Status.PodIPs {
desiredPodIPs.Insert(podIP.IP)
}
}

missingIfConfigs := make([]*interfacestore.InterfaceConfig, 0)
for _, containerConfig := range knownInterfaces {
namespacedName := k8s.NamespacedName(containerConfig.PodNamespace, containerConfig.PodName)
namespace := containerConfig.PodNamespace
name := containerConfig.PodName
namespacedName := k8s.NamespacedName(namespace, name)
if desiredPods.Has(namespacedName) {
// Find the OVS ports which are not connected to host interfaces. This is useful on Windows if the runtime is
// containerd, because the host interface is created async from the OVS port.
Expand All @@ -446,7 +453,7 @@ func (pc *podConfigurator) reconcile(pods []corev1.Pod, containerAccess *contain
// We rely on the interface cache / store - which is initialized from the persistent
// OVSDB - to map the Pod to its interface configuration. The interface
// configuration includes the parameters we need to replay the flows.
klog.V(4).Infof("Syncing interface %s for Pod %s", containerConfig.InterfaceName, namespacedName)
klog.V(4).InfoS("Syncing Pod interface", "Pod", klog.KRef(namespace, name), "iface", containerConfig.InterfaceName)
if err := pc.ofClient.InstallPodFlows(
containerConfig.InterfaceName,
containerConfig.IPs,
Expand All @@ -455,20 +462,27 @@ func (pc *podConfigurator) reconcile(pods []corev1.Pod, containerAccess *contain
containerConfig.VLANID,
nil,
); err != nil {
klog.Errorf("Error when re-installing flows for Pod %s", namespacedName)
klog.ErrorS(err, "Error when re-installing flows for Pod", "Pod", klog.KRef(namespace, name))
}
} else {
// clean-up and delete interface
klog.V(4).Infof("Deleting interface %s", containerConfig.InterfaceName)
klog.V(4).InfoS("Deleting interface", "Pod", klog.KRef(namespace, name), "iface", containerConfig.InterfaceName)
if err := pc.removeInterfaces(containerConfig.ContainerID); err != nil {
klog.Errorf("Failed to delete interface %s: %v", containerConfig.InterfaceName, err)
klog.ErrorS(err, "Failed to delete interface", "Pod", klog.KRef(namespace, name), "iface", containerConfig.InterfaceName)
}
// interface should no longer be in store after the call to removeInterfaces
}
}
if len(missingIfConfigs) > 0 {
pc.reconcileMissingPods(missingIfConfigs, containerAccess)
}

// clean-up IPs that may still be allocated
klog.V(4).InfoS("Running IPAM garbage collection for unused Pod IPs")
if err := ipam.GarbageCollectContainerIPs(antreaCNIType, desiredPodIPs); err != nil {
klog.ErrorS(err, "Error when garbage collecting previously-allocated IPs")
}

return nil
}

Expand Down

0 comments on commit 29e53b5

Please sign in to comment.