From e623414959de067bed533a630b3f061a37485ba2 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Fri, 6 Dec 2024 17:45:25 +1100 Subject: [PATCH 1/3] [1.2] cgroups: ebpf: use link.Anchor to check for BPF_F_REPLACE support (This is a cherry-pick of dea0e04dd93d3922083e68667d20aac532d31129.) In v0.13.0, cilium/ebpf stopped supporting setting BPF_F_REPLACE as an explicit flag and instead requires us to use link.Anchor to specify where the program should be attached. Commit 216175a9ca84 ("Upgrade Cilium's eBPF library version to 0.16") did update this correctly for the actual attaching logic, but when checking for kernel support we still passed BPF_F_REPLACE. This would result in a generic error being returned, which our feature-support checking logic would treat as being an error the indicates that BPF_F_REPLACE *is* supported, resulting in a regression on pre-5.6 kernels. It turns out that our debug logging saying that this unexpected error was happening was being output as a result of this change, but nobody noticed... Fixes: 216175a9ca84 ("Upgrade Cilium's eBPF library version to 0.16") Signed-off-by: Aleksa Sarai --- libcontainer/cgroups/devices/ebpf_linux.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libcontainer/cgroups/devices/ebpf_linux.go b/libcontainer/cgroups/devices/ebpf_linux.go index 1d3999e664d..0e808e29b32 100644 --- a/libcontainer/cgroups/devices/ebpf_linux.go +++ b/libcontainer/cgroups/devices/ebpf_linux.go @@ -123,12 +123,15 @@ func haveBpfProgReplace() bool { // BPF_CGROUP_DEVICE programs. If passing BPF_F_REPLACE gives us EINVAL // we know that the feature isn't present. err = link.RawAttachProgram(link.RawAttachProgramOptions{ - // We rely on this fd being checked after attachFlags. + // We rely on this fd being checked after attachFlags in the kernel. Target: int(devnull.Fd()), - // Attempt to "replace" bad fds with this program. + // Attempt to "replace" our BPF program with itself. This will + // always fail, but we should get -EINVAL if BPF_F_REPLACE is not + // supported. + Anchor: link.ReplaceProgram(prog), Program: prog, Attach: ebpf.AttachCGroupDevice, - Flags: unix.BPF_F_ALLOW_MULTI | unix.BPF_F_REPLACE, + Flags: unix.BPF_F_ALLOW_MULTI, }) if errors.Is(err, unix.EINVAL) { // not supported From e26e0fde01b03750a5609e9e78e3a2d7be520811 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Fri, 6 Dec 2024 17:50:14 +1100 Subject: [PATCH 2/3] [1.2] cgroups: ebpf: also check for ebpf.ErrNotSupported (This is a cherry-pick of dea0e04dd93d3922083e68667d20aac532d31129.) It is possible for LinkAttachProgram to return ErrNotSupported if program attachment is not supported at all (which doesn't matter in this case), but it seems possible that upstream will start returning ErrNotSupported for BPF_F_REPLACE at some point so it's best to make sure we don't cause additional regressions here. Signed-off-by: Aleksa Sarai --- libcontainer/cgroups/devices/ebpf_linux.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libcontainer/cgroups/devices/ebpf_linux.go b/libcontainer/cgroups/devices/ebpf_linux.go index 0e808e29b32..63bc10ec0ad 100644 --- a/libcontainer/cgroups/devices/ebpf_linux.go +++ b/libcontainer/cgroups/devices/ebpf_linux.go @@ -133,11 +133,10 @@ func haveBpfProgReplace() bool { Attach: ebpf.AttachCGroupDevice, Flags: unix.BPF_F_ALLOW_MULTI, }) - if errors.Is(err, unix.EINVAL) { + if errors.Is(err, ebpf.ErrNotSupported) || errors.Is(err, unix.EINVAL) { // not supported return } - // attach_flags test succeeded. if !errors.Is(err, unix.EBADF) { logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err) } From 7242ffa4bf250e0de1b5be1bed5b516b79075847 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Fri, 6 Dec 2024 17:54:06 +1100 Subject: [PATCH 3/3] [1.2] cgroup: ebpf: make unexpected errors in haveBpfProgReplace louder (This is a cherry-pick of c0044c7aa403ecf2d9172bd9386d05433b011076.) If we get an unexpected error here, it is probably because of a library or kernel change that could cause our detection logic to be invalid. As a result, these warnings should be louder so users have a chance to tell us about them sooner (or so we might notice them before doing a release, as happened with the 1.2.0 regression). Signed-off-by: Aleksa Sarai --- libcontainer/cgroups/devices/ebpf_linux.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/libcontainer/cgroups/devices/ebpf_linux.go b/libcontainer/cgroups/devices/ebpf_linux.go index 63bc10ec0ad..6a41aff6e1a 100644 --- a/libcontainer/cgroups/devices/ebpf_linux.go +++ b/libcontainer/cgroups/devices/ebpf_linux.go @@ -107,14 +107,14 @@ func haveBpfProgReplace() bool { }, }) if err != nil { - logrus.Debugf("checking for BPF_F_REPLACE support: ebpf.NewProgram failed: %v", err) + logrus.Warnf("checking for BPF_F_REPLACE support: ebpf.NewProgram failed: %v", err) return } defer prog.Close() devnull, err := os.Open("/dev/null") if err != nil { - logrus.Debugf("checking for BPF_F_REPLACE support: open dummy target fd: %v", err) + logrus.Warnf("checking for BPF_F_REPLACE support: open dummy target fd: %v", err) return } defer devnull.Close() @@ -138,7 +138,11 @@ func haveBpfProgReplace() bool { return } if !errors.Is(err, unix.EBADF) { - logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err) + // If we see any new errors here, it's possible that there is a + // regression due to a cilium/ebpf update and the above EINVAL + // checks are not working. So, be loud about it so someone notices + // and we can get the issue fixed quicker. + logrus.Warnf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err) } haveBpfProgReplaceBool = true })