From ef5388b2f640888ae045084723b0d7c53ccf390e Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Fri, 8 Nov 2024 15:45:27 +0100 Subject: [PATCH] pkg/netns: do not loop forever This is not as simple as one might think: apparently there are cases where it is impossible to remove the file even though umount() worked fine. We fixed one issue that ran into this[1], but there seems to be another problem[2] whose cause is not yet known. Regardless of the real fix for issue[2], add a timeout so that we do not hang/loop forever. If we are not able to remove the file after 60s, give up and report an error. Leaking these files is not great, as the netns references stay around, but it will not prevent containers from running; it will only leak resources. [1] https://issues.redhat.com/browse/RHEL-59620 [2] https://github.com/containers/podman/issues/24487 Signed-off-by: Paul Holzinger --- pkg/netns/netns_linux.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pkg/netns/netns_linux.go b/pkg/netns/netns_linux.go index 41e0cfd17..0947b9f70 100644 --- a/pkg/netns/netns_linux.go +++ b/pkg/netns/netns_linux.go @@ -273,8 +273,10 @@ func UnmountNS(nsPath string) error { return fmt.Errorf("failed to unmount NS: at %s: %w", nsPath, err) } - for { - if err := os.Remove(nsPath); err != nil { + var err error + // wait for up to 60s in the loop + for range 6000 { + if err = os.Remove(nsPath); err != nil { if errors.Is(err, unix.EBUSY) { // mount is still busy, sleep a moment and try again to remove logrus.Debugf("Netns %s still busy, try removing it again in 10ms", nsPath) @@ -283,12 +285,12 @@ func UnmountNS(nsPath string) error { } // If path does not exists we can return without error. if errors.Is(err, unix.ENOENT) { - break + return nil } return fmt.Errorf("failed to remove ns path: %w", err) } - break + return nil } - return nil + return fmt.Errorf("failed to remove ns path (timeout after 60s): %w", err) }