Skip to content

Commit

Permalink
Merge pull request #2112 from Luap99/netns-cleanup
Browse files Browse the repository at this point in the history
improve netns cleanup
  • Loading branch information
openshift-merge-bot[bot] authored Aug 8, 2024
2 parents 1a8d191 + 515488f commit dc70ee3
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 36 deletions.
45 changes: 23 additions & 22 deletions libnetwork/internal/rootlessnetns/netns_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ func (n *Netns) cleanup() error {
if err := n.cleanupRootlessNetns(); err != nil {
multiErr = multierror.Append(multiErr, wrapError("kill network process", err))
}
if err := os.RemoveAll(n.dir); err != nil {
if err := os.RemoveAll(n.dir); err != nil && !errors.Is(err, fs.ErrNotExist) {
multiErr = multierror.Append(multiErr, wrapError("remove rootless netns dir", err))
}

Expand Down Expand Up @@ -280,6 +280,11 @@ func (n *Netns) setupSlirp4netns(nsPath string) error {
func (n *Netns) cleanupRootlessNetns() error {
pidFile := n.getPath(rootlessNetNsConnPidFile)
pid, err := readPidFile(pidFile)
// do not hard error if the file dos not exists, cleanup should be idempotent
if errors.Is(err, fs.ErrNotExist) {
logrus.Debugf("Rootless netns conn pid file does not exists %s", pidFile)
return nil
}
if err == nil {
// kill the slirp/pasta process so we do not leak it
err = unix.Kill(pid, unix.SIGTERM)
Expand Down Expand Up @@ -512,14 +517,14 @@ func (n *Netns) mountCNIVarDir() error {
return nil
}

func (n *Netns) runInner(toRun func() error) (err error) {
func (n *Netns) runInner(toRun func() error, cleanup bool) (err error) {
nsRef, newNs, err := n.getOrCreateNetns()
if err != nil {
return err
}
defer nsRef.Close()
// If a new netns was created make sure to clean it up again on an error to not leak it.
if newNs {
// If a new netns was created make sure to clean it up again on an error to not leak it if requested.
if newNs && cleanup {
defer func() {
if err != nil {
if err := n.cleanup(); err != nil {
Expand Down Expand Up @@ -555,7 +560,7 @@ func (n *Netns) runInner(toRun func() error) (err error) {
}

func (n *Netns) Setup(nets int, toRun func() error) error {
err := n.runInner(toRun)
err := n.runInner(toRun, true)
if err != nil {
return err
}
Expand All @@ -564,25 +569,22 @@ func (n *Netns) Setup(nets int, toRun func() error) error {
}

func (n *Netns) Teardown(nets int, toRun func() error) error {
var multiErr *multierror.Error
count, countErr := refCount(n.dir, -nets)
if countErr != nil {
multiErr = multierror.Append(multiErr, countErr)
err := n.runInner(toRun, true)
if err != nil {
return err
}
err := n.runInner(toRun)
// decrement only if teardown didn't fail, podman will call us again on errors so we should not double decrement
count, err := refCount(n.dir, -nets)
if err != nil {
multiErr = multierror.Append(multiErr, err)
return err
}

// only cleanup if the ref count did not throw an error
if count == 0 && countErr == nil {
err = n.cleanup()
if err != nil {
multiErr = multierror.Append(multiErr, wrapError("cleanup", err))
}
// cleanup when ref count is 0
if count == 0 {
return n.cleanup()
}

return multiErr.ErrorOrNil()
return nil
}

// Run any long running function in the userns.
Expand All @@ -604,7 +606,7 @@ func (n *Netns) Run(lock *lockfile.LockFile, toRun func() error) error {
return err
}

inErr := n.runInner(inner)
inErr := n.runInner(inner, false)
// make sure to always reset the ref counter afterwards
count, err := refCount(n.dir, -1)
if err != nil {
Expand All @@ -614,9 +616,8 @@ func (n *Netns) Run(lock *lockfile.LockFile, toRun func() error) error {
logrus.Errorf("Failed to decrement ref count: %v", err)
return inErr
}
// runInner() already cleans up the netns when it created a new one on errors
// so we only need to do that if there was no error.
if inErr == nil && count == 0 {

if count == 0 {
err = n.cleanup()
if err != nil {
return wrapError("cleanup", err)
Expand Down
39 changes: 25 additions & 14 deletions pkg/netns/netns_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@ import (
"runtime"
"strings"
"sync"
"time"

"github.com/containernetworking/plugins/pkg/ns"
"github.com/containers/storage/pkg/homedir"
"github.com/containers/storage/pkg/unshare"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)

Expand Down Expand Up @@ -177,26 +179,35 @@ func newNSPath(nsPath string) (ns.NetNS, error) {

// UnmountNS unmounts the given netns path
func UnmountNS(nsPath string) error {
var rErr error
// Only unmount if it's been bind-mounted (don't touch namespaces in /proc...)
if !strings.HasPrefix(nsPath, "/proc/") {
if err := unix.Unmount(nsPath, unix.MNT_DETACH); err != nil {
// Do not return here, always try to remove below.
// This is important in case podman now is in a new userns compared to
// when the netns was created. The umount will fail EINVAL but removing
// the file will work and the kernel will destroy the bind mount in the
// other ns because of this. We also need it so pasta doesn't leak.
rErr = fmt.Errorf("failed to unmount NS: at %s: %w", nsPath, err)
// EINVAL means the path exists but is not mounted, just try to remove the path below
if err := unix.Unmount(nsPath, unix.MNT_DETACH); err != nil && !errors.Is(err, unix.EINVAL) {
// If path does not exists we can return without error as we have nothing to do.
if errors.Is(err, unix.ENOENT) {
return nil
}

return fmt.Errorf("failed to unmount NS: at %s: %w", nsPath, err)
}

if err := os.Remove(nsPath); err != nil {
err := fmt.Errorf("failed to remove ns path: %w", err)
if rErr != nil {
err = fmt.Errorf("%v, %w", err, rErr)
for {
if err := os.Remove(nsPath); err != nil {
if errors.Is(err, unix.EBUSY) {
// mount is still busy, sleep a moment and try again to remove
logrus.Debugf("Netns %s still busy, try removing it again in 10ms", nsPath)
time.Sleep(10 * time.Millisecond)
continue
}
// If path does not exists we can return without error.
if errors.Is(err, unix.ENOENT) {
break
}
return fmt.Errorf("failed to remove ns path: %w", err)
}
rErr = err
break
}
}

return rErr
return nil
}

0 comments on commit dc70ee3

Please sign in to comment.