Skip to content

Commit

Permalink
Don't bump RLIMIT_NOFILE in exec sessions with '--ulimit host'
Browse files Browse the repository at this point in the history
Starting from commit 9126b45 ("Up default Podman rlimits to
avoid max open files"), Podman started bumping its soft limit for the
maximum number of open file descriptors (RLIMIT_NOFILE or ulimit -n) to
permit exposing a large number of ports to a container.  This was later
fine-tuned in commit a2c1a2d ("podman: bump RLIMIT_NOFILE also
without CAP_SYS_RESOURCE").

Unfortunately, this also increases the limits for 'podman exec' sessions
running in containers created with:
  $ podman create --network host --ulimit host ...

This is what Toolbx uses to provide a containerized interactive command
line environment for software development and troubleshooting the host
operating system.

It confuses developers and system administrators debugging a process
that's leaking file descriptors and crashing on the host OS.  The
crashes either don't reproduce inside the container or they take a lot
longer to reproduce, both of which are frustrating.

Therefore, it will be good to retain the limits, at least for this
specific scenario.

It turns out that since this code was written, the Go runtime has had
two interesting changes.

Starting from Go 1.19 [1], the Go runtime bumps the soft limit for
RLIMIT_NOFILE for all Go programs [2].  This means that there's no
longer any need for Podman to bump it's own limits, because it switched
from requiring Go 1.18 to 1.20 in commit 4dd58f2 ("Move golang
requirement from 1.18 to 1.20").  It's probably good to still log the
detected limits, in case Go's behaviour changes.

Not everybody was happy with this [3], because the higher limits got
propagated to child processes spawned by Go programs.  Among other
things, this can break old programs using select(2) [4].  So, Go's
behaviour was fine-tuned to restore the original soft limit for
RLIMIT_NOFILE when forking a child process [5].

With these two changes in Go, which Podman already uses, if the bumping
of RLIMIT_NOFILE is left to the Go runtime, then the limits are no
longer increased for 'podman exec' sessions.  Otherwise, if Podman
continues to bump the soft limit for RLIMIT_NOFILE on its own, then it
prevents the Go runtime from restoring the original limits when forking,
and leads to the higher limits in 'podman exec' sessions.

The existing 'podman run --ulimit host ... ulimit -Hn' test in
test/e2e/run_test.go was extended to also check the soft limit.  The
similar test for 'podman exec' was moved from test/e2e/toolbox_test.go
to test/e2e/exec_test.go for consistency and because there's nothing
Toolbx specific about it.  The test was similarly extended, and updated
to be more idiomatic.

Due to the behaviour of the Go runtime noted above, and since the tests
are written in Go, the current or soft limit for RLIMIT_NOFILE returned
by syscall.Getrlimit() is the same as the hard limit.

The Alpine Linux image doesn't have a standalone binary for 'ulimit' and
it's picky about the order in which the options are listed.  The -H or
-S must come first, followed by a space, and then the -n.

[1] https://go.dev/doc/go1.19#runtime

[2] Go commit 8427429c592588af ("os: raise open file rlimit at startup")
    golang/go@8427429c592588af
    golang/go#46279

[3] containerd/containerd#8249

[4] http://0pointer.net/blog/file-descriptor-limits.html

[5] Go commit f5eef58e4381259c ("syscall: restore original NOFILE ...")
    golang/go@f5eef58e4381259c
    golang/go#46279

Fixes: #17681

Signed-off-by: Debarshi Ray <rishi@fedoraproject.org>
  • Loading branch information
debarshiray committed Oct 13, 2024
1 parent d512e44 commit 302d638
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 72 deletions.
23 changes: 8 additions & 15 deletions cmd/podman/early_init_darwin.go
Original file line number Diff line number Diff line change
@@ -1,28 +1,21 @@
package main

import (
"fmt"
"os"
"syscall"

"github.com/sirupsen/logrus"
)

func setRLimitsNoFile() error {
func checkRLimits() {
var rLimitNoFile syscall.Rlimit
if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rLimitNoFile); err != nil {
return fmt.Errorf("getting RLIMITS_NOFILE: %w", err)
}
err := syscall.Setrlimit(syscall.RLIMIT_NOFILE, &syscall.Rlimit{
Max: rLimitNoFile.Max,
Cur: rLimitNoFile.Max,
})
if err != nil {
return fmt.Errorf("setting new RLIMITS_NOFILE: %w", err)
logrus.Debugf("Error getting RLIMITS_NOFILE: %s", err)
return
}
return nil

logrus.Debugf("Got RLIMITS_NOFILE: cur=%d, max=%d", rLimitNoFile.Cur, rLimitNoFile.Max)
}

func earlyInitHook() {
if err := setRLimitsNoFile(); err != nil {
fmt.Fprintf(os.Stderr, "Failed to set RLIMITS_NOFILE: %s\n", err.Error())
}
checkRLimits()
}
29 changes: 9 additions & 20 deletions cmd/podman/early_init_linux.go
Original file line number Diff line number Diff line change
@@ -1,27 +1,19 @@
package main

import (
"fmt"
"os"
"syscall"

"github.com/containers/podman/v5/libpod/define"
"github.com/sirupsen/logrus"
)

func setRLimits() error {
rlimits := new(syscall.Rlimit)
rlimits.Cur = define.RLimitDefaultValue
rlimits.Max = define.RLimitDefaultValue
if err := syscall.Setrlimit(syscall.RLIMIT_NOFILE, rlimits); err != nil {
if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, rlimits); err != nil {
return fmt.Errorf("getting rlimits: %w", err)
}
rlimits.Cur = rlimits.Max
if err := syscall.Setrlimit(syscall.RLIMIT_NOFILE, rlimits); err != nil {
return fmt.Errorf("setting new rlimits: %w", err)
}
func checkRLimits() {
var rLimitNoFile syscall.Rlimit
if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rLimitNoFile); err != nil {
logrus.Debugf("Error getting RLIMITS_NOFILE: %s", err)
return
}
return nil

logrus.Debugf("Got RLIMITS_NOFILE: cur=%d, max=%d", rLimitNoFile.Cur, rLimitNoFile.Max)
}

func setUMask() {
Expand All @@ -30,9 +22,6 @@ func setUMask() {
}

func earlyInitHook() {
if err := setRLimits(); err != nil {
fmt.Fprintf(os.Stderr, "Failed to set rlimits: %s\n", err.Error())
}

checkRLimits()
setUMask()
}
35 changes: 35 additions & 0 deletions test/e2e/exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"syscall"

. "github.com/containers/podman/v5/test/utils"
. "github.com/onsi/ginkgo/v2"
Expand Down Expand Up @@ -309,6 +311,39 @@ var _ = Describe("Podman exec", func() {
Expect(session.OutputToString()).To(ContainSubstring("0000000000000000"))
})

It("podman exec limits host test", func() {
SkipIfRemote("This can only be used for local tests")

var l syscall.Rlimit

err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &l)
Expect(err).ToNot(HaveOccurred())

setup := podmanTest.RunTopContainerWithArgs("test1", []string{"--ulimit", "host"})
setup.WaitWithDefaultTimeout()
Expect(setup).Should(ExitCleanly())

session := podmanTest.Podman([]string{"exec", "test1", "sh", "-c", "ulimit -H -n"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitCleanly())

ulimitCtrStr := strings.TrimSpace(session.OutputToString())
ulimitCtr, err := strconv.ParseUint(ulimitCtrStr, 10, 0)
Expect(err).ToNot(HaveOccurred())

Expect(ulimitCtr).Should(BeNumerically("==", l.Max))

session = podmanTest.Podman([]string{"exec", "test1", "sh", "-c", "ulimit -S -n"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitCleanly())

ulimitCtrStr = strings.TrimSpace(session.OutputToString())
ulimitCtr, err = strconv.ParseUint(ulimitCtrStr, 10, 0)
Expect(err).ToNot(HaveOccurred())

Expect(ulimitCtr).Should(BeNumerically("<", l.Max))
})

// #10927 ("no logs from conmon"), one of our nastiest flakes
It("podman exec terminal doesn't hang", FlakeAttempts(3), func() {
setup := podmanTest.Podman([]string{"run", "-dti", "--name", "test1", fedoraMinimal, "sleep", "+Inf"})
Expand Down
10 changes: 10 additions & 0 deletions test/e2e/run_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,16 @@ USER bin`, BB)
Expect(err).ToNot(HaveOccurred())

Expect(ulimitCtr).Should(BeNumerically(">=", l.Max))

session = podmanTest.Podman([]string{"run", "--rm", "--ulimit", "host", fedoraMinimal, "ulimit", "-Sn"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitCleanly())

ulimitCtrStr = strings.TrimSpace(session.OutputToString())
ulimitCtr, err = strconv.ParseUint(ulimitCtrStr, 10, 0)
Expect(err).ToNot(HaveOccurred())

Expect(ulimitCtr).Should(BeNumerically("<", l.Max))
})

It("podman run with cidfile", func() {
Expand Down
37 changes: 0 additions & 37 deletions test/e2e/toolbox_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ import (
"path"
"strconv"
"strings"
"syscall"

"github.com/containers/podman/v5/libpod/define"
. "github.com/containers/podman/v5/test/utils"
Expand All @@ -60,42 +59,6 @@ var _ = Describe("Toolbox-specific testing", func() {
Expect(session.OutputToString()).To(ContainSubstring("0:123"))
})

It("podman create --ulimit host + podman exec - correctly mirrors hosts ulimits", func() {
if podmanTest.RemoteTest {
Skip("Ulimit check does not work with a remote client")
}
info := GetHostDistributionInfo()
if info.Distribution == "debian" {
// "expected 1048576 to be >= 1073741816"
Skip("FIXME 2024-05-28 fails on debian, maybe because of systemd 256?")
}
var session *PodmanSessionIntegration
var containerHardLimit int
var rlimit syscall.Rlimit
var err error

err = syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rlimit)
Expect(err).ToNot(HaveOccurred())
GinkgoWriter.Printf("Expected value: %d", rlimit.Max)

session = podmanTest.Podman([]string{"create", "--name", "test", "--ulimit", "host", ALPINE,
"sleep", "1000"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitCleanly())

session = podmanTest.Podman([]string{"start", "test"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitCleanly())

session = podmanTest.Podman([]string{"exec", "test", "sh", "-c",
"ulimit -H -n"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitCleanly())
containerHardLimit, err = strconv.Atoi(strings.Trim(session.OutputToString(), "\n"))
Expect(err).ToNot(HaveOccurred())
Expect(containerHardLimit).To(BeNumerically(">=", rlimit.Max))
})

It("podman create --ipc=host --pid=host + podman exec - correct shared memory limit size", func() {
// Comparison of the size of /dev/shm on the host being equal to the one in
// a container
Expand Down

0 comments on commit 302d638

Please sign in to comment.