Skip to content

Commit

Permalink
libct/cg: support hugetlb rsvd
Browse files Browse the repository at this point in the history
This adds support for hugetlb.<pagesize>.rsvd limiting and accounting.

The previous non-rsvd max/limit_in_bytes does not account for reserved
huge page memory, making it possible for a process to reserve all the
huge page memory without being able to allocate it (due to cgroup
restrictions).

In practice, this makes it possible to successfully mmap more huge page
memory than the cgroup settings allow, but when the process touches that
memory it gets a SIGBUS and crashes. This is bad for applications that
mmap at startup: the mmap succeeds, but the program crashes once it
starts using the memory. PostgreSQL, for example, does this by default.

This also keeps writing to the old max/limit_in_bytes, for backward
compatibility.

More info can be found here: https://lkml.org/lkml/2020/2/3/1153

(commit message mostly written by Odin Ugedal)

Co-authored-by: Odin Ugedal <odin@ugedal.com>
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
(cherry picked from commit 4a7d3ae)
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
  • Loading branch information
2 people authored and lifubang committed Oct 20, 2023
1 parent 5ba0e01 commit 7de39df
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 18 deletions.
36 changes: 29 additions & 7 deletions libcontainer/cgroups/fs/hugetlb.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package fs

import (
"errors"
"os"
"strconv"

"github.com/opencontainers/runc/libcontainer/cgroups"
Expand All @@ -19,8 +21,23 @@ func (s *HugetlbGroup) Apply(path string, _ *configs.Resources, pid int) error {
}

func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
const suffix = ".limit_in_bytes"
skipRsvd := false

for _, hugetlb := range r.HugetlbLimit {
if err := cgroups.WriteFile(path, "hugetlb."+hugetlb.Pagesize+".limit_in_bytes", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
prefix := "hugetlb." + hugetlb.Pagesize
val := strconv.FormatUint(hugetlb.Limit, 10)
if err := cgroups.WriteFile(path, prefix+suffix, val); err != nil {
return err
}
if skipRsvd {
continue
}
if err := cgroups.WriteFile(path, prefix+".rsvd"+suffix, val); err != nil {
if errors.Is(err, os.ErrNotExist) {
skipRsvd = true
continue
}
return err
}
}
Expand All @@ -32,24 +49,29 @@ func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
if !cgroups.PathExists(path) {
return nil
}
rsvd := ".rsvd"
hugetlbStats := cgroups.HugetlbStats{}
for _, pageSize := range cgroups.HugePageSizes() {
usage := "hugetlb." + pageSize + ".usage_in_bytes"
value, err := fscommon.GetCgroupParamUint(path, usage)
again:
prefix := "hugetlb." + pageSize + rsvd

value, err := fscommon.GetCgroupParamUint(path, prefix+".usage_in_bytes")
if err != nil {
if rsvd != "" && errors.Is(err, os.ErrNotExist) {
rsvd = ""
goto again
}
return err
}
hugetlbStats.Usage = value

maxUsage := "hugetlb." + pageSize + ".max_usage_in_bytes"
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
value, err = fscommon.GetCgroupParamUint(path, prefix+".max_usage_in_bytes")
if err != nil {
return err
}
hugetlbStats.MaxUsage = value

failcnt := "hugetlb." + pageSize + ".failcnt"
value, err = fscommon.GetCgroupParamUint(path, failcnt)
value, err = fscommon.GetCgroupParamUint(path, prefix+".failcnt")
if err != nil {
return err
}
Expand Down
43 changes: 36 additions & 7 deletions libcontainer/cgroups/fs/hugetlb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ const (
limit = "hugetlb.%s.limit_in_bytes"
maxUsage = "hugetlb.%s.max_usage_in_bytes"
failcnt = "hugetlb.%s.failcnt"

rsvdUsage = "hugetlb.%s.rsvd.usage_in_bytes"
rsvdLimit = "hugetlb.%s.rsvd.limit_in_bytes"
rsvdMaxUsage = "hugetlb.%s.rsvd.max_usage_in_bytes"
rsvdFailcnt = "hugetlb.%s.rsvd.failcnt"
)

func TestHugetlbSetHugetlb(t *testing.T) {
Expand Down Expand Up @@ -52,13 +57,15 @@ func TestHugetlbSetHugetlb(t *testing.T) {
}

for _, pageSize := range cgroups.HugePageSizes() {
limit := fmt.Sprintf(limit, pageSize)
value, err := fscommon.GetCgroupParamUint(path, limit)
if err != nil {
t.Fatal(err)
}
if value != hugetlbAfter {
t.Fatalf("Set hugetlb.limit_in_bytes failed. Expected: %v, Got: %v", hugetlbAfter, value)
for _, f := range []string{limit, rsvdLimit} {
limit := fmt.Sprintf(f, pageSize)
value, err := fscommon.GetCgroupParamUint(path, limit)
if err != nil {
t.Fatal(err)
}
if value != hugetlbAfter {
t.Fatalf("Set %s failed. Expected: %v, Got: %v", limit, hugetlbAfter, value)
}
}
}
}
Expand All @@ -85,6 +92,28 @@ func TestHugetlbStats(t *testing.T) {
}
}

// TestHugetlbRStatsRsvd writes only the hugetlb.<pagesize>.rsvd.* stat files
// (usage, max usage and failcnt) for every huge page size and verifies that
// HugetlbGroup.GetStats reports the values from those files.
func TestHugetlbRStatsRsvd(t *testing.T) {
	path := tempDir(t, "hugetlb")

	// Populate the rsvd flavour of each stat file, one set per page size.
	for _, size := range cgroups.HugePageSizes() {
		files := map[string]string{
			fmt.Sprintf(rsvdUsage, size):    hugetlbUsageContents,
			fmt.Sprintf(rsvdMaxUsage, size): hugetlbMaxUsageContents,
			fmt.Sprintf(rsvdFailcnt, size):  hugetlbFailcnt,
		}
		writeFileContents(t, path, files)
	}

	group := &HugetlbGroup{}
	got := *cgroups.NewStats()
	if err := group.GetStats(path, &got); err != nil {
		t.Fatal(err)
	}

	// Every page size is expected to report the same numbers.
	want := cgroups.HugetlbStats{Usage: 128, MaxUsage: 256, Failcnt: 100}
	for _, size := range cgroups.HugePageSizes() {
		expectHugetlbStatEquals(t, want, got.HugetlbStats[size])
	}
}

func TestHugetlbStatsNoUsageFile(t *testing.T) {
path := tempDir(t, "hugetlb")
writeFileContents(t, path, map[string]string{
Expand Down
30 changes: 26 additions & 4 deletions libcontainer/cgroups/fs2/hugetlb.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package fs2

import (
"errors"
"os"
"strconv"

"github.com/opencontainers/runc/libcontainer/cgroups"
Expand All @@ -16,8 +18,22 @@ func setHugeTlb(dirPath string, r *configs.Resources) error {
if !isHugeTlbSet(r) {
return nil
}
const suffix = ".max"
skipRsvd := false
for _, hugetlb := range r.HugetlbLimit {
if err := cgroups.WriteFile(dirPath, "hugetlb."+hugetlb.Pagesize+".max", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
prefix := "hugetlb." + hugetlb.Pagesize
val := strconv.FormatUint(hugetlb.Limit, 10)
if err := cgroups.WriteFile(dirPath, prefix+suffix, val); err != nil {
return err
}
if skipRsvd {
continue
}
if err := cgroups.WriteFile(dirPath, prefix+".rsvd"+suffix, val); err != nil {
if errors.Is(err, os.ErrNotExist) {
skipRsvd = true
continue
}
return err
}
}
Expand All @@ -27,15 +43,21 @@ func setHugeTlb(dirPath string, r *configs.Resources) error {

func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
hugetlbStats := cgroups.HugetlbStats{}
rsvd := ".rsvd"
for _, pagesize := range cgroups.HugePageSizes() {
value, err := fscommon.GetCgroupParamUint(dirPath, "hugetlb."+pagesize+".current")
again:
prefix := "hugetlb." + pagesize + rsvd
value, err := fscommon.GetCgroupParamUint(dirPath, prefix+".current")
if err != nil {
if rsvd != "" && errors.Is(err, os.ErrNotExist) {
rsvd = ""
goto again
}
return err
}
hugetlbStats.Usage = value

fileName := "hugetlb." + pagesize + ".events"
value, err = fscommon.GetValueByKey(dirPath, fileName, "max")
value, err = fscommon.GetValueByKey(dirPath, prefix+".events", "max")
if err != nil {
return err
}
Expand Down
64 changes: 64 additions & 0 deletions tests/integration/cgroups.bats
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,70 @@ function setup() {
[[ "$weights" == *"$major:$minor 444"* ]]
}

# Convert size in KB to hugetlb size suffix (e.g. 2048 -> "2MB").
#
# Arguments:
#   $1 - huge page size in KB (a non-negative integer).
# Output:
#   The size scaled down by powers of 1024, followed by KB, MB or GB.
#   GB is the largest unit, so sizes of 1 TB and above are expressed in GB
#   rather than getting an empty suffix.
convert_hugetlb_size() {
	local size=$1
	local units=("KB" "MB" "GB")
	local idx=0

	# Plain assignments are used instead of ((idx++)): the latter returns
	# a non-zero status when idx is 0, which aborts the caller under
	# "set -e" if this function is not run in a command substitution.
	while ((size >= 1024 && idx < ${#units[@]} - 1)); do
		size=$((size / 1024))
		idx=$((idx + 1))
	done

	echo "$size${units[$idx]}"
}

# Checks that per-page-size hugetlb limits from the container config are
# applied to the hugetlb cgroup files, including the .rsvd variants when the
# cgroup provides them.
@test "runc run (hugetlb limits)" {
	requires cgroups_hugetlb
	[ $EUID -ne 0 ] && requires rootless_cgroup

	# Collect the huge page sizes (in KB) from the hugepages-<N>kB entries.
	# shellcheck disable=SC2012 # ls is fine here.
	mapfile -t sizes_kb < <(ls /sys/kernel/mm/hugepages/ | sed -e 's/.*hugepages-//' -e 's/kB$//')
	if [ "${#sizes_kb[@]}" -lt 1 ]; then
		skip "requires hugetlb"
	fi

	# Create two arrays:
	# - sizes: hugetlb cgroup file suffixes;
	# - limits: limits for each size.
	sizes=()
	limits=()
	for size in "${sizes_kb[@]}"; do
		sizes+=("$(convert_hugetlb_size "$size")")
		# Limit to 1 page.
		limits+=("$((size * 1024))")
	done

	# Set per-size limits.
	for ((i = 0; i < ${#sizes[@]}; i++)); do
		size="${sizes[$i]}"
		limit="${limits[$i]}"
		update_config '.linux.resources.hugepageLimits += [{ pagesize: "'"$size"'", limit: '"$limit"' }]'
	done

	set_cgroups_path
	runc run -d --console-socket "$CONSOLE_SOCKET" test_hugetlb
	[ "$status" -eq 0 ]

	# Name of the limit file without a leading dot: the "." separator is
	# added where the full parameter name is assembled below, so a leading
	# dot here would yield "hugetlb.<size>..limit_in_bytes" on cgroup v1.
	lim="max"
	[ -v CGROUP_V1 ] && lim="limit_in_bytes"

	optional=("")
	# Add rsvd, if available.
	if test -f "$(get_cgroup_path hugetlb)/hugetlb.${sizes[0]}.rsvd.$lim"; then
		optional+=(".rsvd")
	fi

	# Check if the limits are as expected.
	for ((i = 0; i < ${#sizes[@]}; i++)); do
		size="${sizes[$i]}"
		limit="${limits[$i]}"
		for rsvd in "${optional[@]}"; do
			param="hugetlb.${size}${rsvd}.$lim"
			echo "checking $param"
			check_cgroup_value "$param" "$limit"
		done
	done
}

@test "runc run (cgroup v2 resources.unified only)" {
requires root cgroups_v2

Expand Down

0 comments on commit 7de39df

Please sign in to comment.