From 65fac58a30bc2301c03de055764c3df92698967a Mon Sep 17 00:00:00 2001 From: Roman Penyaev Date: Sat, 21 Sep 2024 16:29:57 +0200 Subject: [PATCH] pillar: configure GOGC based on pillar memory limit or global config This patch introduces two settings for the Golang runtime which impact garbage collector behavior: 1. `gogc.memory.limit.bytes` provides the runtime with a soft memory limit. The runtime undertakes several processes to try to respect this memory limit, including adjustments to the frequency of garbage collections and returning memory to the underlying system more aggressively. The Go API call is described here: https://pkg.go.dev/runtime/debug#SetMemoryLimit By default, the EVE setting is disabled (set to 0), meaning the Golang runtime memory limit will be set according to the following equation based on the `memory.limit_in_bytes` hard memory limit provided by the pillar `cgroups`: `limit = memory.limit_in_bytes * 0.6` The constant 0.6 was chosen empirically and is explained by simple logic: `memory.limit_in_bytes` is a hard limit for the whole pillar cgroup, meaning that when it is reached, one of the processes will likely be killed by OOM. In turn, the Golang runtime memory limit is a soft limit, so the difference must be significant to ensure that after the soft limit is reached, there will be enough memory for the Go garbage collector to do its job and, hopefully, not hit the hard limit. 2. `gogc.percent` sets the garbage collection target percentage: a collection is triggered when the ratio of freshly allocated data to live data remaining after the previous collection reaches this percentage. The Go API call is described here: https://pkg.go.dev/runtime/debug#SetGCPercent The patch is motivated by a frequently observed bloated `zedbox` application (up to 500MB) that triggers an OOM kill in the /eve or /pillar cgroups. 
It is assumed that the bloated `zedbox` application is not caused by memory leaks, but by a delayed GC sweep cycle and an unconditionally growing runtime heap size. An explicit memory limit set for the Golang runtime (~400MB in the current version of EVE) should make the GC more aggressive when the soft memory limit is hit, which should result in a significant reduction in allocated but unused memory. Signed-off-by: Roman Penyaev --- pkg/pillar/cmd/zedmanager/zedmanager.go | 12 ++++++++ pkg/pillar/types/global.go | 9 +++++- pkg/pillar/types/global_test.go | 2 ++ pkg/pillar/types/locationconsts.go | 2 ++ pkg/pillar/types/memory.go | 41 +++++++++++++++++++++++++ 5 files changed, 65 insertions(+), 1 deletion(-) diff --git a/pkg/pillar/cmd/zedmanager/zedmanager.go b/pkg/pillar/cmd/zedmanager/zedmanager.go index c40fbc7de5..a7f4e0d110 100644 --- a/pkg/pillar/cmd/zedmanager/zedmanager.go +++ b/pkg/pillar/cmd/zedmanager/zedmanager.go @@ -1431,6 +1431,17 @@ func quantifyChanges(config types.AppInstanceConfig, oldConfig types.AppInstance return needPurge, needRestart, purgeReason, restartReason } +func configureGOGC(gcp *types.ConfigItemValueMap) { + lim := gcp.GlobalValueInt(types.GOGCMemoryLimitInBytes) + per := gcp.GlobalValueInt(types.GOGCPercent) + plim, pper, err := types.ConfigureGOGC(int64(lim), int(per)) + if err != nil { + log.Warningf("configureGOGC: failed '%v'", err) + } else { + log.Functionf("configureGOGC: memory limit set to '%v' (previous '%v'), GC percent set to '%v' (previous '%v')", lim, plim, per, pper) + } +} + func handleGlobalConfigCreate(ctxArg interface{}, key string, statusArg interface{}) { handleGlobalConfigImpl(ctxArg, key, statusArg) @@ -1456,6 +1467,7 @@ func handleGlobalConfigImpl(ctxArg interface{}, key string, ctx.globalConfig = gcp ctx.GCInitialized = true } + configureGOGC(gcp) log.Functionf("handleGlobalConfigImpl done for %s", key) } diff --git a/pkg/pillar/types/global.go b/pkg/pillar/types/global.go index c75e926dbb..637351afb2 
100644 --- a/pkg/pillar/types/global.go +++ b/pkg/pillar/types/global.go @@ -229,6 +229,10 @@ const ( EveMemoryLimitInBytes GlobalSettingKey = "memory.eve.limit.bytes" // How much memory overhead is allowed for VMM needs VmmMemoryLimitInMiB GlobalSettingKey = "memory.vmm.limit.MiB" + // GOGCMemoryLimitInBytes global setting key + GOGCMemoryLimitInBytes GlobalSettingKey = "gogc.memory.limit.bytes" + // GOGCPercent global setting key + GOGCPercent GlobalSettingKey = "gogc.percent" // IgnoreMemoryCheckForApps global setting key IgnoreMemoryCheckForApps GlobalSettingKey = "memory.apps.ignore.check" // IgnoreDiskCheckForApps global setting key @@ -843,7 +847,10 @@ func NewConfigItemSpecMap() ConfigItemSpecMap { 100*1024*1024, 0xFFFFFFFF) configItemSpecMap.AddIntItem(StorageZfsReserved, 20, 1, 99) configItemSpecMap.AddIntItem(ForceFallbackCounter, 0, 0, 0xFFFFFFFF) - + // Default GOGC memory limit is 0 + configItemSpecMap.AddIntItem(GOGCMemoryLimitInBytes, 0, 0, 0xFFFFFFFF) + // Default GOGC target percentage is 100, 0 means disable GC + configItemSpecMap.AddIntItem(GOGCPercent, 100, 0, 500) configItemSpecMap.AddIntItem(EveMemoryLimitInBytes, uint32(eveMemoryLimitInBytes), uint32(eveMemoryLimitInBytes), 0xFFFFFFFF) // Limit manual vmm overhead override to 1 PiB diff --git a/pkg/pillar/types/global_test.go b/pkg/pillar/types/global_test.go index 6d36ebce6a..b94f5818f7 100644 --- a/pkg/pillar/types/global_test.go +++ b/pkg/pillar/types/global_test.go @@ -181,6 +181,8 @@ func TestNewConfigItemSpecMap(t *testing.T) { VgaAccess, ConsoleAccess, AllowAppVnc, + GOGCMemoryLimitInBytes, + GOGCPercent, EveMemoryLimitInBytes, VmmMemoryLimitInMiB, IgnoreMemoryCheckForApps, diff --git a/pkg/pillar/types/locationconsts.go b/pkg/pillar/types/locationconsts.go index 839353cb65..11649e5133 100644 --- a/pkg/pillar/types/locationconsts.go +++ b/pkg/pillar/types/locationconsts.go @@ -96,6 +96,8 @@ const ( NewlogUploadAppDir = NewlogDir + "/appUpload" // NewlogKeepSentQueueDir - a circular 
queue of gzip files already been sent NewlogKeepSentQueueDir = NewlogDir + "/keepSentQueue" + // PillarHardMemoryLimitFile - hard memory reserved for pillar + PillarHardMemoryLimitFile = "/hostfs/sys/fs/cgroup/memory/eve/services/pillar/memory.limit_in_bytes" // EveMemoryLimitFile - stores memory reserved for eve EveMemoryLimitFile = "/hostfs/sys/fs/cgroup/memory/eve/memory.soft_limit_in_bytes" // EveMemoryUsageFile - current usage diff --git a/pkg/pillar/types/memory.go b/pkg/pillar/types/memory.go index accaff8514..a52ff042cd 100644 --- a/pkg/pillar/types/memory.go +++ b/pkg/pillar/types/memory.go @@ -4,11 +4,19 @@ package types import ( + "fmt" "os" + "runtime/debug" "strconv" "strings" ) +// GetPillarHardMemoryLimitInBytes returns hard memory limit +// reserved for pillar in bytes +func GetPillarHardMemoryLimitInBytes() (uint64, error) { + return readUint64File(PillarHardMemoryLimitFile) +} + // GetEveMemoryLimitInBytes returns memory limit // reserved for eve in bytes func GetEveMemoryLimitInBytes() (uint64, error) { @@ -42,3 +50,36 @@ func readUint64File(filename string) (uint64, error) { dataUint64, err := strconv.ParseUint(dataString, 10, 64) return dataUint64, err } + +// ConfigureGOGC sets two main configuration parameters for the +// garbage collector (GOGC): memory limit and percentage (see +// explanation here: https://tip.golang.org/doc/gc-guide). +// If limit is 0, create GOGC limit from the pillar cgroups hard +// memory limit. +func ConfigureGOGC(limit int64, percent int) (int64, int, error) { + if limit == 0 { + // Fallback to value from cgroups if no limit in the configuration + ulimit, err := GetPillarHardMemoryLimitInBytes() + if err != nil { + err := fmt.Errorf("can't receive pillar memory hard limit: '%w'", err) + return -1, -1, err + } + // Reduce actual memory limit to 0.6 of cgroup limit. The logic behind + // the constant is simple: cgroup limit is a hard limit for the whole + // pillar cgroup, meaning when reached, we are killed by OOM. 
In turn + // GOGC memory limit is a soft limit, so the difference must be + // significant to ensure that after the soft limit is reached, there + // will be enough memory for the GOGC to do its job and, fortunately, + // not to hit the hard limit. + limit = int64(ulimit) * 600 / 1000 + } + if percent == 0 { + // Disable GC + percent = -1 + } + // Set new and retrieve previous values + limit = debug.SetMemoryLimit(limit) + percent = debug.SetGCPercent(percent) + + return limit, percent, nil +}