From fb67296fe40c406ee8f1e13a46d41a9ddd6aac9b Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Mon, 24 Oct 2022 15:19:11 -0500 Subject: [PATCH 1/3] client: ensure minimal cgroup controllers enabled This PR fixes a bug where Nomad could not operate properly on operating systems that set the root cgroup.subtree_control to a set of controllers that do not include the minimal set of controllers needed by Nomad. Nomad needs these controllers enabled to operate: - cpuset - cpu - io - memory - pids Now, Nomad will ensure these controllers are enabled during Client initialization, adding them to cgroup.subtree_control as necessary. This should be particularly helpful on the RHEL/CentOS/Fedora family of system. Ubuntu systems should be unaffected as they enable all controllers by default. Fixes: https://github.com/hashicorp/nomad/issues/14494 --- .changelog/15027.txt | 3 +++ client/lib/cgutil/cpuset_manager_v2.go | 34 ++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) create mode 100644 .changelog/15027.txt diff --git a/.changelog/15027.txt b/.changelog/15027.txt new file mode 100644 index 000000000000..9f5f5f0e5ccf --- /dev/null +++ b/.changelog/15027.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed a bug where Nomad could not detect cores on recent RHEL systems +``` diff --git a/client/lib/cgutil/cpuset_manager_v2.go b/client/lib/cgutil/cpuset_manager_v2.go index 355d7e200f53..90da05cb6d6e 100644 --- a/client/lib/cgutil/cpuset_manager_v2.go +++ b/client/lib/cgutil/cpuset_manager_v2.go @@ -12,6 +12,7 @@ import ( "time" "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-set" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/lib/cpuset" "github.com/hashicorp/nomad/nomad/structs" @@ -54,16 +55,21 @@ type cpusetManagerV2 struct { } func NewCpusetManagerV2(parent string, reservable []uint16, logger hclog.Logger) CpusetManager { + if err := minimumRootControllers(); err != nil { + logger.Error("failed to enabled minimum set of cgroup controllers; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + parentAbs := filepath.Join(CgroupRoot, parent) if err := os.MkdirAll(parentAbs, 0o755); err != nil { - logger.Warn("failed to ensure nomad parent cgroup exists; disable cpuset management", "error", err) + logger.Error("failed to ensure nomad parent cgroup exists; disable cpuset management", "error", err) return new(NoopCpusetManager) } if len(reservable) == 0 { // read from group if cpus, err := GetCPUsFromCgroup(parent); err != nil { - logger.Warn("failed to lookup cpus from parent cgroup; disable cpuset management", "error", err) + logger.Error("failed to lookup cpus from parent cgroup; disable cpuset management", "error", err) return new(NoopCpusetManager) } else { reservable = cpus @@ -80,6 +86,30 @@ func NewCpusetManagerV2(parent string, reservable []uint16, logger hclog.Logger) } } +// minimumControllers sets the minimum set of required controllers on the +// /sys/fs/cgroup/cgroup.subtree_control file. Some systems like Ubuntu turn on +// all controllers by default, and will be unaffected. Other systems like RHEL, +// CentOS, Fedora turn of most controllers, and provide a default that excludes +// controllers needed by Nomad. This helper ensures all of: +// [cpuset, cpu, io, memory, pids] +// are enabled. +func minimumRootControllers() error { + e := new(editor) + s, err := e.read("cgroup.subtree_control") + if err != nil { + return err + } + required := set.From[string]([]string{"cpuset", "cpu", "io", "memory", "pids"}) + enabled := set.From[string](strings.Fields(s)) + needed := required.Difference(enabled) + sb := new(strings.Builder) + for _, controller := range needed.List() { + sb.WriteString("+" + controller + " ") + } + activation := sb.String() + return e.write("cgroup.subtree_control", activation) +} + func (c *cpusetManagerV2) Init() { c.logger.Debug("initializing with", "cores", c.initial) } From ba8acade923fb6380fd37736774b9cb0ab194c58 Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Mon, 24 Oct 2022 15:29:53 -0500 Subject: [PATCH 2/3] docs: cleanup doc string --- client/lib/cgutil/cpuset_manager_v2.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/client/lib/cgutil/cpuset_manager_v2.go b/client/lib/cgutil/cpuset_manager_v2.go index 90da05cb6d6e..382a0c5d4ffc 100644 --- a/client/lib/cgutil/cpuset_manager_v2.go +++ b/client/lib/cgutil/cpuset_manager_v2.go @@ -87,11 +87,7 @@ func NewCpusetManagerV2(parent string, reservable []uint16, logger hclog.Logger) } // minimumControllers sets the minimum set of required controllers on the -// /sys/fs/cgroup/cgroup.subtree_control file. Some systems like Ubuntu turn on -// all controllers by default, and will be unaffected. Other systems like RHEL, -// CentOS, Fedora turn of most controllers, and provide a default that excludes -// controllers needed by Nomad. This helper ensures all of: -// [cpuset, cpu, io, memory, pids] +// /sys/fs/cgroup/cgroup.subtree_control file - ensuring [cpuset, cpu, io, memory, pids] // are enabled. func minimumRootControllers() error { e := new(editor) From d274e957a08be6394da4c537d97b608b24d497bb Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Mon, 24 Oct 2022 15:50:54 -0500 Subject: [PATCH 3/3] client: cleanup controller writes, enhance log messages --- client/lib/cgutil/cpuset_manager_v2.go | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/client/lib/cgutil/cpuset_manager_v2.go b/client/lib/cgutil/cpuset_manager_v2.go index 382a0c5d4ffc..1157ad373b0d 100644 --- a/client/lib/cgutil/cpuset_manager_v2.go +++ b/client/lib/cgutil/cpuset_manager_v2.go @@ -56,20 +56,20 @@ type cpusetManagerV2 struct { func NewCpusetManagerV2(parent string, reservable []uint16, logger hclog.Logger) CpusetManager { if err := minimumRootControllers(); err != nil { - logger.Error("failed to enabled minimum set of cgroup controllers; disable cpuset management", "error", err) + logger.Error("failed to enabled minimum set of cgroup controllers; disabling cpuset management", "error", err) return new(NoopCpusetManager) } parentAbs := filepath.Join(CgroupRoot, parent) if err := os.MkdirAll(parentAbs, 0o755); err != nil { - logger.Error("failed to ensure nomad parent cgroup exists; disable cpuset management", "error", err) + logger.Error("failed to ensure nomad parent cgroup exists; disabling cpuset management", "error", err) return new(NoopCpusetManager) } if len(reservable) == 0 { // read from group if cpus, err := GetCPUsFromCgroup(parent); err != nil { - logger.Error("failed to lookup cpus from parent cgroup; disable cpuset management", "error", err) + logger.Error("failed to lookup cpus from parent cgroup; disabling cpuset management", "error", err) return new(NoopCpusetManager) } else { reservable = cpus @@ -95,14 +95,21 @@ func minimumRootControllers() error { if err != nil { return err } + required := set.From[string]([]string{"cpuset", "cpu", "io", "memory", "pids"}) enabled := set.From[string](strings.Fields(s)) needed := required.Difference(enabled) + + if needed.Size() == 0 { + return nil // already sufficient + } + sb := new(strings.Builder) for _, controller := range needed.List() { sb.WriteString("+" + controller + " ") } - activation := sb.String() + + activation := strings.TrimSpace(sb.String()) return e.write("cgroup.subtree_control", activation) }