From 13bd08b15b7f17a560d37563c4d6b04b28619a4c Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Tue, 23 Aug 2022 09:03:37 -0500 Subject: [PATCH] client: refactor cpuset manager initialization This PR refactors the code path in Client startup for setting up the cpuset cgroup manager (non-linux systems not affected). Before, there was a logic bug where we would try to read the cpuset.cpus.effective cgroup interface file before ensuring nomad's parent cgroup existed. Therefor that file would not exist, and the list of useable cpus would be empty. Tasks started thereafter would not have a value set for their cpuset.cpus. The refactoring fixes some less than ideal coding style. Instead we now bootstrap each cpuset manager type (v1/v2) within its own constructor. If something goes awry during bootstrap (e.g. cgroups not enabled), the constructor returns the noop implementation and logs a warning. Fixes #14229 --- .changelog/14230.txt | 3 + client/client.go | 24 +----- client/client_test.go | 6 +- client/lib/cgutil/cgutil_linux.go | 20 +++-- client/lib/cgutil/cgutil_linux_test.go | 19 +++-- client/lib/cgutil/cgutil_noop.go | 2 +- client/lib/cgutil/cpuset_manager.go | 10 +-- client/lib/cgutil/cpuset_manager_v1.go | 92 ++++++++++----------- client/lib/cgutil/cpuset_manager_v1_test.go | 15 ++-- client/lib/cgutil/cpuset_manager_v2.go | 56 +++++-------- client/lib/cgutil/cpuset_manager_v2_test.go | 8 +- 11 files changed, 112 insertions(+), 143 deletions(-) create mode 100644 .changelog/14230.txt diff --git a/.changelog/14230.txt b/.changelog/14230.txt new file mode 100644 index 000000000000..7bb45f945284 --- /dev/null +++ b/.changelog/14230.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed a bug where cpuset initialization would not work on first agent startup +``` diff --git a/client/client.go b/client/client.go index 9e8253132ee1..ae6476e61e82 100644 --- a/client/client.go +++ b/client/client.go @@ -8,7 +8,6 @@ import ( "net/rpc" "os" "path/filepath" - "runtime" "sort" "strconv" "strings" @@ -395,7 +394,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie invalidAllocs: make(map[string]struct{}), serversContactedCh: make(chan struct{}), serversContactedOnce: sync.Once{}, - cpusetManager: cgutil.CreateCPUSetManager(cfg.CgroupParent, logger), + cpusetManager: cgutil.CreateCPUSetManager(cfg.CgroupParent, cfg.ReservableCores, logger), getter: getter.NewGetter(cfg.Artifact), EnterpriseClient: newEnterpriseClient(logger), } @@ -689,25 +688,8 @@ func (c *Client) init() error { "reserved", reserved, ) - // Ensure cgroups are created on linux platform - if runtime.GOOS == "linux" && c.cpusetManager != nil { - // use the client configuration for reservable_cores if set - cores := conf.ReservableCores - if len(cores) == 0 { - // otherwise lookup the effective cores from the parent cgroup - cores, err = cgutil.GetCPUsFromCgroup(conf.CgroupParent) - if err != nil { - c.logger.Warn("failed to lookup cpuset from cgroup parent, and not set as reservable_cores", "parent", conf.CgroupParent) - // will continue with a disabled cpuset manager - } - } - if cpuErr := c.cpusetManager.Init(cores); cpuErr != nil { - // If the client cannot initialize the cgroup then reserved cores will not be reported and the cpuset manager - // will be disabled. this is common when running in dev mode under a non-root user for example. - c.logger.Warn("failed to initialize cpuset cgroup subsystem, cpuset management disabled", "error", cpuErr) - c.cpusetManager = new(cgutil.NoopCpusetManager) - } - } + // startup the CPUSet manager + c.cpusetManager.Init() // setup the check store c.checkStore = checkstore.NewStore(c.logger, c.stateDB) diff --git a/client/client_test.go b/client/client_test.go index f41021a754df..4040ec238a7f 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -15,6 +15,7 @@ import ( trstate "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/fingerprint" + "github.com/hashicorp/nomad/client/lib/cgutil" regMock "github.com/hashicorp/nomad/client/serviceregistration/mock" cstate "github.com/hashicorp/nomad/client/state" "github.com/hashicorp/nomad/command/agent/consul" @@ -736,8 +737,9 @@ func TestClient_Init(t *testing.T) { config.Node = mock.Node() client := &Client{ - config: config, - logger: testlog.HCLogger(t), + config: config, + logger: testlog.HCLogger(t), + cpusetManager: new(cgutil.NoopCpusetManager), } if err := client.init(); err != nil { diff --git a/client/lib/cgutil/cgutil_linux.go b/client/lib/cgutil/cgutil_linux.go index b2cc9a4268c9..178b65b7b836 100644 --- a/client/lib/cgutil/cgutil_linux.go +++ b/client/lib/cgutil/cgutil_linux.go @@ -24,19 +24,25 @@ var UseV2 = cgroups.IsCgroup2UnifiedMode() // will create cgroups. If parent is not set, an appropriate name for the version // of cgroups will be used. func GetCgroupParent(parent string) string { - if UseV2 { - return getParentV2(parent) + switch { + case parent != "": + return parent + case UseV2: + return DefaultCgroupParentV2 + default: + return DefaultCgroupV1Parent } - return getParentV1(parent) } // CreateCPUSetManager creates a V1 or V2 CpusetManager depending on system configuration. -func CreateCPUSetManager(parent string, logger hclog.Logger) CpusetManager { +func CreateCPUSetManager(parent string, reservable []uint16, logger hclog.Logger) CpusetManager { parent = GetCgroupParent(parent) // use appropriate default parent if not set in client config - if UseV2 { - return NewCpusetManagerV2(parent, logger.Named("cpuset.v2")) + switch { + case UseV2: + return NewCpusetManagerV2(parent, reservable, logger.Named("cpuset.v2")) + default: + return NewCpusetManagerV1(parent, reservable, logger.Named("cpuset.v1")) } - return NewCpusetManagerV1(parent, logger.Named("cpuset.v1")) } // GetCPUsFromCgroup gets the effective cpuset value for the given cgroup. diff --git a/client/lib/cgutil/cgutil_linux_test.go b/client/lib/cgutil/cgutil_linux_test.go index 2def4dcbae88..6d43053cb6b9 100644 --- a/client/lib/cgutil/cgutil_linux_test.go +++ b/client/lib/cgutil/cgutil_linux_test.go @@ -13,6 +13,7 @@ import ( "github.com/hashicorp/nomad/helper/uuid" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fs2" + "github.com/shoenig/test/must" "github.com/stretchr/testify/require" ) @@ -58,19 +59,21 @@ func TestUtil_CreateCPUSetManager(t *testing.T) { t.Run("v1", func(t *testing.T) { testutil.CgroupsCompatibleV1(t) parent := "/" + uuid.Short() - manager := CreateCPUSetManager(parent, logger) - err := manager.Init([]uint16{0}) - require.NoError(t, err) - require.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent))) + manager := CreateCPUSetManager(parent, []uint16{0}, logger) + manager.Init() + _, ok := manager.(*cpusetManagerV1) + must.True(t, ok) + must.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent))) }) t.Run("v2", func(t *testing.T) { testutil.CgroupsCompatibleV2(t) parent := uuid.Short() + ".slice" - manager := CreateCPUSetManager(parent, logger) - err := manager.Init([]uint16{0}) - require.NoError(t, err) - require.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent))) + manager := CreateCPUSetManager(parent, []uint16{0}, logger) + manager.Init() + _, ok := manager.(*cpusetManagerV2) + must.True(t, ok) + must.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent))) }) } diff --git a/client/lib/cgutil/cgutil_noop.go b/client/lib/cgutil/cgutil_noop.go index 91a35b14492e..89c86c109842 100644 --- a/client/lib/cgutil/cgutil_noop.go +++ b/client/lib/cgutil/cgutil_noop.go @@ -17,7 +17,7 @@ const ( var UseV2 = false // CreateCPUSetManager creates a no-op CpusetManager for non-Linux operating systems. -func CreateCPUSetManager(string, hclog.Logger) CpusetManager { +func CreateCPUSetManager(string, []uint16, hclog.Logger) CpusetManager { return new(NoopCpusetManager) } diff --git a/client/lib/cgutil/cpuset_manager.go b/client/lib/cgutil/cpuset_manager.go index 7d16c752f84c..b3e56fa60500 100644 --- a/client/lib/cgutil/cpuset_manager.go +++ b/client/lib/cgutil/cpuset_manager.go @@ -18,10 +18,9 @@ const ( // CpusetManager is used to setup cpuset cgroups for each task. type CpusetManager interface { - // Init should be called with the initial set of reservable cores before any - // allocations are managed. Ensures the parent cgroup exists and proper permissions - // are available for managing cgroups. - Init([]uint16) error + // Init should be called before the client starts running allocations. This + // is where the cpuset manager should start doing background operations. + Init() // AddAlloc adds an allocation to the manager AddAlloc(alloc *structs.Allocation) @@ -36,8 +35,7 @@ type CpusetManager interface { type NoopCpusetManager struct{} -func (n NoopCpusetManager) Init([]uint16) error { - return nil +func (n NoopCpusetManager) Init() { } func (n NoopCpusetManager) AddAlloc(alloc *structs.Allocation) { diff --git a/client/lib/cgutil/cpuset_manager_v1.go b/client/lib/cgutil/cpuset_manager_v1.go index ce89cd2bfdfa..bdf3b347d2fc 100644 --- a/client/lib/cgutil/cpuset_manager_v1.go +++ b/client/lib/cgutil/cpuset_manager_v1.go @@ -29,14 +29,52 @@ const ( ) // NewCpusetManagerV1 creates a CpusetManager compatible with cgroups.v1 -func NewCpusetManagerV1(cgroupParent string, logger hclog.Logger) CpusetManager { +func NewCpusetManagerV1(cgroupParent string, _ []uint16, logger hclog.Logger) CpusetManager { if cgroupParent == "" { cgroupParent = DefaultCgroupV1Parent } + + cgroupParentPath, err := GetCgroupPathHelperV1("cpuset", cgroupParent) + if err != nil { + logger.Warn("failed to get cgroup path; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + + // ensures that shared cpuset exists and that the cpuset values are copied from the parent if created + if err = cpusetEnsureParentV1(filepath.Join(cgroupParentPath, SharedCpusetCgroupName)); err != nil { + logger.Warn("failed to ensure cgroup parent exists; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + + parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(cgroupParentPath) + if err != nil { + logger.Warn("failed to detect parent cpuset settings; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + + parentCpuset, err := cpuset.Parse(parentCpus) + if err != nil { + logger.Warn("failed to parse parent cpuset.cpus setting; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + + // ensure the reserved cpuset exists, but only copy the mems from the parent if creating the cgroup + if err = os.Mkdir(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), 0755); err != nil { + logger.Warn("failed to ensure reserved cpuset.cpus interface exists; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + + if err = cgroups.WriteFile(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), "cpuset.mems", parentMems); err != nil { + logger.Warn("failed to ensure reserved cpuset.mems interface exists; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + return &cpusetManagerV1{ - cgroupParent: cgroupParent, - cgroupInfo: map[string]allocTaskCgroupInfo{}, - logger: logger, + parentCpuset: parentCpuset, + cgroupParent: cgroupParent, + cgroupParentPath: cgroupParentPath, + cgroupInfo: map[string]allocTaskCgroupInfo{}, + logger: logger, } } @@ -140,48 +178,11 @@ type allocTaskCgroupInfo map[string]*TaskCgroupInfo // Init checks that the cgroup parent and expected child cgroups have been created // If the cgroup parent is set to /nomad then this will ensure that the /nomad/shared // cgroup is initialized. -func (c *cpusetManagerV1) Init(_ []uint16) error { - cgroupParentPath, err := GetCgroupPathHelperV1("cpuset", c.cgroupParent) - if err != nil { - return err - } - c.cgroupParentPath = cgroupParentPath - - // ensures that shared cpuset exists and that the cpuset values are copied from the parent if created - if err := cpusetEnsureParentV1(filepath.Join(cgroupParentPath, SharedCpusetCgroupName)); err != nil { - return err - } - - parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(cgroupParentPath) - if err != nil { - return fmt.Errorf("failed to detect parent cpuset settings: %v", err) - } - c.parentCpuset, err = cpuset.Parse(parentCpus) - if err != nil { - return fmt.Errorf("failed to parse parent cpuset.cpus setting: %v", err) - } - - // ensure the reserved cpuset exists, but only copy the mems from the parent if creating the cgroup - if err := os.Mkdir(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), 0755); err == nil { - // cgroup created, leave cpuset.cpus empty but copy cpuset.mems from parent - if err != nil { - return err - } - } else if !os.IsExist(err) { - return err - } - - if err := cgroups.WriteFile(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), "cpuset.mems", parentMems); err != nil { - return err - } - +func (c *cpusetManagerV1) Init() { c.doneCh = make(chan struct{}) c.signalCh = make(chan struct{}) - c.logger.Info("initialized cpuset cgroup manager", "parent", c.cgroupParent, "cpuset", c.parentCpuset.String()) - go c.reconcileLoop() - return nil } func (c *cpusetManagerV1) reconcileLoop() { @@ -339,13 +340,6 @@ func getCPUsFromCgroupV1(group string) ([]uint16, error) { return stats.CPUSetStats.CPUs, nil } -func getParentV1(parent string) string { - if parent == "" { - return DefaultCgroupV1Parent - } - return parent -} - // cpusetEnsureParentV1 makes sure that the parent directories of current // are created and populated with the proper cpus and mems files copied // from their respective parent. It does that recursively, starting from diff --git a/client/lib/cgutil/cpuset_manager_v1_test.go b/client/lib/cgutil/cpuset_manager_v1_test.go index 9537f2f87a86..693df77aff4d 100644 --- a/client/lib/cgutil/cpuset_manager_v1_test.go +++ b/client/lib/cgutil/cpuset_manager_v1_test.go @@ -16,7 +16,7 @@ import ( "github.com/stretchr/testify/require" ) -func tmpCpusetManagerV1(t *testing.T) (manager *cpusetManagerV1, cleanup func()) { +func tmpCpusetManagerV1(t *testing.T) (*cpusetManagerV1, func()) { mount, err := FindCgroupMountpointDir() if err != nil || mount == "" { t.Skipf("Failed to find cgroup mount: %v %v", mount, err) @@ -25,15 +25,10 @@ func tmpCpusetManagerV1(t *testing.T) (manager *cpusetManagerV1, cleanup func()) parent := "/gotest-" + uuid.Short() require.NoError(t, cpusetEnsureParentV1(parent)) - manager = &cpusetManagerV1{ - cgroupParent: parent, - cgroupInfo: map[string]allocTaskCgroupInfo{}, - logger: testlog.HCLogger(t), - } - parentPath, err := GetCgroupPathHelperV1("cpuset", parent) require.NoError(t, err) + manager := NewCpusetManagerV1(parent, nil, testlog.HCLogger(t)).(*cpusetManagerV1) return manager, func() { require.NoError(t, cgroups.RemovePaths(map[string]string{"cpuset": parentPath})) } } @@ -42,7 +37,7 @@ func TestCpusetManager_V1_Init(t *testing.T) { manager, cleanup := tmpCpusetManagerV1(t) defer cleanup() - require.NoError(t, manager.Init(nil)) + manager.Init() require.DirExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName)) require.FileExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) @@ -59,7 +54,7 @@ func TestCpusetManager_V1_AddAlloc_single(t *testing.T) { manager, cleanup := tmpCpusetManagerV1(t) defer cleanup() - require.NoError(t, manager.Init(nil)) + manager.Init() alloc := mock.Alloc() // reserve just one core (the 0th core, which probably exists) @@ -114,7 +109,7 @@ func TestCpusetManager_V1_RemoveAlloc(t *testing.T) { manager, cleanup := tmpCpusetManagerV1(t) defer cleanup() - require.NoError(t, manager.Init(nil)) + manager.Init() alloc1 := mock.Alloc() alloc1Cpuset := cpuset.New(manager.parentCpuset.ToSlice()[0]) diff --git a/client/lib/cgutil/cpuset_manager_v2.go b/client/lib/cgutil/cpuset_manager_v2.go index 1dd5a92b0ab9..b1b44841a34a 100644 --- a/client/lib/cgutil/cpuset_manager_v2.go +++ b/client/lib/cgutil/cpuset_manager_v2.go @@ -52,24 +52,35 @@ type cpusetManagerV2 struct { isolating map[identity]cpuset.CPUSet // isolating tasks using cores from the pool + reserved cores } -func NewCpusetManagerV2(parent string, logger hclog.Logger) CpusetManager { +func NewCpusetManagerV2(parent string, reservable []uint16, logger hclog.Logger) CpusetManager { + parentAbs := filepath.Join(CgroupRoot, parent) + if err := os.MkdirAll(parentAbs, 0o755); err != nil { + logger.Warn("failed to ensure nomad parent cgroup exists; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + + if len(reservable) == 0 { + // read from group + if cpus, err := GetCPUsFromCgroup(parent); err != nil { + logger.Warn("failed to lookup cpus from parent cgroup; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } else { + reservable = cpus + } + } + return &cpusetManagerV2{ + initial: cpuset.New(reservable...), parent: parent, - parentAbs: filepath.Join(CgroupRoot, parent), + parentAbs: parentAbs, logger: logger, sharing: make(map[identity]nothing), isolating: make(map[identity]cpuset.CPUSet), } } -func (c *cpusetManagerV2) Init(cores []uint16) error { - c.logger.Debug("initializing with", "cores", cores) - if err := c.ensureParent(); err != nil { - c.logger.Error("failed to init cpuset manager", "err", err) - return err - } - c.initial = cpuset.New(cores...) - return nil +func (c *cpusetManagerV2) Init() { + c.logger.Debug("initializing with", "cores", c.initial) } func (c *cpusetManagerV2) AddAlloc(alloc *structs.Allocation) { @@ -284,22 +295,6 @@ func (c *cpusetManagerV2) write(id identity, set cpuset.CPUSet) { } } -// ensureParentCgroup will create parent cgroup for the manager if it does not -// exist yet. No PIDs are added to any cgroup yet. -func (c *cpusetManagerV2) ensureParent() error { - mgr, err := fs2.NewManager(nil, c.parentAbs) - if err != nil { - return err - } - - if err = mgr.Apply(CreationPID); err != nil { - return err - } - - c.logger.Trace("establish cgroup hierarchy", "parent", c.parent) - return nil -} - // fromRoot returns the joined filepath of group on the CgroupRoot func fromRoot(group string) string { return filepath.Join(CgroupRoot, group) @@ -319,12 +314,3 @@ func getCPUsFromCgroupV2(group string) ([]uint16, error) { } return set.ToSlice(), nil } - -// getParentV2 returns parent if set, otherwise the default name of Nomad's -// parent cgroup (i.e. nomad.slice). -func getParentV2(parent string) string { - if parent == "" { - return DefaultCgroupParentV2 - } - return parent -} diff --git a/client/lib/cgutil/cpuset_manager_v2_test.go b/client/lib/cgutil/cpuset_manager_v2_test.go index a6acc50e76f7..ac5489aa01cd 100644 --- a/client/lib/cgutil/cpuset_manager_v2_test.go +++ b/client/lib/cgutil/cpuset_manager_v2_test.go @@ -32,8 +32,8 @@ func TestCpusetManager_V2_AddAlloc(t *testing.T) { cleanup(t, parent) // setup the cpuset manager - manager := NewCpusetManagerV2(parent, logger) - require.NoError(t, manager.Init(systemCores)) + manager := NewCpusetManagerV2(parent, systemCores, logger) + manager.Init() // add our first alloc, isolating 1 core t.Run("first", func(t *testing.T) { @@ -72,8 +72,8 @@ func TestCpusetManager_V2_RemoveAlloc(t *testing.T) { cleanup(t, parent) // setup the cpuset manager - manager := NewCpusetManagerV2(parent, logger) - require.NoError(t, manager.Init(systemCores)) + manager := NewCpusetManagerV2(parent, systemCores, logger) + manager.Init() // alloc1 gets core 0 alloc1 := mock.Alloc()