diff --git a/.changelog/14230.txt b/.changelog/14230.txt new file mode 100644 index 000000000000..94bcaf2a88e5 --- /dev/null +++ b/.changelog/14230.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed a bug where cpuset initialization would fail on first run +``` diff --git a/client/client.go b/client/client.go index cd4925b40bb8..fe16b5e91bd7 100644 --- a/client/client.go +++ b/client/client.go @@ -8,7 +8,6 @@ import ( "net/rpc" "os" "path/filepath" - "runtime" "sort" "strconv" "strings" @@ -394,7 +393,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie invalidAllocs: make(map[string]struct{}), serversContactedCh: make(chan struct{}), serversContactedOnce: sync.Once{}, - cpusetManager: cgutil.CreateCPUSetManager(cfg.CgroupParent, logger), + cpusetManager: cgutil.CreateCPUSetManager(cfg.CgroupParent, cfg.ReservableCores, logger), getter: getter.NewGetter(cfg.Artifact), EnterpriseClient: newEnterpriseClient(logger), } @@ -688,25 +687,8 @@ func (c *Client) init() error { "reserved", reserved, ) - // Ensure cgroups are created on linux platform - if runtime.GOOS == "linux" && c.cpusetManager != nil { - // use the client configuration for reservable_cores if set - cores := conf.ReservableCores - if len(cores) == 0 { - // otherwise lookup the effective cores from the parent cgroup - cores, err = cgutil.GetCPUsFromCgroup(conf.CgroupParent) - if err != nil { - c.logger.Warn("failed to lookup cpuset from cgroup parent, and not set as reservable_cores", "parent", conf.CgroupParent) - // will continue with a disabled cpuset manager - } - } - if cpuErr := c.cpusetManager.Init(cores); cpuErr != nil { - // If the client cannot initialize the cgroup then reserved cores will not be reported and the cpuset manager - // will be disabled. this is common when running in dev mode under a non-root user for example. - c.logger.Warn("failed to initialize cpuset cgroup subsystem, cpuset management disabled", "error", cpuErr) - c.cpusetManager = new(cgutil.NoopCpusetManager) - } - } + // startup the CPUSet manager + c.cpusetManager.Init() // setup the check store c.checkStore = checkstore.NewStore(c.logger, c.stateDB) diff --git a/client/lib/cgutil/cgutil_linux.go b/client/lib/cgutil/cgutil_linux.go index b2cc9a4268c9..178b65b7b836 100644 --- a/client/lib/cgutil/cgutil_linux.go +++ b/client/lib/cgutil/cgutil_linux.go @@ -24,19 +24,25 @@ var UseV2 = cgroups.IsCgroup2UnifiedMode() // will create cgroups. If parent is not set, an appropriate name for the version // of cgroups will be used. func GetCgroupParent(parent string) string { - if UseV2 { - return getParentV2(parent) + switch { + case parent != "": + return parent + case UseV2: + return DefaultCgroupParentV2 + default: + return DefaultCgroupV1Parent } - return getParentV1(parent) } // CreateCPUSetManager creates a V1 or V2 CpusetManager depending on system configuration. -func CreateCPUSetManager(parent string, logger hclog.Logger) CpusetManager { +func CreateCPUSetManager(parent string, reservable []uint16, logger hclog.Logger) CpusetManager { parent = GetCgroupParent(parent) // use appropriate default parent if not set in client config - if UseV2 { - return NewCpusetManagerV2(parent, logger.Named("cpuset.v2")) + switch { + case UseV2: + return NewCpusetManagerV2(parent, reservable, logger.Named("cpuset.v2")) + default: + return NewCpusetManagerV1(parent, reservable, logger.Named("cpuset.v1")) } - return NewCpusetManagerV1(parent, logger.Named("cpuset.v1")) } // GetCPUsFromCgroup gets the effective cpuset value for the given cgroup. diff --git a/client/lib/cgutil/cgutil_linux_test.go b/client/lib/cgutil/cgutil_linux_test.go index 2def4dcbae88..6d43053cb6b9 100644 --- a/client/lib/cgutil/cgutil_linux_test.go +++ b/client/lib/cgutil/cgutil_linux_test.go @@ -13,6 +13,7 @@ import ( "github.com/hashicorp/nomad/helper/uuid" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fs2" + "github.com/shoenig/test/must" "github.com/stretchr/testify/require" ) @@ -58,19 +59,21 @@ func TestUtil_CreateCPUSetManager(t *testing.T) { t.Run("v1", func(t *testing.T) { testutil.CgroupsCompatibleV1(t) parent := "/" + uuid.Short() - manager := CreateCPUSetManager(parent, logger) - err := manager.Init([]uint16{0}) - require.NoError(t, err) - require.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent))) + manager := CreateCPUSetManager(parent, []uint16{0}, logger) + manager.Init() + _, ok := manager.(*cpusetManagerV1) + must.True(t, ok) + must.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent))) }) t.Run("v2", func(t *testing.T) { testutil.CgroupsCompatibleV2(t) parent := uuid.Short() + ".slice" - manager := CreateCPUSetManager(parent, logger) - err := manager.Init([]uint16{0}) - require.NoError(t, err) - require.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent))) + manager := CreateCPUSetManager(parent, []uint16{0}, logger) + manager.Init() + _, ok := manager.(*cpusetManagerV2) + must.True(t, ok) + must.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent))) }) } diff --git a/client/lib/cgutil/cgutil_noop.go b/client/lib/cgutil/cgutil_noop.go index 91a35b14492e..89c86c109842 100644 --- a/client/lib/cgutil/cgutil_noop.go +++ b/client/lib/cgutil/cgutil_noop.go @@ -17,7 +17,7 @@ const ( var UseV2 = false // CreateCPUSetManager creates a no-op CpusetManager for non-Linux operating systems. -func CreateCPUSetManager(string, hclog.Logger) CpusetManager { +func CreateCPUSetManager(string, []uint16, hclog.Logger) CpusetManager { return new(NoopCpusetManager) } diff --git a/client/lib/cgutil/cpuset_manager.go b/client/lib/cgutil/cpuset_manager.go index 7d16c752f84c..b3e56fa60500 100644 --- a/client/lib/cgutil/cpuset_manager.go +++ b/client/lib/cgutil/cpuset_manager.go @@ -18,10 +18,9 @@ const ( // CpusetManager is used to setup cpuset cgroups for each task. type CpusetManager interface { - // Init should be called with the initial set of reservable cores before any - // allocations are managed. Ensures the parent cgroup exists and proper permissions - // are available for managing cgroups. - Init([]uint16) error + // Init should be called before the client starts running allocations. This + // is where the cpuset manager should start doing background operations. + Init() // AddAlloc adds an allocation to the manager AddAlloc(alloc *structs.Allocation) @@ -36,8 +35,7 @@ type CpusetManager interface { type NoopCpusetManager struct{} -func (n NoopCpusetManager) Init([]uint16) error { - return nil +func (n NoopCpusetManager) Init() { } func (n NoopCpusetManager) AddAlloc(alloc *structs.Allocation) { diff --git a/client/lib/cgutil/cpuset_manager_v1.go b/client/lib/cgutil/cpuset_manager_v1.go index ce89cd2bfdfa..bdf3b347d2fc 100644 --- a/client/lib/cgutil/cpuset_manager_v1.go +++ b/client/lib/cgutil/cpuset_manager_v1.go @@ -29,14 +29,52 @@ const ( ) // NewCpusetManagerV1 creates a CpusetManager compatible with cgroups.v1 -func NewCpusetManagerV1(cgroupParent string, logger hclog.Logger) CpusetManager { +func NewCpusetManagerV1(cgroupParent string, _ []uint16, logger hclog.Logger) CpusetManager { if cgroupParent == "" { cgroupParent = DefaultCgroupV1Parent } + + cgroupParentPath, err := GetCgroupPathHelperV1("cpuset", cgroupParent) + if err != nil { + logger.Warn("failed to get cgroup path; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + + // ensures that shared cpuset exists and that the cpuset values are copied from the parent if created + if err = cpusetEnsureParentV1(filepath.Join(cgroupParentPath, SharedCpusetCgroupName)); err != nil { + logger.Warn("failed to ensure cgroup parent exists; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + + parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(cgroupParentPath) + if err != nil { + logger.Warn("failed to detect parent cpuset settings; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + + parentCpuset, err := cpuset.Parse(parentCpus) + if err != nil { + logger.Warn("failed to parse parent cpuset.cpus setting; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + + // ensure the reserved cpuset exists, but only copy the mems from the parent if creating the cgroup + if err = os.Mkdir(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), 0755); err != nil { + logger.Warn("failed to ensure reserved cpuset.cpus interface exists; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + + if err = cgroups.WriteFile(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), "cpuset.mems", parentMems); err != nil { + logger.Warn("failed to ensure reserved cpuset.mems interface exists; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + return &cpusetManagerV1{ - cgroupParent: cgroupParent, - cgroupInfo: map[string]allocTaskCgroupInfo{}, - logger: logger, + parentCpuset: parentCpuset, + cgroupParent: cgroupParent, + cgroupParentPath: cgroupParentPath, + cgroupInfo: map[string]allocTaskCgroupInfo{}, + logger: logger, } } @@ -140,48 +178,11 @@ type allocTaskCgroupInfo map[string]*TaskCgroupInfo // Init checks that the cgroup parent and expected child cgroups have been created // If the cgroup parent is set to /nomad then this will ensure that the /nomad/shared // cgroup is initialized. -func (c *cpusetManagerV1) Init(_ []uint16) error { - cgroupParentPath, err := GetCgroupPathHelperV1("cpuset", c.cgroupParent) - if err != nil { - return err - } - c.cgroupParentPath = cgroupParentPath - - // ensures that shared cpuset exists and that the cpuset values are copied from the parent if created - if err := cpusetEnsureParentV1(filepath.Join(cgroupParentPath, SharedCpusetCgroupName)); err != nil { - return err - } - - parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(cgroupParentPath) - if err != nil { - return fmt.Errorf("failed to detect parent cpuset settings: %v", err) - } - c.parentCpuset, err = cpuset.Parse(parentCpus) - if err != nil { - return fmt.Errorf("failed to parse parent cpuset.cpus setting: %v", err) - } - - // ensure the reserved cpuset exists, but only copy the mems from the parent if creating the cgroup - if err := os.Mkdir(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), 0755); err == nil { - // cgroup created, leave cpuset.cpus empty but copy cpuset.mems from parent - if err != nil { - return err - } - } else if !os.IsExist(err) { - return err - } - - if err := cgroups.WriteFile(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), "cpuset.mems", parentMems); err != nil { - return err - } - +func (c *cpusetManagerV1) Init() { c.doneCh = make(chan struct{}) c.signalCh = make(chan struct{}) - c.logger.Info("initialized cpuset cgroup manager", "parent", c.cgroupParent, "cpuset", c.parentCpuset.String()) - go c.reconcileLoop() - return nil } func (c *cpusetManagerV1) reconcileLoop() { @@ -339,13 +340,6 @@ func getCPUsFromCgroupV1(group string) ([]uint16, error) { return stats.CPUSetStats.CPUs, nil } -func getParentV1(parent string) string { - if parent == "" { - return DefaultCgroupV1Parent - } - return parent -} - // cpusetEnsureParentV1 makes sure that the parent directories of current // are created and populated with the proper cpus and mems files copied // from their respective parent. It does that recursively, starting from diff --git a/client/lib/cgutil/cpuset_manager_v1_test.go b/client/lib/cgutil/cpuset_manager_v1_test.go index 9537f2f87a86..165891f85a50 100644 --- a/client/lib/cgutil/cpuset_manager_v1_test.go +++ b/client/lib/cgutil/cpuset_manager_v1_test.go @@ -42,7 +42,7 @@ func TestCpusetManager_V1_Init(t *testing.T) { manager, cleanup := tmpCpusetManagerV1(t) defer cleanup() - require.NoError(t, manager.Init(nil)) + manager.Init() require.DirExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName)) require.FileExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) @@ -59,7 +59,7 @@ func TestCpusetManager_V1_AddAlloc_single(t *testing.T) { manager, cleanup := tmpCpusetManagerV1(t) defer cleanup() - require.NoError(t, manager.Init(nil)) + manager.Init() alloc := mock.Alloc() // reserve just one core (the 0th core, which probably exists) @@ -114,7 +114,7 @@ func TestCpusetManager_V1_RemoveAlloc(t *testing.T) { manager, cleanup := tmpCpusetManagerV1(t) defer cleanup() - require.NoError(t, manager.Init(nil)) + manager.Init() alloc1 := mock.Alloc() alloc1Cpuset := cpuset.New(manager.parentCpuset.ToSlice()[0]) diff --git a/client/lib/cgutil/cpuset_manager_v2.go b/client/lib/cgutil/cpuset_manager_v2.go index 1dd5a92b0ab9..b1b44841a34a 100644 --- a/client/lib/cgutil/cpuset_manager_v2.go +++ b/client/lib/cgutil/cpuset_manager_v2.go @@ -52,24 +52,35 @@ type cpusetManagerV2 struct { isolating map[identity]cpuset.CPUSet // isolating tasks using cores from the pool + reserved cores } -func NewCpusetManagerV2(parent string, logger hclog.Logger) CpusetManager { +func NewCpusetManagerV2(parent string, reservable []uint16, logger hclog.Logger) CpusetManager { + parentAbs := filepath.Join(CgroupRoot, parent) + if err := os.MkdirAll(parentAbs, 0o755); err != nil { + logger.Warn("failed to ensure nomad parent cgroup exists; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + + if len(reservable) == 0 { + // read from group + if cpus, err := GetCPUsFromCgroup(parent); err != nil { + logger.Warn("failed to lookup cpus from parent cgroup; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } else { + reservable = cpus + } + } + return &cpusetManagerV2{ + initial: cpuset.New(reservable...), parent: parent, - parentAbs: filepath.Join(CgroupRoot, parent), + parentAbs: parentAbs, logger: logger, sharing: make(map[identity]nothing), isolating: make(map[identity]cpuset.CPUSet), } } -func (c *cpusetManagerV2) Init(cores []uint16) error { - c.logger.Debug("initializing with", "cores", cores) - if err := c.ensureParent(); err != nil { - c.logger.Error("failed to init cpuset manager", "err", err) - return err - } - c.initial = cpuset.New(cores...) - return nil +func (c *cpusetManagerV2) Init() { + c.logger.Debug("initializing with", "cores", c.initial) } func (c *cpusetManagerV2) AddAlloc(alloc *structs.Allocation) { @@ -284,22 +295,6 @@ func (c *cpusetManagerV2) write(id identity, set cpuset.CPUSet) { } } -// ensureParentCgroup will create parent cgroup for the manager if it does not -// exist yet. No PIDs are added to any cgroup yet. -func (c *cpusetManagerV2) ensureParent() error { - mgr, err := fs2.NewManager(nil, c.parentAbs) - if err != nil { - return err - } - - if err = mgr.Apply(CreationPID); err != nil { - return err - } - - c.logger.Trace("establish cgroup hierarchy", "parent", c.parent) - return nil -} - // fromRoot returns the joined filepath of group on the CgroupRoot func fromRoot(group string) string { return filepath.Join(CgroupRoot, group) @@ -319,12 +314,3 @@ func getCPUsFromCgroupV2(group string) ([]uint16, error) { } return set.ToSlice(), nil } - -// getParentV2 returns parent if set, otherwise the default name of Nomad's -// parent cgroup (i.e. nomad.slice). -func getParentV2(parent string) string { - if parent == "" { - return DefaultCgroupParentV2 - } - return parent -} diff --git a/client/lib/cgutil/cpuset_manager_v2_test.go b/client/lib/cgutil/cpuset_manager_v2_test.go index a6acc50e76f7..ac5489aa01cd 100644 --- a/client/lib/cgutil/cpuset_manager_v2_test.go +++ b/client/lib/cgutil/cpuset_manager_v2_test.go @@ -32,8 +32,8 @@ func TestCpusetManager_V2_AddAlloc(t *testing.T) { cleanup(t, parent) // setup the cpuset manager - manager := NewCpusetManagerV2(parent, logger) - require.NoError(t, manager.Init(systemCores)) + manager := NewCpusetManagerV2(parent, systemCores, logger) + manager.Init() // add our first alloc, isolating 1 core t.Run("first", func(t *testing.T) { @@ -72,8 +72,8 @@ func TestCpusetManager_V2_RemoveAlloc(t *testing.T) { cleanup(t, parent) // setup the cpuset manager - manager := NewCpusetManagerV2(parent, logger) - require.NoError(t, manager.Init(systemCores)) + manager := NewCpusetManagerV2(parent, systemCores, logger) + manager.Init() // alloc1 gets core 0 alloc1 := mock.Alloc()