Skip to content

Commit

Permalink
wip use docker fixer
Browse files Browse the repository at this point in the history
  • Loading branch information
shoenig committed Mar 13, 2022
1 parent bb0ee5e commit 74e0bf1
Show file tree
Hide file tree
Showing 10 changed files with 251 additions and 110 deletions.
111 changes: 20 additions & 91 deletions client/lib/cgutil/cgutil_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"github.com/hashicorp/go-hclog"
"github.com/opencontainers/runc/libcontainer/cgroups"
lcc "github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)

// UseV2 indicates whether only cgroups.v2 is enabled. If cgroups.v2 is not
Expand Down Expand Up @@ -56,14 +55,17 @@ func CgroupID(allocID, task string) string {
return fmt.Sprintf("%s.%s", task, allocID)
}

// ConfigureBasicCgroups will initialize cgroups for v1.
//
// Not used in cgroups.v2
func ConfigureBasicCgroups(cgroup string, config *lcc.Config) error {
if UseV2 {
panic("do not call me for cgroups.v2")
return nil
}

// In V1 we must setup the freezer cgroup ourselves
subsystem := "freezer"
path, err := getCgroupPathHelper(subsystem, filepath.Join(DefaultCgroupV1Parent, cgroup))
path, err := getCgroupPathHelperV1(subsystem, filepath.Join(DefaultCgroupV1Parent, cgroup))
if err != nil {
return fmt.Errorf("failed to find %s cgroup mountpoint: %v", subsystem, err)
}
Expand All @@ -76,94 +78,6 @@ func ConfigureBasicCgroups(cgroup string, config *lcc.Config) error {
return nil
}

func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) {
if cpus, err = cgroups.ReadFile(parent, "cpuset.cpus"); err != nil {
return
}
if mems, err = cgroups.ReadFile(parent, "cpuset.mems"); err != nil {
return
}
return cpus, mems, nil
}

// cpusetEnsureParent makes sure that the parent directories of current
// are created and populated with the proper cpus and mems files copied
// from their respective parent. It does that recursively, starting from
// the top of the cpuset hierarchy (i.e. cpuset cgroup mount point).
//
// todo: v1 only?
func cpusetEnsureParent(current string) error {
var st unix.Statfs_t

parent := filepath.Dir(current)
err := unix.Statfs(parent, &st)
if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC {
return nil
}
// Treat non-existing directory as cgroupfs as it will be created,
// and the root cpuset directory obviously exists.
if err != nil && err != unix.ENOENT {
return &os.PathError{Op: "statfs", Path: parent, Err: err}
}

if err := cpusetEnsureParent(parent); err != nil {
return err
}
if err := os.Mkdir(current, 0755); err != nil && !os.IsExist(err) {
return err
}
return cpusetCopyIfNeeded(current, parent)
}

// cpusetCopyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
// directory to the current directory if the file's contents are 0
//
// todo: v1 only?
func cpusetCopyIfNeeded(current, parent string) error {
currentCpus, currentMems, err := getCpusetSubsystemSettings(current)
if err != nil {
return err
}
parentCpus, parentMems, err := getCpusetSubsystemSettings(parent)
if err != nil {
return err
}

if isEmptyCpuset(currentCpus) {
if err := cgroups.WriteFile(current, "cpuset.cpus", parentCpus); err != nil {
return err
}
}
if isEmptyCpuset(currentMems) {
if err := cgroups.WriteFile(current, "cpuset.mems", parentMems); err != nil {
return err
}
}
return nil
}

func isEmptyCpuset(str string) bool {
return str == "" || str == "\n"
}

func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
fmt.Println("FindCgroupMountPointAndRoot, subsystem:", subsystem, "cgroup:", cgroup)
mnt, root, err := cgroups.FindCgroupMountpointAndRoot("", subsystem)
if err != nil {
return "", err
}

// This is needed for nested containers, because in /proc/self/cgroup we
// see paths from host, which don't exist in container.
relCgroup, err := filepath.Rel(root, cgroup)
if err != nil {
return "", err
}

result := filepath.Join(mnt, relCgroup)
return result, nil
}

// FindCgroupMountpointDir is used to find the cgroup mount point on a Linux
// system.
func FindCgroupMountpointDir() (string, error) {
Expand All @@ -177,3 +91,18 @@ func FindCgroupMountpointDir() (string, error) {
}
return mount[0].Mountpoint, nil
}

// CopyCpuset copies the cpuset.cpus value from source into destination.
func CopyCpuset(source, destination string) error {
correct, err := cgroups.ReadFile(source, "cpuset.cpus")
if err != nil {
return err
}

err = cgroups.WriteFile(destination, "cpuset.cpus", correct)
if err != nil {
return err
}

return nil
}
94 changes: 89 additions & 5 deletions client/lib/cgutil/cpuset_manager_v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)

const (
Expand Down Expand Up @@ -135,18 +136,18 @@ type allocTaskCgroupInfo map[string]*TaskCgroupInfo
// If the cgroup parent is set to /nomad then this will ensure that the /nomad/shared
// cgroup is initialized.
func (c *cpusetManagerV1) Init(_ []uint16) error {
cgroupParentPath, err := getCgroupPathHelper("cpuset", c.cgroupParent)
cgroupParentPath, err := getCgroupPathHelperV1("cpuset", c.cgroupParent)
if err != nil {
return err
}
c.cgroupParentPath = cgroupParentPath

// ensures that shared cpuset exists and that the cpuset values are copied from the parent if created
if err := cpusetEnsureParent(filepath.Join(cgroupParentPath, SharedCpusetCgroupName)); err != nil {
if err := cpusetEnsureParentV1(filepath.Join(cgroupParentPath, SharedCpusetCgroupName)); err != nil {
return err
}

parentCpus, parentMems, err := getCpusetSubsystemSettings(cgroupParentPath)
parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(cgroupParentPath)
if err != nil {
return fmt.Errorf("failed to detect parent cpuset settings: %v", err)
}
Expand Down Expand Up @@ -250,7 +251,7 @@ func (c *cpusetManagerV1) reconcileCpusets() {
}

// copy cpuset.mems from parent
_, parentMems, err := getCpusetSubsystemSettings(filepath.Dir(info.CgroupPath))
_, parentMems, err := getCpusetSubsystemSettingsV1(filepath.Dir(info.CgroupPath))
if err != nil {
c.logger.Error("failed to read parent cgroup settings for task", "path", info.CgroupPath, "error", err)
info.Error = err
Expand Down Expand Up @@ -321,7 +322,7 @@ func (c *cpusetManagerV1) reservedCpusetPath() string {
}

func getCPUsFromCgroupV1(group string) ([]uint16, error) {
cgroupPath, err := getCgroupPathHelper("cpuset", group)
cgroupPath, err := getCgroupPathHelperV1("cpuset", group)
if err != nil {
return nil, err
}
Expand All @@ -340,3 +341,86 @@ func getParentV1(parent string) string {
}
return parent
}

// cpusetEnsureParentV1 makes sure that the parent directories of current
// are created and populated with the proper cpus and mems files copied
// from their respective parent. It does that recursively, starting from
// the top of the cpuset hierarchy (i.e. cpuset cgroup mount point).
func cpusetEnsureParentV1(current string) error {
var st unix.Statfs_t

parent := filepath.Dir(current)
err := unix.Statfs(parent, &st)
if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC {
return nil
}
// Treat non-existing directory as cgroupfs as it will be created,
// and the root cpuset directory obviously exists.
if err != nil && err != unix.ENOENT {
return &os.PathError{Op: "statfs", Path: parent, Err: err}
}

if err := cpusetEnsureParentV1(parent); err != nil {
return err
}
if err := os.Mkdir(current, 0755); err != nil && !os.IsExist(err) {
return err
}
return cpusetCopyIfNeededV1(current, parent)
}

// cpusetCopyIfNeededV1 copies the cpuset.cpus and cpuset.mems from the parent
// directory to the current directory if the file's contents are 0
func cpusetCopyIfNeededV1(current, parent string) error {
currentCpus, currentMems, err := getCpusetSubsystemSettingsV1(current)
if err != nil {
return err
}
parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(parent)
if err != nil {
return err
}

if isEmptyCpusetV1(currentCpus) {
if err := cgroups.WriteFile(current, "cpuset.cpus", parentCpus); err != nil {
return err
}
}
if isEmptyCpusetV1(currentMems) {
if err := cgroups.WriteFile(current, "cpuset.mems", parentMems); err != nil {
return err
}
}
return nil
}

func getCpusetSubsystemSettingsV1(parent string) (cpus, mems string, err error) {
if cpus, err = cgroups.ReadFile(parent, "cpuset.cpus"); err != nil {
return
}
if mems, err = cgroups.ReadFile(parent, "cpuset.mems"); err != nil {
return
}
return cpus, mems, nil
}

func isEmptyCpusetV1(str string) bool {
return str == "" || str == "\n"
}

func getCgroupPathHelperV1(subsystem, cgroup string) (string, error) {
mnt, root, err := cgroups.FindCgroupMountpointAndRoot("", subsystem)
if err != nil {
return "", err
}

// This is needed for nested containers, because in /proc/self/cgroup we
// see paths from host, which don't exist in container.
relCgroup, err := filepath.Rel(root, cgroup)
if err != nil {
return "", err
}

result := filepath.Join(mnt, relCgroup)
return result, nil
}
4 changes: 2 additions & 2 deletions client/lib/cgutil/cpuset_manager_v1_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ func tmpCpusetManagerV1(t *testing.T) (manager *cpusetManagerV1, cleanup func())
}

parent := "/gotest-" + uuid.Short()
require.NoError(t, cpusetEnsureParent(parent))
require.NoError(t, cpusetEnsureParentV1(parent))

manager = &cpusetManagerV1{
cgroupParent: parent,
cgroupInfo: map[string]allocTaskCgroupInfo{},
logger: testlog.HCLogger(t),
}

parentPath, err := getCgroupPathHelper("cpuset", parent)
parentPath, err := getCgroupPathHelperV1("cpuset", parent)
require.NoError(t, err)

return manager, func() { require.NoError(t, cgroups.RemovePaths(map[string]string{"cpuset": parentPath})) }
Expand Down
7 changes: 2 additions & 5 deletions client/lib/cgutil/cpuset_manager_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,18 +250,15 @@ func (c *cpusetManagerV2) remove(path string) {
}

// get the list of pids managed by this scope (should be 0 or 1)
pids, err2 := mgr.GetPids()
if err2 != nil {
c.logger.Warn("failed to list pids", "path", path, "err", err)
return
}
pids, _ := mgr.GetPids()

// do not destroy the scope if a PID is still present
// this is a normal condition when an agent restarts with running tasks
if len(pids) > 0 {
return
}

// remove the cgroup
if err3 := mgr.Destroy(); err3 != nil {
c.logger.Warn("failed to cleanup cgroup", "path", path, "err", err)
return
Expand Down
4 changes: 3 additions & 1 deletion drivers/docker/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -754,7 +754,9 @@ func (d *Driver) SetConfig(c *base.Config) error {

d.coordinator = newDockerCoordinator(coordinatorConfig)

d.reconciler = newReconciler(d)
d.danglingReconciler = newReconciler(d)

d.cpusetFixer = newCpusetFixer(d)

return nil
}
Expand Down
17 changes: 14 additions & 3 deletions drivers/docker/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/hashicorp/nomad/plugins/base"
"github.com/hashicorp/nomad/plugins/drivers"
pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/ryanuber/go-glob"
)

Expand Down Expand Up @@ -123,7 +124,8 @@ type Driver struct {
detected bool
detectedLock sync.RWMutex

reconciler *containerReconciler
danglingReconciler *containerReconciler
cpusetFixer *cpusetFixer
}

// NewDockerDriver returns a docker implementation of a driver plugin
Expand Down Expand Up @@ -352,7 +354,11 @@ CREATE:
}

if !cgutil.UseV2 {
// TODO compat - can remove when major distros drop cgroups.v1
// TODO compat - remove when major distros drop cgroups.v1
//
// This workaround does not work with cgroups.v2, which only allows setting the PID
// into exactly 1 group. For cgroups.v2, we must use the cpuset fixer to reconcile
// the cpuset value into the cgroups created by docker in the background.
if containerCfg.HostConfig.CPUSet == "" && cfg.Resources.LinuxResources.CpusetCgroupPath != "" {
if err := setCPUSetCgroup(cfg.Resources.LinuxResources.CpusetCgroupPath, container.State.Pid); err != nil {
return nil, nil, fmt.Errorf("failed to set the cpuset cgroup for container: %v", err)
Expand Down Expand Up @@ -851,6 +857,8 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T

CPUShares: task.Resources.LinuxResources.CPUShares,

CgroupParent: "nomad.slice", // configurable

// Binds are used to mount a host volume into the container. We mount a
// local directory for storage and a shared alloc directory that can be
// used to share data between different tasks in the same task group.
Expand Down Expand Up @@ -1196,7 +1204,10 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T
config.Env = task.EnvList()

containerName := fmt.Sprintf("%s-%s", strings.ReplaceAll(task.Name, "/", "_"), task.AllocID)
logger.Debug("setting container name", "container_name", containerName)
if cgroups.IsCgroup2UnifiedMode() {
containerName = fmt.Sprintf("%s.%s.scope", task.AllocID, task.Name)
}
logger.Info("setting container name", "container_name", containerName)

var networkingConfig *docker.NetworkingConfig
if len(driverConfig.NetworkAliases) > 0 || driverConfig.IPv4Address != "" || driverConfig.IPv6Address != "" {
Expand Down
Loading

0 comments on commit 74e0bf1

Please sign in to comment.