Skip to content

Commit

Permalink
Merge pull request #12274 from hashicorp/f-cgroupsv2
Browse files Browse the repository at this point in the history
client: enable cpuset support for cgroups.v2
  • Loading branch information
shoenig committed Mar 24, 2022
2 parents cb54438 + c27af79 commit 8e77776
Show file tree
Hide file tree
Showing 71 changed files with 1,954 additions and 669 deletions.
3 changes: 3 additions & 0 deletions .changelog/12274.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
Enable support for cgroups v2
```
6 changes: 3 additions & 3 deletions .github/workflows/test-core.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ jobs:
- client/devicemanager
- client/dynamicplugins
- client/fingerprint
# - client/lib/...
- client/lib/...
- client/logmon
- client/pluginmanager
- client/state
Expand All @@ -105,8 +105,8 @@ jobs:
- client/taskenv
- command
- command/agent
# - drivers/docker
# - drivers/exec
- drivers/docker
- drivers/exec
- drivers/java
- drivers/rawexec
- helper/...
Expand Down
1 change: 1 addition & 0 deletions client/alloc_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,7 @@ func TestAlloc_ExecStreaming_ACL_WithIsolation_Image(t *testing.T) {
// TestAlloc_ExecStreaming_ACL_WithIsolation_Chroot asserts that token only needs
// alloc-exec acl policy when chroot isolation is used
func TestAlloc_ExecStreaming_ACL_WithIsolation_Chroot(t *testing.T) {
ci.SkipSlow(t, "flaky on GHA; too much disk IO")
ci.Parallel(t)

if runtime.GOOS != "linux" || unix.Geteuid() != 0 {
Expand Down
2 changes: 1 addition & 1 deletion client/allocrunner/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func testAllocRunnerConfig(t *testing.T, alloc *structs.Allocation) (*Config, fu
PrevAllocMigrator: allocwatcher.NoopPrevAlloc{},
DeviceManager: devicemanager.NoopMockManager(),
DriverManager: drivermanager.TestDriverManager(t),
CpusetManager: cgutil.NoopCpusetManager(),
CpusetManager: new(cgutil.NoopCpusetManager),
ServersContactedCh: make(chan struct{}),
}
return conf, cleanup
Expand Down
22 changes: 13 additions & 9 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie
invalidAllocs: make(map[string]struct{}),
serversContactedCh: make(chan struct{}),
serversContactedOnce: sync.Once{},
cpusetManager: cgutil.NewCpusetManager(cfg.CgroupParent, logger.Named("cpuset_manager")),
cpusetManager: cgutil.CreateCPUSetManager(cfg.CgroupParent, logger),
EnterpriseClient: newEnterpriseClient(logger),
}

Expand Down Expand Up @@ -657,19 +657,23 @@ func (c *Client) init() error {

// Ensure cgroups are created on linux platform
if runtime.GOOS == "linux" && c.cpusetManager != nil {
err := c.cpusetManager.Init()
if err != nil {
// if the client cannot initialize the cgroup then reserved cores will not be reported and the cpuset manager
// will be disabled. this is common when running in dev mode under a non-root user for example
c.logger.Warn("could not initialize cpuset cgroup subsystem, cpuset management disabled", "error", err)
c.cpusetManager = cgutil.NoopCpusetManager()
// use the client configuration for reservable_cores if set
cores := c.config.ReservableCores
if len(cores) == 0 {
// otherwise lookup the effective cores from the parent cgroup
cores, _ = cgutil.GetCPUsFromCgroup(c.config.CgroupParent)
}
if cpuErr := c.cpusetManager.Init(cores); cpuErr != nil {
// If the client cannot initialize the cgroup then reserved cores will not be reported and the cpuset manager
// will be disabled. this is common when running in dev mode under a non-root user for example.
c.logger.Warn("failed to initialize cpuset cgroup subsystem, cpuset management disabled", "error", cpuErr)
c.cpusetManager = new(cgutil.NoopCpusetManager)
}
}
return nil
}

// reloadTLSConnections allows a client to reload its TLS configuration on the
// fly
// reloadTLSConnections allows a client to reload its TLS configuration on the fly
func (c *Client) reloadTLSConnections(newConfig *nconfig.TLSConfig) error {
var tlsWrap tlsutil.RegionWrapper
if newConfig != nil && newConfig.EnableRPC {
Expand Down
3 changes: 1 addition & 2 deletions client/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/hashicorp/nomad/client/config"
consulApi "github.com/hashicorp/nomad/client/consul"
"github.com/hashicorp/nomad/client/fingerprint"
cstate "github.com/hashicorp/nomad/client/state"
"github.com/hashicorp/nomad/command/agent/consul"
"github.com/hashicorp/nomad/helper/pluginutils/catalog"
"github.com/hashicorp/nomad/helper/pluginutils/singleton"
Expand All @@ -30,8 +31,6 @@ import (
psstructs "github.com/hashicorp/nomad/plugins/shared/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/stretchr/testify/assert"

cstate "github.com/hashicorp/nomad/client/state"
"github.com/stretchr/testify/require"
)

Expand Down
2 changes: 1 addition & 1 deletion client/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,7 @@ func DefaultConfig() *Config {
CNIConfigDir: "/opt/cni/config",
CNIInterfacePrefix: "eth",
HostNetworks: map[string]*structs.ClientHostNetworkConfig{},
CgroupParent: cgutil.DefaultCgroupParent,
CgroupParent: cgutil.GetCgroupParent(""),
MaxDynamicPort: structs.DefaultMinDynamicPort,
MinDynamicPort: structs.DefaultMaxDynamicPort,
}
Expand Down
61 changes: 46 additions & 15 deletions client/fingerprint/cgroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,55 +3,86 @@ package fingerprint
import (
"time"

"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/client/lib/cgutil"

log "github.com/hashicorp/go-hclog"
)

const (
cgroupAvailable = "available"
cgroupUnavailable = "unavailable"
interval = 15

cgroupMountPointAttribute = "unique.cgroup.mountpoint"
cgroupVersionAttribute = "unique.cgroup.version"

cgroupDetectInterval = 15 * time.Second
)

type CGroupFingerprint struct {
logger log.Logger
logger hclog.Logger
lastState string
mountPointDetector MountPointDetector
versionDetector CgroupVersionDetector
}

// An interface to isolate calls to the cgroup library
// This facilitates testing where we can implement
// fake mount points to test various code paths
// MountPointDetector isolates calls to the cgroup library.
//
// This facilitates testing where we can implement fake mount points to test
// various code paths.
type MountPointDetector interface {
// MountPoint returns a cgroup mount-point.
//
// In v1, this is one arbitrary subsystem (e.g. /sys/fs/cgroup/cpu).
//
// In v2, this is the actual root mount point (i.e. /sys/fs/cgroup).
MountPoint() (string, error)
}

// Implements the interface detector which calls the cgroups library directly
// DefaultMountPointDetector implements the interface detector which calls the cgroups
// library directly
type DefaultMountPointDetector struct {
}

// MountPoint calls out to the default cgroup library.
func (b *DefaultMountPointDetector) MountPoint() (string, error) {
func (*DefaultMountPointDetector) MountPoint() (string, error) {
return cgutil.FindCgroupMountpointDir()
}

// CgroupVersionDetector isolates calls to the cgroup library.
type CgroupVersionDetector interface {
// CgroupVersion returns v1 or v2 depending on the cgroups version in use.
CgroupVersion() string
}

// DefaultCgroupVersionDetector implements the version detector which calls the
// cgroups library directly.
type DefaultCgroupVersionDetector struct {
}

func (*DefaultCgroupVersionDetector) CgroupVersion() string {
if cgutil.UseV2 {
return "v2"
}
return "v1"
}

// NewCGroupFingerprint returns a new cgroup fingerprinter
func NewCGroupFingerprint(logger log.Logger) Fingerprint {
f := &CGroupFingerprint{
func NewCGroupFingerprint(logger hclog.Logger) Fingerprint {
return &CGroupFingerprint{
logger: logger.Named("cgroup"),
lastState: cgroupUnavailable,
mountPointDetector: &DefaultMountPointDetector{},
mountPointDetector: new(DefaultMountPointDetector),
versionDetector: new(DefaultCgroupVersionDetector),
}
return f
}

// clearCGroupAttributes clears any node attributes related to cgroups that might
// have been set in a previous fingerprint run.
func (f *CGroupFingerprint) clearCGroupAttributes(r *FingerprintResponse) {
r.RemoveAttribute("unique.cgroup.mountpoint")
r.RemoveAttribute(cgroupMountPointAttribute)
r.RemoveAttribute(cgroupVersionAttribute)
}

// Periodic determines the interval at which the periodic fingerprinter will run.
func (f *CGroupFingerprint) Periodic() (bool, time.Duration) {
return true, interval * time.Second
return true, cgroupDetectInterval
}
1 change: 0 additions & 1 deletion client/fingerprint/cgroup_default.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
//go:build !linux
// +build !linux

package fingerprint

Expand Down
22 changes: 10 additions & 12 deletions client/fingerprint/cgroup_linux.go
Original file line number Diff line number Diff line change
@@ -1,37 +1,35 @@
//go:build linux
// +build linux

package fingerprint

import (
"fmt"
)

const (
cgroupAvailable = "available"
)

// Fingerprint tries to find a valid cgroup mount point
// Fingerprint tries to find a valid cgroup mount point and the version of cgroups
// if a mount-point is present.
func (f *CGroupFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintResponse) error {
mount, err := f.mountPointDetector.MountPoint()
if err != nil {
f.clearCGroupAttributes(resp)
return fmt.Errorf("Failed to discover cgroup mount point: %s", err)
return fmt.Errorf("failed to discover cgroup mount point: %s", err)
}

// Check if a cgroup mount point was found
// Check if a cgroup mount point was found.
if mount == "" {

f.clearCGroupAttributes(resp)

if f.lastState == cgroupAvailable {
f.logger.Info("cgroups are unavailable")
f.logger.Warn("cgroups are now unavailable")
}
f.lastState = cgroupUnavailable
return nil
}

resp.AddAttribute("unique.cgroup.mountpoint", mount)
// Check the version in use.
version := f.versionDetector.CgroupVersion()

resp.AddAttribute(cgroupMountPointAttribute, mount)
resp.AddAttribute(cgroupVersionAttribute, version)
resp.Detected = true

if f.lastState == cgroupUnavailable {
Expand Down
Loading

0 comments on commit 8e77776

Please sign in to comment.