Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reduce CPU usage running large numbers of clients #5951

Merged
merged 6 commits into from
Jul 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion drivers/shared/executor/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ func (e *UniversalExecutor) Launch(command *ExecCommand) (*ProcessState, error)
return nil, fmt.Errorf("failed to start command path=%q --- args=%q: %v", path, e.childCmd.Args, err)
}

go e.pidCollector.collectPids(e.processExited, getAllPids)
go e.pidCollector.collectPids(e.processExited, e.getAllPids)
go e.wait()
return &ProcessState{Pid: e.childCmd.Process.Pid, ExitCode: -1, Time: time.Now()}, nil
}
Expand Down
4 changes: 4 additions & 0 deletions drivers/shared/executor/executor_basic.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,7 @@ func NewExecutorWithIsolation(logger hclog.Logger) Executor {
func (e *UniversalExecutor) configureResourceContainer(_ int) error { return nil }

func (e *UniversalExecutor) runAs(_ string) error { return nil }

func (e *UniversalExecutor) getAllPids() (map[int]*nomadPid, error) {
return getAllPidsByScanning()
}
15 changes: 14 additions & 1 deletion drivers/shared/executor/executor_universal_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,24 @@ func (e *UniversalExecutor) configureResourceContainer(pid int) error {
},
}

configureBasicCgroups(cfg)
err := configureBasicCgroups(cfg)
if err != nil {
e.logger.Warn("failed to create cgroup", "error", err)
return err
}

e.resConCtx.groups = cfg.Cgroups
return cgroups.EnterPid(cfg.Cgroups.Paths, pid)
}

func (e *UniversalExecutor) getAllPids() (map[int]*nomadPid, error) {
if e.resConCtx.isEmpty() {
return getAllPidsByScanning()
} else {
return e.resConCtx.getAllPidsByCgroup()
}
}

// DestroyCgroup kills all processes in the cgroup and removes the cgroup
// configuration from the host. This function is idempotent.
func DestroyCgroup(groups *lconfigs.Cgroup, executorPid int) error {
Expand Down
2 changes: 1 addition & 1 deletion drivers/shared/executor/pid_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ func aggregatedResourceUsage(systemCpuStats *stats.CpuStats, pidStats map[string
}
}

func getAllPids() (map[int]*nomadPid, error) {
func getAllPidsByScanning() (map[int]*nomadPid, error) {
allProcesses, err := ps.Processes()
if err != nil {
return nil, err
Expand Down
37 changes: 37 additions & 0 deletions drivers/shared/executor/resource_container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"os"
"sync"

"github.com/hashicorp/nomad/client/stats"
"github.com/opencontainers/runc/libcontainer/cgroups"
cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
)

Expand All @@ -23,3 +25,38 @@ func (rc *resourceContainerContext) executorCleanup() error {
}
return nil
}

func (rc *resourceContainerContext) isEmpty() bool {
return rc.groups == nil
}

func (rc *resourceContainerContext) getAllPidsByCgroup() (map[int]*nomadPid, error) {
nPids := map[int]*nomadPid{}

if rc.groups == nil {
return nPids, nil
}

var path string
if p, ok := rc.groups.Paths["freezer"]; ok {
path = p
} else {
path = rc.groups.Path
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For cgroups, we want to use a single cgroup hierarchy. Checking multiple hierarchy would lead to duplicate values in the case of multiple cgroups attached.

In Nomad, we always create the freezer cgroup only in configureBasicCgroups[1], so that's an appropriate one to use. For comparison, docker/runc requires devices cgroup to be mounted, so they only check the devices subsystem[2]

[1]

// Manually create freezer cgroup
cfg.Cgroups.Paths = map[string]string{}
root, err := cgroups.FindCgroupMountpointDir()
if err != nil {
return err
}
if _, err := os.Stat(root); err != nil {
return err
}
freezer := cgroupFs.FreezerGroup{}
subsystem := freezer.Name()
path, err := cgroups.FindCgroupMountpoint("", subsystem)

[2] https://github.com/opencontainers/runc/blob/master/libcontainer/cgroups/fs/apply_raw.go#L286-L289

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in configureBasicCgroups, we make an empty Freezer and ask it it's name, which constantly returns "freezer". Should I do that here? or use just the string over there?


pids, err := cgroups.GetAllPids(path)
if err != nil {
return nPids, err
}

for _, pid := range pids {
nPids[pid] = &nomadPid{
pid: pid,
cpuStatsTotal: stats.NewCpuStats(),
cpuStatsUser: stats.NewCpuStats(),
cpuStatsSys: stats.NewCpuStats(),
}
}

return nPids, nil
}