Skip to content
This repository has been archived by the owner on May 12, 2021. It is now read-only.

Commit

Permalink
Merge pull request #447 from jcvenegas/cpuset-ignore
Browse files Browse the repository at this point in the history
cgroup: ignore cpuset if can not be applied.
  • Loading branch information
Julio Montes authored Feb 18, 2019
2 parents ca9d520 + 67b2559 commit e8ca07f
Show file tree
Hide file tree
Showing 23 changed files with 3,440 additions and 40 deletions.
26 changes: 9 additions & 17 deletions Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

65 changes: 65 additions & 0 deletions cgroup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
//
// Copyright (c) 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

package main

import (
"io/ioutil"
"strings"

"github.com/docker/docker/pkg/parsers"
"github.com/sirupsen/logrus"
)

// getCpusetGuest returns the content of the root cpuset cgroup "cpuset.cpus"
// file with leading and trailing whitespace removed.
// It is kept in a variable so tests can replace it with a fake.
var getCpusetGuest = func() (string, error) {
	const guestCpusetFile = "/sys/fs/cgroup/cpuset/cpuset.cpus"

	content, err := ioutil.ReadFile(guestCpusetFile)
	if err != nil {
		return "", err
	}

	// The file content ends with a newline; callers expect the bare list.
	cpus := strings.TrimSpace(string(content))
	return cpus, nil
}

// getAvailableCpusetList returns the cpuset list that should be applied in
// the guest for the cpuset requested by the caller.
//
// The runtime caller may request a cpuset for specific CPUs in the host.
// Those CPUs may not exist in the guest, as they are hotplugged based on
// cpu and quota.
//
// If every CPU in cpusetReq is present in the guest cpuset, cpusetReq is
// returned unchanged. If at least one requested CPU is missing, the guest
// cpuset (as reported by getCpusetGuest) is returned instead.
// An error is returned when the guest cpuset cannot be read or when either
// list cannot be parsed.
func getAvailableCpusetList(cpusetReq string) (string, error) {
	guestCpus, err := getCpusetGuest()
	if err != nil {
		return "", err
	}

	requested, err := parsers.ParseUintList(cpusetReq)
	if err != nil {
		return "", err
	}

	available, err := parsers.ParseUintList(guestCpus)
	if err != nil {
		return "", err
	}

	for cpu := range requested {
		if available[cpu] {
			continue
		}

		// A requested CPU is missing in the guest: fall back to the
		// whole guest cpuset instead of failing the request.
		fields := logrus.Fields{
			"cpuset":     cpusetReq,
			"cpu":        cpu,
			"guest-cpus": guestCpus,
		}
		agentLog.WithFields(fields).Warnf("cpu is not in guest cpu list, using guest cpus")
		return guestCpus, nil
	}

	// Every requested CPU exists in the guest, so the request string is
	// kept exactly as it was received.
	fields := logrus.Fields{
		"cpuset": cpusetReq,
	}
	agentLog.WithFields(fields).Debugf("the requested cpuset is valid, using it")
	return cpusetReq, nil
}
45 changes: 45 additions & 0 deletions cgroup_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright (c) 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//

package main

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestGetAvailableCpusetList checks that getAvailableCpusetList returns the
// requested cpuset untouched when all requested CPUs exist in the guest, and
// returns the guest cpuset when at least one requested CPU does not exist.
func TestGetAvailableCpusetList(t *testing.T) {
	// Pretend the guest only has CPUs 0 to 3.
	// NOTE(review): the getCpusetGuest override is not restored when this
	// test returns; confirm no other test depends on it before adding a
	// restore.
	fakeGuestCpuset := "0-3"
	getCpusetGuest = func() (string, error) {
		return fakeGuestCpuset, nil
	}

	type testCase struct {
		input          string
		expectedOutput string
	}

	cases := []testCase{
		{"0", "0"},
		{"0,1", "0,1"},
		{"0,1,2", "0,1,2"},
		{"0,1,2,3", "0,1,2,3"},
		{"0,1,2,3,4", fakeGuestCpuset},
		{"0-3", "0-3"},
		{"0-3,4", fakeGuestCpuset},
		{"0-4", fakeGuestCpuset},
		{"1", "1"},
		{"1,3", "1,3"},
		{"2-3", "2-3"},
		{"2-4", fakeGuestCpuset},
	}

	for _, c := range cases {
		out, err := getAvailableCpusetList(c.input)
		assert.NoError(t, err, "Failed to calculate : %v", err)
		// assert.Equal takes (expected, actual); the input is included so
		// a failure identifies the offending case.
		assert.Equal(t, c.expectedOutput, out, "unexpected cpuset for input %q", c.input)
	}
}
52 changes: 52 additions & 0 deletions documentation/features/cpuset.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Cpuset cgroup.

From Kernel documentation:

_" Cpusets provide a mechanism for assigning a set of CPUs and Memory Nodes to
a set of tasks."_

The Kata agent brings compatibility to the cgroup cpuset CPU on the guest side.

The cpuset CPU cgroup will be applied on two events:

- container creation

- container update

When the runtime asks the agent to apply a cpuset cgroup, the number of vCPUs
available in the guest might not be the same as the vCPUs required by the request.

This is because the request from the agent client (i.e. the Kata runtime)
passes cpusets that are requested to be placed on the host. This isolates the
container workload on some specific host CPUs. The runtime passes the requested
cpuset to the agent, which tries to apply the cgroup cpuset on the guest.

The runtime only calculates and hot-plugs the CPUs based on the container
period and quota. This is why the VM will not have the same number of CPUs as
the host.

Example:

docker run -ti --cpus 2 --cpuset 0,1 busybox

This should result in the container being limited to the CPU time of 2 CPUs,
and only being allowed to be scheduled on CPUs 0 and 1.

The following is an example of a similar case with a valid traditional container:

docker run -ti --cpus 2 --cpuset 2,3,4 busybox

Here, the container is limited to 2 CPUs and can be scheduled on CPU 2, 3, and
4.

The Kata runtime only hotplugs 2 CPUs, making it impossible to request that the
guest kernel schedules the workload on vcpu 3 and 4.

## cpuset best effort application.

The Kata agent evaluates the request to see if it is possible to apply the
cpuset request onto the guest.

- If the CPUs requested are not available in the guest, the request is ignored and the guest's own cpuset is used instead.
- If the CPUs requested are available, the request is applied by the agent.

79 changes: 58 additions & 21 deletions grpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,37 +155,59 @@ func onlineMemResources() error {
return err
}

// updates container's cpuset cgroups visiting each sub-directory in cgroupPath and writing
// newCpuset in the cpuset.cpus file, cookies are used for performance reasons in order to
// updates a cpuset cgroup path by visiting each parent directory of cgroupPath and writing
// the maximal set of cpus in its cpuset.cpus file; finally cgroupPath itself is updated with
// the requested value.
// cookies are used for performance reasons in order to
// avoid updating a cgroup twice.
func updateContainerCpuset(cgroupPath string, newCpuset string, cookies cookie) error {
// Each cpuset cgroup MUST BE updated with the actual number of vCPUs.
cpusetPath := cgroupCpusetPath
cgroupsPaths := strings.Split(cgroupPath, "/")
for _, path := range cgroupsPaths {
func updateCpusetPath(cgroupPath string, newCpuset string, cookies cookie) error {
// Each cpuset cgroup parent MUST BE updated with the actual number of vCPUs.
//Start to update from cgroup system root.
cgroupParentPath := cgroupCpusetPath

cpusetGuest, err := getCpusetGuest()
if err != nil {
return err
}

// Update parents with the max set of current cpus.
// Iterate over all parent dirs in order.
// This is needed to ensure the cgroup parent has the cpus needed
// by the request.
cgroupsParentPaths := strings.Split(filepath.Dir(cgroupPath), "/")
for _, path := range cgroupsParentPaths {
// Skip if empty.
if path == "" {
continue
}

cpusetPath = filepath.Join(cpusetPath, path)
cgroupParentPath = filepath.Join(cgroupParentPath, path)

// check if the cgroup was already updated.
if cookies[cpusetPath] == true {
agentLog.WithField("path", cpusetPath).Debug("cpuset cgroup already updated")
if cookies[cgroupParentPath] == true {
agentLog.WithField("path", cgroupParentPath).Debug("cpuset cgroup already updated")
continue
}

// Don't use c.container.Set because of it will modify container's config.
// c.container.Set MUST BE used only on update.
cpusetCpusPath := filepath.Join(cpusetPath, "cpuset.cpus")
agentLog.WithField("path", cpusetPath).Debug("updating cpuset cgroup")
if err := ioutil.WriteFile(cpusetCpusPath, []byte(newCpuset), cpusetMode); err != nil {
return fmt.Errorf("Could not update cpuset cgroup '%s': %v", newCpuset, err)
cpusetCpusParentPath := filepath.Join(cgroupParentPath, "cpuset.cpus")

agentLog.WithField("path", cpusetCpusParentPath).Debug("updating cpuset parent cgroup")

if err := ioutil.WriteFile(cpusetCpusParentPath, []byte(cpusetGuest), cpusetMode); err != nil {
return fmt.Errorf("Could not update parent cpuset cgroup (%s) cpuset:'%s': %v", cpusetCpusParentPath, cpusetGuest, err)
}

// add cgroup path to the cookies.
cookies[cpusetPath] = true
cookies[cgroupParentPath] = true
}

// Finally update group path with requested value.
cpusetCpusPath := filepath.Join(cgroupCpusetPath, cgroupPath, "cpuset.cpus")

agentLog.WithField("path", cpusetCpusPath).Debug("updating cpuset cgroup")

if err := ioutil.WriteFile(cpusetCpusPath, []byte(newCpuset), cpusetMode); err != nil {
return fmt.Errorf("Could not update parent cpuset cgroup (%s) cpuset:'%s': %v", cpusetCpusPath, cpusetGuest, err)
}

return nil
Expand Down Expand Up @@ -215,11 +237,10 @@ func (a *agentGRPC) onlineCPUMem(req *pb.OnlineCPUMemRequest) error {

// At this point all CPUs have been connected, we need to know
// the actual range of CPUs
cpus, err := ioutil.ReadFile(sysfsConnectedCPUsPath)
connectedCpus, err := getCpusetGuest()
if err != nil {
return handleError(req.Wait, fmt.Errorf("Could not get the actual range of connected CPUs: %v", err))
}
connectedCpus := strings.Trim(string(cpus), "\t\n ")
agentLog.WithField("range-of-vcpus", connectedCpus).Debug("connecting vCPUs")

cookies := make(cookie)
Expand All @@ -238,13 +259,15 @@ func (a *agentGRPC) onlineCPUMem(req *pb.OnlineCPUMemRequest) error {
// - write /sys/fs/cgroup/cpuset/XXXXX/cpuset.cpus: device or resource busy
// NOTE: updating container cpuset cgroup *parents* won't affect container cpuset cgroup, for example if container cpuset cgroup has "0"
// and its cpuset cgroup *parents* have "0-5", the container will be able to use only the CPU 0.

// containers with an assigned cpuset are not updated; we only update their parents.
if contConfig.Cgroups.Resources.CpusetCpus != "" {
agentLog.WithField("cpuset", contConfig.Cgroups.Resources.CpusetCpus).Debug("updating container cpuset cgroup parents")
// remove container cgroup directory
cgroupPath = filepath.Dir(cgroupPath)
}

if err := updateContainerCpuset(cgroupPath, connectedCpus, cookies); err != nil {
if err := updateCpusetPath(cgroupPath, connectedCpus, cookies); err != nil {
return handleError(req.Wait, err)
}
}
Expand Down Expand Up @@ -625,6 +648,15 @@ func (a *agentGRPC) CreateContainer(ctx context.Context, req *pb.CreateContainer
return emptyResp, err
}

if ociSpec.Linux.Resources.CPU != nil && ociSpec.Linux.Resources.CPU.Cpus != "" {
availableCpuset, err := getAvailableCpusetList(ociSpec.Linux.Resources.CPU.Cpus)
if err != nil {
return emptyResp, err
}

ociSpec.Linux.Resources.CPU.Cpus = availableCpuset
}

if a.sandbox.guestHooksPresent {
// Add any custom OCI hooks to the spec
a.sandbox.addGuestHooks(ociSpec)
Expand Down Expand Up @@ -1017,8 +1049,13 @@ func (a *agentGRPC) UpdateContainer(ctx context.Context, req *pb.UpdateContainer

// cpuset is a special case where container's cpuset cgroup MUST BE updated
if resources.CpusetCpus != "" {
resources.CpusetCpus, err = getAvailableCpusetList(resources.CpusetCpus)
if err != nil {
return emptyResp, err
}

cookies := make(cookie)
if err = updateContainerCpuset(contConfig.Cgroups.Path, resources.CpusetCpus, cookies); err != nil {
if err = updateCpusetPath(contConfig.Cgroups.Path, resources.CpusetCpus, cookies); err != nil {
agentLog.WithError(err).Warn("Could not update container cpuset cgroup")
}
}
Expand Down
4 changes: 2 additions & 2 deletions grpc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -517,11 +517,11 @@ func TestUpdateContainerCpuset(t *testing.T) {
cookies := make(cookie)
cgroupPath += "///"

err = updateContainerCpuset(cgroupPath, "0-7", cookies)
err = updateCpusetPath(cgroupPath, "0-7", cookies)
assert.NoError(err)

// run again to ensure cookies are used
err = updateContainerCpuset(cgroupPath, "0-7", cookies)
err = updateCpusetPath(cgroupPath, "0-7", cookies)
assert.NoError(err)
}

Expand Down
Loading

0 comments on commit e8ca07f

Please sign in to comment.