Skip to content

Commit

Permalink
Add I/O priority
Browse files Browse the repository at this point in the history
Signed-off-by: utam0k <k0ma@utam0k.jp>
  • Loading branch information
utam0k committed Jun 15, 2023
1 parent 5cf9bb2 commit 9f7d118
Show file tree
Hide file tree
Showing 12 changed files with 137 additions and 2 deletions.
1 change: 0 additions & 1 deletion docs/spec-conformance.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,3 @@ v1.1.0-rc.1 | `SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV` | [#3862](https://github
v1.1.0-rc.2 | time namespaces | [#3876](https://github.com/opencontainers/runc/pull/3876)
v1.1.0-rc.2 | rsvd hugetlb cgroup | TODO ([#3859](https://github.com/opencontainers/runc/issues/3859))
v1.1.0-rc.3 | `.process.scheduler` | TODO ([#3895](https://github.com/opencontainers/runc/issues/3895))
v1.1.0-rc.3 | `.process.ioPriority` | [#3783](https://github.com/opencontainers/runc/pull/3783)
2 changes: 1 addition & 1 deletion docs/terminals.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ you use `runc` directly in something like a `systemd` unit file. To disable
this `LISTEN_FDS`-style passing just unset `LISTEN_FDS`.

**Be very careful when passing file descriptors to a container process.** Due
to some Linux kernel (mis)features, a container with access to certain types of
to some Linux kernel misfeatures, a container with access to certain types of
file descriptors (such as `O_PATH` descriptors) outside of the container's root
file system can use these to break out of the container's pivoted mount
namespace. [This has resulted in CVEs in the past.][CVE-2016-9962]
Expand Down
14 changes: 14 additions & 0 deletions libcontainer/configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,20 @@ type Config struct {
// RootlessCgroups is set when unlikely to have the full access to cgroups.
// When RootlessCgroups is set, cgroups errors are ignored.
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`

// IOPriority is the container's I/O priority.
IOPriority *IOPriority `json:"io_priority,omitempty"`
}

// IOPrioClassMapping translates a runtime-spec I/O priority class name into
// the integer class value used when the priority is encoded for the
// ioprio_set syscall (utils.SetIOPriority shifts this value left by 13 bits
// and ORs in the priority level).
//
// NOTE(review): the values 1-3 presumably mirror the kernel's IOPRIO_CLASS_RT,
// IOPRIO_CLASS_BE and IOPRIO_CLASS_IDLE numbering -- confirm against
// linux/ioprio.h.
var IOPrioClassMapping = map[specs.IOPriorityClass]int{
specs.IOPRIO_CLASS_RT: 1,
specs.IOPRIO_CLASS_BE: 2,
specs.IOPRIO_CLASS_IDLE: 3,
}

// IOPriority describes the I/O priority requested for a container process.
type IOPriority struct {
// Class is the I/O priority class. It has to be a key of
// IOPrioClassMapping for the priority to be applied.
Class specs.IOPriorityClass `json:"class"`
// Priority is the level within the class. Config validation accepts
// values from 0 through 7 only.
Priority int `json:"priority"`
}

type (
Expand Down
12 changes: 12 additions & 0 deletions libcontainer/configs/validate/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ func Validate(config *configs.Config) error {
sysctl,
intelrdtCheck,
rootlessEUIDCheck,
ioPriority,
}
for _, c := range checks {
if err := c(config); err != nil {
Expand Down Expand Up @@ -286,3 +287,14 @@ func isHostNetNS(path string) (bool, error) {

return (st1.Dev == st2.Dev) && (st1.Ino == st2.Ino), nil
}

// ioPriority validates the optional I/O priority section of the config.
//
// A config without an IOPriority is always valid. Otherwise the priority
// level must lie in the inclusive range 0-7; any other value yields an error.
// Only the level is checked here; the class is not inspected.
func ioPriority(config *configs.Config) error {
	iop := config.IOPriority
	if iop == nil {
		return nil
	}
	if level := iop.Priority; level >= 0 && level <= 7 {
		return nil
	}
	return fmt.Errorf("invalid ioPriority.Priority: %d", iop.Priority)
}
29 changes: 29 additions & 0 deletions libcontainer/configs/validate/validator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -387,3 +387,32 @@ func TestValidateMounts(t *testing.T) {
}
}
}

// TestValidateIOPriority checks that Validate accepts I/O priority levels at
// both ends of the allowed 0-7 range and rejects the values immediately
// outside of it on either side.
func TestValidateIOPriority(t *testing.T) {
	testCases := []struct {
		isErr    bool
		priority int
	}{
		{isErr: false, priority: 0},
		{isErr: false, priority: 7},
		{isErr: true, priority: -1},
		// Upper boundary: the first value above the allowed range.
		{isErr: true, priority: 8},
	}

	for _, tc := range testCases {
		// Named iop (not ioPriority) to avoid shadowing the validator
		// function of the same name in this package.
		iop := configs.IOPriority{
			Priority: tc.priority,
		}
		config := &configs.Config{
			Rootfs:     "/var",
			IOPriority: &iop,
		}

		err := Validate(config)
		if tc.isErr && err == nil {
			t.Errorf("iopriority: %d, expected error, got nil", tc.priority)
		}
		if !tc.isErr && err != nil {
			t.Errorf("iopriority: %d, expected nil, got error %v", tc.priority, err)
		}
	}
}
2 changes: 2 additions & 0 deletions libcontainer/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ type Process struct {
//
// For cgroup v2, the only key allowed is "".
SubCgroupPaths map[string]string

IOPriority *configs.IOPriority
}

// Wait waits for the process to exit.
Expand Down
6 changes: 6 additions & 0 deletions libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ func (p *setnsProcess) signal(sig os.Signal) error {

func (p *setnsProcess) start() (retErr error) {
defer p.messageSockPair.parent.Close()
if p.process.IOPriority != nil {
if err := utils.SetIOPriority(p.process.IOPriority); err != nil {
return err
}
}

// get the "before" value of oom kill count
oom, _ := p.manager.OOMKillCount()
err := p.cmd.Start()
Expand Down
6 changes: 6 additions & 0 deletions libcontainer/specconv/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,12 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
Ambient: spec.Process.Capabilities.Ambient,
}
}
if spec.Process.IOPriority != nil {
config.IOPriority = &configs.IOPriority{
Class: spec.Process.IOPriority.Class,
Priority: spec.Process.IOPriority.Priority,
}
}
}
createHooks(spec, config)
config.Version = specs.Version
Expand Down
8 changes: 8 additions & 0 deletions libcontainer/standard_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
)

type linuxStandardInit struct {
Expand Down Expand Up @@ -159,6 +160,13 @@ func (l *linuxStandardInit) Init() error {
return &os.SyscallError{Syscall: "prctl(SET_NO_NEW_PRIVS)", Err: err}
}
}

if l.config.Config.IOPriority != nil {
if err := utils.SetIOPriority(l.config.Config.IOPriority); err != nil {
return err
}
}

// Tell our parent that we're ready to Execv. This must be done before the
// Seccomp rules have been applied, because we need to be able to read and
// write to a socket.
Expand Down
22 changes: 22 additions & 0 deletions libcontainer/utils/utils_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ import (
"fmt"
"os"
"strconv"
"syscall"

"github.com/opencontainers/runc/libcontainer/configs"

"golang.org/x/sys/unix"
)
Expand Down Expand Up @@ -67,3 +70,22 @@ func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
}
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
}

// IoprioWhoPgrp is the "who" argument that SetIOPriority passes to the
// ioprio_set syscall.
//
// NOTE(review): in the Linux UAPI header (linux/ioprio.h) the value 1 appears
// to be IOPRIO_WHO_PROCESS, with IOPRIO_WHO_PGRP being 2 -- confirm that the
// name and the value here match the intended target.
const (
IoprioWhoPgrp = 1
)

// SetIOPriority applies the given I/O priority through the ioprio_set syscall.
//
// The class name is translated to its numeric value via
// configs.IOPrioClassMapping and packed together with the priority level as
// (class << 13) | priority. The syscall is issued with "who" set to
// IoprioWhoPgrp and "which" set to 0.
//
// A nil ioprio is a no-op. An error is returned when the class is not a key
// of configs.IOPrioClassMapping, when the priority level is outside 0-7, or
// when the syscall itself fails (the returned error wraps the errno).
func SetIOPriority(ioprio *configs.IOPriority) error {
	const (
		classShift  = 13 // the class occupies the bits above the priority level
		maxPriority = 7  // highest level accepted by config validation
	)

	if ioprio == nil {
		return nil
	}

	class, ok := configs.IOPrioClassMapping[ioprio.Class]
	if !ok {
		return fmt.Errorf("invalid io priority class: %s", ioprio.Class)
	}

	// Not every caller is guaranteed to have gone through config validation,
	// so reject out-of-range levels here: a negative or oversized value would
	// otherwise be OR-ed into the class bits and silently change the class.
	if ioprio.Priority < 0 || ioprio.Priority > maxPriority {
		return fmt.Errorf("invalid io priority: %d", ioprio.Priority)
	}

	// Combine class and priority into a single value.
	iop := (class << classShift) | ioprio.Priority
	_, _, errno := syscall.RawSyscall(syscall.SYS_IOPRIO_SET, IoprioWhoPgrp, 0, uintptr(iop))
	if errno != 0 {
		return fmt.Errorf("failed to set io priority: %w", errno)
	}
	return nil
}
30 changes: 30 additions & 0 deletions tests/integration/ioprio.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bats

load helpers

# Per-test setup hook: delegates to setup_debian from the loaded helpers
# (presumably prepares a Debian-based bundle that ships ionice -- confirm in
# helpers).
function setup() {
setup_debian
}

# Per-test cleanup hook: delegates to teardown_bundle from the loaded helpers.
function teardown() {
teardown_bundle
}

# Verifies that .process.ioPriority from the bundle config is in effect both
# for the container's init process and for a process started later through
# `runc exec`. Both checks match ionice's output against class best-effort,
# level 4, which is what the config below requests (IOPRIO_CLASS_BE, 4).
@test "ioprio_set is applied to process group" {
# Create a container with a specific I/O priority.
update_config '.process.ioPriority = {"class": "IOPRIO_CLASS_BE", "priority": 4}'

runc run -d --console-socket "$CONSOLE_SOCKET" test_ioprio
[ "$status" -eq 0 ]

# Check the init process.
runc exec test_ioprio ionice -p 1
[ "$status" -eq 0 ]
[[ "$output" = *'best-effort: prio 4'* ]]

# Check the process made from the exec command.
# No -p here, so ionice is expected to report on itself, i.e. on the
# process created by `runc exec`.
runc exec test_ioprio ionice
[ "$status" -eq 0 ]

[[ "$output" = *'best-effort: prio 4'* ]]
}
7 changes: 7 additions & 0 deletions utils_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
lp.ConsoleHeight = uint16(p.ConsoleSize.Height)
}

if p.IOPriority != nil {
lp.IOPriority = &configs.IOPriority{
Class: p.IOPriority.Class,
Priority: p.IOPriority.Priority,
}
}

if p.Capabilities != nil {
lp.Capabilities = &configs.Capabilities{}
lp.Capabilities.Bounding = p.Capabilities.Bounding
Expand Down

0 comments on commit 9f7d118

Please sign in to comment.