Skip to content

Commit

Permalink
Add I/O priority
Browse files Browse the repository at this point in the history
Signed-off-by: utam0k <k0ma@utam0k.jp>
  • Loading branch information
utam0k committed Oct 21, 2023
1 parent cf4c7c9 commit cff9be2
Show file tree
Hide file tree
Showing 12 changed files with 136 additions and 2 deletions.
1 change: 0 additions & 1 deletion docs/spec-conformance.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ Spec version | Feature | PR
v1.0.2 | `.linux.personality` | [#3126](https://github.com/opencontainers/runc/pull/3126)
v1.1.0 | `SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV` | [#3862](https://github.com/opencontainers/runc/pull/3862)
v1.1.0 | rsvd hugetlb cgroup | TODO ([#3859](https://github.com/opencontainers/runc/issues/3859))
v1.1.0 | `.process.ioPriority` | [#3783](https://github.com/opencontainers/runc/pull/3783)


The following features are implemented with some limitations:
Expand Down
2 changes: 1 addition & 1 deletion docs/terminals.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ you use `runc` directly in something like a `systemd` unit file. To disable
this `LISTEN_FDS`-style passing just unset `LISTEN_FDS`.

**Be very careful when passing file descriptors to a container process.** Due
to some Linux kernel (mis)features, a container with access to certain types of
to some Linux kernel misfeatures, a container with access to certain types of
file descriptors (such as `O_PATH` descriptors) outside of the container's root
file system can use these to break out of the container's pivoted mount
namespace. [This has resulted in CVEs in the past.][CVE-2016-9962]
Expand Down
14 changes: 14 additions & 0 deletions libcontainer/configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ type Config struct {

// Scheduler represents the scheduling attributes for a process.
Scheduler *Scheduler `json:"scheduler,omitempty"`

// IOPriority is the container's I/O priority.
IOPriority *IOPriority `json:"io_priority,omitempty"`
}

// Scheduler is based on the Linux sched_setattr(2) syscall.
Expand Down Expand Up @@ -284,6 +287,17 @@ func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) {
}, nil
}

// IOPrioClassMapping translates a runtime-spec I/O priority class into the
// integer that SetIOPriority shifts into the value handed to ioprio_set(2).
// A class that is absent from this map is rejected by SetIOPriority.
// NOTE(review): the numbers presumably mirror IOPRIO_CLASS_RT/BE/IDLE from
// the Linux uapi ioprio.h header — confirm against the kernel source.
var IOPrioClassMapping = map[specs.IOPriorityClass]int{
specs.IOPRIO_CLASS_RT: 1,
specs.IOPRIO_CLASS_BE: 2,
specs.IOPRIO_CLASS_IDLE: 3,
}

// IOPriority describes the I/O scheduling class and priority level to apply
// to a container process.
type IOPriority struct {
// Class is the I/O scheduling class; it is looked up in IOPrioClassMapping
// when the priority is applied.
Class specs.IOPriorityClass `json:"class"`
// Priority is the level within the class; config validation accepts 0-7.
Priority int `json:"priority"`
}

type (
HookName string
HookList []Hook
Expand Down
12 changes: 12 additions & 0 deletions libcontainer/configs/validate/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ func Validate(config *configs.Config) error {
rootlessEUIDCheck,
mountsStrict,
scheduler,
ioPriority,
}
for _, c := range checks {
if err := c(config); err != nil {
Expand Down Expand Up @@ -376,3 +377,14 @@ func scheduler(config *configs.Config) error {
}
return nil
}

// ioPriority checks the optional I/O priority section of the config.
// A missing section is valid; otherwise the priority level must lie in the
// inclusive range 0-7.
func ioPriority(config *configs.Config) error {
	iop := config.IOPriority
	if iop == nil {
		return nil
	}
	if level := iop.Priority; level >= 0 && level <= 7 {
		return nil
	}
	return fmt.Errorf("invalid ioPriority.Priority: %d", iop.Priority)
}
29 changes: 29 additions & 0 deletions libcontainer/configs/validate/validator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -666,3 +666,32 @@ func TestValidateScheduler(t *testing.T) {
}
}
}

// TestValidateIOPriority verifies that Validate accepts I/O priority levels
// at both ends of the 0-7 range and rejects the values immediately outside it.
func TestValidateIOPriority(t *testing.T) {
	testCases := []struct {
		isErr    bool
		priority int
	}{
		{isErr: false, priority: 0},
		{isErr: false, priority: 7},
		{isErr: true, priority: -1},
		// One past the upper bound, so an off-by-one in the range check is caught.
		{isErr: true, priority: 8},
	}

	for _, tc := range testCases {
		iop := configs.IOPriority{
			Priority: tc.priority,
		}
		config := &configs.Config{
			Rootfs:     "/var",
			IOPriority: &iop,
		}

		err := Validate(config)
		if tc.isErr && err == nil {
			t.Errorf("iopriority: %d, expected error, got nil", tc.priority)
		}
		if !tc.isErr && err != nil {
			t.Errorf("iopriority: %d, expected nil, got error %v", tc.priority, err)
		}
	}
}
2 changes: 2 additions & 0 deletions libcontainer/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ type Process struct {
SubCgroupPaths map[string]string

Scheduler *configs.Scheduler

IOPriority *configs.IOPriority
}

// Wait waits for the process to exit.
Expand Down
7 changes: 7 additions & 0 deletions libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,13 @@ func (p *setnsProcess) signal(sig os.Signal) error {

func (p *setnsProcess) start() (retErr error) {
defer p.comm.closeParent()

if p.process.IOPriority != nil {
if err := utils.SetIOPriority(p.process.IOPriority); err != nil {
return err
}
}

// get the "before" value of oom kill count
oom, _ := p.manager.OOMKillCount()
err := p.cmd.Start()
Expand Down
7 changes: 7 additions & 0 deletions libcontainer/specconv/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,13 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
s := *spec.Process.Scheduler
config.Scheduler = &s
}

if spec.Process.IOPriority != nil {
config.IOPriority = &configs.IOPriority{
Class: spec.Process.IOPriority.Class,
Priority: spec.Process.IOPriority.Priority,
}
}
}
createHooks(spec, config)
config.Version = specs.Version
Expand Down
6 changes: 6 additions & 0 deletions libcontainer/standard_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
)

type linuxStandardInit struct {
Expand Down Expand Up @@ -165,6 +166,11 @@ func (l *linuxStandardInit) Init() error {
return err
}
}
if l.config.Config.IOPriority != nil {
if err := utils.SetIOPriority(l.config.Config.IOPriority); err != nil {
return err
}
}

// Tell our parent that we're ready to Execv. This must be done before the
// Seccomp rules have been applied, because we need to be able to read and
Expand Down
21 changes: 21 additions & 0 deletions libcontainer/utils/utils_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"strconv"
"sync"

"github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)

Expand Down Expand Up @@ -98,3 +99,23 @@ func NewSockPair(name string) (parent, child *os.File, err error) {
}
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
}

const (
// IoprioWhoPgrp is the "which" argument SetIOPriority passes to ioprio_set(2).
// NOTE(review): the Linux uapi header defines IOPRIO_WHO_PROCESS as 1 and
// IOPRIO_WHO_PGRP as 2, so this value appears to select a single
// process/thread rather than a process group — confirm which is intended.
IoprioWhoPgrp = 1
)

// SetIOPriority applies ioprio to the caller through the ioprio_set(2)
// syscall, passing IoprioWhoPgrp as the "which" argument and 0 as "who".
//
// A nil ioprio is a no-op. An error is returned when the class is not listed
// in configs.IOPrioClassMapping, when the priority level is outside 0-7, or
// when the syscall itself fails.
func SetIOPriority(ioprio *configs.IOPriority) error {
	const (
		// classShift is the bit position of the class inside the combined
		// value (IOPRIO_CLASS_SHIFT in the kernel header linked below).
		classShift = 13
		// maxLevel is the highest priority level, matching the range
		// enforced by the config validator.
		maxLevel = 7
	)

	if ioprio == nil {
		return nil
	}

	class, ok := configs.IOPrioClassMapping[ioprio.Class]
	if !ok {
		return fmt.Errorf("invalid io priority class: %s", ioprio.Class)
	}

	// Not every caller runs the config validator first, so check the level
	// here as well: a value with bits at or above classShift would be OR-ed
	// into the class bits and silently select a different class.
	if ioprio.Priority < 0 || ioprio.Priority > maxLevel {
		return fmt.Errorf("invalid io priority: %d", ioprio.Priority)
	}

	// Combine class and priority into a single value
	// https://github.com/torvalds/linux/blob/v5.18/include/uapi/linux/ioprio.h#L5-L17
	iop := (class << classShift) | ioprio.Priority
	_, _, errno := unix.RawSyscall(unix.SYS_IOPRIO_SET, IoprioWhoPgrp, 0, uintptr(iop))
	if errno != 0 {
		return fmt.Errorf("failed to set io priority: %w", errno)
	}
	return nil
}
30 changes: 30 additions & 0 deletions tests/integration/ioprio.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bats

load helpers

# Bats per-test setup hook; delegates to the setup_debian helper from helpers.
# NOTE(review): presumably prepares a Debian-based bundle so that ionice is
# available inside the container — confirm against helpers.bash.
function setup() {
setup_debian
}

# Bats per-test teardown hook; delegates cleanup to the teardown_bundle helper.
function teardown() {
teardown_bundle
}

# Verifies that the ioPriority set in the container config is what ionice
# reports, both for the container's init process and for a process started
# later with "runc exec".
@test "ioprio_set is applied to process group" {
# Create a container with a specific I/O priority.
update_config '.process.ioPriority = {"class": "IOPRIO_CLASS_BE", "priority": 4}'

# Start the container detached so it stays around for the exec calls below.
runc run -d --console-socket "$CONSOLE_SOCKET" test_ioprio
[ "$status" -eq 0 ]

# Check the init process.
runc exec test_ioprio ionice -p 1
[ "$status" -eq 0 ]
[[ "$output" = *'best-effort: prio 4'* ]]

# Check the process made from the exec command.
# ionice is run without -p here, so it reports on the exec'd process itself.
runc exec test_ioprio ionice
[ "$status" -eq 0 ]

[[ "$output" = *'best-effort: prio 4'* ]]
}
7 changes: 7 additions & 0 deletions utils_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
lp.Scheduler = &s
}

if p.IOPriority != nil {
lp.IOPriority = &configs.IOPriority{
Class: p.IOPriority.Class,
Priority: p.IOPriority.Priority,
}
}

if p.Capabilities != nil {
lp.Capabilities = &configs.Capabilities{}
lp.Capabilities.Bounding = p.Capabilities.Bounding
Expand Down

0 comments on commit cff9be2

Please sign in to comment.