Skip to content

Commit

Permalink
Add I/O priority
Browse files Browse the repository at this point in the history
Signed-off-by: utam0k <k0ma@utam0k.jp>
  • Loading branch information
utam0k committed Feb 20, 2024
1 parent d5e4c33 commit ea17335
Show file tree
Hide file tree
Showing 11 changed files with 126 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/spec-conformance.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Spec version | Feature | PR
-------------|------------------------------------------|----------------------------------------------------------
v1.1.0 | `SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV` | [#3862](https://github.com/opencontainers/runc/pull/3862)
v1.1.0 | `.process.ioPriority` | [#3783](https://github.com/opencontainers/runc/pull/3783)
v1.1.0 | rsvd hugetlb cgroup | TODO ([#3859](https://github.com/opencontainers/runc/issues/3859))

## Architectures

Expand Down
2 changes: 1 addition & 1 deletion docs/terminals.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ you use `runc` directly in something like a `systemd` unit file. To disable
this `LISTEN_FDS`-style passing just unset `LISTEN_FDS`.

**Be very careful when passing file descriptors to a container process.** Due
to some Linux kernel (mis)features, a container with access to certain types of
to some Linux kernel misfeatures, a container with access to certain types of
file descriptors (such as `O_PATH` descriptors) outside of the container's root
file system can use these to break out of the container's pivoted mount
namespace. [This has resulted in CVEs in the past.][CVE-2016-9962]
Expand Down
11 changes: 11 additions & 0 deletions libcontainer/configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,9 @@ type Config struct {

// Personality contains configuration for the Linux personality syscall.
Personality *LinuxPersonality `json:"personality,omitempty"`

// IOPriority is the container's I/O priority.
IOPriority *IOPriority `json:"io_priority,omitempty"`
}

// Scheduler is based on the Linux sched_setattr(2) syscall.
Expand Down Expand Up @@ -283,6 +286,14 @@ func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) {
}, nil
}

// IOPrioClassMapping maps each I/O priority class name from the runtime spec
// to the integer that libcontainer's setIOPriority shifts into the class bits
// of the value it passes to the ioprio_set syscall.
// NOTE(review): 1/2/3 presumably mirror the kernel's IOPRIO_CLASS_RT/BE/IDLE
// values — confirm against include/uapi/linux/ioprio.h.
var IOPrioClassMapping = map[specs.IOPriorityClass]int{
specs.IOPRIO_CLASS_RT: 1,
specs.IOPRIO_CLASS_BE: 2,
specs.IOPRIO_CLASS_IDLE: 3,
}

// IOPriority is an alias for specs.LinuxIOPriority, so values taken from the
// runtime spec can be stored in a Config without conversion.
type IOPriority = specs.LinuxIOPriority

type (
HookName string
HookList []Hook
Expand Down
12 changes: 12 additions & 0 deletions libcontainer/configs/validate/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ func Validate(config *configs.Config) error {
rootlessEUIDCheck,
mountsStrict,
scheduler,
ioPriority,
}
for _, c := range checks {
if err := c(config); err != nil {
Expand Down Expand Up @@ -396,3 +397,14 @@ func scheduler(config *configs.Config) error {
}
return nil
}

// ioPriority validates the optional I/O priority section of the config.
// A config without an I/O priority is accepted; otherwise the priority level
// has to lie within 0-7 (inclusive).
func ioPriority(config *configs.Config) error {
	iop := config.IOPriority
	if iop == nil {
		return nil
	}
	if level := iop.Priority; level < 0 || level > 7 {
		return fmt.Errorf("invalid ioPriority.Priority: %d", level)
	}
	return nil
}
29 changes: 29 additions & 0 deletions libcontainer/configs/validate/validator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -842,3 +842,32 @@ func TestValidateScheduler(t *testing.T) {
}
}
}

// TestValidateIOPriority checks that Validate accepts I/O priority levels at
// both ends of the allowed range (0 and 7) and rejects the values immediately
// outside it (-1 and 8).
func TestValidateIOPriority(t *testing.T) {
	testCases := []struct {
		isErr    bool
		priority int
	}{
		{isErr: false, priority: 0},
		{isErr: false, priority: 7},
		{isErr: true, priority: -1},
		// Exercises the upper-bound half of the range check.
		{isErr: true, priority: 8},
	}

	for _, tc := range testCases {
		iop := configs.IOPriority{
			Priority: tc.priority,
		}
		config := &configs.Config{
			Rootfs:     "/var",
			IOPriority: &iop,
		}

		err := Validate(config)
		if tc.isErr && err == nil {
			t.Errorf("iopriority: %d, expected error, got nil", tc.priority)
		}
		if !tc.isErr && err != nil {
			t.Errorf("iopriority: %d, expected nil, got error %v", tc.priority, err)
		}
	}
}
2 changes: 2 additions & 0 deletions libcontainer/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ type Process struct {
SubCgroupPaths map[string]string

Scheduler *configs.Scheduler

IOPriority *configs.IOPriority
}

// Wait waits for the process to exit.
Expand Down
25 changes: 25 additions & 0 deletions libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,13 @@ func (p *setnsProcess) signal(sig os.Signal) error {

func (p *setnsProcess) start() (retErr error) {
defer p.comm.closeParent()

if p.process.IOPriority != nil {
if err := setIOPriority(p.process.IOPriority); err != nil {
return err
}
}

// get the "before" value of oom kill count
oom, _ := p.manager.OOMKillCount()
err := p.cmd.Start()
Expand Down Expand Up @@ -972,3 +979,21 @@ func initWaiter(r io.Reader) chan error {

return ch
}

// setIOPriority applies the given I/O scheduling class and priority level
// through the ioprio_set syscall, using a "which" of IOPRIO_WHO_PROCESS and a
// "who" of 0 (which, per ioprio_set(2), selects the caller).
//
// It returns an error when the class is not a key of
// configs.IOPrioClassMapping, when the priority level is outside 0-7, or when
// the syscall itself fails.
func setIOPriority(ioprio *configs.IOPriority) error {
	const (
		// 1 is IOPRIO_WHO_PROCESS in the kernel's uapi ioprio.h
		// (IOPRIO_WHO_PGRP would be 2).
		ioprioWhoProcess = 1
		// The class is stored above the low 13 bits, which hold the level.
		ioprioClassShift = 13
		// Highest priority level accepted; same bound as the config validator.
		ioprioMaxLevel = 7
	)

	class, ok := configs.IOPrioClassMapping[ioprio.Class]
	if !ok {
		return fmt.Errorf("invalid io priority class: %s", ioprio.Class)
	}

	// Not every caller passes through the config validator (e.g. the exec
	// path), so check the level here as well: an out-of-range value would
	// otherwise be OR-ed into the class bits and silently change the class.
	if ioprio.Priority < 0 || ioprio.Priority > ioprioMaxLevel {
		return fmt.Errorf("invalid io priority: %d", ioprio.Priority)
	}

	// Combine class and priority into a single value
	// https://github.com/torvalds/linux/blob/v5.18/include/uapi/linux/ioprio.h#L5-L17
	iop := (class << ioprioClassShift) | ioprio.Priority
	_, _, errno := unix.RawSyscall(unix.SYS_IOPRIO_SET, ioprioWhoProcess, 0, uintptr(iop))
	if errno != 0 {
		return fmt.Errorf("failed to set io priority: %w", errno)
	}
	return nil
}
5 changes: 5 additions & 0 deletions libcontainer/specconv/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,11 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
s := *spec.Process.Scheduler
config.Scheduler = &s
}

if spec.Process.IOPriority != nil {
ioPriority := *spec.Process.IOPriority
config.IOPriority = &ioPriority
}
}
createHooks(spec, config)
config.Version = specs.Version
Expand Down
5 changes: 5 additions & 0 deletions libcontainer/standard_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ func (l *linuxStandardInit) Init() error {
return err
}
}
if l.config.Config.IOPriority != nil {
if err := setIOPriority(l.config.Config.IOPriority); err != nil {
return err
}
}

// Tell our parent that we're ready to Execv. This must be done before the
// Seccomp rules have been applied, because we need to be able to read and
Expand Down
30 changes: 30 additions & 0 deletions tests/integration/ioprio.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bats

load helpers

# Per-test setup hook; delegates to setup_debian from the loaded helpers.
# NOTE(review): presumably prepares a Debian-based bundle (needed for the
# ionice binary used below) — confirm against helpers.
function setup() {
setup_debian
}

# Per-test teardown hook; delegates to teardown_bundle from the loaded helpers.
function teardown() {
teardown_bundle
}

# Starts a detached container configured with best-effort class / priority 4
# and verifies, via ionice, that both the container's PID 1 and a process
# started through "runc exec" report that class and priority.
@test "ioprio_set is applied to process group" {
# Create a container with a specific I/O priority.
update_config '.process.ioPriority = {"class": "IOPRIO_CLASS_BE", "priority": 4}'

runc run -d --console-socket "$CONSOLE_SOCKET" test_ioprio
[ "$status" -eq 0 ]

# Check the init process.
runc exec test_ioprio ionice -p 1
[ "$status" -eq 0 ]
[[ "$output" = *'best-effort: prio 4'* ]]

# Check the process made from the exec command.
# Without -p, ionice reports on itself, i.e. on the exec'ed process.
runc exec test_ioprio ionice
[ "$status" -eq 0 ]

[[ "$output" = *'best-effort: prio 4'* ]]
}
5 changes: 5 additions & 0 deletions utils_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
lp.Scheduler = &s
}

if p.IOPriority != nil {
ioPriority := *p.IOPriority
lp.IOPriority = &ioPriority
}

if p.Capabilities != nil {
lp.Capabilities = &configs.Capabilities{}
lp.Capabilities.Bounding = p.Capabilities.Bounding
Expand Down

0 comments on commit ea17335

Please sign in to comment.