-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Rootless Containers #774
Rootless Containers #774
Changes from all commits
6bd4bd9
d2f4969
baeef29
f0876b0
76aeaf8
d04cbc4
2ce3357
ba38383
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
// +build linux | ||
|
||
package rootless | ||
|
||
import ( | ||
"fmt" | ||
|
||
"github.com/opencontainers/runc/libcontainer/cgroups" | ||
"github.com/opencontainers/runc/libcontainer/cgroups/fs" | ||
"github.com/opencontainers/runc/libcontainer/configs" | ||
"github.com/opencontainers/runc/libcontainer/configs/validate" | ||
) | ||
|
||
// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code | ||
// needlessly. We should probably export this list. | ||
|
||
// subsystems is the set of cgroup subsystems that Manager.Apply walks when it | ||
// records the path of the calling process's own cgroup for each one. | ||
var subsystems = []subsystem{ | ||
&fs.CpusetGroup{}, | ||
&fs.DevicesGroup{}, | ||
&fs.MemoryGroup{}, | ||
&fs.CpuGroup{}, | ||
&fs.CpuacctGroup{}, | ||
&fs.PidsGroup{}, | ||
&fs.BlkioGroup{}, | ||
&fs.HugetlbGroup{}, | ||
&fs.NetClsGroup{}, | ||
&fs.NetPrioGroup{}, | ||
&fs.PerfEventGroup{}, | ||
&fs.FreezerGroup{}, | ||
&fs.NameGroup{GroupName: "name=systemd"}, | ||
} | ||
|
||
// subsystem is the part of a cgroup subsystem's behaviour that this package | ||
// declares a need for; every entry of subsystems is stored as this type. | ||
type subsystem interface { | ||
// Name returns the name of the subsystem. | ||
Name() string | ||
|
||
// GetStats returns the stats, as 'stats', corresponding to the cgroup under 'path'. | ||
GetStats(path string, stats *cgroups.Stats) error | ||
} | ||
|
||
// The noop cgroup manager is used for rootless containers, because we currently | ||
// cannot manage cgroups if we are in a rootless setup. This manager is chosen | ||
// by factory if we are in rootless mode. We error out if any cgroup options are | ||
// set in the config -- this may change in the future with upcoming kernel features | ||
// like the cgroup namespace. | ||
|
||
// Manager is the cgroup manager for rootless containers. It never creates or | ||
// modifies cgroups: Apply only records existing paths, and GetStats and Freeze | ||
// always return an error. | ||
type Manager struct { | ||
// Cgroups is the container's cgroup configuration. Apply does nothing when | ||
// it is nil and fails when it sets explicit Paths. | ||
Cgroups *configs.Cgroup | ||
// Paths maps a subsystem name to the path of the calling process's own | ||
// cgroup for that subsystem, as filled in by Apply. | ||
Paths map[string]string | ||
} | ||
|
||
// Apply records, for each entry in subsystems, the path of the cgroup the | ||
// calling process is already in, and stores the result in m.Paths. The pid | ||
// argument is not used: no process is moved into any cgroup. It returns an | ||
// error only when m.Cgroups sets explicit Paths; with a nil m.Cgroups it | ||
// returns nil and leaves m.Paths untouched. | ||
func (m *Manager) Apply(pid int) error { | ||
// If there are no cgroup settings, there's nothing to do. | ||
if m.Cgroups == nil { | ||
return nil | ||
} | ||
|
||
// We can't set paths. | ||
// TODO(cyphar): Implement the case where the runner of a rootless container | ||
// owns their own cgroup, which would allow us to set up a | ||
// cgroup for each path. | ||
if m.Cgroups.Paths != nil { | ||
return fmt.Errorf("cannot change cgroup path in rootless container") | ||
} | ||
|
||
// We load the paths into the manager. | ||
paths := make(map[string]string) | ||
for _, sys := range subsystems { | ||
name := sys.Name() | ||
|
||
path, err := cgroups.GetOwnCgroupPath(name) | ||
if err != nil { | ||
// Ignore paths we couldn't resolve: the subsystem is simply left out | ||
// of the map rather than failing the whole call. | ||
continue | ||
} | ||
|
||
paths[name] = path | ||
} | ||
|
||
// Replace (not merge into) any previously stored map. | ||
m.Paths = paths | ||
return nil | ||
} | ||
|
||
// GetPaths returns m.Paths, the subsystem-name-to-path map. The map itself is | ||
// returned, not a copy. | ||
func (m *Manager) GetPaths() map[string]string { | ||
return m.Paths | ||
} | ||
|
||
// Set does not write any cgroup settings. It only runs the config validator on | ||
// container and returns the validator's result. | ||
func (m *Manager) Set(container *configs.Config) error { | ||
// We have to re-do the validation here, since someone might decide to | ||
// update a rootless container. | ||
return validate.New().Validate(container) | ||
} | ||
|
||
// GetPids resolves the calling process's own cgroup path for the "devices" | ||
// subsystem and returns the result of cgroups.GetPids on it. m.Paths is not | ||
// consulted. | ||
// NOTE(review): Apply never creates a dedicated cgroup, so the result | ||
// presumably also includes non-container processes in that cgroup -- confirm. | ||
func (m *Manager) GetPids() ([]int, error) { | ||
dir, err := cgroups.GetOwnCgroupPath("devices") | ||
if err != nil { | ||
return nil, err | ||
} | ||
return cgroups.GetPids(dir) | ||
} | ||
|
||
// GetAllPids resolves the calling process's own cgroup path for the "devices" | ||
// subsystem and returns the result of cgroups.GetAllPids on it. m.Paths is not | ||
// consulted. | ||
func (m *Manager) GetAllPids() ([]int, error) { | ||
dir, err := cgroups.GetOwnCgroupPath("devices") | ||
if err != nil { | ||
return nil, err | ||
} | ||
return cgroups.GetAllPids(dir) | ||
} | ||
|
||
// GetStats is not supported by this manager: it always returns a nil stats | ||
// pointer and a non-nil error. | ||
func (m *Manager) GetStats() (*cgroups.Stats, error) { | ||
// TODO(cyphar): We can make this work if we figure out a way to allow usage | ||
// of cgroups with a rootless container. While this doesn't | ||
// actually require write access to a cgroup directory, the | ||
// statistics are not useful if they can be affected by | ||
// non-container processes. | ||
return nil, fmt.Errorf("cannot get cgroup stats in rootless container") | ||
} | ||
|
||
// Freeze is not supported by this manager: it ignores state and always returns | ||
// a non-nil error. | ||
func (m *Manager) Freeze(state configs.FreezerState) error { | ||
// TODO(cyphar): We can make this work if we figure out a way to allow usage | ||
// of cgroups with a rootless container. | ||
return fmt.Errorf("cannot use freezer cgroup in rootless container") | ||
} | ||
|
||
// Destroy does nothing and always returns nil. | ||
func (m *Manager) Destroy() error { | ||
// We don't have to do anything here because we didn't do any setup. | ||
return nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,7 +109,7 @@ type Mount struct { | |
Subsystems []string | ||
} | ||
|
||
func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) { | ||
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) { | ||
if len(m.Subsystems) == 0 { | ||
return "", fmt.Errorf("no subsystem for mount") | ||
} | ||
|
@@ -203,8 +203,8 @@ func GetAllSubsystems() ([]string, error) { | |
return subsystems, nil | ||
} | ||
|
||
// GetThisCgroupDir returns the relative path to the cgroup docker is running in. | ||
func GetThisCgroupDir(subsystem string) (string, error) { | ||
// GetOwnCgroup returns the relative path to the cgroup docker is running in. | ||
func GetOwnCgroup(subsystem string) (string, error) { | ||
cgroups, err := ParseCgroupFile("/proc/self/cgroup") | ||
if err != nil { | ||
return "", err | ||
|
@@ -213,8 +213,16 @@ func GetThisCgroupDir(subsystem string) (string, error) { | |
return getControllerPath(subsystem, cgroups) | ||
} | ||
|
||
func GetInitCgroupDir(subsystem string) (string, error) { | ||
// GetOwnCgroupPath looks up the current process's cgroup for subsystem with | ||
// GetOwnCgroup and converts it to a path with getCgroupPathHelper. Any error | ||
// from either step is returned unchanged. | ||
func GetOwnCgroupPath(subsystem string) (string, error) { | ||
cgroup, err := GetOwnCgroup(subsystem) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
||
return getCgroupPathHelper(subsystem, cgroup) | ||
} | ||
|
||
func GetInitCgroup(subsystem string) (string, error) { | ||
cgroups, err := ParseCgroupFile("/proc/1/cgroup") | ||
if err != nil { | ||
return "", err | ||
|
@@ -223,6 +231,31 @@ func GetInitCgroupDir(subsystem string) (string, error) { | |
return getControllerPath(subsystem, cgroups) | ||
} | ||
|
||
// GetInitCgroupPath looks up the cgroup for subsystem with GetInitCgroup | ||
// (which reads /proc/1/cgroup) and converts it to a path with | ||
// getCgroupPathHelper. Any error from either step is returned unchanged. | ||
func GetInitCgroupPath(subsystem string) (string, error) { | ||
cgroup, err := GetInitCgroup(subsystem) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
||
return getCgroupPathHelper(subsystem, cgroup) | ||
} | ||
|
||
// getCgroupPathHelper turns cgroup, a cgroup name for subsystem, into a path | ||
// by making it relative to the root reported by FindCgroupMountpointAndRoot | ||
// and joining the result onto the reported mountpoint. | ||
func getCgroupPathHelper(subsystem, cgroup string) (string, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function is kind of subtle, can you keep the comments? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The only comment that makes sense now is on the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In this function yes, and maybe leave the other comment to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fair enough. Not sure why I removed them in the first place. |
||
// NOTE(review): mnt is presumably where the subsystem's hierarchy is mounted | ||
// and root the cgroup that mount exposes as its top level -- confirm. | ||
mnt, root, err := FindCgroupMountpointAndRoot(subsystem) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
||
// This is needed for nested containers, because in /proc/self/cgroup we | ||
// see paths from the host, which don't exist in the container. | ||
relCgroup, err := filepath.Rel(root, cgroup) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
||
return filepath.Join(mnt, relCgroup), nil | ||
} | ||
|
||
func readProcsFile(dir string) ([]int, error) { | ||
f, err := os.Open(filepath.Join(dir, CgroupProcesses)) | ||
if err != nil { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need this, since it's not used? I doubt there will be any usage.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't need it now, but the systemd cgroup manager does use `GetInitCgroup`. I can drop it if you prefer; this is more for the benefit of users of `libcontainer`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's probably erroneous; maybe nobody is using the systemd cgroup manager inside a container. I'm OK with keeping it, so we'll say no to any subsequent PR that tries to remove this unused function :)