From 6bd4bd9030a979d27323ee4f1c4c5dd44469dc45 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai
Date: Tue, 17 Jan 2017 12:25:21 +1100
Subject: [PATCH 1/8] *: handle unprivileged operations and !dumpable

Effectively, !dumpable makes implementing rootless containers quite
hard, due to a bunch of different operations on /proc/self no longer
being possible without reordering everything.

!dumpable only really makes sense when you are switching between
different security contexts, which is only the case when we are joining
namespaces. Unfortunately this means that !dumpable will still have
issues in this instance, and it should only be necessary to set
!dumpable if we are not joining USER namespaces (new kernels have
protections that make !dumpable no longer necessary). But that's a topic
for another time.

This also includes code to unset and then re-set dumpable when doing the
USER namespace mappings. This should also be safe because in principle
processes in a container can't see us until after we fork into the PID
namespace (which happens after the user mapping).

In rootless containers, it is not possible to set a non-dumpable
process's /proc/self/oom_score_adj (it's owned by root and thus not
writeable). Thus, it needs to be set inside nsexec before we set
ourselves as non-dumpable.
Signed-off-by: Aleksa Sarai --- libcontainer/container_linux.go | 6 ++++ libcontainer/init_linux.go | 8 ----- libcontainer/message_linux.go | 14 ++++---- libcontainer/nsenter/nsexec.c | 64 +++++++++++++++++++++++++++------ libcontainer/process_linux.go | 8 ----- 5 files changed, 68 insertions(+), 32 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 28dff866be3..da685402eac 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -1455,5 +1455,11 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na } } + // write oom_score_adj + r.AddData(&Bytemsg{ + Type: OomScoreAdjAttr, + Value: []byte(fmt.Sprintf("%d", c.config.OomScoreAdj)), + }) + return bytes.NewReader(r.Serialize()), nil } diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 39b83a4eb10..0f5d412ac0a 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -6,10 +6,8 @@ import ( "encoding/json" "fmt" "io" - "io/ioutil" "net" "os" - "strconv" "strings" "syscall" "unsafe" @@ -369,12 +367,6 @@ func setupRlimits(limits []configs.Rlimit, pid int) error { return nil } -func setOomScoreAdj(oomScoreAdj int, pid int) error { - path := fmt.Sprintf("/proc/%d/oom_score_adj", pid) - - return ioutil.WriteFile(path, []byte(strconv.Itoa(oomScoreAdj)), 0600) -} - const _P_PID = 1 type siginfo struct { diff --git a/libcontainer/message_linux.go b/libcontainer/message_linux.go index a189c7244bf..321d6642ff4 100644 --- a/libcontainer/message_linux.go +++ b/libcontainer/message_linux.go @@ -11,12 +11,14 @@ import ( // list of known message types we want to send to bootstrap program // The number is randomly chosen to not conflict with known netlink types const ( - InitMsg uint16 = 62000 - CloneFlagsAttr uint16 = 27281 - NsPathsAttr uint16 = 27282 - UidmapAttr uint16 = 27283 - GidmapAttr uint16 = 27284 - SetgroupAttr uint16 = 27285 + InitMsg uint16 = 62000 + CloneFlagsAttr uint16 = 
27281 + NsPathsAttr uint16 = 27282 + UidmapAttr uint16 = 27283 + GidmapAttr uint16 = 27284 + SetgroupAttr uint16 = 27285 + OomScoreAdjAttr uint16 = 27286 + // When syscall.NLA_HDRLEN is in gccgo, take this out. syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1) ) diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index 51bd1e3eccc..9630206e398 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -72,18 +72,21 @@ struct nlconfig_t { char *namespaces; size_t namespaces_len; uint8_t is_setgroup; + char *oom_score_adj; + size_t oom_score_adj_len; }; /* * List of netlink message types sent to us as part of bootstrapping the init. * These constants are defined in libcontainer/message_linux.go. */ -#define INIT_MSG 62000 +#define INIT_MSG 62000 #define CLONE_FLAGS_ATTR 27281 #define NS_PATHS_ATTR 27282 -#define UIDMAP_ATTR 27283 -#define GIDMAP_ATTR 27284 +#define UIDMAP_ATTR 27283 +#define GIDMAP_ATTR 27284 #define SETGROUP_ATTR 27285 +#define OOM_SCORE_ADJ_ATTR 27286 /* * Use the raw syscall for versions of glibc which don't include a function for @@ -186,7 +189,7 @@ static void update_setgroups(int pid, enum policy_t setgroup) } } -static void update_uidmap(int pid, char *map, int map_len) +static void update_uidmap(int pid, char *map, size_t map_len) { if (map == NULL || map_len <= 0) return; @@ -195,7 +198,7 @@ static void update_uidmap(int pid, char *map, int map_len) bail("failed to update /proc/%d/uid_map", pid); } -static void update_gidmap(int pid, char *map, int map_len) +static void update_gidmap(int pid, char *map, size_t map_len) { if (map == NULL || map_len <= 0) return; @@ -204,6 +207,15 @@ static void update_gidmap(int pid, char *map, int map_len) bail("failed to update /proc/%d/gid_map", pid); } +static void update_oom_score_adj(char *data, size_t len) +{ + if (data == NULL || len <= 0) + return; + + if (write_file(data, len, "/proc/self/oom_score_adj") < 
0) + bail("failed to update /proc/self/oom_score_adj"); +} + /* A dummy function that just jumps to the given jumpval. */ static int child_func(void *arg) __attribute__ ((noinline)); static int child_func(void *arg) @@ -317,6 +329,10 @@ static void nl_parse(int fd, struct nlconfig_t *config) case CLONE_FLAGS_ATTR: config->cloneflags = readint32(current); break; + case OOM_SCORE_ADJ_ATTR: + config->oom_score_adj = current; + config->oom_score_adj_len = payload_len; + break; case NS_PATHS_ATTR: config->namespaces = current; config->namespaces_len = payload_len; @@ -425,14 +441,32 @@ void nsexec(void) if (pipenum == -1) return; - /* make the process non-dumpable */ - if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) != 0) { - bail("failed to set process as non-dumpable"); - } - /* Parse all of the netlink configuration. */ nl_parse(pipenum, &config); + /* Set oom_score_adj. This has to be done before !dumpable because + * /proc/self/oom_score_adj is not writeable unless you're an privileged + * user (if !dumpable is set). All children inherit their parent's + * oom_score_adj value on fork(2) so this will always be propagated + * properly. + */ + update_oom_score_adj(config.oom_score_adj, config.oom_score_adj_len); + + /* + * Make the process non-dumpable, to avoid various race conditions that + * could cause processes in namespaces we're joining to access host + * resources (or potentially execute code). + * + * However, if the number of namespaces we are joining is 0, we are not + * going to be switching to a different security context. Thus setting + * ourselves to be non-dumpable only breaks things (like rootless + * containers), which is the recommendation from the kernel folks. + */ + if (config.namespaces) { + if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0) + bail("failed to set process as non-dumpable"); + } + /* Pipe so we can tell the child when we've finished setting up. 
*/ if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0) bail("failed to setup sync pipe between parent and child"); @@ -681,6 +715,11 @@ void nsexec(void) * clone_parent rant). So signal our parent to hook us up. */ + /* Switching is only necessary if we joined namespaces. */ + if (config.namespaces) { + if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0) + bail("failed to set process as dumpable"); + } s = SYNC_USERMAP_PLS; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) bail("failed to sync with parent: write(SYNC_USERMAP_PLS)"); @@ -691,6 +730,11 @@ void nsexec(void) bail("failed to sync with parent: read(SYNC_USERMAP_ACK)"); if (s != SYNC_USERMAP_ACK) bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s); + /* Switching is only necessary if we joined namespaces. */ + if (config.namespaces) { + if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0) + bail("failed to set process as dumpable"); + } } /* diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index 0f79a3811b7..c60f4730146 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -85,10 +85,6 @@ func (p *setnsProcess) start() (err error) { return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid()) } } - // set oom_score_adj - if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { - return newSystemErrorWithCause(err, "setting oom score") - } // set rlimits, this has to be done here because we lose permissions // to raise the limits once we enter a user-namespace if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { @@ -285,10 +281,6 @@ func (p *initProcess) start() error { if err := p.manager.Set(p.config.Config); err != nil { return newSystemErrorWithCause(err, "setting cgroup config for ready process") } - // set oom_score_adj - if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { - return newSystemErrorWithCause(err, "setting oom score for ready process") - } // set rlimits, this has 
to be done here because we lose permissions // to raise the limits once we enter a user-namespace if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { From d2f49696b09a60f5ab60f7db8259c52a2a2cdbed Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Sat, 23 Apr 2016 23:39:42 +1000 Subject: [PATCH 2/8] runc: add support for rootless containers This enables the support for the rootless container mode. There are many restrictions on what rootless containers can do, so many different runC commands have been disabled: * runc checkpoint * runc events * runc pause * runc ps * runc restore * runc resume * runc update The following commands work: * runc create * runc delete * runc exec * runc kill * runc list * runc run * runc spec * runc state In addition, any specification options that imply joining cgroups have also been disabled. This is due to support for unprivileged subtree management not being available from Linux upstream. Signed-off-by: Aleksa Sarai --- Makefile | 2 +- checkpoint.go | 5 + exec.go | 3 - libcontainer/configs/config.go | 3 + libcontainer/configs/validate/rootless.go | 117 +++++++++++ .../configs/validate/rootless_test.go | 195 ++++++++++++++++++ libcontainer/configs/validate/validator.go | 5 + libcontainer/container_linux.go | 49 ++++- libcontainer/init_linux.go | 41 +++- libcontainer/message_linux.go | 1 + libcontainer/nsenter/nsexec.c | 26 ++- libcontainer/process_linux.go | 28 ++- libcontainer/specconv/example.go | 160 ++++++++++++++ libcontainer/specconv/spec_linux.go | 31 ++- libcontainer/specconv/spec_linux_test.go | 80 ++++++- list.go | 19 +- ps.go | 5 + restore.go | 6 + spec.go | 150 +------------- utils.go | 3 - utils_linux.go | 6 + 21 files changed, 742 insertions(+), 193 deletions(-) create mode 100644 libcontainer/configs/validate/rootless.go create mode 100644 libcontainer/configs/validate/rootless_test.go create mode 100644 libcontainer/specconv/example.go diff --git a/Makefile b/Makefile index b82884af6cb..5fff5151bf1 100644 
--- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$') PREFIX := $(DESTDIR)/usr/local -BINDIR := $(PREFIX)/sbin +BINDIR := $(PREFIX)/bin GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null) GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g") RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN)) diff --git a/checkpoint.go b/checkpoint.go index dd7704f6161..78977d71a35 100644 --- a/checkpoint.go +++ b/checkpoint.go @@ -39,6 +39,11 @@ checkpointed.`, if err := checkArgs(context, 1, exactArgs); err != nil { return err } + // XXX: Currently this is untested with rootless containers. + if isRootless() { + return fmt.Errorf("runc checkpoint requires root") + } + container, err := getContainer(context) if err != nil { return err diff --git a/exec.go b/exec.go index 84061e6b705..22f2689abcc 100644 --- a/exec.go +++ b/exec.go @@ -90,9 +90,6 @@ following will output a list of processes running in the container: if err := checkArgs(context, 1, minArgs); err != nil { return err } - if os.Geteuid() != 0 { - return fmt.Errorf("runc should be run as root") - } if err := revisePidFile(context); err != nil { return err } diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go index 890cd7d19c7..98f4b8585f3 100644 --- a/libcontainer/configs/config.go +++ b/libcontainer/configs/config.go @@ -183,6 +183,9 @@ type Config struct { // NoNewKeyring will not allocated a new session keyring for the container. It will use the // callers keyring in this case. NoNewKeyring bool `json:"no_new_keyring"` + + // Rootless specifies whether the container is a rootless container. 
+ Rootless bool `json:"rootless"` } type Hooks struct { diff --git a/libcontainer/configs/validate/rootless.go b/libcontainer/configs/validate/rootless.go new file mode 100644 index 00000000000..1e83cedd0d3 --- /dev/null +++ b/libcontainer/configs/validate/rootless.go @@ -0,0 +1,117 @@ +package validate + +import ( + "fmt" + "os" + "reflect" + "strings" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +var ( + geteuid = os.Geteuid + getegid = os.Getegid +) + +func (v *ConfigValidator) rootless(config *configs.Config) error { + if err := rootlessMappings(config); err != nil { + return err + } + if err := rootlessMount(config); err != nil { + return err + } + // Currently, cgroups cannot effectively be used in rootless containers. + // The new cgroup namespace doesn't really help us either because it doesn't + // have nice interactions with the user namespace (we're working with upstream + // to fix this). + if err := rootlessCgroup(config); err != nil { + return err + } + + // XXX: We currently can't verify the user config at all, because + // configs.Config doesn't store the user-related configs. So this + // has to be verified by setupUser() in init_linux.go. + + return nil +} + +func rootlessMappings(config *configs.Config) error { + rootuid, err := config.HostUID() + if err != nil { + return fmt.Errorf("failed to get root uid from uidMappings: %v", err) + } + if euid := geteuid(); euid != 0 { + if !config.Namespaces.Contains(configs.NEWUSER) { + return fmt.Errorf("rootless containers require user namespaces") + } + if rootuid != euid { + return fmt.Errorf("rootless containers cannot map container root to a different host user") + } + } + + rootgid, err := config.HostGID() + if err != nil { + return fmt.Errorf("failed to get root gid from gidMappings: %v", err) + } + + // Similar to the above test, we need to make sure that we aren't trying to + // map to a group ID that we don't have the right to be. 
+ if rootgid != getegid() { + return fmt.Errorf("rootless containers cannot map container root to a different host group") + } + + // We can only map one user and group inside a container (our own). + if len(config.UidMappings) != 1 || config.UidMappings[0].Size != 1 { + return fmt.Errorf("rootless containers cannot map more than one user") + } + if len(config.GidMappings) != 1 || config.GidMappings[0].Size != 1 { + return fmt.Errorf("rootless containers cannot map more than one group") + } + + return nil +} + +// cgroup verifies that the user isn't trying to set any cgroup limits or paths. +func rootlessCgroup(config *configs.Config) error { + // Nothing set at all. + if config.Cgroups == nil || config.Cgroups.Resources == nil { + return nil + } + + // Used for comparing to the zero value. + left := reflect.ValueOf(*config.Cgroups.Resources) + right := reflect.Zero(left.Type()) + + // This is all we need to do, since specconv won't add cgroup options in + // rootless mode. + if !reflect.DeepEqual(left.Interface(), right.Interface()) { + return fmt.Errorf("cannot specify resource limits in rootless container") + } + + return nil +} + +// mount verifies that the user isn't trying to set up any mounts they don't have +// the rights to do. In addition, it makes sure that no mount has a `uid=` or +// `gid=` option that doesn't resolve to root. +func rootlessMount(config *configs.Config) error { + // XXX: We could whitelist allowed devices at this point, but I'm not + // convinced that's a good idea. The kernel is the best arbiter of + // access control. + + for _, mount := range config.Mounts { + // Check that the options list doesn't contain any uid= or gid= entries + // that don't resolve to root. 
+ for _, opt := range strings.Split(mount.Data, ",") { + if strings.HasPrefix(opt, "uid=") && opt != "uid=0" { + return fmt.Errorf("cannot specify uid= mount options in rootless containers where argument isn't 0") + } + if strings.HasPrefix(opt, "gid=") && opt != "gid=0" { + return fmt.Errorf("cannot specify gid= mount options in rootless containers where argument isn't 0") + } + } + } + + return nil +} diff --git a/libcontainer/configs/validate/rootless_test.go b/libcontainer/configs/validate/rootless_test.go new file mode 100644 index 00000000000..23d678d97ea --- /dev/null +++ b/libcontainer/configs/validate/rootless_test.go @@ -0,0 +1,195 @@ +package validate + +import ( + "testing" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +func init() { + geteuid = func() int { return 1337 } + getegid = func() int { return 7331 } +} + +func rootlessConfig() *configs.Config { + return &configs.Config{ + Rootfs: "/var", + Rootless: true, + Namespaces: configs.Namespaces( + []configs.Namespace{ + {Type: configs.NEWUSER}, + }, + ), + UidMappings: []configs.IDMap{ + { + HostID: geteuid(), + ContainerID: 0, + Size: 1, + }, + }, + GidMappings: []configs.IDMap{ + { + HostID: getegid(), + ContainerID: 0, + Size: 1, + }, + }, + } +} + +func TestValidateRootless(t *testing.T) { + validator := New() + + config := rootlessConfig() + if err := validator.Validate(config); err != nil { + t.Errorf("Expected error to not occur: %+v", err) + } +} + +/* rootlessMappings() */ + +func TestValidateRootlessUserns(t *testing.T) { + validator := New() + + config := rootlessConfig() + config.Namespaces = nil + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur if user namespaces not set") + } +} + +func TestValidateRootlessMappingUid(t *testing.T) { + validator := New() + + config := rootlessConfig() + config.UidMappings = nil + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur if no uid mappings provided") + } 
+ + config = rootlessConfig() + config.UidMappings[0].HostID = geteuid() + 1 + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur if geteuid() != mapped uid") + } + + config = rootlessConfig() + config.UidMappings[0].Size = 1024 + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur if more than one uid mapped") + } + + config = rootlessConfig() + config.UidMappings = append(config.UidMappings, configs.IDMap{ + HostID: geteuid() + 1, + ContainerID: 0, + Size: 1, + }) + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur if more than one uid extent mapped") + } +} + +func TestValidateRootlessMappingGid(t *testing.T) { + validator := New() + + config := rootlessConfig() + config.GidMappings = nil + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur if no gid mappings provided") + } + + config = rootlessConfig() + config.GidMappings[0].HostID = getegid() + 1 + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur if getegid() != mapped gid") + } + + config = rootlessConfig() + config.GidMappings[0].Size = 1024 + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur if more than one gid mapped") + } + + config = rootlessConfig() + config.GidMappings = append(config.GidMappings, configs.IDMap{ + HostID: getegid() + 1, + ContainerID: 0, + Size: 1, + }) + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur if more than one gid extent mapped") + } +} + +/* rootlessMount() */ + +func TestValidateRootlessMountUid(t *testing.T) { + config := rootlessConfig() + validator := New() + + config.Mounts = []*configs.Mount{ + { + Source: "devpts", + Destination: "/dev/pts", + Device: "devpts", + }, + } + + if err := validator.Validate(config); err != nil { + t.Errorf("Expected error to not occur when uid= not set in mount options: %+v", err) + } 
+ + config.Mounts[0].Data = "uid=5" + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur when setting uid=5 in mount options") + } + + config.Mounts[0].Data = "uid=0" + if err := validator.Validate(config); err != nil { + t.Errorf("Expected error to not occur when setting uid=0 in mount options: %+v", err) + } +} + +func TestValidateRootlessMountGid(t *testing.T) { + config := rootlessConfig() + validator := New() + + config.Mounts = []*configs.Mount{ + { + Source: "devpts", + Destination: "/dev/pts", + Device: "devpts", + }, + } + + if err := validator.Validate(config); err != nil { + t.Errorf("Expected error to not occur when gid= not set in mount options: %+v", err) + } + + config.Mounts[0].Data = "gid=5" + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur when setting gid=5 in mount options") + } + + config.Mounts[0].Data = "gid=0" + if err := validator.Validate(config); err != nil { + t.Errorf("Expected error to not occur when setting gid=0 in mount options: %+v", err) + } +} + +/* rootlessCgroup() */ + +func TestValidateRootlessCgroup(t *testing.T) { + validator := New() + + config := rootlessConfig() + config.Cgroups = &configs.Cgroup{ + Resources: &configs.Resources{ + PidsLimit: 1337, + }, + } + if err := validator.Validate(config); err == nil { + t.Errorf("Expected error to occur if cgroup limits set") + } +} diff --git a/libcontainer/configs/validate/validator.go b/libcontainer/configs/validate/validator.go index f076f506a24..0dd580ac901 100644 --- a/libcontainer/configs/validate/validator.go +++ b/libcontainer/configs/validate/validator.go @@ -40,6 +40,11 @@ func (v *ConfigValidator) Validate(config *configs.Config) error { if err := v.sysctl(config); err != nil { return err } + if config.Rootless { + if err := v.rootless(config); err != nil { + return err + } + } return nil } diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 
da685402eac..c3dd42d27ea 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -51,6 +51,9 @@ type State struct { // Platform specific fields below here + // Specifies if the container was started under the rootless mode. + Rootless bool `json:"rootless"` + // Path to all the cgroups setup for a container. Key is cgroup subsystem name // with the value as the path. CgroupPaths map[string]string `json:"cgroup_paths"` @@ -452,6 +455,7 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig { PassedFilesCount: len(process.ExtraFiles), ContainerId: c.ID(), NoNewPrivileges: c.config.NoNewPrivileges, + Rootless: c.config.Rootless, AppArmorProfile: c.config.AppArmorProfile, ProcessLabel: c.config.ProcessLabel, Rlimits: c.config.Rlimits, @@ -622,6 +626,13 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { c.m.Lock() defer c.m.Unlock() + // TODO(avagin): Figure out how to make this work nicely. CRIU 2.0 has + // support for doing unprivileged dumps, but the setup of + // rootless containers might make this complicated. + if c.config.Rootless { + return fmt.Errorf("cannot checkpoint a rootless container") + } + if err := c.checkCriuVersion("1.5.2"); err != nil { return err } @@ -791,6 +802,13 @@ func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { c.m.Lock() defer c.m.Unlock() + + // TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have + // support for unprivileged restore at the moment. + if c.config.Rootless { + return fmt.Errorf("cannot restore a rootless container") + } + if err := c.checkCriuVersion("1.5.2"); err != nil { return err } @@ -918,6 +936,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { } func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { + // XXX: Do we need to deal with this case? 
AFAIK criu still requires root. if err := c.cgroupManager.Apply(pid); err != nil { return err } @@ -1314,6 +1333,7 @@ func (c *linuxContainer) currentState() (*State, error) { InitProcessStartTime: startTime, Created: c.created, }, + Rootless: c.config.Rootless, CgroupPaths: c.cgroupManager.GetPaths(), NamespacePaths: make(map[configs.NamespaceType]string), ExternalDescriptors: externalDescriptors, @@ -1441,16 +1461,19 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na Type: GidmapAttr, Value: b, }) - // check if we have CAP_SETGID to setgroup properly - pid, err := capability.NewPid(os.Getpid()) - if err != nil { - return nil, err - } - if !pid.Get(capability.EFFECTIVE, capability.CAP_SETGID) { - r.AddData(&Boolmsg{ - Type: SetgroupAttr, - Value: true, - }) + // The following only applies if we are root. + if !c.config.Rootless { + // check if we have CAP_SETGID to setgroup properly + pid, err := capability.NewPid(os.Getpid()) + if err != nil { + return nil, err + } + if !pid.Get(capability.EFFECTIVE, capability.CAP_SETGID) { + r.AddData(&Boolmsg{ + Type: SetgroupAttr, + Value: true, + }) + } } } } @@ -1461,5 +1484,11 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na Value: []byte(fmt.Sprintf("%d", c.config.OomScoreAdj)), }) + // write rootless + r.AddData(&Boolmsg{ + Type: RootlessAttr, + Value: c.config.Rootless, + }) + return bytes.NewReader(r.Serialize()), nil } diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 0f5d412ac0a..118783516ae 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -58,6 +58,7 @@ type initConfig struct { ContainerId string `json:"containerid"` Rlimits []configs.Rlimit `json:"rlimits"` CreateConsole bool `json:"create_console"` + Rootless bool `json:"rootless"` } type initer interface { @@ -229,18 +230,21 @@ func syncParentHooks(pipe io.ReadWriter) error { func setupUser(config *initConfig) error { // Set up defaults. 
defaultExecUser := user.ExecUser{ - Uid: syscall.Getuid(), - Gid: syscall.Getgid(), + Uid: 0, + Gid: 0, Home: "/", } + passwdPath, err := user.GetPasswdPath() if err != nil { return err } + groupPath, err := user.GetGroupPath() if err != nil { return err } + execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath) if err != nil { return err @@ -253,22 +257,49 @@ func setupUser(config *initConfig) error { return err } } + + if config.Rootless { + if execUser.Uid != 0 { + return fmt.Errorf("cannot run as a non-root user in a rootless container") + } + + if execUser.Gid != 0 { + return fmt.Errorf("cannot run as a non-root group in a rootless container") + } + + // We cannot set any additional groups in a rootless container and thus we + // bail if the user asked us to do so. TODO: We currently can't do this + // earlier, but if libcontainer.Process.User was typesafe this might work. + if len(addGroups) > 0 { + return fmt.Errorf("cannot set any additional groups in a rootless container") + } + } + // before we change to the container's user make sure that the processes STDIO // is correctly owned by the user that we are switching to. if err := fixStdioPermissions(execUser); err != nil { return err } - suppGroups := append(execUser.Sgids, addGroups...) - if err := syscall.Setgroups(suppGroups); err != nil { - return err + + // This isn't allowed in an unprivileged user namespace since Linux 3.19. + // There's nothing we can do about /etc/group entries, so we silently + // ignore setting groups here (since the user didn't explicitly ask us to + // set the group). + if !config.Rootless { + suppGroups := append(execUser.Sgids, addGroups...) 
+ if err := syscall.Setgroups(suppGroups); err != nil { + return err + } } if err := system.Setgid(execUser.Gid); err != nil { return err } + if err := system.Setuid(execUser.Uid); err != nil { return err } + // if we didn't get HOME already, set it based on the user's HOME if envHome := os.Getenv("HOME"); envHome == "" { if err := os.Setenv("HOME", execUser.Home); err != nil { diff --git a/libcontainer/message_linux.go b/libcontainer/message_linux.go index 321d6642ff4..bc725a227d6 100644 --- a/libcontainer/message_linux.go +++ b/libcontainer/message_linux.go @@ -18,6 +18,7 @@ const ( GidmapAttr uint16 = 27284 SetgroupAttr uint16 = 27285 OomScoreAdjAttr uint16 = 27286 + RootlessAttr uint16 = 27287 // When syscall.NLA_HDRLEN is in gccgo, take this out. syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1) diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index 9630206e398..0ad68834388 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -72,6 +72,7 @@ struct nlconfig_t { char *namespaces; size_t namespaces_len; uint8_t is_setgroup; + uint8_t is_rootless; char *oom_score_adj; size_t oom_score_adj_len; }; @@ -87,6 +88,7 @@ struct nlconfig_t { #define GIDMAP_ATTR 27284 #define SETGROUP_ATTR 27285 #define OOM_SCORE_ADJ_ATTR 27286 +#define ROOTLESS_ATTR 27287 /* * Use the raw syscall for versions of glibc which don't include a function for @@ -175,6 +177,7 @@ static void update_setgroups(int pid, enum policy_t setgroup) policy = "deny"; break; case SETGROUPS_DEFAULT: + default: /* Nothing to do. 
*/ return; } @@ -329,6 +332,9 @@ static void nl_parse(int fd, struct nlconfig_t *config) case CLONE_FLAGS_ATTR: config->cloneflags = readint32(current); break; + case ROOTLESS_ATTR: + config->is_rootless = readint8(current); + break; case OOM_SCORE_ADJ_ATTR: config->oom_score_adj = current; config->oom_score_adj_len = payload_len; @@ -574,9 +580,21 @@ void nsexec(void) exit(ret); case SYNC_USERMAP_PLS: - /* Enable setgroups(2) if we've been asked to. */ + /* + * Enable setgroups(2) if we've been asked to. But we also + * have to explicitly disable setgroups(2) if we're + * creating a rootless container (this is required since + * Linux 3.19). + */ + if (config.is_rootless && config.is_setgroup) { + kill(child, SIGKILL); + bail("cannot allow setgroup in an unprivileged user namespace setup"); + } + if (config.is_setgroup) update_setgroups(child, SETGROUPS_ALLOW); + if (config.is_rootless) + update_setgroups(child, SETGROUPS_DENY); /* Set up mappings. */ update_uidmap(child, config.uidmap, config.uidmap_len); @@ -818,8 +836,10 @@ void nsexec(void) if (setgid(0) < 0) bail("setgid failed"); - if (setgroups(0, NULL) < 0) - bail("setgroups failed"); + if (!config.is_rootless && config.is_setgroup) { + if (setgroups(0, NULL) < 0) + bail("setgroups failed"); + } s = SYNC_CHILD_READY; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index c60f4730146..e8b7506dbcd 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -80,7 +80,8 @@ func (p *setnsProcess) start() (err error) { if err = p.execSetns(); err != nil { return newSystemErrorWithCause(err, "executing setns process") } - if len(p.cgroupPaths) > 0 { + // We can't join cgroups if we're in a rootless container. 
+ if !p.config.Rootless && len(p.cgroupPaths) > 0 { if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil { return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid()) } @@ -253,13 +254,15 @@ func (p *initProcess) start() error { return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid()) } p.setExternalDescriptors(fds) - // Do this before syncing with child so that no children - // can escape the cgroup - if err := p.manager.Apply(p.pid()); err != nil { - return newSystemErrorWithCause(err, "applying cgroup configuration for process") + if !p.container.config.Rootless { + // Do this before syncing with child so that no children can escape the + // cgroup. We can't do this if we're not running as root. + if err := p.manager.Apply(p.pid()); err != nil { + return newSystemErrorWithCause(err, "applying cgroup configuration for process") + } } defer func() { - if err != nil { + if err != nil && !p.container.config.Rootless { // TODO: should not be the responsibility to call here p.manager.Destroy() } @@ -278,8 +281,11 @@ func (p *initProcess) start() error { ierr := parseSync(p.parentPipe, func(sync *syncT) error { switch sync.Type { case procReady: - if err := p.manager.Set(p.config.Config); err != nil { - return newSystemErrorWithCause(err, "setting cgroup config for ready process") + // We can't set cgroups if we're in a rootless container. + if !p.container.config.Rootless { + if err := p.manager.Set(p.config.Config); err != nil { + return newSystemErrorWithCause(err, "setting cgroup config for ready process") + } } // set rlimits, this has to be done here because we lose permissions // to raise the limits once we enter a user-namespace @@ -424,6 +430,12 @@ func getPipeFds(pid int) ([]string, error) { f := filepath.Join(dirPath, strconv.Itoa(i)) target, err := os.Readlink(f) if err != nil { + // Ignore permission errors, for rootless containers and other + // non-dumpable processes. 
if we can't get the fd for a particular + // file, there's not much we can do. + if os.IsPermission(err) { + continue + } return fds, err } fds[i] = target diff --git a/libcontainer/specconv/example.go b/libcontainer/specconv/example.go new file mode 100644 index 00000000000..44fad97e5cc --- /dev/null +++ b/libcontainer/specconv/example.go @@ -0,0 +1,160 @@ +package specconv + +import ( + "runtime" + + "github.com/opencontainers/runtime-spec/specs-go" +) + +func sPtr(s string) *string { return &s } + +// ExampleSpec returns an example spec file, with many options set so a user +// can see what a standard spec file looks like. +func ExampleSpec() *specs.Spec { + return &specs.Spec{ + Version: specs.Version, + Platform: specs.Platform{ + OS: runtime.GOOS, + Arch: runtime.GOARCH, + }, + Root: specs.Root{ + Path: "rootfs", + Readonly: true, + }, + Process: specs.Process{ + Terminal: true, + User: specs.User{}, + Args: []string{ + "sh", + }, + Env: []string{ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm", + }, + Cwd: "/", + NoNewPrivileges: true, + Capabilities: &specs.LinuxCapabilities{ + Bounding: []string{ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE", + }, + Permitted: []string{ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE", + }, + Inheritable: []string{ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE", + }, + Ambient: []string{ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE", + }, + Effective: []string{ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE", + }, + }, + Rlimits: []specs.LinuxRlimit{ + { + Type: "RLIMIT_NOFILE", + Hard: uint64(1024), + Soft: uint64(1024), + }, + }, + }, + Hostname: "runc", + Mounts: []specs.Mount{ + { + Destination: "/proc", + Type: "proc", + Source: "proc", + Options: nil, + }, + { + Destination: "/dev", + Type: "tmpfs", + Source: "tmpfs", + Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"}, + }, + { + Destination: 
"/dev/pts", + Type: "devpts", + Source: "devpts", + Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"}, + }, + { + Destination: "/dev/shm", + Type: "tmpfs", + Source: "shm", + Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"}, + }, + { + Destination: "/dev/mqueue", + Type: "mqueue", + Source: "mqueue", + Options: []string{"nosuid", "noexec", "nodev"}, + }, + { + Destination: "/sys", + Type: "sysfs", + Source: "sysfs", + Options: []string{"nosuid", "noexec", "nodev", "ro"}, + }, + { + Destination: "/sys/fs/cgroup", + Type: "cgroup", + Source: "cgroup", + Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"}, + }, + }, + Linux: &specs.Linux{ + MaskedPaths: []string{ + "/proc/kcore", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware", + }, + ReadonlyPaths: []string{ + "/proc/asound", + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger", + }, + Resources: &specs.LinuxResources{ + Devices: []specs.LinuxDeviceCgroup{ + { + Allow: false, + Access: "rwm", + }, + }, + }, + Namespaces: []specs.LinuxNamespace{ + { + Type: "pid", + }, + { + Type: "network", + }, + { + Type: "ipc", + }, + { + Type: "uts", + }, + { + Type: "mount", + }, + }, + }, + } +} diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 52b3ca112d1..346b2689d65 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -145,6 +145,7 @@ type CreateOpts struct { NoPivotRoot bool NoNewKeyring bool Spec *specs.Spec + Rootless bool } // CreateLibcontainerConfig creates a new libcontainer configuration from a @@ -175,6 +176,7 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { Hostname: spec.Hostname, Labels: append(labels, fmt.Sprintf("bundle=%s", cwd)), NoNewKeyring: opts.NoNewKeyring, + Rootless: opts.Rootless, } exists := false @@ -208,7 +210,7 @@ 
func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { if err := setupUserNamespace(spec, config); err != nil { return nil, err } - c, err := createCgroupConfig(opts.CgroupName, opts.UseSystemdCgroup, spec) + c, err := createCgroupConfig(opts) if err != nil { return nil, err } @@ -264,8 +266,14 @@ func createLibcontainerMount(cwd string, m specs.Mount) *configs.Mount { } } -func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (*configs.Cgroup, error) { - var myCgroupPath string +func createCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) { + var ( + myCgroupPath string + + spec = opts.Spec + useSystemdCgroup = opts.UseSystemdCgroup + name = opts.CgroupName + ) c := &configs.Cgroup{ Resources: &configs.Resources{}, @@ -301,9 +309,14 @@ func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (* c.Path = myCgroupPath } - c.Resources.AllowedDevices = allowedDevices - if spec.Linux == nil { - return c, nil + // In rootless containers, any attempt to make cgroup changes will fail. + // libcontainer will validate this and we shouldn't add any cgroup options + // the user didn't specify. + if !opts.Rootless { + c.Resources.AllowedDevices = allowedDevices + if spec.Linux == nil { + return c, nil + } } r := spec.Linux.Resources if r == nil { @@ -340,8 +353,10 @@ func createCgroupConfig(name string, useSystemdCgroup bool, spec *specs.Spec) (* } c.Resources.Devices = append(c.Resources.Devices, dd) } - // append the default allowed devices to the end of the list - c.Resources.Devices = append(c.Resources.Devices, allowedDevices...) + if !opts.Rootless { + // append the default allowed devices to the end of the list + c.Resources.Devices = append(c.Resources.Devices, allowedDevices...) 
+ } if r.Memory != nil { if r.Memory.Limit != nil { c.Resources.Memory = *r.Memory.Limit diff --git a/libcontainer/specconv/spec_linux_test.go b/libcontainer/specconv/spec_linux_test.go index baa2638adac..741fae63097 100644 --- a/libcontainer/specconv/spec_linux_test.go +++ b/libcontainer/specconv/spec_linux_test.go @@ -3,8 +3,10 @@ package specconv import ( + "os" "testing" + "github.com/opencontainers/runc/libcontainer/configs/validate" "github.com/opencontainers/runtime-spec/specs-go" ) @@ -16,7 +18,13 @@ func TestLinuxCgroupsPathSpecified(t *testing.T) { CgroupsPath: cgroupsPath, } - cgroup, err := createCgroupConfig("ContainerID", false, spec) + opts := &CreateOpts{ + CgroupName: "ContainerID", + UseSystemdCgroup: false, + Spec: spec, + } + + cgroup, err := createCgroupConfig(opts) if err != nil { t.Errorf("Couldn't create Cgroup config: %v", err) } @@ -28,8 +36,13 @@ func TestLinuxCgroupsPathSpecified(t *testing.T) { func TestLinuxCgroupsPathNotSpecified(t *testing.T) { spec := &specs.Spec{} + opts := &CreateOpts{ + CgroupName: "ContainerID", + UseSystemdCgroup: false, + Spec: spec, + } - cgroup, err := createCgroupConfig("ContainerID", false, spec) + cgroup, err := createCgroupConfig(opts) if err != nil { t.Errorf("Couldn't create Cgroup config: %v", err) } @@ -39,6 +52,26 @@ func TestLinuxCgroupsPathNotSpecified(t *testing.T) { } } +func TestSpecconvExampleValidate(t *testing.T) { + spec := ExampleSpec() + spec.Root.Path = "/" + opts := &CreateOpts{ + CgroupName: "ContainerID", + UseSystemdCgroup: false, + Spec: spec, + } + + config, err := CreateLibcontainerConfig(opts) + if err != nil { + t.Errorf("Couldn't create libcontainer config: %v", err) + } + + validator := validate.New() + if err := validator.Validate(config); err != nil { + t.Errorf("Expected specconv to produce valid container config: %v", err) + } +} + func TestDupNamespaces(t *testing.T) { spec := &specs.Spec{ Linux: &specs.Linux{ @@ -62,3 +95,46 @@ func TestDupNamespaces(t *testing.T) { 
t.Errorf("Duplicated namespaces should be forbidden") } } + +func TestRootlessSpecconvValidate(t *testing.T) { + spec := &specs.Spec{ + Linux: specs.Linux{ + Namespaces: []specs.Namespace{ + { + Type: specs.UserNamespace, + }, + }, + UIDMappings: []specs.IDMapping{ + { + HostID: uint32(os.Geteuid()), + ContainerID: 0, + Size: 1, + }, + }, + GIDMappings: []specs.IDMapping{ + { + HostID: uint32(os.Getegid()), + ContainerID: 0, + Size: 1, + }, + }, + }, + } + + opts := &CreateOpts{ + CgroupName: "ContainerID", + UseSystemdCgroup: false, + Spec: spec, + Rootless: true, + } + + config, err := CreateLibcontainerConfig(opts) + if err != nil { + t.Errorf("Couldn't create libcontainer config: %v", err) + } + + validator := validate.New() + if err := validator.Validate(config); err != nil { + t.Errorf("Expected specconv to produce valid rootless container config: %v", err) + } +} diff --git a/list.go b/list.go index c7550a2a853..1c3b9aa8352 100644 --- a/list.go +++ b/list.go @@ -7,12 +7,14 @@ import ( "io/ioutil" "os" "path/filepath" + "syscall" "text/tabwriter" "time" "encoding/json" "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/user" "github.com/opencontainers/runc/libcontainer/utils" "github.com/urfave/cli" ) @@ -38,6 +40,8 @@ type containerState struct { Created time.Time `json:"created"` // Annotations is the user defined annotations added to the config. Annotations map[string]string `json:"annotations,omitempty"` + // The owner of the state directory (the owner of the container). 
+ Owner string `json:"owner"` } var listCommand = cli.Command{ @@ -85,14 +89,15 @@ To list containers created using a non-default value for "--root": switch context.String("format") { case "table": w := tabwriter.NewWriter(os.Stdout, 12, 1, 3, ' ', 0) - fmt.Fprint(w, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\n") + fmt.Fprint(w, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\tOWNER\n") for _, item := range s { - fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\n", + fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%s\t%s\n", item.ID, item.InitProcessPid, item.Status, item.Bundle, - item.Created.Format(time.RFC3339Nano)) + item.Created.Format(time.RFC3339Nano), + item.Owner) } if err := w.Flush(); err != nil { return err @@ -126,6 +131,13 @@ func getContainers(context *cli.Context) ([]containerState, error) { var s []containerState for _, item := range list { if item.IsDir() { + // This cast is safe on Linux. + stat := item.Sys().(*syscall.Stat_t) + owner, err := user.LookupUid(int(stat.Uid)) + if err != nil { + owner.Name = string(stat.Uid) + } + container, err := factory.Load(item.Name()) if err != nil { fmt.Fprintf(os.Stderr, "load container %s: %v\n", item.Name(), err) @@ -155,6 +167,7 @@ func getContainers(context *cli.Context) ([]containerState, error) { Rootfs: state.BaseState.Config.Rootfs, Created: state.BaseState.Created, Annotations: annotations, + Owner: owner.Name, }) } } diff --git a/ps.go b/ps.go index b8a1b111b4c..6e0c7376a1b 100644 --- a/ps.go +++ b/ps.go @@ -28,6 +28,11 @@ var psCommand = cli.Command{ if err := checkArgs(context, 1, minArgs); err != nil { return err } + // XXX: Currently not supported with rootless containers. 
+ if isRootless() { + return fmt.Errorf("runc ps requires root") + } + container, err := getContainer(context) if err != nil { return err diff --git a/restore.go b/restore.go index afc604653f7..06f635f130d 100644 --- a/restore.go +++ b/restore.go @@ -3,6 +3,7 @@ package main import ( + "fmt" "os" "syscall" @@ -86,6 +87,11 @@ using the runc checkpoint command.`, if err := checkArgs(context, 1, exactArgs); err != nil { return err } + // XXX: Currently this is untested with rootless containers. + if isRootless() { + return fmt.Errorf("runc restore requires root") + } + imagePath := context.String("image-path") id := context.Args().First() if id == "" { diff --git a/spec.go b/spec.go index 1b55c6b4c2b..d7df312a853 100644 --- a/spec.go +++ b/spec.go @@ -10,6 +10,7 @@ import ( "runtime" "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/specconv" "github.com/opencontainers/runtime-spec/specs-go" "github.com/urfave/cli" ) @@ -68,152 +69,7 @@ container on your host.`, if err := checkArgs(context, 0, exactArgs); err != nil { return err } - spec := specs.Spec{ - Version: specs.Version, - Platform: specs.Platform{ - OS: runtime.GOOS, - Arch: runtime.GOARCH, - }, - Root: specs.Root{ - Path: "rootfs", - Readonly: true, - }, - Process: specs.Process{ - Terminal: true, - User: specs.User{}, - Args: []string{ - "sh", - }, - Env: []string{ - "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", - "TERM=xterm", - }, - Cwd: "/", - NoNewPrivileges: true, - Capabilities: &specs.LinuxCapabilities{ - Bounding: []string{ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE", - }, - Permitted: []string{ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE", - }, - Inheritable: []string{ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE", - }, - Ambient: []string{ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE", - }, - Effective: []string{ - "CAP_AUDIT_WRITE", - "CAP_KILL", - 
"CAP_NET_BIND_SERVICE", - }, - }, - Rlimits: []specs.LinuxRlimit{ - { - Type: "RLIMIT_NOFILE", - Hard: uint64(1024), - Soft: uint64(1024), - }, - }, - }, - Hostname: "runc", - Mounts: []specs.Mount{ - { - Destination: "/proc", - Type: "proc", - Source: "proc", - Options: nil, - }, - { - Destination: "/dev", - Type: "tmpfs", - Source: "tmpfs", - Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"}, - }, - { - Destination: "/dev/pts", - Type: "devpts", - Source: "devpts", - Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"}, - }, - { - Destination: "/dev/shm", - Type: "tmpfs", - Source: "shm", - Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"}, - }, - { - Destination: "/dev/mqueue", - Type: "mqueue", - Source: "mqueue", - Options: []string{"nosuid", "noexec", "nodev"}, - }, - { - Destination: "/sys", - Type: "sysfs", - Source: "sysfs", - Options: []string{"nosuid", "noexec", "nodev", "ro"}, - }, - { - Destination: "/sys/fs/cgroup", - Type: "cgroup", - Source: "cgroup", - Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"}, - }, - }, - Linux: &specs.Linux{ - MaskedPaths: []string{ - "/proc/kcore", - "/proc/latency_stats", - "/proc/timer_list", - "/proc/timer_stats", - "/proc/sched_debug", - "/sys/firmware", - }, - ReadonlyPaths: []string{ - "/proc/asound", - "/proc/bus", - "/proc/fs", - "/proc/irq", - "/proc/sys", - "/proc/sysrq-trigger", - }, - Resources: &specs.LinuxResources{ - Devices: []specs.LinuxDeviceCgroup{ - { - Allow: false, - Access: "rwm", - }, - }, - }, - Namespaces: []specs.LinuxNamespace{ - { - Type: "pid", - }, - { - Type: "network", - }, - { - Type: "ipc", - }, - { - Type: "uts", - }, - { - Type: "mount", - }, - }, - }, - } + spec := specconv.ExampleSpec() checkNoFile := func(name string) error { _, err := os.Stat(name) @@ -234,7 +90,7 @@ container on your host.`, if err := checkNoFile(specConfig); err != nil { return err } - data, err := 
json.MarshalIndent(&spec, "", "\t") + data, err := json.MarshalIndent(spec, "", "\t") if err != nil { return err } diff --git a/utils.go b/utils.go index 1286fd6f2d7..98f93a4cfad 100644 --- a/utils.go +++ b/utils.go @@ -63,9 +63,6 @@ func setupSpec(context *cli.Context) (*specs.Spec, error) { if err != nil { return nil, err } - if os.Geteuid() != 0 { - return nil, fmt.Errorf("runc should be run as root") - } return spec, nil } diff --git a/utils_linux.go b/utils_linux.go index dcf156c8c5b..767015ed027 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -186,6 +186,11 @@ func createPidFile(path string, process *libcontainer.Process) error { return os.Rename(tmpName, path) } +// XXX: Currently we autodetect rootless mode. +func isRootless() bool { + return os.Geteuid() != 0 +} + func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcontainer.Container, error) { config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{ CgroupName: id, @@ -193,6 +198,7 @@ func createContainer(context *cli.Context, id string, spec *specs.Spec) (libcont NoPivotRoot: context.Bool("no-pivot"), NoNewKeyring: context.Bool("no-new-keyring"), Spec: spec, + Rootless: isRootless(), }) if err != nil { return nil, err From baeef298582869504e73651e2b0fb78b156e5783 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 26 Apr 2016 02:19:39 +1000 Subject: [PATCH 3/8] rootless: add rootless cgroup manager The rootless cgroup manager acts as a noop for all set and apply operations. It is just used for rootless setups. Currently this is far too simple (we need to add opportunistic cgroup management), but is good enough as a first-pass at a noop cgroup manager. 
Signed-off-by: Aleksa Sarai --- libcontainer/cgroups/fs/apply_raw.go | 24 +--- libcontainer/cgroups/rootless/rootless.go | 128 ++++++++++++++++++ libcontainer/cgroups/systemd/apply_systemd.go | 2 +- libcontainer/cgroups/utils.go | 41 +++++- libcontainer/container_linux.go | 8 ++ libcontainer/factory_linux.go | 22 +++ libcontainer/process_linux.go | 20 ++- libcontainer/rootfs_linux.go | 2 +- 8 files changed, 210 insertions(+), 37 deletions(-) create mode 100644 libcontainer/cgroups/rootless/rootless.go diff --git a/libcontainer/cgroups/fs/apply_raw.go b/libcontainer/cgroups/fs/apply_raw.go index d316313c28e..22d82acb4e2 100644 --- a/libcontainer/cgroups/fs/apply_raw.go +++ b/libcontainer/cgroups/fs/apply_raw.go @@ -267,25 +267,8 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) { }, nil } -func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) { - // Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating - // process could in container and shared pid namespace with host, and - // /proc/1/cgroup could point to whole other world of cgroups. - initPath, err := cgroups.GetThisCgroupDir(subsystem) - if err != nil { - return "", err - } - // This is needed for nested containers, because in /proc/self/cgroup we - // see pathes from host, which don't exist in container. - relDir, err := filepath.Rel(root, initPath) - if err != nil { - return "", err - } - return filepath.Join(mountpoint, relDir), nil -} - func (raw *cgroupData) path(subsystem string) (string, error) { - mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem) + mnt, err := cgroups.FindCgroupMountpoint(subsystem) // If we didn't mount the subsystem, there is no point we make the path. 
if err != nil { return "", err @@ -297,7 +280,10 @@ func (raw *cgroupData) path(subsystem string) (string, error) { return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil } - parentPath, err := raw.parentPath(subsystem, mnt, root) + // Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating + // process could be in a container and share a pid namespace with the host, and + // /proc/1/cgroup could point to a whole other world of cgroups. + parentPath, err := cgroups.GetOwnCgroupPath(subsystem) if err != nil { return "", err } diff --git a/libcontainer/cgroups/rootless/rootless.go b/libcontainer/cgroups/rootless/rootless.go new file mode 100644 index 00000000000..b1efbfd9997 --- /dev/null +++ b/libcontainer/cgroups/rootless/rootless.go @@ -0,0 +1,128 @@ +// +build linux + +package rootless + +import ( + "fmt" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/configs/validate" +) + +// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code +// needlessly. We should probably export this list. + +var subsystems = []subsystem{ + &fs.CpusetGroup{}, + &fs.DevicesGroup{}, + &fs.MemoryGroup{}, + &fs.CpuGroup{}, + &fs.CpuacctGroup{}, + &fs.PidsGroup{}, + &fs.BlkioGroup{}, + &fs.HugetlbGroup{}, + &fs.NetClsGroup{}, + &fs.NetPrioGroup{}, + &fs.PerfEventGroup{}, + &fs.FreezerGroup{}, + &fs.NameGroup{GroupName: "name=systemd"}, +} + +type subsystem interface { + // Name returns the name of the subsystem. + Name() string + + // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. + GetStats(path string, stats *cgroups.Stats) error +} + +// The noop cgroup manager is used for rootless containers, because we currently +// cannot manage cgroups if we are in a rootless setup. This manager is chosen +// by factory if we are in rootless mode.
We error out if any cgroup options are +// set in the config -- this may change in the future with upcoming kernel features +// like the cgroup namespace. + +type Manager struct { + Cgroups *configs.Cgroup + Paths map[string]string +} + +func (m *Manager) Apply(pid int) error { + // If there are no cgroup settings, there's nothing to do. + if m.Cgroups == nil { + return nil + } + + // We can't set paths. + // TODO(cyphar): Implement the case where the runner of a rootless container + // owns their own cgroup, which would allow us to set up a + // cgroup for each path. + if m.Cgroups.Paths != nil { + return fmt.Errorf("cannot change cgroup path in rootless container") + } + + // We load the paths into the manager. + paths := make(map[string]string) + for _, sys := range subsystems { + name := sys.Name() + + path, err := cgroups.GetOwnCgroupPath(name) + if err != nil { + // Ignore paths we couldn't resolve. + continue + } + + paths[name] = path + } + + m.Paths = paths + return nil +} + +func (m *Manager) GetPaths() map[string]string { + return m.Paths +} + +func (m *Manager) Set(container *configs.Config) error { + // We have to re-do the validation here, since someone might decide to + // update a rootless container. + return validate.New().Validate(container) +} + +func (m *Manager) GetPids() ([]int, error) { + dir, err := cgroups.GetOwnCgroupPath("devices") + if err != nil { + return nil, err + } + return cgroups.GetPids(dir) +} + +func (m *Manager) GetAllPids() ([]int, error) { + dir, err := cgroups.GetOwnCgroupPath("devices") + if err != nil { + return nil, err + } + return cgroups.GetAllPids(dir) +} + +func (m *Manager) GetStats() (*cgroups.Stats, error) { + // TODO(cyphar): We can make this work if we figure out a way to allow usage + // of cgroups with a rootless container. While this doesn't + // actually require write access to a cgroup directory, the + // statistics are not useful if they can be affected by + // non-container processes. 
+ return nil, fmt.Errorf("cannot get cgroup stats in rootless container") +} + +func (m *Manager) Freeze(state configs.FreezerState) error { + // TODO(cyphar): We can make this work if we figure out a way to allow usage + // of cgroups with a rootless container. + return fmt.Errorf("cannot use freezer cgroup in rootless container") +} + +func (m *Manager) Destroy() error { + // We don't have to do anything here because we didn't do any setup. + return nil +} diff --git a/libcontainer/cgroups/systemd/apply_systemd.go b/libcontainer/cgroups/systemd/apply_systemd.go index 2872bfac78a..456c57d975d 100644 --- a/libcontainer/cgroups/systemd/apply_systemd.go +++ b/libcontainer/cgroups/systemd/apply_systemd.go @@ -426,7 +426,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) { return "", err } - initPath, err := cgroups.GetInitCgroupDir(subsystem) + initPath, err := cgroups.GetInitCgroup(subsystem) if err != nil { return "", err } diff --git a/libcontainer/cgroups/utils.go b/libcontainer/cgroups/utils.go index 52fc87eb3e6..5db37344983 100644 --- a/libcontainer/cgroups/utils.go +++ b/libcontainer/cgroups/utils.go @@ -109,7 +109,7 @@ type Mount struct { Subsystems []string } -func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) { +func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) { if len(m.Subsystems) == 0 { return "", fmt.Errorf("no subsystem for mount") } @@ -203,8 +203,8 @@ func GetAllSubsystems() ([]string, error) { return subsystems, nil } -// GetThisCgroupDir returns the relative path to the cgroup docker is running in. -func GetThisCgroupDir(subsystem string) (string, error) { +// GetOwnCgroup returns the relative path to the cgroup docker is running in. 
+func GetOwnCgroup(subsystem string) (string, error) { cgroups, err := ParseCgroupFile("/proc/self/cgroup") if err != nil { return "", err @@ -213,8 +213,16 @@ func GetThisCgroupDir(subsystem string) (string, error) { return getControllerPath(subsystem, cgroups) } -func GetInitCgroupDir(subsystem string) (string, error) { +func GetOwnCgroupPath(subsystem string) (string, error) { + cgroup, err := GetOwnCgroup(subsystem) + if err != nil { + return "", err + } + return getCgroupPathHelper(subsystem, cgroup) +} + +func GetInitCgroup(subsystem string) (string, error) { cgroups, err := ParseCgroupFile("/proc/1/cgroup") if err != nil { return "", err @@ -223,6 +231,31 @@ func GetInitCgroupDir(subsystem string) (string, error) { return getControllerPath(subsystem, cgroups) } +func GetInitCgroupPath(subsystem string) (string, error) { + cgroup, err := GetInitCgroup(subsystem) + if err != nil { + return "", err + } + + return getCgroupPathHelper(subsystem, cgroup) +} + +func getCgroupPathHelper(subsystem, cgroup string) (string, error) { + mnt, root, err := FindCgroupMountpointAndRoot(subsystem) + if err != nil { + return "", err + } + + // This is needed for nested containers, because in /proc/self/cgroup we + // see paths from the host, which don't exist in the container. + relCgroup, err := filepath.Rel(root, cgroup) + if err != nil { + return "", err + } + + return filepath.Join(mnt, relCgroup), nil +} + func readProcsFile(dir string) ([]int, error) { f, err := os.Open(filepath.Join(dir, CgroupProcesses)) if err != nil { diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index c3dd42d27ea..f3b73ee0a91 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -520,10 +520,18 @@ func (c *linuxContainer) Resume() error { } func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) { + // XXX(cyphar): This requires cgroups.
+ if c.config.Rootless { + return nil, fmt.Errorf("cannot get OOM notifications from rootless container") + } return notifyOnOOM(c.cgroupManager.GetPaths()) } func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) { + // XXX(cyphar): This requires cgroups. + if c.config.Rootless { + return nil, fmt.Errorf("cannot get memory pressure notifications from rootless container") + } return notifyMemoryPressure(c.cgroupManager.GetPaths(), level) } diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go index d553287553a..1f965e62908 100644 --- a/libcontainer/factory_linux.go +++ b/libcontainer/factory_linux.go @@ -15,6 +15,7 @@ import ( "github.com/docker/docker/pkg/mount" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fs" + "github.com/opencontainers/runc/libcontainer/cgroups/rootless" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs/validate" @@ -73,6 +74,20 @@ func Cgroupfs(l *LinuxFactory) error { return nil } +// RootlessCgroups is an options func to configure a LinuxFactory to +// return containers that use the "rootless" cgroup manager, which will +// fail to do any operations not possible to do with an unprivileged user. +// It should only be used in conjunction with rootless containers. +func RootlessCgroups(l *LinuxFactory) error { + l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { + return &rootless.Manager{ + Cgroups: config, + Paths: paths, + } + } + return nil +} + // TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs. 
func TmpfsRoot(l *LinuxFactory) error { mounted, err := mount.Mounted(l.Root) @@ -169,6 +184,9 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err if err := os.Chown(containerRoot, uid, gid); err != nil { return nil, newGenericError(err, SystemError) } + if config.Rootless { + RootlessCgroups(l) + } c := &linuxContainer{ id: id, root: containerRoot, @@ -195,6 +213,10 @@ func (l *LinuxFactory) Load(id string) (Container, error) { processStartTime: state.InitProcessStartTime, fds: state.ExternalDescriptors, } + // We have to use the RootlessManager. + if state.Rootless { + RootlessCgroups(l) + } c := &linuxContainer{ initProcess: r, initProcessStartTime: state.InitProcessStartTime, diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index e8b7506dbcd..bfe99551d4e 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -254,15 +254,14 @@ func (p *initProcess) start() error { return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid()) } p.setExternalDescriptors(fds) - if !p.container.config.Rootless { - // Do this before syncing with child so that no children can escape the - // cgroup. We can't do this if we're not running as root. - if err := p.manager.Apply(p.pid()); err != nil { - return newSystemErrorWithCause(err, "applying cgroup configuration for process") - } + // Do this before syncing with child so that no children can escape the + // cgroup. We don't need to worry about not doing this and not being root + // because we'd be using the rootless cgroup manager in that case. 
+ if err := p.manager.Apply(p.pid()); err != nil { + return newSystemErrorWithCause(err, "applying cgroup configuration for process") } defer func() { - if err != nil && !p.container.config.Rootless { + if err != nil { // TODO: should not be the responsibility to call here p.manager.Destroy() } @@ -281,11 +280,8 @@ func (p *initProcess) start() error { ierr := parseSync(p.parentPipe, func(sync *syncT) error { switch sync.Type { case procReady: - // We can't set cgroups if we're in a rootless container. - if !p.container.config.Rootless { - if err := p.manager.Set(p.config.Config); err != nil { - return newSystemErrorWithCause(err, "setting cgroup config for ready process") - } + if err := p.manager.Set(p.config.Config); err != nil { + return newSystemErrorWithCause(err, "setting cgroup config for ready process") } // set rlimits, this has to be done here because we lose permissions // to raise the limits once we enter a user-namespace diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index 2635fd6f99c..b4948687e27 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -348,7 +348,7 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) { var binds []*configs.Mount for _, mm := range mounts { - dir, err := mm.GetThisCgroupDir(cgroupPaths) + dir, err := mm.GetOwnCgroup(cgroupPaths) if err != nil { return nil, err } From f0876b04276226533b95e54c57e622f7d076f6f0 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Sat, 18 Mar 2017 04:32:16 +1100 Subject: [PATCH 4/8] libcontainer: configs: add proper HostUID and HostGID Previously Host{U,G}ID only gave you the root mapping, which isn't very useful if you are trying to do other things with the IDMaps. 
Signed-off-by: Aleksa Sarai --- libcontainer/configs/config_unix.go | 40 +++++++++++++++-------- libcontainer/configs/config_unix_test.go | 16 ++++----- libcontainer/configs/validate/rootless.go | 4 +-- libcontainer/container_linux.go | 4 +-- libcontainer/factory_linux.go | 4 +-- libcontainer/specconv/spec_linux.go | 4 +-- utils_linux.go | 4 +-- 7 files changed, 44 insertions(+), 32 deletions(-) diff --git a/libcontainer/configs/config_unix.go b/libcontainer/configs/config_unix.go index a60554a7b96..84463995d05 100644 --- a/libcontainer/configs/config_unix.go +++ b/libcontainer/configs/config_unix.go @@ -4,38 +4,50 @@ package configs import "fmt" -// HostUID gets the root uid for the process on host which could be non-zero -// when user namespaces are enabled. -func (c Config) HostUID() (int, error) { +// HostUID gets the translated uid for the process on host which could be +// different when user namespaces are enabled. +func (c Config) HostUID(containerId int) (int, error) { if c.Namespaces.Contains(NEWUSER) { if c.UidMappings == nil { - return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.") + return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.") } - id, found := c.hostIDFromMapping(0, c.UidMappings) + id, found := c.hostIDFromMapping(containerId, c.UidMappings) if !found { - return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") + return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.") } return id, nil } - // Return default root uid 0 - return 0, nil + // Return unchanged id. + return containerId, nil } -// HostGID gets the root gid for the process on host which could be non-zero +// HostRootUID gets the root uid for the process on host which could be non-zero // when user namespaces are enabled. 
-func (c Config) HostGID() (int, error) { +func (c Config) HostRootUID() (int, error) { + return c.HostUID(0) +} + +// HostGID gets the translated gid for the process on host which could be +// different when user namespaces are enabled. +func (c Config) HostGID(containerId int) (int, error) { if c.Namespaces.Contains(NEWUSER) { if c.GidMappings == nil { return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") } - id, found := c.hostIDFromMapping(0, c.GidMappings) + id, found := c.hostIDFromMapping(containerId, c.GidMappings) if !found { - return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.") + return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.") } return id, nil } - // Return default root gid 0 - return 0, nil + // Return unchanged id. + return containerId, nil +} + +// HostRootGID gets the root gid for the process on host which could be non-zero +// when user namespaces are enabled. +func (c Config) HostRootGID() (int, error) { + return c.HostGID(0) } // Utility function that gets a host ID for a container ID from user namespace map diff --git a/libcontainer/configs/config_unix_test.go b/libcontainer/configs/config_unix_test.go index dc01cd0132f..7f966152d6d 100644 --- a/libcontainer/configs/config_unix_test.go +++ b/libcontainer/configs/config_unix_test.go @@ -65,11 +65,11 @@ func TestRemoveNamespace(t *testing.T) { } } -func TestHostUIDNoUSERNS(t *testing.T) { +func TestHostRootUIDNoUSERNS(t *testing.T) { config := &Config{ Namespaces: Namespaces{}, } - uid, err := config.HostUID() + uid, err := config.HostRootUID() if err != nil { t.Fatal(err) } @@ -78,7 +78,7 @@ func TestHostUIDNoUSERNS(t *testing.T) { } } -func TestHostUIDWithUSERNS(t *testing.T) { +func TestHostRootUIDWithUSERNS(t *testing.T) { config := &Config{ Namespaces: Namespaces{{Type: NEWUSER}}, UidMappings: []IDMap{ @@ -89,7 +89,7 @@ func TestHostUIDWithUSERNS(t *testing.T) { }, }, } - uid, err := config.HostUID() + uid, err 
:= config.HostRootUID() if err != nil { t.Fatal(err) } @@ -98,11 +98,11 @@ func TestHostUIDWithUSERNS(t *testing.T) { } } -func TestHostGIDNoUSERNS(t *testing.T) { +func TestHostRootGIDNoUSERNS(t *testing.T) { config := &Config{ Namespaces: Namespaces{}, } - uid, err := config.HostGID() + uid, err := config.HostRootGID() if err != nil { t.Fatal(err) } @@ -111,7 +111,7 @@ func TestHostGIDNoUSERNS(t *testing.T) { } } -func TestHostGIDWithUSERNS(t *testing.T) { +func TestHostRootGIDWithUSERNS(t *testing.T) { config := &Config{ Namespaces: Namespaces{{Type: NEWUSER}}, GidMappings: []IDMap{ @@ -122,7 +122,7 @@ func TestHostGIDWithUSERNS(t *testing.T) { }, }, } - uid, err := config.HostGID() + uid, err := config.HostRootGID() if err != nil { t.Fatal(err) } diff --git a/libcontainer/configs/validate/rootless.go b/libcontainer/configs/validate/rootless.go index 1e83cedd0d3..0cebfaf801a 100644 --- a/libcontainer/configs/validate/rootless.go +++ b/libcontainer/configs/validate/rootless.go @@ -37,7 +37,7 @@ func (v *ConfigValidator) rootless(config *configs.Config) error { } func rootlessMappings(config *configs.Config) error { - rootuid, err := config.HostUID() + rootuid, err := config.HostRootUID() if err != nil { return fmt.Errorf("failed to get root uid from uidMappings: %v", err) } @@ -50,7 +50,7 @@ func rootlessMappings(config *configs.Config) error { } } - rootgid, err := config.HostGID() + rootgid, err := config.HostRootGID() if err != nil { return fmt.Errorf("failed to get root gid from gidMappings: %v", err) } diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index f3b73ee0a91..faecc4683e5 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -307,11 +307,11 @@ func (c *linuxContainer) Signal(s os.Signal, all bool) error { } func (c *linuxContainer) createExecFifo() error { - rootuid, err := c.Config().HostUID() + rootuid, err := c.Config().HostRootUID() if err != nil { return err } - rootgid, err := 
c.Config().HostGID() + rootgid, err := c.Config().HostRootGID() if err != nil { return err } diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go index 1f965e62908..6a0f8558373 100644 --- a/libcontainer/factory_linux.go +++ b/libcontainer/factory_linux.go @@ -164,11 +164,11 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err if err := l.Validator.Validate(config); err != nil { return nil, newGenericError(err, ConfigInvalid) } - uid, err := config.HostUID() + uid, err := config.HostRootUID() if err != nil { return nil, newGenericError(err, SystemError) } - gid, err := config.HostGID() + gid, err := config.HostRootGID() if err != nil { return nil, newGenericError(err, SystemError) } diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 346b2689d65..1575ae03793 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -610,11 +610,11 @@ func setupUserNamespace(spec *specs.Spec, config *configs.Config) error { for _, m := range spec.Linux.GIDMappings { config.GidMappings = append(config.GidMappings, create(m)) } - rootUID, err := config.HostUID() + rootUID, err := config.HostRootUID() if err != nil { return err } - rootGID, err := config.HostGID() + rootGID, err := config.HostRootGID() if err != nil { return err } diff --git a/utils_linux.go b/utils_linux.go index 767015ed027..c6a8c028e6a 100644 --- a/utils_linux.go +++ b/utils_linux.go @@ -242,12 +242,12 @@ func (r *runner) run(config *specs.Process) (int, error) { for i := baseFd; i < baseFd+r.preserveFDs; i++ { process.ExtraFiles = append(process.ExtraFiles, os.NewFile(uintptr(i), "PreserveFD:"+strconv.Itoa(i))) } - rootuid, err := r.container.Config().HostUID() + rootuid, err := r.container.Config().HostRootUID() if err != nil { r.destroy() return -1, err } - rootgid, err := r.container.Config().HostGID() + rootgid, err := r.container.Config().HostRootGID() if err != nil { r.destroy() 
return -1, err From 76aeaf8181fa0038983503294b02e0f42663953c Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Sat, 18 Mar 2017 04:33:14 +1100 Subject: [PATCH 5/8] libcontainer: init: fix unmapped console fchown If the stdio of the container is owned by a group which is not mapped in the user namespace, attempting to fchown the file descriptor will result in EINVAL. Counteract this by simply not doing an fchown if the group owner of the file descriptor has no host mapping according to the configured GIDMappings. Signed-off-by: Aleksa Sarai --- libcontainer/init_linux.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 118783516ae..99cc02cbd02 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -277,7 +277,7 @@ func setupUser(config *initConfig) error { // before we change to the container's user make sure that the processes STDIO // is correctly owned by the user that we are switching to. - if err := fixStdioPermissions(execUser); err != nil { + if err := fixStdioPermissions(config, execUser); err != nil { return err } @@ -312,7 +312,7 @@ func setupUser(config *initConfig) error { // fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user. // The ownership needs to match because it is created outside of the container and needs to be // localized. -func fixStdioPermissions(u *user.ExecUser) error { +func fixStdioPermissions(config *initConfig, u *user.ExecUser) error { var null syscall.Stat_t if err := syscall.Stat("/dev/null", &null); err != nil { return err @@ -326,10 +326,20 @@ func fixStdioPermissions(u *user.ExecUser) error { if err := syscall.Fstat(int(fd), &s); err != nil { return err } + // Skip chown of /dev/null if it was used as one of the STDIO fds. if s.Rdev == null.Rdev { continue } + + // Skip chown if s.Gid is actually an unmapped gid in the host. 
While + // this is a bit dodgy if it just so happens that the console _is_ + // owned by overflow_gid, there's no way for us to disambiguate this as + // a userspace program. + if _, err := config.Config.HostGID(int(s.Gid)); err != nil { + continue + } + // We only change the uid owner (as it is possible for the mount to // prefer a different gid, and there's no reason for us to change it). // The reason why we don't just leave the default uid=X mount setup is From d04cbc49d2ae4488a566eab86102c398522aaf14 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Mon, 9 May 2016 21:26:11 +1000 Subject: [PATCH 6/8] rootless: add autogenerated rootless config from `runc spec` Since this is a runC-specific feature, this belongs here over in opencontainers/ocitools (which is for generic OCI runtimes). In addition, we don't create a new network namespace. This is because currently if you want to set up a veth bridge you need CAP_NET_ADMIN in both network namespaces' pinned user namespace to create the necessary interfaces in each network namespace. Signed-off-by: Aleksa Sarai --- libcontainer/specconv/example.go | 73 +++++++++++++++++++++++- libcontainer/specconv/spec_linux_test.go | 30 ++-------- spec.go | 11 +++- 3 files changed, 85 insertions(+), 29 deletions(-) diff --git a/libcontainer/specconv/example.go b/libcontainer/specconv/example.go index 44fad97e5cc..9a4460ce7fb 100644 --- a/libcontainer/specconv/example.go +++ b/libcontainer/specconv/example.go @@ -1,16 +1,18 @@ package specconv import ( + "os" "runtime" + "strings" "github.com/opencontainers/runtime-spec/specs-go" ) func sPtr(s string) *string { return &s } -// ExampleSpec returns an example spec file, with many options set so a user -// can see what a standard spec file looks like. -func ExampleSpec() *specs.Spec { +// Example returns an example spec file, with many options set so a user can +// see what a standard spec file looks like. 
+func Example() *specs.Spec { return &specs.Spec{ Version: specs.Version, Platform: specs.Platform{ @@ -158,3 +160,68 @@ func ExampleSpec() *specs.Spec { }, } } + +// ToRootless converts the given spec file into one that should work with +// rootless containers, by removing incompatible options and adding others that +// are needed. +func ToRootless(spec *specs.Spec) { + var namespaces []specs.LinuxNamespace + + // Remove networkns and any existing userns from the spec. + for _, ns := range spec.Linux.Namespaces { + switch ns.Type { + case specs.NetworkNamespace, specs.UserNamespace: + // Do nothing. + default: + namespaces = append(namespaces, ns) + } + } + // Add userns to the spec. + namespaces = append(namespaces, specs.LinuxNamespace{ + Type: specs.UserNamespace, + }) + spec.Linux.Namespaces = namespaces + + // Add mappings for the current user. + spec.Linux.UIDMappings = []specs.LinuxIDMapping{{ + HostID: uint32(os.Geteuid()), + ContainerID: 0, + Size: 1, + }} + spec.Linux.GIDMappings = []specs.LinuxIDMapping{{ + HostID: uint32(os.Getegid()), + ContainerID: 0, + Size: 1, + }} + + // Fix up mounts. + var mounts []specs.Mount + for _, mount := range spec.Mounts { + // Ignore all mounts that are under /sys. + if strings.HasPrefix(mount.Destination, "/sys") { + continue + } + + // Remove all gid= and uid= mappings. + var options []string + for _, option := range mount.Options { + if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") { + options = append(options, option) + } + } + + mount.Options = options + mounts = append(mounts, mount) + } + // Add the sysfs mount as an rbind. + mounts = append(mounts, specs.Mount{ + Source: "/sys", + Destination: "/sys", + Type: "none", + Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"}, + }) + spec.Mounts = mounts + + // Remove cgroup settings.
+ spec.Linux.Resources = nil +} diff --git a/libcontainer/specconv/spec_linux_test.go b/libcontainer/specconv/spec_linux_test.go index 741fae63097..f7292f36829 100644 --- a/libcontainer/specconv/spec_linux_test.go +++ b/libcontainer/specconv/spec_linux_test.go @@ -3,7 +3,6 @@ package specconv import ( - "os" "testing" "github.com/opencontainers/runc/libcontainer/configs/validate" @@ -53,8 +52,9 @@ func TestLinuxCgroupsPathNotSpecified(t *testing.T) { } func TestSpecconvExampleValidate(t *testing.T) { - spec := ExampleSpec() + spec := Example() spec.Root.Path = "/" + opts := &CreateOpts{ CgroupName: "ContainerID", UseSystemdCgroup: false, @@ -97,29 +97,9 @@ func TestDupNamespaces(t *testing.T) { } func TestRootlessSpecconvValidate(t *testing.T) { - spec := &specs.Spec{ - Linux: specs.Linux{ - Namespaces: []specs.Namespace{ - { - Type: specs.UserNamespace, - }, - }, - UIDMappings: []specs.IDMapping{ - { - HostID: uint32(os.Geteuid()), - ContainerID: 0, - Size: 1, - }, - }, - GIDMappings: []specs.IDMapping{ - { - HostID: uint32(os.Getegid()), - ContainerID: 0, - Size: 1, - }, - }, - }, - } + spec := Example() + spec.Root.Path = "/" + ToRootless(spec) opts := &CreateOpts{ CgroupName: "ContainerID", diff --git a/spec.go b/spec.go index d7df312a853..9024ad4cd7f 100644 --- a/spec.go +++ b/spec.go @@ -64,12 +64,21 @@ container on your host.`, Value: "", Usage: "path to the root of the bundle directory", }, + cli.BoolFlag{ + Name: "rootless", + Usage: "generate a configuration for a rootless container", + }, }, Action: func(context *cli.Context) error { if err := checkArgs(context, 0, exactArgs); err != nil { return err } - spec := specconv.ExampleSpec() + spec := specconv.Example() + + rootless := context.Bool("rootless") + if rootless { + specconv.ToRootless(spec) + } checkNoFile := func(name string) error { _, err := os.Stat(name) From 2ce33574d0c77a68915d6f71c8cdfa48767f52d9 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 10 May 2016 22:22:13 +1000 Subject: 
[PATCH 7/8] integration: added root requires This is in preparation for allowing us to run the integration test suite on rootless containers. Signed-off-by: Aleksa Sarai --- tests/integration/cgroups.bats | 8 ++++++-- tests/integration/checkpoint.bats | 3 ++- tests/integration/helpers.bash | 10 +++++++++- tests/integration/kill.bats | 1 - tests/integration/pause.bats | 6 ++++++ tests/integration/update.bats | 6 +++++- 6 files changed, 28 insertions(+), 6 deletions(-) diff --git a/tests/integration/cgroups.bats b/tests/integration/cgroups.bats index 9ab6f432beb..90095a7ea87 100644 --- a/tests/integration/cgroups.bats +++ b/tests/integration/cgroups.bats @@ -28,7 +28,9 @@ function check_cgroup_value() { } @test "runc update --kernel-memory (initialized)" { - requires cgroups_kmem + # XXX: currently cgroups require root containers. + requires cgroups_kmem root + # Add cgroup path sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-cgroups-integration-test",/' ${BUSYBOX_BUNDLE}/config.json @@ -56,7 +58,9 @@ EOF } @test "runc update --kernel-memory (uninitialized)" { - requires cgroups_kmem + # XXX: currently cgroups require root containers. + requires cgroups_kmem root + # Add cgroup path sed -i 's/\("linux": {\)/\1\n "cgroupsPath": "\/runc-cgroups-integration-test",/' ${BUSYBOX_BUNDLE}/config.json diff --git a/tests/integration/checkpoint.bats b/tests/integration/checkpoint.bats index 34d1b0363f7..e91fd651439 100644 --- a/tests/integration/checkpoint.bats +++ b/tests/integration/checkpoint.bats @@ -12,7 +12,8 @@ function teardown() { } @test "checkpoint and restore" { - requires criu + # XXX: currently criu require root containers. + requires criu root # criu does not work with external terminals so..
# setting terminal and root:readonly: to false diff --git a/tests/integration/helpers.bash b/tests/integration/helpers.bash index e4c2cb93611..654833321b3 100644 --- a/tests/integration/helpers.bash +++ b/tests/integration/helpers.bash @@ -40,6 +40,9 @@ CGROUP_CPU_BASE_PATH=$(grep "cgroup" /proc/self/mountinfo | gawk 'toupper($NF) ~ KMEM="${CGROUP_MEMORY_BASE_PATH}/memory.kmem.limit_in_bytes" RT_PERIOD="${CGROUP_CPU_BASE_PATH}/cpu.rt_period_us" +# Check if we're in rootless mode. +ROOTLESS=$(id -u) + # Wrapper for runc. function runc() { run __runc "$@" @@ -68,7 +71,12 @@ function requires() { case $var in criu) if [ ! -e "$CRIU" ]; then - skip "Test requires ${var}." + skip "test requires ${var}" + fi + ;; + root) + if [ "$ROOTLESS" -ne 0 ]; then + skip "test requires ${var}" fi ;; cgroups_kmem) diff --git a/tests/integration/kill.bats b/tests/integration/kill.bats index a049de65708..74246fadbad 100644 --- a/tests/integration/kill.bats +++ b/tests/integration/kill.bats @@ -13,7 +13,6 @@ function teardown() { @test "kill detached busybox" { - # run busybox detached runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] diff --git a/tests/integration/pause.bats b/tests/integration/pause.bats index 2f46a6cae07..30d98b57dbb 100644 --- a/tests/integration/pause.bats +++ b/tests/integration/pause.bats @@ -12,6 +12,9 @@ function teardown() { } @test "runc pause and resume" { + # XXX: currently cgroups require root containers. + requires root + # run busybox detached runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] @@ -34,6 +37,9 @@ function teardown() { } @test "runc pause and resume with nonexist container" { + # XXX: currently cgroups require root containers. 
+ requires root + # run test_busybox detached runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] diff --git a/tests/integration/update.bats b/tests/integration/update.bats index 9aaf1b9c35f..4a6bf7fc492 100644 --- a/tests/integration/update.bats +++ b/tests/integration/update.bats @@ -50,7 +50,11 @@ function check_cgroup_value() { # TODO: test rt cgroup updating @test "update" { - requires cgroups_kmem + # XXX: currently cgroups require root containers. + # XXX: Also, this test should be split into separate sections so that we + # can skip kmem without skipping update tests overall. + requires cgroups_kmem root + # run a few busyboxes detached runc run -d --console-socket $CONSOLE_SOCKET test_update [ "$status" -eq 0 ] From ba38383a394b4b616b13ceab8f009bbc7a380f66 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 11 May 2016 17:45:00 +1000 Subject: [PATCH 8/8] tests: add rootless integration tests This adds targets for rootless integration tests, as well as all of the required setup in order to get the tests to run. This includes quite a few changes, because of a lot of assumptions about things running as root within the bats scripts (which is not true when setting up rootless containers). 
Signed-off-by: Aleksa Sarai --- Dockerfile | 7 +++++ Makefile | 11 +++++-- tests/integration/checkpoint.bats | 5 ++-- tests/integration/delete.bats | 2 ++ tests/integration/events.bats | 12 ++++++++ tests/integration/exec.bats | 3 ++ tests/integration/help.bats | 1 + tests/integration/helpers.bash | 23 +++++++++++---- tests/integration/ps.bats | 11 ++++++- tests/integration/spec.bats | 4 +-- tests/integration/start_detached.bats | 3 ++ tests/integration/start_hello.bats | 3 ++ tests/integration/state.bats | 42 ++++++++++++++++++++------- tests/integration/tty.bats | 14 +++++++++ 14 files changed, 117 insertions(+), 24 deletions(-) diff --git a/Dockerfile b/Dockerfile index c971448ccbc..fd9be94c098 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,7 @@ RUN echo 'deb http://httpredir.debian.org/debian jessie-backports main' > /etc/a RUN apt-get update && apt-get install -y \ build-essential \ curl \ + sudo \ gawk \ iptables \ jq \ @@ -22,6 +23,12 @@ RUN apt-get update && apt-get install -y \ --no-install-recommends \ && apt-get clean +# Add a dummy user for the rootless integration tests. While runC does +# not require an entry in /etc/passwd to operate, one of the tests uses +# `git clone` -- and `git clone` does not allow you to clone a +# repository if the current uid does not have an entry in /etc/passwd. +RUN useradd -u1000 -m -d/home/rootless -s/bin/bash rootless + # install bats RUN cd /tmp \ && git clone https://github.com/sstephenson/bats.git \ diff --git a/Makefile b/Makefile index 5fff5151bf1..1cecca176f8 100644 --- a/Makefile +++ b/Makefile @@ -79,10 +79,10 @@ runcimage: docker build -t $(RUNC_IMAGE) . 
test: - make unittest integration + make unittest integration rootlessintegration localtest: - make localunittest localintegration + make localunittest localintegration localrootlessintegration unittest: runcimage docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest @@ -96,6 +96,13 @@ integration: runcimage localintegration: all bats -t tests/integration${TESTFLAGS} +rootlessintegration: runcimage + docker run -e TESTFLAGS -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) --cap-drop=ALL -u rootless $(RUNC_IMAGE) make localintegration + +# FIXME: This should not be separate from rootlessintegration's method of running. +localrootlessintegration: all + sudo -u rootless -H PATH="${PATH}" bats -t tests/integration${TESTFLAGS} + shell: all docker run -e TESTFLAGS -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash diff --git a/tests/integration/checkpoint.bats b/tests/integration/checkpoint.bats index e91fd651439..cd969a80695 100644 --- a/tests/integration/checkpoint.bats +++ b/tests/integration/checkpoint.bats @@ -59,8 +59,9 @@ function teardown() { [[ "${output}" == *"running"* ]] } -@test "checkpoint(pre-dump) and restore" { - requires criu +@test "checkpoint --pre-dump and restore" { + # XXX: currently criu require root containers. + requires criu root # criu does not work with external terminals so.. 
# setting terminal and root:readonly: to false diff --git a/tests/integration/delete.bats b/tests/integration/delete.bats index cdadd7dcce5..2c11e79b691 100644 --- a/tests/integration/delete.bats +++ b/tests/integration/delete.bats @@ -22,11 +22,13 @@ function teardown() { testcontainer test_busybox running runc kill test_busybox KILL + [ "$status" -eq 0 ] # wait for busybox to be in the destroyed state retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'" # delete test_busybox runc delete test_busybox + [ "$status" -eq 0 ] runc state test_busybox [ "$status" -ne 0 ] diff --git a/tests/integration/events.bats b/tests/integration/events.bats index 182b721b8af..23500733b45 100644 --- a/tests/integration/events.bats +++ b/tests/integration/events.bats @@ -12,6 +12,9 @@ function teardown() { } @test "events --stats" { + # XXX: currently cgroups require root containers. + requires root + # run busybox detached runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] @@ -27,6 +30,9 @@ function teardown() { } @test "events --interval default " { + # XXX: currently cgroups require root containers. + requires root + # run busybox detached runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] @@ -54,6 +60,9 @@ function teardown() { } @test "events --interval 1s " { + # XXX: currently cgroups require root containers. + requires root + # run busybox detached runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] @@ -80,6 +89,9 @@ function teardown() { } @test "events --interval 100ms " { + # XXX: currently cgroups require root containers. 
+ requires root + # run busybox detached runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] diff --git a/tests/integration/exec.bats b/tests/integration/exec.bats index ba60ea17183..f172f9bd88f 100644 --- a/tests/integration/exec.bats +++ b/tests/integration/exec.bats @@ -112,6 +112,9 @@ function teardown() { } @test "runc exec --user" { + # --user can't work in rootless containers + requires root + # run busybox detached runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] diff --git a/tests/integration/help.bats b/tests/integration/help.bats index ca404f342af..163de2d35cf 100644 --- a/tests/integration/help.bats +++ b/tests/integration/help.bats @@ -57,6 +57,7 @@ load helpers [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ resume+ ]] + # We don't use runc_spec here, because we're just testing the help page. runc spec -h [ "$status" -eq 0 ] [[ ${lines[1]} =~ runc\ spec+ ]] diff --git a/tests/integration/helpers.bash b/tests/integration/helpers.bash index 654833321b3..fc8c2904223 100644 --- a/tests/integration/helpers.bash +++ b/tests/integration/helpers.bash @@ -4,7 +4,7 @@ INTEGRATION_ROOT=$(dirname "$(readlink -f "$BASH_SOURCE")") RUNC="${INTEGRATION_ROOT}/../../runc" RECVTTY="${INTEGRATION_ROOT}/../../contrib/cmd/recvtty/recvtty" -GOPATH="${INTEGRATION_ROOT}/../../../.." +GOPATH="$(mktemp -d --tmpdir runc-integration-gopath.XXXXXX)" # Test data path. TESTDATA="${INTEGRATION_ROOT}/testdata" @@ -27,7 +27,7 @@ KERNEL_MINOR="${KERNEL_VERSION#$KERNEL_MAJOR.}" KERNEL_MINOR="${KERNEL_MINOR%%.*}" # Root state path. -ROOT="$BATS_TMPDIR/runc" +ROOT=$(mktemp -d "$BATS_TMPDIR/runc.XXXXXX") # Path to console socket. CONSOLE_SOCKET="$BATS_TMPDIR/console.sock" @@ -58,6 +58,17 @@ function __runc() { "$RUNC" --root "$ROOT" "$@" } +# Wrapper for runc spec. 
+function runc_spec() { + local args="" + + if [ "$ROOTLESS" -ne 0 ]; then + args+="--rootless" + fi + + runc spec $args "$@" +} + # Fails the current test, providing the error given. function fail() { echo "$@" >&2 @@ -187,18 +198,18 @@ function setup_busybox() { if [ ! -e $BUSYBOX_IMAGE ]; then curl -o $BUSYBOX_IMAGE -sSL 'https://github.com/docker-library/busybox/raw/a0558a9006ce0dd6f6ec5d56cfd3f32ebeeb815f/glibc/busybox.tar.xz' fi - tar -C "$BUSYBOX_BUNDLE"/rootfs -xf "$BUSYBOX_IMAGE" + tar --exclude './dev/*' -C "$BUSYBOX_BUNDLE"/rootfs -xf "$BUSYBOX_IMAGE" cd "$BUSYBOX_BUNDLE" - runc spec + runc_spec } function setup_hello() { setup_recvtty run mkdir "$HELLO_BUNDLE" run mkdir "$HELLO_BUNDLE"/rootfs - tar -C "$HELLO_BUNDLE"/rootfs -xf "$HELLO_IMAGE" + tar --exclude './dev/*' -C "$HELLO_BUNDLE"/rootfs -xf "$HELLO_IMAGE" cd "$HELLO_BUNDLE" - runc spec + runc_spec sed -i 's;"sh";"/hello";' config.json } diff --git a/tests/integration/ps.bats b/tests/integration/ps.bats index 7a200150daa..c000af66304 100644 --- a/tests/integration/ps.bats +++ b/tests/integration/ps.bats @@ -12,6 +12,9 @@ function teardown() { } @test "ps" { + # ps is not supported, it requires cgroups + requires root + # start busybox detached runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] @@ -24,10 +27,13 @@ function teardown() { runc ps test_busybox [ "$status" -eq 0 ] [[ ${lines[0]} =~ UID\ +PID\ +PPID\ +C\ +STIME\ +TTY\ +TIME\ +CMD+ ]] - [[ "${lines[1]}" == *"root"*[0-9]* ]] + [[ "${lines[1]}" == *"$(id -un 2>/dev/null)"*[0-9]* ]] } @test "ps -f json" { + # ps is not supported, it requires cgroups + requires root + # start busybox detached runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] @@ -43,6 +49,9 @@ function teardown() { } @test "ps -e -x" { + # ps is not supported, it requires cgroups + requires root + # start busybox detached runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] diff --git 
a/tests/integration/spec.bats b/tests/integration/spec.bats index 79bb6907651..e9f28fbfdd2 100644 --- a/tests/integration/spec.bats +++ b/tests/integration/spec.bats @@ -26,7 +26,7 @@ function teardown() { [ ! -e config.json ] # test generation of spec does not return an error - runc spec + runc_spec [ "$status" -eq 0 ] # test generation of spec created our config.json (spec) @@ -51,7 +51,7 @@ function teardown() { [ ! -e "$HELLO_BUNDLE"/config.json ] # test generation of spec does not return an error - runc spec --bundle "$HELLO_BUNDLE" + runc_spec --bundle "$HELLO_BUNDLE" [ "$status" -eq 0 ] # test generation of spec created our config.json (spec) diff --git a/tests/integration/start_detached.bats b/tests/integration/start_detached.bats index 605fde225a7..08036ddd9e2 100644 --- a/tests/integration/start_detached.bats +++ b/tests/integration/start_detached.bats @@ -23,6 +23,9 @@ function teardown() { } @test "runc run detached ({u,g}id != 0)" { + # cannot start containers as another user in rootless setup + requires root + # replace "uid": 0 with "uid": 1000 # and do a similar thing for gid. sed -i 's;"uid": 0;"uid": 1000;g' config.json diff --git a/tests/integration/start_hello.bats b/tests/integration/start_hello.bats index 6de65e07e71..2e935728085 100644 --- a/tests/integration/start_hello.bats +++ b/tests/integration/start_hello.bats @@ -21,6 +21,9 @@ function teardown() { } @test "runc run ({u,g}id != 0)" { + # cannot start containers as another user in rootless setup + requires root + # replace "uid": 0 with "uid": 1000 # and do a similar thing for gid. 
sed -i 's;"uid": 0;"uid": 1000;g' config.json diff --git a/tests/integration/state.bats b/tests/integration/state.bats index eed2eb3c4ac..3772c1e5a91 100644 --- a/tests/integration/state.bats +++ b/tests/integration/state.bats @@ -11,7 +11,37 @@ function teardown() { teardown_busybox } -@test "state" { +@test "state (kill + delete)" { + runc state test_busybox + [ "$status" -ne 0 ] + + # run busybox detached + runc run -d --console-socket $CONSOLE_SOCKET test_busybox + [ "$status" -eq 0 ] + + # check state + wait_for_container 15 1 test_busybox + + testcontainer test_busybox running + + runc kill test_busybox KILL + [ "$status" -eq 0 ] + + # wait for busybox to reach the 'stopped' state + retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'" + + # delete test_busybox + runc delete test_busybox + [ "$status" -eq 0 ] + + runc state test_busybox + [ "$status" -ne 0 ] +} + +@test "state (pause + resume)" { + # XXX: pause and resume require cgroups. + requires root + runc state test_busybox [ "$status" -ne 0 ] @@ -37,14 +67,4 @@ function teardown() { # test state of busybox is back to running testcontainer test_busybox running - - runc kill test_busybox KILL - # wait for busybox to be in the destroyed state - retry 10 1 eval "__runc state test_busybox | grep -q 'stopped'" - - # delete test_busybox - runc delete test_busybox - - runc state test_busybox - [ "$status" -ne 0 ] } diff --git a/tests/integration/tty.bats b/tests/integration/tty.bats index b9a1f108e20..9e817dbf873 100644 --- a/tests/integration/tty.bats +++ b/tests/integration/tty.bats @@ -24,6 +24,10 @@ function teardown() { } @test "runc run [tty owner]" { + # tty chmod is not doable in rootless containers. + # TODO: this can be made as a change to the gid test. + requires root + # Replace sh script with stat.
sed -i 's/"sh"/"sh", "-c", "stat -c %u:%g $(tty) | tr : \\\\\\\\n"/' config.json @@ -36,6 +40,9 @@ function teardown() { } @test "runc run [tty owner] ({u,g}id != 0)" { + # tty chmod is not doable in rootless containers. + requires root + # replace "uid": 0 with "uid": 1000 # and do a similar thing for gid. sed -i 's;"uid": 0;"uid": 1000;g' config.json @@ -72,6 +79,10 @@ function teardown() { } @test "runc exec [tty owner]" { + # tty chmod is not doable in rootless containers. + # TODO: this can be made as a change to the gid test. + requires root + # run busybox detached runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] @@ -90,6 +101,9 @@ function teardown() { } @test "runc exec [tty owner] ({u,g}id != 0)" { + # tty chmod is not doable in rootless containers. + requires root + # replace "uid": 0 with "uid": 1000 # and do a similar thing for gid. sed -i 's;"uid": 0;"uid": 1000;g' config.json