Skip to content

Commit

Permalink
Adjust user ns join order and become root in the ns after joining the user ns
Browse files Browse the repository at this point in the history
Fixes opencontainers#4466. In containerd, for a user ns pod, the net and user
namespaces are created before the container is started, and runc is asked to
join these two namespaces when starting the init process. This works on normal
systems, but not on systems that have SELinux enabled and a mount label configured.

We can resolve it in two steps:
1. Join the user ns after joining all other namespaces, because some of those
namespaces may not be owned by the user namespace;
2. Also become root in the namespace if we have joined a user ns path, just
as we do when unsharing a new user ns.

Signed-off-by: lifubang <lifubang@acmcoder.com>
  • Loading branch information
lifubang committed Oct 24, 2024
1 parent d545279 commit 09d2ef1
Showing 1 changed file with 25 additions and 4 deletions.
29 changes: 25 additions & 4 deletions libcontainer/nsenter/nsexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -444,9 +444,13 @@ void nl_free(struct nlconfig_t *config)
free(config->data);
}

void join_namespaces(char *nslist)
/*
Join all namespaces, except that the user ns is skipped for a rootful
container (issue: #4466). The user ns fd, if there is one, is returned so
that the caller can join it in the next step.
*/
int join_namespaces(char *nslist, bool rootless)
{
int num = 0, i;
int num = 0, userns_fd = -1, i;
char *saveptr = NULL;
char *namespace = strtok_r(nslist, ",", &saveptr);
struct namespace_t {
Expand Down Expand Up @@ -501,6 +505,12 @@ void join_namespaces(char *nslist)
struct namespace_t *ns = &namespaces[i];
int flag = nsflag(ns->type);

if (flag == CLONE_NEWUSER) {
userns_fd = ns->fd;
if (!rootless)
continue;
}

write_log(DEBUG, "setns(%#x) into %s namespace (with path %s)", flag, ns->type, ns->path);
if (setns(ns->fd, flag) < 0)
bail("failed to setns into %s namespace", ns->type);
Expand All @@ -509,6 +519,7 @@ void join_namespaces(char *nslist)
}

free(namespaces);
return userns_fd;
}

static inline int sane_kill(pid_t pid, int signum)
Expand Down Expand Up @@ -829,6 +840,8 @@ void nsexec(void)
case STAGE_CHILD:{
pid_t stage2_pid = -1;
enum sync_t s;
int userns_fd = -1;
bool has_userns = false;

/* For debugging. */
current_stage = STAGE_CHILD;
Expand All @@ -849,7 +862,7 @@ void nsexec(void)
* using cmsg(3) but that's just annoying.
*/
if (config.namespaces)
join_namespaces(config.namespaces);
userns_fd = join_namespaces(config.namespaces, config.is_rootless_euid);

/*
* Deal with user namespaces first. They are quite special, as they
Expand All @@ -870,6 +883,7 @@ void nsexec(void)
* in some scenarios. This also mirrors how LXC deals with this
* problem.
*/
has_userns = (config.cloneflags & CLONE_NEWUSER) || userns_fd > -1;
if (config.cloneflags & CLONE_NEWUSER) {
try_unshare(CLONE_NEWUSER, "user namespace");
config.cloneflags &= ~CLONE_NEWUSER;
Expand Down Expand Up @@ -907,7 +921,14 @@ void nsexec(void)
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
bail("failed to re-set process as non-dumpable");
}

}
if (userns_fd > -1 && !config.is_rootless_euid) {
if (setns(userns_fd, CLONE_NEWUSER) < 0) {
bail("failed to join user namespace");
}
close(userns_fd);
}
if (has_userns) {
/* Become root in the namespace proper. */
if (setresuid(0, 0, 0) < 0)
bail("failed to become root in user namespace");
Expand Down

0 comments on commit 09d2ef1

Please sign in to comment.