Skip to content

Commit

Permalink
setns then cloneflags when joining user namespace
Browse files Browse the repository at this point in the history
For now, when a custom user namespace is required, we join all existing
namespaces first before creating new ones. Idea: this can probably be
generalized so that we perform all setns calls first and clone new namespaces
afterwards, but that probably also requires setting the uid/gid maps in C.

Signed-off-by: Daniel, Dao Quang Minh <dqminh89@gmail.com>
  • Loading branch information
dqminh committed Jul 13, 2015
1 parent d07e5fc commit 652a368
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 26 deletions.
8 changes: 8 additions & 0 deletions libcontainer/configs/namespaces_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,11 @@ func (n *Namespaces) index(t NamespaceType) int {
// Contains reports whether a namespace of type t is present, i.e. whether
// index finds an entry for it (index signals "absent" with -1).
func (n *Namespaces) Contains(t NamespaceType) bool {
	idx := n.index(t)
	return idx != -1
}

// PathOf returns the Path of the entry for namespace type t. When index
// reports that no such entry exists (-1), the empty string is returned.
func (n *Namespaces) PathOf(t NamespaceType) string {
	if pos := n.index(t); pos != -1 {
		return (*n)[pos].Path
	}
	return ""
}
40 changes: 24 additions & 16 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,10 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
}

func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
// set init process environment
env := []string{"_LIBCONTAINER_INITTYPE=standard"}
var doClone bool

cloneFlags := c.config.Namespaces.CloneFlags()
if cloneFlags&syscall.CLONE_NEWUSER != 0 {
if err := c.addUidGidMappings(cmd.SysProcAttr); err != nil {
Expand All @@ -168,17 +172,23 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
cmd.SysProcAttr.Credential = &syscall.Credential{}
}
}
cmd.SysProcAttr.Cloneflags = cloneFlags
// If we are required to create a new user namespace (no path to an existing
// one is configured), delegate to the Go implementation so that the uid/gid
// mappings are set in the standard way. Otherwise pass the clone flags to
// nsexec, because no additional setup is needed when starting the new process.
if c.config.Namespaces.PathOf(configs.NEWUSER) == "" {
cmd.SysProcAttr.Cloneflags = cloneFlags
} else {
// let nsexec clone namespaces instead of go
doClone = true
cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_CLONEFLAGS=%d",
cloneFlags))
}

// set init process environment
env := []string{"_LIBCONTAINER_INITTYPE=standard"}
var joinNamespaces configs.Namespaces
var doClone bool
nsMaps := make(map[configs.NamespaceType]string)
for _, ns := range c.config.Namespaces {
if ns.Path != "" {
nsMaps[ns.Type] = ns.Path
joinNamespaces = append(joinNamespaces, ns)
if ns.Type == configs.NEWPID {
doClone = true
}
Expand All @@ -198,13 +208,12 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
cmd.Env = append(cmd.Env, env...)

return &initProcess{
cmd: cmd,
childPipe: childPipe,
parentPipe: parentPipe,
manager: c.cgroupManager,
config: c.newInitConfig(p),
joinNamespaces: joinNamespaces,
doClone: doClone,
cmd: cmd,
childPipe: childPipe,
parentPipe: parentPipe,
manager: c.cgroupManager,
config: c.newInitConfig(p),
doClone: doClone,
}, nil
}

Expand Down Expand Up @@ -829,9 +838,8 @@ func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceTyp
configs.NEWPID,
configs.NEWNS,
}
// For now, only join user namespace if this is an exec in process and the
// container supports user namespace
if !doInit && c.config.Namespaces.Contains(configs.NEWUSER) {
// join userns if the init process explicitly requires NEWUSER
if c.config.Namespaces.Contains(configs.NEWUSER) {
nsTypes = append(nsTypes, configs.NEWUSER)
}
for _, nsType := range nsTypes {
Expand Down
91 changes: 91 additions & 0 deletions libcontainer/integration/exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -916,3 +916,94 @@ func TestInitJoinPID(t *testing.T) {
t.Errorf("unexpected running process, output %q", out)
}
}

// TestInitJoinNetworkAndUser starts a first container that requests a new
// user namespace, then starts a second container whose config points at the
// first container's network and user namespace paths. It verifies that the
// init processes of both containers resolve to the same "net" and "user"
// namespace links under /proc/<pid>/ns, while the complete sets of namespace
// paths reported by the two container states still differ.
func TestInitJoinNetworkAndUser(t *testing.T) {
// Skip when /proc/self/ns/user does not exist on this system.
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
t.Skip("userns is unsupported")
}
// In short mode the test returns immediately without running anything.
if testing.Short() {
return
}
rootfs, err := newRootfs()
ok(t, err)
defer remove(rootfs)

// Start the first container with a new user namespace and the same
// uid/gid mappings that the second container is configured with below.
config1 := newTemplateConfig(rootfs)
config1.UidMappings = []configs.IDMap{{0, 0, 1000}}
config1.GidMappings = []configs.IDMap{{0, 0, 1000}}
config1.Namespaces = append(config1.Namespaces, configs.Namespace{Type: configs.NEWUSER})
container1, err := newContainer(config1)
ok(t, err)
defer container1.Destroy()

// The init process is `cat` reading from a pipe; the write end is kept
// open here and closed at the end of the test to stop the container.
stdinR1, stdinW1, err := os.Pipe()
ok(t, err)
init1 := &libcontainer.Process{
Args: []string{"cat"},
Env: standardEnvironment,
Stdin: stdinR1,
}
err = container1.Start(init1)
stdinR1.Close()
defer stdinW1.Close()
ok(t, err)

// Read the first container's state to obtain the paths of its network
// and user namespaces.
state1, err := container1.State()
ok(t, err)
netns1 := state1.NamespacePaths[configs.NEWNET]
userns1 := state1.NamespacePaths[configs.NEWUSER]

// Start a second container that joins the first container's network and
// user namespaces, using its own rootfs and a different cgroup name.
rootfs2, err := newRootfs()
ok(t, err)
defer remove(rootfs2)

config2 := newTemplateConfig(rootfs2)
config2.UidMappings = []configs.IDMap{{0, 0, 1000}}
config2.GidMappings = []configs.IDMap{{0, 0, 1000}}
config2.Namespaces.Add(configs.NEWNET, netns1)
config2.Namespaces.Add(configs.NEWUSER, userns1)
config2.Cgroups.Name = "test2"
container2, err := newContainerWithName("testCT2", config2)
ok(t, err)
defer container2.Destroy()

// Same stdin-pipe arrangement as for the first container.
stdinR2, stdinW2, err := os.Pipe()
ok(t, err)
init2 := &libcontainer.Process{
Args: []string{"cat"},
Env: standardEnvironment,
Stdin: stdinR2,
}
err = container2.Start(init2)
stdinR2.Close()
defer stdinW2.Close()
ok(t, err)
// Read the state of the second container.
state2, err := container2.State()
ok(t, err)

// The net and user namespace links of both init processes must be equal,
// which shows the second container joined them instead of creating new ones.
for _, ns := range []string{"net", "user"} {
ns1, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/%s", state1.InitProcessPid, ns))
ok(t, err)
ns2, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/%s", state2.InitProcessPid, ns))
ok(t, err)
if ns1 != ns2 {
t.Errorf("%s(%s), wanted %s", ns, ns2, ns1)
}
}

// Only net and user were joined, so the complete maps of namespace paths
// reported by the two containers must not be identical.
if reflect.DeepEqual(state2.NamespacePaths, state1.NamespacePaths) {
t.Errorf("Namespaces(%v), original %v", state2.NamespacePaths,
state1.NamespacePaths)
}
// Stop the init processes one by one by closing their stdin write ends.
// Stopping the second container should not stop the first. The deferred
// Close calls registered above run again afterwards on the already-closed
// files; their return values are not checked.
stdinW2.Close()
waitProcess(init2, t)
stdinW1.Close()
waitProcess(init1, t)
}
24 changes: 20 additions & 4 deletions libcontainer/nsenter/nsexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,15 @@ int setns(int fd, int nstype)
#endif
#endif

static int clone_parent(jmp_buf * env) __attribute__ ((noinline));
static int clone_parent(jmp_buf * env)
static int clone_parent(jmp_buf * env, int flags) __attribute__ ((noinline));
static int clone_parent(jmp_buf * env, int flags)
{
struct clone_arg ca;
int child;

ca.env = env;
child = clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca);
child = clone(child_func, ca.stack_ptr,
CLONE_PARENT | SIGCHLD | flags, &ca);

return child;
}
Expand All @@ -81,6 +82,7 @@ void nsexec()
jmp_buf env;
char buf[PATH_MAX], *val, *nspaths;
int nsLen, child, len, pipenum, consolefd = -1;
int cloneflags;
char *console;

// _LIBCONTAINER_NSPATH if exists is a comma-separated list of namespaces
Expand All @@ -89,6 +91,20 @@ void nsexec()
if (nspaths == NULL) {
return;
}
// _LIBCONTAINER_CLONEFLAGS is set when we want nsexec to setup namespaces
// after setns. Default to 0 which means namespaces will not be created
val = getenv("_LIBCONTAINER_CLONEFLAGS");
if (val == NULL) {
cloneflags = 0;
} else {
cloneflags = atoi(val);
snprintf(buf, sizeof(buf), "%d", cloneflags);
if (strcmp(val, buf)) {
pr_perror("Unable to parse _LIBCONTAINER_CLONEFLAGS");
exit(1);
}
}

// get the init pipe to communicate with parent
val = getenv("_LIBCONTAINER_INITPIPE");
if (val == NULL) {
Expand Down Expand Up @@ -178,7 +194,7 @@ void nsexec()
// We must fork to actually enter the PID namespace, use CLONE_PARENT
// so the child can have the right parent, and we don't need to forward
// the child's exit code or resend its death signal.
child = clone_parent(&env);
child = clone_parent(&env, cloneflags);
if (child < 0) {
pr_perror("Unable to fork");
exit(1);
Expand Down
8 changes: 2 additions & 6 deletions libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
"syscall"

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
)

Expand Down Expand Up @@ -167,10 +166,7 @@ type initProcess struct {
container *linuxContainer
fds []string

// joinNamespaces are additional namespaces that the init process will join
// instead of creating new ones
joinNamespaces configs.Namespaces
doClone bool
doClone bool
}

func (p *initProcess) pid() int {
Expand Down Expand Up @@ -217,7 +213,7 @@ func (p *initProcess) start() error {
return newSystemError(err)
}
// if we need to clone a new child process
if len(p.joinNamespaces) > 0 && p.doClone {
if p.doClone {
if err := p.execSetns(); err != nil {
return newSystemError(err)
}
Expand Down

0 comments on commit 652a368

Please sign in to comment.