Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[rootless] Support detach netns #2535

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,30 +115,51 @@ jobs:
- ubuntu: 20.04
containerd: v1.6.24
target: test-integration-rootless
rootlesskit: v1.1.1
- ubuntu: 20.04
containerd: v1.7.7
target: test-integration-rootless
rootlesskit: v1.1.1
- ubuntu: 22.04
containerd: v1.7.7
target: test-integration-rootless
rootlesskit: v1.1.1
- ubuntu: 22.04
containerd: main
target: test-integration-rootless
rootlesskit: v1.1.1
- ubuntu: 20.04
containerd: v1.7.7
target: test-integration-rootless
rootlesskit: v2.0.0-alpha.0
- ubuntu: 22.04
containerd: v1.7.7
target: test-integration-rootless
rootlesskit: v2.0.0-alpha.0
- ubuntu: 22.04
containerd: main
target: test-integration-rootless
rootlesskit: v2.0.0-alpha.0
- ubuntu: 20.04
containerd: v1.6.24
target: test-integration-rootless-port-slirp4netns
rootlesskit: v1.1.1
- ubuntu: 20.04
containerd: v1.7.7
target: test-integration-rootless-port-slirp4netns
rootlesskit: v1.1.1
- ubuntu: 22.04
containerd: v1.7.7
target: test-integration-rootless-port-slirp4netns
rootlesskit: v1.1.1
- ubuntu: 22.04
containerd: main
target: test-integration-rootless-port-slirp4netns
rootlesskit: v1.1.1
env:
UBUNTU_VERSION: "${{ matrix.ubuntu }}"
CONTAINERD_VERSION: "${{ matrix.containerd }}"
ROOTLESSKIT_VERSION: "${{ matrix.rootlesskit }}"
TEST_TARGET: "${{ matrix.target }}"
steps:
- uses: actions/checkout@v4.1.1
Expand All @@ -147,7 +168,7 @@ jobs:
- name: "Register QEMU (tonistiigi/binfmt)"
run: docker run --privileged --rm tonistiigi/binfmt --install all
- name: "Prepare (network driver=slirp4netns, port driver=builtin)"
run: DOCKER_BUILDKIT=1 docker build -t ${TEST_TARGET} --target ${TEST_TARGET} --build-arg UBUNTU_VERSION=${UBUNTU_VERSION} --build-arg CONTAINERD_VERSION=${CONTAINERD_VERSION} .
run: DOCKER_BUILDKIT=1 docker build -t ${TEST_TARGET} --target ${TEST_TARGET} --build-arg UBUNTU_VERSION=${UBUNTU_VERSION} --build-arg CONTAINERD_VERSION=${CONTAINERD_VERSION} --build-arg ROOTLESSKIT_VERSION=${ROOTLESSKIT_VERSION} .
fahedouch marked this conversation as resolved.
Show resolved Hide resolved
- name: "Test (network driver=slirp4netns, port driver=builtin)"
run: docker run -t --rm --privileged -e WORKAROUND_ISSUE_622=1 ${TEST_TARGET}

Expand Down
6 changes: 6 additions & 0 deletions Dockerfile.d/SHA256SUMS.d/rootlesskit-v2.0.0-alpha.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
74baab4363ff68d060c788605f5bc5979d3c662f0351b033ae874a3a72e6ebce rootlesskit-aarch64.tar.gz
630dce1a26263d6a7a9461656f3e004c63386b7d4ca71fdaaa37cd075e0f677c rootlesskit-armv7l.tar.gz
4377eb874bb202b7a00354b0924898de81d818753ac730cee8d16262eadd5617 rootlesskit-ppc64le.tar.gz
92861409fa4db5e8344a1b5409ea4e5cb47fa7db706b4647ff627e15bc806ffc rootlesskit-riscv64.tar.gz
5ea02fba90e5656660aa7eca66aece2b5c3207e01d147495da2f55cfb4726663 rootlesskit-s390x.tar.gz
3db2ac3022efc7d030f48fb60a0d568e9dcf8700bb3e0c926e02a4b080caa629 rootlesskit-x86_64.tar.gz
12 changes: 12 additions & 0 deletions extras/rootless/containerd-rootless.sh
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,17 @@ if [ -z $_CONTAINERD_ROOTLESS_CHILD ]; then
export _CONTAINERD_ROOTLESS_SELINUX
fi
fi

# Decide whether to pass --detach-netns to RootlessKit.
# detachNetns stays empty when rootlesskit is not found in PATH or does not
# advertise the flag, so the unquoted $detachNetns in the rootlesskit
# invocation expands to no argument at all.
detachNetns=
if command -v rootlesskit >/dev/null 2>&1; then
	# If --detach-netns is present in --help, rootlesskit is >= v2.0.0.
	if rootlesskit --help | grep -qw -- --detach-netns; then
		detachNetns="--detach-netns"
	else
		echo "rootlesskit found but seems older than v2.0.0. Network namespace will be kept attached."
	fi
fi

# Re-exec the script via RootlessKit, so as to create unprivileged {user,mount,network} namespaces.
#
# --copy-up allows removing/creating files in the directories by creating tmpfs and symlinks
Expand All @@ -116,6 +127,7 @@ if [ -z $_CONTAINERD_ROOTLESS_CHILD ]; then
# * /run: copy-up is required so that we can create /run/containerd (hardcoded) in our namespace
# * /var/lib: copy-up is required so that we can create /var/lib/containerd in our namespace
exec rootlesskit \
$detachNetns \
--state-dir=$CONTAINERD_ROOTLESS_ROOTLESSKIT_STATE_DIR \
--net=$net --mtu=$mtu \
--slirp4netns-sandbox=$CONTAINERD_ROOTLESS_ROOTLESSKIT_SLIRP4NETNS_SANDBOX \
Expand Down
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ require (
github.com/pelletier/go-toml/v2 v2.1.0
github.com/rootless-containers/bypass4netns v0.3.0
github.com/rootless-containers/rootlesskit v1.1.1
github.com/rootless-containers/rootlesskit/v2 v2.0.0-alpha.1
github.com/spf13/cobra v1.7.0
github.com/spf13/pflag v1.0.5
github.com/tidwall/gjson v1.17.0
Expand Down Expand Up @@ -84,7 +85,7 @@ require (
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/google/uuid v1.3.1 // indirect
github.com/imdario/mergo v0.3.16 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/klauspost/compress v1.17.2
Expand Down
6 changes: 4 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,8 @@ github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/pprof v0.0.0-20230323073829-e72429f035bd h1:r8yyd+DJDmsUhGrRBxH5Pj7KeFK5l+Y3FsgT8keqKtk=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4=
github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
Expand Down Expand Up @@ -263,6 +263,8 @@ github.com/rootless-containers/bypass4netns v0.3.0 h1:UwI55zWDZz7OGyN4YWgfCKdsI5
github.com/rootless-containers/bypass4netns v0.3.0/go.mod h1:IXHPjkQlJRygNBCN0hSSR3ITX6kDKr3aAaGHx6APd+g=
github.com/rootless-containers/rootlesskit v1.1.1 h1:F5psKWoWY9/VjZ3ifVcaosjvFZJOagX85U22M0/EQZE=
github.com/rootless-containers/rootlesskit v1.1.1/go.mod h1:UD5GoA3dqKCJrnvnhVgQQnweMF2qZnf9KLw8EewcMZI=
github.com/rootless-containers/rootlesskit/v2 v2.0.0-alpha.1 h1:EUh0kAOAmbKw2wlrYDvMgqrOix+XmPP6S8ouAxBb1fM=
github.com/rootless-containers/rootlesskit/v2 v2.0.0-alpha.1/go.mod h1:TOmphx2+hH4/98eGg0/ZXVcU8KWcvfymtQnt7Y2XSp0=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
Expand Down
17 changes: 16 additions & 1 deletion pkg/cmd/container/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ import (
"github.com/containerd/nerdctl/pkg/namestore"
"github.com/containerd/nerdctl/pkg/platformutil"
"github.com/containerd/nerdctl/pkg/referenceutil"
"github.com/containerd/nerdctl/pkg/rootlessutil"
"github.com/containerd/nerdctl/pkg/strutil"
dockercliopts "github.com/docker/cli/opts"
dockeropts "github.com/docker/docker/opts"
Expand Down Expand Up @@ -282,6 +283,20 @@ func Create(ctx context.Context, client *containerd.Client, args []string, netMa

opts = append(opts, propagateContainerdLabelsToOCIAnnotations())

detachNetNs, err := rootlessutil.DetectRootlesskitFeature("--detach-netns")
if err != nil {
return nil, nil, err
}
if rootlessutil.IsRootlessChild() && detachNetNs {
stateDir, err := rootlessutil.RootlessKitStateDir()
if err != nil {
return nil, nil, err
}
if err := newContainerDetachNetNs(stateDir, id, &opts); err != nil {
return nil, nil, err
}
}

var s specs.Spec
spec := containerd.WithSpec(&s, opts...)

Expand Down Expand Up @@ -418,7 +433,7 @@ func withNerdctlOCIHook(cmd string, args []string) (oci.SpecOpts, error) {
args = append([]string{cmd}, append(args, "internal", "oci-hook")...)
return func(_ context.Context, _ oci.Client, _ *containers.Container, s *specs.Spec) error {
if s.Hooks == nil {
s.Hooks = &specs.Hooks{}
s.Hooks = new(specs.Hooks)
}
crArgs := append(args, "createRuntime")
s.Hooks.CreateRuntime = append(s.Hooks.CreateRuntime, specs.Hook{
Expand Down
41 changes: 41 additions & 0 deletions pkg/cmd/container/create_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
Copyright The containerd Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package container

import (
"fmt"
"path/filepath"

"github.com/containerd/containerd/oci"
"github.com/containernetworking/plugins/pkg/ns"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/rootless-containers/rootlesskit/v2/pkg/child"
)

// newContainerDetachNetNs sets up a per-container network namespace path under
// stateDir and appends a spec option that points the container at it.
//
// The work runs inside ns.WithNetNSPath against <stateDir>/netns. Within that
// callback, child.NewNetNsWithPathWithoutEnter is called with
// <stateDir>/netns-<id> (presumably creating the namespace at that path
// without switching into it; confirm against rootlesskit's pkg/child).
// Only after that call succeeds is an oci.WithLinuxNamespace option of type
// specs.NetworkNamespace with that path appended to *opts. Errors are
// propagated to the caller.
func newContainerDetachNetNs(stateDir, id string, opts *[]oci.SpecOpts) error {
	var (
		parentNetNs = filepath.Join(stateDir, "netns")
		nsPath      = filepath.Join(stateDir, fmt.Sprintf("netns-%s", id))
	)

	setup := func(_ ns.NetNS) error {
		if err := child.NewNetNsWithPathWithoutEnter(nsPath); err != nil {
			return err
		}
		netNsOpt := oci.WithLinuxNamespace(specs.LinuxNamespace{
			Type: specs.NetworkNamespace,
			Path: nsPath,
		})
		*opts = append(*opts, netNsOpt)
		return nil
	}

	return ns.WithNetNSPath(parentNetNs, setup)
}
26 changes: 26 additions & 0 deletions pkg/cmd/container/create_other.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
//go:build !linux

/*
Copyright The containerd Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package container

import "github.com/containerd/containerd/oci"

// newContainerDetachNetNs is the stub used on non-Linux builds: it ignores
// all of its arguments and always returns nil.
func newContainerDetachNetNs(_, _ string, _ *[]oci.SpecOpts) error {
	// no-op for !linux
	return nil
}
32 changes: 28 additions & 4 deletions pkg/ocihook/ocihook.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ import (
"path/filepath"
"strings"

"runtime"

gocni "github.com/containerd/go-cni"
"github.com/containerd/log"
"github.com/containerd/nerdctl/pkg/bypass4netnsutil"
Expand All @@ -38,9 +40,9 @@ import (
"github.com/containerd/nerdctl/pkg/rootlessutil"
types100 "github.com/containernetworking/cni/pkg/types/100"
"github.com/opencontainers/runtime-spec/specs-go"

b4nndclient "github.com/rootless-containers/bypass4netns/pkg/api/daemon/client"
rlkclient "github.com/rootless-containers/rootlesskit/pkg/api/client"
"github.com/vishvananda/netns"
)

const (
Expand Down Expand Up @@ -86,6 +88,24 @@ func Run(stdin io.Reader, stderr io.Writer, event, dataStore, cniPath, cniNetcon
return err
}

detachNetNs, err := rootlessutil.DetectRootlesskitFeature("--detach-netns")
if err != nil {
return err
}
if rootlessutil.IsRootlessChild() && detachNetNs {
stateDir, err := rootlessutil.RootlessKitStateDir()
if err != nil {
return err
}
ns, err := netns.GetFromPath(filepath.Join(stateDir, "netns"))
if err != nil {
return err
}
if err = netns.Set(ns); err != nil {
return fmt.Errorf("switch to detached netns: %w", err)
}
}

switch event {
case "createRuntime":
return onCreateRuntime(opts)
Expand Down Expand Up @@ -268,16 +288,15 @@ func getExtraHosts(state *specs.State) (map[string]string, error) {

func getNetNSPath(state *specs.State) (string, error) {
// If we have a network-namespace annotation we use it over the passed Pid.
netNsPath, netNsFound := state.Annotations[NetworkNamespace]
if netNsFound {
if netNsPath, netNsFound := state.Annotations[NetworkNamespace]; netNsFound {
if _, err := os.Stat(netNsPath); err != nil {
return "", err
}

return netNsPath, nil
}

if state.Pid == 0 && !netNsFound {
if state.Pid == 0 {
return "", errors.New("both state.Pid and the netNs annotation are unset")
}

Expand Down Expand Up @@ -403,10 +422,15 @@ func onCreateRuntime(opts *handlerOpts) error {
ExtraHosts: opts.extraHosts,
Name: opts.state.Annotations[labels.Name],
}
runtime.LockOSThread()
// nsenter verified: at this point we are in the detached network namespace
// nsPath verified: it points to the nested detached network namespace
// the user namespace is the detached user namespace
cniRes, err := opts.cni.Setup(ctx, opts.fullID, nsPath, namespaceOpts...)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

 time="2023-10-29T13:30:10Z" level=fatal msg="failed to create shim task: OCI runtime create failed: runc create failed: unable to start container process: error during container init: error running hook #0: error running hook: exit status 1, stdout: , stderr: time=\"2023-10-29T13:30:10Z\" level=fatal msg=\"failed to call cni.Setup: plugin type=\\\"firewall\\\" failed (add): failed to list iptables chains: running [/usr/sbin/iptables -t filter -S --wait]: exit status 3: iptables v1.8.4 (legacy): can't initialize iptables table `filter': Permission denied (you must be root)\\nPerhaps iptables or your kernel needs to be upgraded.\\n\"\nFailed to write to log, write /home/rootless/.local/share/nerdctl/1935db59/containers/nerdctl-test/0bb9ad1b47ae32ef3ffda72250edcf338ccbadf8d9572d9503e67faded379d57/oci-hook.createRuntime.log: file already closed: unknown"

https://github.com/containerd/nerdctl/actions/runs/6683660354/job/18160087021?pr=2535

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you dump /proc/self/status right after netns.Set to see if CAP_NET_ADMIN is gained in the namespaces?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we need to re-exec the ocihook process to gain the caps

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

@fahedouch fahedouch Oct 31, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you dump /proc/self/status right after netns.Set to see if CAP_NET_ADMIN is gained in the namespaces?

SigPnd: 0000000000000000
ShdPnd: 0000000000000000
SigBlk: 0000000000000000
SigIgn: 0000000000000000
SigCgt: fffffffd7fc1feff
CapInh: 0000000000000000
CapPrm: 000001ffffffffff
CapEff: 000001ffffffffff
CapBnd: 000001ffffffffff
CapAmb: 0000000000000000
 capsh --decode=000001ffffffffff
0x000001ffffffffff=cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read,cap_perfmon,cap_bpf,cap_checkpoint_restore

@AkihiroSuda cap_net_admin is already present right after netns.Set

Copy link
Member Author

@fahedouch fahedouch Oct 31, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

	if rootlessutil.IsRootlessChild() && detachNetNs {
		stateDir, err := rootlessutil.RootlessKitStateDir()
		if err != nil {
			return err
		}
		ns, err := netns.GetFromPath(filepath.Join(stateDir, "netns"))
		if err != nil {
			return err
		}
		if err = netns.Set(ns); err != nil {
			return fmt.Errorf("switch to detached netns: %w", err)
		}
		ok, err := rootlessutil.HasCaps()
		if err != nil {
			return err
		}
		fmt.Println(ok)

hasCaps()
the above code is printing true

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some progress in:

We can just run the hook with nsenter -n/detached/netns -- nerdctl internal oci-hook ... and call it a day.

if err != nil {
return fmt.Errorf("failed to call cni.Setup: %w", err)
}
runtime.UnlockOSThread()
cniResRaw := cniRes.Raw()
for i, cniName := range opts.cniNames {
hsMeta.Networks[cniName] = cniResRaw[i]
Expand Down
12 changes: 11 additions & 1 deletion pkg/rootlessutil/parent_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,25 @@ func ParentMain(hostGatewayIP string) error {
if err != nil {
return err
}

// args are compatible with both util-linux nsenter and busybox nsenter
args := []string{
"-r/", // root dir (busybox nsenter wants this to be explicitly specified),
"-w" + wd, // work dir
"--preserve-credentials",
"-m", "-n", "-U",
"-m", "-U",
"-t", strconv.Itoa(childPid),
"-F", // no fork
}

detachNetNs, err := DetectRootlesskitFeature("--detach-netns")
if err != nil {
return err
}
if !detachNetNs {
args = append(args, "-n")
}

args = append(args, os.Args...)
log.L.Debugf("rootless parent main: executing %q with %v", arg0, args)

Expand Down
24 changes: 24 additions & 0 deletions pkg/rootlessutil/parent_other.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//go:build !linux

/*
Copyright The containerd Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package rootlessutil

// RootlessKitStateDir is the stub used on non-Linux builds: it always returns
// an empty path together with a nil error.
func RootlessKitStateDir() (string, error) {
	// no-op for !linux
	return "", nil
}
Loading