diff --git a/Dockerfile.tests b/Dockerfile.tests index f4d2ad0..3deea7f 100644 --- a/Dockerfile.tests +++ b/Dockerfile.tests @@ -1,20 +1,20 @@ -ARG LIBSLIRP_COMMIT=v4.3.0 - -FROM ubuntu:18.04 AS build -RUN apt update && apt install -y automake autotools-dev make gcc libglib2.0-dev libcap-dev libseccomp-dev git ninja-build python3-pip -RUN pip3 install meson -RUN git clone https://gitlab.freedesktop.org/slirp/libslirp.git /libslirp -WORKDIR /libslirp -ARG LIBSLIRP_COMMIT -RUN git pull && git checkout ${LIBSLIRP_COMMIT} && meson setup build && ninja -C build install -COPY . /slirp4netns -WORKDIR /slirp4netns -RUN chown -R 1000:1000 /slirp4netns -USER 1000:1000 -RUN ./autogen.sh && ./configure && make -j $(nproc) - -FROM build AS test -USER 0 -RUN apt update && apt install -y git libtool iproute2 clang clang-format clang-tidy iputils-ping iperf3 nmap jq -USER 1000:1000 -CMD ["make", "ci"] +ARG LIBSLIRP_COMMIT=v4.3.0 + +FROM ubuntu:18.04 AS build +RUN apt update && apt install -y automake autotools-dev make gcc libglib2.0-dev libcap-dev libseccomp-dev git ninja-build python3-pip +RUN pip3 install meson +RUN git clone https://gitlab.freedesktop.org/slirp/libslirp.git /libslirp +WORKDIR /libslirp +ARG LIBSLIRP_COMMIT +RUN git pull && git checkout ${LIBSLIRP_COMMIT} && meson setup build && ninja -C build install +COPY . 
/slirp4netns +WORKDIR /slirp4netns +RUN chown -R 1000:1000 /slirp4netns +USER 1000:1000 +RUN ./autogen.sh && ./configure && make -j $(nproc) + +FROM build AS test +USER 0 +RUN apt update && apt install -y git libtool iproute2 clang clang-format clang-tidy iputils-ping iperf3 nmap jq +USER 1000:1000 +CMD ["make", "ci"] diff --git a/Makefile.am b/Makefile.am index 0b8e2e8..1ac7485 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,68 +1,68 @@ -bin_PROGRAMS = slirp4netns - -AM_CFLAGS = @GLIB_CFLAGS@ @SLIRP_CFLAGS@ @LIBCAP_CFLAGS@ @LIBSECCOMP_CFLAGS@ - -noinst_LIBRARIES = libparson.a - -AM_TESTS_ENVIRONMENT = PATH="$(abs_top_builddir):$(PATH)" -TESTS = tests/test-slirp4netns.sh tests/test-slirp4netns-configure.sh tests/test-slirp4netns-exit-fd.sh tests/test-slirp4netns-ready-fd.sh tests/test-slirp4netns-api-socket.sh tests/test-slirp4netns-disable-host-loopback.sh tests/test-slirp4netns-cidr.sh - -EXTRA_DIST = \ - slirp4netns.1.md \ - slirp4netns.1 \ - $(TESTS) \ - tests/common.sh \ - slirp4netns.h \ - api.h \ - sandbox.h \ - seccompfilter.h \ - tests/slirp4netns-no-unmount.sh \ - vendor/parson/LICENSE \ - vendor/parson/README.md \ - vendor/parson/parson.h - -# define specific commit if git available or it was replaced during git-archive creation -COMMIT := $(shell V=$Format:%H$ ; \ - expr match "$$V" ormat: >/dev/null \ - && (cd "$$abs_srcdir" && [ -d .git ] && git describe --always --abbrev=0 --dirty --exclude=\* || echo unknown) \ - || echo "$$V" ) -DEFINE_COMMIT = -DCOMMIT="\"$(COMMIT)\"" - -slirp4netns_CFLAGS = $(AM_CFLAGS) $(DEFINE_COMMIT) -libparson_a_CFLAGS = $(AM_CFLAGS) -I$(abs_top_builddir)/vendor/parson -libparson_a_SOURCES = vendor/parson/parson.c - -slirp4netns_SOURCES = main.c slirp4netns.c api.c sandbox.c seccompfilter.c -slirp4netns_LDADD = libparson.a @GLIB_LIBS@ @SLIRP_LIBS@ @LIBSECCOMP_LIBS@ -lpthread -man1_MANS = slirp4netns.1 - -generate-man: - go-md2man -in slirp4netns.1.md -out slirp4netns.1 - -CLANGTIDY = clang-tidy -warnings-as-errors='*' - 
-CLANGFORMAT = clang-format - -lint: - $(CLANGTIDY) $(slirp4netns_SOURCES) -- $(AM_CFLAGS) - -lint-full: - $(CLANGTIDY) $(slirp4netns_SOURCES) $(libparson_a_SOURCES) -- $(AM_CFLAGS) - -indent: - $(CLANGFORMAT) -i $(slirp4netns_SOURCES) - -benchmark: - benchmarks/benchmark-iperf3.sh - benchmarks/benchmark-iperf3-reverse.sh - -ci: - $(MAKE) indent - git diff --exit-code -# TODO: make sure ./vendor is synced with ./vendor.sh - $(MAKE) lint - $(MAKE) -j $(shell nproc) distcheck || ( find . -name test-suite.log | xargs cat; exit 1 ) - PATH=$(shell pwd):$$PATH $(MAKE) benchmark MTU=1500 - PATH=$(shell pwd):$$PATH $(MAKE) benchmark MTU=65520 - -.PHONY: generate-man lint lint-full indent benchmark ci +bin_PROGRAMS = slirp4netns + +AM_CFLAGS = @GLIB_CFLAGS@ @SLIRP_CFLAGS@ @LIBCAP_CFLAGS@ @LIBSECCOMP_CFLAGS@ + +noinst_LIBRARIES = libparson.a + +AM_TESTS_ENVIRONMENT = PATH="$(abs_top_builddir):$(PATH)" +TESTS = tests/test-slirp4netns.sh tests/test-slirp4netns-configure.sh tests/test-slirp4netns-exit-fd.sh tests/test-slirp4netns-ready-fd.sh tests/test-slirp4netns-api-socket.sh tests/test-slirp4netns-disable-host-loopback.sh tests/test-slirp4netns-cidr.sh tests/test-slirp4netns-outbound-addr.sh tests/test-slirp4netns-disable-dns.sh + +EXTRA_DIST = \ + slirp4netns.1.md \ + slirp4netns.1 \ + $(TESTS) \ + tests/common.sh \ + slirp4netns.h \ + api.h \ + sandbox.h \ + seccompfilter.h \ + tests/slirp4netns-no-unmount.sh \ + vendor/parson/LICENSE \ + vendor/parson/README.md \ + vendor/parson/parson.h + +# define specific commit if git available or it was replaced during git-archive creation +COMMIT := $(shell V=$Format:%H$ ; \ + expr match "$$V" ormat: >/dev/null \ + && (cd "$$abs_srcdir" && [ -d .git ] && git describe --always --abbrev=0 --dirty --exclude=\* || echo unknown) \ + || echo "$$V" ) +DEFINE_COMMIT = -DCOMMIT="\"$(COMMIT)\"" + +slirp4netns_CFLAGS = $(AM_CFLAGS) $(DEFINE_COMMIT) +libparson_a_CFLAGS = $(AM_CFLAGS) -I$(abs_top_builddir)/vendor/parson +libparson_a_SOURCES = 
vendor/parson/parson.c + +slirp4netns_SOURCES = main.c slirp4netns.c api.c sandbox.c seccompfilter.c +slirp4netns_LDADD = libparson.a @GLIB_LIBS@ @SLIRP_LIBS@ @LIBSECCOMP_LIBS@ -lpthread +man1_MANS = slirp4netns.1 + +generate-man: + go-md2man -in slirp4netns.1.md -out slirp4netns.1 + +CLANGTIDY = clang-tidy -warnings-as-errors='*' + +CLANGFORMAT = clang-format + +lint: + $(CLANGTIDY) $(slirp4netns_SOURCES) -- $(AM_CFLAGS) + +lint-full: + $(CLANGTIDY) $(slirp4netns_SOURCES) $(libparson_a_SOURCES) -- $(AM_CFLAGS) + +indent: + $(CLANGFORMAT) -i $(slirp4netns_SOURCES) + +benchmark: + benchmarks/benchmark-iperf3.sh + benchmarks/benchmark-iperf3-reverse.sh + +ci: + $(MAKE) indent + git diff --exit-code +# TODO: make sure ./vendor is synced with ./vendor.sh + $(MAKE) lint + $(MAKE) -j $(shell nproc) distcheck || ( find . -name test-suite.log | xargs cat; exit 1 ) + PATH=$(shell pwd):$$PATH $(MAKE) benchmark MTU=1500 + PATH=$(shell pwd):$$PATH $(MAKE) benchmark MTU=65520 + +.PHONY: generate-man lint lint-full indent benchmark ci diff --git a/main.c b/main.c index 2272368..765e4c7 100644 --- a/main.c +++ b/main.c @@ -1,754 +1,914 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#define _GNU_SOURCE -#include "config.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "slirp4netns.h" - -#define DEFAULT_MTU (1500) -#define DEFAULT_CIDR ("10.0.2.0/24") -#define DEFAULT_VHOST_OFFSET (2) // 10.0.2.2 -#define DEFAULT_VDHCPSTART_OFFSET (15) // 10.0.2.15 -#define DEFAULT_VNAMESERVER_OFFSET (3) // 10.0.2.3 -#define DEFAULT_RECOMMENDED_VGUEST_OFFSET (100) // 10.0.2.100 -#define DEFAULT_NETNS_TYPE ("pid") -#define NETWORK_PREFIX_MIN (1) -// >=26 is not supported because the recommended guest IP is set to network addr -// + 100 . 
-#define NETWORK_PREFIX_MAX (25) - -static int nsenter(pid_t target_pid, char *netns, char *userns, - bool only_userns) -{ - int usernsfd = -1, netnsfd = -1; - if (!only_userns && !netns) { - if (asprintf(&netns, "/proc/%d/ns/net", target_pid) < 0) { - perror("cannot get netns path"); - return -1; - } - } - if (!userns && target_pid) { - if (asprintf(&userns, "/proc/%d/ns/user", target_pid) < 0) { - perror("cannot get userns path"); - return -1; - } - } - if (!only_userns && (netnsfd = open(netns, O_RDONLY)) < 0) { - perror(netns); - return netnsfd; - } - if (userns && (usernsfd = open(userns, O_RDONLY)) < 0) { - perror(userns); - return usernsfd; - } - - if (usernsfd != -1) { - int r = setns(usernsfd, CLONE_NEWUSER); - if (only_userns && r < 0) { - perror("setns(CLONE_NEWUSER)"); - return -1; - } - close(usernsfd); - } - if (netnsfd != -1 && setns(netnsfd, CLONE_NEWNET) < 0) { - perror("setns(CLONE_NEWNET)"); - return -1; - } - close(netnsfd); - return 0; -} - -static int open_tap(const char *tapname) -{ - int fd; - struct ifreq ifr; - if ((fd = open("/dev/net/tun", O_RDWR)) < 0) { - perror("open(\"/dev/net/tun\")"); - return fd; - } - memset(&ifr, 0, sizeof(ifr)); - ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - strncpy(ifr.ifr_name, tapname, sizeof(ifr.ifr_name) - 1); - if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) { - perror("ioctl(TUNSETIFF)"); - close(fd); - return -1; - } - return fd; -} - -static int sendfd(int sock, int fd) -{ - ssize_t rc; - struct msghdr msg; - struct cmsghdr *cmsg; - char cmsgbuf[CMSG_SPACE(sizeof(fd))]; - struct iovec iov; - char dummy = '\0'; - memset(&msg, 0, sizeof(msg)); - iov.iov_base = &dummy; - iov.iov_len = 1; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = cmsgbuf; - msg.msg_controllen = sizeof(cmsgbuf); - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); - memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd)); - msg.msg_controllen = cmsg->cmsg_len; - 
if ((rc = sendmsg(sock, &msg, 0)) < 0) { - perror("sendmsg"); - } - return rc; -} - -static int configure_network(const char *tapname, - struct slirp4netns_config *cfg) -{ - struct rtentry route; - struct ifreq ifr; - struct sockaddr_in *sai = (struct sockaddr_in *)&ifr.ifr_addr; - int sockfd; - - sockfd = socket(AF_INET, SOCK_DGRAM, 0); - if (sockfd < 0) { - perror("cannot create socket"); - return -1; - } - - // set loopback device to UP - struct ifreq ifr_lo = { .ifr_name = "lo", - .ifr_flags = IFF_UP | IFF_RUNNING }; - if (ioctl(sockfd, SIOCSIFFLAGS, &ifr_lo) < 0) { - perror("cannot set device up"); - return -1; - } - - memset(&ifr, 0, sizeof(ifr)); - ifr.ifr_flags = IFF_UP | IFF_RUNNING; - strncpy(ifr.ifr_name, tapname, sizeof(ifr.ifr_name) - 1); - - if (ioctl(sockfd, SIOCSIFFLAGS, &ifr) < 0) { - perror("cannot set device up"); - return -1; - } - - ifr.ifr_mtu = (int)cfg->mtu; - if (ioctl(sockfd, SIOCSIFMTU, &ifr) < 0) { - perror("cannot set MTU"); - return -1; - } - - sai->sin_family = AF_INET; - sai->sin_port = 0; - sai->sin_addr = cfg->recommended_vguest; - - if (ioctl(sockfd, SIOCSIFADDR, &ifr) < 0) { - perror("cannot set device address"); - return -1; - } - - sai->sin_addr = cfg->vnetmask; - if (ioctl(sockfd, SIOCSIFNETMASK, &ifr) < 0) { - perror("cannot set device netmask"); - return -1; - } - - memset(&route, 0, sizeof(route)); - sai = (struct sockaddr_in *)&route.rt_gateway; - sai->sin_family = AF_INET; - sai->sin_addr = cfg->vhost; - sai = (struct sockaddr_in *)&route.rt_dst; - sai->sin_family = AF_INET; - sai->sin_addr.s_addr = INADDR_ANY; - sai = (struct sockaddr_in *)&route.rt_genmask; - sai->sin_family = AF_INET; - sai->sin_addr.s_addr = INADDR_ANY; - - route.rt_flags = RTF_UP | RTF_GATEWAY; - route.rt_metric = 0; - route.rt_dev = (char *)tapname; - - if (ioctl(sockfd, SIOCADDRT, &route) < 0) { - perror("set route"); - return -1; - } - return 0; -} - -static int child(int sock, pid_t target_pid, bool do_config_network, - const char *tapname, char 
*netns_path, char *userns_path, - struct slirp4netns_config *cfg) -{ - int rc, tapfd; - if ((rc = nsenter(target_pid, netns_path, userns_path, false)) < 0) { - return rc; - } - if ((tapfd = open_tap(tapname)) < 0) { - return tapfd; - } - if (do_config_network && configure_network(tapname, cfg) < 0) { - return -1; - } - if (sendfd(sock, tapfd) < 0) { - close(tapfd); - close(sock); - return -1; - } - fprintf(stderr, "sent tapfd=%d for %s\n", tapfd, tapname); - close(sock); - return 0; -} - -static int recvfd(int sock) -{ - int fd; - ssize_t rc; - struct msghdr msg; - struct cmsghdr *cmsg; - char cmsgbuf[CMSG_SPACE(sizeof(fd))]; - struct iovec iov; - char dummy = '\0'; - memset(&msg, 0, sizeof(msg)); - iov.iov_base = &dummy; - iov.iov_len = 1; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = cmsgbuf; - msg.msg_controllen = sizeof(cmsgbuf); - if ((rc = recvmsg(sock, &msg, 0)) < 0) { - perror("recvmsg"); - return (int)rc; - } - if (rc == 0) { - fprintf(stderr, "the message is empty\n"); - return -1; - } - cmsg = CMSG_FIRSTHDR(&msg); - if (cmsg == NULL || cmsg->cmsg_type != SCM_RIGHTS) { - fprintf(stderr, "the message does not contain fd\n"); - return -1; - } - memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd)); - return fd; -} - -static int parent(int sock, int ready_fd, int exit_fd, const char *api_socket, - struct slirp4netns_config *cfg, pid_t target_pid) -{ - int rc, tapfd; - if ((tapfd = recvfd(sock)) < 0) { - return tapfd; - } - fprintf(stderr, "received tapfd=%d\n", tapfd); - close(sock); - printf("Starting slirp\n"); - printf("* MTU: %d\n", cfg->mtu); - printf("* Network: %s\n", inet_ntoa(cfg->vnetwork)); - printf("* Netmask: %s\n", inet_ntoa(cfg->vnetmask)); - printf("* Gateway: %s\n", inet_ntoa(cfg->vhost)); - printf("* DNS: %s\n", inet_ntoa(cfg->vnameserver)); - printf("* Recommended IP: %s\n", inet_ntoa(cfg->recommended_vguest)); - if (api_socket != NULL) { - printf("* API Socket: %s\n", api_socket); - } - if (!cfg->disable_host_loopback) { - printf( - 
"WARNING: 127.0.0.1:* on the host is accessible as %s (set " - "--disable-host-loopback to prohibit connecting to 127.0.0.1:*)\n", - inet_ntoa(cfg->vhost)); - } - if (cfg->enable_sandbox && geteuid() != 0) { - if ((rc = nsenter(target_pid, NULL, NULL, true)) < 0) { - close(tapfd); - return rc; - } - if ((rc = setegid(0)) < 0) { - fprintf(stderr, "setegid(0)\n"); - close(tapfd); - return rc; - } - if ((rc = seteuid(0)) < 0) { - fprintf(stderr, "seteuid(0)\n"); - close(tapfd); - return rc; - } - } - if ((rc = do_slirp(tapfd, ready_fd, exit_fd, api_socket, cfg)) < 0) { - fprintf(stderr, "do_slirp failed\n"); - close(tapfd); - return rc; - } - /* NOT REACHED */ - return 0; -} - -static void usage(const char *argv0) -{ - printf("Usage: %s [OPTION]... PID|PATH TAPNAME\n", argv0); - printf("User-mode networking for unprivileged network namespaces.\n\n"); - printf("-c, --configure bring up the interface\n"); - printf("-e, --exit-fd=FD specify the FD for terminating " - "slirp4netns\n"); - printf("-r, --ready-fd=FD specify the FD to write to when the " - "network is configured\n"); - /* v0.2.0 */ - printf("-m, --mtu=MTU specify MTU (default=%d, max=65521)\n", - DEFAULT_MTU); - printf("-6, --enable-ipv6 enable IPv6 (experimental)\n"); - /* v0.3.0 */ - printf("-a, --api-socket=PATH specify API socket path\n"); - printf( - "--cidr=CIDR specify network address CIDR (default=%s)\n", - DEFAULT_CIDR); - printf("--disable-host-loopback prohibit connecting to 127.0.0.1:* on the " - "host namespace\n"); - /* v0.4.0 */ - printf("--netns-type=TYPE specify network namespace type ([path|pid], " - "default=%s)\n", - DEFAULT_NETNS_TYPE); - printf("--userns-path=PATH specify user namespace path\n"); - printf( - "--enable-sandbox create a new mount namespace (and drop all " - "caps except CAP_NET_BIND_SERVICE if running as the root)\n"); - printf("--enable-seccomp enable seccomp to limit syscalls " - "(experimental)\n"); - /* others */ - printf("-h, --help show this help and exit\n"); - 
printf("-v, --version show version and exit\n"); -} - -// version output is runc-compatible and machine-parsable -static void version() -{ - printf("slirp4netns version %s\n", VERSION ? VERSION : PACKAGE_VERSION); -#ifdef COMMIT - printf("commit: %s\n", COMMIT); -#endif - printf("libslirp: %s\n", slirp_version_string()); -} - -struct options { - pid_t target_pid; // argv[1] - char *tapname; // argv[2] - bool do_config_network; // -c - int exit_fd; // -e - int ready_fd; // -r - unsigned int mtu; // -m - bool disable_host_loopback; // --disable-host-loopback - char *cidr; // --cidr - bool enable_ipv6; // -6 - char *api_socket; // -a - char *netns_type; // argv[1] - char *netns_path; // --netns-path - char *userns_path; // --userns-path - bool enable_sandbox; // --enable-sandbox - bool enable_seccomp; // --enable-seccomp -}; - -static void options_init(struct options *options) -{ - memset(options, 0, sizeof(*options)); - options->exit_fd = options->ready_fd = -1; - options->mtu = DEFAULT_MTU; -} - -static void options_destroy(struct options *options) -{ - if (options->tapname != NULL) { - free(options->tapname); - options->tapname = NULL; - } - if (options->cidr != NULL) { - free(options->cidr); - options->cidr = NULL; - } - if (options->api_socket != NULL) { - free(options->api_socket); - options->api_socket = NULL; - } - if (options->netns_type != NULL) { - free(options->netns_type); - options->netns_type = NULL; - } - if (options->netns_path != NULL) { - free(options->netns_path); - options->netns_path = NULL; - } - if (options->userns_path != NULL) { - free(options->userns_path); - options->userns_path = NULL; - } -} - -// * caller does not need to call options_init() -// * caller needs to call options_destroy() after calling this function. -// * this function calls exit() on an error. 
-static void parse_args(int argc, char *const argv[], struct options *options) -{ - int opt; - char *strtol_e = NULL; - char *optarg_cidr = NULL; - char *optarg_netns_type = NULL; - char *optarg_userns_path = NULL; - char *optarg_api_socket = NULL; -#define CIDR -42 -#define DISABLE_HOST_LOOPBACK -43 -#define NETNS_TYPE -44 -#define USERNS_PATH -45 -#define ENABLE_SANDBOX -46 -#define ENABLE_SECCOMP -47 -#define _DEPRECATED_NO_HOST_LOOPBACK \ - -10043 // deprecated in favor of disable-host-loopback -#define _DEPRECATED_CREATE_SANDBOX \ - -10044 // deprecated in favor of enable-sandbox - const struct option longopts[] = { - { "configure", no_argument, NULL, 'c' }, - { "exit-fd", required_argument, NULL, 'e' }, - { "ready-fd", required_argument, NULL, 'r' }, - { "mtu", required_argument, NULL, 'm' }, - { "cidr", required_argument, NULL, CIDR }, - { "disable-host-loopback", no_argument, NULL, DISABLE_HOST_LOOPBACK }, - { "no-host-loopback", no_argument, NULL, _DEPRECATED_NO_HOST_LOOPBACK }, - { "netns-type", required_argument, NULL, NETNS_TYPE }, - { "userns-path", required_argument, NULL, USERNS_PATH }, - { "api-socket", required_argument, NULL, 'a' }, - { "enable-ipv6", no_argument, NULL, '6' }, - { "enable-sandbox", no_argument, NULL, ENABLE_SANDBOX }, - { "create-sandbox", no_argument, NULL, _DEPRECATED_CREATE_SANDBOX }, - { "enable-seccomp", no_argument, NULL, ENABLE_SECCOMP }, - { "help", no_argument, NULL, 'h' }, - { "version", no_argument, NULL, 'v' }, - { 0, 0, 0, 0 }, - }; - options_init(options); - /* NOTE: clang-tidy hates strdup(optarg) in the while loop (#112) */ - while ((opt = getopt_long(argc, argv, "ce:r:m:a:6hv", longopts, NULL)) != - -1) { - switch (opt) { - case 'c': - options->do_config_network = true; - break; - case 'e': - errno = 0; - options->exit_fd = strtol(optarg, &strtol_e, 10); - if (errno || *strtol_e != '\0' || options->exit_fd < 0) { - fprintf(stderr, "exit-fd must be a non-negative integer\n"); - goto error; - } - break; - case 'r': 
- errno = 0; - options->ready_fd = strtol(optarg, &strtol_e, 10); - if (errno || *strtol_e != '\0' || options->ready_fd < 0) { - fprintf(stderr, "ready-fd must be a non-negative integer\n"); - goto error; - } - break; - case 'm': - errno = 0; - options->mtu = strtol(optarg, &strtol_e, 10); - if (errno || *strtol_e != '\0' || options->mtu <= 0 || - options->mtu > 65521) { - fprintf(stderr, "MTU must be a positive integer (< 65522)\n"); - goto error; - } - break; - case CIDR: - optarg_cidr = optarg; - break; - case _DEPRECATED_NO_HOST_LOOPBACK: - // There was no tagged release with support for --no-host-loopback. - // So no one will be affected by removal of --no-host-loopback. - printf("WARNING: --no-host-loopback is deprecated and will be " - "removed in future releases, please use " - "--disable-host-loopback instead.\n"); - /* FALLTHROUGH */ - case DISABLE_HOST_LOOPBACK: - options->disable_host_loopback = true; - break; - case _DEPRECATED_CREATE_SANDBOX: - // There was no tagged release with support for --create-sandbox. - // So no one will be affected by removal of --create-sandbox. 
- printf("WARNING: --create-sandbox is deprecated and will be " - "removed in future releases, please use " - "--enable-sandbox instead.\n"); - /* FALLTHROUGH */ - case ENABLE_SANDBOX: - options->enable_sandbox = true; - break; - case ENABLE_SECCOMP: - printf("WARNING: Support for seccomp is experimental\n"); - options->enable_seccomp = true; - break; - case NETNS_TYPE: - optarg_netns_type = optarg; - break; - case USERNS_PATH: - optarg_userns_path = optarg; - if (access(optarg_userns_path, F_OK) == -1) { - fprintf(stderr, "userns path doesn't exist: %s\n", - optarg_userns_path); - goto error; - } - break; - case 'a': - optarg_api_socket = optarg; - break; - case '6': - options->enable_ipv6 = true; - printf("WARNING: Support for IPv6 is experimental\n"); - break; - case 'h': - usage(argv[0]); - exit(EXIT_SUCCESS); - break; - case 'v': - version(); - exit(EXIT_SUCCESS); - break; - default: - goto error; - break; - } - } - if (optarg_cidr != NULL) { - options->cidr = strdup(optarg_cidr); - } - if (optarg_netns_type != NULL) { - options->netns_type = strdup(optarg_netns_type); - } - if (optarg_userns_path != NULL) { - options->userns_path = strdup(optarg_userns_path); - } - if (optarg_api_socket != NULL) { - options->api_socket = strdup(optarg_api_socket); - } -#undef CIDR -#undef DISABLE_HOST_LOOPBACK -#undef NETNS_TYPE -#undef USERNS_PATH -#undef _DEPRECATED_NO_HOST_LOOPBACK -#undef ENABLE_SANDBOX -#undef ENABLE_SECCOMP - if (argc - optind < 2) { - goto error; - } - if (!options->netns_type || - strcmp(options->netns_type, DEFAULT_NETNS_TYPE) == 0) { - errno = 0; - options->target_pid = strtol(argv[optind], &strtol_e, 10); - if (errno || *strtol_e != '\0' || options->target_pid <= 0) { - fprintf(stderr, "PID must be a positive integer\n"); - goto error; - } - } else { - options->netns_path = strdup(argv[optind]); - if (access(options->netns_path, F_OK) == -1) { - perror("existing path expected when --netns-type=path"); - goto error; - } - } - options->tapname = 
strdup(argv[optind + 1]); - return; -error: - usage(argv[0]); - options_destroy(options); - exit(EXIT_FAILURE); -} - -static int from_regmatch(char *buf, size_t buf_len, regmatch_t match, - const char *orig) -{ - size_t len = match.rm_eo - match.rm_so; - if (len > buf_len - 1) { - return -1; - } - memset(buf, 0, buf_len); - strncpy(buf, &orig[match.rm_so], len); - return 0; -} - -static int parse_cidr(struct in_addr *network, struct in_addr *netmask, - const char *cidr) -{ - int rc = 0; - regex_t r; - regmatch_t matches[4]; - size_t nmatch = sizeof(matches) / sizeof(matches[0]); - const char *cidr_regex = "^(([0-9]{1,3}\\.){3}[0-9]{1,3})/([0-9]{1,2})$"; - char snetwork[16], sprefix[16]; - int prefix; - rc = regcomp(&r, cidr_regex, REG_EXTENDED); - if (rc != 0) { - fprintf(stderr, "internal regex error\n"); - rc = -1; - goto finish; - } - rc = regexec(&r, cidr, nmatch, matches, 0); - if (rc != 0) { - fprintf(stderr, "invalid CIDR: %s\n", cidr); - rc = -1; - goto finish; - } - rc = from_regmatch(snetwork, sizeof(snetwork), matches[1], cidr); - if (rc < 0) { - fprintf(stderr, "invalid CIDR: %s\n", cidr); - goto finish; - } - rc = from_regmatch(sprefix, sizeof(sprefix), matches[3], cidr); - if (rc < 0) { - fprintf(stderr, "invalid CIDR: %s\n", cidr); - goto finish; - } - if (inet_pton(AF_INET, snetwork, network) != 1) { - fprintf(stderr, "invalid network address: %s\n", snetwork); - rc = -1; - goto finish; - } - errno = 0; - prefix = strtoul(sprefix, NULL, 10); - if (errno) { - fprintf(stderr, "invalid prefix length: %s\n", sprefix); - rc = -1; - goto finish; - } - if (prefix < NETWORK_PREFIX_MIN || prefix > NETWORK_PREFIX_MAX) { - fprintf(stderr, "prefix length needs to be %d-%d\n", NETWORK_PREFIX_MIN, - NETWORK_PREFIX_MAX); - rc = -1; - goto finish; - } - netmask->s_addr = htonl(~((1 << (32 - prefix)) - 1)); - if ((network->s_addr & netmask->s_addr) != network->s_addr) { - fprintf(stderr, "CIDR needs to be a network address like 10.0.2.0/24, " - "not like 
10.0.2.100/24\n"); - rc = -1; - goto finish; - } -finish: - regfree(&r); - return rc; -} - -static int slirp4netns_config_from_cidr(struct slirp4netns_config *cfg, - const char *cidr) -{ - int rc; - rc = parse_cidr(&cfg->vnetwork, &cfg->vnetmask, cidr); - if (rc < 0) { - goto finish; - } - cfg->vhost.s_addr = - htonl(ntohl(cfg->vnetwork.s_addr) + DEFAULT_VHOST_OFFSET); - cfg->vdhcp_start.s_addr = - htonl(ntohl(cfg->vnetwork.s_addr) + DEFAULT_VDHCPSTART_OFFSET); - cfg->vnameserver.s_addr = - htonl(ntohl(cfg->vnetwork.s_addr) + DEFAULT_VNAMESERVER_OFFSET); - cfg->recommended_vguest.s_addr = - htonl(ntohl(cfg->vnetwork.s_addr) + DEFAULT_RECOMMENDED_VGUEST_OFFSET); -finish: - return rc; -} - -static int slirp4netns_config_from_options(struct slirp4netns_config *cfg, - struct options *opt) -{ - int rc = 0; - cfg->mtu = opt->mtu; - rc = slirp4netns_config_from_cidr(cfg, opt->cidr == NULL ? DEFAULT_CIDR : - opt->cidr); - if (rc < 0) { - goto finish; - } - cfg->enable_ipv6 = cfg->enable_ipv6; - cfg->disable_host_loopback = opt->disable_host_loopback; - cfg->enable_sandbox = opt->enable_sandbox; - cfg->enable_seccomp = opt->enable_seccomp; -finish: - return rc; -} - -int main(int argc, char *const argv[]) -{ - int sv[2]; - pid_t child_pid; - struct options options; - struct slirp4netns_config slirp4netns_config; - int exit_status = 0; - - parse_args(argc, argv, &options); - if (slirp4netns_config_from_options(&slirp4netns_config, &options) < 0) { - exit_status = EXIT_FAILURE; - goto finish; - } - if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sv) < 0) { - perror("socketpair"); - exit_status = EXIT_FAILURE; - goto finish; - } - if ((child_pid = fork()) < 0) { - perror("fork"); - exit_status = EXIT_FAILURE; - goto finish; - } - if (child_pid == 0) { - if (child(sv[1], options.target_pid, options.do_config_network, - options.tapname, options.netns_path, options.userns_path, - &slirp4netns_config) < 0) { - exit_status = EXIT_FAILURE; - goto finish; - } - } else { - int ret, 
child_wstatus, child_status; - do - ret = waitpid(child_pid, &child_wstatus, 0); - while (ret < 0 && errno == EINTR); - if (ret < 0) { - perror("waitpid"); - exit_status = EXIT_FAILURE; - goto finish; - } - if (!WIFEXITED(child_wstatus)) { - fprintf(stderr, "child failed(wstatus=%d, !WIFEXITED)\n", - child_wstatus); - exit_status = EXIT_FAILURE; - goto finish; - } - child_status = WEXITSTATUS(child_wstatus); - if (child_status != 0) { - fprintf(stderr, "child failed(%d)\n", child_status); - exit_status = child_status; - goto finish; - } - if (parent(sv[0], options.ready_fd, options.exit_fd, options.api_socket, - &slirp4netns_config, options.target_pid) < 0) { - fprintf(stderr, "parent failed\n"); - exit_status = EXIT_FAILURE; - goto finish; - } - } -finish: - options_destroy(&options); - exit(exit_status); - return 0; -} +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#define _GNU_SOURCE +#include "config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "slirp4netns.h" +#include + +#define DEFAULT_MTU (1500) +#define DEFAULT_CIDR ("10.0.2.0/24") +#define DEFAULT_VHOST_OFFSET (2) // 10.0.2.2 +#define DEFAULT_VDHCPSTART_OFFSET (15) // 10.0.2.15 +#define DEFAULT_VNAMESERVER_OFFSET (3) // 10.0.2.3 +#define DEFAULT_RECOMMENDED_VGUEST_OFFSET (100) // 10.0.2.100 +#define DEFAULT_NETNS_TYPE ("pid") +#define NETWORK_PREFIX_MIN (1) +// >=26 is not supported because the recommended guest IP is set to network addr +// + 100 . 
+#define NETWORK_PREFIX_MAX (25) + +static int nsenter(pid_t target_pid, char *netns, char *userns, + bool only_userns) +{ + int usernsfd = -1, netnsfd = -1; + if (!only_userns && !netns) { + if (asprintf(&netns, "/proc/%d/ns/net", target_pid) < 0) { + perror("cannot get netns path"); + return -1; + } + } + if (!userns && target_pid) { + if (asprintf(&userns, "/proc/%d/ns/user", target_pid) < 0) { + perror("cannot get userns path"); + return -1; + } + } + if (!only_userns && (netnsfd = open(netns, O_RDONLY)) < 0) { + perror(netns); + return netnsfd; + } + if (userns && (usernsfd = open(userns, O_RDONLY)) < 0) { + perror(userns); + return usernsfd; + } + + if (usernsfd != -1) { + int r = setns(usernsfd, CLONE_NEWUSER); + if (only_userns && r < 0) { + perror("setns(CLONE_NEWUSER)"); + return -1; + } + close(usernsfd); + } + if (netnsfd != -1 && setns(netnsfd, CLONE_NEWNET) < 0) { + perror("setns(CLONE_NEWNET)"); + return -1; + } + close(netnsfd); + return 0; +} + +static int open_tap(const char *tapname) +{ + int fd; + struct ifreq ifr; + if ((fd = open("/dev/net/tun", O_RDWR)) < 0) { + perror("open(\"/dev/net/tun\")"); + return fd; + } + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + strncpy(ifr.ifr_name, tapname, sizeof(ifr.ifr_name) - 1); + if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) { + perror("ioctl(TUNSETIFF)"); + close(fd); + return -1; + } + return fd; +} + +static int sendfd(int sock, int fd) +{ + ssize_t rc; + struct msghdr msg; + struct cmsghdr *cmsg; + char cmsgbuf[CMSG_SPACE(sizeof(fd))]; + struct iovec iov; + char dummy = '\0'; + memset(&msg, 0, sizeof(msg)); + iov.iov_base = &dummy; + iov.iov_len = 1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); + memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd)); + msg.msg_controllen = cmsg->cmsg_len; + 
if ((rc = sendmsg(sock, &msg, 0)) < 0) { + perror("sendmsg"); + } + return rc; +} + +static int configure_network(const char *tapname, + struct slirp4netns_config *cfg) +{ + struct rtentry route; + struct ifreq ifr; + struct sockaddr_in *sai = (struct sockaddr_in *)&ifr.ifr_addr; + int sockfd; + + sockfd = socket(AF_INET, SOCK_DGRAM, 0); + if (sockfd < 0) { + perror("cannot create socket"); + return -1; + } + + // set loopback device to UP + struct ifreq ifr_lo = { .ifr_name = "lo", + .ifr_flags = IFF_UP | IFF_RUNNING }; + if (ioctl(sockfd, SIOCSIFFLAGS, &ifr_lo) < 0) { + perror("cannot set device up"); + return -1; + } + + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = IFF_UP | IFF_RUNNING; + strncpy(ifr.ifr_name, tapname, sizeof(ifr.ifr_name) - 1); + + if (ioctl(sockfd, SIOCSIFFLAGS, &ifr) < 0) { + perror("cannot set device up"); + return -1; + } + + ifr.ifr_mtu = (int)cfg->mtu; + if (ioctl(sockfd, SIOCSIFMTU, &ifr) < 0) { + perror("cannot set MTU"); + return -1; + } + + sai->sin_family = AF_INET; + sai->sin_port = 0; + sai->sin_addr = cfg->recommended_vguest; + + if (ioctl(sockfd, SIOCSIFADDR, &ifr) < 0) { + perror("cannot set device address"); + return -1; + } + + sai->sin_addr = cfg->vnetmask; + if (ioctl(sockfd, SIOCSIFNETMASK, &ifr) < 0) { + perror("cannot set device netmask"); + return -1; + } + + memset(&route, 0, sizeof(route)); + sai = (struct sockaddr_in *)&route.rt_gateway; + sai->sin_family = AF_INET; + sai->sin_addr = cfg->vhost; + sai = (struct sockaddr_in *)&route.rt_dst; + sai->sin_family = AF_INET; + sai->sin_addr.s_addr = INADDR_ANY; + sai = (struct sockaddr_in *)&route.rt_genmask; + sai->sin_family = AF_INET; + sai->sin_addr.s_addr = INADDR_ANY; + + route.rt_flags = RTF_UP | RTF_GATEWAY; + route.rt_metric = 0; + route.rt_dev = (char *)tapname; + + if (ioctl(sockfd, SIOCADDRT, &route) < 0) { + perror("set route"); + return -1; + } + return 0; +} + +static int child(int sock, pid_t target_pid, bool do_config_network, + const char *tapname, char 
*netns_path, char *userns_path, + struct slirp4netns_config *cfg) +{ + int rc, tapfd; + if ((rc = nsenter(target_pid, netns_path, userns_path, false)) < 0) { + return rc; + } + if ((tapfd = open_tap(tapname)) < 0) { + return tapfd; + } + if (do_config_network && configure_network(tapname, cfg) < 0) { + return -1; + } + if (sendfd(sock, tapfd) < 0) { + close(tapfd); + close(sock); + return -1; + } + fprintf(stderr, "sent tapfd=%d for %s\n", tapfd, tapname); + close(sock); + return 0; +} + +static int recvfd(int sock) +{ + int fd; + ssize_t rc; + struct msghdr msg; + struct cmsghdr *cmsg; + char cmsgbuf[CMSG_SPACE(sizeof(fd))]; + struct iovec iov; + char dummy = '\0'; + memset(&msg, 0, sizeof(msg)); + iov.iov_base = &dummy; + iov.iov_len = 1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + if ((rc = recvmsg(sock, &msg, 0)) < 0) { + perror("recvmsg"); + return (int)rc; + } + if (rc == 0) { + fprintf(stderr, "the message is empty\n"); + return -1; + } + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg == NULL || cmsg->cmsg_type != SCM_RIGHTS) { + fprintf(stderr, "the message does not contain fd\n"); + return -1; + } + memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd)); + return fd; +} + +static int parent(int sock, int ready_fd, int exit_fd, const char *api_socket, + struct slirp4netns_config *cfg, pid_t target_pid) +{ + int rc, tapfd; + if ((tapfd = recvfd(sock)) < 0) { + return tapfd; + } + fprintf(stderr, "received tapfd=%d\n", tapfd); + close(sock); + printf("Starting slirp\n"); + printf("* MTU: %d\n", cfg->mtu); + printf("* Network: %s\n", inet_ntoa(cfg->vnetwork)); + printf("* Netmask: %s\n", inet_ntoa(cfg->vnetmask)); + printf("* Gateway: %s\n", inet_ntoa(cfg->vhost)); + printf("* DNS: %s\n", inet_ntoa(cfg->vnameserver)); + printf("* Recommended IP: %s\n", inet_ntoa(cfg->recommended_vguest)); + if (api_socket != NULL) { + printf("* API Socket: %s\n", api_socket); + } +#if SLIRP_CONFIG_VERSION_MAX >= 2 + if 
(cfg->enable_outbound_addr) { + printf("* Outbound IPv4: %s\n", + inet_ntoa(cfg->outbound_addr.sin_addr)); + } + if (cfg->enable_outbound_addr6) { + char str[INET6_ADDRSTRLEN]; + if (inet_ntop(AF_INET6, &cfg->outbound_addr6.sin6_addr, str, + INET6_ADDRSTRLEN) != NULL) { + printf("* Outbound IPv6: %s\n", str); + } + } +#endif + if (!cfg->disable_host_loopback) { + printf( + "WARNING: 127.0.0.1:* on the host is accessible as %s (set " + "--disable-host-loopback to prohibit connecting to 127.0.0.1:*)\n", + inet_ntoa(cfg->vhost)); + } + if (cfg->enable_sandbox && geteuid() != 0) { /* sandbox requested while not running as euid 0; NOTE(review): the final nsenter() argument presumably selects user-namespace-only entry - confirm against nsenter() */ + if ((rc = nsenter(target_pid, NULL, NULL, true)) < 0) { + close(tapfd); + return rc; + } + if ((rc = setegid(0)) < 0) { + fprintf(stderr, "setegid(0)\n"); + close(tapfd); + return rc; + } + if ((rc = seteuid(0)) < 0) { + fprintf(stderr, "seteuid(0)\n"); + close(tapfd); + return rc; + } + } + if ((rc = do_slirp(tapfd, ready_fd, exit_fd, api_socket, cfg)) < 0) { + fprintf(stderr, "do_slirp failed\n"); + close(tapfd); + return rc; + } + /* NOT REACHED */ + return 0; +} + /* Print the option summary to stdout; argv0 is shown as the program name. */ +static void usage(const char *argv0) +{ + printf("Usage: %s [OPTION]... 
PID|PATH TAPNAME\n", argv0); + printf("User-mode networking for unprivileged network namespaces.\n\n"); + printf("-c, --configure bring up the interface\n"); + printf("-e, --exit-fd=FD specify the FD for terminating " + "slirp4netns\n"); + printf("-r, --ready-fd=FD specify the FD to write to when the " + "network is configured\n"); + /* options available since v0.2.0 */ + printf("-m, --mtu=MTU specify MTU (default=%d, max=65521)\n", + DEFAULT_MTU); + printf("-6, --enable-ipv6 enable IPv6 (experimental)\n"); + /* options available since v0.3.0 */ + printf("-a, --api-socket=PATH specify API socket path\n"); + printf( + "--cidr=CIDR specify network address CIDR (default=%s)\n", + DEFAULT_CIDR); + printf("--disable-host-loopback prohibit connecting to 127.0.0.1:* on the " + "host namespace\n"); + /* options available since v0.4.0 */ + printf("--netns-type=TYPE specify network namespace type ([path|pid], " + "default=%s)\n", + DEFAULT_NETNS_TYPE); + printf("--userns-path=PATH specify user namespace path\n"); + printf( + "--enable-sandbox create a new mount namespace (and drop all " + "caps except CAP_NET_BIND_SERVICE if running as the root)\n"); + printf("--enable-seccomp enable seccomp to limit syscalls " + "(experimental)\n"); + /* options available since v1.1.0; only listed when the libslirp config version supports them */ +#if SLIRP_CONFIG_VERSION_MAX >= 2 + printf("--outbound-addr=IPv4 sets outbound ipv4 address to bound to " + "(experimental)\n"); + printf("--outbound-addr6=IPv6 sets outbound ipv6 address to bound to " + "(experimental)\n"); +#endif +#if SLIRP_CONFIG_VERSION_MAX >= 3 + printf("--disable-dns disables 10.0.2.3 (or configured internal " + "ip) to host dns redirect (experimental)\n"); +#endif + /* options that are always available */ + printf("-h, --help show this help and exit\n"); + printf("-v, --version show version and exit\n"); +} + /* Print the slirp4netns version, the commit (when COMMIT is defined), the libslirp version and SLIRP_CONFIG_VERSION_MAX to stdout. */ +// version output is runc-compatible and machine-parsable +static void version() +{ + printf("slirp4netns version %s\n", VERSION ? 
VERSION : PACKAGE_VERSION); +#ifdef COMMIT + printf("commit: %s\n", COMMIT); +#endif + printf("libslirp: %s\n", slirp_version_string()); + printf("SLIRP_CONFIG_VERSION_MAX: %d\n", SLIRP_CONFIG_VERSION_MAX); +} + /* Parsed command-line options; the char* members are heap copies that options_destroy() releases. */ +struct options { + char *tapname; // second positional argument (TAPNAME) + char *cidr; // --cidr + char *api_socket; // -a + char *netns_type; // --netns-type + char *netns_path; // first positional argument (PATH), when --netns-type is not the default + char *userns_path; // --userns-path + char *outbound_addr; // --outbound-addr + char *outbound_addr6; // --outbound-addr6 + pid_t target_pid; // first positional argument (PID), for the default netns type + int exit_fd; // -e + int ready_fd; // -r + unsigned int mtu; // -m + bool do_config_network; // -c + bool disable_host_loopback; // --disable-host-loopback + bool enable_ipv6; // -6 + bool enable_sandbox; // --enable-sandbox + bool enable_seccomp; // --enable-seccomp + bool disable_dns; // --disable-dns +}; + /* Zero every field, then set both FDs to -1 and the MTU to DEFAULT_MTU. */ +static void options_init(struct options *options) +{ + memset(options, 0, sizeof(*options)); + options->exit_fd = options->ready_fd = -1; + options->mtu = DEFAULT_MTU; +} + /* Free every heap-allocated string member and reset it to NULL. */ +static void options_destroy(struct options *options) +{ + if (options->tapname != NULL) { + free(options->tapname); + options->tapname = NULL; + } + if (options->cidr != NULL) { + free(options->cidr); + options->cidr = NULL; + } + if (options->api_socket != NULL) { + free(options->api_socket); + options->api_socket = NULL; + } + if (options->netns_type != NULL) { + free(options->netns_type); + options->netns_type = NULL; + } + if (options->netns_path != NULL) { + free(options->netns_path); + options->netns_path = NULL; + } + if (options->userns_path != NULL) { + free(options->userns_path); + options->userns_path = NULL; + } + if (options->outbound_addr != NULL) { + free(options->outbound_addr); + options->outbound_addr = NULL; + } + if (options->outbound_addr6 != NULL) { + free(options->outbound_addr6); + options->outbound_addr6 = NULL; + } +} + // Parse the command line into *options. +// * caller does not need to call options_init() +// * caller needs to call options_destroy() after calling this function. 
+// * this function calls exit() on an error (after printing the usage text), and also after handling -h or -v. +static void parse_args(int argc, char *const argv[], struct options *options) +{ + int opt; + char *strtol_e = NULL; + char *optarg_cidr = NULL; + char *optarg_netns_type = NULL; + char *optarg_userns_path = NULL; + char *optarg_api_socket = NULL; + char *optarg_outbound_addr = NULL; + char *optarg_outbound_addr6 = NULL; /* identifiers returned by getopt_long() for long-only options; presumably negative so they cannot collide with short option characters */ +#define CIDR -42 +#define DISABLE_HOST_LOOPBACK -43 +#define NETNS_TYPE -44 +#define USERNS_PATH -45 +#define ENABLE_SANDBOX -46 +#define ENABLE_SECCOMP -47 +#define OUTBOUND_ADDR -48 +#define OUTBOUND_ADDR6 -49 +#define DISABLE_DNS -50 +#define _DEPRECATED_NO_HOST_LOOPBACK \ + -10043 // deprecated in favor of disable-host-loopback +#define _DEPRECATED_CREATE_SANDBOX \ + -10044 // deprecated in favor of enable-sandbox + const struct option longopts[] = { + { "configure", no_argument, NULL, 'c' }, + { "exit-fd", required_argument, NULL, 'e' }, + { "ready-fd", required_argument, NULL, 'r' }, + { "mtu", required_argument, NULL, 'm' }, + { "cidr", required_argument, NULL, CIDR }, + { "disable-host-loopback", no_argument, NULL, DISABLE_HOST_LOOPBACK }, + { "no-host-loopback", no_argument, NULL, _DEPRECATED_NO_HOST_LOOPBACK }, + { "netns-type", required_argument, NULL, NETNS_TYPE }, + { "userns-path", required_argument, NULL, USERNS_PATH }, + { "api-socket", required_argument, NULL, 'a' }, + { "enable-ipv6", no_argument, NULL, '6' }, + { "enable-sandbox", no_argument, NULL, ENABLE_SANDBOX }, + { "create-sandbox", no_argument, NULL, _DEPRECATED_CREATE_SANDBOX }, + { "enable-seccomp", no_argument, NULL, ENABLE_SECCOMP }, + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'v' }, + { "outbound-addr", required_argument, NULL, OUTBOUND_ADDR }, + { "outbound-addr6", required_argument, NULL, OUTBOUND_ADDR6 }, + { "disable-dns", no_argument, NULL, DISABLE_DNS }, + { 0, 0, 0, 0 }, + }; + options_init(options); + /* NOTE: clang-tidy hates strdup(optarg) in the while loop (#112), so string arguments are stashed in the optarg_* locals and duplicated after the loop */ + while ((opt = 
getopt_long(argc, argv, "ce:r:m:a:6hv", longopts, NULL)) != + -1) { + switch (opt) { + case 'c': + options->do_config_network = true; + break; + case 'e': + errno = 0; + options->exit_fd = strtol(optarg, &strtol_e, 10); /* NOTE(review): the long result is narrowed to int before any range check */ + if (errno || *strtol_e != '\0' || options->exit_fd < 0) { + fprintf(stderr, "exit-fd must be a non-negative integer\n"); + goto error; + } + break; + case 'r': + errno = 0; + options->ready_fd = strtol(optarg, &strtol_e, 10); /* NOTE(review): same int narrowing as exit-fd */ + if (errno || *strtol_e != '\0' || options->ready_fd < 0) { + fprintf(stderr, "ready-fd must be a non-negative integer\n"); + goto error; + } + break; + case 'm': + errno = 0; + options->mtu = strtol(optarg, &strtol_e, 10); /* mtu is unsigned int, so the "<= 0" test below can only match 0 */ + if (errno || *strtol_e != '\0' || options->mtu <= 0 || + options->mtu > 65521) { + fprintf(stderr, "MTU must be a positive integer (< 65522)\n"); + goto error; + } + break; + case CIDR: + optarg_cidr = optarg; + break; + case _DEPRECATED_NO_HOST_LOOPBACK: + // There was no tagged release with support for --no-host-loopback. + // So no one will be affected by removal of --no-host-loopback. + printf("WARNING: --no-host-loopback is deprecated and will be " + "removed in future releases, please use " + "--disable-host-loopback instead.\n"); + /* FALLTHROUGH */ + case DISABLE_HOST_LOOPBACK: + options->disable_host_loopback = true; + break; + case _DEPRECATED_CREATE_SANDBOX: + // There was no tagged release with support for --create-sandbox. + // So no one will be affected by removal of --create-sandbox. 
+ printf("WARNING: --create-sandbox is deprecated and will be " + "removed in future releases, please use " + "--enable-sandbox instead.\n"); + /* FALLTHROUGH */ + case ENABLE_SANDBOX: + options->enable_sandbox = true; + break; + case ENABLE_SECCOMP: + printf("WARNING: Support for seccomp is experimental\n"); + options->enable_seccomp = true; + break; + case NETNS_TYPE: + optarg_netns_type = optarg; + break; + case USERNS_PATH: + optarg_userns_path = optarg; + if (access(optarg_userns_path, F_OK) == -1) { + fprintf(stderr, "userns path doesn't exist: %s\n", + optarg_userns_path); + goto error; + } + break; + case 'a': + optarg_api_socket = optarg; + break; + case '6': + options->enable_ipv6 = true; + printf("WARNING: Support for IPv6 is experimental\n"); + break; + case 'h': + usage(argv[0]); + exit(EXIT_SUCCESS); + break; + case 'v': + version(); + exit(EXIT_SUCCESS); + break; + case OUTBOUND_ADDR: + printf("WARNING: Support for --outbound-addr is experimental\n"); + optarg_outbound_addr = optarg; + break; + case OUTBOUND_ADDR6: + printf("WARNING: Support for --outbound-addr6 is experimental\n"); + optarg_outbound_addr6 = optarg; + break; + case DISABLE_DNS: + options->disable_dns = true; + break; + default: + goto error; + break; + } + } + /* option parsing is finished: take heap copies of the stashed optarg pointers */ + if (optarg_cidr != NULL) { + options->cidr = strdup(optarg_cidr); + } + if (optarg_netns_type != NULL) { + options->netns_type = strdup(optarg_netns_type); + } + if (optarg_userns_path != NULL) { + options->userns_path = strdup(optarg_userns_path); + } + if (optarg_api_socket != NULL) { + options->api_socket = strdup(optarg_api_socket); + } + if (optarg_outbound_addr != NULL) { + options->outbound_addr = strdup(optarg_outbound_addr); + } + if (optarg_outbound_addr6 != NULL) { + options->outbound_addr6 = strdup(optarg_outbound_addr6); + } +#undef CIDR +#undef DISABLE_HOST_LOOPBACK +#undef NETNS_TYPE +#undef USERNS_PATH +#undef _DEPRECATED_NO_HOST_LOOPBACK +#undef _DEPRECATED_CREATE_SANDBOX +#undef ENABLE_SANDBOX +#undef ENABLE_SECCOMP +#undef OUTBOUND_ADDR 
+#undef OUTBOUND_ADDR6 +#undef DISABLE_DNS + if (argc - optind < 2) { + goto error; + } + if (!options->netns_type || + strcmp(options->netns_type, DEFAULT_NETNS_TYPE) == 0) { + errno = 0; + options->target_pid = strtol(argv[optind], &strtol_e, 10); + if (errno || *strtol_e != '\0' || options->target_pid <= 0) { + fprintf(stderr, "PID must be a positive integer\n"); + goto error; + } + } else { + options->netns_path = strdup(argv[optind]); + if (access(options->netns_path, F_OK) == -1) { + perror("existing path expected when --netns-type=path"); + goto error; + } + } + options->tapname = strdup(argv[optind + 1]); + return; +error: + usage(argv[0]); + options_destroy(options); + exit(EXIT_FAILURE); +} + +/* Copy the part of orig covered by match into buf as a NUL-terminated string. Returns 0 on success, -1 when buf is too small to hold it. */ +static int from_regmatch(char *buf, size_t buf_len, regmatch_t match, + const char *orig) +{ + size_t len = match.rm_eo - match.rm_so; + if (buf_len == 0 || len > buf_len - 1) { /* buf_len == 0 would make buf_len - 1 wrap around */ + return -1; + } + memset(buf, 0, buf_len); + strncpy(buf, &orig[match.rm_so], len); + return 0; +} + +/* Parse an IPv4 CIDR string such as 10.0.2.0/24 into network and netmask. Returns 0 on success, -1 on error (a message is written to stderr). */ +static int parse_cidr(struct in_addr *network, struct in_addr *netmask, + const char *cidr) +{ + int rc = 0; + regex_t r; + regmatch_t matches[4]; + size_t nmatch = sizeof(matches) / sizeof(matches[0]); + const char *cidr_regex = "^(([0-9]{1,3}\\.){3}[0-9]{1,3})/([0-9]{1,2})$"; + char snetwork[16], sprefix[16]; + int prefix; + rc = regcomp(&r, cidr_regex, REG_EXTENDED); + if (rc != 0) { + fprintf(stderr, "internal regex error\n"); + return -1; /* r is not valid after a failed regcomp(), so skip the regfree() at finish */ + } + rc = regexec(&r, cidr, nmatch, matches, 0); + if (rc != 0) { + fprintf(stderr, "invalid CIDR: %s\n", cidr); + rc = -1; + goto finish; + } + rc = from_regmatch(snetwork, sizeof(snetwork), matches[1], cidr); + if (rc < 0) { + fprintf(stderr, "invalid CIDR: %s\n", cidr); + goto finish; + } + rc = from_regmatch(sprefix, sizeof(sprefix), matches[3], cidr); + if (rc < 0) { + fprintf(stderr, "invalid CIDR: %s\n", cidr); + goto finish; + } + if (inet_pton(AF_INET, snetwork, network) != 1) { + fprintf(stderr, "invalid network address: %s\n", snetwork); + rc 
= -1; + goto finish; + } + errno = 0; + prefix = strtoul(sprefix, NULL, 10); + if (errno) { + fprintf(stderr, "invalid prefix length: %s\n", sprefix); + rc = -1; + goto finish; + } + if (prefix < NETWORK_PREFIX_MIN || prefix > NETWORK_PREFIX_MAX) { + fprintf(stderr, "prefix length needs to be %d-%d\n", NETWORK_PREFIX_MIN, + NETWORK_PREFIX_MAX); + rc = -1; + goto finish; + } + netmask->s_addr = htonl(~((1U << (32 - prefix)) - 1)); /* unsigned shift: a signed 1 << 31 would overflow */ + if ((network->s_addr & netmask->s_addr) != network->s_addr) { + fprintf(stderr, "CIDR needs to be a network address like 10.0.2.0/24, " + "not like 10.0.2.100/24\n"); + rc = -1; + goto finish; + } +finish: + regfree(&r); + return rc; +} + +/* Fill the network, netmask and derived addresses (vhost, vdhcp_start, vnameserver, recommended_vguest) of cfg from a CIDR string; each derived address is the network address plus a fixed offset. Returns the result of parse_cidr(). */ +static int slirp4netns_config_from_cidr(struct slirp4netns_config *cfg, + const char *cidr) +{ + int rc; + rc = parse_cidr(&cfg->vnetwork, &cfg->vnetmask, cidr); + if (rc < 0) { + goto finish; + } + cfg->vhost.s_addr = + htonl(ntohl(cfg->vnetwork.s_addr) + DEFAULT_VHOST_OFFSET); + cfg->vdhcp_start.s_addr = + htonl(ntohl(cfg->vnetwork.s_addr) + DEFAULT_VDHCPSTART_OFFSET); + cfg->vnameserver.s_addr = + htonl(ntohl(cfg->vnetwork.s_addr) + DEFAULT_VNAMESERVER_OFFSET); + cfg->recommended_vguest.s_addr = + htonl(ntohl(cfg->vnetwork.s_addr) + DEFAULT_RECOMMENDED_VGUEST_OFFSET); +finish: + return rc; +} + +/* Store in addr the first address of family af found on the named interface (struct in_addr for AF_INET, struct in6_addr otherwise). Returns 0 on success, -1 if interface is NULL, getifaddrs() fails, or no matching address exists. */ +static int get_interface_addr(const char *interface, int af, void *addr) +{ + struct ifaddrs *ifaddr, *ifa; + if (interface == NULL) + return -1; + + if (getifaddrs(&ifaddr) == -1) { + fprintf(stderr, "getifaddrs failed to obtain interface addresses\n"); + return -1; + } + + for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) { + if (ifa->ifa_addr == NULL || ifa->ifa_name == NULL) + continue; + if (ifa->ifa_addr->sa_family == af) { + if (strcmp(ifa->ifa_name, interface) == 0) { + if (af == AF_INET) { + *(struct in_addr *)addr = + ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; + } else { + *(struct in6_addr *)addr = + ((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr; + } + freeifaddrs(ifaddr); /* the address was copied out, so the list can be released */ + return 0; + } + } + } + freeifaddrs(ifaddr); + return -1; 
+} + +/* Fill cfg from the parsed command-line options. Returns 0 on success, -1 when the CIDR or an outbound address is invalid, or when an option is not supported by the libslirp this binary was built against. */ +static int slirp4netns_config_from_options(struct slirp4netns_config *cfg, + struct options *opt) +{ + int rc = 0; + cfg->mtu = opt->mtu; + rc = slirp4netns_config_from_cidr(cfg, opt->cidr == NULL ? DEFAULT_CIDR : + opt->cidr); + if (rc < 0) { + goto finish; + } + cfg->enable_ipv6 = opt->enable_ipv6; /* take the flag from the options; cfg itself is not initialized by the caller */ + cfg->disable_host_loopback = opt->disable_host_loopback; + cfg->enable_sandbox = opt->enable_sandbox; + cfg->enable_seccomp = opt->enable_seccomp; + +#if SLIRP_CONFIG_VERSION_MAX >= 2 + cfg->enable_outbound_addr = false; + cfg->enable_outbound_addr6 = false; +#endif + + if (opt->outbound_addr != NULL) { +#if SLIRP_CONFIG_VERSION_MAX >= 2 + cfg->outbound_addr.sin_family = AF_INET; + cfg->outbound_addr.sin_port = 0; // Any local port will do + if (inet_pton(AF_INET, opt->outbound_addr, + &cfg->outbound_addr.sin_addr) == 1) { + cfg->enable_outbound_addr = true; + } else { + if (get_interface_addr(opt->outbound_addr, AF_INET, + &cfg->outbound_addr.sin_addr) != 0) { + fprintf(stderr, "outbound-addr has to be valid ipv4 address or " + "interface name.\n"); + rc = -1; + goto finish; + } + cfg->enable_outbound_addr = true; + } +#else + fprintf(stderr, "slirp4netns has to be compiled against libslirp 4.2.0 " + "or newer for --outbound-addr support.\n"); + rc = -1; + goto finish; +#endif + } + if (opt->outbound_addr6 != NULL) { +#if SLIRP_CONFIG_VERSION_MAX >= 2 + cfg->outbound_addr6.sin6_family = AF_INET6; + cfg->outbound_addr6.sin6_port = 0; // Any local port will do + if (inet_pton(AF_INET6, opt->outbound_addr6, + &cfg->outbound_addr6.sin6_addr) == 1) { + cfg->enable_outbound_addr6 = true; + } else { + if (get_interface_addr(opt->outbound_addr6, AF_INET6, + &cfg->outbound_addr6.sin6_addr) != 0) { + fprintf(stderr, "outbound-addr6 has to be valid ipv6 address or " + "interface name.\n"); + rc = -1; + goto finish; + } + cfg->enable_outbound_addr6 = true; + } +#else + fprintf(stderr, "slirp4netns has to be compiled against libslirp 4.2.0 " + "or newer for --outbound-addr6 
support.\n"); + rc = -1; + goto finish; +#endif + } + +#if SLIRP_CONFIG_VERSION_MAX >= 3 + cfg->disable_dns = opt->disable_dns; +#else + if (opt->disable_dns) { + fprintf(stderr, "slirp4netns has to be compiled against libslirp 4.3.0 " + "or newer for --disable-dns support.\n"); + rc = -1; + goto finish; + } +#endif +finish: + return rc; +} + +/* Entry point: parse the options, fork a child that creates the TAP device inside the target namespace and sends its fd back over a socketpair, wait for that child, then run slirp in the parent. */ +int main(int argc, char *const argv[]) +{ + int sv[2]; + pid_t child_pid; + struct options options; + struct slirp4netns_config slirp4netns_config; + int exit_status = 0; + + parse_args(argc, argv, &options); + if (slirp4netns_config_from_options(&slirp4netns_config, &options) < 0) { + exit_status = EXIT_FAILURE; + goto finish; + } + if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sv) < 0) { + perror("socketpair"); + exit_status = EXIT_FAILURE; + goto finish; + } + if ((child_pid = fork()) < 0) { + perror("fork"); + exit_status = EXIT_FAILURE; + goto finish; + } + if (child_pid == 0) { + if (child(sv[1], options.target_pid, options.do_config_network, + options.tapname, options.netns_path, options.userns_path, + &slirp4netns_config) < 0) { + exit_status = EXIT_FAILURE; + goto finish; + } + } else { + int ret, child_wstatus, child_status; + do + ret = waitpid(child_pid, &child_wstatus, 0); + while (ret < 0 && errno == EINTR); + if (ret < 0) { + perror("waitpid"); + exit_status = EXIT_FAILURE; + goto finish; + } + if (!WIFEXITED(child_wstatus)) { + fprintf(stderr, "child failed(wstatus=%d, !WIFEXITED)\n", + child_wstatus); + exit_status = EXIT_FAILURE; + goto finish; + } + child_status = WEXITSTATUS(child_wstatus); + if (child_status != 0) { + fprintf(stderr, "child failed(%d)\n", child_status); + exit_status = child_status; + goto finish; + } + if (parent(sv[0], options.ready_fd, options.exit_fd, options.api_socket, + &slirp4netns_config, options.target_pid) < 0) { + fprintf(stderr, "parent failed\n"); + exit_status = EXIT_FAILURE; + goto finish; + } + } +finish: + options_destroy(&options); + exit(exit_status); + return 0; +} diff --git 
a/slirp4netns.1.md b/slirp4netns.1.md index 445aebb..7c88440 100644 --- a/slirp4netns.1.md +++ b/slirp4netns.1.md @@ -1,236 +1,265 @@ -SLIRP4NETNS 1 "March 2020" "Rootless Containers" "User Commands" -================================================== - -# NAME - -slirp4netns - User-mode networking for unprivileged network namespaces - -# SYNOPSIS - -slirp4netns [OPTION]... PID|PATH TAPNAME - -# DESCRIPTION - -slirp4netns provides user-mode networking ("slirp") for network namespaces. - -Unlike **veth**(4), slirp4netns does not require the root privileges on the host. - -Default configuration: - -* MTU: 1500 -* CIDR: 10.0.2.0/24 -* Gateway/Host: 10.0.2.2 (network address + 2) -* DNS: 10.0.2.3 (network address + 3) -* IPv6 CIDR: fd00::/64 -* IPv6 Gateway/Host: fd00::2 -* IPv6 DNS: fd00::3 - -# OPTIONS - -**-c**, **--configure** -bring up the TAP interface. IP will be set to 10.0.2.100 (network address + 100) by default. IPv6 will be set to a random address. -Starting with v0.4.0, the loopback interface (**lo**) is brought up as well. - -**-e**, **--exit-fd=FD** -specify the FD for terminating slirp4netns. -When the FD is specified, slirp4netns exits when a **poll(2)** event happens on the FD. - -**-r**, **--ready-fd=FD** -specify the FD to write to when the initialization steps are finished. -When the FD is specified, slirp4netns writes **"1"** to the FD and close the FD. -Prior to v0.4.0, the FD was written after the network configuration (**-c**) -but before the API socket configuration (**-a**). - -**-m**, **--mtu=MTU** (since v0.2.0) -specify MTU (max=65521). - -**-6**, **--enable-ipv6** (since v0.2.0, EXPERIMENTAL) -enable IPv6 - -**-a**, **--api-socket** (since v0.3.0) -API socket path - -**--cidr** (since v0.3.0) -specify CIDR, e.g. 
10.0.2.0/24 - -**--disable-host-loopback** (since v0.3.0) -prohibit connecting to 127.0.0.1:\* on the host namespace - -**--netns-type=TYPE** (since v0.4.0) -specify network namespace type ([path|pid], default=pid) - -**--userns-path=PATH** (since v0.4.0) -specify user namespace path - -**--enable-sandbox** (since v0.4.0) -enter the user namespace and create a new mount namespace where only /etc and -/run are mounted from the host. - -Requires **/etc/resolv.conf** not to be a symlink to a file outside /etc and /run. - -When running as the root, the process does not enter the user namespace but all -the capabilities except `CAP_NET_BIND_SERVICE` are dropped. - -**--enable-seccomp** (since v0.4.0, EXPERIMENTAL) -enable **seccomp(2)** to limit syscalls. -Typically used in conjunction with **--enable-sandbox**. - -**-h**, **--help** (since v0.2.0) -show help and exit - -**-v**, **--version** (since v0.2.0) -show version and exit - -# EXAMPLE - -Terminal 1: Create user/network/mount namespaces -```console -$ unshare --user --map-root-user --net --mount -unshared$ echo $$ > /tmp/pid -``` - -Terminal 2: Start slirp4netns -```console -$ slirp4netns --configure --mtu=65520 $(cat /tmp/pid) tap0 -starting slirp, MTU=65520 -... 
-``` - -Terminal 1: Make sure **tap0** is configured and connected to the Internet -```console -unshared$ ip a -1: lo: mtu 65536 qdisc noop state DOWN group default qlen 1000 - link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 -3: tap0: mtu 65520 qdisc fq_codel state UNKNOWN group default qlen 1000 - link/ether c2:28:0c:0e:29:06 brd ff:ff:ff:ff:ff:ff - inet 10.0.2.100/24 brd 10.0.2.255 scope global tap0 - valid_lft forever preferred_lft forever - inet6 fe80::c028:cff:fe0e:2906/64 scope link - valid_lft forever preferred_lft forever -unshared$ echo "nameserver 10.0.2.3" > /tmp/resolv.conf -unshared$ mount --bind /tmp/resolv.conf /etc/resolv.conf -unshared$ curl https://example.com -``` - -Bind-mounting **/etc/resolv.conf** is only needed when **/etc/resolv.conf** on -the host refers to loopback addresses (**127.0.0.X**, typically because of -**dnsmasq**(8) or **systemd-resolved.service**(8)) that cannot be accessed from -the namespace. - -If your **/etc/resolv.conf** on the host is managed by **networkmanager**(8) -or **systemd-resolved.service**(8), you might need to mount a new filesystem on -**/etc** instead, so as to prevent the new **/etc/resolv.conf** from being -unmounted unexpectedly when **/etc/resolv.conf** on the host is regenerated. - -```console -unshared$ mkdir /tmp/a /tmp/b -unshared$ mount --rbind /etc /tmp/a -unshared$ mount --rbind /tmp/b /etc -unshared$ mkdir /etc/.ro -unshared$ mount --move /tmp/a /etc/.ro -unshared$ cd /etc -unshared$ for f in .ro/*; do ln -s $f $(basename $f); done -unshared$ rm resolv.conf -unshared$ echo "nameserver 10.0.2.3" > /tmp/resolv.conf -unshared$ curl https://example.com -``` - -# ROUTING PING PACKETS - -To route ping packets, you need to set up **net.ipv4.ping_group_range** properly -as the root. - -e.g. 
-```console -$ sudo sh -c "echo 0 2147483647 > /proc/sys/net/ipv4/ping_group_range" -``` - -# FILTERING CONNECTIONS - -By default, ports listening on **INADDR_LOOPBACK** (**127.0.0.1**) on the host are accessible from the child namespace via the gateway (default: **10.0.2.2**). -**--disable-host-loopback** can be used to prohibit connecting to **INADDR_LOOPBACK** on the host. - -However, a host loopback address might be still accessible via the built-in DNS (default: **10.0.2.3**) if `/etc/resolv.conf` on the host refers to a loopback address. -You may want to set up iptables for limiting access to the built-in DNS in such a case. - -```console -unshared$ iptables -A OUTPUT -d 10.0.2.3 -p udp --dport 53 -j ACCEPT -unshared$ iptables -A OUTPUT -d 10.0.2.3 -j DROP -``` - -# API SOCKET - -slirp4netns can provide QMP-like API server over an UNIX socket file: - -```console -$ slirp4netns --api-socket /tmp/slirp4netns.sock ... -``` - -**add_hostfwd**: Expose a port (IPv4 only) - -```console -$ json='{"execute": "add_hostfwd", "arguments": {"proto": "tcp", "host_addr": "0.0.0.0", "host_port": 8080, "guest_addr": "10.0.2.100", "guest_port": 80}}' -$ echo -n $json | nc -U /tmp/slirp4netns.sock -{ "return": {"id": 42}} -``` - -If **host_addr** is not specified, then it defaults to "0.0.0.0". - -If **guest_addr** is not specified, then it will be set to the default address that corresponds to --configure. 
- -**list_hostfwd**: List exposed ports - -```console -$ json='{"execute": "list_hostfwd"}' -$ echo -n $json | nc -U /tmp/slirp4netns.sock -{ "return": {"entries": [{"id": 42, "proto": "tcp", "host_addr": "0.0.0.0", "host_port": 8080, "guest_addr": "10.0.2.100", "guest_port": 80}]}} -``` - -**remove_hostfwd**: Remove an exposed port - -```console -$ json='{"execute": "remove_hostfwd", "arguments": {"id": 42}}' -$ echo -n $json | nc -U /tmp/slirp4netns.sock -{ "return": {}} -``` - -Remarks: - -* Client needs to **shutdown(2)** the socket with **SHUT_WR** after sending every request. - i.e. No support for keep-alive and timeout. -* slirp4netns "stops the world" during processing API requests. -* A request must be less than 4096 bytes. -* JSON responses may contain **error** instead of **return**. - -# DEFINED NAMESPACE PATHS -A user can define a network namespace path as opposed to the default process ID: - -```console -$ slirp4netns --netns-type=path ... /path/to/netns tap0 -``` -Currently, the **netns-type=TYPE** argument supports **path** or **pid** args with the default being **pid**. - -Additionally, a **--userns-path=PATH** argument can be included to override any user namespace path defaults -```console -$ slirp4netns --netns-type=path --userns-path=/path/to/userns /path/to/netns tap0 -``` - -# BUGS - -Kernel 4.20 bumped up the default value of **/proc/sys/net/ipv4/tcp_rmem** from 87380 to 131072. -This is known to slow down slirp4netns port forwarding: **https://github.com/rootless-containers/slirp4netns/issues/128**. - -As a workaround, you can adjust the value of **/proc/sys/net/ipv4/tcp_rmem** inside the namespace. -No real root privilege is needed to modify the file since kernel 4.15. 
- -```console -unshared$ c=$(cat /proc/sys/net/ipv4/tcp_rmem); echo $c | sed -e s/131072/87380/g > /proc/sys/net/ipv4/tcp_rmem -``` - -# SEE ALSO - -**network_namespaces**(7), **user_namespaces**(7), **veth**(4) - -# AVAILABILITY - -The slirp4netns command is available from **https://github.com/rootless-containers/slirp4netns** under GNU GENERAL PUBLIC LICENSE Version 2 (or later). +SLIRP4NETNS 1 "March 2020" "Rootless Containers" "User Commands" +================================================== + +# NAME + +slirp4netns - User-mode networking for unprivileged network namespaces + +# SYNOPSIS + +slirp4netns [OPTION]... PID|PATH TAPNAME + +# DESCRIPTION + +slirp4netns provides user-mode networking ("slirp") for network namespaces. + +Unlike **veth**(4), slirp4netns does not require the root privileges on the host. + +Default configuration: + +* MTU: 1500 +* CIDR: 10.0.2.0/24 +* Gateway/Host: 10.0.2.2 (network address + 2) +* DNS: 10.0.2.3 (network address + 3) +* IPv6 CIDR: fd00::/64 +* IPv6 Gateway/Host: fd00::2 +* IPv6 DNS: fd00::3 + +# OPTIONS + +**-c**, **--configure** +bring up the TAP interface. IP will be set to 10.0.2.100 (network address + 100) by default. IPv6 will be set to a random address. +Starting with v0.4.0, the loopback interface (**lo**) is brought up as well. + +**-e**, **--exit-fd=FD** +specify the FD for terminating slirp4netns. +When the FD is specified, slirp4netns exits when a **poll(2)** event happens on the FD. + +**-r**, **--ready-fd=FD** +specify the FD to write to when the initialization steps are finished. +When the FD is specified, slirp4netns writes **"1"** to the FD and close the FD. +Prior to v0.4.0, the FD was written after the network configuration (**-c**) +but before the API socket configuration (**-a**). + +**-m**, **--mtu=MTU** (since v0.2.0) +specify MTU (max=65521). 
+ +**-6**, **--enable-ipv6** (since v0.2.0, EXPERIMENTAL) +enable IPv6 + +**-a**, **--api-socket** (since v0.3.0) +API socket path + +**--cidr** (since v0.3.0) +specify CIDR, e.g. 10.0.2.0/24 + +**--disable-host-loopback** (since v0.3.0) +prohibit connecting to 127.0.0.1:\* on the host namespace + +**--netns-type=TYPE** (since v0.4.0) +specify network namespace type ([path|pid], default=pid) + +**--userns-path=PATH** (since v0.4.0) +specify user namespace path + +**--enable-sandbox** (since v0.4.0) +enter the user namespace and create a new mount namespace where only /etc and +/run are mounted from the host. + +Requires **/etc/resolv.conf** not to be a symlink to a file outside /etc and /run. + +When running as the root, the process does not enter the user namespace but all +the capabilities except `CAP_NET_BIND_SERVICE` are dropped. + +**--enable-seccomp** (since v0.4.0, EXPERIMENTAL) +enable **seccomp(2)** to limit syscalls. +Typically used in conjunction with **--enable-sandbox**. + +**--outbound-addr=IPv4** (since v1.1.0, EXPERIMENTAL) +specify outbound ipv4 address slirp should bind to + +**--outbound-addr=INTERFACE** (since v1.1.0, EXPERIMENTAL) +specify outbound interface slirp should bind to (ipv4 traffic only) + +**--outbound-addr6=IPv6** (since v1.1.0, EXPERIMENTAL) +specify outbound ipv6 address slirp should bind to + +**--outbound-addr6=INTERFACE** (since v1.1.0, EXPERIMENTAL) +specify outbound interface slirp should bind to (ipv6 traffic only) + +**--disable-dns** (since v1.1.0) +disable built-in DNS (10.0.2.3 by default) + +**-h**, **--help** (since v0.2.0) +show help and exit + +**-v**, **--version** (since v0.2.0) +show version and exit + + +# EXAMPLE + +Terminal 1: Create user/network/mount namespaces +```console +$ unshare --user --map-root-user --net --mount +unshared$ echo $$ > /tmp/pid +``` + +Terminal 2: Start slirp4netns +```console +$ slirp4netns --configure --mtu=65520 $(cat /tmp/pid) tap0 +starting slirp, MTU=65520 +... 
+``` + +Terminal 1: Make sure **tap0** is configured and connected to the Internet +```console +unshared$ ip a +1: lo: mtu 65536 qdisc noop state DOWN group default qlen 1000 + link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 +3: tap0: mtu 65520 qdisc fq_codel state UNKNOWN group default qlen 1000 + link/ether c2:28:0c:0e:29:06 brd ff:ff:ff:ff:ff:ff + inet 10.0.2.100/24 brd 10.0.2.255 scope global tap0 + valid_lft forever preferred_lft forever + inet6 fe80::c028:cff:fe0e:2906/64 scope link + valid_lft forever preferred_lft forever +unshared$ echo "nameserver 10.0.2.3" > /tmp/resolv.conf +unshared$ mount --bind /tmp/resolv.conf /etc/resolv.conf +unshared$ curl https://example.com +``` + +Bind-mounting **/etc/resolv.conf** is only needed when **/etc/resolv.conf** on +the host refers to loopback addresses (**127.0.0.X**, typically because of +**dnsmasq**(8) or **systemd-resolved.service**(8)) that cannot be accessed from +the namespace. + +If your **/etc/resolv.conf** on the host is managed by **networkmanager**(8) +or **systemd-resolved.service**(8), you might need to mount a new filesystem on +**/etc** instead, so as to prevent the new **/etc/resolv.conf** from being +unmounted unexpectedly when **/etc/resolv.conf** on the host is regenerated. + +```console +unshared$ mkdir /tmp/a /tmp/b +unshared$ mount --rbind /etc /tmp/a +unshared$ mount --rbind /tmp/b /etc +unshared$ mkdir /etc/.ro +unshared$ mount --move /tmp/a /etc/.ro +unshared$ cd /etc +unshared$ for f in .ro/*; do ln -s $f $(basename $f); done +unshared$ rm resolv.conf +unshared$ echo "nameserver 10.0.2.3" > /tmp/resolv.conf +unshared$ curl https://example.com +``` + +# ROUTING PING PACKETS + +To route ping packets, you need to set up **net.ipv4.ping_group_range** properly +as the root. + +e.g. 
+```console +$ sudo sh -c "echo 0 2147483647 > /proc/sys/net/ipv4/ping_group_range" +``` + +# FILTERING CONNECTIONS + +By default, ports listening on **INADDR_LOOPBACK** (**127.0.0.1**) on the host are accessible from the child namespace via the gateway (default: **10.0.2.2**). +**--disable-host-loopback** can be used to prohibit connecting to **INADDR_LOOPBACK** on the host. + +However, a host loopback address might be still accessible via the built-in DNS (default: **10.0.2.3**) if `/etc/resolv.conf` on the host refers to a loopback address. +You may want to set up iptables for limiting access to the built-in DNS in such a case. + +```console +unshared$ iptables -A OUTPUT -d 10.0.2.3 -p udp --dport 53 -j ACCEPT +unshared$ iptables -A OUTPUT -d 10.0.2.3 -j DROP +``` + +# API SOCKET + +slirp4netns can provide QMP-like API server over an UNIX socket file: + +```console +$ slirp4netns --api-socket /tmp/slirp4netns.sock ... +``` + +**add_hostfwd**: Expose a port (IPv4 only) + +```console +$ json='{"execute": "add_hostfwd", "arguments": {"proto": "tcp", "host_addr": "0.0.0.0", "host_port": 8080, "guest_addr": "10.0.2.100", "guest_port": 80}}' +$ echo -n $json | nc -U /tmp/slirp4netns.sock +{ "return": {"id": 42}} +``` + +If **host_addr** is not specified, then it defaults to "0.0.0.0". + +If **guest_addr** is not specified, then it will be set to the default address that corresponds to --configure. 
+ +**list_hostfwd**: List exposed ports + +```console +$ json='{"execute": "list_hostfwd"}' +$ echo -n $json | nc -U /tmp/slirp4netns.sock +{ "return": {"entries": [{"id": 42, "proto": "tcp", "host_addr": "0.0.0.0", "host_port": 8080, "guest_addr": "10.0.2.100", "guest_port": 80}]}} +``` + +**remove_hostfwd**: Remove an exposed port + +```console +$ json='{"execute": "remove_hostfwd", "arguments": {"id": 42}}' +$ echo -n $json | nc -U /tmp/slirp4netns.sock +{ "return": {}} +``` + +Remarks: + +* Client needs to **shutdown(2)** the socket with **SHUT_WR** after sending every request. + i.e. No support for keep-alive and timeout. +* slirp4netns "stops the world" while processing API requests. +* A request must be less than 4096 bytes. +* JSON responses may contain **error** instead of **return**. + +# DEFINED NAMESPACE PATHS +A user can define a network namespace path as opposed to the default process ID: + +```console +$ slirp4netns --netns-type=path ... /path/to/netns tap0 +``` +Currently, the **--netns-type=TYPE** argument supports **path** or **pid** args with the default being **pid**. + +Additionally, a **--userns-path=PATH** argument can be included to override any user namespace path defaults: +```console +$ slirp4netns --netns-type=path --userns-path=/path/to/userns /path/to/netns tap0 +``` + +# OUTBOUND ADDRESSES +A user can define preferred outbound IPv4 and IPv6 addresses in multi-IP scenarios. + +```console +$ slirp4netns --outbound-addr=10.2.2.10 --outbound-addr6=fe80::10 ... +``` + +Optionally you can use interface names instead of IP addresses. + +```console +$ slirp4netns --outbound-addr=eth0 --outbound-addr6=eth0 ... +``` + +# BUGS + +Kernel 4.20 bumped up the default value of **/proc/sys/net/ipv4/tcp_rmem** from 87380 to 131072. +This is known to slow down slirp4netns port forwarding: **https://github.com/rootless-containers/slirp4netns/issues/128**. + +As a workaround, you can adjust the value of **/proc/sys/net/ipv4/tcp_rmem** inside the namespace. 
+No real root privilege is needed to modify the file since kernel 4.15. + +```console +unshared$ c=$(cat /proc/sys/net/ipv4/tcp_rmem); echo $c | sed -e s/131072/87380/g > /proc/sys/net/ipv4/tcp_rmem +``` + +# SEE ALSO + +**network_namespaces**(7), **user_namespaces**(7), **veth**(4) + +# AVAILABILITY + +The slirp4netns command is available from **https://github.com/rootless-containers/slirp4netns** under GNU GENERAL PUBLIC LICENSE Version 2 (or later). diff --git a/slirp4netns.c b/slirp4netns.c index 4b16af7..8748808 100644 --- a/slirp4netns.c +++ b/slirp4netns.c @@ -275,6 +275,24 @@ Slirp *create_slirp(void *opaque, struct slirp4netns_config *s4nn) cfg.if_mtu = s4nn->mtu; cfg.if_mru = s4nn->mtu; cfg.disable_host_loopback = s4nn->disable_host_loopback; +#if SLIRP_CONFIG_VERSION_MAX >= 2 + cfg.outbound_addr = NULL; + cfg.outbound_addr6 = NULL; + if (s4nn->enable_outbound_addr) { + cfg.version = 2; + cfg.outbound_addr = &s4nn->outbound_addr; + } + if (s4nn->enable_outbound_addr6) { + cfg.version = 2; + cfg.outbound_addr6 = &s4nn->outbound_addr6; + } +#endif +#if SLIRP_CONFIG_VERSION_MAX >= 3 + if (s4nn->disable_dns) { + cfg.version = 3; + cfg.disable_dns = true; + } +#endif slirp = slirp_new(&cfg, &libslirp_cb, opaque); if (slirp == NULL) { fprintf(stderr, "slirp_new failed\n"); diff --git a/slirp4netns.h b/slirp4netns.h index a5df49d..afe6065 100644 --- a/slirp4netns.h +++ b/slirp4netns.h @@ -1,21 +1,32 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef SLIRP4NETNS_H -# define SLIRP4NETNS_H +#define SLIRP4NETNS_H #include struct slirp4netns_config { - unsigned int mtu; - struct in_addr vnetwork; // 10.0.2.0 - struct in_addr vnetmask; // 255.255.255.0 - struct in_addr vhost; // 10.0.2.2 - struct in_addr vdhcp_start; // 10.0.2.15 - struct in_addr vnameserver; // 10.0.2.3 - struct in_addr recommended_vguest; // 10.0.2.100 (slirp itself is unaware of vguest) - bool enable_ipv6; - bool disable_host_loopback; - bool enable_sandbox; - bool enable_seccomp; + 
unsigned int mtu; + struct in_addr vnetwork; // 10.0.2.0 + struct in_addr vnetmask; // 255.255.255.0 + struct in_addr vhost; // 10.0.2.2 + struct in_addr vdhcp_start; // 10.0.2.15 + struct in_addr vnameserver; // 10.0.2.3 + struct in_addr + recommended_vguest; // 10.0.2.100 (slirp itself is unaware of vguest) + bool enable_ipv6; + bool disable_host_loopback; + bool enable_sandbox; + bool enable_seccomp; +#if SLIRP_CONFIG_VERSION_MAX >= 2 + bool enable_outbound_addr; + struct sockaddr_in outbound_addr; + bool enable_outbound_addr6; + struct sockaddr_in6 outbound_addr6; +#endif +#if SLIRP_CONFIG_VERSION_MAX >= 3 + bool disable_dns; +#endif }; -int do_slirp(int tapfd, int readyfd, int exitfd, const char *api_socket, struct slirp4netns_config *cfg); +int do_slirp(int tapfd, int readyfd, int exitfd, const char *api_socket, + struct slirp4netns_config *cfg); #endif diff --git a/tests/common.sh b/tests/common.sh index 5f94e89..c7f9aa7 100755 --- a/tests/common.sh +++ b/tests/common.sh @@ -50,6 +50,18 @@ function wait_for_ping_connectivity { done } +function wait_for_connectivity { + COUNTER=0 + while [ $COUNTER -lt 40 ]; do + if echo "wait_for_connectivity" | nsenter --preserve-credentials -U -n --target=$1 ncat -v $2 $3; then + break + else + sleep 0.5 + fi + let COUNTER=COUNTER+1 + done +} + function wait_for_file_content { # Wait for a file to get the specified content. COUNTER=0 diff --git a/tests/test-slirp4netns-disable-dns.sh b/tests/test-slirp4netns-disable-dns.sh new file mode 100755 index 0000000..c148f3d --- /dev/null +++ b/tests/test-slirp4netns-disable-dns.sh @@ -0,0 +1,35 @@ +#!/bin/bash +set -xeuo pipefail + +SLIRP_CONFIG_VERSION_MAX=$(slirp4netns -v | grep "SLIRP_CONFIG_VERSION_MAX: " | sed 's#SLIRP_CONFIG_VERSION_MAX: \(\)##') + +if [ "${SLIRP_CONFIG_VERSION_MAX:-0}" -lt 3 ]; then + printf "'--disable-dns' requires SLIRP_CONFIG_VERSION_MAX 3 or newer. Test skipped..." + exit 0 +fi + +. 
$(dirname $0)/common.sh + +port=53 +unshare -r -n sleep infinity & +child=$! + +wait_for_network_namespace $child + +mtu=${MTU:=1500} +slirp4netns -c --mtu $mtu --disable-dns $child tun11 & +slirp_pid=$! + +wait_for_network_device $child tun11 +# ping to 10.0.2.2 +wait_for_ping_connectivity $child 10.0.2.2 + +function cleanup() { + kill -9 $child $slirp_pid +} +trap cleanup EXIT + +set +e +err=$(echo "should fail" | nsenter --preserve-credentials -U -n --target=$child ncat -v 10.0.2.3 $port 2>&1) +set -e +echo $err | grep "Connection timed out" diff --git a/tests/test-slirp4netns-outbound-addr.sh b/tests/test-slirp4netns-outbound-addr.sh new file mode 100755 index 0000000..4ad1947 --- /dev/null +++ b/tests/test-slirp4netns-outbound-addr.sh @@ -0,0 +1,53 @@ +#!/bin/bash +set -xeuo pipefail + +SLIRP_CONFIG_VERSION_MAX=$(slirp4netns -v | grep "SLIRP_CONFIG_VERSION_MAX: " | sed 's#SLIRP_CONFIG_VERSION_MAX: \(\)##') + +if [ "${SLIRP_CONFIG_VERSION_MAX:-0}" -lt 2 ]; then + printf "'--outbound-addr' requires SLIRP_CONFIG_VERSION_MAX 2 or newer. Test skipped..." + exit 0 +fi + +. $(dirname $0)/common.sh + +IPv4_1="127.0.0.1" +IPv4_2=$(ip a | sed -En 's/127.0.0.1//;s/.*inet (addr:)?(([0-9]*\.){3}[0-9]*).*/\2/p' | head -n 1) + +# For future ipv6 tests +#IPv6_1="::1" +#IPv6_2=$(ip a | sed -En 's/::1\/128//;s/.*inet6 (addr:)?([^ ]*)\/.*$/\2/p' | head -n 1) + +function cleanup() { + rm -rf ncat.log + kill -9 $child $slirp_pid || exit 0 +} +trap cleanup EXIT + +port=12122 +mtu=${MTU:=1500} + +IPs=("$IPv4_1" "$IPv4_2") +for ip in "${IPs[@]}"; do + ncat -l $port -v >ncat.log 2>&1 & + ncat1=$! + + unshare -r -n sleep infinity & + child=$! + + wait_for_network_namespace $child + + slirp4netns -c --mtu $mtu --outbound-addr="$ip" $child tun11 & + slirp_pid=$! + + wait_for_network_device $child tun11 + + wait_for_connectivity $child 10.0.2.2 $port + + wait_process_exits $ncat1 + if ! 
grep "$ip" ncat.log; then + printf "%s not found in ncat.log" "$ip" + exit 1 + fi + cleanup + let port=port+1 +done