Skip to content

Commit

Permalink
cilium: Use cilium-envoy-starter to run Envoy without privileges
Browse files Browse the repository at this point in the history
Introduce `cilium-envoy-starter`, which forks and execs `cilium-envoy`
after dropping all privileges, so that `cilium-envoy` itself runs
without privileges.

Bpf map operations and privileged socket option operations are now
performed in `cilium-envoy-starter` on behalf of the Cilium filters
running in `cilium-envoy`, which send their requests over a pipe between
the two processes. The protocol over this pipe currently supports only
the following operations:

- dump the capabilities that cilium-envoy-starter is running with
- open bpf maps
- perform bpf map lookups
- set socket options

cilium-envoy-starter fails to start if it does not have the privileges
needed for the above operations.

cilium-envoy now exits if it is running with any privileges when Cilium
filters are first configured.

Implementation details:

- libcap is not used, as it would be hard to cross-compile for both arm64
  and amd64. Fortunately we only need to drop capabilities, so using the
  syscall interface is not complicated.

- Patch Envoy to support setting socket options from listener filters and
  use them to set privileged options for the listener.

- Remove the unused support for creating and manipulating bpf maps.

- Remove the setrlimit call, which is only needed if the process is
  creating bpf maps.

- Minor cleanup in the socket option implementation: move the fields that
  the SocketMarkOption class does not use into SocketOption, where they
  are used.

Signed-off-by: Jarno Rajahalme <jarno@isovalent.com>
  • Loading branch information
jrajahalme committed Sep 4, 2023
1 parent 35a929a commit 9a0da50
Show file tree
Hide file tree
Showing 20 changed files with 1,110 additions and 154 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
/bazel-*

/cilium-envoy
/cilium-envoy-starter
/Dockerfile.istio_proxy
/Dockerfile.builder-refresh

Expand Down
8 changes: 8 additions & 0 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ exports_files([
"linux/type_mapper.h",
])

envoy_cc_binary(
name = "cilium-envoy-starter",
deps = [
"//starter:main_entry_lib",
],
repository = "@envoy",
)

envoy_cc_binary(
name = "cilium-envoy",
repository = "@envoy",
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ RUN --mount=target=/root/.cache,type=cache,id=$TARGETARCH,sharing=private if [ "
#
# Build dependencies
#
RUN --mount=target=/root/.cache,type=cache,id=$TARGETARCH,sharing=private BAZEL_BUILD_OPTS="${BAZEL_BUILD_OPTS} --disk_cache=/tmp/bazel-cache" PKG_BUILD=1 V=$V DEBUG=$DEBUG DESTDIR=/tmp/install make bazel-bin/cilium-envoy
RUN --mount=target=/root/.cache,type=cache,id=$TARGETARCH,sharing=private BAZEL_BUILD_OPTS="${BAZEL_BUILD_OPTS} --disk_cache=/tmp/bazel-cache" PKG_BUILD=1 V=$V DEBUG=$DEBUG DESTDIR=/tmp/install make bazel-bin/cilium-envoy-starter bazel-bin/cilium-envoy

# By default this stage picks up the result of the build above, but ARCHIVE_IMAGE can be
# overridden to point to a saved image of an earlier run of that stage.
Expand Down
17 changes: 13 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ include Makefile.defs

COMPILER_DEP := clang.bazelrc

ENVOY_BINS = cilium-envoy bazel-bin/cilium-envoy
ENVOY_BINS = cilium-envoy bazel-bin/cilium-envoy cilium-envoy-starter bazel-bin/cilium-envoy-starter
ENVOY_TESTS = bazel-bin/tests/*_test

SHELL=/bin/bash -o pipefail
Expand Down Expand Up @@ -61,7 +61,7 @@ else
endif

ifdef PKG_BUILD
all: cilium-envoy
all: cilium-envoy-starter cilium-envoy
else
include Makefile.dev
include Makefile.docker
Expand Down Expand Up @@ -102,6 +102,13 @@ bazel-bin/cilium-envoy: $(COMPILER_DEP) SOURCE_VERSION
cilium-envoy: bazel-bin/cilium-envoy
mv $< $@

bazel-bin/cilium-envoy-starter: $(COMPILER_DEP) SOURCE_VERSION
@$(ECHO_BAZEL)
$(BAZEL) $(BAZEL_OPTS) build $(BAZEL_BUILD_OPTS) //:cilium-envoy-starter $(BAZEL_FILTER)

cilium-envoy-starter: bazel-bin/cilium-envoy-starter
mv $< $@

BAZEL_CACHE := $(subst --disk_cache=,,$(filter --disk_cache=%, $(BAZEL_BUILD_OPTS)))

GLIBC_VERSION ?= $(shell ldd --version | sed -n 's/.*GLIBC \([0-9.]\+\).*/\1/p')
Expand All @@ -116,12 +123,14 @@ $(DESTDIR)$(GLIBC_DIR): bazel-bin/cilium-envoy
$(SUDO) cp /usr/$${ARCH_TAG}-linux-gnu/lib/$$lib $@; \
done

install: bazel-bin/cilium-envoy
install: bazel-bin/cilium-envoy-starter bazel-bin/cilium-envoy
$(SUDO) $(INSTALL) -m 0755 -d $(DESTDIR)$(BINDIR)
$(SUDO) $(INSTALL) -m 0755 -T $< $(DESTDIR)$(BINDIR)/cilium-envoy
$(SUDO) $(INSTALL) -m 0755 -T bazel-bin/cilium-envoy-starter $(DESTDIR)$(BINDIR)/cilium-envoy-starter
$(SUDO) $(INSTALL) -m 0755 -T bazel-bin/cilium-envoy $(DESTDIR)$(BINDIR)/cilium-envoy

install-glibc: install $(DESTDIR)$(GLIBC_DIR)
LD_LINUX=$$(basename $$(patchelf --print-interpreter bazel-bin/cilium-envoy)); \
$(SUDO) patchelf --set-interpreter $(GLIBC_DIR)/$${LD_LINUX} --set-rpath $(GLIBC_DIR) $(DESTDIR)$(BINDIR)/cilium-envoy-starter
$(SUDO) patchelf --set-interpreter $(GLIBC_DIR)/$${LD_LINUX} --set-rpath $(GLIBC_DIR) $(DESTDIR)$(BINDIR)/cilium-envoy

# Remove the binaries
Expand Down
1 change: 1 addition & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ git_repository(
"@//patches:0002-upstream-Add-callback-for-upstream-authorization.patch",
"@//patches:0003-tcp_proxy-Add-filter-state-proxy_read_before_connect.patch",
"@//patches:0004-router-Do-not-set-SNI-or-SAN-due-to-auto_sni-or-auto.patch",
"@//patches:0005-listener-add-socket-options.patch",
],
)

Expand Down
1 change: 1 addition & 0 deletions cilium/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ envoy_cc_library(
],
repository = "@envoy",
deps = [
"//starter:privileged_service_client_lib",
"@envoy//source/common/common:logger_lib",
"@envoy//source/common/common:utility_lib",
],
Expand Down
93 changes: 10 additions & 83 deletions cilium/bpf.cc
Original file line number Diff line number Diff line change
@@ -1,16 +1,5 @@
#include "cilium/bpf.h"

#include <string.h>
#include <sys/resource.h>
#include <unistd.h>

#include <cstdint>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include "starter/privileged_service_client.h"

#include "source/common/common/utility.h"

Expand All @@ -24,10 +13,7 @@ enum {
};

Bpf::Bpf(uint32_t map_type, uint32_t key_size, uint32_t value_size)
: fd_(-1), map_type_(map_type), key_size_(key_size), value_size_(value_size) {
struct rlimit rl = {RLIM_INFINITY, RLIM_INFINITY};
setrlimit(RLIMIT_MEMLOCK, &rl);
}
: fd_(-1), map_type_(map_type), key_size_(key_size), value_size_(value_size) {}

Bpf::~Bpf() { close(); }

Expand All @@ -39,10 +25,10 @@ void Bpf::close() {

bool Bpf::open(const std::string& path) {
bool log_on_error = ENVOY_LOG_CHECK_LEVEL(trace);
union bpf_attr attr = {};
attr.pathname = uintptr_t(path.c_str());

fd_ = bpfSyscall(BPF_OBJ_GET, &attr);
auto& cilium_calls = PrivilegedService::Singleton::get();
auto ret = cilium_calls.bpf_open(path.c_str());
fd_ = ret.return_value_;
if (fd_ >= 0) {
// Open fdinfo to check the map type and key and value size.
std::string line;
Expand Down Expand Up @@ -98,79 +84,20 @@ bool Bpf::open(const std::string& path) {
bpf_file_path);
}
close();
} else if (errno == ENOENT && log_on_error) {
} else if (ret.errno_ == ENOENT && log_on_error) {
ENVOY_LOG(debug, "cilium.bpf_metadata: bpf syscall for map {} failed: {}", path,
Envoy::errorDetails(errno));
Envoy::errorDetails(ret.errno_));
} else if (log_on_error) {
ENVOY_LOG(warn, "cilium.bpf_metadata: bpf syscall for map {} failed: {}", path,
Envoy::errorDetails(errno));
Envoy::errorDetails(ret.errno_));
}

return false;
}

bool Bpf::create(uint32_t max_entries, uint32_t flags) {
union bpf_attr attr = {};
attr.map_type = map_type_;
attr.key_size = key_size_;
attr.value_size = value_size_;
attr.max_entries = max_entries;
attr.map_flags = flags;

fd_ = bpfSyscall(BPF_MAP_CREATE, &attr);
return fd_ >= 0;
}

bool Bpf::pin(const std::string& path) {
union bpf_attr attr = {};
attr.pathname = uintptr_t(path.c_str());
attr.bpf_fd = uint32_t(fd_);

return bpfSyscall(BPF_OBJ_PIN, &attr) == 0;
}

bool Bpf::insert(const void* key, const void* value) {
union bpf_attr attr = {};
attr.map_fd = uint32_t(fd_);
attr.key = uintptr_t(key);
attr.value = uintptr_t(value);
attr.flags = BPF_ANY;

return bpfSyscall(BPF_MAP_UPDATE_ELEM, &attr) == 0;
}

bool Bpf::remove(const void* key) {
union bpf_attr attr = {};
attr.map_fd = uint32_t(fd_);
attr.key = uintptr_t(key);
attr.flags = BPF_ANY;

return bpfSyscall(BPF_MAP_DELETE_ELEM, &attr) == 0;
}

bool Bpf::lookup(const void* key, void* value) {
union bpf_attr attr = {};
attr.map_fd = uint32_t(fd_);
attr.key = uintptr_t(key);
attr.value = uintptr_t(value);

return bpfSyscall(BPF_MAP_LOOKUP_ELEM, &attr) == 0;
}

#ifndef __NR_bpf
#if defined(__i386__)
#define __NR_bpf 357
#elif defined(__x86_64__)
#define __NR_bpf 321
#elif defined(__aarch64__)
#define __NR_bpf 280
#else
#error __NR_bpf not defined.
#endif
#endif

int Bpf::bpfSyscall(int cmd, union bpf_attr* attr) {
return ::syscall(__NR_bpf, cmd, attr, sizeof(*attr));
auto& cilium_calls = PrivilegedService::Singleton::get();
return cilium_calls.bpf_lookup(fd_, key, key_size_, value, value_size_).return_value_ == 0;
}

} // namespace Cilium
Expand Down
36 changes: 0 additions & 36 deletions cilium/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,39 +46,6 @@ class Bpf : public Logger::Loggable<Logger::Id::filter> {
*/
bool open(const std::string& path);

/**
* Create a new bpf map.
* @param max_entries the maximum capacity of the bpf map to be created. For
* many map types memory for this number of entries is allocated when the map
* is created.
* @param flags the required flags for the map type. Typically 0.
* @returns boolean for success of the operation.
*/
bool create(uint32_t max_entries, uint32_t flags);

/**
* Pin the map to a file system path.
* @param path the file system path where to pin the bpf map.
* @returns boolean for success of the operation.
*/
bool pin(const std::string& path);

/**
* Insert an entry with value and identified with the key to the map.
* @param key pointer to the key identifying the new entry to be inserted.
* @param value pointer to the value to be stored in the new entry to be
* inserted.
* @returns boolean for success of the operation.
*/
bool insert(const void* key, const void* value);

/**
* Delete the entry identified with the key from the map, if it exists.
* @param key pointer to the key identifying the new entry to be inserted.
* @returns boolean for success of the operation.
*/
bool remove(const void* key);

/**
* Lookup an entry from the bpf map identified with the key, storing the found
* value, if any.
Expand All @@ -88,9 +55,6 @@ class Bpf : public Logger::Loggable<Logger::Id::filter> {
*/
bool lookup(const void* key, void* value);

private:
int bpfSyscall(int cmd, union bpf_attr* attr);

protected:
int fd_;

Expand Down
14 changes: 8 additions & 6 deletions cilium/bpf_metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ class BpfMetadataConfigFactory : public NamedListenerFilterConfigFactory {
MessageUtil::downcastAndValidate<const ::cilium::BpfMetadata&>(
proto_config, context.messageValidationVisitor()),
context);
// Set the socket mark option for the listen socket.
// Can use identity 0 on the listen socket option, as the bpf datapath is only interested
// in whether the proxy is ingress, egress, or if there is no proxy at all.
std::shared_ptr<Envoy::Network::Socket::Options> options = std::make_shared<Envoy::Network::Socket::Options>();
uint32_t mark = (config->is_ingress_) ? 0x0A00 : 0x0B00;
options->push_back(std::make_shared<Cilium::SocketMarkOption>(mark, 0));
context.addListenSocketOptions(options);

return [listener_filter_matcher,
config](Network::ListenerFilterManager& filter_manager) mutable -> void {
filter_manager.addAcceptFilter(listener_filter_matcher,
Expand Down Expand Up @@ -319,12 +327,6 @@ bool Config::getMetadata(Network::ConnectionSocket& socket) {
src_address = nullptr;
}

// Add transparent options if either original or explicitly set source address is used
if (src_address || ipv4_source_address || ipv6_source_address) {
socket.addOptions(Network::SocketOptionFactory::buildIpTransparentOptions());
socket.addOptions(Network::SocketOptionFactory::buildReusePortOptions());
}

// Add metadata for policy based listener filter chain matching.
// This requires the TLS inspector, if used, to run before us.
// Note: This requires egress policy be known before upstream host selection,
Expand Down
2 changes: 1 addition & 1 deletion cilium/network_filter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ Network::FilterStatus Instance::onNewConnection() {

// Pass metadata from tls_inspector to the filterstate, if any & not already
// set via upstream cluster config, but not in a sidecar, which have no mark
if (sni != "" && option->mark_ != 0) {
if (sni != "" && !option->isSidecar()) {
auto filterState = conn.streamInfo().filterState();
auto have_sni =
filterState->hasData<Network::UpstreamServerName>(Network::UpstreamServerName::key());
Expand Down
Loading

0 comments on commit 9a0da50

Please sign in to comment.