Skip to content

Commit

Permalink
Merge pull request #2767 from BenTheElder/systemd-entrypoint
Browse files Browse the repository at this point in the history
entrypoint cleanup + non-systemd-host fix
  • Loading branch information
k8s-ci-robot authored May 19, 2022
2 parents 440899b + 4175f82 commit 1e2f525
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 47 deletions.
3 changes: 2 additions & 1 deletion images/base/files/etc/systemd/system/kubelet.service
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ Restart=always
StartLimitInterval=0
# NOTE: kind deviates from upstream here with a lower RestartSec
RestartSec=1s
# and here
# And by adding the [Service] lines below
CPUAccounting=true
MemoryAccounting=true
Slice=kubelet.slice

[Install]
WantedBy=multi-user.target
7 changes: 7 additions & 0 deletions images/base/files/etc/systemd/system/kubelet.slice
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[Unit]
Description=slice used to run Kubernetes / Kubelet
Before=slices.target

[Slice]
MemoryAccounting=true
CPUAccounting=true
94 changes: 50 additions & 44 deletions images/base/files/usr/local/bin/entrypoint
Original file line number Diff line number Diff line change
Expand Up @@ -55,43 +55,43 @@ validate_userns() {
}

# overlayfs_preferrable reports whether overlayfs is preferable in the current environment.
# Globals:  userns (read) - empty means we are outside a user namespace
# Outputs:  INFO lines on stdout, a SELinux WARN line on stderr
# Returns:  0 if overlayfs is preferable; 1 if it is avoided (Debian userns patch
#           detected) or if a trial overlay mount fails
# NOTE(review): in this diff rendering the body appears twice (the removed lines
# followed by the re-added lines). Every path through the first copy ends in
# `return`, so the second copy is never reached if this text is run as-is.
overlayfs_preferrable() {
if [[ -z "$userns" ]]; then
# If we are outside userns, we can always assume overlayfs is preferable
return 0
fi

# Debian 10 and 11 support overlayfs in userns with a "permit_mounts_in_userns" kernel patch,
# but it is known to be unstable, so we avoid using it https://github.com/moby/moby/issues/42302
if [[ -e "/sys/module/overlay/parameters/permit_mounts_in_userns" ]]; then
echo "INFO: UserNS: kernel seems supporting overlayfs with permit_mounts_in_userns, but avoiding due to instability."
return 1
fi

# Check overlayfs availability, by attempting to mount it.
#
# Overlayfs inside userns is known to be available for the following environments:
# - Kernel >= 5.11 (but 5.11 and 5.12 have issues on SELinux hosts. Fixed in 5.13.)
# - Ubuntu kernel
# - Debian kernel (but avoided due to instability, see the /sys/module/overlay/... check above)
# - Sysbox
# NOTE: tmp is assigned without `local`, so it is not scoped to this function.
tmp=$(mktemp -d)
# l = lowerdir, u = upperdir, w = workdir, m = mount point for the trial mount
mkdir -p "${tmp}/l" "${tmp}/u" "${tmp}/w" "${tmp}/m"
if ! mount -t overlay -o lowerdir="${tmp}/l,upperdir=${tmp}/u,workdir=${tmp}/w" overlay "${tmp}/m"; then
echo "INFO: UserNS: kernel does not seem to support overlayfs."
rm -rf "${tmp}"
return 1
fi
# The trial mount succeeded; undo it and remove the scratch directory.
umount "${tmp}/m"
rm -rf "${tmp}"

# Detect whether SELinux is Enforcing (or Permissive) by grepping /proc/self/attr/current .
# Note that we cannot use `getenforce` command here because /sys/fs/selinux is typically not mounted for containers.
if grep -q "_t:" "/proc/self/attr/current"; then
# When the kernel is before v5.13 and SELinux is enforced, fuse-overlayfs might be safer, so we print a warning (but not an error).
# https://github.com/torvalds/linux/commit/7fa2e79a6bb924fa4b2de5766dab31f0f47b5ab6
echo "WARN: UserNS: SELinux might be Enforcing. If you see an error related to overlayfs, try setting \`KIND_EXPERIMENTAL_CONTAINERD_SNAPSHOTTER=fuse-overlayfs\` ." >&2
fi
return 0
if [[ -z "$userns" ]]; then
# If we are outside userns, we can always assume overlayfs is preferable
return 0
fi

# Debian 10 and 11 support overlayfs in userns with a "permit_mounts_in_userns" kernel patch,
# but it is known to be unstable, so we avoid using it https://github.com/moby/moby/issues/42302
if [[ -e "/sys/module/overlay/parameters/permit_mounts_in_userns" ]]; then
echo "INFO: UserNS: kernel seems supporting overlayfs with permit_mounts_in_userns, but avoiding due to instability."
return 1
fi

# Check overlayfs availability, by attempting to mount it.
#
# Overlayfs inside userns is known to be available for the following environments:
# - Kernel >= 5.11 (but 5.11 and 5.12 have issues on SELinux hosts. Fixed in 5.13.)
# - Ubuntu kernel
# - Debian kernel (but avoided due to instability, see the /sys/module/overlay/... check above)
# - Sysbox
# NOTE: tmp is assigned without `local`, so it is not scoped to this function.
tmp=$(mktemp -d)
# l = lowerdir, u = upperdir, w = workdir, m = mount point for the trial mount
mkdir -p "${tmp}/l" "${tmp}/u" "${tmp}/w" "${tmp}/m"
if ! mount -t overlay -o lowerdir="${tmp}/l,upperdir=${tmp}/u,workdir=${tmp}/w" overlay "${tmp}/m"; then
echo "INFO: UserNS: kernel does not seem to support overlayfs."
rm -rf "${tmp}"
return 1
fi
# The trial mount succeeded; undo it and remove the scratch directory.
umount "${tmp}/m"
rm -rf "${tmp}"

# Detect whether SELinux is Enforcing (or Permissive) by grepping /proc/self/attr/current .
# Note that we cannot use `getenforce` command here because /sys/fs/selinux is typically not mounted for containers.
if grep -q "_t:" "/proc/self/attr/current"; then
# When the kernel is before v5.13 and SELinux is enforced, fuse-overlayfs might be safer, so we print a warning (but not an error).
# https://github.com/torvalds/linux/commit/7fa2e79a6bb924fa4b2de5766dab31f0f47b5ab6
echo "WARN: UserNS: SELinux might be Enforcing. If you see an error related to overlayfs, try setting \`KIND_EXPERIMENTAL_CONTAINERD_SNAPSHOTTER=fuse-overlayfs\` ." >&2
fi
return 0
}

configure_containerd() {
Expand Down Expand Up @@ -208,6 +208,8 @@ fix_cgroup() {
return
fi
echo 'INFO: detected cgroup v1'
# We're looking for the cgroup-path for the cpu controller for the
# current process. this tells us what cgroup-path the container is in.
local current_cgroup
current_cgroup=$(grep -E '^[^:]*:([^:]*,)?cpu(,[^,:]*)?:.*' /proc/self/cgroup | cut -d: -f3)
if [ "$current_cgroup" = "/" ]; then
Expand All @@ -225,16 +227,14 @@ fix_cgroup() {
# See: https://d2iq.com/blog/running-kind-inside-a-kubernetes-cluster-for-continuous-integration
# Capture initial state before modifying
#
# Basically we're looking for the cgroup-path for the cpu controller for the
# current process. this tells us what cgroup-path the container is in.
# Then we collect the subsystems that are active on this path.
# Then we collect the subsystems that are active on our current process.
# We assume the cpu controller is in use on all node containers,
# and other controllers use the same sub-path.
#
# See: https://man7.org/linux/man-pages/man7/cgroups.7.html
echo 'INFO: fix cgroup mounts for all subsystems'
local cgroup_subsystems
cgroup_subsystems=$(findmnt -lun -o source,target -t cgroup | grep "${current_cgroup}" | awk '{print $2}')
cgroup_subsystems=$(findmnt -lun -o source,target -t cgroup | grep -F "${current_cgroup}" | awk '{print $2}')
# Unmount the cgroup subsystems that are not known to runtime used to
# run the container we are in. Those subsystems are not properly scoped
# (i.e. the root cgroup is exposed, rather than something like docker/xxxx).
Expand All @@ -245,7 +245,7 @@ fix_cgroup() {
#
# See https://github.com/kubernetes/kubernetes/issues/109182
local unsupported_cgroups
unsupported_cgroups=$(findmnt -lun -o source,target -t cgroup | grep_allow_nomatch -v "${current_cgroup}" | awk '{print $2}')
unsupported_cgroups=$(findmnt -lun -o source,target -t cgroup | grep_allow_nomatch -v -F "${current_cgroup}" | awk '{print $2}')
if [ -n "$unsupported_cgroups" ]; then
local mnt
echo "$unsupported_cgroups" |
Expand Down Expand Up @@ -298,9 +298,15 @@ fix_cgroup() {
mount --make-rprivate /sys/fs/cgroup
echo "${cgroup_subsystems}" |
while IFS= read -r subsystem; do
mount_kubelet_cgroup_root "/kubelet" "${subsystem}"
mount_kubelet_cgroup_root "/kubelet.slice" "${subsystem}"
mount_kubelet_cgroup_root /kubelet "${subsystem}"
mount_kubelet_cgroup_root /kubelet.slice "${subsystem}"
done
# workaround for hosts not running systemd
# we only do this for kubelet.slice because it's not relevant when not using
# the systemd cgroup driver
if [[ ! "${cgroup_subsystems}" = */sys/fs/cgroup/systemd* ]]; then
mount_kubelet_cgroup_root /kubelet.slice /sys/fs/cgroup/systemd
fi
}

fix_machine_id() {
Expand Down
2 changes: 1 addition & 1 deletion pkg/apis/config/defaults/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ limitations under the License.
package defaults

// Image is the default for the Config.Image field, aka the default node image.
const Image = "kindest/node:v1.24.0@sha256:406fd86d48eaf4c04c7280cd1d2ca1d61e7d0d61ddef0125cb097bc7b82ed6a1"
const Image = "kindest/node:v1.24.0@sha256:0866296e693efe1fed79d5e6c7af8df71fc73ae45e3679af05342239cdc5bc8e"
2 changes: 1 addition & 1 deletion pkg/build/nodeimage/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ package nodeimage
const DefaultImage = "kindest/node:latest"

// DefaultBaseImage is the default base image used
const DefaultBaseImage = "docker.io/kindest/base:v20220510-78c84f01"
const DefaultBaseImage = "docker.io/kindest/base:v20220518-0ffcf8d6"

0 comments on commit 1e2f525

Please sign in to comment.