From 4173bb93c00cbaa8fc89294e9c4188766bbfe3b4 Mon Sep 17 00:00:00 2001 From: Benjamin Elder Date: Fri, 13 May 2022 11:57:12 -0700 Subject: [PATCH 1/6] improve comments and normalize indentation --- images/base/files/usr/local/bin/entrypoint | 80 +++++++++++----------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/images/base/files/usr/local/bin/entrypoint b/images/base/files/usr/local/bin/entrypoint index b665cd2f7e..ff40cf9c96 100755 --- a/images/base/files/usr/local/bin/entrypoint +++ b/images/base/files/usr/local/bin/entrypoint @@ -55,43 +55,43 @@ validate_userns() { } overlayfs_preferrable() { - if [[ -z "$userns" ]]; then - # If we are outside userns, we can always assume overlayfs is preferrable - return 0 - fi - - # Debian 10 and 11 supports overlayfs in userns with a "permit_mount_in_userns" kernel patch, - # but known to be unstable, so we avoid using it https://github.com/moby/moby/issues/42302 - if [[ -e "/sys/module/overlay/parameters/permit_mounts_in_userns" ]]; then - echo "INFO: UserNS: kernel seems supporting overlayfs with permit_mounts_in_userns, but avoiding due to instability." - return 1 - fi - - # Check overlayfs availability, by attempting to mount it. - # - # Overlayfs inside userns is known to be available for the following environments: - # - Kernel >= 5.11 (but 5.11 and 5.12 have issues on SELinux hosts. Fixed in 5.13.) - # - Ubuntu kernel - # - Debian kernel (but avoided due to instability, see the /sys/module/overlay/... check above) - # - Sysbox - tmp=$(mktemp -d) - mkdir -p "${tmp}/l" "${tmp}/u" "${tmp}/w" "${tmp}/m" - if ! mount -t overlay -o lowerdir="${tmp}/l,upperdir=${tmp}/u,workdir=${tmp}/w" overlay "${tmp}/m"; then - echo "INFO: UserNS: kernel does not seem to support overlayfs." - rm -rf "${tmp}" - return 1 - fi - umount "${tmp}/m" - rm -rf "${tmp}" - - # Detect whether SELinux is Enforcing (or Permitted) by grepping /proc/self/attr/current . 
- # Note that we cannot use `getenforce` command here because /sys/fs/selinux is typically not mounted for containers. - if grep -q "_t:" "/proc/self/attr/current"; then - # When the kernel is before v5.13 and SELinux is enforced, fuse-overlayfs might be safer, so we print a warning (but not an error). - # https://github.com/torvalds/linux/commit/7fa2e79a6bb924fa4b2de5766dab31f0f47b5ab6 - echo "WARN: UserNS: SELinux might be Enforcing. If you see an error related to overlayfs, try setting \`KIND_EXPERIMENTAL_CONTAINERD_SNAPSHOTTER=fuse-overlayfs\` ." >&2 - fi - return 0 + if [[ -z "$userns" ]]; then + # If we are outside userns, we can always assume overlayfs is preferable + return 0 + fi + + # Debian 10 and 11 support overlayfs in userns with a "permit_mounts_in_userns" kernel patch, + # but it is known to be unstable, so we avoid using it https://github.com/moby/moby/issues/42302 + if [[ -e "/sys/module/overlay/parameters/permit_mounts_in_userns" ]]; then + echo "INFO: UserNS: kernel seems supporting overlayfs with permit_mounts_in_userns, but avoiding due to instability." + return 1 + fi + + # Check overlayfs availability, by attempting to mount it. + # + # Overlayfs inside userns is known to be available for the following environments: + # - Kernel >= 5.11 (but 5.11 and 5.12 have issues on SELinux hosts. Fixed in 5.13.) + # - Ubuntu kernel + # - Debian kernel (but avoided due to instability, see the /sys/module/overlay/... check above) + # - Sysbox + tmp=$(mktemp -d) + mkdir -p "${tmp}/l" "${tmp}/u" "${tmp}/w" "${tmp}/m" + if ! mount -t overlay -o lowerdir="${tmp}/l,upperdir=${tmp}/u,workdir=${tmp}/w" overlay "${tmp}/m"; then + echo "INFO: UserNS: kernel does not seem to support overlayfs." + rm -rf "${tmp}" + return 1 + fi + umount "${tmp}/m" + rm -rf "${tmp}" + + # Detect whether SELinux is Enforcing (or Permissive) by grepping /proc/self/attr/current . 
+ # Note that we cannot use the `getenforce` command here because /sys/fs/selinux is typically not mounted for containers. + if grep -q "_t:" "/proc/self/attr/current"; then + # When the kernel is before v5.13 and SELinux is enforced, fuse-overlayfs might be safer, so we print a warning (but not an error). + # https://github.com/torvalds/linux/commit/7fa2e79a6bb924fa4b2de5766dab31f0f47b5ab6 + echo "WARN: UserNS: SELinux might be Enforcing. If you see an error related to overlayfs, try setting \`KIND_EXPERIMENTAL_CONTAINERD_SNAPSHOTTER=fuse-overlayfs\` ." >&2 + fi + return 0 } configure_containerd() { @@ -208,6 +208,8 @@ fix_cgroup() { return fi echo 'INFO: detected cgroup v1' + # We're looking for the cgroup-path for the cpu controller for the + # current process. This tells us what cgroup-path the container is in. local current_cgroup current_cgroup=$(grep -E '^[^:]*:([^:]*,)?cpu(,[^,:]*)?:.*' /proc/self/cgroup | cut -d: -f3) if [ "$current_cgroup" = "/" ]; then @@ -225,9 +227,7 @@ fix_cgroup() { # See: https://d2iq.com/blog/running-kind-inside-a-kubernetes-cluster-for-continuous-integration # Capture initial state before modifying # - # Basically we're looking for the cgroup-path for the cpu controller for the - # current process. this tells us what cgroup-path the container is in. - # Then we collect the subsystems that are active on this path. + # Then we collect the subsystems that are active on our current process. # We assume the cpu controller is in use on all node containers, # and other controllers use the same sub-path. 
# From 225da1a1cc485fc383825c0696f5a26f9362ff6d Mon Sep 17 00:00:00 2001 From: Benjamin Elder Date: Fri, 13 May 2022 12:06:08 -0700 Subject: [PATCH 2/6] ensure systemd """subsystem""" --- images/base/files/usr/local/bin/entrypoint | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/images/base/files/usr/local/bin/entrypoint b/images/base/files/usr/local/bin/entrypoint index ff40cf9c96..217be394aa 100755 --- a/images/base/files/usr/local/bin/entrypoint +++ b/images/base/files/usr/local/bin/entrypoint @@ -298,9 +298,15 @@ fix_cgroup() { mount --make-rprivate /sys/fs/cgroup echo "${cgroup_subsystems}" | while IFS= read -r subsystem; do - mount_kubelet_cgroup_root "/kubelet" "${subsystem}" - mount_kubelet_cgroup_root "/kubelet.slice" "${subsystem}" + mount_kubelet_cgroup_root /kubelet "${subsystem}" + mount_kubelet_cgroup_root /kubelet.slice "${subsystem}" done + # workaround for hosts not running systemd + # we only do this for kubelet.slice because it's not relevant when not using + # the systemd cgroup driver + if [[ ! 
"${cgroup_subsystems}" = */sys/fs/cgroup/systemd* ]]; then + mount_kubelet_cgroup_root /kubelet.slice /sys/fs/cgroup/systemd + fi } fix_machine_id() { From cb8af455b3afb7cd26dd2b9b0299274dab9020fb Mon Sep 17 00:00:00 2001 From: Benjamin Elder Date: Fri, 13 May 2022 12:07:48 -0700 Subject: [PATCH 3/6] search for fixed strings when searching for fixed strings these are not regexes --- images/base/files/usr/local/bin/entrypoint | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/images/base/files/usr/local/bin/entrypoint b/images/base/files/usr/local/bin/entrypoint index 217be394aa..42656190b5 100755 --- a/images/base/files/usr/local/bin/entrypoint +++ b/images/base/files/usr/local/bin/entrypoint @@ -234,7 +234,7 @@ fix_cgroup() { # See: https://man7.org/linux/man-pages/man7/cgroups.7.html echo 'INFO: fix cgroup mounts for all subsystems' local cgroup_subsystems - cgroup_subsystems=$(findmnt -lun -o source,target -t cgroup | grep "${current_cgroup}" | awk '{print $2}') + cgroup_subsystems=$(findmnt -lun -o source,target -t cgroup | grep -F "${current_cgroup}" | awk '{print $2}') # Unmount the cgroup subsystems that are not known to runtime used to # run the container we are in. Those subsystems are not properly scoped # (i.e. the root cgroup is exposed, rather than something like docker/xxxx). 
@@ -245,7 +245,7 @@ fix_cgroup() { # # See https://github.com/kubernetes/kubernetes/issues/109182 local unsupported_cgroups - unsupported_cgroups=$(findmnt -lun -o source,target -t cgroup | grep_allow_nomatch -v "${current_cgroup}" | awk '{print $2}') + unsupported_cgroups=$(findmnt -lun -o source,target -t cgroup | grep_allow_nomatch -v -F "${current_cgroup}" | awk '{print $2}') if [ -n "$unsupported_cgroups" ]; then local mnt echo "$unsupported_cgroups" | From 0ffcf8d6959f98ed038a925830b25f1f77480838 Mon Sep 17 00:00:00 2001 From: Benjamin Elder Date: Wed, 18 May 2022 21:24:34 -0700 Subject: [PATCH 4/6] add a real systemd kubelet slice --- images/base/files/etc/systemd/system/kubelet.service | 3 ++- images/base/files/etc/systemd/system/kubelet.slice | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 images/base/files/etc/systemd/system/kubelet.slice diff --git a/images/base/files/etc/systemd/system/kubelet.service b/images/base/files/etc/systemd/system/kubelet.service index 4968e8f1c4..e6f7274466 100644 --- a/images/base/files/etc/systemd/system/kubelet.service +++ b/images/base/files/etc/systemd/system/kubelet.service @@ -15,9 +15,10 @@ Restart=always StartLimitInterval=0 # NOTE: kind deviates from upstream here with a lower RestartSec RestartSec=1s -# and here +# And by adding the [Service] lines below CPUAccounting=true MemoryAccounting=true +Slice=kubelet.slice [Install] WantedBy=multi-user.target diff --git a/images/base/files/etc/systemd/system/kubelet.slice b/images/base/files/etc/systemd/system/kubelet.slice new file mode 100644 index 0000000000..4c63f92f38 --- /dev/null +++ b/images/base/files/etc/systemd/system/kubelet.slice @@ -0,0 +1,7 @@ +[Unit] +Description=slice used to run Kubernetes / Kubelet +Before=slices.target + +[Slice] +MemoryAccounting=true +CPUAccounting=true From 66fa9622cb35bbc5ee9f56e478d77f9c556fd6db Mon Sep 17 00:00:00 2001 From: Benjamin Elder Date: Wed, 18 May 2022 21:29:12 -0700 Subject: [PATCH 5/6] 
bump base image --- pkg/build/nodeimage/defaults.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/build/nodeimage/defaults.go b/pkg/build/nodeimage/defaults.go index f2de5b159c..01342b1071 100644 --- a/pkg/build/nodeimage/defaults.go +++ b/pkg/build/nodeimage/defaults.go @@ -20,4 +20,4 @@ package nodeimage const DefaultImage = "kindest/node:latest" // DefaultBaseImage is the default base image used -const DefaultBaseImage = "docker.io/kindest/base:v20220510-78c84f01" +const DefaultBaseImage = "docker.io/kindest/base:v20220518-0ffcf8d6" From 4175f8236e841b2da0d2b746dd47d9ea4fb962c9 Mon Sep 17 00:00:00 2001 From: Benjamin Elder Date: Thu, 19 May 2022 08:55:26 -0700 Subject: [PATCH 6/6] bump node image --- pkg/apis/config/defaults/image.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/apis/config/defaults/image.go b/pkg/apis/config/defaults/image.go index 60dd476656..6255777646 100644 --- a/pkg/apis/config/defaults/image.go +++ b/pkg/apis/config/defaults/image.go @@ -18,4 +18,4 @@ limitations under the License. package defaults // Image is the default for the Config.Image field, aka the default node image. -const Image = "kindest/node:v1.24.0@sha256:406fd86d48eaf4c04c7280cd1d2ca1d61e7d0d61ddef0125cb097bc7b82ed6a1" +const Image = "kindest/node:v1.24.0@sha256:0866296e693efe1fed79d5e6c7af8df71fc73ae45e3679af05342239cdc5bc8e"