eks-prow-build-cluster: Use dedicated Managed Node Groups (MNGs) per Availability Zone (AZ) #6320

Merged · 4 commits · Jan 24, 2024
Changes from all commits
100 changes: 17 additions & 83 deletions infra/aws/terraform/prow-build-cluster/bootstrap/node_bootstrap.sh
@@ -1,3 +1,5 @@
+#!/usr/bin/env bash
+
 # Copyright 2023 The Kubernetes Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,48 +14,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
 ## intended to be used as a node pre-bootstrap script
 ## based on: https://github.com/awslabs/amazon-eks-ami/pull/1171
 
-# We're intentionally disabling SC2148 because we don't need shebang here.
-# This script is integrated as part of another script that already includes it.
-# shellcheck disable=SC2148
-
 set -o errexit
 set -o nounset
 set -o pipefail
 
-## sysctl settings (required by Prow to avoid inotify issues)
-sysctl -w fs.inotify.max_user_watches=1048576
-sysctl -w fs.inotify.max_user_instances=8192
-
-## Increase vm.min_free_kbytes from 67584 to 540672 as recommended by AWS support
-## to try to mitigate https://github.com/kubernetes/k8s.io/issues/5473
-## The general guidance for the vm.min_free_kbytes parameter is to not have it exceed 5%
-## of the total system memory, which in the case of an r5d.4xlarge would be about 6400MB.
-## For the sake of testing, let's increase this value from 67584 to 540672 (an 8x increase)
-## to bring this up to about 540MB.
-echo 540672 > /proc/sys/vm/min_free_kbytes
-
-MNT_DIR="/mnt/k8s-disks"
-## Set up ephemeral disk (SSD) to be used by containerd and kubelet
+ROOT_DIR="/.bottlerocket/rootfs"
+MNT_DIR="${ROOT_DIR}/mnt/k8s-disks"
+
+## Set up ephemeral disks (SSDs) to be used by containerd and kubelet
+mkdir -p "${MNT_DIR}"
 
 # Pick the first NVMe disk. In this case, we care about only one disk,
 # additional disks are not much of use for us.
 # We don't want to deal with RAID because we don't gain much from it.
-disk=$(find -L /dev/disk/by-id/ -xtype l -name '*NVMe_Instance_Storage_*' | head -n 1)
+disk=$(find -L "${ROOT_DIR}/dev/disk/by-id/" -xtype l -name '*NVMe_Instance_Storage_*' | head -n 1)
 
 if [[ -z "${disk}" ]]; then
   echo "no ephemeral disks found, skipping disk setup"
   exit 0
 fi
 
-# Get devices of NVMe instance storage ephemeral disks
+# Get device of NVMe instance storage ephemeral disks
 dev=$(realpath "${disk}")
 
-# Mounts and creates xfs file systems on chosen EC2 instance store NVMe disk
-# without existing file system. Mounts in /mnt/k8s-disks
+# Mount and create xfs file systems on chosen EC2 instance store NVMe disk
+# without existing file system
 if [[ -z "$(lsblk "${dev}" -o fstype --noheadings)" ]]; then
   mkfs.xfs -l su=8b "${dev}"
 fi
@@ -63,64 +49,12 @@
 if [[ -n "$(lsblk "${dev}" -o MOUNTPOINT --noheadings)" ]]; then
   exit 0
 fi
 
-# Get mount point for the disk.
-mount_point="${MNT_DIR}"
-mount_unit_name="$(systemd-escape --path --suffix=mount "${mount_point}")"
-
-mkdir -p "${mount_point}"
-
-# Create systemd service to mount the disk.
-cat > "/etc/systemd/system/${mount_unit_name}" << EOF
-[Unit]
-Description=Mount EC2 Instance Store NVMe disk
-[Mount]
-What=${dev}
-Where=${mount_point}
-Type=xfs
-Options=defaults,noatime
-[Install]
-WantedBy=multi-user.target
-EOF
-
-systemd-analyze verify "${mount_unit_name}"
-systemctl enable "${mount_unit_name}" --now
+# Mount the disk in /mnt/k8s-disks
+mount -t xfs -o defaults,noatime "${dev}" "${MNT_DIR}"
 
-## Create mount points on SSD for containerd and kubelet
-needs_linked=""
-
-# Stop containerd and kubelet if they are running.
-for unit in "containerd" "kubelet"; do
-  if [[ "$(systemctl is-active var-lib-${unit}.mount)" != "active" ]]; then
-    needs_linked+=" ${unit}"
-  fi
-done
-
-systemctl stop containerd.service snap.kubelet-eks.daemon.service
-
-# Transfer state directories to the disk, if they exist.
-for unit in ${needs_linked}; do
-  var_lib_mount_point="/var/lib/${unit}"
-  unit_mount_point="${mount_point}/${unit}"
-
-  echo "Copying ${var_lib_mount_point}/ to ${unit_mount_point}/"
-  cp -a "${var_lib_mount_point}/" "${unit_mount_point}/"
-
-  mount_unit_name="$(systemd-escape --path --suffix=mount "${var_lib_mount_point}")"
-
-  cat > "/etc/systemd/system/${mount_unit_name}" << EOF
-[Unit]
-Description=Mount ${unit} on EC2 Instance Store NVMe disk
-[Mount]
-What=${unit_mount_point}
-Where=${var_lib_mount_point}
-Type=none
-Options=bind
-[Install]
-WantedBy=multi-user.target
-EOF
-  systemd-analyze verify "${mount_unit_name}"
-  systemctl enable "${mount_unit_name}" --now
+# Mount containerd and kubelet directories to /mnt/k8s-disks
+for unit in containerd kubelet ; do
+  mkdir -p "${MNT_DIR}/${unit}"
+  mount --rbind "${MNT_DIR}/${unit}" "${ROOT_DIR}/var/lib/${unit}"
+  mount --make-rshared "${ROOT_DIR}/var/lib/${unit}"
 done
 
-# Start again stopped services.
-systemctl start containerd.service snap.kubelet-eks.daemon.service

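To confirm on a live node that the new bootstrap took effect, a minimal check from a root shell on the host (for example via sheltie in the Bottlerocket admin container) might look like the sketch below. These are the host-side paths, without the /.bottlerocket/rootfs prefix the bootstrap container sees.

# Sketch only: verify the mounts that node_bootstrap.sh sets up.
# Run as root on the node itself.

# The NVMe instance-store disk should be an xfs mount at /mnt/k8s-disks.
findmnt --target /mnt/k8s-disks --output SOURCE,FSTYPE,OPTIONS

# containerd and kubelet state should be backed by that disk, with shared
# propagation (from mount --make-rshared above).
for unit in containerd kubelet; do
  findmnt --target "/var/lib/${unit}" --output SOURCE,TARGET,PROPAGATION
done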
This file was deleted.

This file was deleted.

7 changes: 4 additions & 3 deletions infra/aws/terraform/prow-build-cluster/eks.tf
@@ -104,8 +104,9 @@ module "eks" {
   }
 
   eks_managed_node_groups = {
-    build-blue  = local.node_group_build_blue
-    build-green = local.node_group_build_green
-    stable      = local.node_group_stable
+    stable           = local.node_group_stable
+    build-us-east-2a = local.node_group_build_us_east_2a
+    build-us-east-2b = local.node_group_build_us_east_2b
+    build-us-east-2c = local.node_group_build_us_east_2c
   }
 }
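A quick way to sanity-check the per-AZ split after this change, sketched with the AWS CLI: each build-us-east-2* group should report exactly one subnet, while stable keeps spanning all three AZs. The cluster name below is an assumption inferred from the Terraform directory, and the live node group names may carry a suffix added by the EKS module.

#!/usr/bin/env bash
# Hedged sketch: list every managed node group and the subnets it spans.
# Assumes configured AWS credentials and a cluster actually named
# "prow-build-cluster" (inferred, not confirmed by this diff).
set -o errexit -o nounset -o pipefail

CLUSTER="prow-build-cluster"

for ng in $(aws eks list-nodegroups --cluster-name "${CLUSTER}" \
    --query 'nodegroups[]' --output text); do
  # A per-AZ group should print a single subnet ID.
  aws eks describe-nodegroup \
    --cluster-name "${CLUSTER}" --nodegroup-name "${ng}" \
    --query '{name: nodegroup.nodegroupName, subnets: nodegroup.subnets}' \
    --output json
done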
2 changes: 0 additions & 2 deletions infra/aws/terraform/prow-build-cluster/node_group_blue.tf
@@ -59,8 +59,6 @@ locals {
       max_unavailable_percentage = var.node_max_unavailable_percentage
     }
 
-    pre_bootstrap_user_data = file("${path.module}/bootstrap/node_bootstrap.sh")
-
     capacity_type  = "ON_DEMAND"
     instance_types = var.node_instance_types_blue
2 changes: 0 additions & 2 deletions infra/aws/terraform/prow-build-cluster/node_group_green.tf
@@ -59,8 +59,6 @@ locals {
       max_unavailable_percentage = var.node_max_unavailable_percentage
     }
 
-    pre_bootstrap_user_data = file("${path.module}/bootstrap/node_bootstrap_green.sh")
-
     capacity_type  = "ON_DEMAND"
     instance_types = var.node_instance_types_green
2 changes: 0 additions & 2 deletions infra/aws/terraform/prow-build-cluster/node_group_stable.tf
@@ -59,8 +59,6 @@ locals {
       max_unavailable_percentage = var.node_max_unavailable_percentage
     }
 
-    pre_bootstrap_user_data = file("${path.module}/bootstrap/node_bootstrap.sh")
-
     capacity_type  = "ON_DEMAND"
     instance_types = var.node_instance_types_stable
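All three node groups drop pre_bootstrap_user_data because that hook belongs to the script-based EKS AMI user data flow. On Bottlerocket, node setup instead runs as a bootstrap container that sees the host filesystem under /.bottlerocket/rootfs, which is exactly the ROOT_DIR the rewritten node_bootstrap.sh prefixes onto every path. As a rough illustration only (not the wiring this PR adds), Bottlerocket user data enabling such a container could look like:

# Illustrative sketch: the image URI is a placeholder; a real setup would
# publish an image whose entrypoint runs node_bootstrap.sh.
cat <<'EOF' > user-data.toml
[settings.bootstrap-containers.bootstrap]
source = "123456789012.dkr.ecr.us-east-2.amazonaws.com/node-bootstrap:latest"
mode = "always"
essential = true
EOF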