From a2bc4fe7801c8c076b99eb603b91a59ccc1153fe Mon Sep 17 00:00:00 2001
From: Ed Robinson
Date: Mon, 30 Oct 2023 15:21:28 +0000
Subject: [PATCH] Backport cluster critical fargate profiles + karpenter
 module (#352)

* Backport fargate profile for cluster-critical addons

This feature is backported to 1.24 to ease the upgrade process to 1.25.
By upgrading to the latest 1.24 module version we can add the fargate
profile before the cluster-critical autoscaling group is removed.

* Backport karpenter module to 1.24

Allows karpenter to be installed or updated before upgrading a cluster
to 1.25.
---
 modules/cluster/fargate.tf              |  44 ++++
 modules/cluster/outputs.tf              |  19 +-
 modules/cluster/variables.tf            |   6 +
 modules/karpenter/README.md             |  51 +++++
 modules/karpenter/controller_iam.tf     | 263 ++++++++++++++++++++++++
 modules/karpenter/data.tf               |   3 +
 modules/karpenter/fargate.tf            |  11 +
 modules/karpenter/interruption_queue.tf |  77 +++++++
 modules/karpenter/node_iam.tf           |  35 ++++
 modules/karpenter/outputs.tf            |   3 +
 modules/karpenter/variables.tf          |  18 ++
 modules/karpenter/versions.tf           |  10 +
 12 files changed, 532 insertions(+), 8 deletions(-)
 create mode 100644 modules/cluster/fargate.tf
 create mode 100644 modules/karpenter/README.md
 create mode 100644 modules/karpenter/controller_iam.tf
 create mode 100644 modules/karpenter/data.tf
 create mode 100644 modules/karpenter/fargate.tf
 create mode 100644 modules/karpenter/interruption_queue.tf
 create mode 100644 modules/karpenter/node_iam.tf
 create mode 100644 modules/karpenter/outputs.tf
 create mode 100644 modules/karpenter/variables.tf
 create mode 100644 modules/karpenter/versions.tf

diff --git a/modules/cluster/fargate.tf b/modules/cluster/fargate.tf
new file mode 100644
index 00000000..3c51d8fd
--- /dev/null
+++ b/modules/cluster/fargate.tf
@@ -0,0 +1,44 @@
+resource "aws_eks_fargate_profile" "critical_pods" {
+  cluster_name           = aws_eks_cluster.control_plane.name
+  fargate_profile_name   = "${var.name}-critical-pods"
+  pod_execution_role_arn = aws_iam_role.fargate.arn
+  subnet_ids             = values(var.vpc_config.private_subnet_ids)
+
+  dynamic "selector" {
+    for_each = var.fargate_namespaces
+
+    content {
+      namespace = selector.value
+      labels    = {}
+    }
+  }
+}
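+
+# The execution role below is assumed by the EKS Fargate service
+# (eks-fargate-pods.amazonaws.com) to run pods scheduled on this profile.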
+resource "aws_iam_role" "fargate" {
+  name               = "${var.iam_role_name_prefix}Fargate-${var.name}"
+  assume_role_policy = data.aws_iam_policy_document.fargate_assume_role_policy.json
+  description        = "Fargate execution role for pods on ${var.name} eks cluster"
+}
+
+data "aws_iam_policy_document" "fargate_assume_role_policy" {
+  statement {
+    effect  = "Allow"
+    actions = ["sts:AssumeRole"]
+
+    principals {
+      type        = "Service"
+      identifiers = ["eks-fargate-pods.amazonaws.com"]
+    }
+  }
+}
+
+resource "aws_iam_role_policy_attachment" "fargate_managed_policies" {
+  for_each = toset([
+    "arn:aws:iam::aws:policy/AmazonEKSFargatePodExecutionRolePolicy",
+    "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy",
+  ])
+
+  role       = aws_iam_role.fargate.id
+  policy_arn = each.value
+}

diff --git a/modules/cluster/outputs.tf b/modules/cluster/outputs.tf
index c1e0ef1f..9ebfdcc4 100644
--- a/modules/cluster/outputs.tf
+++ b/modules/cluster/outputs.tf
@@ -1,13 +1,16 @@
 locals {
   config = {
-    name                  = aws_eks_cluster.control_plane.name
-    endpoint              = aws_eks_cluster.control_plane.endpoint
-    ca_data               = aws_eks_cluster.control_plane.certificate_authority[0].data
-    vpc_id                = var.vpc_config.vpc_id
-    private_subnet_ids    = var.vpc_config.private_subnet_ids
-    node_security_group   = aws_eks_cluster.control_plane.vpc_config.0.cluster_security_group_id
-    node_instance_profile = var.iam_config.node_role
-    tags                  = var.tags
+    name                       = aws_eks_cluster.control_plane.name
+    endpoint                   = aws_eks_cluster.control_plane.endpoint
+    arn                        = aws_eks_cluster.control_plane.arn
+    ca_data                    = aws_eks_cluster.control_plane.certificate_authority[0].data
+    vpc_id                     = var.vpc_config.vpc_id
+    private_subnet_ids         = var.vpc_config.private_subnet_ids
+    node_security_group        = aws_eks_cluster.control_plane.vpc_config.0.cluster_security_group_id
+    node_instance_profile      = var.iam_config.node_role
+    tags                       = var.tags
+    iam_role_name_prefix       = var.iam_role_name_prefix
+    fargate_execution_role_arn = aws_iam_role.fargate.arn
   }
 }

diff --git a/modules/cluster/variables.tf b/modules/cluster/variables.tf
index 0d778dbf..c980c50e 100644
--- a/modules/cluster/variables.tf
+++ b/modules/cluster/variables.tf
@@ -223,3 +223,9 @@ variable "security_group_ids" {
   default     = []
   description = "A list of security group IDs for the cross-account elastic network interfaces that Amazon EKS creates to use to allow communication with the Kubernetes control plane. *WARNING* changes to this list will cause the cluster to be recreated."
 }
+
+variable "fargate_namespaces" {
+  type        = set(string)
+  default     = ["kube-system", "flux-system"]
+  description = "A set of namespaces to create fargate profile selectors for. It should contain the namespaces critical for flux / cluster bootstrapping."
+}

diff --git a/modules/karpenter/README.md b/modules/karpenter/README.md
new file mode 100644
index 00000000..9035dfee
--- /dev/null
+++ b/modules/karpenter/README.md
@@ -0,0 +1,51 @@
+# Karpenter
+
+This module configures the resources required to run the
+karpenter node-provisioning tool in an EKS cluster:
+
+* A Fargate profile to run the karpenter controller
+* IAM roles for the karpenter controller and for the nodes it provisions
+* An SQS queue that delivers interruption events (spot interruption etc.) to karpenter
+
+It does not install karpenter itself into the cluster - we recommend
+that you install it with helm, as per the [karpenter documentation](https://karpenter.sh/docs/getting-started/getting-started-with-karpenter/#4-install-karpenter).
+
+It is provided as a submodule so the core module is less opinionated.
+
+However, we test the core module and the karpenter module together in
+our test suite to ensure that the different components we use in our
+clusters at Cookpad integrate correctly.
+
+## Example
+
+You should pass cluster and oidc config from the cluster module to the
+karpenter module.
+
+You will also need to add the IAM role of nodes created by karpenter to
+the aws_auth_role_map so they can connect to the cluster.
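+
+Karpenter itself is installed separately, for example with helm. A
+minimal sketch using the hashicorp/helm provider is shown below - the
+chart version, values keys, and role ARN are illustrative (they match
+the v0.31-era chart), so check the karpenter docs for your release:
+
+```hcl
+resource "helm_release" "karpenter" {
+  name             = "karpenter"
+  namespace        = "karpenter" # must match the service account namespace trusted by the controller role
+  create_namespace = true
+  repository       = "oci://public.ecr.aws/karpenter"
+  chart            = "karpenter"
+  version          = "v0.31.0" # illustrative - use the release you are deploying
+
+  values = [yamlencode({
+    serviceAccount = {
+      annotations = {
+        # The controller role created in controller_iam.tf. The ARN shown is a
+        # hypothetical example - the module does not currently output it.
+        "eks.amazonaws.com/role-arn" = "arn:aws:iam::111122223333:role/Karpenter-hal-9000"
+      }
+    }
+    settings = {
+      aws = {
+        clusterName           = "hal-9000"
+        interruptionQueueName = "Karpenter-hal-9000" # the SQS queue created by this module
+      }
+    }
+  })]
+}
+```
+
+The example below wires the cluster and karpenter modules together and
+adds the node role to aws_auth_role_map: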
+
+```hcl
+module "cluster" {
+  source     = "cookpad/eks/aws"
+  name       = "hal-9000"
+  vpc_config = module.vpc.config
+
+  aws_auth_role_map = [
+    {
+      username = "system:node:{{EC2PrivateDNSName}}"
+      rolearn  = module.karpenter.node_role_arn
+      groups = [
+        "system:bootstrappers",
+        "system:nodes",
+      ]
+    },
+  ]
+}
+
+module "karpenter" {
+  source = "cookpad/eks/aws//modules/karpenter"
+
+  cluster_config = module.cluster.config
+  oidc_config    = module.cluster.oidc_config
+}
+```

diff --git a/modules/karpenter/controller_iam.tf b/modules/karpenter/controller_iam.tf
new file mode 100644
index 00000000..97617140
--- /dev/null
+++ b/modules/karpenter/controller_iam.tf
@@ -0,0 +1,263 @@
+resource "aws_iam_role" "karpenter_controller" {
+  name               = "${var.cluster_config.iam_role_name_prefix}Karpenter-${var.cluster_config.name}"
+  assume_role_policy = data.aws_iam_policy_document.karpenter_controller_assume_role_policy.json
+  description        = "Karpenter controller role for ${var.cluster_config.name} cluster"
+}
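+
+# IRSA trust policy: only the karpenter/karpenter service account in this
+# cluster's OIDC provider may assume the controller role.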
"StringLike" + variable = "aws:RequestTag/karpenter.sh/provisioner-name" + values = ["*"] + } + } + + statement { + sid = "AllowScopedResourceCreationTagging" + effect = "Allow" + + # tfsec:ignore:aws-iam-no-policy-wildcards + resources = [ + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:fleet/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:instance/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:volume/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:network-interface/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:launch-template/*", + ] + + actions = ["ec2:CreateTags"] + + condition { + test = "StringEquals" + variable = "aws:RequestTag/kubernetes.io/cluster/${var.cluster_config.name}" + values = ["owned"] + } + + condition { + test = "StringEquals" + variable = "ec2:CreateAction" + + values = [ + "RunInstances", + "CreateFleet", + "CreateLaunchTemplate", + ] + } + + condition { + test = "StringLike" + variable = "aws:RequestTag/karpenter.sh/provisioner-name" + values = ["*"] + } + } + + statement { + sid = "AllowMachineMigrationTagging" + effect = "Allow" + # tfsec:ignore:aws-iam-no-policy-wildcards + resources = ["arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:instance/*"] + actions = ["ec2:CreateTags"] + + condition { + test = "StringEquals" + variable = "aws:ResourceTag/kubernetes.io/cluster/${var.cluster_config.name}" + values = ["owned"] + } + + condition { + test = "StringEquals" + variable = "aws:RequestTag/karpenter.sh/managed-by" + values = [var.cluster_config.name] + } + + condition { + test = "StringLike" + variable = "aws:RequestTag/karpenter.sh/provisioner-name" + values = ["*"] + } + + condition { + test = "ForAllValues:StringEquals" + variable = "aws:TagKeys" + + values = [ + "karpenter.sh/provisioner-name", + "karpenter.sh/managed-by", + ] + } + } + + statement { + sid = "AllowScopedDeletion" + effect = "Allow" + + # tfsec:ignore:aws-iam-no-policy-wildcards + resources = [ + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:instance/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:launch-template/*", + ] + + actions = [ + "ec2:TerminateInstances", + "ec2:DeleteLaunchTemplate", + ] + + condition { + test = "StringEquals" + variable = "aws:ResourceTag/kubernetes.io/cluster/${var.cluster_config.name}" + values = ["owned"] + } + + condition { + test = "StringLike" + variable = "aws:ResourceTag/karpenter.sh/provisioner-name" + values = ["*"] + } + } + + statement { + sid = "AllowRegionalReadActions" + effect = "Allow" + resources = ["*"] + + actions = [ + "ec2:DescribeAvailabilityZones", + "ec2:DescribeImages", + "ec2:DescribeInstances", + "ec2:DescribeInstanceTypeOfferings", + "ec2:DescribeInstanceTypes", + "ec2:DescribeLaunchTemplates", + "ec2:DescribeSecurityGroups", + "ec2:DescribeSpotPriceHistory", + "ec2:DescribeSubnets", + ] + + condition { + test = "StringEquals" + variable = "aws:RequestedRegion" + values = [data.aws_region.current.name] + } + } + + statement { + sid = "AllowSSMReadActions" + effect = "Allow" + resources = ["arn:${data.aws_partition.current.partition}:ssm:${data.aws_region.current.name}::parameter/aws/service/*"] + actions = ["ssm:GetParameter"] + } + + statement { + sid = "AllowPricingReadActions" + effect = "Allow" + resources = ["*"] + actions 
= ["pricing:GetProducts"] + } + + statement { + sid = "AllowInterruptionQueueActions" + effect = "Allow" + resources = [aws_sqs_queue.karpenter_interruption.arn] + + actions = [ + "sqs:DeleteMessage", + "sqs:GetQueueAttributes", + "sqs:GetQueueUrl", + "sqs:ReceiveMessage", + ] + } + + statement { + sid = "AllowPassingInstanceRole" + effect = "Allow" + resources = [aws_iam_role.karpenter_node.arn] + actions = ["iam:PassRole"] + + condition { + test = "StringEquals" + variable = "iam:PassedToService" + values = ["ec2.amazonaws.com"] + } + } + + statement { + sid = "AllowAPIServerEndpointDiscovery" + effect = "Allow" + resources = [var.cluster_config.arn] + actions = ["eks:DescribeCluster"] + } +} diff --git a/modules/karpenter/data.tf b/modules/karpenter/data.tf new file mode 100644 index 00000000..a2bf82b2 --- /dev/null +++ b/modules/karpenter/data.tf @@ -0,0 +1,3 @@ +data "aws_caller_identity" "current" {} +data "aws_partition" "current" {} +data "aws_region" "current" {} diff --git a/modules/karpenter/fargate.tf b/modules/karpenter/fargate.tf new file mode 100644 index 00000000..92631209 --- /dev/null +++ b/modules/karpenter/fargate.tf @@ -0,0 +1,11 @@ +resource "aws_eks_fargate_profile" "critical_pods" { + cluster_name = var.cluster_config.name + fargate_profile_name = "${var.cluster_config.name}-karpenter" + pod_execution_role_arn = var.cluster_config.fargate_execution_role_arn + subnet_ids = values(var.cluster_config.private_subnet_ids) + + selector { + namespace = "karpenter" + labels = {} + } +} diff --git a/modules/karpenter/interruption_queue.tf b/modules/karpenter/interruption_queue.tf new file mode 100644 index 00000000..bfd4bad3 --- /dev/null +++ b/modules/karpenter/interruption_queue.tf @@ -0,0 +1,77 @@ +resource "aws_sqs_queue" "karpenter_interruption" { + name = "Karpenter-${var.cluster_config.name}" + message_retention_seconds = 300 + sqs_managed_sse_enabled = true +} + +resource "aws_sqs_queue_policy" "karpenter_interruption" { + queue_url = aws_sqs_queue.karpenter_interruption.url + policy = data.aws_iam_policy_document.karpenter_interruption_queue_policy.json +} + +data "aws_iam_policy_document" "karpenter_interruption_queue_policy" { + statement { + sid = "SqsWrite" + actions = ["sqs:SendMessage"] + resources = [aws_sqs_queue.karpenter_interruption.arn] + principals { + type = "Service" + identifiers = [ + "events.amazonaws.com", + "sqs.amazonaws.com", + ] + } + } +} + +locals { + karpenter_events = { + health_event = { + name = "HealthEvent" + description = "Karpenter interrupt - AWS health event" + event_pattern = { + source = ["aws.health"] + detail-type = ["AWS Health Event"] + } + } + spot_interupt = { + name = "SpotInterrupt" + description = "Karpenter interrupt - EC2 spot instance interruption warning" + event_pattern = { + source = ["aws.ec2"] + detail-type = ["EC2 Spot Instance Interruption Warning"] + } + } + instance_rebalance = { + name = "InstanceRebalance" + description = "Karpenter interrupt - EC2 instance rebalance recommendation" + event_pattern = { + source = ["aws.ec2"] + detail-type = ["EC2 Instance Rebalance Recommendation"] + } + } + instance_state_change = { + name = "InstanceStateChange" + description = "Karpenter interrupt - EC2 instance state-change notification" + event_pattern = { + source = ["aws.ec2"] + detail-type = ["EC2 Instance State-change Notification"] + } + } + } +} + +resource "aws_cloudwatch_event_rule" "karpenter" { + for_each = local.karpenter_events + + name = "Karpenter${each.value.name}-${var.cluster_config.name}" + 
+resource "aws_cloudwatch_event_rule" "karpenter" {
+  for_each = local.karpenter_events
+
+  name          = "Karpenter${each.value.name}-${var.cluster_config.name}"
+  description   = each.value.description
+  event_pattern = jsonencode(each.value.event_pattern)
+}
+
+resource "aws_cloudwatch_event_target" "karpenter" {
+  for_each = local.karpenter_events
+
+  rule      = aws_cloudwatch_event_rule.karpenter[each.key].name
+  target_id = "KarpenterInterruptionQueueTarget"
+  arn       = aws_sqs_queue.karpenter_interruption.arn
+}

diff --git a/modules/karpenter/node_iam.tf b/modules/karpenter/node_iam.tf
new file mode 100644
index 00000000..a1e1de43
--- /dev/null
+++ b/modules/karpenter/node_iam.tf
@@ -0,0 +1,35 @@
+resource "aws_iam_role" "karpenter_node" {
+  name               = "${var.cluster_config.iam_role_name_prefix}KarpenterNode-${var.cluster_config.name}"
+  assume_role_policy = data.aws_iam_policy_document.karpenter_node_assume_role_policy.json
+  description        = "Karpenter node role for ${var.cluster_config.name} cluster"
+}
+
+data "aws_iam_policy_document" "karpenter_node_assume_role_policy" {
+  statement {
+    sid     = "EKSNodeAssumeRole"
+    actions = ["sts:AssumeRole"]
+
+    principals {
+      type        = "Service"
+      identifiers = ["ec2.${data.aws_partition.current.dns_suffix}"]
+    }
+  }
+}
+
+resource "aws_iam_role_policy_attachment" "karpenter_node_managed_policies" {
+  for_each = toset([
+    "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy",
+    "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy",
+    "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly",
+    "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore",
+  ])
+
+  role       = aws_iam_role.karpenter_node.id
+  policy_arn = each.value
+}
+
+resource "aws_iam_instance_profile" "karpenter_node" {
+  name = aws_iam_role.karpenter_node.name
+  role = aws_iam_role.karpenter_node.name
+}

diff --git a/modules/karpenter/outputs.tf b/modules/karpenter/outputs.tf
new file mode 100644
index 00000000..4f296d46
--- /dev/null
+++ b/modules/karpenter/outputs.tf
@@ -0,0 +1,3 @@
+output "node_role_arn" {
+  value = aws_iam_role.karpenter_node.arn
+}

diff --git a/modules/karpenter/variables.tf b/modules/karpenter/variables.tf
new file mode 100644
index 00000000..4f4a9eb2
--- /dev/null
+++ b/modules/karpenter/variables.tf
@@ -0,0 +1,18 @@
+variable "cluster_config" {
+  description = "EKS cluster config object"
+  type = object({
+    name                       = string
+    arn                        = string
+    private_subnet_ids         = map(string)
+    iam_role_name_prefix       = string
+    fargate_execution_role_arn = string
+  })
+}
+
+variable "oidc_config" {
+  description = "OIDC config object"
+  type = object({
+    url = string
+    arn = string
+  })
+}

diff --git a/modules/karpenter/versions.tf b/modules/karpenter/versions.tf
new file mode 100644
index 00000000..bbbe93e9
--- /dev/null
+++ b/modules/karpenter/versions.tf
@@ -0,0 +1,10 @@
+terraform {
+  required_version = ">= 1.0"
+
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = ">= 4.47.0"
+    }
+  }
+}
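
For reference, the upgrade sequence this backport enables, sketched in HCL (version numbers are illustrative):

```hcl
# Step 1: move to the latest 1.24 release of this module and apply. This adds
# the cluster-critical fargate profile (and, optionally, the karpenter
# submodule) while the existing autoscaling group is still in place.
# Step 2: install or update karpenter, e.g. with helm as in the README.
# Step 3: bump the constraint to "~> 1.25.0" and apply to upgrade the cluster.
module "cluster" {
  source  = "cookpad/eks/aws"
  version = "~> 1.24.0"
  # ... remaining cluster configuration unchanged ...
}
```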