diff --git a/modules/cluster/fargate.tf b/modules/cluster/fargate.tf
new file mode 100644
index 00000000..3c51d8fd
--- /dev/null
+++ b/modules/cluster/fargate.tf
@@ -0,0 +1,44 @@
+resource "aws_eks_fargate_profile" "critical_pods" {
+  cluster_name           = aws_eks_cluster.control_plane.name
+  fargate_profile_name   = "${var.name}-critical-pods"
+  pod_execution_role_arn = aws_iam_role.fargate.arn
+  subnet_ids             = values(var.vpc_config.private_subnet_ids)
+
+  dynamic "selector" {
+    for_each = var.fargate_namespaces
+
+    content {
+      namespace = selector.value
+      labels    = {}
+    }
+  }
+}
+
+resource "aws_iam_role" "fargate" {
+  name               = "${var.iam_role_name_prefix}Fargate-${var.name}"
+  assume_role_policy = data.aws_iam_policy_document.fargate_assume_role_policy.json
+  description        = "Fargate execution role for pods on ${var.name} eks cluster"
+}
+
+data "aws_iam_policy_document" "fargate_assume_role_policy" {
+  statement {
+    effect  = "Allow"
+    actions = ["sts:AssumeRole"]
+
+    principals {
+      type        = "Service"
+      identifiers = ["eks-fargate-pods.amazonaws.com"]
+    }
+  }
+}
+
+resource "aws_iam_role_policy_attachment" "fargate_managed_policies" {
+  for_each = toset([
+    "arn:aws:iam::aws:policy/AmazonEKSFargatePodExecutionRolePolicy",
+    "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy",
+  ])
+
+  role       = aws_iam_role.fargate.id
+  policy_arn = each.value
+}
+
diff --git a/modules/cluster/outputs.tf b/modules/cluster/outputs.tf
index c1e0ef1f..9ebfdcc4 100644
--- a/modules/cluster/outputs.tf
+++ b/modules/cluster/outputs.tf
@@ -1,13 +1,16 @@
 locals {
   config = {
-    name                  = aws_eks_cluster.control_plane.name
-    endpoint              = aws_eks_cluster.control_plane.endpoint
-    ca_data               = aws_eks_cluster.control_plane.certificate_authority[0].data
-    vpc_id                = var.vpc_config.vpc_id
-    private_subnet_ids    = var.vpc_config.private_subnet_ids
-    node_security_group   = aws_eks_cluster.control_plane.vpc_config.0.cluster_security_group_id
-    node_instance_profile = var.iam_config.node_role
-    tags                  = var.tags
+    name                       = aws_eks_cluster.control_plane.name
+    endpoint                   = aws_eks_cluster.control_plane.endpoint
+    arn                        = aws_eks_cluster.control_plane.arn
+    ca_data                    = aws_eks_cluster.control_plane.certificate_authority[0].data
+    vpc_id                     = var.vpc_config.vpc_id
+    private_subnet_ids         = var.vpc_config.private_subnet_ids
+    node_security_group        = aws_eks_cluster.control_plane.vpc_config.0.cluster_security_group_id
+    node_instance_profile      = var.iam_config.node_role
+    tags                       = var.tags
+    iam_role_name_prefix       = var.iam_role_name_prefix
+    fargate_execution_role_arn = aws_iam_role.fargate.arn
   }
 }
diff --git a/modules/cluster/variables.tf b/modules/cluster/variables.tf
index 0d778dbf..c980c50e 100644
--- a/modules/cluster/variables.tf
+++ b/modules/cluster/variables.tf
@@ -223,3 +223,9 @@ variable "security_group_ids" {
   default     = []
   description = "A list of security group IDs for the cross-account elastic network interfaces that Amazon EKS creates to use to allow communication with the Kubernetes control plane. *WARNING* changes to this list will cause the cluster to be recreated."
 }
+
+variable "fargate_namespaces" {
+  type        = set(string)
+  default     = ["kube-system", "flux-system"]
+  description = "A set of namespaces to create Fargate profiles for. This should be set to the namespaces that are critical for Flux / cluster bootstrapping."
+}
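The `fargate_namespaces` variable feeds the `dynamic "selector"` block in `fargate.tf` above, so each namespace in the set gets its own selector on the `critical-pods` Fargate profile. A minimal sketch of overriding the default from a calling configuration, reusing the `hal-9000` / `module.vpc` names from the Karpenter README example below, and assuming the wrapping `cookpad/eks/aws` module forwards the variable to the cluster submodule (that pass-through is not shown in this diff):

```hcl
module "cluster" {
  source     = "cookpad/eks/aws"
  name       = "hal-9000"
  vpc_config = module.vpc.config

  # Namespaces that must be schedulable before any EC2 nodes exist.
  # The karpenter namespace is covered by the karpenter submodule's
  # own Fargate profile, so it is not listed here.
  fargate_namespaces = [
    "kube-system",
    "flux-system",
  ]
}
```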
diff --git a/modules/karpenter/README.md b/modules/karpenter/README.md
new file mode 100644
index 00000000..9035dfee
--- /dev/null
+++ b/modules/karpenter/README.md
@@ -0,0 +1,51 @@
+# Karpenter
+
+This module configures the resources required to run the
+Karpenter node-provisioning tool in an EKS cluster:
+
+* Fargate profile - to run the Karpenter controller
+* IAM roles for the Karpenter controller and for the nodes it provisions
+* SQS queue to provide interruption events (spot interruption etc.) to Karpenter
+
+It does not install Karpenter itself into the cluster - we recommend
+that you install it with Helm, as per the [Karpenter documentation](https://karpenter.sh/docs/getting-started/getting-started-with-karpenter/#4-install-karpenter).
+
+It is provided as a submodule so that the core module remains less opinionated.
+
+However, we test the core module and the Karpenter module together
+in our test suite to ensure that the different components we use in our
+clusters at Cookpad integrate correctly.
+
+
+## Example
+
+You should pass the cluster and OIDC config objects from the cluster module to the Karpenter module.
+
+You will also need to add the IAM role of the nodes created by Karpenter to `aws_auth_role_map`
+so that they can connect to the cluster.
+
+```hcl
+module "cluster" {
+  source     = "cookpad/eks/aws"
+  name       = "hal-9000"
+  vpc_config = module.vpc.config
+
+  aws_auth_role_map = [
+    {
+      username = "system:node:{{EC2PrivateDNSName}}"
+      rolearn  = module.karpenter.node_role_arn
+      groups = [
+        "system:bootstrappers",
+        "system:nodes",
+      ]
+    },
+  ]
+}
+
+module "karpenter" {
+  source = "cookpad/eks/aws//modules/karpenter"
+
+  cluster_config = module.cluster.config
+  oidc_config    = module.cluster.oidc_config
+}
+```
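Since the module deliberately does not install Karpenter, one way to wire the pieces together from Terraform is a `helm_release` (hashicorp/helm provider) pointing at the OCI chart, sketched below. The namespace and service account must both be `karpenter` to satisfy the controller role trust policy in `controller_iam.tf`, and the queue name mirrors the `Karpenter-${cluster name}` convention in `interruption_queue.tf`. The chart version pin and the value keys (`settings.aws.clusterName`, `settings.aws.interruptionQueueName`, the service-account annotation) are assumptions that change between Karpenter chart versions, and the `aws_iam_role` data lookup is illustrative because the controller role ARN is not an output of this module - check the Karpenter docs for the release you deploy.

```hcl
# Hedged sketch: assumes a configured hashicorp/helm provider with OCI
# registry support, and the value keys used by pre-v0.32 Karpenter charts.
data "aws_iam_role" "karpenter_controller" {
  # Illustrative lookup by naming convention
  # ("${iam_role_name_prefix}Karpenter-${name}", here with an empty prefix);
  # the module does not output this ARN directly.
  name = "Karpenter-hal-9000"
}

resource "helm_release" "karpenter" {
  name             = "karpenter"
  namespace        = "karpenter" # must match the service account in the controller trust policy
  create_namespace = true

  repository = "oci://public.ecr.aws/karpenter"
  chart      = "karpenter"
  version    = "v0.31.0" # hypothetical pin - use the version you have tested

  set {
    name  = "settings.aws.clusterName"
    value = module.cluster.config.name
  }

  set {
    name  = "settings.aws.interruptionQueueName"
    value = "Karpenter-${module.cluster.config.name}" # queue created in interruption_queue.tf
  }

  set {
    name  = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn"
    value = data.aws_iam_role.karpenter_controller.arn
  }
}
```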
"arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}::snapshot/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:spot-instances-request/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:security-group/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:subnet/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:launch-template/*", + ] + + actions = [ + "ec2:RunInstances", + "ec2:CreateFleet", + ] + } + + statement { + sid = "AllowScopedEC2InstanceActionsWithTags" + effect = "Allow" + + # tfsec:ignore:aws-iam-no-policy-wildcards + resources = [ + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:fleet/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:instance/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:volume/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:network-interface/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:launch-template/*", + ] + + actions = [ + "ec2:RunInstances", + "ec2:CreateFleet", + "ec2:CreateLaunchTemplate", + ] + + condition { + test = "StringEquals" + variable = "aws:RequestTag/kubernetes.io/cluster/${var.cluster_config.name}" + values = ["owned"] + } + + condition { + test = "StringLike" + variable = "aws:RequestTag/karpenter.sh/provisioner-name" + values = ["*"] + } + } + + statement { + sid = "AllowScopedResourceCreationTagging" + effect = "Allow" + + # tfsec:ignore:aws-iam-no-policy-wildcards + resources = [ + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:fleet/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:instance/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:volume/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:network-interface/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:launch-template/*", + ] + + actions = ["ec2:CreateTags"] + + condition { + test = "StringEquals" + variable = "aws:RequestTag/kubernetes.io/cluster/${var.cluster_config.name}" + values = ["owned"] + } + + condition { + test = "StringEquals" + variable = "ec2:CreateAction" + + values = [ + "RunInstances", + "CreateFleet", + "CreateLaunchTemplate", + ] + } + + condition { + test = "StringLike" + variable = "aws:RequestTag/karpenter.sh/provisioner-name" + values = ["*"] + } + } + + statement { + sid = "AllowMachineMigrationTagging" + effect = "Allow" + # tfsec:ignore:aws-iam-no-policy-wildcards + resources = ["arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:instance/*"] + actions = ["ec2:CreateTags"] + + condition { + test = "StringEquals" + variable = "aws:ResourceTag/kubernetes.io/cluster/${var.cluster_config.name}" + values = ["owned"] + } + + condition { + test = "StringEquals" + variable = "aws:RequestTag/karpenter.sh/managed-by" + values = [var.cluster_config.name] + } + + condition { + test = "StringLike" + variable = "aws:RequestTag/karpenter.sh/provisioner-name" + values = ["*"] + } + + condition { + test = "ForAllValues:StringEquals" + variable = "aws:TagKeys" + + values = [ + "karpenter.sh/provisioner-name", + "karpenter.sh/managed-by", + ] + } + } + + statement { + sid = 
"AllowScopedDeletion" + effect = "Allow" + + # tfsec:ignore:aws-iam-no-policy-wildcards + resources = [ + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:instance/*", + "arn:${data.aws_partition.current.partition}:ec2:${data.aws_region.current.name}:*:launch-template/*", + ] + + actions = [ + "ec2:TerminateInstances", + "ec2:DeleteLaunchTemplate", + ] + + condition { + test = "StringEquals" + variable = "aws:ResourceTag/kubernetes.io/cluster/${var.cluster_config.name}" + values = ["owned"] + } + + condition { + test = "StringLike" + variable = "aws:ResourceTag/karpenter.sh/provisioner-name" + values = ["*"] + } + } + + statement { + sid = "AllowRegionalReadActions" + effect = "Allow" + resources = ["*"] + + actions = [ + "ec2:DescribeAvailabilityZones", + "ec2:DescribeImages", + "ec2:DescribeInstances", + "ec2:DescribeInstanceTypeOfferings", + "ec2:DescribeInstanceTypes", + "ec2:DescribeLaunchTemplates", + "ec2:DescribeSecurityGroups", + "ec2:DescribeSpotPriceHistory", + "ec2:DescribeSubnets", + ] + + condition { + test = "StringEquals" + variable = "aws:RequestedRegion" + values = [data.aws_region.current.name] + } + } + + statement { + sid = "AllowSSMReadActions" + effect = "Allow" + resources = ["arn:${data.aws_partition.current.partition}:ssm:${data.aws_region.current.name}::parameter/aws/service/*"] + actions = ["ssm:GetParameter"] + } + + statement { + sid = "AllowPricingReadActions" + effect = "Allow" + resources = ["*"] + actions = ["pricing:GetProducts"] + } + + statement { + sid = "AllowInterruptionQueueActions" + effect = "Allow" + resources = [aws_sqs_queue.karpenter_interruption.arn] + + actions = [ + "sqs:DeleteMessage", + "sqs:GetQueueAttributes", + "sqs:GetQueueUrl", + "sqs:ReceiveMessage", + ] + } + + statement { + sid = "AllowPassingInstanceRole" + effect = "Allow" + resources = [aws_iam_role.karpenter_node.arn] + actions = ["iam:PassRole"] + + condition { + test = "StringEquals" + variable = "iam:PassedToService" + values = ["ec2.amazonaws.com"] + } + } + + statement { + sid = "AllowAPIServerEndpointDiscovery" + effect = "Allow" + resources = [var.cluster_config.arn] + actions = ["eks:DescribeCluster"] + } +} diff --git a/modules/karpenter/data.tf b/modules/karpenter/data.tf new file mode 100644 index 00000000..a2bf82b2 --- /dev/null +++ b/modules/karpenter/data.tf @@ -0,0 +1,3 @@ +data "aws_caller_identity" "current" {} +data "aws_partition" "current" {} +data "aws_region" "current" {} diff --git a/modules/karpenter/fargate.tf b/modules/karpenter/fargate.tf new file mode 100644 index 00000000..92631209 --- /dev/null +++ b/modules/karpenter/fargate.tf @@ -0,0 +1,11 @@ +resource "aws_eks_fargate_profile" "critical_pods" { + cluster_name = var.cluster_config.name + fargate_profile_name = "${var.cluster_config.name}-karpenter" + pod_execution_role_arn = var.cluster_config.fargate_execution_role_arn + subnet_ids = values(var.cluster_config.private_subnet_ids) + + selector { + namespace = "karpenter" + labels = {} + } +} diff --git a/modules/karpenter/interruption_queue.tf b/modules/karpenter/interruption_queue.tf new file mode 100644 index 00000000..bfd4bad3 --- /dev/null +++ b/modules/karpenter/interruption_queue.tf @@ -0,0 +1,77 @@ +resource "aws_sqs_queue" "karpenter_interruption" { + name = "Karpenter-${var.cluster_config.name}" + message_retention_seconds = 300 + sqs_managed_sse_enabled = true +} + +resource "aws_sqs_queue_policy" "karpenter_interruption" { + queue_url = aws_sqs_queue.karpenter_interruption.url + policy = 
+
+data "aws_iam_policy_document" "karpenter_interruption_queue_policy" {
+  statement {
+    sid       = "SqsWrite"
+    actions   = ["sqs:SendMessage"]
+    resources = [aws_sqs_queue.karpenter_interruption.arn]
+    principals {
+      type = "Service"
+      identifiers = [
+        "events.amazonaws.com",
+        "sqs.amazonaws.com",
+      ]
+    }
+  }
+}
+
+locals {
+  karpenter_events = {
+    health_event = {
+      name        = "HealthEvent"
+      description = "Karpenter interrupt - AWS health event"
+      event_pattern = {
+        source      = ["aws.health"]
+        detail-type = ["AWS Health Event"]
+      }
+    }
+    spot_interupt = {
+      name        = "SpotInterrupt"
+      description = "Karpenter interrupt - EC2 spot instance interruption warning"
+      event_pattern = {
+        source      = ["aws.ec2"]
+        detail-type = ["EC2 Spot Instance Interruption Warning"]
+      }
+    }
+    instance_rebalance = {
+      name        = "InstanceRebalance"
+      description = "Karpenter interrupt - EC2 instance rebalance recommendation"
+      event_pattern = {
+        source      = ["aws.ec2"]
+        detail-type = ["EC2 Instance Rebalance Recommendation"]
+      }
+    }
+    instance_state_change = {
+      name        = "InstanceStateChange"
+      description = "Karpenter interrupt - EC2 instance state-change notification"
+      event_pattern = {
+        source      = ["aws.ec2"]
+        detail-type = ["EC2 Instance State-change Notification"]
+      }
+    }
+  }
+}
+
+resource "aws_cloudwatch_event_rule" "karpenter" {
+  for_each = local.karpenter_events
+
+  name          = "Karpenter${each.value.name}-${var.cluster_config.name}"
+  description   = each.value.description
+  event_pattern = jsonencode(each.value.event_pattern)
+}
+
+resource "aws_cloudwatch_event_target" "karpenter" {
+  for_each  = local.karpenter_events
+  rule      = aws_cloudwatch_event_rule.karpenter[each.key].name
+  target_id = "KarpenterInterruptionQueueTarget"
+  arn       = aws_sqs_queue.karpenter_interruption.arn
+}
diff --git a/modules/karpenter/node_iam.tf b/modules/karpenter/node_iam.tf
new file mode 100644
index 00000000..a1e1de43
--- /dev/null
+++ b/modules/karpenter/node_iam.tf
@@ -0,0 +1,35 @@
+resource "aws_iam_role" "karpenter_node" {
+  name               = "${var.cluster_config.iam_role_name_prefix}KarpenterNode-${var.cluster_config.name}"
+  assume_role_policy = data.aws_iam_policy_document.karpenter_node_assume_role_policy.json
+  description        = "Karpenter node role for ${var.cluster_config.name} cluster"
+}
+
+data "aws_iam_policy_document" "karpenter_node_assume_role_policy" {
+  statement {
+    sid     = "EKSNodeAssumeRole"
+    actions = ["sts:AssumeRole"]
+
+    principals {
+      type        = "Service"
+      identifiers = ["ec2.${data.aws_partition.current.dns_suffix}"]
+    }
+  }
+}
+
+
+resource "aws_iam_role_policy_attachment" "karpenter_node_managed_policies" {
+  for_each = toset([
+    "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy",
+    "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy",
+    "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly",
+    "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore",
+  ])
+
+  role       = aws_iam_role.karpenter_node.id
+  policy_arn = each.value
+}
+
+resource "aws_iam_instance_profile" "karpenter_node" {
+  name = aws_iam_role.karpenter_node.name
+  role = aws_iam_role.karpenter_node.name
+}
diff --git a/modules/karpenter/outputs.tf b/modules/karpenter/outputs.tf
new file mode 100644
index 00000000..4f296d46
--- /dev/null
+++ b/modules/karpenter/outputs.tf
@@ -0,0 +1,3 @@
+output "node_role_arn" {
+  value = aws_iam_role.karpenter_node.arn
+}
diff --git a/modules/karpenter/variables.tf b/modules/karpenter/variables.tf
new file mode 100644
index 00000000..4f4a9eb2
--- /dev/null
+++ b/modules/karpenter/variables.tf
@@ -0,0 +1,18 @@
+variable "cluster_config" {
+  description = "EKS cluster config object"
+  type = object({
+    name                       = string
+    arn                        = string
+    private_subnet_ids         = map(string)
+    iam_role_name_prefix       = string
+    fargate_execution_role_arn = string
+  })
+}
+
+variable "oidc_config" {
+  description = "OIDC config object"
+  type = object({
+    url = string
+    arn = string
+  })
+}
diff --git a/modules/karpenter/versions.tf b/modules/karpenter/versions.tf
new file mode 100644
index 00000000..bbbe93e9
--- /dev/null
+++ b/modules/karpenter/versions.tf
@@ -0,0 +1,10 @@
+terraform {
+  required_version = ">= 1.0"
+
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = ">= 4.47.0"
+    }
+  }
+}