From 1c44394c9441f78e450de57d04e05b6c91f11249 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Thu, 14 Dec 2023 10:00:35 -0700 Subject: [PATCH 01/60] Upgrade Terraform AWS IAM module to 5.32.1. --- main.tf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/main.tf b/main.tf index 02a64d4..b205a19 100644 --- a/main.tf +++ b/main.tf @@ -113,7 +113,7 @@ resource "null_resource" "eks_kubeconfig" { # Authorize Amazon Load Balancer Controller module "eks_lb_irsa" { source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.30.0" + version = "5.32.1" role_name = "${var.cluster_name}-lb-role" attach_load_balancer_controller_policy = true @@ -131,7 +131,7 @@ module "eks_lb_irsa" { # Authorize VPC CNI via IRSA. module "eks_vpc_cni_irsa" { source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.30.0" + version = "5.32.1" role_name = "${var.cluster_name}-vpc-cni-role" attach_vpc_cni_policy = true @@ -150,7 +150,7 @@ module "eks_vpc_cni_irsa" { # Allow PVCs backed by EBS module "eks_ebs_csi_irsa" { source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.30.0" + version = "5.32.1" role_name = "${var.cluster_name}-ebs-csi-role" attach_ebs_csi_policy = true @@ -168,7 +168,7 @@ module "eks_ebs_csi_irsa" { # Allow PVCs backed by EFS module "eks_efs_csi_controller_irsa" { source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.30.0" + version = "5.32.1" role_name = "${var.cluster_name}-efs-csi-controller-role" attach_efs_csi_policy = true @@ -186,7 +186,7 @@ module "eks_efs_csi_controller_irsa" { module "eks_efs_csi_node_irsa" { source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.30.0" + version = "5.32.1" role_name = "${var.cluster_name}-efs-csi-node-role" oidc_providers = { @@ -438,7 +438,7 @@ resource "null_resource" 
"eks_nvidia_device_plugin" { module "cert_manager_irsa" { count = local.cert_manager ? 1 : 0 source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.30.0" + version = "5.32.1" role_name = "${var.cluster_name}-cert-manager-role" From 7b19135aebc615ce2e5645cbe7b597f5c1a6f070 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Thu, 14 Dec 2023 10:01:29 -0700 Subject: [PATCH 02/60] Upgrade Terraform AWS EKS module to 19.21.0. --- main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.tf b/main.tf index b205a19..8cab0e7 100644 --- a/main.tf +++ b/main.tf @@ -41,7 +41,7 @@ resource "aws_security_group" "eks_efs_sg" { # EKS Cluster module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks-no-public-cluster-access tfsec:ignore:aws-eks-no-public-cluster-access-to-cidr source = "terraform-aws-modules/eks/aws" - version = "19.17.2" + version = "19.21.0" cluster_name = var.cluster_name cluster_version = var.kubernetes_version From 9c220ba286cf2bb464902493407f45523d742edc Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Thu, 14 Dec 2023 10:02:26 -0700 Subject: [PATCH 03/60] Upgrade `cert-manager` to 1.13.3. --- variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variables.tf b/variables.tf index 43d24c2..058dbaa 100644 --- a/variables.tf +++ b/variables.tf @@ -1,5 +1,5 @@ variable "cert_manager_version" { - default = "1.13.1" + default = "1.13.3" description = "Version of cert-manager to install." type = string } From 1dc70fda45caf8aa452ae88fcbfad091ad705e79 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Thu, 14 Dec 2023 10:05:41 -0700 Subject: [PATCH 04/60] Upgrade AWS EBS/EFS CSI controllers and Nvidia device plugin to their latest versions. 
--- variables.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/variables.tf b/variables.tf index 058dbaa..9f0b542 100644 --- a/variables.tf +++ b/variables.tf @@ -95,13 +95,13 @@ variable "default_max_size" { } variable "ebs_csi_driver_version" { - default = "2.24.0" + default = "2.25.0" description = "Version of the EFS CSI storage driver to install." type = string } variable "efs_csi_driver_version" { - default = "2.5.0" + default = "2.5.2" description = "Version of the EFS CSI storage driver to install." type = string } @@ -177,7 +177,7 @@ variable "nvidia_device_plugin" { } variable "nvidia_device_plugin_version" { - default = "0.14.1" + default = "0.14.3" description = "Version of the Nvidia device plugin to install." type = string } From 0306664864becff7c04d6e58795e6216cdf502a2 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Thu, 14 Dec 2023 15:07:31 -0700 Subject: [PATCH 05/60] Refactor into separate source files. --- cert-manager.tf | 92 +++++++++++ cni.tf | 18 +++ ebs-csi.tf | 85 ++++++++++ efs-csi.tf | 148 ++++++++++++++++++ lb.tf | 48 ++++++ main.tf | 404 ------------------------------------------------ nvidia.tf | 10 ++ 7 files changed, 401 insertions(+), 404 deletions(-) create mode 100644 cert-manager.tf create mode 100644 cni.tf create mode 100644 ebs-csi.tf create mode 100644 efs-csi.tf create mode 100644 lb.tf create mode 100644 nvidia.tf diff --git a/cert-manager.tf b/cert-manager.tf new file mode 100644 index 0000000..2f984f2 --- /dev/null +++ b/cert-manager.tf @@ -0,0 +1,92 @@ +## cert-manager +locals { + cert_manager = length(var.cert_manager_route53_zone_id) > 0 +} + +module "cert_manager_irsa" { + count = local.cert_manager ? 
1 : 0 + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" + version = "5.32.1" + + role_name = "${var.cluster_name}-cert-manager-role" + + oidc_providers = { + main = { + provider_arn = module.eks.oidc_provider_arn + namespace_service_accounts = [ + "cert-manager:cert-manager", + ] + } + } + tags = var.tags +} + +data "aws_iam_policy_document" "cert_manager" { + count = local.cert_manager ? 1 : 0 + statement { + actions = [ + "route53:GetChange" + ] + resources = ["arn:aws:route53:::change/*"] + } + + statement { + actions = [ + "route53:ChangeResourceRecordSets", + "route53:ListResourceRecordSets", + ] + resources = ["arn:aws:route53:::hostedzone/${var.cert_manager_route53_zone_id}"] + } +} + +resource "aws_iam_policy" "cert_manager" { + count = local.cert_manager ? 1 : 0 + name = "AmazonEKS_Cert_Manager_Policy-${var.cluster_name}" + description = "Provides permissions for cert-manager" + policy = data.aws_iam_policy_document.cert_manager[0].json + tags = var.tags +} + +resource "aws_iam_role_policy_attachment" "cert_manager" { + count = local.cert_manager ? 1 : 0 + role = "${var.cluster_name}-cert-manager-role" + policy_arn = aws_iam_policy.cert_manager[0].arn + depends_on = [ + module.cert_manager_irsa[0] + ] +} + +resource "helm_release" "cert_manager" { + count = local.cert_manager ? 
1 : 0 + name = "cert-manager" + namespace = "cert-manager" + create_namespace = true + chart = "cert-manager" + repository = "https://charts.jetstack.io" + version = "v${var.cert_manager_version}" + keyring = "${path.module}/cert-manager-keyring.gpg" + verify = var.helm_verify + + # Set up values so CRDs are installed with the chart, the service account has + # correct annotations, and that the pod's security context has permissions + # to read the account token: + # https://cert-manager.io/docs/configuration/acme/dns01/route53/#service-annotation + values = [ + yamlencode({ + "installCRDs" = true + "securityContext" = { + "fsGroup" = 1001 + } + "serviceAccount" = { + "annotations" = { + "eks.amazonaws.com/role-arn" = "arn:aws:iam::${local.aws_account_id}:role/${var.cluster_name}-cert-manager-role" + } + } + }) + ] + + depends_on = [ + module.cert_manager_irsa[0], + module.eks, + ] +} diff --git a/cni.tf b/cni.tf new file mode 100644 index 0000000..44086ed --- /dev/null +++ b/cni.tf @@ -0,0 +1,18 @@ +# Authorize VPC CNI via IRSA. 
+module "eks_vpc_cni_irsa" { + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" + version = "5.32.1" + + role_name = "${var.cluster_name}-vpc-cni-role" + attach_vpc_cni_policy = true + vpc_cni_enable_ipv4 = true + + oidc_providers = { + main = { + provider_arn = module.eks.oidc_provider_arn + namespace_service_accounts = ["kube-system:aws-node"] + } + } + + tags = var.tags +} diff --git a/ebs-csi.tf b/ebs-csi.tf new file mode 100644 index 0000000..9ec08a7 --- /dev/null +++ b/ebs-csi.tf @@ -0,0 +1,85 @@ +## EBS CSI Storage Driver + +# Allow PVCs backed by EBS +module "eks_ebs_csi_irsa" { + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" + version = "5.32.1" + + role_name = "${var.cluster_name}-ebs-csi-role" + attach_ebs_csi_policy = true + + oidc_providers = { + main = { + provider_arn = module.eks.oidc_provider_arn + namespace_service_accounts = ["kube-system:ebs-csi-controller-sa"] + } + } + + tags = var.tags +} + +resource "helm_release" "aws_ebs_csi_driver" { + name = "aws-ebs-csi-driver" + namespace = "kube-system" + chart = "aws-ebs-csi-driver" + repository = "https://kubernetes-sigs.github.io/aws-ebs-csi-driver" + version = var.ebs_csi_driver_version + + values = [ + yamlencode({ + "controller" = { + "extraVolumeTags" = var.tags + "serviceAccount" = { + "annotations" = { + "eks.amazonaws.com/role-arn" = "arn:aws:iam::${local.aws_account_id}:role/${var.cluster_name}-ebs-csi-role" + } + } + } + "image" = { + "repository" = "${var.csi_ecr_repository_id}.dkr.ecr.${local.aws_region}.amazonaws.com/eks/aws-ebs-csi-driver" + } + }) + ] + + depends_on = [ + module.eks_ebs_csi_irsa, + module.eks, + ] +} + +# Make EBS CSI with gp3 default storage driver +resource "kubernetes_storage_class" "eks_ebs_storage_class" { + metadata { + annotations = { + "storageclass.kubernetes.io/is-default-class" = "true" + } + labels = {} + name = "ebs-sc" + } + + mount_options = [] + parameters = {} + 
storage_provisioner = "ebs.csi.aws.com" + volume_binding_mode = "WaitForFirstConsumer" + + depends_on = [ + helm_release.aws_ebs_csi_driver, + ] +} + +# Don't want gp2 storageclass set as default. +resource "kubernetes_annotations" "eks_disable_gp2" { + api_version = "storage.k8s.io/v1" + kind = "StorageClass" + metadata { + name = "gp2" + } + annotations = { + "storageclass.kubernetes.io/is-default-class" = "false" + } + force = true + + depends_on = [ + kubernetes_storage_class.eks_ebs_storage_class + ] +} diff --git a/efs-csi.tf b/efs-csi.tf new file mode 100644 index 0000000..e376ee5 --- /dev/null +++ b/efs-csi.tf @@ -0,0 +1,148 @@ +## EFS CSI Storage Driver +resource "aws_security_group" "eks_efs_sg" { + name = "${var.cluster_name}-efs-sg" + description = "Security group for EFS clients in EKS VPC" + vpc_id = var.vpc_id + + ingress { + description = "Ingress NFS traffic for EFS" + from_port = 2049 + to_port = 2049 + protocol = "tcp" + cidr_blocks = [var.vpc_cidr] + } + + tags = var.tags +} + +# Allow PVCs backed by EFS +module "eks_efs_csi_controller_irsa" { + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" + version = "5.32.1" + + role_name = "${var.cluster_name}-efs-csi-controller-role" + attach_efs_csi_policy = true + + oidc_providers = { + main = { + provider_arn = module.eks.oidc_provider_arn + namespace_service_accounts = [ + "kube-system:efs-csi-controller-sa", + ] + } + } + tags = var.tags +} + +module "eks_efs_csi_node_irsa" { + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" + version = "5.32.1" + + role_name = "${var.cluster_name}-efs-csi-node-role" + oidc_providers = { + main = { + provider_arn = module.eks.oidc_provider_arn + namespace_service_accounts = [ + "kube-system:efs-csi-node-sa", + ] + } + } + tags = var.tags +} + +data "aws_iam_policy_document" "eks_efs_csi_node" { + statement { + actions = [ + "elasticfilesystem:DescribeMountTargets", + 
"ec2:DescribeAvailabilityZones", + ] + resources = ["*"] # tfsec:ignore:aws-iam-no-policy-wildcards + } +} + +resource "aws_iam_policy" "eks_efs_csi_node" { + name = "AmazonEKS_EFS_CSI_Node_Policy-${var.cluster_name}" + description = "Provides node permissions to use the EFS CSI driver" + policy = data.aws_iam_policy_document.eks_efs_csi_node.json + tags = var.tags +} + +resource "aws_iam_role_policy_attachment" "eks_efs_csi_node" { + role = "${var.cluster_name}-efs-csi-node-role" + policy_arn = aws_iam_policy.eks_efs_csi_node.arn + depends_on = [ + module.eks_efs_csi_node_irsa + ] +} + +resource "aws_efs_file_system" "eks_efs" { + creation_token = "${var.cluster_name}-efs" + encrypted = true + kms_key_id = aws_kms_key.this.arn + tags = var.tags +} + +resource "aws_efs_mount_target" "eks_efs_private" { + count = length(var.private_subnets) + file_system_id = aws_efs_file_system.eks_efs.id + subnet_id = var.private_subnets[count.index] + security_groups = [aws_security_group.eks_efs_sg.id] +} + +resource "helm_release" "aws_efs_csi_driver" { + name = "aws-efs-csi-driver" + namespace = "kube-system" + chart = "aws-efs-csi-driver" + repository = "https://kubernetes-sigs.github.io/aws-efs-csi-driver" + version = var.efs_csi_driver_version + + values = [ + yamlencode({ + "controller" = { + "serviceAccount" = { + "annotations" = { + "eks.amazonaws.com/role-arn" = "arn:aws:iam::${local.aws_account_id}:role/${var.cluster_name}-efs-csi-controller-role" + } + } + "tags" = var.tags + } + "image" = { + "repository" = "${var.csi_ecr_repository_id}.dkr.ecr.${local.aws_region}.amazonaws.com/eks/aws-efs-csi-driver" + } + "node" = { + "serviceAccount" = { + "annotations" = { + "eks.amazonaws.com/role-arn" = "arn:aws:iam::${local.aws_account_id}:role/${var.cluster_name}-efs-csi-node-role" + } + } + } + }) + ] + + depends_on = [ + module.eks_efs_csi_controller_irsa, + module.eks, + ] +} + +resource "kubernetes_storage_class" "eks_efs_storage_class" { + metadata { + annotations = {} + 
name = "efs-sc" + labels = {} + } + + mount_options = [] + parameters = { + "provisioningMode" = "efs-ap" + "fileSystemId" = aws_efs_file_system.eks_efs.id + "directoryPerms" = "755" + "uid" = "0" + "gid" = "0" + } + storage_provisioner = "efs.csi.aws.com" + + depends_on = [ + helm_release.aws_efs_csi_driver, + ] +} diff --git a/lb.tf b/lb.tf new file mode 100644 index 0000000..5e5fa6d --- /dev/null +++ b/lb.tf @@ -0,0 +1,48 @@ +## AWS Load Balancer Controller + +# Authorize Amazon Load Balancer Controller +module "eks_lb_irsa" { + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" + version = "5.32.1" + + role_name = "${var.cluster_name}-lb-role" + attach_load_balancer_controller_policy = true + + oidc_providers = { + main = { + provider_arn = module.eks.oidc_provider_arn + namespace_service_accounts = ["kube-system:aws-load-balancer-controller"] + } + } + + tags = var.tags +} + +resource "helm_release" "aws_lb_controller" { + name = "aws-load-balancer-controller" + namespace = "kube-system" + chart = "aws-load-balancer-controller" + repository = "https://aws.github.io/eks-charts" + version = var.lb_controller_version + + values = [ + yamlencode({ + "clusterName" = var.cluster_name + "defaultTags" = var.tags + "region" = local.aws_region + "serviceAccount" = { + "annotations" = { + "eks.amazonaws.com/role-arn" = "arn:aws:iam::${local.aws_account_id}:role/${var.cluster_name}-lb-role" + "eks.amazonaws.com/sts-regional-endpoints" = "true" + } + "name" = "aws-load-balancer-controller" + } + "vpcId" = var.vpc_id + }) + ] + + depends_on = [ + module.eks_lb_irsa, + module.eks, + ] +} diff --git a/main.tf b/main.tf index 8cab0e7..8cd260e 100644 --- a/main.tf +++ b/main.tf @@ -11,8 +11,6 @@ locals { groups = ["system:masters"] } ] - - cert_manager = length(var.cert_manager_route53_zone_id) > 0 } resource "aws_kms_key" "this" { @@ -22,22 +20,6 @@ resource "aws_kms_key" "this" { tags = var.tags } -resource "aws_security_group" "eks_efs_sg" { 
- name = "${var.cluster_name}-efs-sg" - description = "Security group for EFS clients in EKS VPC" - vpc_id = var.vpc_id - - ingress { - description = "Ingress NFS traffic for EFS" - from_port = 2049 - to_port = 2049 - protocol = "tcp" - cidr_blocks = [var.vpc_cidr] - } - - tags = var.tags -} - # EKS Cluster module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks-no-public-cluster-access tfsec:ignore:aws-eks-no-public-cluster-access-to-cidr source = "terraform-aws-modules/eks/aws" @@ -110,136 +92,6 @@ resource "null_resource" "eks_kubeconfig" { ] } -# Authorize Amazon Load Balancer Controller -module "eks_lb_irsa" { - source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.32.1" - - role_name = "${var.cluster_name}-lb-role" - attach_load_balancer_controller_policy = true - - oidc_providers = { - main = { - provider_arn = module.eks.oidc_provider_arn - namespace_service_accounts = ["kube-system:aws-load-balancer-controller"] - } - } - - tags = var.tags -} - -# Authorize VPC CNI via IRSA. 
-module "eks_vpc_cni_irsa" { - source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.32.1" - - role_name = "${var.cluster_name}-vpc-cni-role" - attach_vpc_cni_policy = true - vpc_cni_enable_ipv4 = true - - oidc_providers = { - main = { - provider_arn = module.eks.oidc_provider_arn - namespace_service_accounts = ["kube-system:aws-node"] - } - } - - tags = var.tags -} - -# Allow PVCs backed by EBS -module "eks_ebs_csi_irsa" { - source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.32.1" - - role_name = "${var.cluster_name}-ebs-csi-role" - attach_ebs_csi_policy = true - - oidc_providers = { - main = { - provider_arn = module.eks.oidc_provider_arn - namespace_service_accounts = ["kube-system:ebs-csi-controller-sa"] - } - } - - tags = var.tags -} - -# Allow PVCs backed by EFS -module "eks_efs_csi_controller_irsa" { - source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.32.1" - - role_name = "${var.cluster_name}-efs-csi-controller-role" - attach_efs_csi_policy = true - - oidc_providers = { - main = { - provider_arn = module.eks.oidc_provider_arn - namespace_service_accounts = [ - "kube-system:efs-csi-controller-sa", - ] - } - } - tags = var.tags -} - -module "eks_efs_csi_node_irsa" { - source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.32.1" - - role_name = "${var.cluster_name}-efs-csi-node-role" - oidc_providers = { - main = { - provider_arn = module.eks.oidc_provider_arn - namespace_service_accounts = [ - "kube-system:efs-csi-node-sa", - ] - } - } - tags = var.tags -} - -data "aws_iam_policy_document" "eks_efs_csi_node" { - statement { - actions = [ - "elasticfilesystem:DescribeMountTargets", - "ec2:DescribeAvailabilityZones", - ] - resources = ["*"] # tfsec:ignore:aws-iam-no-policy-wildcards - } -} - -resource "aws_iam_policy" "eks_efs_csi_node" { - name = 
"AmazonEKS_EFS_CSI_Node_Policy-${var.cluster_name}" - description = "Provides node permissions to use the EFS CSI driver" - policy = data.aws_iam_policy_document.eks_efs_csi_node.json - tags = var.tags -} - -resource "aws_iam_role_policy_attachment" "eks_efs_csi_node" { - role = "${var.cluster_name}-efs-csi-node-role" - policy_arn = aws_iam_policy.eks_efs_csi_node.arn - depends_on = [ - module.eks_efs_csi_node_irsa - ] -} - -resource "aws_efs_file_system" "eks_efs" { - creation_token = "${var.cluster_name}-efs" - encrypted = true - kms_key_id = aws_kms_key.this.arn - tags = var.tags -} - -resource "aws_efs_mount_target" "eks_efs_private" { - count = length(var.private_subnets) - file_system_id = aws_efs_file_system.eks_efs.id - subnet_id = var.private_subnets[count.index] - security_groups = [aws_security_group.eks_efs_sg.id] -} - - ## Kubernetes-level configuration provider "helm" { @@ -266,259 +118,3 @@ provider "kubernetes" { args = ["eks", "get-token", "--region", local.aws_region, "--cluster-name", module.eks.cluster_name] } } - -## AWS Load Balancer Controller -resource "helm_release" "aws_lb_controller" { - name = "aws-load-balancer-controller" - namespace = "kube-system" - chart = "aws-load-balancer-controller" - repository = "https://aws.github.io/eks-charts" - version = var.lb_controller_version - - values = [ - yamlencode({ - "clusterName" = var.cluster_name - "defaultTags" = var.tags - "region" = local.aws_region - "serviceAccount" = { - "annotations" = { - "eks.amazonaws.com/role-arn" = "arn:aws:iam::${local.aws_account_id}:role/${var.cluster_name}-lb-role" - "eks.amazonaws.com/sts-regional-endpoints" = "true" - } - "name" = "aws-load-balancer-controller" - } - "vpcId" = var.vpc_id - }) - ] - - depends_on = [ - module.eks_lb_irsa, - module.eks, - ] -} - -## EBS CSI Storage Driver -resource "helm_release" "aws_ebs_csi_driver" { - name = "aws-ebs-csi-driver" - namespace = "kube-system" - chart = "aws-ebs-csi-driver" - repository = 
"https://kubernetes-sigs.github.io/aws-ebs-csi-driver" - version = var.ebs_csi_driver_version - - values = [ - yamlencode({ - "controller" = { - "extraVolumeTags" = var.tags - "serviceAccount" = { - "annotations" = { - "eks.amazonaws.com/role-arn" = "arn:aws:iam::${local.aws_account_id}:role/${var.cluster_name}-ebs-csi-role" - } - } - } - "image" = { - "repository" = "${var.csi_ecr_repository_id}.dkr.ecr.${local.aws_region}.amazonaws.com/eks/aws-ebs-csi-driver" - } - }) - ] - - depends_on = [ - module.eks_ebs_csi_irsa, - module.eks, - ] -} - -# Make EBS CSI with gp3 default storage driver -resource "kubernetes_storage_class" "eks_ebs_storage_class" { - metadata { - annotations = { - "storageclass.kubernetes.io/is-default-class" = "true" - } - labels = {} - name = "ebs-sc" - } - - mount_options = [] - parameters = {} - storage_provisioner = "ebs.csi.aws.com" - volume_binding_mode = "WaitForFirstConsumer" - - depends_on = [ - helm_release.aws_ebs_csi_driver, - ] -} - -# Don't want gp2 storageclass set as default. 
-resource "kubernetes_annotations" "eks_disable_gp2" { - api_version = "storage.k8s.io/v1" - kind = "StorageClass" - metadata { - name = "gp2" - } - annotations = { - "storageclass.kubernetes.io/is-default-class" = "false" - } - force = true - - depends_on = [ - kubernetes_storage_class.eks_ebs_storage_class - ] -} - -## EFS CSI Storage Driver -resource "helm_release" "aws_efs_csi_driver" { - name = "aws-efs-csi-driver" - namespace = "kube-system" - chart = "aws-efs-csi-driver" - repository = "https://kubernetes-sigs.github.io/aws-efs-csi-driver" - version = var.efs_csi_driver_version - - values = [ - yamlencode({ - "controller" = { - "serviceAccount" = { - "annotations" = { - "eks.amazonaws.com/role-arn" = "arn:aws:iam::${local.aws_account_id}:role/${var.cluster_name}-efs-csi-controller-role" - } - } - "tags" = var.tags - } - "image" = { - "repository" = "${var.csi_ecr_repository_id}.dkr.ecr.${local.aws_region}.amazonaws.com/eks/aws-efs-csi-driver" - } - "node" = { - "serviceAccount" = { - "annotations" = { - "eks.amazonaws.com/role-arn" = "arn:aws:iam::${local.aws_account_id}:role/${var.cluster_name}-efs-csi-node-role" - } - } - } - }) - ] - - depends_on = [ - module.eks_efs_csi_controller_irsa, - module.eks, - ] -} - -resource "kubernetes_storage_class" "eks_efs_storage_class" { - metadata { - annotations = {} - name = "efs-sc" - labels = {} - } - - mount_options = [] - parameters = { - "provisioningMode" = "efs-ap" - "fileSystemId" = aws_efs_file_system.eks_efs.id - "directoryPerms" = "755" - "uid" = "0" - "gid" = "0" - } - storage_provisioner = "efs.csi.aws.com" - - depends_on = [ - helm_release.aws_efs_csi_driver, - ] -} - -## Nvidia Device Plugin for GPU support -resource "null_resource" "eks_nvidia_device_plugin" { - count = var.nvidia_device_plugin ? 
1 : 0 - provisioner "local-exec" { - command = "kubectl --context='${var.cluster_name}' apply --filename='https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v${var.nvidia_device_plugin_version}/nvidia-device-plugin.yml'" - } - depends_on = [ - helm_release.aws_lb_controller, - ] -} - -## cert-manager -module "cert_manager_irsa" { - count = local.cert_manager ? 1 : 0 - source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.32.1" - - role_name = "${var.cluster_name}-cert-manager-role" - - oidc_providers = { - main = { - provider_arn = module.eks.oidc_provider_arn - namespace_service_accounts = [ - "cert-manager:cert-manager", - ] - } - } - tags = var.tags -} - -data "aws_iam_policy_document" "cert_manager" { - count = local.cert_manager ? 1 : 0 - statement { - actions = [ - "route53:GetChange" - ] - resources = ["arn:aws:route53:::change/*"] - } - - statement { - actions = [ - "route53:ChangeResourceRecordSets", - "route53:ListResourceRecordSets", - ] - resources = ["arn:aws:route53:::hostedzone/${var.cert_manager_route53_zone_id}"] - } -} - -resource "aws_iam_policy" "cert_manager" { - count = local.cert_manager ? 1 : 0 - name = "AmazonEKS_Cert_Manager_Policy-${var.cluster_name}" - description = "Provides permissions for cert-manager" - policy = data.aws_iam_policy_document.cert_manager[0].json - tags = var.tags -} - -resource "aws_iam_role_policy_attachment" "cert_manager" { - count = local.cert_manager ? 1 : 0 - role = "${var.cluster_name}-cert-manager-role" - policy_arn = aws_iam_policy.cert_manager[0].arn - depends_on = [ - module.cert_manager_irsa[0] - ] -} - -resource "helm_release" "cert_manager" { - count = local.cert_manager ? 
1 : 0 - name = "cert-manager" - namespace = "cert-manager" - create_namespace = true - chart = "cert-manager" - repository = "https://charts.jetstack.io" - version = "v${var.cert_manager_version}" - keyring = "${path.module}/cert-manager-keyring.gpg" - verify = var.helm_verify - - # Set up values so CRDs are installed with the chart, the service account has - # correct annotations, and that the pod's security context has permissions - # to read the account token: - # https://cert-manager.io/docs/configuration/acme/dns01/route53/#service-annotation - values = [ - yamlencode({ - "installCRDs" = true - "securityContext" = { - "fsGroup" = 1001 - } - "serviceAccount" = { - "annotations" = { - "eks.amazonaws.com/role-arn" = "arn:aws:iam::${local.aws_account_id}:role/${var.cluster_name}-cert-manager-role" - } - } - }) - ] - - depends_on = [ - module.cert_manager_irsa[0], - module.eks, - ] -} diff --git a/nvidia.tf b/nvidia.tf new file mode 100644 index 0000000..6fc2f47 --- /dev/null +++ b/nvidia.tf @@ -0,0 +1,10 @@ +## Nvidia Device Plugin for GPU support +resource "null_resource" "eks_nvidia_device_plugin" { + count = var.nvidia_device_plugin ? 1 : 0 + provisioner "local-exec" { + command = "kubectl --context='${var.cluster_name}' apply --filename='https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v${var.nvidia_device_plugin_version}/nvidia-device-plugin.yml'" + } + depends_on = [ + helm_release.aws_lb_controller, + ] +} From c4e986b10a0b7db6973e60972b447a9bdaae2866 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Thu, 14 Dec 2023 15:22:56 -0700 Subject: [PATCH 06/60] Have AWS module manage EKS KMS resources by default. --- kms.tf | 7 +++++++ main.tf | 15 +++++---------- variables.tf | 12 ++++++++++++ 3 files changed, 24 insertions(+), 10 deletions(-) create mode 100644 kms.tf diff --git a/kms.tf b/kms.tf new file mode 100644 index 0000000..c7619e2 --- /dev/null +++ b/kms.tf @@ -0,0 +1,7 @@ +resource "aws_kms_key" "this" { + count = var.kms_manage ? 
1 : 0 + deletion_window_in_days = var.kms_key_deletion_window_in_days + description = "KMS Key for EKS Secrets" + enable_key_rotation = true + tags = var.tags +} diff --git a/main.tf b/main.tf index 8cd260e..13b9ff8 100644 --- a/main.tf +++ b/main.tf @@ -13,13 +13,6 @@ locals { ] } -resource "aws_kms_key" "this" { - deletion_window_in_days = 10 - description = "KMS Key for EKS Secrets" - enable_key_rotation = true - tags = var.tags -} - # EKS Cluster module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks-no-public-cluster-access tfsec:ignore:aws-eks-no-public-cluster-access-to-cidr source = "terraform-aws-modules/eks/aws" @@ -45,10 +38,13 @@ module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks- } cluster_addons_timeouts = var.cluster_addons_timeouts cluster_enabled_log_types = var.cluster_enabled_log_types - cluster_encryption_config = { + + cluster_encryption_config = var.kms_manage ? { provider_key_arn = aws_kms_key.this.arn resources = ["secrets"] - } + } : { resources = ["secrets"] } + create_kms_key = var.kms_manage ? 
false : true + kms_key_deletion_window_in_days = var.kms_key_deletion_window_in_days cluster_endpoint_private_access = var.cluster_endpoint_private_access cluster_endpoint_public_access = var.cluster_endpoint_public_access @@ -56,7 +52,6 @@ module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks- cluster_security_group_additional_rules = var.cluster_security_group_additional_rules aws_auth_roles = local.aws_auth_roles - create_kms_key = false manage_aws_auth_configmap = true enable_irsa = true subnet_ids = concat(var.public_subnets, var.private_subnets) diff --git a/variables.tf b/variables.tf index 9f0b542..116a034 100644 --- a/variables.tf +++ b/variables.tf @@ -129,6 +129,18 @@ variable "iam_role_attach_cni_policy" { type = bool } +variable "kms_manage" { + default = false + description = "Manage EKS KMS resource instead of the AWS module" + type = bool +} + +variable "kms_key_deletion_window_in_days" { + description = "The waiting period, specified in number of days. After the waiting period ends, AWS KMS deletes the KMS key. If you specify a value, it must be between `7` and `30`, inclusive." + type = number + default = 10 +} + variable "lb_controller_version" { default = "1.6.1" description = "Version of the AWS Load Balancer Controller chart to install." From 06d7d563f1abc1fd6cc160ddab4f46bbbadd5e24 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Fri, 15 Dec 2023 09:34:25 -0700 Subject: [PATCH 07/60] Take into account that KMS resource has a count now. --- efs-csi.tf | 2 +- main.tf | 2 +- outputs.tf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/efs-csi.tf b/efs-csi.tf index e376ee5..2c36650 100644 --- a/efs-csi.tf +++ b/efs-csi.tf @@ -78,7 +78,7 @@ resource "aws_iam_role_policy_attachment" "eks_efs_csi_node" { resource "aws_efs_file_system" "eks_efs" { creation_token = "${var.cluster_name}-efs" encrypted = true - kms_key_id = aws_kms_key.this.arn + kms_key_id = var.kms_manage ? 
aws_kms_key.this[0].arn : module.eks.kms_key_arn tags = var.tags } diff --git a/main.tf b/main.tf index 13b9ff8..ead7670 100644 --- a/main.tf +++ b/main.tf @@ -40,7 +40,7 @@ module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks- cluster_enabled_log_types = var.cluster_enabled_log_types cluster_encryption_config = var.kms_manage ? { - provider_key_arn = aws_kms_key.this.arn + provider_key_arn = aws_kms_key.this[0].arn resources = ["secrets"] } : { resources = ["secrets"] } create_kms_key = var.kms_manage ? false : true diff --git a/outputs.tf b/outputs.tf index febab23..7d4a4ad 100644 --- a/outputs.tf +++ b/outputs.tf @@ -20,7 +20,7 @@ output "eks_managed_node_groups" { output "kms_key_arn" { description = "The Amazon Resource Name (ARN) of the KMS key for the EKS cluster." - value = aws_kms_key.this.arn + value = var.kms_manage ? aws_kms_key.this[0].arn : module.eks.kms_key_arn } output "node_security_group_arn" { From b88f38289d036d180513d04ca4d87873a01b39a5 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Fri, 15 Dec 2023 10:20:38 -0700 Subject: [PATCH 08/60] Rename AWS data from `default` to `current`. 
--- README.md | 4 ++-- main.tf | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 1a7a54e..de847b5 100644 --- a/README.md +++ b/README.md @@ -17,11 +17,11 @@ This module provides an opinionated way to configure an AWS EKS cluster using: Here's an example using a VPC defined using the [terraform-aws-vpc](https://github.com/terraform-aws-modules/terraform-aws-vpc) module: ``` -data "aws_availability_zones" "default" {} +data "aws_availability_zones" "current" {} locals { cluster_name = "test-eks" - vpc_azs = slice(data.aws_availability_zones.default.names, 0, 2) + vpc_azs = slice(data.aws_availability_zones.current.names, 0, 2) vpc_cidr = "10.100.0.0/16" vpc_subnets = cidrsubnets(local.vpc_cidr, 6, 6, 4, 4) diff --git a/main.tf b/main.tf index ead7670..ff94243 100644 --- a/main.tf +++ b/main.tf @@ -1,9 +1,10 @@ -data "aws_caller_identity" "default" {} -data "aws_region" "default" {} +data "aws_caller_identity" "current" {} +data "aws_partition" "current" {} +data "aws_region" "current" {} locals { - aws_account_id = data.aws_caller_identity.default.account_id - aws_region = data.aws_region.default.name + aws_account_id = data.aws_caller_identity.current.account_id + aws_region = data.aws_region.current.name aws_auth_roles = [ for role in var.system_masters_roles : { rolearn = "arn:aws:iam::${local.aws_account_id}:role/${role}" From 82f1bbbacb2d32726be10894ba22c7e1d3f311e4 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Fri, 15 Dec 2023 10:24:16 -0700 Subject: [PATCH 09/60] Use AWS partition when refrencing ARNs. 
--- cert-manager.tf | 6 +++--- ebs-csi.tf | 2 +- efs-csi.tf | 4 ++-- lb.tf | 2 +- main.tf | 3 ++- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/cert-manager.tf b/cert-manager.tf index 2f984f2..e69e707 100644 --- a/cert-manager.tf +++ b/cert-manager.tf @@ -27,7 +27,7 @@ data "aws_iam_policy_document" "cert_manager" { actions = [ "route53:GetChange" ] - resources = ["arn:aws:route53:::change/*"] + resources = ["arn:${local.aws_partition}:route53:::change/*"] } statement { @@ -35,7 +35,7 @@ data "aws_iam_policy_document" "cert_manager" { "route53:ChangeResourceRecordSets", "route53:ListResourceRecordSets", ] - resources = ["arn:aws:route53:::hostedzone/${var.cert_manager_route53_zone_id}"] + resources = ["arn:${local.aws_partition}:route53:::hostedzone/${var.cert_manager_route53_zone_id}"] } } @@ -79,7 +79,7 @@ resource "helm_release" "cert_manager" { } "serviceAccount" = { "annotations" = { - "eks.amazonaws.com/role-arn" = "arn:aws:iam::${local.aws_account_id}:role/${var.cluster_name}-cert-manager-role" + "eks.amazonaws.com/role-arn" = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${var.cluster_name}-cert-manager-role" } } }) diff --git a/ebs-csi.tf b/ebs-csi.tf index 9ec08a7..f1d05c1 100644 --- a/ebs-csi.tf +++ b/ebs-csi.tf @@ -31,7 +31,7 @@ resource "helm_release" "aws_ebs_csi_driver" { "extraVolumeTags" = var.tags "serviceAccount" = { "annotations" = { - "eks.amazonaws.com/role-arn" = "arn:aws:iam::${local.aws_account_id}:role/${var.cluster_name}-ebs-csi-role" + "eks.amazonaws.com/role-arn" = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${var.cluster_name}-ebs-csi-role" } } } diff --git a/efs-csi.tf b/efs-csi.tf index 2c36650..4ad75d1 100644 --- a/efs-csi.tf +++ b/efs-csi.tf @@ -101,7 +101,7 @@ resource "helm_release" "aws_efs_csi_driver" { "controller" = { "serviceAccount" = { "annotations" = { - "eks.amazonaws.com/role-arn" = "arn:aws:iam::${local.aws_account_id}:role/${var.cluster_name}-efs-csi-controller-role" + 
"eks.amazonaws.com/role-arn" = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${var.cluster_name}-efs-csi-controller-role" } } "tags" = var.tags @@ -112,7 +112,7 @@ resource "helm_release" "aws_efs_csi_driver" { "node" = { "serviceAccount" = { "annotations" = { - "eks.amazonaws.com/role-arn" = "arn:aws:iam::${local.aws_account_id}:role/${var.cluster_name}-efs-csi-node-role" + "eks.amazonaws.com/role-arn" = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${var.cluster_name}-efs-csi-node-role" } } } diff --git a/lb.tf b/lb.tf index 5e5fa6d..8fdebe4 100644 --- a/lb.tf +++ b/lb.tf @@ -32,7 +32,7 @@ resource "helm_release" "aws_lb_controller" { "region" = local.aws_region "serviceAccount" = { "annotations" = { - "eks.amazonaws.com/role-arn" = "arn:aws:iam::${local.aws_account_id}:role/${var.cluster_name}-lb-role" + "eks.amazonaws.com/role-arn" = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${var.cluster_name}-lb-role" "eks.amazonaws.com/sts-regional-endpoints" = "true" } "name" = "aws-load-balancer-controller" diff --git a/main.tf b/main.tf index ff94243..a36bd67 100644 --- a/main.tf +++ b/main.tf @@ -4,10 +4,11 @@ data "aws_region" "current" {} locals { aws_account_id = data.aws_caller_identity.current.account_id + aws_partition = data.aws_partition.current.partition aws_region = data.aws_region.current.name aws_auth_roles = [ for role in var.system_masters_roles : { - rolearn = "arn:aws:iam::${local.aws_account_id}:role/${role}" + rolearn = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${role}" username = role groups = ["system:masters"] } From e03318226cda237f4d50dbb69e2d246bf1f74df8 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Fri, 15 Dec 2023 10:29:10 -0700 Subject: [PATCH 10/60] Attempt to fix EFS CSI node policy to be compatible with managed KMS policy. 
--- efs-csi.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/efs-csi.tf b/efs-csi.tf index 4ad75d1..ec0c2b3 100644 --- a/efs-csi.tf +++ b/efs-csi.tf @@ -55,6 +55,7 @@ data "aws_iam_policy_document" "eks_efs_csi_node" { actions = [ "elasticfilesystem:DescribeMountTargets", "ec2:DescribeAvailabilityZones", + "ec2:DescribeNetworkInterfaceAttribute", ] resources = ["*"] # tfsec:ignore:aws-iam-no-policy-wildcards } From 0b7e627425b005cfbc65f16f42ef7d7748836248 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Fri, 15 Dec 2023 10:37:27 -0700 Subject: [PATCH 11/60] Enable default KMS key policy by default so it's possible to create encrypted EFS access points with it. --- efs-csi.tf | 1 - main.tf | 1 + variables.tf | 6 ++++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/efs-csi.tf b/efs-csi.tf index ec0c2b3..4ad75d1 100644 --- a/efs-csi.tf +++ b/efs-csi.tf @@ -55,7 +55,6 @@ data "aws_iam_policy_document" "eks_efs_csi_node" { actions = [ "elasticfilesystem:DescribeMountTargets", "ec2:DescribeAvailabilityZones", - "ec2:DescribeNetworkInterfaceAttribute", ] resources = ["*"] # tfsec:ignore:aws-iam-no-policy-wildcards } diff --git a/main.tf b/main.tf index a36bd67..e4faaa7 100644 --- a/main.tf +++ b/main.tf @@ -47,6 +47,7 @@ module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks- } : { resources = ["secrets"] } create_kms_key = var.kms_manage ? false : true kms_key_deletion_window_in_days = var.kms_key_deletion_window_in_days + kms_key_enable_default_policy = var.kms_key_enable_default_policy cluster_endpoint_private_access = var.cluster_endpoint_private_access cluster_endpoint_public_access = var.cluster_endpoint_public_access diff --git a/variables.tf b/variables.tf index 116a034..14c6e58 100644 --- a/variables.tf +++ b/variables.tf @@ -141,6 +141,12 @@ variable "kms_key_deletion_window_in_days" { default = 10 } +variable "kms_key_enable_default_policy" { + description = "Specifies whether to enable the default key policy. 
Defaults to `true` to workaround EFS permissions." + type = bool + default = true +} + variable "lb_controller_version" { default = "1.6.1" description = "Version of the AWS Load Balancer Controller chart to install." From b2036f5d6de52b5997da326d26e24f0a02e68b1e Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Fri, 15 Dec 2023 13:11:39 -0700 Subject: [PATCH 12/60] Add `cluster_addons_overrides` variable to allow further customization of addons. --- main.tf | 33 ++++++++++++++++++--------------- variables.tf | 18 ++++++++++++------ 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/main.tf b/main.tf index e4faaa7..6662a80 100644 --- a/main.tf +++ b/main.tf @@ -23,21 +23,24 @@ module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks- cluster_name = var.cluster_name cluster_version = var.kubernetes_version - cluster_addons = { - coredns = { - most_recent = var.cluster_addons_most_recent - preserve = true - } - kube-proxy = { - most_recent = var.cluster_addons_most_recent - preserve = true - } - vpc-cni = { - most_recent = var.cluster_addons_most_recent - preserve = true - service_account_role_arn = module.eks_vpc_cni_irsa.iam_role_arn - } - } + cluster_addons = merge( + { + coredns = { + most_recent = var.cluster_addons_most_recent + preserve = true + } + kube-proxy = { + most_recent = var.cluster_addons_most_recent + preserve = true + } + vpc-cni = { + most_recent = var.cluster_addons_most_recent + preserve = true + service_account_role_arn = module.eks_vpc_cni_irsa.iam_role_arn + } + }, + var.cluster_addons_overrides + ) cluster_addons_timeouts = var.cluster_addons_timeouts cluster_enabled_log_types = var.cluster_enabled_log_types diff --git a/variables.tf b/variables.tf index 14c6e58..e20dbea 100644 --- a/variables.tf +++ b/variables.tf @@ -16,6 +16,12 @@ variable "cluster_addons_most_recent" { default = true } +variable "cluster_addons_overrides" { + description = "Override parameters for cluster addons." 
+ type = map(any) + default = {} +} + variable "cluster_addons_timeouts" { description = "Create, update, and delete timeout configurations for the cluster addons" type = map(string) @@ -55,6 +61,12 @@ variable "cluster_name" { type = string } +variable "cluster_security_group_additional_rules" { + description = "Additional security group rules to add to the cluster security group created." + type = map(any) + default = {} +} + # The ECR repository is not the same for every region, in particular # those for govcloud: # https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html @@ -153,12 +165,6 @@ variable "lb_controller_version" { type = string } -variable "cluster_security_group_additional_rules" { - description = "Additional security group rules to add to the cluster security group created." - type = any - default = {} -} - variable "node_security_group_additional_rules" { default = { ingress_self_all = { From 453ef0cc723f687a686283e24d02fd51fc6e100c Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Fri, 15 Dec 2023 13:17:36 -0700 Subject: [PATCH 13/60] Add variables to support specifying Fargate profiles. --- main.tf | 3 +++ variables.tf | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/main.tf b/main.tf index 6662a80..ba2e5c7 100644 --- a/main.tf +++ b/main.tf @@ -79,6 +79,9 @@ module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks- } eks_managed_node_groups = var.eks_managed_node_groups + fargate_profiles = var.fargate_profiles + fargate_profile_defaults = var.fargate_profile_defaults + node_security_group_tags = var.node_security_group_tags tags = var.tags } diff --git a/variables.tf b/variables.tf index e20dbea..3eca526 100644 --- a/variables.tf +++ b/variables.tf @@ -119,8 +119,21 @@ variable "efs_csi_driver_version" { } variable "eks_managed_node_groups" { - description = "Managed node groups for the EKS cluster." 
- type = any + description = "Map of managed node groups for the EKS cluster." + type = map(any) + default = {} +} + +variable "fargate_profiles" { + description = "Map of Fargate Profile definitions to create" + type = map(any) + default = {} +} + +variable "fargate_profile_defaults" { + description = "Map of Fargate Profile default configurations" + type = map(any) + default = {} } variable "helm_verify" { From e83d4be0d85f8c5cda7922cf4e0dd254089a1b87 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Fri, 15 Dec 2023 13:33:08 -0700 Subject: [PATCH 14/60] Add passthrough variables for controlling creation of cluster/node security groups. --- main.tf | 6 ++++-- variables.tf | 12 ++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/main.tf b/main.tf index ba2e5c7..5fca7e4 100644 --- a/main.tf +++ b/main.tf @@ -41,8 +41,10 @@ module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks- }, var.cluster_addons_overrides ) - cluster_addons_timeouts = var.cluster_addons_timeouts - cluster_enabled_log_types = var.cluster_enabled_log_types + cluster_addons_timeouts = var.cluster_addons_timeouts + cluster_enabled_log_types = var.cluster_enabled_log_types + create_cluster_security_group = var.create_cluster_security_group + create_node_security_group = var.create_node_security_group cluster_encryption_config = var.kms_manage ? { provider_key_arn = aws_kms_key.this[0].arn diff --git a/variables.tf b/variables.tf index 3eca526..e278b0b 100644 --- a/variables.tf +++ b/variables.tf @@ -67,6 +67,18 @@ variable "cluster_security_group_additional_rules" { default = {} } +variable "create_cluster_security_group" { + description = "Determines if a security group is created for the cluster. 
Note: the EKS service creates a primary security group for the cluster by default" + type = bool + default = true +} + +variable "create_node_security_group" { + description = "Determines whether to create a security group for the node groups or use the existing `node_security_group_id`" + type = bool + default = true +} + # The ECR repository is not the same for every region, in particular # those for govcloud: # https://docs.aws.amazon.com/eks/latest/userguide/add-ons-images.html From 76c5daa129dfbbaa7943c906f2f881c63bba686b Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Fri, 15 Dec 2023 14:32:00 -0700 Subject: [PATCH 15/60] Add `aws_auth_roles` variable to enable customization. --- main.tf | 17 ++++++++++------- variables.tf | 6 ++++++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/main.tf b/main.tf index 5fca7e4..6cf4798 100644 --- a/main.tf +++ b/main.tf @@ -6,13 +6,16 @@ locals { aws_account_id = data.aws_caller_identity.current.account_id aws_partition = data.aws_partition.current.partition aws_region = data.aws_region.current.name - aws_auth_roles = [ - for role in var.system_masters_roles : { - rolearn = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${role}" - username = role - groups = ["system:masters"] - } - ] + aws_auth_roles = concat( + [ + for role in var.system_masters_roles : { + rolearn = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${role}" + username = role + groups = ["system:masters"] + } + ], + var.aws_auth_roles + ) } # EKS Cluster diff --git a/variables.tf b/variables.tf index e278b0b..abbbd8d 100644 --- a/variables.tf +++ b/variables.tf @@ -1,3 +1,9 @@ +variable "aws_auth_roles" { + description = "List of role maps to add to the aws-auth configmap" + type = list(any) + default = [] +} + variable "cert_manager_version" { default = "1.13.3" description = "Version of cert-manager to install." 
From 35558cf8983c0001409222b2b632f62ac199959f Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Fri, 15 Dec 2023 15:03:18 -0700 Subject: [PATCH 16/60] Remove default `preserve` flag to EKS add-ons. --- main.tf | 3 --- 1 file changed, 3 deletions(-) diff --git a/main.tf b/main.tf index 6cf4798..e64478b 100644 --- a/main.tf +++ b/main.tf @@ -30,15 +30,12 @@ module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks- { coredns = { most_recent = var.cluster_addons_most_recent - preserve = true } kube-proxy = { most_recent = var.cluster_addons_most_recent - preserve = true } vpc-cni = { most_recent = var.cluster_addons_most_recent - preserve = true service_account_role_arn = module.eks_vpc_cni_irsa.iam_role_arn } }, From 7c9802dc012817d1c3d43ad6bd5aac2f73980fa6 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Fri, 15 Dec 2023 16:07:17 -0700 Subject: [PATCH 17/60] Add ability to use Karpenter. --- karpenter.tf | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ main.tf | 17 ++++++++++++++- variables.tf | 24 +++++++++++++++------ 3 files changed, 95 insertions(+), 7 deletions(-) create mode 100644 karpenter.tf diff --git a/karpenter.tf b/karpenter.tf new file mode 100644 index 0000000..205b38c --- /dev/null +++ b/karpenter.tf @@ -0,0 +1,61 @@ +provider "aws" { + region = "us-east-1" + alias = "virginia" +} + +data "aws_ecrpublic_authorization_token" "current" { + provider = aws.virginia +} + +module "karpenter" { + count = var.karpenter ? 1 : 0 + source = "terraform-aws-modules/eks/aws//modules/karpenter" + version = "19.21.0" + + cluster_name = var.cluster_name + irsa_oidc_provider_arn = module.eks.oidc_provider_arn + tags = var.tags +} + +resource "helm_release" "karpenter" { + count = var.karpenter ? 
1 : 0 + namespace = "karpenter" + create_namespace = true + + name = "karpenter" + repository = "oci://public.ecr.aws/karpenter" + repository_username = data.aws_ecrpublic_authorization_token.current.user_name + repository_password = data.aws_ecrpublic_authorization_token.current.password + chart = "karpenter" + version = "v${var.karpenter_version}" + + set { + name = "settings.aws.clusterName" + value = var.cluster_name + } + + set { + name = "settings.aws.clusterEndpoint" + value = module.eks.cluster_endpoint + } + + set { + name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" + value = module.karpenter.irsa_arn + } + + set { + name = "settings.aws.defaultInstanceProfile" + value = module.karpenter.instance_profile_name + } + + set { + name = "settings.aws.interruptionQueueName" + value = module.karpenter.queue_name + } + + depends_on = [ + module.eks, + module.karpenter[0], + ] +} diff --git a/main.tf b/main.tf index e64478b..d3d472b 100644 --- a/main.tf +++ b/main.tf @@ -6,6 +6,16 @@ locals { aws_account_id = data.aws_caller_identity.current.account_id aws_partition = data.aws_partition.current.partition aws_region = data.aws_region.current.name + aws_auth_karpenter_roles = var.karpenter ? [ + { + rolearn = module.karpenter[0].role_arn + username = "system:node:{{EC2PrivateDNSName}}" + groups = [ + "system:bootstrappers", + "system:nodes", + ] + }, + ] : [] aws_auth_roles = concat( [ for role in var.system_masters_roles : { @@ -14,6 +24,7 @@ locals { groups = ["system:masters"] } ], + local.aws_auth_karpenter_roles, var.aws_auth_roles ) } @@ -85,7 +96,11 @@ module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks- fargate_profile_defaults = var.fargate_profile_defaults node_security_group_tags = var.node_security_group_tags - tags = var.tags + + tags = merge( + var.tags, + var.karpenter ? { "karpenter.sh/discovery" = var.cluster_name } : {} + ) } # Add EKS to default kubeconfig and set context for it. 
diff --git a/variables.tf b/variables.tf index abbbd8d..12a7c60 100644 --- a/variables.tf +++ b/variables.tf @@ -160,18 +160,24 @@ variable "helm_verify" { type = bool } -variable "kubernetes_version" { - default = "1.28" - description = "Kubernetes version to use for the EKS cluster." - type = string -} - variable "iam_role_attach_cni_policy" { default = true description = "Whether to attach CNI policy to EKS Node groups." type = bool } +variable "karpenter" { + description = "Whether to use Karpenter with the EKS cluster." + type = bool + default = false +} + +variable "karpenter_version" { + description = "Version of Karpenter Helm chart to install on the EKS cluster." + type = string + default = "0.32.3" +} + variable "kms_manage" { default = false description = "Manage EKS KMS resource instead of the AWS module" @@ -190,6 +196,12 @@ variable "kms_key_enable_default_policy" { default = true } +variable "kubernetes_version" { + default = "1.28" + description = "Kubernetes version to use for the EKS cluster." + type = string +} + variable "lb_controller_version" { default = "1.6.1" description = "Version of the AWS Load Balancer Controller chart to install." From 1b60faacddc0507565dd91e537af8833d023d45e Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Fri, 15 Dec 2023 16:08:13 -0700 Subject: [PATCH 18/60] Fix references to Karpenter module. 
--- karpenter.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/karpenter.tf b/karpenter.tf index 205b38c..7b4674d 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -41,17 +41,17 @@ resource "helm_release" "karpenter" { set { name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" - value = module.karpenter.irsa_arn + value = module.karpenter[0].irsa_arn } set { name = "settings.aws.defaultInstanceProfile" - value = module.karpenter.instance_profile_name + value = module.karpenter[0].instance_profile_name } set { name = "settings.aws.interruptionQueueName" - value = module.karpenter.queue_name + value = module.karpenter[0].queue_name } depends_on = [ From 5156b106269b5e13adf39f4f4e1e8510e4bd61e9 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Fri, 15 Dec 2023 16:21:25 -0700 Subject: [PATCH 19/60] Automatically configure CoreDNS to run on Fargate when profiles are provided. --- main.tf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/main.tf b/main.tf index d3d472b..8336023 100644 --- a/main.tf +++ b/main.tf @@ -39,8 +39,10 @@ module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks- cluster_addons = merge( { + # Configure CoreDNS to run on Fargate when profiles are provided. coredns = { - most_recent = var.cluster_addons_most_recent + configuration_values = length(var.fargate_profiles) > 0 ? jsonencode({ computeType = "Fargate" }) : "" + most_recent = var.cluster_addons_most_recent } kube-proxy = { most_recent = var.cluster_addons_most_recent From 4020bbdaf5945ed1e0a6f36e51c9b5f895cb3539 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Mon, 18 Dec 2023 13:52:43 -0700 Subject: [PATCH 20/60] Allow disabling of the EFS CSI driver. 
--- efs-csi.tf | 26 +++++++++++++++++--------- main.tf | 4 +--- variables.tf | 6 ++++++ 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/efs-csi.tf b/efs-csi.tf index 4ad75d1..32c2c3e 100644 --- a/efs-csi.tf +++ b/efs-csi.tf @@ -1,5 +1,6 @@ ## EFS CSI Storage Driver resource "aws_security_group" "eks_efs_sg" { + count = var.efs_csi_driver ? 1 : 0 name = "${var.cluster_name}-efs-sg" description = "Security group for EFS clients in EKS VPC" vpc_id = var.vpc_id @@ -17,6 +18,7 @@ resource "aws_security_group" "eks_efs_sg" { # Allow PVCs backed by EFS module "eks_efs_csi_controller_irsa" { + count = var.efs_csi_driver ? 1 : 0 source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" version = "5.32.1" @@ -35,6 +37,7 @@ module "eks_efs_csi_controller_irsa" { } module "eks_efs_csi_node_irsa" { + count = var.efs_csi_driver ? 1 : 0 source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" version = "5.32.1" @@ -51,6 +54,7 @@ module "eks_efs_csi_node_irsa" { } data "aws_iam_policy_document" "eks_efs_csi_node" { + count = var.efs_csi_driver ? 1 : 0 statement { actions = [ "elasticfilesystem:DescribeMountTargets", @@ -61,21 +65,24 @@ data "aws_iam_policy_document" "eks_efs_csi_node" { } resource "aws_iam_policy" "eks_efs_csi_node" { + count = var.efs_csi_driver ? 1 : 0 name = "AmazonEKS_EFS_CSI_Node_Policy-${var.cluster_name}" description = "Provides node permissions to use the EFS CSI driver" - policy = data.aws_iam_policy_document.eks_efs_csi_node.json + policy = data.aws_iam_policy_document.eks_efs_csi_node[0].json tags = var.tags } resource "aws_iam_role_policy_attachment" "eks_efs_csi_node" { + count = var.efs_csi_driver ? 
1 : 0 role = "${var.cluster_name}-efs-csi-node-role" - policy_arn = aws_iam_policy.eks_efs_csi_node.arn + policy_arn = aws_iam_policy.eks_efs_csi_node[0].arn depends_on = [ - module.eks_efs_csi_node_irsa + module.eks_efs_csi_node_irsa[0] ] } resource "aws_efs_file_system" "eks_efs" { + count = var.efs_csi_driver ? 1 : 0 creation_token = "${var.cluster_name}-efs" encrypted = true kms_key_id = var.kms_manage ? aws_kms_key.this[0].arn : module.eks.kms_key_arn @@ -83,13 +90,14 @@ resource "aws_efs_file_system" "eks_efs" { } resource "aws_efs_mount_target" "eks_efs_private" { - count = length(var.private_subnets) - file_system_id = aws_efs_file_system.eks_efs.id + count = var.efs_csi_driver ? length(var.private_subnets) : 0 + file_system_id = aws_efs_file_system.eks_efs[0].id subnet_id = var.private_subnets[count.index] - security_groups = [aws_security_group.eks_efs_sg.id] + security_groups = aws_security_group.eks_efs_sg[*].id } resource "helm_release" "aws_efs_csi_driver" { + count = var.efs_csi_driver ? 
1 : 0 name = "aws-efs-csi-driver" namespace = "kube-system" chart = "aws-efs-csi-driver" @@ -120,7 +128,7 @@ resource "helm_release" "aws_efs_csi_driver" { ] depends_on = [ - module.eks_efs_csi_controller_irsa, + module.eks_efs_csi_controller_irsa[0], module.eks, ] } @@ -135,7 +143,7 @@ resource "kubernetes_storage_class" "eks_efs_storage_class" { mount_options = [] parameters = { "provisioningMode" = "efs-ap" - "fileSystemId" = aws_efs_file_system.eks_efs.id + "fileSystemId" = aws_efs_file_system.eks_efs[0].id "directoryPerms" = "755" "uid" = "0" "gid" = "0" @@ -143,6 +151,6 @@ resource "kubernetes_storage_class" "eks_efs_storage_class" { storage_provisioner = "efs.csi.aws.com" depends_on = [ - helm_release.aws_efs_csi_driver, + helm_release.aws_efs_csi_driver[0], ] } diff --git a/main.tf b/main.tf index 8336023..39edace 100644 --- a/main.tf +++ b/main.tf @@ -88,9 +88,7 @@ module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks- iam_role_attach_cni_policy = var.iam_role_attach_cni_policy max_size = var.default_max_size min_size = var.default_min_size - vpc_security_group_ids = [ - aws_security_group.eks_efs_sg.id, - ] + vpc_security_group_ids = aws_security_group.eks_efs_sg[*].id } eks_managed_node_groups = var.eks_managed_node_groups diff --git a/variables.tf b/variables.tf index 12a7c60..27f468c 100644 --- a/variables.tf +++ b/variables.tf @@ -130,6 +130,12 @@ variable "ebs_csi_driver_version" { type = string } +variable "efs_csi_driver" { + description = "Install and configure the EFS CSI storage driver." + type = bool + default = true +} + variable "efs_csi_driver_version" { default = "2.5.2" description = "Version of the EFS CSI storage driver to install." From 90f0bcfd72072cbd3f5a43832e5dda08e898ea1c Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Mon, 18 Dec 2023 15:23:34 -0700 Subject: [PATCH 21/60] Install the `eks-pod-identity-agent` cluster addon by default. 
--- main.tf | 3 +++ 1 file changed, 3 insertions(+) diff --git a/main.tf b/main.tf index 39edace..5abb632 100644 --- a/main.tf +++ b/main.tf @@ -44,6 +44,9 @@ module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks- configuration_values = length(var.fargate_profiles) > 0 ? jsonencode({ computeType = "Fargate" }) : "" most_recent = var.cluster_addons_most_recent } + eks-pod-identity-agent = { + most_recent = var.cluster_addons_most_recent + } kube-proxy = { most_recent = var.cluster_addons_most_recent } From bb110ba657fbf790569510e6569c4da75bfce314 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Mon, 18 Dec 2023 15:28:08 -0700 Subject: [PATCH 22/60] Use `set` statements instead of YAML for EFS CSI Helm chart values. --- efs-csi.tf | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/efs-csi.tf b/efs-csi.tf index 32c2c3e..cdf204f 100644 --- a/efs-csi.tf +++ b/efs-csi.tf @@ -104,28 +104,25 @@ resource "helm_release" "aws_efs_csi_driver" { repository = "https://kubernetes-sigs.github.io/aws-efs-csi-driver" version = var.efs_csi_driver_version - values = [ - yamlencode({ - "controller" = { - "serviceAccount" = { - "annotations" = { - "eks.amazonaws.com/role-arn" = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${var.cluster_name}-efs-csi-controller-role" - } - } - "tags" = var.tags - } - "image" = { - "repository" = "${var.csi_ecr_repository_id}.dkr.ecr.${local.aws_region}.amazonaws.com/eks/aws-efs-csi-driver" - } - "node" = { - "serviceAccount" = { - "annotations" = { - "eks.amazonaws.com/role-arn" = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${var.cluster_name}-efs-csi-node-role" - } - } - } - }) - ] + set { + name = "controller.serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" + value = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${var.cluster_name}-efs-csi-controller-role" + } + + set { + name = "controller.tags" + value = 
var.tags + } + + set { + name = "image.repository" + value = "${var.csi_ecr_repository_id}.dkr.ecr.${local.aws_region}.amazonaws.com/eks/aws-efs-csi-driver" + } + + set { + name = "node.serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" + value = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${var.cluster_name}-efs-csi-node-role" + } depends_on = [ module.eks_efs_csi_controller_irsa[0], From 438d07311ff3c4f867cc4c1301399a75cdd4e808 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Mon, 18 Dec 2023 15:31:43 -0700 Subject: [PATCH 23/60] Revert 90f0bcfd72072cbd3f5a43832e5dda08e898ea1c. --- efs-csi.tf | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/efs-csi.tf b/efs-csi.tf index cdf204f..32c2c3e 100644 --- a/efs-csi.tf +++ b/efs-csi.tf @@ -104,25 +104,28 @@ resource "helm_release" "aws_efs_csi_driver" { repository = "https://kubernetes-sigs.github.io/aws-efs-csi-driver" version = var.efs_csi_driver_version - set { - name = "controller.serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" - value = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${var.cluster_name}-efs-csi-controller-role" - } - - set { - name = "controller.tags" - value = var.tags - } - - set { - name = "image.repository" - value = "${var.csi_ecr_repository_id}.dkr.ecr.${local.aws_region}.amazonaws.com/eks/aws-efs-csi-driver" - } - - set { - name = "node.serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" - value = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${var.cluster_name}-efs-csi-node-role" - } + values = [ + yamlencode({ + "controller" = { + "serviceAccount" = { + "annotations" = { + "eks.amazonaws.com/role-arn" = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${var.cluster_name}-efs-csi-controller-role" + } + } + "tags" = var.tags + } + "image" = { + "repository" = 
"${var.csi_ecr_repository_id}.dkr.ecr.${local.aws_region}.amazonaws.com/eks/aws-efs-csi-driver" + } + "node" = { + "serviceAccount" = { + "annotations" = { + "eks.amazonaws.com/role-arn" = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${var.cluster_name}-efs-csi-node-role" + } + } + } + }) + ] depends_on = [ module.eks_efs_csi_controller_irsa[0], From c374e211ff7fabd35611f84ea1766223dccb61f0 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Mon, 18 Dec 2023 15:33:16 -0700 Subject: [PATCH 24/60] Need to add a count to the EFS storage class resource. --- efs-csi.tf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/efs-csi.tf b/efs-csi.tf index 32c2c3e..de1dd03 100644 --- a/efs-csi.tf +++ b/efs-csi.tf @@ -134,6 +134,8 @@ resource "helm_release" "aws_efs_csi_driver" { } resource "kubernetes_storage_class" "eks_efs_storage_class" { + count = var.efs_csi_driver ? 1 : 0 + metadata { annotations = {} name = "efs-sc" From 3ba55e8ff45bfb506137632bab3ff89e783b81ea Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Mon, 18 Dec 2023 16:16:19 -0700 Subject: [PATCH 25/60] Add toggle variables to make EBS CSI Driver and Load Balancer Controller optional. --- ebs-csi.tf | 12 +++++++++--- lb.tf | 4 +++- variables.tf | 12 ++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/ebs-csi.tf b/ebs-csi.tf index f1d05c1..6d64faa 100644 --- a/ebs-csi.tf +++ b/ebs-csi.tf @@ -2,6 +2,7 @@ # Allow PVCs backed by EBS module "eks_ebs_csi_irsa" { + count = var.ebs_csi_driver ? 1 : 0 source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" version = "5.32.1" @@ -19,6 +20,7 @@ module "eks_ebs_csi_irsa" { } resource "helm_release" "aws_ebs_csi_driver" { + count = var.ebs_csi_driver ? 
1 : 0 name = "aws-ebs-csi-driver" namespace = "kube-system" chart = "aws-ebs-csi-driver" @@ -42,13 +44,15 @@ resource "helm_release" "aws_ebs_csi_driver" { ] depends_on = [ - module.eks_ebs_csi_irsa, + module.eks_ebs_csi_irsa[0], module.eks, ] } # Make EBS CSI with gp3 default storage driver resource "kubernetes_storage_class" "eks_ebs_storage_class" { + count = var.ebs_csi_driver ? 1 : 0 + metadata { annotations = { "storageclass.kubernetes.io/is-default-class" = "true" @@ -63,12 +67,14 @@ resource "kubernetes_storage_class" "eks_ebs_storage_class" { volume_binding_mode = "WaitForFirstConsumer" depends_on = [ - helm_release.aws_ebs_csi_driver, + helm_release.aws_ebs_csi_driver[0], ] } # Don't want gp2 storageclass set as default. resource "kubernetes_annotations" "eks_disable_gp2" { + count = var.ebs_csi_driver ? 1 : 0 + api_version = "storage.k8s.io/v1" kind = "StorageClass" metadata { @@ -80,6 +86,6 @@ resource "kubernetes_annotations" "eks_disable_gp2" { force = true depends_on = [ - kubernetes_storage_class.eks_ebs_storage_class + kubernetes_storage_class.eks_ebs_storage_class[0] ] } diff --git a/lb.tf b/lb.tf index 8fdebe4..a62b1d3 100644 --- a/lb.tf +++ b/lb.tf @@ -2,6 +2,7 @@ # Authorize Amazon Load Balancer Controller module "eks_lb_irsa" { + count = var.lb_controller ? 1 : 0 source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" version = "5.32.1" @@ -19,6 +20,7 @@ module "eks_lb_irsa" { } resource "helm_release" "aws_lb_controller" { + count = var.lb_controller ? 
1 : 0 name = "aws-load-balancer-controller" namespace = "kube-system" chart = "aws-load-balancer-controller" @@ -42,7 +44,7 @@ resource "helm_release" "aws_lb_controller" { ] depends_on = [ - module.eks_lb_irsa, + module.eks_lb_irsa[0], module.eks, ] } diff --git a/variables.tf b/variables.tf index 27f468c..840cc47 100644 --- a/variables.tf +++ b/variables.tf @@ -124,6 +124,12 @@ variable "default_max_size" { type = number } +variable "ebs_csi_driver" { + description = "Install and configure the EBS CSI storage driver." + type = bool + default = true +} + variable "ebs_csi_driver_version" { default = "2.25.0" description = "Version of the EFS CSI storage driver to install." @@ -208,6 +214,12 @@ variable "kubernetes_version" { type = string } +variable "lb_controller" { + description = "Install and configure the AWS Load Balancer controller." + type = bool + default = true +} + variable "lb_controller_version" { default = "1.6.1" description = "Version of the AWS Load Balancer Controller chart to install." From c6b9b56ca46b23f84878ebcce4d9ff415559d7bb Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Mon, 18 Dec 2023 16:46:06 -0700 Subject: [PATCH 26/60] Remove `repository_{username,password}` variables in attempt to eliminate resource churn. --- karpenter.tf | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/karpenter.tf b/karpenter.tf index 7b4674d..d37abc0 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -1,12 +1,3 @@ -provider "aws" { - region = "us-east-1" - alias = "virginia" -} - -data "aws_ecrpublic_authorization_token" "current" { - provider = aws.virginia -} - module "karpenter" { count = var.karpenter ? 
1 : 0 source = "terraform-aws-modules/eks/aws//modules/karpenter" @@ -22,12 +13,10 @@ resource "helm_release" "karpenter" { namespace = "karpenter" create_namespace = true - name = "karpenter" - repository = "oci://public.ecr.aws/karpenter" - repository_username = data.aws_ecrpublic_authorization_token.current.user_name - repository_password = data.aws_ecrpublic_authorization_token.current.password - chart = "karpenter" - version = "v${var.karpenter_version}" + name = "karpenter" + repository = "oci://public.ecr.aws/karpenter" + chart = "karpenter" + version = "v${var.karpenter_version}" set { name = "settings.aws.clusterName" From 273a7c1b79f25ab7b767abf4a2cba5a9f33ba05b Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Tue, 19 Dec 2023 11:14:43 -0700 Subject: [PATCH 27/60] * Install `karpenter-crd` chart to ensure Karpenter's CRDs are managed as well. * Add `karpenter_values` variable to enable custom `karpenter` chart values (e.g., `controller.resources`). --- karpenter.tf | 61 ++++++++++++++++++++++++++++++---------------------- variables.tf | 10 +++++++-- 2 files changed, 43 insertions(+), 28 deletions(-) diff --git a/karpenter.tf b/karpenter.tf index d37abc0..c31bb12 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -8,43 +8,52 @@ module "karpenter" { tags = var.tags } -resource "helm_release" "karpenter" { +resource "helm_release" "karpenter_crd" { count = var.karpenter ? 
1 : 0 namespace = "karpenter" create_namespace = true - name = "karpenter" + name = "karpenter-crd" repository = "oci://public.ecr.aws/karpenter" - chart = "karpenter" + chart = "karpenter-crd" version = "v${var.karpenter_version}" - set { - name = "settings.aws.clusterName" - value = var.cluster_name - } - - set { - name = "settings.aws.clusterEndpoint" - value = module.eks.cluster_endpoint - } + depends_on = [ + module.eks, + module.karpenter[0], + ] +} - set { - name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" - value = module.karpenter[0].irsa_arn - } +resource "helm_release" "karpenter" { + count = var.karpenter ? 1 : 0 + namespace = "karpenter" + create_namespace = true - set { - name = "settings.aws.defaultInstanceProfile" - value = module.karpenter[0].instance_profile_name - } + name = "karpenter" + repository = "oci://public.ecr.aws/karpenter" + chart = "karpenter" + version = "v${var.karpenter_version}" - set { - name = "settings.aws.interruptionQueueName" - value = module.karpenter[0].queue_name - } + values = [ + yamlencode({ + "serviceAccount" = { + "annotations" = { + "eks.amazonaws.com/role-arn" = module.karpenter[0].irsa_arn + } + } + "settings" = { + "aws" = { + "clusterEndpoint" = module.eks.cluster_endpoint + "clusterName" = var.cluster_name + "defaultInstanceProfile" = module.karpenter[0].instance_profile_name + "interruptionQueueName" = module.karpenter[0].queue_name + } + } + }), + yamlencode(var.karpenter_values), + ] depends_on = [ - module.eks, - module.karpenter[0], + helm_release.karpenter_crd[0] ] } diff --git a/variables.tf b/variables.tf index 840cc47..36a0f43 100644 --- a/variables.tf +++ b/variables.tf @@ -155,13 +155,13 @@ variable "eks_managed_node_groups" { } variable "fargate_profiles" { - description = "Map of Fargate Profile definitions to create" + description = "Map of Fargate Profile definitions to create." 
type = map(any) default = {} } variable "fargate_profile_defaults" { - description = "Map of Fargate Profile default configurations" + description = "Map of Fargate Profile default configurations." type = map(any) default = {} } @@ -184,6 +184,12 @@ variable "karpenter" { default = false } +variable "karpenter_values" { + description = "Additional custom values to use when installing the Karpenter Helm chart." + type = map(any) + default = {} +} + variable "karpenter_version" { description = "Version of Karpenter Helm chart to install on the EKS cluster." type = string From ab0055b8a1bc23e222d7ee2de822572e643bb966 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Tue, 19 Dec 2023 11:31:13 -0700 Subject: [PATCH 28/60] Switch to using the NVIDIA GPU Operator, via Helm chart, instead of just installing the device plugin. --- nvidia.tf | 19 ++++++++++++------- variables.tf | 10 +++++----- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/nvidia.tf b/nvidia.tf index 6fc2f47..4ebcc4b 100644 --- a/nvidia.tf +++ b/nvidia.tf @@ -1,10 +1,15 @@ -## Nvidia Device Plugin for GPU support -resource "null_resource" "eks_nvidia_device_plugin" { - count = var.nvidia_device_plugin ? 1 : 0 - provisioner "local-exec" { - command = "kubectl --context='${var.cluster_name}' apply --filename='https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v${var.nvidia_device_plugin_version}/nvidia-device-plugin.yml'" - } +## NVIDIA GPU Operator + +resource "helm_release" "nvidia_gpu_operator" { + count = var.nvidia_gpu_operator ? 
1 : 0 + chart = "gpu-operator" + create_namespace = true + name = "gpu-operator" + namespace = "nvidia/gpu-operator" + repository = "https://helm.ngc.nvidia.com/nvidia" + version = "v${var.nvidia_gpu_operator_version}" + depends_on = [ - helm_release.aws_lb_controller, + module.eks, ] } diff --git a/variables.tf b/variables.tf index 36a0f43..78d2043 100644 --- a/variables.tf +++ b/variables.tf @@ -261,15 +261,15 @@ variable "node_security_group_tags" { default = {} } -variable "nvidia_device_plugin" { +variable "nvidia_gpu_operator" { default = false - description = "Whether to install the Nvidia device plugin driver" + description = "Whether to install the NVIDIA GPU Operator." type = bool } -variable "nvidia_device_plugin_version" { - default = "0.14.3" - description = "Version of the Nvidia device plugin to install." +variable "nvidia_gpu_operator_version" { + default = "23.9.1" + description = "Version of the NVIDIA GPU Operator Helm chart to install." type = string } From 70e353ea3617a8e931ac3bbf46077b6ffbdd3eea Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Tue, 19 Dec 2023 11:36:03 -0700 Subject: [PATCH 29/60] Add custom values variables for EBS/EFS CSI, AWS Load Balancer, and `cert-manager` Helm charts. 
--- cert-manager.tf | 3 ++- ebs-csi.tf | 3 ++- efs-csi.tf | 3 ++- lb.tf | 3 ++- variables.tf | 24 ++++++++++++++++++++++++ 5 files changed, 32 insertions(+), 4 deletions(-) diff --git a/cert-manager.tf b/cert-manager.tf index e69e707..5238d53 100644 --- a/cert-manager.tf +++ b/cert-manager.tf @@ -82,7 +82,8 @@ resource "helm_release" "cert_manager" { "eks.amazonaws.com/role-arn" = "arn:${local.aws_partition}:iam::${local.aws_account_id}:role/${var.cluster_name}-cert-manager-role" } } - }) + }), + yamlencode(var.cert_manager_values), ] depends_on = [ diff --git a/ebs-csi.tf b/ebs-csi.tf index 6d64faa..b0a35f6 100644 --- a/ebs-csi.tf +++ b/ebs-csi.tf @@ -40,7 +40,8 @@ resource "helm_release" "aws_ebs_csi_driver" { "image" = { "repository" = "${var.csi_ecr_repository_id}.dkr.ecr.${local.aws_region}.amazonaws.com/eks/aws-ebs-csi-driver" } - }) + }), + yamlencode(var.ebs_csi_driver_values), ] depends_on = [ diff --git a/efs-csi.tf b/efs-csi.tf index de1dd03..840fa92 100644 --- a/efs-csi.tf +++ b/efs-csi.tf @@ -124,7 +124,8 @@ resource "helm_release" "aws_efs_csi_driver" { } } } - }) + }), + yamlencode(var.efs_csi_driver_values), ] depends_on = [ diff --git a/lb.tf b/lb.tf index a62b1d3..4b6b5df 100644 --- a/lb.tf +++ b/lb.tf @@ -40,7 +40,8 @@ resource "helm_release" "aws_lb_controller" { "name" = "aws-load-balancer-controller" } "vpcId" = var.vpc_id - }) + }), + yamlencode(var.lb_values), ] depends_on = [ diff --git a/variables.tf b/variables.tf index 78d2043..3c2cb6b 100644 --- a/variables.tf +++ b/variables.tf @@ -4,6 +4,12 @@ variable "aws_auth_roles" { default = [] } +variable "cert_manager_values" { + description = "Additional custom values for the cert-manager Helm chart." + type = map(any) + default = {} +} + variable "cert_manager_version" { default = "1.13.3" description = "Version of cert-manager to install." 
@@ -130,6 +136,12 @@ variable "ebs_csi_driver" { default = true } +variable "ebs_csi_driver_values" { + description = "Additional custom values for the EBS CSI Driver Helm chart." + type = map(any) + default = {} +} + variable "ebs_csi_driver_version" { default = "2.25.0" description = "Version of the EFS CSI storage driver to install." @@ -142,6 +154,12 @@ variable "efs_csi_driver" { default = true } +variable "efs_csi_driver_values" { + description = "Additional custom values for the EFS CSI Driver Helm chart." + type = map(any) + default = {} +} + variable "efs_csi_driver_version" { default = "2.5.2" description = "Version of the EFS CSI storage driver to install." @@ -226,6 +244,12 @@ variable "lb_controller" { default = true } +variable "lb_values" { + description = "Additional custom values for the AWS Load Balancer Controller Helm chart." + type = map(any) + default = {} +} + variable "lb_controller_version" { default = "1.6.1" description = "Version of the AWS Load Balancer Controller chart to install." From 44de663e76e9364816294c8345ad4e06d3a0062f Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Tue, 19 Dec 2023 11:39:00 -0700 Subject: [PATCH 30/60] Upgrade Terraform AWS IAM module to 5.33.0. --- cert-manager.tf | 2 +- cni.tf | 2 +- ebs-csi.tf | 2 +- efs-csi.tf | 4 ++-- lb.tf | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cert-manager.tf b/cert-manager.tf index 5238d53..d3f1bff 100644 --- a/cert-manager.tf +++ b/cert-manager.tf @@ -6,7 +6,7 @@ locals { module "cert_manager_irsa" { count = local.cert_manager ? 1 : 0 source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.32.1" + version = "5.33.0" role_name = "${var.cluster_name}-cert-manager-role" diff --git a/cni.tf b/cni.tf index 44086ed..930ae87 100644 --- a/cni.tf +++ b/cni.tf @@ -1,7 +1,7 @@ # Authorize VPC CNI via IRSA. 
module "eks_vpc_cni_irsa" { source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.32.1" + version = "5.33.0" role_name = "${var.cluster_name}-vpc-cni-role" attach_vpc_cni_policy = true diff --git a/ebs-csi.tf b/ebs-csi.tf index b0a35f6..6cf13b5 100644 --- a/ebs-csi.tf +++ b/ebs-csi.tf @@ -4,7 +4,7 @@ module "eks_ebs_csi_irsa" { count = var.ebs_csi_driver ? 1 : 0 source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.32.1" + version = "5.33.0" role_name = "${var.cluster_name}-ebs-csi-role" attach_ebs_csi_policy = true diff --git a/efs-csi.tf b/efs-csi.tf index 840fa92..c2c7d88 100644 --- a/efs-csi.tf +++ b/efs-csi.tf @@ -20,7 +20,7 @@ resource "aws_security_group" "eks_efs_sg" { module "eks_efs_csi_controller_irsa" { count = var.efs_csi_driver ? 1 : 0 source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.32.1" + version = "5.33.0" role_name = "${var.cluster_name}-efs-csi-controller-role" attach_efs_csi_policy = true @@ -39,7 +39,7 @@ module "eks_efs_csi_controller_irsa" { module "eks_efs_csi_node_irsa" { count = var.efs_csi_driver ? 1 : 0 source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.32.1" + version = "5.33.0" role_name = "${var.cluster_name}-efs-csi-node-role" oidc_providers = { diff --git a/lb.tf b/lb.tf index 4b6b5df..72abb18 100644 --- a/lb.tf +++ b/lb.tf @@ -4,7 +4,7 @@ module "eks_lb_irsa" { count = var.lb_controller ? 1 : 0 source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "5.32.1" + version = "5.33.0" role_name = "${var.cluster_name}-lb-role" attach_load_balancer_controller_policy = true From 128fe57c9d47543ba9d6db0fd8163c8f98cb4f12 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Tue, 19 Dec 2023 13:12:52 -0700 Subject: [PATCH 31/60] Don't assume `kube-system` is running on Fargate when profiles are provided. 
--- main.tf | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/main.tf b/main.tf index 5abb632..19ac2b9 100644 --- a/main.tf +++ b/main.tf @@ -39,10 +39,8 @@ module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks- cluster_addons = merge( { - # Configure CoreDNS to run on Fargate when profiles are provided. coredns = { - configuration_values = length(var.fargate_profiles) > 0 ? jsonencode({ computeType = "Fargate" }) : "" - most_recent = var.cluster_addons_most_recent + most_recent = var.cluster_addons_most_recent } eks-pod-identity-agent = { most_recent = var.cluster_addons_most_recent From 6f7279f7939cc690465b7cf351a70e1787772255 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Tue, 19 Dec 2023 13:54:39 -0700 Subject: [PATCH 32/60] Turns out that ECR credentials are required for Karpenter artifacts. --- karpenter.tf | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/karpenter.tf b/karpenter.tf index c31bb12..a351ef1 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -1,3 +1,13 @@ +# Required to access Karpenter artifacts in AWS public repositories. +provider "aws" { + region = "us-east-1" + alias = "virginia" +} + +data "aws_ecrpublic_authorization_token" "current" { + provider = aws.virginia +} + module "karpenter" { count = var.karpenter ? 1 : 0 source = "terraform-aws-modules/eks/aws//modules/karpenter" @@ -34,6 +44,11 @@ resource "helm_release" "karpenter" { chart = "karpenter" version = "v${var.karpenter_version}" + # XXX: Unfortunately, AWS ECR credentials leads to resource churn + # as the password will change. + repository_username = data.aws_ecrpublic_authorization_token.current.user_name + repository_password = data.aws_ecrpublic_authorization_token.current.password + values = [ yamlencode({ "serviceAccount" = { From 9098d4b27752bdd66ee64a1e409c5dfc4ee1b4a8 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Tue, 19 Dec 2023 13:55:33 -0700 Subject: [PATCH 33/60] Ditto. 
--- karpenter.tf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/karpenter.tf b/karpenter.tf index a351ef1..8eefcb6 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -28,6 +28,11 @@ resource "helm_release" "karpenter_crd" { chart = "karpenter-crd" version = "v${var.karpenter_version}" + # XXX: Unfortunately, AWS ECR credentials leads to resource churn + # as the password will change. + repository_username = data.aws_ecrpublic_authorization_token.current.user_name + repository_password = data.aws_ecrpublic_authorization_token.current.password + depends_on = [ module.eks, module.karpenter[0], From 26d21cdd08a20c5ea9561d532a80a70d2a3dc346 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Tue, 19 Dec 2023 14:33:51 -0700 Subject: [PATCH 34/60] Add Karpenter outputs. --- outputs.tf | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/outputs.tf b/outputs.tf index 7d4a4ad..f3517ee 100644 --- a/outputs.tf +++ b/outputs.tf @@ -18,6 +18,16 @@ output "eks_managed_node_groups" { value = module.eks.eks_managed_node_groups } +output "karpenter_role_name" { + description = "The name of the Karpenter IAM role" + value = var.karpenter ? module.karpenter[0].role_name : null +} + +output "karpenter_role_arn" { + description = "The Amazon Resource Name (ARN) specifying the Karpenter IAM role" + value = var.karpenter ? module.karpenter[0].role_arn : null +} + output "kms_key_arn" { description = "The Amazon Resource Name (ARN) of the KMS key for the EKS cluster." value = var.kms_manage ? aws_kms_key.this[0].arn : module.eks.kms_key_arn From 7c3ebe1b0f9c3c38781b4edd74aa07cdc472ce8c Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Tue, 19 Dec 2023 16:00:11 -0700 Subject: [PATCH 35/60] Add Karpenter instance profile outputs. 
--- outputs.tf | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/outputs.tf b/outputs.tf index f3517ee..ef761ec 100644 --- a/outputs.tf +++ b/outputs.tf @@ -18,14 +18,29 @@ output "eks_managed_node_groups" { value = module.eks.eks_managed_node_groups } +output "karpenter_role_arn" { + description = "The Amazon Resource Name (ARN) specifying the Karpenter IAM role" + value = var.karpenter ? module.karpenter[0].role_arn : null +} + output "karpenter_role_name" { description = "The name of the Karpenter IAM role" value = var.karpenter ? module.karpenter[0].role_name : null } -output "karpenter_role_arn" { - description = "The Amazon Resource Name (ARN) specifying the Karpenter IAM role" - value = var.karpenter ? module.karpenter[0].role_arn : null +output "karpenter_instance_profile_arn" { + description = "The Amazon Resource Name (ARN) specifying the Karpenter IAM instance profile" + value = var.karpenter ? module.karpenter[0].instance_profile_arn : null +} + +output "karpenter_instance_profile_id" { + description = "The name of the Karpenter IAM instance profile ID" + value = var.karpenter ? module.karpenter[0].instance_profile_id : null +} + +output "karpenter_instance_profile_name" { + description = "The name of the Karpenter IAM instance profile nome" + value = var.karpenter ? module.karpenter[0].instance_profile_name : null } output "kms_key_arn" { From 6de78e45f70dcd54e4d0223a209ea326caf4a431 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Tue, 19 Dec 2023 16:07:07 -0700 Subject: [PATCH 36/60] Make it possible to disable CoreDNS from cluster addons. 
--- main.tf | 4 +++- variables.tf | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/main.tf b/main.tf index 19ac2b9..6d5f2cf 100644 --- a/main.tf +++ b/main.tf @@ -38,10 +38,12 @@ module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks- cluster_version = var.kubernetes_version cluster_addons = merge( - { + var.cluster_addons_coredns ? { coredns = { most_recent = var.cluster_addons_most_recent } + } : {}, + { eks-pod-identity-agent = { most_recent = var.cluster_addons_most_recent } diff --git a/variables.tf b/variables.tf index 3c2cb6b..9d55cae 100644 --- a/variables.tf +++ b/variables.tf @@ -22,6 +22,12 @@ variable "cert_manager_route53_zone_id" { type = string } +variable "cluster_addons_coredns" { + description = "Indicates whether to install the CoreDNS cluster addon." + type = bool + default = true +} + variable "cluster_addons_most_recent" { description = "Indicates whether to use the most recent version of cluster addons" type = bool From fa093ee663858695f86a5a21cab45ba58e640494 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Tue, 19 Dec 2023 17:22:35 -0700 Subject: [PATCH 37/60] Set `enable_karpenter_instance_profile_creation=true` by default. --- karpenter.tf | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/karpenter.tf b/karpenter.tf index 8eefcb6..0553f02 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -13,20 +13,20 @@ module "karpenter" { source = "terraform-aws-modules/eks/aws//modules/karpenter" version = "19.21.0" - cluster_name = var.cluster_name - irsa_oidc_provider_arn = module.eks.oidc_provider_arn - tags = var.tags + cluster_name = var.cluster_name + enable_karpenter_instance_profile_creation = true + irsa_oidc_provider_arn = module.eks.oidc_provider_arn + tags = var.tags } resource "helm_release" "karpenter_crd" { count = var.karpenter ? 
1 : 0 - namespace = "karpenter" create_namespace = true - - name = "karpenter-crd" - repository = "oci://public.ecr.aws/karpenter" - chart = "karpenter-crd" - version = "v${var.karpenter_version}" + name = "karpenter-crd" + namespace = "karpenter" + repository = "oci://public.ecr.aws/karpenter" + chart = "karpenter-crd" + version = "v${var.karpenter_version}" # XXX: Unfortunately, AWS ECR credentials leads to resource churn # as the password will change. @@ -40,11 +40,9 @@ resource "helm_release" "karpenter_crd" { } resource "helm_release" "karpenter" { - count = var.karpenter ? 1 : 0 - namespace = "karpenter" - create_namespace = true - + count = var.karpenter ? 1 : 0 name = "karpenter" + namespace = "karpenter" repository = "oci://public.ecr.aws/karpenter" chart = "karpenter" version = "v${var.karpenter_version}" From c00766cf429675f82052c2cdf25df047d889b811 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Tue, 19 Dec 2023 17:35:25 -0700 Subject: [PATCH 38/60] Ignore changes in `repository_password` for Karpenter's `helm_release` resource. --- karpenter.tf | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/karpenter.tf b/karpenter.tf index 0553f02..0f68c92 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -28,11 +28,13 @@ resource "helm_release" "karpenter_crd" { chart = "karpenter-crd" version = "v${var.karpenter_version}" - # XXX: Unfortunately, AWS ECR credentials leads to resource churn - # as the password will change. repository_username = data.aws_ecrpublic_authorization_token.current.user_name repository_password = data.aws_ecrpublic_authorization_token.current.password + lifecycle { + ignore_changes = [repository_password] + } + depends_on = [ module.eks, module.karpenter[0], @@ -47,8 +49,6 @@ resource "helm_release" "karpenter" { chart = "karpenter" version = "v${var.karpenter_version}" - # XXX: Unfortunately, AWS ECR credentials leads to resource churn - # as the password will change. 
repository_username = data.aws_ecrpublic_authorization_token.current.user_name repository_password = data.aws_ecrpublic_authorization_token.current.password @@ -71,6 +71,10 @@ resource "helm_release" "karpenter" { yamlencode(var.karpenter_values), ] + lifecycle { + ignore_changes = [repository_password] + } + depends_on = [ helm_release.karpenter_crd[0] ] From 93ed06ebcaa4dda2852db74074fb7da6fbeb9e74 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Tue, 19 Dec 2023 18:00:50 -0700 Subject: [PATCH 39/60] Enable Karpenter's webhooks by default. --- karpenter.tf | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/karpenter.tf b/karpenter.tf index 0f68c92..da0fc1b 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -54,17 +54,20 @@ resource "helm_release" "karpenter" { values = [ yamlencode({ - "serviceAccount" = { - "annotations" = { + serviceAccount = { + annotations = { "eks.amazonaws.com/role-arn" = module.karpenter[0].irsa_arn } } - "settings" = { - "aws" = { - "clusterEndpoint" = module.eks.cluster_endpoint - "clusterName" = var.cluster_name - "defaultInstanceProfile" = module.karpenter[0].instance_profile_name - "interruptionQueueName" = module.karpenter[0].queue_name + settings = { + aws = { + clusterEndpoint = module.eks.cluster_endpoint + clusterName = var.cluster_name + defaultInstanceProfile = module.karpenter[0].instance_profile_name + interruptionQueueName = module.karpenter[0].queue_name + } + webhook = { + enabled = true } } }), From 696e3c7bba07c6a29591c941e3dc67cba299f0f9 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Wed, 20 Dec 2023 09:37:39 -0700 Subject: [PATCH 40/60] Rename variable to `lb_controller_values` for consistency; upgrade AWS Load Balancer Controller version to 1.6.2. 
--- lb.tf | 2 +- variables.tf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lb.tf b/lb.tf index 72abb18..396c852 100644 --- a/lb.tf +++ b/lb.tf @@ -41,7 +41,7 @@ resource "helm_release" "aws_lb_controller" { } "vpcId" = var.vpc_id }), - yamlencode(var.lb_values), + yamlencode(var.lb_controller_values), ] depends_on = [ diff --git a/variables.tf b/variables.tf index 9d55cae..7abd0cd 100644 --- a/variables.tf +++ b/variables.tf @@ -250,14 +250,14 @@ variable "lb_controller" { default = true } -variable "lb_values" { +variable "lb_controller_values" { description = "Additional custom values for the AWS Load Balancer Controller Helm chart." type = map(any) default = {} } variable "lb_controller_version" { - default = "1.6.1" + default = "1.6.2" description = "Version of the AWS Load Balancer Controller chart to install." type = string } From 51e4ba1f31c9ec10ec723ee7400251ffda744f50 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Wed, 20 Dec 2023 11:10:41 -0700 Subject: [PATCH 41/60] Upgrade Karpenter version to 0.33.1 and its Helm chart values accordingly. --- karpenter.tf | 2 -- variables.tf | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/karpenter.tf b/karpenter.tf index da0fc1b..58cbffd 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -61,9 +61,7 @@ resource "helm_release" "karpenter" { } settings = { aws = { - clusterEndpoint = module.eks.cluster_endpoint clusterName = var.cluster_name - defaultInstanceProfile = module.karpenter[0].instance_profile_name interruptionQueueName = module.karpenter[0].queue_name } webhook = { diff --git a/variables.tf b/variables.tf index 7abd0cd..dbbe6b1 100644 --- a/variables.tf +++ b/variables.tf @@ -217,7 +217,7 @@ variable "karpenter_values" { variable "karpenter_version" { description = "Version of Karpenter Helm chart to install on the EKS cluster." 
type = string - default = "0.32.3" + default = "0.33.1" } variable "kms_manage" { From bf8b3803bd5da66273c17b73649090a73a55ff61 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Wed, 20 Dec 2023 11:20:36 -0700 Subject: [PATCH 42/60] Attempt to omit AWS ECR credentials by using `docker logout public.ecr.aws`. --- karpenter.tf | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/karpenter.tf b/karpenter.tf index 58cbffd..04625c2 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -1,13 +1,3 @@ -# Required to access Karpenter artifacts in AWS public repositories. -provider "aws" { - region = "us-east-1" - alias = "virginia" -} - -data "aws_ecrpublic_authorization_token" "current" { - provider = aws.virginia -} - module "karpenter" { count = var.karpenter ? 1 : 0 source = "terraform-aws-modules/eks/aws//modules/karpenter" @@ -28,9 +18,6 @@ resource "helm_release" "karpenter_crd" { chart = "karpenter-crd" version = "v${var.karpenter_version}" - repository_username = data.aws_ecrpublic_authorization_token.current.user_name - repository_password = data.aws_ecrpublic_authorization_token.current.password - lifecycle { ignore_changes = [repository_password] } @@ -49,9 +36,6 @@ resource "helm_release" "karpenter" { chart = "karpenter" version = "v${var.karpenter_version}" - repository_username = data.aws_ecrpublic_authorization_token.current.user_name - repository_password = data.aws_ecrpublic_authorization_token.current.password - values = [ yamlencode({ serviceAccount = { From e99cc19869cf141ffa20a2ed43076a9e230be890 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Wed, 20 Dec 2023 11:41:43 -0700 Subject: [PATCH 43/60] Attempt Karpenter 0.32.4 instead. 
--- karpenter.tf | 2 ++ variables.tf | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/karpenter.tf b/karpenter.tf index 04625c2..a068883 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -45,7 +45,9 @@ resource "helm_release" "karpenter" { } settings = { aws = { + clusterEndpoint = module.eks.cluster_endpoint clusterName = var.cluster_name + defaultInstanceProfile = module.karpenter[0].instance_profile_name interruptionQueueName = module.karpenter[0].queue_name } webhook = { diff --git a/variables.tf b/variables.tf index dbbe6b1..8a9504d 100644 --- a/variables.tf +++ b/variables.tf @@ -217,7 +217,7 @@ variable "karpenter_values" { variable "karpenter_version" { description = "Version of Karpenter Helm chart to install on the EKS cluster." type = string - default = "0.33.1" + default = "0.32.4" } variable "kms_manage" { From 036046eb884105eaee771052d615147669dece7b Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Wed, 20 Dec 2023 12:31:07 -0700 Subject: [PATCH 44/60] Support customizing whether to wait for all `helm_release` resources. 
--- cert-manager.tf | 1 + ebs-csi.tf | 1 + efs-csi.tf | 1 + karpenter.tf | 1 + lb.tf | 1 + nvidia.tf | 1 + variables.tf | 38 +++++++++++++++++++++++++++++++++++++- 7 files changed, 43 insertions(+), 1 deletion(-) diff --git a/cert-manager.tf b/cert-manager.tf index d3f1bff..d022515 100644 --- a/cert-manager.tf +++ b/cert-manager.tf @@ -64,6 +64,7 @@ resource "helm_release" "cert_manager" { chart = "cert-manager" repository = "https://charts.jetstack.io" version = "v${var.cert_manager_version}" + wait = var.cert_manager_wait keyring = "${path.module}/cert-manager-keyring.gpg" verify = var.helm_verify diff --git a/ebs-csi.tf b/ebs-csi.tf index 6cf13b5..36b4edb 100644 --- a/ebs-csi.tf +++ b/ebs-csi.tf @@ -26,6 +26,7 @@ resource "helm_release" "aws_ebs_csi_driver" { chart = "aws-ebs-csi-driver" repository = "https://kubernetes-sigs.github.io/aws-ebs-csi-driver" version = var.ebs_csi_driver_version + wait = var.ebs_csi_driver_wait values = [ yamlencode({ diff --git a/efs-csi.tf b/efs-csi.tf index c2c7d88..c5ced86 100644 --- a/efs-csi.tf +++ b/efs-csi.tf @@ -103,6 +103,7 @@ resource "helm_release" "aws_efs_csi_driver" { chart = "aws-efs-csi-driver" repository = "https://kubernetes-sigs.github.io/aws-efs-csi-driver" version = var.efs_csi_driver_version + wait = var.efs_csi_driver_wait values = [ yamlencode({ diff --git a/karpenter.tf b/karpenter.tf index a068883..4e03a5c 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -35,6 +35,7 @@ resource "helm_release" "karpenter" { repository = "oci://public.ecr.aws/karpenter" chart = "karpenter" version = "v${var.karpenter_version}" + wait = var.karpenter_wait values = [ yamlencode({ diff --git a/lb.tf b/lb.tf index 396c852..03a78e4 100644 --- a/lb.tf +++ b/lb.tf @@ -26,6 +26,7 @@ resource "helm_release" "aws_lb_controller" { chart = "aws-load-balancer-controller" repository = "https://aws.github.io/eks-charts" version = var.lb_controller_version + wait = var.lb_controller_wait values = [ yamlencode({ diff --git a/nvidia.tf 
b/nvidia.tf index 4ebcc4b..00b1ca0 100644 --- a/nvidia.tf +++ b/nvidia.tf @@ -7,6 +7,7 @@ resource "helm_release" "nvidia_gpu_operator" { name = "gpu-operator" namespace = "nvidia/gpu-operator" repository = "https://helm.ngc.nvidia.com/nvidia" + wait = var.nvidia_gpu_operator_wait version = "v${var.nvidia_gpu_operator_version}" depends_on = [ diff --git a/variables.tf b/variables.tf index 8a9504d..3642ecd 100644 --- a/variables.tf +++ b/variables.tf @@ -4,6 +4,12 @@ variable "aws_auth_roles" { default = [] } +variable "cert_manager_wait" { + description = "Wait for the cert-manager Helm chart installation to complete." + type = bool + default = true +} + variable "cert_manager_values" { description = "Additional custom values for the cert-manager Helm chart." type = map(any) @@ -142,6 +148,12 @@ variable "ebs_csi_driver" { default = true } +variable "ebs_csi_driver_wait" { + description = "Wait for the EBS CSI storage driver Helm chart install to complete." + type = bool + default = true +} + variable "ebs_csi_driver_values" { description = "Additional custom values for the EBS CSI Driver Helm chart." type = map(any) @@ -160,6 +172,12 @@ variable "efs_csi_driver" { default = true } +variable "efs_csi_driver_wait" { + description = "Wait for the EFS CSI storage driver Helm chart install to complete." + type = bool + default = true +} + variable "efs_csi_driver_values" { description = "Additional custom values for the EFS CSI Driver Helm chart." type = map(any) @@ -208,6 +226,12 @@ variable "karpenter" { default = false } +variable "karpenter_wait" { + description = "Wait for the Karpenter Helm chart installation to complete." + type = bool + default = true +} + variable "karpenter_values" { description = "Additional custom values to use when installing the Karpenter Helm chart." type = map(any) @@ -245,7 +269,13 @@ variable "kubernetes_version" { } variable "lb_controller" { - description = "Install and configure the AWS Load Balancer controller." 
+ description = "Install and configure the AWS Load Balancer Controller." + type = bool + default = true +} + +variable "lb_controller_wait" { + description = "Wait for the AWS Load Balancer Controller Helm chart install to complete." type = bool default = true } @@ -297,6 +327,12 @@ variable "nvidia_gpu_operator" { type = bool } +variable "nvidia_gpu_operator_wait" { + description = "Wait for the NVIDIA GPU Operator Helm chart installation to complete." + type = bool + default = true +} + variable "nvidia_gpu_operator_version" { default = "23.9.1" description = "Version of the NVIDIA GPU Operator Helm chart to install." From 00c7d5c0aa686028bfa76d2e1a798a0ce2191e29 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Wed, 20 Dec 2023 12:32:17 -0700 Subject: [PATCH 45/60] No longer need to custom lifecycle to workaround ECR repository password. --- karpenter.tf | 8 -------- 1 file changed, 8 deletions(-) diff --git a/karpenter.tf b/karpenter.tf index 4e03a5c..67b1f2c 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -18,10 +18,6 @@ resource "helm_release" "karpenter_crd" { chart = "karpenter-crd" version = "v${var.karpenter_version}" - lifecycle { - ignore_changes = [repository_password] - } - depends_on = [ module.eks, module.karpenter[0], @@ -59,10 +55,6 @@ resource "helm_release" "karpenter" { yamlencode(var.karpenter_values), ] - lifecycle { - ignore_changes = [repository_password] - } - depends_on = [ helm_release.karpenter_crd[0] ] From 179da5fce4c79105dd740fece0f6038efe47b850 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Wed, 20 Dec 2023 13:57:11 -0700 Subject: [PATCH 46/60] Attempt Karpenter 0.33.1 and update chart values; add Karpenter's IRSA ARN/name to output instead of the instance profile. 
--- karpenter.tf | 15 ++++++--------- outputs.tf | 25 ++++++++++--------------- variables.tf | 2 +- 3 files changed, 17 insertions(+), 25 deletions(-) diff --git a/karpenter.tf b/karpenter.tf index 67b1f2c..50d1d01 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -41,15 +41,12 @@ resource "helm_release" "karpenter" { } } settings = { - aws = { - clusterEndpoint = module.eks.cluster_endpoint - clusterName = var.cluster_name - defaultInstanceProfile = module.karpenter[0].instance_profile_name - interruptionQueueName = module.karpenter[0].queue_name - } - webhook = { - enabled = true - } + clusterEndpoint = module.eks.cluster_endpoint + clusterName = var.cluster_name + interruptionQueue = module.karpenter[0].queue_name + } + webhook = { + enabled = true } }), yamlencode(var.karpenter_values), diff --git a/outputs.tf b/outputs.tf index ef761ec..b2fbd74 100644 --- a/outputs.tf +++ b/outputs.tf @@ -18,6 +18,16 @@ output "eks_managed_node_groups" { value = module.eks.eks_managed_node_groups } +output "karpenter_irsa_arn" { + description = "The Amazon Resource Name (ARN) for the Karpenter IRSA role" + value = var.karpenter ? module.karpenter[0].irsa_arn : null +} + +output "karpenter_irsa_name" { + description = "The name of the Karpenter IRSA role" + value = var.karpenter ? module.karpenter[0].irsa_name : null +} + output "karpenter_role_arn" { description = "The Amazon Resource Name (ARN) specifying the Karpenter IAM role" value = var.karpenter ? module.karpenter[0].role_arn : null @@ -28,21 +38,6 @@ output "karpenter_role_name" { value = var.karpenter ? module.karpenter[0].role_name : null } -output "karpenter_instance_profile_arn" { - description = "The Amazon Resource Name (ARN) specifying the Karpenter IAM instance profile" - value = var.karpenter ? module.karpenter[0].instance_profile_arn : null -} - -output "karpenter_instance_profile_id" { - description = "The name of the Karpenter IAM instance profile ID" - value = var.karpenter ? 
module.karpenter[0].instance_profile_id : null -} - -output "karpenter_instance_profile_name" { - description = "The name of the Karpenter IAM instance profile nome" - value = var.karpenter ? module.karpenter[0].instance_profile_name : null -} - output "kms_key_arn" { description = "The Amazon Resource Name (ARN) of the KMS key for the EKS cluster." value = var.kms_manage ? aws_kms_key.this[0].arn : module.eks.kms_key_arn diff --git a/variables.tf b/variables.tf index 3642ecd..5a4c7b8 100644 --- a/variables.tf +++ b/variables.tf @@ -241,7 +241,7 @@ variable "karpenter_values" { variable "karpenter_version" { description = "Version of Karpenter Helm chart to install on the EKS cluster." type = string - default = "0.32.4" + default = "0.33.1" } variable "kms_manage" { From 471efa6990fbd8a0cf454b89329807fad74d0c9d Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Wed, 20 Dec 2023 14:24:33 -0700 Subject: [PATCH 47/60] Use pre-release branch of `aws-eks` module to support Karpenter 0.32+. --- karpenter.tf | 19 ++++++++++--------- outputs.tf | 12 ++++++------ 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/karpenter.tf b/karpenter.tf index 50d1d01..1b7dbb5 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -1,12 +1,13 @@ module "karpenter" { - count = var.karpenter ? 1 : 0 - source = "terraform-aws-modules/eks/aws//modules/karpenter" - version = "19.21.0" - - cluster_name = var.cluster_name - enable_karpenter_instance_profile_creation = true - irsa_oidc_provider_arn = module.eks.oidc_provider_arn - tags = var.tags + count = var.karpenter ? 
1 : 0 + # XXX: Switch source back to module once upgrade is released, refs + # terraform-aws-modules/terraform-aws-eks#2858 + source = "github.com/radiant-maxar/terraform-aws-eks//karpenter?ref=karpenter-update" + # source = "terraform-aws-modules/eks/aws//modules/karpenter" + # version = "20.x.x" + cluster_name = var.cluster_name + irsa_oidc_provider_arn = module.eks.oidc_provider_arn + tags = var.tags } resource "helm_release" "karpenter_crd" { @@ -37,7 +38,7 @@ resource "helm_release" "karpenter" { yamlencode({ serviceAccount = { annotations = { - "eks.amazonaws.com/role-arn" = module.karpenter[0].irsa_arn + "eks.amazonaws.com/role-arn" = module.karpenter[0].pod_identity_role_arn } } settings = { diff --git a/outputs.tf b/outputs.tf index b2fbd74..da84e15 100644 --- a/outputs.tf +++ b/outputs.tf @@ -18,14 +18,14 @@ output "eks_managed_node_groups" { value = module.eks.eks_managed_node_groups } -output "karpenter_irsa_arn" { - description = "The Amazon Resource Name (ARN) for the Karpenter IRSA role" - value = var.karpenter ? module.karpenter[0].irsa_arn : null +output "karpenter_pod_identity_role_arn" { + description = "The Amazon Resource Name (ARN) specifying the Pod Identity IAM role" + value = var.karpenter ? module.karpenter[0].pod_identity_role_arn : null } -output "karpenter_irsa_name" { - description = "The name of the Karpenter IRSA role" - value = var.karpenter ? module.karpenter[0].irsa_name : null +output "karpenter_pod_identity_role_name" { + description = "The name of the Pod Identity IAM role" + value = var.karpenter ? module.karpenter[0].pod_identity_role_name : null } output "karpenter_role_arn" { From 0ccc8533f416af203deac45d1ada323cc500035b Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Wed, 20 Dec 2023 14:26:02 -0700 Subject: [PATCH 48/60] Fix formatting. 
--- karpenter.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karpenter.tf b/karpenter.tf index 1b7dbb5..b056435 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -1,5 +1,5 @@ module "karpenter" { - count = var.karpenter ? 1 : 0 + count = var.karpenter ? 1 : 0 # XXX: Switch source back to module once upgrade is released, refs # terraform-aws-modules/terraform-aws-eks#2858 source = "github.com/radiant-maxar/terraform-aws-eks//karpenter?ref=karpenter-update" From 802998266bfacbdefff760009e872438f12db48b Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Wed, 20 Dec 2023 14:33:48 -0700 Subject: [PATCH 49/60] Fix Karpenter GitHub module source. --- karpenter.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/karpenter.tf b/karpenter.tf index b056435..3c9ff22 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -1,8 +1,8 @@ module "karpenter" { count = var.karpenter ? 1 : 0 - # XXX: Switch source back to module once upgrade is released, refs + # XXX: Switch source back to module once v20 is released, refs # terraform-aws-modules/terraform-aws-eks#2858 - source = "github.com/radiant-maxar/terraform-aws-eks//karpenter?ref=karpenter-update" + source = "github.com/radiant-maxar/terraform-aws-eks//modules/karpenter?ref=v20-prerelease" # source = "terraform-aws-modules/eks/aws//modules/karpenter" # version = "20.x.x" cluster_name = var.cluster_name From 09b74d01214ab46dca9ff893657b89a1699caabc Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Wed, 20 Dec 2023 15:10:37 -0700 Subject: [PATCH 50/60] Karpenter webhook has been deprecated, don't re-enable. 
--- karpenter.tf | 3 --- 1 file changed, 3 deletions(-) diff --git a/karpenter.tf b/karpenter.tf index 3c9ff22..fae092d 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -46,9 +46,6 @@ resource "helm_release" "karpenter" { clusterName = var.cluster_name interruptionQueue = module.karpenter[0].queue_name } - webhook = { - enabled = true - } }), yamlencode(var.karpenter_values), ] From 5cf46e2953c21fab110e9b8b328915c5433f6f00 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Wed, 20 Dec 2023 15:32:19 -0700 Subject: [PATCH 51/60] Allow customizing the Karpenter namespace. --- karpenter.tf | 11 ++++++----- variables.tf | 14 ++++++++++---- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/karpenter.tf b/karpenter.tf index fae092d..c8758f7 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -5,16 +5,17 @@ module "karpenter" { source = "github.com/radiant-maxar/terraform-aws-eks//modules/karpenter?ref=v20-prerelease" # source = "terraform-aws-modules/eks/aws//modules/karpenter" # version = "20.x.x" - cluster_name = var.cluster_name - irsa_oidc_provider_arn = module.eks.oidc_provider_arn - tags = var.tags + cluster_name = var.cluster_name + irsa_namespace_service_accounts = ["${var.karpenter_namespace}:karpenter"] + irsa_oidc_provider_arn = module.eks.oidc_provider_arn + tags = var.tags } resource "helm_release" "karpenter_crd" { count = var.karpenter ? 1 : 0 create_namespace = true name = "karpenter-crd" - namespace = "karpenter" + namespace = var.karpenter_namespace repository = "oci://public.ecr.aws/karpenter" chart = "karpenter-crd" version = "v${var.karpenter_version}" @@ -28,7 +29,7 @@ resource "helm_release" "karpenter_crd" { resource "helm_release" "karpenter" { count = var.karpenter ? 
1 : 0 name = "karpenter" - namespace = "karpenter" + namespace = var.karpenter_namespace repository = "oci://public.ecr.aws/karpenter" chart = "karpenter" version = "v${var.karpenter_version}" diff --git a/variables.tf b/variables.tf index 5a4c7b8..7c3c451 100644 --- a/variables.tf +++ b/variables.tf @@ -226,10 +226,10 @@ variable "karpenter" { default = false } -variable "karpenter_wait" { - description = "Wait for the Karpenter Helm chart installation to complete." - type = bool - default = true +variable "karpenter_namespace" { + default = "karpenter" + description = "Namespace that Karpenter will use." + type = string } variable "karpenter_values" { @@ -238,6 +238,12 @@ variable "karpenter_values" { default = {} } +variable "karpenter_wait" { + description = "Wait for the Karpenter Helm chart installation to complete." + type = bool + default = true +} + variable "karpenter_version" { description = "Version of Karpenter Helm chart to install on the EKS cluster." type = string From 03aa298394391665a7726e6adc5c1330e28fe5a8 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Wed, 20 Dec 2023 17:41:37 -0700 Subject: [PATCH 52/60] Add variables to support customizing the namespaces of all `helm_release` resources. --- cert-manager.tf | 4 +-- ebs-csi.tf | 4 +-- efs-csi.tf | 6 ++--- lb.tf | 4 +-- nvidia.tf | 2 +- variables.tf | 70 +++++++++++++++++++++++++++++++++++-------------- 6 files changed, 60 insertions(+), 30 deletions(-) diff --git a/cert-manager.tf b/cert-manager.tf index d022515..e34f3a6 100644 --- a/cert-manager.tf +++ b/cert-manager.tf @@ -14,7 +14,7 @@ module "cert_manager_irsa" { main = { provider_arn = module.eks.oidc_provider_arn namespace_service_accounts = [ - "cert-manager:cert-manager", + "${var.cert_manager_namespace}:cert-manager", ] } } @@ -59,7 +59,7 @@ resource "aws_iam_role_policy_attachment" "cert_manager" { resource "helm_release" "cert_manager" { count = local.cert_manager ? 
1 : 0 name = "cert-manager" - namespace = "cert-manager" + namespace = var.cert_manager_namespace create_namespace = true chart = "cert-manager" repository = "https://charts.jetstack.io" diff --git a/ebs-csi.tf b/ebs-csi.tf index 36b4edb..f149f35 100644 --- a/ebs-csi.tf +++ b/ebs-csi.tf @@ -12,7 +12,7 @@ module "eks_ebs_csi_irsa" { oidc_providers = { main = { provider_arn = module.eks.oidc_provider_arn - namespace_service_accounts = ["kube-system:ebs-csi-controller-sa"] + namespace_service_accounts = ["${var.ebs_csi_driver_namespace}:ebs-csi-controller-sa"] } } @@ -22,7 +22,7 @@ module "eks_ebs_csi_irsa" { resource "helm_release" "aws_ebs_csi_driver" { count = var.ebs_csi_driver ? 1 : 0 name = "aws-ebs-csi-driver" - namespace = "kube-system" + namespace = var.ebs_csi_driver_namespace chart = "aws-ebs-csi-driver" repository = "https://kubernetes-sigs.github.io/aws-ebs-csi-driver" version = var.ebs_csi_driver_version diff --git a/efs-csi.tf b/efs-csi.tf index c5ced86..8244c05 100644 --- a/efs-csi.tf +++ b/efs-csi.tf @@ -29,7 +29,7 @@ module "eks_efs_csi_controller_irsa" { main = { provider_arn = module.eks.oidc_provider_arn namespace_service_accounts = [ - "kube-system:efs-csi-controller-sa", + "${var.efs_csi_driver_namespace}:efs-csi-controller-sa", ] } } @@ -46,7 +46,7 @@ module "eks_efs_csi_node_irsa" { main = { provider_arn = module.eks.oidc_provider_arn namespace_service_accounts = [ - "kube-system:efs-csi-node-sa", + "${var.efs_csi_driver_namespace}:efs-csi-node-sa", ] } } @@ -99,7 +99,7 @@ resource "aws_efs_mount_target" "eks_efs_private" { resource "helm_release" "aws_efs_csi_driver" { count = var.efs_csi_driver ? 
1 : 0 name = "aws-efs-csi-driver" - namespace = "kube-system" + namespace = var.efs_csi_driver_namespace chart = "aws-efs-csi-driver" repository = "https://kubernetes-sigs.github.io/aws-efs-csi-driver" version = var.efs_csi_driver_version diff --git a/lb.tf b/lb.tf index 03a78e4..fb03650 100644 --- a/lb.tf +++ b/lb.tf @@ -12,7 +12,7 @@ module "eks_lb_irsa" { oidc_providers = { main = { provider_arn = module.eks.oidc_provider_arn - namespace_service_accounts = ["kube-system:aws-load-balancer-controller"] + namespace_service_accounts = ["${var.lb_controller_namespace}:aws-load-balancer-controller"] } } @@ -22,7 +22,7 @@ module "eks_lb_irsa" { resource "helm_release" "aws_lb_controller" { count = var.lb_controller ? 1 : 0 name = "aws-load-balancer-controller" - namespace = "kube-system" + namespace = var.lb_controller_namespace chart = "aws-load-balancer-controller" repository = "https://aws.github.io/eks-charts" version = var.lb_controller_version diff --git a/nvidia.tf b/nvidia.tf index 00b1ca0..cbf2c31 100644 --- a/nvidia.tf +++ b/nvidia.tf @@ -5,7 +5,7 @@ resource "helm_release" "nvidia_gpu_operator" { chart = "gpu-operator" create_namespace = true name = "gpu-operator" - namespace = "nvidia/gpu-operator" + namespace = var.nvidia_gpu_operator_namespace repository = "https://helm.ngc.nvidia.com/nvidia" wait = var.nvidia_gpu_operator_wait version = "v${var.nvidia_gpu_operator_version}" diff --git a/variables.tf b/variables.tf index 7c3c451..8912dc0 100644 --- a/variables.tf +++ b/variables.tf @@ -4,10 +4,10 @@ variable "aws_auth_roles" { default = [] } -variable "cert_manager_wait" { - description = "Wait for the cert-manager Helm chart installation to complete." - type = bool - default = true +variable "cert_manager_namespace" { + default = "cert-manager" + description = "Namespace that cert-manager will use." 
+ type = string } variable "cert_manager_values" { @@ -16,6 +16,12 @@ variable "cert_manager_values" { default = {} } +variable "cert_manager_wait" { + description = "Wait for the cert-manager Helm chart installation to complete." + type = bool + default = true +} + variable "cert_manager_version" { default = "1.13.3" description = "Version of cert-manager to install." @@ -148,10 +154,10 @@ variable "ebs_csi_driver" { default = true } -variable "ebs_csi_driver_wait" { - description = "Wait for the EBS CSI storage driver Helm chart install to complete." - type = bool - default = true +variable "ebs_csi_driver_namespace" { + default = "kube-system" + description = "Namespace that EBS CSI storage driver will use." + type = string } variable "ebs_csi_driver_values" { @@ -160,6 +166,12 @@ variable "ebs_csi_driver_values" { default = {} } +variable "ebs_csi_driver_wait" { + description = "Wait for the EBS CSI storage driver Helm chart install to complete." + type = bool + default = true +} + variable "ebs_csi_driver_version" { default = "2.25.0" description = "Version of the EFS CSI storage driver to install." @@ -172,10 +184,10 @@ variable "efs_csi_driver" { default = true } -variable "efs_csi_driver_wait" { - description = "Wait for the EFS CSI storage driver Helm chart install to complete." - type = bool - default = true +variable "efs_csi_driver_namespace" { + default = "kube-system" + description = "Namespace that EFS CSI storage driver will use." + type = string } variable "efs_csi_driver_values" { @@ -190,6 +202,12 @@ variable "efs_csi_driver_version" { type = string } +variable "efs_csi_driver_wait" { + description = "Wait for the EFS CSI storage driver Helm chart install to complete." + type = bool + default = true +} + variable "eks_managed_node_groups" { description = "Map of managed node groups for the EKS cluster." 
type = map(any) @@ -280,10 +298,10 @@ variable "lb_controller" { default = true } -variable "lb_controller_wait" { - description = "Wait for the AWS Load Balancer Controller Helm chart install to complete." - type = bool - default = true +variable "lb_controller_namespace" { + default = "kube-system" + description = "Namespace that AWS Load Balancer Controller will use." + type = string } variable "lb_controller_values" { @@ -298,6 +316,12 @@ variable "lb_controller_version" { type = string } +variable "lb_controller_wait" { + description = "Wait for the AWS Load Balancer Controller Helm chart install to complete." + type = bool + default = true +} + variable "node_security_group_additional_rules" { default = { ingress_self_all = { @@ -333,10 +357,10 @@ variable "nvidia_gpu_operator" { type = bool } -variable "nvidia_gpu_operator_wait" { - description = "Wait for the NVIDIA GPU Operator Helm chart installation to complete." - type = bool - default = true +variable "nvidia_gpu_operator_namespace" { + default = "nvidia/gpu-operator" + description = "Namespace that NVIDIA GPU Operator will use." + type = string } variable "nvidia_gpu_operator_version" { @@ -345,6 +369,12 @@ variable "nvidia_gpu_operator_version" { type = string } +variable "nvidia_gpu_operator_wait" { + description = "Wait for the NVIDIA GPU Operator Helm chart installation to complete." + type = bool + default = true +} + variable "private_subnets" { description = "IDs of the private subnets in the EKS cluster VPC." type = list(any) From 7e9e463b3717604697e2e14479c1a1b53ff7c164 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Wed, 20 Dec 2023 18:03:42 -0700 Subject: [PATCH 53/60] Use the Pod Identity Role ARN. --- main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.tf b/main.tf index 6d5f2cf..8354862 100644 --- a/main.tf +++ b/main.tf @@ -8,7 +8,7 @@ locals { aws_region = data.aws_region.current.name aws_auth_karpenter_roles = var.karpenter ? 
[ { - rolearn = module.karpenter[0].role_arn + rolearn = module.karpenter[0].pod_identity_role_arn username = "system:node:{{EC2PrivateDNSName}}" groups = [ "system:bootstrappers", From 662858435a381c37f6ff44f64b77f6ec0712793a Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Thu, 21 Dec 2023 11:29:32 -0700 Subject: [PATCH 54/60] More Karpenter fixes to support it using the pod-based identity. --- karpenter.tf | 7 +------ main.tf | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/karpenter.tf b/karpenter.tf index c8758f7..3d511dd 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -2,7 +2,7 @@ module "karpenter" { count = var.karpenter ? 1 : 0 # XXX: Switch source back to module once v20 is released, refs # terraform-aws-modules/terraform-aws-eks#2858 - source = "github.com/radiant-maxar/terraform-aws-eks//modules/karpenter?ref=v20-prerelease" + source = "github.com/radiant-maxar/terraform-aws-eks//modules/karpenter?ref=v20.0.0-alpha" # source = "terraform-aws-modules/eks/aws//modules/karpenter" # version = "20.x.x" cluster_name = var.cluster_name @@ -37,11 +37,6 @@ resource "helm_release" "karpenter" { values = [ yamlencode({ - serviceAccount = { - annotations = { - "eks.amazonaws.com/role-arn" = module.karpenter[0].pod_identity_role_arn - } - } settings = { clusterEndpoint = module.eks.cluster_endpoint clusterName = var.cluster_name diff --git a/main.tf b/main.tf index 8354862..6d5f2cf 100644 --- a/main.tf +++ b/main.tf @@ -8,7 +8,7 @@ locals { aws_region = data.aws_region.current.name aws_auth_karpenter_roles = var.karpenter ? [ { - rolearn = module.karpenter[0].pod_identity_role_arn + rolearn = module.karpenter[0].role_arn username = "system:node:{{EC2PrivateDNSName}}" groups = [ "system:bootstrappers", From dfecc340a3a4b23e1cd55b317a801180ca75bd39 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Thu, 21 Dec 2023 11:36:37 -0700 Subject: [PATCH 55/60] Bring back Karpenter IRSA annotation. 
--- karpenter.tf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/karpenter.tf b/karpenter.tf index 3d511dd..8268b98 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -1,7 +1,7 @@ module "karpenter" { - count = var.karpenter ? 1 : 0 # XXX: Switch source back to module once v20 is released, refs # terraform-aws-modules/terraform-aws-eks#2858 + count = var.karpenter ? 1 : 0 source = "github.com/radiant-maxar/terraform-aws-eks//modules/karpenter?ref=v20.0.0-alpha" # source = "terraform-aws-modules/eks/aws//modules/karpenter" # version = "20.x.x" @@ -37,6 +37,11 @@ resource "helm_release" "karpenter" { values = [ yamlencode({ + serviceAccount = { + annotations = { + "eks.amazonaws.com/role-arn" = module.karpenter[0].pod_identity_role_arn + } + } settings = { clusterEndpoint = module.eks.cluster_endpoint clusterName = var.cluster_name From ad829d4b5b63948042a6dd2a3e737c0e3d9f28a1 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Thu, 21 Dec 2023 12:14:41 -0700 Subject: [PATCH 56/60] Only have Helm create new namespaces when it's not `kube-system`. --- cert-manager.tf | 2 +- ebs-csi.tf | 15 ++++++++------- efs-csi.tf | 16 +++++++++------- karpenter.tf | 2 +- lb.tf | 15 ++++++++------- nvidia.tf | 2 +- 6 files changed, 28 insertions(+), 24 deletions(-) diff --git a/cert-manager.tf b/cert-manager.tf index e34f3a6..580fff9 100644 --- a/cert-manager.tf +++ b/cert-manager.tf @@ -60,7 +60,7 @@ resource "helm_release" "cert_manager" { count = local.cert_manager ? 1 : 0 name = "cert-manager" namespace = var.cert_manager_namespace - create_namespace = true + create_namespace = var.cert_manager_namespace == "kube-system" ? 
false : true chart = "cert-manager" repository = "https://charts.jetstack.io" version = "v${var.cert_manager_version}" diff --git a/ebs-csi.tf b/ebs-csi.tf index f149f35..6039ef4 100644 --- a/ebs-csi.tf +++ b/ebs-csi.tf @@ -20,13 +20,14 @@ module "eks_ebs_csi_irsa" { } resource "helm_release" "aws_ebs_csi_driver" { - count = var.ebs_csi_driver ? 1 : 0 - name = "aws-ebs-csi-driver" - namespace = var.ebs_csi_driver_namespace - chart = "aws-ebs-csi-driver" - repository = "https://kubernetes-sigs.github.io/aws-ebs-csi-driver" - version = var.ebs_csi_driver_version - wait = var.ebs_csi_driver_wait + count = var.ebs_csi_driver ? 1 : 0 + chart = "aws-ebs-csi-driver" + create_namespace = var.ebs_csi_driver_namespace == "kube-system" ? false : true + name = "aws-ebs-csi-driver" + namespace = var.ebs_csi_driver_namespace + repository = "https://kubernetes-sigs.github.io/aws-ebs-csi-driver" + version = var.ebs_csi_driver_version + wait = var.ebs_csi_driver_wait values = [ yamlencode({ diff --git a/efs-csi.tf b/efs-csi.tf index 8244c05..8e18f3b 100644 --- a/efs-csi.tf +++ b/efs-csi.tf @@ -97,13 +97,15 @@ resource "aws_efs_mount_target" "eks_efs_private" { } resource "helm_release" "aws_efs_csi_driver" { - count = var.efs_csi_driver ? 1 : 0 - name = "aws-efs-csi-driver" - namespace = var.efs_csi_driver_namespace - chart = "aws-efs-csi-driver" - repository = "https://kubernetes-sigs.github.io/aws-efs-csi-driver" - version = var.efs_csi_driver_version - wait = var.efs_csi_driver_wait + count = var.efs_csi_driver ? 1 : 0 + chart = "aws-efs-csi-driver" + create_namespace = var.efs_csi_driver_namespace == "kube-system" ? 
false : true + name = "aws-efs-csi-driver" + namespace = var.efs_csi_driver_namespace + repository = "https://kubernetes-sigs.github.io/aws-efs-csi-driver" + version = var.efs_csi_driver_version + wait = var.efs_csi_driver_wait + values = [ yamlencode({ diff --git a/karpenter.tf b/karpenter.tf index 8268b98..57b68a9 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -13,7 +13,7 @@ module "karpenter" { resource "helm_release" "karpenter_crd" { count = var.karpenter ? 1 : 0 - create_namespace = true + create_namespace = var.karpenter_namespace == "kube-system" ? false : true name = "karpenter-crd" namespace = var.karpenter_namespace repository = "oci://public.ecr.aws/karpenter" diff --git a/lb.tf b/lb.tf index fb03650..c74adcf 100644 --- a/lb.tf +++ b/lb.tf @@ -20,13 +20,14 @@ module "eks_lb_irsa" { } resource "helm_release" "aws_lb_controller" { - count = var.lb_controller ? 1 : 0 - name = "aws-load-balancer-controller" - namespace = var.lb_controller_namespace - chart = "aws-load-balancer-controller" - repository = "https://aws.github.io/eks-charts" - version = var.lb_controller_version - wait = var.lb_controller_wait + count = var.lb_controller ? 1 : 0 + chart = "aws-load-balancer-controller" + create_namespace = var.lb_controller_namespace == "kube-system" ? false : true + name = "aws-load-balancer-controller" + namespace = var.lb_controller_namespace + repository = "https://aws.github.io/eks-charts" + version = var.lb_controller_version + wait = var.lb_controller_wait values = [ yamlencode({ diff --git a/nvidia.tf b/nvidia.tf index cbf2c31..a4c5db7 100644 --- a/nvidia.tf +++ b/nvidia.tf @@ -3,7 +3,7 @@ resource "helm_release" "nvidia_gpu_operator" { count = var.nvidia_gpu_operator ? 1 : 0 chart = "gpu-operator" - create_namespace = true + create_namespace = var.nvidia_gpu_operator_namespace == "kube-system" ? 
false : true name = "gpu-operator" namespace = var.nvidia_gpu_operator_namespace repository = "https://helm.ngc.nvidia.com/nvidia" From 442302e9a3ad5a141a5b685c7fee9ffa21301667 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Thu, 21 Dec 2023 12:44:52 -0700 Subject: [PATCH 57/60] Have `karpenter` module use the `iam_role_attach_cni_policy` variable. --- karpenter.tf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/karpenter.tf b/karpenter.tf index 57b68a9..2a46b8e 100644 --- a/karpenter.tf +++ b/karpenter.tf @@ -1,11 +1,12 @@ module "karpenter" { - # XXX: Switch source back to module once v20 is released, refs + # XXX: Switch source back to module once there is an official v20 release, refs # terraform-aws-modules/terraform-aws-eks#2858 count = var.karpenter ? 1 : 0 source = "github.com/radiant-maxar/terraform-aws-eks//modules/karpenter?ref=v20.0.0-alpha" # source = "terraform-aws-modules/eks/aws//modules/karpenter" # version = "20.x.x" cluster_name = var.cluster_name + iam_role_attach_cni_policy = var.iam_role_attach_cni_policy irsa_namespace_service_accounts = ["${var.karpenter_namespace}:karpenter"] irsa_oidc_provider_arn = module.eks.oidc_provider_arn tags = var.tags From 68de63f76aec391f5b2333845310e7f029e58ba9 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Thu, 21 Dec 2023 13:10:43 -0700 Subject: [PATCH 58/60] Use valid namespace name for NVIDIA GPU Operator. --- variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variables.tf b/variables.tf index 8912dc0..d8b077e 100644 --- a/variables.tf +++ b/variables.tf @@ -358,7 +358,7 @@ variable "nvidia_gpu_operator" { } variable "nvidia_gpu_operator_namespace" { - default = "nvidia/gpu-operator" + default = "nvidia-gpu-operator" description = "Namespace that NVIDIA GPU Operator will use." 
type = string } From 7877e1671da08cea491d08c856423fc45e4fd798 Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Thu, 21 Dec 2023 14:52:57 -0700 Subject: [PATCH 59/60] Inline EFS CSI driver policy again and remove separate security group for TCP/2049. --- efs-csi.tf | 108 ++++++++++++++++++++++++++++++++++++++++------------- main.tf | 1 - outputs.tf | 5 +++ 3 files changed, 87 insertions(+), 27 deletions(-) diff --git a/efs-csi.tf b/efs-csi.tf index 8e18f3b..57f0bcf 100644 --- a/efs-csi.tf +++ b/efs-csi.tf @@ -1,19 +1,12 @@ ## EFS CSI Storage Driver -resource "aws_security_group" "eks_efs_sg" { - count = var.efs_csi_driver ? 1 : 0 - name = "${var.cluster_name}-efs-sg" - description = "Security group for EFS clients in EKS VPC" - vpc_id = var.vpc_id - - ingress { - description = "Ingress NFS traffic for EFS" - from_port = 2049 - to_port = 2049 - protocol = "tcp" - cidr_blocks = [var.vpc_cidr] - } - tags = var.tags +locals { + efs_arns = [ + "arn:${local.aws_partition}:elasticfilesystem:${local.aws_region}:${local.aws_account_id}:file-system/*" + ] + efs_access_point_arns = [ + "arn:${local.aws_partition}:elasticfilesystem:${local.aws_region}:${local.aws_account_id}:access-point/*" + ] } # Allow PVCs backed by EFS @@ -22,8 +15,7 @@ module "eks_efs_csi_controller_irsa" { source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" version = "5.33.0" - role_name = "${var.cluster_name}-efs-csi-controller-role" - attach_efs_csi_policy = true + role_name = "${var.cluster_name}-efs-csi-controller-role" oidc_providers = { main = { @@ -42,6 +34,7 @@ module "eks_efs_csi_node_irsa" { version = "5.33.0" role_name = "${var.cluster_name}-efs-csi-node-role" + oidc_providers = { main = { provider_arn = module.eks.oidc_provider_arn @@ -53,29 +46,92 @@ module "eks_efs_csi_node_irsa" { tags = var.tags } -data "aws_iam_policy_document" "eks_efs_csi_node" { +data "aws_iam_policy_document" "aws_efs_csi_driver" { count = var.efs_csi_driver ? 
1 : 0 + + statement { + sid = "AllowDescribeAvailabilityZones" + actions = ["ec2:DescribeAvailabilityZones"] + resources = ["*"] # tfsec:ignore:aws-iam-no-policy-wildcards + } + statement { + sid = "AllowDescribeFileSystems" actions = [ - "elasticfilesystem:DescribeMountTargets", - "ec2:DescribeAvailabilityZones", + "elasticfilesystem:DescribeAccessPoints", + "elasticfilesystem:DescribeFileSystems", + "elasticfilesystem:DescribeMountTargets" ] - resources = ["*"] # tfsec:ignore:aws-iam-no-policy-wildcards + resources = flatten([ + local.efs_arns, + local.efs_access_point_arns, + ]) + } + + statement { + actions = [ + "elasticfilesystem:CreateAccessPoint", + "elasticfilesystem:TagResource", + ] + resources = local.efs_arns + + condition { + test = "StringLike" + variable = "aws:RequestTag/efs.csi.aws.com/cluster" + values = ["true"] + } + } + + statement { + sid = "AllowDeleteAccessPoint" + actions = ["elasticfilesystem:DeleteAccessPoint"] + resources = local.efs_access_point_arns + + condition { + test = "StringLike" + variable = "aws:ResourceTag/efs.csi.aws.com/cluster" + values = ["true"] + } + } + + statement { + sid = "ClientReadWrite" + actions = [ + "elasticfilesystem:ClientRootAccess", + "elasticfilesystem:ClientWrite", + "elasticfilesystem:ClientMount", + ] + resources = local.efs_arns + + condition { + test = "Bool" + variable = "elasticfilesystem:AccessedViaMountTarget" + values = ["true"] + } } } -resource "aws_iam_policy" "eks_efs_csi_node" { +resource "aws_iam_policy" "eks_efs_csi_driver" { count = var.efs_csi_driver ? 
1 : 0 - name = "AmazonEKS_EFS_CSI_Node_Policy-${var.cluster_name}" - description = "Provides node permissions to use the EFS CSI driver" - policy = data.aws_iam_policy_document.eks_efs_csi_node[0].json + name = "AmazonEKS_EFS_CSI_Policy-${var.cluster_name}" + description = "Provides permissions to manage EFS volumes via the container storage interface driver" + policy = data.aws_iam_policy_document.eks_efs_csi_driver[0].json tags = var.tags } +resource "aws_iam_role_policy_attachment" "eks_efs_csi_controller" { + count = var.efs_csi_driver ? 1 : 0 + role = "${var.cluster_name}-efs-csi-controller-role" + policy_arn = aws_iam_policy.eks_efs_csi_driver[0].arn + depends_on = [ + module.eks_efs_csi_controller_irsa[0] + ] +} + resource "aws_iam_role_policy_attachment" "eks_efs_csi_node" { count = var.efs_csi_driver ? 1 : 0 role = "${var.cluster_name}-efs-csi-node-role" - policy_arn = aws_iam_policy.eks_efs_csi_node[0].arn + policy_arn = aws_iam_policy.eks_efs_csi_driver[0].arn depends_on = [ module.eks_efs_csi_node_irsa[0] ] @@ -93,7 +149,7 @@ resource "aws_efs_mount_target" "eks_efs_private" { count = var.efs_csi_driver ? 
length(var.private_subnets) : 0 file_system_id = aws_efs_file_system.eks_efs[0].id subnet_id = var.private_subnets[count.index] - security_groups = aws_security_group.eks_efs_sg[*].id + security_groups = [module.eks.cluster_primary_security_group_id] } resource "helm_release" "aws_efs_csi_driver" { diff --git a/main.tf b/main.tf index 6d5f2cf..09a5b27 100644 --- a/main.tf +++ b/main.tf @@ -91,7 +91,6 @@ module "eks" { # tfsec:ignore:aws-ec2-no-public-egress-sgr tfsec:ignore:aws-eks- iam_role_attach_cni_policy = var.iam_role_attach_cni_policy max_size = var.default_max_size min_size = var.default_min_size - vpc_security_group_ids = aws_security_group.eks_efs_sg[*].id } eks_managed_node_groups = var.eks_managed_node_groups diff --git a/outputs.tf b/outputs.tf index da84e15..b616292 100644 --- a/outputs.tf +++ b/outputs.tf @@ -13,6 +13,11 @@ output "cluster_oidc_issuer_url" { value = module.eks.cluster_oidc_issuer_url } +output "cluster_primary_security_group_id" { + description = "Cluster security group that was created by Amazon EKS for the cluster. Managed node groups use this security group for control-plane-to-data-plane communication. Referred to as 'Cluster security group' in the EKS console" + value = module.eks.cluster_primary_security_group_id +} + output "eks_managed_node_groups" { description = "Map of attribute maps for all EKS managed node groups created" value = module.eks.eks_managed_node_groups From 6a4a2f955e9fb8ce6cabc00a201ef3717233cb9d Mon Sep 17 00:00:00 2001 From: Justin Bronn Date: Thu, 21 Dec 2023 14:55:32 -0700 Subject: [PATCH 60/60] Fix resource name. --- efs-csi.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/efs-csi.tf b/efs-csi.tf index 57f0bcf..af68673 100644 --- a/efs-csi.tf +++ b/efs-csi.tf @@ -46,7 +46,7 @@ module "eks_efs_csi_node_irsa" { tags = var.tags } -data "aws_iam_policy_document" "aws_efs_csi_driver" { +data "aws_iam_policy_document" "eks_efs_csi_driver" { count = var.efs_csi_driver ? 1 : 0 statement {