fix(infra): upgrade karpenter to 0.32 & disable compaction (#834)

## Changes

NathanFlurry committed Jun 1, 2024
1 parent 824936f commit 0976245
Showing 3 changed files with 139 additions and 74 deletions.
71 changes: 49 additions & 22 deletions infra/tf/k8s_cluster_aws/eks.tf
@@ -2,7 +2,7 @@

module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "19.16.0"
version = "20.12.0"

cluster_name = local.name
cluster_version = local.cluster_version
@@ -83,30 +83,11 @@ module "eks" {
create_cluster_security_group = false
create_node_security_group = false

manage_aws_auth_configmap = true
aws_auth_roles = [
# Allow users to assume the admin role
{
rolearn = aws_iam_role.eks_admin.arn
username = local.eks_admin_username
groups = [
"system:masters"
]
},
# We need to add in the Karpenter node IAM role for nodes launched by Karpenter
{
rolearn = module.karpenter.role_arn
username = "system:node:{{EC2PrivateDNSName}}"
groups = [
"system:bootstrappers",
"system:nodes",
]
},
]

# Enable root account to manage KMS
kms_key_enable_default_policy = true

authentication_mode = "API_AND_CONFIG_MAP"

fargate_profiles = {
karpenter = {
selectors = [
@@ -128,3 +109,49 @@
})
}

# TODO:
# terraform state rm 'module.eks.kubernetes_config_map_v1_data.aws_auth[0]'
# terraform state rm 'module.eks.kubernetes_config_map.aws_auth[0]'
# removed {
# from = module.eks.kubernetes_config_map_v1_data.aws_auth[0]
# lifecycle {
# destroy = false
# }
# }
#
# removed {
# from = module.eks.kubernetes_config_map.aws_auth[0]
# lifecycle {
# destroy = false
# }
# }

module "aws_auth" {
depends_on = [module.eks]

source = "terraform-aws-modules/eks/aws//modules/aws-auth"
version = "~> 20.0"

manage_aws_auth_configmap = true

aws_auth_roles = [
# Allow users to assume the admin role
{
rolearn = aws_iam_role.eks_admin.arn
username = local.eks_admin_username
groups = [
"system:masters"
]
},
# We need to add in the Karpenter node IAM role for nodes launched by Karpenter
{
rolearn = module.karpenter.iam_role_arn
username = "system:node:{{EC2PrivateDNSName}}"
groups = [
"system:bootstrappers",
"system:nodes",
]
},
]
}
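
As a reference for the state migration noted in the TODO above, this is roughly what the commented-out variant would look like once enabled (a sketch only, assuming Terraform 1.7 or newer, which introduced `removed` blocks; instance keys such as `[0]` are not allowed in a `removed` address). The two `terraform state rm` commands are the equivalent manual route.

```hcl
# Sketch, not part of this commit: forget the aws-auth ConfigMap resources that the
# eks module managed before v20 without destroying the live ConfigMap, so the new
# aws_auth submodule can adopt it.
removed {
  from = module.eks.kubernetes_config_map_v1_data.aws_auth

  lifecycle {
    destroy = false
  }
}

removed {
  from = module.eks.kubernetes_config_map.aws_auth

  lifecycle {
    destroy = false
  }
}
```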

140 changes: 89 additions & 51 deletions infra/tf/k8s_cluster_aws/karpenter.tf
@@ -1,15 +1,24 @@
# TODO: Wait until fargate is up
module "karpenter" {
source = "terraform-aws-modules/eks/aws//modules/karpenter"
version = "19.16.0"
version = "20.12.0"

cluster_name = module.eks.cluster_name
irsa_oidc_provider_arn = module.eks.oidc_provider_arn

policies = {
node_iam_role_additional_policies = {
AmazonSSMManagedInstanceCore = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
}

# IRSA backwards compatibility
enable_irsa = true
create_instance_profile = true
create_iam_role = true
iam_role_name = "KarpenterIRSA-${module.eks.cluster_name}"
iam_role_description = "Karpenter IAM role for service account"
iam_policy_name = "KarpenterIRSA-${module.eks.cluster_name}"
iam_policy_description = "Karpenter IAM role for service account"

tags = local.tags
}

@@ -20,7 +29,7 @@ resource "helm_release" "karpenter" {
name = "karpenter"
repository = "oci://public.ecr.aws/karpenter"
chart = "karpenter"
version = "v0.31.0"
version = "v0.32.10"

values = [yamlencode({
controller = {
@@ -37,85 +46,114 @@

serviceAccount = {
annotations = {
"eks.amazonaws.com/role-arn" = module.karpenter.irsa_arn
"eks.amazonaws.com/role-arn" = module.karpenter.iam_role_arn
}
}

settings = {
aws = {
clusterName = module.eks.cluster_name
clusterEndpoint = module.eks.cluster_endpoint
defaultInstanceProfile = module.karpenter.instance_profile_name
interruptionQueueName = module.karpenter.queue_name
}
clusterName = module.eks.cluster_name
clusterEndpoint = module.eks.cluster_endpoint
interruptionQueue = module.karpenter.queue_name
}
})]
}

resource "kubectl_manifest" "karpenter_provisioner" {
resource "kubectl_manifest" "karpenter_node_class" {
depends_on = [helm_release.karpenter]

yaml_body = yamlencode({
apiVersion = "karpenter.sh/v1alpha5"
kind = "Provisioner"
apiVersion = "karpenter.k8s.aws/v1beta1"
kind = "EC2NodeClass"
metadata = {
name = "default"
}
spec = {
requirements = [
# See how Karpenter selects instance types:
# https://karpenter.sh/v0.31/faq/#how-does-karpenter-dynamically-select-instance-types

amiFamily = "AL2"
role = module.karpenter.node_iam_role_name
subnetSelectorTerms = [
{
key = "kubernetes.io/os"
operator = "In"
values = ["linux"]
},
{
key = "topology.kubernetes.io/zone"
operator = "In"
values = local.azs
},
{
key = "karpenter.sh/capacity-type"
operator = "In"
values = ["on-demand"]
},
tags = {
"karpenter.sh/discovery" = module.eks.cluster_name
}
}
]
limits = {
resources = {
cpu = 1000
memory = "1000Gi"
securityGroupSelectorTerms = [
{
tags = {
"karpenter.sh/discovery" = module.eks.cluster_name
}
}
}
providerRef = {
name = "default"
}
consolidation = {
enabled = true
]
tags = {
"karpenter.sh/discovery" = module.eks.cluster_name
}
}
})
}

resource "kubectl_manifest" "karpenter_node_template" {
depends_on = [helm_release.karpenter]
resource "kubectl_manifest" "karpenter_node_pool" {
depends_on = [helm_release.karpenter, kubectl_manifest.karpenter_node_class]

yaml_body = yamlencode({
apiVersion = "karpenter.k8s.aws/v1alpha1"
kind = "AWSNodeTemplate"
apiVersion = "karpenter.sh/v1beta1"
kind = "NodePool"
metadata = {
name = "default"
}
spec = {
subnetSelector = {
"karpenter.sh/discovery" = module.eks.cluster_name
template = {
spec = {
nodeClassRef = {
name = "default"
}
requirements = [
# See recommended requirements:
# https://karpenter.sh/v0.37/concepts/nodepools/#capacity-type

{
key = "topology.kubernetes.io/zone"
operator = "In"
values = local.azs
},
{
key = "kubernetes.io/arch"
operator = "In"
values = ["amd64"]
},
{
key = "kubernetes.io/os"
operator = "In"
values = ["linux"]
},
{
key = "karpenter.sh/capacity-type"
operator = "In"
values = ["on-demand"]
},
{
key = "karpenter.k8s.aws/instance-category"
operator = "In"
values = ["c", "m", "r"]
},
{
key = "karpenter.k8s.aws/instance-generation"
operator = "Gt"
values = ["2"]
}
]
}
}
securityGroupSelector = {
"karpenter.sh/discovery" = module.eks.cluster_name
limits = {
cpu = 1000
memory = "1000Gi"
}
tags = {
"karpenter.sh/discovery" = module.eks.cluster_name
disruption = {
# Never kill pods that are currently running
consolidationPolicy = "WhenEmpty"
consolidateAfter = "30s"
# Don't kill nodes arbitrarily
expireAfter = "Never"
# TODO: If switching to WhenUnderutilized, add `budgets` here
}
}
})
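
The trailing TODO above leaves room for a more aggressive consolidation policy later. A rough sketch of what that `disruption` block might become (illustrative only: in the v1beta1 API `consolidateAfter` cannot be combined with `WhenUnderutilized`, and disruption budgets only exist in Karpenter releases newer than the v0.32.x chart pinned in this PR):

```hcl
# Illustrative sketch, not valid for the v0.32.x chart pinned above.
disruption = {
  consolidationPolicy = "WhenUnderutilized"
  expireAfter         = "Never"
  # Cap how much of the pool Karpenter may disrupt at any one time.
  budgets = [
    { nodes = "10%" }
  ]
}
```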
2 changes: 1 addition & 1 deletion infra/tf/k8s_cluster_aws/main.tf
@@ -2,7 +2,7 @@ terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "5.16.0"
version = "5.52.0"
}
# TODO Revert to gavinbunney/kubectl once https://github.com/gavinbunney/terraform-provider-kubectl/issues/270 is resolved
kubectl = {
