diff --git a/manifests/modules/autoscaling/compute/karpenter/.workshop/terraform/outputs.tf b/manifests/modules/autoscaling/compute/karpenter/.workshop/terraform/outputs.tf
index adc127edb..71880b48e 100644
--- a/manifests/modules/autoscaling/compute/karpenter/.workshop/terraform/outputs.tf
+++ b/manifests/modules/autoscaling/compute/karpenter/.workshop/terraform/outputs.tf
@@ -5,5 +5,6 @@ output "environment_variables" {
     KARPENTER_SQS_QUEUE = module.karpenter.queue_name
     KARPENTER_ROLE      = module.karpenter.node_iam_role_name
     KARPENTER_ROLE_ARN  = module.karpenter.node_iam_role_arn
+    KUBERNETES_VERSION  = var.eks_cluster_version
   }
 }
\ No newline at end of file
diff --git a/manifests/modules/autoscaling/compute/karpenter/.workshop/terraform/vars.tf b/manifests/modules/autoscaling/compute/karpenter/.workshop/terraform/vars.tf
index b0d2bb770..121d3ced4 100644
--- a/manifests/modules/autoscaling/compute/karpenter/.workshop/terraform/vars.tf
+++ b/manifests/modules/autoscaling/compute/karpenter/.workshop/terraform/vars.tf
@@ -38,5 +38,5 @@ variable "karpenter_version" {
   description = "The version of Karpenter to use"
   type        = string
   # renovate: datasource=github-releases depName=aws/karpenter-provider-aws
-  default = "0.37.2"
+  default = "1.0.3"
 }
\ No newline at end of file
diff --git a/manifests/modules/autoscaling/compute/karpenter/nodepool/nodeclass.yaml b/manifests/modules/autoscaling/compute/karpenter/nodepool/nodeclass.yaml
index 5dec9dfe6..3cdec56fd 100644
--- a/manifests/modules/autoscaling/compute/karpenter/nodepool/nodeclass.yaml
+++ b/manifests/modules/autoscaling/compute/karpenter/nodepool/nodeclass.yaml
@@ -1,9 +1,11 @@
-apiVersion: karpenter.k8s.aws/v1beta1
+apiVersion: karpenter.k8s.aws/v1
 kind: EC2NodeClass
 metadata:
   name: default
 spec:
-  amiFamily: AL2023 # Amazon Linux 2023
+  amiFamily: AL2023
+  amiSelectorTerms:
+    - alias: al2023@latest
   role: "${KARPENTER_ROLE}"
   subnetSelectorTerms:
     - tags:
diff --git a/manifests/modules/autoscaling/compute/karpenter/nodepool/nodepool.yaml b/manifests/modules/autoscaling/compute/karpenter/nodepool/nodepool.yaml
index 78160b13a..246021394 100644
--- a/manifests/modules/autoscaling/compute/karpenter/nodepool/nodepool.yaml
+++ b/manifests/modules/autoscaling/compute/karpenter/nodepool/nodepool.yaml
@@ -1,4 +1,4 @@
-apiVersion: karpenter.sh/v1beta1
+apiVersion: karpenter.sh/v1
 kind: NodePool
 metadata:
   name: default
@@ -16,10 +16,13 @@ spec:
           operator: In
           values: ["c5.large", "m5.large", "r5.large", "m5.xlarge"]
       nodeClassRef:
+        group: karpenter.k8s.aws
+        kind: EC2NodeClass
         name: default
+      expireAfter: 72h
   limits:
     cpu: "1000"
     memory: 1000Gi
   disruption:
-    consolidationPolicy: WhenUnderutilized
-    expireAfter: 720h # 30 * 24h = 720h
+    consolidationPolicy: WhenEmptyOrUnderutilized
+    consolidateAfter: 1m
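For orientation, the sketch below shows roughly what the full `NodePool` manifest looks like once the hunks above apply. Only the fields visible in the diff are taken from it; the `requirements` key is an illustrative assumption (the diff shows only the `operator` and `values` lines), so this is a sketch rather than the workshop's exact manifest:

```yaml
apiVersion: karpenter.sh/v1
kind: NodePool
metadata:
  name: default
spec:
  template:
    spec:
      requirements:
        - key: node.kubernetes.io/instance-type # assumed key; the diff only shows operator/values
          operator: In
          values: ["c5.large", "m5.large", "r5.large", "m5.xlarge"]
      nodeClassRef:
        group: karpenter.k8s.aws # required in karpenter.sh/v1
        kind: EC2NodeClass # required in karpenter.sh/v1
        name: default
      expireAfter: 72h # moved out of the disruption block in v1
  limits:
    cpu: "1000"
    memory: 1000Gi
  disruption:
    consolidationPolicy: WhenEmptyOrUnderutilized # v1 rename of WhenUnderutilized
    consolidateAfter: 1m # v1 allows this alongside WhenEmptyOrUnderutilized
```

Note how the v1 API moves node expiry (`expireAfter`) from the `disruption` block into the node template and requires `group` and `kind` on `nodeClassRef`.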
diff --git a/website/docs/autoscaling/compute/karpenter/consolidation.md b/website/docs/autoscaling/compute/karpenter/consolidation.md
index 21e661c89..2f78a7119 100644
--- a/website/docs/autoscaling/compute/karpenter/consolidation.md
+++ b/website/docs/autoscaling/compute/karpenter/consolidation.md
@@ -11,9 +11,12 @@ Karpenter automatically discovers nodes that are eligible for disruption and spi
 
 Disruption is configured through the `disruption` block in a `NodePool`. You can see highlighted below the policy that's already configured in our `NodePool`.
 
-::yaml{file="manifests/modules/autoscaling/compute/karpenter/nodepool/nodepool.yaml" paths="spec.disruption"}
+::yaml{file="manifests/modules/autoscaling/compute/karpenter/nodepool/nodepool.yaml" paths="spec.template.spec.expireAfter,spec.disruption"}
 
-The `consolidationPolicy` can also be set to `WhenEmpty`, which restricts disruption only to nodes that contain no workload pods. Learn more about Disruption on the [Karpenter docs](https://karpenter.sh/docs/concepts/disruption/).
+1. `expireAfter` is set to a custom value so that nodes are terminated automatically after 72 hours
+2. By using the `WhenEmptyOrUnderutilized` policy, Karpenter will replace nodes when it deems them "under-utilized" or when they have no workload pods running
+
+The `consolidationPolicy` can also be set to `WhenEmpty`, which restricts disruption only to nodes that contain no workload pods. Learn more about Disruption on the [Karpenter docs](https://karpenter.sh/docs/concepts/disruption/#consolidation).
 
 Scaling out infrastructure is only one side of the equation for operating compute infrastructure in a cost-effective manner. We also need to be able to optimize on an on-going basis such that, for example, workloads running on under-utilized compute instances are compacted to fewer instances. This improves the overall efficiency of how we run workloads on the compute, resulting in less overhead and lower costs.
 
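Since the updated page calls out `WhenEmpty` as the more conservative alternative, a hypothetical `disruption` block using it might look like the following; the `consolidateAfter` value here is an arbitrary illustration, not a workshop recommendation:

```yaml
# Hypothetical alternative: only consolidate nodes running no workload pods
disruption:
  consolidationPolicy: WhenEmpty
  consolidateAfter: 30s # how long a node must sit empty before Karpenter disrupts it
```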
diff --git a/website/docs/autoscaling/compute/karpenter/node-provisioning.md b/website/docs/autoscaling/compute/karpenter/node-provisioning.md
index 7b4a2ea31..f7484acd8 100644
--- a/website/docs/autoscaling/compute/karpenter/node-provisioning.md
+++ b/website/docs/autoscaling/compute/karpenter/node-provisioning.md
@@ -57,7 +57,7 @@ $ kubectl scale -n other deployment/inflate --replicas 5
 
 Because this operation is creating one or more new EC2 instances it will take a while; you can use `kubectl` to wait until it's done with this command:
 
-```bash hook=karpenter-deployment
+```bash hook=karpenter-deployment timeout=200
 $ kubectl rollout status -n other deployment/inflate --timeout=180s
 ```
 
diff --git a/website/docs/autoscaling/compute/karpenter/setup-provisioner.md b/website/docs/autoscaling/compute/karpenter/setup-provisioner.md
index acde86fe2..fc97a374d 100644
--- a/website/docs/autoscaling/compute/karpenter/setup-provisioner.md
+++ b/website/docs/autoscaling/compute/karpenter/setup-provisioner.md
@@ -17,10 +17,11 @@ We'll start by applying some custom resources used by Karpenter. First we'll cre
 
 And we'll also need an `EC2NodeClass` which provides the specific configuration that applies to AWS:
 
-::yaml{file="manifests/modules/autoscaling/compute/karpenter/nodepool/nodeclass.yaml" paths="spec.subnetSelectorTerms,spec.tags"}
+::yaml{file="manifests/modules/autoscaling/compute/karpenter/nodepool/nodeclass.yaml" paths="spec.role,spec.subnetSelectorTerms,spec.tags"}
 
-1. The `subnetSelectorTerms` can be used to look up the subnets where Karpenter should launch the EC2 instances. These tags were automatically set on the associated AWS infrastructure provided for the workshop. `securityGroupSelectorTerms` accomplishes the same function for the security group that will be attached to the EC2 instances.
-2. We define a set of tags that will be applied to EC2 instances created which enables accounting and governance.
+1. Assign the IAM role that will be applied to the EC2 instances provisioned by Karpenter
+2. The `subnetSelectorTerms` can be used to look up the subnets where Karpenter should launch the EC2 instances. These tags were automatically set on the associated AWS infrastructure provided for the workshop. `securityGroupSelectorTerms` accomplishes the same function for the security group that will be attached to the EC2 instances.
+3. We define a set of tags that will be applied to the EC2 instances created, which enables accounting and governance.
 
 We've now provided Karpenter with the basic requirements it needs to start provisioning capacity for our cluster.
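To make the selector discussion in `setup-provisioner.md` concrete, here is a rough sketch of how the `EC2NodeClass` fields referenced above fit together under `karpenter.k8s.aws/v1`. The discovery tag key/value and the accounting tag are placeholders chosen for illustration, not the workshop's exact tags:

```yaml
apiVersion: karpenter.k8s.aws/v1
kind: EC2NodeClass
metadata:
  name: default
spec:
  amiFamily: AL2023
  amiSelectorTerms:
    - alias: al2023@latest # explicit AMI selection, required by the v1 API
  role: "${KARPENTER_ROLE}" # IAM role attached to the instances Karpenter launches
  subnetSelectorTerms:
    - tags:
        karpenter.sh/discovery: my-cluster # placeholder discovery tag
  securityGroupSelectorTerms:
    - tags:
        karpenter.sh/discovery: my-cluster # placeholder discovery tag
  tags:
    created-by: eks-workshop # example accounting/governance tag
```

Karpenter resolves the selector terms at provisioning time, so tagging the workshop's subnets and security groups is what lets a single `EC2NodeClass` work across environments without hard-coded IDs.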