1.25 (#341)

* Remove asg_node_group

* Remove all add-on related code - ready for karpenter + gitops

* Remove no longer required scripts

* Update terraform providers

* Update terraform / kubectl in CI container

* Karpenter fargate role

Allow pods in the karpenter namespace to run on fargate

* Fixup tests

* Remove unused examples

* Remove local tests

* fmt

* Add helm to CI image

* Allow cluster IAM role to be specified

The design here is that each new cluster gets its own IAM role. But to
avoid recreating legacy clusters on upgrade, we need a way to use an
externally managed IAM role.

* Add fargate profile for flux-system namespace

* Fix

* Bring addons back

* Fix addon config

* Also add a fargate profile for `kube-system`, otherwise some addons will fail.

* whitespace

* Run coredns on fargate so the addon install doesn't fail at bootstrap
errm committed Jun 21, 2023
1 parent 11c3125 commit f5918ba
Showing 65 changed files with 730 additions and 2,623 deletions.
17 changes: 12 additions & 5 deletions .github/actions/terratest/Dockerfile
@@ -1,9 +1,10 @@
FROM golang:1.19-alpine3.16
FROM golang:1.20-alpine3.18

WORKDIR /

ARG TERRAFORM_VERSION=1.2.9
ARG KUBECTL_VERSION=1.24.12
ARG TERRAFORM_VERSION=1.4.6
ARG KUBECTL_VERSION=1.25.10
ARG HELM_VERSION=3.12.0

RUN apk add --no-cache \
bash \
@@ -19,10 +20,16 @@ RUN apk add --no-cache \
echo 'export PATH="$HOME/.tfenv/bin:$PATH"' >> ~/.bash_profile && ln -s ~/.tfenv/bin/* /usr/local/bin && \
tfenv install $TERRAFORM_VERSION && \
echo $TERRAFORM_VERSION > ~/.tfenv/version && \
curl -sfSLO https://storage.googleapis.com/kubernetes-release/release/v${KUBECTL_VERSION}/bin/linux/amd64/kubectl && \
curl -sfSLO https://dl.k8s.io/release/v${KUBECTL_VERSION}/bin/linux/amd64/kubectl && \
chmod u+x kubectl && \
mv ./kubectl /usr/local/bin/kubectl && \
kubectl version --client=true
kubectl version --client=true && \
curl -sfSLO https://get.helm.sh/helm-v${HELM_VERSION}-linux-amd64.tar.gz && \
tar -zxvf helm-v${HELM_VERSION}-linux-amd64.tar.gz && \
mv linux-amd64/helm /usr/local/bin/helm && \
rm -rf linux-amd64 helm-v${HELM_VERSION}-linux-amd64.tar.gz && \
helm version


COPY *.sh ./
ENTRYPOINT ["/entrypoint.sh"]
17 changes: 1 addition & 16 deletions .github/workflows/ci.yml
@@ -23,7 +23,7 @@ jobs:
- uses: actions/checkout@main
- uses: hashicorp/setup-terraform@v1
with:
terraform_version: 1.0.11
terraform_version: 1.4.6
- name: 'Terraform Format'
run: terraform fmt -check -recursive
- name: 'Terraform Init cluster'
@@ -40,21 +40,6 @@
- name: 'Terraform Validate cluster'
run: terraform validate
working-directory: 'examples/cluster'
local-test:
name: Run local tests
runs-on: ubuntu-latest
steps:
- name: 'Checkout'
uses: actions/checkout@main
- name: 'Install Go'
uses: actions/setup-go@v2
with:
go-version: 1.17.2
- name: 'Local Tests'
run: |
cd test
go get ./...
go test -v ./local_test.go
cluster-test:
name: Test cluster module
if: "!contains(github.event.head_commit.message, '[skip ci]')"
2 changes: 1 addition & 1 deletion .terraform-version
@@ -1 +1 @@
1.2.7
1.4.6
38 changes: 38 additions & 0 deletions addons.tf
@@ -0,0 +1,38 @@
resource "aws_eks_addon" "vpc-cni" {
cluster_name = aws_eks_cluster.control_plane.name
addon_name = "vpc-cni"
addon_version = local.versions.vpc_cni
resolve_conflicts_on_create = "OVERWRITE"
resolve_conflicts_on_update = "OVERWRITE"
configuration_values = var.vpc_cni_configuration_values
}

resource "aws_eks_addon" "kube-proxy" {
cluster_name = aws_eks_cluster.control_plane.name
addon_name = "kube-proxy"
addon_version = local.versions.kube_proxy
resolve_conflicts_on_create = "OVERWRITE"
resolve_conflicts_on_update = "OVERWRITE"
configuration_values = var.kube_proxy_configuration_values
}

resource "aws_eks_addon" "coredns" {
cluster_name = aws_eks_cluster.control_plane.name
addon_name = "coredns"
addon_version = local.versions.coredns
resolve_conflicts_on_create = "OVERWRITE"
resolve_conflicts_on_update = "OVERWRITE"
configuration_values = var.coredns_configuration_values
depends_on = [aws_eks_fargate_profile.critical_pods]
}

resource "aws_eks_addon" "ebs-csi" {
cluster_name = aws_eks_cluster.control_plane.name
addon_name = "aws-ebs-csi-driver"
addon_version = local.versions.aws_ebs_csi_driver
service_account_role_arn = aws_iam_role.aws_ebs_csi_driver.arn
resolve_conflicts_on_create = "OVERWRITE"
resolve_conflicts_on_update = "OVERWRITE"
configuration_values = var.ebs_csi_configuration_values
depends_on = [aws_eks_fargate_profile.critical_pods]
}
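
Both the coredns and ebs-csi addons above depend on aws_eks_fargate_profile.critical_pods, which is defined in a file not shown in this diff. Based on the commit notes about running cluster-critical pods on Fargate, a minimal sketch of what such a profile could look like follows; the resource name comes from the depends_on references, but the pod execution role and subnet references are illustrative assumptions, not code from this repository. The commit also adds Fargate coverage for the karpenter and flux-system namespaces, whether via extra selectors or separate profiles is not visible here.

# Sketch only — role and subnet references are assumptions for illustration.
resource "aws_eks_fargate_profile" "critical_pods" {
  cluster_name           = aws_eks_cluster.control_plane.name
  fargate_profile_name   = "critical-pods"
  pod_execution_role_arn = aws_iam_role.fargate_pod_execution.arn # assumed name
  subnet_ids             = var.vpc_config.private_subnet_ids      # assumed shape

  # Run kube-system pods (e.g. coredns) on Fargate so addon installation
  # succeeds at bootstrap, before any EC2 nodes exist.
  selector {
    namespace = "kube-system"
  }
}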
34 changes: 34 additions & 0 deletions aws_ebs_csi_driver_iam.tf
@@ -0,0 +1,34 @@
resource "aws_iam_role" "aws_ebs_csi_driver" {
name = "${var.iam_role_name_prefix}EksEBSCSIDriver-${var.name}"
assume_role_policy = data.aws_iam_policy_document.aws_ebs_csi_driver_assume_role_policy.json
description = "EKS CSI driver role for ${var.name} cluster"
}

data "aws_iam_policy_document" "aws_ebs_csi_driver_assume_role_policy" {
statement {
actions = ["sts:AssumeRoleWithWebIdentity"]
effect = "Allow"

condition {
test = "StringEquals"
variable = "${replace(aws_iam_openid_connect_provider.cluster_oidc.url, "https://", "")}:sub"
values = ["system:serviceaccount:kube-system:ebs-csi-controller-sa", "system:serviceaccount:kube-system:ebs-snapshot-controller"]
}

condition {
test = "StringEquals"
variable = "${replace(aws_iam_openid_connect_provider.cluster_oidc.url, "https://", "")}:aud"
values = ["sts.amazonaws.com"]
}

principals {
identifiers = [aws_iam_openid_connect_provider.cluster_oidc.arn]
type = "Federated"
}
}
}

resource "aws_iam_role_policy_attachment" "aws_ebs_csi_driver" {
role = aws_iam_role.aws_ebs_csi_driver.id
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy"
}
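
The trust policy above is the standard IRSA pattern: only the kube-system ebs-csi-controller-sa and ebs-snapshot-controller service accounts can assume the role, and only for the sts.amazonaws.com audience. The addon consumes the role through service_account_role_arn in addons.tf; for comparison, a self-managed install would attach the same role by annotating the service account, roughly as sketched below. This module does not create such a resource — the snippet only illustrates the mechanism.

# Illustration only: how the role above would be consumed without the managed addon.
resource "kubernetes_service_account" "ebs_csi_controller" {
  metadata {
    name      = "ebs-csi-controller-sa"
    namespace = "kube-system"
    annotations = {
      "eks.amazonaws.com/role-arn" = aws_iam_role.aws_ebs_csi_driver.arn
    }
  }
}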
44 changes: 44 additions & 0 deletions cluster_iam.tf
@@ -0,0 +1,44 @@
locals {
  eks_cluster_role_arn = length(var.cluster_role_arn) == 0 ? aws_iam_role.eks_cluster_role[0].arn : var.cluster_role_arn
}

resource "aws_iam_role" "eks_cluster_role" {
  count              = length(var.cluster_role_arn) == 0 ? 1 : 0
  name               = "${var.iam_role_name_prefix}EksCluster-${var.name}"
  assume_role_policy = data.aws_iam_policy_document.eks_assume_role_policy.json

  # Resources running on the cluster are still generating logs while the module's resources are
  # destroyed, which results in the log group being re-created even after Terraform destroys it.
  # Removing the cluster role's ability to create the log group prevents it from being re-created
  # outside of Terraform by services that are still generating logs during the destroy process.
  inline_policy {
    name = "DenyLogGroupCreation"
    policy = jsonencode({
      Version = "2012-10-17"
      Statement = [
        {
          Action   = ["logs:CreateLogGroup"]
          Effect   = "Deny"
          Resource = "*"
        },
      ]
    })
  }
}

data "aws_iam_policy_document" "eks_assume_role_policy" {
  statement {
    principals {
      type        = "Service"
      identifiers = ["eks.amazonaws.com"]
    }
    actions = ["sts:AssumeRole"]
    effect  = "Allow"
  }
}

resource "aws_iam_role_policy_attachment" "eks_cluster_policy" {
  count      = length(var.cluster_role_arn) == 0 ? 1 : 0
  role       = aws_iam_role.eks_cluster_role[0].name
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"
}
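
As the commit message explains, the role is only created when var.cluster_role_arn is empty, so legacy clusters can keep their externally managed role instead of being recreated on upgrade. A minimal sketch of a caller opting into an external role, following the example layout used elsewhere in this change (the module path, cluster name, and role ARN are placeholders):

module "legacy_cluster" {
  source = "../../" # placeholder path, as in examples/cluster/main.tf

  name       = "legacy-cluster"
  vpc_config = data.terraform_remote_state.environment.outputs.vpc_config

  # Reuse the pre-existing, externally managed role instead of creating
  # a new "${var.iam_role_name_prefix}EksCluster-legacy-cluster" role.
  cluster_role_arn = "arn:aws:iam::214219211678:role/LegacyEksClusterRole" # placeholder ARN
}

# New clusters simply omit cluster_role_arn and get a dedicated role from this module.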
3 changes: 3 additions & 0 deletions data.tf
@@ -0,0 +1,3 @@
data "aws_partition" "current" {}
data "aws_caller_identity" "current" {}
data "aws_region" "current" {}
7 changes: 0 additions & 7 deletions examples/cluster/bottlerocket_gpu_node_group/cluster.tf

This file was deleted.

30 changes: 0 additions & 30 deletions examples/cluster/bottlerocket_gpu_node_group/main.tf

This file was deleted.

7 changes: 0 additions & 7 deletions examples/cluster/bottlerocket_node_group/cluster.tf

This file was deleted.

27 changes: 0 additions & 27 deletions examples/cluster/bottlerocket_node_group/main.tf

This file was deleted.

4 changes: 0 additions & 4 deletions examples/cluster/environment/main.tf
@@ -19,7 +19,3 @@ module "vpc" {
cidr_block = var.cidr_block
availability_zones = ["us-east-1a", "us-east-1b", "us-east-1d"]
}

module "iam" {
source = "../../../modules/iam"
}
4 changes: 0 additions & 4 deletions examples/cluster/environment/outputs.tf
@@ -13,7 +13,3 @@ output "private_subnet_ids" {
output "vpc_config" {
value = module.vpc.config
}

output "iam_config" {
value = module.iam.config
}
35 changes: 18 additions & 17 deletions examples/cluster/main.tf
@@ -1,35 +1,32 @@
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "4.47.0"
}
}
}

provider "aws" {
region = "us-east-1"
allowed_account_ids = ["214219211678"]
}

provider "kubernetes" {
host = module.cluster.config.endpoint
cluster_ca_certificate = base64decode(module.cluster.config.ca_data)

exec {
api_version = "client.authentication.k8s.io/v1beta1"
command = "aws"
args = ["eks", "get-token", "--cluster-name", module.cluster.config.name]
}
}

data "http" "ip" {
url = "http://ipv4.icanhazip.com"
}

module "cluster" {
source = "../../modules/cluster"
source = "../../"

name = var.cluster_name

vpc_config = data.terraform_remote_state.environment.outputs.vpc_config
iam_config = data.terraform_remote_state.environment.outputs.iam_config

critical_addons_node_group_key_name = "development"

critical_addons_coredns_configuration_values = jsonencode({ replicaCount = 3 })
critical_addons_ebs-csi_configuration_values = jsonencode({ node = { tolerateAllTaints = true } })
endpoint_public_access = true
endpoint_public_access_cidrs = ["${chomp(data.http.ip.body)}/32"]
endpoint_public_access = true
endpoint_public_access_cidrs = ["${chomp(data.http.ip.body)}/32"]


aws_auth_role_map = [
@@ -44,3 +41,7 @@ module "cluster" {
Project = "terraform-aws-eks"
}
}

data "aws_security_group" "nodes" {
id = module.cluster.config.node_security_group
}
4 changes: 4 additions & 0 deletions examples/cluster/outputs.tf
@@ -10,3 +10,7 @@ output "cluster_config" {
value = module.cluster.config
sensitive = true
}

output "node_security_group_name" {
value = data.aws_security_group.nodes.name
}
7 changes: 0 additions & 7 deletions examples/cluster/standard_node_group/cluster.tf

This file was deleted.

26 changes: 0 additions & 26 deletions examples/cluster/standard_node_group/main.tf

This file was deleted.
