Fix monitoring #4

Merged: 9 commits, May 23, 2022
10 changes: 10 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [3.0.1] - 2022-05-19

### Fixed
- Prometheus-node-exporter tried to schedule pods on Fargate nodes, which don't support DaemonSets (see the sketch below) [Issue #3](https://github.com/nimbux911/terraform-aws-eks/issues/3)
- OTEL manifests failed because their namespace didn't exist [Issue #2](https://github.com/nimbux911/terraform-aws-eks/issues/2)
- cert-manager release failed because its namespace didn't exist
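
The Fargate fix pins the node-exporter DaemonSet to the EC2 worker nodes through a nodeSelector on their instance-type label, so it no longer targets Fargate nodes. Below is an abridged sketch of the Helm-provider form, mirroring the helm.tf change further down in this PR; the `\\.` escapes keep `node.kubernetes.io/instance-type` as a single label key rather than a nested path.

```hcl
# Abridged from the kube-prometheus-stack release in helm.tf; var.instance_type
# is the EC2 instance type used by the worker autoscaling group.
set {
  name  = "prometheus-node-exporter.nodeSelector.node\\.kubernetes\\.io/instance-type"
  value = var.instance_type
}
```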

### Added
- ignore_desired_capacity option that adds ignore_changes to the ASG desired_capacity so it can be handled by the cluster-autoscaler (usage sketch below)
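
A minimal usage sketch for the new option, assuming the module is consumed as `eks_main` (the name used in README.md); the source and surrounding values are placeholders, and the module's other required inputs are omitted for brevity. Enabling the bundled cluster-autoscaler (`helm_cluster_autoscaler_enabled`) selects the same ignore-changes ASG automatically.

```hcl
# Illustrative module call; only the values relevant to this change are shown.
module "eks_main" {
  source = "github.com/nimbux911/terraform-aws-eks" # placeholder; pin a release tag in practice

  cluster_name     = "example-cluster"
  min_size         = "1"
  max_size         = "5"
  desired_capacity = "2" # used as the initial size; later changes are ignored

  # Hand desired_capacity over to the cluster-autoscaler: the ASG is created
  # with lifecycle { ignore_changes = [desired_capacity] }.
  ignore_desired_capacity = true

  # ... eks_worker_ami_id, subnets_ids, and the other required inputs go here.
}
```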

## [3.0.0] - 2022-05-06

### Added
1 change: 1 addition & 0 deletions README.md
@@ -143,6 +143,7 @@ module "eks_main" {
| min\_size | Minimum size of the autoscaling for the worker nodes. | `string` | `""` | yes |
| max\_pods\_per\_node | Max pods per Kubernetes worker node. | `string` | `"100"` | no |
| desired\_capacity | Desired size of the autoscaling for the worker nodes. | `string` | `""` | yes |
| ignore\_desired\_capacity | Add ignore_changes to the ASG desired_capacity so the cluster-autoscaler can manage it. | `bool` | `false` | no |
| eks\_worker\_ami\_id | AMI ID for the worker nodes | `string` | `""` | yes |
| target\_group\_arns | ARNs of the target groups for using the worker nodes behind of ELB | `list[string]` | `[]` | no |
| health\_check\_type | Health check type for the worker nodes. | `string` | `"EC2"` | no |
36 changes: 35 additions & 1 deletion asg.tf
@@ -34,6 +34,7 @@ resource "aws_launch_configuration" "eks" {
}

resource "aws_autoscaling_group" "eks" {
count = var.ignore_desired_capacity || var.helm_cluster_autoscaler_enabled ? 0 : 1
desired_capacity = var.desired_capacity
launch_configuration = aws_launch_configuration.eks.id
max_size = var.max_size
@@ -58,5 +59,38 @@ resource "aws_autoscaling_group" "eks" {
],
var.asg_tags,
)


}

resource "aws_autoscaling_group" "eks_ignore_desired_capacity" {
count = var.ignore_desired_capacity || var.helm_cluster_autoscaler_enabled ? 1 : 0
desired_capacity = var.desired_capacity
launch_configuration = aws_launch_configuration.eks.id
max_size = var.max_size
min_size = var.min_size
name = var.cluster_name
vpc_zone_identifier = var.subnets_ids
target_group_arns = var.target_group_arns
health_check_type = var.health_check_type

tags = concat(
[
{
"key" = "Name"
"value" = var.cluster_name
"propagate_at_launch" = true
},
{
"key" = "kubernetes.io/cluster/${aws_eks_cluster.main.name}"
"value" = "owned"
"propagate_at_launch" = true
},
],
var.asg_tags,
)

lifecycle {
ignore_changes = [desired_capacity]
}

}
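
A note on the design: Terraform lifecycle settings such as ignore_changes only accept literal values and cannot be toggled by a variable, which is why the module keeps two otherwise-identical ASG resources and selects one with count. A side effect for existing deployments (an assumption about upgrades, not something this PR changes) is that the original resource gains a count, so its state address moves from aws_autoscaling_group.eks to aws_autoscaling_group.eks[0]; without a state move, Terraform would plan to replace the ASG. A hedged sketch of a moved block the module could ship for the default path, assuming Terraform 1.1 or newer:

```hcl
# Hypothetical migration helper, not part of this PR: keeps existing state tracking
# the ASG at its new indexed address when ignore_desired_capacity and
# helm_cluster_autoscaler_enabled are both left false.
moved {
  from = aws_autoscaling_group.eks
  to   = aws_autoscaling_group.eks[0]
}
```

Deployments enabling the new option on an existing cluster would instead need something like `terraform state mv` onto aws_autoscaling_group.eks_ignore_desired_capacity[0].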
22 changes: 16 additions & 6 deletions helm.tf
@@ -83,12 +83,13 @@ resource "helm_release" "metrics_server" {
}

resource "helm_release" "cert_manager" {
count = var.helm_cert_manager_enabled || var.k8s_opentelemetry_enabled ? 1 : 0
name = "cert-manager"
namespace = "cert-manager"
repository = "https://charts.jetstack.io"
chart = "cert-manager"
version = "1.6.1"
count = var.helm_cert_manager_enabled || var.k8s_opentelemetry_enabled ? 1 : 0
name = "cert-manager"
namespace = "cert-manager"
repository = "https://charts.jetstack.io"
chart = "cert-manager"
create_namespace = true
version = "1.6.1"

set {
name = "installCRDs"
@@ -122,6 +123,11 @@ resource "helm_release" "prometheus_stack" {
value = var.prometheus_replicas
}

set {
name = "prometheus-node-exporter.nodeSelector.node\\.kubernetes\\.io/instance-type"
value = var.instance_type
}

dynamic "set" {
for_each = var.prometheus_requests_cpu != null ? ["do it"] : []
content {
@@ -638,6 +644,10 @@ resource "helm_release" "fluent_bit" {
file("${path.module}/helm-values/fluent-bit.yaml")
]

set {
name = "nodeSelector.node\\.kubernetes\\.io/instance-type"
value = var.instance_type
}

}

36 changes: 18 additions & 18 deletions k8s.tf
@@ -3,66 +3,66 @@
resource "kubernetes_manifest" "otel-cert-operator-serving" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-cert-operator-serving.yaml"))
depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}

resource "kubernetes_manifest" "otel-clusterrolebinding-operator-manager" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-clusterrolebinding-operator-manager.yaml"))
depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}

resource "kubernetes_manifest" "otel-clusterrolebinding-operator-proxy" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-clusterrolebinding-operator-proxy.yaml"))
depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}

resource "kubernetes_manifest" "otel-clusterrole-operator-manager" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-clusterrole-operator-manager.yaml"))

depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}

resource "kubernetes_manifest" "otel-clusterrole-operator-metrics-reader" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-clusterrole-operator-metrics-reader.yaml"))
depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}

resource "kubernetes_manifest" "otel-clusterrole-operator-proxy" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-clusterrole-operator-proxy.yaml"))
depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}

resource "kubernetes_manifest" "otel-crd-collectors" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-crd-collectors.yaml"))
depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}

resource "kubernetes_manifest" "otel-crd-instrumentations" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-crd-instrumentations.yaml"))

depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}

resource "kubernetes_manifest" "otel-deployment-operator-controller-manager" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-deployment-operator-controller-manager.yaml"))
depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}

resource "kubernetes_manifest" "otel-issuer-operator-selfsigned" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-issuer-operator-selfsigned.yaml"))
depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}

resource "kubernetes_manifest" "otel-ns-operator-system" {
resource "kubernetes_manifest" "otel-ns-operator-system" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-ns-operator-system.yaml"))
depends_on = [helm_release.cert_manager]
@@ -71,41 +71,41 @@ resource "kubernetes_manifest" "otel-ns-operator-system" {
resource "kubernetes_manifest" "otel-rolebinding-operator-leader-election" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-rolebinding-operator-leader-election.yaml"))
depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}

resource "kubernetes_manifest" "otel-role-operator-system" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-role-operator-system.yaml"))
depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}

resource "kubernetes_manifest" "otel-sa-operator-controller-manager" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-sa-operator-controller-manager.yaml"))
depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}

resource "kubernetes_manifest" "otel-svc-operator-controller-manager" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-svc-operator-controller-manager.yaml"))
depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}

resource "kubernetes_manifest" "otel-svc-operator-webhook" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-svc-operator-webhook.yaml"))
depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}

resource "kubernetes_manifest" "otel-webhookconfig" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-webhookconfig.yaml"))
depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}

resource "kubernetes_manifest" "otel-webhookvalidation" {
count = var.k8s_opentelemetry_enabled ? 1 : 0
manifest = yamldecode(file("${path.module}/k8s-manifests/otel-webhookvalidation.yaml"))
depends_on = [helm_release.cert_manager]
depends_on = [helm_release.cert_manager, kubernetes_manifest.otel-ns-operator-system]
}
2 changes: 1 addition & 1 deletion outputs.tf
@@ -11,7 +11,7 @@ output "worker_role_id" {
}

output "asg_name" {
value = aws_autoscaling_group.eks.name
value = var.ignore_desired_capacity || var.helm_cluster_autoscaler_enabled ? aws_autoscaling_group.eks_ignore_desired_capacity[0].name : aws_autoscaling_group.eks[0].name
}

output "eks_certificate_authority" {
3 changes: 3 additions & 0 deletions variables.tf
@@ -8,6 +8,9 @@ variable "max_size" {}
variable "min_size" {}
variable "max_pods_per_node"{}
variable "desired_capacity" {}
variable "ignore_desired_capacity" {
default = false
}
variable "eks_worker_ami_id" {}
variable "target_group_arns" {
default = []