Skip to content

Commit

Permalink
Merge pull request #25 from wenzel-felix/feature/selfmaintenance
Browse files Browse the repository at this point in the history
Feature/selfmaintenance
  • Loading branch information
wenzel-felix authored Jul 5, 2023
2 parents ec6f8da + 9c9384a commit d14f34f
Show file tree
Hide file tree
Showing 11 changed files with 143 additions and 173 deletions.
150 changes: 10 additions & 140 deletions cluster-hccm.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
resource "kubernetes_secret" "hcloud_ccm" {
depends_on = [ hcloud_load_balancer_service.management_lb_k8s_service ]
count = var.cluster_configuration.preinstall_hcloud_controller ? 1 : 0
depends_on = [hcloud_load_balancer_service.management_lb_k8s_service]
count = var.cluster_configuration.preinstall_hcloud_controller ? 1 : 0
metadata {
name = "hcloud"
namespace = "kube-system"
Expand All @@ -12,143 +12,13 @@ resource "kubernetes_secret" "hcloud_ccm" {
}
}

resource "kubernetes_service_account" "hcloud_ccm" {
depends_on = [ hcloud_load_balancer_service.management_lb_k8s_service ]
count = var.cluster_configuration.preinstall_hcloud_controller ? 1 : 0
metadata {
name = "cloud-controller-manager"
namespace = "kube-system"
}
}
resource "helm_release" "hccm" {
depends_on = [kubernetes_secret.hcloud_ccm]
count = var.cluster_configuration.preinstall_hcloud_controller ? 1 : 0
repository = "https://charts.hetzner.cloud"
chart = "hcloud-cloud-controller-manager"
name = "hccm"
namespace = "kube-system"

resource "kubernetes_cluster_role_binding" "hcloud_ccm" {
depends_on = [ hcloud_load_balancer_service.management_lb_k8s_service ]
count = var.cluster_configuration.preinstall_hcloud_controller ? 1 : 0
metadata {
name = "system:cloud-controller-manager"
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = "cluster-admin"
}
subject {
kind = "ServiceAccount"
name = "cloud-controller-manager"
namespace = "kube-system"
}
}

resource "kubernetes_deployment" "hcloud_ccm" {
depends_on = [ hcloud_load_balancer_service.management_lb_k8s_service ]
count = var.cluster_configuration.preinstall_hcloud_controller ? 1 : 0

lifecycle {
ignore_changes = [
spec[0].template[0].spec[0],
metadata[0].annotations
]
}

metadata {
name = "hcloud-cloud-controller-manager"
namespace = "kube-system"
}

spec {
replicas = 1
revision_history_limit = 2
selector {
match_labels = {
app = "hcloud-cloud-controller-manager"
}
}
template {
metadata {
labels = {
app = "hcloud-cloud-controller-manager"
}
}
spec {
service_account_name = "cloud-controller-manager"
dns_policy = "Default"
toleration {
# Allow HCCM itself to schedule on nodes that have not yet been initialized by HCCM.
key = "node.cloudprovider.kubernetes.io/uninitialized"
value = "true"
effect = "NoSchedule"
}
toleration {
key = "CriticalAddonsOnly"
operator = "Exists"
}
toleration {
# Allow HCCM to schedule on control plane nodes.
key = "node-role.kubernetes.io/master"
effect = "NoSchedule"
operator = "Exists"
}
toleration {
key = "node-role.kubernetes.io/control-plane"
effect = "NoSchedule"
operator = "Exists"
}
toleration {
key = "node.kubernetes.io/not-ready"
effect = "NoExecute"
}
host_network = true
container {
name = "hcloud-cloud-controller-manager"
command = [
"/bin/hcloud-cloud-controller-manager",
"--allow-untagged-cloud",
"--cloud-provider=hcloud",
"--leader-elect=false",
"--route-reconciliation-period=30s",
"--allocate-node-cidrs=true",
"--cluster-cidr=10.244.0.0/16"
]
env {
name = "NODE_NAME"
value_from {
field_ref {
field_path = "spec.nodeName"
}
}
}
env {
name = "HCLOUD_TOKEN"
value_from {
secret_key_ref {
name = "hcloud"
key = "token"
}
}
}
env {
name = "HCLOUD_NETWORK"
value_from {
secret_key_ref {
name = "hcloud"
key = "network"
}
}
}
image = "hetznercloud/hcloud-cloud-controller-manager:v1.14.2"
port {
name = "metrics"
container_port = 8233
}
resources {
requests = {
cpu = "100m"
memory = "50Mi"
}
}
}
priority_class_name = "system-cluster-critical"
}
}
}
values = [file("${path.module}/templates/values/hccm.yaml")]
}
30 changes: 1 addition & 29 deletions cluster-monitoring.tf
Original file line number Diff line number Diff line change
Expand Up @@ -18,35 +18,7 @@ resource "helm_release" "prom_stack" {

namespace = "monitoring"

values = [
<<EOF
prometheus:
prometheusSpec:
enableRemoteWriteReceiver: true
enableFeatures:
- remote-write-receiver
grafana:
additionalDataSources:
- name: Tempo
type: tempo
access: browser
orgId: 1
uid: tempo
url: http://${helm_release.tempo[0].name}.${kubernetes_namespace.monitoring[0].metadata[0].name}:3100
isDefault: false
editable: true
jsonData:
httpMethod: GET
serviceMap:
datasourceUid: 'prometheus'
EOF
]
}

resource "random_password" "loki_auth" {
count = var.cluster_configuration.preinstall_monitoring_stack ? 1 : 0
length = 16
special = false
values = [ file("${path.module}/templates/values/kube-prometheus-stack.yaml") ]
}

resource "helm_release" "loki" {
Expand Down
39 changes: 39 additions & 0 deletions cluster-selfmaintenance.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Namespace that the kured Helm release is installed into.
# Created only when automatic OS updates are enabled AND the cluster is HA
# (both conditions must hold, otherwise zero instances are planned).
resource "kubernetes_namespace" "kured" {
  count = (var.enable_auto_os_updates && local.is_ha_cluster) ? 1 : 0

  metadata {
    name = "kured"
  }

  # NOTE(review): presumably the Kubernetes API is only reachable once the
  # management load balancer service exists - confirm against the provider setup.
  depends_on = [
    hcloud_load_balancer_service.management_lb_k8s_service,
  ]
}

# Installs the kured chart (pinned to chart version 3.0.1) into the namespace
# managed by kubernetes_namespace.kured. Uses the same enable/HA gate as that
# namespace, so the [0] reference below is only evaluated when it exists.
resource "helm_release" "kured" {
  count = (var.enable_auto_os_updates && local.is_ha_cluster) ? 1 : 0

  name       = "kured"
  repository = "https://kubereboot.github.io/charts"
  chart      = "kured"
  version    = "3.0.1"
  namespace  = kubernetes_namespace.kured[0].metadata[0].name

  depends_on = [
    kubernetes_namespace.kured,
  ]
}

# Downloads the upstream system-upgrade-controller manifest bundle over HTTP.
# The response body is split on "---" into
# local.system_upgrade_controller_components and applied document by document
# by kubectl_manifest.system_upgrade_controller.
#
# NOTE(review): the URL follows the moving `master` branch, so the applied
# manifests can change between plans without any change to this module.
# Consider pinning to a release tag once the desired version is confirmed.
# NOTE(review): this data source has no count/for_each, so it is read on every
# plan even when enable_auto_kubernetes_updates is false or the cluster is not HA.
data "http" "system_upgrade_controller" {
url = "https://raw.githubusercontent.com/rancher/system-upgrade-controller/master/manifests/system-upgrade-controller.yaml"
}

# Applies every YAML document of the downloaded system-upgrade-controller
# bundle as its own kubectl_manifest instance.
#
# Instances are keyed by the document's position in
# local.system_upgrade_controller_components ("0", "1", ...). The position is
# taken directly from the iteration instead of index(list, element):
#   * index() returns the FIRST match, so two identical documents would map to
#     the same key and fail with a duplicate-key error;
#   * it also rescans the list for every element.
# Keys for existing documents are unchanged, so state addresses stay stable.
#
# Whitespace-only documents (e.g. produced by a leading or trailing "---" in
# the bundle) are skipped because they are not valid manifests.
#
# Nothing is created unless automatic Kubernetes updates are enabled AND the
# cluster is HA.
resource "kubectl_manifest" "system_upgrade_controller" {
  depends_on = [hcloud_load_balancer_service.management_lb_k8s_service]

  for_each = var.enable_auto_kubernetes_updates && local.is_ha_cluster ? {
    for idx, doc in local.system_upgrade_controller_components : idx => doc
    if trimspace(doc) != ""
  } : {}

  yaml_body = each.value
}

# Upgrade Plan for the server (control-plane) nodes, read verbatim from
# templates/manifests/system-upgrade-controller-server.yaml.
# Applied after the system-upgrade-controller manifests, and only when
# automatic Kubernetes updates are enabled on an HA cluster.
resource "kubectl_manifest" "system_upgrade_controller_server_plan" {
  count = (var.enable_auto_kubernetes_updates && local.is_ha_cluster) ? 1 : 0

  yaml_body = file("${path.module}/templates/manifests/system-upgrade-controller-server.yaml")

  depends_on = [
    kubectl_manifest.system_upgrade_controller,
  ]
}

# Upgrade Plan for the agent (worker) nodes, read verbatim from
# templates/manifests/system-upgrade-controller-agent.yaml.
# Applied after the system-upgrade-controller manifests, and only when
# automatic Kubernetes updates are enabled on an HA cluster.
resource "kubectl_manifest" "system_upgrade_controller_agent_plan" {
  count = (var.enable_auto_kubernetes_updates && local.is_ha_cluster) ? 1 : 0

  yaml_body = file("${path.module}/templates/manifests/system-upgrade-controller-agent.yaml")

  depends_on = [
    kubectl_manifest.system_upgrade_controller,
  ]
}
5 changes: 3 additions & 2 deletions examples/simple-setup/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ cp kubeconfig.yaml ~/.kube/config

Deploy the example:
```bash
k apply -f load_example.yaml
kubectl label namespace default istio-injection=enabled --overwrite
kubectl apply -f load_example.yaml
```

Delete the example:
```bash
k delete -f load_example.yaml
kubectl delete -f load_example.yaml
```
2 changes: 1 addition & 1 deletion examples/simple-setup/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ module "rke2" {
preinstall_monitoring_stack = true
preinstall_istio_service_mesh = true
preinstall_tracing_stack = true
preinstall_hcloud_controller = false
preinstall_hcloud_controller = true
}
create_cloudflare_dns_record = true
cloudflare_zone_id = var.cloudflare_zone_id
Expand Down
4 changes: 4 additions & 0 deletions locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,8 @@ locals {

istio_charts_url = "https://istio-release.storage.googleapis.com/charts"
istio_values = var.cluster_configuration.preinstall_tracing_stack ? [file("${path.module}/templates/values/istiod.yaml")] : []

is_ha_cluster = var.master_node_count >= 3

system_upgrade_controller_components = split("---", data.http.system_upgrade_controller.response_body)
}
28 changes: 28 additions & 0 deletions templates/manifests/system-upgrade-controller-agent.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Agent plan
# Upgrade Plan for RKE2 agent (non control-plane) nodes.
apiVersion: upgrade.cattle.io/v1
kind: Plan
metadata:
  name: agent-plan
  namespace: system-upgrade
  labels:
    rke2-upgrade: agent
spec:
  # Upgrade at most two agent nodes at the same time.
  concurrency: 2
  nodeSelector:
    matchExpressions:
      # Nodes opt in via the rke2-upgrade label and can opt out again by
      # setting it to "disabled" or "false".
      - {key: rke2-upgrade, operator: Exists}
      - {key: rke2-upgrade, operator: NotIn, values: ["disabled", "false"]}
      # When using k8s version 1.19 or older, swap control-plane with master
      - {key: node-role.kubernetes.io/control-plane, operator: NotIn, values: ["true"]}
  # Runs the upgrade image with "prepare server-plan" before the agent upgrade
  # (presumably so agents wait for the server plan - confirm against RKE2 docs).
  prepare:
    args:
      - prepare
      - server-plan
    image: rancher/rke2-upgrade
  serviceAccountName: system-upgrade
  cordon: true
  drain:
    force: true
  upgrade:
    image: rancher/rke2-upgrade
  # The channel must be a URL that resolves to a release; the bare word
  # "stable" is not a resolvable channel.
  channel: https://update.rke2.io/v1-release/channels/stable
23 changes: 23 additions & 0 deletions templates/manifests/system-upgrade-controller-server.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Server plan
# Upgrade Plan for RKE2 server (control-plane) nodes.
apiVersion: upgrade.cattle.io/v1
kind: Plan
metadata:
  name: server-plan
  namespace: system-upgrade
  labels:
    rke2-upgrade: server
spec:
  # Upgrade one server node at a time.
  concurrency: 1
  nodeSelector:
    matchExpressions:
      # Nodes opt in via the rke2-upgrade label and can opt out again by
      # setting it to "disabled" or "false".
      - {key: rke2-upgrade, operator: Exists}
      - {key: rke2-upgrade, operator: NotIn, values: ["disabled", "false"]}
      # When using k8s version 1.19 or older, swap control-plane with master
      - {key: node-role.kubernetes.io/control-plane, operator: In, values: ["true"]}
  serviceAccountName: system-upgrade
  cordon: true
  # drain:
  #   force: true
  upgrade:
    image: rancher/rke2-upgrade
  # The channel must be a URL that resolves to a release; the bare word
  # "stable" is not a resolvable channel.
  channel: https://update.rke2.io/v1-release/channels/stable
2 changes: 2 additions & 0 deletions templates/values/hccm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Helm values for the hcloud-cloud-controller-manager chart
# (loaded by helm_release.hccm via file()).
# NOTE(review): presumably switches on the chart's private-network / route
# handling - confirm against the chart's values reference.
networking:
  enabled: true
19 changes: 19 additions & 0 deletions templates/values/kube-prometheus-stack.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Helm values for the kube-prometheus-stack release
# (loaded by helm_release.prom_stack via file()).

prometheus:
  prometheusSpec:
    # Accept remote-write traffic on the Prometheus server.
    enableRemoteWriteReceiver: true
    enableFeatures:
      - remote-write-receiver

grafana:
  additionalDataSources:
    # Tempo tracing data source.
    # NOTE(review): the host name is hard-coded; it must match the Tempo
    # service name as resolvable from Grafana - verify against the Tempo release.
    - name: Tempo
      type: tempo
      access: browser
      orgId: 1
      uid: tempo
      url: http://tempo:3100
      isDefault: false
      editable: true
      jsonData:
        httpMethod: GET
        serviceMap:
          datasourceUid: 'prometheus'
14 changes: 13 additions & 1 deletion variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ variable "cluster_configuration" {
preinstall_tracing_stack = bool
})
default = {
preinstall_hcloud_controller = false
preinstall_hcloud_controller = true
preinstall_monitoring_stack = false
preinstall_istio_service_mesh = false
preinstall_tracing_stack = false
Expand Down Expand Up @@ -131,3 +131,15 @@ variable "letsencrypt_issuer" {
default = ""
description = "The email to send notifications regarding let's encrypt."
}

# Toggle for the kured-based self-maintenance resources.
# The resources gated by this flag additionally require local.is_ha_cluster,
# i.e. var.master_node_count >= 3; the description states this so that users
# of smaller clusters are not surprised when nothing is installed.
variable "enable_auto_os_updates" {
  type        = bool
  default     = true
  description = "Whether the OS should be updated automatically. Only takes effect on HA clusters (master_node_count >= 3)."
}

# Toggle for the system-upgrade-controller manifests and upgrade plans.
# The resources gated by this flag additionally require local.is_ha_cluster,
# i.e. var.master_node_count >= 3; the description states this so that users
# of smaller clusters are not surprised when nothing is installed.
variable "enable_auto_kubernetes_updates" {
  type        = bool
  default     = true
  description = "Whether the kubernetes version should be updated automatically. Only takes effect on HA clusters (master_node_count >= 3)."
}

0 comments on commit d14f34f

Please sign in to comment.