From 80252f3ffaa4c3e4eba7a180673f6108c46f7483 Mon Sep 17 00:00:00 2001 From: Stanley Yu Date: Wed, 27 Oct 2021 11:38:56 -0400 Subject: [PATCH] feat: Add support for CPU quota configs for node pools (#1032) --- autogen/main/README.md | 4 +++- autogen/main/cluster.tf.tmpl | 9 ++++++-- examples/node_pool/main.tf | 22 ++++++++++--------- .../README.md | 4 +++- .../cluster.tf | 9 ++++++-- modules/beta-private-cluster/README.md | 4 +++- modules/beta-private-cluster/cluster.tf | 9 ++++++-- .../README.md | 4 +++- .../cluster.tf | 9 ++++++-- modules/beta-public-cluster/README.md | 4 +++- modules/beta-public-cluster/cluster.tf | 9 ++++++-- test/integration/node_pool/controls/gcloud.rb | 14 ++++++++++++ 12 files changed, 76 insertions(+), 25 deletions(-) diff --git a/autogen/main/README.md b/autogen/main/README.md index 589791cb5..a86c31040 100644 --- a/autogen/main/README.md +++ b/autogen/main/README.md @@ -176,7 +176,9 @@ The node_pools variable takes the following parameters: | autoscaling | Configuration required by cluster autoscaler to adjust the size of the node pool to the current cluster usage | true | Optional | | auto_upgrade | Whether the nodes will be automatically upgraded | true (if cluster is regional) | Optional | {% if beta_cluster %} -| cpu_manager_policy | The CPU manager policy on the node. One of "none" or "static". | "none" | Optional | +| cpu_manager_policy | The CPU manager policy on the node. One of "none" or "static". | "static" | Optional | +| cpu_cfs_quota | Enforces the Pod's CPU limit. Setting this value to false means that the CPU limits for Pods are ignored | null | Optional | +| cpu_cfs_quota_period | The CPU CFS quota period value, which specifies the period of how often a cgroup's access to CPU resources should be reallocated | null | Optional | {% endif %} | disk_size_gb | Size of the disk attached to each node, specified in GB. The smallest allowed disk size is 10GB | 100 | Optional | | disk_type | Type of the disk attached to each node (e.g. 'pd-standard' or 'pd-ssd') | pd-standard | Optional | diff --git a/autogen/main/cluster.tf.tmpl b/autogen/main/cluster.tf.tmpl index 90d42f83f..6f9f52209 100644 --- a/autogen/main/cluster.tf.tmpl +++ b/autogen/main/cluster.tf.tmpl @@ -588,10 +588,15 @@ resource "google_container_node_pool" "pools" { boot_disk_kms_key = lookup(each.value, "boot_disk_kms_key", "") dynamic "kubelet_config" { - for_each = contains(keys(each.value), "cpu_manager_policy") ? [1] : [] + for_each = length(setintersection( + keys(each.value), + ["cpu_manager_policy", "cpu_cfs_quota", "cpu_cfs_quota_period"] + )) != 0 ? [1] : [] content { - cpu_manager_policy = lookup(each.value, "cpu_manager_policy") + cpu_manager_policy = lookup(each.value, "cpu_manager_policy", "static") + cpu_cfs_quota = lookup(each.value, "cpu_cfs_quota", null) + cpu_cfs_quota_period = lookup(each.value, "cpu_cfs_quota_period", null) } } diff --git a/examples/node_pool/main.tf b/examples/node_pool/main.tf index 5faf87cb4..f9bce4935 100644 --- a/examples/node_pool/main.tf +++ b/examples/node_pool/main.tf @@ -69,16 +69,18 @@ module "gke" { service_account = var.compute_engine_service_account }, { - name = "pool-03" - machine_type = "n1-standard-2" - node_locations = "${var.region}-b,${var.region}-c" - autoscaling = false - node_count = 2 - disk_type = "pd-standard" - auto_upgrade = true - service_account = var.compute_engine_service_account - pod_range = "test" - sandbox_enabled = true + name = "pool-03" + machine_type = "n1-standard-2" + node_locations = "${var.region}-b,${var.region}-c" + autoscaling = false + node_count = 2 + disk_type = "pd-standard" + auto_upgrade = true + service_account = var.compute_engine_service_account + pod_range = "test" + sandbox_enabled = true + cpu_manager_policy = "static" + cpu_cfs_quota = true }, ] diff --git a/modules/beta-private-cluster-update-variant/README.md b/modules/beta-private-cluster-update-variant/README.md index 072b435b3..8d38bfae0 100644 --- a/modules/beta-private-cluster-update-variant/README.md +++ b/modules/beta-private-cluster-update-variant/README.md @@ -298,7 +298,9 @@ The node_pools variable takes the following parameters: | auto_repair | Whether the nodes will be automatically repaired | true | Optional | | autoscaling | Configuration required by cluster autoscaler to adjust the size of the node pool to the current cluster usage | true | Optional | | auto_upgrade | Whether the nodes will be automatically upgraded | true (if cluster is regional) | Optional | -| cpu_manager_policy | The CPU manager policy on the node. One of "none" or "static". | "none" | Optional | +| cpu_manager_policy | The CPU manager policy on the node. One of "none" or "static". | "static" | Optional | +| cpu_cfs_quota | Enforces the Pod's CPU limit. Setting this value to false means that the CPU limits for Pods are ignored | null | Optional | +| cpu_cfs_quota_period | The CPU CFS quota period value, which specifies the period of how often a cgroup's access to CPU resources should be reallocated | null | Optional | | disk_size_gb | Size of the disk attached to each node, specified in GB. The smallest allowed disk size is 10GB | 100 | Optional | | disk_type | Type of the disk attached to each node (e.g. 'pd-standard' or 'pd-ssd') | pd-standard | Optional | | effect | Effect for the taint | | Required | diff --git a/modules/beta-private-cluster-update-variant/cluster.tf b/modules/beta-private-cluster-update-variant/cluster.tf index a466a6f94..970b4568c 100644 --- a/modules/beta-private-cluster-update-variant/cluster.tf +++ b/modules/beta-private-cluster-update-variant/cluster.tf @@ -534,10 +534,15 @@ resource "google_container_node_pool" "pools" { boot_disk_kms_key = lookup(each.value, "boot_disk_kms_key", "") dynamic "kubelet_config" { - for_each = contains(keys(each.value), "cpu_manager_policy") ? [1] : [] + for_each = length(setintersection( + keys(each.value), + ["cpu_manager_policy", "cpu_cfs_quota", "cpu_cfs_quota_period"] + )) != 0 ? [1] : [] content { - cpu_manager_policy = lookup(each.value, "cpu_manager_policy") + cpu_manager_policy = lookup(each.value, "cpu_manager_policy", "static") + cpu_cfs_quota = lookup(each.value, "cpu_cfs_quota", null) + cpu_cfs_quota_period = lookup(each.value, "cpu_cfs_quota_period", null) } } diff --git a/modules/beta-private-cluster/README.md b/modules/beta-private-cluster/README.md index c6f82df27..379b0f414 100644 --- a/modules/beta-private-cluster/README.md +++ b/modules/beta-private-cluster/README.md @@ -276,7 +276,9 @@ The node_pools variable takes the following parameters: | auto_repair | Whether the nodes will be automatically repaired | true | Optional | | autoscaling | Configuration required by cluster autoscaler to adjust the size of the node pool to the current cluster usage | true | Optional | | auto_upgrade | Whether the nodes will be automatically upgraded | true (if cluster is regional) | Optional | -| cpu_manager_policy | The CPU manager policy on the node. One of "none" or "static". | "none" | Optional | +| cpu_manager_policy | The CPU manager policy on the node. One of "none" or "static". | "static" | Optional | +| cpu_cfs_quota | Enforces the Pod's CPU limit. Setting this value to false means that the CPU limits for Pods are ignored | null | Optional | +| cpu_cfs_quota_period | The CPU CFS quota period value, which specifies the period of how often a cgroup's access to CPU resources should be reallocated | null | Optional | | disk_size_gb | Size of the disk attached to each node, specified in GB. The smallest allowed disk size is 10GB | 100 | Optional | | disk_type | Type of the disk attached to each node (e.g. 'pd-standard' or 'pd-ssd') | pd-standard | Optional | | effect | Effect for the taint | | Required | diff --git a/modules/beta-private-cluster/cluster.tf b/modules/beta-private-cluster/cluster.tf index 9198d65b5..04aa845ab 100644 --- a/modules/beta-private-cluster/cluster.tf +++ b/modules/beta-private-cluster/cluster.tf @@ -450,10 +450,15 @@ resource "google_container_node_pool" "pools" { boot_disk_kms_key = lookup(each.value, "boot_disk_kms_key", "") dynamic "kubelet_config" { - for_each = contains(keys(each.value), "cpu_manager_policy") ? [1] : [] + for_each = length(setintersection( + keys(each.value), + ["cpu_manager_policy", "cpu_cfs_quota", "cpu_cfs_quota_period"] + )) != 0 ? [1] : [] content { - cpu_manager_policy = lookup(each.value, "cpu_manager_policy") + cpu_manager_policy = lookup(each.value, "cpu_manager_policy", "static") + cpu_cfs_quota = lookup(each.value, "cpu_cfs_quota", null) + cpu_cfs_quota_period = lookup(each.value, "cpu_cfs_quota_period", null) } } diff --git a/modules/beta-public-cluster-update-variant/README.md b/modules/beta-public-cluster-update-variant/README.md index a06a76817..e9675f8fa 100644 --- a/modules/beta-public-cluster-update-variant/README.md +++ b/modules/beta-public-cluster-update-variant/README.md @@ -285,7 +285,9 @@ The node_pools variable takes the following parameters: | auto_repair | Whether the nodes will be automatically repaired | true | Optional | | autoscaling | Configuration required by cluster autoscaler to adjust the size of the node pool to the current cluster usage | true | Optional | | auto_upgrade | Whether the nodes will be automatically upgraded | true (if cluster is regional) | Optional | -| cpu_manager_policy | The CPU manager policy on the node. One of "none" or "static". | "none" | Optional | +| cpu_manager_policy | The CPU manager policy on the node. One of "none" or "static". | "static" | Optional | +| cpu_cfs_quota | Enforces the Pod's CPU limit. Setting this value to false means that the CPU limits for Pods are ignored | null | Optional | +| cpu_cfs_quota_period | The CPU CFS quota period value, which specifies the period of how often a cgroup's access to CPU resources should be reallocated | null | Optional | | disk_size_gb | Size of the disk attached to each node, specified in GB. The smallest allowed disk size is 10GB | 100 | Optional | | disk_type | Type of the disk attached to each node (e.g. 'pd-standard' or 'pd-ssd') | pd-standard | Optional | | effect | Effect for the taint | | Required | diff --git a/modules/beta-public-cluster-update-variant/cluster.tf b/modules/beta-public-cluster-update-variant/cluster.tf index 8fb85af29..fbde380f5 100644 --- a/modules/beta-public-cluster-update-variant/cluster.tf +++ b/modules/beta-public-cluster-update-variant/cluster.tf @@ -515,10 +515,15 @@ resource "google_container_node_pool" "pools" { boot_disk_kms_key = lookup(each.value, "boot_disk_kms_key", "") dynamic "kubelet_config" { - for_each = contains(keys(each.value), "cpu_manager_policy") ? [1] : [] + for_each = length(setintersection( + keys(each.value), + ["cpu_manager_policy", "cpu_cfs_quota", "cpu_cfs_quota_period"] + )) != 0 ? [1] : [] content { - cpu_manager_policy = lookup(each.value, "cpu_manager_policy") + cpu_manager_policy = lookup(each.value, "cpu_manager_policy", "static") + cpu_cfs_quota = lookup(each.value, "cpu_cfs_quota", null) + cpu_cfs_quota_period = lookup(each.value, "cpu_cfs_quota_period", null) } } diff --git a/modules/beta-public-cluster/README.md b/modules/beta-public-cluster/README.md index 69fd8fddb..adb0a1e67 100644 --- a/modules/beta-public-cluster/README.md +++ b/modules/beta-public-cluster/README.md @@ -263,7 +263,9 @@ The node_pools variable takes the following parameters: | auto_repair | Whether the nodes will be automatically repaired | true | Optional | | autoscaling | Configuration required by cluster autoscaler to adjust the size of the node pool to the current cluster usage | true | Optional | | auto_upgrade | Whether the nodes will be automatically upgraded | true (if cluster is regional) | Optional | -| cpu_manager_policy | The CPU manager policy on the node. One of "none" or "static". | "none" | Optional | +| cpu_manager_policy | The CPU manager policy on the node. One of "none" or "static". | "static" | Optional | +| cpu_cfs_quota | Enforces the Pod's CPU limit. Setting this value to false means that the CPU limits for Pods are ignored | null | Optional | +| cpu_cfs_quota_period | The CPU CFS quota period value, which specifies the period of how often a cgroup's access to CPU resources should be reallocated | null | Optional | | disk_size_gb | Size of the disk attached to each node, specified in GB. The smallest allowed disk size is 10GB | 100 | Optional | | disk_type | Type of the disk attached to each node (e.g. 'pd-standard' or 'pd-ssd') | pd-standard | Optional | | effect | Effect for the taint | | Required | diff --git a/modules/beta-public-cluster/cluster.tf b/modules/beta-public-cluster/cluster.tf index a1072525a..1268d04d4 100644 --- a/modules/beta-public-cluster/cluster.tf +++ b/modules/beta-public-cluster/cluster.tf @@ -431,10 +431,15 @@ resource "google_container_node_pool" "pools" { boot_disk_kms_key = lookup(each.value, "boot_disk_kms_key", "") dynamic "kubelet_config" { - for_each = contains(keys(each.value), "cpu_manager_policy") ? [1] : [] + for_each = length(setintersection( + keys(each.value), + ["cpu_manager_policy", "cpu_cfs_quota", "cpu_cfs_quota_period"] + )) != 0 ? [1] : [] content { - cpu_manager_policy = lookup(each.value, "cpu_manager_policy") + cpu_manager_policy = lookup(each.value, "cpu_manager_policy", "static") + cpu_cfs_quota = lookup(each.value, "cpu_cfs_quota", null) + cpu_cfs_quota_period = lookup(each.value, "cpu_cfs_quota_period", null) } } diff --git a/test/integration/node_pool/controls/gcloud.rb b/test/integration/node_pool/controls/gcloud.rb index 4ea213df5..fe3f88ce4 100644 --- a/test/integration/node_pool/controls/gcloud.rb +++ b/test/integration/node_pool/controls/gcloud.rb @@ -453,6 +453,20 @@ ) end + it "has the expected kubelet config" do + expect(data['nodePools']).to include( + including( + "name" => "pool-03", + "config" => including( + "kubeletConfig" => including( + "cpuManagerPolicy" => "static", + "cpuCfsQuota" => true + ) + ) + ) + ) + end + it "has the expected linux node config sysctls" do expect(data['nodePools']).to include( including(