From 1484394802818fbb8a036bc6a5b2156dcbae6a3a Mon Sep 17 00:00:00 2001 From: soumyapani <112522451+soumyapani@users.noreply.github.com> Date: Mon, 3 Apr 2023 23:04:01 +0530 Subject: [PATCH] Use GKE version 1.25. Remove PSP and istio from GKE cluster creation. (#136) --- README.md | 1 - aiinfra-cluster/main.tf | 1 - .../modules/aiinfra-compute/main.tf | 1 - .../modules/aiinfra-compute/variables.tf | 6 --- aiinfra-cluster/modules/gke-cluster/main.tf | 53 +------------------ .../modules/gke-cluster/variables.tf | 29 ---------- aiinfra-cluster/validation.tf | 3 -- aiinfra-cluster/variables.tf | 6 --- scripts/_env_var_util.sh | 1 - test/scripts/_env_var_util.sh | 4 +- .../_env_var_util_data/optionals_set.tfvars | 1 - 11 files changed, 2 insertions(+), 104 deletions(-) diff --git a/README.md b/README.md index c571733e7..2a129089b 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,6 @@ The optional parameters are: - __gke__: A private GKE cluster is created with private node pool following the recommendations from the GKE team. 1. ***GKE_NODE_POOL_COUNT***: The number of homogeneous node pools for GKE cluster. Only applicable when `ORCHESTRATOR_TYPE` is `gke`. 1. ***GKE_NODE_COUNT_PER_NODE_POOL***: The desired node count per node pool for GKE cluster. Only applicable when `ORCHESTRATOR_TYPE` is `gke`. -1. ***GKE_IP_CIDR_BLOCK_17***: A /17 CIDR IP range reserved for Metastore infrastructure. GKE pods, services, master subnet ranges are derived from it. Please make sure that the CIDR block is available otherwise the GKE cluster creation will fail. Only applicable when `ORCHESTRATOR_TYPE` is `gke`. Ex: 10.12.0.0/17 1. ***CUSTOM_NODE_POOL***: The custom node pool description for GKE. The structure of the custom node pool is list of node pool objects. The node pool object is ``` name = string diff --git a/aiinfra-cluster/main.tf b/aiinfra-cluster/main.tf index ec889ba93..60f546368 100644 --- a/aiinfra-cluster/main.tf +++ b/aiinfra-cluster/main.tf @@ -163,7 +163,6 @@ module "aiinfra-compute" { module.aiinfra-network ] enable_gke = var.orchestrator_type == "gke" - gke_ip_cidr_block_17 = var.gke_ip_cidr_block_17 node_pools = length(var.custom_node_pools) != 0 || length(local.basic_node_pools) != 0 ? coalescelist(var.custom_node_pools, local.basic_node_pools) : [] } diff --git a/aiinfra-cluster/modules/aiinfra-compute/main.tf b/aiinfra-cluster/modules/aiinfra-compute/main.tf index 2de778a14..4a02c5235 100644 --- a/aiinfra-cluster/modules/aiinfra-compute/main.tf +++ b/aiinfra-cluster/modules/aiinfra-compute/main.tf @@ -234,7 +234,6 @@ module "aiinfra-gke" { name = "${local.resource_prefix}-gke" disk_size_gb = var.disk_size_gb disk_type = var.disk_type - ip_cidr_block_17 = var.gke_ip_cidr_block_17 network_self_link = var.network_self_link subnetwork_self_link = var.subnetwork_self_link node_service_account = lookup(var.service_account, "email", null) diff --git a/aiinfra-cluster/modules/aiinfra-compute/variables.tf b/aiinfra-cluster/modules/aiinfra-compute/variables.tf index 3aa3a3bae..29ceb3437 100644 --- a/aiinfra-cluster/modules/aiinfra-compute/variables.tf +++ b/aiinfra-cluster/modules/aiinfra-compute/variables.tf @@ -284,12 +284,6 @@ variable "enable_gke" { default = false } -variable "gke_ip_cidr_block_17" { - type = string - description = "A /17 CIDR IP range reserved for Metastore infrastructure. GKE pods, services, master subnet ranges are derived from it." - default = null -} - variable "node_pools" { description = "The list of nodepools for the GKE cluster." type = list(object({ diff --git a/aiinfra-cluster/modules/gke-cluster/main.tf b/aiinfra-cluster/modules/gke-cluster/main.tf index 166d9f726..7034863b0 100644 --- a/aiinfra-cluster/modules/gke-cluster/main.tf +++ b/aiinfra-cluster/modules/gke-cluster/main.tf @@ -14,22 +14,6 @@ * limitations under the License. */ -locals { - # This cuts a /17 ip_cidr_range into the following ranges: - # -/17 - # - /18 gke pod range (max 512 nodes with /27 per node) - # - /18 - # - /21 enough for 2k nodes - # - /22 gke service range (enough for 1k services) - # - /28 master range - ip_cidr_block = var.ip_cidr_block_17 == null ? "10.${random_integer.cidr_octet.result}.0.0/17" : var.ip_cidr_block_17 - cidr_blocks = local.ip_cidr_block == "" ? [] : cidrsubnets(local.ip_cidr_block, 18 - 17, 21 - 17, 22 - 17, 28 - 17) - pod_cidr_block = local.ip_cidr_block == "" ? var.pod_cidr_block : local.cidr_blocks[0] - subnet_cidr_block = local.ip_cidr_block == "" ? var.subnet_cidr_block : local.cidr_blocks[1] - service_cidr_block = local.ip_cidr_block == "" ? var.service_cidr_block : local.cidr_blocks[2] - master_cidr_block = local.ip_cidr_block == "" ? var.master_ipv4_cidr_block : local.cidr_blocks[3] -} - # Definition of the private GKE cluster. resource "google_container_cluster" "gke-cluster" { provider = google-beta @@ -46,6 +30,7 @@ resource "google_container_cluster" "gke-cluster" { # documentation for the container_cluster resource. remove_default_node_pool = true initial_node_count = 1 + min_master_version = "1.25.6-gke.1000" network = var.network_self_link subnetwork = var.subnetwork_self_link @@ -57,8 +42,6 @@ resource "google_container_cluster" "gke-cluster" { master_authorized_networks_config { } - private_ipv6_google_access = "PRIVATE_IPV6_GOOGLE_ACCESS_TO_GOOGLE" - # Security Note: Basic Auth Disabled, no client certificate accepted. # The only way to manage the master is via OpenID tokens (aka gcloud). # (requirement H5; go/gke-cluster-pattern#req1.1.7) @@ -67,12 +50,6 @@ resource "google_container_cluster" "gke-cluster" { issue_client_certificate = false } } - # Requires a Pod Security Policy - # - # go/gke-cluster-pattern#req3.3.1 (part of) - pod_security_policy_config { - enabled = true - } # Enable shielded nodes to meet go/gke-cluster-pattern#req1.1.5 enable_shielded_nodes = true @@ -95,21 +72,6 @@ resource "google_container_cluster" "gke-cluster" { provider = "PROVIDER_UNSPECIFIED" } - private_cluster_config { - enable_private_endpoint = true - enable_private_nodes = true - master_ipv4_cidr_block = local.master_cidr_block - master_global_access_config { - enabled = false - } - } - - # Adding this block enables IP aliasing. - ip_allocation_policy { - cluster_ipv4_cidr_block = local.pod_cidr_block == "" ? null : local.pod_cidr_block - services_ipv4_cidr_block = local.service_cidr_block == "" ? null : local.service_cidr_block - } - # This change will also enable the metadata server on nodes. # go/gke-cluster-pattern#req4.1.1#req1.1.5 (parts of, vTPM is another section) workload_identity_config { @@ -142,19 +104,6 @@ resource "google_container_cluster" "gke-cluster" { } addons_config { - # We optionally enable the Istio Add-on. Istio is required if there is - # any pod-to-pod communication. We keep it optional - # to simplify small deployments where there is no pod-to-pod communication - # (for now) - # - # go/gke-cluster-pattern#req5.2.1 for internal traffic, and satified - # only if var.enable_istio = "true". - istio_config { - # Awkward flag to enable istio_config. - disabled = false - auth = "AUTH_MUTUAL_TLS" - } - gce_persistent_disk_csi_driver_config { enabled = true } diff --git a/aiinfra-cluster/modules/gke-cluster/variables.tf b/aiinfra-cluster/modules/gke-cluster/variables.tf index cfa20d2af..388173755 100644 --- a/aiinfra-cluster/modules/gke-cluster/variables.tf +++ b/aiinfra-cluster/modules/gke-cluster/variables.tf @@ -65,35 +65,6 @@ variable "disk_type" { default = "pd-standard" } -variable "ip_cidr_block_17" { - type = string - description = "A /17 CIDR IP range reserved for Metastore infrastructure. GKE pods, services, master subnet ranges are derived from it. If specified, will ignore (master_ipv4/pod/service/subnet)_cidr_block." - default = null -} - -variable "subnet_cidr_block" { - type = string - description = "A CIDR IP block reserved for per cluster subnetwork." - default = "" -} - -variable "master_ipv4_cidr_block" { - type = string - description = "The CIDR block for master node, e.g., 172.16.0.32/28." - default = "" -} - -variable "pod_cidr_block" { - type = string - description = "A CIDR IP block reserved for GKE secondary range for pods." - default = "" -} - -variable "service_cidr_block" { - type = string - description = "A CIDR IP block reserved for GKE secondary range for services." - default = "" -} # GKE Dataplane V2 support. This setting is immutable on clusters. # https://cloud.google.com/kubernetes-engine/docs/concepts/dataplane-v2 diff --git a/aiinfra-cluster/validation.tf b/aiinfra-cluster/validation.tf index a02121297..7e498be45 100644 --- a/aiinfra-cluster/validation.tf +++ b/aiinfra-cluster/validation.tf @@ -22,7 +22,4 @@ locals { validate_custom_node_pool = (length(var.custom_node_pools) > 0 && (var.gke_node_pool_count > 0 || var.gke_node_count_per_node_pool > 0)) ? tobool("Custom node pools are provided. Please do not use gke_node_pool_count and gke_node_count_per_node_pool variables.") : true validate_instance_count = (var.orchestrator_type == "gke" && var.instance_count > 0 ) ? tobool("Please do not use instance_count when orchestrator_type is GKE.") : true validate_basic_node_pool = (var.orchestrator_type == "gke" && ((var.gke_node_pool_count > 0 && var.gke_node_count_per_node_pool == 0) || (var.gke_node_pool_count == 0 && var.gke_node_count_per_node_pool > 0))) ? tobool("Please provide both gke_node_pool_count and gke_node_count_per_node_pool variables for GKE basic node pool.") : true - - validate_gke_ip_cidr_block = (var.orchestrator_type != "gke" && var.gke_ip_cidr_block_17 != null) ? tobool("Orchestrator type is not GKE. Please remove gke_ip_cidr_block_17 variables.") : true - } \ No newline at end of file diff --git a/aiinfra-cluster/variables.tf b/aiinfra-cluster/variables.tf index 2df1ea352..b4d1e7d9d 100644 --- a/aiinfra-cluster/variables.tf +++ b/aiinfra-cluster/variables.tf @@ -193,12 +193,6 @@ variable "gke_node_count_per_node_pool" { default = 0 } -variable "gke_ip_cidr_block_17" { - type = string - description = "A /17 CIDR IP range reserved for Metastore infrastructure. GKE pods, services, master subnet ranges are derived from it." - default = null -} - variable "custom_node_pools" { description = "The list of custom nodepools for the GKE cluster." type = list(object({ diff --git a/scripts/_env_var_util.sh b/scripts/_env_var_util.sh index a2bbdcb54..28f588889 100644 --- a/scripts/_env_var_util.sh +++ b/scripts/_env_var_util.sh @@ -256,7 +256,6 @@ EOF [ -n "${GKE_NODE_POOL_COUNT}" ] && echo "gke_node_pool_count = \"${GKE_NODE_POOL_COUNT}\"" [ -n "${GKE_NODE_COUNT_PER_NODE_POOL}" ] && echo "gke_node_count_per_node_pool = ${GKE_NODE_COUNT_PER_NODE_POOL}" [ -n "${CUSTOM_NODE_POOL}" ] && echo "custom_node_pool = \"${CUSTOM_NODE_POOL}\"" - [ -n "${GKE_IP_CIDR_BLOCK_17}" ] && echo "gke_ip_cidr_block_17 = \"${GKE_IP_CIDR_BLOCK_17}\"" return 0 } diff --git a/test/scripts/_env_var_util.sh b/test/scripts/_env_var_util.sh index c3166a213..1add46913 100644 --- a/test/scripts/_env_var_util.sh +++ b/test/scripts/_env_var_util.sh @@ -35,8 +35,7 @@ _env_var_util::test::unset_env () { ENABLE_NOTEBOOK \ GKE_NODE_POOL_COUNT \ GKE_NODE_COUNT_PER_NODE_POOL \ - CUSTOM_NODE_POOL \ - GKE_IP_CIDR_BLOCK_17 + CUSTOM_NODE_POOL } _env_var_util::test::set_required_env () { @@ -70,7 +69,6 @@ _env_var_util::test::set_optional_env () { GKE_NODE_POOL_COUNT='node' GKE_NODE_COUNT_PER_NODE_POOL=3 CUSTOM_NODE_POOL='custom' - GKE_IP_CIDR_BLOCK_17='10.18.0.0/17' } # Test functions diff --git a/test/scripts/_env_var_util_data/optionals_set.tfvars b/test/scripts/_env_var_util_data/optionals_set.tfvars index e3635eceb..77b11c1b8 100644 --- a/test/scripts/_env_var_util_data/optionals_set.tfvars +++ b/test/scripts/_env_var_util_data/optionals_set.tfvars @@ -37,4 +37,3 @@ enable_notebook = "note" gke_node_pool_count = "node" gke_node_count_per_node_pool = 3 custom_node_pool = "custom" -gke_ip_cidr_block_17 = "10.18.0.0/17"