Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use GKE version 1.25. Remove PSP and istio from GKE cluster creation. #136

Merged
merged 2 commits into from
Apr 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ The optional parameters are:
- __gke__: A private GKE cluster is created with private node pool following the recommendations from the GKE team.
1. ***GKE_NODE_POOL_COUNT***: The number of homogeneous node pools for GKE cluster. Only applicable when `ORCHESTRATOR_TYPE` is `gke`.
1. ***GKE_NODE_COUNT_PER_NODE_POOL***: The desired node count per node pool for GKE cluster. Only applicable when `ORCHESTRATOR_TYPE` is `gke`.
1. ***GKE_IP_CIDR_BLOCK_17***: A /17 CIDR IP range reserved for Metastore infrastructure. GKE pods, services, master subnet ranges are derived from it. Please make sure that the CIDR block is available otherwise the GKE cluster creation will fail. Only applicable when `ORCHESTRATOR_TYPE` is `gke`. Ex: 10.12.0.0/17
1. ***CUSTOM_NODE_POOL***: The custom node pool description for GKE. The structure of the custom node pool is list of node pool objects. The node pool object is
```
name = string
Expand Down
1 change: 0 additions & 1 deletion aiinfra-cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ module "aiinfra-compute" {
module.aiinfra-network
]
enable_gke = var.orchestrator_type == "gke"
gke_ip_cidr_block_17 = var.gke_ip_cidr_block_17
node_pools = length(var.custom_node_pools) != 0 || length(local.basic_node_pools) != 0 ? coalescelist(var.custom_node_pools, local.basic_node_pools) : []
}

Expand Down
1 change: 0 additions & 1 deletion aiinfra-cluster/modules/aiinfra-compute/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,6 @@ module "aiinfra-gke" {
name = "${local.resource_prefix}-gke"
disk_size_gb = var.disk_size_gb
disk_type = var.disk_type
ip_cidr_block_17 = var.gke_ip_cidr_block_17
network_self_link = var.network_self_link
subnetwork_self_link = var.subnetwork_self_link
node_service_account = lookup(var.service_account, "email", null)
Expand Down
6 changes: 0 additions & 6 deletions aiinfra-cluster/modules/aiinfra-compute/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -284,12 +284,6 @@ variable "enable_gke" {
default = false
}

variable "gke_ip_cidr_block_17" {
type = string
description = "A /17 CIDR IP range reserved for Metastore infrastructure. GKE pods, services, master subnet ranges are derived from it."
default = null
}

variable "node_pools" {
description = "The list of nodepools for the GKE cluster."
type = list(object({
Expand Down
53 changes: 1 addition & 52 deletions aiinfra-cluster/modules/gke-cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,6 @@
* limitations under the License.
*/

locals {
# This cuts a /17 ip_cidr_range into the following ranges:
# -/17
# - /18 gke pod range (max 512 nodes with /27 per node)
# - /18
# - /21 enough for 2k nodes
# - /22 gke service range (enough for 1k services)
# - /28 master range
ip_cidr_block = var.ip_cidr_block_17 == null ? "10.${random_integer.cidr_octet.result}.0.0/17" : var.ip_cidr_block_17
cidr_blocks = local.ip_cidr_block == "" ? [] : cidrsubnets(local.ip_cidr_block, 18 - 17, 21 - 17, 22 - 17, 28 - 17)
pod_cidr_block = local.ip_cidr_block == "" ? var.pod_cidr_block : local.cidr_blocks[0]
subnet_cidr_block = local.ip_cidr_block == "" ? var.subnet_cidr_block : local.cidr_blocks[1]
service_cidr_block = local.ip_cidr_block == "" ? var.service_cidr_block : local.cidr_blocks[2]
master_cidr_block = local.ip_cidr_block == "" ? var.master_ipv4_cidr_block : local.cidr_blocks[3]
}

# Definition of the private GKE cluster.
resource "google_container_cluster" "gke-cluster" {
provider = google-beta
Expand All @@ -46,6 +30,7 @@ resource "google_container_cluster" "gke-cluster" {
# documentation for the container_cluster resource.
remove_default_node_pool = true
initial_node_count = 1
min_master_version = "1.25.6-gke.1000"

network = var.network_self_link
subnetwork = var.subnetwork_self_link
Expand All @@ -57,8 +42,6 @@ resource "google_container_cluster" "gke-cluster" {
master_authorized_networks_config {
}

private_ipv6_google_access = "PRIVATE_IPV6_GOOGLE_ACCESS_TO_GOOGLE"

# Security Note: Basic Auth Disabled, no client certificate accepted.
# The only way to manage the master is via OpenID tokens (aka gcloud).
# (requirement H5; go/gke-cluster-pattern#req1.1.7)
Expand All @@ -67,12 +50,6 @@ resource "google_container_cluster" "gke-cluster" {
issue_client_certificate = false
}
}
# Requires a Pod Security Policy
#
# go/gke-cluster-pattern#req3.3.1 (part of)
pod_security_policy_config {
enabled = true
}

# Enable shielded nodes to meet go/gke-cluster-pattern#req1.1.5
enable_shielded_nodes = true
Expand All @@ -95,21 +72,6 @@ resource "google_container_cluster" "gke-cluster" {
provider = "PROVIDER_UNSPECIFIED"
}

private_cluster_config {
enable_private_endpoint = true
enable_private_nodes = true
master_ipv4_cidr_block = local.master_cidr_block
master_global_access_config {
enabled = false
}
}

# Adding this block enables IP aliasing.
ip_allocation_policy {
cluster_ipv4_cidr_block = local.pod_cidr_block == "" ? null : local.pod_cidr_block
services_ipv4_cidr_block = local.service_cidr_block == "" ? null : local.service_cidr_block
}

# This change will also enable the metadata server on nodes.
# go/gke-cluster-pattern#req4.1.1#req1.1.5 (parts of, vTPM is another section)
workload_identity_config {
Expand Down Expand Up @@ -142,19 +104,6 @@ resource "google_container_cluster" "gke-cluster" {
}

addons_config {
# We optionally enable the Istio Add-on. Istio is required if there is
# any pod-to-pod communication. We keep it optional
# to simplify small deployments where there is no pod-to-pod communication
# (for now)
#
# go/gke-cluster-pattern#req5.2.1 for internal traffic, and satisfied
# only if var.enable_istio = "true".
istio_config {
# Awkward flag to enable istio_config.
disabled = false
auth = "AUTH_MUTUAL_TLS"
}

gce_persistent_disk_csi_driver_config {
enabled = true
}
Expand Down
29 changes: 0 additions & 29 deletions aiinfra-cluster/modules/gke-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -65,35 +65,6 @@ variable "disk_type" {
default = "pd-standard"
}

variable "ip_cidr_block_17" {
type = string
description = "A /17 CIDR IP range reserved for Metastore infrastructure. GKE pods, services, master subnet ranges are derived from it. If specified, will ignore (master_ipv4/pod/service/subnet)_cidr_block."
default = null
}

variable "subnet_cidr_block" {
type = string
description = "A CIDR IP block reserved for per cluster subnetwork."
default = ""
}

variable "master_ipv4_cidr_block" {
type = string
description = "The CIDR block for master node, e.g., 172.16.0.32/28."
default = ""
}

variable "pod_cidr_block" {
type = string
description = "A CIDR IP block reserved for GKE secondary range for pods."
default = ""
}

variable "service_cidr_block" {
type = string
description = "A CIDR IP block reserved for GKE secondary range for services."
default = ""
}

# GKE Dataplane V2 support. This setting is immutable on clusters.
# https://cloud.google.com/kubernetes-engine/docs/concepts/dataplane-v2
Expand Down
3 changes: 0 additions & 3 deletions aiinfra-cluster/validation.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,4 @@ locals {
validate_custom_node_pool = (length(var.custom_node_pools) > 0 && (var.gke_node_pool_count > 0 || var.gke_node_count_per_node_pool > 0)) ? tobool("Custom node pools are provided. Please do not use gke_node_pool_count and gke_node_count_per_node_pool variables.") : true
validate_instance_count = (var.orchestrator_type == "gke" && var.instance_count > 0 ) ? tobool("Please do not use instance_count when orchestrator_type is GKE.") : true
validate_basic_node_pool = (var.orchestrator_type == "gke" && ((var.gke_node_pool_count > 0 && var.gke_node_count_per_node_pool == 0) || (var.gke_node_pool_count == 0 && var.gke_node_count_per_node_pool > 0))) ? tobool("Please provide both gke_node_pool_count and gke_node_count_per_node_pool variables for GKE basic node pool.") : true

validate_gke_ip_cidr_block = (var.orchestrator_type != "gke" && var.gke_ip_cidr_block_17 != null) ? tobool("Orchestrator type is not GKE. Please remove the gke_ip_cidr_block_17 variable.") : true

}
6 changes: 0 additions & 6 deletions aiinfra-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -193,12 +193,6 @@ variable "gke_node_count_per_node_pool" {
default = 0
}

variable "gke_ip_cidr_block_17" {
type = string
description = "A /17 CIDR IP range reserved for Metastore infrastructure. GKE pods, services, master subnet ranges are derived from it."
default = null
}

variable "custom_node_pools" {
description = "The list of custom nodepools for the GKE cluster."
type = list(object({
Expand Down
1 change: 0 additions & 1 deletion scripts/_env_var_util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,6 @@ EOF
[ -n "${GKE_NODE_POOL_COUNT}" ] && echo "gke_node_pool_count = \"${GKE_NODE_POOL_COUNT}\""
[ -n "${GKE_NODE_COUNT_PER_NODE_POOL}" ] && echo "gke_node_count_per_node_pool = ${GKE_NODE_COUNT_PER_NODE_POOL}"
[ -n "${CUSTOM_NODE_POOL}" ] && echo "custom_node_pool = \"${CUSTOM_NODE_POOL}\""
[ -n "${GKE_IP_CIDR_BLOCK_17}" ] && echo "gke_ip_cidr_block_17 = \"${GKE_IP_CIDR_BLOCK_17}\""

return 0
}
4 changes: 1 addition & 3 deletions test/scripts/_env_var_util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,7 @@ _env_var_util::test::unset_env () {
ENABLE_NOTEBOOK \
GKE_NODE_POOL_COUNT \
GKE_NODE_COUNT_PER_NODE_POOL \
CUSTOM_NODE_POOL \
GKE_IP_CIDR_BLOCK_17
CUSTOM_NODE_POOL
}

_env_var_util::test::set_required_env () {
Expand Down Expand Up @@ -70,7 +69,6 @@ _env_var_util::test::set_optional_env () {
GKE_NODE_POOL_COUNT='node'
GKE_NODE_COUNT_PER_NODE_POOL=3
CUSTOM_NODE_POOL='custom'
GKE_IP_CIDR_BLOCK_17='10.18.0.0/17'
}

# Test functions
Expand Down
1 change: 0 additions & 1 deletion test/scripts/_env_var_util_data/optionals_set.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,3 @@ enable_notebook = "note"
gke_node_pool_count = "node"
gke_node_count_per_node_pool = 3
custom_node_pool = "custom"
gke_ip_cidr_block_17 = "10.18.0.0/17"