From fff007887483803101145be79e8f83c6dd288e0e Mon Sep 17 00:00:00 2001 From: Dan Cassidy <5643061+rux616@users.noreply.github.com> Date: Sat, 6 Feb 2021 06:31:04 +1300 Subject: [PATCH] feat: Add support for TPUs on beta clusters (#810) * Add TPU support in template files * Add autogenerated files for TPU support --- autogen/main/cluster.tf.tmpl | 1 + autogen/main/firewall.tf.tmpl | 9 +++++++++ autogen/main/outputs.tf.tmpl | 5 +++++ autogen/main/variables.tf.tmpl | 6 ++++++ modules/beta-private-cluster-update-variant/README.md | 2 ++ modules/beta-private-cluster-update-variant/cluster.tf | 1 + modules/beta-private-cluster-update-variant/firewall.tf | 5 +++-- modules/beta-private-cluster-update-variant/outputs.tf | 5 +++++ modules/beta-private-cluster-update-variant/variables.tf | 6 ++++++ modules/beta-private-cluster/README.md | 2 ++ modules/beta-private-cluster/cluster.tf | 1 + modules/beta-private-cluster/firewall.tf | 5 +++-- modules/beta-private-cluster/outputs.tf | 5 +++++ modules/beta-private-cluster/variables.tf | 6 ++++++ modules/beta-public-cluster-update-variant/README.md | 2 ++ modules/beta-public-cluster-update-variant/cluster.tf | 1 + modules/beta-public-cluster-update-variant/firewall.tf | 5 +++-- modules/beta-public-cluster-update-variant/outputs.tf | 5 +++++ modules/beta-public-cluster-update-variant/variables.tf | 6 ++++++ modules/beta-public-cluster/README.md | 2 ++ modules/beta-public-cluster/cluster.tf | 1 + modules/beta-public-cluster/firewall.tf | 5 +++-- modules/beta-public-cluster/outputs.tf | 5 +++++ modules/beta-public-cluster/variables.tf | 6 ++++++ 24 files changed, 89 insertions(+), 8 deletions(-) diff --git a/autogen/main/cluster.tf.tmpl b/autogen/main/cluster.tf.tmpl index 4861038e2..33e81fed6 100644 --- a/autogen/main/cluster.tf.tmpl +++ b/autogen/main/cluster.tf.tmpl @@ -110,6 +110,7 @@ resource "google_container_cluster" "primary" { {% if beta_cluster %} enable_intranode_visibility = var.enable_intranode_visibility enable_kubernetes_alpha = var.enable_kubernetes_alpha + enable_tpu = var.enable_tpu dynamic "pod_security_policy_config" { for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : [] diff --git a/autogen/main/firewall.tf.tmpl b/autogen/main/firewall.tf.tmpl index f722e8494..79069daa8 100644 --- a/autogen/main/firewall.tf.tmpl +++ b/autogen/main/firewall.tf.tmpl @@ -34,11 +34,20 @@ resource "google_compute_firewall" "intra_egress" { direction = "EGRESS" target_tags = [local.cluster_network_tag] + {% if beta_cluster %} + destination_ranges = compact([ + local.cluster_endpoint_for_nodes, + local.cluster_subnet_cidr, + local.cluster_alias_ranges_cidr[var.ip_range_pods], + google_container_cluster.primary.tpu_ipv4_cidr_block, + ]) + {% else %} destination_ranges = [ local.cluster_endpoint_for_nodes, local.cluster_subnet_cidr, local.cluster_alias_ranges_cidr[var.ip_range_pods], ] + {% endif %} # Allow all possible protocols allow { protocol = "tcp" } diff --git a/autogen/main/outputs.tf.tmpl b/autogen/main/outputs.tf.tmpl index da7ded33b..0459a9218 100644 --- a/autogen/main/outputs.tf.tmpl +++ b/autogen/main/outputs.tf.tmpl @@ -178,4 +178,9 @@ output "vertical_pod_autoscaling_enabled" { description = "Whether veritical pod autoscaling is enabled" value = local.cluster_vertical_pod_autoscaling_enabled } + +output "tpu_ipv4_cidr_block" { + description = "The IP range in CIDR notation used for the TPUs" + value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null +} {% endif %} diff --git a/autogen/main/variables.tf.tmpl b/autogen/main/variables.tf.tmpl index de571cf0b..8b8d19650 100644 --- a/autogen/main/variables.tf.tmpl +++ b/autogen/main/variables.tf.tmpl @@ -600,4 +600,10 @@ variable "notification_config_topic" { description = "The desired Pub/Sub topic to which notifications will be sent by GKE. Format is projects/{project}/topics/{topic}." default = "" } + +variable "enable_tpu" { + type = bool + description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!" + default = false +} {% endif %} diff --git a/modules/beta-private-cluster-update-variant/README.md b/modules/beta-private-cluster-update-variant/README.md index c603620d8..d0ff8ff17 100644 --- a/modules/beta-private-cluster-update-variant/README.md +++ b/modules/beta-private-cluster-update-variant/README.md @@ -183,6 +183,7 @@ Then perform the following commands on the root folder: | enable\_private\_nodes | (Beta) Whether nodes have internal IP addresses only | `bool` | `false` | no | | enable\_resource\_consumption\_export | Whether to enable resource consumption metering on this cluster. When enabled, a table will be created in the resource export BigQuery dataset to store resource consumption data. The resulting table can be joined with the resource usage table or with BigQuery billing export. | `bool` | `true` | no | | enable\_shielded\_nodes | Enable Shielded Nodes features on all nodes in this cluster | `bool` | `true` | no | +| enable\_tpu | Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive! | `bool` | `false` | no | | enable\_vertical\_pod\_autoscaling | Vertical Pod Autoscaling automatically adjusts the resources of pods controlled by it | `bool` | `false` | no | | firewall\_inbound\_ports | List of TCP ports for admission/webhook controllers | `list(string)` |
[
"8443",
"9443",
"15017"
]
| no | | firewall\_priority | Priority rule for firewall rules | `number` | `1000` | no | @@ -272,6 +273,7 @@ Then perform the following commands on the root folder: | region | Cluster region | | release\_channel | The release channel of this cluster | | service\_account | The service account to default running nodes as if not overridden in `node_pools`. | +| tpu\_ipv4\_cidr\_block | The IP range in CIDR notation used for the TPUs | | type | Cluster type (regional / zonal) | | vertical\_pod\_autoscaling\_enabled | Whether veritical pod autoscaling is enabled | | zones | List of zones in which the cluster resides | diff --git a/modules/beta-private-cluster-update-variant/cluster.tf b/modules/beta-private-cluster-update-variant/cluster.tf index a3e8dbc05..067508406 100644 --- a/modules/beta-private-cluster-update-variant/cluster.tf +++ b/modules/beta-private-cluster-update-variant/cluster.tf @@ -96,6 +96,7 @@ resource "google_container_cluster" "primary" { enable_binary_authorization = var.enable_binary_authorization enable_intranode_visibility = var.enable_intranode_visibility enable_kubernetes_alpha = var.enable_kubernetes_alpha + enable_tpu = var.enable_tpu dynamic "pod_security_policy_config" { for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : [] diff --git a/modules/beta-private-cluster-update-variant/firewall.tf b/modules/beta-private-cluster-update-variant/firewall.tf index a3eef6b58..4a3e64d6b 100644 --- a/modules/beta-private-cluster-update-variant/firewall.tf +++ b/modules/beta-private-cluster-update-variant/firewall.tf @@ -34,11 +34,12 @@ resource "google_compute_firewall" "intra_egress" { direction = "EGRESS" target_tags = [local.cluster_network_tag] - destination_ranges = [ + destination_ranges = compact([ local.cluster_endpoint_for_nodes, local.cluster_subnet_cidr, local.cluster_alias_ranges_cidr[var.ip_range_pods], - ] + google_container_cluster.primary.tpu_ipv4_cidr_block, + ]) # Allow all possible protocols allow { protocol = "tcp" } diff --git a/modules/beta-private-cluster-update-variant/outputs.tf b/modules/beta-private-cluster-update-variant/outputs.tf index c707199d6..6c1d8ce9d 100644 --- a/modules/beta-private-cluster-update-variant/outputs.tf +++ b/modules/beta-private-cluster-update-variant/outputs.tf @@ -175,3 +175,8 @@ output "vertical_pod_autoscaling_enabled" { description = "Whether veritical pod autoscaling is enabled" value = local.cluster_vertical_pod_autoscaling_enabled } + +output "tpu_ipv4_cidr_block" { + description = "The IP range in CIDR notation used for the TPUs" + value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null +} diff --git a/modules/beta-private-cluster-update-variant/variables.tf b/modules/beta-private-cluster-update-variant/variables.tf index 6d8d5c3eb..4436119ae 100644 --- a/modules/beta-private-cluster-update-variant/variables.tf +++ b/modules/beta-private-cluster-update-variant/variables.tf @@ -579,3 +579,9 @@ variable "notification_config_topic" { description = "The desired Pub/Sub topic to which notifications will be sent by GKE. Format is projects/{project}/topics/{topic}." default = "" } + +variable "enable_tpu" { + type = bool + description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!" + default = false +} diff --git a/modules/beta-private-cluster/README.md b/modules/beta-private-cluster/README.md index 2e28e9d6c..af93e0cde 100644 --- a/modules/beta-private-cluster/README.md +++ b/modules/beta-private-cluster/README.md @@ -161,6 +161,7 @@ Then perform the following commands on the root folder: | enable\_private\_nodes | (Beta) Whether nodes have internal IP addresses only | `bool` | `false` | no | | enable\_resource\_consumption\_export | Whether to enable resource consumption metering on this cluster. When enabled, a table will be created in the resource export BigQuery dataset to store resource consumption data. The resulting table can be joined with the resource usage table or with BigQuery billing export. | `bool` | `true` | no | | enable\_shielded\_nodes | Enable Shielded Nodes features on all nodes in this cluster | `bool` | `true` | no | +| enable\_tpu | Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive! | `bool` | `false` | no | | enable\_vertical\_pod\_autoscaling | Vertical Pod Autoscaling automatically adjusts the resources of pods controlled by it | `bool` | `false` | no | | firewall\_inbound\_ports | List of TCP ports for admission/webhook controllers | `list(string)` |
[
"8443",
"9443",
"15017"
]
| no | | firewall\_priority | Priority rule for firewall rules | `number` | `1000` | no | @@ -250,6 +251,7 @@ Then perform the following commands on the root folder: | region | Cluster region | | release\_channel | The release channel of this cluster | | service\_account | The service account to default running nodes as if not overridden in `node_pools`. | +| tpu\_ipv4\_cidr\_block | The IP range in CIDR notation used for the TPUs | | type | Cluster type (regional / zonal) | | vertical\_pod\_autoscaling\_enabled | Whether veritical pod autoscaling is enabled | | zones | List of zones in which the cluster resides | diff --git a/modules/beta-private-cluster/cluster.tf b/modules/beta-private-cluster/cluster.tf index 05fad5ddc..9ef4e4f66 100644 --- a/modules/beta-private-cluster/cluster.tf +++ b/modules/beta-private-cluster/cluster.tf @@ -96,6 +96,7 @@ resource "google_container_cluster" "primary" { enable_binary_authorization = var.enable_binary_authorization enable_intranode_visibility = var.enable_intranode_visibility enable_kubernetes_alpha = var.enable_kubernetes_alpha + enable_tpu = var.enable_tpu dynamic "pod_security_policy_config" { for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : [] diff --git a/modules/beta-private-cluster/firewall.tf b/modules/beta-private-cluster/firewall.tf index a3eef6b58..4a3e64d6b 100644 --- a/modules/beta-private-cluster/firewall.tf +++ b/modules/beta-private-cluster/firewall.tf @@ -34,11 +34,12 @@ resource "google_compute_firewall" "intra_egress" { direction = "EGRESS" target_tags = [local.cluster_network_tag] - destination_ranges = [ + destination_ranges = compact([ local.cluster_endpoint_for_nodes, local.cluster_subnet_cidr, local.cluster_alias_ranges_cidr[var.ip_range_pods], - ] + google_container_cluster.primary.tpu_ipv4_cidr_block, + ]) # Allow all possible protocols allow { protocol = "tcp" } diff --git a/modules/beta-private-cluster/outputs.tf b/modules/beta-private-cluster/outputs.tf index c707199d6..6c1d8ce9d 100644 --- a/modules/beta-private-cluster/outputs.tf +++ b/modules/beta-private-cluster/outputs.tf @@ -175,3 +175,8 @@ output "vertical_pod_autoscaling_enabled" { description = "Whether veritical pod autoscaling is enabled" value = local.cluster_vertical_pod_autoscaling_enabled } + +output "tpu_ipv4_cidr_block" { + description = "The IP range in CIDR notation used for the TPUs" + value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null +} diff --git a/modules/beta-private-cluster/variables.tf b/modules/beta-private-cluster/variables.tf index 6d8d5c3eb..4436119ae 100644 --- a/modules/beta-private-cluster/variables.tf +++ b/modules/beta-private-cluster/variables.tf @@ -579,3 +579,9 @@ variable "notification_config_topic" { description = "The desired Pub/Sub topic to which notifications will be sent by GKE. Format is projects/{project}/topics/{topic}." default = "" } + +variable "enable_tpu" { + type = bool + description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!" + default = false +} diff --git a/modules/beta-public-cluster-update-variant/README.md b/modules/beta-public-cluster-update-variant/README.md index 5153a5c06..0411453cc 100644 --- a/modules/beta-public-cluster-update-variant/README.md +++ b/modules/beta-public-cluster-update-variant/README.md @@ -174,6 +174,7 @@ Then perform the following commands on the root folder: | enable\_pod\_security\_policy | enabled - Enable the PodSecurityPolicy controller for this cluster. If enabled, pods must be valid under a PodSecurityPolicy to be created. | `bool` | `false` | no | | enable\_resource\_consumption\_export | Whether to enable resource consumption metering on this cluster. When enabled, a table will be created in the resource export BigQuery dataset to store resource consumption data. The resulting table can be joined with the resource usage table or with BigQuery billing export. | `bool` | `true` | no | | enable\_shielded\_nodes | Enable Shielded Nodes features on all nodes in this cluster | `bool` | `true` | no | +| enable\_tpu | Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive! | `bool` | `false` | no | | enable\_vertical\_pod\_autoscaling | Vertical Pod Autoscaling automatically adjusts the resources of pods controlled by it | `bool` | `false` | no | | firewall\_inbound\_ports | List of TCP ports for admission/webhook controllers | `list(string)` |
[
"8443",
"9443",
"15017"
]
| no | | firewall\_priority | Priority rule for firewall rules | `number` | `1000` | no | @@ -259,6 +260,7 @@ Then perform the following commands on the root folder: | region | Cluster region | | release\_channel | The release channel of this cluster | | service\_account | The service account to default running nodes as if not overridden in `node_pools`. | +| tpu\_ipv4\_cidr\_block | The IP range in CIDR notation used for the TPUs | | type | Cluster type (regional / zonal) | | vertical\_pod\_autoscaling\_enabled | Whether veritical pod autoscaling is enabled | | zones | List of zones in which the cluster resides | diff --git a/modules/beta-public-cluster-update-variant/cluster.tf b/modules/beta-public-cluster-update-variant/cluster.tf index ae9b1d75b..a3dd72a1c 100644 --- a/modules/beta-public-cluster-update-variant/cluster.tf +++ b/modules/beta-public-cluster-update-variant/cluster.tf @@ -96,6 +96,7 @@ resource "google_container_cluster" "primary" { enable_binary_authorization = var.enable_binary_authorization enable_intranode_visibility = var.enable_intranode_visibility enable_kubernetes_alpha = var.enable_kubernetes_alpha + enable_tpu = var.enable_tpu dynamic "pod_security_policy_config" { for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : [] diff --git a/modules/beta-public-cluster-update-variant/firewall.tf b/modules/beta-public-cluster-update-variant/firewall.tf index 0d0fa0718..f53ebf4e1 100644 --- a/modules/beta-public-cluster-update-variant/firewall.tf +++ b/modules/beta-public-cluster-update-variant/firewall.tf @@ -34,11 +34,12 @@ resource "google_compute_firewall" "intra_egress" { direction = "EGRESS" target_tags = [local.cluster_network_tag] - destination_ranges = [ + destination_ranges = compact([ local.cluster_endpoint_for_nodes, local.cluster_subnet_cidr, local.cluster_alias_ranges_cidr[var.ip_range_pods], - ] + google_container_cluster.primary.tpu_ipv4_cidr_block, + ]) # Allow all possible protocols allow { protocol = "tcp" } diff --git a/modules/beta-public-cluster-update-variant/outputs.tf b/modules/beta-public-cluster-update-variant/outputs.tf index feebc9905..e367d56e4 100644 --- a/modules/beta-public-cluster-update-variant/outputs.tf +++ b/modules/beta-public-cluster-update-variant/outputs.tf @@ -165,3 +165,8 @@ output "vertical_pod_autoscaling_enabled" { description = "Whether veritical pod autoscaling is enabled" value = local.cluster_vertical_pod_autoscaling_enabled } + +output "tpu_ipv4_cidr_block" { + description = "The IP range in CIDR notation used for the TPUs" + value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null +} diff --git a/modules/beta-public-cluster-update-variant/variables.tf b/modules/beta-public-cluster-update-variant/variables.tf index 9552c2333..ad3a9643b 100644 --- a/modules/beta-public-cluster-update-variant/variables.tf +++ b/modules/beta-public-cluster-update-variant/variables.tf @@ -548,3 +548,9 @@ variable "notification_config_topic" { description = "The desired Pub/Sub topic to which notifications will be sent by GKE. Format is projects/{project}/topics/{topic}." default = "" } + +variable "enable_tpu" { + type = bool + description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!" + default = false +} diff --git a/modules/beta-public-cluster/README.md b/modules/beta-public-cluster/README.md index 58b3dd1fd..ceb1ff74d 100644 --- a/modules/beta-public-cluster/README.md +++ b/modules/beta-public-cluster/README.md @@ -152,6 +152,7 @@ Then perform the following commands on the root folder: | enable\_pod\_security\_policy | enabled - Enable the PodSecurityPolicy controller for this cluster. If enabled, pods must be valid under a PodSecurityPolicy to be created. | `bool` | `false` | no | | enable\_resource\_consumption\_export | Whether to enable resource consumption metering on this cluster. When enabled, a table will be created in the resource export BigQuery dataset to store resource consumption data. The resulting table can be joined with the resource usage table or with BigQuery billing export. | `bool` | `true` | no | | enable\_shielded\_nodes | Enable Shielded Nodes features on all nodes in this cluster | `bool` | `true` | no | +| enable\_tpu | Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive! | `bool` | `false` | no | | enable\_vertical\_pod\_autoscaling | Vertical Pod Autoscaling automatically adjusts the resources of pods controlled by it | `bool` | `false` | no | | firewall\_inbound\_ports | List of TCP ports for admission/webhook controllers | `list(string)` |
[
"8443",
"9443",
"15017"
]
| no | | firewall\_priority | Priority rule for firewall rules | `number` | `1000` | no | @@ -237,6 +238,7 @@ Then perform the following commands on the root folder: | region | Cluster region | | release\_channel | The release channel of this cluster | | service\_account | The service account to default running nodes as if not overridden in `node_pools`. | +| tpu\_ipv4\_cidr\_block | The IP range in CIDR notation used for the TPUs | | type | Cluster type (regional / zonal) | | vertical\_pod\_autoscaling\_enabled | Whether veritical pod autoscaling is enabled | | zones | List of zones in which the cluster resides | diff --git a/modules/beta-public-cluster/cluster.tf b/modules/beta-public-cluster/cluster.tf index 792f647ad..5c3703153 100644 --- a/modules/beta-public-cluster/cluster.tf +++ b/modules/beta-public-cluster/cluster.tf @@ -96,6 +96,7 @@ resource "google_container_cluster" "primary" { enable_binary_authorization = var.enable_binary_authorization enable_intranode_visibility = var.enable_intranode_visibility enable_kubernetes_alpha = var.enable_kubernetes_alpha + enable_tpu = var.enable_tpu dynamic "pod_security_policy_config" { for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : [] diff --git a/modules/beta-public-cluster/firewall.tf b/modules/beta-public-cluster/firewall.tf index 0d0fa0718..f53ebf4e1 100644 --- a/modules/beta-public-cluster/firewall.tf +++ b/modules/beta-public-cluster/firewall.tf @@ -34,11 +34,12 @@ resource "google_compute_firewall" "intra_egress" { direction = "EGRESS" target_tags = [local.cluster_network_tag] - destination_ranges = [ + destination_ranges = compact([ local.cluster_endpoint_for_nodes, local.cluster_subnet_cidr, local.cluster_alias_ranges_cidr[var.ip_range_pods], - ] + google_container_cluster.primary.tpu_ipv4_cidr_block, + ]) # Allow all possible protocols allow { protocol = "tcp" } diff --git a/modules/beta-public-cluster/outputs.tf b/modules/beta-public-cluster/outputs.tf index feebc9905..e367d56e4 100644 --- a/modules/beta-public-cluster/outputs.tf +++ b/modules/beta-public-cluster/outputs.tf @@ -165,3 +165,8 @@ output "vertical_pod_autoscaling_enabled" { description = "Whether veritical pod autoscaling is enabled" value = local.cluster_vertical_pod_autoscaling_enabled } + +output "tpu_ipv4_cidr_block" { + description = "The IP range in CIDR notation used for the TPUs" + value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null +} diff --git a/modules/beta-public-cluster/variables.tf b/modules/beta-public-cluster/variables.tf index 9552c2333..ad3a9643b 100644 --- a/modules/beta-public-cluster/variables.tf +++ b/modules/beta-public-cluster/variables.tf @@ -548,3 +548,9 @@ variable "notification_config_topic" { description = "The desired Pub/Sub topic to which notifications will be sent by GKE. Format is projects/{project}/topics/{topic}." default = "" } + +variable "enable_tpu" { + type = bool + description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!" + default = false +}