Skip to content

Commit

Permalink
feat: Add support for TPUs on beta clusters (#810)
Browse files Browse the repository at this point in the history
* Add TPU support in template files

* Add autogenerated files for TPU support
  • Loading branch information
rux616 committed Feb 5, 2021
1 parent bf4d0da commit fff0078
Show file tree
Hide file tree
Showing 24 changed files with 89 additions and 8 deletions.
1 change: 1 addition & 0 deletions autogen/main/cluster.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ resource "google_container_cluster" "primary" {
{% if beta_cluster %}
enable_intranode_visibility = var.enable_intranode_visibility
enable_kubernetes_alpha = var.enable_kubernetes_alpha
enable_tpu = var.enable_tpu

dynamic "pod_security_policy_config" {
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []
Expand Down
9 changes: 9 additions & 0 deletions autogen/main/firewall.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,20 @@ resource "google_compute_firewall" "intra_egress" {
direction = "EGRESS"

target_tags = [local.cluster_network_tag]
{% if beta_cluster %}
destination_ranges = compact([
local.cluster_endpoint_for_nodes,
local.cluster_subnet_cidr,
local.cluster_alias_ranges_cidr[var.ip_range_pods],
google_container_cluster.primary.tpu_ipv4_cidr_block,
])
{% else %}
destination_ranges = [
local.cluster_endpoint_for_nodes,
local.cluster_subnet_cidr,
local.cluster_alias_ranges_cidr[var.ip_range_pods],
]
{% endif %}

# Allow all possible protocols
allow { protocol = "tcp" }
Expand Down
5 changes: 5 additions & 0 deletions autogen/main/outputs.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -178,4 +178,9 @@ output "vertical_pod_autoscaling_enabled" {
description = "Whether veritical pod autoscaling is enabled"
value = local.cluster_vertical_pod_autoscaling_enabled
}

output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
{% endif %}
6 changes: 6 additions & 0 deletions autogen/main/variables.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -600,4 +600,10 @@ variable "notification_config_topic" {
description = "The desired Pub/Sub topic to which notifications will be sent by GKE. Format is projects/{project}/topics/{topic}."
default = ""
}

variable "enable_tpu" {
type = bool
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}
{% endif %}
2 changes: 2 additions & 0 deletions modules/beta-private-cluster-update-variant/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ Then perform the following commands on the root folder:
| enable\_private\_nodes | (Beta) Whether nodes have internal IP addresses only | `bool` | `false` | no |
| enable\_resource\_consumption\_export | Whether to enable resource consumption metering on this cluster. When enabled, a table will be created in the resource export BigQuery dataset to store resource consumption data. The resulting table can be joined with the resource usage table or with BigQuery billing export. | `bool` | `true` | no |
| enable\_shielded\_nodes | Enable Shielded Nodes features on all nodes in this cluster | `bool` | `true` | no |
| enable\_tpu | Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive! | `bool` | `false` | no |
| enable\_vertical\_pod\_autoscaling | Vertical Pod Autoscaling automatically adjusts the resources of pods controlled by it | `bool` | `false` | no |
| firewall\_inbound\_ports | List of TCP ports for admission/webhook controllers | `list(string)` | <pre>[<br> "8443",<br> "9443",<br> "15017"<br>]</pre> | no |
| firewall\_priority | Priority rule for firewall rules | `number` | `1000` | no |
Expand Down Expand Up @@ -272,6 +273,7 @@ Then perform the following commands on the root folder:
| region | Cluster region |
| release\_channel | The release channel of this cluster |
| service\_account | The service account to default running nodes as if not overridden in `node_pools`. |
| tpu\_ipv4\_cidr\_block | The IP range in CIDR notation used for the TPUs |
| type | Cluster type (regional / zonal) |
| vertical\_pod\_autoscaling\_enabled | Whether veritical pod autoscaling is enabled |
| zones | List of zones in which the cluster resides |
Expand Down
1 change: 1 addition & 0 deletions modules/beta-private-cluster-update-variant/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ resource "google_container_cluster" "primary" {
enable_binary_authorization = var.enable_binary_authorization
enable_intranode_visibility = var.enable_intranode_visibility
enable_kubernetes_alpha = var.enable_kubernetes_alpha
enable_tpu = var.enable_tpu

dynamic "pod_security_policy_config" {
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []
Expand Down
5 changes: 3 additions & 2 deletions modules/beta-private-cluster-update-variant/firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,12 @@ resource "google_compute_firewall" "intra_egress" {
direction = "EGRESS"

target_tags = [local.cluster_network_tag]
destination_ranges = [
destination_ranges = compact([
local.cluster_endpoint_for_nodes,
local.cluster_subnet_cidr,
local.cluster_alias_ranges_cidr[var.ip_range_pods],
]
google_container_cluster.primary.tpu_ipv4_cidr_block,
])

# Allow all possible protocols
allow { protocol = "tcp" }
Expand Down
5 changes: 5 additions & 0 deletions modules/beta-private-cluster-update-variant/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -175,3 +175,8 @@ output "vertical_pod_autoscaling_enabled" {
description = "Whether veritical pod autoscaling is enabled"
value = local.cluster_vertical_pod_autoscaling_enabled
}

output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
6 changes: 6 additions & 0 deletions modules/beta-private-cluster-update-variant/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -579,3 +579,9 @@ variable "notification_config_topic" {
description = "The desired Pub/Sub topic to which notifications will be sent by GKE. Format is projects/{project}/topics/{topic}."
default = ""
}

variable "enable_tpu" {
type = bool
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}
2 changes: 2 additions & 0 deletions modules/beta-private-cluster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ Then perform the following commands on the root folder:
| enable\_private\_nodes | (Beta) Whether nodes have internal IP addresses only | `bool` | `false` | no |
| enable\_resource\_consumption\_export | Whether to enable resource consumption metering on this cluster. When enabled, a table will be created in the resource export BigQuery dataset to store resource consumption data. The resulting table can be joined with the resource usage table or with BigQuery billing export. | `bool` | `true` | no |
| enable\_shielded\_nodes | Enable Shielded Nodes features on all nodes in this cluster | `bool` | `true` | no |
| enable\_tpu | Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive! | `bool` | `false` | no |
| enable\_vertical\_pod\_autoscaling | Vertical Pod Autoscaling automatically adjusts the resources of pods controlled by it | `bool` | `false` | no |
| firewall\_inbound\_ports | List of TCP ports for admission/webhook controllers | `list(string)` | <pre>[<br> "8443",<br> "9443",<br> "15017"<br>]</pre> | no |
| firewall\_priority | Priority rule for firewall rules | `number` | `1000` | no |
Expand Down Expand Up @@ -250,6 +251,7 @@ Then perform the following commands on the root folder:
| region | Cluster region |
| release\_channel | The release channel of this cluster |
| service\_account | The service account to default running nodes as if not overridden in `node_pools`. |
| tpu\_ipv4\_cidr\_block | The IP range in CIDR notation used for the TPUs |
| type | Cluster type (regional / zonal) |
| vertical\_pod\_autoscaling\_enabled | Whether veritical pod autoscaling is enabled |
| zones | List of zones in which the cluster resides |
Expand Down
1 change: 1 addition & 0 deletions modules/beta-private-cluster/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ resource "google_container_cluster" "primary" {
enable_binary_authorization = var.enable_binary_authorization
enable_intranode_visibility = var.enable_intranode_visibility
enable_kubernetes_alpha = var.enable_kubernetes_alpha
enable_tpu = var.enable_tpu

dynamic "pod_security_policy_config" {
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []
Expand Down
5 changes: 3 additions & 2 deletions modules/beta-private-cluster/firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,12 @@ resource "google_compute_firewall" "intra_egress" {
direction = "EGRESS"

target_tags = [local.cluster_network_tag]
destination_ranges = [
destination_ranges = compact([
local.cluster_endpoint_for_nodes,
local.cluster_subnet_cidr,
local.cluster_alias_ranges_cidr[var.ip_range_pods],
]
google_container_cluster.primary.tpu_ipv4_cidr_block,
])

# Allow all possible protocols
allow { protocol = "tcp" }
Expand Down
5 changes: 5 additions & 0 deletions modules/beta-private-cluster/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -175,3 +175,8 @@ output "vertical_pod_autoscaling_enabled" {
description = "Whether veritical pod autoscaling is enabled"
value = local.cluster_vertical_pod_autoscaling_enabled
}

output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
6 changes: 6 additions & 0 deletions modules/beta-private-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -579,3 +579,9 @@ variable "notification_config_topic" {
description = "The desired Pub/Sub topic to which notifications will be sent by GKE. Format is projects/{project}/topics/{topic}."
default = ""
}

variable "enable_tpu" {
type = bool
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}
2 changes: 2 additions & 0 deletions modules/beta-public-cluster-update-variant/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ Then perform the following commands on the root folder:
| enable\_pod\_security\_policy | enabled - Enable the PodSecurityPolicy controller for this cluster. If enabled, pods must be valid under a PodSecurityPolicy to be created. | `bool` | `false` | no |
| enable\_resource\_consumption\_export | Whether to enable resource consumption metering on this cluster. When enabled, a table will be created in the resource export BigQuery dataset to store resource consumption data. The resulting table can be joined with the resource usage table or with BigQuery billing export. | `bool` | `true` | no |
| enable\_shielded\_nodes | Enable Shielded Nodes features on all nodes in this cluster | `bool` | `true` | no |
| enable\_tpu | Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive! | `bool` | `false` | no |
| enable\_vertical\_pod\_autoscaling | Vertical Pod Autoscaling automatically adjusts the resources of pods controlled by it | `bool` | `false` | no |
| firewall\_inbound\_ports | List of TCP ports for admission/webhook controllers | `list(string)` | <pre>[<br> "8443",<br> "9443",<br> "15017"<br>]</pre> | no |
| firewall\_priority | Priority rule for firewall rules | `number` | `1000` | no |
Expand Down Expand Up @@ -259,6 +260,7 @@ Then perform the following commands on the root folder:
| region | Cluster region |
| release\_channel | The release channel of this cluster |
| service\_account | The service account to default running nodes as if not overridden in `node_pools`. |
| tpu\_ipv4\_cidr\_block | The IP range in CIDR notation used for the TPUs |
| type | Cluster type (regional / zonal) |
| vertical\_pod\_autoscaling\_enabled | Whether veritical pod autoscaling is enabled |
| zones | List of zones in which the cluster resides |
Expand Down
1 change: 1 addition & 0 deletions modules/beta-public-cluster-update-variant/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ resource "google_container_cluster" "primary" {
enable_binary_authorization = var.enable_binary_authorization
enable_intranode_visibility = var.enable_intranode_visibility
enable_kubernetes_alpha = var.enable_kubernetes_alpha
enable_tpu = var.enable_tpu

dynamic "pod_security_policy_config" {
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []
Expand Down
5 changes: 3 additions & 2 deletions modules/beta-public-cluster-update-variant/firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,12 @@ resource "google_compute_firewall" "intra_egress" {
direction = "EGRESS"

target_tags = [local.cluster_network_tag]
destination_ranges = [
destination_ranges = compact([
local.cluster_endpoint_for_nodes,
local.cluster_subnet_cidr,
local.cluster_alias_ranges_cidr[var.ip_range_pods],
]
google_container_cluster.primary.tpu_ipv4_cidr_block,
])

# Allow all possible protocols
allow { protocol = "tcp" }
Expand Down
5 changes: 5 additions & 0 deletions modules/beta-public-cluster-update-variant/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,8 @@ output "vertical_pod_autoscaling_enabled" {
description = "Whether veritical pod autoscaling is enabled"
value = local.cluster_vertical_pod_autoscaling_enabled
}

output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
6 changes: 6 additions & 0 deletions modules/beta-public-cluster-update-variant/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -548,3 +548,9 @@ variable "notification_config_topic" {
description = "The desired Pub/Sub topic to which notifications will be sent by GKE. Format is projects/{project}/topics/{topic}."
default = ""
}

variable "enable_tpu" {
type = bool
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}
2 changes: 2 additions & 0 deletions modules/beta-public-cluster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ Then perform the following commands on the root folder:
| enable\_pod\_security\_policy | enabled - Enable the PodSecurityPolicy controller for this cluster. If enabled, pods must be valid under a PodSecurityPolicy to be created. | `bool` | `false` | no |
| enable\_resource\_consumption\_export | Whether to enable resource consumption metering on this cluster. When enabled, a table will be created in the resource export BigQuery dataset to store resource consumption data. The resulting table can be joined with the resource usage table or with BigQuery billing export. | `bool` | `true` | no |
| enable\_shielded\_nodes | Enable Shielded Nodes features on all nodes in this cluster | `bool` | `true` | no |
| enable\_tpu | Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive! | `bool` | `false` | no |
| enable\_vertical\_pod\_autoscaling | Vertical Pod Autoscaling automatically adjusts the resources of pods controlled by it | `bool` | `false` | no |
| firewall\_inbound\_ports | List of TCP ports for admission/webhook controllers | `list(string)` | <pre>[<br> "8443",<br> "9443",<br> "15017"<br>]</pre> | no |
| firewall\_priority | Priority rule for firewall rules | `number` | `1000` | no |
Expand Down Expand Up @@ -237,6 +238,7 @@ Then perform the following commands on the root folder:
| region | Cluster region |
| release\_channel | The release channel of this cluster |
| service\_account | The service account to default running nodes as if not overridden in `node_pools`. |
| tpu\_ipv4\_cidr\_block | The IP range in CIDR notation used for the TPUs |
| type | Cluster type (regional / zonal) |
| vertical\_pod\_autoscaling\_enabled | Whether veritical pod autoscaling is enabled |
| zones | List of zones in which the cluster resides |
Expand Down
1 change: 1 addition & 0 deletions modules/beta-public-cluster/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ resource "google_container_cluster" "primary" {
enable_binary_authorization = var.enable_binary_authorization
enable_intranode_visibility = var.enable_intranode_visibility
enable_kubernetes_alpha = var.enable_kubernetes_alpha
enable_tpu = var.enable_tpu

dynamic "pod_security_policy_config" {
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []
Expand Down
5 changes: 3 additions & 2 deletions modules/beta-public-cluster/firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,12 @@ resource "google_compute_firewall" "intra_egress" {
direction = "EGRESS"

target_tags = [local.cluster_network_tag]
destination_ranges = [
destination_ranges = compact([
local.cluster_endpoint_for_nodes,
local.cluster_subnet_cidr,
local.cluster_alias_ranges_cidr[var.ip_range_pods],
]
google_container_cluster.primary.tpu_ipv4_cidr_block,
])

# Allow all possible protocols
allow { protocol = "tcp" }
Expand Down
5 changes: 5 additions & 0 deletions modules/beta-public-cluster/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,8 @@ output "vertical_pod_autoscaling_enabled" {
description = "Whether veritical pod autoscaling is enabled"
value = local.cluster_vertical_pod_autoscaling_enabled
}

output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
6 changes: 6 additions & 0 deletions modules/beta-public-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -548,3 +548,9 @@ variable "notification_config_topic" {
description = "The desired Pub/Sub topic to which notifications will be sent by GKE. Format is projects/{project}/topics/{topic}."
default = ""
}

variable "enable_tpu" {
type = bool
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}

0 comments on commit fff0078

Please sign in to comment.