diff --git a/README.md b/README.md index c354cd843..0a55b63fd 100644 --- a/README.md +++ b/README.md @@ -54,24 +54,29 @@ module "gke" { node_pools = [ { - name = "default-node-pool" - machine_type = "e2-medium" - node_locations = "us-central1-b,us-central1-c" - min_count = 1 - max_count = 100 - local_ssd_count = 0 - spot = false - disk_size_gb = 100 - disk_type = "pd-standard" - image_type = "COS_CONTAINERD" - enable_gcfs = false - enable_gvnic = false - logging_variant = "DEFAULT" - auto_repair = true - auto_upgrade = true - service_account = "project-service-account@.iam.gserviceaccount.com" - preemptible = false - initial_node_count = 80 + name = "default-node-pool" + machine_type = "e2-medium" + node_locations = "us-central1-b,us-central1-c" + min_count = 1 + max_count = 100 + local_ssd_count = 0 + spot = false + disk_size_gb = 100 + disk_type = "pd-standard" + image_type = "COS_CONTAINERD" + enable_gcfs = false + enable_gvnic = false + logging_variant = "DEFAULT" + auto_repair = true + auto_upgrade = true + service_account = "project-service-account@.iam.gserviceaccount.com" + preemptible = false + initial_node_count = 80 + accelerator_count = 1 + accelerator_type = "nvidia-l4" + gpu_driver_version = "LATEST" + gpu_sharing_strategy = "TIME_SHARING" + max_shared_clients_per_gpu = 2 }, ] diff --git a/autogen/main/README.md b/autogen/main/README.md index 5801d04b4..5b266c511 100644 --- a/autogen/main/README.md +++ b/autogen/main/README.md @@ -96,27 +96,32 @@ module "gke" { {% if autopilot_cluster != true %} node_pools = [ { - name = "default-node-pool" - machine_type = "e2-medium" - node_locations = "us-central1-b,us-central1-c" - min_count = 1 - max_count = 100 - local_ssd_count = 0 - spot = false + name = "default-node-pool" + machine_type = "e2-medium" + node_locations = "us-central1-b,us-central1-c" + min_count = 1 + max_count = 100 + local_ssd_count = 0 + spot = false {% if beta_cluster %} - local_ssd_ephemeral_count = 0 + local_ssd_ephemeral_count = 0 {% endif %} - disk_size_gb = 100 - disk_type = "pd-standard" - image_type = "COS_CONTAINERD" - enable_gcfs = false - enable_gvnic = false - logging_variant = "DEFAULT" - auto_repair = true - auto_upgrade = true - service_account = "project-service-account@.iam.gserviceaccount.com" - preemptible = false - initial_node_count = 80 + disk_size_gb = 100 + disk_type = "pd-standard" + image_type = "COS_CONTAINERD" + enable_gcfs = false + enable_gvnic = false + logging_variant = "DEFAULT" + auto_repair = true + auto_upgrade = true + service_account = "project-service-account@.iam.gserviceaccount.com" + preemptible = false + initial_node_count = 80 + accelerator_count = 1 + accelerator_type = "nvidia-l4" + gpu_driver_version = "LATEST" + gpu_sharing_strategy = "TIME_SHARING" + max_shared_clients_per_gpu = 2 }, ] diff --git a/autogen/main/cluster.tf.tmpl b/autogen/main/cluster.tf.tmpl index d0c7b3716..e129fb421 100644 --- a/autogen/main/cluster.tf.tmpl +++ b/autogen/main/cluster.tf.tmpl @@ -643,6 +643,8 @@ locals { "accelerator_type", "gpu_partition_size", "gpu_driver_version", + "gpu_sharing_strategy", + "max_shared_clients_per_gpu", "enable_secure_boot", "enable_integrity_monitoring", "local_ssd_count", @@ -927,6 +929,14 @@ resource "google_container_node_pool" "windows_pools" { gpu_driver_version = lookup(each.value, "gpu_driver_version", "") } } + + dynamic "gpu_sharing_config" { + for_each = lookup(each.value, "gpu_sharing_strategy", "") != "" ? [1] : [] + content { + gpu_sharing_strategy = lookup(each.value, "gpu_sharing_strategy", "") + max_shared_clients_per_gpu = lookup(each.value, "max_shared_clients_per_gpu", 2) + } + } } } diff --git a/cluster.tf b/cluster.tf index 233c4c475..eca430d3e 100644 --- a/cluster.tf +++ b/cluster.tf @@ -644,6 +644,14 @@ resource "google_container_node_pool" "pools" { gpu_driver_version = lookup(each.value, "gpu_driver_version", "") } } + + dynamic "gpu_sharing_config" { + for_each = lookup(each.value, "gpu_sharing_strategy", "") != "" ? [1] : [] + content { + gpu_sharing_strategy = lookup(each.value, "gpu_sharing_strategy", "") + max_shared_clients_per_gpu = lookup(each.value, "max_shared_clients_per_gpu", 2) + } + } } } @@ -882,6 +890,14 @@ resource "google_container_node_pool" "windows_pools" { gpu_driver_version = lookup(each.value, "gpu_driver_version", "") } } + + dynamic "gpu_sharing_config" { + for_each = lookup(each.value, "gpu_sharing_strategy", "") != "" ? [1] : [] + content { + gpu_sharing_strategy = lookup(each.value, "gpu_sharing_strategy", "") + max_shared_clients_per_gpu = lookup(each.value, "max_shared_clients_per_gpu", 2) + } + } } } diff --git a/modules/beta-private-cluster-update-variant/README.md b/modules/beta-private-cluster-update-variant/README.md index 883af3419..06c96966b 100644 --- a/modules/beta-private-cluster-update-variant/README.md +++ b/modules/beta-private-cluster-update-variant/README.md @@ -84,25 +84,30 @@ module "gke" { node_pools = [ { - name = "default-node-pool" - machine_type = "e2-medium" - node_locations = "us-central1-b,us-central1-c" - min_count = 1 - max_count = 100 - local_ssd_count = 0 - spot = false - local_ssd_ephemeral_count = 0 - disk_size_gb = 100 - disk_type = "pd-standard" - image_type = "COS_CONTAINERD" - enable_gcfs = false - enable_gvnic = false - logging_variant = "DEFAULT" - auto_repair = true - auto_upgrade = true - service_account = "project-service-account@.iam.gserviceaccount.com" - preemptible = false - initial_node_count = 80 + name = "default-node-pool" + machine_type = "e2-medium" + node_locations = "us-central1-b,us-central1-c" + min_count = 1 + max_count = 100 + local_ssd_count = 0 + spot = false + local_ssd_ephemeral_count = 0 + disk_size_gb = 100 + disk_type = "pd-standard" + image_type = "COS_CONTAINERD" + enable_gcfs = false + enable_gvnic = false + logging_variant = "DEFAULT" + auto_repair = true + auto_upgrade = true + service_account = "project-service-account@.iam.gserviceaccount.com" + preemptible = false + initial_node_count = 80 + accelerator_count = 1 + accelerator_type = "nvidia-l4" + gpu_driver_version = "LATEST" + gpu_sharing_strategy = "TIME_SHARING" + max_shared_clients_per_gpu = 2 }, ] diff --git a/modules/beta-private-cluster-update-variant/cluster.tf b/modules/beta-private-cluster-update-variant/cluster.tf index f39cc552e..f5f567154 100644 --- a/modules/beta-private-cluster-update-variant/cluster.tf +++ b/modules/beta-private-cluster-update-variant/cluster.tf @@ -552,6 +552,8 @@ locals { "accelerator_type", "gpu_partition_size", "gpu_driver_version", + "gpu_sharing_strategy", + "max_shared_clients_per_gpu", "enable_secure_boot", "enable_integrity_monitoring", "local_ssd_count", @@ -811,6 +813,14 @@ resource "google_container_node_pool" "pools" { gpu_driver_version = lookup(each.value, "gpu_driver_version", "") } } + + dynamic "gpu_sharing_config" { + for_each = lookup(each.value, "gpu_sharing_strategy", "") != "" ? [1] : [] + content { + gpu_sharing_strategy = lookup(each.value, "gpu_sharing_strategy", "") + max_shared_clients_per_gpu = lookup(each.value, "max_shared_clients_per_gpu", 2) + } + } } } @@ -1075,6 +1085,14 @@ resource "google_container_node_pool" "windows_pools" { gpu_driver_version = lookup(each.value, "gpu_driver_version", "") } } + + dynamic "gpu_sharing_config" { + for_each = lookup(each.value, "gpu_sharing_strategy", "") != "" ? [1] : [] + content { + gpu_sharing_strategy = lookup(each.value, "gpu_sharing_strategy", "") + max_shared_clients_per_gpu = lookup(each.value, "max_shared_clients_per_gpu", 2) + } + } } } diff --git a/modules/beta-private-cluster/README.md b/modules/beta-private-cluster/README.md index 0f06cabef..59f885d67 100644 --- a/modules/beta-private-cluster/README.md +++ b/modules/beta-private-cluster/README.md @@ -62,25 +62,30 @@ module "gke" { node_pools = [ { - name = "default-node-pool" - machine_type = "e2-medium" - node_locations = "us-central1-b,us-central1-c" - min_count = 1 - max_count = 100 - local_ssd_count = 0 - spot = false - local_ssd_ephemeral_count = 0 - disk_size_gb = 100 - disk_type = "pd-standard" - image_type = "COS_CONTAINERD" - enable_gcfs = false - enable_gvnic = false - logging_variant = "DEFAULT" - auto_repair = true - auto_upgrade = true - service_account = "project-service-account@.iam.gserviceaccount.com" - preemptible = false - initial_node_count = 80 + name = "default-node-pool" + machine_type = "e2-medium" + node_locations = "us-central1-b,us-central1-c" + min_count = 1 + max_count = 100 + local_ssd_count = 0 + spot = false + local_ssd_ephemeral_count = 0 + disk_size_gb = 100 + disk_type = "pd-standard" + image_type = "COS_CONTAINERD" + enable_gcfs = false + enable_gvnic = false + logging_variant = "DEFAULT" + auto_repair = true + auto_upgrade = true + service_account = "project-service-account@.iam.gserviceaccount.com" + preemptible = false + initial_node_count = 80 + accelerator_count = 1 + accelerator_type = "nvidia-l4" + gpu_driver_version = "LATEST" + gpu_sharing_strategy = "TIME_SHARING" + max_shared_clients_per_gpu = 2 }, ] diff --git a/modules/beta-private-cluster/cluster.tf b/modules/beta-private-cluster/cluster.tf index cce610188..3e0125b9b 100644 --- a/modules/beta-private-cluster/cluster.tf +++ b/modules/beta-private-cluster/cluster.tf @@ -735,6 +735,14 @@ resource "google_container_node_pool" "pools" { gpu_driver_version = lookup(each.value, "gpu_driver_version", "") } } + + dynamic "gpu_sharing_config" { + for_each = lookup(each.value, "gpu_sharing_strategy", "") != "" ? [1] : [] + content { + gpu_sharing_strategy = lookup(each.value, "gpu_sharing_strategy", "") + max_shared_clients_per_gpu = lookup(each.value, "max_shared_clients_per_gpu", 2) + } + } } } @@ -998,6 +1006,14 @@ resource "google_container_node_pool" "windows_pools" { gpu_driver_version = lookup(each.value, "gpu_driver_version", "") } } + + dynamic "gpu_sharing_config" { + for_each = lookup(each.value, "gpu_sharing_strategy", "") != "" ? [1] : [] + content { + gpu_sharing_strategy = lookup(each.value, "gpu_sharing_strategy", "") + max_shared_clients_per_gpu = lookup(each.value, "max_shared_clients_per_gpu", 2) + } + } } } diff --git a/modules/beta-public-cluster-update-variant/README.md b/modules/beta-public-cluster-update-variant/README.md index 12dacc212..c2d1e840f 100644 --- a/modules/beta-public-cluster-update-variant/README.md +++ b/modules/beta-public-cluster-update-variant/README.md @@ -78,25 +78,30 @@ module "gke" { node_pools = [ { - name = "default-node-pool" - machine_type = "e2-medium" - node_locations = "us-central1-b,us-central1-c" - min_count = 1 - max_count = 100 - local_ssd_count = 0 - spot = false - local_ssd_ephemeral_count = 0 - disk_size_gb = 100 - disk_type = "pd-standard" - image_type = "COS_CONTAINERD" - enable_gcfs = false - enable_gvnic = false - logging_variant = "DEFAULT" - auto_repair = true - auto_upgrade = true - service_account = "project-service-account@.iam.gserviceaccount.com" - preemptible = false - initial_node_count = 80 + name = "default-node-pool" + machine_type = "e2-medium" + node_locations = "us-central1-b,us-central1-c" + min_count = 1 + max_count = 100 + local_ssd_count = 0 + spot = false + local_ssd_ephemeral_count = 0 + disk_size_gb = 100 + disk_type = "pd-standard" + image_type = "COS_CONTAINERD" + enable_gcfs = false + enable_gvnic = false + logging_variant = "DEFAULT" + auto_repair = true + auto_upgrade = true + service_account = "project-service-account@.iam.gserviceaccount.com" + preemptible = false + initial_node_count = 80 + accelerator_count = 1 + accelerator_type = "nvidia-l4" + gpu_driver_version = "LATEST" + gpu_sharing_strategy = "TIME_SHARING" + max_shared_clients_per_gpu = 2 }, ] diff --git a/modules/beta-public-cluster-update-variant/cluster.tf b/modules/beta-public-cluster-update-variant/cluster.tf index edfd3e161..6041729f6 100644 --- a/modules/beta-public-cluster-update-variant/cluster.tf +++ b/modules/beta-public-cluster-update-variant/cluster.tf @@ -533,6 +533,8 @@ locals { "accelerator_type", "gpu_partition_size", "gpu_driver_version", + "gpu_sharing_strategy", + "max_shared_clients_per_gpu", "enable_secure_boot", "enable_integrity_monitoring", "local_ssd_count", @@ -792,6 +794,14 @@ resource "google_container_node_pool" "pools" { gpu_driver_version = lookup(each.value, "gpu_driver_version", "") } } + + dynamic "gpu_sharing_config" { + for_each = lookup(each.value, "gpu_sharing_strategy", "") != "" ? [1] : [] + content { + gpu_sharing_strategy = lookup(each.value, "gpu_sharing_strategy", "") + max_shared_clients_per_gpu = lookup(each.value, "max_shared_clients_per_gpu", 2) + } + } } } @@ -1056,6 +1066,14 @@ resource "google_container_node_pool" "windows_pools" { gpu_driver_version = lookup(each.value, "gpu_driver_version", "") } } + + dynamic "gpu_sharing_config" { + for_each = lookup(each.value, "gpu_sharing_strategy", "") != "" ? [1] : [] + content { + gpu_sharing_strategy = lookup(each.value, "gpu_sharing_strategy", "") + max_shared_clients_per_gpu = lookup(each.value, "max_shared_clients_per_gpu", 2) + } + } } } diff --git a/modules/beta-public-cluster/README.md b/modules/beta-public-cluster/README.md index 5a5ce13de..bd00f7f7a 100644 --- a/modules/beta-public-cluster/README.md +++ b/modules/beta-public-cluster/README.md @@ -56,25 +56,30 @@ module "gke" { node_pools = [ { - name = "default-node-pool" - machine_type = "e2-medium" - node_locations = "us-central1-b,us-central1-c" - min_count = 1 - max_count = 100 - local_ssd_count = 0 - spot = false - local_ssd_ephemeral_count = 0 - disk_size_gb = 100 - disk_type = "pd-standard" - image_type = "COS_CONTAINERD" - enable_gcfs = false - enable_gvnic = false - logging_variant = "DEFAULT" - auto_repair = true - auto_upgrade = true - service_account = "project-service-account@.iam.gserviceaccount.com" - preemptible = false - initial_node_count = 80 + name = "default-node-pool" + machine_type = "e2-medium" + node_locations = "us-central1-b,us-central1-c" + min_count = 1 + max_count = 100 + local_ssd_count = 0 + spot = false + local_ssd_ephemeral_count = 0 + disk_size_gb = 100 + disk_type = "pd-standard" + image_type = "COS_CONTAINERD" + enable_gcfs = false + enable_gvnic = false + logging_variant = "DEFAULT" + auto_repair = true + auto_upgrade = true + service_account = "project-service-account@.iam.gserviceaccount.com" + preemptible = false + initial_node_count = 80 + accelerator_count = 1 + accelerator_type = "nvidia-l4" + gpu_driver_version = "LATEST" + gpu_sharing_strategy = "TIME_SHARING" + max_shared_clients_per_gpu = 2 }, ] diff --git a/modules/beta-public-cluster/cluster.tf b/modules/beta-public-cluster/cluster.tf index dfecb9e86..065e41f74 100644 --- a/modules/beta-public-cluster/cluster.tf +++ b/modules/beta-public-cluster/cluster.tf @@ -716,6 +716,14 @@ resource "google_container_node_pool" "pools" { gpu_driver_version = lookup(each.value, "gpu_driver_version", "") } } + + dynamic "gpu_sharing_config" { + for_each = lookup(each.value, "gpu_sharing_strategy", "") != "" ? [1] : [] + content { + gpu_sharing_strategy = lookup(each.value, "gpu_sharing_strategy", "") + max_shared_clients_per_gpu = lookup(each.value, "max_shared_clients_per_gpu", 2) + } + } } } @@ -979,6 +987,14 @@ resource "google_container_node_pool" "windows_pools" { gpu_driver_version = lookup(each.value, "gpu_driver_version", "") } } + + dynamic "gpu_sharing_config" { + for_each = lookup(each.value, "gpu_sharing_strategy", "") != "" ? [1] : [] + content { + gpu_sharing_strategy = lookup(each.value, "gpu_sharing_strategy", "") + max_shared_clients_per_gpu = lookup(each.value, "max_shared_clients_per_gpu", 2) + } + } } } diff --git a/modules/private-cluster-update-variant/README.md b/modules/private-cluster-update-variant/README.md index b20d8da1a..4d5eff593 100644 --- a/modules/private-cluster-update-variant/README.md +++ b/modules/private-cluster-update-variant/README.md @@ -82,24 +82,29 @@ module "gke" { node_pools = [ { - name = "default-node-pool" - machine_type = "e2-medium" - node_locations = "us-central1-b,us-central1-c" - min_count = 1 - max_count = 100 - local_ssd_count = 0 - spot = false - disk_size_gb = 100 - disk_type = "pd-standard" - image_type = "COS_CONTAINERD" - enable_gcfs = false - enable_gvnic = false - logging_variant = "DEFAULT" - auto_repair = true - auto_upgrade = true - service_account = "project-service-account@.iam.gserviceaccount.com" - preemptible = false - initial_node_count = 80 + name = "default-node-pool" + machine_type = "e2-medium" + node_locations = "us-central1-b,us-central1-c" + min_count = 1 + max_count = 100 + local_ssd_count = 0 + spot = false + disk_size_gb = 100 + disk_type = "pd-standard" + image_type = "COS_CONTAINERD" + enable_gcfs = false + enable_gvnic = false + logging_variant = "DEFAULT" + auto_repair = true + auto_upgrade = true + service_account = "project-service-account@.iam.gserviceaccount.com" + preemptible = false + initial_node_count = 80 + accelerator_count = 1 + accelerator_type = "nvidia-l4" + gpu_driver_version = "LATEST" + gpu_sharing_strategy = "TIME_SHARING" + max_shared_clients_per_gpu = 2 }, ] diff --git a/modules/private-cluster-update-variant/cluster.tf b/modules/private-cluster-update-variant/cluster.tf index 29d37b663..115b2f3ce 100644 --- a/modules/private-cluster-update-variant/cluster.tf +++ b/modules/private-cluster-update-variant/cluster.tf @@ -486,6 +486,8 @@ locals { "accelerator_type", "gpu_partition_size", "gpu_driver_version", + "gpu_sharing_strategy", + "max_shared_clients_per_gpu", "enable_secure_boot", "enable_integrity_monitoring", "local_ssd_count", @@ -739,6 +741,14 @@ resource "google_container_node_pool" "pools" { gpu_driver_version = lookup(each.value, "gpu_driver_version", "") } } + + dynamic "gpu_sharing_config" { + for_each = lookup(each.value, "gpu_sharing_strategy", "") != "" ? [1] : [] + content { + gpu_sharing_strategy = lookup(each.value, "gpu_sharing_strategy", "") + max_shared_clients_per_gpu = lookup(each.value, "max_shared_clients_per_gpu", 2) + } + } } } @@ -978,6 +988,14 @@ resource "google_container_node_pool" "windows_pools" { gpu_driver_version = lookup(each.value, "gpu_driver_version", "") } } + + dynamic "gpu_sharing_config" { + for_each = lookup(each.value, "gpu_sharing_strategy", "") != "" ? [1] : [] + content { + gpu_sharing_strategy = lookup(each.value, "gpu_sharing_strategy", "") + max_shared_clients_per_gpu = lookup(each.value, "max_shared_clients_per_gpu", 2) + } + } } } diff --git a/modules/private-cluster/README.md b/modules/private-cluster/README.md index 9d77ba0c2..9794b316d 100644 --- a/modules/private-cluster/README.md +++ b/modules/private-cluster/README.md @@ -60,24 +60,29 @@ module "gke" { node_pools = [ { - name = "default-node-pool" - machine_type = "e2-medium" - node_locations = "us-central1-b,us-central1-c" - min_count = 1 - max_count = 100 - local_ssd_count = 0 - spot = false - disk_size_gb = 100 - disk_type = "pd-standard" - image_type = "COS_CONTAINERD" - enable_gcfs = false - enable_gvnic = false - logging_variant = "DEFAULT" - auto_repair = true - auto_upgrade = true - service_account = "project-service-account@.iam.gserviceaccount.com" - preemptible = false - initial_node_count = 80 + name = "default-node-pool" + machine_type = "e2-medium" + node_locations = "us-central1-b,us-central1-c" + min_count = 1 + max_count = 100 + local_ssd_count = 0 + spot = false + disk_size_gb = 100 + disk_type = "pd-standard" + image_type = "COS_CONTAINERD" + enable_gcfs = false + enable_gvnic = false + logging_variant = "DEFAULT" + auto_repair = true + auto_upgrade = true + service_account = "project-service-account@.iam.gserviceaccount.com" + preemptible = false + initial_node_count = 80 + accelerator_count = 1 + accelerator_type = "nvidia-l4" + gpu_driver_version = "LATEST" + gpu_sharing_strategy = "TIME_SHARING" + max_shared_clients_per_gpu = 2 }, ] diff --git a/modules/private-cluster/cluster.tf b/modules/private-cluster/cluster.tf index 14da21858..0d5780b77 100644 --- a/modules/private-cluster/cluster.tf +++ b/modules/private-cluster/cluster.tf @@ -663,6 +663,14 @@ resource "google_container_node_pool" "pools" { gpu_driver_version = lookup(each.value, "gpu_driver_version", "") } } + + dynamic "gpu_sharing_config" { + for_each = lookup(each.value, "gpu_sharing_strategy", "") != "" ? [1] : [] + content { + gpu_sharing_strategy = lookup(each.value, "gpu_sharing_strategy", "") + max_shared_clients_per_gpu = lookup(each.value, "max_shared_clients_per_gpu", 2) + } + } } } @@ -901,6 +909,14 @@ resource "google_container_node_pool" "windows_pools" { gpu_driver_version = lookup(each.value, "gpu_driver_version", "") } } + + dynamic "gpu_sharing_config" { + for_each = lookup(each.value, "gpu_sharing_strategy", "") != "" ? [1] : [] + content { + gpu_sharing_strategy = lookup(each.value, "gpu_sharing_strategy", "") + max_shared_clients_per_gpu = lookup(each.value, "max_shared_clients_per_gpu", 2) + } + } } }