From 3a3eb46ed58079ef5258193f0c221f2c97450b67 Mon Sep 17 00:00:00 2001 From: Maggie Zhang Date: Thu, 18 Apr 2024 13:57:36 -0400 Subject: [PATCH] Updated to k8s 1.29 and GPU Operator v23.9.2 --- aks/terraform.tfvars | 6 +++--- aks/variables.tf | 6 +++--- eks/terraform.tfvars | 10 +++++----- eks/variables.tf | 10 +++++----- gke/terraform.tfvars | 10 +++++----- gke/variables.tf | 10 +++++----- 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/aks/terraform.tfvars b/aks/terraform.tfvars index 7b9f9f1..6a2132b 100644 --- a/aks/terraform.tfvars +++ b/aks/terraform.tfvars @@ -19,9 +19,9 @@ # gpu_node_pool_max_count = 5 # gpu_node_pool_min_count = 2 # gpu_operator_namespace = "gpu-operator" -# gpu_operator_version = "v23.9.1" +# gpu_operator_version = "v23.9.2" # gpu_os_sku = "Ubuntu" -# kubernetes_version = "1.28" +# kubernetes_version = "1.29" # location = "" # nvaie = false -# nvaie_gpu_operator_version = "v23.9.0" +# nvaie_gpu_operator_version = "v23.9.2" diff --git a/aks/variables.tf b/aks/variables.tf index 85b8b63..7699f67 100644 --- a/aks/variables.tf +++ b/aks/variables.tf @@ -25,7 +25,7 @@ variable "cluster_name" { } variable "kubernetes_version" { - default = "1.28" + default = "1.29" description = "Version of Kubernetes to turn on. Run 'az aks get-versions --location --output table' to view all available versions " } @@ -87,7 +87,7 @@ variable "gpu_os_sku" { GPU Operator Variables ****************************/ variable "gpu_operator_version" { - default = "v23.9.1" + default = "v23.9.2" description = "Version of the GPU operator to be installed" } @@ -105,7 +105,7 @@ variable "nvaie" { variable "nvaie_gpu_operator_version" { type = string - default = "v23.9.0" + default = "v23.9.2" description = "The NVIDIA Driver version of GPU Operator. Overrides `gpu_operator_version` when `nvaie` is set to `true`" } diff --git a/eks/terraform.tfvars b/eks/terraform.tfvars index 9479563..8692d1c 100644 --- a/eks/terraform.tfvars +++ b/eks/terraform.tfvars @@ -10,7 +10,7 @@ # aws_profile = "development" # cidr_block = "10.0.0.0/16" # cluster_name = "" -# cluster_version = "1.28" +# cluster_version = "1.29" # cpu_instance_type = "t2.xlarge" # cpu_node_pool_additional_user_data = "" # cpu_node_pool_delete_on_termination = true @@ -28,16 +28,16 @@ # gpu_node_pool_delete_on_termination = true # gpu_node_pool_root_disk_size_gb = 512 # gpu_node_pool_root_volume_type = "gp2" -# gpu_operator_driver_version = "535.129.03" +# gpu_operator_driver_version = "550.54.15" # gpu_operator_namespace = "gpu-operator" -# gpu_operator_version = "v23.9.1" +# gpu_operator_version = "v23.9.2" # max_cpu_nodes = "2" # max_gpu_nodes = "5" # min_cpu_nodes = "0" # min_gpu_nodes = "2" # nvaie = false -# nvaie_gpu_operator_driver_version = "535.129.03" -# nvaie_gpu_operator_version = "v23.9.0" +# nvaie_gpu_operator_driver_version = "550.54.15" +# nvaie_gpu_operator_version = "v23.9.2" # private_subnets = [ # "10.0.0.0/19", # "10.0.32.0/19", diff --git a/eks/variables.tf b/eks/variables.tf index a0589ef..88ca66f 100644 --- a/eks/variables.tf +++ b/eks/variables.tf @@ -28,20 +28,20 @@ variable "cluster_name" { variable "cluster_version" { type = string - default = "1.28" + default = "1.29" description = "Version of EKS to install on the control plane (Major and Minor version only, do not include the patch)" } /************************ GPU Operator Variables *************************/ variable "gpu_operator_version" { - default = "v23.9.1" + default = "v23.9.2" description = "Version of the GPU Operator to deploy. Defaults to latest available. Not set when `nvaie` is set to `true`" } variable "gpu_operator_driver_version" { type = string - default = "535.129.03" + default = "550.54.15" description = "The NVIDIA Driver version deployed with GPU Operator. Defaults to latest available. Not set when `nvaie` is set to true" } @@ -59,13 +59,13 @@ variable "nvaie" { variable "nvaie_gpu_operator_version" { type = string - default = "v23.9.0" + default = "v23.9.2" description = "The NVIDIA Driver version of GPU Operator. Overrides `gpu_operator_version` when `nvaie` is set to `true`" } variable "nvaie_gpu_operator_driver_version" { type = string - default = "535.129.03" + default = "550.54.15" description = "The NVIDIA AI Enterprise version of the NVIDIA driver to be installed with the GPU operator. Overrides `gpu_operator_driver_version` when `nvaie` is set to `true`" } /***************************** diff --git a/gke/terraform.tfvars b/gke/terraform.tfvars index 5144faa..8d83f3c 100644 --- a/gke/terraform.tfvars +++ b/gke/terraform.tfvars @@ -14,18 +14,18 @@ # gpu_instance_type = "n1-standard-4" # gpu_max_node_count = "5" # gpu_min_node_count = "2" -# gpu_operator_driver_version = "535.129.03" +# gpu_operator_driver_version = "550.54.15" # gpu_operator_namespace = "gpu-operator" -# gpu_operator_version = "v23.9.1" +# gpu_operator_version = "v23.9.2" # gpu_type = "nvidia-tesla-v100" -# min_master_version = "1.28" +# min_master_version = "1.29" # network = "" # node_zones = "" # num_cpu_nodes = 1 # num_gpu_nodes = 2 # nvaie = false -# nvaie_gpu_operator_driver_version = "535.129.03" -# nvaie_gpu_operator_version = "v23.9.0" +# nvaie_gpu_operator_driver_version = "550.54.15" +# nvaie_gpu_operator_version = "v23.9.2" # project_id = "" # region = "" # release_channel = "REGULAR" diff --git a/gke/variables.tf b/gke/variables.tf index 50c61c7..25ca1b4 100644 --- a/gke/variables.tf +++ b/gke/variables.tf @@ -49,7 +49,7 @@ variable "release_channel" { } variable "min_master_version" { - default = "1.28" + default = "1.29" description = "The minimum cluster version of the master." } @@ -133,13 +133,13 @@ variable "disk_size_gb" { GPU Operator Variables ***************************/ variable "gpu_operator_version" { - default = "v23.9.1" + default = "v23.9.2" description = "Version of the GPU Operator to deploy. Defaults to latest available. Not set when `nvaie` is set to `true`" } variable "gpu_operator_driver_version" { type = string - default = "535.129.03" + default = "550.54.15" description = "The NVIDIA Driver version deployed with GPU Operator. Defaults to latest available. Not set when `nvaie` is set to true" } @@ -157,12 +157,12 @@ variable "nvaie" { variable "nvaie_gpu_operator_version" { type = string - default = "v23.9.0" + default = "v23.9.2" description = "The NVIDIA Driver version of GPU Operator. Overrides `gpu_operator_version` when `nvaie` is set to `true`" } variable "nvaie_gpu_operator_driver_version" { type = string - default = "535.129.03" + default = "550.54.15" description = "The NVIDIA AI Enterprise version of the NVIDIA driver to be installed with the GPU operator. Overrides `gpu_operator_driver_version` when `nvaie` is set to `true`" }