Skip to content

Commit

Permalink
Updated to k8s 1.29 and GPU Operator v23.9.2
Browse files Browse the repository at this point in the history
  • Loading branch information
MaggieXJZhang committed Apr 18, 2024
1 parent c1b585c commit 3a3eb46
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 26 deletions.
6 changes: 3 additions & 3 deletions aks/terraform.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
# gpu_node_pool_max_count = 5
# gpu_node_pool_min_count = 2
# gpu_operator_namespace = "gpu-operator"
# gpu_operator_version = "v23.9.1"
# gpu_operator_version = "v23.9.2"
# gpu_os_sku = "Ubuntu"
# kubernetes_version = "1.28"
# kubernetes_version = "1.29"
# location = ""
# nvaie = false
# nvaie_gpu_operator_version = "v23.9.0"
# nvaie_gpu_operator_version = "v23.9.2"
6 changes: 3 additions & 3 deletions aks/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ variable "cluster_name" {
}

variable "kubernetes_version" {
default = "1.28"
default = "1.29"
description = "Version of Kubernetes to deploy. Run 'az aks get-versions --location <location> --output table' to view all available versions."
}

Expand Down Expand Up @@ -87,7 +87,7 @@ variable "gpu_os_sku" {
GPU Operator Variables
****************************/
variable "gpu_operator_version" {
default = "v23.9.1"
default = "v23.9.2"
description = "Version of the GPU operator to be installed"
}

Expand All @@ -105,7 +105,7 @@ variable "nvaie" {

variable "nvaie_gpu_operator_version" {
type = string
default = "v23.9.0"
default = "v23.9.2"
description = "The NVIDIA AI Enterprise version of the GPU Operator to be installed. Overrides `gpu_operator_version` when `nvaie` is set to `true`"
}

Expand Down
10 changes: 5 additions & 5 deletions eks/terraform.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# aws_profile = "development"
# cidr_block = "10.0.0.0/16"
# cluster_name = ""
# cluster_version = "1.28"
# cluster_version = "1.29"
# cpu_instance_type = "t2.xlarge"
# cpu_node_pool_additional_user_data = ""
# cpu_node_pool_delete_on_termination = true
Expand All @@ -28,16 +28,16 @@
# gpu_node_pool_delete_on_termination = true
# gpu_node_pool_root_disk_size_gb = 512
# gpu_node_pool_root_volume_type = "gp2"
# gpu_operator_driver_version = "535.129.03"
# gpu_operator_driver_version = "550.54.15"
# gpu_operator_namespace = "gpu-operator"
# gpu_operator_version = "v23.9.1"
# gpu_operator_version = "v23.9.2"
# max_cpu_nodes = "2"
# max_gpu_nodes = "5"
# min_cpu_nodes = "0"
# min_gpu_nodes = "2"
# nvaie = false
# nvaie_gpu_operator_driver_version = "535.129.03"
# nvaie_gpu_operator_version = "v23.9.0"
# nvaie_gpu_operator_driver_version = "550.54.15"
# nvaie_gpu_operator_version = "v23.9.2"
# private_subnets = [
# "10.0.0.0/19",
# "10.0.32.0/19",
Expand Down
10 changes: 5 additions & 5 deletions eks/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,20 @@ variable "cluster_name" {

variable "cluster_version" {
type = string
default = "1.28"
default = "1.29"
description = "Version of EKS to install on the control plane (Major and Minor version only, do not include the patch)"
}
/************************
GPU Operator Variables
*************************/
variable "gpu_operator_version" {
default = "v23.9.1"
default = "v23.9.2"
description = "Version of the GPU Operator to deploy. Defaults to latest available. Not set when `nvaie` is set to `true`"
}

variable "gpu_operator_driver_version" {
type = string
default = "535.129.03"
default = "550.54.15"
description = "The NVIDIA Driver version deployed with GPU Operator. Defaults to latest available. Not set when `nvaie` is set to true"
}

Expand All @@ -59,13 +59,13 @@ variable "nvaie" {

variable "nvaie_gpu_operator_version" {
type = string
default = "v23.9.0"
default = "v23.9.2"
description = "The NVIDIA AI Enterprise version of the GPU Operator to be installed. Overrides `gpu_operator_version` when `nvaie` is set to `true`"
}

variable "nvaie_gpu_operator_driver_version" {
type = string
default = "535.129.03"
default = "550.54.15"
description = "The NVIDIA AI Enterprise version of the NVIDIA driver to be installed with the GPU operator. Overrides `gpu_operator_driver_version` when `nvaie` is set to `true`"
}
/*****************************
Expand Down
10 changes: 5 additions & 5 deletions gke/terraform.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,18 @@
# gpu_instance_type = "n1-standard-4"
# gpu_max_node_count = "5"
# gpu_min_node_count = "2"
# gpu_operator_driver_version = "535.129.03"
# gpu_operator_driver_version = "550.54.15"
# gpu_operator_namespace = "gpu-operator"
# gpu_operator_version = "v23.9.1"
# gpu_operator_version = "v23.9.2"
# gpu_type = "nvidia-tesla-v100"
# min_master_version = "1.28"
# min_master_version = "1.29"
# network = ""
# node_zones = ""
# num_cpu_nodes = 1
# num_gpu_nodes = 2
# nvaie = false
# nvaie_gpu_operator_driver_version = "535.129.03"
# nvaie_gpu_operator_version = "v23.9.0"
# nvaie_gpu_operator_driver_version = "550.54.15"
# nvaie_gpu_operator_version = "v23.9.2"
# project_id = ""
# region = ""
# release_channel = "REGULAR"
Expand Down
10 changes: 5 additions & 5 deletions gke/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ variable "release_channel" {
}

variable "min_master_version" {
default = "1.28"
default = "1.29"
description = "The minimum cluster version of the master."
}

Expand Down Expand Up @@ -133,13 +133,13 @@ variable "disk_size_gb" {
GPU Operator Variables
***************************/
variable "gpu_operator_version" {
default = "v23.9.1"
default = "v23.9.2"
description = "Version of the GPU Operator to deploy. Defaults to latest available. Not set when `nvaie` is set to `true`"
}

variable "gpu_operator_driver_version" {
type = string
default = "535.129.03"
default = "550.54.15"
description = "The NVIDIA Driver version deployed with GPU Operator. Defaults to latest available. Not set when `nvaie` is set to true"
}

Expand All @@ -157,12 +157,12 @@ variable "nvaie" {

variable "nvaie_gpu_operator_version" {
type = string
default = "v23.9.0"
default = "v23.9.2"
description = "The NVIDIA AI Enterprise version of the GPU Operator to be installed. Overrides `gpu_operator_version` when `nvaie` is set to `true`"
}

variable "nvaie_gpu_operator_driver_version" {
type = string
default = "535.129.03"
default = "550.54.15"
description = "The NVIDIA AI Enterprise version of the NVIDIA driver to be installed with the GPU operator. Overrides `gpu_operator_driver_version` when `nvaie` is set to `true`"
}

0 comments on commit 3a3eb46

Please sign in to comment.