Skip to content

Commit

Permalink
Enable Ray Autoscaler for the Rag example application (#722)
Browse files Browse the repository at this point in the history
* Enable Ray Autoscaler for the Rag example application

* Update the ray application template
  • Loading branch information
gongmax committed Jul 10, 2024
1 parent f2883eb commit 2bfbcd7
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 8 deletions.
2 changes: 1 addition & 1 deletion applications/rag/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ variable "gpu_pools" {
name = "gpu-pool-l4"
machine_type = "g2-standard-24"
autoscaling = true
min_count = 1
min_count = 0
max_count = 3
disk_size_gb = 200
disk_type = "pd-balanced"
Expand Down
2 changes: 1 addition & 1 deletion applications/ray/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ variable "gpu_pools" {
name = "gpu-pool-l4"
machine_type = "g2-standard-24"
autoscaling = true
min_count = 1
min_count = 0
max_count = 3
disk_size_gb = 100
disk_type = "pd-balanced"
Expand Down
14 changes: 8 additions & 6 deletions modules/kuberay-cluster/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ head:
# If enableInTreeAutoscaling is true, the autoscaler sidecar will be added to the Ray head pod.
# Ray autoscaler integration is supported only for Ray versions >= 1.11.0
# Ray autoscaler integration is Beta with KubeRay >= 0.3.0 and Ray >= 2.0.0.
# enableInTreeAutoscaling: true
enableInTreeAutoscaling: true
# autoscalerOptions is an OPTIONAL field specifying configuration overrides for the Ray autoscaler.
# The example configuration shown below represents the DEFAULT values.
# autoscalerOptions:
Expand Down Expand Up @@ -95,17 +95,17 @@ head:
# Ray recommends at least 8G memory for production workloads.
memory: "8G"
# Sum of ephemeral storage requests must be max 10Gi on Autopilot default class.
# This includes, ray-head, gcsfuse-sidecar, and fluent-bit.
ephemeral-storage: 4Gi
# This includes ray-head, gcsfuse-sidecar, fluent-bit, and the Ray autoscaler sidecar, which requests 1Gi by default.
ephemeral-storage: 3Gi
requests:
cpu: "4"
memory: "8G"
ephemeral-storage: 4Gi
ephemeral-storage: 3Gi
annotations:
gke-gcsfuse/volumes: "true"
gke-gcsfuse/cpu-limit: "1"
gke-gcsfuse/memory-limit: 2Gi
gke-gcsfuse/ephemeral-storage-limit: 4Gi
gke-gcsfuse/ephemeral-storage-limit: 3Gi
nodeSelector:
iam.gke.io/gke-metadata-server-enabled: "true"
tolerations: []
Expand Down Expand Up @@ -165,7 +165,9 @@ worker:
# uncomment the line below
# disabled: true
groupName: workerGroup
replicas: 1
replicas: 0
minReplicas: 0
maxReplicas: 5
type: worker
labels:
cloud.google.com/gke-ray-node-type: worker
Expand Down

0 comments on commit 2bfbcd7

Please sign in to comment.