Merge branch 'master' into fix-token-expiration
kevin85421 authored Nov 21, 2024
2 parents ff6d1be + 8e6d110 commit 8574cde
Showing 234 changed files with 4,125 additions and 2,377 deletions.
2 changes: 1 addition & 1 deletion .buildkite/core.rayci.yml
@@ -321,7 +321,7 @@ steps:
commands:
- bazel run //ci/ray_ci:test_in_docker -- //... core
--run-flaky-tests --build-type clang
--parallelism-per-worker 2 --gpus 2
--gpus 4
--build-name coregpubuild
--only-tags multi_gpu
depends_on: coregpubuild
18 changes: 9 additions & 9 deletions .buildkite/others.rayci.yml
@@ -1,12 +1,8 @@
group: others
depends_on:
- forge
- oss-ci-base_build
steps:
#build
- name: doctestbuild
wanda: ci/docker/doctest.build.wanda.yaml

# dependencies
- label: ":tapioca: build: pip-compile dependencies"
key: pip_compile_dependencies
instance_type: small
@@ -19,10 +15,13 @@ steps:
- cp -f ./python/requirements_compiled.txt /artifact-mount/
soft_fail: true
job_env: oss-ci-base_test-py3.11
depends_on:
- oss-ci-base_test-multipy
depends_on: oss-ci-base_test-multipy

# docs
- name: doctestbuild
wanda: ci/docker/doctest.build.wanda.yaml
depends_on: oss-ci-base_build

# test
- label: doc tests
instance_type: large
commands:
@@ -40,6 +39,7 @@ steps:
--skip-ray-installation
depends_on: doctestbuild

# java
- label: ":java: java tests"
tags: java
instance_type: medium
@@ -48,7 +48,7 @@ steps:
- docker run -i --rm --volume /tmp/artifacts:/artifact-mount --shm-size=2.5gb
"$${RAYCI_WORK_REPO}":"$${RAYCI_BUILD_ID}"-corebuild /bin/bash -iecuo pipefail
"./java/test.sh"
depends_on: [ "corebuild", "forge" ]
depends_on: corebuild

# bot
- label: ":robot_face: CI weekly green metric"
6 changes: 2 additions & 4 deletions ci/env/install-core-prerelease-dependencies.sh
@@ -5,7 +5,5 @@ set -e
# install all unbounded dependencies in setup.py for ray core
# TODO(scv119) reenable grpcio once https://github.com/grpc/grpc/issues/31885 is fixed.
# TODO(scv119) reenable jsonschema once https://github.com/ray-project/ray/issues/33411 is fixed.
for dependency in aiosignal frozenlist requests protobuf
do
python -m pip install -U --pre --upgrade-strategy=eager $dependency
done
DEPS=(aiosignal frozenlist requests protobuf)
python -m pip install -U --pre --upgrade-strategy=eager "${DEPS[@]}"
2 changes: 1 addition & 1 deletion doc/source/cluster/kubernetes/configs/ray-cluster.gpu.yaml
@@ -12,7 +12,7 @@ spec:
######################headGroupSpec#################################
# head group template and specs, (perhaps 'group' is not needed in the name)
headGroupSpec:
# logical group name, for this called head-group, also can be functional
# logical group name, for this called headgroup, also can be functional
# pod type head or worker
# rayNodeType: head # Not needed since it is under the headgroup
# the following params are used to complete the ray start: ray start --head --block ...
@@ -27,7 +27,7 @@ See {ref}`Ray Serve end-to-end fault tolerance documentation <serve-e2e-ft-guide

* Ray 2.0.0+
* KubeRay 0.6.0+
* Redis: single shard, one or multiple replicas
* Redis: single shard Redis Cluster or Redis Sentinel, one or multiple replicas

## Quickstart

1 change: 1 addition & 0 deletions doc/source/custom_directives.py
@@ -481,6 +481,7 @@ def key(cls: type) -> str:
class Framework(ExampleEnum):
"""Framework type for example metadata."""

AWSNEURON = "AWS Neuron"
PYTORCH = "PyTorch"
LIGHTNING = "Lightning"
TRANSFORMERS = "Transformers"
6 changes: 3 additions & 3 deletions doc/source/ray-overview/installation.rst
@@ -441,8 +441,8 @@ Install Ray Java with Maven
---------------------------

.. note::
All Ray Java APIs are experimental and only supported by the community.

All Ray Java APIs are experimental and only supported by the community.

Before installing Ray Java with Maven, you should install Ray Python with `pip install -U ray`. Note that the versions of Ray Java and Ray Python must match.
Note that nightly Ray Python wheels are also required if you want to install the Ray Java snapshot version.
@@ -506,7 +506,7 @@ Install Ray C++

.. note::

All Ray C++ APIs are experimental and only supported by the community.
All Ray C++ APIs are experimental and only supported by the community.

You can install and use Ray C++ API as follows.

1 change: 1 addition & 0 deletions doc/source/serve/getting_started.md
@@ -101,6 +101,7 @@ parameters in the `@serve.deployment` decorator. The example configures a few co
* `ray_actor_options`: a dictionary containing configuration options for each replica.
* `num_cpus`: a float representing the logical number of CPUs each replica should reserve. You can make this a fraction to pack multiple replicas together on a machine with fewer CPUs than replicas.
* `num_gpus`: a float representing the logical number of GPUs each replica should reserve. You can make this a fraction to pack multiple replicas together on a machine with fewer GPUs than replicas.
* `resources`: a dictionary containing other resource requirements for the replica, such as non-GPU accelerators like HPUs or TPUs.
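
For illustration, here is a minimal sketch that combines these options in one decorator; the class name is hypothetical, and the `"TPU"` entry assumes the cluster actually advertises such a custom resource:

```python
from ray import serve

@serve.deployment(
    ray_actor_options={
        "num_cpus": 0.5,          # half a logical CPU per replica
        "num_gpus": 0.25,         # a quarter of a GPU per replica
        "resources": {"TPU": 1},  # hypothetical custom accelerator resource
    }
)
class Translator:
    def __call__(self, text: str) -> str:
        return text
```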

All these parameters are optional, so feel free to omit them:

14 changes: 11 additions & 3 deletions doc/source/serve/resource-allocation.md
@@ -6,14 +6,14 @@ This guide helps you configure Ray Serve to:

- Scale your deployments horizontally by specifying a number of replicas
- Scale up and down automatically to react to changing traffic
- Allocate hardware resources (CPUs, GPUs, etc) for each deployment
- Allocate hardware resources (CPUs, GPUs, other accelerators, etc) for each deployment


(serve-cpus-gpus)=

## Resource management (CPUs, GPUs)
## Resource management (CPUs, GPUs, accelerators)

You may want to specify a deployment's resource requirements to reserve cluster resources like GPUs. To assign hardware resources per replica, you can pass resource requirements to
You may want to specify a deployment's resource requirements to reserve cluster resources like GPUs or other accelerators. To assign hardware resources per replica, you can pass resource requirements to
`ray_actor_options`.
By default, each replica reserves one CPU.
To learn about options to pass in, take a look at the [Resources with Actors guide](actor-resource-guide).
@@ -27,6 +27,14 @@ def func(*args):
return do_something_with_my_gpu()
```

Or if you want to create a deployment where each replica uses another type of accelerator such as an HPU, follow the example below:

```python
@serve.deployment(ray_actor_options={"resources": {"HPU": 1}})
def func(*args):
return do_something_with_my_hpu()
```

(serve-fractional-resources-guide)=

### Fractional CPUs and fractional GPUs
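
As a quick illustrative sketch, a deployment that reserves a fraction of a device so that several replicas can share it might look like the following (the handler body reuses the placeholder from the examples above):

```python
@serve.deployment(ray_actor_options={"num_cpus": 0.2, "num_gpus": 0.5})
def fractional_func(*args):
    # Two replicas of this deployment can be packed onto a single GPU,
    # and five of them onto a single CPU.
    return do_something_with_my_gpu()
```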
@@ -4,7 +4,7 @@ head_node_type:
name: head_node_type
instance_type: g5.48xlarge
resources:
custom_resources:
custom_resources:
large_cpu_mem: 1

worker_node_types:
@@ -14,7 +14,7 @@ worker_node_types:
max_workers: 3
use_spot: false

aws:
advanced_configurations_json:
TagSpecifications:
- ResourceType: "instance"
Tags:
@@ -4,7 +4,7 @@ head_node_type:
name: head_node_type
instance_type: g5.48xlarge
resources:
custom_resources:
custom_resources:
large_cpu_mem: 1

worker_node_types:
@@ -20,7 +20,7 @@ worker_node_types:
max_workers: 2
use_spot: false

aws:
advanced_configurations_json:
TagSpecifications:
- ResourceType: "instance"
Tags:
@@ -12,7 +12,7 @@ worker_node_types:
max_workers: 16
use_spot: false

aws:
advanced_configurations_json:
TagSpecifications:
- ResourceType: "instance"
Tags:
2 changes: 1 addition & 1 deletion doc/source/templates/README.md
@@ -32,7 +32,7 @@ To add a template:
Your template does not need to be a Jupyter notebook. It can also be presented as a
Python script with `README` instructions on how to run it.
2. Add a release test for the template in `release/release_tests.yaml` (for both AWS and GCE).
2. Add a release test for the template in `release/release_tests.yaml` (for both AWS and GCE). For Data tests, use `release/release_data_tests.yaml` instead.
See the section on workspace templates for an example. Note that the cluster env and
compute config are a little different for release tests. Use the files in the
@@ -5,7 +5,7 @@ head_node_type:
name: head_node_type
instance_type: g5.48xlarge
resources:
custom_resources:
custom_resources:
large_cpu_mem: 1

worker_node_types:
@@ -15,7 +15,7 @@ worker_node_types:
max_workers: 3
use_spot: false

aws:
advanced_configurations_json:
TagSpecifications:
- ResourceType: "instance"
Tags:
@@ -5,7 +5,7 @@ head_node_type:
name: head_node_type
instance_type: g5.48xlarge
resources:
custom_resources:
custom_resources:
large_cpu_mem: 1

worker_node_types:
@@ -21,7 +21,7 @@ worker_node_types:
max_workers: 2
use_spot: false

aws:
advanced_configurations_json:
TagSpecifications:
- ResourceType: "instance"
Tags:
@@ -13,7 +13,7 @@ worker_node_types:
max_workers: 16
use_spot: false

aws:
advanced_configurations_json:
TagSpecifications:
- ResourceType: "instance"
Tags:
2 changes: 1 addition & 1 deletion doc/source/templates/testing/compute_configs/cpu/aws.yaml
@@ -13,7 +13,7 @@ worker_node_types:
max_workers: 7
use_spot: false

aws:
advanced_configurations_json:
TagSpecifications:
- ResourceType: "instance"
Tags:
2 changes: 1 addition & 1 deletion doc/source/templates/testing/compute_configs/gpu/aws.yaml
@@ -13,7 +13,7 @@ worker_node_types:
max_workers: 3
use_spot: false

aws:
advanced_configurations_json:
TagSpecifications:
- ResourceType: "instance"
Tags:
12 changes: 11 additions & 1 deletion doc/source/train/examples.yml
@@ -119,7 +119,17 @@ examples:
contributor: community
link: examples/intel_gaudi/llama_pretrain

- title: Fine-tune a Llama-2 text generation models with DeepSpeed and Hugging Face Accelerate
- title: Fine-tune Llama3.1 with AWS Trainium
frameworks:
- pytorch
- aws neuron
skill_level: advanced
use_cases:
- natural language processing
- large language models
contributor: community
link: examples/aws-trainium/llama3
- title: Fine-tune a Llama-2 text generation model with DeepSpeed and Hugging Face Accelerate
frameworks:
- accelerate
- deepspeed
103 changes: 103 additions & 0 deletions doc/source/train/examples/aws-trainium/llama3.rst
@@ -0,0 +1,103 @@
:orphan:

Distributed fine-tuning of Llama 3.1 8B on AWS Trainium with Ray and PyTorch Lightning
======================================================================================


This example demonstrates how to fine-tune the `Llama 3.1 8B <https://huggingface.co/NousResearch/Meta-Llama-3.1-8B/>`__ model on `AWS
Trainium <https://aws.amazon.com/ai/machine-learning/trainium/>`__ instances using Ray Train, PyTorch Lightning, and AWS Neuron SDK.

AWS Trainium is the machine learning (ML) chip that AWS built for deep
learning (DL) training of 100B+ parameter models. `AWS Neuron
SDK <https://aws.amazon.com/machine-learning/neuron/>`__ helps
developers train models on Trainium accelerators.

Prepare the environment
-----------------------

See `Setup EKS cluster and tools <https://github.com/aws-neuron/aws-neuron-eks-samples/tree/master/llama3.1_8B_finetune_ray_ptl_neuron#setupeksclusterandtools>`__ for setting up an Amazon EKS cluster leveraging AWS Trainium instances.

Create a Docker image
---------------------
When the EKS cluster is ready, create an Amazon ECR repository for building and uploading the Docker image containing artifacts for fine-tuning a Llama3.1 8B model:

1. Clone the repo.

::

git clone https://github.com/aws-neuron/aws-neuron-eks-samples.git

2. Go to the ``llama3.1_8B_finetune_ray_ptl_neuron`` directory.

::

cd aws-neuron-eks-samples/llama3.1_8B_finetune_ray_ptl_neuron

3. Trigger the script.

::

chmod +x 0-kuberay-trn1-llama3-finetune-build-image.sh
./0-kuberay-trn1-llama3-finetune-build-image.sh

4. Enter the region your cluster is running in, for example: us-east-2.

5. Verify in the AWS console that the Amazon ECR service has the newly
created ``kuberay_trn1_llama3.1_pytorch2`` repository.

6. Update the ECR image ARN in the manifest file used for creating the Ray cluster.

Replace the <AWS_ACCOUNT_ID> and <REGION> placeholders with actual values in the ``1-llama3-finetune-trn1-create-raycluster.yaml`` file using the commands below, so that it references the ECR image created above:

::

export AWS_ACCOUNT_ID=<enter_your_aws_account_id> # for ex: 111222333444
export REGION=<enter_your_aws_region> # for ex: us-east-2
sed -i "s/<AWS_ACCOUNT_ID>/$AWS_ACCOUNT_ID/g" 1-llama3-finetune-trn1-create-raycluster.yaml
sed -i "s/<REGION>/$REGION/g" 1-llama3-finetune-trn1-create-raycluster.yaml

Configuring Ray Cluster
-----------------------

The ``llama3.1_8B_finetune_ray_ptl_neuron`` directory in the AWS Neuron samples repository simplifies the
Ray configuration. KubeRay provides a manifest that you can apply
to the cluster to set up the head and worker pods.

Run the following command to set up the Ray cluster:

::

kubectl apply -f 1-llama3-finetune-trn1-create-raycluster.yaml


Accessing Ray Dashboard
-----------------------
Port-forward from the cluster to expose the Ray dashboard, then view it at
`http://localhost:8265 <http://localhost:8265/>`__.
Run the port-forward in the background with the following command:

::

kubectl port-forward service/kuberay-trn1-head-svc 8265:8265 &

Launching Ray Jobs
------------------

The Ray cluster is now ready to handle workloads. Initiate the data preparation and fine-tuning Ray jobs:

1. Launch the Ray job for downloading the dolly-15k dataset and the Llama3.1 8B model artifacts:

::

kubectl apply -f 2-llama3-finetune-trn1-rayjob-create-data.yaml

2. When the data preparation job has completed successfully, run the following fine-tuning job:

::

kubectl apply -f 3-llama3-finetune-trn1-rayjob-submit-finetuning-job.yaml

3. Monitor the jobs via the Ray Dashboard.


For detailed information on each of the steps above, see the `AWS documentation link <https://github.com/aws-neuron/aws-neuron-eks-samples/blob/master/llama3.1_8B_finetune_ray_ptl_neuron/README.md/>`__.
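
As an optional, illustrative alternative to watching the dashboard UI, the Ray Job Submission SDK can query job status through the same forwarded port. This sketch assumes the port-forward from the earlier section is still running:

::

    from ray.job_submission import JobSubmissionClient

    # Talk to the Ray dashboard exposed by the port-forward (http://localhost:8265).
    client = JobSubmissionClient("http://localhost:8265")

    # Print the status of every job known to the cluster, including the
    # data preparation and fine-tuning jobs created by the RayJob manifests.
    for job in client.list_jobs():
        print(job.submission_id, job.status)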