From 56328191541f86910879fd6980b3ca81485e32eb Mon Sep 17 00:00:00 2001 From: Kai Fricke Date: Fri, 30 Jun 2023 19:30:16 +0100 Subject: [PATCH] [requirements] Split requirements into different tiers [build_base] (#36808) This PR splits the requirements files into different tiers: - `ml/core-requirements.txt` - these are the most important requirements (tier 1) - `ml/[library]-requirements.txt` - these are pinned dependencies for libraries (tier 2+3) - `ml/[library]-test-requirements.txt` - these are pinned test-dependencies for libraries (tier 4) (tier 2 and 3 may get merged into one tier, tbd). This does not change any packages and should not affect installed packages in docker or CI. The naming format was chosen to be compatible with dependabot. In a follow-up PR, we will compile the tier 1-3 dependencies to ensure all CI runners and Docker images run on the same dependency versions. This PR will split existing requirement files. In total, there will be 4 more such files (`rllib-test-requirements.txt`, `tune-test-requirements.txt`, `train-test-requirements.txt`, `docker/ray-docker-requirements.txt`). Signed-off-by: Kai Fricke --- .buildkite/pipeline.build.yml | 4 +- .buildkite/pipeline.gpu.yml | 12 ++-- .buildkite/pipeline.gpu_large.yml | 14 ++--- .buildkite/pipeline.ml.yml | 2 + .github/CODEOWNERS | 4 +- ci/build/build-docker-images.py | 25 +++++--- ci/build/upload_build_info.sh | 2 +- ci/ci.sh | 50 ++++++++++++++++ ci/docker/base.test.Dockerfile | 1 + ci/env/install-dependencies.sh | 57 ++++++++----------- doc/requirements-doc.txt | 2 +- doc/source/ray-contribute/development.rst | 4 +- doc/source/ray-contribute/docs.ipynb | 2 +- .../ray-contribute/getting-involved.rst | 6 +- docker/ray-ml/Dockerfile | 51 +++++------------ .../ray-ml/install-ml-docker-requirements.sh | 41 +++++++++++++ .../accelerate/accelerate_trainer.py | 1 - .../docker/ray-docker-requirements.txt | 14 +++++ .../lint-requirements.txt} | 0 python/requirements/ml/core-requirements.txt | 24
++++++++ .../data-requirements.txt} | 3 - .../data-test-requirements.txt} | 4 +- ...rements_dl.txt => dl-cpu-requirements.txt} | 5 +- ..._ml_docker.txt => dl-gpu-requirements.txt} | 10 +--- .../requirements/ml/requirements_no_deps.txt | 6 -- python/requirements/ml/requirements_train.txt | 26 --------- python/requirements/ml/requirements_tune.txt | 48 ---------------- .../requirements/ml/requirements_upstream.txt | 9 --- python/requirements/ml/rllib-requirements.txt | 15 +++++ ..._rllib.txt => rllib-test-requirements.txt} | 24 ++------ python/requirements/ml/train-requirements.txt | 2 + .../ml/train-test-requirements.txt | 2 + python/requirements/ml/tune-requirements.txt | 20 +++++++ .../ml/tune-test-requirements.txt | 22 +++++++ .../test-requirements.txt} | 1 + release/long_running_tests/app_config.yaml | 2 +- .../horovod/driver_requirements.txt | 2 +- .../train/driver_requirements.txt | 2 +- .../tune_rllib/driver_requirements.txt | 2 +- release/rllib_tests/app_config.yaml | 4 +- release/rllib_tests/debug_app_config.yaml | 4 +- 41 files changed, 294 insertions(+), 235 deletions(-) create mode 100755 docker/ray-ml/install-ml-docker-requirements.sh create mode 100644 python/requirements/docker/ray-docker-requirements.txt rename python/{requirements_linters.txt => requirements/lint-requirements.txt} (100%) create mode 100644 python/requirements/ml/core-requirements.txt rename python/requirements/{data_processing/requirements.txt => ml/data-requirements.txt} (82%) rename python/requirements/{data_processing/requirements_dataset.txt => ml/data-test-requirements.txt} (69%) rename python/requirements/ml/{requirements_dl.txt => dl-cpu-requirements.txt} (84%) rename python/requirements/ml/{requirements_ml_docker.txt => dl-gpu-requirements.txt} (73%) delete mode 100644 python/requirements/ml/requirements_no_deps.txt delete mode 100644 python/requirements/ml/requirements_train.txt delete mode 100644 python/requirements/ml/requirements_tune.txt delete mode 100644 
python/requirements/ml/requirements_upstream.txt create mode 100644 python/requirements/ml/rllib-requirements.txt rename python/requirements/ml/{requirements_rllib.txt => rllib-test-requirements.txt} (60%) create mode 100644 python/requirements/ml/train-requirements.txt create mode 100644 python/requirements/ml/train-test-requirements.txt create mode 100644 python/requirements/ml/tune-requirements.txt create mode 100644 python/requirements/ml/tune-test-requirements.txt rename python/{requirements_test.txt => requirements/test-requirements.txt} (99%) diff --git a/.buildkite/pipeline.build.yml b/.buildkite/pipeline.build.yml index 0d59a4945f5a..21ce7a9e2fba 100644 --- a/.buildkite/pipeline.build.yml +++ b/.buildkite/pipeline.build.yml @@ -231,9 +231,11 @@ instance_size: large commands: - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT - - ./ci/env/env_info.sh + # Todo (krfricke): Move mosaicml to train-test-requirements.txt + - pip install "mosaicml==0.12.1" - DOC_TESTING=1 ./ci/env/install-dependencies.sh - ./ci/env/install-horovod.sh + - ./ci/env/env_info.sh - bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=doctest,-gpu python/ray/... doc/... diff --git a/.buildkite/pipeline.gpu.yml b/.buildkite/pipeline.gpu.yml index e6a4ffb364ec..f5184a3aa183 100644 --- a/.buildkite/pipeline.gpu.yml +++ b/.buildkite/pipeline.gpu.yml @@ -6,7 +6,7 @@ # commands: # - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT # - TUNE_TESTING=1 ./ci/env/install-dependencies.sh -# - pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt +# - pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt # - ./ci/env/env_info.sh # - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu python/ray/tune/... 
@@ -16,7 +16,7 @@ commands: - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh - - pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt + - pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt - ./ci/env/env_info.sh # --jobs 1 is necessary as we only have 1 GPU on the machine and running tests in parallel # would cause timeouts as the other scripts would wait for the GPU to become available. @@ -40,7 +40,7 @@ commands: - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT - ./ci/env/install-dependencies.sh - - pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt + - pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt - bazel test --config=ci $(./ci/run/bazel_export_options) --test_tag_filters=gpu python/ray/serve/... # Todo: enable once tests pass @@ -49,7 +49,7 @@ # commands: # - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT # - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh -# - pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt +# - pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt # - ./ci/env/env_info.sh # - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only # --test_tag_filters=examples_C,examples_D --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 --test_env=RLLIB_NUM_GPUS=1 rllib/... 
@@ -60,7 +60,7 @@ # commands: # - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT # - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh -# - pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt +# - pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt # - ./ci/env/env_info.sh # - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only # --test_tag_filters=examples_E,examples_F,examples_G,examples_H,examples_I,examples_J,examples_K,examples_L,examples_M,examples_N,examples_O,examples_P --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 --test_env=RLLIB_NUM_GPUS=1 @@ -72,7 +72,7 @@ # commands: # - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT # - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh -# - pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt +# - pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt # - ./ci/env/env_info.sh # - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only # --test_tag_filters=examples_Q,examples_R,examples_S,examples_T,examples_U,examples_V,examples_W,examples_X,examples_Y,examples_Z --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 --test_env=RLLIB_NUM_GPUS=1 diff --git a/.buildkite/pipeline.gpu_large.yml b/.buildkite/pipeline.gpu_large.yml index c79b7b8bbd2c..34643c9e0af1 100644 --- a/.buildkite/pipeline.gpu_large.yml +++ b/.buildkite/pipeline.gpu_large.yml @@ -5,7 +5,7 @@ commands: - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT - TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh - - pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt + - pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt - ./ci/env/install-horovod.sh - ./ci/env/env_info.sh - bazel test --config=ci 
$(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,gpu_only,-ray_air python/ray/train/... @@ -15,7 +15,7 @@ commands: - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT - TRAIN_TESTING=1 DATA_PROCESSING_TESTING=1 ./ci/env/install-dependencies.sh - - pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt + - pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt - ./ci/env/env_info.sh - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=datasets_train,-doctest doc/... @@ -25,7 +25,7 @@ commands: - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT - RLLIB_TESTING=1 ./ci/env/install-dependencies.sh - - pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt + - pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt - ./ci/env/env_info.sh # --jobs 2 is necessary as we only need to have at least 2 gpus on the machine # and running tests in parallel would cause timeouts as the other scripts would @@ -42,7 +42,7 @@ commands: - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT - DATA_PROCESSING_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh - - pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt + - pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt - ./ci/env/install-horovod.sh - ./ci/env/env_info.sh - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu python/ray/air/... python/ray/train/... 
@@ -53,7 +53,7 @@ commands: - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT - DOC_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh - - pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt + - pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt - ./ci/env/env_info.sh # Test examples with newer version of `transformers` # TODO(amogkam): Remove when https://github.com/ray-project/ray/issues/36011 @@ -65,7 +65,7 @@ commands: - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT - DOC_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh - - pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt + - pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt - ./ci/env/install-horovod.sh - ./ci/env/env_info.sh # Test examples with newer version of `transformers` @@ -82,7 +82,7 @@ - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT - NO_DASHBOARD=1 ./ci/env/install-minimal.sh 3.8 - PYTHON=3.8 DOC_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh - - pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt + - pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt - pip uninstall -y pytorch-lightning - pip install lightning==2.0.0 - ./ci/env/env_info.sh diff --git a/.buildkite/pipeline.ml.yml b/.buildkite/pipeline.ml.yml index 408c3bb50246..e177dd7c0a6c 100644 --- a/.buildkite/pipeline.ml.yml +++ b/.buildkite/pipeline.ml.yml @@ -35,6 +35,8 @@ parallelism: 4 commands: - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT + # Todo (krfricke): Move mosaicml to train-test-requirements.txt + - pip install "mosaicml==0.12.1" - TRAIN_TESTING=1 DATA_PROCESSING_TESTING=1 
INSTALL_HOROVOD=1 ./ci/env/install-dependencies.sh - ./ci/env/env_info.sh - ./ci/run/run_bazel_test_with_sharding.sh diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index af2c5fb88871..a0d41da2195f 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -101,8 +101,8 @@ /doc/source/ray-air/ @richardliaw @gjoliver @krfricke @xwjiang2010 @amogkam @matthewdeng @Yard1 @maxpumperla @ray-project/ray-docs # ML Docker Dependencies -/python/requirements/ml/requirements_dl.txt @amogkam @krfricke @richardliaw @matthewdeng -/python/requirements/ml/requirements_ml_docker.txt @amogkam @krfricke @richardliaw @matthewdeng +/python/requirements/ml/dl-cpu-requirements.txt @amogkam @krfricke @richardliaw @matthewdeng +/python/requirements/ml/dl-gpu-requirements.txt @amogkam @krfricke @richardliaw @matthewdeng # Ray symbol export /src/ray/ray_version_script.lds @iycheng @ericl @scv119 diff --git a/ci/build/build-docker-images.py b/ci/build/build-docker-images.py index b79f1ee7fff2..556bd78ae9bc 100644 --- a/ci/build/build-docker-images.py +++ b/ci/build/build-docker-images.py @@ -426,20 +426,27 @@ def prep_ray_ml(): "python/requirements.txt", ] ml_requirements_files = [ - "python/requirements/ml/requirements_ml_docker.txt", - "python/requirements/ml/requirements_dl.txt", - "python/requirements/ml/requirements_tune.txt", - "python/requirements/ml/requirements_rllib.txt", - "python/requirements/ml/requirements_train.txt", - "python/requirements/ml/requirements_upstream.txt", - "python/requirements/ml/requirements_no_deps.txt", + "python/requirements/docker/ray-docker-requirements.txt", + "python/requirements/ml/core-requirements.txt", + "python/requirements/ml/data-requirements.txt", + "python/requirements/ml/dl-gpu-requirements.txt", + "python/requirements/ml/dl-cpu-requirements.txt", + "python/requirements/ml/tune-requirements.txt", + "python/requirements/ml/tune-test-requirements.txt", + "python/requirements/ml/rllib-requirements.txt", + 
"python/requirements/ml/rllib-test-requirements.txt", + "python/requirements/ml/train-requirements.txt", + "python/requirements/ml/train-test-requirements.txt", ] - # We don't need these in the ml docker image + # We don't need these in the ml docker image (or they are installed elsewhere) ignore_requirements = [ "python/requirements/compat/requirements_legacy_compat.txt", + "python/requirements/ml/data-test-requirements.txt", ] - files_on_disk = glob.glob(f"{root_dir}/python/**/requirements*.txt", recursive=True) + files_on_disk = glob.glob( + f"{root_dir}/python/**/*-requirements.txt", recursive=True + ) for file_on_disk in files_on_disk: rel = os.path.relpath(file_on_disk, start=root_dir) print(rel) diff --git a/ci/build/upload_build_info.sh b/ci/build/upload_build_info.sh index 6876d47cd3c1..01a178821722 100755 --- a/ci/build/upload_build_info.sh +++ b/ci/build/upload_build_info.sh @@ -35,7 +35,7 @@ if [[ -z "${BUILDKITE-}" ]]; then aws s3 cp --recursive /tmp/bazel_event_logs "${DST}" else # Codepath for Buildkite - # Keep cryptography/openssl in sync with `requirements_test.txt` + # Keep cryptography/openssl in sync with `requirements/test-requirements.txt` pip install -q -c "${RAY_DIR}/python/requirements.txt" docker aws_requests_auth boto3 cryptography==38.0.1 PyOpenSSL==22.1.0 python .buildkite/copy_files.py --destination logs --path /tmp/bazel_event_logs fi diff --git a/ci/ci.sh b/ci/ci.sh index fc1afab003ce..db26d774b6e6 100755 --- a/ci/ci.sh +++ b/ci/ci.sh @@ -115,6 +115,56 @@ upload_wheels() { ) } + +compile_pip_dependencies() { + # Compile all pinned requirement files into python/requirements_compiled.txt (skipped if it already exists) + + if [[ "${HOSTTYPE}" == "aarch64" || "${HOSTTYPE}" = "arm64" ]]; then + # Resolution currently does not work on aarch64 as some pinned packages + # are not available.
Once they are reasonably upgraded we should be able + # to enable this here. + echo "Skipping for aarch64" + return 0 + fi + + # Invoke pip as "python -m pip" directly: an alias defined inside this function + # would not be expanded within it (shellcheck SC2262). + python -m pip install pip-tools + + # Required packages to lookup e.g. dragonfly-opt + HAS_TORCH=0 + python -c "import torch" 2>/dev/null && HAS_TORCH=1 + python -m pip install --no-cache-dir numpy torch + + if [ -f "${WORKSPACE_DIR}/python/requirements_compiled.txt" ]; then + echo requirements_compiled already exists + else + pip-compile --resolver=backtracking -q \ + --pip-args --no-deps --strip-extras --no-annotate --no-header -o \ + "${WORKSPACE_DIR}/python/requirements_compiled.txt" \ + "${WORKSPACE_DIR}/python/requirements.txt" \ + "${WORKSPACE_DIR}/python/requirements/lint-requirements.txt" \ + "${WORKSPACE_DIR}/python/requirements/test-requirements.txt" \ + "${WORKSPACE_DIR}/python/requirements/docker/ray-docker-requirements.txt" \ + "${WORKSPACE_DIR}/python/requirements/ml/core-requirements.txt" \ + "${WORKSPACE_DIR}/python/requirements/ml/data-requirements.txt" \ + "${WORKSPACE_DIR}/python/requirements/ml/data-test-requirements.txt" \ + "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt" \ + "${WORKSPACE_DIR}/python/requirements/ml/rllib-requirements.txt" \ + "${WORKSPACE_DIR}/python/requirements/ml/rllib-test-requirements.txt" \ + "${WORKSPACE_DIR}/python/requirements/ml/train-requirements.txt" \ + "${WORKSPACE_DIR}/python/requirements/ml/train-test-requirements.txt" \ + "${WORKSPACE_DIR}/python/requirements/ml/tune-requirements.txt" \ + "${WORKSPACE_DIR}/python/requirements/ml/tune-test-requirements.txt" + fi + + cat "${WORKSPACE_DIR}/python/requirements_compiled.txt" + + if [ "$HAS_TORCH" -eq 0 ]; then + python -m pip uninstall -y torch + fi +} + test_core() { local args=( "//:*" diff --git a/ci/docker/base.test.Dockerfile b/ci/docker/base.test.Dockerfile index 012ceb6ccc54..26a9b0628ea4 100644 --- a/ci/docker/base.test.Dockerfile +++ b/ci/docker/base.test.Dockerfile @@ -65,3 +65,4 @@
WORKDIR /ray COPY . . RUN ./ci/env/install-dependencies.sh init +RUN bash --login -i ./ci/ci.sh compile_pip_dependencies diff --git a/ci/env/install-dependencies.sh b/ci/env/install-dependencies.sh index 2e855c3c2daa..d6986c07280f 100755 --- a/ci/env/install-dependencies.sh +++ b/ci/env/install-dependencies.sh @@ -192,7 +192,7 @@ install_shellcheck() { } install_linters() { - pip install -r "${WORKSPACE_DIR}"/python/requirements_linters.txt + pip install -r "${WORKSPACE_DIR}"/python/requirements/lint-requirements.txt install_shellcheck } @@ -328,7 +328,7 @@ install_pip_packages() { # files. delayed_packages=() - requirements_files+=("${WORKSPACE_DIR}/python/requirements_test.txt") + requirements_files+=("${WORKSPACE_DIR}/python/requirements/test-requirements.txt") if [ "${LINT-}" = 1 ]; then install_linters @@ -350,34 +350,26 @@ install_pip_packages() { # Additional RLlib test dependencies. if [ "${RLLIB_TESTING-}" = 1 ] || [ "${DOC_TESTING-}" = 1 ]; then - requirements_files+=("${WORKSPACE_DIR}/python/requirements/ml/requirements_rllib.txt") - #TODO(amogkam): Add this back to requirements_rllib.txt once mlagents no longer pins torch<1.9.0 version. + requirements_files+=("${WORKSPACE_DIR}/python/requirements/ml/rllib-requirements.txt") + requirements_files+=("${WORKSPACE_DIR}/python/requirements/ml/rllib-test-requirements.txt") + #TODO(amogkam): Add this back to rllib-requirements.txt once mlagents no longer pins torch<1.9.0 version. pip install --no-dependencies mlagents==0.28.0 fi - # Some Ray Train dependencies have to be installed with --no-deps, - # as sub-dependencies conflict. The packages still work for our workflows. - # Todo(krfricke): Try to remove once we move to Python 3.8 in CI. - local install_ml_no_deps=0 - # Additional Train test dependencies. 
if [ "${TRAIN_TESTING-}" = 1 ] || [ "${DOC_TESTING-}" = 1 ]; then - requirements_files+=("${WORKSPACE_DIR}/python/requirements/ml/requirements_train.txt") - install_ml_no_deps=1 + requirements_files+=("${WORKSPACE_DIR}/python/requirements/ml/train-requirements.txt") + requirements_files+=("${WORKSPACE_DIR}/python/requirements/ml/train-test-requirements.txt") fi # Additional Tune/Doc test dependencies. if [ "${TUNE_TESTING-}" = 1 ] || [ "${DOC_TESTING-}" = 1 ]; then - requirements_files+=("${WORKSPACE_DIR}/python/requirements/ml/requirements_tune.txt") - fi - - # For Tune, install upstream dependencies. - if [ "${TUNE_TESTING-}" = 1 ] || [ "${DOC_TESTING-}" = 1 ]; then - requirements_files+=("${WORKSPACE_DIR}/python/requirements/ml/requirements_upstream.txt") + requirements_files+=("${WORKSPACE_DIR}/python/requirements/ml/tune-requirements.txt") + requirements_files+=("${WORKSPACE_DIR}/python/requirements/ml/tune-test-requirements.txt") fi # Additional dependency for Ludwig. - # This cannot be included in requirements_upstream.txt as it has conflicting + # This cannot be included in requirements files as it has conflicting # dependencies with Modin. if [ "${INSTALL_LUDWIG-}" = 1 ]; then # TODO: eventually pin this to master. @@ -386,7 +378,7 @@ install_pip_packages() { fi # Additional dependency for time series libraries. - # This cannot be included in requirements_tune.txt as it has conflicting + # This cannot be included in tune-requirements.txt as it has conflicting # dependencies. if [ "${INSTALL_TIMESERIES_LIBS-}" = 1 ]; then requirements_packages+=("statsforecast==1.5.0") @@ -396,10 +388,10 @@ install_pip_packages() { # Data processing test dependencies. 
if [ "${DATA_PROCESSING_TESTING-}" = 1 ] || [ "${DOC_TESTING-}" = 1 ]; then - requirements_files+=("${WORKSPACE_DIR}/python/requirements/data_processing/requirements.txt") + requirements_files+=("${WORKSPACE_DIR}/python/requirements/ml/data-requirements.txt") fi if [ "${DATA_PROCESSING_TESTING-}" = 1 ]; then - requirements_files+=("${WORKSPACE_DIR}/python/requirements/data_processing/requirements_dataset.txt") + requirements_files+=("${WORKSPACE_DIR}/python/requirements/ml/data-test-requirements.txt") if [ -n "${ARROW_VERSION-}" ]; then if [ "${ARROW_VERSION-}" = nightly ]; then delayed_packages+=("--extra-index-url") @@ -416,12 +408,6 @@ install_pip_packages() { fi fi - if [ "${install_ml_no_deps}" = 1 ]; then - # Install these requirements first. Their dependencies may be overwritten later - # by the main install. - pip install -r "${WORKSPACE_DIR}/python/requirements/ml/requirements_no_deps.txt" - fi - retry_pip_install "CC=gcc pip install -Ur ${WORKSPACE_DIR}/python/requirements.txt" # Install deeplearning libraries (Torch + TensorFlow) @@ -431,10 +417,10 @@ install_pip_packages() { # Install right away, as some dependencies (e.g. torch-spline-conv) need # torch to be installed for their own install. pip install -U "torch==${TORCH_VERSION-1.9.0}" "torchvision==${TORCHVISION_VERSION-0.10.0}" - # We won't add requirements_dl.txt as it would otherwise overwrite our custom + # We won't add dl-cpu-requirements.txt as it would otherwise overwrite our custom # torch. Thus we have also have to install tensorflow manually. 
- TF_PACKAGE=$(grep "tensorflow==" "${WORKSPACE_DIR}/python/requirements/ml/requirements_dl.txt") - TFPROB_PACKAGE=$(grep "tensorflow-probability==" "${WORKSPACE_DIR}/python/requirements/ml/requirements_dl.txt") + TF_PACKAGE=$(grep "tensorflow==" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt") + TFPROB_PACKAGE=$(grep "tensorflow-probability==" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt") # %%;* deletes everything after ; to get rid of e.g. python version specifiers pip install -U "${TF_PACKAGE%%;*}" "${TFPROB_PACKAGE%%;*}" @@ -442,15 +428,20 @@ install_pip_packages() { # Otherwise, use pinned default torch version. # Again, install right away, as some dependencies (e.g. torch-spline-conv) need # torch to be installed for their own install. - TORCH_PACKAGE=$(grep "torch==" "${WORKSPACE_DIR}/python/requirements/ml/requirements_dl.txt") - TORCHVISION_PACKAGE=$(grep "torchvision==" "${WORKSPACE_DIR}/python/requirements/ml/requirements_dl.txt") + TORCH_PACKAGE=$(grep "torch==" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt") + TORCHVISION_PACKAGE=$(grep "torchvision==" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt") # %%;* deletes everything after ; to get rid of e.g. 
python version specifiers pip install "${TORCH_PACKAGE%%;*}" "${TORCHVISION_PACKAGE%%;*}" - requirements_files+=("${WORKSPACE_DIR}/python/requirements/ml/requirements_dl.txt") + requirements_files+=("${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt") fi fi + # AIR core dependencies + if [ "${RLLIB_TESTING-}" = 1 ] || [ "${TRAIN_TESTING-}" = 1 ] || [ "${TUNE_TESTING-}" = 1 ] || [ "${DOC_TESTING-}" = 1 ]; then + requirements_files+=("${WORKSPACE_DIR}/python/requirements/ml/core-requirements.txt") + fi + # Inject our own mirror for the CIFAR10 dataset if [ "${TRAIN_TESTING-}" = 1 ] || [ "${TUNE_TESTING-}" = 1 ] || [ "${DOC_TESTING-}" = 1 ]; then SITE_PACKAGES=$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())') diff --git a/doc/requirements-doc.txt b/doc/requirements-doc.txt index 613798164c2a..59a593414d32 100644 --- a/doc/requirements-doc.txt +++ b/doc/requirements-doc.txt @@ -38,7 +38,7 @@ torchvision transformers # Train -mxnet==1.8.0.post0 +mxnet==1.9.1; sys_platform != "darwin" # Ray libraries git+https://github.com/ray-project/tune-sklearn@master#tune-sklearn diff --git a/doc/source/ray-contribute/development.rst b/doc/source/ray-contribute/development.rst index 736c2906cf4b..67ba0ce9bc93 100644 --- a/doc/source/ray-contribute/development.rst +++ b/doc/source/ray-contribute/development.rst @@ -360,13 +360,13 @@ Dependencies for the linter (``scripts/format.sh``) can be installed with: .. code-block:: shell - pip install -r python/requirements_linters.txt + pip install -r python/requirements/lint-requirements.txt Dependencies for running Ray unit tests under ``python/ray/tests`` can be installed with: .. code-block:: shell - pip install -c python/requirements.txt -r python/requirements_test.txt + pip install -c python/requirements.txt -r python/requirements/test-requirements.txt Requirement files for running Ray Data / ML library tests are under ``python/requirements/``. 
diff --git a/doc/source/ray-contribute/docs.ipynb b/doc/source/ray-contribute/docs.ipynb index b5cb3d3c0438..23f89b38d1c3 100644 --- a/doc/source/ray-contribute/docs.ipynb +++ b/doc/source/ray-contribute/docs.ipynb @@ -66,7 +66,7 @@ "Install the dependencies for our linters to ensure your changes comply with our style guide.\n", "\n", "```shell\n", - "pip install -r ../python/requirements_linters.txt\n", + "pip install -r ../python/requirements/lint-requirements.txt\n", "```\n", "\n", "Build the documentation by running the following command:\n", diff --git a/doc/source/ray-contribute/getting-involved.rst b/doc/source/ray-contribute/getting-involved.rst index 9bccb133a64d..9f8dd61e960a 100644 --- a/doc/source/ray-contribute/getting-involved.rst +++ b/doc/source/ray-contribute/getting-involved.rst @@ -96,7 +96,7 @@ If you are running tests for the first time, you can install the required depend .. code-block:: shell - pip install -c python/requirements.txt -r python/requirements_test.txt + pip install -c python/requirements.txt -r python/requirements/test-requirements.txt Testing for Python development ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -222,11 +222,11 @@ Lint and Formatting We also have tests for code formatting and linting that need to pass before merge. -* For Python formatting, install the `required dependencies `_ first with: +* For Python formatting, install the `required dependencies `_ first with: .. 
code-block:: shell - pip install -r python/requirements_linters.txt + pip install -r python/requirements/lint-requirements.txt * If developing for C++, you will need `clang-format `_ version ``12`` (download this version of Clang from `here `_) diff --git a/docker/ray-ml/Dockerfile b/docker/ray-ml/Dockerfile index 1667dcf787f3..c93bd09f92c2 100644 --- a/docker/ray-ml/Dockerfile +++ b/docker/ray-ml/Dockerfile @@ -3,44 +3,23 @@ FROM rayproject/ray:nightly"$BASE_IMAGE" # We have to uninstall wrapt this way for Tensorflow compatibility COPY requirements.txt ./ -COPY requirements_no_deps.txt ./ -COPY requirements_dl.txt ./ -COPY requirements_ml_docker.txt ./ -COPY requirements_rllib.txt ./ -COPY requirements_tune.txt ./ -COPY requirements_train.txt ./ -COPY requirements_upstream.txt ./ +COPY dl-cpu-requirements.txt ./ +COPY dl-gpu-requirements.txt ./ +COPY ray-docker-requirements.txt ./ +COPY core-requirements.txt ./ +COPY data-requirements.txt ./ +COPY rllib-requirements.txt ./ +COPY rllib-test-requirements.txt ./ +COPY tune-requirements.txt ./ +COPY tune-test-requirements.txt ./ +COPY train-requirements.txt ./ +COPY train-test-requirements.txt ./ -RUN sudo apt-get update \ - && sudo apt-get install -y gcc \ - cmake \ - libgtk2.0-dev \ - zlib1g-dev \ - libgl1-mesa-dev \ - unzip \ - unrar \ - && $HOME/anaconda3/bin/pip --no-cache-dir install -U pip pip-tools \ - # Install no-deps requirements. Their dependencies may be overwritten - # in subsequent steps - && $HOME/anaconda3/bin/pip --no-cache-dir install -U \ - -r requirements_no_deps.txt \ - # Then, install requirements - && $HOME/anaconda3/bin/pip --no-cache-dir install -U \ - -r requirements.txt \ - # Install other requirements. 
Keep pinned requirements bounds as constraints - && $HOME/anaconda3/bin/pip --no-cache-dir install -U \ - -c requirements.txt \ - -r requirements_rllib.txt \ - -r requirements_train.txt \ - -r requirements_tune.txt \ - -r requirements_upstream.txt \ - # explicitly install (overwrite) pytorch with CUDA support - && $HOME/anaconda3/bin/pip --no-cache-dir install -U \ - -c requirements.txt \ - -r requirements_ml_docker.txt \ - && sudo rm requirements*.txt \ - && sudo apt-get clean +COPY install-ml-docker-requirements.sh ./ +RUN sudo chmod +x install-ml-docker-requirements.sh && ./install-ml-docker-requirements.sh + +# Export installed packages RUN $HOME/anaconda3/bin/pip freeze > /home/ray/pip-freeze.txt # Make sure tfp is installed correctly and matches tf version. diff --git a/docker/ray-ml/install-ml-docker-requirements.sh b/docker/ray-ml/install-ml-docker-requirements.sh new file mode 100755 index 000000000000..6da9ae18d1fc --- /dev/null +++ b/docker/ray-ml/install-ml-docker-requirements.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -euo pipefail +# Wrap pip in a function: bash does not expand aliases in non-interactive scripts. +pip() { "$HOME/anaconda3/bin/pip" "$@"; } + +sudo apt-get update \ + && sudo apt-get install -y gcc \ + cmake \ + libgtk2.0-dev \ + zlib1g-dev \ + libgl1-mesa-dev \ + unzip \ + unrar + +pip --no-cache-dir install -U pip pip-tools + +# Install requirements +pip --no-cache-dir install -U -r requirements.txt + + +# Install other requirements.
Keep pinned requirements bounds as constraints +pip --no-cache-dir install -U \ + -c requirements.txt \ + -r core-requirements.txt \ + -r data-requirements.txt \ + -r rllib-requirements.txt \ + -r rllib-test-requirements.txt \ + -r train-requirements.txt \ + -r train-test-requirements.txt \ + -r tune-requirements.txt \ + -r tune-test-requirements.txt \ + -r ray-docker-requirements.txt + +# explicitly install (overwrite) pytorch with CUDA support +pip --no-cache-dir install -U \ + -c requirements.txt \ + -r dl-gpu-requirements.txt + +sudo apt-get clean +# Remove every requirements file the Dockerfile copied in (requirements.txt and all *-requirements.txt) +sudo rm requirements.txt ./*-requirements.txt diff --git a/python/ray/train/huggingface/accelerate/accelerate_trainer.py b/python/ray/train/huggingface/accelerate/accelerate_trainer.py index 57a892b57d81..e51a63feedec 100644 --- a/python/ray/train/huggingface/accelerate/accelerate_trainer.py +++ b/python/ray/train/huggingface/accelerate/accelerate_trainer.py @@ -107,7 +107,6 @@ def train_loop_per_worker(): - Type of launcher This Trainer requires ``accelerate>=0.17.0`` package. - It is tested with ``accelerate==0.17.1``. Example: .. testcode:: diff --git a/python/requirements/docker/ray-docker-requirements.txt b/python/requirements/docker/ray-docker-requirements.txt new file mode 100644 index 000000000000..3023dd86d01e --- /dev/null +++ b/python/requirements/docker/ray-docker-requirements.txt @@ -0,0 +1,14 @@ +# Todo: Fix conflicts with pinned boto3/botocore +# awscli +gsutil + +# Requirements that are shipped in the ML docker image. +ipython + +# Needed for rich visualization for Ray Train and Ray Data. +# Todo: Pin to >=8 when myst-parser is upgraded +# ipywidgets>=8 +ipywidgets + +# Needed for Ray Client error message serialization/deserialization.
+tblib diff --git a/python/requirements_linters.txt b/python/requirements/lint-requirements.txt similarity index 100% rename from python/requirements_linters.txt rename to python/requirements/lint-requirements.txt diff --git a/python/requirements/ml/core-requirements.txt b/python/requirements/ml/core-requirements.txt new file mode 100644 index 000000000000..c3f22cc47b62 --- /dev/null +++ b/python/requirements/ml/core-requirements.txt @@ -0,0 +1,24 @@ +# ML tracking integrations +comet-ml==3.31.9 +mlflow==1.30.0; python_version <= '3.7' +mlflow==2.4.1; python_version > '3.7' +wandb==0.13.4 + +# ML training frameworks +xgboost==1.6.2; python_version <= '3.7' +xgboost==1.7.6; python_version > '3.7' +xgboost_ray==0.1.15 + +lightgbm==3.3.5 +lightgbm_ray==0.1.8 + +# Huggingface +transformers==4.19.1 +accelerate==0.20.3 + +# DL libraries +-r dl-cpu-requirements.txt + +# Cloud storage tools +s3fs==2023.1.0; python_version < '3.8' +s3fs==2023.5.0; python_version >= '3.8' diff --git a/python/requirements/data_processing/requirements.txt b/python/requirements/ml/data-requirements.txt similarity index 82% rename from python/requirements/data_processing/requirements.txt rename to python/requirements/ml/data-requirements.txt index c0462a422032..8c8080a5dd44 100644 --- a/python/requirements/data_processing/requirements.txt +++ b/python/requirements/ml/data-requirements.txt @@ -7,11 +7,8 @@ aioboto3==11.0.1; python_version < '3.8' aioboto3==11.2.0; python_version >= '3.8' crc32c==2.3 flask_cors -s3fs==2023.1.0; python_version < '3.8' -s3fs==2023.5.0; python_version >= '3.8' modin==0.12.1; python_version < '3.8' modin==0.22.2; python_version >= '3.8' -pytest-repeat raydp>=0.0.dev0 responses==0.13.4 pymars>=0.8.3 diff --git a/python/requirements/data_processing/requirements_dataset.txt b/python/requirements/ml/data-test-requirements.txt similarity index 69% rename from python/requirements/data_processing/requirements_dataset.txt rename to 
python/requirements/ml/data-test-requirements.txt index 1b614c54bcb1..894f45e001ed 100644 --- a/python/requirements/data_processing/requirements_dataset.txt +++ b/python/requirements/ml/data-test-requirements.txt @@ -4,5 +4,5 @@ pickle5; python_version < '3.8' python-snappy tensorflow-datasets -datasets<=2.10.1; python_version ~= '3.6' -datasets; python_version >= '3.7' \ No newline at end of file +datasets +pytest-repeat diff --git a/python/requirements/ml/requirements_dl.txt b/python/requirements/ml/dl-cpu-requirements.txt similarity index 84% rename from python/requirements/ml/requirements_dl.txt rename to python/requirements/ml/dl-cpu-requirements.txt index b8097e3e06a0..a5efddea511d 100644 --- a/python/requirements/ml/requirements_dl.txt +++ b/python/requirements/ml/dl-cpu-requirements.txt @@ -1,17 +1,18 @@ # These requirements are used for the CI and CPU-only Docker images so we install CPU only versions of torch. -# For GPU Docker images, you should install requirements_ml_docker.txt afterwards. +# For GPU Docker images, you should install dl-gpu-requirements.txt afterwards. tensorflow==2.11.0; sys_platform != 'darwin' or platform_machine != 'arm64' tensorflow-macos==2.11.0; sys_platform == 'darwin' and platform_machine == 'arm64' tensorflow-probability==0.19.0 tensorflow-datasets -# If you make changes below this line, please also make the corresponding changes to `requirements_ml_docker.txt` +# If you make changes below this line, please also make the corresponding changes to `dl-gpu-requirements.txt` # and to `install-dependencies.sh`! 
--extra-index-url https://download.pytorch.org/whl/cpu # for CPU versions of torch, torchvision --find-links https://data.pyg.org/whl/torch-1.13.0+cpu.html # for CPU versions of torch-scatter, torch-sparse, torch-cluster, torch-spline-conv torch==1.13.0 +torchmetrics==0.9.3 torchtext==0.14.0 torchvision==0.14.0 torch-scatter==2.1.0 diff --git a/python/requirements/ml/requirements_ml_docker.txt b/python/requirements/ml/dl-gpu-requirements.txt similarity index 73% rename from python/requirements/ml/requirements_ml_docker.txt rename to python/requirements/ml/dl-gpu-requirements.txt index 3a027b9b4869..d989c2ac5bf8 100644 --- a/python/requirements/ml/requirements_ml_docker.txt +++ b/python/requirements/ml/dl-gpu-requirements.txt @@ -1,12 +1,4 @@ -ipython - -# Needed for rich visualization for Ray Train and Ray Data. -ipywidgets>=8 - -# Needed for Ray Client error message serialization/deserialization. -tblib - -# If you make changes below this line, please also make the corresponding changes to `requirements_dl.txt`! +# If you make changes below this line, please also make the corresponding changes to `dl-cpu-requirements.txt`! --extra-index-url https://download.pytorch.org/whl/cu116 # for GPU versions of torch, torchvision --find-links https://data.pyg.org/whl/torch-1.13.0+cu116.html # for GPU versions of torch-scatter, torch-sparse, torch-cluster, torch-spline-conv diff --git a/python/requirements/ml/requirements_no_deps.txt b/python/requirements/ml/requirements_no_deps.txt deleted file mode 100644 index 6d2754211cd2..000000000000 --- a/python/requirements/ml/requirements_no_deps.txt +++ /dev/null @@ -1,6 +0,0 @@ -# These requirements have outdated or incompatible downstream dependencies. -# Thus we install them on a best effort basis before any other packages -# (without constraints), but their dependencies may be overwritten afterwards. 
- -# mosaicml requires importlib-metadata>5, but flake8 is not compatible with it -mosaicml==0.12.1 diff --git a/python/requirements/ml/requirements_train.txt b/python/requirements/ml/requirements_train.txt deleted file mode 100644 index 9ced832dc787..000000000000 --- a/python/requirements/ml/requirements_train.txt +++ /dev/null @@ -1,26 +0,0 @@ -# TODO(train-team): Remove this once Ray Train is deprecated. - --r requirements_dl.txt - -mlflow==1.30.0; python_version <= '3.7' -mlflow==2.4.1; python_version > '3.7' -tensorboardX==2.6.0 - -# Dependencies for Hugging Face examples & tests: -# `python/ray/train/examples/transformers/transformers_example.py` -# `python/ray/air/examples/huggingface/huggingface_basic_language_modeling_example.py` -# `python/ray/train/tests/test_accelerate_trainer_gpu.py` -# `python/ray/air/tests/test_huggingface_trainer.py` - -# We need transformers>=4.19.0 for HuggingFaceTrainer to work (not available on py 3.6) -# If changing the version here, also change it in HuggingFaceTrainer docstring -transformers==4.18.0; python_version <= '3.6' -transformers==4.19.1; python_version > '3.6' -# We need accelerate>=0.17.0 for AccelerateTrainer to work (not available on py 3.6) -# If changing the version here, also change it in AccelerateTrainer docstring -accelerate==0.5.1; python_version <= '3.6' -accelerate==0.20.3; python_version > '3.6' -# Tracking issue: https://github.com/ray-project/ray/issues/34399 -deepspeed==0.8.3; python_version > '3.6' -datasets==2.0.0 -sentencepiece==0.1.96 diff --git a/python/requirements/ml/requirements_tune.txt b/python/requirements/ml/requirements_tune.txt deleted file mode 100644 index a4be1c4de2ac..000000000000 --- a/python/requirements/ml/requirements_tune.txt +++ /dev/null @@ -1,48 +0,0 @@ --r requirements_dl.txt - -aim==3.16.1 -ax-platform[mysql]==0.2.6; python_version < '3.8' -ax-platform[mysql]==0.3.2; python_version >= '3.8' -bayesian-optimization==1.4.3 -comet-ml==3.31.9 -ConfigSpace==0.7.1 
-dragonfly-opt==0.1.7 -flaml==1.1.1 -freezegun==1.1.0 -# Requires decord which is unavailable for arm64 -gluoncv==0.10.5.post0; platform_machine != "arm64" -gpy==1.10.0 - -# Requires libtorrent which is unavailable for arm64 -autorom[accept-rom-license]; platform_machine != "arm64" -h5py==3.7.0 -hpbandster==0.7.4 -HEBO==0.3.2 -hyperopt==0.2.7 -jupyterlab==3.6.1 -lightgbm==3.3.5 -matplotlib!=3.4.3 -mlflow==1.30.0; python_version <= '3.7' -mlflow==2.4.1; python_version > '3.7' -# Unavailable for arm64 in more recent versions -mxnet==1.8.0.post0; platform_machine != "arm64" -nevergrad==0.4.3.post7 -optuna==3.2.0 -# For HEBO compatibility -pymoo==0.5.0 -pytest-remotedata==0.3.2 -lightning-bolts==0.4.0 -pytorch-lightning==1.6.5 -fairscale==0.4.6 -s3fs==2023.1.0; python_version < '3.8' -s3fs==2023.5.0; python_version >= '3.8' -shortuuid==1.0.1 -scikit-optimize==0.9.0 -sigopt==7.5.0 -timm==0.9.2 -transformers==4.18.0; python_version <= '3.6' -transformers==4.19.1; python_version > '3.6' -wandb==0.13.4 -xgboost==1.6.2; python_version <= '3.7' -xgboost==1.7.6; python_version > '3.7' -zoopt==0.4.1 diff --git a/python/requirements/ml/requirements_upstream.txt b/python/requirements/ml/requirements_upstream.txt deleted file mode 100644 index 5db407ce322f..000000000000 --- a/python/requirements/ml/requirements_upstream.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Upstream dependencies that depend on Ray. -# Because they depend on Ray, we can't pin the subdependencies. -# So we separate its own requirements file. - -tune-sklearn==0.4.4 -xgboost_ray==0.1.15 -lightgbm_ray==0.1.8 -modin==0.12.1; python_version < '3.8' -modin==0.22.2; python_version >= '3.8' diff --git a/python/requirements/ml/rllib-requirements.txt b/python/requirements/ml/rllib-requirements.txt new file mode 100644 index 000000000000..07c0480ddacd --- /dev/null +++ b/python/requirements/ml/rllib-requirements.txt @@ -0,0 +1,15 @@ +# For MAML on PyTorch. +higher==0.2.1 +# For auto-generating an env-rendering Window. 
+pyglet==1.5.15 +imageio-ffmpeg==0.4.5 +# ONNX +# ONNX 1.13.0 depends on protobuf > 3.20, conflicting with tensorflow. +# ONNX 1.12.0 is not published for mac arm64, so we exclude it for now. +onnx==1.12.0; sys_platform != 'darwin' or platform_machine != 'arm64' +onnxruntime==1.14.1; sys_platform != 'darwin' or platform_machine != 'arm64' +tf2onnx==1.13.0; sys_platform != 'darwin' or platform_machine != 'arm64' +rich==12.0.1 +# Msgpack checkpoint stuff. +msgpack +msgpack_numpy \ No newline at end of file diff --git a/python/requirements/ml/requirements_rllib.txt b/python/requirements/ml/rllib-test-requirements.txt similarity index 60% rename from python/requirements/ml/requirements_rllib.txt rename to python/requirements/ml/rllib-test-requirements.txt index 5759d04eb2b8..a5f2693039ad 100644 --- a/python/requirements/ml/requirements_rllib.txt +++ b/python/requirements/ml/rllib-test-requirements.txt @@ -1,4 +1,4 @@ --r requirements_dl.txt +-r dl-cpu-requirements.txt # Environment adapters. # --------------------- @@ -11,7 +11,7 @@ mujoco-py<2.2,>=2.1 # Kaggle envs. kaggle_environments==1.7.11 # Unity3D testing -# TODO(sven): Add this back to requirements_rllib.txt once mlagents no longer pins torch<1.9.0 version. +# TODO(sven): Add this back to rllib-requirements.txt once mlagents no longer pins torch<1.9.0 version. #mlagents==0.28.0 mlagents_envs==0.28.0 # For tests on PettingZoo's multi-agent envs. @@ -30,20 +30,6 @@ tensorflow_estimator==2.11.0 # DeepMind's OpenSpiel open-spiel==1.2 -# Other. -# ------ -# For MAML on PyTorch. -higher==0.2.1 -# For auto-generating an env-rendering Window. -pyglet==1.5.15 -imageio-ffmpeg==0.4.5 -# ONNX -# ONNX 1.13.0 depends on protobuf > 3.20, conflicting with tensorflow. -# ONNX 1.12.0 is not published for mac arm64, so we exclude it for now. 
-onnx==1.12.0; sys_platform != 'darwin' or platform_machine != 'arm64' -onnxruntime==1.14.1; sys_platform != 'darwin' or platform_machine != 'arm64' -tf2onnx==1.13.0; sys_platform != 'darwin' or platform_machine != 'arm64' -rich==12.0.1 -# Msgpack checkpoint stuff. -msgpack -msgpack_numpy +# Requires libtorrent which is unavailable for arm64 +autorom[accept-rom-license]; platform_machine != "arm64" +h5py==3.7.0 diff --git a/python/requirements/ml/train-requirements.txt b/python/requirements/ml/train-requirements.txt new file mode 100644 index 000000000000..e1c37446f6d0 --- /dev/null +++ b/python/requirements/ml/train-requirements.txt @@ -0,0 +1,2 @@ +deepspeed==0.8.3 +datasets==2.0.0 diff --git a/python/requirements/ml/train-test-requirements.txt b/python/requirements/ml/train-test-requirements.txt new file mode 100644 index 000000000000..35203bac382a --- /dev/null +++ b/python/requirements/ml/train-test-requirements.txt @@ -0,0 +1,2 @@ +evaluate==0.4.0 +sentencepiece==0.1.96 diff --git a/python/requirements/ml/tune-requirements.txt b/python/requirements/ml/tune-requirements.txt new file mode 100644 index 000000000000..fed5be460384 --- /dev/null +++ b/python/requirements/ml/tune-requirements.txt @@ -0,0 +1,20 @@ +# Searchers +ax-platform[mysql]==0.2.6; python_version < '3.8' +ax-platform[mysql]==0.3.2; python_version >= '3.8' + +bayesian-optimization==1.4.3 + +# BOHB +ConfigSpace==0.7.1 +hpbandster==0.7.4 + +dragonfly-opt==0.1.7 +flaml==1.1.1 +# Todo: Use version pin again after HEBO is updated on pypi +git+https://github.com/huawei-noah/HEBO@9a2a674c22518eed35a8b98e5134576741a95410#subdirectory=HEBO +# HEBO==0.3.4 +hyperopt==0.2.7 +nevergrad==0.4.3.post7 +optuna==3.2.0 + +tune-sklearn==0.4.4 diff --git a/python/requirements/ml/tune-test-requirements.txt b/python/requirements/ml/tune-test-requirements.txt new file mode 100644 index 000000000000..49cab3fc3cb0 --- /dev/null +++ b/python/requirements/ml/tune-test-requirements.txt @@ -0,0 +1,22 @@ +-r 
dl-cpu-requirements.txt + +aim==3.17.5 + +# Requires decord which is unavailable for arm64 +gluoncv==0.10.5.post0; platform_machine != "arm64" +gpy==1.10.0 + +jupyterlab==3.6.1 +matplotlib!=3.4.3 + +mxnet==1.9.1; sys_platform != "darwin" +pytest-remotedata==0.3.2 +lightning-bolts==0.4.0 +pytorch-lightning==1.6.5 +fairscale==0.4.6 +shortuuid==1.0.1 +scikit-optimize==0.9.0 +timm==0.9.2 + +sigopt==7.5.0 +zoopt==0.4.1 diff --git a/python/requirements_test.txt b/python/requirements/test-requirements.txt similarity index 99% rename from python/requirements_test.txt rename to python/requirements/test-requirements.txt index 3f7b27e49fad..79484a15d7b7 100644 --- a/python/requirements_test.txt +++ b/python/requirements/test-requirements.txt @@ -27,6 +27,7 @@ fastapi==0.75.0 feather-format==0.4.1 # Keep compatible with Werkzeug flask==2.1.3 +freezegun==1.1.0 google-api-python-client==2.65.0 google-cloud-storage==2.5.0 gradio==3.11; platform_system != "Windows" diff --git a/release/long_running_tests/app_config.yaml b/release/long_running_tests/app_config.yaml index 5d13555ff713..04d4d19fb227 100755 --- a/release/long_running_tests/app_config.yaml +++ b/release/long_running_tests/app_config.yaml @@ -7,7 +7,7 @@ debian_packages: python: pip_packages: - # These dependencies should be handled by requirements_rllib.txt and requirements_ml_docker.txt and removed here + # These dependencies should be handled by rllib-requirements.txt and removed here - gym>=0.21.0,<0.24.1 - ale-py==0.7.5 - pytest diff --git a/release/ml_user_tests/horovod/driver_requirements.txt b/release/ml_user_tests/horovod/driver_requirements.txt index 867abb6f6bd4..6c8cb758d565 100755 --- a/release/ml_user_tests/horovod/driver_requirements.txt +++ b/release/ml_user_tests/horovod/driver_requirements.txt @@ -2,7 +2,7 @@ # The cluster uses ray-ml Docker image. # ray-ml Docker image installs dependencies from ray/python/requirements/ml/ directory. 
# We constrain on these requirements file so that the same versions are installed. --c ../../../python/requirements/ml/requirements_dl.txt +-c ../../../python/requirements/ml/dl-cpu-requirements.txt torch torchvision \ No newline at end of file diff --git a/release/ml_user_tests/train/driver_requirements.txt b/release/ml_user_tests/train/driver_requirements.txt index eb46d707860e..df4c7e35f75e 100755 --- a/release/ml_user_tests/train/driver_requirements.txt +++ b/release/ml_user_tests/train/driver_requirements.txt @@ -2,7 +2,7 @@ # The cluster uses ray-ml Docker image. # ray-ml Docker image installs dependencies from ray/python/requirements/ml/ directory. # We constrain on these requirements file so that the same versions are installed. --c ../../../python/requirements/ml/requirements_dl.txt +-c ../../../python/requirements/ml/dl-cpu-requirements.txt torch tensorflow \ No newline at end of file diff --git a/release/ml_user_tests/tune_rllib/driver_requirements.txt b/release/ml_user_tests/tune_rllib/driver_requirements.txt index 2ec0944aef35..56a9ab752ce2 100755 --- a/release/ml_user_tests/tune_rllib/driver_requirements.txt +++ b/release/ml_user_tests/tune_rllib/driver_requirements.txt @@ -2,7 +2,7 @@ # The cluster uses ray-ml Docker image. # ray-ml Docker image installs dependencies from ray/python/requirements/ml/ directory. # We constrain on these requirements file so that the same versions are installed. 
--c ../../../python/requirements/ml/requirements_dl.txt +-c ../../../python/requirements/ml/dl-cpu-requirements.txt tensorflow torch diff --git a/release/rllib_tests/app_config.yaml b/release/rllib_tests/app_config.yaml index fe802591f9b0..dc765eab31c3 100755 --- a/release/rllib_tests/app_config.yaml +++ b/release/rllib_tests/app_config.yaml @@ -13,8 +13,8 @@ debian_packages: python: pip_packages: - ## These dependencies should be handled by requirements_rllib.txt and - ## requirements_ml_docker.txt and removed here + ## These dependencies should be handled by rllib-requirements.txt + ## and removed here - gymnasium[atari,mujoco]==0.26.3 - ale-py==0.8.0 - gym==0.26.2 diff --git a/release/rllib_tests/debug_app_config.yaml b/release/rllib_tests/debug_app_config.yaml index c51fbcc4d39b..7b07f4c2eea7 100755 --- a/release/rllib_tests/debug_app_config.yaml +++ b/release/rllib_tests/debug_app_config.yaml @@ -13,8 +13,8 @@ debian_packages: python: pip_packages: - ## These dependencies should be handled by requirements_rllib.txt and - ## requirements_ml_docker.txt and removed here + ## These dependencies should be handled by rllib-requirements.txt + ## and removed here - gymnasium[atari,mujoco]==0.26.3 - ale-py==0.8.0 - gym==0.26.2