From ddcbd87f42bcdc2dc70d9c9503047f1af54594d3 Mon Sep 17 00:00:00 2001
From: Hao Chen
Date: Sat, 8 Jul 2023 10:38:20 -0700
Subject: [PATCH] [Data][Train] Fix remaining issues on DatasetConfig->DataConfig migration (#37215)

- Change all examples to use DataConfig.
- Update function signature of all Trainer classes.
- Add a link in deprecation warning.

---------

Signed-off-by: Hao Chen
---
 .../ray-air/doc_code/computer_vision.py       | 14 ++++----
 .../ray-air/examples/torch_detection.ipynb    | 36 +++++++++++++------
 .../datasets_train/datasets_train.py          |  9 +++--
 python/ray/air/config.py                      |  3 +-
 python/ray/train/horovod/horovod_trainer.py   |  5 +--
 .../accelerate/accelerate_trainer.py          |  5 +--
 .../transformers/transformers_trainer.py      |  5 +--
 .../ray/train/lightning/lightning_trainer.py  |  5 +--
 python/ray/train/mosaic/mosaic_trainer.py     |  5 +--
 .../train/tensorflow/tensorflow_trainer.py    |  5 +--
 python/ray/train/torch/torch_trainer.py       |  5 +--
 .../workloads/data_benchmark.py               |  3 +-
 12 files changed, 63 insertions(+), 37 deletions(-)

diff --git a/doc/source/ray-air/doc_code/computer_vision.py b/doc/source/ray-air/doc_code/computer_vision.py
index e2c2905786f3..e9e21fc83139 100644
--- a/doc/source/ray-air/doc_code/computer_vision.py
+++ b/doc/source/ray-air/doc_code/computer_vision.py
@@ -187,7 +187,7 @@ def train_torch_model(dataset, preprocessor, per_epoch_preprocessor):
 
     from ray import train
     from ray.air import session
-    from ray.air.config import DatasetConfig, ScalingConfig
+    from ray.air.config import ScalingConfig
     from ray.train.torch import TorchCheckpoint, TorchTrainer
 
     def train_one_epoch(model, *, criterion, optimizer, batch_size, epoch):
@@ -237,13 +237,11 @@ def train_loop_per_worker(config):
     # __torch_training_loop_stop__
 
     # __torch_trainer_start__
+    dataset = per_epoch_preprocessor.transform(dataset)
     trainer = TorchTrainer(
         train_loop_per_worker=train_loop_per_worker,
         train_loop_config={"batch_size": 32, "lr": 0.02, "epochs": 1},
         datasets={"train": dataset},
-        dataset_config={
-            "train": DatasetConfig(per_epoch_preprocessor=per_epoch_preprocessor)
-        },
         scaling_config=ScalingConfig(num_workers=2),
         preprocessor=preprocessor,
     )
@@ -288,16 +286,16 @@ def train_loop_per_worker(config):
     # __tensorflow_training_loop_stop__
 
     # __tensorflow_trainer_start__
-    from ray.air import DatasetConfig, ScalingConfig
+    from ray.air import ScalingConfig
     from ray.train.tensorflow import TensorflowTrainer
 
+    # The following transform operation is lazy.
+    # It will be re-run every epoch.
+    dataset = per_epoch_preprocessor.transform(dataset)
     trainer = TensorflowTrainer(
         train_loop_per_worker=train_loop_per_worker,
         train_loop_config={"batch_size": 32, "lr": 0.02, "epochs": 1},
         datasets={"train": dataset},
-        dataset_config={
-            "train": DatasetConfig(per_epoch_preprocessor=per_epoch_preprocessor)
-        },
         scaling_config=ScalingConfig(num_workers=2),
         preprocessor=preprocessor,
     )
diff --git a/doc/source/ray-air/examples/torch_detection.ipynb b/doc/source/ray-air/examples/torch_detection.ipynb
index 5095caa7b77b..8ba360e53dde 100644
--- a/doc/source/ray-air/examples/torch_detection.ipynb
+++ b/doc/source/ray-air/examples/torch_detection.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "da5b9b7e",
    "metadata": {},
@@ -23,6 +24,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "e9a6d043",
    "metadata": {},
@@ -45,6 +47,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "9b3d4302",
    "metadata": {},
@@ -67,6 +70,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "65bf13b8",
    "metadata": {},
@@ -91,6 +95,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "5567a6d6",
    "metadata": {},
@@ -112,6 +117,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "f821e93d",
    "metadata": {},
@@ -153,6 +159,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "b8ab2cf1",
    "metadata": {},
@@ -210,6 +217,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "686f0885",
    "metadata": {},
@@ -293,6 +301,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "10d6ed44",
    "metadata": {},
@@ -332,6 +341,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "db3d0ee6",
    "metadata": {},
@@ -367,6 +377,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "5ff0097f",
    "metadata": {},
@@ -375,6 +386,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "87846ae1",
    "metadata": {},
@@ -438,6 +450,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "e7cdc755",
    "metadata": {},
@@ -446,6 +459,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "9cfddd49",
    "metadata": {},
@@ -464,6 +478,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "6b68209a",
    "metadata": {},
@@ -472,6 +487,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "9dbea4b4",
    "metadata": {},
@@ -503,6 +519,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "1c647be8",
    "metadata": {},
@@ -616,6 +633,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "0d68c97c",
    "metadata": {},
@@ -624,6 +642,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "eef58891",
    "metadata": {},
@@ -806,9 +825,12 @@
     }
    ],
    "source": [
-    "from ray.air.config import DatasetConfig, ScalingConfig\n",
+    "from ray.air.config import ScalingConfig\n",
     "from ray.train.torch import TorchTrainer\n",
     "\n",
+    "# The following transform operation is lazy.\n",
+    "# It will be re-run every epoch.\n",
+    "train_dataset = per_epoch_preprocessor.transform(train_dataset)\n",
     "\n",
     "trainer = TorchTrainer(\n",
     "    train_loop_per_worker=train_loop_per_worker,\n",
@@ -823,19 +845,13 @@
     "    },\n",
     "    scaling_config=ScalingConfig(num_workers=4, use_gpu=True),\n",
     "    datasets={\"train\": train_dataset},\n",
-    "    dataset_config={\n",
-    "        # Don't augment test images. Only apply `per_epoch_preprocessor` to the train\n",
-    "        # set.\n",
-    "        \"train\": DatasetConfig(\n",
-    "            per_epoch_preprocessor=per_epoch_preprocessor\n",
-    "        ),\n",
-    "    },\n",
     "    preprocessor=preprocessor,\n",
     ")\n",
     "results = trainer.fit()"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "838101c2",
    "metadata": {},
@@ -854,8 +870,8 @@
   },
   "language_info": {
    "name": "python",
-   "version": "3.10.9",
-   "pygments_lexer": "ipython3"
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
   },
   "vscode": {
    "interpreter": {
diff --git a/doc/source/ray-core/_examples/datasets_train/datasets_train.py b/doc/source/ray-core/_examples/datasets_train/datasets_train.py
index eabad34c03ed..f966ac48b9c2 100644
--- a/doc/source/ray-core/_examples/datasets_train/datasets_train.py
+++ b/doc/source/ray-core/_examples/datasets_train/datasets_train.py
@@ -18,7 +18,8 @@
 import boto3
 import mlflow
 import pandas as pd
-from ray.air.config import DatasetConfig, ScalingConfig
+from ray.air.config import ScalingConfig
+from ray.train.data_config import DataConfig
 from ray.train.torch.torch_trainer import TorchTrainer
 import torch
 import torch.nn as nn
@@ -601,6 +602,10 @@ def to_torch_dataset(torch_batch_iterator):
     DROPOUT_EVERY = 5
     DROPOUT_PROB = 0.2
 
+    # The following random_shuffle operations are lazy.
+    # They will be re-run every epoch.
+    train_dataset = train_dataset.random_shuffle()
+    test_dataset = test_dataset.random_shuffle()
     datasets = {"train": train_dataset, "test": test_dataset}
 
     config = {
@@ -633,7 +638,7 @@ def to_torch_dataset(torch_batch_iterator):
             resources_per_worker=resources_per_worker,
         ),
         run_config=RunConfig(callbacks=callbacks),
-        dataset_config={"train": DatasetConfig(global_shuffle=True)},
+        dataset_config=DataConfig(datasets_to_split=["train", "test"]),
     )
     results = trainer.fit()
     state_dict = results.checkpoint.to_dict()["model"]
diff --git a/python/ray/air/config.py b/python/ray/air/config.py
index 4b2e08ba0a82..4852ed8b408d 100644
--- a/python/ray/air/config.py
+++ b/python/ray/air/config.py
@@ -296,7 +296,8 @@ def from_placement_group_factory(
 @dataclass
 @Deprecated(
     message="Use `ray.train.DataConfig` instead of DatasetConfig to "
-    "configure data ingest for training."
+    "configure data ingest for training. "
+    "See https://docs.ray.io/en/master/ray-air/check-ingest.html for more details."
 )
 class DatasetConfig:
     """Configuration for ingest of a single Dataset.
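
For reviewers: the example edits above all follow the same migration pattern, replacing `DatasetConfig(per_epoch_preprocessor=...)` with an explicit, lazy `Preprocessor.transform()` call before the trainer is constructed. Below is a minimal end-to-end sketch of the new pattern, assuming Ray 2.x with `ray[air]` installed; the toy dataset, the `BatchMapper` stand-in for `per_epoch_preprocessor`, and the no-op training loop are illustrative placeholders, not code from this patch.

```python
import pandas as pd

import ray
from ray.air import session
from ray.air.config import ScalingConfig
from ray.data.preprocessors import BatchMapper
from ray.train.torch import TorchTrainer

# Placeholder data and a placeholder per-epoch preprocessor (in the real
# examples this is a random image augmentation).
dataset = ray.data.from_pandas(pd.DataFrame({"x": list(range(32))}))
per_epoch_preprocessor = BatchMapper(lambda df: df + 1, batch_format="pandas")


def train_loop_per_worker(config):
    shard = session.get_dataset_shard("train")
    for _ in range(config["epochs"]):
        for _batch in shard.iter_batches(batch_size=8):
            pass  # forward/backward pass would go here


# Old (deprecated):
#   dataset_config={"train": DatasetConfig(per_epoch_preprocessor=per_epoch_preprocessor)}
# New: apply the transform up front. Dataset transforms are lazy, so the
# transform is re-executed on every pass over the data, i.e. once per epoch.
dataset = per_epoch_preprocessor.transform(dataset)

trainer = TorchTrainer(
    train_loop_per_worker=train_loop_per_worker,
    train_loop_config={"epochs": 1},
    datasets={"train": dataset},
    scaling_config=ScalingConfig(num_workers=2),
)
trainer.fit()
```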
diff --git a/python/ray/train/horovod/horovod_trainer.py b/python/ray/train/horovod/horovod_trainer.py
index 4cbe65173794..cd5b1734a9dd 100644
--- a/python/ray/train/horovod/horovod_trainer.py
+++ b/python/ray/train/horovod/horovod_trainer.py
@@ -1,6 +1,7 @@
 from typing import Dict, Callable, Optional, Union, TYPE_CHECKING
 
-from ray.air.config import ScalingConfig, RunConfig, DatasetConfig
+from ray.air.config import ScalingConfig, RunConfig
+from ray.train.data_config import DataConfig
 from ray.train.trainer import GenDataset
 from ray.air.checkpoint import Checkpoint
 
@@ -181,7 +182,7 @@ def __init__(
         train_loop_config: Optional[Dict] = None,
         horovod_config: Optional[HorovodConfig] = None,
         scaling_config: Optional[ScalingConfig] = None,
-        dataset_config: Optional[Dict[str, DatasetConfig]] = None,
+        dataset_config: Optional[DataConfig] = None,
         run_config: Optional[RunConfig] = None,
         datasets: Optional[Dict[str, GenDataset]] = None,
         preprocessor: Optional["Preprocessor"] = None,
diff --git a/python/ray/train/huggingface/accelerate/accelerate_trainer.py b/python/ray/train/huggingface/accelerate/accelerate_trainer.py
index e51a63feedec..76e8fd1de470 100644
--- a/python/ray/train/huggingface/accelerate/accelerate_trainer.py
+++ b/python/ray/train/huggingface/accelerate/accelerate_trainer.py
@@ -6,7 +6,8 @@
 
 from ray.air import session
 from ray.air.checkpoint import Checkpoint
-from ray.air.config import DatasetConfig, RunConfig, ScalingConfig
+from ray.air.config import RunConfig, ScalingConfig
+from ray.train.data_config import DataConfig
 from ray.train.torch import TorchConfig
 from ray.train.trainer import GenDataset
 
@@ -263,7 +264,7 @@ def __init__(
         accelerate_config: Optional[Union[dict, str, Path, os.PathLike]] = None,
         torch_config: Optional[TorchConfig] = None,
         scaling_config: Optional[ScalingConfig] = None,
-        dataset_config: Optional[Dict[str, DatasetConfig]] = None,
+        dataset_config: Optional[DataConfig] = None,
         run_config: Optional[RunConfig] = None,
         datasets: Optional[Dict[str, GenDataset]] = None,
         preprocessor: Optional["Preprocessor"] = None,
diff --git a/python/ray/train/huggingface/transformers/transformers_trainer.py b/python/ray/train/huggingface/transformers/transformers_trainer.py
index dae3dd600bf0..696d9ada219d 100644
--- a/python/ray/train/huggingface/transformers/transformers_trainer.py
+++ b/python/ray/train/huggingface/transformers/transformers_trainer.py
@@ -10,11 +10,12 @@
 
 from ray.air import session
 from ray.air.checkpoint import Checkpoint
-from ray.air.config import DatasetConfig, RunConfig, ScalingConfig
+from ray.air.config import RunConfig, ScalingConfig
 from ray.train.constants import (
     EVALUATION_DATASET_KEY,
     TRAIN_DATASET_KEY,
 )
+from ray.train.data_config import DataConfig
 from ray.train.data_parallel_trainer import DataParallelTrainer
 from ray.train.torch import TorchConfig, TorchTrainer
 from ray.train.trainer import GenDataset
 
@@ -254,7 +255,7 @@ def __init__(
         trainer_init_config: Optional[Dict] = None,
         torch_config: Optional[TorchConfig] = None,
         scaling_config: Optional[ScalingConfig] = None,
-        dataset_config: Optional[Dict[str, DatasetConfig]] = None,
+        dataset_config: Optional[DataConfig] = None,
         run_config: Optional[RunConfig] = None,
         datasets: Optional[Dict[str, GenDataset]] = None,
         preprocessor: Optional["Preprocessor"] = None,
diff --git a/python/ray/train/lightning/lightning_trainer.py b/python/ray/train/lightning/lightning_trainer.py
index 54a0587f153c..f986c4518a79 100644
--- a/python/ray/train/lightning/lightning_trainer.py
+++ b/python/ray/train/lightning/lightning_trainer.py
@@ -7,10 +7,11 @@
 from pytorch_lightning.plugins.environments import ClusterEnvironment
 
 from ray.air import session
-from ray.air.config import CheckpointConfig, DatasetConfig, RunConfig, ScalingConfig
+from ray.air.config import CheckpointConfig, RunConfig, ScalingConfig
 from ray.air.constants import MODEL_KEY
 from ray.air.checkpoint import Checkpoint
 from ray.data.preprocessor import Preprocessor
+from ray.train.data_config import DataConfig
 from ray.train.trainer import GenDataset
 from ray.train.torch import TorchTrainer
 from ray.train.torch.config import TorchConfig
@@ -395,7 +396,7 @@ def __init__(
         *,
         torch_config: Optional[TorchConfig] = None,
         scaling_config: Optional[ScalingConfig] = None,
-        dataset_config: Optional[Dict[str, DatasetConfig]] = None,
+        dataset_config: Optional[DataConfig] = None,
         run_config: Optional[RunConfig] = None,
         datasets: Optional[Dict[str, GenDataset]] = None,
         datasets_iter_config: Optional[Dict[str, Any]] = None,
diff --git a/python/ray/train/mosaic/mosaic_trainer.py b/python/ray/train/mosaic/mosaic_trainer.py
index d9c0ce5a6eb2..4b0b5f9a4158 100644
--- a/python/ray/train/mosaic/mosaic_trainer.py
+++ b/python/ray/train/mosaic/mosaic_trainer.py
@@ -6,7 +6,8 @@
 from composer.loggers.logger_destination import LoggerDestination
 
 from ray.air.checkpoint import Checkpoint
-from ray.air.config import DatasetConfig, RunConfig, ScalingConfig
+from ray.air.config import RunConfig, ScalingConfig
+from ray.train.data_config import DataConfig
 from ray.train.mosaic._mosaic_utils import RayLogger
 from ray.train.torch import TorchConfig, TorchTrainer
 from ray.train.trainer import GenDataset
@@ -139,7 +140,7 @@ def __init__(
         trainer_init_config: Optional[Dict] = None,
         torch_config: Optional[TorchConfig] = None,
         scaling_config: Optional[ScalingConfig] = None,
-        dataset_config: Optional[Dict[str, DatasetConfig]] = None,
+        dataset_config: Optional[DataConfig] = None,
         run_config: Optional[RunConfig] = None,
         preprocessor: Optional["Preprocessor"] = None,
         resume_from_checkpoint: Optional[Checkpoint] = None,
diff --git a/python/ray/train/tensorflow/tensorflow_trainer.py b/python/ray/train/tensorflow/tensorflow_trainer.py
index 8819b3f5a358..f204af9f2c80 100644
--- a/python/ray/train/tensorflow/tensorflow_trainer.py
+++ b/python/ray/train/tensorflow/tensorflow_trainer.py
@@ -1,9 +1,10 @@
 from typing import Callable, Optional, Dict, Union, TYPE_CHECKING
 
+from ray.train.data_config import DataConfig
 from ray.train.tensorflow.config import TensorflowConfig
 from ray.train.trainer import GenDataset
 from ray.train.data_parallel_trainer import DataParallelTrainer
-from ray.air.config import ScalingConfig, RunConfig, DatasetConfig
+from ray.air.config import ScalingConfig, RunConfig
 from ray.air.checkpoint import Checkpoint
 from ray.util import PublicAPI
 
@@ -168,7 +169,7 @@ def __init__(
         train_loop_config: Optional[Dict] = None,
         tensorflow_config: Optional[TensorflowConfig] = None,
         scaling_config: Optional[ScalingConfig] = None,
-        dataset_config: Optional[Dict[str, DatasetConfig]] = None,
+        dataset_config: Optional[DataConfig] = None,
         run_config: Optional[RunConfig] = None,
         datasets: Optional[Dict[str, GenDataset]] = None,
         preprocessor: Optional["Preprocessor"] = None,
diff --git a/python/ray/train/torch/torch_trainer.py b/python/ray/train/torch/torch_trainer.py
index 147aff685cf2..41d908eb2d38 100644
--- a/python/ray/train/torch/torch_trainer.py
+++ b/python/ray/train/torch/torch_trainer.py
@@ -1,7 +1,8 @@
 from typing import TYPE_CHECKING, Callable, Dict, Optional, Union
 
 from ray.air.checkpoint import Checkpoint
-from ray.air.config import DatasetConfig, RunConfig, ScalingConfig
+from ray.air.config import RunConfig, ScalingConfig
+from ray.train.data_config import DataConfig
 from ray.train.data_parallel_trainer import DataParallelTrainer
 from ray.train.torch.config import TorchConfig
 from ray.train.trainer import GenDataset
@@ -263,7 +264,7 @@ def __init__(
         train_loop_config: Optional[Dict] = None,
         torch_config: Optional[TorchConfig] = None,
         scaling_config: Optional[ScalingConfig] = None,
-        dataset_config: Optional[Dict[str, DatasetConfig]] = None,
+        dataset_config: Optional[DataConfig] = None,
         run_config: Optional[RunConfig] = None,
         datasets: Optional[Dict[str, GenDataset]] = None,
         preprocessor: Optional["Preprocessor"] = None,
diff --git a/release/air_tests/air_benchmarks/workloads/data_benchmark.py b/release/air_tests/air_benchmarks/workloads/data_benchmark.py
index dc34435cc648..d9cae5dd0de6 100644
--- a/release/air_tests/air_benchmarks/workloads/data_benchmark.py
+++ b/release/air_tests/air_benchmarks/workloads/data_benchmark.py
@@ -4,7 +4,7 @@
 import time
 
 import ray
-from ray.air.config import DatasetConfig, ScalingConfig
+from ray.air.config import ScalingConfig
 from ray.air.util.check_ingest import DummyTrainer
 from ray.data.preprocessors import BatchMapper
 
@@ -35,7 +35,6 @@ def run_ingest_bulk(dataset, num_workers, num_cpus_per_worker):
         preprocessor=dummy_prep,
         num_epochs=1,
         prefetch_batches=1,
-        dataset_config={"train": DatasetConfig(split=True)},
     )
     trainer.fit()
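
A consolidated sketch of the new signature that every trainer above now shares: `dataset_config` takes a single run-level `ray.train.DataConfig` instead of a per-dataset `Dict[str, DatasetConfig]`, and per-dataset behaviors such as `global_shuffle=True` move onto the datasets themselves (as in the `datasets_train.py` change). The tiny datasets and no-op training loop below are placeholder assumptions, not code from this patch.

```python
import ray
from ray.air import session
from ray.air.config import ScalingConfig
from ray.train.data_config import DataConfig
from ray.train.torch import TorchTrainer

# Placeholder datasets.
train_dataset = ray.data.range(64)
test_dataset = ray.data.range(16)

# random_shuffle() is lazy, so it re-runs on every pass over the data;
# this replaces the old DatasetConfig(global_shuffle=True).
train_dataset = train_dataset.random_shuffle()


def train_loop_per_worker(config):
    for _batch in session.get_dataset_shard("train").iter_batches(batch_size=8):
        pass  # training step placeholder


trainer = TorchTrainer(
    train_loop_per_worker=train_loop_per_worker,
    datasets={"train": train_dataset, "test": test_dataset},
    # One DataConfig for the whole run; datasets_to_split controls which
    # datasets are sharded across workers (the rest are replicated).
    dataset_config=DataConfig(datasets_to_split=["train", "test"]),
    scaling_config=ScalingConfig(num_workers=2),
)
trainer.fit()
```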