From 950fc333d1eaf35ef31e9b98059f989c605662af Mon Sep 17 00:00:00 2001
From: Justin Yu
Date: Mon, 27 Mar 2023 22:50:44 -0700
Subject: [PATCH] [Tune] Don't recommend `tune.run` API in logging messages
 when using the `Tuner` (#33642)

Signed-off-by: Justin Yu
---
 python/ray/tune/execution/trial_runner.py |  9 +--
 python/ray/tune/impl/tuner_internal.py    |  1 +
 python/ray/tune/tune.py                   | 80 ++++++++++++++---
 3 files changed, 55 insertions(+), 35 deletions(-)

diff --git a/python/ray/tune/execution/trial_runner.py b/python/ray/tune/execution/trial_runner.py
index a24d09f778186..9852e6c651a13 100644
--- a/python/ray/tune/execution/trial_runner.py
+++ b/python/ray/tune/execution/trial_runner.py
@@ -4,6 +4,7 @@
 import json
 import logging
 import os
+from pathlib import Path
 import time
 import traceback
 import warnings
@@ -378,14 +379,14 @@ def restore_from_dir(self, experiment_dir: Optional[str] = None) -> List[Trial]:
             )

         # Set checkpoint file to load
-        logger.info(
-            f"Using following experiment state file to resume: " f"{newest_state_path}"
-        )
-
         logger.warning(
             f"Attempting to resume experiment from {self._local_experiment_path}. "
             "This will ignore any new changes to the specification."
         )
+        logger.info(
+            "Using the newest experiment state file found within the "
+            f"experiment directory: {Path(newest_state_path).name}"
+        )

         # Actually load data
         with open(newest_state_path, "r") as f:
diff --git a/python/ray/tune/impl/tuner_internal.py b/python/ray/tune/impl/tuner_internal.py
index 5d9d99a1224cd..caea955f7b634 100644
--- a/python/ray/tune/impl/tuner_internal.py
+++ b/python/ray/tune/impl/tuner_internal.py
@@ -595,6 +595,7 @@ def _get_tune_run_arguments(self, trainable: TrainableType) -> Dict[str, Any]:
             trial_name_creator=self._tune_config.trial_name_creator,
             trial_dirname_creator=self._tune_config.trial_dirname_creator,
             chdir_to_trial_dir=self._tune_config.chdir_to_trial_dir,
+            _tuner_api=True,
         )

     def _fit_internal(
diff --git a/python/ray/tune/tune.py b/python/ray/tune/tune.py
index 1faa40d04d2a5..cddde5e28d1e5 100644
--- a/python/ray/tune/tune.py
+++ b/python/ray/tune/tune.py
@@ -4,6 +4,7 @@
 import datetime
 import logging
 import os
+from pathlib import Path
 import signal
 import sys
 import threading
@@ -208,6 +209,19 @@ def signal_interrupt_tune_run(sig: int, frame):
     return experiment_interrupted_event


+def _ray_auto_init(entrypoint: str):
+    """Initialize Ray unless already configured."""
+    if os.environ.get("TUNE_DISABLE_AUTO_INIT") == "1":
+        logger.info("'TUNE_DISABLE_AUTO_INIT=1' detected.")
+    elif not ray.is_initialized():
+        ray.init()
+        logger.info(
+            "Initializing Ray automatically. "
+            "For cluster usage or custom Ray initialization, "
+            f"call `ray.init(...)` before `{entrypoint}`."
+        )
+
+
 class _Config(abc.ABC):
     def to_dict(self) -> dict:
         """Converts this configuration to a dict format."""
@@ -259,6 +273,7 @@ def run(
     _remote: Optional[bool] = None,
     # Passed by the Tuner.
     _remote_string_queue: Optional[Queue] = None,
+    _tuner_api: bool = False,
 ) -> ExperimentAnalysis:
     """Executes training.

@@ -466,6 +481,21 @@ class and registered trainables.
     remote_run_kwargs = locals().copy()
     remote_run_kwargs.pop("_remote")

+    error_message_map = (
+        {
+            "entrypoint": "Tuner(...)",
+            "search_space_arg": "param_space",
+            "restore_entrypoint": 'Tuner.restore(path="{path}", trainable=...)',
+        }
+        if _tuner_api
+        else {
+            "entrypoint": "tune.run(...)",
+            "search_space_arg": "config",
+            "restore_entrypoint": "tune.run(..., resume=True)",
+        }
+    )
+    _ray_auto_init(entrypoint=error_message_map["entrypoint"])
+
     if _remote is None:
         _remote = ray.util.client.ray.is_connected()

@@ -478,9 +508,6 @@ class and registered trainables.
             DeprecationWarning,
         )

-    if not trial_executor or isinstance(trial_executor, RayTrialExecutor):
-        _ray_auto_init()
-
     if _remote:
         if get_air_verbosity():
             logger.warning(
@@ -517,8 +544,8 @@ class and registered trainables.

     if mode and mode not in ["min", "max"]:
         raise ValueError(
-            "The `mode` parameter passed to `tune.run()` has to be one of "
-            "['min', 'max']"
+            f"The `mode` parameter passed to `{error_message_map['entrypoint']}` "
+            "must be one of ['min', 'max']"
         )

     air_verbosity = get_air_verbosity()
@@ -536,7 +563,8 @@ class and registered trainables.
             config = config.to_dict()
     if not isinstance(config, dict):
         raise ValueError(
-            "The `config` passed to `tune.run()` must be a dict. "
+            f"The `{error_message_map['search_space_arg']}` passed to "
+            f"`{error_message_map['entrypoint']}` must be a dict. "
             f"Got '{type(config)}' instead."
         )

@@ -669,8 +697,6 @@ class and registered trainables.
             max_failures=max_failures,
             restore=restore,
         )
-    else:
-        logger.debug("Ignoring some parameters passed into tune.run.")

     if fail_fast and max_failures != 0:
         raise ValueError("max_failures must be 0 if fail_fast=True.")
@@ -734,7 +760,8 @@ class and registered trainables.
     ):
         if _has_unresolved_values(config):
             raise ValueError(
-                "You passed a `config` parameter to `tune.run()` with "
+                f"You passed a `{error_message_map['search_space_arg']}` parameter to "
+                f"`{error_message_map['entrypoint']}` with "
                 "unresolved parameters, but the search algorithm was already "
                 "instantiated with a search space. Make sure that `config` "
                 "does not contain any more parameter definitions - include "
@@ -745,10 +772,11 @@ class and registered trainables.
         scheduler.set_search_properties, metric, mode, **experiments[0].public_spec
     ):
         raise ValueError(
-            "You passed a `metric` or `mode` argument to `tune.run()`, but "
+            "You passed a `metric` or `mode` argument to "
+            f"`{error_message_map['entrypoint']}`, but "
             "the scheduler you are using was already instantiated with their "
             "own `metric` and `mode` parameters. Either remove the arguments "
-            "from your scheduler or from your call to `tune.run()`"
+            f"from your scheduler or from `{error_message_map['entrypoint']}` args."
         )

     progress_metrics = _detect_progress_metrics(_get_trainable(run_or_experiment))
@@ -830,9 +858,10 @@ class and registered trainables.
             for exp in experiments:
                 search_alg.add_configurations([exp])
         else:
-            logger.info(
-                "TrialRunner resumed, ignoring new add_experiment but "
-                "updating trial resources."
+            logger.debug(
+                "You have resumed the Tune run, which means that any newly specified "
+                "`Experiment`s will be ignored. "
+                "Tune will just continue what was previously running."
             )
             if resources_per_trial:
                 runner.update_pending_trial_resources(resources_per_trial)
@@ -919,10 +948,12 @@ class and registered trainables.
     )

     if experiment_interrupted_event.is_set():
+        restore_entrypoint = error_message_map["restore_entrypoint"].format(
+            path=Path(experiment_checkpoint).parent,
+        )
         logger.warning(
-            "Experiment has been interrupted, but the most recent state was "
-            "saved. You can continue running this experiment by passing "
-            "`resume=True` to `tune.run()`"
+            "Experiment has been interrupted, but the most recent state was saved.\n"
+            f"Continue running this experiment with: {restore_entrypoint}"
         )
     ea = ExperimentAnalysis(
         experiment_checkpoint,
@@ -972,7 +1003,7 @@ def run_experiments(
         raise ValueError("cannot use custom trial executor")

     if not trial_executor or isinstance(trial_executor, RayTrialExecutor):
-        _ray_auto_init()
+        _ray_auto_init(entrypoint="tune.run_experiments(...)")

     if _remote:
         if get_air_verbosity():
@@ -1036,16 +1067,3 @@ def run_experiments(
         callbacks=callbacks,
     ).trials
     return trials
-
-
-def _ray_auto_init():
-    """Initialize Ray unless already configured."""
-    if os.environ.get("TUNE_DISABLE_AUTO_INIT") == "1":
-        logger.info("'TUNE_DISABLE_AUTO_INIT=1' detected.")
-    elif not ray.is_initialized():
-        logger.info(
-            "Initializing Ray automatically."
-            "For cluster usage or custom Ray initialization, "
-            "call `ray.init(...)` before `tune.run`."
-        )
-        ray.init()
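
With this patch, the interrupt warning steers `Tuner` users toward `Tuner.restore(path=..., trainable=...)` instead of `tune.run(..., resume=True)`, mirroring the strings in `error_message_map`. A minimal sketch of that resume flow from the user side, assuming a placeholder trainable and experiment directory that are not part of this diff:

    from ray.tune import Tuner

    def my_trainable(config):
        # Placeholder trainable used only for illustration.
        return {"score": config.get("x", 1) ** 2}

    # Resume an interrupted Tuner run, as the new warning message suggests.
    # "~/ray_results/my_experiment" is a hypothetical results directory.
    tuner = Tuner.restore(path="~/ray_results/my_experiment", trainable=my_trainable)
    results = tuner.fit()

    # The legacy entrypoint keeps its original recommendation:
    # tune.run(my_trainable, config={"x": 1}, resume=True)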