[Tune] Don't recommend tune.run API in logging messages when using the `Tuner` (#33642)

Signed-off-by: Justin Yu <justinvyu@berkeley.edu>
justinvyu authored Mar 28, 2023
1 parent b6958ed commit 950fc33
Showing 3 changed files with 55 additions and 35 deletions.
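For context, the two entrypoints whose names now drive Tune's user-facing messages are sketched below. This is an illustrative snippet rather than part of the diff; the trainable, metric name, and search space are placeholder assumptions.

from ray import tune
from ray.air import session
from ray.tune import Tuner


def trainable(config):
    # Placeholder trainable for illustration only.
    session.report({"score": config["x"] ** 2})


search_space = {"x": tune.grid_search([1, 2, 3])}

# Legacy entrypoint: messages keep referring to `tune.run(...)` and its
# `config` argument.
analysis = tune.run(trainable, config=search_space)

# Tuner entrypoint: after this change, messages refer to `Tuner(...)` and
# `param_space` instead of `tune.run(...)` and `config`.
tuner = Tuner(trainable, param_space=search_space)
results = tuner.fit()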
9 changes: 5 additions & 4 deletions python/ray/tune/execution/trial_runner.py
@@ -4,6 +4,7 @@
import json
import logging
import os
from pathlib import Path
import time
import traceback
import warnings
@@ -378,14 +379,14 @@ def restore_from_dir(self, experiment_dir: Optional[str] = None) -> List[Trial]:
)

# Set checkpoint file to load
logger.info(
f"Using following experiment state file to resume: " f"{newest_state_path}"
)

logger.warning(
f"Attempting to resume experiment from {self._local_experiment_path}. "
"This will ignore any new changes to the specification."
)
logger.info(
"Using the newest experiment state file found within the "
f"experiment directory: {Path(newest_state_path).name}"
)

# Actually load data
with open(newest_state_path, "r") as f:
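Illustratively, the reordered resume messages introduced above now read roughly as follows; the directory and state-file names in this sketch are hypothetical.

from pathlib import Path

# Hypothetical values standing in for self._local_experiment_path and
# newest_state_path.
local_experiment_path = "~/ray_results/my_experiment"
newest_state_path = "~/ray_results/my_experiment/experiment_state-2023-03-28_00-00-00.json"

print(
    f"Attempting to resume experiment from {local_experiment_path}. "
    "This will ignore any new changes to the specification."
)
print(
    "Using the newest experiment state file found within the "
    # Only the file name is shown now, not the full path.
    f"experiment directory: {Path(newest_state_path).name}"
)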
1 change: 1 addition & 0 deletions python/ray/tune/impl/tuner_internal.py
@@ -595,6 +595,7 @@ def _get_tune_run_arguments(self, trainable: TrainableType) -> Dict[str, Any]:
trial_name_creator=self._tune_config.trial_name_creator,
trial_dirname_creator=self._tune_config.trial_dirname_creator,
chdir_to_trial_dir=self._tune_config.chdir_to_trial_dir,
_tuner_api=True,
)

def _fit_internal(
80 changes: 49 additions & 31 deletions python/ray/tune/tune.py
@@ -4,6 +4,7 @@
import datetime
import logging
import os
from pathlib import Path
import signal
import sys
import threading
@@ -208,6 +209,19 @@ def signal_interrupt_tune_run(sig: int, frame):
return experiment_interrupted_event


def _ray_auto_init(entrypoint: str):
"""Initialize Ray unless already configured."""
if os.environ.get("TUNE_DISABLE_AUTO_INIT") == "1":
logger.info("'TUNE_DISABLE_AUTO_INIT=1' detected.")
elif not ray.is_initialized():
ray.init()
logger.info(
"Initializing Ray automatically. "
"For cluster usage or custom Ray initialization, "
f"call `ray.init(...)` before `{entrypoint}`."
)


class _Config(abc.ABC):
def to_dict(self) -> dict:
"""Converts this configuration to a dict format."""
@@ -259,6 +273,7 @@ def run(
_remote: Optional[bool] = None,
# Passed by the Tuner.
_remote_string_queue: Optional[Queue] = None,
_tuner_api: bool = False,
) -> ExperimentAnalysis:
"""Executes training.
@@ -466,6 +481,21 @@ class and registered trainables.
remote_run_kwargs = locals().copy()
remote_run_kwargs.pop("_remote")

error_message_map = (
{
"entrypoint": "Tuner(...)",
"search_space_arg": "param_space",
"restore_entrypoint": 'Tuner.restore(path="{path}", trainable=...)',
}
if _tuner_api
else {
"entrypoint": "tune.run(...)",
"search_space_arg": "config",
"restore_entrypoint": "tune.run(..., resume=True)",
}
)
_ray_auto_init(entrypoint=error_message_map["entrypoint"])

if _remote is None:
_remote = ray.util.client.ray.is_connected()

@@ -478,9 +508,6 @@ class and registered trainables.
DeprecationWarning,
)

if not trial_executor or isinstance(trial_executor, RayTrialExecutor):
_ray_auto_init()

if _remote:
if get_air_verbosity():
logger.warning(
@@ -517,8 +544,8 @@ class and registered trainables.

if mode and mode not in ["min", "max"]:
raise ValueError(
"The `mode` parameter passed to `tune.run()` has to be one of "
"['min', 'max']"
f"The `mode` parameter passed to `{error_message_map['entrypoint']}` "
"must be one of ['min', 'max']"
)

air_verbosity = get_air_verbosity()
@@ -536,7 +563,8 @@ class and registered trainables.
config = config.to_dict()
if not isinstance(config, dict):
raise ValueError(
"The `config` passed to `tune.run()` must be a dict. "
f"The `{error_message_map['search_space_arg']}` passed to "
f"`{error_message_map['entrypoint']}` must be a dict. "
f"Got '{type(config)}' instead."
)

@@ -669,8 +697,6 @@ class and registered trainables.
max_failures=max_failures,
restore=restore,
)
else:
logger.debug("Ignoring some parameters passed into tune.run.")

if fail_fast and max_failures != 0:
raise ValueError("max_failures must be 0 if fail_fast=True.")
@@ -734,7 +760,8 @@
):
if _has_unresolved_values(config):
raise ValueError(
"You passed a `config` parameter to `tune.run()` with "
f"You passed a `{error_message_map['search_space_arg']}` parameter to "
f"`{error_message_map['entrypoint']}` with "
"unresolved parameters, but the search algorithm was already "
"instantiated with a search space. Make sure that `config` "
"does not contain any more parameter definitions - include "
@@ -745,10 +772,11 @@
scheduler.set_search_properties, metric, mode, **experiments[0].public_spec
):
raise ValueError(
"You passed a `metric` or `mode` argument to `tune.run()`, but "
"You passed a `metric` or `mode` argument to "
f"`{error_message_map['entrypoint']}`, but "
"the scheduler you are using was already instantiated with their "
"own `metric` and `mode` parameters. Either remove the arguments "
"from your scheduler or from your call to `tune.run()`"
f"from your scheduler or from `{error_message_map['entrypoint']}` args."
)

progress_metrics = _detect_progress_metrics(_get_trainable(run_or_experiment))
@@ -830,9 +858,10 @@ class and registered trainables.
for exp in experiments:
search_alg.add_configurations([exp])
else:
logger.info(
"TrialRunner resumed, ignoring new add_experiment but "
"updating trial resources."
logger.debug(
"You have resumed the Tune run, which means that any newly specified "
"`Experiment`s will be ignored. "
"Tune will just continue what was previously running."
)
if resources_per_trial:
runner.update_pending_trial_resources(resources_per_trial)
@@ -919,10 +948,12 @@
)

if experiment_interrupted_event.is_set():
restore_entrypoint = error_message_map["restore_entrypoint"].format(
path=Path(experiment_checkpoint).parent,
)
logger.warning(
"Experiment has been interrupted, but the most recent state was "
"saved. You can continue running this experiment by passing "
"`resume=True` to `tune.run()`"
"Experiment has been interrupted, but the most recent state was saved.\n"
f"Continue running this experiment with: {restore_entrypoint}"
)
ea = ExperimentAnalysis(
experiment_checkpoint,
@@ -972,7 +1003,7 @@ def run_experiments(
raise ValueError("cannot use custom trial executor")

if not trial_executor or isinstance(trial_executor, RayTrialExecutor):
_ray_auto_init()
_ray_auto_init(entrypoint="tune.run_experiments(...)")

if _remote:
if get_air_verbosity():
@@ -1036,16 +1067,3 @@ def run_experiments(
callbacks=callbacks,
).trials
return trials


def _ray_auto_init():
"""Initialize Ray unless already configured."""
if os.environ.get("TUNE_DISABLE_AUTO_INIT") == "1":
logger.info("'TUNE_DISABLE_AUTO_INIT=1' detected.")
elif not ray.is_initialized():
logger.info(
"Initializing Ray automatically."
"For cluster usage or custom Ray initialization, "
"call `ray.init(...)` before `tune.run`."
)
ray.init()

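As a closing illustration, here is a hedged sketch of the resume flow that the new interruption message points users to, assuming an interrupted Tuner run left its state under the placeholder path below; the path and trainable are illustrative, not taken from the diff.

from ray.air import session
from ray.tune import Tuner


def trainable(config):
    # Must match the trainable used by the interrupted run; placeholder here.
    session.report({"score": config["x"] ** 2})


# Placeholder for the experiment path printed in the interruption message.
experiment_path = "~/ray_results/my_experiment"

# Old message suggested the legacy flag:
#   tune.run(trainable, config=..., resume=True)

# New message (when the run was launched through the Tuner) suggests restoring:
tuner = Tuner.restore(path=experiment_path, trainable=trainable)
results = tuner.fit()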