From 88b2e5a2584c8daea4ea09eba66656b9b7682872 Mon Sep 17 00:00:00 2001
From: Adrian Wälchli
Date: Mon, 21 Nov 2022 21:19:13 +0100
Subject: [PATCH] Revert new Hydra launch behavior (#15737)

* revert new hydra cwd behavior

* remove debug statements

* changelog

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Jirka
Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com>
---
 .../strategies/launchers/subprocess_script.py |  38 ++-----
 src/pytorch_lightning/CHANGELOG.md            |   2 +
 .../strategies/launchers/subprocess_script.py |   5 +-
 .../launchers/test_subprocess_script.py       |   8 +-
 .../launchers/test_subprocess_script.py       | 106 +-----------------
 5 files changed, 23 insertions(+), 136 deletions(-)

diff --git a/src/lightning_lite/strategies/launchers/subprocess_script.py b/src/lightning_lite/strategies/launchers/subprocess_script.py
index 6a521f36387b3..54ed9bf3fe88d 100644
--- a/src/lightning_lite/strategies/launchers/subprocess_script.py
+++ b/src/lightning_lite/strategies/launchers/subprocess_script.py
@@ -14,7 +14,7 @@
 import os
 import subprocess
 import sys
-from typing import Any, Callable, Sequence
+from typing import Any, Callable, Optional, Sequence, Tuple
 
 from lightning_utilities.core.imports import RequirementCache
 
@@ -114,15 +114,16 @@ def _call_children_scripts(self) -> None:
             # start process
             # if hydra is available and initialized, make sure to set the cwd correctly
             hydra_in_use = False
+            cwd: Optional[str] = None
             if _HYDRA_AVAILABLE:
                 from hydra.core.hydra_config import HydraConfig
 
                 hydra_in_use = HydraConfig.initialized()
             if hydra_in_use:
-                command = _hydra_subprocess_cmd(local_rank=local_rank)
+                command, cwd = _hydra_subprocess_cmd(local_rank=local_rank)
             else:
                 command = _basic_subprocess_cmd()
-            subprocess.Popen(command, env=env_copy)
+            subprocess.Popen(command, env=env_copy, cwd=cwd)
 
     def _check_can_spawn_children(self) -> None:
         if self.cluster_environment.local_rank() != 0:
@@ -142,10 +143,9 @@ def _basic_subprocess_cmd() -> Sequence[str]:
     return [sys.executable, "-m", __main__.__spec__.name] + sys.argv[1:]
 
 
-def _hydra_subprocess_cmd(local_rank: int) -> Sequence[str]:
+def _hydra_subprocess_cmd(local_rank: int) -> Tuple[Sequence[str], str]:
     import __main__  # local import to avoid https://github.com/Lightning-AI/lightning/issues/15218
-    from hydra.core.hydra_config import HydraConfig
-    from hydra.utils import to_absolute_path
+    from hydra.utils import get_original_cwd, to_absolute_path
 
     # when user is using hydra find the absolute path
     if __main__.__spec__ is None:  # pragma: no-cover
@@ -153,25 +153,9 @@
     else:
         command = [sys.executable, "-m", __main__.__spec__.name]
 
-    # extract the hydra configuration
-    hydra_cfg = HydraConfig.get()
+    command += sys.argv[1:]
 
-    # the location of the hydra configuration files saved for the current job
-    hydra_output = hydra_cfg.runtime.output_dir
-    if hydra_cfg.output_subdir is not None:
-        hydra_output = os.path.join(hydra_output, hydra_cfg.output_subdir)
-
-    # check if experimental re-run capability exists
-    # otherwise use existing config.yaml which may have issues
-    pickled_config = os.path.join(hydra_output, "config.pickle")
-    if os.path.exists(pickled_config):
-        command += ["--experimental-rerun", pickled_config]
-
-    else:
-        command += ["-cp", hydra_output, "-cn", "config.yaml"]
-        command += [
-            f"hydra.output_subdir=.pl_ddp_hydra_{local_rank}",
-            f"hydra.run.dir={hydra_cfg.runtime.output_dir}",
-        ]
-
-    return command
+    cwd = get_original_cwd()
+    os_cwd = f'"{os.getcwd()}"'
+    command += [f"hydra.run.dir={os_cwd}", f"hydra.job.name=train_ddp_process_{local_rank}"]
+    return command, cwd
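A standalone sketch of what the restored _hydra_subprocess_cmd above produces, for tracing the behavior outside the patch. The function name build_relaunch_command, the argv values, and the paths are illustrative stand-ins; the real helper reads sys.argv and os.getcwd() directly and is called once per non-zero local rank.

import os
import sys
from typing import List, Tuple


def build_relaunch_command(local_rank: int, argv: List[str], original_cwd: str) -> Tuple[List[str], str]:
    # Re-run the same script with the same command-line arguments...
    command = [sys.executable] + argv
    # ...pin the child's Hydra run dir to the directory Hydra already
    # created for rank 0 (quoted, in case the path contains spaces)...
    os_cwd = f'"{os.getcwd()}"'
    command += [f"hydra.run.dir={os_cwd}", f"hydra.job.name=train_ddp_process_{local_rank}"]
    # ...and report the pre-Hydra working directory, which the launcher
    # passes to subprocess.Popen(..., cwd=...) so relative paths resolve
    # for the child the same way they did for rank 0.
    return command, original_cwd


command, cwd = build_relaunch_command(1, ["train.py", "+devices=2"], "/home/user/project")
print(command)  # ends with 'hydra.run.dir="..."' and "hydra.job.name=train_ddp_process_1"
print(cwd)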
diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md
index 4f85b5223fcd5..a1ade9a153f40 100644
--- a/src/pytorch_lightning/CHANGELOG.md
+++ b/src/pytorch_lightning/CHANGELOG.md
@@ -53,6 +53,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Removed deprecated `pytorch_lightning.utilities.memory.get_gpu_memory_map` in favor of `pytorch_lightning.accelerators.cuda.get_nvidia_gpu_stats` ([#15617](https://github.com/Lightning-AI/lightning/pull/15617))
 
+- Temporarily removed support for Hydra multi-run ([#15737](https://github.com/Lightning-AI/lightning/pull/15737))
+
 ### Fixed
 
diff --git a/src/pytorch_lightning/strategies/launchers/subprocess_script.py b/src/pytorch_lightning/strategies/launchers/subprocess_script.py
index 354fcc92276f3..1a10c0e7cad72 100644
--- a/src/pytorch_lightning/strategies/launchers/subprocess_script.py
+++ b/src/pytorch_lightning/strategies/launchers/subprocess_script.py
@@ -109,17 +109,18 @@ def _call_children_scripts(self) -> None:
                 del env_copy["PL_GLOBAL_SEED"]
 
             hydra_in_use = False
+            cwd: Optional[str] = None
             if _HYDRA_AVAILABLE:
                 from hydra.core.hydra_config import HydraConfig
 
                 hydra_in_use = HydraConfig.initialized()
 
             if hydra_in_use:
-                command = _hydra_subprocess_cmd(local_rank)
+                command, cwd = _hydra_subprocess_cmd(local_rank)
             else:
                 command = _basic_subprocess_cmd()
 
-            subprocess.Popen(command, env=env_copy)
+            subprocess.Popen(command, env=env_copy, cwd=cwd)
 
     def _check_can_spawn_children(self) -> None:
         if self.cluster_environment.local_rank() != 0:
diff --git a/tests/tests_lite/strategies/launchers/test_subprocess_script.py b/tests/tests_lite/strategies/launchers/test_subprocess_script.py
index 69fb8ea468fa8..5634340982a6c 100644
--- a/tests/tests_lite/strategies/launchers/test_subprocess_script.py
+++ b/tests/tests_lite/strategies/launchers/test_subprocess_script.py
@@ -83,7 +83,7 @@ def test_subprocess_script_launcher_launch_processes(popen_mock):
 @mock.patch("lightning_lite.strategies.launchers.subprocess_script.subprocess.Popen")
 def test_subprocess_script_launcher_hydra_in_use(popen_mock, _, monkeypatch):
     basic_command = Mock(return_value="basic_command")
-    hydra_command = Mock(return_value="hydra_command")
+    hydra_command = Mock(return_value=("hydra_command", "hydra_cwd"))
     monkeypatch.setattr(lightning_lite.strategies.launchers.subprocess_script, "_basic_subprocess_cmd", basic_command)
     monkeypatch.setattr(lightning_lite.strategies.launchers.subprocess_script, "_hydra_subprocess_cmd", hydra_command)
 
@@ -100,7 +100,7 @@ def simulate_launch():
     # when hydra not available
     monkeypatch.setattr(lightning_lite.strategies.launchers.subprocess_script, "_HYDRA_AVAILABLE", False)
     simulate_launch()
-    popen_mock.assert_called_with("basic_command", env=ANY)
+    popen_mock.assert_called_with("basic_command", env=ANY, cwd=None)
     popen_mock.reset_mock()
 
     import hydra
@@ -111,7 +111,7 @@ def simulate_launch():
     HydraConfigMock.initialized.return_value = False
     monkeypatch.setattr(hydra.core.hydra_config, "HydraConfig", HydraConfigMock)
     simulate_launch()
-    popen_mock.assert_called_with("basic_command", env=ANY)
+    popen_mock.assert_called_with("basic_command", env=ANY, cwd=None)
     popen_mock.reset_mock()
 
     # when hydra available and initialized
@@ -120,5 +120,5 @@
     HydraConfigMock = Mock()
     HydraConfigMock.initialized.return_value = True
     monkeypatch.setattr(hydra.core.hydra_config, "HydraConfig", HydraConfigMock)
     simulate_launch()
-    popen_mock.assert_called_with("hydra_command", env=ANY)
+    popen_mock.assert_called_with("hydra_command", env=ANY, cwd="hydra_cwd")
     popen_mock.reset_mock()
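The tests above pin down the contract both launchers now share: Popen receives cwd=None (inherit the parent's working directory) unless Hydra is initialized, in which case the child starts from the pre-Hydra directory. A minimal sketch of that call, with a stand-in command instead of the real training command:

import os
import subprocess
import sys

env_copy = os.environ.copy()
# cwd=None is subprocess.Popen's default and means "inherit the caller's
# working directory", which is why the non-Hydra path can pass it through
# unconditionally.
child = subprocess.Popen([sys.executable, "-c", "import os; print(os.getcwd())"], env=env_copy, cwd=None)
child.wait()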
diff --git a/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py b/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
index 83605f53873db..5b495f13ca316 100644
--- a/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
+++ b/tests/tests_pytorch/strategies/launchers/test_subprocess_script.py
@@ -1,35 +1,17 @@
-import logging
-import os
 import sys
-from pathlib import Path
 
 import pytest
 from lightning_utilities.core.imports import RequirementCache
 
-from pytorch_lightning.strategies.launchers.subprocess_script import _HYDRA_AVAILABLE
 from tests_pytorch.helpers.runif import RunIf
 
 _HYDRA_WITH_RERUN = RequirementCache("hydra-core>=1.2")
 _HYDRA_WITH_RUN_PROCESS = RequirementCache("hydra-core>=1.0.7")
 
-if _HYDRA_AVAILABLE:
-    from omegaconf import OmegaConf
 if _HYDRA_WITH_RUN_PROCESS:
     from hydra.test_utils.test_utils import run_process
 
 
-# fixture to run hydra jobs in a clean temporary directory
-# Hydra creates its own output directories and logs
-@pytest.fixture
-def cleandir(tmp_path):
-    """Run function in a temporary directory."""
-    old_dir = os.getcwd()  # get current working directory (cwd)
-    os.chdir(tmp_path)  # change cwd to the temp-directory
-    yield tmp_path  # yields control to the test to be run
-    os.chdir(old_dir)
-    logging.shutdown()
-
-
 # Script to run from command line
 script = """
 import hydra
@@ -64,7 +46,9 @@ def task_fn(cfg):
 @RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
 @pytest.mark.skipif(not _HYDRA_WITH_RUN_PROCESS, reason=str(_HYDRA_WITH_RUN_PROCESS))
 @pytest.mark.parametrize("subdir", [None, "dksa", ".hello"])
-def test_ddp_with_hydra_runjob(cleandir, subdir):
+def test_ddp_with_hydra_runjob(subdir, tmpdir, monkeypatch):
+    monkeypatch.chdir(tmpdir)
+
     # Save script locally
     with open("temp.py", "w") as fn:
         fn.write(script)
@@ -75,87 +59,3 @@ def test_ddp_with_hydra_runjob(cleandir, subdir):
     if subdir is not None:
         cmd += [f"hydra.output_subdir={subdir}"]
     run_process(cmd)
-
-    # Make sure config.yaml was created for additional
-    # processes.
-    logs = list(Path.cwd().glob("**/config.yaml"))
-    assert len(logs) == devices
-
-    # Make sure the parameter was set and used
-    cfg = OmegaConf.load(logs[0])
-    assert cfg.devices == devices
-
-    # Make sure PL spawned a job that is logged by Hydra
-    logs = list(Path.cwd().glob("**/*.log"))
-    assert len(logs) == 1
-
-
-@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
-@pytest.mark.skipif(not _HYDRA_WITH_RUN_PROCESS, reason=str(_HYDRA_WITH_RUN_PROCESS))
-@pytest.mark.parametrize("num_jobs", [1, 2])
-def test_ddp_with_hydra_multirunjob(cleandir, num_jobs):
-    # Save script locally
-    with open("temp.py", "w") as fn:
-        fn.write(script)
-
-    # create fake multirun params based on `num_jobs`
-    fake_param = "+foo=" + ",".join(str(i) for i in range(num_jobs))
-
-    # Run CLI
-    run_process([sys.executable, "temp.py", "+devices=2", '+strategy="ddp"', fake_param, "--multirun"])
-
-    # Make sure config.yaml was created for each job
-    configs = sorted(Path.cwd().glob("**/.pl_ddp_hydra_*/config.yaml"))
-    assert len(configs) == num_jobs
-
-    # Make sure the parameter was set and used for each job
-    for i, config in enumerate(configs):
-        cfg = OmegaConf.load(config)
-        local_rank = int(config.parent.parent.parts[-1])
-        assert cfg.devices == 2
-        assert cfg.foo == local_rank
-
-    logs = list(Path.cwd().glob("**/*.log"))
-    assert len(logs) == num_jobs
-
-
-yaml_file = """
-hydra:
-  callbacks:
-    save_job_info:
-      _target_: hydra.experimental.callbacks.PickleJobInfoCallback
-"""
-
-
-@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
-@pytest.mark.skipif(not _HYDRA_WITH_RERUN, reason=str(_HYDRA_WITH_RERUN))
-@pytest.mark.parametrize("num_jobs", [1, 2])
-def test_ddp_with_hydra_multirunjob_rerun(cleandir, num_jobs):
-    # Save script locally
-    with open("temp.py", "w") as fn:
-        fn.write(script)
-
-    with open("config.yaml", "w") as fn:
-        fn.write(yaml_file)
-
-    # create fake multirun params based on `num_jobs`
-    fake_param = "+foo=" + ",".join(str(i) for i in range(num_jobs))
-
-    # Run CLI
-    run_process(
-        [
-            sys.executable,
-            "temp.py",
-            "-cp",
-            ".",
-            "-cn",
-            "config.yaml",
-            "+devices=2",
-            '+strategy="ddp"',
-            fake_param,
-            "--multirun",
-        ]
-    )
-
-    pickles = sorted(Path.cwd().glob("**/.hydra/config.pickle"))
-    assert len(pickles) == num_jobs
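For reference, the pytest idiom the surviving test switches to: monkeypatch.chdir replaces the deleted hand-rolled cleandir fixture and restores the previous working directory automatically at teardown. A minimal self-contained sketch (the test name is hypothetical; tmp_path and monkeypatch are built-in pytest fixtures):

import os


def test_runs_in_clean_tmp_dir(tmp_path, monkeypatch):
    # chdir into an empty per-test directory; undone automatically when
    # the test finishes, so no try/finally or yield-based fixture is needed
    monkeypatch.chdir(tmp_path)
    with open("temp.py", "w") as fh:
        fh.write("print('hello')\n")
    assert os.path.samefile(os.getcwd(), str(tmp_path))
    assert (tmp_path / "temp.py").exists()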