From dc16a1f97b2cad46ce66fc7f4ad69854cff1063a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Thu, 6 Aug 2020 23:32:14 +0200
Subject: [PATCH 01/18] add ddp script variations

---
 tests/models/data/ddp/train_default_model.py | 54 ++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 tests/models/data/ddp/train_default_model.py

diff --git a/tests/models/data/ddp/train_default_model.py b/tests/models/data/ddp/train_default_model.py
new file mode 100644
index 0000000000000..2949750951099
--- /dev/null
+++ b/tests/models/data/ddp/train_default_model.py
@@ -0,0 +1,54 @@
+"""
+Runs several combinations of `.fit()` and `.test()` on a single node across multiple gpus.
+"""
+from argparse import ArgumentParser
+
+from pytorch_lightning import Trainer, seed_everything
+from tests.base import EvalModelTemplate
+
+
+def variation_fit_test(trainer, model):
+    trainer.fit(model)
+    trainer.test(model)
+
+
+def variation_test_fit(trainer, model):
+    trainer.test(model)
+    trainer.fit(model)
+
+
+def variation_test_test(trainer, model):
+    trainer.test(model)
+    trainer.test(model)
+
+
+def variation_test_fit_test(trainer, model):
+    trainer.test(model)
+    trainer.fit(model)
+    trainer.test(model)
+
+
+def get_variations():
+    variations = [v for v in globals() if v.startswith("variation")]
+    return variations
+
+
+def main():
+    seed_everything(1234)
+    parser = ArgumentParser(add_help=False)
+    parser = Trainer.add_argparse_args(parser)
+    parser.add_argument('--variation', default=variation_fit_test.__name__)
+    parser.set_defaults(gpus=2)
+    parser.set_defaults(distributed_backend="ddp")
+    args = parser.parse_args()
+
+    model = EvalModelTemplate()
+    trainer = Trainer.from_argparse_args(args)
+
+    # run the chosen variation
+    run_variation = globals()[args.variation]
+    run_variation(trainer, model)
+
+
+if __name__ == '__main__':
+    main()
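
A quick way to see what the helper above reports, as a hedged sketch (it assumes the repository root is importable, exactly as the test added in the next patch assumes); `get_variations()` simply returns every module-level name starting with "variation", in definition order:

    from tests.models.data.ddp import train_default_model

    print(train_default_model.get_variations())
    # ['variation_fit_test', 'variation_test_fit', 'variation_test_test', 'variation_test_fit_test']
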
From 903155861ba2f61874f4f32d4ee46197eea2afb6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Thu, 6 Aug 2020 23:34:01 +0200
Subject: [PATCH 02/18] add ddp test

---
 tests/models/test_gpu.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 7497a53083612..2101496b16d2d 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -1,4 +1,7 @@
+import subprocess
+import sys
 from collections import namedtuple
+from pathlib import Path

 import pytest
 import torch
@@ -11,6 +14,7 @@
 from pytorch_lightning.trainer.distrib_parts import _parse_gpu_ids, determine_root_gpu_device
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.base import EvalModelTemplate
+from tests.models.data.ddp import train_default_model


 PRETEND_N_OF_GPUS = 16
@@ -93,6 +97,26 @@ def test_multi_gpu_model_dp(tmpdir):
     memory.get_memory_profile('min_max')


+@pytest.mark.parametrize('cli_args', [
+    pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
+])
+@pytest.mark.parametrize('variation', train_default_model.get_variations())
+@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
+def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
+    file = Path(train_default_model.__file__).absolute()
+    cli_args = cli_args.split(' ') if cli_args else []
+    cli_args += ['--default_root_dir', str(tmpdir)]
+    command = [sys.executable, file, '--variation', variation] + cli_args
+    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    std, err = p.communicate(timeout=60)
+    #assert std and not err
+    if p.returncode:
+        print(std)
+        print(err)
+        print(command)
+        raise RuntimeError('error')
+
+
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_spawn(tmpdir):
     tutils.set_random_master_port()

From b5bc4d6afa43d422c1a899489a7592b49ce796bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Fri, 7 Aug 2020 02:22:07 +0200
Subject: [PATCH 03/18] rename

---
 .../{train_default_model.py => train_test_variations.py} | 0
 tests/models/test_gpu.py                                  | 8 ++++----
 2 files changed, 4 insertions(+), 4 deletions(-)
 rename tests/models/data/ddp/{train_default_model.py => train_test_variations.py} (100%)

diff --git a/tests/models/data/ddp/train_default_model.py b/tests/models/data/ddp/train_test_variations.py
similarity index 100%
rename from tests/models/data/ddp/train_default_model.py
rename to tests/models/data/ddp/train_test_variations.py
diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 2101496b16d2d..890bd3f9ab2d1 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -14,7 +14,7 @@
 from pytorch_lightning.trainer.distrib_parts import _parse_gpu_ids, determine_root_gpu_device
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.base import EvalModelTemplate
-from tests.models.data.ddp import train_default_model
+from tests.models.data.ddp import train_test_variations


 PRETEND_N_OF_GPUS = 16
@@ -100,14 +100,14 @@
 @pytest.mark.parametrize('cli_args', [
     pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
 ])
-@pytest.mark.parametrize('variation', train_default_model.get_variations())
+@pytest.mark.parametrize('variation', train_test_variations.get_variations())
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
-    file = Path(train_default_model.__file__).absolute()
+    file = Path(train_test_variations.__file__).absolute()
     cli_args = cli_args.split(' ') if cli_args else []
     cli_args += ['--default_root_dir', str(tmpdir)]
     command = [sys.executable, file, '--variation', variation] + cli_args
-    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
     std, err = p.communicate(timeout=60)
     #assert std and not err
     if p.returncode:
         print(std)
         print(err)
         print(command)
         raise RuntimeError('error')

From 13fc64afea9c81effa789bc2135f334548f76fdf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Fri, 7 Aug 2020 02:38:53 +0200
Subject: [PATCH 04/18] shell

---
 tests/models/test_gpu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 890bd3f9ab2d1..eb7e632962df3 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -107,7 +107,7 @@ def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
     cli_args = cli_args.split(' ') if cli_args else []
     cli_args += ['--default_root_dir', str(tmpdir)]
     command = [sys.executable, file, '--variation', variation] + cli_args
-    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
+    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     std, err = p.communicate(timeout=60)
     #assert std and not err
     if p.returncode:
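
A note on the `shell=True` experiment added in PATCH 03 and reverted in PATCH 04: when the command is a list and `shell=True` is set, a POSIX shell receives only the first element, so the remaining arguments are silently dropped. A hedged sketch of what a shell invocation would have to look like instead (assuming Python 3.8+ for `shlex.join`; the script path and variation name are only illustrative):

    import shlex
    import subprocess
    import sys

    command = [sys.executable, 'tests/models/data/ddp/train_test_variations.py',
               '--variation', 'variation_fit_test']
    # With shell=True the whole command must be one properly quoted string.
    p = subprocess.Popen(shlex.join(command), shell=True,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    std, err = p.communicate(timeout=60)
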
From 3163db8cc8dd2edf4783bf5dc54eb648a99cc6e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Fri, 7 Aug 2020 02:43:22 +0200
Subject: [PATCH 05/18] test

---
 tests/models/test_gpu.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index eb7e632962df3..28cd68d7fec85 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -109,12 +109,14 @@ def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
     command = [sys.executable, file, '--variation', variation] + cli_args
     p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     std, err = p.communicate(timeout=60)
-    #assert std and not err
+    std = std.decode('utf-8').strip()
+    err = err.decode('utf-8').strip()
+    assert std and not err
     if p.returncode:
         print(std)
         print(err)
         print(command)
-        raise RuntimeError('error')
+        pytest.fail(err)

From bd189a913e44201a8321bdbe4b55e9536c431d41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Fri, 7 Aug 2020 02:55:54 +0200
Subject: [PATCH 06/18] test

---
 tests/models/test_gpu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 28cd68d7fec85..956d325da2fb2 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -111,7 +111,7 @@ def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
     std, err = p.communicate(timeout=60)
     std = std.decode('utf-8').strip()
     err = err.decode('utf-8').strip()
-    assert std and not err
+    # assert std and not err
     if p.returncode:
         print(std)
         print(err)

From ce4274f597255405357348fde963b901a36ad08b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Fri, 7 Aug 2020 03:39:02 +0200
Subject: [PATCH 07/18] try call

---
 tests/models/test_gpu.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 956d325da2fb2..3e1ead264e42c 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -107,16 +107,17 @@ def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
     cli_args = cli_args.split(' ') if cli_args else []
     cli_args += ['--default_root_dir', str(tmpdir)]
     command = [sys.executable, file, '--variation', variation] + cli_args
-    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    std, err = p.communicate(timeout=60)
-    std = std.decode('utf-8').strip()
-    err = err.decode('utf-8').strip()
+    exitcode = subprocess.call(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    raise SystemExit(exitcode)
+    # std, err = p.communicate(timeout=60)
+    # std = std.decode('utf-8').strip()
+    # err = err.decode('utf-8').strip()
     # assert std and not err
-    if p.returncode:
-        print(std)
-        print(err)
-        print(command)
-        pytest.fail(err)
+    # if p.returncode:
+    #     print(std)
+    #     print(err)
+    #     print(command)
+    #     pytest.fail(err)


 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_spawn(tmpdir):
     tutils.set_random_master_port()
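
One caveat about the `subprocess.call(..., stdout=PIPE, stderr=PIPE)` form tried in PATCH 07: the subprocess documentation warns against combining `call()` with pipes, because nothing reads them and the child can block once a pipe buffer fills. A minimal sketch that keeps only the exit code without that risk (the command here is just a stand-in):

    import subprocess
    import sys

    command = [sys.executable, '-c', 'print("ok")']
    # Discard the output instead of piping it; only the return code is checked.
    exitcode = subprocess.call(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    assert exitcode == 0
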
From 886ce192714f6ff043e07f203ba304e61fa67fde Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Fri, 7 Aug 2020 15:59:04 +0200
Subject: [PATCH 08/18] try without subprocess

---
 tests/models/test_gpu.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 3e1ead264e42c..7ff612249d914 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -2,6 +2,7 @@
 import sys
 from collections import namedtuple
 from pathlib import Path
+from unittest import mock

 import pytest
 import torch
@@ -106,9 +107,9 @@ def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
     file = Path(train_test_variations.__file__).absolute()
     cli_args = cli_args.split(' ') if cli_args else []
     cli_args += ['--default_root_dir', str(tmpdir)]
-    command = [sys.executable, file, '--variation', variation] + cli_args
-    exitcode = subprocess.call(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    raise SystemExit(exitcode)
+    # command = [sys.executable, file, '--variation', variation] + cli_args
+    # exitcode = subprocess.call(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    # raise SystemExit(exitcode)
     # std, err = p.communicate(timeout=60)
     # std = std.decode('utf-8').strip()
     # err = err.decode('utf-8').strip()
     # assert std and not err
     # if p.returncode:
     #     print(std)
     #     print(err)
     #     print(command)
     #     pytest.fail(err)

+    cli_args += ['--variation', variation]
+    from tests.models.data.ddp.train_test_variations import main
+    with mock.patch("argparse._sys.argv", ["any.py"] + cli_args):
+        main()
+

 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_spawn(tmpdir):
     tutils.set_random_master_port()

From 884e75948276449e0b52a214fb5a7858a8a636c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3%A4lchli?=
Date: Fri, 7 Aug 2020 16:07:59 +0200
Subject: [PATCH 09/18] test

---
 tests/models/test_gpu.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 7ff612249d914..1028c6e49aede 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -107,9 +107,10 @@ def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
     file = Path(train_test_variations.__file__).absolute()
     cli_args = cli_args.split(' ') if cli_args else []
     cli_args += ['--default_root_dir', str(tmpdir)]
-    # command = [sys.executable, file, '--variation', variation] + cli_args
-    # exitcode = subprocess.call(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    # raise SystemExit(exitcode)
+    command = [sys.executable, file, '--variation', variation] + cli_args
+    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    p.communicate()
+    assert p.returncode == 0
     # std, err = p.communicate(timeout=60)
     # std = std.decode('utf-8').strip()
     # err = err.decode('utf-8').strip()
     # assert std and not err
     # if p.returncode:
     #     print(std)
     #     print(err)
     #     print(command)
     #     pytest.fail(err)

-    cli_args += ['--variation', variation]
-    from tests.models.data.ddp.train_test_variations import main
-    with mock.patch("argparse._sys.argv", ["any.py"] + cli_args):
-        main()
+    # cli_args += ['--variation', variation]
+    # from tests.models.data.ddp.train_test_variations import main
+    # with mock.patch("argparse._sys.argv", ["any.py"] + cli_args):
+    #     main()


 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_spawn(tmpdir):
     tutils.set_random_master_port()
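
The in-process variant tried in PATCH 08 and commented out again in PATCH 09 can also be written by patching `sys.argv` directly, since `argparse` falls back to `sys.argv[1:]` when no arguments are passed. A hedged sketch (assuming a 2-GPU machine and the repository root on `sys.path`; this is not the approach the series settles on):

    import sys
    from unittest import mock

    from tests.models.data.ddp import train_test_variations

    cli = ['any.py', '--variation', 'variation_fit_test',
           '--gpus', '2', '--distributed_backend', 'ddp', '--max_epochs', '1']
    with mock.patch.object(sys, 'argv', cli):
        train_test_variations.main()
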
From 65c1cffb4b4ae34bb089a20649f9c65efbf17234 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3%A4lchli?=
Date: Fri, 7 Aug 2020 16:54:10 +0200
Subject: [PATCH 10/18] display the error

---
 tests/models/test_gpu.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 1028c6e49aede..dc2d9416fd5a5 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -110,16 +110,16 @@ def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
     command = [sys.executable, file, '--variation', variation] + cli_args
     p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     p.communicate()
-    assert p.returncode == 0
-    # std, err = p.communicate(timeout=60)
-    # std = std.decode('utf-8').strip()
-    # err = err.decode('utf-8').strip()
+    # assert p.returncode == 0
+    std, err = p.communicate(timeout=60)
+    std = std.decode('utf-8').strip()
+    err = err.decode('utf-8').strip()
     # assert std and not err
-    # if p.returncode:
-    #     print(std)
-    #     print(err)
-    #     print(command)
-    #     pytest.fail(err)
+    if p.returncode:
+        print(std)
+        print(err)
+        print(command)
+        pytest.fail(err)

     # cli_args += ['--variation', variation]
     # from tests.models.data.ddp.train_test_variations import main
     # with mock.patch("argparse._sys.argv", ["any.py"] + cli_args):
     #     main()

From d6c57eb314d69c33a8ac484376fb37925fce8f03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W%C3%A4lchli?=
Date: Sat, 8 Aug 2020 05:38:14 +0200
Subject: [PATCH 11/18] list all variations

---
 tests/models/data/ddp/train_test_variations.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tests/models/data/ddp/train_test_variations.py b/tests/models/data/ddp/train_test_variations.py
index 2949750951099..1ac2e110dd599 100644
--- a/tests/models/data/ddp/train_test_variations.py
+++ b/tests/models/data/ddp/train_test_variations.py
@@ -17,6 +17,11 @@ def variation_test_fit(trainer, model):
     trainer.fit(model)


+def variation_fit_fit(trainer, model):
+    trainer.fit(model)
+    trainer.fit(model)
+
+
 def variation_test_test(trainer, model):
     trainer.test(model)
     trainer.test(model)
@@ -29,7 +34,13 @@ def variation_test_fit_test(trainer, model):


 def get_variations():
-    variations = [v for v in globals() if v.startswith("variation")]
+    variations = [
+        "variation_fit_test",
+        "variation_test_fit",
+        "variation_fit_fit",
+        "variation_test_test",
+        "variation_test_fit_test",
+    ]
     return variations

From 3be75baedabe93643dc6eb2dfb67d69cb70ab5c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W%C3%A4lchli?=
Date: Sun, 9 Aug 2020 05:12:50 +0200
Subject: [PATCH 12/18] try string

---
 tests/models/test_gpu.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index dc2d9416fd5a5..0fa78b919d8cd 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -107,7 +107,8 @@ def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
     file = Path(train_test_variations.__file__).absolute()
     cli_args = cli_args.split(' ') if cli_args else []
     cli_args += ['--default_root_dir', str(tmpdir)]
-    command = [sys.executable, file, '--variation', variation] + cli_args
+    # command = [sys.executable, file, '--variation', variation] + cli_args
+    command = ['python', file, '--variation', variation] + cli_args
     p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     p.communicate()
     # assert p.returncode == 0
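
For the `['python', file, ...]` experiment in PATCH 12 it is worth keeping in mind that `'python'` is resolved through `PATH` and can point at a different interpreter than the one running pytest, whereas `sys.executable` pins the child process to the same interpreter; `str(file)` also keeps the argument list to plain strings. A minimal sketch of that form (the path and variation name are only examples):

    import sys
    from pathlib import Path

    file = Path('tests/models/data/ddp/train_test_variations.py').absolute()
    # Same interpreter as the running test session, and only string arguments.
    command = [sys.executable, str(file), '--variation', 'variation_fit_test']
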
From 25a27480ff66fdc81e8060ed93f35ddfc53d75ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W%C3%A4lchli?=
Date: Sun, 9 Aug 2020 05:48:37 +0200
Subject: [PATCH 13/18] try copy env

---
 tests/models/test_gpu.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 0fa78b919d8cd..c21881c1857bc 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -1,3 +1,4 @@
+import os
 import subprocess
 import sys
 from collections import namedtuple
@@ -109,7 +110,7 @@ def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
     cli_args += ['--default_root_dir', str(tmpdir)]
     # command = [sys.executable, file, '--variation', variation] + cli_args
     command = ['python', file, '--variation', variation] + cli_args
-    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=os.environ.copy())
     p.communicate()
     # assert p.returncode == 0
     std, err = p.communicate(timeout=60)

From 0911f31765180539b271cd4b0ee62be5900ccbab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W%C3%A4lchli?=
Date: Sun, 9 Aug 2020 06:27:04 +0200
Subject: [PATCH 14/18] debug

---
 tests/models/test_gpu.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index c21881c1857bc..26dec8dd97d5b 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -110,6 +110,14 @@ def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
     cli_args += ['--default_root_dir', str(tmpdir)]
     # command = [sys.executable, file, '--variation', variation] + cli_args
     command = ['python', file, '--variation', variation] + cli_args
+
+    # debugging WHY SUBPROCESS PYTHON CANNOT IMPORT PL
+    p = subprocess.Popen(['pip', 'freeze'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    p.communicate()
+    std, err = p.communicate()
+    std = std.decode('utf-8')
+    print(std)
+
     p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=os.environ.copy())
     p.communicate()
     # assert p.returncode == 0

From e700f816883f76cc7b283e8b1fd078d76ec1f8ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W%C3%A4lchli?=
Date: Sun, 9 Aug 2020 06:44:08 +0200
Subject: [PATCH 15/18] pythonpath

---
 tests/models/test_gpu.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 26dec8dd97d5b..6dacffb7c849f 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -11,6 +11,7 @@
 import tests.base.develop_pipelines as tpipes
 import tests.base.develop_utils as tutils
+import pytorch_lightning
 from pytorch_lightning import Trainer
 from pytorch_lightning.core import memory
 from pytorch_lightning.trainer.distrib_parts import _parse_gpu_ids, determine_root_gpu_device
@@ -112,13 +113,17 @@ def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
     command = ['python', file, '--variation', variation] + cli_args

     # debugging WHY SUBPROCESS PYTHON CANNOT IMPORT PL
+
     p = subprocess.Popen(['pip', 'freeze'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     p.communicate()
     std, err = p.communicate()
     std = std.decode('utf-8')
     print(std)

-    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=os.environ.copy())
+    env = os.environ.copy()
+    env['PYTHONPATH'] = f'{pytorch_lightning.__file__}:' + env['PYTHONPATH']
+
+    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
     p.communicate()
     # assert p.returncode == 0
     std, err = p.communicate(timeout=60)
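
PATCH 15 prepends `pytorch_lightning.__file__` (the path of the package's `__init__.py`) to `PYTHONPATH`. A more conventional sketch, offered here only as an assumption about what the child process needs, would prepend the repository root, i.e. the parent of the package directory, so that both `pytorch_lightning` and `tests` become importable:

    import os

    import pytorch_lightning

    # Repository root = parent directory of the installed/checked-out package.
    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(pytorch_lightning.__file__)))
    env = os.environ.copy()
    # os.pathsep keeps the separator portable instead of hard-coding ':'.
    env['PYTHONPATH'] = repo_root + os.pathsep + env.get('PYTHONPATH', '')
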
From 83bd21367e38d146e010accb130adbd9c4bb8255 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W%C3%A4lchli?=
Date: Sun, 9 Aug 2020 06:55:42 +0200
Subject: [PATCH 16/18] path

---
 tests/models/test_gpu.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 6dacffb7c849f..e930a6f517a5a 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -109,8 +109,7 @@ def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
     file = Path(train_test_variations.__file__).absolute()
     cli_args = cli_args.split(' ') if cli_args else []
     cli_args += ['--default_root_dir', str(tmpdir)]
-    # command = [sys.executable, file, '--variation', variation] + cli_args
-    command = ['python', file, '--variation', variation] + cli_args
+    command = [sys.executable, str(file), '--variation', variation] + cli_args

     # debugging WHY SUBPROCESS PYTHON CANNOT IMPORT PL

@@ -121,8 +120,8 @@ def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
     print(std)

     env = os.environ.copy()
-    env['PYTHONPATH'] = f'{pytorch_lightning.__file__}:' + env['PYTHONPATH']
-
+    env['PYTHONPATH'] = f'{pytorch_lightning.__file__}:' + env.get('PYTHONPATH', '')
+    print('python path', env['PYTHONPATH'])
     p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
     p.communicate()
     # assert p.returncode == 0

From 1cecde9699c63011b09c9f4acb28121a0003f7e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W%C3%A4lchli?=
Date: Sun, 9 Aug 2020 07:07:52 +0200
Subject: [PATCH 17/18] update test

---
 tests/models/test_gpu.py | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index e930a6f517a5a..58fbce3c351ae 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -110,36 +110,20 @@ def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
     cli_args = cli_args.split(' ') if cli_args else []
     cli_args += ['--default_root_dir', str(tmpdir)]
     command = [sys.executable, str(file), '--variation', variation] + cli_args
-
-    # debugging WHY SUBPROCESS PYTHON CANNOT IMPORT PL
-
-    p = subprocess.Popen(['pip', 'freeze'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    p.communicate()
-    std, err = p.communicate()
-    std = std.decode('utf-8')
-    print(std)
-
     env = os.environ.copy()
     env['PYTHONPATH'] = f'{pytorch_lightning.__file__}:' + env.get('PYTHONPATH', '')
-    print('python path', env['PYTHONPATH'])
     p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
     p.communicate()
-    # assert p.returncode == 0
     std, err = p.communicate(timeout=60)
     std = std.decode('utf-8').strip()
     err = err.decode('utf-8').strip()
-    # assert std and not err
+    assert std
     if p.returncode:
         print(std)
         print(err)
         print(command)
         pytest.fail(err)

-    # cli_args += ['--variation', variation]
-    # from tests.models.data.ddp.train_test_variations import main
-    # with mock.patch("argparse._sys.argv", ["any.py"] + cli_args):
-    #     main()
-

 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_spawn(tmpdir):

From 1316c553f443a79d512a49dbffec8fe2cc6746c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W%C3%A4lchli?=
Date: Sun, 9 Aug 2020 07:17:20 +0200
Subject: [PATCH 18/18] change

---
 tests/models/test_gpu.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 58fbce3c351ae..39137c9805437 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -114,6 +114,7 @@ def test_multi_gpu_model_ddp(tmpdir, cli_args, variation):
     env['PYTHONPATH'] = f'{pytorch_lightning.__file__}:' + env.get('PYTHONPATH', '')
     p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
     p.communicate()
+
     std, err = p.communicate(timeout=60)
     std = std.decode('utf-8').strip()
     err = err.decode('utf-8').strip()