From 1c851b89e1758f7c3b96b71b7b6619af130e81cf Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Tue, 23 Feb 2021 23:08:46 +0100 Subject: [PATCH] fixing miss-leading tested acc values (#5876) * fixing tested values * . * tests * yapf * softmax * hvd * rename * lr * duplicate * drop * classif * rm EvalModel * Revert "rm EvalModel" This reverts commit 6c3fb39ebe0c4bfb52357bccfd050438f2c0f31c. * update tests * fix * azure * azure * self * cpu * Apply suggestions from code review Co-authored-by: rohitgr7 --- tests/accelerators/ddp_model.py | 37 +++++---- tests/accelerators/test_ddp.py | 25 +++--- tests/accelerators/test_ddp_spawn.py | 15 ++-- tests/accelerators/test_dp.py | 60 +++++++++++---- tests/base/model_template.py | 2 +- tests/core/test_datamodules.py | 3 +- tests/helpers/pipelines.py | 76 ++++++------------- .../models/data/horovod/test_train_script.py | 30 ++++++++ .../data/horovod/train_default_model.py | 15 ++-- tests/models/test_gpu.py | 7 +- tests/models/test_restore.py | 61 ++++++++------- tests/models/test_tpu.py | 13 +++- tests/trainer/test_dataloaders.py | 11 ++- tests/trainer/test_lr_finder.py | 11 ++- tests/utilities/test_parsing.py | 8 +- 15 files changed, 207 insertions(+), 167 deletions(-) create mode 100644 tests/models/data/horovod/test_train_script.py diff --git a/tests/accelerators/ddp_model.py b/tests/accelerators/ddp_model.py index aa286d2118c13..78d1306665c59 100644 --- a/tests/accelerators/ddp_model.py +++ b/tests/accelerators/ddp_model.py @@ -20,7 +20,8 @@ import torch from pytorch_lightning import seed_everything, Trainer -from tests.base import EvalModelTemplate +from tests.helpers.datamodules import ClassifDataModule +from tests.helpers.simple_models import ClassificationModel def main(): @@ -35,24 +36,28 @@ def main(): parser.set_defaults(accelerator="ddp") args = parser.parse_args() - model = EvalModelTemplate() + dm = ClassifDataModule() + model = ClassificationModel() trainer = Trainer.from_argparse_args(args) - result = {} if args.trainer_method == 'fit': - trainer.fit(model) - result = {'status': 'complete', 'method': args.trainer_method, 'result': None} - if args.trainer_method == 'test': - result = trainer.test(model) - result = {'status': 'complete', 'method': args.trainer_method, 'result': result} - if args.trainer_method == 'fit_test': - trainer.fit(model) - result = trainer.test(model) - result = {'status': 'complete', 'method': args.trainer_method, 'result': result} - - if len(result) > 0: - file_path = os.path.join(args.tmpdir, 'ddp.result') - torch.save(result, file_path) + trainer.fit(model, datamodule=dm) + result = None + elif args.trainer_method == 'test': + result = trainer.test(model, datamodule=dm) + elif args.trainer_method == 'fit_test': + trainer.fit(model, datamodule=dm) + result = trainer.test(model, datamodule=dm) + else: + raise ValueError(f'Unsupported: {args.trainer_method}') + + result_ext = { + 'status': 'complete', + 'method': args.trainer_method, + 'result': result, + } + file_path = os.path.join(args.tmpdir, 'ddp.result') + torch.save(result_ext, file_path) if __name__ == '__main__': diff --git a/tests/accelerators/test_ddp.py b/tests/accelerators/test_ddp.py index b582532cd710e..4de9664fffb7e 100644 --- a/tests/accelerators/test_ddp.py +++ b/tests/accelerators/test_ddp.py @@ -23,14 +23,13 @@ from tests.helpers.boring_model import BoringModel from tests.utilities.distributed import call_training_script +CLI_ARGS = '--max_epochs 1 --gpus 2 --accelerator ddp' + -@pytest.mark.parametrize('cli_args', [ - 
pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'), -]) @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") -def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args): +def test_multi_gpu_model_ddp_fit_only(tmpdir): # call the script - std, err = call_training_script(ddp_model, cli_args, 'fit', tmpdir, timeout=120) + call_training_script(ddp_model, CLI_ARGS, 'fit', tmpdir, timeout=120) # load the results of the script result_path = os.path.join(tmpdir, 'ddp.result') @@ -40,13 +39,10 @@ def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args): assert result['status'] == 'complete' -@pytest.mark.parametrize('cli_args', [ - pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'), -]) @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") -def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args): +def test_multi_gpu_model_ddp_test_only(tmpdir): # call the script - call_training_script(ddp_model, cli_args, 'test', tmpdir) + call_training_script(ddp_model, CLI_ARGS, 'test', tmpdir) # load the results of the script result_path = os.path.join(tmpdir, 'ddp.result') @@ -56,13 +52,10 @@ def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args): assert result['status'] == 'complete' -@pytest.mark.parametrize('cli_args', [ - pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'), -]) @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") -def test_multi_gpu_model_ddp_fit_test(tmpdir, cli_args): +def test_multi_gpu_model_ddp_fit_test(tmpdir): # call the script - call_training_script(ddp_model, cli_args, 'fit_test', tmpdir, timeout=20) + call_training_script(ddp_model, CLI_ARGS, 'fit_test', tmpdir, timeout=20) # load the results of the script result_path = os.path.join(tmpdir, 'ddp.result') @@ -73,7 +66,7 @@ def test_multi_gpu_model_ddp_fit_test(tmpdir, cli_args): model_outs = result['result'] for out in model_outs: - assert out['test_acc'] > 0.90 + assert out['test_acc'] > 0.7 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") diff --git a/tests/accelerators/test_ddp_spawn.py b/tests/accelerators/test_ddp_spawn.py index 1e17947fe6eb9..3ec391d8130c1 100644 --- a/tests/accelerators/test_ddp_spawn.py +++ b/tests/accelerators/test_ddp_spawn.py @@ -20,7 +20,9 @@ from pytorch_lightning.core import memory from pytorch_lightning.trainer import Trainer from pytorch_lightning.trainer.states import TrainerState -from tests.base import EvalModelTemplate +from tests.helpers import BoringModel +from tests.helpers.datamodules import ClassifDataModule +from tests.helpers.simple_models import ClassificationModel @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") @@ -29,7 +31,7 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir): trainer_options = dict( default_root_dir=tmpdir, - callbacks=[EarlyStopping()], + callbacks=[EarlyStopping(monitor='train_acc')], max_epochs=50, limit_train_batches=10, limit_val_batches=10, @@ -37,8 +39,9 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir): accelerator='ddp_spawn', ) - model = EvalModelTemplate() - tpipes.run_model_test(trainer_options, model) + dm = ClassifDataModule() + model = ClassificationModel() + tpipes.run_model_test(trainer_options, model, dm) @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") @@ -55,7 +58,7 @@ def test_multi_gpu_model_ddp_spawn(tmpdir): progress_bar_refresh_rate=0, ) - model = EvalModelTemplate() + model = 
BoringModel() tpipes.run_model_test(trainer_options, model) @@ -68,7 +71,7 @@ def test_ddp_all_dataloaders_passed_to_fit(tmpdir): """Make sure DDP works with dataloaders passed to fit()""" tutils.set_random_master_port() - model = EvalModelTemplate() + model = BoringModel() fit_options = dict(train_dataloader=model.train_dataloader(), val_dataloaders=model.val_dataloader()) trainer = Trainer( diff --git a/tests/accelerators/test_dp.py b/tests/accelerators/test_dp.py index 6e826719b5b98..7da18f0e81f7c 100644 --- a/tests/accelerators/test_dp.py +++ b/tests/accelerators/test_dp.py @@ -11,27 +11,61 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os +from unittest import mock + import pytest import torch +import torch.nn.functional as F import pytorch_lightning as pl import tests.helpers.pipelines as tpipes import tests.helpers.utils as tutils from pytorch_lightning.callbacks import EarlyStopping from pytorch_lightning.core import memory -from tests.base import EvalModelTemplate +from tests.helpers import BoringModel +from tests.helpers.datamodules import ClassifDataModule +from tests.helpers.simple_models import ClassificationModel PRETEND_N_OF_GPUS = 16 +class CustomClassificationModelDP(ClassificationModel): + + def _step(self, batch, batch_idx): + x, y = batch + logits = self(x) + return {'logits': logits, 'y': y} + + def training_step(self, batch, batch_idx): + out = self._step(batch, batch_idx) + loss = F.cross_entropy(out['logits'], out['y']) + return loss + + def validation_step(self, batch, batch_idx): + return self._step(batch, batch_idx) + + def test_step(self, batch, batch_idx): + return self._step(batch, batch_idx) + + def validation_step_end(self, outputs): + self.log('val_acc', self.valid_acc(outputs['logits'], outputs['y'])) + + def test_step_end(self, outputs): + self.log('test_acc', self.test_acc(outputs['logits'], outputs['y'])) + + @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") def test_multi_gpu_early_stop_dp(tmpdir): """Make sure DDP works. 
with early stopping""" tutils.set_random_master_port() + dm = ClassifDataModule() + model = CustomClassificationModelDP() + trainer_options = dict( default_root_dir=tmpdir, - callbacks=[EarlyStopping()], + callbacks=[EarlyStopping(monitor='val_acc')], max_epochs=50, limit_train_batches=10, limit_val_batches=10, @@ -39,8 +73,7 @@ def test_multi_gpu_early_stop_dp(tmpdir): accelerator='dp', ) - model = EvalModelTemplate() - tpipes.run_model_test(trainer_options, model) + tpipes.run_model_test(trainer_options, model, dm) @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") @@ -57,7 +90,7 @@ def test_multi_gpu_model_dp(tmpdir): progress_bar_refresh_rate=0, ) - model = EvalModelTemplate() + model = BoringModel() tpipes.run_model_test(trainer_options, model) @@ -65,14 +98,13 @@ def test_multi_gpu_model_dp(tmpdir): memory.get_memory_profile('min_max') +@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"}) @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") def test_dp_test(tmpdir): tutils.set_random_master_port() - import os - os.environ['CUDA_VISIBLE_DEVICES'] = '0,1' - - model = EvalModelTemplate() + dm = ClassifDataModule() + model = CustomClassificationModelDP() trainer = pl.Trainer( default_root_dir=tmpdir, max_epochs=2, @@ -81,17 +113,17 @@ def test_dp_test(tmpdir): gpus=[0, 1], accelerator='dp', ) - trainer.fit(model) + trainer.fit(model, datamodule=dm) assert 'ckpt' in trainer.checkpoint_callback.best_model_path - results = trainer.test() + results = trainer.test(datamodule=dm) assert 'test_acc' in results[0] - old_weights = model.c_d1.weight.clone().detach().cpu() + old_weights = model.layer_0.weight.clone().detach().cpu() - results = trainer.test(model) + results = trainer.test(model, datamodule=dm) assert 'test_acc' in results[0] # make sure weights didn't change - new_weights = model.c_d1.weight.clone().detach().cpu() + new_weights = model.layer_0.weight.clone().detach().cpu() assert torch.all(torch.eq(old_weights, new_weights)) diff --git a/tests/base/model_template.py b/tests/base/model_template.py index 1d36df8f5ef50..1ec2df7865caa 100644 --- a/tests/base/model_template.py +++ b/tests/base/model_template.py @@ -111,7 +111,7 @@ def forward(self, x): x = self.c_d1_drop(x) x = self.c_d2(x) - logits = F.log_softmax(x, dim=1) + logits = F.softmax(x, dim=1) return logits diff --git a/tests/core/test_datamodules.py b/tests/core/test_datamodules.py index aa50405f87cd9..50195fef02426 100644 --- a/tests/core/test_datamodules.py +++ b/tests/core/test_datamodules.py @@ -385,9 +385,8 @@ def _step(self, batch, batch_idx): return {'logits': logits, 'y': y} def training_step(self, batch, batch_idx): - _, y = batch out = self._step(batch, batch_idx) - loss = F.cross_entropy(out['logits'], y) + loss = F.cross_entropy(out['logits'], out['y']) return loss def validation_step(self, batch, batch_idx): diff --git a/tests/helpers/pipelines.py b/tests/helpers/pipelines.py index ec1e81fc2cecb..403bcdfee8c1d 100644 --- a/tests/helpers/pipelines.py +++ b/tests/helpers/pipelines.py @@ -13,39 +13,41 @@ # limitations under the License. 
import torch -from pytorch_lightning import LightningDataModule, Trainer +from pytorch_lightning import LightningDataModule, LightningModule, Trainer +from pytorch_lightning.metrics.functional import accuracy from pytorch_lightning.trainer.states import TrainerState from pytorch_lightning.utilities import DistributedType from tests.helpers import BoringModel from tests.helpers.utils import get_default_logger, load_model_from_checkpoint, reset_seed -def run_model_test_without_loggers(trainer_options, model, min_acc: float = 0.50): +def run_model_test_without_loggers( + trainer_options: dict, model: LightningModule, data: LightningDataModule = None, min_acc: float = 0.50 +): reset_seed() # fit model trainer = Trainer(**trainer_options) - trainer.fit(model) + trainer.fit(model, datamodule=data) # correct result and ok accuracy assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}" - pretrained_model = load_model_from_checkpoint( - trainer.logger, trainer.checkpoint_callback.best_model_path, type(model) - ) + model2 = load_model_from_checkpoint(trainer.logger, trainer.checkpoint_callback.best_model_path, type(model)) # test new model accuracy - test_loaders = model.test_dataloader() + test_loaders = model2.test_dataloader() if not data else data.test_dataloader() if not isinstance(test_loaders, list): test_loaders = [test_loaders] - for dataloader in test_loaders: - run_prediction(pretrained_model, dataloader, min_acc=min_acc) + if not isinstance(model2, BoringModel): + for dataloader in test_loaders: + run_prediction_eval_model_template(model2, dataloader, min_acc=min_acc) def run_model_test( trainer_options, - model, + model: LightningModule, data: LightningDataModule = None, on_gpu: bool = True, version=None, @@ -76,8 +78,9 @@ def run_model_test( if not isinstance(test_loaders, list): test_loaders = [test_loaders] - for dataloader in test_loaders: - run_prediction(pretrained_model, dataloader, min_acc=min_acc) + if not isinstance(model, BoringModel): + for dataloader in test_loaders: + run_prediction_eval_model_template(model, dataloader, min_acc=min_acc) if with_hpc: if trainer._distrib_type in (DistributedType.DDP, DistributedType.DDP_SPAWN, DistributedType.DDP2): @@ -92,50 +95,17 @@ def run_model_test( trainer.checkpoint_connector.hpc_load(checkpoint_path, on_gpu=on_gpu) -def run_prediction(trained_model, dataloader, dp=False, min_acc=0.25): - if isinstance(trained_model, BoringModel): - return _boring_model_run_prediction(trained_model, dataloader, min_acc) - else: - return _eval_model_template_run_prediction(trained_model, dataloader, dp, min_acc=min_acc) - - -def _eval_model_template_run_prediction(trained_model, dataloader, dp=False, min_acc=0.50): +@torch.no_grad() +def run_prediction_eval_model_template(trained_model, dataloader, min_acc=0.50): # run prediction on 1 batch + trained_model.cpu() + trained_model.eval() + batch = next(iter(dataloader)) x, y = batch - x = x.view(x.size(0), -1) - - if dp: - with torch.no_grad(): - output = trained_model(batch, 0) - acc = output['val_acc'] - acc = torch.mean(acc).item() - - else: - with torch.no_grad(): - y_hat = trained_model(x) - y_hat = y_hat.cpu() + x = x.flatten(1) - # acc - labels_hat = torch.argmax(y_hat, dim=1) - - y = y.cpu() - acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0) - acc = torch.tensor(acc) - acc = acc.item() + y_hat = trained_model(x) + acc = accuracy(y_hat.cpu(), y.cpu(), top_k=2).item() assert acc >= min_acc, f"This model is expected to get > {min_acc} in test set (it got 
{acc})" - - -# TODO: This test compares a loss value with a min accuracy - complete non-sense! -# create BoringModels that make actual predictions! -def _boring_model_run_prediction(trained_model, dataloader, min_acc=0.25): - # run prediction on 1 batch - trained_model.cpu() - batch = next(iter(dataloader)) - - with torch.no_grad(): - output = trained_model(batch) - - acc = trained_model.loss(batch, output) - assert acc >= min_acc, f"This model is expected to get, {min_acc} in test set but got {acc}" diff --git a/tests/models/data/horovod/test_train_script.py b/tests/models/data/horovod/test_train_script.py new file mode 100644 index 0000000000000..ee77efeeb8675 --- /dev/null +++ b/tests/models/data/horovod/test_train_script.py @@ -0,0 +1,30 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from tests.models.data.horovod.train_default_model import run_test_from_config + + +def test_horovod_model_script(tmpdir): + """This just for testing/debugging horovod script without horovod...""" + trainer_options = dict( + default_root_dir=str(tmpdir), + weights_save_path=str(tmpdir), + gradient_clip_val=1.0, + progress_bar_refresh_rate=0, + max_epochs=1, + limit_train_batches=0.4, + limit_val_batches=0.2, + deterministic=True, + ) + run_test_from_config(trainer_options, check_size=False, on_gpu=False) diff --git a/tests/models/data/horovod/train_default_model.py b/tests/models/data/horovod/train_default_model.py index 93a637dda1071..d3868cfd979e6 100644 --- a/tests/models/data/horovod/train_default_model.py +++ b/tests/models/data/horovod/train_default_model.py @@ -37,7 +37,6 @@ print('You requested to import Horovod which is missing or not supported for your OS.') from tests.helpers import BoringModel # noqa: E402 -from tests.helpers.pipelines import run_prediction # noqa: E402 from tests.helpers.utils import reset_seed, set_random_master_port # noqa: E402 parser = argparse.ArgumentParser() @@ -45,7 +44,7 @@ parser.add_argument('--on-gpu', action='store_true', default=False) -def run_test_from_config(trainer_options): +def run_test_from_config(trainer_options, on_gpu, check_size=True): """Trains the default model with the given config.""" set_random_master_port() reset_seed() @@ -60,7 +59,8 @@ def run_test_from_config(trainer_options): assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}" # Horovod should be initialized following training. If not, this will raise an exception. 
- assert hvd.size() == 2 + if check_size: + assert hvd.size() == 2 if trainer.global_rank > 0: return @@ -74,15 +74,16 @@ def run_test_from_config(trainer_options): test_loaders = [test_loaders] for dataloader in test_loaders: - run_prediction(pretrained_model, dataloader) + batch = next(iter(dataloader)) + pretrained_model(batch) # test HPC saving trainer.checkpoint_connector.hpc_save(ckpt_path, trainer.logger) # test HPC loading checkpoint_path = trainer.checkpoint_connector.get_max_ckpt_path_from_folder(ckpt_path) - trainer.checkpoint_connector.hpc_load(checkpoint_path, on_gpu=args.on_gpu) + trainer.checkpoint_connector.hpc_load(checkpoint_path, on_gpu=on_gpu) - if args.on_gpu: + if on_gpu: trainer = Trainer(gpus=1, accelerator='horovod', max_epochs=1) # Test the root_gpu property assert trainer.root_gpu == hvd.local_rank() @@ -90,4 +91,4 @@ def run_test_from_config(trainer_options): if __name__ == "__main__": args = parser.parse_args() - run_test_from_config(json.loads(args.trainer_options)) + run_test_from_config(json.loads(args.trainer_options), args.on_gpu) diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index f30f12009450e..ec13ed9112ef0 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -24,6 +24,8 @@ from pytorch_lightning.utilities import device_parser from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.helpers import BoringModel +from tests.helpers.datamodules import ClassifDataModule +from tests.helpers.simple_models import ClassificationModel PRETEND_N_OF_GPUS = 16 @@ -41,8 +43,9 @@ def test_multi_gpu_none_backend(tmpdir): gpus=2, ) - model = BoringModel() - tpipes.run_model_test(trainer_options, model, min_acc=0.20) + dm = ClassifDataModule() + model = ClassificationModel() + tpipes.run_model_test(trainer_options, model, dm) @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index a3f88e37bb09a..7d6c104abbd57 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -85,6 +85,28 @@ class GenericValTestLossBoringModel(GenericParentValTestLossBoringModel[int]): pass +class CustomClassificationModelDP(ClassificationModel): + + def _step(self, batch, batch_idx): + x, y = batch + logits = self(x) + return {'logits': logits, 'y': y} + + def training_step(self, batch, batch_idx): + out = self._step(batch, batch_idx) + loss = F.cross_entropy(out['logits'], out['y']) + return loss + + def validation_step(self, batch, batch_idx): + return self._step(batch, batch_idx) + + def test_step(self, batch, batch_idx): + return self._step(batch, batch_idx) + + def validation_step_end(self, outputs): + self.log('val_acc', self.valid_acc(outputs['logits'], outputs['y'])) + + def test_model_properties_resume_from_checkpoint(tmpdir): """ Test that properties like `current_epoch` and `global_step` @@ -198,28 +220,6 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir): tutils.set_random_master_port() - class CustomClassificationModelDP(ClassificationModel): - - def _step(self, batch, batch_idx): - x, y = batch - logits = self(x) - return {'logits': logits, 'y': y} - - def training_step(self, batch, batch_idx): - _, y = batch - out = self._step(batch, batch_idx) - loss = F.cross_entropy(out['logits'], y) - return loss - - def validation_step(self, batch, batch_idx): - return self._step(batch, batch_idx) - - def test_step(self, batch, batch_idx): - return self._step(batch, batch_idx) - - 
def validation_step_end(self, outputs): - self.log('val_acc', self.valid_acc(outputs['logits'], outputs['y'])) - dm = ClassifDataModule() model = CustomClassificationModelDP(lr=0.1) @@ -259,7 +259,7 @@ def validation_step_end(self, outputs): dataloaders = [dataloaders] for dataloader in dataloaders: - tpipes.run_prediction(pretrained_model, dataloader) + tpipes.run_prediction_eval_model_template(pretrained_model, dataloader) @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") @@ -307,7 +307,7 @@ def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir): dataloaders = [dataloaders] for dataloader in dataloaders: - tpipes.run_prediction(pretrained_model, dataloader, min_acc=0.1) + tpipes.run_prediction_eval_model_template(pretrained_model, dataloader, min_acc=0.1) def test_running_test_pretrained_model_cpu(tmpdir): @@ -398,7 +398,8 @@ def test_load_model_from_checkpoint(tmpdir, model_template): @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") def test_dp_resume(tmpdir): """Make sure DP continues training correctly.""" - model = BoringModel() + model = CustomClassificationModelDP(lr=0.1) + dm = ClassifDataModule() trainer_options = dict(max_epochs=1, gpus=2, accelerator='dp', default_root_dir=tmpdir) @@ -416,7 +417,7 @@ def test_dp_resume(tmpdir): # fit model trainer = Trainer(**trainer_options) trainer.is_slurm_managing_tasks = True - trainer.fit(model) + trainer.fit(model, datamodule=dm) # track epoch before saving. Increment since we finished the current epoch, don't want to rerun real_global_epoch = trainer.current_epoch + 1 @@ -439,7 +440,7 @@ def test_dp_resume(tmpdir): trainer_options['max_epochs'] = 1 new_trainer = Trainer(**trainer_options) - class CustomModel(BoringModel): + class CustomModel(CustomClassificationModelDP): def __init__(self): super().__init__() @@ -451,19 +452,17 @@ def on_train_start(self): # if model and state loaded correctly, predictions will be good even though we # haven't trained with the new loaded model - dp_model = new_trainer.model - dp_model.eval() new_trainer._running_stage = RunningStage.EVALUATING dataloader = self.train_dataloader() - tpipes.run_prediction(self.trainer.lightning_module, dataloader) + tpipes.run_prediction_eval_model_template(self.trainer.lightning_module, dataloader=dataloader) self.on_train_start_called = True # new model model = CustomModel() # fit new model which should load hpc weights - new_trainer.fit(model) + new_trainer.fit(model, datamodule=dm) assert model.on_train_start_called # test freeze on gpu diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index bfa8f2432e3a2..6a4605b3e2b36 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -223,12 +223,19 @@ def test_tpu_grad_norm(tmpdir): @pl_multi_process_test def test_dataloaders_passed_to_fit(tmpdir): """Test if dataloaders passed to trainer works on TPU""" - tutils.reset_seed() model = BoringModel() - trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, tpu_cores=8) - trainer.fit(model, train_dataloader=model.train_dataloader(), val_dataloaders=model.val_dataloader()) + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + tpu_cores=8, + ) + trainer.fit( + model, + train_dataloader=model.train_dataloader(), + val_dataloaders=model.val_dataloader(), + ) assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}" diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py index 
bca8e5dcc531b..fe07e41d20b4c 100644 --- a/tests/trainer/test_dataloaders.py +++ b/tests/trainer/test_dataloaders.py @@ -130,7 +130,7 @@ def test_multiple_val_dataloader(tmpdir): # make sure predictions are good for each val set for dataloader in trainer.val_dataloaders: - tpipes.run_prediction(trained_model=model, dataloader=dataloader) + tpipes.run_prediction_eval_model_template(trained_model=model, dataloader=dataloader) @pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific']) @@ -153,8 +153,8 @@ def test_step(self, batch, batch_idx, *args, **kwargs): trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, - limit_val_batches=0.1, - limit_train_batches=0.2, + limit_val_batches=10, + limit_train_batches=100, ) trainer.fit(model) if ckpt_path == 'specific': @@ -162,12 +162,11 @@ def test_step(self, batch, batch_idx, *args, **kwargs): trainer.test(ckpt_path=ckpt_path) # verify there are 2 test loaders - assert len(trainer.test_dataloaders) == 2, \ - 'Multiple test_dataloaders not initiated properly' + assert len(trainer.test_dataloaders) == 2, 'Multiple test_dataloaders not initiated properly' # make sure predictions are good for each test set for dataloader in trainer.test_dataloaders: - tpipes.run_prediction(trainer.model, dataloader) + tpipes.run_prediction_eval_model_template(trainer.model, dataloader) # run the test method trainer.test(ckpt_path=ckpt_path) diff --git a/tests/trainer/test_lr_finder.py b/tests/trainer/test_lr_finder.py index 750b989a7d513..e85c43361976d 100644 --- a/tests/trainer/test_lr_finder.py +++ b/tests/trainer/test_lr_finder.py @@ -229,8 +229,8 @@ def test_accumulation_and_early_stopping(tmpdir): def test_suggestion_parameters_work(tmpdir): """ Test that default skipping does not alter results in basic case """ - hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(**hparams) + dm = ClassifDataModule() + model = ClassificationModel() # logger file to get meta trainer = Trainer( @@ -238,12 +238,11 @@ def test_suggestion_parameters_work(tmpdir): max_epochs=3, ) - lrfinder = trainer.tuner.lr_find(model) + lrfinder = trainer.tuner.lr_find(model, datamodule=dm) lr1 = lrfinder.suggestion(skip_begin=10) # default - lr2 = lrfinder.suggestion(skip_begin=80) # way too high, should have an impact + lr2 = lrfinder.suggestion(skip_begin=150) # way too high, should have an impact - assert lr1 != lr2, \ - 'Skipping parameter did not influence learning rate' + assert lr1 != lr2, 'Skipping parameter did not influence learning rate' def test_suggestion_with_non_finite_values(tmpdir): diff --git a/tests/utilities/test_parsing.py b/tests/utilities/test_parsing.py index 42edb8e48f336..f6f802615f003 100644 --- a/tests/utilities/test_parsing.py +++ b/tests/utilities/test_parsing.py @@ -113,8 +113,8 @@ def test_lightning_getattr(tmpdir): for m in models: with pytest.raises( - AttributeError, - match="is neither stored in the model namespace nor the `hparams` namespace/dict, nor the datamodule." + AttributeError, + match="is neither stored in the model namespace nor the `hparams` namespace/dict, nor the datamodule." ): lightning_getattr(m, "this_attr_not_exist") @@ -140,7 +140,7 @@ def test_lightning_setattr(tmpdir): for m in models: with pytest.raises( - AttributeError, - match="is neither stored in the model namespace nor the `hparams` namespace/dict, nor the datamodule." + AttributeError, + match="is neither stored in the model namespace nor the `hparams` namespace/dict, nor the datamodule." ): lightning_setattr(m, "this_attr_not_exist", None)
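
A minimal usage sketch, not part of the patch above: it illustrates how the reworked helpers this diff introduces (ClassifDataModule, ClassificationModel, and run_prediction_eval_model_template from tests/helpers/) are meant to fit together in a test. The test name, the Trainer arguments, the min_acc value, and the assumption that the datamodule exposes a usable test dataloader after fit() are illustrative assumptions, not taken from the diff.

from pytorch_lightning import Trainer
from tests.helpers.datamodules import ClassifDataModule
from tests.helpers.pipelines import run_prediction_eval_model_template
from tests.helpers.simple_models import ClassificationModel


def test_classification_smoke(tmpdir):  # hypothetical test, for illustration only
    dm = ClassifDataModule()
    model = ClassificationModel()

    # trainer settings are assumptions, chosen to mirror the small budgets used elsewhere in the diff
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=10,
        limit_val_batches=10,
    )
    trainer.fit(model, datamodule=dm)

    # the helper (defined in tests/helpers/pipelines.py in this diff) moves the model to CPU,
    # flattens one batch, and asserts top-2 accuracy via pytorch_lightning.metrics.functional.accuracy;
    # this call assumes the datamodule provides a test dataloader once fit() has run
    run_prediction_eval_model_template(model, dm.test_dataloader(), min_acc=0.5)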