fixing miss-leading tested acc values (#5876)

* fixing tested values * . * tests * yapf * softmax * hvd * rename * lr * duplicate * drop * classif * rm EvalModel * Revert "rm EvalModel" This reverts commit 6c3fb39. * update tests * fix * azure * azure * self * cpu * Apply suggestions from code review Co-authored-by: rohitgr7 <rohitgr1998@gmail.com>
Lightning-AI · Feb 23, 2021 · 1c851b8 · 1c851b8
1 parent ebabe56
commit 1c851b8
Show file tree

Hide file tree

Showing 15 changed files with 207 additions and 167 deletions.
diff --git a/tests/accelerators/ddp_model.py b/tests/accelerators/ddp_model.py
@@ -20,7 +20,8 @@
 import torch
 
 from pytorch_lightning import seed_everything, Trainer
-from tests.base import EvalModelTemplate
+from tests.helpers.datamodules import ClassifDataModule
+from tests.helpers.simple_models import ClassificationModel
 
 
 def main():
@@ -35,24 +36,28 @@ def main():
     parser.set_defaults(accelerator="ddp")
     args = parser.parse_args()
 
-    model = EvalModelTemplate()
+    dm = ClassifDataModule()
+    model = ClassificationModel()
     trainer = Trainer.from_argparse_args(args)
 
-    result = {}
     if args.trainer_method == 'fit':
-        trainer.fit(model)
-        result = {'status': 'complete', 'method': args.trainer_method, 'result': None}
-    if args.trainer_method == 'test':
-        result = trainer.test(model)
-        result = {'status': 'complete', 'method': args.trainer_method, 'result': result}
-    if args.trainer_method == 'fit_test':
-        trainer.fit(model)
-        result = trainer.test(model)
-        result = {'status': 'complete', 'method': args.trainer_method, 'result': result}
-
-    if len(result) > 0:
-        file_path = os.path.join(args.tmpdir, 'ddp.result')
-        torch.save(result, file_path)
+        trainer.fit(model, datamodule=dm)
+        result = None
+    elif args.trainer_method == 'test':
+        result = trainer.test(model, datamodule=dm)
+    elif args.trainer_method == 'fit_test':
+        trainer.fit(model, datamodule=dm)
+        result = trainer.test(model, datamodule=dm)
+    else:
+        raise ValueError(f'Unsupported: {args.trainer_method}')
+
+    result_ext = {
+        'status': 'complete',
+        'method': args.trainer_method,
+        'result': result,
+    }
+    file_path = os.path.join(args.tmpdir, 'ddp.result')
+    torch.save(result_ext, file_path)
 
 
 if __name__ == '__main__':

diff --git a/tests/accelerators/test_ddp.py b/tests/accelerators/test_ddp.py
@@ -23,14 +23,13 @@
 from tests.helpers.boring_model import BoringModel
 from tests.utilities.distributed import call_training_script
 
+CLI_ARGS = '--max_epochs 1 --gpus 2 --accelerator ddp'
+
 
-@pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
-])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
-def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args):
+def test_multi_gpu_model_ddp_fit_only(tmpdir):
     # call the script
-    std, err = call_training_script(ddp_model, cli_args, 'fit', tmpdir, timeout=120)
+    call_training_script(ddp_model, CLI_ARGS, 'fit', tmpdir, timeout=120)
 
     # load the results of the script
     result_path = os.path.join(tmpdir, 'ddp.result')
@@ -40,13 +39,10 @@ def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args):
     assert result['status'] == 'complete'
 
 
-@pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
-])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
-def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args):
+def test_multi_gpu_model_ddp_test_only(tmpdir):
     # call the script
-    call_training_script(ddp_model, cli_args, 'test', tmpdir)
+    call_training_script(ddp_model, CLI_ARGS, 'test', tmpdir)
 
     # load the results of the script
     result_path = os.path.join(tmpdir, 'ddp.result')
@@ -56,13 +52,10 @@ def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args):
     assert result['status'] == 'complete'
 
 
-@pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
-])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
-def test_multi_gpu_model_ddp_fit_test(tmpdir, cli_args):
+def test_multi_gpu_model_ddp_fit_test(tmpdir):
     # call the script
-    call_training_script(ddp_model, cli_args, 'fit_test', tmpdir, timeout=20)
+    call_training_script(ddp_model, CLI_ARGS, 'fit_test', tmpdir, timeout=20)
 
     # load the results of the script
     result_path = os.path.join(tmpdir, 'ddp.result')
@@ -73,7 +66,7 @@ def test_multi_gpu_model_ddp_fit_test(tmpdir, cli_args):
 
     model_outs = result['result']
     for out in model_outs:
-        assert out['test_acc'] > 0.90
+        assert out['test_acc'] > 0.7
 
 
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")

diff --git a/tests/accelerators/test_ddp_spawn.py b/tests/accelerators/test_ddp_spawn.py
@@ -20,7 +20,9 @@
 from pytorch_lightning.core import memory
 from pytorch_lightning.trainer import Trainer
 from pytorch_lightning.trainer.states import TrainerState
-from tests.base import EvalModelTemplate
+from tests.helpers import BoringModel
+from tests.helpers.datamodules import ClassifDataModule
+from tests.helpers.simple_models import ClassificationModel
 
 
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@@ -29,16 +31,17 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
 
     trainer_options = dict(
         default_root_dir=tmpdir,
-        callbacks=[EarlyStopping()],
+        callbacks=[EarlyStopping(monitor='train_acc')],
         max_epochs=50,
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
         accelerator='ddp_spawn',
     )
 
-    model = EvalModelTemplate()
-    tpipes.run_model_test(trainer_options, model)
+    dm = ClassifDataModule()
+    model = ClassificationModel()
+    tpipes.run_model_test(trainer_options, model, dm)
 
 
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@@ -55,7 +58,7 @@ def test_multi_gpu_model_ddp_spawn(tmpdir):
         progress_bar_refresh_rate=0,
     )
 
-    model = EvalModelTemplate()
+    model = BoringModel()
 
     tpipes.run_model_test(trainer_options, model)
 
@@ -68,7 +71,7 @@ def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
     """Make sure DDP works with dataloaders passed to fit()"""
     tutils.set_random_master_port()
 
-    model = EvalModelTemplate()
+    model = BoringModel()
     fit_options = dict(train_dataloader=model.train_dataloader(), val_dataloaders=model.val_dataloader())
 
     trainer = Trainer(

diff --git a/tests/accelerators/test_dp.py b/tests/accelerators/test_dp.py
@@ -11,36 +11,69 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
+from unittest import mock
+
 import pytest
 import torch
+import torch.nn.functional as F
 
 import pytorch_lightning as pl
 import tests.helpers.pipelines as tpipes
 import tests.helpers.utils as tutils
 from pytorch_lightning.callbacks import EarlyStopping
 from pytorch_lightning.core import memory
-from tests.base import EvalModelTemplate
+from tests.helpers import BoringModel
+from tests.helpers.datamodules import ClassifDataModule
+from tests.helpers.simple_models import ClassificationModel
 
 PRETEND_N_OF_GPUS = 16
 
 
+class CustomClassificationModelDP(ClassificationModel):
+
+    def _step(self, batch, batch_idx):
+        x, y = batch
+        logits = self(x)
+        return {'logits': logits, 'y': y}
+
+    def training_step(self, batch, batch_idx):
+        out = self._step(batch, batch_idx)
+        loss = F.cross_entropy(out['logits'], out['y'])
+        return loss
+
+    def validation_step(self, batch, batch_idx):
+        return self._step(batch, batch_idx)
+
+    def test_step(self, batch, batch_idx):
+        return self._step(batch, batch_idx)
+
+    def validation_step_end(self, outputs):
+        self.log('val_acc', self.valid_acc(outputs['logits'], outputs['y']))
+
+    def test_step_end(self, outputs):
+        self.log('test_acc', self.test_acc(outputs['logits'], outputs['y']))
+
+
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_early_stop_dp(tmpdir):
     """Make sure DDP works. with early stopping"""
     tutils.set_random_master_port()
 
+    dm = ClassifDataModule()
+    model = CustomClassificationModelDP()
+
     trainer_options = dict(
         default_root_dir=tmpdir,
-        callbacks=[EarlyStopping()],
+        callbacks=[EarlyStopping(monitor='val_acc')],
         max_epochs=50,
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
         accelerator='dp',
     )
 
-    model = EvalModelTemplate()
-    tpipes.run_model_test(trainer_options, model)
+    tpipes.run_model_test(trainer_options, model, dm)
 
 
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@@ -57,22 +90,21 @@ def test_multi_gpu_model_dp(tmpdir):
         progress_bar_refresh_rate=0,
     )
 
-    model = EvalModelTemplate()
+    model = BoringModel()
 
     tpipes.run_model_test(trainer_options, model)
 
     # test memory helper functions
     memory.get_memory_profile('min_max')
 
 
+@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"})
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_dp_test(tmpdir):
     tutils.set_random_master_port()
 
-    import os
-    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
-
-    model = EvalModelTemplate()
+    dm = ClassifDataModule()
+    model = CustomClassificationModelDP()
     trainer = pl.Trainer(
         default_root_dir=tmpdir,
         max_epochs=2,
@@ -81,17 +113,17 @@ def test_dp_test(tmpdir):
         gpus=[0, 1],
         accelerator='dp',
     )
-    trainer.fit(model)
+    trainer.fit(model, datamodule=dm)
     assert 'ckpt' in trainer.checkpoint_callback.best_model_path
-    results = trainer.test()
+    results = trainer.test(datamodule=dm)
     assert 'test_acc' in results[0]
 
-    old_weights = model.c_d1.weight.clone().detach().cpu()
+    old_weights = model.layer_0.weight.clone().detach().cpu()
 
-    results = trainer.test(model)
+    results = trainer.test(model, datamodule=dm)
     assert 'test_acc' in results[0]
 
     # make sure weights didn't change
-    new_weights = model.c_d1.weight.clone().detach().cpu()
+    new_weights = model.layer_0.weight.clone().detach().cpu()
 
     assert torch.all(torch.eq(old_weights, new_weights))
diff --git a/tests/base/model_template.py b/tests/base/model_template.py
@@ -111,7 +111,7 @@ def forward(self, x):
         x = self.c_d1_drop(x)
 
         x = self.c_d2(x)
-        logits = F.log_softmax(x, dim=1)
+        logits = F.softmax(x, dim=1)
 
         return logits
 

diff --git a/tests/core/test_datamodules.py b/tests/core/test_datamodules.py
@@ -385,9 +385,8 @@ def _step(self, batch, batch_idx):
             return {'logits': logits, 'y': y}
 
         def training_step(self, batch, batch_idx):
-            _, y = batch
             out = self._step(batch, batch_idx)
-            loss = F.cross_entropy(out['logits'], y)
+            loss = F.cross_entropy(out['logits'], out['y'])
             return loss
 
         def validation_step(self, batch, batch_idx):