diff --git a/CHANGELOG.md b/CHANGELOG.md
index 795233c6c908f..e7a5bbf4fc491 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -81,6 +81,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed checkpointing interval ([#1272](https://github.com/PyTorchLightning/pytorch-lightning/pull/1272))
 - Fixed validation and training loops run the partial dataset ([#1192](https://github.com/PyTorchLightning/pytorch-lightning/pull/1192))
 - Fixed running `on_validation_end` only on main process in DDP ([#1125](https://github.com/PyTorchLightning/pytorch-lightning/pull/1125))
+- Fixes `use_amp` issue ([#1145](https://github.com/PyTorchLightning/pytorch-lightning/pull/1145))
+- Fixes using deprecated `use_amp` attribute ([#1145](https://github.com/PyTorchLightning/pytorch-lightning/pull/1145))
 
 ## [0.7.1] - 2020-03-07
 
diff --git a/pl_examples/basic_examples/gpu_template.py b/pl_examples/basic_examples/gpu_template.py
index 408b62387fc8c..c5fa94a3cf140 100644
--- a/pl_examples/basic_examples/gpu_template.py
+++ b/pl_examples/basic_examples/gpu_template.py
@@ -32,7 +32,7 @@ def main(hparams):
         max_epochs=hparams.epochs,
         gpus=hparams.gpus,
         distributed_backend=hparams.distributed_backend,
-        use_amp=hparams.use_16bit
+        precision=16 if hparams.use_16bit else 32,
     )
 
     # ------------------------
diff --git a/pl_examples/domain_templates/imagenet.py b/pl_examples/domain_templates/imagenet.py
index 52c5cf0642f0b..95a894d57988b 100644
--- a/pl_examples/domain_templates/imagenet.py
+++ b/pl_examples/domain_templates/imagenet.py
@@ -226,7 +226,7 @@ def main(hparams):
         gpus=hparams.gpus,
         max_epochs=hparams.epochs,
         distributed_backend=hparams.distributed_backend,
-        use_amp=hparams.use_16bit
+        precision=16 if hparams.use_16bit else 32,
     )
     if hparams.evaluate:
         trainer.run_evaluation()
diff --git a/pytorch_lightning/trainer/auto_mix_precision.py b/pytorch_lightning/trainer/auto_mix_precision.py
index 49bed8b8f69f4..135cf83e288c8 100644
--- a/pytorch_lightning/trainer/auto_mix_precision.py
+++ b/pytorch_lightning/trainer/auto_mix_precision.py
@@ -14,19 +14,21 @@ class TrainerAMPMixin(ABC):
 
     # this is just a summary on variables used in this abstract class,
     #  the proper values/initialisation should be done in child class
-    use_amp: bool
+    precision: int
 
     def init_amp(self, use_amp):
-        self.use_amp = use_amp and APEX_AVAILABLE
-        if self.use_amp:
-            log.info('Using 16bit precision.')
-
         if use_amp and not APEX_AVAILABLE:  # pragma: no-cover
-            msg = """
+            raise ModuleNotFoundError("""
             You set `use_amp=True` but do not have apex installed.
             Install apex first using this guide and rerun with use_amp=True:
             https://github.com/NVIDIA/apex#linux
 
            this run will NOT use 16 bit precision
-            """
-            raise ModuleNotFoundError(msg)
+            """)
+
+        if self.use_amp:
+            log.info('Using 16bit precision.')
+
+    @property
+    def use_amp(self) -> bool:
+        return self.precision == 16 and APEX_AVAILABLE
diff --git a/pytorch_lightning/trainer/distrib_data_parallel.py b/pytorch_lightning/trainer/distrib_data_parallel.py
index 7a7f73bea3d70..50941e7b2fa3f 100644
--- a/pytorch_lightning/trainer/distrib_data_parallel.py
+++ b/pytorch_lightning/trainer/distrib_data_parallel.py
@@ -141,7 +141,6 @@ class TrainerDDPMixin(ABC):
     logger: Union[LightningLoggerBase, bool]
     data_parallel_device_ids: ...
     distributed_backend: str
-    use_amp: bool
     amp_level: str
     use_tpu: bool
     default_save_path: str
@@ -151,6 +150,11 @@ class TrainerDDPMixin(ABC):
     def num_gpus(self) -> int:
         """Warning: this is just empty shell for code implemented in other class."""
 
+    @property
+    @abstractmethod
+    def use_amp(self) -> bool:
+        """Warning: this is just empty shell for code implemented in other class."""
+
     @abstractmethod
     def copy_trainer_model_properties(self, *args):
         """Warning: this is just empty shell for code implemented in other class."""
diff --git a/pytorch_lightning/trainer/distrib_parts.py b/pytorch_lightning/trainer/distrib_parts.py
index 7abf987d5c4f3..084b4a677a8e0 100644
--- a/pytorch_lightning/trainer/distrib_parts.py
+++ b/pytorch_lightning/trainer/distrib_parts.py
@@ -372,7 +372,6 @@ class TrainerDPMixin(ABC):
     use_dp: bool
     use_ddp2: bool
     use_ddp: bool
-    use_amp: bool
     testing: bool
     single_gpu: bool
     root_gpu: ...
@@ -385,6 +384,11 @@ class TrainerDPMixin(ABC):
     use_tpu: bool
     data_parallel_device_ids: ...
 
+    @property
+    @abstractmethod
+    def use_amp(self) -> bool:
+        """Warning: this is just empty shell for code implemented in other class."""
+
     @abstractmethod
     def run_pretrain_routine(self, *args):
         """Warning: this is just empty shell for code implemented in other class."""
diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 7b0568e7b4c65..d470bcb35a8cd 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -21,8 +21,7 @@
 from pytorch_lightning.trainer.callback_config import TrainerCallbackConfigMixin
 from pytorch_lightning.trainer.callback_hook import TrainerCallbackHookMixin
 from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin
-from pytorch_lightning.trainer.deprecated_api import (TrainerDeprecatedAPITillVer0_8,
-                                                      TrainerDeprecatedAPITillVer0_9)
+from pytorch_lightning.trainer.deprecated_api import TrainerDeprecatedAPITillVer0_8, TrainerDeprecatedAPITillVer0_9
 from pytorch_lightning.trainer.distrib_data_parallel import TrainerDDPMixin
 from pytorch_lightning.trainer.distrib_parts import TrainerDPMixin, parse_gpu_ids, determine_root_gpu_device
 from pytorch_lightning.trainer.evaluation_loop import TrainerEvaluationLoopMixin
@@ -55,13 +54,13 @@
 class Trainer(
     TrainerIOMixin,
     TrainerOptimizersMixin,
+    TrainerAMPMixin,
     TrainerDPMixin,
     TrainerDDPMixin,
     TrainerLoggingMixin,
     TrainerModelHooksMixin,
     TrainerTrainingTricksMixin,
     TrainerDataLoadingMixin,
-    TrainerAMPMixin,
     TrainerEvaluationLoopMixin,
     TrainerTrainLoopMixin,
     TrainerCallbackConfigMixin,
@@ -88,7 +87,6 @@ def __init__(
             gpus: Optional[Union[List[int], str, int]] = None,
             num_tpu_cores: Optional[int] = None,
             log_gpu_memory: Optional[str] = None,
-            show_progress_bar=None,  # backward compatible, todo: remove in v0.9.0
             progress_bar_refresh_rate: int = 1,
             overfit_pct: float = 0.0,
             track_grad_norm: int = -1,
@@ -122,7 +120,8 @@ def __init__(
             nb_gpu_nodes=None,  # backward compatible, todo: remove in v0.8.0
             max_nb_epochs=None,  # backward compatible, todo: remove in v0.8.0
             min_nb_epochs=None,  # backward compatible, todo: remove in v0.8.0
-            use_amp=False,  # backward compatible, todo: remove in v0.9.0
+            use_amp=None,  # backward compatible, todo: remove in v0.9.0
+            show_progress_bar=None,  # backward compatible, todo: remove in v0.9.0
             nb_sanity_val_steps=None,  # backward compatible, todo: remove in v0.8.0
             **kwargs
     ):
@@ -446,6 +445,12 @@ def __init__(
         self.amp_level = amp_level
         self.precision = precision
 
+        # Backward compatibility, TODO: remove in v0.9.0
+        if use_amp is not None:
+            warnings.warn("`use_amp` has been replaced by `precision` since v0.7.0"
+                          " and this argument will be removed in v0.9.0", DeprecationWarning)
+            self.precision = 16 if use_amp else 32
+
         assert self.precision in (16, 32), 'only 32 or 16 bit precision supported'
 
         if self.precision == 16 and self.num_tpu_cores is None:
diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py
index ba974dac12966..73333fb7d59cd 100644
--- a/pytorch_lightning/trainer/training_loop.py
+++ b/pytorch_lightning/trainer/training_loop.py
@@ -200,7 +200,6 @@ class TrainerTrainLoopMixin(ABC):
     optimizers: ...
     optimizer_frequencies: ...
     accumulate_grad_batches: int
-    use_amp: bool
     track_grad_norm: ...
     model: LightningModule
     interrupted: bool
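
Usage note (illustrative, not part of the patch): a minimal sketch of the call-site migration this diff performs, assuming the 0.7.x Trainer API; the argument values shown are hypothetical.

from pytorch_lightning import Trainer

# Deprecated style: still accepted until v0.9.0, but after this patch it emits a
# DeprecationWarning and is translated internally to precision=16 (or 32 if False).
# trainer = Trainer(use_amp=True)

# Preferred style after this patch: request mixed precision explicitly.
trainer = Trainer(precision=16)  # pass 32 for full precision

# `use_amp` becomes a read-only property on the trainer mixins: it reports True
# only when precision == 16 and NVIDIA apex is importable.
print(trainer.use_amp)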