Skip to content

Commit

Permalink
Remove DeepSpeed version restriction from Lite (Lightning-AI#13967)
Browse files Browse the repository at this point in the history
  • Loading branch information
awaelchli authored and jessecambon committed Aug 16, 2022
1 parent 5dadd47 commit 6365bb7
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 29 deletions.
2 changes: 1 addition & 1 deletion .azure/gpu-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ jobs:
CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0"
pip install -e .[strategies]
pip install deepspeed>0.6.4 # TODO: remove when docker images are upgraded
pip install -U deepspeed # TODO: remove when docker images are upgraded
pip install --requirement requirements/pytorch/devel.txt
pip list
env:
Expand Down
2 changes: 1 addition & 1 deletion requirements/pytorch/strategies.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

fairscale>=0.4.5, <=0.4.6
deepspeed>=0.6.0, <0.7.0
deepspeed>=0.6.0, <=0.7.0
# no need to install with [pytorch] as pytorch is already installed
horovod>=0.21.2, !=0.24.0, <0.25.1
hivemind>=1.0.1, <=1.0.1; sys_platform == 'linux'
3 changes: 3 additions & 0 deletions src/pytorch_lightning/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Raised a `MisconfigurationException` if batch transfer hooks are overridden with `IPUAccelerator` ([13961](https://github.com/Lightning-AI/lightning/pull/13961))


- Updated compatibility for LightningLite to run with the latest DeepSpeed 0.7.0 ([#13967](https://github.com/Lightning-AI/lightning/pull/13967))


### Deprecated

- Deprecated `LightningDeepSpeedModule` ([#14000](https://github.com/Lightning-AI/lightning/pull/14000))
Expand Down
15 changes: 0 additions & 15 deletions src/pytorch_lightning/lite/lite.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
has_iterable_dataset,
)
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _RequirementAvailable
from pytorch_lightning.utilities.seed import seed_everything


Expand Down Expand Up @@ -106,8 +105,6 @@ def __init__(
self._precision_plugin = self._strategy.precision_plugin
self._models_setup: int = 0

self._check_deepspeed_support()

# wrap the run method so we can inject setup logic or spawn processes for the user
setattr(self, "run", partial(self._run_impl, self.run))

Expand Down Expand Up @@ -459,18 +456,6 @@ def _check_strategy_support(self, strategy: Optional[Union[str, Strategy]]) -> N
f" Choose one of {supported} or pass in a `Strategy` instance."
)

def _check_deepspeed_support(self) -> None:
    """Refuse to continue when DeepSpeed ZeRO stage 3 meets an incompatible DeepSpeed install.

    The check only fires when all three conditions hold, evaluated in this order:
    the configured strategy is a ``DeepSpeedStrategy``, its ``zero_stage_3`` flag is
    truthy, and the requirement ``deepspeed>=0.6.5`` is reported as available.

    Raises:
        RuntimeError: If all of the conditions above are met.
    """
    strategy = self._strategy

    # Guard clauses mirror the short-circuit order of a chained ``and``.
    if not isinstance(strategy, DeepSpeedStrategy):
        return
    if not strategy.zero_stage_3:
        return
    if not _RequirementAvailable("deepspeed>=0.6.5"):
        return

    # https://github.com/microsoft/DeepSpeed/issues/2139
    raise RuntimeError(
        "DeepSpeed ZeRO-3 is not supported with this version of Lightning Lite and `deepspeed>=0.6.5`."
        " Please downgrade deepspeed to 0.6.4 or check if a newer version of Lightning is available."
    )

@staticmethod
def _supported_device_types() -> Sequence[_AcceleratorType]:
return (
Expand Down
13 changes: 1 addition & 12 deletions tests/tests_pytorch/lite/test_lite.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import os
from copy import deepcopy
from unittest import mock
Expand All @@ -30,7 +29,6 @@
from pytorch_lightning.strategies import DeepSpeedStrategy, Strategy
from pytorch_lightning.utilities import _StrategyType
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _RequirementAvailable
from pytorch_lightning.utilities.seed import pl_worker_init_function
from tests_pytorch.helpers.runif import RunIf

Expand Down Expand Up @@ -480,13 +478,4 @@ def run(self):
assert self.broadcast(True)
assert self.is_global_zero == (self.local_rank == 0)

if _RequirementAvailable("deepspeed>=0.6.5"):
# https://github.com/microsoft/DeepSpeed/issues/2139
raise_if_deepspeed_incompatible = pytest.raises(
RuntimeError, match="DeepSpeed ZeRO-3 is not supported with this version of Lightning Lite"
)
else:
raise_if_deepspeed_incompatible = contextlib.suppress()

with raise_if_deepspeed_incompatible:
Lite(strategy=DeepSpeedStrategy(stage=3, logging_batch_size_per_gpu=1), devices=2, accelerator="gpu").run()
Lite(strategy=DeepSpeedStrategy(stage=3, logging_batch_size_per_gpu=1), devices=2, accelerator="gpu").run()

0 comments on commit 6365bb7

Please sign in to comment.