Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove DeepSpeed version restriction from Lite #13967

Merged
merged 11 commits into from
Aug 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .azure/gpu-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ jobs:
CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0"
pip install -e .[strategies]
pip install deepspeed>0.6.4 # TODO: remove when docker images are upgraded
awaelchli marked this conversation as resolved.
Show resolved Hide resolved
pip install -U deepspeed # TODO: remove when docker images are upgraded
pip install --requirement requirements/pytorch/devel.txt
pip list
env:
Expand Down
2 changes: 1 addition & 1 deletion requirements/pytorch/strategies.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

fairscale>=0.4.5, <=0.4.6
deepspeed>=0.6.0, <0.7.0
deepspeed>=0.6.0, <=0.7.0
# no need to install with [pytorch] as pytorch is already installed
horovod>=0.21.2, !=0.24.0, <0.25.1
hivemind>=1.0.1, <=1.0.1; sys_platform == 'linux'
3 changes: 3 additions & 0 deletions src/pytorch_lightning/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Raised a `MisconfigurationException` if batch transfer hooks are overriden with `IPUAccelerator` ([13961](https://github.com/Lightning-AI/lightning/pull/13961))


- Updated compatibility for LightningLite to run with the latest DeepSpeed 0.7.0 ([13967](https://github.com/Lightning-AI/lightning/pull/13967))


### Deprecated

- Deprecated `LightningDeepSpeedModule` ([#14000](https://github.com/Lightning-AI/lightning/pull/14000))
Expand Down
15 changes: 0 additions & 15 deletions src/pytorch_lightning/lite/lite.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
has_iterable_dataset,
)
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _RequirementAvailable
from pytorch_lightning.utilities.seed import seed_everything


Expand Down Expand Up @@ -106,8 +105,6 @@ def __init__(
self._precision_plugin = self._strategy.precision_plugin
self._models_setup: int = 0

self._check_deepspeed_support()

# wrap the run method so we can inject setup logic or spawn processes for the user
setattr(self, "run", partial(self._run_impl, self.run))

Expand Down Expand Up @@ -459,18 +456,6 @@ def _check_strategy_support(self, strategy: Optional[Union[str, Strategy]]) -> N
f" Choose one of {supported} or pass in a `Strategy` instance."
)

def _check_deepspeed_support(self) -> None:
if (
isinstance(self._strategy, DeepSpeedStrategy)
and self._strategy.zero_stage_3
and _RequirementAvailable("deepspeed>=0.6.5")
):
# https://github.com/microsoft/DeepSpeed/issues/2139
raise RuntimeError(
"DeepSpeed ZeRO-3 is not supported with this version of Lightning Lite and `deepspeed>=0.6.5`."
" Please downgrade deepspeed to 0.6.4 or check if a newer version of Lightning is available."
)

@staticmethod
def _supported_device_types() -> Sequence[_AcceleratorType]:
return (
Expand Down
13 changes: 1 addition & 12 deletions tests/tests_pytorch/lite/test_lite.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import os
from copy import deepcopy
from unittest import mock
Expand All @@ -30,7 +29,6 @@
from pytorch_lightning.strategies import DeepSpeedStrategy, Strategy
from pytorch_lightning.utilities import _StrategyType
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _RequirementAvailable
from pytorch_lightning.utilities.seed import pl_worker_init_function
from tests_pytorch.helpers.runif import RunIf

Expand Down Expand Up @@ -480,13 +478,4 @@ def run(self):
assert self.broadcast(True)
assert self.is_global_zero == (self.local_rank == 0)

if _RequirementAvailable("deepspeed>=0.6.5"):
# https://github.com/microsoft/DeepSpeed/issues/2139
raise_if_deepspeed_incompatible = pytest.raises(
RuntimeError, match="DeepSpeed ZeRO-3 is not supported with this version of Lightning Lite"
)
else:
raise_if_deepspeed_incompatible = contextlib.suppress()

with raise_if_deepspeed_incompatible:
Lite(strategy=DeepSpeedStrategy(stage=3, logging_batch_size_per_gpu=1), devices=2, accelerator="gpu").run()
Lite(strategy=DeepSpeedStrategy(stage=3, logging_batch_size_per_gpu=1), devices=2, accelerator="gpu").run()