From 8db30fcaf94e0487590d7921e389214899e45ae2 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 28 Mar 2022 14:27:33 +0100 Subject: [PATCH 01/22] Fix VISSL on GPU and add VISSL GPU CI --- .azure-pipelines/gpu-example-tests.yml | 2 +- .azure-pipelines/testing-template.yml | 12 +++++++++--- flash/core/utilities/imports.py | 2 +- flash/image/embedding/vissl/hooks.py | 5 +++++ tests/examples/test_scripts.py | 8 +++++++- tests/image/embedding/test_model.py | 7 ++++++- 6 files changed, 29 insertions(+), 7 deletions(-) diff --git a/.azure-pipelines/gpu-example-tests.yml b/.azure-pipelines/gpu-example-tests.yml index db47dcbc7a..a1f4e09a6e 100644 --- a/.azure-pipelines/gpu-example-tests.yml +++ b/.azure-pipelines/gpu-example-tests.yml @@ -12,7 +12,7 @@ jobs: parameters: configs: - "image" - - "image,image_extras" + - "icevision" - "text" - "tabular" - "video" diff --git a/.azure-pipelines/testing-template.yml b/.azure-pipelines/testing-template.yml index 42d132fb02..ccf77688ae 100644 --- a/.azure-pipelines/testing-template.yml +++ b/.azure-pipelines/testing-template.yml @@ -36,7 +36,7 @@ jobs: - bash: | # python -m pip install "pip==20.1" - if [ "${{config}}" == "image,image_extras" ]; then pip install '.[image]' icevision effdet icedata; else pip install '.[${{config}}]'; fi + if [ "${{config}}" == "icevision" ]; then pip install '.[image]' icevision effdet icedata; else pip install '.[${{config}}]'; fi pip install '.[test]' --upgrade-strategy only-if-needed pip list displayName: 'Install dependencies' @@ -46,11 +46,17 @@ jobs: pip uninstall -y opencv-python-headless pip install opencv-python-headless==4.5.5.64 displayName: 'Install OpenCV dependencies' - condition: eq('${{ config }}', 'image,image_extras') + condition: eq('${{ config }}', 'icevision') + + - bash: | + pip install git+https://github.com/facebookresearch/ClassyVision.git + pip install git+https://github.com/facebookresearch/vissl.git + displayName: 'Install VISSL dependencies' + condition: eq('${{ 
config }}', 'vissl') - bash: | python -c "import torch; print(f'found GPUs: {torch.cuda.device_count()}')" - python -m coverage run --source flash -m pytest tests/examples/test_scripts.py -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=30 + python -m coverage run --source flash -m pytest tests/examples/test_scripts.py tests/image/embedding/test_model.py -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=30 env: CUDA_VISIBLE_DEVICES: ${{gids}} FLASH_TEST_TOPIC: ${{ config }} diff --git a/flash/core/utilities/imports.py b/flash/core/utilities/imports.py index c5417f73b9..a0b5026c82 100644 --- a/flash/core/utilities/imports.py +++ b/flash/core/utilities/imports.py @@ -291,7 +291,7 @@ def _import_module(self): if "FLASH_TEST_TOPIC" in os.environ: topic = os.environ["FLASH_TEST_TOPIC"] _IMAGE_TESTING = topic == "image" - _IMAGE_EXTRAS_TESTING = topic == "image,image_extras" + _IMAGE_EXTRAS_TESTING = topic == "image,image_extras" or topic == "icevision" or topic == "vissl" _VIDEO_TESTING = topic == "video" _VIDEO_EXTRAS_TESTING = topic == "video,video_extras" _TABULAR_TESTING = topic == "tabular" diff --git a/flash/image/embedding/vissl/hooks.py b/flash/image/embedding/vissl/hooks.py index 6bd090a5ef..db94df6be0 100644 --- a/flash/image/embedding/vissl/hooks.py +++ b/flash/image/embedding/vissl/hooks.py @@ -19,6 +19,7 @@ import flash from flash.core.utilities.compatibility import accelerator_connector from flash.core.utilities.imports import _VISSL_AVAILABLE +from flash.image.embedding.losses.vissl_losses import _recursive_register if _VISSL_AVAILABLE: from classy_vision.hooks.classy_hook import ClassyHook @@ -81,6 +82,10 @@ def on_start(self, task: "flash.image.embedding.vissl.adapter.MockVISSLTask") -> task.loss.info_criterion.precompute_pos_neg_mask() + # Re-register params / devices + _recursive_register(task.loss) + task.loss.to(task.vissl_adapter.adapter_task.device, task.vissl_adapter.adapter_task.dtype) + class 
AdaptVISSLHooks(ModelHooks): def __init__(self, hooks: List[ClassyHook], task) -> None: diff --git a/tests/examples/test_scripts.py b/tests/examples/test_scripts.py index f57746c65c..10a33477ec 100644 --- a/tests/examples/test_scripts.py +++ b/tests/examples/test_scripts.py @@ -29,6 +29,7 @@ _TABULAR_TESTING, _TEXT_TESTING, _VIDEO_TESTING, + _VISSL_AVAILABLE, ) from tests.examples.utils import run_test from tests.helpers.forked import forked @@ -56,6 +57,12 @@ "image_classification_multi_label.py", marks=pytest.mark.skipif(not _IMAGE_TESTING, reason="image libraries aren't installed"), ), + pytest.param( + "image_embedder.py", + marks=pytest.mark.skipif( + not (_IMAGE_AVAILABLE and _VISSL_AVAILABLE), reason="image libraries aren't installed" + ), + ), pytest.param( "object_detection.py", marks=pytest.mark.skipif( @@ -74,7 +81,6 @@ not (_IMAGE_AVAILABLE and _ICEVISION_AVAILABLE), reason="image libraries aren't installed" ), ), - # pytest.param("finetuning", "object_detection.py"), # TODO: takes too long. 
pytest.param( "question_answering.py", marks=pytest.mark.skipif(not _TEXT_TESTING, reason="text libraries aren't installed"), diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py index 9cd78e8b74..38f78ad861 100644 --- a/tests/image/embedding/test_model.py +++ b/tests/image/embedding/test_model.py @@ -81,7 +81,12 @@ def test_vissl_training(backbone, training_strategy, head, pretraining_transform pretraining_transform=pretraining_transform, ) - trainer = flash.Trainer(max_steps=3, max_epochs=1, gpus=torch.cuda.device_count()) + # DINO only works with DDP + if training_strategy == "dino": + trainer = flash.Trainer(max_steps=3, max_epochs=1, gpus=torch.cuda.device_count(), strategy="ddp") + else: + trainer = flash.Trainer(max_steps=3, max_epochs=1, gpus=torch.cuda.device_count()) + trainer.fit(embedder, datamodule=datamodule) predictions = trainer.predict(embedder, datamodule=datamodule) for prediction_batch in predictions: From acae1547d97a224e340f63a1c06e6dcdd2867254 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 28 Mar 2022 14:33:02 +0100 Subject: [PATCH 02/22] Add vissl --- .azure-pipelines/gpu-example-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.azure-pipelines/gpu-example-tests.yml b/.azure-pipelines/gpu-example-tests.yml index a1f4e09a6e..e71374ac4f 100644 --- a/.azure-pipelines/gpu-example-tests.yml +++ b/.azure-pipelines/gpu-example-tests.yml @@ -13,6 +13,7 @@ jobs: configs: - "image" - "icevision" + - "vissl" - "text" - "tabular" - "video" From 3e0d42bd1750b465f1b16f776360508c0857652e Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 28 Mar 2022 14:50:37 +0100 Subject: [PATCH 03/22] Fix --- .azure-pipelines/testing-template.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.azure-pipelines/testing-template.yml b/.azure-pipelines/testing-template.yml index ccf77688ae..56f3bff13f 100644 --- a/.azure-pipelines/testing-template.yml +++ b/.azure-pipelines/testing-template.yml @@ -49,6 +49,7 @@ 
jobs: condition: eq('${{ config }}', 'icevision') - bash: | + pip install fairscale pip install git+https://github.com/facebookresearch/ClassyVision.git pip install git+https://github.com/facebookresearch/vissl.git displayName: 'Install VISSL dependencies' From 23bb7623403b11da43ffd070a0d5dabe90b70692 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 28 Mar 2022 15:15:32 +0100 Subject: [PATCH 04/22] Fix --- .azure-pipelines/testing-template.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure-pipelines/testing-template.yml b/.azure-pipelines/testing-template.yml index 56f3bff13f..e2e7ffd7ce 100644 --- a/.azure-pipelines/testing-template.yml +++ b/.azure-pipelines/testing-template.yml @@ -36,7 +36,7 @@ jobs: - bash: | # python -m pip install "pip==20.1" - if [ "${{config}}" == "icevision" ]; then pip install '.[image]' icevision effdet icedata; else pip install '.[${{config}}]'; fi + if [ "${{config}}" == "icevision" ]; then pip install '.[image]' icevision effdet icedata; elif [ "${{config}}" == "icevision" ]; then pip install '.[image]'; else pip install '.[${{config}}]'; fi pip install '.[test]' --upgrade-strategy only-if-needed pip list displayName: 'Install dependencies' From 651e452da52b1e0402a9e2f70040c89ab979154b Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 28 Mar 2022 15:44:45 +0100 Subject: [PATCH 05/22] Fixes --- .azure-pipelines/testing-template.yml | 2 +- flash_examples/image_embedder.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.azure-pipelines/testing-template.yml b/.azure-pipelines/testing-template.yml index e2e7ffd7ce..6c69b810f5 100644 --- a/.azure-pipelines/testing-template.yml +++ b/.azure-pipelines/testing-template.yml @@ -36,7 +36,7 @@ jobs: - bash: | # python -m pip install "pip==20.1" - if [ "${{config}}" == "icevision" ]; then pip install '.[image]' icevision effdet icedata; elif [ "${{config}}" == "icevision" ]; then pip install '.[image]'; else pip install '.[${{config}}]'; 
fi + if [ "${{config}}" == "icevision" ]; then pip install '.[image]' icevision effdet icedata; elif [ "${{config}}" == "vissl" ]; then pip install '.[image]'; else pip install '.[${{config}}]'; fi pip install '.[test]' --upgrade-strategy only-if-needed pip list displayName: 'Install dependencies' diff --git a/flash_examples/image_embedder.py b/flash_examples/image_embedder.py index 7edc360427..db667d20ef 100644 --- a/flash_examples/image_embedder.py +++ b/flash_examples/image_embedder.py @@ -50,7 +50,8 @@ predict_files=[ "data/hymenoptera_data/predict/153783656_85f9c3ac70.jpg", "data/hymenoptera_data/predict/2039585088_c6f47c592e.jpg", - ] + ], + batch_size=3, ) embeddings = trainer.predict(embedder, datamodule=datamodule) From 59907af88a6a48cfac8083fe3a41471d5ab0aedc Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 28 Mar 2022 16:19:33 +0100 Subject: [PATCH 06/22] Speed up example --- flash_examples/image_embedder.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flash_examples/image_embedder.py b/flash_examples/image_embedder.py index db667d20ef..50dc9457b7 100644 --- a/flash_examples/image_embedder.py +++ b/flash_examples/image_embedder.py @@ -21,23 +21,23 @@ # 1. Download the data and prepare the datamodule datamodule = ImageClassificationData.from_datasets( train_dataset=CIFAR10(".", download=True), - batch_size=16, + batch_size=4, ) # 2. Build the task embedder = ImageEmbedder( - backbone="resnet", + backbone="vision_transformer", training_strategy="barlow_twins", head="barlow_twins_head", pretraining_transform="barlow_twins_transform", training_strategy_kwargs={"latent_embedding_dim": 128}, - pretraining_transform_kwargs={"size_crops": [196]}, + pretraining_transform_kwargs={"size_crops": [32]}, ) # 3. Create the trainer and pre-train the encoder # use accelerator='ddp' when using GPU(s), # i.e. 
flash.Trainer(max_epochs=3, gpus=1, accelerator='ddp') -trainer = flash.Trainer(max_epochs=3, gpus=torch.cuda.device_count()) +trainer = flash.Trainer(max_epochs=1, gpus=torch.cuda.device_count()) trainer.fit(embedder, datamodule=datamodule) # 4. Save the model! From 3e5c90ac20363e7b1ef5f5aca16375eae5368d30 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 28 Mar 2022 16:29:59 +0100 Subject: [PATCH 07/22] Add additional_log_data --- flash/image/embedding/vissl/adapter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/flash/image/embedding/vissl/adapter.py b/flash/image/embedding/vissl/adapter.py index 119db01974..41996b5c66 100644 --- a/flash/image/embedding/vissl/adapter.py +++ b/flash/image/embedding/vissl/adapter.py @@ -51,6 +51,9 @@ def __init__(self, vissl_adapter, vissl_loss, task_config, vissl_model) -> None: # set for momentum teacher based hooks self.last_batch = AttrDict({"sample": AttrDict({"input": None, "data_momentum": None})}) + # used in dino + self.additional_log_data = {} + class VISSLAdapter(Adapter, AdaptVISSLHooks): """The ``VISSLAdapter`` is an :class:`~flash.core.adapter.Adapter` for integrating with VISSL. From 824b85b863bf95005bb3274d7d1a9899f320fccc Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 28 Mar 2022 18:55:14 +0100 Subject: [PATCH 08/22] Updates --- flash_examples/image_embedder.py | 7 ++++--- tests/image/embedding/test_model.py | 12 +++++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/flash_examples/image_embedder.py b/flash_examples/image_embedder.py index 50dc9457b7..fd79ec794c 100644 --- a/flash_examples/image_embedder.py +++ b/flash_examples/image_embedder.py @@ -35,9 +35,10 @@ ) # 3. Create the trainer and pre-train the encoder -# use accelerator='ddp' when using GPU(s), -# i.e. 
flash.Trainer(max_epochs=3, gpus=1, accelerator='ddp') -trainer = flash.Trainer(max_epochs=1, gpus=torch.cuda.device_count()) +# use strategy='ddp' when using GPU(s) +trainer = flash.Trainer( + max_epochs=1, gpus=torch.cuda.device_count(), strategy="ddp" if torch.cuda.device_count() > 1 else None +) trainer.fit(embedder, datamodule=datamodule) # 4. Save the model! diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py index 38f78ad861..e2d4a95a03 100644 --- a/tests/image/embedding/test_model.py +++ b/tests/image/embedding/test_model.py @@ -70,7 +70,7 @@ def test_vissl_training(backbone, training_strategy, head, pretraining_transform # moco strategy, transform and head is not added for this test as it doesn't work as of now. datamodule = ImageClassificationData.from_datasets( train_dataset=FakeData(16), - predict_dataset=FakeData(4), + predict_dataset=FakeData(8), batch_size=4, ) @@ -82,10 +82,12 @@ def test_vissl_training(backbone, training_strategy, head, pretraining_transform ) # DINO only works with DDP - if training_strategy == "dino": - trainer = flash.Trainer(max_steps=3, max_epochs=1, gpus=torch.cuda.device_count(), strategy="ddp") - else: - trainer = flash.Trainer(max_steps=3, max_epochs=1, gpus=torch.cuda.device_count()) + trainer = flash.Trainer( + max_steps=3, + max_epochs=1, + gpus=torch.cuda.device_count(), + strategy="ddp" if (training_strategy == "dino" or torch.cuda.device_count() > 1) else None, + ) trainer.fit(embedder, datamodule=datamodule) predictions = trainer.predict(embedder, datamodule=datamodule) From 5b488cd712e6ee1edc8b8a8dc5f69d610e8759da Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Mon, 28 Mar 2022 22:01:16 +0100 Subject: [PATCH 09/22] Try something --- tests/image/embedding/test_model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py index e2d4a95a03..5e343cf810 100644 --- a/tests/image/embedding/test_model.py +++ 
b/tests/image/embedding/test_model.py @@ -19,6 +19,7 @@ import flash from flash.core.utilities.imports import _IMAGE_AVAILABLE, _TORCHVISION_AVAILABLE, _VISSL_AVAILABLE from flash.image import ImageClassificationData, ImageEmbedder +from tests.helpers.forked import forked if _TORCHVISION_AVAILABLE: from torchvision.datasets import FakeData @@ -66,6 +67,7 @@ def test_load_from_checkpoint_dependency_error(): ("vision_transformer", "swav", "swav_head", "swav_transform"), ], ) +@forked def test_vissl_training(backbone, training_strategy, head, pretraining_transform): # moco strategy, transform and head is not added for this test as it doesn't work as of now. datamodule = ImageClassificationData.from_datasets( From 0a9a303b0cedab70020859aeb2cae289794a9113 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Tue, 29 Mar 2022 13:25:23 +0100 Subject: [PATCH 10/22] Add spawned decorator --- requirements/test.txt | 1 + tests/examples/test_scripts.py | 2 +- tests/helpers/decorators.py | 58 +++++++++++++++++++++++++++++ tests/helpers/forked.py | 24 ------------ tests/image/embedding/test_model.py | 4 +- 5 files changed, 62 insertions(+), 27 deletions(-) create mode 100644 tests/helpers/decorators.py delete mode 100644 tests/helpers/forked.py diff --git a/requirements/test.txt b/requirements/test.txt index 8b5899f7d3..6373aba29c 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -6,6 +6,7 @@ flake8 pytest-doctestplus>=0.9.0 pytest-rerunfailures>=10.0 pytest-forked +dill # install pkg check-manifest diff --git a/tests/examples/test_scripts.py b/tests/examples/test_scripts.py index 10a33477ec..a4bd849464 100644 --- a/tests/examples/test_scripts.py +++ b/tests/examples/test_scripts.py @@ -32,7 +32,7 @@ _VISSL_AVAILABLE, ) from tests.examples.utils import run_test -from tests.helpers.forked import forked +from tests.helpers.decorators import forked root = Path(__file__).parent.parent.parent diff --git a/tests/helpers/decorators.py b/tests/helpers/decorators.py new file 
mode 100644 index 0000000000..c7566f6878 --- /dev/null +++ b/tests/helpers/decorators.py @@ -0,0 +1,58 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import functools +import multiprocessing as mp +import os + +import pytest +from dill import dumps, loads + + +def forked(callable): + # PyTest forked not available in Windows + if os.name == "nt": + return callable + os.environ["MKL_SERVICE_FORCE_INTEL"] = "1" + return pytest.mark.forked(callable) + + +class pickleable_target: + def __init__(self, target): + self.target = target + + def __call__(self, *args, **kwargs): + return self.target(*args, **kwargs) + + def __getstate__(self): + self.target = dumps(self.target) + return self.__dict__ + + def __setstate__(self, d): + self.__dict__ = d + self.target = loads(self.target) + + +class spawned: + def __init__(self, target): + self.target = target + functools.update_wrapper(self, target) + + def __call__(self, *args, **kwargs): + context = mp.get_context("spawn") + target = pickleable_target(self.target) + + p = context.Process(target=target, args=args, kwargs=kwargs) + p.start() + p.join() + assert not p.exitcode diff --git a/tests/helpers/forked.py b/tests/helpers/forked.py deleted file mode 100644 index 2f1567e6a3..0000000000 --- a/tests/helpers/forked.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright The PyTorch Lightning team. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os - -import pytest - - -def forked(callable): - # PyTest forked not available in Windows - if os.name == "nt": - return callable - os.environ["MKL_SERVICE_FORCE_INTEL"] = "1" - return pytest.mark.forked(callable) diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py index 5e343cf810..9f6a0473f2 100644 --- a/tests/image/embedding/test_model.py +++ b/tests/image/embedding/test_model.py @@ -19,7 +19,7 @@ import flash from flash.core.utilities.imports import _IMAGE_AVAILABLE, _TORCHVISION_AVAILABLE, _VISSL_AVAILABLE from flash.image import ImageClassificationData, ImageEmbedder -from tests.helpers.forked import forked +from tests.helpers.decorators import spawned if _TORCHVISION_AVAILABLE: from torchvision.datasets import FakeData @@ -67,7 +67,7 @@ def test_load_from_checkpoint_dependency_error(): ("vision_transformer", "swav", "swav_head", "swav_transform"), ], ) -@forked +@spawned def test_vissl_training(backbone, training_strategy, head, pretraining_transform): # moco strategy, transform and head is not added for this test as it doesn't work as of now. 
datamodule = ImageClassificationData.from_datasets( From 42fe05be4f867f215f6415553f7efb519b4e5f41 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Tue, 29 Mar 2022 14:51:15 +0100 Subject: [PATCH 11/22] Debnugging --- tests/image/embedding/test_model.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py index 9f6a0473f2..3830da2399 100644 --- a/tests/image/embedding/test_model.py +++ b/tests/image/embedding/test_model.py @@ -55,16 +55,16 @@ def test_load_from_checkpoint_dependency_error(): @pytest.mark.parametrize( "backbone, training_strategy, head, pretraining_transform", [ - ("vision_transformer", "simclr", "simclr_head", "simclr_transform"), - pytest.param( - "vision_transformer", - "dino", - "dino_head", - "dino_transform", - marks=pytest.mark.skipif(torch.cuda.device_count() < 1, reason="VISSL DINO calls all_reduce internally."), - ), + # ("vision_transformer", "simclr", "simclr_head", "simclr_transform"), + # pytest.param( + # "vision_transformer", + # "dino", + # "dino_head", + # "dino_transform", + # marks=pytest.mark.skipif(torch.cuda.device_count() < 1, reason="VISSL DINO calls all_reduce internally."), + # ), ("vision_transformer", "barlow_twins", "barlow_twins_head", "barlow_twins_transform"), - ("vision_transformer", "swav", "swav_head", "swav_transform"), + # ("vision_transformer", "swav", "swav_head", "swav_transform"), ], ) @spawned From d8b5a6a07652886417939d90684456205c80b353 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Tue, 29 Mar 2022 16:06:40 +0100 Subject: [PATCH 12/22] Debugging --- tests/image/embedding/test_model.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py index 3830da2399..499544ac6d 100644 --- a/tests/image/embedding/test_model.py +++ b/tests/image/embedding/test_model.py @@ -19,7 +19,6 @@ import flash 
from flash.core.utilities.imports import _IMAGE_AVAILABLE, _TORCHVISION_AVAILABLE, _VISSL_AVAILABLE from flash.image import ImageClassificationData, ImageEmbedder -from tests.helpers.decorators import spawned if _TORCHVISION_AVAILABLE: from torchvision.datasets import FakeData @@ -51,23 +50,23 @@ def test_load_from_checkpoint_dependency_error(): ImageEmbedder.load_from_checkpoint("not_a_real_checkpoint.pt") +@pytest.mark.skipif(torch.cuda.device_count() > 1, reason="DDP not working.") @pytest.mark.skipif(not (_TORCHVISION_AVAILABLE and _VISSL_AVAILABLE), reason="vissl not installed.") @pytest.mark.parametrize( "backbone, training_strategy, head, pretraining_transform", [ - # ("vision_transformer", "simclr", "simclr_head", "simclr_transform"), - # pytest.param( - # "vision_transformer", - # "dino", - # "dino_head", - # "dino_transform", - # marks=pytest.mark.skipif(torch.cuda.device_count() < 1, reason="VISSL DINO calls all_reduce internally."), - # ), + ("vision_transformer", "simclr", "simclr_head", "simclr_transform"), + pytest.param( + "vision_transformer", + "dino", + "dino_head", + "dino_transform", + marks=pytest.mark.skipif(torch.cuda.device_count() < 1, reason="VISSL DINO calls all_reduce internally."), + ), ("vision_transformer", "barlow_twins", "barlow_twins_head", "barlow_twins_transform"), - # ("vision_transformer", "swav", "swav_head", "swav_transform"), + ("vision_transformer", "swav", "swav_head", "swav_transform"), ], ) -@spawned def test_vissl_training(backbone, training_strategy, head, pretraining_transform): # moco strategy, transform and head is not added for this test as it doesn't work as of now. 
datamodule = ImageClassificationData.from_datasets( From ca8caa4372ef11e151cb3142cfc271b330e1cc38 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Tue, 29 Mar 2022 18:28:13 +0100 Subject: [PATCH 13/22] Updates --- docs/source/reference/image_embedder.rst | 5 ++--- flash_examples/image_embedder.py | 5 +---- tests/examples/test_scripts.py | 9 ++++++--- tests/image/embedding/test_model.py | 2 +- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/docs/source/reference/image_embedder.rst b/docs/source/reference/image_embedder.rst index fe9019cd8b..e0eb656081 100644 --- a/docs/source/reference/image_embedder.rst +++ b/docs/source/reference/image_embedder.rst @@ -31,10 +31,9 @@ Then the user can configure the :class:`~flash.image.embedding.model.ImageEmbedd There are options provided to send additional arguments to config selections. This task can now be sent to the ``fit()`` method of :class:`~flash.core.trainer.Trainer`. -.. note:: +.. warning:: - A lot of VISSL loss functions use hard-coded ``torch.distributed`` methods. The user is suggested to use ``accelerator=ddp`` even with a single GPU. - Only ``barlow_twins`` training strategy works on the CPU. All other loss functions are configured to work on GPUs. + The VISSL integration does not support multi-GPU training. .. literalinclude:: ../../../flash_examples/image_embedder.py :language: python diff --git a/flash_examples/image_embedder.py b/flash_examples/image_embedder.py index fd79ec794c..5a51d48eba 100644 --- a/flash_examples/image_embedder.py +++ b/flash_examples/image_embedder.py @@ -35,10 +35,7 @@ ) # 3. Create the trainer and pre-train the encoder -# use strategy='ddp' when using GPU(s) -trainer = flash.Trainer( - max_epochs=1, gpus=torch.cuda.device_count(), strategy="ddp" if torch.cuda.device_count() > 1 else None -) +trainer = flash.Trainer(max_epochs=1, gpus=torch.cuda.device_count()) trainer.fit(embedder, datamodule=datamodule) # 4. Save the model! 
diff --git a/tests/examples/test_scripts.py b/tests/examples/test_scripts.py index a4bd849464..1162caa91d 100644 --- a/tests/examples/test_scripts.py +++ b/tests/examples/test_scripts.py @@ -59,9 +59,12 @@ ), pytest.param( "image_embedder.py", - marks=pytest.mark.skipif( - not (_IMAGE_AVAILABLE and _VISSL_AVAILABLE), reason="image libraries aren't installed" - ), + marks=[ + pytest.mark.skipif( + not (_IMAGE_AVAILABLE and _VISSL_AVAILABLE), reason="image libraries aren't installed" + ), + pytest.mark.skipif(torch.cuda.device_count() > 1, reason="VISSL integration doesn't support multi-GPU"), + ], ), pytest.param( "object_detection.py", diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py index 499544ac6d..3ddd368b1f 100644 --- a/tests/image/embedding/test_model.py +++ b/tests/image/embedding/test_model.py @@ -50,7 +50,7 @@ def test_load_from_checkpoint_dependency_error(): ImageEmbedder.load_from_checkpoint("not_a_real_checkpoint.pt") -@pytest.mark.skipif(torch.cuda.device_count() > 1, reason="DDP not working.") +@pytest.mark.skipif(torch.cuda.device_count() > 1, reason="VISSL integration doesn't support multi-GPU") @pytest.mark.skipif(not (_TORCHVISION_AVAILABLE and _VISSL_AVAILABLE), reason="vissl not installed.") @pytest.mark.parametrize( "backbone, training_strategy, head, pretraining_transform", From da4ecbae6ed792f5df999cf49b1cc880d9834d17 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Tue, 29 Mar 2022 18:29:01 +0100 Subject: [PATCH 14/22] Clean --- requirements/test.txt | 1 - tests/helpers/decorators.py | 34 ---------------------------------- 2 files changed, 35 deletions(-) diff --git a/requirements/test.txt b/requirements/test.txt index 6373aba29c..8b5899f7d3 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -6,7 +6,6 @@ flake8 pytest-doctestplus>=0.9.0 pytest-rerunfailures>=10.0 pytest-forked -dill # install pkg check-manifest diff --git a/tests/helpers/decorators.py b/tests/helpers/decorators.py index 
c7566f6878..2f1567e6a3 100644 --- a/tests/helpers/decorators.py +++ b/tests/helpers/decorators.py @@ -11,12 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import functools -import multiprocessing as mp import os import pytest -from dill import dumps, loads def forked(callable): @@ -25,34 +22,3 @@ def forked(callable): return callable os.environ["MKL_SERVICE_FORCE_INTEL"] = "1" return pytest.mark.forked(callable) - - -class pickleable_target: - def __init__(self, target): - self.target = target - - def __call__(self, *args, **kwargs): - return self.target(*args, **kwargs) - - def __getstate__(self): - self.target = dumps(self.target) - return self.__dict__ - - def __setstate__(self, d): - self.__dict__ = d - self.target = loads(self.target) - - -class spawned: - def __init__(self, target): - self.target = target - functools.update_wrapper(self, target) - - def __call__(self, *args, **kwargs): - context = mp.get_context("spawn") - target = pickleable_target(self.target) - - p = context.Process(target=target, args=args, kwargs=kwargs) - p.start() - p.join() - assert not p.exitcode From 2b486813fd13f8be290233744057b06e8b0e1821 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Tue, 29 Mar 2022 19:04:01 +0100 Subject: [PATCH 15/22] Try fix --- docs/source/reference/image_embedder.rst | 2 +- tests/image/embedding/test_model.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/source/reference/image_embedder.rst b/docs/source/reference/image_embedder.rst index e0eb656081..2a83711ddc 100644 --- a/docs/source/reference/image_embedder.rst +++ b/docs/source/reference/image_embedder.rst @@ -33,7 +33,7 @@ This task can now be sent to the ``fit()`` method of :class:`~flash.core.trainer .. warning:: - The VISSL integration does not support multi-GPU training. + The VISSL integration does not support multi-GPU training. 
All training strategies support single-GPU training and all except for DINO support CPU training. .. literalinclude:: ../../../flash_examples/image_embedder.py :language: python diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py index 3ddd368b1f..a4b1095bfd 100644 --- a/tests/image/embedding/test_model.py +++ b/tests/image/embedding/test_model.py @@ -87,7 +87,6 @@ def test_vissl_training(backbone, training_strategy, head, pretraining_transform max_steps=3, max_epochs=1, gpus=torch.cuda.device_count(), - strategy="ddp" if (training_strategy == "dino" or torch.cuda.device_count() > 1) else None, ) trainer.fit(embedder, datamodule=datamodule) From 2f1062db86373b7043e7926ec6e3de2cc7db3423 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Wed, 30 Mar 2022 10:22:59 +0100 Subject: [PATCH 16/22] Update docs --- docs/source/reference/image_embedder.rst | 34 ++++++++++++++++++------ flash/image/embedding/model.py | 31 +++++++++++++++------ 2 files changed, 49 insertions(+), 16 deletions(-) diff --git a/docs/source/reference/image_embedder.rst b/docs/source/reference/image_embedder.rst index 2a83711ddc..bdecbb7c09 100644 --- a/docs/source/reference/image_embedder.rst +++ b/docs/source/reference/image_embedder.rst @@ -4,6 +4,10 @@ :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/image_embedder.svg :tags: Image,Embedding +.. warning:: + + Multi-gpu training is not currently supported by the :class:`~flash.image.embedding.model.ImageEmbedder` task. + .. _image_embedder: ############## @@ -17,7 +21,9 @@ The Task Image embedding encodes an image into a vector of features which can be used for a downstream task. This could include: clustering, similarity search, or classification. +The Flash :class:`~flash.image.embedding.model.ImageEmbedder` can be trained with Self Supervised Learning (SSL) to improve the quality of the embeddings it produces for your data. 
The :class:`~flash.image.embedding.model.ImageEmbedder` internally relies on `VISSL <https://github.com/facebookresearch/vissl>`_. +You can read more about our integration with VISSL here: :ref:`vissl`. ------ @@ -26,17 +32,29 @@ Example ******* Let's see how to configure a training strategy for the :class:`~flash.image.embedding.model.ImageEmbedder` task. -A vanilla :class:`~flash.core.data.data_module.DataModule` object be created using standard Datasets as shown below. -Then the user can configure the :class:`~flash.image.embedding.model.ImageEmbedder` task with ``training_strategy``, ``backbone``, ``head`` and ``pretraining_transform``. -There are options provided to send additional arguments to config selections. -This task can now be sent to the ``fit()`` method of :class:`~flash.core.trainer.Trainer`. - -.. warning:: - - The VISSL integration does not support multi-GPU training. All training strategies support single-GPU training and all except for DINO support CPU training. +First we create an :class:`~flash.image.classification.data.ImageClassificationData` object using a `Dataset` from torchvision. +Next, we configure the :class:`~flash.image.embedding.model.ImageEmbedder` task with ``training_strategy``, ``backbone``, ``head`` and ``pretraining_transform``. +Finally, we construct a :class:`~flash.core.trainer.Trainer` and call ``fit()``. +Here's the full example: .. literalinclude:: ../../../flash_examples/image_embedder.py :language: python :lines: 14- To learn how to view the available backbones / heads for this task, see :ref:`backbones_heads`. +You can view the available training strategies with the :meth:`~flash.image.embedding.model.ImageEmbedder.available_training_strategies` method. + +.. note:: + + The ``"dino"`` training strategy only supports single GPU training with ``strategy="DDP"``. 
+ +The ``head`` and ``pretraining_transform`` arguments should match the choice of ``training_strategy`` following this table: + +===================== ================ ========================== +``training_strategy`` ``head`` ``pretraining_transform`` +===================== ================ ========================== +``simclr`` ``simclr_head`` ``simclr_transform`` +``barlow_twins`` ``barlow_twins`` ``barlow_twins_transform`` +``swav`` ``swav_head`` ``swav_transform`` +``dino`` ``dino_head`` ``dino_transform`` +===================== ================ ========================== diff --git a/flash/image/embedding/model.py b/flash/image/embedding/model.py index 6ca62f5528..edd7e37d93 100644 --- a/flash/image/embedding/model.py +++ b/flash/image/embedding/model.py @@ -18,21 +18,24 @@ from flash.core.registry import FlashRegistry from flash.core.utilities.imports import _VISSL_AVAILABLE, requires from flash.core.utilities.types import LR_SCHEDULER_TYPE, OPTIMIZER_TYPE +from flash.image.embedding.backbones import IMAGE_EMBEDDER_BACKBONES +from flash.image.embedding.strategies import IMAGE_EMBEDDER_STRATEGIES +from flash.image.embedding.transforms import IMAGE_EMBEDDER_TRANSFORMS if _VISSL_AVAILABLE: import classy_vision import classy_vision.generic.distributed_util - from flash.image.embedding.backbones import IMAGE_EMBEDDER_BACKBONES - from flash.image.embedding.strategies import IMAGE_EMBEDDER_STRATEGIES - from flash.image.embedding.transforms import IMAGE_EMBEDDER_TRANSFORMS - # patch this to avoid classy vision/vissl based distributed training classy_vision.generic.distributed_util.get_world_size = lambda: 1 -else: - IMAGE_EMBEDDER_BACKBONES = FlashRegistry("backbones") - IMAGE_EMBEDDER_STRATEGIES = FlashRegistry("embedder_training_strategies") - IMAGE_EMBEDDER_TRANSFORMS = FlashRegistry("embedder_transforms") + +# Skip doctests if requirements aren't available +__doctest_skip__ = [] +if not _VISSL_AVAILABLE: + __doctest_skip__ += [ + "ImageEmbedder", + 
"ImageEmbedder.*", + ] class ImageEmbedder(AdapterTask): @@ -130,6 +133,18 @@ def on_train_batch_end(self, outputs: Any, batch: Any, batch_idx: int, dataloade @classmethod @requires(["image", "vissl", "fairscale"]) def available_training_strategies(cls) -> List[str]: + """Get the list of available training strategies (passed to the ``training_strategy`` argument) for this + task. + + Examples + ________ + + .. doctest:: + + >>> from flash.image import ImageEmbedder + >>> ImageEmbedder.available_training_strategies() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + ['barlow_twins', ..., 'swav'] + """ registry: Optional[FlashRegistry] = getattr(cls, "training_strategies", None) if registry is None: return [] From 0cf4831678223c4db56ab06f0034cd2fb5c7d06e Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Wed, 30 Mar 2022 10:26:25 +0100 Subject: [PATCH 17/22] Fix test --- tests/image/embedding/test_model.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py index a4b1095bfd..828dc417ab 100644 --- a/tests/image/embedding/test_model.py +++ b/tests/image/embedding/test_model.py @@ -17,7 +17,12 @@ import torch import flash -from flash.core.utilities.imports import _IMAGE_AVAILABLE, _TORCHVISION_AVAILABLE, _VISSL_AVAILABLE +from flash.core.utilities.imports import ( + _IMAGE_AVAILABLE, + _PL_GREATER_EQUAL_1_5_0, + _TORCHVISION_AVAILABLE, + _VISSL_AVAILABLE, +) from flash.image import ImageClassificationData, ImageEmbedder if _TORCHVISION_AVAILABLE: @@ -82,11 +87,20 @@ def test_vissl_training(backbone, training_strategy, head, pretraining_transform pretraining_transform=pretraining_transform, ) + kwargs = {} + # DINO only works with DDP + if training_strategy == "dino": + if _PL_GREATER_EQUAL_1_5_0: + kwargs["strategy"] = "DDP" + else: + kwargs["accelerator"] = "DDP" + trainer = flash.Trainer( max_steps=3, max_epochs=1, gpus=torch.cuda.device_count(), + **kwargs, ) 
trainer.fit(embedder, datamodule=datamodule) From 7b0986c93022c0d9192f86649f3e7e939e7c177f Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Wed, 30 Mar 2022 10:50:30 +0100 Subject: [PATCH 18/22] Respond to comment --- flash/image/embedding/vissl/hooks.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/flash/image/embedding/vissl/hooks.py b/flash/image/embedding/vissl/hooks.py index db94df6be0..f9845e7ae3 100644 --- a/flash/image/embedding/vissl/hooks.py +++ b/flash/image/embedding/vissl/hooks.py @@ -19,7 +19,6 @@ import flash from flash.core.utilities.compatibility import accelerator_connector from flash.core.utilities.imports import _VISSL_AVAILABLE -from flash.image.embedding.losses.vissl_losses import _recursive_register if _VISSL_AVAILABLE: from classy_vision.hooks.classy_hook import ClassyHook @@ -82,8 +81,7 @@ def on_start(self, task: "flash.image.embedding.vissl.adapter.MockVISSLTask") -> task.loss.info_criterion.precompute_pos_neg_mask() - # Re-register params / devices - _recursive_register(task.loss) + # Cast the loss to the correct device / dtype task.loss.to(task.vissl_adapter.adapter_task.device, task.vissl_adapter.adapter_task.dtype) From f018fb84c6d321d42c590d619804f4fdaad3690d Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Wed, 30 Mar 2022 10:51:50 +0100 Subject: [PATCH 19/22] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 99643da230..8afdf4ff74 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Fixed +- Fixed GPU support for self-supervised training with the `ImageEmbedder` ([#1256](https://github.com/PyTorchLightning/lightning-flash/pull/1256)) + - Fixed a bug where collate functions were never called in the `ImageEmbedder` class. 
([#1217](https://github.com/PyTorchLightning/lightning-flash/pull/1217)) - Fixed a bug where `pretraining_transforms` in the `ImageEmbedder` was never called. ([#1196](https://github.com/PyTorchLightning/lightning-flash/pull/1196)) From fc2f886c4ace864b0fc66e598a1d61590deda4e0 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Wed, 30 Mar 2022 10:57:09 +0100 Subject: [PATCH 20/22] Update flash/image/embedding/vissl/hooks.py Co-authored-by: Kushashwa Ravi Shrimali --- flash/image/embedding/vissl/hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flash/image/embedding/vissl/hooks.py b/flash/image/embedding/vissl/hooks.py index f9845e7ae3..9176883f59 100644 --- a/flash/image/embedding/vissl/hooks.py +++ b/flash/image/embedding/vissl/hooks.py @@ -82,7 +82,7 @@ def on_start(self, task: "flash.image.embedding.vissl.adapter.MockVISSLTask") -> task.loss.info_criterion.precompute_pos_neg_mask() # Cast the loss to the correct device / dtype - task.loss.to(task.vissl_adapter.adapter_task.device, task.vissl_adapter.adapter_task.dtype) + task.loss.to(lightning_module.device, lightning_module.dtype) class AdaptVISSLHooks(ModelHooks): From 813a998ab2a586a0c8ef4535654089ed3348fe24 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Wed, 30 Mar 2022 11:39:03 +0100 Subject: [PATCH 21/22] Fix docs --- docs/source/reference/image_embedder.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/source/reference/image_embedder.rst b/docs/source/reference/image_embedder.rst index bdecbb7c09..01d2ec8735 100644 --- a/docs/source/reference/image_embedder.rst +++ b/docs/source/reference/image_embedder.rst @@ -50,11 +50,11 @@ You can view the available training strategies with the :meth:`~flash.image.embe The ``head`` and ``pretraining_transform`` arguments should match the choice of ``training_strategy`` following this table: -===================== ================ ========================== -``training_strategy`` ``head`` 
``pretraining_transform`` -===================== ================ ========================== -``simclr`` ``simclr_head`` ``simclr_transform`` -``barlow_twins`` ``barlow_twins`` ``barlow_twins_transform`` -``swav`` ``swav_head`` ``swav_transform`` -``dino`` ``dino_head`` ``dino_transform`` -===================== ================ ========================== +===================== ===================== ========================== +``training_strategy`` ``head`` ``pretraining_transform`` +===================== ===================== ========================== +``simclr`` ``simclr_head`` ``simclr_transform`` +``barlow_twins`` ``barlow_twins_head`` ``barlow_twins_transform`` +``swav`` ``swav_head`` ``swav_transform`` +``dino`` ``dino_head`` ``dino_transform`` +===================== ===================== ========================== From 9350f568f9a17224161aeabebc125606465b113e Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Wed, 30 Mar 2022 12:28:02 +0100 Subject: [PATCH 22/22] Fix ddp to lowercase --- docs/source/reference/image_embedder.rst | 2 +- tests/image/embedding/test_model.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/reference/image_embedder.rst b/docs/source/reference/image_embedder.rst index 01d2ec8735..78f2232c73 100644 --- a/docs/source/reference/image_embedder.rst +++ b/docs/source/reference/image_embedder.rst @@ -46,7 +46,7 @@ You can view the available training strategies with the :meth:`~flash.image.embe .. note:: - The ``"dino"`` training strategy only supports single GPU training with ``strategy="DDP"``. + The ``"dino"`` training strategy only supports single GPU training with ``strategy="ddp"``. 
The ``head`` and ``pretraining_transform`` arguments should match the choice of ``training_strategy`` following this table: diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py index 828dc417ab..9f26fc8444 100644 --- a/tests/image/embedding/test_model.py +++ b/tests/image/embedding/test_model.py @@ -92,9 +92,9 @@ def test_vissl_training(backbone, training_strategy, head, pretraining_transform # DINO only works with DDP if training_strategy == "dino": if _PL_GREATER_EQUAL_1_5_0: - kwargs["strategy"] = "DDP" + kwargs["strategy"] = "ddp" else: - kwargs["accelerator"] = "DDP" + kwargs["accelerator"] = "ddp" trainer = flash.Trainer( max_steps=3,