From 8db30fcaf94e0487590d7921e389214899e45ae2 Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Mon, 28 Mar 2022 14:27:33 +0100
Subject: [PATCH 01/22] Fix VISSL on GPU and add VISSL GPU CI

---
 .azure-pipelines/gpu-example-tests.yml |  2 +-
 .azure-pipelines/testing-template.yml  | 12 +++++++++---
 flash/core/utilities/imports.py        |  2 +-
 flash/image/embedding/vissl/hooks.py   |  5 +++++
 tests/examples/test_scripts.py         |  8 +++++++-
 tests/image/embedding/test_model.py    |  7 ++++++-
 6 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/.azure-pipelines/gpu-example-tests.yml b/.azure-pipelines/gpu-example-tests.yml
index db47dcbc7a..a1f4e09a6e 100644
--- a/.azure-pipelines/gpu-example-tests.yml
+++ b/.azure-pipelines/gpu-example-tests.yml
@@ -12,7 +12,7 @@ jobs:
   parameters:
     configs:
     - "image"
-    - "image,image_extras"
+    - "icevision"
     - "text"
     - "tabular"
     - "video"
diff --git a/.azure-pipelines/testing-template.yml b/.azure-pipelines/testing-template.yml
index 42d132fb02..ccf77688ae 100644
--- a/.azure-pipelines/testing-template.yml
+++ b/.azure-pipelines/testing-template.yml
@@ -36,7 +36,7 @@ jobs:
 
       - bash: |
           # python -m pip install "pip==20.1"
-          if [ "${{config}}" == "image,image_extras" ]; then pip install '.[image]' icevision effdet icedata; else pip install '.[${{config}}]'; fi
+          if [ "${{config}}" == "icevision" ]; then pip install '.[image]' icevision effdet icedata; else pip install '.[${{config}}]'; fi
           pip install '.[test]' --upgrade-strategy only-if-needed
           pip list
         displayName: 'Install dependencies'
@@ -46,11 +46,17 @@ jobs:
           pip uninstall -y opencv-python-headless
           pip install opencv-python-headless==4.5.5.64
         displayName: 'Install OpenCV dependencies'
-        condition: eq('${{ config }}', 'image,image_extras')
+        condition: eq('${{ config }}', 'icevision')
+
+      - bash: |
+          pip install git+https://github.com/facebookresearch/ClassyVision.git
+          pip install git+https://github.com/facebookresearch/vissl.git
+        displayName: 'Install VISSL dependencies'
+        condition: eq('${{ config }}', 'vissl')
 
       - bash: |
           python -c "import torch; print(f'found GPUs: {torch.cuda.device_count()}')"
-          python -m coverage run --source flash -m pytest tests/examples/test_scripts.py -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=30
+          python -m coverage run --source flash -m pytest tests/examples/test_scripts.py tests/image/embedding/test_model.py -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=30
         env:
           CUDA_VISIBLE_DEVICES: ${{gids}}
           FLASH_TEST_TOPIC: ${{ config }}
diff --git a/flash/core/utilities/imports.py b/flash/core/utilities/imports.py
index c5417f73b9..a0b5026c82 100644
--- a/flash/core/utilities/imports.py
+++ b/flash/core/utilities/imports.py
@@ -291,7 +291,7 @@ def _import_module(self):
 if "FLASH_TEST_TOPIC" in os.environ:
     topic = os.environ["FLASH_TEST_TOPIC"]
     _IMAGE_TESTING = topic == "image"
-    _IMAGE_EXTRAS_TESTING = topic == "image,image_extras"
+    _IMAGE_EXTRAS_TESTING = topic == "image,image_extras" or topic == "icevision" or topic == "vissl"
     _VIDEO_TESTING = topic == "video"
     _VIDEO_EXTRAS_TESTING = topic == "video,video_extras"
     _TABULAR_TESTING = topic == "tabular"
diff --git a/flash/image/embedding/vissl/hooks.py b/flash/image/embedding/vissl/hooks.py
index 6bd090a5ef..db94df6be0 100644
--- a/flash/image/embedding/vissl/hooks.py
+++ b/flash/image/embedding/vissl/hooks.py
@@ -19,6 +19,7 @@
 import flash
 from flash.core.utilities.compatibility import accelerator_connector
 from flash.core.utilities.imports import _VISSL_AVAILABLE
+from flash.image.embedding.losses.vissl_losses import _recursive_register
 
 if _VISSL_AVAILABLE:
     from classy_vision.hooks.classy_hook import ClassyHook
@@ -81,6 +82,10 @@ def on_start(self, task: "flash.image.embedding.vissl.adapter.MockVISSLTask") ->
 
         task.loss.info_criterion.precompute_pos_neg_mask()
 
+        # Re-register params / devices
+        _recursive_register(task.loss)
+        task.loss.to(task.vissl_adapter.adapter_task.device, task.vissl_adapter.adapter_task.dtype)
+
 
 class AdaptVISSLHooks(ModelHooks):
     def __init__(self, hooks: List[ClassyHook], task) -> None:
diff --git a/tests/examples/test_scripts.py b/tests/examples/test_scripts.py
index f57746c65c..10a33477ec 100644
--- a/tests/examples/test_scripts.py
+++ b/tests/examples/test_scripts.py
@@ -29,6 +29,7 @@
     _TABULAR_TESTING,
     _TEXT_TESTING,
     _VIDEO_TESTING,
+    _VISSL_AVAILABLE,
 )
 from tests.examples.utils import run_test
 from tests.helpers.forked import forked
@@ -56,6 +57,12 @@
             "image_classification_multi_label.py",
             marks=pytest.mark.skipif(not _IMAGE_TESTING, reason="image libraries aren't installed"),
         ),
+        pytest.param(
+            "image_embedder.py",
+            marks=pytest.mark.skipif(
+                not (_IMAGE_AVAILABLE and _VISSL_AVAILABLE), reason="image libraries aren't installed"
+            ),
+        ),
         pytest.param(
             "object_detection.py",
             marks=pytest.mark.skipif(
@@ -74,7 +81,6 @@
                 not (_IMAGE_AVAILABLE and _ICEVISION_AVAILABLE), reason="image libraries aren't installed"
             ),
         ),
-        # pytest.param("finetuning", "object_detection.py"),  # TODO: takes too long.
         pytest.param(
             "question_answering.py",
             marks=pytest.mark.skipif(not _TEXT_TESTING, reason="text libraries aren't installed"),
diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py
index 9cd78e8b74..38f78ad861 100644
--- a/tests/image/embedding/test_model.py
+++ b/tests/image/embedding/test_model.py
@@ -81,7 +81,12 @@ def test_vissl_training(backbone, training_strategy, head, pretraining_transform
         pretraining_transform=pretraining_transform,
     )
 
-    trainer = flash.Trainer(max_steps=3, max_epochs=1, gpus=torch.cuda.device_count())
+    # DINO only works with DDP
+    if training_strategy == "dino":
+        trainer = flash.Trainer(max_steps=3, max_epochs=1, gpus=torch.cuda.device_count(), strategy="ddp")
+    else:
+        trainer = flash.Trainer(max_steps=3, max_epochs=1, gpus=torch.cuda.device_count())
+
     trainer.fit(embedder, datamodule=datamodule)
     predictions = trainer.predict(embedder, datamodule=datamodule)
     for prediction_batch in predictions:

From acae1547d97a224e340f63a1c06e6dcdd2867254 Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Mon, 28 Mar 2022 14:33:02 +0100
Subject: [PATCH 02/22] Add vissl

---
 .azure-pipelines/gpu-example-tests.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.azure-pipelines/gpu-example-tests.yml b/.azure-pipelines/gpu-example-tests.yml
index a1f4e09a6e..e71374ac4f 100644
--- a/.azure-pipelines/gpu-example-tests.yml
+++ b/.azure-pipelines/gpu-example-tests.yml
@@ -13,6 +13,7 @@ jobs:
     configs:
     - "image"
     - "icevision"
+    - "vissl"
     - "text"
     - "tabular"
     - "video"

From 3e0d42bd1750b465f1b16f776360508c0857652e Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Mon, 28 Mar 2022 14:50:37 +0100
Subject: [PATCH 03/22] Fix

---
 .azure-pipelines/testing-template.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.azure-pipelines/testing-template.yml b/.azure-pipelines/testing-template.yml
index ccf77688ae..56f3bff13f 100644
--- a/.azure-pipelines/testing-template.yml
+++ b/.azure-pipelines/testing-template.yml
@@ -49,6 +49,7 @@ jobs:
         condition: eq('${{ config }}', 'icevision')
 
       - bash: |
+          pip install fairscale
           pip install git+https://github.com/facebookresearch/ClassyVision.git
           pip install git+https://github.com/facebookresearch/vissl.git
         displayName: 'Install VISSL dependencies'

From 23bb7623403b11da43ffd070a0d5dabe90b70692 Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Mon, 28 Mar 2022 15:15:32 +0100
Subject: [PATCH 04/22] Fix

---
 .azure-pipelines/testing-template.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.azure-pipelines/testing-template.yml b/.azure-pipelines/testing-template.yml
index 56f3bff13f..e2e7ffd7ce 100644
--- a/.azure-pipelines/testing-template.yml
+++ b/.azure-pipelines/testing-template.yml
@@ -36,7 +36,7 @@ jobs:
 
       - bash: |
           # python -m pip install "pip==20.1"
-          if [ "${{config}}" == "icevision" ]; then pip install '.[image]' icevision effdet icedata; else pip install '.[${{config}}]'; fi
+          if [ "${{config}}" == "icevision" ]; then pip install '.[image]' icevision effdet icedata; elif [ "${{config}}" == "icevision" ]; then pip install '.[image]'; else pip install '.[${{config}}]'; fi
           pip install '.[test]' --upgrade-strategy only-if-needed
           pip list
         displayName: 'Install dependencies'

From 651e452da52b1e0402a9e2f70040c89ab979154b Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Mon, 28 Mar 2022 15:44:45 +0100
Subject: [PATCH 05/22] Fixes

---
 .azure-pipelines/testing-template.yml | 2 +-
 flash_examples/image_embedder.py      | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.azure-pipelines/testing-template.yml b/.azure-pipelines/testing-template.yml
index e2e7ffd7ce..6c69b810f5 100644
--- a/.azure-pipelines/testing-template.yml
+++ b/.azure-pipelines/testing-template.yml
@@ -36,7 +36,7 @@ jobs:
 
       - bash: |
           # python -m pip install "pip==20.1"
-          if [ "${{config}}" == "icevision" ]; then pip install '.[image]' icevision effdet icedata; elif [ "${{config}}" == "icevision" ]; then pip install '.[image]'; else pip install '.[${{config}}]'; fi
+          if [ "${{config}}" == "icevision" ]; then pip install '.[image]' icevision effdet icedata; elif [ "${{config}}" == "vissl" ]; then pip install '.[image]'; else pip install '.[${{config}}]'; fi
           pip install '.[test]' --upgrade-strategy only-if-needed
           pip list
         displayName: 'Install dependencies'
diff --git a/flash_examples/image_embedder.py b/flash_examples/image_embedder.py
index 7edc360427..db667d20ef 100644
--- a/flash_examples/image_embedder.py
+++ b/flash_examples/image_embedder.py
@@ -50,7 +50,8 @@
     predict_files=[
         "data/hymenoptera_data/predict/153783656_85f9c3ac70.jpg",
         "data/hymenoptera_data/predict/2039585088_c6f47c592e.jpg",
-    ]
+    ],
+    batch_size=3,
 )
 embeddings = trainer.predict(embedder, datamodule=datamodule)
 

From 59907af88a6a48cfac8083fe3a41471d5ab0aedc Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Mon, 28 Mar 2022 16:19:33 +0100
Subject: [PATCH 06/22] Speed up example

---
 flash_examples/image_embedder.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/flash_examples/image_embedder.py b/flash_examples/image_embedder.py
index db667d20ef..50dc9457b7 100644
--- a/flash_examples/image_embedder.py
+++ b/flash_examples/image_embedder.py
@@ -21,23 +21,23 @@
 # 1. Download the data and prepare the datamodule
 datamodule = ImageClassificationData.from_datasets(
     train_dataset=CIFAR10(".", download=True),
-    batch_size=16,
+    batch_size=4,
 )
 
 # 2. Build the task
 embedder = ImageEmbedder(
-    backbone="resnet",
+    backbone="vision_transformer",
     training_strategy="barlow_twins",
     head="barlow_twins_head",
     pretraining_transform="barlow_twins_transform",
     training_strategy_kwargs={"latent_embedding_dim": 128},
-    pretraining_transform_kwargs={"size_crops": [196]},
+    pretraining_transform_kwargs={"size_crops": [32]},
 )
 
 # 3. Create the trainer and pre-train the encoder
 # use accelerator='ddp' when using GPU(s),
 # i.e. flash.Trainer(max_epochs=3, gpus=1, accelerator='ddp')
-trainer = flash.Trainer(max_epochs=3, gpus=torch.cuda.device_count())
+trainer = flash.Trainer(max_epochs=1, gpus=torch.cuda.device_count())
 trainer.fit(embedder, datamodule=datamodule)
 
 # 4. Save the model!

From 3e5c90ac20363e7b1ef5f5aca16375eae5368d30 Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Mon, 28 Mar 2022 16:29:59 +0100
Subject: [PATCH 07/22] Add additional_log_data

---
 flash/image/embedding/vissl/adapter.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/flash/image/embedding/vissl/adapter.py b/flash/image/embedding/vissl/adapter.py
index 119db01974..41996b5c66 100644
--- a/flash/image/embedding/vissl/adapter.py
+++ b/flash/image/embedding/vissl/adapter.py
@@ -51,6 +51,9 @@ def __init__(self, vissl_adapter, vissl_loss, task_config, vissl_model) -> None:
         # set for momentum teacher based hooks
         self.last_batch = AttrDict({"sample": AttrDict({"input": None, "data_momentum": None})})
 
+        # used in dino
+        self.additional_log_data = {}
+
 
 class VISSLAdapter(Adapter, AdaptVISSLHooks):
     """The ``VISSLAdapter`` is an :class:`~flash.core.adapter.Adapter` for integrating with VISSL.

From 824b85b863bf95005bb3274d7d1a9899f320fccc Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Mon, 28 Mar 2022 18:55:14 +0100
Subject: [PATCH 08/22] Updates

---
 flash_examples/image_embedder.py    |  7 ++++---
 tests/image/embedding/test_model.py | 12 +++++++-----
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/flash_examples/image_embedder.py b/flash_examples/image_embedder.py
index 50dc9457b7..fd79ec794c 100644
--- a/flash_examples/image_embedder.py
+++ b/flash_examples/image_embedder.py
@@ -35,9 +35,10 @@
 )
 
 # 3. Create the trainer and pre-train the encoder
-# use accelerator='ddp' when using GPU(s),
-# i.e. flash.Trainer(max_epochs=3, gpus=1, accelerator='ddp')
-trainer = flash.Trainer(max_epochs=1, gpus=torch.cuda.device_count())
+# use strategy='ddp' when using GPU(s)
+trainer = flash.Trainer(
+    max_epochs=1, gpus=torch.cuda.device_count(), strategy="ddp" if torch.cuda.device_count() > 1 else None
+)
 trainer.fit(embedder, datamodule=datamodule)
 
 # 4. Save the model!
diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py
index 38f78ad861..e2d4a95a03 100644
--- a/tests/image/embedding/test_model.py
+++ b/tests/image/embedding/test_model.py
@@ -70,7 +70,7 @@ def test_vissl_training(backbone, training_strategy, head, pretraining_transform
     # moco strategy, transform and head is not added for this test as it doesn't work as of now.
     datamodule = ImageClassificationData.from_datasets(
         train_dataset=FakeData(16),
-        predict_dataset=FakeData(4),
+        predict_dataset=FakeData(8),
         batch_size=4,
     )
 
@@ -82,10 +82,12 @@ def test_vissl_training(backbone, training_strategy, head, pretraining_transform
     )
 
     # DINO only works with DDP
-    if training_strategy == "dino":
-        trainer = flash.Trainer(max_steps=3, max_epochs=1, gpus=torch.cuda.device_count(), strategy="ddp")
-    else:
-        trainer = flash.Trainer(max_steps=3, max_epochs=1, gpus=torch.cuda.device_count())
+    trainer = flash.Trainer(
+        max_steps=3,
+        max_epochs=1,
+        gpus=torch.cuda.device_count(),
+        strategy="ddp" if (training_strategy == "dino" or torch.cuda.device_count() > 1) else None,
+    )
 
     trainer.fit(embedder, datamodule=datamodule)
     predictions = trainer.predict(embedder, datamodule=datamodule)

From 5b488cd712e6ee1edc8b8a8dc5f69d610e8759da Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Mon, 28 Mar 2022 22:01:16 +0100
Subject: [PATCH 09/22] Try something

---
 tests/image/embedding/test_model.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py
index e2d4a95a03..5e343cf810 100644
--- a/tests/image/embedding/test_model.py
+++ b/tests/image/embedding/test_model.py
@@ -19,6 +19,7 @@
 import flash
 from flash.core.utilities.imports import _IMAGE_AVAILABLE, _TORCHVISION_AVAILABLE, _VISSL_AVAILABLE
 from flash.image import ImageClassificationData, ImageEmbedder
+from tests.helpers.forked import forked
 
 if _TORCHVISION_AVAILABLE:
     from torchvision.datasets import FakeData
@@ -66,6 +67,7 @@ def test_load_from_checkpoint_dependency_error():
         ("vision_transformer", "swav", "swav_head", "swav_transform"),
     ],
 )
+@forked
 def test_vissl_training(backbone, training_strategy, head, pretraining_transform):
     # moco strategy, transform and head is not added for this test as it doesn't work as of now.
     datamodule = ImageClassificationData.from_datasets(

From 0a9a303b0cedab70020859aeb2cae289794a9113 Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Tue, 29 Mar 2022 13:25:23 +0100
Subject: [PATCH 10/22] Add spawned decorator

---
 requirements/test.txt               |  1 +
 tests/examples/test_scripts.py      |  2 +-
 tests/helpers/decorators.py         | 58 +++++++++++++++++++++++++++++
 tests/helpers/forked.py             | 24 ------------
 tests/image/embedding/test_model.py |  4 +-
 5 files changed, 62 insertions(+), 27 deletions(-)
 create mode 100644 tests/helpers/decorators.py
 delete mode 100644 tests/helpers/forked.py

diff --git a/requirements/test.txt b/requirements/test.txt
index 8b5899f7d3..6373aba29c 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -6,6 +6,7 @@ flake8
 pytest-doctestplus>=0.9.0
 pytest-rerunfailures>=10.0
 pytest-forked
+dill
 
 # install pkg
 check-manifest
diff --git a/tests/examples/test_scripts.py b/tests/examples/test_scripts.py
index 10a33477ec..a4bd849464 100644
--- a/tests/examples/test_scripts.py
+++ b/tests/examples/test_scripts.py
@@ -32,7 +32,7 @@
     _VISSL_AVAILABLE,
 )
 from tests.examples.utils import run_test
-from tests.helpers.forked import forked
+from tests.helpers.decorators import forked
 
 root = Path(__file__).parent.parent.parent
 
diff --git a/tests/helpers/decorators.py b/tests/helpers/decorators.py
new file mode 100644
index 0000000000..c7566f6878
--- /dev/null
+++ b/tests/helpers/decorators.py
@@ -0,0 +1,58 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import functools
+import multiprocessing as mp
+import os
+
+import pytest
+from dill import dumps, loads
+
+
+def forked(callable):
+    # PyTest forked not available in Windows
+    if os.name == "nt":
+        return callable
+    os.environ["MKL_SERVICE_FORCE_INTEL"] = "1"
+    return pytest.mark.forked(callable)
+
+
+class pickleable_target:
+    def __init__(self, target):
+        self.target = target
+
+    def __call__(self, *args, **kwargs):
+        return self.target(*args, **kwargs)
+
+    def __getstate__(self):
+        self.target = dumps(self.target)
+        return self.__dict__
+
+    def __setstate__(self, d):
+        self.__dict__ = d
+        self.target = loads(self.target)
+
+
+class spawned:
+    def __init__(self, target):
+        self.target = target
+        functools.update_wrapper(self, target)
+
+    def __call__(self, *args, **kwargs):
+        context = mp.get_context("spawn")
+        target = pickleable_target(self.target)
+
+        p = context.Process(target=target, args=args, kwargs=kwargs)
+        p.start()
+        p.join()
+        assert not p.exitcode
diff --git a/tests/helpers/forked.py b/tests/helpers/forked.py
deleted file mode 100644
index 2f1567e6a3..0000000000
--- a/tests/helpers/forked.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright The PyTorch Lightning team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import os
-
-import pytest
-
-
-def forked(callable):
-    # PyTest forked not available in Windows
-    if os.name == "nt":
-        return callable
-    os.environ["MKL_SERVICE_FORCE_INTEL"] = "1"
-    return pytest.mark.forked(callable)
diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py
index 5e343cf810..9f6a0473f2 100644
--- a/tests/image/embedding/test_model.py
+++ b/tests/image/embedding/test_model.py
@@ -19,7 +19,7 @@
 import flash
 from flash.core.utilities.imports import _IMAGE_AVAILABLE, _TORCHVISION_AVAILABLE, _VISSL_AVAILABLE
 from flash.image import ImageClassificationData, ImageEmbedder
-from tests.helpers.forked import forked
+from tests.helpers.decorators import spawned
 
 if _TORCHVISION_AVAILABLE:
     from torchvision.datasets import FakeData
@@ -67,7 +67,7 @@ def test_load_from_checkpoint_dependency_error():
         ("vision_transformer", "swav", "swav_head", "swav_transform"),
     ],
 )
-@forked
+@spawned
 def test_vissl_training(backbone, training_strategy, head, pretraining_transform):
     # moco strategy, transform and head is not added for this test as it doesn't work as of now.
     datamodule = ImageClassificationData.from_datasets(

From 42fe05be4f867f215f6415553f7efb519b4e5f41 Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Tue, 29 Mar 2022 14:51:15 +0100
Subject: [PATCH 11/22] Debugging

---
 tests/image/embedding/test_model.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py
index 9f6a0473f2..3830da2399 100644
--- a/tests/image/embedding/test_model.py
+++ b/tests/image/embedding/test_model.py
@@ -55,16 +55,16 @@ def test_load_from_checkpoint_dependency_error():
 @pytest.mark.parametrize(
     "backbone, training_strategy, head, pretraining_transform",
     [
-        ("vision_transformer", "simclr", "simclr_head", "simclr_transform"),
-        pytest.param(
-            "vision_transformer",
-            "dino",
-            "dino_head",
-            "dino_transform",
-            marks=pytest.mark.skipif(torch.cuda.device_count() < 1, reason="VISSL DINO calls all_reduce internally."),
-        ),
+        # ("vision_transformer", "simclr", "simclr_head", "simclr_transform"),
+        # pytest.param(
+        #     "vision_transformer",
+        #     "dino",
+        #     "dino_head",
+        #     "dino_transform",
+        #     marks=pytest.mark.skipif(torch.cuda.device_count() < 1, reason="VISSL DINO calls all_reduce internally."),
+        # ),
         ("vision_transformer", "barlow_twins", "barlow_twins_head", "barlow_twins_transform"),
-        ("vision_transformer", "swav", "swav_head", "swav_transform"),
+        # ("vision_transformer", "swav", "swav_head", "swav_transform"),
     ],
 )
 @spawned

From d8b5a6a07652886417939d90684456205c80b353 Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Tue, 29 Mar 2022 16:06:40 +0100
Subject: [PATCH 12/22] Debugging

---
 tests/image/embedding/test_model.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py
index 3830da2399..499544ac6d 100644
--- a/tests/image/embedding/test_model.py
+++ b/tests/image/embedding/test_model.py
@@ -19,7 +19,6 @@
 import flash
 from flash.core.utilities.imports import _IMAGE_AVAILABLE, _TORCHVISION_AVAILABLE, _VISSL_AVAILABLE
 from flash.image import ImageClassificationData, ImageEmbedder
-from tests.helpers.decorators import spawned
 
 if _TORCHVISION_AVAILABLE:
     from torchvision.datasets import FakeData
@@ -51,23 +50,23 @@ def test_load_from_checkpoint_dependency_error():
         ImageEmbedder.load_from_checkpoint("not_a_real_checkpoint.pt")
 
 
+@pytest.mark.skipif(torch.cuda.device_count() > 1, reason="DDP not working.")
 @pytest.mark.skipif(not (_TORCHVISION_AVAILABLE and _VISSL_AVAILABLE), reason="vissl not installed.")
 @pytest.mark.parametrize(
     "backbone, training_strategy, head, pretraining_transform",
     [
-        # ("vision_transformer", "simclr", "simclr_head", "simclr_transform"),
-        # pytest.param(
-        #     "vision_transformer",
-        #     "dino",
-        #     "dino_head",
-        #     "dino_transform",
-        #     marks=pytest.mark.skipif(torch.cuda.device_count() < 1, reason="VISSL DINO calls all_reduce internally."),
-        # ),
+        ("vision_transformer", "simclr", "simclr_head", "simclr_transform"),
+        pytest.param(
+            "vision_transformer",
+            "dino",
+            "dino_head",
+            "dino_transform",
+            marks=pytest.mark.skipif(torch.cuda.device_count() < 1, reason="VISSL DINO calls all_reduce internally."),
+        ),
         ("vision_transformer", "barlow_twins", "barlow_twins_head", "barlow_twins_transform"),
-        # ("vision_transformer", "swav", "swav_head", "swav_transform"),
+        ("vision_transformer", "swav", "swav_head", "swav_transform"),
     ],
 )
-@spawned
 def test_vissl_training(backbone, training_strategy, head, pretraining_transform):
     # moco strategy, transform and head is not added for this test as it doesn't work as of now.
     datamodule = ImageClassificationData.from_datasets(

From ca8caa4372ef11e151cb3142cfc271b330e1cc38 Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Tue, 29 Mar 2022 18:28:13 +0100
Subject: [PATCH 13/22] Updates

---
 docs/source/reference/image_embedder.rst | 5 ++---
 flash_examples/image_embedder.py         | 5 +----
 tests/examples/test_scripts.py           | 9 ++++++---
 tests/image/embedding/test_model.py      | 2 +-
 4 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/docs/source/reference/image_embedder.rst b/docs/source/reference/image_embedder.rst
index fe9019cd8b..e0eb656081 100644
--- a/docs/source/reference/image_embedder.rst
+++ b/docs/source/reference/image_embedder.rst
@@ -31,10 +31,9 @@ Then the user can configure the :class:`~flash.image.embedding.model.ImageEmbedd
 There are options provided to send additional arguments to config selections.
 This task can now be sent to the ``fit()`` method of :class:`~flash.core.trainer.Trainer`.
 
-.. note::
+.. warning::
 
-   A lot of VISSL loss functions use hard-coded ``torch.distributed`` methods. The user is suggested to use ``accelerator=ddp`` even with a single GPU.
-   Only ``barlow_twins`` training strategy works on the CPU. All other loss functions are configured to work on GPUs.
+   The VISSL integration does not support multi-GPU training.
 
 .. literalinclude:: ../../../flash_examples/image_embedder.py
     :language: python
diff --git a/flash_examples/image_embedder.py b/flash_examples/image_embedder.py
index fd79ec794c..5a51d48eba 100644
--- a/flash_examples/image_embedder.py
+++ b/flash_examples/image_embedder.py
@@ -35,10 +35,7 @@
 )
 
 # 3. Create the trainer and pre-train the encoder
-# use strategy='ddp' when using GPU(s)
-trainer = flash.Trainer(
-    max_epochs=1, gpus=torch.cuda.device_count(), strategy="ddp" if torch.cuda.device_count() > 1 else None
-)
+trainer = flash.Trainer(max_epochs=1, gpus=torch.cuda.device_count())
 trainer.fit(embedder, datamodule=datamodule)
 
 # 4. Save the model!
diff --git a/tests/examples/test_scripts.py b/tests/examples/test_scripts.py
index a4bd849464..1162caa91d 100644
--- a/tests/examples/test_scripts.py
+++ b/tests/examples/test_scripts.py
@@ -59,9 +59,12 @@
         ),
         pytest.param(
             "image_embedder.py",
-            marks=pytest.mark.skipif(
-                not (_IMAGE_AVAILABLE and _VISSL_AVAILABLE), reason="image libraries aren't installed"
-            ),
+            marks=[
+                pytest.mark.skipif(
+                    not (_IMAGE_AVAILABLE and _VISSL_AVAILABLE), reason="image libraries aren't installed"
+                ),
+                pytest.mark.skipif(torch.cuda.device_count() > 1, reason="VISSL integration doesn't support multi-GPU"),
+            ],
         ),
         pytest.param(
             "object_detection.py",
diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py
index 499544ac6d..3ddd368b1f 100644
--- a/tests/image/embedding/test_model.py
+++ b/tests/image/embedding/test_model.py
@@ -50,7 +50,7 @@ def test_load_from_checkpoint_dependency_error():
         ImageEmbedder.load_from_checkpoint("not_a_real_checkpoint.pt")
 
 
-@pytest.mark.skipif(torch.cuda.device_count() > 1, reason="DDP not working.")
+@pytest.mark.skipif(torch.cuda.device_count() > 1, reason="VISSL integration doesn't support multi-GPU")
 @pytest.mark.skipif(not (_TORCHVISION_AVAILABLE and _VISSL_AVAILABLE), reason="vissl not installed.")
 @pytest.mark.parametrize(
     "backbone, training_strategy, head, pretraining_transform",

From da4ecbae6ed792f5df999cf49b1cc880d9834d17 Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Tue, 29 Mar 2022 18:29:01 +0100
Subject: [PATCH 14/22] Clean

---
 requirements/test.txt       |  1 -
 tests/helpers/decorators.py | 34 ----------------------------------
 2 files changed, 35 deletions(-)

diff --git a/requirements/test.txt b/requirements/test.txt
index 6373aba29c..8b5899f7d3 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -6,7 +6,6 @@ flake8
 pytest-doctestplus>=0.9.0
 pytest-rerunfailures>=10.0
 pytest-forked
-dill
 
 # install pkg
 check-manifest
diff --git a/tests/helpers/decorators.py b/tests/helpers/decorators.py
index c7566f6878..2f1567e6a3 100644
--- a/tests/helpers/decorators.py
+++ b/tests/helpers/decorators.py
@@ -11,12 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import functools
-import multiprocessing as mp
 import os
 
 import pytest
-from dill import dumps, loads
 
 
 def forked(callable):
@@ -25,34 +22,3 @@ def forked(callable):
         return callable
     os.environ["MKL_SERVICE_FORCE_INTEL"] = "1"
     return pytest.mark.forked(callable)
-
-
-class pickleable_target:
-    def __init__(self, target):
-        self.target = target
-
-    def __call__(self, *args, **kwargs):
-        return self.target(*args, **kwargs)
-
-    def __getstate__(self):
-        self.target = dumps(self.target)
-        return self.__dict__
-
-    def __setstate__(self, d):
-        self.__dict__ = d
-        self.target = loads(self.target)
-
-
-class spawned:
-    def __init__(self, target):
-        self.target = target
-        functools.update_wrapper(self, target)
-
-    def __call__(self, *args, **kwargs):
-        context = mp.get_context("spawn")
-        target = pickleable_target(self.target)
-
-        p = context.Process(target=target, args=args, kwargs=kwargs)
-        p.start()
-        p.join()
-        assert not p.exitcode

From 2b486813fd13f8be290233744057b06e8b0e1821 Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Tue, 29 Mar 2022 19:04:01 +0100
Subject: [PATCH 15/22] Try fix

---
 docs/source/reference/image_embedder.rst | 2 +-
 tests/image/embedding/test_model.py      | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/source/reference/image_embedder.rst b/docs/source/reference/image_embedder.rst
index e0eb656081..2a83711ddc 100644
--- a/docs/source/reference/image_embedder.rst
+++ b/docs/source/reference/image_embedder.rst
@@ -33,7 +33,7 @@ This task can now be sent to the ``fit()`` method of :class:`~flash.core.trainer
 
 .. warning::
 
-   The VISSL integration does not support multi-GPU training.
+   The VISSL integration does not support multi-GPU training. All training strategies support single-GPU training and all except for DINO support CPU training.
 
 .. literalinclude:: ../../../flash_examples/image_embedder.py
     :language: python
diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py
index 3ddd368b1f..a4b1095bfd 100644
--- a/tests/image/embedding/test_model.py
+++ b/tests/image/embedding/test_model.py
@@ -87,7 +87,6 @@ def test_vissl_training(backbone, training_strategy, head, pretraining_transform
         max_steps=3,
         max_epochs=1,
         gpus=torch.cuda.device_count(),
-        strategy="ddp" if (training_strategy == "dino" or torch.cuda.device_count() > 1) else None,
     )
 
     trainer.fit(embedder, datamodule=datamodule)

From 2f1062db86373b7043e7926ec6e3de2cc7db3423 Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Wed, 30 Mar 2022 10:22:59 +0100
Subject: [PATCH 16/22] Update docs

---
 docs/source/reference/image_embedder.rst | 34 ++++++++++++++++++------
 flash/image/embedding/model.py           | 31 +++++++++++++++------
 2 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/docs/source/reference/image_embedder.rst b/docs/source/reference/image_embedder.rst
index 2a83711ddc..bdecbb7c09 100644
--- a/docs/source/reference/image_embedder.rst
+++ b/docs/source/reference/image_embedder.rst
@@ -4,6 +4,10 @@
    :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/image_embedder.svg
    :tags: Image,Embedding
 
+.. warning::
+
+   Multi-GPU training is not currently supported by the :class:`~flash.image.embedding.model.ImageEmbedder` task.
+
 .. _image_embedder:
 
 ##############
@@ -17,7 +21,9 @@ The Task
 Image embedding encodes an image into a vector of features which can be used for a downstream task.
 This could include: clustering, similarity search, or classification.
 
+The Flash :class:`~flash.image.embedding.model.ImageEmbedder` can be trained with Self Supervised Learning (SSL) to improve the quality of the embeddings it produces for your data.
 The :class:`~flash.image.embedding.model.ImageEmbedder` internally relies on `VISSL <https://vissl.ai/>`_.
+You can read more about our integration with VISSL here: :ref:`vissl`.
 
 ------
 
@@ -26,17 +32,29 @@ Example
 *******
 
 Let's see how to configure a training strategy for the :class:`~flash.image.embedding.model.ImageEmbedder` task.
-A vanilla :class:`~flash.core.data.data_module.DataModule` object be created using standard Datasets as shown below.
-Then the user can configure the :class:`~flash.image.embedding.model.ImageEmbedder` task with ``training_strategy``, ``backbone``, ``head`` and ``pretraining_transform``.
-There are options provided to send additional arguments to config selections.
-This task can now be sent to the ``fit()`` method of :class:`~flash.core.trainer.Trainer`.
-
-.. warning::
-
-   The VISSL integration does not support multi-GPU training. All training strategies support single-GPU training and all except for DINO support CPU training.
+First, we create an :class:`~flash.image.classification.data.ImageClassificationData` object using a ``Dataset`` from torchvision.
+Next, we configure the :class:`~flash.image.embedding.model.ImageEmbedder` task with ``training_strategy``, ``backbone``, ``head`` and ``pretraining_transform``.
+Finally, we construct a :class:`~flash.core.trainer.Trainer` and call ``fit()``.
+Here's the full example:
 
 .. literalinclude:: ../../../flash_examples/image_embedder.py
     :language: python
     :lines: 14-
 
 To learn how to view the available backbones / heads for this task, see :ref:`backbones_heads`.
+You can view the available training strategies with the :meth:`~flash.image.embedding.model.ImageEmbedder.available_training_strategies` method.
+
+.. note::
+
+    The ``"dino"`` training strategy only supports single GPU training with ``strategy="DDP"``.
+
+The ``head`` and ``pretraining_transform`` arguments should match the choice of ``training_strategy`` following this table:
+
+=====================  ================  ==========================
+``training_strategy``  ``head``          ``pretraining_transform``
+=====================  ================  ==========================
+``simclr``             ``simclr_head``   ``simclr_transform``
+``barlow_twins``       ``barlow_twins``  ``barlow_twins_transform``
+``swav``               ``swav_head``     ``swav_transform``
+``dino``               ``dino_head``     ``dino_transform``
+=====================  ================  ==========================
diff --git a/flash/image/embedding/model.py b/flash/image/embedding/model.py
index 6ca62f5528..edd7e37d93 100644
--- a/flash/image/embedding/model.py
+++ b/flash/image/embedding/model.py
@@ -18,21 +18,24 @@
 from flash.core.registry import FlashRegistry
 from flash.core.utilities.imports import _VISSL_AVAILABLE, requires
 from flash.core.utilities.types import LR_SCHEDULER_TYPE, OPTIMIZER_TYPE
+from flash.image.embedding.backbones import IMAGE_EMBEDDER_BACKBONES
+from flash.image.embedding.strategies import IMAGE_EMBEDDER_STRATEGIES
+from flash.image.embedding.transforms import IMAGE_EMBEDDER_TRANSFORMS
 
 if _VISSL_AVAILABLE:
     import classy_vision
     import classy_vision.generic.distributed_util
 
-    from flash.image.embedding.backbones import IMAGE_EMBEDDER_BACKBONES
-    from flash.image.embedding.strategies import IMAGE_EMBEDDER_STRATEGIES
-    from flash.image.embedding.transforms import IMAGE_EMBEDDER_TRANSFORMS
-
     # patch this to avoid classy vision/vissl based distributed training
     classy_vision.generic.distributed_util.get_world_size = lambda: 1
-else:
-    IMAGE_EMBEDDER_BACKBONES = FlashRegistry("backbones")
-    IMAGE_EMBEDDER_STRATEGIES = FlashRegistry("embedder_training_strategies")
-    IMAGE_EMBEDDER_TRANSFORMS = FlashRegistry("embedder_transforms")
+
+# Skip doctests if requirements aren't available
+__doctest_skip__ = []
+if not _VISSL_AVAILABLE:
+    __doctest_skip__ += [
+        "ImageEmbedder",
+        "ImageEmbedder.*",
+    ]
 
 
 class ImageEmbedder(AdapterTask):
@@ -130,6 +133,18 @@ def on_train_batch_end(self, outputs: Any, batch: Any, batch_idx: int, dataloade
     @classmethod
     @requires(["image", "vissl", "fairscale"])
     def available_training_strategies(cls) -> List[str]:
+        """Get the list of available training strategies (passed to the ``training_strategy`` argument) for this
+        task.
+
+        Examples
+        ________
+
+        .. doctest::
+
+            >>> from flash.image import ImageEmbedder
+            >>> ImageEmbedder.available_training_strategies()  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+            ['barlow_twins', ..., 'swav']
+        """
         registry: Optional[FlashRegistry] = getattr(cls, "training_strategies", None)
         if registry is None:
             return []

From 0cf4831678223c4db56ab06f0034cd2fb5c7d06e Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Wed, 30 Mar 2022 10:26:25 +0100
Subject: [PATCH 17/22] Fix test

---
 tests/image/embedding/test_model.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py
index a4b1095bfd..828dc417ab 100644
--- a/tests/image/embedding/test_model.py
+++ b/tests/image/embedding/test_model.py
@@ -17,7 +17,12 @@
 import torch
 
 import flash
-from flash.core.utilities.imports import _IMAGE_AVAILABLE, _TORCHVISION_AVAILABLE, _VISSL_AVAILABLE
+from flash.core.utilities.imports import (
+    _IMAGE_AVAILABLE,
+    _PL_GREATER_EQUAL_1_5_0,
+    _TORCHVISION_AVAILABLE,
+    _VISSL_AVAILABLE,
+)
 from flash.image import ImageClassificationData, ImageEmbedder
 
 if _TORCHVISION_AVAILABLE:
@@ -82,11 +87,20 @@ def test_vissl_training(backbone, training_strategy, head, pretraining_transform
         pretraining_transform=pretraining_transform,
     )
 
+    kwargs = {}
+
     # DINO only works with DDP
+    if training_strategy == "dino":
+        if _PL_GREATER_EQUAL_1_5_0:
+            kwargs["strategy"] = "DDP"
+        else:
+            kwargs["accelerator"] = "DDP"
+
     trainer = flash.Trainer(
         max_steps=3,
         max_epochs=1,
         gpus=torch.cuda.device_count(),
+        **kwargs,
     )
 
     trainer.fit(embedder, datamodule=datamodule)

From 7b0986c93022c0d9192f86649f3e7e939e7c177f Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Wed, 30 Mar 2022 10:50:30 +0100
Subject: [PATCH 18/22] Respond to comment

---
 flash/image/embedding/vissl/hooks.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/flash/image/embedding/vissl/hooks.py b/flash/image/embedding/vissl/hooks.py
index db94df6be0..f9845e7ae3 100644
--- a/flash/image/embedding/vissl/hooks.py
+++ b/flash/image/embedding/vissl/hooks.py
@@ -19,7 +19,6 @@
 import flash
 from flash.core.utilities.compatibility import accelerator_connector
 from flash.core.utilities.imports import _VISSL_AVAILABLE
-from flash.image.embedding.losses.vissl_losses import _recursive_register
 
 if _VISSL_AVAILABLE:
     from classy_vision.hooks.classy_hook import ClassyHook
@@ -82,8 +81,7 @@ def on_start(self, task: "flash.image.embedding.vissl.adapter.MockVISSLTask") ->
 
         task.loss.info_criterion.precompute_pos_neg_mask()
 
-        # Re-register params / devices
-        _recursive_register(task.loss)
+        # Cast the loss to the correct device / dtype
         task.loss.to(task.vissl_adapter.adapter_task.device, task.vissl_adapter.adapter_task.dtype)
 
 

From f018fb84c6d321d42c590d619804f4fdaad3690d Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Wed, 30 Mar 2022 10:51:50 +0100
Subject: [PATCH 19/22] Update CHANGELOG.md

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 99643da230..8afdf4ff74 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Fixed
 
+- Fixed GPU support for self-supervised training with the `ImageEmbedder`. ([#1256](https://github.com/PyTorchLightning/lightning-flash/pull/1256))
+
 - Fixed a bug where collate functions were never called in the `ImageEmbedder` class. ([#1217](https://github.com/PyTorchLightning/lightning-flash/pull/1217))
 
 - Fixed a bug where `pretraining_transforms` in the `ImageEmbedder` was never called. ([#1196](https://github.com/PyTorchLightning/lightning-flash/pull/1196))

From fc2f886c4ace864b0fc66e598a1d61590deda4e0 Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Wed, 30 Mar 2022 10:57:09 +0100
Subject: [PATCH 20/22] Update flash/image/embedding/vissl/hooks.py

Co-authored-by: Kushashwa Ravi Shrimali <kushashwaravishrimali@gmail.com>
---
 flash/image/embedding/vissl/hooks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flash/image/embedding/vissl/hooks.py b/flash/image/embedding/vissl/hooks.py
index f9845e7ae3..9176883f59 100644
--- a/flash/image/embedding/vissl/hooks.py
+++ b/flash/image/embedding/vissl/hooks.py
@@ -82,7 +82,7 @@ def on_start(self, task: "flash.image.embedding.vissl.adapter.MockVISSLTask") ->
         task.loss.info_criterion.precompute_pos_neg_mask()
 
         # Cast the loss to the correct device / dtype
-        task.loss.to(task.vissl_adapter.adapter_task.device, task.vissl_adapter.adapter_task.dtype)
+        task.loss.to(lightning_module.device, lightning_module.dtype)
 
 
 class AdaptVISSLHooks(ModelHooks):

From 813a998ab2a586a0c8ef4535654089ed3348fe24 Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Wed, 30 Mar 2022 11:39:03 +0100
Subject: [PATCH 21/22] Fix docs

---
 docs/source/reference/image_embedder.rst | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/source/reference/image_embedder.rst b/docs/source/reference/image_embedder.rst
index bdecbb7c09..01d2ec8735 100644
--- a/docs/source/reference/image_embedder.rst
+++ b/docs/source/reference/image_embedder.rst
@@ -50,11 +50,11 @@ You can view the available training strategies with the :meth:`~flash.image.embe
 
 The ``head`` and ``pretraining_transform`` arguments should match the choice of ``training_strategy`` following this table:
 
-=====================  ================  ==========================
-``training_strategy``  ``head``          ``pretraining_transform``
-=====================  ================  ==========================
-``simclr``             ``simclr_head``   ``simclr_transform``
-``barlow_twins``       ``barlow_twins``  ``barlow_twins_transform``
-``swav``               ``swav_head``     ``swav_transform``
-``dino``               ``dino_head``     ``dino_transform``
-=====================  ================  ==========================
+=====================  =====================  ==========================
+``training_strategy``  ``head``               ``pretraining_transform``
+=====================  =====================  ==========================
+``simclr``             ``simclr_head``        ``simclr_transform``
+``barlow_twins``       ``barlow_twins_head``  ``barlow_twins_transform``
+``swav``               ``swav_head``          ``swav_transform``
+``dino``               ``dino_head``          ``dino_transform``
+=====================  =====================  ==========================

From 9350f568f9a17224161aeabebc125606465b113e Mon Sep 17 00:00:00 2001
From: Ethan Harris <ethanwharris@gmail.com>
Date: Wed, 30 Mar 2022 12:28:02 +0100
Subject: [PATCH 22/22] Fix ddp to lowercase

---
 docs/source/reference/image_embedder.rst | 2 +-
 tests/image/embedding/test_model.py      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/reference/image_embedder.rst b/docs/source/reference/image_embedder.rst
index 01d2ec8735..78f2232c73 100644
--- a/docs/source/reference/image_embedder.rst
+++ b/docs/source/reference/image_embedder.rst
@@ -46,7 +46,7 @@ You can view the available training strategies with the :meth:`~flash.image.embe
 
 .. note::
 
-    The ``"dino"`` training strategy only supports single GPU training with ``strategy="DDP"``.
+    The ``"dino"`` training strategy only supports single GPU training with ``strategy="ddp"``.
 
 The ``head`` and ``pretraining_transform`` arguments should match the choice of ``training_strategy`` following this table:
 
diff --git a/tests/image/embedding/test_model.py b/tests/image/embedding/test_model.py
index 828dc417ab..9f26fc8444 100644
--- a/tests/image/embedding/test_model.py
+++ b/tests/image/embedding/test_model.py
@@ -92,9 +92,9 @@ def test_vissl_training(backbone, training_strategy, head, pretraining_transform
     # DINO only works with DDP
     if training_strategy == "dino":
         if _PL_GREATER_EQUAL_1_5_0:
-            kwargs["strategy"] = "DDP"
+            kwargs["strategy"] = "ddp"
         else:
-            kwargs["accelerator"] = "DDP"
+            kwargs["accelerator"] = "ddp"
 
     trainer = flash.Trainer(
         max_steps=3,