From 16a0b07dcc4089131355c012861d659fff4e9cbb Mon Sep 17 00:00:00 2001
From: MalteEbner <malte.ebner@gmail.com>
Date: Wed, 16 Jun 2021 15:43:36 +0200
Subject: [PATCH 1/3] add config parameter weights_summary

---
 lightly/cli/config/config.yaml | 2 ++
 lightly/cli/lightly_cli.py     | 1 +
 lightly/embedding/_base.py     | 1 +
 3 files changed, 4 insertions(+)

diff --git a/lightly/cli/config/config.yaml b/lightly/cli/config/config.yaml
index b7368884b..2332c4864 100644
--- a/lightly/cli/config/config.yaml
+++ b/lightly/cli/config/config.yaml
@@ -71,6 +71,8 @@ trainer:
   gpus: 1                     # Number of gpus to use for training.
   max_epochs: 100             # Number of epochs to train for.
   precision: 32               # If set to 16, will use half-precision.
+  weights_summary: 'top'      # how to print the model architecture, one of {None, 'top', full},
+                                #see https://pytorch-lightning.readthedocs.io/en/stable/common/trainer.html#weights-summary
 
 # checkpoint_callback namespace: Modify the checkpoint callback
 checkpoint_callback:
diff --git a/lightly/cli/lightly_cli.py b/lightly/cli/lightly_cli.py
index c74015c6c..4b1345ef9 100644
--- a/lightly/cli/lightly_cli.py
+++ b/lightly/cli/lightly_cli.py
@@ -22,6 +22,7 @@ def _lightly_cli(cfg, is_cli_call=True):
     if cfg['trainer']['max_epochs'] > 0:
         print('#' * 10 + ' Starting to train an embedding model.')
         checkpoint = _train_cli(cfg, is_cli_call)
+        cfg['trainer']['weights_summary'] = None
     else:
         checkpoint = ''
 
diff --git a/lightly/embedding/_base.py b/lightly/embedding/_base.py
index dc8ee53a9..e12a1cb93 100644
--- a/lightly/embedding/_base.py
+++ b/lightly/embedding/_base.py
@@ -78,6 +78,7 @@ def train_embedding(self, **kwargs):
                 min_epochs: (int) Minimum number of epochs to train
                 max_epochs: (int) Maximum number of epochs to train
                 gpus: (int) number of gpus to use
+                weights_summary: (str) how to print a summary of the model and its weights (number, size), one of {None, 'top', 'full'}
 
         Returns:
             A trained encoder, ready for embedding datasets.

From 498d6435f79634cffcbd36c0ed0a28e1064a3e79 Mon Sep 17 00:00:00 2001
From: MalteEbner <malte.ebner@gmail.com>
Date: Thu, 17 Jun 2021 11:52:25 +0200
Subject: [PATCH 2/3] unittests and bugfixes for the option
 trainer.weights_summary

---
 lightly/cli/train_cli.py    |  5 +++
 tests/cli/test_cli_train.py | 75 +++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+)
 create mode 100644 tests/cli/test_cli_train.py

diff --git a/lightly/cli/train_cli.py b/lightly/cli/train_cli.py
index 35cbe838c..214fe5d75 100644
--- a/lightly/cli/train_cli.py
+++ b/lightly/cli/train_cli.py
@@ -45,11 +45,16 @@ def _train_cli(cfg, is_cli_call=True):
         torch.backends.cudnn.deterministic = True
         torch.backends.cudnn.benchmark = False
 
+    if cfg["trainer"]["weights_summary"] == "None":
+        cfg["trainer"]["weights_summary"] = None
+
     if torch.cuda.is_available():
         device = 'cuda'
     elif cfg['trainer'] and cfg['trainer']['gpus']:
         device = 'cpu'
         cfg['trainer']['gpus'] = 0
+    else:
+        device = 'cpu'
 
     if cfg['loader']['batch_size'] < 64:
         msg = 'Training a self-supervised model with a small batch size: {}! '
diff --git a/tests/cli/test_cli_train.py b/tests/cli/test_cli_train.py
new file mode 100644
index 000000000..9bc1ace5f
--- /dev/null
+++ b/tests/cli/test_cli_train.py
@@ -0,0 +1,75 @@
+import os
+import re
+import sys
+import tempfile
+
+import torchvision
+from hydra.experimental import compose, initialize
+
+import lightly
+from tests.api_workflow.mocked_api_workflow_client import MockedApiWorkflowSetup, MockedApiWorkflowClient
+
+
+class TestCLITrain(MockedApiWorkflowSetup):
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        sys.modules["lightly.cli.upload_cli"].ApiWorkflowClient = MockedApiWorkflowClient
+
+    def setUp(self):
+        MockedApiWorkflowSetup.setUp(self)
+        self.create_fake_dataset()
+        with initialize(config_path="../../lightly/cli/config", job_name="test_app"):
+            self.cfg = compose(config_name="config", overrides=[
+                "token='123'",
+                f"input_dir={self.folder_path}",
+                "trainer.max_epochs=0"
+            ])
+
+    def create_fake_dataset(self):
+        n_data = 5
+        self.dataset = torchvision.datasets.FakeData(size=n_data, image_size=(3, 32, 32))
+
+        self.folder_path = tempfile.mkdtemp()
+        sample_names = [f'img_{i}.jpg' for i in range(n_data)]
+        self.sample_names = sample_names
+        for sample_idx in range(n_data):
+            data = self.dataset[sample_idx]
+            path = os.path.join(self.folder_path, sample_names[sample_idx])
+            data[0].save(path)
+
+    def parse_cli_string(self, cli_words: str):
+        cli_words = cli_words.replace("lightly-train ", "")
+        cli_words = re.split("=| ", cli_words)
+        assert len(cli_words) % 2 == 0
+        dict_keys = cli_words[0::2]
+        dict_values = cli_words[1::2]
+        for key, value in zip(dict_keys, dict_values):
+            value = value.strip('\"')
+            value = value.strip('\'')
+            key_parts = key.split(".")
+            if len(key_parts) == 1:
+                self.cfg[key_parts[0]]= value
+            elif len(key_parts) == 2:
+                self.cfg[key_parts[0]][key_parts[1]] = value
+            else:
+                raise ValueError
+
+    def test_parse_cli_string(self):
+        cli_string = "lightly-train trainer.weights_summary=top"
+        self.parse_cli_string(cli_string)
+        assert self.cfg["trainer"]["weights_summary"] == 'top'
+
+    def test_train_weights_summary(self):
+        for weights_summary in ["None", "top", "full"]:
+            cli_string = f"lightly-train trainer.weights_summary={weights_summary}"
+            with self.subTest(cli_string):
+                self.parse_cli_string(cli_string)
+                lightly.cli.train_cli(self.cfg)
+
+    def tearDown(self) -> None:
+        for filename in ["embeddings.csv", "embeddings_sorted.csv"]:
+            try:
+                os.remove(filename)
+            except FileNotFoundError:
+                pass

From c8e0a8621b86c39ae3526d98c303704f8023d878 Mon Sep 17 00:00:00 2001
From: MalteEbner <malte.ebner@gmail.com>
Date: Thu, 17 Jun 2021 12:10:08 +0200
Subject: [PATCH 3/3] added better documentation, removed weights_summary reset in lightly_cli

---
 lightly/cli/config/config.yaml | 2 +-
 lightly/cli/lightly_cli.py     | 1 -
 lightly/cli/train_cli.py       | 3 +++
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/lightly/cli/config/config.yaml b/lightly/cli/config/config.yaml
index 2332c4864..775c0296c 100644
--- a/lightly/cli/config/config.yaml
+++ b/lightly/cli/config/config.yaml
@@ -71,7 +71,7 @@ trainer:
   gpus: 1                     # Number of gpus to use for training.
   max_epochs: 100             # Number of epochs to train for.
   precision: 32               # If set to 16, will use half-precision.
-  weights_summary: 'top'      # how to print the model architecture, one of {None, 'top', full},
+  weights_summary: 'top'      # How to print the model architecture, one of {None, top, full},
                                 #see https://pytorch-lightning.readthedocs.io/en/stable/common/trainer.html#weights-summary
 
 # checkpoint_callback namespace: Modify the checkpoint callback
diff --git a/lightly/cli/lightly_cli.py b/lightly/cli/lightly_cli.py
index 4b1345ef9..c74015c6c 100644
--- a/lightly/cli/lightly_cli.py
+++ b/lightly/cli/lightly_cli.py
@@ -22,7 +22,6 @@ def _lightly_cli(cfg, is_cli_call=True):
     if cfg['trainer']['max_epochs'] > 0:
         print('#' * 10 + ' Starting to train an embedding model.')
         checkpoint = _train_cli(cfg, is_cli_call)
-        cfg['trainer']['weights_summary'] = None
     else:
         checkpoint = ''
 
diff --git a/lightly/cli/train_cli.py b/lightly/cli/train_cli.py
index 214fe5d75..6750401fa 100644
--- a/lightly/cli/train_cli.py
+++ b/lightly/cli/train_cli.py
@@ -157,6 +157,9 @@ def train_cli(cfg):
         >>>
         >>> # train model for 10 epochs
         >>> lightly-train input_dir=data/ trainer.max_epochs=10
+        >>>
+        >>> # print a full summary of the model
+        >>> lightly-train input_dir=data/ trainer.weights_summary=full
 
     """
     return _train_cli(cfg)