From 16a0b07dcc4089131355c012861d659fff4e9cbb Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Wed, 16 Jun 2021 15:43:36 +0200 Subject: [PATCH 1/3] add config parameter weights_summary --- lightly/cli/config/config.yaml | 2 ++ lightly/cli/lightly_cli.py | 1 + lightly/embedding/_base.py | 1 + 3 files changed, 4 insertions(+) diff --git a/lightly/cli/config/config.yaml b/lightly/cli/config/config.yaml index b7368884b..2332c4864 100644 --- a/lightly/cli/config/config.yaml +++ b/lightly/cli/config/config.yaml @@ -71,6 +71,8 @@ trainer: gpus: 1 # Number of gpus to use for training. max_epochs: 100 # Number of epochs to train for. precision: 32 # If set to 16, will use half-precision. + weights_summary: 'top' # how to print the model architecture, one of {None, 'top', full}, + #see https://pytorch-lightning.readthedocs.io/en/stable/common/trainer.html#weights-summary # checkpoint_callback namespace: Modify the checkpoint callback checkpoint_callback: diff --git a/lightly/cli/lightly_cli.py b/lightly/cli/lightly_cli.py index c74015c6c..4b1345ef9 100644 --- a/lightly/cli/lightly_cli.py +++ b/lightly/cli/lightly_cli.py @@ -22,6 +22,7 @@ def _lightly_cli(cfg, is_cli_call=True): if cfg['trainer']['max_epochs'] > 0: print('#' * 10 + ' Starting to train an embedding model.') checkpoint = _train_cli(cfg, is_cli_call) + cfg['trainer']['weights_summary'] = None else: checkpoint = '' diff --git a/lightly/embedding/_base.py b/lightly/embedding/_base.py index dc8ee53a9..e12a1cb93 100644 --- a/lightly/embedding/_base.py +++ b/lightly/embedding/_base.py @@ -78,6 +78,7 @@ def train_embedding(self, **kwargs): min_epochs: (int) Minimum number of epochs to train max_epochs: (int) Maximum number of epochs to train gpus: (int) number of gpus to use + weights_summary: (str) how to print a summary of the model and weights (number, size) Returns: A trained encoder, ready for embedding datasets. From 498d6435f79634cffcbd36c0ed0a28e1064a3e79 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Thu, 17 Jun 2021 11:52:25 +0200 Subject: [PATCH 2/3] unittests and bugfixes for the option trainer.weights_summary --- lightly/cli/train_cli.py | 5 +++ tests/cli/test_cli_train.py | 75 +++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 tests/cli/test_cli_train.py diff --git a/lightly/cli/train_cli.py b/lightly/cli/train_cli.py index 35cbe838c..214fe5d75 100644 --- a/lightly/cli/train_cli.py +++ b/lightly/cli/train_cli.py @@ -45,11 +45,16 @@ def _train_cli(cfg, is_cli_call=True): torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False + if cfg["trainer"]["weights_summary"] == "None": + cfg["trainer"]["weights_summary"] = None + if torch.cuda.is_available(): device = 'cuda' elif cfg['trainer'] and cfg['trainer']['gpus']: device = 'cpu' cfg['trainer']['gpus'] = 0 + else: + device = 'cpu' if cfg['loader']['batch_size'] < 64: msg = 'Training a self-supervised model with a small batch size: {}! ' diff --git a/tests/cli/test_cli_train.py b/tests/cli/test_cli_train.py new file mode 100644 index 000000000..9bc1ace5f --- /dev/null +++ b/tests/cli/test_cli_train.py @@ -0,0 +1,75 @@ +import os +import re +import sys +import tempfile + +import torchvision +from hydra.experimental import compose, initialize + +import lightly +from tests.api_workflow.mocked_api_workflow_client import MockedApiWorkflowSetup, MockedApiWorkflowClient + + +class TestCLITrain(MockedApiWorkflowSetup): + + @classmethod + def setUpClass(cls) -> None: + sys.modules["lightly.cli.upload_cli"].ApiWorkflowClient = MockedApiWorkflowClient + + def setUp(self): + MockedApiWorkflowSetup.setUp(self) + self.create_fake_dataset() + with initialize(config_path="../../lightly/cli/config", job_name="test_app"): + self.cfg = compose(config_name="config", overrides=[ + "token='123'", + f"input_dir={self.folder_path}", + "trainer.max_epochs=0" + ]) + + def create_fake_dataset(self): + n_data = 5 + self.dataset = torchvision.datasets.FakeData(size=n_data, image_size=(3, 32, 32)) + + self.folder_path = tempfile.mkdtemp() + sample_names = [f'img_{i}.jpg' for i in range(n_data)] + self.sample_names = sample_names + for sample_idx in range(n_data): + data = self.dataset[sample_idx] + path = os.path.join(self.folder_path, sample_names[sample_idx]) + data[0].save(path) + + def parse_cli_string(self, cli_words: str): + cli_words = cli_words.replace("lightly-train ", "") + cli_words = re.split("=| ", cli_words) + assert len(cli_words) % 2 == 0 + dict_keys = cli_words[0::2] + dict_values = cli_words[1::2] + for key, value in zip(dict_keys, dict_values): + value = value.strip('\"') + value = value.strip('\'') + key_parts = key.split(".") + if len(key_parts) == 1: + self.cfg[key_parts[0]]= value + elif len(key_parts) == 2: + self.cfg[key_parts[0]][key_parts[1]] = value + else: + raise ValueError + + def test_parse_cli_string(self): + cli_string = "lightly-train trainer.weights_summary=top" + self.parse_cli_string(cli_string) + assert self.cfg["trainer"]["weights_summary"] == 'top' + + def test_train_weights_summary(self): + for weights_summary in ["None", "top", "full"]: + cli_string = f"lightly-train trainer.weights_summary={weights_summary}" + with self.subTest(cli_string): + self.parse_cli_string(cli_string) + lightly.cli.train_cli(self.cfg) + + def tearDown(self) -> None: + for filename in ["embeddings.csv", "embeddings_sorted.csv"]: + try: + os.remove(filename) + except FileNotFoundError: + pass From c8e0a8621b86c39ae3526d98c303704f8023d878 Mon Sep 17 00:00:00 2001 From: MalteEbner Date: Thu, 17 Jun 2021 12:10:08 +0200 Subject: [PATCH 3/3] added better documentation --- lightly/cli/config/config.yaml | 2 +- lightly/cli/lightly_cli.py | 1 - lightly/cli/train_cli.py | 3 +++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lightly/cli/config/config.yaml b/lightly/cli/config/config.yaml index 2332c4864..775c0296c 100644 --- a/lightly/cli/config/config.yaml +++ b/lightly/cli/config/config.yaml @@ -71,7 +71,7 @@ trainer: gpus: 1 # Number of gpus to use for training. max_epochs: 100 # Number of epochs to train for. precision: 32 # If set to 16, will use half-precision. - weights_summary: 'top' # how to print the model architecture, one of {None, 'top', full}, + weights_summary: 'top' # how to print the model architecture, one of {None, top, full}, #see https://pytorch-lightning.readthedocs.io/en/stable/common/trainer.html#weights-summary # checkpoint_callback namespace: Modify the checkpoint callback diff --git a/lightly/cli/lightly_cli.py b/lightly/cli/lightly_cli.py index 4b1345ef9..c74015c6c 100644 --- a/lightly/cli/lightly_cli.py +++ b/lightly/cli/lightly_cli.py @@ -22,7 +22,6 @@ def _lightly_cli(cfg, is_cli_call=True): if cfg['trainer']['max_epochs'] > 0: print('#' * 10 + ' Starting to train an embedding model.') checkpoint = _train_cli(cfg, is_cli_call) - cfg['trainer']['weights_summary'] = None else: checkpoint = '' diff --git a/lightly/cli/train_cli.py b/lightly/cli/train_cli.py index 214fe5d75..6750401fa 100644 --- a/lightly/cli/train_cli.py +++ b/lightly/cli/train_cli.py @@ -157,6 +157,9 @@ def train_cli(cfg): >>> >>> # train model for 10 epochs >>> lightly-train input_dir=data/ trainer.max_epochs=10 + >>> + >>> # print a full summary of the model + >>> lightly-train input_dir=data/ trainer.weights_summary=full """ return _train_cli(cfg)