microsoft · ant0nsc · Jun 22, 2021 · Jun 21, 2021 · Jun 21, 2021 · Jun 21, 2021
diff --git a/.amlignore b/.amlignore
@@ -1,11 +1,29 @@
+.idea
 .git
 .azureml
 .pytest_cache
 .mypy_cache
+.github
+.amlignore
+.coveragerc
+.editorconfig
+.flake8
+.gitattributes
+.gitconfig
+.gitignore
+.gitmodules
+CODE_OF_CONDUCT.md
+GeoPol.xml
+most_recent_run.txt
+mypy.ini
+mypy_runner.py
+pull_request_template.md
+SECURITY.md
 __pycache__
 azure-pipelines
-.github
 datasets
+docs
+sphinx-docs
 modelweights
 outputs
 logs

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -18,6 +18,8 @@ jobs that run in AzureML.
 
 ### Changed
 - ([#496](https://github.com/microsoft/InnerEye-DeepLearning/pull/496)) All plots are now saved as PNG, rather than JPG.
+- ([#497](https://github.com/microsoft/InnerEye-DeepLearning/pull/497)) Reducing the size of the code snapshot that
+gets uploaded to AzureML, by skipping all test folders.
 
 ### Fixed
 

diff --git a/InnerEye/Common/common_util.py b/InnerEye/Common/common_util.py
@@ -14,6 +14,7 @@
 from pathlib import Path
 from typing import Any, Callable, Generator, Iterable, List, Optional, Union
 
+from InnerEye.Common import fixed_paths
 from InnerEye.Common.fixed_paths import repository_root_directory
 from InnerEye.Common.type_annotations import PathOrString
 from InnerEye.ML.common import ModelExecutionMode
@@ -415,3 +416,16 @@ def change_working_directory(path_or_str: PathOrString) -> Generator:
     os.chdir(new_path)
     yield
     os.chdir(old_path)
+
+
+@contextmanager
+def append_to_amlignore(lines_to_append: List[str]) -> Generator:
+    """Appends lines to the repository's .amlignore file, and restores the previous contents on exit."""
+    amlignore = fixed_paths.repository_root_directory(".amlignore")
+    old_contents = amlignore.read_text()
+    amlignore.write_text("\n".join(old_contents.splitlines() + lines_to_append))
+    # Restore in `finally`, so that an exception inside the `with` block does not leave the file modified.
+    try:
+        yield
+    finally:
+        amlignore.write_text(old_contents)
diff --git a/InnerEye/ML/configs/other/HelloContainer.py b/InnerEye/ML/configs/other/HelloContainer.py
@@ -8,19 +8,20 @@
 import numpy as np
 import torch
 from pytorch_lightning import LightningDataModule, LightningModule
+from pytorch_lightning.metrics import MeanAbsoluteError
 from torch.optim import Adam, Optimizer
 from torch.optim.lr_scheduler import StepLR, _LRScheduler
 from torch.utils.data import DataLoader, Dataset
-from pytorch_lightning.metrics import MeanAbsoluteError
 
-from InnerEye.Common import fixed_paths_for_tests
+from InnerEye.Common import fixed_paths
 from InnerEye.ML.lightning_container import LightningContainer
 
 
 class HelloDataset(Dataset):
     """
     A simple 1dim regression task, read from a data file stored in the test data folder.
     """
+
     # Creating the data file:
     # import numpy as np
     # import torch
@@ -29,7 +30,7 @@ class HelloDataset(Dataset):
     # x = torch.rand((N, 1)) * 10
     # y = 0.2 * x + 0.1 * torch.randn(x.size())
     # xy = torch.cat((x, y), dim=1)
-    # np.savetxt("Tests/ML/test_data/hellocontainer.csv", xy.numpy(), delimiter=",")
+    # np.savetxt("InnerEye/ML/configs/other/hellocontainer.csv", xy.numpy(), delimiter=",")
     def __init__(self, root_folder: Path, start_index: int, end_index: int) -> None:
         """
         Creates the 1-dim regression dataset.
@@ -38,7 +39,7 @@ def __init__(self, root_folder: Path, start_index: int, end_index: int) -> None:
         :param end_index: The last row to read (exclusive)
         """
         super().__init__()
-        raw_data = np.loadtxt(root_folder / "hellocontainer.csv", delimiter=",")[start_index:end_index]
+        raw_data = np.loadtxt(str(root_folder / "hellocontainer.csv"), delimiter=",")[start_index:end_index]
         self.data = torch.tensor(raw_data, dtype=torch.float)
 
     def __len__(self) -> int:
@@ -52,6 +53,7 @@ class HelloDataModule(LightningDataModule):
     """
     A data module that gives the training, validation and test data for a simple 1-dim regression task.
     """
+
     def __init__(self, root_folder: Path) -> None:
         super().__init__()
         self.train = HelloDataset(root_folder, start_index=0, end_index=50)
@@ -72,6 +74,7 @@ class HelloRegression(LightningModule):
     """
     A simple 1-dim regression model.
     """
+
     def __init__(self) -> None:
         super().__init__()
         self.model = torch.nn.Linear(in_features=1, out_features=1, bias=True)
@@ -101,7 +104,8 @@ def training_step(self, batch: Dict[str, torch.Tensor], *args: Any, **kwargs: An
         self.log("loss", loss, on_epoch=True, on_step=False)
         return loss
 
-    def validation_step(self, batch: Dict[str, torch.Tensor], *args: Any, **kwargs: Any) -> torch.Tensor:  # type: ignore
+    def validation_step(self, batch: Dict[str, torch.Tensor], *args: Any,  # type: ignore
+                        **kwargs: Any) -> torch.Tensor:
         """
         This method is part of the standard PyTorch Lightning interface. For an introduction, please see
         https://pytorch-lightning.readthedocs.io/en/stable/starter/converting.html
@@ -187,9 +191,10 @@ class HelloContainer(LightningContainer):
     You can train this model by running `python InnerEye/ML/runner.py --model=HelloContainer` on the local box,
     or via `python InnerEye/ML/runner.py --model=HelloContainer --azureml=True` in AzureML
     """
+
     def __init__(self) -> None:
         super().__init__()
-        self.local_dataset = fixed_paths_for_tests.full_ml_test_data_path()
+        self.local_dataset = fixed_paths.repository_root_directory() / "InnerEye" / "ML" / "configs" / "other"
         self.num_epochs = 20
 
     # This method must be overridden by any subclass of LightningContainer. It returns the model that you wish to

diff --git a/Tests/ML/test_data/hellocontainer.csv → InnerEye/ML/configs/other/hellocontainer.csv b/Tests/ML/test_data/hellocontainer.csv → InnerEye/ML/configs/other/hellocontainer.csv
diff --git a/InnerEye/ML/runner.py b/InnerEye/ML/runner.py
@@ -41,7 +41,7 @@
 from InnerEye.Azure.run_pytest import download_pytest_result, run_pytest
 from InnerEye.Common import fixed_paths
 from InnerEye.Common.common_util import FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE, \
-    disable_logging_to_file, is_linux, logging_to_stdout
+    append_to_amlignore, disable_logging_to_file, is_linux, logging_to_stdout
 from InnerEye.Common.generic_parsing import GenericConfig
 from InnerEye.ML.common import DATASET_CSV_FILE_NAME
 from InnerEye.ML.deep_learning_config import DeepLearningConfig
@@ -229,9 +229,17 @@ def submit_to_azureml(self) -> Run:
             upload_timeout_seconds=86400,
         )
         source_config.set_script_params_except_submit_flag()
-        azure_run = submit_to_azureml(self.azure_config, source_config,
-                                      self.lightning_container.all_azure_dataset_ids(),
-                                      self.lightning_container.all_dataset_mountpoints())
+        # Reduce the size of the snapshot by adding unused folders to amlignore. The Test* subfolders are only needed
+        # when running pytest.
+        ignored_folders = []
+        if not self.azure_config.pytest_mark:
+            ignored_folders.extend(["Tests", "TestsOutsidePackage", "TestSubmodule"])
+        if not self.lightning_container.regression_test_folder:
+            ignored_folders.append("RegressionTestResults")
+        with append_to_amlignore(ignored_folders):
+            azure_run = submit_to_azureml(self.azure_config, source_config,
+                                          self.lightning_container.all_azure_dataset_ids(),
+                                          self.lightning_container.all_dataset_mountpoints())
         logging.info("Job submission to AzureML done.")
         if self.azure_config.pytest_mark and self.azure_config.wait_for_completion:
             # The AzureML job can optionally run pytest. Attempt to download it to the current directory.

diff --git a/Tests/Common/test_util.py b/Tests/Common/test_util.py
@@ -7,10 +7,9 @@
 
 import pytest
 
-from InnerEye.Common import common_util
-from InnerEye.Common.common_util import change_working_directory, check_is_any_of, is_private_field_name, \
-    namespace_to_path, \
-    path_to_namespace, print_exception
+from InnerEye.Common import common_util, fixed_paths
+from InnerEye.Common.common_util import (append_to_amlignore, change_working_directory, check_is_any_of,
+                                         is_private_field_name, namespace_to_path, path_to_namespace, print_exception)
 from InnerEye.Common.fixed_paths_for_tests import full_ml_test_data_path, tests_root_directory
 from InnerEye.Common.output_directories import OutputFolderForTests
 
@@ -124,3 +123,21 @@ def test_change_dir(test_output_dirs: OutputFolderForTests) -> None:
         Path("bar.txt").touch()
     assert Path.cwd() == test_output_dirs.root_dir
     assert (new_dir / "bar.txt").is_file()
+
+
+def test_modify_amlignore() -> None:
+    """
+    Check that append_to_amlignore adds the given entries to the .amlignore file while the context is active,
+    and that the file is back to its original contents once the context has been left.
+    """
+    extra_entries = ["Added1", "Added2"]
+    ignore_file = fixed_paths.repository_root_directory(".amlignore")
+    contents_before = ignore_file.read_text()
+    # Precondition: none of the test entries may already be present in the file.
+    for entry in extra_entries:
+        assert entry not in contents_before
+    with append_to_amlignore(extra_entries):
+        contents_inside = ignore_file.read_text()
+        for entry in extra_entries:
+            assert entry in contents_inside
+    assert ignore_file.read_text() == contents_before