Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Reduce AML snapshot size by skipping test folders #497

Merged
merged 7 commits into from
Jun 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion .amlignore
Original file line number Diff line number Diff line change
@@ -1,11 +1,29 @@
.idea
.git
.azureml
.pytest_cache
.mypy_cache
.github
.amlignore
.coveragerc
.editorconfig
.flake8
.gitattributes
.gitconfig
.gitignore
.gitmodules
CODE_OF_CONDUCT.md
GeoPol.xml
most_recent_run.txt
mypy.ini
mypy_runner.py
pull_request_template.md
SECURITY.md
__pycache__
azure-pipelines
.github
datasets
docs
sphinx-docs
modelweights
outputs
logs
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ jobs that run in AzureML.

### Changed
- ([#496](https://github.com/microsoft/InnerEye-DeepLearning/pull/496)) All plots are now saved as PNG, rather than JPG.
- ([#497](https://github.com/microsoft/InnerEye-DeepLearning/pull/497)) Reduced the size of the code snapshot that
gets uploaded to AzureML by skipping all test folders.

### Fixed

Expand Down
14 changes: 14 additions & 0 deletions InnerEye/Common/common_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pathlib import Path
from typing import Any, Callable, Generator, Iterable, List, Optional, Union

from InnerEye.Common import fixed_paths
from InnerEye.Common.fixed_paths import repository_root_directory
from InnerEye.Common.type_annotations import PathOrString
from InnerEye.ML.common import ModelExecutionMode
Expand Down Expand Up @@ -415,3 +416,16 @@ def change_working_directory(path_or_str: PathOrString) -> Generator:
os.chdir(new_path)
yield
os.chdir(old_path)


@contextmanager
def append_to_amlignore(lines_to_append: List[str]) -> Generator:
    """
    Context manager that appends lines to the .amlignore file, and reverts to the previous contents after.
    The previous contents are written back even if the code inside the `with` block raises an exception.

    :param lines_to_append: The lines to add at the end of the .amlignore file while the context is active.
    """
    amlignore = fixed_paths.repository_root_directory(".amlignore")
    old_contents = amlignore.read_text()
    new_contents = old_contents.splitlines() + lines_to_append
    amlignore.write_text("\n".join(new_contents))
    try:
        yield
    finally:
        # Restore unconditionally: without this, an exception raised by the caller's code would leave the
        # modified .amlignore file behind in the repository.
        amlignore.write_text(old_contents)
17 changes: 11 additions & 6 deletions InnerEye/ML/configs/other/HelloContainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,20 @@
import numpy as np
import torch
from pytorch_lightning import LightningDataModule, LightningModule
from pytorch_lightning.metrics import MeanAbsoluteError
from torch.optim import Adam, Optimizer
from torch.optim.lr_scheduler import StepLR, _LRScheduler
from torch.utils.data import DataLoader, Dataset
from pytorch_lightning.metrics import MeanAbsoluteError

from InnerEye.Common import fixed_paths_for_tests
from InnerEye.Common import fixed_paths
from InnerEye.ML.lightning_container import LightningContainer


class HelloDataset(Dataset):
"""
A simple 1dim regression task, read from a data file stored in the test data folder.
"""

# Creating the data file:
# import numpy as np
# import torch
Expand All @@ -29,7 +30,7 @@ class HelloDataset(Dataset):
# x = torch.rand((N, 1)) * 10
# y = 0.2 * x + 0.1 * torch.randn(x.size())
# xy = torch.cat((x, y), dim=1)
# np.savetxt("Tests/ML/test_data/hellocontainer.csv", xy.numpy(), delimiter=",")
# np.savetxt("InnerEye/ML/configs/other/hellocontainer.csv", xy.numpy(), delimiter=",")
def __init__(self, root_folder: Path, start_index: int, end_index: int) -> None:
"""
Creates the 1-dim regression dataset.
Expand All @@ -38,7 +39,7 @@ def __init__(self, root_folder: Path, start_index: int, end_index: int) -> None:
:param end_index: The last row to read (exclusive)
"""
super().__init__()
raw_data = np.loadtxt(root_folder / "hellocontainer.csv", delimiter=",")[start_index:end_index]
raw_data = np.loadtxt(str(root_folder / "hellocontainer.csv"), delimiter=",")[start_index:end_index]
self.data = torch.tensor(raw_data, dtype=torch.float)

def __len__(self) -> int:
Expand All @@ -52,6 +53,7 @@ class HelloDataModule(LightningDataModule):
"""
A data module that gives the training, validation and test data for a simple 1-dim regression task.
"""

def __init__(self, root_folder: Path) -> None:
super().__init__()
self.train = HelloDataset(root_folder, start_index=0, end_index=50)
Expand All @@ -72,6 +74,7 @@ class HelloRegression(LightningModule):
"""
A simple 1-dim regression model.
"""

def __init__(self) -> None:
super().__init__()
self.model = torch.nn.Linear(in_features=1, out_features=1, bias=True)
Expand Down Expand Up @@ -101,7 +104,8 @@ def training_step(self, batch: Dict[str, torch.Tensor], *args: Any, **kwargs: An
self.log("loss", loss, on_epoch=True, on_step=False)
return loss

def validation_step(self, batch: Dict[str, torch.Tensor], *args: Any, **kwargs: Any) -> torch.Tensor: # type: ignore
def validation_step(self, batch: Dict[str, torch.Tensor], *args: Any, # type: ignore
**kwargs: Any) -> torch.Tensor:
"""
This method is part of the standard PyTorch Lightning interface. For an introduction, please see
https://pytorch-lightning.readthedocs.io/en/stable/starter/converting.html
Expand Down Expand Up @@ -187,9 +191,10 @@ class HelloContainer(LightningContainer):
You can train this model by running `python InnerEye/ML/runner.py --model=HelloContainer` on the local box,
or via `python InnerEye/ML/runner.py --model=HelloContainer --azureml=True` in AzureML
"""

def __init__(self) -> None:
super().__init__()
self.local_dataset = fixed_paths_for_tests.full_ml_test_data_path()
self.local_dataset = fixed_paths.repository_root_directory() / "InnerEye" / "ML" / "configs" / "other"
self.num_epochs = 20

# This method must be overridden by any subclass of LightningContainer. It returns the model that you wish to
Expand Down
16 changes: 12 additions & 4 deletions InnerEye/ML/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from InnerEye.Azure.run_pytest import download_pytest_result, run_pytest
from InnerEye.Common import fixed_paths
from InnerEye.Common.common_util import FULL_METRICS_DATAFRAME_FILE, METRICS_AGGREGATES_FILE, \
disable_logging_to_file, is_linux, logging_to_stdout
append_to_amlignore, disable_logging_to_file, is_linux, logging_to_stdout
from InnerEye.Common.generic_parsing import GenericConfig
from InnerEye.ML.common import DATASET_CSV_FILE_NAME
from InnerEye.ML.deep_learning_config import DeepLearningConfig
Expand Down Expand Up @@ -229,9 +229,17 @@ def submit_to_azureml(self) -> Run:
upload_timeout_seconds=86400,
)
source_config.set_script_params_except_submit_flag()
azure_run = submit_to_azureml(self.azure_config, source_config,
self.lightning_container.all_azure_dataset_ids(),
self.lightning_container.all_dataset_mountpoints())
# Reduce the size of the snapshot by adding unused folders to amlignore. The Test* subfolders are only needed
# when running pytest.
ignored_folders = []
if not self.azure_config.pytest_mark:
ignored_folders.extend(["Tests", "TestsOutsidePackage", "TestSubmodule"])
if not self.lightning_container.regression_test_folder:
ignored_folders.append("RegressionTestResults")
with append_to_amlignore(ignored_folders):
azure_run = submit_to_azureml(self.azure_config, source_config,
self.lightning_container.all_azure_dataset_ids(),
self.lightning_container.all_dataset_mountpoints())
logging.info("Job submission to AzureML done.")
if self.azure_config.pytest_mark and self.azure_config.wait_for_completion:
# The AzureML job can optionally run pytest. Attempt to download it to the current directory.
Expand Down
25 changes: 21 additions & 4 deletions Tests/Common/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@

import pytest

from InnerEye.Common import common_util
from InnerEye.Common.common_util import change_working_directory, check_is_any_of, is_private_field_name, \
namespace_to_path, \
path_to_namespace, print_exception
from InnerEye.Common import common_util, fixed_paths
from InnerEye.Common.common_util import (append_to_amlignore, change_working_directory, check_is_any_of,
is_private_field_name, namespace_to_path, path_to_namespace, print_exception)
from InnerEye.Common.fixed_paths_for_tests import full_ml_test_data_path, tests_root_directory
from InnerEye.Common.output_directories import OutputFolderForTests

Expand Down Expand Up @@ -124,3 +123,21 @@ def test_change_dir(test_output_dirs: OutputFolderForTests) -> None:
Path("bar.txt").touch()
assert Path.cwd() == test_output_dirs.root_dir
assert (new_dir / "bar.txt").is_file()


def test_modify_amlignore() -> None:
    """
    Check that entries appended to the .amlignore file are present while the context manager is active, and that
    the file has its original contents again once the context manager has exited.
    """
    extra_entries = ["Added1", "Added2"]
    ignore_file = fixed_paths.repository_root_directory(".amlignore")
    contents_before = ignore_file.read_text()
    # The entries must not be in the file already, otherwise the checks below would prove nothing.
    assert not any(entry in contents_before for entry in extra_entries)
    with append_to_amlignore(extra_entries):
        contents_during = ignore_file.read_text()
        assert all(entry in contents_during for entry in extra_entries)
    contents_after = ignore_file.read_text()
    assert contents_after == contents_before