Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

DOC: Add all InnerEye/ML docstrings to ReadTheDocs #783

Merged
merged 23 commits into from
Aug 16, 2022
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
267d4e8
📝 Create basic for ML API
peterhessey Aug 5, 2022
806f261
📝 Add ML/configs base doc files
peterhessey Aug 5, 2022
0cbd7c7
📝 Finish ML/configs API
peterhessey Aug 8, 2022
f5c82cf
📝 Update augmentations
peterhessey Aug 8, 2022
ac0171a
📝 Add ML/dataset API docs
peterhessey Aug 8, 2022
e9fff6a
📝 Add rst skeleton for ML/models
peterhessey Aug 8, 2022
4fd8de4
📝 Fix docstring missing newlines
peterhessey Aug 8, 2022
6371548
Remove script
peterhessey Aug 8, 2022
27e7784
📝 Finish ML/models API docs
peterhessey Aug 8, 2022
6a1273f
📝 Start ML/SSL API. Fix some formatting issues
peterhessey Aug 9, 2022
562173b
📝 Correct whitespace issues in `:param`
peterhessey Aug 9, 2022
416e907
📝 Fix whitespace errors on `:return` statements
peterhessey Aug 9, 2022
a778dac
📝 Fix :return: statements
peterhessey Aug 9, 2022
33b557c
📝 Finish ML/SSL API
peterhessey Aug 9, 2022
7d4f466
📝 Add ML/utils API docs
peterhessey Aug 9, 2022
19ab5b2
📝 Add visualizer docs, fix `:raise` indents
peterhessey Aug 9, 2022
67169af
📝 Fix more issues with the `:raises:` formatting
peterhessey Aug 9, 2022
7619004
♻️ Restructuring folders
peterhessey Aug 9, 2022
bdc2a51
📝 Limit API `toctree` depth
peterhessey Aug 9, 2022
56c3a52
📝 Add primary InnerEye/ML files API to docs
peterhessey Aug 9, 2022
e9e5ee8
📝 Fix and add `InnerEye/ML/*.py` docs
peterhessey Aug 9, 2022
c1d84a8
⚰️ Remove weird `settings.json` change
peterhessey Aug 9, 2022
9f57a6a
♻️ 💡 Address review comments
peterhessey Aug 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@
},
"files.trimTrailingWhitespace": true,
"files.trimFinalNewlines": true,
"files.insertFinalNewline": true
"files.insertFinalNewline": true,
}
2 changes: 2 additions & 0 deletions InnerEye/Azure/azure_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ def from_yaml(yaml_file_path: Path, project_root: Optional[Path]) -> AzureConfig
"""
Creates an AzureConfig object with default values, with the keys/secrets populated from values in the
given YAML file. If a `project_root` folder is provided, a private settings file is read from there as well.

:param yaml_file_path: Path to the YAML file that contains values to create the AzureConfig
:param project_root: A folder in which to search for a private settings file.
:return: AzureConfig with values populated from the yaml files.
Expand Down Expand Up @@ -231,6 +232,7 @@ def get_service_principal_auth(self) -> Optional[Union[InteractiveLoginAuthentic
def fetch_run(self, run_recovery_id: str) -> Run:
"""
Gets an instantiated Run object for a given run recovery ID (format experiment_name:run_id).

:param run_recovery_id: A run recovery ID (format experiment_name:run_id)
"""
return fetch_run(workspace=self.get_workspace(), run_recovery_id=run_recovery_id)
Expand Down
15 changes: 12 additions & 3 deletions InnerEye/Azure/azure_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def get_git_tags(azure_config: AzureConfig) -> Dict[str, str]:
Creates a dictionary with git-related information, like branch and commit ID. The dictionary key is a string
that can be used as a tag on an AzureML run, the dictionary value is the git information. If git information
is passed in via commandline arguments, those take precedence over information read out from the repository.

:param azure_config: An AzureConfig object specifying git-related commandline args.
:return: A dictionary mapping from tag name to git info.
"""
Expand All @@ -56,6 +57,7 @@ def get_git_tags(azure_config: AzureConfig) -> Dict[str, str]:
def additional_run_tags(azure_config: AzureConfig, commandline_args: str) -> Dict[str, str]:
"""
Gets the set of tags that will be added to the AzureML run as metadata, like git status and user name.

:param azure_config: The configurations for the present AzureML job
:param commandline_args: A string that holds all commandline arguments that were used for the present run.
"""
Expand All @@ -77,6 +79,7 @@ def additional_run_tags(azure_config: AzureConfig, commandline_args: str) -> Dic
def create_experiment_name(azure_config: AzureConfig) -> str:
"""
Gets the name of the AzureML experiment. This is taken from the commandline, or from the git branch.

:param azure_config: The object containing all Azure-related settings.
:return: The name to use for the AzureML experiment.
"""
Expand Down Expand Up @@ -104,7 +107,7 @@ def create_dataset_configs(azure_config: AzureConfig,
:param all_dataset_mountpoints: When using the datasets in AzureML, these are the per-dataset mount points.
:param all_local_datasets: The paths for all local versions of the datasets.
:return: A list of DatasetConfig objects, in the same order as datasets were provided in all_azure_dataset_ids,
omitting datasets with an empty name.
omitting datasets with an empty name.
"""
datasets: List[DatasetConfig] = []
num_local = len(all_local_datasets)
Expand Down Expand Up @@ -147,6 +150,7 @@ def create_runner_parser(model_config_class: type = None) -> argparse.ArgumentPa
"""
Creates a commandline parser, that understands all necessary arguments for running a script in Azure,
plus all arguments for the given class. The class must be a subclass of GenericConfig.

:param model_config_class: A class that contains the model-specific parameters.
:return: An instance of ArgumentParser.
"""
Expand All @@ -167,11 +171,12 @@ def parse_args_and_add_yaml_variables(parser: ArgumentParser,
"""
Reads arguments from sys.argv, modifies them with secrets from local YAML files,
and parses them using the given argument parser.

:param project_root: The root folder for the whole project. Only used to access a private settings file.
:param parser: The parser to use.
:param yaml_config_file: The path to the YAML file that contains values to supply into sys.argv.
:param fail_on_unknown_args: If True, raise an exception if the parser encounters an argument that it does not
recognize. If False, unrecognized arguments will be ignored, and added to the "unknown" field of the parser result.
recognize. If False, unrecognized arguments will be ignored, and added to the "unknown" field of the parser result.
:return: The parsed arguments, and overrides
"""
settings_from_yaml = read_all_settings(yaml_config_file, project_root=project_root)
Expand All @@ -183,6 +188,7 @@ def parse_args_and_add_yaml_variables(parser: ArgumentParser,
def _create_default_namespace(parser: ArgumentParser) -> Namespace:
"""
Creates an argparse Namespace with all parser-specific default values set.

:param parser: The parser to work with.
:return:
"""
Expand All @@ -207,10 +213,12 @@ def parse_arguments(parser: ArgumentParser,
Parses a list of commandline arguments with a given parser, and adds additional information read
from YAML files. Returns results broken down into a full arguments dictionary, a dictionary of arguments
that were set to non-default values, and unknown arguments.

:param parser: The parser to use
:param settings_from_yaml: A dictionary of settings read from a YAML config file.
:param fail_on_unknown_args: If True, raise an exception if the parser encounters an argument that it does not
recognize. If False, unrecognized arguments will be ignored, and added to the "unknown" field of the parser result.
recognize. If False, unrecognized arguments will be ignored, and added to the "unknown" field of the parser result.

:param args: Arguments to parse. If not given, use those in sys.argv
:return: The parsed arguments, and overrides
"""
Expand Down Expand Up @@ -261,6 +269,7 @@ def run_duration_string_to_seconds(s: str) -> Optional[int]:
Parses a string that represents a timespan, and returns it converted into seconds. The string is expected to be
a floating point number with a single character suffix s, m, h, d for seconds, minutes, hours, days.
Examples: '3.5h', '2d'. If the argument is an empty string, None is returned.

:param s: The string to parse.
:return: The timespan represented in the string converted to seconds.
"""
Expand Down
17 changes: 15 additions & 2 deletions InnerEye/Azure/azure_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def split_recovery_id(id: str) -> Tuple[str, str]:
The argument can be in the format 'experiment_name:run_id',
or just a run ID like user_branch_abcde12_123. In the latter case, everything before the last
two alphanumeric parts is assumed to be the experiment name.

:param id:
:return: experiment name and run name
"""
Expand Down Expand Up @@ -74,9 +75,10 @@ def fetch_run(workspace: Workspace, run_recovery_id: str) -> Run:
Finds an existing run in an experiment, based on a recovery ID that contains the experiment ID
and the actual RunId. The run can be specified either in the experiment_name:run_id format,
or just the run_id.

:param workspace: the configured AzureML workspace to search for the experiment.
:param run_recovery_id: The Run to find. Either in the full recovery ID format, experiment_name:run_id
or just the run_id
or just the run_id
:return: The AzureML run.
"""
return get_aml_run_from_run_id(aml_workspace=workspace, run_id=run_recovery_id)
Expand All @@ -85,6 +87,7 @@ def fetch_run(workspace: Workspace, run_recovery_id: str) -> Run:
def fetch_runs(experiment: Experiment, filters: List[str]) -> List[Run]:
"""
Fetch the runs in an experiment.

:param experiment: the experiment to fetch runs from
:param filters: a list of run statuses to include. Must be a subset of [Running, Completed, Failed, Canceled].
:return: the list of runs in the experiment
Expand All @@ -107,10 +110,11 @@ def fetch_child_runs(
"""
Fetch child runs for the provided runs that have the provided AML status (or fetch all by default)
and have a run_recovery_id tag value set (this is to ignore superfluous AML infrastructure platform runs).

:param run: parent run to fetch child run from
:param status: if provided, returns only child runs with this status
:param expected_number_cross_validation_splits: when recovering child runs from AML hyperdrive
sometimes the get_children function fails to retrieve all children. If the number of child runs
sometimes the get_children function fails to retrieve all children. If the number of child runs
retrieved by AML is lower than the expected number of splits, we try to retrieve them manually.
"""
if is_ensemble_run(run):
Expand Down Expand Up @@ -159,6 +163,7 @@ def to_azure_friendly_string(x: Optional[str]) -> Optional[str]:
def to_azure_friendly_container_path(path: Path) -> str:
"""
Converts a path to an Azure friendly container path by replacing "\\", "//" with "/" so it can be in the form: a/b/c.

:param path: Original path
:return: Converted path
"""
Expand All @@ -168,6 +173,7 @@ def to_azure_friendly_container_path(path: Path) -> str:
def is_offline_run_context(run_context: Run) -> bool:
"""
Tells if a run_context is offline by checking if it has an experiment associated with it.

:param run_context: Context of the run to check
:return:
"""
Expand All @@ -177,6 +183,7 @@ def is_offline_run_context(run_context: Run) -> bool:
def get_run_context_or_default(run: Optional[Run] = None) -> Run:
"""
Returns the context of the run, if run is not None. If run is None, returns the context of the current run.

:param run: Run to retrieve context for. If None, retrieve context of current run.
:return: Run context
"""
Expand All @@ -186,6 +193,7 @@ def get_run_context_or_default(run: Optional[Run] = None) -> Run:
def get_cross_validation_split_index(run: Run) -> int:
"""
Gets the cross validation index from the run's tags or returns the default

:param run: Run context from which to get index
:return: The cross validation split index
"""
Expand All @@ -204,6 +212,7 @@ def is_cross_validation_child_run(run: Run) -> bool:
"""
Checks the provided run's tags to determine if it is a cross validation child run
(which is the case if the split index >=0)

:param run: Run to check.
:return: True if cross validation run. False otherwise.
"""
Expand All @@ -213,6 +222,7 @@ def is_cross_validation_child_run(run: Run) -> bool:
def strip_prefix(string: str, prefix: str) -> str:
"""
Returns the string without the prefix if it has the prefix, otherwise the string unchanged.

:param string: Input string.
:param prefix: Prefix to remove from input string.
:return: Input string with prefix removed.
Expand All @@ -226,6 +236,7 @@ def get_all_environment_files(project_root: Path) -> List[Path]:
"""
Returns a list of all Conda environment files that should be used. This is firstly the InnerEye conda file,
and possibly a second environment.yml file that lives at the project root folder.

:param project_root: The root folder of the code that starts the present training run.
:return: A list with 1 or 2 entries that are conda environment files.
"""
Expand Down Expand Up @@ -260,6 +271,7 @@ def download_run_output_file(blob_path: Path, destination: Path, run: Run) -> Pa
Downloads a single file from the run's default output directory: DEFAULT_AML_UPLOAD_DIR ("outputs").
For example, if blob_path = "foo/bar.csv", then the run result file "outputs/foo/bar.csv" will be downloaded
to <destination>/bar.csv (the directory will be stripped off).

:param blob_path: The name of the file to download.
:param run: The AzureML run to download the files from
:param destination: Local path to save the downloaded blob to.
Expand Down Expand Up @@ -287,6 +299,7 @@ def download_run_outputs_by_prefix(
have a given prefix (folder structure). When saving, the prefix string will be stripped off. For example,
if blobs_prefix = "foo", and the run has a file "outputs/foo/bar.csv", it will be downloaded to destination/bar.csv.
If there is in addition a file "foo.txt", that file will be skipped.

:param blobs_prefix: The prefix for all files in "outputs" that should be downloaded.
:param run: The AzureML run to download the files from.
:param destination: Local path to save the downloaded blobs to.
Expand Down
3 changes: 3 additions & 0 deletions InnerEye/Azure/parser_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
def _is_empty(item: Any) -> bool:
"""
Returns True if the argument has length 0.

:param item: Object to check.
:return: True if the argument has length 0. False otherwise.
"""
Expand All @@ -18,6 +19,7 @@ def _is_empty(item: Any) -> bool:
def _is_empty_or_empty_string_list(item: Any) -> bool:
"""
Returns True if the argument has length 0, or a list with a single element that has length 0.

:param item: Object to check.
:return: True if argument has length 0, or a list with a single element that has length 0. False otherwise.
"""
Expand All @@ -32,6 +34,7 @@ def value_to_string(x: object) -> str:
"""
Returns a string representation of x, with special treatment of Enums (return their value)
and lists (return comma-separated list).

:param x: Object to convert to string
:return: The string representation of the object.
Special cases: For Enums, returns their value, for lists, returns a comma-separated list.
Expand Down
4 changes: 3 additions & 1 deletion InnerEye/Azure/run_pytest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@
def run_pytest(pytest_mark: str, outputs_folder: Path) -> Tuple[bool, Path]:
"""
Runs pytest on the whole test suite, restricting to the tests that have the given PyTest mark.

:param pytest_mark: The PyTest mark to use for filtering out the tests to run.
:param outputs_folder: The folder into which the test result XML file should be written.
:return: True if PyTest found tests to execute and completed successfully, False otherwise.
Also returns the path to the generated PyTest results file.
Also returns the path to the generated PyTest results file.
"""
from _pytest.main import ExitCode
_outputs_file = outputs_folder / PYTEST_RESULTS_FILE
Expand All @@ -43,6 +44,7 @@ def download_pytest_result(run: Run, destination_folder: Path = Path.cwd()) -> P
"""
Downloads the pytest result file that is stored in the output folder of the given AzureML run.
If there is no pytest result file, throw an Exception.

:param run: The run from which the files should be read.
:param destination_folder: The folder into which the PyTest result file is downloaded.
:return: The path (folder and filename) of the downloaded file.
Expand Down
17 changes: 12 additions & 5 deletions InnerEye/Azure/secrets_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class SecretsHandling:
def __init__(self, project_root: Path) -> None:
"""
Creates a new instance of the class.

:param project_root: The root folder of the project that starts the InnerEye run.
"""
self.project_root = project_root
Expand All @@ -36,8 +37,9 @@ def read_secrets_from_file(self, secrets_to_read: List[str]) -> Optional[Dict[st
"""
Reads the secrets from file in YAML format, and returns the contents as a dictionary. The YAML file is expected
in the project root directory.

:param secrets_to_read: The list of secret names to read from the YAML file. These will be converted to
uppercase.
uppercase.
:return: A dictionary with secrets, or None if the file does not exist.
"""
secrets_file = self.project_root / fixed_paths.PROJECT_SECRETS_FILE
Expand All @@ -57,8 +59,9 @@ def get_secrets_from_environment_or_file(self, secrets_to_read: List[str]) -> Di
Attempts to read secrets from the project secret file. If there is no secrets file, it returns all secrets
in secrets_to_read read from environment variables. When reading from environment, if an expected
secret is not found, its value will be None.

:param secrets_to_read: The list of secret names to read from the YAML file. These will be converted to
uppercase.
uppercase.
"""
# Read all secrets from a local file if present, and sets the matching environment variables.
# If no secrets file is present, no environment variable is modified or created.
Expand All @@ -69,9 +72,10 @@ def get_secrets_from_environment_or_file(self, secrets_to_read: List[str]) -> Di
def get_secret_from_environment(self, name: str, allow_missing: bool = False) -> Optional[str]:
"""
Gets a password or key from the secrets file or environment variables.

:param name: The name of the environment variable to read. It will be converted to uppercase.
:param allow_missing: If true, the function returns None if there is no entry of the given name in
any of the places searched. If false, missing entries will raise a ValueError.
any of the places searched. If false, missing entries will raise a ValueError.
:return: Value of the secret. None, if there is no value and allow_missing is True.
"""

Expand Down Expand Up @@ -99,10 +103,11 @@ def read_all_settings(project_settings_file: Optional[Path] = None,
the `project_root` folder. Settings in the private settings file
override those in the project settings. Both settings files are expected in YAML format, with an entry called
'variables'.

:param project_settings_file: The first YAML settings file to read.
:param project_root: The folder that can contain a 'InnerEyePrivateSettings.yml' file.
:return: A dictionary mapping from string to variable value. The dictionary key is the union of variable names
found in the two settings files.
found in the two settings files.
"""
private_settings_file = None
if project_root and project_root.is_dir():
Expand All @@ -117,10 +122,11 @@ def read_settings_and_merge(project_settings_file: Optional[Path] = None,
file is read into a dictionary, then the private settings file is read. Settings in the private settings file
override those in the project settings. Both settings files are expected in YAML format, with an entry called
'variables'.

:param project_settings_file: The first YAML settings file to read.
:param private_settings_file: The second YAML settings file to read. Settings in this file have higher priority.
:return: A dictionary mapping from string to variable value. The dictionary key is the union of variable names
found in the two settings files.
found in the two settings files.
"""
result = dict()
if project_settings_file:
Expand All @@ -138,6 +144,7 @@ def read_settings_yaml_file(yaml_file: Path) -> Dict[str, Any]:
"""
Reads a YAML file, that is expected to contain an entry 'variables'. Returns the dictionary for the 'variables'
section of the file.

:param yaml_file: The yaml file to read.
:return: A dictionary with the variables from the yaml file.
"""
Expand Down
4 changes: 3 additions & 1 deletion InnerEye/Azure/tensorboard_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def validate(self) -> None:
def monitor(monitor_config: AMLTensorBoardMonitorConfig, azure_config: AzureConfig) -> None:
"""
Starts TensorBoard monitoring as per the provided arguments.

:param monitor_config: The config containing information on which runs need to be monitored.
:param azure_config: An AzureConfig object with secrets/keys to access the workspace.
"""
Expand Down Expand Up @@ -93,9 +94,10 @@ def main(settings_yaml_file: Optional[Path] = None,
"""
Parses the commandline arguments, and based on those, starts the Tensorboard monitoring for the AzureML runs
supplied on the commandline.

:param settings_yaml_file: The YAML file that contains all information for accessing Azure.
:param project_root: The root folder that contains all code for the present run. This is only used to locate
a private settings file InnerEyePrivateSettings.yml.
a private settings file InnerEyePrivateSettings.yml.
"""
monitor_config = AMLTensorBoardMonitorConfig.parse_args()
settings_yaml_file = settings_yaml_file or monitor_config.settings
Expand Down
Loading