This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Added ability to run the segmentation inference module on test data with missing or partial ground truth files. #465

Merged on Jul 5, 2021
51 commits
9c08fc6  Added initial unit test to evaluate model predictions. (May 21, 2021)
f867c9f  Updated log PR #465. (May 24, 2021)
8507ae7  Added unit test for segmentation inference in test data and no ground… (May 25, 2021)
6137efb  Renamed variable from: "for_inference" to "allow_incomplete_labels". (May 25, 2021)
2e8f173  Completed type annotation. (May 25, 2021)
a500663  Extended unit test "test_evaluate_model_predictions" to account for m… (May 26, 2021)
e9723e5  Improved inference support for missing labels. (asantamariapang, May 30, 2021)
73955f6  Revert "Improved inference support for missing labels." (asantamariapang, Jun 1, 2021)
5caf245  Updated missing ground truth labels and masks. (asantamariapang, Jun 2, 2021)
d703f2c  Merge branch 'main' into alberto/inference (asantamariapang, Jun 3, 2021)
77d3244  Fixed bug and improved documentation. (asantamariapang, Jun 8, 2021)
051681a  Merge branch 'main' into alberto/inference (asantamariapang, Jun 8, 2021)
fc6c9cc  Merge branch 'main' into alberto/inference (javier-alvarez, Jun 15, 2021)
652b6aa  WiP allowing NaNs in averaging to count them (Jun 24, 2021)
899e648  Deeper testing in test_evaluate_model_predictions (Jun 25, 2021)
2b46a44  Reverting CHANGELOG for now (Jun 25, 2021)
94b90ab  Unused import (Jun 25, 2021)
36f3994  WiP testing partial_ground_truth metrics output (Jun 25, 2021)
03e530c  messy WiP with testing ground truths (dumbledad, Jun 26, 2021)
a79b1d0  Removing started small test data script (dumbledad, Jun 26, 2021)
fcacfcd  Adding labels for partial test (dumbledad, Jun 27, 2021)
75b1c37  WiP on partial ground truth unit test of model_test (dumbledad, Jun 27, 2021)
6f4bbd8  Unit test of partial ground truth works, but other fails :( (dumbledad, Jun 27, 2021)
713c2f4  mypy fixes (dumbledad, Jun 27, 2021)
554c6d2  tidy (dumbledad, Jun 27, 2021)
a7f7a19  flake fixes (dumbledad, Jun 27, 2021)
a821f9b  Documentation typos (Jun 28, 2021)
45c4d2a  Adding allow_incomplete_labels to Seg'ModelBase (Jun 28, 2021)
65b9df3  Merge branch 'main' into alberto/inference (dumbledad, Jun 29, 2021)
86a8be7  Checking that partial ground truth is not allowed unless explicit (dumbledad, Jun 29, 2021)
58ab13b  Added unit test of IPYNB-HTML for partial ground truth (Jun 30, 2021)
a4402f7  flake fixes (Jun 30, 2021)
5f68941  temp change for end2end test (dumbledad, Jul 1, 2021)
987234a  WSL package for .Net on linux (dumbledad, Jul 1, 2021)
b6241d2  Passing on allow_partial in lightning model base (dumbledad, Jul 1, 2021)
5f173f4  Reverting change made for end2end test only (dumbledad, Jul 1, 2021)
edb88d6  Fixing param comment (Jul 2, 2021)
53ea4fc  Fixing unnecessary allow_incomplete, already in args (Jul 2, 2021)
3a86cf9  Removing redundant isinstance (Jul 2, 2021)
b47e197  Moving duplicated fragment to util function (Jul 2, 2021)
615ae1a  missing bracket (dumbledad, Jul 4, 2021)
6a0bd3a  Clearer comments for save_aggregates_to_csv (dumbledad, Jul 4, 2021)
b3ed3f1  Unit test for InnerEyeContainer setup pass through (dumbledad, Jul 4, 2021)
da79dd3  flake fixes (I hope!) (dumbledad, Jul 4, 2021)
eadd7f9  changelog (dumbledad, Jul 4, 2021)
f9540ed  Odd fix, moved definition to enclosing scope (dumbledad, Jul 4, 2021)
5f4fe5b  removing Path(__file__) (Jul 5, 2021)
411277c  using is_missing again (Jul 5, 2021)
5a5f946  no partial images! (Jul 5, 2021)
f1fd3c8  Merge branch 'main' into alberto/inference (Jul 5, 2021)
96840e8  Merge branch 'main' into alberto/inference (Jul 5, 2021)
1 change: 1 addition & 0 deletions .gitignore
@@ -42,6 +42,7 @@ wheels/
.installed.cfg
*.egg
MANIFEST
packages-microsoft-prod.deb

# PyInstaller
# Usually these files are written by a python script from a template
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -13,6 +13,8 @@ created.
## Upcoming

### Added
- ([#465](https://github.com/microsoft/InnerEye-DeepLearning/pull/465/)) Added the ability to run the segmentation inference
module on test data with missing or partial ground truth files.
- ([#492](https://github.com/microsoft/InnerEye-DeepLearning/pull/492)) Adding capability for regression tests for test
jobs that run in AzureML.

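
To make the new CHANGELOG entry concrete, here is a minimal sketch of the situation the PR enables. The subject/filePath/channel column names follow the usual InnerEye dataset.csv layout, but the patients and file names are invented: patient 2 has no row for the "kidney" ground truth channel. Before this PR such a test set aborted inference; with the new `allow_incomplete_labels` flag (introduced in config.py below), inference runs and the missing structure is reported as NaN in the metrics.

```python
import pandas as pd

# Hypothetical dataset.csv for a two-structure segmentation task.
# Patient 2 is deliberately missing the "kidney" ground truth channel.
dataset_df = pd.DataFrame(
    columns=["subject", "filePath", "channel"],
    data=[
        ["1", "p1/ct.nii.gz", "ct"],
        ["1", "p1/liver.nii.gz", "liver"],
        ["1", "p1/kidney.nii.gz", "kidney"],
        ["2", "p2/ct.nii.gz", "ct"],
        ["2", "p2/liver.nii.gz", "liver"],
        # no "kidney" row for patient 2
    ])
```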
8 changes: 8 additions & 0 deletions InnerEye/ML/config.py
@@ -474,17 +474,25 @@ class SegmentationModelBase(ModelConfigBase):
is_plotting_enabled: bool = param.Boolean(True, doc="If true, various overview plots with results are generated "
"during model evaluation. Set to False if you see "
"non-deterministic pull request build failures.")

show_patch_sampling: int = param.Integer(1, bounds=(0, None),
doc="Number of patients from the training set for which the effect of"
"patch sampling will be shown. Nifti images and thumbnails for each"
"of the first N subjects in the training set will be "
"written to the outputs folder.")

#: If true an error is raised in InnerEye.ML.utils.io_util.load_labels_from_dataset_source if the labels are not
#: mutually exclusive. Some loss functions (e.g. SoftDice) may produce results on overlapping labels, but others (e.g.
#: FocalLoss) will fail with a cryptic error message. Set to false if you are sure that you want to use labels that
#: are not mutually exclusive.
check_exclusive: bool = param.Boolean(True, doc="Raise an error if the segmentation labels are not mutually exclusive.")

allow_incomplete_labels: bool = param.Boolean(
default=False,
doc="If False, the default, then test patient data must include all of the ground truth labels. If true then "
"some test patient data with missing ground truth data is allowed and will be reflected in the patient "
"counts in the metrics and report.")

def __init__(self, center_size: Optional[TupleInt3] = None,
inference_stride_size: Optional[TupleInt3] = None,
min_l_rate: float = 0,
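
A sketch of how a model configuration might opt in to the new behaviour. The class and structure names are invented; the keyword mirrors the `param.Boolean` declared above, and leaving it at its default of False preserves the previous strict behaviour for existing configs.

```python
from InnerEye.ML.config import SegmentationModelBase

class LiverKidneyWithPartialLabels(SegmentationModelBase):
    """Hypothetical config that tolerates missing ground truth on the test set."""

    def __init__(self) -> None:
        super().__init__(
            ground_truth_ids=["liver", "kidney"],  # invented structure names
            allow_incomplete_labels=True)          # default is False
```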
51 changes: 33 additions & 18 deletions InnerEye/ML/dataset/full_image_dataset.py
@@ -216,7 +216,7 @@ def __init__(self, args: SegmentationModelBase, data_frame: pd.DataFrame,
full_image_sample_transforms: Optional[Compose3D[Sample]] = None):
super().__init__(args, data_frame)
self.full_image_sample_transforms = full_image_sample_transforms

self.allow_incomplete_labels = args.allow_incomplete_labels
# Check base_path
assert self.args.local_dataset is not None
if not self.args.local_dataset.is_dir():
@@ -250,7 +250,8 @@ def _extension_from_df_file_paths(file_paths: List[str]) -> str:
def get_samples_at_index(self, index: int) -> List[Sample]:
# load the channels into memory
ds = self.dataset_sources[self.dataset_indices[index]]
samples = [io_util.load_images_from_dataset_source(dataset_source=ds, check_exclusive=self.args.check_exclusive)] # type: ignore
samples = [io_util.load_images_from_dataset_source(dataset_source=ds,
check_exclusive=self.args.check_exclusive)] # type: ignore
return [Compose3D.apply(self.full_image_sample_transforms, x) for x in samples]

def _load_dataset_sources(self) -> Dict[str, PatientDatasetSource]:
@@ -259,34 +260,40 @@ def _load_dataset_sources(self) -> Dict[str, PatientDatasetSource]:
local_dataset_root_folder=self.args.local_dataset,
image_channels=self.args.image_channels,
ground_truth_channels=self.args.ground_truth_ids,
mask_channel=self.args.mask_id
)
mask_channel=self.args.mask_id,
allow_incomplete_labels=self.allow_incomplete_labels)


def convert_channels_to_file_paths(channels: List[str],
rows: pd.DataFrame,
local_dataset_root_folder: Path,
patient_id: str) -> Tuple[List[Path], str]:
patient_id: str,
allow_incomplete_labels: bool = False) -> Tuple[List[Optional[Path]], str]:
"""
Returns: 1) The full path for files specified in the training, validation and testing datasets, and
2) Missing channels or missing files.
Returns: 1) A list of file path objects for the channels specified in the training, validation and testing datasets, and
2) a string describing any missing channels, missing files, or duplicated channel entries for the patient.

:param channels: channel type defined in the configuration file
:param rows: Input Pandas dataframe object containing subjectIds, path of local dataset, channel information
:param local_dataset_root_folder: Root directory which points to the local dataset
:param patient_id: string which contains subject identifier
:param allow_incomplete_labels: boolean flag. If false, all ground truth files must be provided. If true, ground
truth files are optional
"""
paths: List[Path] = []
failed_channel_info: str = ''
paths: List[Optional[Path]] = []
failed_channel_info = ''

for channel_id in channels:
row = rows.loc[rows[CSV_CHANNEL_HEADER] == channel_id]
if len(row) == 0:
if len(row) == 0 and not allow_incomplete_labels:
failed_channel_info += f"Patient {patient_id} does not have channel '{channel_id}'" + os.linesep
elif len(row) == 0 and allow_incomplete_labels:
# Append a placeholder to preserve the order of missing channels
paths.append(None)
elif len(row) > 1:
failed_channel_info += f"Patient {patient_id} has more than one entry for channel '{channel_id}'" + \
os.linesep
else:
elif len(row) == 1:
image_path = local_dataset_root_folder / row[CSV_PATH_HEADER].values[0]
if not image_path.is_file():
failed_channel_info += f"Patient {patient_id}, file {image_path} does not exist" + os.linesep
@@ -300,7 +307,8 @@ def load_dataset_sources(dataframe: pd.DataFrame,
local_dataset_root_folder: Path,
image_channels: List[str],
ground_truth_channels: List[str],
mask_channel: Optional[str]) -> Dict[str, PatientDatasetSource]:
mask_channel: Optional[str],
allow_incomplete_labels: bool = False) -> Dict[str, PatientDatasetSource]:
"""
Prepares a patient-to-images mapping from a dataframe read directly from a dataset CSV file.
The dataframe contains per-patient per-channel image information, relative to a root directory.
@@ -311,6 +319,8 @@ def load_dataset_sources(dataframe: pd.DataFrame,
:param image_channels: The names of the image channels that should be used in the result.
:param ground_truth_channels: The names of the ground truth channels that should be used in the result.
:param mask_channel: The name of the mask channel that should be used in the result. This can be None.
:param allow_incomplete_labels: Boolean flag. If false, all ground truth files must be provided. If true, ground
truth files are optional. Default value is false.
:return: A dictionary mapping from an integer subject ID to a PatientDatasetSource.
"""
expected_headers = {CSV_SUBJECT_HEADER, CSV_PATH_HEADER, CSV_CHANNEL_HEADER}
@@ -328,16 +338,19 @@ def load_dataset_sources(dataframe: pd.DataFrame,
def get_mask_channel_or_default() -> Optional[Path]:
if mask_channel is None:
return None
paths = get_paths_for_channel_ids(channels=[mask_channel], allow_incomplete_labels_flag=allow_incomplete_labels)
if len(paths) == 0:
return None
else:
return get_paths_for_channel_ids(channels=[mask_channel])[0]
return paths[0]

def get_paths_for_channel_ids(channels: List[str]) -> List[Path]:
def get_paths_for_channel_ids(channels: List[str], allow_incomplete_labels_flag: bool) -> List[Optional[Path]]:
if len(set(channels)) < len(channels):
raise ValueError(f"ids have duplicated entries: {channels}")
rows = dataframe.loc[dataframe[CSV_SUBJECT_HEADER] == patient_id]
# converts channels to paths and makes second sanity check for channel data
paths, failed_channel_info = convert_channels_to_file_paths(channels, rows, local_dataset_root_folder,
patient_id)
patient_id, allow_incomplete_labels_flag)

if failed_channel_info:
raise ValueError(failed_channel_info)
@@ -349,9 +362,11 @@ def get_paths_for_channel_ids(channels: List[str]) -> List[Path]:
metadata = PatientMetadata.from_dataframe(dataframe, patient_id)
dataset_sources[patient_id] = PatientDatasetSource(
metadata=metadata,
image_channels=get_paths_for_channel_ids(channels=image_channels), # type: ignore
image_channels=get_paths_for_channel_ids(channels=image_channels, # type: ignore
allow_incomplete_labels_flag=False),
mask_channel=get_mask_channel_or_default(),
ground_truth_channels=get_paths_for_channel_ids(channels=ground_truth_channels) # type: ignore
)
ground_truth_channels=get_paths_for_channel_ids(channels=ground_truth_channels, # type: ignore
allow_incomplete_labels_flag=allow_incomplete_labels),
allow_incomplete_labels=allow_incomplete_labels)

return dataset_sources
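
A hedged usage sketch of the function above, reusing `dataset_df` from the earlier snippet. The root folder is hypothetical, and the per-file existence checks mean the paths would have to exist on disk for a real run:

```python
from pathlib import Path

from InnerEye.ML.dataset.full_image_dataset import load_dataset_sources

sources = load_dataset_sources(
    dataframe=dataset_df,
    local_dataset_root_folder=Path("/data/my_dataset"),  # hypothetical root
    image_channels=["ct"],
    ground_truth_channels=["liver", "kidney"],
    mask_channel=None,
    allow_incomplete_labels=True)
# Patient 2's missing "kidney" channel now surfaces as a None entry, e.g.
#   ground_truth_channels == [Path("/data/my_dataset/p2/liver.nii.gz"), None]
# instead of raising a ValueError during dataset construction.
```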
7 changes: 6 additions & 1 deletion InnerEye/ML/dataset/sample.py
@@ -129,19 +129,24 @@ class PatientDatasetSource(SampleBase):
Dataset source locations for channels associated with a given patient in a particular dataset.
"""
image_channels: List[PathOrString]
ground_truth_channels: List[PathOrString]
ground_truth_channels: List[Optional[PathOrString]]
mask_channel: Optional[PathOrString]
metadata: PatientMetadata
allow_incomplete_labels: Optional[bool] = False

def __post_init__(self) -> None:
# make sure all properties are populated
common_util.check_properties_are_not_none(self, ignore=["mask_channel"])

if not self.image_channels:
raise ValueError("image_channels cannot be empty")

if not self.ground_truth_channels:
raise ValueError("ground_truth_channels cannot be empty")

if self.ground_truth_channels.count(None) > 0 and not self.allow_incomplete_labels:
raise ValueError("all ground_truth_channels must be provided")


@dataclass(frozen=True)
class Sample(SampleBase):
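
The new guard is straightforward to exercise in a test, along the lines of the unit tests this PR adds (sketch only; the `metadata` argument is assumed to come from an existing fixture or helper):

```python
import pytest
from pathlib import Path

from InnerEye.ML.dataset.sample import PatientDatasetSource, PatientMetadata

def test_none_ground_truth_needs_opt_in(metadata: PatientMetadata) -> None:
    kwargs = dict(metadata=metadata,
                  image_channels=[Path("p2/ct.nii.gz")],
                  ground_truth_channels=[Path("p2/liver.nii.gz"), None],
                  mask_channel=None)
    # Without the opt-in, a None ground truth channel must raise:
    with pytest.raises(ValueError):
        PatientDatasetSource(**kwargs)
    # With it, the same arguments are accepted:
    PatientDatasetSource(**kwargs, allow_incomplete_labels=True)
```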
8 changes: 5 additions & 3 deletions InnerEye/ML/lightning_base.py
@@ -161,11 +161,13 @@ def setup(self) -> None:
unique_ids = set(split_data[CSV_SUBJECT_HEADER])
for patient_id in unique_ids:
rows = split_data.loc[split_data[CSV_SUBJECT_HEADER] == patient_id]
allow_incomplete_labels = self.config.allow_incomplete_labels # type: ignore
# Converts channels from data frame to file paths and gets errors if any
__, failed_channel_info = convert_channels_to_file_paths(all_channels,
rows,
local_dataset_root_folder,
patient_id)
rows,
local_dataset_root_folder,
patient_id,
allow_incomplete_labels)
full_failed_channel_info += failed_channel_info

if full_failed_channel_info:
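
Behaviourally, with the flag left at False, setup() still aggregates every missing-channel message across all patients into a single ValueError; setting it to True drops the ground truth entries from that report, so setup completes and the gaps are handled later, at metrics time. A sketch, where `InnerEyeContainer` is assumed to be the container whose setup() is shown above and the config is the hypothetical one from the config.py snippet:

```python
from InnerEye.ML.lightning_base import InnerEyeContainer  # assumed location

config = LiverKidneyWithPartialLabels()  # hypothetical config from earlier
container = InnerEyeContainer(config)
container.setup()  # completes instead of raising on the partial test set
```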
42 changes: 30 additions & 12 deletions InnerEye/ML/metrics.py
@@ -12,6 +12,7 @@

import SimpleITK as sitk
import numpy as np
from numpy.core.numeric import NaN
import torch
import torch.nn.functional as F
from azureml.core import Run
@@ -21,12 +22,13 @@
from InnerEye.Common.type_annotations import DictStrFloat, TupleFloat3
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.config import BACKGROUND_CLASS_NAME
from InnerEye.ML.metrics_dict import DataframeLogger, INTERNAL_TO_LOGGING_COLUMN_NAMES, MetricsDict, \
ScalarMetricsDict
from InnerEye.ML.metrics_dict import (DataframeLogger, INTERNAL_TO_LOGGING_COLUMN_NAMES, MetricsDict,
ScalarMetricsDict)
from InnerEye.ML.scalar_config import ScalarLoss
from InnerEye.ML.utils.image_util import binaries_from_multi_label_array, is_binary_array
from InnerEye.ML.utils.io_util import reverse_tuple_float3
from InnerEye.ML.utils.metrics_util import binary_classification_accuracy, mean_absolute_error, r2_score
from InnerEye.ML.utils.metrics_util import (binary_classification_accuracy, mean_absolute_error,
r2_score, is_missing_ground_truth)
from InnerEye.ML.utils.ml_util import check_size_matches
from InnerEye.ML.utils.sequence_utils import get_masked_model_outputs_and_labels

@@ -56,15 +58,15 @@ class InferenceMetricsForSegmentation(InferenceMetrics):
"""
Stores metrics for segmentation models, per execution mode and epoch.
"""
data_split: ModelExecutionMode
execution_mode: ModelExecutionMode
metrics: float

def get_metrics_log_key(self) -> str:
"""
Gets a string name for logging the metrics specific to the execution mode (train, val, test)
:return:
"""
return f"InferenceMetrics_{self.data_split.value}"
return f"InferenceMetrics_{self.execution_mode.value}"

def log_metrics(self, run_context: Run = None) -> None:
"""
@@ -230,9 +232,10 @@ def calculate_metrics_per_class(segmentation: np.ndarray,
Calculate the dice for all foreground structures (the background class is completely ignored).
Returns a MetricsDict with metrics for each of the foreground
structures. Metrics are NaN if both ground truth and prediction are all zero for a class.
If the first element of a ground truth image channel is NaN, the channel is flagged as missing and not used.
:param ground_truth_ids: The names of all foreground classes.
:param segmentation: predictions multi-value array with dimensions: [Z x Y x X]
:param ground_truth: ground truth binary array with dimensions: [C x Z x Y x X]
:param ground_truth: ground truth binary array with dimensions: [C x Z x Y x X].
:param voxel_spacing: voxel_spacing in 3D Z x Y x X
:param patient_id: for logging
"""
@@ -242,15 +245,34 @@ def calculate_metrics_per_class(segmentation: np.ndarray,
f"the label tensor indicates that there are {number_of_classes - 1} classes.")
binaries = binaries_from_multi_label_array(segmentation, number_of_classes)

all_classes_are_binary = [is_binary_array(ground_truth[label_id]) for label_id in range(ground_truth.shape[0])]
if not np.all(all_classes_are_binary):
binary_classes = [is_binary_array(ground_truth[label_id]) for label_id in range(ground_truth.shape[0])]

# If a ground truth channel is NaN, it will not be used for metrics computation.
nan_images = [is_missing_ground_truth(ground_truth[label_id]) for label_id in range(ground_truth.shape[0])]

# Check element-wise that each channel is either binary or NaN, never both.
assert np.all(np.array(binary_classes) == ~np.array(nan_images))

# Validate that all non-NaN ground truth channels contain only 0 or 1
if not np.all(np.array(binary_classes)[~np.array(nan_images)]):
raise ValueError("Ground truth values should be 0 or 1")
overlap_measures_filter = sitk.LabelOverlapMeasuresImageFilter()
hausdorff_distance_filter = sitk.HausdorffDistanceImageFilter()
metrics = MetricsDict(hues=ground_truth_ids)

def add_metric(metric_type: MetricType, value: float) -> None:
metrics.add_metric(metric_type, value, skip_nan_when_averaging=True, hue=ground_truth_ids[i - 1])

for i, prediction in enumerate(binaries):
# Skip if background image
if i == 0:
continue
# Record NaN metrics for missing ground truth, then skip
elif nan_images[i]:
add_metric(MetricType.DICE, NaN)
add_metric(MetricType.HAUSDORFF_mm, NaN)
add_metric(MetricType.MEAN_SURFACE_DIST_mm, NaN)
continue
check_size_matches(prediction, ground_truth[i], arg1_name="prediction", arg2_name="ground_truth")
if not is_binary_array(prediction):
raise ValueError("Predictions values should be 0 or 1")
@@ -280,10 +302,6 @@ def calculate_metrics_per_class(segmentation: np.ndarray,
except Exception as e:
logging.warning(f"Cannot calculate mean distance for structure {i} of patient {patient_id}: {e}")
logging.debug(f"Patient {patient_id}, class {i} has Dice score {dice}")

def add_metric(metric_type: MetricType, value: float) -> None:
metrics.add_metric(metric_type, value, skip_nan_when_averaging=True, hue=ground_truth_ids[i - 1])

add_metric(MetricType.DICE, dice)
add_metric(MetricType.HAUSDORFF_mm, hausdorff_distance)
add_metric(MetricType.MEAN_SURFACE_DIST_mm, mean_surface_distance)
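
The `is_missing_ground_truth` helper imported above is not part of this diff. Judging from the docstring note ("if the first element of a ground truth image channel is NaN..."), the convention it encodes might look like the following sketch; this is an assumption, not the repository's exact implementation:

```python
import numpy as np

def is_missing_ground_truth(ground_truth_channel: np.ndarray) -> bool:
    """A channel whose first voxel is NaN marks an absent ground truth label."""
    return bool(np.isnan(ground_truth_channel.ravel()[0]))

# A present channel is binary; an absent one is filled with NaN:
present = np.zeros((2, 2, 2))
absent = np.full((2, 2, 2), np.nan)
assert not is_missing_ground_truth(present)
assert is_missing_ground_truth(absent)
```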