Skip to content

Commit

Permalink
perf(AutoML): added sanity checks for multi_label supported metalearners
Browse files Browse the repository at this point in the history
  • Loading branch information
muellerdo committed Jun 13, 2022
1 parent dfea756 commit e5b8ae2
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 40 deletions.
54 changes: 38 additions & 16 deletions aucmedi/automl/block_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#-----------------------------------------------------#
# External libraries
import os
import numpy as np
import json
from tensorflow.keras.metrics import AUC
from tensorflow_addons.metrics import F1Score
Expand All @@ -39,24 +40,31 @@
# Building Blocks for Training #
#-----------------------------------------------------#
def block_train(config):
"""
""" Internal code block for AutoML training.
This function is called by the Command-Line-Interface (CLI) of AUCMEDI.
Args:
config (dict): Configuration dictionary containing all required
parameters for performing an AutoML training.
The following attributes are stored in the `config` dictionary:
Attributes:
interface
path_imagedir
path_data
output
analysis (str): bla.
ohe (bool): bla
multi_label
data_aug
two_dim
shape_3D (tuple of int): bla.
epochs
batch_size
workers
metalearner
architecture
interface (str): String defining format interface for loading/storing data (`csv` or `dictionary`).
path_imagedir (str): Path to the directory containing the images.
path_data (str): Path to the index/class annotation file if required. (csv/json).
output (str): Path to the output directory in which fitted models and metadata are stored.
analysis (str): Analysis mode for the AutoML training. Options: `["minimal", "standard", "advanced"]`.
ohe (bool): Boolean option whether annotation data is sparse categorical or one-hot encoded.
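data_aug (bool): Boolean option whether data augmentation is applied during training.
two_dim (bool): Boolean option whether the image data is 2D (`True`) or 3D (`False`).
shape_3D (tuple of int): Desired input shape for 3D volumes, e.g. `(128,128,128)`. Only relevant if `two_dim` is `False`.
epochs (int): Number of epochs for model training.
batch_size (int): Number of samples inside a single batch.
workers (int): Number of workers used for processing batches.
metalearner (str): Key of the metalearner used for combining predictions in the `advanced` analysis mode.
architecture (str or list of str): Key of a single neural network architecture (str) or multiple keys (list of str).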
"""
# Peek into the dataset via the input interface
ds = input_interface(config["interface"],
Expand All @@ -70,6 +78,20 @@ def block_train(config):
# Create output directory
if not os.path.exists(config["output"]) : os.mkdir(config["output"])

# Identify task (multi-class vs multi-label)
if np.sum(class_ohe) > class_ohe.shape[0] : config["multi_label"] = True
else : config["multi_label"] = False

# Sanity check on multi-label metalearner
multilabel_metalearner_supported = ["mlp", "k_neighbors", "random_forest",
"weighted_mean", "best_model",
"decision_tree", "mean", "median"]
if config["multi_label"] and config["analysis"] == "advanced" and \
config["metalearner"] not in multilabel_metalearner_supported:
raise ValueError("Non-compatible metalearner selected for multi-label"\
+ " classification. Supported metalearner:",
multilabel_metalearner_supported)

# Store meta information
config["class_names"] = class_names
path_meta = os.path.join(config["output"], "meta.training.json")
Expand Down
15 changes: 3 additions & 12 deletions tests/test_automl_block_pred.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ def setUpClass(self):
labels_ohe = [0, 0, 0, 0]
class_index = np.random.randint(0, 4)
labels_ohe[class_index] = 1
class_index = np.random.randint(0, 4)
labels_ohe[class_index] = 1
data["sample_" + str(i)] = labels_ohe
self.tmp_csv_ohe = tempfile.NamedTemporaryFile(mode="w",
prefix="tmp.aucmedi.",
Expand All @@ -104,7 +106,6 @@ def test_minimal(self):
"output": input_dir.name,
"analysis": "minimal",
"ohe": False,
"multi_label": False,
"data_aug": True,
"two_dim": True,
"shape_3D": (128,128,128),
Expand Down Expand Up @@ -153,14 +154,12 @@ def test_minimal_multilabel(self):
"output": input_dir.name,
"analysis": "minimal",
"ohe": True,
"multi_label": True,
"data_aug": True,
"two_dim": True,
"shape_3D": (128,128,128),
"epochs": 8,
"batch_size": 4,
"workers": 1,
"metalearner": "logistic_regression",
"architecture": "Vanilla"
}
# Run AutoML training block
Expand Down Expand Up @@ -198,7 +197,6 @@ def test_minimal_3D(self):
"output": input_dir.name,
"analysis": "minimal",
"ohe": False,
"multi_label": False,
"data_aug": True,
"two_dim": False,
"shape_3D": (16, 16, 16),
Expand Down Expand Up @@ -246,7 +244,6 @@ def test_standard(self):
"output": input_dir.name,
"analysis": "standard",
"ohe": False,
"multi_label": False,
"data_aug": True,
"two_dim": True,
"epochs": 8,
Expand Down Expand Up @@ -290,13 +287,11 @@ def test_standard_multilabel(self):
"output": input_dir.name,
"analysis": "standard",
"ohe": True,
"multi_label": True,
"data_aug": True,
"two_dim": True,
"epochs": 8,
"batch_size": 4,
"workers": 1,
"metalearner": "logistic_regression",
"architecture": "Vanilla"
}
# Run AutoML training block
Expand Down Expand Up @@ -334,7 +329,6 @@ def test_standard_3D(self):
"output": input_dir.name,
"analysis": "standard",
"ohe": False,
"multi_label": False,
"data_aug": True,
"two_dim": False,
"shape_3D": (16, 16, 16),
Expand Down Expand Up @@ -382,7 +376,6 @@ def test_composite(self):
"output": input_dir.name,
"analysis": "advanced",
"ohe": False,
"multi_label": False,
"data_aug": True,
"two_dim": True,
"epochs": 8,
Expand Down Expand Up @@ -426,13 +419,12 @@ def test_composite_multilabel(self):
"output": input_dir.name,
"analysis": "advanced",
"ohe": True,
"multi_label": True,
"data_aug": True,
"two_dim": True,
"epochs": 8,
"batch_size": 1,
"workers": 1,
"metalearner": "logistic_regression",
"metalearner": "mean",
"architecture": ["Vanilla", "Vanilla"]
}
# Run AutoML training block
Expand Down Expand Up @@ -470,7 +462,6 @@ def test_composite_3D(self):
"output": input_dir.name,
"analysis": "advanced",
"ohe": False,
"multi_label": False,
"data_aug": True,
"two_dim": False,
"shape_3D": (16, 16, 16),
Expand Down
17 changes: 5 additions & 12 deletions tests/test_automl_block_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ def setUpClass(self):
labels_ohe = [0, 0, 0, 0]
class_index = np.random.randint(0, 4)
labels_ohe[class_index] = 1
class_index = np.random.randint(0, 4)
labels_ohe[class_index] = 1
data["sample_" + str(i)] = labels_ohe
self.tmp_csv_ohe = tempfile.NamedTemporaryFile(mode="w",
prefix="tmp.aucmedi.",
Expand All @@ -103,7 +105,6 @@ def test_minimal(self):
"output": output_dir.name,
"analysis": "minimal",
"ohe": False,
"multi_label": False,
"data_aug": True,
"two_dim": True,
"shape_3D": (128,128,128),
Expand Down Expand Up @@ -132,14 +133,13 @@ def test_minimal_multilabel(self):
"output": output_dir.name,
"analysis": "minimal",
"ohe": True,
"multi_label": True,
"data_aug": True,
"two_dim": True,
"shape_3D": (128,128,128),
"epochs": 8,
"batch_size": 4,
"workers": 1,
"metalearner": "logistic_regression",
"metalearner": "mean",
"architecture": "Vanilla"
}
# Run AutoML training block
Expand All @@ -161,7 +161,6 @@ def test_minimal_3D(self):
"output": output_dir.name,
"analysis": "minimal",
"ohe": False,
"multi_label": False,
"data_aug": True,
"two_dim": False,
"shape_3D": (16, 16, 16),
Expand Down Expand Up @@ -193,7 +192,6 @@ def test_standard(self):
"output": output_dir.name,
"analysis": "standard",
"ohe": False,
"multi_label": False,
"data_aug": True,
"two_dim": True,
"epochs": 8,
Expand Down Expand Up @@ -222,13 +220,12 @@ def test_standard_multilabel(self):
"output": output_dir.name,
"analysis": "standard",
"ohe": True,
"multi_label": True,
"data_aug": True,
"two_dim": True,
"epochs": 8,
"batch_size": 4,
"workers": 1,
"metalearner": "logistic_regression",
"metalearner": "mean",
"architecture": "Vanilla"
}
# Run AutoML training block
Expand All @@ -251,7 +248,6 @@ def test_standard_3D(self):
"output": output_dir.name,
"analysis": "standard",
"ohe": False,
"multi_label": False,
"data_aug": True,
"two_dim": False,
"shape_3D": (16, 16, 16),
Expand Down Expand Up @@ -284,7 +280,6 @@ def test_composite(self):
"output": output_dir.name,
"analysis": "advanced",
"ohe": False,
"multi_label": False,
"data_aug": True,
"two_dim": True,
"epochs": 8,
Expand All @@ -310,13 +305,12 @@ def test_composite_multilabel(self):
"output": output_dir.name,
"analysis": "advanced",
"ohe": True,
"multi_label": True,
"data_aug": True,
"two_dim": True,
"epochs": 8,
"batch_size": 1,
"workers": 1,
"metalearner": "logistic_regression",
"metalearner": "mean",
"architecture": ["Vanilla", "Vanilla"]
}
# Run AutoML training block
Expand All @@ -336,7 +330,6 @@ def test_composite_3D(self):
"output": output_dir.name,
"analysis": "advanced",
"ohe": False,
"multi_label": False,
"data_aug": True,
"two_dim": False,
"shape_3D": (16, 16, 16),
Expand Down

0 comments on commit e5b8ae2

Please sign in to comment.