Skip to content

Commit

Permalink
feat(AutoML): reworked AutoML CLI arguments - closes #151
Browse files Browse the repository at this point in the history
  • Loading branch information
muellerdo committed Jul 4, 2022
1 parent faf7d26 commit 3566791
Show file tree
Hide file tree
Showing 10 changed files with 255 additions and 249 deletions.
19 changes: 11 additions & 8 deletions aucmedi/automl/block_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,27 +43,30 @@ def block_evaluate(config):
The following attributes are stored in the `config` dictionary:
Attributes:
interface (str): String defining format interface for loading/storing data (`csv` or `directory`).
path_imagedir (str): Path to the directory containing the ground truth images.
path_data (str): Path to the index/class annotation file if required. (csv/json).
input (str): Path to the input file in which predicted csv file is stored.
output (str): Path to the directory in which evaluation figures and tables should be stored.
path_gt (str): Path to the index/class annotation file if required. (only for 'csv' interface).
path_pred (str): Path to the input file in which predicted csv file is stored.
path_evaldir (str): Path to the directory in which evaluation figures and tables should be stored.
ohe (bool): Boolean option whether annotation data is sparse categorical or one-hot encoded.
"""
# Obtain interface
if config["path_gt"] is None : config["interface"] = "directory"
else : config["interface"] = "csv"
# Peek into the dataset via the input interface
ds = input_interface(config["interface"],
config["path_imagedir"],
path_data=config["path_data"],
path_data=config["path_gt"],
training=True,
ohe=config["ohe"],
image_format=None)
(index_list, class_ohe, class_n, class_names, image_format) = ds

# Create output directory
if not os.path.exists(config["output"]) : os.mkdir(config["output"])
if not os.path.exists(config["path_evaldir"]):
os.mkdir(config["path_evaldir"])

# Read prediction csv
df_pred = pd.read_csv(config["input"])
df_pred = pd.read_csv(config["path_pred"])

# Create ground truth pandas dataframe
df_index = pd.DataFrame(data={"SAMPLE": index_list})
Expand Down Expand Up @@ -96,7 +99,7 @@ def block_evaluate(config):

# Evaluate performance via AUCMEDI evaluation submodule
evaluate_performance(data_pd, data_gt,
out_path=config["output"],
out_path=config["path_evaldir"],
class_names=class_names,
multi_label=multi_label,
metrics_threshold=0.5,
Expand Down
21 changes: 11 additions & 10 deletions aucmedi/automl/block_pred.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ def block_predict(config):
The following attributes are stored in the `config` dictionary:
Attributes:
path_imagedir (str): Path to the directory containing the images.
input (str): Path to the input directory in which fitted models and metadata are stored.
output (str): Path to the output file in which predicted csv file should be stored.
path_imagedir (str): Path to the directory containing the images for prediction.
path_modeldir (str): Path to the model directory in which fitted model weights and metadata are stored.
path_pred (str): Path to the output file in which predicted csv file should be stored.
xai_method (str or None): Key for XAI method.
xai_directory (str or None): Path to the output directory in which predicted image xai heatmaps should be stored.
batch_size (int): Number of samples inside a single batch.
Expand All @@ -63,11 +63,11 @@ def block_predict(config):
(index_list, _, _, _, image_format) = ds

# Verify existence of input directory
if not os.path.exists(config["input"]):
raise FileNotFoundError(config["input"])
if not os.path.exists(config["path_modeldir"]):
raise FileNotFoundError(config["path_modeldir"])

# Load metadata from training
path_meta = os.path.join(config["input"], "meta.training.json")
path_meta = os.path.join(config["path_modeldir"], "meta.training.json")
with open(path_meta, "r") as json_file:
meta_training = json.load(json_file)

Expand Down Expand Up @@ -123,7 +123,7 @@ def block_predict(config):
standardize_mode=model.meta_standardize,
**paras_datagen)
# Load model
path_model = os.path.join(config["input"], "model.last.hdf5")
path_model = os.path.join(config["path_modeldir"], "model.last.hdf5")
model.load(path_model)
# Start model inference
preds = model.predict(prediction_generator=pred_gen)
Expand All @@ -141,7 +141,8 @@ def block_predict(config):
standardize_mode=model.meta_standardize,
**paras_datagen)
# Load model
path_model = os.path.join(config["input"], "model.best_loss.hdf5")
path_model = os.path.join(config["path_modeldir"],
"model.best_loss.hdf5")
model.load(path_model)
# Start model inference via Augmenting
preds = predict_augmenting(model, pred_gen)
Expand All @@ -163,7 +164,7 @@ def block_predict(config):
standardize_mode=None,
**paras_datagen)
# Load composite model directory
el.load(config["input"])
el.load(config["path_modeldir"])
# Start model inference via ensemble learning
preds = el.predict(pred_gen)

Expand All @@ -173,7 +174,7 @@ def block_predict(config):
df_merged = pd.concat([df_index, df_pd], axis=1, sort=False)
df_merged.sort_values(by=["SAMPLE"], inplace=True)
# Store predictions to disk
df_merged.to_csv(config["output"], index=False)
df_merged.to_csv(config["path_pred"], index=False)

# Create XAI heatmaps
if config["xai_method"] is not None and config["xai_directory"] is not None:
Expand Down
28 changes: 16 additions & 12 deletions aucmedi/automl/block_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,9 @@ def block_train(config):
The following attributes are stored in the `config` dictionary:
Attributes:
interface (str): String defining format interface for loading/storing data (`csv` or `directory`).
path_imagedir (str): Path to the directory containing the images.
path_data (str): Path to the index/class annotation file if required. (csv/json).
output (str): Path to the output directory in which fitted models and metadata are stored.
path_modeldir (str): Path to the output directory in which fitted models and metadata are stored.
path_gt (str): Path to the index/class annotation file if required. (only for 'csv' interface).
analysis (str): Analysis mode for the AutoML training. Options: `["minimal", "standard", "advanced"]`.
ohe (bool): Boolean option whether annotation data is sparse categorical or one-hot encoded.
three_dim (bool): Boolean, whether data is 2D or 3D.
Expand All @@ -67,17 +66,21 @@ def block_train(config):
metalearner (str): Key for Metalearner or Aggregate function.
architecture (str or list of str): Key (str) of a neural network model Architecture class instance.
"""
# Obtain interface
if config["path_gt"] is None : config["interface"] = "directory"
else : config["interface"] = "csv"
# Peek into the dataset via the input interface
ds = input_interface(config["interface"],
config["path_imagedir"],
path_data=config["path_data"],
path_data=config["path_gt"],
training=True,
ohe=config["ohe"],
image_format=None)
(index_list, class_ohe, class_n, class_names, image_format) = ds

# Create output directory
if not os.path.exists(config["output"]) : os.mkdir(config["output"])
if not os.path.exists(config["path_modeldir"]):
os.mkdir(config["path_modeldir"])

# Identify task (multi-class vs multi-label)
if np.sum(class_ohe) > class_ohe.shape[0] : config["multi_label"] = True
Expand All @@ -95,20 +98,21 @@ def block_train(config):

# Store meta information
config["class_names"] = class_names
path_meta = os.path.join(config["output"], "meta.training.json")
path_meta = os.path.join(config["path_modeldir"], "meta.training.json")
with open(path_meta, "w") as json_io:
json.dump(config, json_io)

# Define Callbacks
callbacks = []
if config["analysis"] == "standard":
cb_loss = ModelCheckpoint(os.path.join(config["output"],
cb_loss = ModelCheckpoint(os.path.join(config["path_modeldir"],
"model.best_loss.hdf5"),
monitor="val_loss", verbose=1,
save_best_only=True)
callbacks.append(cb_loss)
if config["analysis"] in ["minimal", "standard"]:
cb_cl = CSVLogger(os.path.join(config["output"], "logs.training.csv"),
cb_cl = CSVLogger(os.path.join(config["path_modeldir"],
"logs.training.csv"),
separator=',', append=True)
callbacks.append(cb_cl)
if config["analysis"] != "minimal":
Expand Down Expand Up @@ -215,7 +219,7 @@ def block_train(config):
# Start model training
hist = model.train(training_generator=train_gen, **paras_train)
# Store model
path_model = os.path.join(config["output"], "model.last.hdf5")
path_model = os.path.join(config["path_modeldir"], "model.last.hdf5")
model.dump(path_model)
elif config["analysis"] == "standard":
# Setup neural network
Expand Down Expand Up @@ -248,7 +252,7 @@ def block_train(config):
validation_generator=val_gen,
**paras_train)
# Store model
path_model = os.path.join(config["output"], "model.last.hdf5")
path_model = os.path.join(config["path_modeldir"], "model.last.hdf5")
model.dump(path_model)
else:
# Sanity check of architecture config
Expand All @@ -275,7 +279,7 @@ def block_train(config):
# Start model training
hist = el.train(training_generator=train_gen, **paras_train)
# Store model directory
el.dump(config["output"])
el.dump(config["path_modeldir"])

# Plot fitting history
evaluate_fitting(train_history=hist, out_path=config["output"])
evaluate_fitting(train_history=hist, out_path=config["path_modeldir"])
Loading

0 comments on commit 3566791

Please sign in to comment.