feat(AutoML): reworked AutoML CLI arguments - closes #151

frankkramer-lab · Jul 4, 2022 · 3566791 · 3566791
1 parent faf7d26
commit 3566791
Show file tree

Hide file tree

Showing 10 changed files with 255 additions and 249 deletions.
diff --git a/aucmedi/automl/block_eval.py b/aucmedi/automl/block_eval.py
@@ -43,27 +43,30 @@ def block_evaluate(config):
     The following attributes are stored in the `config` dictionary:
 
     Attributes:
-        interface (str):                    String defining format interface for loading/storing data (`csv` or `dictionary`).
         path_imagedir (str):                Path to the directory containing the ground truth images.
-        path_data (str):                    Path to the index/class annotation file if required. (csv/json).
-        input (str):                        Path to the input file in which predicted csv file is stored.
-        output (str):                       Path to the directory in which evaluation figures and tables should be stored.
+        path_gt (str):                      Path to the index/class annotation file if required. (only for 'csv' interface).
+        path_pred (str):                    Path to the input csv file in which the predictions are stored.
+        path_evaldir (str):                 Path to the directory in which evaluation figures and tables should be stored.
         ohe (bool):                         Boolean option whether annotation data is sparse categorical or one-hot encoded.
     """
+    # Obtain interface
+    if config["path_gt"] is None : config["interface"] = "directory"
+    else : config["interface"] = "csv"
     # Peek into the dataset via the input interface
     ds = input_interface(config["interface"],
                          config["path_imagedir"],
-                         path_data=config["path_data"],
+                         path_data=config["path_gt"],
                          training=True,
                          ohe=config["ohe"],
                          image_format=None)
     (index_list, class_ohe, class_n, class_names, image_format) = ds
 
     # Create output directory
-    if not os.path.exists(config["output"]) : os.mkdir(config["output"])
+    if not os.path.exists(config["path_evaldir"]):
+        os.mkdir(config["path_evaldir"])
 
     # Read prediction csv
-    df_pred = pd.read_csv(config["input"])
+    df_pred = pd.read_csv(config["path_pred"])
 
     # Create ground truth pandas dataframe
     df_index = pd.DataFrame(data={"SAMPLE": index_list})
@@ -96,7 +99,7 @@ def block_evaluate(config):
 
     # Evaluate performance via AUCMEDI evaluation submodule
     evaluate_performance(data_pd, data_gt,
-                         out_path=config["output"],
+                         out_path=config["path_evaldir"],
                          class_names=class_names,
                          multi_label=multi_label,
                          metrics_threshold=0.5,

diff --git a/aucmedi/automl/block_pred.py b/aucmedi/automl/block_pred.py
@@ -45,9 +45,9 @@ def block_predict(config):
     The following attributes are stored in the `config` dictionary:
 
     Attributes:
-        path_imagedir (str):                Path to the directory containing the images.
-        input (str):                        Path to the input directory in which fitted models and metadata are stored.
-        output (str):                       Path to the output file in which predicted csv file should be stored.
+        path_imagedir (str):                Path to the directory containing the images for prediction.
+        path_modeldir (str):                Path to the model directory in which fitted model weights and metadata are stored.
+        path_pred (str):                    Path to the output csv file in which the predictions should be stored.
         xai_method (str or None):           Key for XAI method.
         xai_directory (str or None):        Path to the output directory in which predicted image xai heatmaps should be stored.
         batch_size (int):                   Number of samples inside a single batch.
@@ -63,11 +63,11 @@ def block_predict(config):
     (index_list, _, _, _, image_format) = ds
 
     # Verify existence of input directory
-    if not os.path.exists(config["input"]):
-        raise FileNotFoundError(config["input"])
+    if not os.path.exists(config["path_modeldir"]):
+        raise FileNotFoundError(config["path_modeldir"])
 
     # Load metadata from training
-    path_meta = os.path.join(config["input"], "meta.training.json")
+    path_meta = os.path.join(config["path_modeldir"], "meta.training.json")
     with open(path_meta, "r") as json_file:
         meta_training = json.load(json_file)
 
@@ -123,7 +123,7 @@ def block_predict(config):
                                  standardize_mode=model.meta_standardize,
                                  **paras_datagen)
         # Load model
-        path_model = os.path.join(config["input"], "model.last.hdf5")
+        path_model = os.path.join(config["path_modeldir"], "model.last.hdf5")
         model.load(path_model)
         # Start model inference
         preds = model.predict(prediction_generator=pred_gen)
@@ -141,7 +141,8 @@ def block_predict(config):
                                  standardize_mode=model.meta_standardize,
                                  **paras_datagen)
         # Load model
-        path_model = os.path.join(config["input"], "model.best_loss.hdf5")
+        path_model = os.path.join(config["path_modeldir"],
+                                  "model.best_loss.hdf5")
         model.load(path_model)
         # Start model inference via Augmenting
         preds = predict_augmenting(model, pred_gen)
@@ -163,7 +164,7 @@ def block_predict(config):
                                  standardize_mode=None,
                                  **paras_datagen)
         # Load composite model directory
-        el.load(config["input"])
+        el.load(config["path_modeldir"])
         # Start model inference via ensemble learning
         preds = el.predict(pred_gen)
 
@@ -173,7 +174,7 @@ def block_predict(config):
     df_merged = pd.concat([df_index, df_pd], axis=1, sort=False)
     df_merged.sort_values(by=["SAMPLE"], inplace=True)
     # Store predictions to disk
-    df_merged.to_csv(config["output"], index=False)
+    df_merged.to_csv(config["path_pred"], index=False)
 
     # Create XAI heatmaps
     if config["xai_method"] is not None and config["xai_directory"] is not None:

diff --git a/aucmedi/automl/block_train.py b/aucmedi/automl/block_train.py
@@ -52,10 +52,9 @@ def block_train(config):
     The following attributes are stored in the `config` dictionary:
 
     Attributes:
-        interface (str):                    String defining format interface for loading/storing data (`csv` or `dictionary`).
         path_imagedir (str):                Path to the directory containing the images.
-        path_data (str):                    Path to the index/class annotation file if required. (csv/json).
-        output (str):                       Path to the output directory in which fitted models and metadata are stored.
+        path_modeldir (str):                Path to the output directory in which fitted models and metadata are stored.
+        path_gt (str):                      Path to the index/class annotation file if required. (only for 'csv' interface).
         analysis (str):                     Analysis mode for the AutoML training. Options: `["minimal", "standard", "advanced"]`.
         ohe (bool):                         Boolean option whether annotation data is sparse categorical or one-hot encoded.
         three_dim (bool):                   Boolean, whether data is 2D or 3D.
@@ -67,17 +66,21 @@ def block_train(config):
         metalearner (str):                  Key for Metalearner or Aggregate function.
         architecture (str or list of str):  Key (str) of a neural network model Architecture class instance.
     """
+    # Obtain interface
+    if config["path_gt"] is None : config["interface"] = "directory"
+    else : config["interface"] = "csv"
     # Peek into the dataset via the input interface
     ds = input_interface(config["interface"],
                          config["path_imagedir"],
-                         path_data=config["path_data"],
+                         path_data=config["path_gt"],
                          training=True,
                          ohe=config["ohe"],
                          image_format=None)
     (index_list, class_ohe, class_n, class_names, image_format) = ds
 
     # Create output directory
-    if not os.path.exists(config["output"]) : os.mkdir(config["output"])
+    if not os.path.exists(config["path_modeldir"]):
+        os.mkdir(config["path_modeldir"])
 
     # Identify task (multi-class vs multi-label)
     if np.sum(class_ohe) > class_ohe.shape[0] : config["multi_label"] = True
@@ -95,20 +98,21 @@ def block_train(config):
 
     # Store meta information
     config["class_names"] = class_names
-    path_meta = os.path.join(config["output"], "meta.training.json")
+    path_meta = os.path.join(config["path_modeldir"], "meta.training.json")
     with open(path_meta, "w") as json_io:
         json.dump(config, json_io)
 
     # Define Callbacks
     callbacks = []
     if config["analysis"] == "standard":
-        cb_loss = ModelCheckpoint(os.path.join(config["output"],
+        cb_loss = ModelCheckpoint(os.path.join(config["path_modeldir"],
                                                "model.best_loss.hdf5"),
                                   monitor="val_loss", verbose=1,
                                   save_best_only=True)
         callbacks.append(cb_loss)
     if config["analysis"] in ["minimal", "standard"]:
-        cb_cl = CSVLogger(os.path.join(config["output"], "logs.training.csv"),
+        cb_cl = CSVLogger(os.path.join(config["path_modeldir"],
+                                       "logs.training.csv"),
                           separator=',', append=True)
         callbacks.append(cb_cl)
     if config["analysis"] != "minimal":
@@ -215,7 +219,7 @@ def block_train(config):
         # Start model training
         hist = model.train(training_generator=train_gen, **paras_train)
         # Store model
-        path_model = os.path.join(config["output"], "model.last.hdf5")
+        path_model = os.path.join(config["path_modeldir"], "model.last.hdf5")
         model.dump(path_model)
     elif config["analysis"] == "standard":
         # Setup neural network
@@ -248,7 +252,7 @@ def block_train(config):
                            validation_generator=val_gen,
                            **paras_train)
         # Store model
-        path_model = os.path.join(config["output"], "model.last.hdf5")
+        path_model = os.path.join(config["path_modeldir"], "model.last.hdf5")
         model.dump(path_model)
     else:
         # Sanity check of architecture config
@@ -275,7 +279,7 @@ def block_train(config):
         # Start model training
         hist = el.train(training_generator=train_gen, **paras_train)
         # Store model directory
-        el.dump(config["output"])
+        el.dump(config["path_modeldir"])
 
     # Plot fitting history
-    evaluate_fitting(train_history=hist, out_path=config["output"])
+    evaluate_fitting(train_history=hist, out_path=config["path_modeldir"])