bhklab · skim2257 · Dec 12, 2023 · Jun 2, 2023 · Jun 16, 2023 · Dec 11, 2023
diff --git a/imgtools/autopipeline.py b/imgtools/autopipeline.py
diff --git a/imgtools/io/common.py b/imgtools/io/common.py
@@ -1,8 +1,10 @@
-import os, pathlib
+import os
+import pathlib
 from typing import Dict
 
 from pydicom.misc import is_dicom
 
+
 def find_dicom_paths(root_path: str, yield_directories: bool = False) -> str:
     """Find DICOM file paths in the specified root directory file tree.
 
@@ -33,6 +35,7 @@ def find_dicom_paths(root_path: str, yield_directories: bool = False) -> str:
                 if is_dicom(fpath):
                     yield fpath
 
+
 def file_name_convention() -> Dict:
     """
     This function returns the file name taxonomy which is used by ImageAutoOutput and Dataset class
@@ -48,4 +51,4 @@ def file_name_convention() -> Dict:
                             "RTDOSE": "dose", 
                             "RTSTRUCT": "mask"}
 
-    return file_name_convention
+    return file_name_convention
diff --git a/imgtools/io/dataset.py b/imgtools/io/dataset.py
@@ -1,5 +1,7 @@
 from genericpath import exists
-import os, pathlib, ast
+import os
+import pathlib
+import ast
 from typing import List, Sequence, Optional, Callable, Iterable, Dict,Tuple
 from tqdm import tqdm
 
@@ -12,18 +14,18 @@
 from imgtools.pipeline import Pipeline
 from joblib import Parallel, delayed
 
+
 class Dataset(tio.SubjectsDataset):
     """
     This class takes in medical dataset in the form of nrrds or directly from the dataset and converts the data into torchio.Subject object, which can be loaded into 
     torchio.SubjectDataset object.
     This class inherits from torchio.SubjectDataset object, which can support transforms and torch.Dataloader.
     Read more about torchio from https://torchio.readthedocs.io/quickstart.html and torchio.SubjectDataset from https://github.com/fepegar/torchio/blob/3e07b78da16d6db4da7193325b3f9cb31fc0911a/torchio/data/dataset.py#L101
     """
-    def __init__(
-        self,
-        subjects: Sequence[tio.Subject],
-        path: str,
-        ) -> List[tio.Subject]:
+    def __init__(self,
+                 subjects: Sequence[tio.Subject],
+                 path: str) -> List[tio.Subject]:
+
         super().__init__(subjects)
         self.subjects = subjects
         self.path = path
@@ -47,16 +49,16 @@ def load_image(
 
         for col in df_metadata.columns:
             if col.startswith("output_folder"):
-                df_metadata[col] = df_metadata[col].apply(lambda x: pathlib.Path(os.path.split(os.path.dirname(path))[0], x).as_posix() if isinstance(x, str) else x) #input folder joined with the rel path
+                df_metadata[col] = df_metadata[col].apply(lambda x: pathlib.Path(os.path.split(os.path.dirname(path))[0], x).as_posix() if isinstance(x, str) else x)  # input folder joined with the rel path
 
         output_streams = [("_").join(cols.split("_")[2:]) for cols in df_metadata.columns if cols.split("_")[0] == "output"]
         imp_metadata = [cols for cols in df_metadata.columns if cols.split("_")[0] in ("metadata")]
-        #Ignores multiple connection to single modality
+        # Ignores multiple connection to single modality
         if ignore_multi:
-            output_streams = [items for items in output_streams if items.split("_")[-1].isnumeric()==False]
-            imp_metadata = [items for items in imp_metadata if items.split("_")[-1].isnumeric()==False]
+            output_streams = [items for items in output_streams if not items.split("_")[-1].isnumeric()]
+            imp_metadata = [items for items in imp_metadata if not items.split("_")[-1].isnumeric()]
 
-        #Based on the file naming convention
+        # Based on the file naming convention
         subject_id_list = list(df_metadata.index)
         subjects = []
         for subject_id in tqdm(subject_id_list):
@@ -72,7 +74,7 @@ def load_image(
                     filename = col
                 path_mod = pathlib.Path(path, subject_id, col, f"{filename}.nii.gz").as_posix()
                 print(path_mod)
-                #All modalities except RTSTRUCT should be of type torchIO.ScalarImage 
+                # All modalities except RTSTRUCT should be of type torchIO.ScalarImage 
                 if os.path.exists(path_mod):
                     if col.split("_")[0]!="RTSTRUCT":
                         temp[f"mod_{col}"] = tio.ScalarImage(path_mod)
@@ -81,14 +83,14 @@ def load_image(
                         temp[f"mod_{col}"] = tio.LabelMap(path_mods)
                 else:
                     temp[f"mod_{col}"] = None
-                #For including metadata
+                # For including metadata
                 if metadata_name in imp_metadata:
-                    #convert string to proper datatype
+                    # convert string to proper datatype
                     meta = df_metadata.loc[subject_id,metadata_name]
                     if pd.notna(meta):
                         temp[metadata_name] = eval(meta)[0]
                     else:
-                        #torch dataloader doesnt accept None type
+                        # torch dataloader doesnt accept None type
                         temp[metadata_name] = {}
             subjects.append(tio.Subject(temp))
         return cls(subjects, path)
@@ -111,10 +113,10 @@ def load_directly(
         input = ImageAutoInput(path, modalities, n_jobs)
         df_metadata = input.df_combined
         output_streams = input.output_streams
-        #Ignores multiple connection to single modality
+        # Ignores multiple connection to single modality
         if ignore_multi:
-            output_streams = [items for items in output_streams if items.split("_")[-1].isnumeric()==False]
-        #Basic operations
+            output_streams = [items for items in output_streams if not items.split("_")[-1].isnumeric()]
+        # Basic operations
         subject_id_list = list(df_metadata.index)
         # basic image processing ops
         resample = Resample(spacing=spacing)
@@ -159,15 +161,15 @@ def process_one_subject(
                 image = resample(image)
                 temp[f"mod_{colname}"] = tio.ScalarImage.from_sitk(image)
             elif modality == "RTDOSE":
-                try: #For cases with no image present
+                try:  # For cases with no image present
                     doses = read_results[i].resample_dose(image)
                 except:
                     Warning("No CT image present. Returning dose image without resampling")
                     doses = read_results[i]
                 temp[f"mod_{colname}"] = tio.ScalarImage.from_sitk(doses)
                 temp[f"metadata_{colname}"] = read_results[i].get_metadata()
             elif modality == "RTSTRUCT":
-                #For RTSTRUCT, you need image or PT
+                # For RTSTRUCT, you need image or PT
                 structure_set = read_results[i]
                 conn_to = output_stream.split("_")[-1]
                 # make_binary_mask relative to ct/pet
@@ -181,7 +183,7 @@ def process_one_subject(
                 temp[f"metadata_{colname}"] = structure_set.roi_names
             elif modality == "PT":
                 try:
-                    #For cases with no image present
+                    # For cases with no image present
                     pet = read_results[i].resample_pet(image)
                 except:
                     Warning("No CT image present. Returning PT/PET image without resampling.")

diff --git a/imgtools/io/loaders.py b/imgtools/io/loaders.py
@@ -1,4 +1,6 @@
-import os, pathlib, json
+import os
+import pathlib
+import json
 import glob
 import re
 from typing import Optional, List
@@ -16,9 +18,11 @@
 from ..utils.crawl import *
 from ..utils.dicomutils import *
 
+
 def read_image(path):
     return sitk.ReadImage(path)
 
+
 def read_dicom_series(path: str,
                       series_id: Optional[str] = None,
                       recursive: bool = False, 
@@ -51,8 +55,8 @@ def read_dicom_series(path: str,
     reader = sitk.ImageSeriesReader()
     if file_names is None:
         file_names = reader.GetGDCMSeriesFileNames(path,
-                                                    seriesID=series_id if series_id else "",
-                                                    recursive=recursive)
+                                                   seriesID=series_id if series_id else "",
+                                                   recursive=recursive)
         # extract the names of the dicom files that are in the path variable, which is a directory
 
     reader.SetFileNames(file_names)
@@ -68,24 +72,28 @@ def read_dicom_series(path: str,
     return reader.Execute()
 
 
-
 def read_dicom_scan(path, series_id=None, recursive: bool=False, file_names=None) -> Scan:
     image = read_dicom_series(path, series_id=series_id, recursive=recursive, file_names=file_names)
     return Scan(image, {})
 
+
 def read_dicom_rtstruct(path):
     return StructureSet.from_dicom_rtstruct(path)
 
+
 def read_dicom_rtdose(path):
     return Dose.from_dicom_rtdose(path)
 
+
 def read_dicom_pet(path, series=None):
     return PET.from_dicom_pet(path, series, "SUV")
 
+
 def read_dicom_seg(path, meta, series=None):
     seg_img = read_dicom_series(path, series)
     return Segmentation.from_dicom_seg(seg_img, meta)
 
+
 def read_dicom_auto(path, series=None, file_names=None):
     if path is None:
         return None
@@ -120,6 +128,7 @@ def read_dicom_auto(path, series=None, file_names=None):
         obj.metadata.update(get_modality_metadata(meta, modality))
         return obj
 
+
 class BaseLoader:
     def __getitem__(self, subject_id):
         raise NotImplementedError
@@ -142,6 +151,7 @@ def get(self, subject_id, default=None):
         except KeyError:
             return default
 
+
 class ImageTreeLoader(BaseLoader):
     def __init__(self,
                  json_path,
@@ -155,7 +165,7 @@ def __init__(self,
                  readers=None):
 
         if readers is None:
-            readers = [read_image] # no mutable defaults https://florimond.dev/en/posts/2018/08/python-mutable-defaults-are-the-source-of-all-evil/
+            readers = [read_image]  # no mutable defaults https://florimond.dev/en/posts/2018/08/python-mutable-defaults-are-the-source-of-all-evil/
 
         self.expand_paths = expand_paths
         self.readers = readers
@@ -180,7 +190,7 @@ def __init__(self,
 
         if isinstance(json_path, str):
             with open(json_path, 'r') as f:
-                self.tree = json.load(json_path)
+                self.tree = json.load(f)
         else:
             raise ValueError(f"Expected a path to a json file, not {type(json_path)}.")
 
@@ -213,6 +223,7 @@ def keys(self):
     def items(self):
         return ((k, self[k]) for k in self.keys())
 
+
 class ImageCSVLoader(BaseLoader):
     def __init__(self,
                  csv_path_or_dataframe,
@@ -223,7 +234,7 @@ def __init__(self,
                  readers=None):
 
         if readers is None:
-            readers = [read_image] # no mutable defaults https://florimond.dev/en/posts/2018/08/python-mutable-defaults-are-the-source-of-all-evil/
+            readers = [read_image]  # no mutable defaults https://florimond.dev/en/posts/2018/08/python-mutable-defaults-are-the-source-of-all-evil/
 
         self.expand_paths = expand_paths
         self.readers = readers
@@ -279,7 +290,7 @@ def __init__(self,
         if exclude_paths is None:
             exclude_paths = []
         if reader is None:
-            reader = read_image # no mutable defaults https://florimond.dev/en/posts/2018/08/python-mutable-defaults-are-the-source-of-all-evil/
+            reader = read_image  # no mutable defaults https://florimond.dev/en/posts/2018/08/python-mutable-defaults-are-the-source-of-all-evil/
 
         self.root_directory = root_directory
         self.get_subject_id_from = get_subject_id_from
@@ -338,7 +349,6 @@ def keys(self):
         return self.paths.keys()
 
 
-
 # class CombinedLoader(BaseLoader):
 #     def __init__(self, **kwargs):
 #         self.loaders = kwargs

diff --git a/imgtools/io/writers.py b/imgtools/io/writers.py
@@ -1,4 +1,5 @@
-import os, pathlib
+import os
+import pathlib
 import json
 import csv
 import pickle
@@ -39,7 +40,7 @@ def _get_path_from_subject_id(self, subject_id, **kwargs):
         out_path = pathlib.Path(self.root_directory, out_filename).as_posix()
         out_dir = os.path.dirname(out_path)
         if self.create_dirs and not os.path.exists(out_dir):
-            os.makedirs(out_dir, exist_ok=True) # create subdirectories if specified in filename_format
+            os.makedirs(out_dir, exist_ok=True)  # create subdirectories if specified in filename_format
 
         return out_path
 
@@ -52,12 +53,11 @@ def __init__(self, root_directory, filename_format="{subject_id}.nii.gz", create
         self.create_dirs = create_dirs
         self.compress = compress
         if os.path.exists(self.root_directory):
+            # delete the folder called {subject_id} that was made in the original BaseWriter / the one named {label_or_image}
             if os.path.basename(os.path.dirname(self.root_directory)) == "{subject_id}":
                 shutil.rmtree(os.path.dirname(self.root_directory))
             elif "{label_or_image}{train_or_test}" in os.path.basename(self.root_directory):
                 shutil.rmtree(self.root_directory)
-           #delete the folder called {subject_id} that was made in the original BaseWriter / the one named {label_or_image}
-
 
     def put(self, subject_id, 
             image, is_mask=False, 
@@ -69,27 +69,28 @@ def put(self, subject_id,
         if is_mask:
             # remove illegal characters for Windows/Unix
             badboys = '<>:"/\|?*'
-            for char in badboys: mask_label = mask_label.replace(char, "")
+            for char in badboys: 
+                mask_label = mask_label.replace(char, "")
 
             # filename_format eh
-            self.filename_format = mask_label + ".nii.gz" #save the mask labels as their rtstruct names
+            self.filename_format = mask_label + ".nii.gz"  # save the mask labels as their rtstruct names
 
         if nnunet_info:
             if label_or_image == "labels":
-                filename = f"{subject_id}.nii.gz" #naming convention for labels
+                filename = f"{subject_id}.nii.gz"  # naming convention for labels
             else:
-                filename = self.filename_format.format(subject_id=subject_id, modality_index=nnunet_info['modalities'][nnunet_info['current_modality']]) #naming convention for images
+                filename = self.filename_format.format(subject_id=subject_id, modality_index=nnunet_info['modalities'][nnunet_info['current_modality']])  # naming convention for images
             out_path = self._get_path_from_subject_id(filename, label_or_image=label_or_image, train_or_test=train_or_test)
         else:
             out_path = self._get_path_from_subject_id(self.filename_format, subject_id=subject_id)
         sitk.WriteImage(image, out_path, self.compress)
 
     def _get_path_from_subject_id(self, filename, **kwargs):
-        root_directory = self.root_directory.format(**kwargs) #replace the {} with the kwargs passed in from .put() (above)
+        root_directory = self.root_directory.format(**kwargs)  # replace the {} with the kwargs passed in from .put() (above)
         out_path = pathlib.Path(root_directory, filename).as_posix()
         out_dir = os.path.dirname(out_path)
         if self.create_dirs and not os.path.exists(out_dir):
-            os.makedirs(out_dir, exist_ok=True) # create subdirectories if specified in filename_format
+            os.makedirs(out_dir, exist_ok=True)  # create subdirectories if specified in filename_format
         return out_path
 
 
@@ -133,7 +134,7 @@ def put(self, subject_id, mask, **kwargs):
         if len(labels) > 1: 
             arr = np.transpose(sitk.GetArrayFromImage(mask), [-1, -2, -3, -4])
 
-            #add extra dimension to metadata
+            # add extra dimension to metadata
             space_directions.insert(0, [float('nan'), float('nan'), float('nan')])
             kinds.insert(0, 'vector')
             dims += 1 
@@ -152,7 +153,7 @@ def put(self, subject_id, mask, **kwargs):
                     props = regionprops(arr)[0]
                 bbox = props["bbox"]
                 bbox_segment = [bbox[0], bbox[3], bbox[1], bbox[4], bbox[2], bbox[5]]
-            except IndexError: # mask is empty
+            except IndexError:  # mask is empty
                 assert arr[n].sum() == 0, "Mask not empty but 'skimage.measure.regionprops' failed."
                 bbox_segment = [0, 0, 0, 0, 0, 0]
 
@@ -185,7 +186,7 @@ def __init__(self, root_directory, filename_format="{subject_id}.npy", create_di
     def put(self, subject_id, image, **kwargs):
         out_path = self._get_path_from_subject_id(subject_id, **kwargs)
         if isinstance(image, sitk.Image):
-            array, *_ = image_to_array(image) # TODO (Michal) optionally save the image geometry
+            array, *_ = image_to_array(image)  # TODO (Michal) optionally save the image geometry
         np.save(out_path, array)
 
 

diff --git a/imgtools/modules/__init__.py b/imgtools/modules/__init__.py
@@ -4,4 +4,4 @@
 from .dose import *
 from .datagraph import *
 from .sparsemask import *
-from .scan import *
+from .scan import *