test_components, test_modalities works with new AutoPipeline

bhklab · Jun 19, 2022 · e51fe82 · e51fe82
1 parent a786724
commit e51fe82
Show file tree

Hide file tree

Showing 11 changed files with 218 additions and 122 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,6 +5,7 @@ examples/adsf.py
 data
 examples/data/tcia_n*
 scratch.ipynb
+tests/temp
 
 # macOS
 .DS_Store

diff --git a/imgtools/autopipeline.py b/imgtools/autopipeline.py
@@ -107,11 +107,14 @@ def __init__(self,
         self.label_names = {}
         self.ignore_missing_regex = ignore_missing_regex
 
-        with open(pathlib.Path(self.input_directory, "roi_names.yaml").as_posix(), "r") as f:
-            try:
-                self.label_names = yaml.safe_load(f)
-            except yaml.YAMLError as exc:
-                print(exc)
+        roi_path = pathlib.Path(self.input_directory, "roi_names.yaml").as_posix()
+
+        if os.path.exists(roi_path):
+            with open(roi_path, "r") as f:
+                try:
+                    self.label_names = yaml.safe_load(f)
+                except yaml.YAMLError as exc:
+                    print(exc)
 
         if not isinstance(self.label_names, dict):
             raise ValueError("roi_names.yaml must parse as a dictionary")
@@ -436,7 +439,6 @@ def main():
     """Print nnU-Net specific info here:
     * dataset.json can be found at /path/to/dataset/json
     * You can train nnU-Net by cloning /path/to/nnunet/repo and run `nnUNet_plan_and_preprocess -t taskID` to let the nnU-Net package prepare 
-    * 
     """
 
 if __name__ == "__main__":

diff --git a/imgtools/io/dataset.py b/imgtools/io/dataset.py
@@ -1,5 +1,5 @@
 from genericpath import exists
-import os, pathlib
+import os, pathlib, ast
 from typing import List, Sequence, Optional, Callable, Iterable, Dict,Tuple
 from tqdm import tqdm
 
@@ -29,7 +29,7 @@ def __init__(
         self.path = path
 
     @classmethod
-    def load_from_nrrd(
+    def load_image(
             cls,
             path:str,
             ignore_multi: bool = True,
@@ -46,37 +46,37 @@ def load_from_nrrd(
         df_metadata = pd.read_csv(path_metadata,index_col=0)
 
         for col in df_metadata.columns:
-            if col.startswith("folder"):
+            if col.startswith("output_folder"):
                 df_metadata[col] = df_metadata[col].apply(lambda x: pathlib.Path(os.path.split(os.path.dirname(path))[0], x).as_posix() if isinstance(x, str) else x) #input folder joined with the rel path
 
-        output_streams = [("_").join(cols.split("_")[1:]) for cols in df_metadata.columns if cols.split("_")[0] == "folder"]
+        output_streams = [("_").join(cols.split("_")[2:]) for cols in df_metadata.columns if cols.split("_")[0] == "output"]
         imp_metadata = [cols for cols in df_metadata.columns if cols.split("_")[0] in ("metadata")]
         #Ignores multiple connection to single modality
         if ignore_multi:
             output_streams = [items for items in output_streams if items.split("_")[-1].isnumeric()==False]
             imp_metadata = [items for items in imp_metadata if items.split("_")[-1].isnumeric()==False]
+
         #Based on the file naming convention
-        file_names = file_name_convention()
         subject_id_list = list(df_metadata.index)
         subjects = []
         for subject_id in tqdm(subject_id_list):
             temp = {}
             for col in output_streams:
-                mult_conn = col.split("_")[-1].isnumeric()
                 metadata_name = f"metadata_{col}"
-                if mult_conn:
-                    extra = col.split("_")[-1]+"_"
-                    extension = file_names[("_").join(col.split("_")[:-1])]
+                if 'RTSTRUCT' in col:
+                    filenames = ast.literal_eval(df_metadata.loc[subject_id]['metadata_RTSTRUCT_CT'])[0]
+                    filename = filenames[0]
                 else:
-                    extension = file_names[col]
-                    extra = ""
-                path_mod = pathlib.Path(path,extension.split(".")[0],f"{subject_id}_{extra}{extension}.nrrd").as_posix()
-                #All modalities except RTSTRUCT should be of type torchIO.ScalarImage
+                    filename = col
+                path_mod = pathlib.Path(path, subject_id, col, f"{filename}.nii.gz").as_posix()
+                print(path_mod)
+                #All modalities except RTSTRUCT should be of type torchIO.ScalarImage 
                 if os.path.exists(path_mod):
                     if col.split("_")[0]!="RTSTRUCT":
                         temp[f"mod_{col}"] = tio.ScalarImage(path_mod)
                     else:
-                        temp[f"mod_{col}"] = tio.LabelMap(path_mod)
+                        path_mods = [pathlib.Path(path, subject_id, col, f"{filename}.nii.gz").as_posix() for filename in filenames]
+                        temp[f"mod_{col}"] = tio.LabelMap(path_mods)
                 else:
                     temp[f"mod_{col}"] = None
                 #For including metadata
@@ -89,7 +89,7 @@ def load_from_nrrd(
                         #torch dataloader doesnt accept None type
                         temp[metadata_name] = {}
             subjects.append(tio.Subject(temp))
-        return cls(subjects,path)
+        return cls(subjects, path)
 
     @classmethod
     def load_directly(

diff --git a/imgtools/io/loaders.py b/imgtools/io/loaders.py
@@ -20,8 +20,6 @@
 from ..utils.crawl import *
 from ..utils.dicomutils import *
 
-
-
 def read_image(path):
     return sitk.ReadImage(path)
 
@@ -91,7 +89,7 @@ def read_dicom_auto(path, series=None):
     modality = meta.Modality
     all_modality_metadata = all_modalities_metadata(meta)
     if modality == 'CT' or modality == 'MR':
-        dicom_series = read_dicom_series(path,series, modality=modality)
+        dicom_series = read_dicom_series(path,series)#, modality=modality)
         if modality == 'CT':
             dicom_series.metadata.update(ct_metadata(meta))
             dicom_series.metadata.update(all_modality_metadata)

diff --git a/imgtools/modules/datagraph.py b/imgtools/modules/datagraph.py
@@ -16,6 +16,8 @@ class DataGraph:
     3) edge_type:2 RTSTRUCT(key:ref_ct) -> CT(pair: series) 
     4) edge_type:3 RTSTRUCT(key:ref_ct) -> PT(pair: series) 
     5) edge_type:4 CT(key:study) -> PT(pair: study) 
+    6) edge_type:5 RTDOSE(key: ref_pl) -> RTPLAN(pair: instance)
+    7) edge_type:6 RTPLAN(key: ref_rs) -> RTSTRUCT(pair: series/instance)
 
     Once the edge table is formed, one can query on the graph to get the desired results. For uniformity, the supported query is list of modalities to consider
     For ex:
@@ -61,24 +63,26 @@ def form_graph(self):
         df_filter.drop(columns=["reference_rs_y", "instance_uid_y"], inplace=True)
         df_filter.rename(columns={"reference_rs_x":"reference_rs", "instance_uid_x":"instance_uid"}, inplace=True)
 
-        #Remove entries with no RT dose reference, for extra check, such cases are mostprobably removed in the earlier step
+        #Remove RTDOSE entries that reference neither a CT nor an RTSTRUCT; this is an extra check, as such cases are most probably removed in the earlier step
         df_filter = df_filter.loc[~((df_filter["modality"] == "RTDOSE") & (df_filter["reference_ct"].isna()) & (df_filter["reference_rs"].isna()))]
 
         #Get all study ids
-        all_study= df_filter.study.unique()
+        # all_study = df_filter.study.unique()
         start = time.time()
 
         #Defining Master df to store all the Edge dataframes
-        self.df_master = []
+        # self.df_master = []
 
-        for i in tqdm(range(len(all_study))):
-            self._form_edge_study(df_filter, all_study, i)
+        # for i in tqdm(range(len(all_study))):
+            # self._form_edge_study(df_filter, all_study, i)
 
         # df_edge_patient = form_edge_study(df,all_study,i)
+
+        self.df_edges = self._form_edges(self.df) #pd.concat(self.df_master, axis=0, ignore_index=True)
         end = time.time()
         print(f"\nTotal time taken: {end - start}")
 
-        self.df_edges = pd.concat(self.df_master, axis=0, ignore_index=True)
+
         self.df_edges.loc[self.df_edges.study_x.isna(),"study_x"] = self.df_edges.loc[self.df_edges.study_x.isna(), "study"]
         #dropping some columns
         self.df_edges.drop(columns=["study_y", "patient_ID_y", "series_description_y", "study_description_y", "study"],inplace=True)
@@ -122,6 +126,56 @@ def visualize_graph(self):
         vis_path = pathlib.Path(os.path.dirname(self.edge_path),"datanet.html").as_posix()
         data_net.show(vis_path)
 
+    def _form_edges(self, df):
+        '''
+        Forms the edge table from the whole dataframe at once, rather than one study at a time
+        '''
+
+        df_list = []
+
+        # Split into each modality
+        plan = df[df["modality"] == "RTPLAN"]
+        dose = df[df["modality"] == "RTDOSE"]
+        struct = df[df["modality"] == "RTSTRUCT"]
+        ct = df[df["modality"] == "CT"]
+        mr = df[df["modality"] == "MR"]
+        pet = df[df["modality"] == "PT"]
+
+        edge_types = np.arange(7)
+        for edge in edge_types:
+            if edge==0:    # FORMS RTDOSE->RTSTRUCT, can be formed on both series and instance uid
+                df_comb1    = pd.merge(struct, dose, left_on="instance_uid", right_on="reference_rs")
+                df_comb2    = pd.merge(struct, dose, left_on="series", right_on="reference_rs")
+                df_combined = pd.concat([df_comb1, df_comb2])
+                #Cases where both series and instance_uid are the same for struct
+                df_combined = df_combined.drop_duplicates(subset=["instance_uid_x"])
+
+            elif edge==1:  # FORMS RTDOSE->CT 
+                df_combined = pd.merge(ct, dose, left_on="series", right_on="reference_ct")
+
+            elif edge==2:  # FORMS RTSTRUCT->CT on ref_ct to series
+                df_ct = pd.merge(ct, struct, left_on="series", right_on="reference_ct")
+                df_mr = pd.merge(mr, struct, left_on="series", right_on="reference_ct")
+                df_combined = pd.concat([df_ct, df_mr])
+
+            elif edge==3:  # FORMS RTSTRUCT->PET on ref_ct to series
+                df_combined = pd.merge(pet, struct, left_on="series", right_on="reference_ct")
+
+            elif edge==4:           # FORMS PET->CT on study
+                df_combined = pd.merge(ct, pet, left_on="study", right_on="study")
+
+            elif edge==5: 
+                df_combined = pd.merge(plan, dose, left_on="instance_uid", right_on="reference_pl")
+
+            else:
+                df_combined = pd.merge(struct, plan, left_on="instance_uid", right_on="reference_rs")
+
+            df_combined["edge_type"] = edge
+            df_list.append(df_combined)
+
+        df_edges = pd.concat(df_list, axis=0, ignore_index=True)
+        return df_edges
+
     def _form_edge_study(self, df, all_study, study_id):
         '''
         For a given study id forms edge table
@@ -130,14 +184,15 @@ def _form_edge_study(self, df, all_study, study_id):
         df_study = df.loc[self.df["study"] == all_study[study_id]]
         df_list = []
 
-        #Bifurcating the dataframe
+        # Split into each modality
+        plan = df_study.loc[df_study["modality"] == "RTPLAN"]
         dose = df_study.loc[df_study["modality"] == "RTDOSE"]
         struct = df_study.loc[df_study["modality"] == "RTSTRUCT"]
         ct = df_study.loc[df_study["modality"] == "CT"]
         mr = df_study.loc[df_study["modality"] == "MR"]
         pet = df_study.loc[df_study["modality"] == "PT"]
 
-        edge_types = np.arange(5)
+        edge_types = np.arange(7)
         for edge in edge_types:
             if edge==0:    # FORMS RTDOSE->RTSTRUCT, can be formed on both series and instance uid
                 df_comb1    = pd.merge(struct, dose, left_on="instance_uid", right_on="reference_rs")
@@ -157,9 +212,15 @@ def _form_edge_study(self, df, all_study, study_id):
             elif edge==3:  # FORMS RTSTRUCT->PET on ref_ct to series
                 df_combined = pd.merge(pet, struct, left_on="series", right_on="reference_ct")
 
-            else:           # FORMS PET->CT on study
+            elif edge==4:           # FORMS PET->CT on study
                 df_combined = pd.merge(ct, pet, left_on="study", right_on="study")
 
+            elif edge==5: 
+                df_combined = pd.merge(plan, dose, left_on="instance", right_on="reference_pl")
+
+            else:
+                df_combined = pd.merge(struct, plan, left_on="instance", right_on="reference_rs")
+
             df_combined["edge_type"] = edge
             df_list.append(df_combined)
 

diff --git a/imgtools/modules/dose.py b/imgtools/modules/dose.py
@@ -1,4 +1,4 @@
-import os, pathlib
+import os, pathlib, glob
 import warnings
 import copy
 
@@ -15,6 +15,7 @@
 def read_image(path):
     reader = sitk.ImageSeriesReader()
     dicom_names = reader.GetGDCMSeriesFileNames(path)
+    print(dicom_names)
     reader.SetFileNames(dicom_names)
     reader.MetaDataDictionaryArrayUpdateOn()
     reader.LoadPrivateTagsOn()
@@ -37,12 +38,15 @@ def from_dicom_rtdose(cls, path):
         '''
         Reads the data and returns the data frame and the image dosage in SITK format
         '''
-        dose = read_image(path)[:,:,:,0]
-        # dose = sitk.ReadImage(glob.glob(path + "/*"))
+        dcms = glob.glob(pathlib.Path(path, "*.dcm").as_posix())
+
+        if len(dcms) < 2:
+            dose = sitk.ReadImage(dcms[0])
+        else:
+            dose = read_image(path)[:,:,:,0]
 
         #Get the metadata
-        dcm_path = pathlib.Path(path, os.listdir(path)[0]).as_posix()
-        df = dcmread(dcm_path)
+        df = dcmread(dcms[0])
 
         #Convert to SUV
         factor = float(df.DoseGridScaling)
@@ -59,7 +63,7 @@ def resample_dose(self,
         Resamples the RTDOSE information so that it can be overlayed with CT scan. The beginning and end slices of the 
         resampled RTDOSE scan might be empty due to the interpolation
         '''
-        resampled_dose = sitk.Resample(self.img_dose, ct_scan, interpolator=sitk.sitkNearestNeighbor)
+        resampled_dose = sitk.Resample(self.img_dose, ct_scan)#, interpolator=sitk.sitkNearestNeighbor)
         return resampled_dose
 
     def show_overlay(self,

diff --git a/imgtools/modules/pet.py b/imgtools/modules/pet.py
@@ -102,7 +102,7 @@ def resample_pet(self,
         Resamples the PET scan so that it can be overlayed with CT scan. The beginning and end slices of the 
         resampled PET scan might be empty due to the interpolation
         '''
-        resampled_pt = sitk.Resample(self.img_pet, ct_scan, interpolator=sitk.sitkNearestNeighbor)
+        resampled_pt = sitk.Resample(self.img_pet, ct_scan)#, interpolator=sitk.sitkNearestNeighbor) # commented out the interpolator due to an error
         return resampled_pt
 
     def show_overlay(self,