preliminary MRI functionality (MR-RTSTRUCT pairs)
skim2257 committed Jan 18, 2022 · 1 parent 0f04145 · commit 3c01806
Showing 6 changed files with 58 additions and 173 deletions.
12 changes: 6 additions & 6 deletions imgtools/autopipeline.py
@@ -94,19 +94,19 @@ def process_one_subject(self, subject_id):
         for i, colname in enumerate(self.output_streams):
             modality = colname.split("_")[0]
 
-            #Taking modality pairs if it exists till _{num}
+            # Taking modality pairs if it exists till _{num}
             output_stream = ("_").join([item for item in colname.split("_") if item.isnumeric()==False])
 
-            #If there are multiple connections existing, multiple connections means two modalities connected to one modality. They end with _1
+            # If there are multiple connections existing, multiple connections means two modalities connected to one modality. They end with _1
             mult_conn = colname.split("_")[-1].isnumeric()
             num = colname.split("_")[-1]
 
             print(output_stream)
 
             if read_results[i] is None:
-                print("The subject id: {} has no {}".format(subject_id,colname))
+                print("The subject id: {} has no {}".format(subject_id, colname))
                 pass
-            elif modality == "CT":
+            elif modality == "CT" or modality == 'MR':
                 image = read_results[i]
                 if len(image.GetSize()) == 4:
                     assert image.GetSize()[-1] == 1, f"There is more than one volume in this CT file for {subject_id}."
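
For context, the colname parsing in the hunk above behaves like this (a minimal standalone sketch, not part of the commit; the stream name is hypothetical). A numeric suffix such as _1 marks one of several connections to the same modality:

colname = "RTSTRUCT_CT_1"                        # hypothetical stream name
modality = colname.split("_")[0]                 # "RTSTRUCT"
output_stream = "_".join(p for p in colname.split("_") if not p.isnumeric())
mult_conn = colname.split("_")[-1].isnumeric()   # True
num = colname.split("_")[-1]                     # "1"
print(modality, output_stream, mult_conn, num)   # RTSTRUCT RTSTRUCT_CT True 1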
@@ -142,7 +142,7 @@ def process_one_subject(self, subject_id):
                 conn_to = output_stream.split("_")[-1]
 
                 # make_binary_mask relative to ct/pet
-                if conn_to == "CT":
+                if conn_to == "CT" or conn_to == "MR":
                     mask = self.make_binary_mask(structure_set, image)
                 elif conn_to == "PT":
                     mask = self.make_binary_mask(structure_set, pet)
@@ -178,7 +178,7 @@ def process_one_subject(self, subject_id):
         return
 
     def save_data(self):
-        files = glob.glob(os.path.join(self.output_directory,".temp","*.pkl"))
+        files = glob.glob(os.path.join(self.output_directory, ".temp", "*.pkl"))
         for file in files:
             filename = pathlib.Path(file).name
             subject_id = os.path.splitext(filename)[0]
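
The 4D guard in the first hunk only asserts that a 4D CT/MR read contains a single volume. Below is a sketch of that invariant and one way to collapse such an image back to 3D; this is an illustration with SimpleITK under my own assumptions, not what the commit itself does:

import SimpleITK as sitk

vol3d = sitk.Image([64, 64, 32], sitk.sitkInt16)   # toy volume standing in for a read series
image = sitk.JoinSeries(vol3d)                     # 4D image, size (64, 64, 32, 1)

if len(image.GetSize()) == 4:
    assert image.GetSize()[-1] == 1, "more than one volume in this series"
    image = image[:, :, :, 0]                      # drop the trailing singleton axis

print(image.GetSize())                             # (64, 64, 32)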
16 changes: 9 additions & 7 deletions imgtools/io/common.py
@@ -41,11 +41,13 @@ def file_name_convention() -> Dict:
     This function returns the file name taxonomy which is used by ImageAutoOutput and Dataset class
     """
     file_name_convention = {"CT": "image",
-                            "RTDOSE_CT": "dose",
-                            "RTSTRUCT_CT": "mask_ct.seg",
-                            "RTSTRUCT_PT": "mask_pt.seg",
-                            "PT_CT": "pet",
-                            "PT": "pet",
-                            "RTDOSE": "dose",
-                            "RTSTRUCT": "mask.seg"}
+                            "MR": "image",
+                            "RTDOSE_CT": "dose",
+                            "RTSTRUCT_CT": "mask_ct.seg",
+                            "RTSTRUCT_MR": "mask_mr.seg",
+                            "RTSTRUCT_PT": "mask_pt.seg",
+                            "PT_CT": "pet",
+                            "PT": "pet",
+                            "RTDOSE": "dose",
+                            "RTSTRUCT": "mask.seg"}
     return file_name_convention
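
The taxonomy maps each output stream to the file stem written to disk; the two MR entries are this commit's additions. A hypothetical lookup, assuming the function is imported from imgtools.io.common:

from imgtools.io.common import file_name_convention

conv = file_name_convention()
print(conv["MR"])            # image        (new in this commit)
print(conv["RTSTRUCT_MR"])   # mask_mr.seg  (new in this commit)
print(conv["RTSTRUCT_CT"])   # mask_ct.seg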
130 changes: 3 additions & 127 deletions imgtools/io/loaders.py
@@ -16,6 +16,7 @@
 from ..modules import StructureSet
 from ..modules import Dose
 from ..modules import PET
+from ..utils.crawl import *
 
 
@@ -76,13 +77,13 @@ def read_dicom_rtdose(path):
 def read_dicom_pet(path,series=None):
     return PET.from_dicom_pet(path,series, "SUV")
 
-def read_dicom_auto(path,series=None):
+def read_dicom_auto(path, series=None):
     if path is None:
         return None
     dcms = glob.glob(os.path.join(path, "*.dcm"))
     meta = dcmread(dcms[0])
     modality = meta.Modality
-    if modality == 'CT':
+    if modality == 'CT' or modality == 'MR':
         return read_dicom_series(path,series)
     elif modality == 'PT':
         return read_dicom_pet(path,series)
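
A hedged usage sketch of the dispatch above: read_dicom_auto inspects the Modality tag of the first .dcm file in a folder and, after this commit, routes MR through the same read_dicom_series path as CT. The folder path below is a placeholder:

from imgtools.io.loaders import read_dicom_auto

# CT and MR folders both come back as an image series;
# PT is converted to SUV via read_dicom_pet.
mr_image = read_dicom_auto("/data/PATIENT_001/MR")   # placeholder path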
@@ -176,131 +177,6 @@ def keys(self):
     def items(self):
         return ((k, self[k]) for k in self.keys())
 
-class ImageAutoLoader(BaseLoader):
-    def __init__(self,
-                 path: str,
-                 modality: List[str] = ['CT']):
-        self.top = path
-        self.modality = modality
-        self.database_df = self.crawl(path)
-
-        # sort by modality
-        self.rtstruct = self.database_df[self.database_df['modality'] == 'RTSTRUCT']
-        self.rtdose = self.database_df[self.database_df['modality'] == 'RTDOSE']
-        self.pet = self.database_df[self.database_df['modality'] == 'PT']
-
-    @staticmethod
-    def crawl_one(folder):
-        database = {}
-        for path, directories, files in os.walk(folder):
-            # find dicoms
-            dicoms = glob.glob(os.path.join(path, "*.dcm"))
-
-            # instance (slice) information
-            for dcm in dicoms:
-                meta = dcmread(dcm)
-                patient = meta.PatientID
-                study = meta.StudyInstanceUID
-                series = meta.SeriesInstanceUID
-                instance = meta.SOPInstanceUID
-
-                try:
-                    reference = meta.FrameOfReferenceUID
-                except:
-                    try:
-                        reference = meta.ReferencedFrameOfReferenceSequence[0].FrameOfReferenceUID
-                    except:
-                        reference = ""
-
-                try:
-                    study_description = meta.StudyDescription
-                except:
-                    study_description = ""
-
-                try:
-                    series_description = meta.SeriesDescription
-                except:
-                    series_description = ""
-
-                if patient not in database:
-                    database[patient] = {}
-                if study not in database[patient]:
-                    database[patient][study] = {'description': study_description}
-                if series not in database[patient][study]:
-                    database[patient][study][series] = {'instances': 0,
-                                                        'modality': meta.Modality,
-                                                        'description': series_description,
-                                                        'reference': reference,
-                                                        'folder': path}
-                database[patient][study][series]['instances'] += 1
-
-        return database
-
-    @staticmethod
-    def to_df(database_dict):
-        df = pd.DataFrame(columns=['patient_ID', 'study', 'study_description', 'series', 'series_description', 'modality', 'instances', 'folder'])
-        for pat in database_dict:
-            for study in database_dict[pat]:
-                for series in database_dict[pat][study]:
-                    if series != 'description':
-                        df = df.append({'patient_ID': pat,
-                                        'study': study,
-                                        'study_description': database_dict[pat][study]['description'],
-                                        'series': series,
-                                        'series_description': database_dict[pat][study][series]['description'],
-                                        'modality': database_dict[pat][study][series]['modality'],
-                                        'instances': database_dict[pat][study][series]['instances'],
-                                        'reference': database_dict[pat][study][series]['reference'],
-                                        'folder': database_dict[pat][study][series]['folder']}, ignore_index=True)
-        return df
-
-    def crawl(self, top, n_jobs=1):
-        database_list = []
-        folders = glob.glob(os.path.join(top, "*"))
-
-        # crawl folder-by-folder
-        if n_jobs == 1:
-            for n, folder in enumerate(tqdm(folders)):
-                database_list.append(self.crawl_one(folder))
-        else:
-            database_list = Parallel(n_jobs=n_jobs)(delayed(self.crawl_one)(os.path.join(top, folder)) for folder in tqdm(folders))
-
-        # convert list to dictionary
-        database_dict = {}
-        for db in database_list:
-            for key in db:
-                database_dict[key] = db[key]
-
-        # save one level above imaging folders
-        parent, dataset = os.path.split(top)
-
-        # save as json
-        with open(os.path.join(parent, f'imgtools_{dataset}.json'), 'w') as f:
-            json.dump(database_dict, f, indent=4)
-
-        # save as dataframe
-        df = self.to_df(database_dict)
-        df_path = os.path.join(parent, f'imgtools_{dataset}.csv')
-        df.to_csv(df_path)
-
-        print(f"imgtools dataset {dataset} crawling complete. You can find the spreadsheet at: {df_path}")
-
-        return self.filter(df)
-
-
-    def __getitem__(self, subject_id):
-        row = self.df['patient_ID' == subject_id]
-        paths = {col: row[col] for col in self.colnames}
-        if self.expand_paths:
-            paths = {col: glob.glob(path)[0] for col, path in paths.items()}
-        outputs = {col: self.readers[i](path) for i, (col, path) in enumerate(paths.items())}
-        return self.output_tuple(**outputs)
-
-    def keys(self):
-        return list(self.paths.index)
-
-    def items(self):
-        return ((k, self[k]) for k in self.keys())
-
 class ImageFileLoader(BaseLoader):
     def __init__(self,
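
The deleted ImageAutoLoader overlaps with the crawler now pulled in via from ..utils.crawl import *, which suggests the crawl logic was factored out rather than dropped. For reference, the removed crawl_one built a nested patient → study → series dictionary of the following shape; every identifier and path below is a made-up placeholder:

database = {
    "PATIENT_001": {                                   # PatientID
        "1.2.840.99.1.study": {                        # StudyInstanceUID
            "description": "Head & Neck staging",      # StudyDescription
            "1.2.840.99.1.series": {                   # SeriesInstanceUID
                "instances": 120,                      # slice count
                "modality": "CT",
                "description": "Axial 2.0 mm",         # SeriesDescription
                "reference": "1.2.840.99.1.frame",     # FrameOfReferenceUID
                "folder": "/data/PATIENT_001/CT",
            },
        },
    },
}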