Added test autopipeline and modalities, solved some autopipeline bugs…

…, read_dicom_series and pet now supports series_id Former-commit-id: 941669e [formerly 03f9957] Former-commit-id: 2d72ee4
bhklab · Dec 6, 2021 · c53ee62 · c53ee62
1 parent 9962656
commit c53ee62
Show file tree

Hide file tree

Showing 10 changed files with 193 additions and 29 deletions.
diff --git a/.gitignore b/.gitignore
@@ -4,6 +4,12 @@ examples/data/tcia_n*
 # macOS
 .DS_Store
 
+__pycache__
+.pytest_cache
+*.egg-info
+*.csv
+*.json
+
 # Created by https://www.gitignore.io/api/emacs,python,jupyternotebooks
 # Edit at https://www.gitignore.io/?templates=emacs,python,jupyternotebooks
 
@@ -193,4 +199,4 @@ examples/process_one.py
 
 .vscode
 
-*__pycache__
+tests/temp_folder*
diff --git a/...CA ORL FDG TEP POS TX-94629/1.000000-RTstructCTsim-PETPET-CT-87625/1-1.dcm.REMOVED.git-id b/...CA ORL FDG TEP POS TX-94629/1.000000-RTstructCTsim-PETPET-CT-87625/1-1.dcm.REMOVED.git-id
@@ -0,0 +1 @@
+b75896dfda8ce5f47a6086c99b9099690502266a
diff --git a/...7-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/11376/1-1.dcm.REMOVED.git-id b/...7-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/11376/1-1.dcm.REMOVED.git-id
@@ -0,0 +1 @@
+cb383cee7496a5c8ce4724e64a121988c2314cab
diff --git a/...B.0OrophCBTRTID derived StudyInstanceUID.-94629/Pinnacle POI-41418/1-1.dcm.REMOVED.git-id b/...B.0OrophCBTRTID derived StudyInstanceUID.-94629/Pinnacle POI-41418/1-1.dcm.REMOVED.git-id
@@ -0,0 +1 @@
+dd2bcf36be02340b1220b43c1f7ab4bb1180dfc0
diff --git a/imgtools/autopipeline.py b/imgtools/autopipeline.py
@@ -81,7 +81,7 @@ def process_one_subject(self, subject_id):
 
         print(subject_id, " start")
         #For counting multiple connections per modality
-        counter = [0 for _ in range(len(self.output_streams))]
+        counter = {"CT":0,"RTDOSE":0,"RTSTRUCT":0,"PT":0}
 
         for i, colname in enumerate(self.output_streams):
             modality = colname.split("_")[0]
@@ -109,22 +109,22 @@ def process_one_subject(self, subject_id):
                 image = self.resample(image)
                 #Saving the output
                 self.output(subject_id, image, output_stream)
-                self.graph.loc[subject_id, f"size_{output_stream}"] = image.GetSize()
+                self.graph.loc[subject_id, f"size_{output_stream}"] = str(image.GetSize())
                 print(subject_id, " SAVED IMAGE")
             elif modality == "RTDOSE":
                 try: #For cases with no image present
-                    doses = read_results[i].resample_rt(image)
+                    doses = read_results[i].resample_dose(image)
                 except:
                     Warning("No CT image present. Returning dose image without resampling")
                     doses = read_results[i]
 
                 # save output
-                if mult_conn:
+                if not mult_conn:
                     self.output(subject_id, doses, output_stream)
                 else:
-                    counter[i] = counter[i]+1
-                    self.output(f"{subject_id}_{counter[i]}", doses, output_stream)
-                self.graph.loc[subject_id, f"size_{output_stream}"] = doses.GetSize()
+                    counter[modality] = counter[modality]+1
+                    self.output(f"{subject_id}_{counter[modality]}", doses, output_stream)
+                self.graph.loc[subject_id, f"size_{output_stream}"] = str(doses.GetSize())
                 print(subject_id, " SAVED DOSE")
             elif modality == "RTSTRUCT":
                 #For RTSTRUCT, you need image or PT
@@ -140,12 +140,12 @@ def process_one_subject(self, subject_id):
                     raise ValueError("You need to pass a reference CT or PT/PET image to map contours to.")
 
                 # save output
-                if mult_conn:
+                if not mult_conn:
                     self.output(subject_id, mask, output_stream)
                 else:
-                    counter[i] = counter[i] + 1
-                    self.output(f"{subject_id}_{counter[i]}", mask, output_stream)
-                self.graph.loc[subject_id, f"roi_names_{output_stream}"] = structure_set.roi_names
+                    counter[modality] = counter[modality] + 1  # counter is keyed by modality, not by stream index
+                    self.output(f"{subject_id}_{counter[modality]}", mask, output_stream)
+                self.graph.loc[subject_id, f"roi_names_{output_stream}"] = str(structure_set.roi_names)
 
                 print(subject_id, "SAVED MASK ON", conn_to)
             elif modality == "PT":
@@ -156,12 +156,12 @@ def process_one_subject(self, subject_id):
                     Warning("No CT image present. Returning PT/PET image without resampling.")
                     pet = read_results[i]
 
-                if mult_conn!="1":
+                if not mult_conn:
                     self.output(subject_id, pet, output_stream)
                 else:
-                    counter[i] = counter[i] + 1
-                    self.output(f"{subject_id}_{counter[i]}", pet, output_stream)
-                self.graph.loc[subject_id, f"size_{output_stream}"] = pet.GetSize()
+                    counter[modality] = counter[modality] + 1
+                    self.output(f"{subject_id}_{counter[modality]}", pet, output_stream)
+                self.graph.loc[subject_id, f"size_{output_stream}"] = str(pet.GetSize())
                 print(subject_id, " SAVED PET")
         return
 

diff --git a/imgtools/io/loaders.py b/imgtools/io/loaders.py
@@ -28,8 +28,8 @@ def read_header(path):
     return nrrd.read_header(path)
 
 def read_dicom_series(path: str,
-                      recursive: bool = False,
-                      series_id: Optional[str] = None) -> sitk.Image:
+                      series_id: Optional[str] = None,
+                      recursive: bool = False) -> sitk.Image:
     """Read DICOM series as SimpleITK Image.
 
     Parameters
@@ -75,19 +75,19 @@ def read_dicom_rtstruct(path):
 def read_dicom_rtdose(path):
     return Dose.from_dicom_rtdose(path)
 
-def read_dicom_pet(path):
-    return PET.from_dicom_pet(path, "SUV")
+def read_dicom_pet(path,series=None):
+    return PET.from_dicom_pet(path,series, "SUV")
 
-def read_dicom_auto(path):
+def read_dicom_auto(path,series=None):
     if path is None:
         return None
     dcms = glob.glob(os.path.join(path, "*.dcm"))
     meta = dcmread(dcms[0])
     modality = meta.Modality
     if modality == 'CT':
-        return read_dicom_series(path)
+        return read_dicom_series(path,series)
     elif modality == 'PT':
-        return read_dicom_pet(path)
+        return read_dicom_pet(path,series)
     # elif len(dcms) == 1:
     #     meta = dcmread(dcms[0])
     #     modality = meta.Modality
@@ -132,6 +132,7 @@ class ImageCSVLoader(BaseLoader):
     def __init__(self,
                  csv_path_or_dataframe,
                  colnames=[],
+                 seriesnames=[],
                  id_column=None,
                  expand_paths=False,
                  readers=[read_image]):
@@ -140,12 +141,12 @@ def __init__(self,
         self.readers = readers
 
         self.colnames = colnames
+        self.seriesnames = seriesnames
 
         if isinstance(csv_path_or_dataframe, str):
             if id_column is not None and id_column not in colnames:
                 colnames.append(id_column)
             self.paths = pd.read_csv(csv_path_or_dataframe,
-                                     usecols=colnames,
                                      index_col=id_column)
         elif isinstance(csv_path_or_dataframe, pd.DataFrame):
             self.paths = csv_path_or_dataframe
@@ -164,10 +165,11 @@ def __init__(self,
     def __getitem__(self, subject_id):
         row = self.paths.loc[subject_id]
         paths = {col: row[col] for col in self.colnames}
+        series = {col: row[col] for col in self.seriesnames}
         if self.expand_paths:
             # paths = {col: glob.glob(path)[0] for col, path in paths.items()}
             paths = {col: glob.glob(path)[0] if pd.notna(path) else None for col, path in paths.items()}
-        outputs = {col: self.readers[i](path) for i, (col, path) in enumerate(paths.items())}
+        outputs = {col: self.readers[i](path, series.get("series_" + "_".join(col.split("_")[1:]))) for i, (col, path) in enumerate(paths.items())}  # series id is None when no matching series column was requested
         return self.output_tuple(**outputs)
 
     def keys(self):

diff --git a/imgtools/modules/pet.py b/imgtools/modules/pet.py
@@ -5,10 +5,12 @@
 import SimpleITK as sitk
 import warnings
 import datetime
+from typing import Optional
 
-def read_image(path):
+
+def read_image(path:str,series_id: Optional[str]=None):
     reader = sitk.ImageSeriesReader()
-    dicom_names = reader.GetGDCMSeriesFileNames(path)
+    dicom_names = reader.GetGDCMSeriesFileNames(path,seriesID=series_id if series_id else "")
     reader.SetFileNames(dicom_names)
     reader.MetaDataDictionaryArrayUpdateOn()
     reader.LoadPrivateTagsOn()
@@ -22,7 +24,7 @@ def __init__(self, img_pet, df):
         self.df = df
 
     @classmethod
-    def from_dicom_pet(cls, path, type="SUV"):
+    def from_dicom_pet(cls, path,series_id=None,type="SUV"):
         '''
         Reads the PET scan and returns the data frame and the image dosage in SITK format
         There are two types of existing formats which has to be mentioned in the type
@@ -35,7 +37,7 @@ def from_dicom_pet(cls, path, type="SUV"):
         If there is no data on SUV/ACT then backup calculation is done based on the formula in the documentation, although, it may
         have some error.
         '''
-        pet      = read_image(path)
+        pet      = read_image(path,series_id)
         path_one = os.path.join(path,os.listdir(path)[0])
         df       = pydicom.dcmread(path_one)
         try:

diff --git a/imgtools/ops/ops.py b/imgtools/ops/ops.py
@@ -100,6 +100,7 @@ def __init__(self,
         self.df_combined = graph.parser(self.modalities)
         self.output_streams = [("_").join(cols.split("_")[1:]) for cols in self.df_combined.columns if cols.split("_")[0]=="folder"]
         self.column_names = [cols for cols in self.df_combined.columns if cols.split("_")[0]=="folder"]
+        self.series_names = [cols for cols in self.df_combined.columns if cols.split("_")[0]=="series"]
 
         #Initilizations for the pipeline
         for colnames in self.output_streams:
@@ -116,6 +117,7 @@ def __init__(self,
 
         loader = ImageCSVLoader(self.df_combined,
                                 colnames=self.column_names,
+                                seriesnames=self.series_names,
                                 id_column=None,
                                 expand_paths=True,
                                 readers=self.readers) 

diff --git a/tests/test_autopipe.py b/tests/test_autopipe.py
@@ -0,0 +1,92 @@
+import os
+from posixpath import dirname
+import shutil
+import warnings
+from multiprocessing import cpu_count
+
+import numpy as np
+import SimpleITK as sitk
+import pytest
+import nrrd
+import pandas as pd
+
+from imgtools.autopipeline import AutoPipeline
+
+@pytest.fixture
+def dataset_path():
+    curr_path=("/").join(os.getcwd().split("/")[:-1])
+    input_path = curr_path+ "/examples/data_test"
+    output_path = curr_path+ "/tests/"
+    return input_path,output_path
+
+@pytest.mark.parametrize("modalities",["PT","CT,RTDOSE","CT,RTSTRUCT,RTDOSE","CT,RTSTRUCT,RTDOSE,PT"])
+def test_pipeline(dataset_path,modalities):
+    input_path,output_path = dataset_path
+    n_jobs = 2
+    output_path_mod = output_path + "temp_folder_" + ("_").join(modalities.split(","))
+    #Initialize pipeline for the current setting
+    pipeline = AutoPipeline(input_path,output_path_mod,modalities,n_jobs=n_jobs)
+    #Run for different modalities
+    comp_path = os.path.join(output_path_mod, "dataset.csv")
+    if n_jobs > 1 or n_jobs == -1:     # == Parallel Processing ==
+        pipeline.run()
+    elif n_jobs == 1:                       # == Series (Single-core) Processing ==
+        subject_ids = pipeline._get_loader_subject_ids()
+        for subject_id in subject_ids:
+            pipeline.process_one_subject(subject_id)
+    pipeline.graph.to_csv(comp_path)
+
+    #Check if the crawl and edges exist
+    crawl_path = ("/").join(input_path.split("/")[:-1]) + "/imgtools_" + input_path.split("/")[-1] + ".csv"
+    json_path =  ("/").join(input_path.split("/")[:-1]) + "/imgtools_" + input_path.split("/")[-1] + ".json"
+    edge_path = ("/").join(input_path.split("/")[:-1]) + "/imgtools_" + input_path.split("/")[-1] + "_edges.csv"
+    assert os.path.exists(crawl_path) and os.path.exists(edge_path), "this breaks because there was no crawler output"
+
+    #for the test example, the crawl table is expected to have 7 rows and the edge table 4 connections
+    crawl_data = pd.read_csv(crawl_path,index_col = 0)
+    edge_data = pd.read_csv(edge_path)
+    assert (len(crawl_data)==7) and (len(edge_data)==4), "this breaks because there was some error in crawling or while making the edge table"
+
+    #Check if the dataset.csv is having the correct number of components and has all the fields
+    comp_table = pd.read_csv(comp_path)
+    assert len(comp_table)==1, "this breaks because there is some error in making components, check datagraph.parser"
+
+    #Check the nrrd files
+    if modalities=="PT":
+        path_pet = output_path_mod + "/pet/" + os.listdir(output_path_mod+"/pet")[0]
+        dicom,_ = nrrd.read(path_pet)
+        assert dicom.shape[-1] == int(crawl_data.loc[crawl_data["modality"]=="PT","instances"].values[0])
+    elif modalities=="CT,RTDOSE":
+        path_ct = output_path_mod + "/image/" + os.listdir(output_path_mod+"/image")[0]
+        path_dose = output_path_mod + "/dose/" + os.listdir(output_path_mod+"/dose")[0]
+        dicom_ct,_ = nrrd.read(path_ct)
+        dicom_dose,_ = nrrd.read(path_dose)
+        assert dicom_ct.shape == dicom_dose.shape
+    elif modalities=="CT,RTSTRUCT,RTDOSE":
+        path_ct = output_path_mod + "/image/" + os.listdir(output_path_mod+"/image")[0]
+        path_dose = output_path_mod + "/dose/" + os.listdir(output_path_mod+"/dose")[0]
+        path_str = output_path_mod + "/mask_ct/" + os.listdir(output_path_mod+"/mask_ct")[0]
+        dicom_ct,_ = nrrd.read(path_ct)
+        dicom_dose,_ = nrrd.read(path_dose)
+        dicom_str,_ = nrrd.read(path_str)
+        #ensure they are in same physical space
+        assert dicom_ct.shape == dicom_dose.shape == dicom_str.shape[1:]
+    else:
+        path_ct = output_path_mod + "/image/" + os.listdir(output_path_mod+"/image")[0]
+        path_dose = output_path_mod + "/dose/" + os.listdir(output_path_mod+"/dose")[0]
+        path_ctstr = output_path_mod + "/mask_ct/" + os.listdir(output_path_mod+"/mask_ct")[0]
+        path_ptstr = output_path_mod + "/mask_pt/" + os.listdir(output_path_mod+"/mask_pt")[0]
+        path_pet = output_path_mod + "/pet/" + os.listdir(output_path_mod+"/pet")[0]
+        dicom_ct,_ = nrrd.read(path_ct)
+        dicom_dose,_ = nrrd.read(path_dose)
+        dicom_ctstr,_ = nrrd.read(path_ctstr)
+        dicom_ptstr,_ = nrrd.read(path_ptstr)
+        dicom_pet,_ = nrrd.read(path_pet)
+        #ensure they are in same physical space
+        assert dicom_ct.shape == dicom_dose.shape == dicom_ctstr.shape[1:] == dicom_ptstr.shape[1:] == dicom_pet.shape
+        os.remove(crawl_path)
+        os.remove(json_path)
+        os.remove(edge_path)
+    shutil.rmtree(output_path_mod)
+
+
diff --git a/tests/test_modalities.py b/tests/test_modalities.py
@@ -0,0 +1,57 @@
+'''
+This code is for testing functioning of different modalities 
+'''
+
+
+import os
+from posixpath import dirname
+import shutil
+import warnings
+from multiprocessing import cpu_count
+
+import numpy as np
+import SimpleITK as sitk
+import pytest
+import pydicom
+
+from imgtools.io import read_dicom_auto
+from imgtools.ops import StructureSetToSegmentation, ImageAutoOutput, Resample
+from imgtools.pipeline import Pipeline
+
+@pytest.fixture
+def modalities_path():
+    path = {}
+    path["CT"] = "../examples/data_test/patient_1/08-27-1885-CA ORL FDG TEP POS TX-94629/3.000000-Merged-06362"
+    path["RTSTRUCT"] = "../examples/data_test/patient_1/08-27-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/Pinnacle POI-41418"
+    path["RTDOSE"] = "../examples/data_test/patient_1/08-27-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/11376"
+    path["PT"] = "../examples/data_test/patient_1/08-27-1885-CA ORL FDG TEP POS TX-94629/532790.000000-LOR-RAMLA-44600"
+    return path
+
+@pytest.mark.parametrize("modalities", ["CT", "RTSTRUCT","RTDOSE","PT"])
+def test_modalities(modalities,modalities_path):
+    path = modalities_path
+    if modalities!="RTSTRUCT":
+        #Checks for dimensions
+        img = read_dicom_auto(path["CT"])
+        dcm = pydicom.dcmread(os.path.join(path[modalities],os.listdir(path[modalities])[0])).pixel_array
+        instances = len(os.listdir(path[modalities]))
+        dicom = read_dicom_auto(path[modalities])
+        if instances>1: #For comparing CT and PT modalities
+            assert dcm.shape == (dicom.GetHeight(),dicom.GetWidth())
+            assert instances == dicom.GetDepth()
+        else: #For comparing RTDOSE modalties
+            assert dcm.shape == (dicom.GetDepth(),dicom.GetHeight(),dicom.GetWidth())
+        if modalities=="PT":
+            dicom = dicom.resample_pet(img)
+            assert dicom.GetSize()==img.GetSize()
+        if modalities=="RTDOSE":
+            dicom = dicom.resample_dose(img)
+            assert dicom.GetSize()==img.GetSize()
+    else:
+        img = read_dicom_auto(path["CT"])
+        struc = read_dicom_auto(path[modalities])
+        make_binary_mask = StructureSetToSegmentation(roi_names=[], continuous=False)
+        mask = make_binary_mask(struc, img)
+        A = sitk.GetArrayFromImage(mask)
+        assert len(A.shape)==4
+        assert A.shape[0:3]==(img.GetDepth(),img.GetHeight(),img.GetWidth())