From c53ee623a6f420e5575cbb0b2e97f3630213f52f Mon Sep 17 00:00:00 2001
From: Vishwesh <vishweshramanathan@gmail.com>
Date: Mon, 6 Dec 2021 18:14:54 -0500
Subject: [PATCH] Added test autopipeline and modalities, solved some
 autopipeline bugs, read_dicom_series and pet now supports series_id

Former-commit-id: 941669ee0574c71785fd8eb07e7dbe4e0b07b484 [formerly 03f99572453723e4a3e895acac0996997f090168]
Former-commit-id: 2d72ee4c185d5848277d8d0a52cf4ddd4e664d44
---
 .gitignore                                    |  8 +-
 .../1-1.dcm.REMOVED.git-id                    |  1 +
 .../11376/1-1.dcm.REMOVED.git-id              |  1 +
 .../Pinnacle POI-41418/1-1.dcm.REMOVED.git-id |  1 +
 imgtools/autopipeline.py                      | 30 +++---
 imgtools/io/loaders.py                        | 20 ++--
 imgtools/modules/pet.py                       | 10 +-
 imgtools/ops/ops.py                           |  2 +
 tests/test_autopipe.py                        | 92 +++++++++++++++++++
 tests/test_modalities.py                      | 57 ++++++++++++
 10 files changed, 193 insertions(+), 29 deletions(-)
 create mode 100644 examples/data_test/patient_1/08-27-1885-CA ORL FDG TEP POS TX-94629/1.000000-RTstructCTsim-PETPET-CT-87625/1-1.dcm.REMOVED.git-id
 create mode 100644 examples/data_test/patient_1/08-27-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/11376/1-1.dcm.REMOVED.git-id
 create mode 100644 examples/data_test/patient_1/08-27-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/Pinnacle POI-41418/1-1.dcm.REMOVED.git-id
 create mode 100644 tests/test_autopipe.py
 create mode 100644 tests/test_modalities.py

diff --git a/.gitignore b/.gitignore
index 05d5ebe..aafd421 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,12 @@ examples/data/tcia_n*
 # macOS
 .DS_Store
 
+__pycache__
+.pytest_cache
+*.egg-info
+*.csv
+*.json
+
 # Created by https://www.gitignore.io/api/emacs,python,jupyternotebooks
 # Edit at https://www.gitignore.io/?templates=emacs,python,jupyternotebooks
 
@@ -193,4 +199,4 @@ examples/process_one.py
 
 .vscode
 
-*__pycache__
+tests/temp_folder*
diff --git a/examples/data_test/patient_1/08-27-1885-CA ORL FDG TEP POS TX-94629/1.000000-RTstructCTsim-PETPET-CT-87625/1-1.dcm.REMOVED.git-id b/examples/data_test/patient_1/08-27-1885-CA ORL FDG TEP POS TX-94629/1.000000-RTstructCTsim-PETPET-CT-87625/1-1.dcm.REMOVED.git-id
new file mode 100644
index 0000000..a809ada
--- /dev/null
+++ b/examples/data_test/patient_1/08-27-1885-CA ORL FDG TEP POS TX-94629/1.000000-RTstructCTsim-PETPET-CT-87625/1-1.dcm.REMOVED.git-id	
@@ -0,0 +1 @@
+b75896dfda8ce5f47a6086c99b9099690502266a
\ No newline at end of file
diff --git a/examples/data_test/patient_1/08-27-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/11376/1-1.dcm.REMOVED.git-id b/examples/data_test/patient_1/08-27-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/11376/1-1.dcm.REMOVED.git-id
new file mode 100644
index 0000000..4e3805b
--- /dev/null
+++ b/examples/data_test/patient_1/08-27-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/11376/1-1.dcm.REMOVED.git-id	
@@ -0,0 +1 @@
+cb383cee7496a5c8ce4724e64a121988c2314cab
\ No newline at end of file
diff --git a/examples/data_test/patient_1/08-27-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/Pinnacle POI-41418/1-1.dcm.REMOVED.git-id b/examples/data_test/patient_1/08-27-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/Pinnacle POI-41418/1-1.dcm.REMOVED.git-id
new file mode 100644
index 0000000..4a21d07
--- /dev/null
+++ b/examples/data_test/patient_1/08-27-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/Pinnacle POI-41418/1-1.dcm.REMOVED.git-id	
@@ -0,0 +1 @@
+dd2bcf36be02340b1220b43c1f7ab4bb1180dfc0
\ No newline at end of file
diff --git a/imgtools/autopipeline.py b/imgtools/autopipeline.py
index e8a94af..722a676 100644
--- a/imgtools/autopipeline.py
+++ b/imgtools/autopipeline.py
@@ -81,7 +81,7 @@ def process_one_subject(self, subject_id):
 
         print(subject_id, " start")
         #For counting multiple connections per modality
-        counter = [0 for _ in range(len(self.output_streams))]
+        counter = {"CT":0,"RTDOSE":0,"RTSTRUCT":0,"PT":0}
         
         for i, colname in enumerate(self.output_streams):
             modality = colname.split("_")[0]
@@ -109,22 +109,22 @@ def process_one_subject(self, subject_id):
                 image = self.resample(image)
                 #Saving the output
                 self.output(subject_id, image, output_stream)
-                self.graph.loc[subject_id, f"size_{output_stream}"] = image.GetSize()
+                self.graph.loc[subject_id, f"size_{output_stream}"] = str(image.GetSize())
                 print(subject_id, " SAVED IMAGE")
             elif modality == "RTDOSE":
                 try: #For cases with no image present
-                    doses = read_results[i].resample_rt(image)
+                    doses = read_results[i].resample_dose(image)
                 except:
                     Warning("No CT image present. Returning dose image without resampling")
                     doses = read_results[i]
                 
                 # save output
-                if mult_conn:
+                if not mult_conn:
                     self.output(subject_id, doses, output_stream)
                 else:
-                    counter[i] = counter[i]+1
-                    self.output(f"{subject_id}_{counter[i]}", doses, output_stream)
-                self.graph.loc[subject_id, f"size_{output_stream}"] = doses.GetSize()
+                    counter[modality] = counter[modality]+1
+                    self.output(f"{subject_id}_{counter[modality]}", doses, output_stream)
+                self.graph.loc[subject_id, f"size_{output_stream}"] = str(doses.GetSize())
                 print(subject_id, " SAVED DOSE")
             elif modality == "RTSTRUCT":
                 #For RTSTRUCT, you need image or PT
@@ -140,12 +140,12 @@ def process_one_subject(self, subject_id):
                     raise ValueError("You need to pass a reference CT or PT/PET image to map contours to.")
                 
                 # save output
-                if mult_conn:
+                if not mult_conn:
                     self.output(subject_id, mask, output_stream)
                 else:
-                    counter[i] = counter[i] + 1
-                    self.output(f"{subject_id}_{counter[i]}", mask, output_stream)
-                self.graph.loc[subject_id, f"roi_names_{output_stream}"] = structure_set.roi_names
+                    counter[modality] = counter[i] + 1
+                    self.output(f"{subject_id}_{counter[modality]}", mask, output_stream)
+                self.graph.loc[subject_id, f"roi_names_{output_stream}"] = str(structure_set.roi_names)
 
                 print(subject_id, "SAVED MASK ON", conn_to)
             elif modality == "PT":
@@ -156,12 +156,12 @@ def process_one_subject(self, subject_id):
                     Warning("No CT image present. Returning PT/PET image without resampling.")
                     pet = read_results[i]
 
-                if mult_conn!="1":
+                if not mult_conn:
                     self.output(subject_id, pet, output_stream)
                 else:
-                    counter[i] = counter[i] + 1
-                    self.output(f"{subject_id}_{counter[i]}", pet, output_stream)
-                self.graph.loc[subject_id, f"size_{output_stream}"] = pet.GetSize()
+                    counter[modality] = counter[modality] + 1
+                    self.output(f"{subject_id}_{counter[modality]}", pet, output_stream)
+                self.graph.loc[subject_id, f"size_{output_stream}"] = str(pet.GetSize())
                 print(subject_id, " SAVED PET")
         return
 
diff --git a/imgtools/io/loaders.py b/imgtools/io/loaders.py
index 42880ae..1510a46 100644
--- a/imgtools/io/loaders.py
+++ b/imgtools/io/loaders.py
@@ -28,8 +28,8 @@ def read_header(path):
     return nrrd.read_header(path)
 
 def read_dicom_series(path: str,
-                      recursive: bool = False,
-                      series_id: Optional[str] = None) -> sitk.Image:
+                      series_id: Optional[str] = None,
+                      recursive: bool = False) -> sitk.Image:
     """Read DICOM series as SimpleITK Image.
 
     Parameters
@@ -75,19 +75,19 @@ def read_dicom_rtstruct(path):
 def read_dicom_rtdose(path):
     return Dose.from_dicom_rtdose(path)
 
-def read_dicom_pet(path):
-    return PET.from_dicom_pet(path, "SUV")
+def read_dicom_pet(path,series=None):
+    return PET.from_dicom_pet(path,series, "SUV")
 
-def read_dicom_auto(path):
+def read_dicom_auto(path,series=None):
     if path is None:
         return None
     dcms = glob.glob(os.path.join(path, "*.dcm"))
     meta = dcmread(dcms[0])
     modality = meta.Modality
     if modality == 'CT':
-        return read_dicom_series(path)
+        return read_dicom_series(path,series)
     elif modality == 'PT':
-        return read_dicom_pet(path)
+        return read_dicom_pet(path,series)
     # elif len(dcms) == 1:
     #     meta = dcmread(dcms[0])
     #     modality = meta.Modality
@@ -132,6 +132,7 @@ class ImageCSVLoader(BaseLoader):
     def __init__(self,
                  csv_path_or_dataframe,
                  colnames=[],
+                 seriesnames=[],
                  id_column=None,
                  expand_paths=False,
                  readers=[read_image]):
@@ -140,12 +141,12 @@ def __init__(self,
         self.readers = readers
 
         self.colnames = colnames
+        self.seriesnames = seriesnames
 
         if isinstance(csv_path_or_dataframe, str):
             if id_column is not None and id_column not in colnames:
                 colnames.append(id_column)
             self.paths = pd.read_csv(csv_path_or_dataframe,
-                                     usecols=colnames,
                                      index_col=id_column)
         elif isinstance(csv_path_or_dataframe, pd.DataFrame):
             self.paths = csv_path_or_dataframe
@@ -164,10 +165,11 @@ def __init__(self,
     def __getitem__(self, subject_id):
         row = self.paths.loc[subject_id]
         paths = {col: row[col] for col in self.colnames}
+        series = {col: row[col] for col in self.seriesnames}
         if self.expand_paths:
             # paths = {col: glob.glob(path)[0] for col, path in paths.items()}
             paths = {col: glob.glob(path)[0] if pd.notna(path) else None for col, path in paths.items()}
-        outputs = {col: self.readers[i](path) for i, (col, path) in enumerate(paths.items())}
+        outputs = {col: self.readers[i](path,series["series_"+("_").join(col.split("_")[1:])]) for i, (col, path) in enumerate(paths.items())}
         return self.output_tuple(**outputs)
 
     def keys(self):
diff --git a/imgtools/modules/pet.py b/imgtools/modules/pet.py
index 5eaeee6..a7ec207 100644
--- a/imgtools/modules/pet.py
+++ b/imgtools/modules/pet.py
@@ -5,10 +5,12 @@
 import SimpleITK as sitk
 import warnings
 import datetime
+from typing import Optional
 
-def read_image(path):
+
+def read_image(path:str,series_id: Optional[str]=None):
     reader = sitk.ImageSeriesReader()
-    dicom_names = reader.GetGDCMSeriesFileNames(path)
+    dicom_names = reader.GetGDCMSeriesFileNames(path,seriesID=series_id if series_id else "")
     reader.SetFileNames(dicom_names)
     reader.MetaDataDictionaryArrayUpdateOn()
     reader.LoadPrivateTagsOn()
@@ -22,7 +24,7 @@ def __init__(self, img_pet, df):
         self.df = df
     
     @classmethod
-    def from_dicom_pet(cls, path, type="SUV"):
+    def from_dicom_pet(cls, path,series_id=None,type="SUV"):
         '''
         Reads the PET scan and returns the data frame and the image dosage in SITK format
         There are two types of existing formats which has to be mentioned in the type
@@ -35,7 +37,7 @@ def from_dicom_pet(cls, path, type="SUV"):
         If there is no data on SUV/ACT then backup calculation is done based on the formula in the documentation, although, it may
         have some error.
         '''
-        pet      = read_image(path)
+        pet      = read_image(path,series_id)
         path_one = os.path.join(path,os.listdir(path)[0])
         df       = pydicom.dcmread(path_one)
         try:
diff --git a/imgtools/ops/ops.py b/imgtools/ops/ops.py
index 71f0577..bc87532 100644
--- a/imgtools/ops/ops.py
+++ b/imgtools/ops/ops.py
@@ -100,6 +100,7 @@ def __init__(self,
         self.df_combined = graph.parser(self.modalities)
         self.output_streams = [("_").join(cols.split("_")[1:]) for cols in self.df_combined.columns if cols.split("_")[0]=="folder"]
         self.column_names = [cols for cols in self.df_combined.columns if cols.split("_")[0]=="folder"]
+        self.series_names = [cols for cols in self.df_combined.columns if cols.split("_")[0]=="series"]
         
         #Initilizations for the pipeline
         for colnames in self.output_streams:
@@ -116,6 +117,7 @@ def __init__(self,
 
         loader = ImageCSVLoader(self.df_combined,
                                 colnames=self.column_names,
+                                seriesnames=self.series_names,
                                 id_column=None,
                                 expand_paths=True,
                                 readers=self.readers) 
diff --git a/tests/test_autopipe.py b/tests/test_autopipe.py
new file mode 100644
index 0000000..3664d7f
--- /dev/null
+++ b/tests/test_autopipe.py
@@ -0,0 +1,92 @@
+import os
+from posixpath import dirname
+import shutil
+import warnings
+from multiprocessing import cpu_count
+
+import numpy as np
+import SimpleITK as sitk
+import pytest
+import nrrd
+import pandas as pd
+
+from imgtools.autopipeline import AutoPipeline
+
+@pytest.fixture
+def dataset_path():
+    curr_path=("/").join(os.getcwd().split("/")[:-1])
+    input_path = curr_path+ "/examples/data_test"
+    output_path = curr_path+ "/tests/"
+    return input_path,output_path
+
+@pytest.mark.parametrize("modalities",["PT","CT,RTDOSE","CT,RTSTRUCT,RTDOSE","CT,RTSTRUCT,RTDOSE,PT"])
+def test_pipeline(dataset_path,modalities):
+    input_path,output_path = dataset_path
+    n_jobs = 2
+    output_path_mod = output_path + "temp_folder_" + ("_").join(modalities.split(","))
+    #Initialize pipeline for the current setting
+    pipeline = AutoPipeline(input_path,output_path_mod,modalities,n_jobs=n_jobs)
+    #Run for different modalities
+    comp_path = os.path.join(output_path_mod, "dataset.csv")
+    if n_jobs > 1 or n_jobs == -1:     # == Parallel Processing ==
+        pipeline.run()
+    elif n_jobs == 1:                       # == Series (Single-core) Processing ==
+        subject_ids = pipeline._get_loader_subject_ids()
+        for subject_id in subject_ids:
+            pipeline.process_one_subject(subject_id)
+    pipeline.graph.to_csv(comp_path)
+
+    #Check if the crawl and edges exist
+    crawl_path = ("/").join(input_path.split("/")[:-1]) + "/imgtools_" + input_path.split("/")[-1] + ".csv"
+    json_path =  ("/").join(input_path.split("/")[:-1]) + "/imgtools_" + input_path.split("/")[-1] + ".json"
+    edge_path = ("/").join(input_path.split("/")[:-1]) + "/imgtools_" + input_path.split("/")[-1] + "_edges.csv"
+    assert os.path.exists(crawl_path) & os.path.exists(edge_path), "this breaks because there was no crawler output"
+
+    #for the test example, there are 6 files and 4 connections
+    crawl_data = pd.read_csv(crawl_path,index_col = 0)
+    edge_data = pd.read_csv(edge_path)
+    assert (len(crawl_data)==7) & (len(edge_data)==4), "this breaks because there was some error in crawling or while making the edge table"
+
+    #Check if the dataset.csv is having the correct number of components and has all the fields
+    comp_table = pd.read_csv(comp_path)
+    assert len(comp_table)==1, "this breaks because there is some error in making components, check datagraph.parser"
+
+    #Check the nrrd files
+    if modalities=="PT":
+        path_pet = output_path_mod + "/pet/" + os.listdir(output_path_mod+"/pet")[0]
+        dicom,_ = nrrd.read(path_pet)
+        assert dicom.shape[-1] == int(crawl_data.loc[crawl_data["modality"]=="PT","instances"].values[0])
+    elif modalities=="CT,RTDOSE":
+        path_ct = output_path_mod + "/image/" + os.listdir(output_path_mod+"/image")[0]
+        path_dose = output_path_mod + "/dose/" + os.listdir(output_path_mod+"/dose")[0]
+        dicom_ct,_ = nrrd.read(path_ct)
+        dicom_dose,_ = nrrd.read(path_dose)
+        assert dicom_ct.shape == dicom_dose.shape
+    elif modalities=="CT,RTSTRUCT,RTDOSE":
+        path_ct = output_path_mod + "/image/" + os.listdir(output_path_mod+"/image")[0]
+        path_dose = output_path_mod + "/dose/" + os.listdir(output_path_mod+"/dose")[0]
+        path_str = output_path_mod + "/mask_ct/" + os.listdir(output_path_mod+"/mask_ct")[0]
+        dicom_ct,_ = nrrd.read(path_ct)
+        dicom_dose,_ = nrrd.read(path_dose)
+        dicom_str,_ = nrrd.read(path_str)
+        #ensure they are in same physical space
+        assert dicom_ct.shape == dicom_dose.shape == dicom_str.shape[1:]
+    else:
+        path_ct = output_path_mod + "/image/" + os.listdir(output_path_mod+"/image")[0]
+        path_dose = output_path_mod + "/dose/" + os.listdir(output_path_mod+"/dose")[0]
+        path_ctstr = output_path_mod + "/mask_ct/" + os.listdir(output_path_mod+"/mask_ct")[0]
+        path_ptstr = output_path_mod + "/mask_pt/" + os.listdir(output_path_mod+"/mask_pt")[0]
+        path_pet = output_path_mod + "/pet/" + os.listdir(output_path_mod+"/pet")[0]
+        dicom_ct,_ = nrrd.read(path_ct)
+        dicom_dose,_ = nrrd.read(path_dose)
+        dicom_ctstr,_ = nrrd.read(path_ctstr)
+        dicom_ptstr,_ = nrrd.read(path_ptstr)
+        dicom_pet,_ = nrrd.read(path_pet)
+        #ensure they are in same physical space
+        assert dicom_ct.shape == dicom_dose.shape == dicom_ctstr.shape[1:] == dicom_ptstr.shape[1:] == dicom_pet.shape
+        os.remove(crawl_path)
+        os.remove(json_path)
+        os.remove(edge_path)
+    shutil.rmtree(output_path_mod)
+
+
diff --git a/tests/test_modalities.py b/tests/test_modalities.py
new file mode 100644
index 0000000..928c038
--- /dev/null
+++ b/tests/test_modalities.py
@@ -0,0 +1,57 @@
+'''
+This code is for testing functioning of different modalities 
+'''
+
+
+import os
+from posixpath import dirname
+import shutil
+import warnings
+from multiprocessing import cpu_count
+
+import numpy as np
+import SimpleITK as sitk
+import pytest
+import pydicom
+
+from imgtools.io import read_dicom_auto
+from imgtools.ops import StructureSetToSegmentation, ImageAutoOutput, Resample
+from imgtools.pipeline import Pipeline
+
+@pytest.fixture
+def modalities_path():
+    path = {}
+    path["CT"] = "../examples/data_test/patient_1/08-27-1885-CA ORL FDG TEP POS TX-94629/3.000000-Merged-06362"
+    path["RTSTRUCT"] = "../examples/data_test/patient_1/08-27-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/Pinnacle POI-41418"
+    path["RTDOSE"] = "../examples/data_test/patient_1/08-27-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/11376"
+    path["PT"] = "../examples/data_test/patient_1/08-27-1885-CA ORL FDG TEP POS TX-94629/532790.000000-LOR-RAMLA-44600"
+    return path
+
+@pytest.mark.parametrize("modalities", ["CT", "RTSTRUCT","RTDOSE","PT"])
+def test_modalities(modalities,modalities_path):
+    path = modalities_path
+    if modalities!="RTSTRUCT":
+        #Checks for dimensions
+        img = read_dicom_auto(path["CT"])
+        dcm = pydicom.dcmread(os.path.join(path[modalities],os.listdir(path[modalities])[0])).pixel_array
+        instances = len(os.listdir(path[modalities]))
+        dicom = read_dicom_auto(path[modalities])
+        if instances>1: #For comparing CT and PT modalities
+            assert dcm.shape == (dicom.GetHeight(),dicom.GetWidth())
+            assert instances == dicom.GetDepth()
+        else: #For comparing RTDOSE modalties
+            assert dcm.shape == (dicom.GetDepth(),dicom.GetHeight(),dicom.GetWidth())
+        if modalities=="PT":
+            dicom = dicom.resample_pet(img)
+            assert dicom.GetSize()==img.GetSize()
+        if modalities=="RTDOSE":
+            dicom = dicom.resample_dose(img)
+            assert dicom.GetSize()==img.GetSize()
+    else:
+        img = read_dicom_auto(path["CT"])
+        struc = read_dicom_auto(path[modalities])
+        make_binary_mask = StructureSetToSegmentation(roi_names=[], continuous=False)
+        mask = make_binary_mask(struc, img)
+        A = sitk.GetArrayFromImage(mask)
+        assert len(A.shape)==4
+        assert A.shape[0:3]==(img.GetDepth(),img.GetHeight(),img.GetWidth())