Skip to content

Commit

Permalink
Added test autopipeline and modalities, solved some autopipeline bugs…
Browse files Browse the repository at this point in the history
…, read_dicom_series and pet now supports series_id

Former-commit-id: 03f9957
  • Loading branch information
Vishwesh4 committed Dec 6, 2021
1 parent bd4d2d1 commit 941669e
Show file tree
Hide file tree
Showing 9 changed files with 192 additions and 29 deletions.
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ examples/data/tcia_n*
# macOS
.DS_Store

__pycache__
.pytest_cache
*.egg-info
*.csv
*.json

# Created by https://www.gitignore.io/api/emacs,python,jupyternotebooks
# Edit at https://www.gitignore.io/?templates=emacs,python,jupyternotebooks

Expand Down Expand Up @@ -193,4 +199,4 @@ examples/process_one.py

.vscode

*__pycache__
tests/temp_folder*
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
b75896dfda8ce5f47a6086c99b9099690502266a
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
dd2bcf36be02340b1220b43c1f7ab4bb1180dfc0
30 changes: 15 additions & 15 deletions imgtools/autopipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def process_one_subject(self, subject_id):

print(subject_id, " start")
#For counting multiple connections per modality
counter = [0 for _ in range(len(self.output_streams))]
counter = {"CT":0,"RTDOSE":0,"RTSTRUCT":0,"PT":0}

for i, colname in enumerate(self.output_streams):
modality = colname.split("_")[0]
Expand Down Expand Up @@ -109,22 +109,22 @@ def process_one_subject(self, subject_id):
image = self.resample(image)
#Saving the output
self.output(subject_id, image, output_stream)
self.graph.loc[subject_id, f"size_{output_stream}"] = image.GetSize()
self.graph.loc[subject_id, f"size_{output_stream}"] = str(image.GetSize())
print(subject_id, " SAVED IMAGE")
elif modality == "RTDOSE":
try: #For cases with no image present
doses = read_results[i].resample_rt(image)
doses = read_results[i].resample_dose(image)
except:
Warning("No CT image present. Returning dose image without resampling")
doses = read_results[i]

# save output
if mult_conn:
if not mult_conn:
self.output(subject_id, doses, output_stream)
else:
counter[i] = counter[i]+1
self.output(f"{subject_id}_{counter[i]}", doses, output_stream)
self.graph.loc[subject_id, f"size_{output_stream}"] = doses.GetSize()
counter[modality] = counter[modality]+1
self.output(f"{subject_id}_{counter[modality]}", doses, output_stream)
self.graph.loc[subject_id, f"size_{output_stream}"] = str(doses.GetSize())
print(subject_id, " SAVED DOSE")
elif modality == "RTSTRUCT":
#For RTSTRUCT, you need image or PT
Expand All @@ -140,12 +140,12 @@ def process_one_subject(self, subject_id):
raise ValueError("You need to pass a reference CT or PT/PET image to map contours to.")

# save output
if mult_conn:
if not mult_conn:
self.output(subject_id, mask, output_stream)
else:
counter[i] = counter[i] + 1
self.output(f"{subject_id}_{counter[i]}", mask, output_stream)
self.graph.loc[subject_id, f"roi_names_{output_stream}"] = structure_set.roi_names
counter[modality] = counter[i] + 1
self.output(f"{subject_id}_{counter[modality]}", mask, output_stream)
self.graph.loc[subject_id, f"roi_names_{output_stream}"] = str(structure_set.roi_names)

print(subject_id, "SAVED MASK ON", conn_to)
elif modality == "PT":
Expand All @@ -156,12 +156,12 @@ def process_one_subject(self, subject_id):
Warning("No CT image present. Returning PT/PET image without resampling.")
pet = read_results[i]

if mult_conn!="1":
if not mult_conn:
self.output(subject_id, pet, output_stream)
else:
counter[i] = counter[i] + 1
self.output(f"{subject_id}_{counter[i]}", pet, output_stream)
self.graph.loc[subject_id, f"size_{output_stream}"] = pet.GetSize()
counter[modality] = counter[modality] + 1
self.output(f"{subject_id}_{counter[modality]}", pet, output_stream)
self.graph.loc[subject_id, f"size_{output_stream}"] = str(pet.GetSize())
print(subject_id, " SAVED PET")
return

Expand Down
20 changes: 11 additions & 9 deletions imgtools/io/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ def read_header(path):
return nrrd.read_header(path)

def read_dicom_series(path: str,
recursive: bool = False,
series_id: Optional[str] = None) -> sitk.Image:
series_id: Optional[str] = None,
recursive: bool = False) -> sitk.Image:
"""Read DICOM series as SimpleITK Image.
Parameters
Expand Down Expand Up @@ -75,19 +75,19 @@ def read_dicom_rtstruct(path):
def read_dicom_rtdose(path):
return Dose.from_dicom_rtdose(path)

def read_dicom_pet(path):
return PET.from_dicom_pet(path, "SUV")
def read_dicom_pet(path,series=None):
return PET.from_dicom_pet(path,series, "SUV")

def read_dicom_auto(path):
def read_dicom_auto(path,series=None):
if path is None:
return None
dcms = glob.glob(os.path.join(path, "*.dcm"))
meta = dcmread(dcms[0])
modality = meta.Modality
if modality == 'CT':
return read_dicom_series(path)
return read_dicom_series(path,series)
elif modality == 'PT':
return read_dicom_pet(path)
return read_dicom_pet(path,series)
# elif len(dcms) == 1:
# meta = dcmread(dcms[0])
# modality = meta.Modality
Expand Down Expand Up @@ -132,6 +132,7 @@ class ImageCSVLoader(BaseLoader):
def __init__(self,
csv_path_or_dataframe,
colnames=[],
seriesnames=[],
id_column=None,
expand_paths=False,
readers=[read_image]):
Expand All @@ -140,12 +141,12 @@ def __init__(self,
self.readers = readers

self.colnames = colnames
self.seriesnames = seriesnames

if isinstance(csv_path_or_dataframe, str):
if id_column is not None and id_column not in colnames:
colnames.append(id_column)
self.paths = pd.read_csv(csv_path_or_dataframe,
usecols=colnames,
index_col=id_column)
elif isinstance(csv_path_or_dataframe, pd.DataFrame):
self.paths = csv_path_or_dataframe
Expand All @@ -164,10 +165,11 @@ def __init__(self,
def __getitem__(self, subject_id):
row = self.paths.loc[subject_id]
paths = {col: row[col] for col in self.colnames}
series = {col: row[col] for col in self.seriesnames}
if self.expand_paths:
# paths = {col: glob.glob(path)[0] for col, path in paths.items()}
paths = {col: glob.glob(path)[0] if pd.notna(path) else None for col, path in paths.items()}
outputs = {col: self.readers[i](path) for i, (col, path) in enumerate(paths.items())}
outputs = {col: self.readers[i](path,series["series_"+("_").join(col.split("_")[1:])]) for i, (col, path) in enumerate(paths.items())}
return self.output_tuple(**outputs)

def keys(self):
Expand Down
10 changes: 6 additions & 4 deletions imgtools/modules/pet.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
import SimpleITK as sitk
import warnings
import datetime
from typing import Optional

def read_image(path):

def read_image(path:str,series_id: Optional[str]=None):
reader = sitk.ImageSeriesReader()
dicom_names = reader.GetGDCMSeriesFileNames(path)
dicom_names = reader.GetGDCMSeriesFileNames(path,seriesID=series_id if series_id else "")
reader.SetFileNames(dicom_names)
reader.MetaDataDictionaryArrayUpdateOn()
reader.LoadPrivateTagsOn()
Expand All @@ -22,7 +24,7 @@ def __init__(self, img_pet, df):
self.df = df

@classmethod
def from_dicom_pet(cls, path, type="SUV"):
def from_dicom_pet(cls, path,series_id=None,type="SUV"):
'''
Reads the PET scan and returns the data frame and the image dosage in SITK format
There are two types of existing formats which has to be mentioned in the type
Expand All @@ -35,7 +37,7 @@ def from_dicom_pet(cls, path, type="SUV"):
If there is no data on SUV/ACT then backup calculation is done based on the formula in the documentation, although, it may
have some error.
'''
pet = read_image(path)
pet = read_image(path,series_id)
path_one = os.path.join(path,os.listdir(path)[0])
df = pydicom.dcmread(path_one)
try:
Expand Down
2 changes: 2 additions & 0 deletions imgtools/ops/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def __init__(self,
self.df_combined = graph.parser(self.modalities)
self.output_streams = [("_").join(cols.split("_")[1:]) for cols in self.df_combined.columns if cols.split("_")[0]=="folder"]
self.column_names = [cols for cols in self.df_combined.columns if cols.split("_")[0]=="folder"]
self.series_names = [cols for cols in self.df_combined.columns if cols.split("_")[0]=="series"]

#Initilizations for the pipeline
for colnames in self.output_streams:
Expand All @@ -116,6 +117,7 @@ def __init__(self,

loader = ImageCSVLoader(self.df_combined,
colnames=self.column_names,
seriesnames=self.series_names,
id_column=None,
expand_paths=True,
readers=self.readers)
Expand Down
92 changes: 92 additions & 0 deletions tests/test_autopipe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import os
from posixpath import dirname
import shutil
import warnings
from multiprocessing import cpu_count

import numpy as np
import SimpleITK as sitk
import pytest
import nrrd
import pandas as pd

from imgtools.autopipeline import AutoPipeline

@pytest.fixture
def dataset_path():
    """Return the ``(input_path, output_path)`` pair used by the pipeline test.

    Both paths are built from the parent of the current working directory,
    using "/" as the separator.
    """
    # Everything before the last "/" of the cwd, i.e. the cwd's parent directory.
    parent_dir, _, _ = os.getcwd().rpartition("/")
    return parent_dir + "/examples/data_test", parent_dir + "/tests/"

@pytest.mark.parametrize("modalities",["PT","CT,RTDOSE","CT,RTSTRUCT,RTDOSE","CT,RTSTRUCT,RTDOSE,PT"])
def test_pipeline(dataset_path,modalities):
    """Run AutoPipeline end to end for one modality combination and check its outputs.

    Checks, in order: the crawl and edge tables exist and have the expected
    number of rows, ``dataset.csv`` holds a single row, and the saved NRRD
    arrays have mutually consistent shapes.

    Parameters
    ----------
    dataset_path
        ``(input_path, output_path)`` pair of "/"-separated path strings.
    modalities
        Comma-separated modality list handed to ``AutoPipeline``.

    Everything the run writes (crawl csv/json, edge csv, temporary output
    folder) is removed in a ``finally`` block, so a failed assertion cannot
    leave artefacts behind for the next parametrized case.
    """
    input_path,output_path = dataset_path
    n_jobs = 2
    output_path_mod = output_path + "temp_folder_" + ("_").join(modalities.split(","))

    # The crawl artefacts are expected next to the input directory, named
    # imgtools_<input dir name> with the suffixes below.
    crawl_prefix = ("/").join(input_path.split("/")[:-1]) + "/imgtools_" + input_path.split("/")[-1]
    crawl_path = crawl_prefix + ".csv"
    json_path = crawl_prefix + ".json"
    edge_path = crawl_prefix + "_edges.csv"

    def load_first(subfolder):
        # Read the first file listed in <output>/<subfolder>; only the array is needed.
        folder = output_path_mod + "/" + subfolder
        array, _ = nrrd.read(folder + "/" + os.listdir(folder)[0])
        return array

    try:
        #Initialize pipeline for the current setting
        pipeline = AutoPipeline(input_path,output_path_mod,modalities,n_jobs=n_jobs)
        #Run for different modalities
        comp_path = os.path.join(output_path_mod, "dataset.csv")
        if n_jobs > 1 or n_jobs == -1:     # == Parallel Processing ==
            pipeline.run()
        elif n_jobs == 1:                  # == Series (Single-core) Processing ==
            subject_ids = pipeline._get_loader_subject_ids()
            for subject_id in subject_ids:
                pipeline.process_one_subject(subject_id)
        # NOTE(review): written on both paths so dataset.csv exists for the
        # check below - confirm this matches the intended nesting.
        pipeline.graph.to_csv(comp_path)

        #Check if the crawl and edges exist
        assert os.path.exists(crawl_path) and os.path.exists(edge_path), "this breaks because there was no crawler output"

        # The assertion expects 7 crawl rows and 4 edge rows.
        # NOTE(review): the earlier comment said "6 files" - confirm the intended count.
        crawl_data = pd.read_csv(crawl_path,index_col = 0)
        edge_data = pd.read_csv(edge_path)
        assert len(crawl_data)==7 and len(edge_data)==4, "this breaks because there was some error in crawling or while making the edge table"

        #Check if the dataset.csv is having the correct number of components and has all the fields
        comp_table = pd.read_csv(comp_path)
        assert len(comp_table)==1, "this breaks because there is some error in making components, check datagraph.parser"

        #Check the nrrd files
        if modalities=="PT":
            dicom = load_first("pet")
            # Last array axis must match the instance count recorded for the PT series.
            assert dicom.shape[-1] == int(crawl_data.loc[crawl_data["modality"]=="PT","instances"].values[0])
        elif modalities=="CT,RTDOSE":
            dicom_ct = load_first("image")
            dicom_dose = load_first("dose")
            assert dicom_ct.shape == dicom_dose.shape
        elif modalities=="CT,RTSTRUCT,RTDOSE":
            dicom_ct = load_first("image")
            dicom_dose = load_first("dose")
            dicom_str = load_first("mask_ct")
            # Mask arrays carry one extra leading axis, hence shape[1:].
            assert dicom_ct.shape == dicom_dose.shape == dicom_str.shape[1:]
        else:
            dicom_ct = load_first("image")
            dicom_dose = load_first("dose")
            dicom_ctstr = load_first("mask_ct")
            dicom_ptstr = load_first("mask_pt")
            dicom_pet = load_first("pet")
            # Mask arrays carry one extra leading axis, hence shape[1:].
            assert dicom_ct.shape == dicom_dose.shape == dicom_ctstr.shape[1:] == dicom_ptstr.shape[1:] == dicom_pet.shape
    finally:
        # Always clean up, even when an assertion above failed.
        for leftover in (crawl_path, json_path, edge_path):
            if os.path.exists(leftover):
                os.remove(leftover)
        shutil.rmtree(output_path_mod, ignore_errors=True)


57 changes: 57 additions & 0 deletions tests/test_modalities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
'''
Tests that each supported DICOM modality (CT, RTSTRUCT, RTDOSE, PT) can be read and has the expected dimensions.
'''


import os
from posixpath import dirname
import shutil
import warnings
from multiprocessing import cpu_count

import numpy as np
import SimpleITK as sitk
import pytest
import pydicom

from imgtools.io import read_dicom_auto
from imgtools.ops import StructureSetToSegmentation, ImageAutoOutput, Resample
from imgtools.pipeline import Pipeline

@pytest.fixture
def modalities_path():
    """Return a dict mapping each modality name to its example DICOM folder.

    The paths are relative strings pointing under ``../examples/data_test``.
    """
    return {
        "CT": "../examples/data_test/patient_1/08-27-1885-CA ORL FDG TEP POS TX-94629/3.000000-Merged-06362",
        "RTSTRUCT": "../examples/data_test/patient_1/08-27-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/Pinnacle POI-41418",
        "RTDOSE": "../examples/data_test/patient_1/08-27-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/11376",
        "PT": "../examples/data_test/patient_1/08-27-1885-CA ORL FDG TEP POS TX-94629/532790.000000-LOR-RAMLA-44600",
    }

@pytest.mark.parametrize("modalities", ["CT", "RTSTRUCT","RTDOSE","PT"])
def test_modalities(modalities,modalities_path):
    """Check that one modality loads with dimensions consistent with its DICOM files.

    Parameters
    ----------
    modalities
        Name of the modality under test ("CT", "RTSTRUCT", "RTDOSE" or "PT").
    modalities_path
        Dict mapping modality names to their DICOM folders.
    """
    # The CT image is the reference for every case.
    reference = read_dicom_auto(modalities_path["CT"])
    folder = modalities_path[modalities]

    if modalities == "RTSTRUCT":
        structure_set = read_dicom_auto(folder)
        to_mask = StructureSetToSegmentation(roi_names=[], continuous=False)
        mask_array = sitk.GetArrayFromImage(to_mask(structure_set, reference))
        assert len(mask_array.shape)==4
        assert mask_array.shape[0:3]==(reference.GetDepth(),reference.GetHeight(),reference.GetWidth())
        return

    #Checks for dimensions
    first_file = os.path.join(folder, os.listdir(folder)[0])
    pixels = pydicom.dcmread(first_file).pixel_array
    n_files = len(os.listdir(folder))
    loaded = read_dicom_auto(folder)
    if n_files <= 1:
        #Single file (RTDOSE): its pixel array is compared against the whole volume
        assert pixels.shape == (loaded.GetDepth(),loaded.GetHeight(),loaded.GetWidth())
    else:
        #Several files (CT and PT): one file is compared against one slice
        assert pixels.shape == (loaded.GetHeight(),loaded.GetWidth())
        assert n_files == loaded.GetDepth()

    # PT and RTDOSE are additionally resampled onto the reference CT.
    resample_method = {"PT": "resample_pet", "RTDOSE": "resample_dose"}.get(modalities)
    if resample_method is not None:
        resampled = getattr(loaded, resample_method)(reference)
        assert resampled.GetSize()==reference.GetSize()

0 comments on commit 941669e

Please sign in to comment.