Skip to content

Commit

Permalink
test_components, test_modalities works with new AutoPipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
skim2257 committed Jun 19, 2022
1 parent a786724 commit e51fe82
Show file tree
Hide file tree
Showing 11 changed files with 218 additions and 122 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ examples/adsf.py
data
examples/data/tcia_n*
scratch.ipynb
tests/temp

# macOS
.DS_Store
Expand Down
14 changes: 8 additions & 6 deletions imgtools/autopipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,14 @@ def __init__(self,
self.label_names = {}
self.ignore_missing_regex = ignore_missing_regex

with open(pathlib.Path(self.input_directory, "roi_names.yaml").as_posix(), "r") as f:
try:
self.label_names = yaml.safe_load(f)
except yaml.YAMLError as exc:
print(exc)
roi_path = pathlib.Path(self.input_directory, "roi_names.yaml").as_posix()

if os.path.exists(roi_path):
with open(roi_path, "r") as f:
try:
self.label_names = yaml.safe_load(f)
except yaml.YAMLError as exc:
print(exc)

if not isinstance(self.label_names, dict):
raise ValueError("roi_names.yaml must parse as a dictionary")
Expand Down Expand Up @@ -436,7 +439,6 @@ def main():
"""Print nnU-Net specific info here:
* dataset.json can be found at /path/to/dataset/json
* You can train nnU-Net by cloning /path/to/nnunet/repo and run `nnUNet_plan_and_preprocess -t taskID` to let the nnU-Net package prepare
*
"""

if __name__ == "__main__":
Expand Down
30 changes: 15 additions & 15 deletions imgtools/io/dataset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from genericpath import exists
import os, pathlib
import os, pathlib, ast
from typing import List, Sequence, Optional, Callable, Iterable, Dict,Tuple
from tqdm import tqdm

Expand Down Expand Up @@ -29,7 +29,7 @@ def __init__(
self.path = path

@classmethod
def load_from_nrrd(
def load_image(
cls,
path:str,
ignore_multi: bool = True,
Expand All @@ -46,37 +46,37 @@ def load_from_nrrd(
df_metadata = pd.read_csv(path_metadata,index_col=0)

for col in df_metadata.columns:
if col.startswith("folder"):
if col.startswith("output_folder"):
df_metadata[col] = df_metadata[col].apply(lambda x: pathlib.Path(os.path.split(os.path.dirname(path))[0], x).as_posix() if isinstance(x, str) else x) #input folder joined with the rel path

output_streams = [("_").join(cols.split("_")[1:]) for cols in df_metadata.columns if cols.split("_")[0] == "folder"]
output_streams = [("_").join(cols.split("_")[2:]) for cols in df_metadata.columns if cols.split("_")[0] == "output"]
imp_metadata = [cols for cols in df_metadata.columns if cols.split("_")[0] in ("metadata")]
#Ignores multiple connection to single modality
if ignore_multi:
output_streams = [items for items in output_streams if items.split("_")[-1].isnumeric()==False]
imp_metadata = [items for items in imp_metadata if items.split("_")[-1].isnumeric()==False]

#Based on the file naming convention
file_names = file_name_convention()
subject_id_list = list(df_metadata.index)
subjects = []
for subject_id in tqdm(subject_id_list):
temp = {}
for col in output_streams:
mult_conn = col.split("_")[-1].isnumeric()
metadata_name = f"metadata_{col}"
if mult_conn:
extra = col.split("_")[-1]+"_"
extension = file_names[("_").join(col.split("_")[:-1])]
if 'RTSTRUCT' in col:
filenames = ast.literal_eval(df_metadata.loc[subject_id]['metadata_RTSTRUCT_CT'])[0]
filename = filenames[0]
else:
extension = file_names[col]
extra = ""
path_mod = pathlib.Path(path,extension.split(".")[0],f"{subject_id}_{extra}{extension}.nrrd").as_posix()
#All modalities except RTSTRUCT should be of type torchIO.ScalarImage
filename = col
path_mod = pathlib.Path(path, subject_id, col, f"{filename}.nii.gz").as_posix()
print(path_mod)
#All modalities except RTSTRUCT should be of type torchIO.ScalarImage
if os.path.exists(path_mod):
if col.split("_")[0]!="RTSTRUCT":
temp[f"mod_{col}"] = tio.ScalarImage(path_mod)
else:
temp[f"mod_{col}"] = tio.LabelMap(path_mod)
path_mods = [pathlib.Path(path, subject_id, col, f"{filename}.nii.gz").as_posix() for filename in filenames]
temp[f"mod_{col}"] = tio.LabelMap(path_mods)
else:
temp[f"mod_{col}"] = None
#For including metadata
Expand All @@ -89,7 +89,7 @@ def load_from_nrrd(
#torch dataloader doesnt accept None type
temp[metadata_name] = {}
subjects.append(tio.Subject(temp))
return cls(subjects,path)
return cls(subjects, path)

@classmethod
def load_directly(
Expand Down
4 changes: 1 addition & 3 deletions imgtools/io/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
from ..utils.crawl import *
from ..utils.dicomutils import *



def read_image(path):
    '''
    Reads the image stored at `path` with SimpleITK and returns it
    '''
    image = sitk.ReadImage(path)
    return image

Expand Down Expand Up @@ -91,7 +89,7 @@ def read_dicom_auto(path, series=None):
modality = meta.Modality
all_modality_metadata = all_modalities_metadata(meta)
if modality == 'CT' or modality == 'MR':
dicom_series = read_dicom_series(path,series, modality=modality)
dicom_series = read_dicom_series(path,series)#, modality=modality)
if modality == 'CT':
dicom_series.metadata.update(ct_metadata(meta))
dicom_series.metadata.update(all_modality_metadata)
Expand Down
79 changes: 70 additions & 9 deletions imgtools/modules/datagraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ class DataGraph:
3) edge_type:2 RTSTRUCT(key:ref_ct) -> CT(pair: series)
4) edge_type:3 RTSTRUCT(key:ref_ct) -> PT(pair: series)
5) edge_type:4 CT(key:study) -> PT(pair: study)
6) edge_type:5 RTDOSE(key: ref_pl) -> RTPLAN(pair: instance)
7) edge_type:6 RTPLAN(key: ref_rs) -> RTSTRUCT(pair: series/instance)
Once the edge table is formed, one can query on the graph to get the desired results. For uniformity, the supported query is list of modalities to consider
For ex:
Expand Down Expand Up @@ -61,24 +63,26 @@ def form_graph(self):
df_filter.drop(columns=["reference_rs_y", "instance_uid_y"], inplace=True)
df_filter.rename(columns={"reference_rs_x":"reference_rs", "instance_uid_x":"instance_uid"}, inplace=True)

#Remove entries with no RT dose reference, for extra check, such cases are mostprobably removed in the earlier step
#Remove entries with no RTDOSE reference as an extra check; such cases are most probably removed in the earlier step
df_filter = df_filter.loc[~((df_filter["modality"] == "RTDOSE") & (df_filter["reference_ct"].isna()) & (df_filter["reference_rs"].isna()))]

#Get all study ids
all_study= df_filter.study.unique()
# all_study = df_filter.study.unique()
start = time.time()

#Defining Master df to store all the Edge dataframes
self.df_master = []
# self.df_master = []

for i in tqdm(range(len(all_study))):
self._form_edge_study(df_filter, all_study, i)
# for i in tqdm(range(len(all_study))):
# self._form_edge_study(df_filter, all_study, i)

# df_edge_patient = form_edge_study(df,all_study,i)

self.df_edges = self._form_edges(self.df) #pd.concat(self.df_master, axis=0, ignore_index=True)
end = time.time()
print(f"\nTotal time taken: {end - start}")

self.df_edges = pd.concat(self.df_master, axis=0, ignore_index=True)

self.df_edges.loc[self.df_edges.study_x.isna(),"study_x"] = self.df_edges.loc[self.df_edges.study_x.isna(), "study"]
#dropping some columns
self.df_edges.drop(columns=["study_y", "patient_ID_y", "series_description_y", "study_description_y", "study"],inplace=True)
Expand Down Expand Up @@ -122,6 +126,56 @@ def visualize_graph(self):
vis_path = pathlib.Path(os.path.dirname(self.edge_path),"datanet.html").as_posix()
data_net.show(vis_path)

def _form_edges(self, df):
'''
For a given study id forms edge table
'''

df_list = []

# Split into each modality
plan = df[df["modality"] == "RTPLAN"]
dose = df[df["modality"] == "RTDOSE"]
struct = df[df["modality"] == "RTSTRUCT"]
ct = df[df["modality"] == "CT"]
mr = df[df["modality"] == "MR"]
pet = df[df["modality"] == "PT"]

edge_types = np.arange(7)
for edge in edge_types:
if edge==0: # FORMS RTDOSE->RTSTRUCT, can be formed on both series and instance uid
df_comb1 = pd.merge(struct, dose, left_on="instance_uid", right_on="reference_rs")
df_comb2 = pd.merge(struct, dose, left_on="series", right_on="reference_rs")
df_combined = pd.concat([df_comb1, df_comb2])
#Cases where both series and instance_uid are the same for struct
df_combined = df_combined.drop_duplicates(subset=["instance_uid_x"])

elif edge==1: # FORMS RTDOSE->CT
df_combined = pd.merge(ct, dose, left_on="series", right_on="reference_ct")

elif edge==2: # FORMS RTSTRUCT->CT on ref_ct to series
df_ct = pd.merge(ct, struct, left_on="series", right_on="reference_ct")
df_mr = pd.merge(mr, struct, left_on="series", right_on="reference_ct")
df_combined = pd.concat([df_ct, df_mr])

elif edge==3: # FORMS RTSTRUCT->PET on ref_ct to series
df_combined = pd.merge(pet, struct, left_on="series", right_on="reference_ct")

elif edge==4: # FORMS PET->CT on study
df_combined = pd.merge(ct, pet, left_on="study", right_on="study")

elif edge==5:
df_combined = pd.merge(plan, dose, left_on="instance_uid", right_on="reference_pl")

else:
df_combined = pd.merge(struct, plan, left_on="instance_uid", right_on="reference_rs")

df_combined["edge_type"] = edge
df_list.append(df_combined)

df_edges = pd.concat(df_list, axis=0, ignore_index=True)
return df_edges

def _form_edge_study(self, df, all_study, study_id):
'''
For a given study id forms edge table
Expand All @@ -130,14 +184,15 @@ def _form_edge_study(self, df, all_study, study_id):
df_study = df.loc[self.df["study"] == all_study[study_id]]
df_list = []

#Bifurcating the dataframe
# Split into each modality
plan = df_study.loc[df_study["modality"] == "RTPLAN"]
dose = df_study.loc[df_study["modality"] == "RTDOSE"]
struct = df_study.loc[df_study["modality"] == "RTSTRUCT"]
ct = df_study.loc[df_study["modality"] == "CT"]
mr = df_study.loc[df_study["modality"] == "MR"]
pet = df_study.loc[df_study["modality"] == "PT"]

edge_types = np.arange(5)
edge_types = np.arange(7)
for edge in edge_types:
if edge==0: # FORMS RTDOSE->RTSTRUCT, can be formed on both series and instance uid
df_comb1 = pd.merge(struct, dose, left_on="instance_uid", right_on="reference_rs")
Expand All @@ -157,9 +212,15 @@ def _form_edge_study(self, df, all_study, study_id):
elif edge==3: # FORMS RTSTRUCT->PET on ref_ct to series
df_combined = pd.merge(pet, struct, left_on="series", right_on="reference_ct")

else: # FORMS PET->CT on study
elif edge==4: # FORMS PET->CT on study
df_combined = pd.merge(ct, pet, left_on="study", right_on="study")

elif edge==5:
df_combined = pd.merge(plan, dose, left_on="instance", right_on="reference_pl")

else:
df_combined = pd.merge(struct, plan, left_on="instance", right_on="reference_rs")

df_combined["edge_type"] = edge
df_list.append(df_combined)

Expand Down
16 changes: 10 additions & 6 deletions imgtools/modules/dose.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os, pathlib
import os, pathlib, glob
import warnings
import copy

Expand All @@ -15,6 +15,7 @@
def read_image(path):
reader = sitk.ImageSeriesReader()
dicom_names = reader.GetGDCMSeriesFileNames(path)
print(dicom_names)
reader.SetFileNames(dicom_names)
reader.MetaDataDictionaryArrayUpdateOn()
reader.LoadPrivateTagsOn()
Expand All @@ -37,12 +38,15 @@ def from_dicom_rtdose(cls, path):
'''
Reads the data and returns the data frame and the image dosage in SITK format
'''
dose = read_image(path)[:,:,:,0]
# dose = sitk.ReadImage(glob.glob(path + "/*"))
dcms = glob.glob(pathlib.Path(path, "*.dcm").as_posix())

if len(dcms) < 2:
dose = sitk.ReadImage(dcms[0])
else:
dose = read_image(path)[:,:,:,0]

#Get the metadata
dcm_path = pathlib.Path(path, os.listdir(path)[0]).as_posix()
df = dcmread(dcm_path)
df = dcmread(dcms[0])

#Convert to SUV
factor = float(df.DoseGridScaling)
Expand All @@ -59,7 +63,7 @@ def resample_dose(self,
Resamples the RTDOSE information so that it can be overlayed with CT scan. The beginning and end slices of the
resampled RTDOSE scan might be empty due to the interpolation
'''
resampled_dose = sitk.Resample(self.img_dose, ct_scan, interpolator=sitk.sitkNearestNeighbor)
resampled_dose = sitk.Resample(self.img_dose, ct_scan)#, interpolator=sitk.sitkNearestNeighbor)
return resampled_dose

def show_overlay(self,
Expand Down
2 changes: 1 addition & 1 deletion imgtools/modules/pet.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def resample_pet(self,
Resamples the PET scan so that it can be overlayed with CT scan. The beginning and end slices of the
resampled PET scan might be empty due to the interpolation
'''
resampled_pt = sitk.Resample(self.img_pet, ct_scan, interpolator=sitk.sitkNearestNeighbor)
resampled_pt = sitk.Resample(self.img_pet, ct_scan)#, interpolator=sitk.sitkNearestNeighbor) # commented interporator due to error
return resampled_pt

def show_overlay(self,
Expand Down
Loading

0 comments on commit e51fe82

Please sign in to comment.