Commit 971e8d6 (1 parent: 5e93381). Showing 5 changed files with 325 additions and 7 deletions.
@@ -0,0 +1,73 @@
import os
from argparse import ArgumentParser

from imgtools.io import (ImageFileLoader, ImageFileWriter,
                         read_dicom_rtstruct, read_dicom_series, read_dicom_rtdose, read_dicom_pet)
from imgtools.ops import StructureSetToSegmentation, ImageFileInput, ImageFileOutput, Resample
from imgtools.pipeline import Pipeline
class samplePipeline(Pipeline):
    def __init__(self,
                 input_directory,
                 output_directory,
                 spacing,
                 n_jobs):
        # The sample notebook makes it fairly clear that any custom pipeline
        # must inherit from the Pipeline base class; the docstring could state
        # this more explicitly, but it is fine as it is right now.

        # what is the default n_jobs?
        super().__init__(n_jobs=n_jobs)
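        # (n_jobs is forwarded to joblib, where by convention n_jobs=-1 uses
        # all available cores and n_jobs=1 runs serially; the base-class
        # default is not shown in this commit, so this note is an assumption.)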
|
||
self.input_directory = input_directory | ||
self.output_directory = output_directory | ||
self.spacing = spacing | ||
self.image_input = ImageFileInput( | ||
self.input_directory, # where to look for the images | ||
get_subject_id_from="subject_directory", # how to extract the subject ID, 'subject_directory' means use the name of the subject directory | ||
subdir_path="*/NA-*", | ||
# whether the images are stored in a subdirectory of the subject directory (also accepts glob patterns) | ||
reader=read_dicom_series # the function used to read individual images | ||
) | ||
        self.structure_set_input = ImageFileInput(
            self.input_directory,
            get_subject_id_from="subject_directory",
            subdir_path="*/1.000000-ARIA RadOnc Structure Sets-*/1-1.dcm",
            reader=read_dicom_rtstruct
        )

        self.make_binary_mask = StructureSetToSegmentation(roi_names="GTV.*")  # match any ROI name starting with "GTV"
        self.image_output = ImageFileOutput(
            os.path.join(self.output_directory, "images"),  # where to save the processed images
            filename_format="{subject_id}_image.nrrd",      # the filename template; {subject_id} is replaced by each subject's ID at runtime
            create_dirs=True,                               # create output directories that don't already exist
            compress=True                                   # enable compression for the NRRD format
        )
        self.mask_output = ImageFileOutput(
            os.path.join(self.output_directory, "masks"),
            filename_format="{subject_id}_mask.nrrd",
            create_dirs=True,
            compress=True
        )
    def process_one_subject(self, subject_id):
        image = self.image_input(subject_id)
        structure_set = self.structure_set_input(subject_id)
        image = self.resample(image)
        # note that the binary mask can be generated with the correct spacing
        # from the resampled image, eliminating the need to resample it separately

        print(structure_set.roi_names)
        mask = self.make_binary_mask(structure_set, image)
        self.image_output(subject_id, image)
        self.mask_output(subject_id, mask)

if __name__ == "__main__":
    pipeline = samplePipeline(
        input_directory="C:/Users/qukev/BHKLAB/dataset/manifest-1598890146597/NSCLC-Radiomics-Interobserver1",
        output_directory="C:/Users/qukev/BHKLAB/output",
        spacing=(1., 1., 0.),
        n_jobs=1)
    # pipeline.run()
    subject_ids = pipeline._get_loader_subject_ids()
    for subject_id in subject_ids:
        pipeline.process_one_subject(subject_id)
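The ArgumentParser import above is never wired up; a minimal sketch of how the hard-coded paths could instead be taken from the command line (the argument names here are assumptions, not part of the commit):

parser = ArgumentParser("sample pipeline")
parser.add_argument("input_directory", type=str)   # hypothetical positional argument
parser.add_argument("output_directory", type=str)  # hypothetical positional argument
parser.add_argument("--n_jobs", type=int, default=1)
args = parser.parse_args()
pipeline = samplePipeline(input_directory=args.input_directory,
                          output_directory=args.output_directory,
                          spacing=(1., 1., 0.),
                          n_jobs=args.n_jobs)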
@@ -0,0 +1,12 @@
from imgtools.autopipeline import AutoPipeline

if __name__ == "__main__":
    pipeline = AutoPipeline(input_directory="C:/Users/qukev/BHKLAB/dataset/manifest-1598890146597/NSCLC-Radiomics-Interobserver1",
                            output_directory="C:/Users/qukev/BHKLAB/autopipelineoutput",
                            visualize=True)

    print('starting Pipeline...')
    pipeline.run()

    print('finished Pipeline!')
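(After a successful run, AutoPipeline writes the processed images per output stream plus a dataset.csv summary table into output_directory; see the AutoPipeline implementation below.)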
@@ -0,0 +1,218 @@
import os, pathlib
import shutil
import glob
import pickle
import warnings

from argparse import ArgumentParser
import SimpleITK as sitk

from imgtools.ops import StructureSetToSegmentation, ImageAutoInput, ImageAutoOutput, Resample
from imgtools.pipeline import Pipeline
from joblib import Parallel, delayed

###############################################################
# Example usage:
# python radcure_simple.py ./data/RADCURE/data ./RADCURE_output
###############################################################
class AutoPipeline(Pipeline):
    """Example processing pipeline for the RADCURE dataset.

    This pipeline loads the CT images and structure sets, resamples the images,
    and draws the GTV contour using the resampled image.
    """
    def __init__(self,
                 input_directory,
                 output_directory,
                 modalities="CT",
                 spacing=(1., 1., 0.),
                 n_jobs=-1,
                 visualize=False,
                 missing_strategy="drop",
                 show_progress=False,
                 warn_on_error=False):

        super().__init__(
            n_jobs=n_jobs,
            missing_strategy=missing_strategy,
            show_progress=show_progress,
            warn_on_error=warn_on_error)

        # pipeline configuration
        self.input_directory = input_directory
        self.output_directory = output_directory
        self.spacing = spacing
        self.existing = [None]  # self.existing_patients()
        # input operations
        self.input = ImageAutoInput(input_directory, modalities, n_jobs, visualize)

        self.output_df_path = os.path.join(self.output_directory, "dataset.csv")
        # output component table
        self.output_df = self.input.df_combined
        # names of the important columns that need to be saved
        self.output_streams = self.input.output_streams

        # image processing ops
        self.resample = Resample(spacing=self.spacing)
        self.make_binary_mask = StructureSetToSegmentation(roi_names=[], continuous=False)

        # output ops
        self.output = ImageAutoOutput(self.output_directory, self.output_streams)

        # make a temporary directory for per-subject checkpoint files
        if not os.path.exists(os.path.join(self.output_directory, ".temp")):
            os.makedirs(os.path.join(self.output_directory, ".temp"))
    def process_one_subject(self, subject_id):
        """Define the processing operations for one subject.

        This method must be defined for all pipelines. It is used to define
        the preprocessing steps for a single subject (note: that might mean
        multiple images, structures, etc.). During pipeline execution, this
        method will receive one argument, subject_id, which can be used to
        retrieve inputs and save outputs.

        Parameters
        ----------
        subject_id : str
            The ID of the subject to process.
        """
        # skip subjects that were already processed; the checkpoint file name
        # must match the one written at the end of this method
        if os.path.exists(os.path.join(self.output_directory, ".temp", f'{subject_id}.pkl')):
            print(f"{subject_id} already processed")
            return

        print("Processing:", subject_id)

        read_results = self.input(subject_id)
        print(read_results)

        print(subject_id, " start")
        metadata = {}
        for i, colname in enumerate(self.output_streams):
            modality = colname.split("_")[0]

            # strip any trailing numeric suffix (_{num}) to recover the output stream name
            output_stream = "_".join([item for item in colname.split("_") if not item.isnumeric()])

            # a trailing numeric suffix (e.g. _1) marks multiple connections,
            # i.e. two modalities connected to one modality
            mult_conn = colname.split("_")[-1].isnumeric()
            num = colname.split("_")[-1]

            print(output_stream)
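            # illustrative example (column names assumed, not from this commit):
            # colname "RTDOSE_CT_1" -> output_stream "RTDOSE_CT", mult_conn True, num "1"
            # colname "CT"          -> output_stream "CT", mult_conn False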
            if read_results[i] is None:
                print("The subject id: {} has no {}".format(subject_id, colname))
            elif modality == "CT" or modality == 'MR':
                image = read_results[i]
                if len(image.GetSize()) == 4:
                    assert image.GetSize()[-1] == 1, f"There is more than one volume in this CT file for {subject_id}."
                    # extract the single 3D volume from the 4D image
                    extractor = sitk.ExtractImageFilter()
                    extractor.SetSize([*image.GetSize()[:3], 0])
                    extractor.SetIndex([0, 0, 0, 0])

                    image = extractor.Execute(image)
                    print(image.GetSize())
                image = self.resample(image)
                # save the output
                self.output(subject_id, image, output_stream)
                metadata[f"size_{output_stream}"] = str(image.GetSize())
                print(subject_id, " SAVED IMAGE")
elif modality == "RTDOSE": | ||
try: #For cases with no image present | ||
doses = read_results[i].resample_dose(image) | ||
except: | ||
Warning("No CT image present. Returning dose image without resampling") | ||
doses = read_results[i] | ||
|
||
# save output | ||
if not mult_conn: | ||
self.output(subject_id, doses, output_stream) | ||
else: | ||
self.output(f"{subject_id}_{num}", doses, output_stream) | ||
metadata[f"size_{output_stream}"] = str(doses.GetSize()) | ||
metadata[f"metadata_{colname}"] = [read_results[i].get_metadata()] | ||
print(subject_id, " SAVED DOSE") | ||
elif modality == "RTSTRUCT": | ||
#For RTSTRUCT, you need image or PT | ||
structure_set = read_results[i] | ||
conn_to = output_stream.split("_")[-1] | ||
|
||
# make_binary_mask relative to ct/pet | ||
if conn_to == "CT" or conn_to == "MR": | ||
mask = self.make_binary_mask(structure_set, image) | ||
elif conn_to == "PT": | ||
mask = self.make_binary_mask(structure_set, pet) | ||
else: | ||
raise ValueError("You need to pass a reference CT or PT/PET image to map contours to.") | ||
|
||
# save output | ||
if not mult_conn: | ||
self.output(subject_id, mask, output_stream) | ||
else: | ||
self.output(f"{subject_id}_{num}", mask, output_stream) | ||
metadata[f"metadata_{colname}"] = [structure_set.roi_names] | ||
|
||
print(subject_id, "SAVED MASK ON", conn_to) | ||
elif modality == "PT": | ||
try: | ||
#For cases with no image present | ||
pet = read_results[i].resample_pet(image) | ||
except: | ||
Warning("No CT image present. Returning PT/PET image without resampling.") | ||
pet = read_results[i] | ||
|
||
if not mult_conn: | ||
self.output(subject_id, pet, output_stream) | ||
else: | ||
self.output(f"{subject_id}_{num}", pet, output_stream) | ||
metadata[f"size_{output_stream}"] = str(pet.GetSize()) | ||
metadata[f"metadata_{colname}"] = [read_results[i].get_metadata()] | ||
print(subject_id, " SAVED PET") | ||
#Saving all the metadata in multiple text files | ||
with open(os.path.join(self.output_directory,".temp",f'{subject_id}.pkl'),'wb') as f: | ||
pickle.dump(metadata,f) | ||
return | ||
|
||
    def save_data(self):
        files = glob.glob(os.path.join(self.output_directory, ".temp", "*.pkl"))
        for file in files:
            filename = pathlib.Path(file).name
            subject_id = os.path.splitext(filename)[0]
            with open(file, "rb") as f:
                metadata = pickle.load(f)
            # merge each subject's checkpointed metadata into the component table
            self.output_df.loc[subject_id, list(metadata.keys())] = list(metadata.values())
        self.output_df.to_csv(self.output_df_path)
        shutil.rmtree(os.path.join(self.output_directory, ".temp"))
    def run(self):
        """Execute the pipeline, possibly in parallel."""
        # joblib prints progress to stdout if verbose > 50
        verbose = 51 if self.show_progress else 0

        subject_ids = self._get_loader_subject_ids()
        # Note that returning any SimpleITK object in process_one_subject is
        # not supported yet, since they cannot be pickled
        if os.path.exists(self.output_df_path):
            print("Dataset already processed...")
            shutil.rmtree(os.path.join(self.output_directory, ".temp"))
        else:
            Parallel(n_jobs=self.n_jobs, verbose=verbose)(
                delayed(self._process_wrapper)(subject_id) for subject_id in subject_ids)
            self.save_data()
if __name__ == "__main__":
    pipeline = AutoPipeline(input_directory="C:/Users/qukev/BHKLAB/dataset/manifest-1598890146597/NSCLC-Radiomics-Interobserver1",
                            output_directory="C:/Users/qukev/BHKLAB/autopipelineoutput",
                            visualize=True)

    print('starting Pipeline...')
    pipeline.run()

    print('finished Pipeline!')