PR tests - macos/ubuntu failing (#13)
* Added test autopipeline and modalities, solved some autopipeline bugs, read_dicom_series and pet now supports series_id
* PT/RTDOSE metadata to csv
* fixed some bugs in autopipeline.py
* now the pipeline saves on exit
* deleted data
* now checks for existing subject id
* uncommented one line pytest
* uncommented one line pytest
* Added dataset class which can load from nrrds or directly from the dataset and convert to pytorch dataset
* bug fixes_1.0
* test and autopipe fixed
* bug fixes 2
* fixed pipeline tests
* clean tests
* added workflow
* yml
* yml
* matplotlib
* trying other patient to avoid memoryerror
* set roi_names to avoid memoryerror
* cave
* indents
* Update manual-test.yml

Co-authored-by: Vishwesh <vishweshramanathan@gmail.com>
Showing 13 changed files with 374 additions and 99 deletions.
@@ -1,4 +1,5 @@
# data
data
examples/data/tcia_n*

# macOS
@@ -1,3 +1,4 @@
from .common import *
from .loaders import *
from .writers import *
from .dataset import *
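
With the new dataset module re-exported from the io package, the class added in this commit becomes importable from imgtools.io. A minimal sketch of what the star import enables (assuming dataset.py exposes the Dataset class shown further below; the directory path and modality string are illustrative placeholders):

```python
# Minimal sketch: the star re-export above makes Dataset available from imgtools.io.
from imgtools.io import Dataset

# Build a torchio SubjectsDataset straight from a DICOM directory.
# "/path/to/dicom" and the modality string are placeholder values.
subjects_dataset = Dataset.load_directly("/path/to/dicom", modalities="CT,RTDOSE", n_jobs=1)
print(len(subjects_dataset))
```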
@@ -0,0 +1,183 @@
from genericpath import exists
import os
import numpy as np
from typing import List, Sequence, Optional, Callable, Iterable, Dict, Tuple
import torchio as tio
import pandas as pd
# from . import file_name_convention
# from ..ops import StructureSetToSegmentation, ImageAutoInput, Resample, BaseOp
from imgtools.io import file_name_convention
from imgtools.ops import StructureSetToSegmentation, ImageAutoInput, Resample, BaseOp
from tqdm import tqdm
from joblib import Parallel, delayed
import SimpleITK as sitk
import warnings
from imgtools.pipeline import Pipeline


class Dataset(tio.SubjectsDataset):
    """
    This class takes a medical imaging dataset, either as processed nrrd files or directly from the
    DICOM dataset, and converts the data into torchio.Subject objects, which can be loaded into a
    torchio.SubjectsDataset.
    The class inherits from torchio.SubjectsDataset, so it supports transforms and torch DataLoaders.
    Read more about torchio at https://torchio.readthedocs.io/quickstart.html and about
    torchio.SubjectsDataset at https://github.com/fepegar/torchio/blob/3e07b78da16d6db4da7193325b3f9cb31fc0911a/torchio/data/dataset.py#L101
    """
    def __init__(
            self,
            subjects: Sequence[tio.Subject],
            transform: Optional[Callable] = None,
            load_getitem: bool = True
            ) -> None:
        super().__init__(subjects, transform, load_getitem)

    @classmethod
    def load_from_nrrd(
            cls,
            path: str,
            transform: Optional[Callable] = None,
            load_getitem: bool = True
            ) -> "Dataset":
        """
        Based on the given path, reads the processed nrrd files present in the directory along with
        the metadata associated with them and creates a list of Subject instances.
        Parameters
            path: Path to the output directory passed to the autopipeline script. The output directory
                  should contain a folder for every modality requested by the user, as well as a
                  dataset.csv file that stores all the metadata.
        """
        path_metadata = os.path.join(path, "dataset.csv")
        if not os.path.exists(path_metadata):
            raise ValueError("The specified path does not contain the expected metadata file {}".format(path_metadata))
        df_metadata = pd.read_csv(path_metadata, index_col=0)
        output_streams = [("_").join(cols.split("_")[1:]) for cols in df_metadata.columns if cols.split("_")[0] == "folder"]
        imp_metadata = [cols for cols in df_metadata.columns if cols.split("_")[0] == "metadata"]
        # Based on the file naming taxonomy
        file_names = file_name_convention()
        subject_id_list = list(df_metadata.index)
        subjects = []
        for subject_id in tqdm(subject_id_list):
            temp = {}
            for col in output_streams:
                extension = file_names[col]
                mult_conn = col.split("_")[-1].isnumeric()
                metadata_name = f"metadata_{col}"
                if mult_conn:
                    extra = col.split("_")[-1] + "_"
                else:
                    extra = ""
                path_mod = os.path.join(path, extension.split(".")[0], f"{subject_id}_{extra}{extension}.nrrd")
                # All modalities except RTSTRUCT should be of type torchio.ScalarImage
                if col != "RTSTRUCT":
                    temp[f"mod_{col}"] = tio.ScalarImage(path_mod)
                else:
                    temp[f"mod_{col}"] = tio.LabelMap(path_mod)
                # For including metadata
                if metadata_name in imp_metadata:
                    # convert string to proper datatype
                    temp[metadata_name] = df_metadata.loc[subject_id, metadata_name][0]
            subjects.append(tio.Subject(temp))
        return cls(subjects, transform, load_getitem)

    @classmethod
    def load_directly(
            cls,
            path: str,
            modalities: str,
            n_jobs: int = -1,
            spacing: Tuple = (1., 1., 0.),
            transform: Optional[Callable] = None,
            load_getitem: bool = True
            ) -> "Dataset":
        """
        Based on the given path, imgtools crawls through the directory, forms the datagraph and picks
        the user-defined modalities. The paths are processed into sitk.Image objects, which together
        with the associated metadata are used to create a list of Subject instances.
        Parameters
            path: Path to the directory of the dataset
            modalities: Comma-separated string of the modalities to load (e.g. "CT,RTDOSE,PT")
            n_jobs: Number of parallel jobs used while processing the subjects
            spacing: Target voxel spacing used when resampling the images
        """
        input = ImageAutoInput(path, modalities, n_jobs)
        df_metadata = input.df_combined
        output_streams = input.output_streams
        # Basic operations
        subject_id_list = list(df_metadata.index)
        # basic image processing ops
        resample = Resample(spacing=spacing)
        make_binary_mask = StructureSetToSegmentation(roi_names=[], continuous=False)
        subjects = Parallel(n_jobs=n_jobs)(delayed(cls.process_one_subject)(input, subject_id, output_streams, resample, make_binary_mask) for subject_id in tqdm(subject_id_list))
        return cls(subjects, transform, load_getitem)

    @staticmethod
    def process_one_subject(
            input: Pipeline,
            subject_id: str,
            output_streams: List[str],
            resample: BaseOp,
            make_binary_mask: BaseOp,
            ) -> tio.Subject:
        """
        Process all modalities for one subject.
        Parameters:
            input: ImageAutoInput instance which helps in loading the respective DICOMs
            subject_id: Subject id of the data
            output_streams: The modalities that are being considered. Note that there can be multiple
                            items of the same modality based on their relations with other modalities.
            resample: Transformation which resamples a sitk.Image
            make_binary_mask: Transformation used to make binary masks from rtstructs
        Returns a tio.Subject instance for the given subject id.
        """
        temp = {}
        read_results = input(subject_id)
        for i, colname in enumerate(output_streams):
            modality = colname.split("_")[0]
            output_stream = ("_").join([item for item in colname.split("_") if item != "1"])

            if read_results[i] is None:
                temp[f"mod_{colname}"] = None
            elif modality == "CT":
                image = read_results[i]
                if len(image.GetSize()) == 4:
                    assert image.GetSize()[-1] == 1, f"There is more than one volume in this CT file for {subject_id}."
                    extractor = sitk.ExtractImageFilter()
                    extractor.SetSize([*image.GetSize()[:3], 0])
                    extractor.SetIndex([0, 0, 0, 0])
                    image = extractor.Execute(image)
                image = resample(image)
                temp[f"mod_{colname}"] = tio.ScalarImage.from_sitk(image)
            elif modality == "RTDOSE":
                try:
                    # Resample the dose grid onto the CT geometry
                    doses = read_results[i].resample_dose(image)
                except Exception:
                    # For cases with no CT image present
                    warnings.warn("No CT image present. Returning dose image without resampling.")
                    doses = read_results[i]
                temp[f"mod_{colname}"] = tio.ScalarImage.from_sitk(doses)
                temp[f"metadata_{colname}"] = read_results[i].get_metadata()
            elif modality == "RTSTRUCT":
                # For RTSTRUCT, a reference CT or PT image is needed
                structure_set = read_results[i]
                conn_to = output_stream.split("_")[-1]
                # make_binary_mask relative to ct/pet
                if conn_to == "CT":
                    mask = make_binary_mask(structure_set, image)
                elif conn_to == "PT":
                    mask = make_binary_mask(structure_set, pet)
                else:
                    raise ValueError("You need to pass a reference CT or PT/PET image to map contours to.")
                temp[f"mod_{colname}"] = tio.LabelMap.from_sitk(mask)
                temp[f"metadata_{colname}"] = structure_set.roi_names
            elif modality == "PT":
                try:
                    # Resample the PET image onto the CT geometry
                    pet = read_results[i].resample_pet(image)
                except Exception:
                    # For cases with no CT image present
                    warnings.warn("No CT image present. Returning PT/PET image without resampling.")
                    pet = read_results[i]
                temp[f"mod_{colname}"] = tio.ScalarImage.from_sitk(pet)
                temp[f"metadata_{colname}"] = read_results[i].get_metadata()
        return tio.Subject(temp)


if __name__ == "__main__":
    from torch.utils.data import DataLoader
    # output_path = "/cluster/projects/radiomics/Temp/vishwesh/HN-CT_RTdose_test2"
    input_path = "/cluster/home/ramanav/imgtools/examples/data_test"
    transform = tio.Compose([tio.Resize(256)])
    # subjects_dataset = Dataset.load_from_nrrd(output_path, transform=transform)
    subjects_dataset = Dataset.load_directly(input_path, modalities="CT,RTDOSE,PT", n_jobs=4, transform=transform)
    print(len(subjects_dataset))
    training_loader = DataLoader(subjects_dataset, batch_size=4)
    items = next(iter(training_loader))
    print(items["mod_RTDOSE_CT"])
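
For the nrrd-based path that the __main__ block above leaves commented out, usage mirrors load_directly. A minimal sketch, assuming the autopipeline has already written the per-modality nrrd folders plus dataset.csv to an output directory (the path and batch size below are placeholders):

```python
import torchio as tio
from torch.utils.data import DataLoader
from imgtools.io import Dataset  # relies on the star re-export added in this commit

output_path = "/path/to/autopipeline/output"  # placeholder: folder with per-modality nrrds + dataset.csv
transform = tio.Compose([tio.Resize(256)])

# Build subjects from the already-processed nrrd files and batch them with torch,
# just like the __main__ block does for load_directly.
subjects_dataset = Dataset.load_from_nrrd(output_path, transform=transform)
training_loader = DataLoader(subjects_dataset, batch_size=2)
batch = next(iter(training_loader))
print(batch.keys())  # mod_* entries per modality, plus any metadata_* columns that were kept
```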