PR tests - macos/ubuntu failing (#13)
* Added test autopipeline and modalities, solved some autopipeline bugs; read_dicom_series and pet now support series_id

* PT/RTDOSE metadata to csv

* fixed some bugs in autopipeline.py

* now the pipeline saves on exit

* deleted data

* now checks for existing subject id

* uncommented one line pytest

* uncommented one line pytest

* Added dataset class which can load from NRRDs or directly from the dataset and convert to a PyTorch dataset

* bug fixes_1.0

* test and autopipe fixed

* bug fixes 2

* fixed pipeline tests

* clean tests

* added workflow

* yml

* yml

* matplotlib

* trying another patient to avoid MemoryError

* set roi_names to avoid MemoryError

* cave

* indents

* Update manual-test.yml

Co-authored-by: Vishwesh <vishweshramanathan@gmail.com>
skim2257 and Vishwesh4 authored Dec 10, 2021
1 parent c17f6b5 commit 47f3122
Showing 13 changed files with 374 additions and 99 deletions.
17 changes: 13 additions & 4 deletions .github/workflows/manual-test.yml
@@ -9,11 +9,15 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: [3.7, 3.8, 3.9]
os:
- ubuntu-latest
- macos-latest
- windows-latest
python-version:
- 3.7
- 3.8
- 3.9



steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
@@ -32,3 +36,8 @@ jobs:
- name: Run pytest
run: |
pytest tests
- name: Slack Notification
if: ${{always() && matrix.os == 'ubuntu-latest'}}
uses: rtCamp/action-slack-notify@v2
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }}
1 change: 1 addition & 0 deletions .gitignore
@@ -1,4 +1,5 @@
# data
data
examples/data/tcia_n*

# macOS
47 changes: 42 additions & 5 deletions imgtools/autopipeline.py
@@ -1,4 +1,10 @@
import os
import shutil
import warnings
import glob
import ast
import datetime
import json

from argparse import ArgumentParser

@@ -7,10 +13,11 @@

import SimpleITK as sitk
import pandas as pd
import warnings
import numpy as np

from joblib import Parallel, delayed
import glob
import ast


###############################################################
# Example usage:
# python radcure_simple.py ./data/RADCURE/data ./RADCURE_output
@@ -61,6 +68,10 @@ def __init__(self,
# output ops
self.output = ImageAutoOutput(self.output_directory, self.output_streams)

#Make a directory
if not os.path.exists(os.path.join(self.output_directory,".temp")):
os.mkdir(os.path.join(self.output_directory,".temp"))


def process_one_subject(self, subject_id):
"""Define the processing operations for one subject.
@@ -75,7 +86,7 @@ def process_one_subject(self, subject_id):
The ID of the subject to process
"""
#Check if the subject_id has already been processed
if os.path.exists(os.path.join(self.output_directory,f'temp_{subject_id}.txt')):
if os.path.exists(os.path.join(self.output_directory,".temp",f'temp_{subject_id}.json')):
print(f"{subject_id} already processed")
return

@@ -96,7 +107,9 @@
output_stream = ("_").join([item for item in colname.split("_") if item != "1"])

#If multiple connections exist (two modalities connected to one modality), the column names end with a numeric suffix such as _1
mult_conn = colname.split("_")[-1] == "1"
mult_conn = colname.split("_")[-1].isnumeric()
num = colname.split("_")[-1]

print(output_stream)

if read_results[i] is None:
@@ -130,6 +143,8 @@
else:
counter[modality] = counter[modality]+1
self.output(f"{subject_id}_{counter[modality]}", doses, output_stream)
# self.output(f"{subject_id}_{num}", doses, output_stream)

metadata[f"size_{output_stream}"] = str(doses.GetSize())
metadata[f"metadata_{output_stream}"] = str(read_results[i].get_metadata())
print(subject_id, " SAVED DOSE")
@@ -152,7 +167,10 @@
else:
counter[modality] = counter[modality] + 1
self.output(f"{subject_id}_{counter[modality]}", mask, output_stream)
# self.output(f"{subject_id}_{num}", mask, output_stream)

metadata[f"roi_names_{output_stream}"] = str(structure_set.roi_names)
# metadata[f"metadata_{colname}"] = [structure_set.roi_names]

print(subject_id, "SAVED MASK ON", conn_to)
elif modality == "PT":
@@ -175,7 +193,26 @@
with open(os.path.join(self.output_directory,f'temp_{subject_id}.txt'),'w') as f:
f.write(str(metadata))
return

# self.output(f"{subject_id}_{num}", pet, output_stream)
# metadata[f"size_{output_stream}"] = str(pet.GetSize())
# metadata[f"metadata_{colname}"] = [read_results[i].get_metadata()]
# print(subject_id, " SAVED PET")
# #Saving all the metadata in multiple text files
# with open(os.path.join(self.output_directory,".temp",f'temp_{subject_id}.json'),'w') as f:
# json.dump(metadata,f)
# return

# def save_data(self):
# files = glob.glob(os.path.join(self.output_directory,".temp","*.json"))
# for file in files:
# subject_id = ("_").join(file.replace("/","_").replace(".","_").split("_")[-3:-1])
# with open(file) as f:
# metadata = json.load(f)
# self.output_df.loc[subject_id, list(metadata.keys())] = list(metadata.values())
# self.output_df.to_csv(self.output_df_path)
# shutil.rmtree(os.path.join(self.output_directory,".temp"))

def save_data(self):
files = glob.glob(os.path.join(self.output_directory,"*.txt"))
for file in files:
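The .temp bookkeeping in this diff gives the pipeline a crash-safe resume: each subject writes a marker file only after it finishes processing, process_one_subject skips subjects whose marker already exists, and save_data folds the markers back into dataset.csv on exit. Below is a minimal sketch of that pattern, following the JSON variant the commit still leaves commented out; output_df, output_df_path, and the sample metadata are stand-ins, not the project's actual values.

import glob
import json
import os
import shutil

import pandas as pd

def process_one_subject(output_directory: str, subject_id: str) -> None:
    # Resume support: skip subjects whose marker file already exists.
    marker = os.path.join(output_directory, ".temp", f"temp_{subject_id}.json")
    if os.path.exists(marker):
        print(f"{subject_id} already processed")
        return
    metadata = {"size_CT": "(512, 512, 100)"}  # stand-in for the real metadata dict
    # ... process and save the subject's images here ...
    # Write the marker last, so it exists only if processing completed.
    with open(marker, "w") as f:
        json.dump(metadata, f)

def save_data(output_directory: str, output_df: pd.DataFrame, output_df_path: str) -> None:
    # On exit, fold every per-subject marker into dataset.csv, then clean up.
    for file in glob.glob(os.path.join(output_directory, ".temp", "*.json")):
        subject_id = os.path.basename(file)[len("temp_"):-len(".json")]
        with open(file) as f:
            metadata = json.load(f)
        output_df.loc[subject_id, list(metadata.keys())] = list(metadata.values())
    output_df.to_csv(output_df_path)
    shutil.rmtree(os.path.join(output_directory, ".temp"))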
1 change: 1 addition & 0 deletions imgtools/io/__init__.py
@@ -1,3 +1,4 @@
from .common import *
from .loaders import *
from .writers import *
from .dataset import *
15 changes: 15 additions & 0 deletions imgtools/io/common.py
@@ -1,4 +1,5 @@
import os
from typing import Dict

from pydicom.misc import is_dicom

@@ -34,3 +35,17 @@ def find_dicom_paths(root_path: str, yield_directories: bool = False) -> str:
fpath = os.path.join(root, f)
if is_dicom(fpath):
yield fpath

def file_name_convention() -> Dict:
"""
This function returns the file-naming taxonomy used by the ImageAutoOutput and Dataset classes
"""
file_name_convention = {"CT": "image",
"RTDOSE_CT": "dose",
"RTSTRUCT_CT": "mask_ct.seg",
"RTSTRUCT_PT": "mask_pt.seg",
"PT_CT": "pet",
"PT": "pet",
"RTDOSE": "dose",
"RTSTRUCT": "mask.seg"}
return file_name_convention
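Downstream, ImageAutoOutput and Dataset.load_from_nrrd use this taxonomy to derive both the subfolder and the file name of each output stream. A short sketch of the lookup under assumed values (output_dir and subject_id are hypothetical):

import os

from imgtools.io import file_name_convention

output_dir = "./RADCURE_output"   # hypothetical autopipeline output directory
subject_id = "HN-CHUM-001"        # hypothetical subject id

file_names = file_name_convention()
for stream in ["CT", "RTDOSE_CT", "RTSTRUCT_CT"]:
    extension = file_names[stream]     # e.g. "dose" for RTDOSE_CT
    folder = extension.split(".")[0]   # "mask_ct.seg" -> "mask_ct"
    print(os.path.join(output_dir, folder, f"{subject_id}_{extension}.nrrd"))
# ./RADCURE_output/image/HN-CHUM-001_image.nrrd
# ./RADCURE_output/dose/HN-CHUM-001_dose.nrrd
# ./RADCURE_output/mask_ct/HN-CHUM-001_mask_ct.seg.nrrd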
183 changes: 183 additions & 0 deletions imgtools/io/dataset.py
@@ -0,0 +1,183 @@
import os
import numpy as np
from typing import List, Sequence, Optional, Callable, Iterable, Dict,Tuple
import torchio as tio
import pandas as pd
# from . import file_name_convention
# from ..ops import StructureSetToSegmentation, ImageAutoInput, Resample, BaseOp
from imgtools.io import file_name_convention
from imgtools.ops import StructureSetToSegmentation, ImageAutoInput, Resample, BaseOp
from tqdm import tqdm
from joblib import Parallel, delayed
import SimpleITK as sitk
import warnings
from imgtools.pipeline import Pipeline

class Dataset(tio.SubjectsDataset):
"""
This class takes in a medical dataset in the form of NRRDs, or builds it directly from the raw dataset, and converts the data into torchio.Subject objects, which can be
loaded into a torchio.SubjectsDataset.
It inherits from torchio.SubjectsDataset, which supports transforms and torch.utils.data.DataLoader.
Read more about torchio at https://torchio.readthedocs.io/quickstart.html and about torchio.SubjectsDataset at https://github.com/fepegar/torchio/blob/3e07b78da16d6db4da7193325b3f9cb31fc0911a/torchio/data/dataset.py#L101
"""
def __init__(
self,
subjects: Sequence[tio.Subject],
transform: Optional[Callable] = None,
load_getitem: bool = True
) -> tio.SubjectsDataset:
super().__init__(subjects,transform,load_getitem)

@classmethod
def load_from_nrrd(
cls,
path:str,
transform: Optional[Callable] = None,
load_getitem: bool = True
) -> List[tio.Subject]:
"""
Based on the given path, parses the processed NRRD files present in the directory and the metadata associated with them, and creates a list of Subject instances
Parameters
path: Path to the output directory passed to the autopipeline script. The output directory should have all the user-specified modalities processed and present in their folders, and
should additionally contain dataset.csv, which stores all the metadata
"""
path_metadata = os.path.join(path,"dataset.csv")
if not os.path.exists(path_metadata):
raise ValueError("The specified path has no file named {}".format(path_metadata))
df_metadata = pd.read_csv(path_metadata,index_col=0)
output_streams = [("_").join(cols.split("_")[1:]) for cols in df_metadata.columns if cols.split("_")[0]=="folder"]
imp_metadata = [cols for cols in df_metadata.columns if cols.split("_")[0] in ("metadata")]
#Based on the file naming taxonomy
file_names = file_name_convention()
subject_id_list = list(df_metadata.index)
subjects = []
for subject_id in tqdm(subject_id_list):
temp = {}
for col in output_streams:
extension = file_names[col]
mult_conn = col.split("_")[-1].isnumeric()
metadata_name = f"metadata_{col}"
if mult_conn:
extra = col.split("_")[-1]+"_"
else:
extra = ""
path_mod = os.path.join(path,extension.split(".")[0],f"{subject_id}_{extra}{extension}.nrrd")
#All modalities except RTSTRUCT should be of type torchIO.ScalarImage
if col!="RTSTRUCT":
temp[f"mod_{col}"] = tio.ScalarImage(path_mod)
else:
temp[f"mod_{col}"] = tio.LabelImage(path_mod)
#For including metadata
if metadata_name in imp_metadata:
#convert string to proper datatype
temp[metadata_name] = df_metadata.loc[subject_id,metadata_name][0]
subjects.append(tio.Subject(temp))
return cls(subjects,transform,load_getitem)

@classmethod
def load_directly(
cls,
path:str,
modalities: str,
n_jobs: int = -1,
spacing: Tuple = (1., 1., 0.),
transform: Optional[Callable] = None,
load_getitem: bool = True
) -> List[tio.Subject]:
"""
Based on the given path, imgtools crawls through the directory, forms a data graph, and picks the user-defined modalities. These paths are processed into sitk.Image.
The images and the metadata associated with them create a list of Subject instances
Parameters
path: Path to the directory of the dataset
"""
input = ImageAutoInput(path, modalities, n_jobs)
df_metadata = input.df_combined
output_streams = input.output_streams
#Basic operations
subject_id_list = list(df_metadata.index)
# basic image processing ops
resample = Resample(spacing=spacing)
make_binary_mask = StructureSetToSegmentation(roi_names=[], continuous=False)
subjects = Parallel(n_jobs=n_jobs)(delayed(cls.process_one_subject)(input,subject_id,output_streams,resample,make_binary_mask) for subject_id in tqdm(subject_id_list))
return cls(subjects,transform,load_getitem)

@staticmethod
def process_one_subject(
input: Pipeline,
subject_id: str,
output_streams: List[str],
resample: BaseOp,
make_binary_mask: BaseOp,
) -> tio.Subject:
"""
Process all modalities for one subject
Parameters:
input: ImageAutoInput class which helps in loading the respective DICOMs
subject_id: subject id of the data
output_streams: the modalities being considered; note that there can be multiple items of the same modality, based on their relations with different modalities
resample: transformation which resamples sitk.Image
make_binary_mask: transformation for making binary masks from RTSTRUCTs
Returns a tio.Subject instance for the given subject id
"""
temp = {}
read_results = input(subject_id)
for i,colname in enumerate(output_streams):
modality = colname.split("_")[0]
output_stream = ("_").join([item for item in colname.split("_") if item != "1"])

if read_results[i] is None:
temp[f"mod_{colname}"] = None
elif modality == "CT":
image = read_results[i]
if len(image.GetSize()) == 4:
assert image.GetSize()[-1] == 1, f"There is more than one volume in this CT file for {subject_id}."
extractor = sitk.ExtractImageFilter()
extractor.SetSize([*image.GetSize()[:3], 0])
extractor.SetIndex([0, 0, 0, 0])
image = extractor.Execute(image)
image = resample(image)
temp[f"mod_{colname}"] = tio.ScalarImage.from_sitk(image)
elif modality == "RTDOSE":
try: #For cases with no image present
doses = read_results[i].resample_dose(image)
except Exception:
warnings.warn("No CT image present. Returning dose image without resampling")
doses = read_results[i]
temp[f"mod_{colname}"] = tio.ScalarImage.from_sitk(doses)
temp[f"metadata_{colname}"] = read_results[i].get_metadata()
elif modality == "RTSTRUCT":
#For RTSTRUCT, you need image or PT
structure_set = read_results[i]
conn_to = output_stream.split("_")[-1]
# make_binary_mask relative to ct/pet
if conn_to == "CT":
mask = make_binary_mask(structure_set, image)
elif conn_to == "PT":
mask = make_binary_mask(structure_set, pet)
else:
raise ValueError("You need to pass a reference CT or PT/PET image to map contours to.")
temp[f"mod_{colname}"] = tio.LabelMap.from_sitk(mask)
temp[f"metadata_{colname}"] = structure_set.roi_names
elif modality == "PT":
try:
#For cases with no image present
pet = read_results[i].resample_pet(image)
except Exception:
warnings.warn("No CT image present. Returning PT/PET image without resampling.")
pet = read_results[i]
temp[f"mod_{colname}"] = tio.ScalarImage.from_sitk(pet)
temp[f"metadata_{colname}"] = read_results[i].get_metadata()
return tio.Subject(temp)

if __name__=="__main__":
from torch.utils.data import DataLoader
# output_path = "/cluster/projects/radiomics/Temp/vishwesh/HN-CT_RTdose_test2"
input_path = "/cluster/home/ramanav/imgtools/examples/data_test"
transform = tio.Compose([tio.Resize(256)])
# subjects_dataset = Dataset.load_from_nrrd(output_path,transform=transform)
subjects_dataset = Dataset.load_directly(input_path,modalities="CT,RTDOSE,PT",n_jobs=4,transform=transform)
print(len(subjects_dataset))
training_loader = DataLoader(subjects_dataset, batch_size=4)
items = next(iter(training_loader))
print(items["mod_RTDOSE_CT"])
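For reference, a hedged sketch of the load_from_nrrd entry point that the __main__ block above leaves commented out, fed into a torch DataLoader as in the torchio quickstart. The output_path is hypothetical; it must be a prior autopipeline output containing dataset.csv plus the per-modality folders.

import torchio as tio
from torch.utils.data import DataLoader

from imgtools.io.dataset import Dataset

output_path = "./RADCURE_output"  # hypothetical output of a prior autopipeline run
transform = tio.Compose([tio.Resize(256)])

# Builds one tio.Subject per row of dataset.csv, then wraps them in a SubjectsDataset.
subjects_dataset = Dataset.load_from_nrrd(output_path, transform=transform)
training_loader = DataLoader(subjects_dataset, batch_size=4)
batch = next(iter(training_loader))
print(batch["mod_CT"])  # keys follow the mod_{stream} convention used in load_from_nrrd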