Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Flake8 linting fixes #93

Merged
merged 5 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 56 additions & 61 deletions imgtools/autopipeline.py

Large diffs are not rendered by default.

7 changes: 5 additions & 2 deletions imgtools/io/common.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os, pathlib
import os
import pathlib
from typing import Dict

from pydicom.misc import is_dicom


def find_dicom_paths(root_path: str, yield_directories: bool = False) -> str:
"""Find DICOM file paths in the specified root directory file tree.

Expand Down Expand Up @@ -33,6 +35,7 @@ def find_dicom_paths(root_path: str, yield_directories: bool = False) -> str:
if is_dicom(fpath):
yield fpath


def file_name_convention() -> Dict:
"""
This function returns the file name taxonomy which is used by ImageAutoOutput and Dataset class
Expand All @@ -48,4 +51,4 @@ def file_name_convention() -> Dict:
"RTDOSE": "dose",
"RTSTRUCT": "mask"}

return file_name_convention
return file_name_convention
44 changes: 23 additions & 21 deletions imgtools/io/dataset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from genericpath import exists
import os, pathlib, ast
import os
import pathlib
import ast
from typing import List, Sequence, Optional, Callable, Iterable, Dict,Tuple
from tqdm import tqdm

Expand All @@ -12,18 +14,18 @@
from imgtools.pipeline import Pipeline
from joblib import Parallel, delayed


class Dataset(tio.SubjectsDataset):
"""
This class takes in medical dataset in the form of nrrds or directly from the dataset and converts the data into torchio.Subject object, which can be loaded into
torchio.SubjectDataset object.
This class inherits from torchio.SubjectDataset object, which can support transforms and torch.Dataloader.
Read more about torchio from https://torchio.readthedocs.io/quickstart.html and torchio.SubjectDataset from https://github.com/fepegar/torchio/blob/3e07b78da16d6db4da7193325b3f9cb31fc0911a/torchio/data/dataset.py#L101
"""
def __init__(
self,
subjects: Sequence[tio.Subject],
path: str,
) -> List[tio.Subject]:
def __init__(self,
subjects: Sequence[tio.Subject],
path: str) -> List[tio.Subject]:

super().__init__(subjects)
self.subjects = subjects
self.path = path
Expand All @@ -47,16 +49,16 @@ def load_image(

for col in df_metadata.columns:
if col.startswith("output_folder"):
df_metadata[col] = df_metadata[col].apply(lambda x: pathlib.Path(os.path.split(os.path.dirname(path))[0], x).as_posix() if isinstance(x, str) else x) #input folder joined with the rel path
df_metadata[col] = df_metadata[col].apply(lambda x: pathlib.Path(os.path.split(os.path.dirname(path))[0], x).as_posix() if isinstance(x, str) else x) # input folder joined with the rel path

output_streams = [("_").join(cols.split("_")[2:]) for cols in df_metadata.columns if cols.split("_")[0] == "output"]
imp_metadata = [cols for cols in df_metadata.columns if cols.split("_")[0] in ("metadata")]
#Ignores multiple connection to single modality
# Ignores multiple connection to single modality
if ignore_multi:
output_streams = [items for items in output_streams if items.split("_")[-1].isnumeric()==False]
imp_metadata = [items for items in imp_metadata if items.split("_")[-1].isnumeric()==False]
output_streams = [items for items in output_streams if not items.split("_")[-1].isnumeric()]
imp_metadata = [items for items in imp_metadata if not items.split("_")[-1].isnumeric()]

#Based on the file naming convention
# Based on the file naming convention
subject_id_list = list(df_metadata.index)
subjects = []
for subject_id in tqdm(subject_id_list):
Expand All @@ -72,7 +74,7 @@ def load_image(
filename = col
path_mod = pathlib.Path(path, subject_id, col, f"{filename}.nii.gz").as_posix()
print(path_mod)
#All modalities except RTSTRUCT should be of type torchIO.ScalarImage
# All modalities except RTSTRUCT should be of type torchIO.ScalarImage
if os.path.exists(path_mod):
if col.split("_")[0]!="RTSTRUCT":
temp[f"mod_{col}"] = tio.ScalarImage(path_mod)
Expand All @@ -81,14 +83,14 @@ def load_image(
temp[f"mod_{col}"] = tio.LabelMap(path_mods)
else:
temp[f"mod_{col}"] = None
#For including metadata
# For including metadata
if metadata_name in imp_metadata:
#convert string to proper datatype
# convert string to proper datatype
meta = df_metadata.loc[subject_id,metadata_name]
if pd.notna(meta):
temp[metadata_name] = eval(meta)[0]
else:
#torch dataloader doesnt accept None type
# torch dataloader doesnt accept None type
temp[metadata_name] = {}
subjects.append(tio.Subject(temp))
return cls(subjects, path)
Expand All @@ -111,10 +113,10 @@ def load_directly(
input = ImageAutoInput(path, modalities, n_jobs)
df_metadata = input.df_combined
output_streams = input.output_streams
#Ignores multiple connection to single modality
# Ignores multiple connection to single modality
if ignore_multi:
output_streams = [items for items in output_streams if items.split("_")[-1].isnumeric()==False]
#Basic operations
output_streams = [items for items in output_streams if not items.split("_")[-1].isnumeric()]
# Basic operations
subject_id_list = list(df_metadata.index)
# basic image processing ops
resample = Resample(spacing=spacing)
Expand Down Expand Up @@ -159,15 +161,15 @@ def process_one_subject(
image = resample(image)
temp[f"mod_{colname}"] = tio.ScalarImage.from_sitk(image)
elif modality == "RTDOSE":
try: #For cases with no image present
try: # For cases with no image present
doses = read_results[i].resample_dose(image)
except:
Warning("No CT image present. Returning dose image without resampling")
doses = read_results[i]
temp[f"mod_{colname}"] = tio.ScalarImage.from_sitk(doses)
temp[f"metadata_{colname}"] = read_results[i].get_metadata()
elif modality == "RTSTRUCT":
#For RTSTRUCT, you need image or PT
# For RTSTRUCT, you need image or PT
structure_set = read_results[i]
conn_to = output_stream.split("_")[-1]
# make_binary_mask relative to ct/pet
Expand All @@ -181,7 +183,7 @@ def process_one_subject(
temp[f"metadata_{colname}"] = structure_set.roi_names
elif modality == "PT":
try:
#For cases with no image present
# For cases with no image present
pet = read_results[i].resample_pet(image)
except:
Warning("No CT image present. Returning PT/PET image without resampling.")
Expand Down
28 changes: 19 additions & 9 deletions imgtools/io/loaders.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import os, pathlib, json
import os
import pathlib
import json
import glob
import re
from typing import Optional, List
Expand All @@ -16,9 +18,11 @@
from ..utils.crawl import *
from ..utils.dicomutils import *


def read_image(path):
    """Load the image stored at ``path`` with ``sitk.ReadImage`` and return it."""
    image = sitk.ReadImage(path)
    return image


def read_dicom_series(path: str,
series_id: Optional[str] = None,
recursive: bool = False,
Expand Down Expand Up @@ -51,8 +55,8 @@ def read_dicom_series(path: str,
reader = sitk.ImageSeriesReader()
if file_names is None:
file_names = reader.GetGDCMSeriesFileNames(path,
seriesID=series_id if series_id else "",
recursive=recursive)
seriesID=series_id if series_id else "",
recursive=recursive)
# extract the names of the dicom files that are in the path variable, which is a directory

reader.SetFileNames(file_names)
Expand All @@ -68,24 +72,28 @@ def read_dicom_series(path: str,
return reader.Execute()



def read_dicom_scan(path, series_id=None, recursive: bool=False, file_names=None) -> Scan:
    """Read a DICOM series and wrap the resulting image in a ``Scan``.

    ``path``, ``series_id``, ``recursive`` and ``file_names`` are forwarded
    unchanged to ``read_dicom_series``.  The ``Scan`` is constructed from the
    image and an empty dict.
    """
    series_kwargs = dict(series_id=series_id, recursive=recursive, file_names=file_names)
    return Scan(read_dicom_series(path, **series_kwargs), {})


def read_dicom_rtstruct(path):
    """Build a ``StructureSet`` from ``path`` via ``StructureSet.from_dicom_rtstruct``."""
    structure_set = StructureSet.from_dicom_rtstruct(path)
    return structure_set


def read_dicom_rtdose(path):
    """Build a ``Dose`` from ``path`` via ``Dose.from_dicom_rtdose``."""
    dose = Dose.from_dicom_rtdose(path)
    return dose


def read_dicom_pet(path, series=None):
    """Build a ``PET`` object from ``path`` via ``PET.from_dicom_pet``.

    ``series`` is passed through as the second argument and the literal
    ``"SUV"`` is always supplied as the third.
    NOTE(review): "SUV" presumably selects the kind of PET image to
    produce -- confirm against ``PET.from_dicom_pet``.
    """
    pet = PET.from_dicom_pet(path, series, "SUV")
    return pet


def read_dicom_seg(path, meta, series=None):
    """Read a DICOM series and convert it with ``Segmentation.from_dicom_seg``.

    ``series`` is handed to ``read_dicom_series`` as its second positional
    argument.  The image it returns is forwarded to
    ``Segmentation.from_dicom_seg`` together with ``meta``.
    """
    return Segmentation.from_dicom_seg(read_dicom_series(path, series), meta)


def read_dicom_auto(path, series=None, file_names=None):
if path is None:
return None
Expand Down Expand Up @@ -120,6 +128,7 @@ def read_dicom_auto(path, series=None, file_names=None):
obj.metadata.update(get_modality_metadata(meta, modality))
return obj


class BaseLoader:
def __getitem__(self, subject_id):
raise NotImplementedError
Expand All @@ -142,6 +151,7 @@ def get(self, subject_id, default=None):
except KeyError:
return default


class ImageTreeLoader(BaseLoader):
def __init__(self,
json_path,
Expand All @@ -155,7 +165,7 @@ def __init__(self,
readers=None):

if readers is None:
readers = [read_image] # no mutable defaults https://florimond.dev/en/posts/2018/08/python-mutable-defaults-are-the-source-of-all-evil/
readers = [read_image] # no mutable defaults https://florimond.dev/en/posts/2018/08/python-mutable-defaults-are-the-source-of-all-evil/

self.expand_paths = expand_paths
self.readers = readers
Expand All @@ -180,7 +190,7 @@ def __init__(self,

if isinstance(json_path, str):
with open(json_path, 'r') as f:
self.tree = json.load(json_path)
self.tree = json.load(f)
else:
raise ValueError(f"Expected a path to a json file, not {type(json_path)}.")

Expand Down Expand Up @@ -213,6 +223,7 @@ def keys(self):
def items(self):
    """Return a lazy iterable of ``(key, self[key])`` pairs over ``self.keys()``.

    ``self.keys()`` is called once, when this method is invoked; each value
    is looked up through ``self[key]`` only as the result is consumed.
    """
    subject_keys = self.keys()
    return ((key, self[key]) for key in subject_keys)


class ImageCSVLoader(BaseLoader):
def __init__(self,
csv_path_or_dataframe,
Expand All @@ -223,7 +234,7 @@ def __init__(self,
readers=None):

if readers is None:
readers = [read_image] # no mutable defaults https://florimond.dev/en/posts/2018/08/python-mutable-defaults-are-the-source-of-all-evil/
readers = [read_image] # no mutable defaults https://florimond.dev/en/posts/2018/08/python-mutable-defaults-are-the-source-of-all-evil/

self.expand_paths = expand_paths
self.readers = readers
Expand Down Expand Up @@ -279,7 +290,7 @@ def __init__(self,
if exclude_paths is None:
exclude_paths = []
if reader is None:
reader = read_image # no mutable defaults https://florimond.dev/en/posts/2018/08/python-mutable-defaults-are-the-source-of-all-evil/
reader = read_image # no mutable defaults https://florimond.dev/en/posts/2018/08/python-mutable-defaults-are-the-source-of-all-evil/

self.root_directory = root_directory
self.get_subject_id_from = get_subject_id_from
Expand Down Expand Up @@ -338,7 +349,6 @@ def keys(self):
return self.paths.keys()



# class CombinedLoader(BaseLoader):
# def __init__(self, **kwargs):
# self.loaders = kwargs
Expand Down
27 changes: 14 additions & 13 deletions imgtools/io/writers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os, pathlib
import os
import pathlib
import json
import csv
import pickle
Expand Down Expand Up @@ -39,7 +40,7 @@ def _get_path_from_subject_id(self, subject_id, **kwargs):
out_path = pathlib.Path(self.root_directory, out_filename).as_posix()
out_dir = os.path.dirname(out_path)
if self.create_dirs and not os.path.exists(out_dir):
os.makedirs(out_dir, exist_ok=True) # create subdirectories if specified in filename_format
os.makedirs(out_dir, exist_ok=True) # create subdirectories if specified in filename_format

return out_path

Expand All @@ -52,12 +53,11 @@ def __init__(self, root_directory, filename_format="{subject_id}.nii.gz", create
self.create_dirs = create_dirs
self.compress = compress
if os.path.exists(self.root_directory):
# delete the folder called {subject_id} that was made in the original BaseWriter / the one named {label_or_image}
if os.path.basename(os.path.dirname(self.root_directory)) == "{subject_id}":
shutil.rmtree(os.path.dirname(self.root_directory))
elif "{label_or_image}{train_or_test}" in os.path.basename(self.root_directory):
shutil.rmtree(self.root_directory)
#delete the folder called {subject_id} that was made in the original BaseWriter / the one named {label_or_image}


def put(self, subject_id,
image, is_mask=False,
Expand All @@ -69,27 +69,28 @@ def put(self, subject_id,
if is_mask:
# remove illegal characters for Windows/Unix
badboys = '<>:"/\|?*'
for char in badboys: mask_label = mask_label.replace(char, "")
for char in badboys:
mask_label = mask_label.replace(char, "")

# filename_format eh
self.filename_format = mask_label + ".nii.gz" #save the mask labels as their rtstruct names
self.filename_format = mask_label + ".nii.gz" # save the mask labels as their rtstruct names

if nnunet_info:
if label_or_image == "labels":
filename = f"{subject_id}.nii.gz" #naming convention for labels
filename = f"{subject_id}.nii.gz" # naming convention for labels
else:
filename = self.filename_format.format(subject_id=subject_id, modality_index=nnunet_info['modalities'][nnunet_info['current_modality']]) #naming convention for images
filename = self.filename_format.format(subject_id=subject_id, modality_index=nnunet_info['modalities'][nnunet_info['current_modality']]) # naming convention for images
out_path = self._get_path_from_subject_id(filename, label_or_image=label_or_image, train_or_test=train_or_test)
else:
out_path = self._get_path_from_subject_id(self.filename_format, subject_id=subject_id)
sitk.WriteImage(image, out_path, self.compress)

def _get_path_from_subject_id(self, filename, **kwargs):
    """Resolve the output path for ``filename`` under the formatted root directory.

    ``self.root_directory`` is treated as a format string: its ``{}``
    placeholders are filled from ``kwargs``.  When ``self.create_dirs`` is
    truthy, the parent directory of the resulting path is created if it does
    not exist yet.

    Returns:
        The joined path as a POSIX-style string.

    Raises:
        KeyError: if ``self.root_directory`` contains a placeholder that is
            not supplied in ``kwargs`` (raised by ``str.format``).
    """
    # Replace the {} placeholders with the kwargs passed in by the caller.
    root_directory = self.root_directory.format(**kwargs)
    out_path = pathlib.Path(root_directory, filename).as_posix()
    out_dir = os.path.dirname(out_path)
    # An empty out_dir means the path has no directory component;
    # os.makedirs("") would raise, so there is nothing to create.
    if self.create_dirs and out_dir and not os.path.exists(out_dir):
        # Create subdirectories implied by the root directory or the filename.
        os.makedirs(out_dir, exist_ok=True)
    return out_path


Expand Down Expand Up @@ -133,7 +134,7 @@ def put(self, subject_id, mask, **kwargs):
if len(labels) > 1:
arr = np.transpose(sitk.GetArrayFromImage(mask), [-1, -2, -3, -4])

#add extra dimension to metadata
# add extra dimension to metadata
space_directions.insert(0, [float('nan'), float('nan'), float('nan')])
kinds.insert(0, 'vector')
dims += 1
Expand All @@ -152,7 +153,7 @@ def put(self, subject_id, mask, **kwargs):
props = regionprops(arr)[0]
bbox = props["bbox"]
bbox_segment = [bbox[0], bbox[3], bbox[1], bbox[4], bbox[2], bbox[5]]
except IndexError: # mask is empty
except IndexError: # mask is empty
assert arr[n].sum() == 0, "Mask not empty but 'skimage.measure.regionprops' failed."
bbox_segment = [0, 0, 0, 0, 0, 0]

Expand Down Expand Up @@ -185,7 +186,7 @@ def __init__(self, root_directory, filename_format="{subject_id}.npy", create_di
def put(self, subject_id, image, **kwargs):
out_path = self._get_path_from_subject_id(subject_id, **kwargs)
if isinstance(image, sitk.Image):
array, *_ = image_to_array(image) # TODO (Michal) optionally save the image geometry
array, *_ = image_to_array(image) # TODO (Michal) optionally save the image geometry
np.save(out_path, array)


Expand Down
2 changes: 1 addition & 1 deletion imgtools/modules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
from .dose import *
from .datagraph import *
from .sparsemask import *
from .scan import *
from .scan import *
Loading
Loading