Commit 971e8d6 (1 parent: 5e93381). Showing 5 changed files with 325 additions and 7 deletions.
@@ -0,0 +1,73 @@
import os
from argparse import ArgumentParser

from imgtools.io import (ImageFileLoader, ImageFileWriter,
                         read_dicom_rtstruct, read_dicom_series, read_dicom_rtdose, read_dicom_pet)
from imgtools.ops import StructureSetToSegmentation, ImageFileInput, ImageFileOutput, Resample
from imgtools.pipeline import Pipeline
class samplePipeline(Pipeline):
    def __init__(self,
                 input_directory,
                 output_directory,
                 spacing,
                 n_jobs):
        # The sample notebook makes it fairly clear that any custom pipeline
        # must inherit from the Pipeline base class; the docstring could state
        # this more explicitly, but it is fine as it is right now.

        # what is the default n_jobs?
        super().__init__(n_jobs=n_jobs)
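        # (n_jobs is forwarded to joblib, where by convention n_jobs=-1 uses
        # all available cores and n_jobs=1 runs serially; the base-class
        # default is not shown in this commit, so this note is an assumption.)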
|
||
self.input_directory = input_directory | ||
self.output_directory = output_directory | ||
self.spacing = spacing | ||
self.image_input = ImageFileInput( | ||
self.input_directory, # where to look for the images | ||
get_subject_id_from="subject_directory", # how to extract the subject ID, 'subject_directory' means use the name of the subject directory | ||
subdir_path="*/NA-*", | ||
# whether the images are stored in a subdirectory of the subject directory (also accepts glob patterns) | ||
reader=read_dicom_series # the function used to read individual images | ||
) | ||
        self.structure_set_input = ImageFileInput(
            self.input_directory,
            get_subject_id_from="subject_directory",
            subdir_path="*/1.000000-ARIA RadOnc Structure Sets-*/1-1.dcm",
            reader=read_dicom_rtstruct
        )

        self.make_binary_mask = StructureSetToSegmentation(roi_names="GTV.*")  # match any ROI name starting with "GTV"
        self.image_output = ImageFileOutput(
            os.path.join(self.output_directory, "images"),  # where to save the processed images
            filename_format="{subject_id}_image.nrrd",      # the filename template; {subject_id} is replaced by each subject's ID at runtime
            create_dirs=True,                               # create output directories that don't already exist
            compress=True                                   # enable compression for the NRRD format
        )
        self.mask_output = ImageFileOutput(
            os.path.join(self.output_directory, "masks"),
            filename_format="{subject_id}_mask.nrrd",
            create_dirs=True,
            compress=True
        )
    def process_one_subject(self, subject_id):
        image = self.image_input(subject_id)
        structure_set = self.structure_set_input(subject_id)
        image = self.resample(image)
        # note that the binary mask can be generated with the correct spacing
        # from the resampled image, eliminating the need to resample it separately

        print(structure_set.roi_names)
        mask = self.make_binary_mask(structure_set, image)
        self.image_output(subject_id, image)
        self.mask_output(subject_id, mask)

if __name__ == "__main__":
    pipeline = samplePipeline(
        input_directory="C:/Users/qukev/BHKLAB/dataset/manifest-1598890146597/NSCLC-Radiomics-Interobserver1",
        output_directory="C:/Users/qukev/BHKLAB/output",
        spacing=(1., 1., 0.),
        n_jobs=1)
    # pipeline.run()
    subject_ids = pipeline._get_loader_subject_ids()
    for subject_id in subject_ids:
        pipeline.process_one_subject(subject_id)
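The ArgumentParser import above is never wired up; a minimal sketch of how the hard-coded paths could instead be taken from the command line (the argument names here are assumptions, not part of the commit):

parser = ArgumentParser("sample pipeline")
parser.add_argument("input_directory", type=str)   # hypothetical positional argument
parser.add_argument("output_directory", type=str)  # hypothetical positional argument
parser.add_argument("--n_jobs", type=int, default=1)
args = parser.parse_args()
pipeline = samplePipeline(input_directory=args.input_directory,
                          output_directory=args.output_directory,
                          spacing=(1., 1., 0.),
                          n_jobs=args.n_jobs)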
@@ -0,0 +1,12 @@
from imgtools.autopipeline import AutoPipeline

if __name__ == "__main__":
    pipeline = AutoPipeline(input_directory="C:/Users/qukev/BHKLAB/dataset/manifest-1598890146597/NSCLC-Radiomics-Interobserver1",
                            output_directory="C:/Users/qukev/BHKLAB/autopipelineoutput",
                            visualize=True)

    print('starting Pipeline...')
    pipeline.run()

    print('finished Pipeline!')
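(After a successful run, AutoPipeline writes the processed images per output stream plus a dataset.csv summary table into output_directory; see the AutoPipeline implementation below.)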
@@ -0,0 +1,218 @@
import os, pathlib
import shutil
import glob
import pickle
import warnings

from argparse import ArgumentParser
import SimpleITK as sitk

from imgtools.ops import StructureSetToSegmentation, ImageAutoInput, ImageAutoOutput, Resample
from imgtools.pipeline import Pipeline
from joblib import Parallel, delayed

###############################################################
# Example usage:
# python radcure_simple.py ./data/RADCURE/data ./RADCURE_output
###############################################################
class AutoPipeline(Pipeline):
    """Example processing pipeline for the RADCURE dataset.

    This pipeline loads the CT images and structure sets, resamples the images,
    and draws the GTV contour using the resampled image.
    """
    def __init__(self,
                 input_directory,
                 output_directory,
                 modalities="CT",
                 spacing=(1., 1., 0.),
                 n_jobs=-1,
                 visualize=False,
                 missing_strategy="drop",
                 show_progress=False,
                 warn_on_error=False):

        super().__init__(
            n_jobs=n_jobs,
            missing_strategy=missing_strategy,
            show_progress=show_progress,
            warn_on_error=warn_on_error)

        # pipeline configuration
        self.input_directory = input_directory
        self.output_directory = output_directory
        self.spacing = spacing
        self.existing = [None]  # self.existing_patients()
        # input operations
        self.input = ImageAutoInput(input_directory, modalities, n_jobs, visualize)

        self.output_df_path = os.path.join(self.output_directory, "dataset.csv")
        # output component table
        self.output_df = self.input.df_combined
        # names of the important columns that need to be saved
        self.output_streams = self.input.output_streams

        # image processing ops
        self.resample = Resample(spacing=self.spacing)
        self.make_binary_mask = StructureSetToSegmentation(roi_names=[], continuous=False)

        # output ops
        self.output = ImageAutoOutput(self.output_directory, self.output_streams)

        # make a temporary directory for per-subject checkpoint files
        if not os.path.exists(os.path.join(self.output_directory, ".temp")):
            os.makedirs(os.path.join(self.output_directory, ".temp"))
    def process_one_subject(self, subject_id):
        """Define the processing operations for one subject.

        This method must be defined for all pipelines. It is used to define
        the preprocessing steps for a single subject (note: that might mean
        multiple images, structures, etc.). During pipeline execution, this
        method will receive one argument, subject_id, which can be used to
        retrieve inputs and save outputs.

        Parameters
        ----------
        subject_id : str
            The ID of the subject to process.
        """
        # skip subjects that were already processed; the checkpoint file name
        # must match the one written at the end of this method
        if os.path.exists(os.path.join(self.output_directory, ".temp", f'{subject_id}.pkl')):
            print(f"{subject_id} already processed")
            return

        print("Processing:", subject_id)

        read_results = self.input(subject_id)
        print(read_results)

        print(subject_id, " start")
        metadata = {}
        for i, colname in enumerate(self.output_streams):
            modality = colname.split("_")[0]

            # strip any trailing numeric suffix (_{num}) to recover the output stream name
            output_stream = "_".join([item for item in colname.split("_") if not item.isnumeric()])

            # a trailing numeric suffix (e.g. _1) marks multiple connections,
            # i.e. two modalities connected to one modality
            mult_conn = colname.split("_")[-1].isnumeric()
            num = colname.split("_")[-1]

            print(output_stream)
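            # illustrative example (column names assumed, not from this commit):
            # colname "RTDOSE_CT_1" -> output_stream "RTDOSE_CT", mult_conn True, num "1"
            # colname "CT"          -> output_stream "CT", mult_conn False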
            if read_results[i] is None:
                print("The subject id: {} has no {}".format(subject_id, colname))
            elif modality == "CT" or modality == 'MR':
                image = read_results[i]
                if len(image.GetSize()) == 4:
                    assert image.GetSize()[-1] == 1, f"There is more than one volume in this CT file for {subject_id}."
                    # extract the single 3D volume from the 4D image
                    extractor = sitk.ExtractImageFilter()
                    extractor.SetSize([*image.GetSize()[:3], 0])
                    extractor.SetIndex([0, 0, 0, 0])

                    image = extractor.Execute(image)
                    print(image.GetSize())
                image = self.resample(image)
                # save the output
                self.output(subject_id, image, output_stream)
                metadata[f"size_{output_stream}"] = str(image.GetSize())
                print(subject_id, " SAVED IMAGE")
elif modality == "RTDOSE": | ||
try: #For cases with no image present | ||
doses = read_results[i].resample_dose(image) | ||
except: | ||
Warning("No CT image present. Returning dose image without resampling") | ||
doses = read_results[i] | ||
|
||
# save output | ||
if not mult_conn: | ||
self.output(subject_id, doses, output_stream) | ||
else: | ||
self.output(f"{subject_id}_{num}", doses, output_stream) | ||
metadata[f"size_{output_stream}"] = str(doses.GetSize()) | ||
metadata[f"metadata_{colname}"] = [read_results[i].get_metadata()] | ||
print(subject_id, " SAVED DOSE") | ||
elif modality == "RTSTRUCT": | ||
#For RTSTRUCT, you need image or PT | ||
structure_set = read_results[i] | ||
conn_to = output_stream.split("_")[-1] | ||
|
||
# make_binary_mask relative to ct/pet | ||
if conn_to == "CT" or conn_to == "MR": | ||
mask = self.make_binary_mask(structure_set, image) | ||
elif conn_to == "PT": | ||
mask = self.make_binary_mask(structure_set, pet) | ||
else: | ||
raise ValueError("You need to pass a reference CT or PT/PET image to map contours to.") | ||
|
||
# save output | ||
if not mult_conn: | ||
self.output(subject_id, mask, output_stream) | ||
else: | ||
self.output(f"{subject_id}_{num}", mask, output_stream) | ||
metadata[f"metadata_{colname}"] = [structure_set.roi_names] | ||
|
||
print(subject_id, "SAVED MASK ON", conn_to) | ||
elif modality == "PT": | ||
try: | ||
#For cases with no image present | ||
pet = read_results[i].resample_pet(image) | ||
except: | ||
Warning("No CT image present. Returning PT/PET image without resampling.") | ||
pet = read_results[i] | ||
|
||
if not mult_conn: | ||
self.output(subject_id, pet, output_stream) | ||
else: | ||
self.output(f"{subject_id}_{num}", pet, output_stream) | ||
metadata[f"size_{output_stream}"] = str(pet.GetSize()) | ||
metadata[f"metadata_{colname}"] = [read_results[i].get_metadata()] | ||
print(subject_id, " SAVED PET") | ||
#Saving all the metadata in multiple text files | ||
with open(os.path.join(self.output_directory,".temp",f'{subject_id}.pkl'),'wb') as f: | ||
pickle.dump(metadata,f) | ||
return | ||
|
||
    def save_data(self):
        files = glob.glob(os.path.join(self.output_directory, ".temp", "*.pkl"))
        for file in files:
            filename = pathlib.Path(file).name
            subject_id = os.path.splitext(filename)[0]
            with open(file, "rb") as f:
                metadata = pickle.load(f)
            # merge each subject's checkpointed metadata into the component table
            self.output_df.loc[subject_id, list(metadata.keys())] = list(metadata.values())
        self.output_df.to_csv(self.output_df_path)
        shutil.rmtree(os.path.join(self.output_directory, ".temp"))
    def run(self):
        """Execute the pipeline, possibly in parallel."""
        # joblib prints progress to stdout if verbose > 50
        verbose = 51 if self.show_progress else 0

        subject_ids = self._get_loader_subject_ids()
        # Note that returning any SimpleITK object in process_one_subject is
        # not supported yet, since they cannot be pickled
        if os.path.exists(self.output_df_path):
            print("Dataset already processed...")
            shutil.rmtree(os.path.join(self.output_directory, ".temp"))
        else:
            Parallel(n_jobs=self.n_jobs, verbose=verbose)(
                delayed(self._process_wrapper)(subject_id) for subject_id in subject_ids)
            self.save_data()
if __name__ == "__main__":
    pipeline = AutoPipeline(input_directory="C:/Users/qukev/BHKLAB/dataset/manifest-1598890146597/NSCLC-Radiomics-Interobserver1",
                            output_directory="C:/Users/qukev/BHKLAB/autopipelineoutput",
                            visualize=True)

    print('starting Pipeline...')
    pipeline.run()

    print('finished Pipeline!')