Added regex dictionary option for non-nnU-Net runs

Former-commit-id: eeae5b7
bhklab · Jun 15, 2022 · 89d8251 · 89d8251
1 parent 3fe34ed
commit 89d8251
Show file tree

Hide file tree

Showing 4 changed files with 82 additions and 47 deletions.
diff --git a/examples/autotest.py b/examples/autotest.py
@@ -206,7 +206,7 @@ def process_one_subject(self, subject_id):
                             self.nnunet_info["modalities"][metadata["AcquisitionContrast"]] = str(len(self.nnunet_info["modalities"])).zfill(4) #fill to 4 digits
                     else:
                         self.nnunet_info['current_modality'] = modality #CT
-                    if subject_id in self.train:
+                    if "_".join(subject_id.split("_")[1::]) in self.train:
                         self.output(subject_id, image, output_stream, nnunet_info=self.nnunet_info)
                     else:
                         self.output(subject_id, image, output_stream, nnunet_info=self.nnunet_info, train_or_test="Ts")
@@ -257,7 +257,7 @@ def process_one_subject(self, subject_id):
                 if self.is_nnunet:
                     sparse_mask = mask.generate_sparse_mask(self.label_names).mask_array
                     sparse_mask = sitk.GetImageFromArray(sparse_mask) #convert the nparray to sitk image
-                    if subject_id in self.train:
+                    if "_".join(subject_id.split("_")[1::]) in self.train:
                         self.output(subject_id, sparse_mask, output_stream, nnunet_info=self.nnunet_info, label_or_image="labels") #rtstruct is label for nnunet
                     else:
                         self.output(subject_id, sparse_mask, output_stream, nnunet_info=self.nnunet_info, label_or_image="labels", train_or_test="Ts")
@@ -309,7 +309,7 @@ def process_one_subject(self, subject_id):
         #Saving all the metadata in multiple text files
         metadata["Modalities"] = str(list(subject_modalities))
         metadata["numRTSTRUCTs"] = num_rtstructs
-        metadata["Train or Test"] = "train" if subject_id in self.train else "test"
+        metadata["Train or Test"] = "train" if "_".join(subject_id.split("_")[1::]) in self.train else "test"
         with open(pathlib.Path(self.output_directory,".temp",f'{subject_id}.pkl').as_posix(),'wb') as f:
             pickle.dump(metadata,f)
         return 
@@ -338,9 +338,14 @@ def run(self):
         verbose = 51 if self.show_progress else 0
 
         subject_ids = self._get_loader_subject_ids()
+        patient_ids = []
+        for subject_id in subject_ids:
+            if subject_id.split("_")[1::] not in patient_ids:
+                patient_ids.append("_".join(subject_id.split("_")[1::]))
         if self.is_nnunet:
-            self.num_subjects = len(subject_ids)
-            self.train, self.test = train_test_split(subject_ids, train_size=self.train_size, random_state=self.random_state)
+            self.train, self.test = train_test_split(patient_ids, train_size=self.train_size, random_state=self.random_state)
+        else:
+            self.train, self.test = [], []
         # Note that returning any SimpleITK object in process_one_subject is
         # not supported yet, since they cannot be pickled
         if os.path.exists(self.output_df_path) and not self.overwrite:
@@ -378,9 +383,9 @@ def run(self):
                             modalities="CT,RTSTRUCT",
                             visualize=False,
                             overwrite=True,
-                            is_nnunet=True,
+                            # is_nnunet=True,
                             train_size=0.5,
-                            label_names={})
+                            label_names={"GTV":"GTV.*", "Brainstem": "Brainstem.*"})
 
     # pipeline = AutoPipeline(input_directory="C:/Users/qukev/BHKLAB/dataset/manifest-1598890146597/NSCLC-Radiomics-Interobserver1",
     #                         output_directory="C:/Users/qukev/BHKLAB/autopipelineoutput",

diff --git a/imgtools/modules/datagraph.py b/imgtools/modules/datagraph.py
@@ -324,7 +324,7 @@ def graph_query(self,
 
         # Based on the correct study ids, fetches are the relevant edges
         df_processed = self.df_edges.loc[self.df_edges.study_x.isin(relevant_study_id) & (self.df_edges.edge_type.isin(edge_list))]
-        print(df_processed.to_csv("/cluster/home/sejinkim/projects/process/tcga_impatient.csv"))
+        # print(df_processed.to_csv("/cluster/home/sejinkim/projects/process/tcga_impatient.csv"))
 
         # The components are deleted if it has less number of nodes than the passed modalities, change this so as to alter that condition
         final_df = self._get_df(df_processed, relevant_study_id, remove_less_comp)

diff --git a/imgtools/modules/structureset.py b/imgtools/modules/structureset.py
@@ -86,7 +86,7 @@ def _assign_labels(self, names, force_missing=False):
         return labels
 
     def to_segmentation(self, reference_image: sitk.Image,
-                        roi_names: Dict[str: str] = None,
+                        roi_names: Dict[str, str] = None,
                         force_missing: bool = False,
                         continuous: bool = True) -> Segmentation:
         """Convert the structure set to a Segmentation object.
@@ -126,60 +126,90 @@ def to_segmentation(self, reference_image: sitk.Image,
         guaranteed (unless all patterns in `roi_names` can only match
         a single name or are lists of strings).
         """
-        if not roi_names or roi_names == {}:
-            roi_names = self.roi_names
+        labels = {}
+        if roi_names is None or roi_names == {}:
+            roi_names = self.roi_names #all the contour names
+            labels = self._assign_labels(roi_names, force_missing) #only the ones that match the regex
+        elif isinstance(roi_names, dict):
+            for name, pattern in roi_names.items():
+                matching_names = list(self._assign_labels([pattern], force_missing).keys())
+                if matching_names:
+                    labels[name] = matching_names #{"GTV": ["GTV1", "GTV2"]}
         if isinstance(roi_names, str):
             roi_names = [roi_names]
         if isinstance(roi_names, list):
             labels = self._assign_labels(roi_names, force_missing)
-        else:
-            labels = self._assign_labels(list(roi_names.values()), force_missing)
         print("labels:", labels)
         if not labels:
             raise ValueError(f"No ROIs matching {roi_names} found in {self.roi_names}.")
 
-        size = reference_image.GetSize()[::-1] + (max(labels.values()) + 1,)
+        # size = reference_image.GetSize()[::-1] + (max(labels.values()) + 1,)
+        size = reference_image.GetSize()[::-1] + (len(labels),)
+        # print(size)
+        # print(reference_image.GetSize()[::-1])
+        # print((max(labels.values()) + 1,))
 
         mask = np.zeros(size, dtype=np.uint8)
 
-        for name, label in labels.items():
-            physical_points = self.roi_points.get(name, np.array([]))
-            if len(physical_points) == 0:
-                continue # allow for missing labels, will return a blank slice
-
-            mask_points = physical_points_to_idxs(reference_image, physical_points, continuous=continuous)
-
-            # print(mask.shape, "asldkfjalsk")
-            for contour in mask_points:
-                z, slice_points = np.unique(contour[:, 0]), contour[:, 1:]
-                # rounding errors for points on the boundary
-                # if z == mask.shape[0]:
-                #     z -= 1
-                # elif z == -1:
-                #     z += 1
-                # elif z > mask.shape[0] or z < -1:
-                #     raise IndexError(f"{z} index is out of bounds for image sized {mask.shape}.")
+        # print(self.roi_points)
+
+        seg_roi_names = {}
+        print(roi_names)
+        if roi_names != {} and isinstance(roi_names, dict):
+            for i, (name, label_list) in enumerate(labels.items()):
+                for label in label_list:
+                    physical_points = self.roi_points.get(label, np.array([]))
+                    mask_points = physical_points_to_idxs(reference_image, physical_points, continuous=continuous)
+                    for contour in mask_points:
+                        z, slice_points = np.unique(contour[:, 0]), contour[:, 1:]
+                        if len(z) == 1:
+                            # assert len(z) == 1, f"This contour ({name}) spreads across more than 1 slice."
+                            z = z[0]
+                            slice_mask = polygon2mask(size[1:-1], slice_points)
+                            mask[z, :, :, i] += slice_mask
+                seg_roi_names[name] = i
+        else:
+            for name, label in labels.items():
+                physical_points = self.roi_points.get(name, np.array([]))
+                # print(physical_points) #np.ndarray, 3d array with the physical locations (float coordinates)
+                if len(physical_points) == 0:
+                    continue # allow for missing labels, will return a blank slice
+
+                mask_points = physical_points_to_idxs(reference_image, physical_points, continuous=continuous)
+                # print(mask_points)
 
-                # # if the contour spans only 1 z-slice 
-                # if len(z) == 1:
-                #     z = int(np.floor(z[0]))
-                #     slice_mask = polygon2mask(size[1:-1], slice_points)
-                #     mask[z, :, :, label] += slice_mask
-                # else:
-                #     raise ValueError("This contour is corrupted and spans across 2 or more slices.")
-
-                # This is the old version of z index parsing. Kept for backup
-                if len(z) == 1:
-                    # assert len(z) == 1, f"This contour ({name}) spreads across more than 1 slice."
-                    z = z[0]
-                    slice_mask = polygon2mask(size[1:-1], slice_points)
-                    mask[z, :, :, label] += slice_mask
+                # print(mask.shape, "asldkfjalsk")
+                for contour in mask_points:
+                    z, slice_points = np.unique(contour[:, 0]), contour[:, 1:]
+                    # rounding errors for points on the boundary
+                    # if z == mask.shape[0]:
+                    #     z -= 1
+                    # elif z == -1:
+                    #     z += 1
+                    # elif z > mask.shape[0] or z < -1:
+                    #     raise IndexError(f"{z} index is out of bounds for image sized {mask.shape}.")
+
+                    # # if the contour spans only 1 z-slice 
+                    # if len(z) == 1:
+                    #     z = int(np.floor(z[0]))
+                    #     slice_mask = polygon2mask(size[1:-1], slice_points)
+                    #     mask[z, :, :, label] += slice_mask
+                    # else:
+                    #     raise ValueError("This contour is corrupted and spans across 2 or more slices.")
+
+                    # This is the old version of z index parsing. Kept for backup
+                    if len(z) == 1:
+                        # assert len(z) == 1, f"This contour ({name}) spreads across more than 1 slice."
+                        z = z[0]
+                        slice_mask = polygon2mask(size[1:-1], slice_points)
+                        mask[z, :, :, label] += slice_mask
+            seg_roi_names = {"_".join(k): v for v, k in groupby(labels, key=lambda x: labels[x])}
 
 
         mask[mask > 1] = 1
         mask = sitk.GetImageFromArray(mask, isVector=True)
         mask.CopyInformation(reference_image)
-        seg_roi_names = {"_".join(k): v for v, k in groupby(labels, key=lambda x: labels[x])}
+        print("adams",seg_roi_names)
         mask = Segmentation(mask, roi_names=seg_roi_names)
 
         return mask

diff --git a/imgtools/ops/ops.py b/imgtools/ops/ops.py
@@ -1426,7 +1426,7 @@ class StructureSetToSegmentation(BaseOp):
     """
 
     def __init__(self, 
-                 roi_names: Dict[str: str], 
+                 roi_names: Dict[str, str], 
                  force_missing: bool = False,
                  continuous: bool = True):
         """Initialize the op.