Skip to content

Commit

Permalink
args
Browse files Browse the repository at this point in the history
Former-commit-id: f4f6d2d
  • Loading branch information
fishingguy456 committed Jun 13, 2022
1 parent e73ae50 commit 8571b68
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 41 deletions.
86 changes: 59 additions & 27 deletions examples/autotest.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,40 @@ def __init__(self,
show_progress=False,
warn_on_error=False,
overwrite=False,
nnUnet_info=None,
is_nnunet=False,
train_size=1.0,
random_state=42):
"""Initialize the pipeline.
Parameters
----------
input_directory: str
Directory containing the input data
output_directory: str
Directory where the output data will be stored
modalities: str, default="CT"
Modalities to load, given as a comma-separated list with no spaces (e.g. "CT,RTSTRUCT")
spacing: tuple of floats, default=(1., 1., 0.)
Spacing of the output image
n_jobs: int, default=-1
Number of jobs to run in parallel. If -1, use all cores
visualize: bool, default=False
Whether to visualize the results of the pipeline using pyvis. Outputs to an HTML file.
missing_strategy: str, default="drop"
How to handle missing modalities. Can be "drop" or "fill"
show_progress: bool, default=False
Whether to show progress bars
warn_on_error: bool, default=False
Whether to warn on errors
overwrite: bool, default=False
Whether to overwrite output files that already exist
is_nnunet: bool, default=False
Whether to format the output for nnunet
train_size: float, default=1.0
Proportion of the dataset to use for training, as a decimal between 0 and 1 (0 means all data is used for testing)
random_state: int, default=42
Random state for train_test_split
"""
super().__init__(
n_jobs=n_jobs,
missing_strategy=missing_strategy,
Expand All @@ -57,7 +87,11 @@ def __init__(self,
self.output_directory = pathlib.Path(output_directory).as_posix()
self.spacing = spacing
self.existing = [None] #self.existing_patients()
self.nnUnet_info = nnUnet_info
self.is_nnunet = is_nnunet
if is_nnunet:
self.nnunet_info = {}
else:
self.nnunet_info = None
self.train_size = train_size
self.random_state = random_state

Expand All @@ -67,15 +101,14 @@ def __init__(self,
if self.train_size == 0.0:
warnings.warn("Train size is 0, all data will be used for testing")

if self.train_size != 1 and not self.nnUnet_info:
warnings.warn("Cannot run train/test split without nnUnet, ignoring train_size")
if self.train_size != 1 and not self.is_nnunet:
warnings.warn("Cannot run train/test split without nnunet, ignoring train_size")

if self.train_size > 1 or self.train_size < 0 and self.nnUnet_info:
if self.train_size > 1 or self.train_size < 0 and self.is_nnunet:
raise ValueError("train_size must be between 0 and 1")

if nnUnet_info:
self.nnUnet_info["modalities"] = {"CT": "0000"} #modality to 4-digit code
self.nnUnet_info["index"] = 0 #number of patients
if self.is_nnunet:
self.nnunet_info["modalities"] = {"CT": "0000"} #modality to 4-digit code

#input operations
self.input = ImageAutoInput(input_directory, modalities, n_jobs, visualize)
Expand All @@ -91,7 +124,7 @@ def __init__(self,
self.make_binary_mask = StructureSetToSegmentation(roi_names=[], continuous=False) # "GTV-.*"

# output ops
self.output = ImageAutoOutput(self.output_directory, self.output_streams, self.nnUnet_info)
self.output = ImageAutoOutput(self.output_directory, self.output_streams, self.nnunet_info)

#Make a directory
if not os.path.exists(pathlib.Path(self.output_directory,".temp").as_posix()):
Expand All @@ -105,6 +138,7 @@ def process_one_subject(self, subject_id):
multiple images, structures, etc.). During pipeline execution, this
method will receive one argument, subject_id, which can be used to
retrieve inputs and save outputs.
Parameters
----------
subject_id : str
Expand All @@ -126,8 +160,6 @@ def process_one_subject(self, subject_id):
subject_modalities = set() # all the modalities that this subject has
num_rtstructs = 0

if self.nnUnet_info:
self.nnUnet_info["index"] += 1 #increment the number of patients
for i, colname in enumerate(self.output_streams):
modality = colname.split("_")[0]
subject_modalities.add(modality) #set add
Expand Down Expand Up @@ -160,18 +192,18 @@ def process_one_subject(self, subject_id):
if hasattr(read_results[i], "metadata") and read_results[i].metadata is not None:
metadata.update(read_results[i].metadata)

#modality is MR and the user has selected to have nnUnet output
if self.nnUnet_info:
#modality is MR and the user has selected to have nnunet output
if self.is_nnunet:
if modality == "MR": #MR images can have various modalities like FLAIR, T1, etc.
self.nnUnet_info['current_modality'] = metadata["AcquisitionContrast"]
if not metadata["AcquisitionContrast"] in self.nnUnet_info["modalities"].keys(): #if the modality is new
self.nnUnet_info["modalities"][metadata["AcquisitionContrast"]] = str(len(self.nnUnet_info["modalities"])).zfill(4) #fill to 4 digits
self.nnunet_info['current_modality'] = metadata["AcquisitionContrast"]
if not metadata["AcquisitionContrast"] in self.nnunet_info["modalities"].keys(): #if the modality is new
self.nnunet_info["modalities"][metadata["AcquisitionContrast"]] = str(len(self.nnunet_info["modalities"])).zfill(4) #fill to 4 digits
else:
self.nnUnet_info['current_modality'] = modality #CT
self.nnunet_info['current_modality'] = modality #CT
if subject_id in self.train:
self.output(subject_id, image, output_stream, nnUnet_info=self.nnUnet_info)
self.output(subject_id, image, output_stream, nnunet_info=self.nnunet_info)
else:
self.output(subject_id, image, output_stream, nnUnet_info=self.nnUnet_info, train_or_test="Ts")
self.output(subject_id, image, output_stream, nnunet_info=self.nnunet_info, train_or_test="Ts")
else:
self.output(subject_id, image, output_stream)

Expand Down Expand Up @@ -216,13 +248,13 @@ def process_one_subject(self, subject_id):
print(mask.GetSize())
mask_arr = np.transpose(sitk.GetArrayFromImage(mask))

if self.nnUnet_info:
if self.is_nnunet:
sparse_mask = mask.generate_sparse_mask().mask_array
sparse_mask = sitk.GetImageFromArray(sparse_mask) #convert the nparray to sitk image
if subject_id in self.train:
self.output(subject_id, sparse_mask, output_stream, nnUnet_info=self.nnUnet_info, label_or_image="labels") #rtstruct is label for nnunet
self.output(subject_id, sparse_mask, output_stream, nnunet_info=self.nnunet_info, label_or_image="labels") #rtstruct is label for nnunet
else:
self.output(subject_id, sparse_mask, output_stream, nnUnet_info=self.nnUnet_info, label_or_image="labels", train_or_test="Ts")
self.output(subject_id, sparse_mask, output_stream, nnunet_info=self.nnunet_info, label_or_image="labels", train_or_test="Ts")
else:
# if there is only one ROI, sitk.GetArrayFromImage() will return a 3d array instead of a 4d array with one slice
if len(mask_arr.shape) == 3:
Expand Down Expand Up @@ -300,7 +332,7 @@ def run(self):
verbose = 51 if self.show_progress else 0

subject_ids = self._get_loader_subject_ids()
if self.nnUnet_info:
if self.is_nnunet:
self.num_subjects = len(subject_ids)
self.train, self.test = train_test_split(subject_ids, train_size=self.train_size, random_state=self.random_state)
# Note that returning any SimpleITK object in process_one_subject is
Expand All @@ -327,28 +359,28 @@ def run(self):
# modalities="CT,RTSTRUCT",
# visualize=False,
# overwrite=True,
# nnUnet_info={"study name": "NSCLC-Radiomics-Interobserver1"})
# nnunet_info={"study name": "NSCLC-Radiomics-Interobserver1"})
# pipeline = AutoPipeline(input_directory="C:/Users/qukev/BHKLAB/dataset/manifest-1598890146597/NSCLC-Radiomics-Interobserver1",
# output_directory="C:/Users/qukev/BHKLAB/autopipelineoutput",
# modalities="CT,RTSTRUCT",
# visualize=False,
# overwrite=True,
# nnUnet_info={"study name": "NSCLC-Radiomics-Interobserver1"})
# nnunet_info={"study name": "NSCLC-Radiomics-Interobserver1"})

pipeline = AutoPipeline(input_directory="C:/Users/qukev/BHKLAB/hnscc_testing/HNSCC",
output_directory="C:/Users/qukev/BHKLAB/hnscc_testing_output",
modalities="CT,RTSTRUCT",
visualize=False,
overwrite=True,
nnUnet_info={"study name": "TCIA-HNSCC"},
is_nnunet=True,
train_size=0.5)

# pipeline = AutoPipeline(input_directory="C:/Users/qukev/BHKLAB/dataset/manifest-1598890146597/NSCLC-Radiomics-Interobserver1",
# output_directory="C:/Users/qukev/BHKLAB/autopipelineoutput",
# modalities="CT,RTSTRUCT",
# visualize=False,
# overwrite=True,
# nnUnet_info={"study name": "NSCLC-Radiomics-Interobserver1"},
# nnunet_info={"study name": "NSCLC-Radiomics-Interobserver1"},
# train_size=0.5)

# pipeline = AutoPipeline(input_directory="C:/Users/qukev/BHKLAB/hnscc_pet/PET",
Expand Down
4 changes: 2 additions & 2 deletions imgtools/autopipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,15 +208,15 @@ def run(self):
def main():
args = parser()
if args.nnunet_study_name:
nnUnet_info = {"study name": args.nnunet_study_name}
nnunet_info = {"study name": args.nnunet_study_name}
pipeline = AutoPipeline(args.input_directory,
args.output_directory,
modalities=args.modalities,
spacing=args.spacing,
n_jobs=args.n_jobs,
visualize=args.visualize,
show_progress=args.show_progress,
nnUnet_info=nnUnet_info)
nnunet_info=nnunet_info)

print(f'starting Pipeline...')
pipeline.run()
Expand Down
10 changes: 5 additions & 5 deletions imgtools/io/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,15 @@ def __init__(self, root_directory, filename_format="{subject_id}.nii.gz", create
#delete the folder called {subject_id} that was made in the original BaseWriter / the one named {label_or_image}


def put(self, subject_id, image, is_mask=False, nnUnet_info=None, label_or_image: str = "images", mask_label="", train_or_test: str = "Tr", **kwargs):
def put(self, subject_id, image, is_mask=False, nnunet_info=None, label_or_image: str = "images", mask_label="", train_or_test: str = "Tr", **kwargs):
if is_mask:
self.filename_format = mask_label+".nii.gz" #save the mask labels as their rtstruct names
if nnUnet_info:
if nnunet_info:
if label_or_image == "labels":
filename = f"{nnUnet_info['study name']}_{nnUnet_info['index']}.nii.gz" #naming convention for labels
filename = f"{subject_id}.nii.gz" #naming convention for labels
else:
# f"{nnUnet_info['study name']}_{nnUnet_info['index']}_{nnUnet_info['modalities'][nnUnet_info['current_modality']]}.nii.gz"
filename = self.filename_format.format(study_name=nnUnet_info['study name'], index=nnUnet_info['index'], modality_index=nnUnet_info['modalities'][nnUnet_info['current_modality']]) #naming convention for images
# f"{nnunet_info['study name']}_{nnunet_info['index']}_{nnunet_info['modalities'][nnunet_info['current_modality']]}.nii.gz"
filename = self.filename_format.format(subject_id=subject_id, modality_index=nnunet_info['modalities'][nnunet_info['current_modality']]) #naming convention for images
out_path = self._get_path_from_subject_id(filename, label_or_image=label_or_image, train_or_test=train_or_test)
else:
out_path = self._get_path_from_subject_id(self.filename_format, subject_id=subject_id)
Expand Down
10 changes: 5 additions & 5 deletions imgtools/ops/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ class ImageAutoOutput:
def __init__(self,
root_directory: str,
output_streams: List[str],
nnUnet_info: Dict = None):
nnunet_info: Dict = None):

# File types
self.file_name = file_name_convention()
Expand All @@ -333,12 +333,12 @@ def __init__(self,
# Not considering colnames ending with alphanumeric
colname_process = ("_").join([item for item in colname.split("_") if item.isnumeric()==False])
extension = self.file_name[colname_process]
if not nnUnet_info:
if not nnunet_info:
self.output[colname_process] = ImageSubjectFileOutput(pathlib.Path(root_directory,"{subject_id}",extension.split(".")[0]).as_posix(),
filename_format=colname_process+"{}.nii.gz".format(extension))
else:
self.output[colname_process] = ImageSubjectFileOutput(pathlib.Path(root_directory,"{label_or_image}{train_or_test}").as_posix(),
filename_format="{study_name}_{index}_{modality_index}.nii.gz")
filename_format="{subject_id}_{modality_index}.nii.gz")

def __call__(self,
subject_id: str,
Expand All @@ -348,9 +348,9 @@ def __call__(self,
mask_label: Optional[str] = "",
label_or_image: str="images",
train_or_test: str="Tr",
nnUnet_info: Dict=None):
nnunet_info: Dict=None):

self.output[output_stream](subject_id, img, is_mask=is_mask, mask_label=mask_label, label_or_image=label_or_image, train_or_test=train_or_test, nnUnet_info=nnUnet_info)
self.output[output_stream](subject_id, img, is_mask=is_mask, mask_label=mask_label, label_or_image=label_or_image, train_or_test=train_or_test, nnunet_info=nnunet_info)

class NumpyOutput(BaseOutput):
"""NumpyOutput class processed images as NumPy files.
Expand Down
4 changes: 2 additions & 2 deletions imgtools/utils/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ def parser():
parser.add_argument("--show_progress", action="store_true",
help="Whether to print progress to standard output.")

parser.add_argument("--nnunet_study_name", type=str, default=None,
help="Name of the study to be used for nn-Unet.")
parser.add_argument("--nnunet", default=False, action="store_true",
help="Whether to make the output conform to nnunet requirements.")

parser.add_argument("--train_size", type=float, default=1.0,
help="The proportion of data to be used for training, as a decimal.")
Expand Down

0 comments on commit 8571b68

Please sign in to comment.