changed os.path.join to pathlib.Path.as_posix()
Former-commit-id: 042aa1f
fishingguy456 committed May 25, 2022
1 parent a783dca commit f10f579
Showing 14 changed files with 44 additions and 54 deletions.
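The change is mechanical across all 14 files: every `os.path.join(...)` call becomes `pathlib.Path(...).as_posix()`. A minimal sketch of the behavioral difference follows; the directory name is hypothetical, not from the commit. `os.path.join` builds paths with the platform's native separator, while `as_posix()` always emits forward slashes, so generated paths stay identical across operating systems.

```python
import os
import pathlib

out_dir = "output/run1"  # hypothetical directory, for illustration only

# os.path.join uses the native separator: backslash on Windows, slash elsewhere.
joined = os.path.join(out_dir, "masks")            # "output/run1\\masks" on Windows

# pathlib.Path(...).as_posix() normalizes to forward slashes on every platform.
posix = pathlib.Path(out_dir, "masks").as_posix()  # "output/run1/masks" everywhere

print(joined, posix)
```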
4 changes: 2 additions & 2 deletions examples/process_general.py
@@ -1,4 +1,4 @@
-import os
+import os, pathlib
import glob
from numpy import mod
import pandas as pd
@@ -78,7 +78,7 @@ def __init__(self,
for colname in self.column_names:
colname_process = ("_").join(colname.split("_")[1:])
extension = file_name[colname_process]
-self.output.append(ImageFileOutput(os.path.join(self.output_directory,extension.split(".")[0]),
+self.output.append(ImageFileOutput(pathlib.Path(self.output_directory,extension.split(".")[0]).as_posix(),
filename_format="{subject_id}_"+"{}.nrrd".format(extension)))

def process_one_subject(self, subject_id):
10 changes: 5 additions & 5 deletions examples/radcure_simple.py
@@ -1,4 +1,4 @@
-import os
+import os, pathlib
from argparse import ArgumentParser

from imgtools.io import (ImageFileLoader, ImageFileWriter,
@@ -74,25 +74,25 @@ def __init__(self,

# output ops
self.image_output = ImageFileOutput(
os.path.join(self.output_directory, "images"), # where to save the processed images
pathlib.Path(self.output_directory, "images"), # where to save the processed image.as_posix()s
filename_format="{subject_id}_image.nrrd", # the filename template, {subject_id} will be replaced by each subject's ID at runtime
create_dirs=True, # whether to create directories that don't exists already
compress=True # enable compression for NRRD format
)
self.mask_output = ImageFileOutput(
os.path.join(self.output_directory, "masks"),
pathlib.Path(self.output_directory, "masks").as_posix(),
filename_format="{subject_id}_mask.nrrd",
create_dirs=True,
compress=True
)
self.dose_output = ImageFileOutput(
os.path.join(self.output_directory, "doses"),
pathlib.Path(self.output_directory, "doses").as_posix(),
filename_format="{subject_id}_dose.nrrd",
create_dirs=True,
compress=True
)
self.petscan_output = ImageFileOutput(
os.path.join(self.output_directory, "petscan"),
pathlib.Path(self.output_directory, "petscan").as_posix(),
filename_format="{subject_id}_petscan.nrrd",
create_dirs=True,
compress=True
14 changes: 7 additions & 7 deletions imgtools/autopipeline.py
@@ -48,7 +48,7 @@ def __init__(self,
#input operations
self.input = ImageAutoInput(input_directory, modalities, n_jobs, visualize)

-self.output_df_path = os.path.join(self.output_directory, "dataset.csv")
+self.output_df_path = pathlib.Path(self.output_directory, "dataset.csv").as_posix()
#Output component table
self.output_df = self.input.df_combined
#Name of the important columns which needs to be saved
@@ -62,8 +62,8 @@ def __init__(self,
self.output = ImageAutoOutput(self.output_directory, self.output_streams)

#Make a directory
-if not os.path.exists(os.path.join(self.output_directory,".temp")):
-os.mkdir(os.path.join(self.output_directory,".temp"))
+if not os.path.exists(pathlib.Path(self.output_directory,".temp").as_posix()):
+os.mkdir(pathlib.Path(self.output_directory,".temp").as_posix())


def process_one_subject(self, subject_id):
@@ -79,7 +79,7 @@ def process_one_subject(self):
The ID of subject to process
"""
#Check if the subject_id has already been processed
-if os.path.exists(os.path.join(self.output_directory,".temp",f'temp_{subject_id}.pkl')):
+if os.path.exists(pathlib.Path(self.output_directory,".temp",f'temp_{subject_id}.pkl').as_posix()):
print(f"{subject_id} already processed")
return

@@ -173,20 +173,20 @@ def process_one_subject(self):
metadata[f"metadata_{colname}"] = [read_results[i].get_metadata()]
print(subject_id, " SAVED PET")
#Saving all the metadata in multiple text files
-with open(os.path.join(self.output_directory,".temp",f'{subject_id}.pkl'),'wb') as f:
+with open(pathlib.Path(self.output_directory,".temp",f'{subject_id}.pkl').as_posix(),'wb') as f:
pickle.dump(metadata,f)
return

def save_data(self):
-files = glob.glob(os.path.join(self.output_directory, ".temp", "*.pkl"))
+files = glob.glob(pathlib.Path(self.output_directory, ".temp", "*.pkl").as_posix())
for file in files:
filename = pathlib.Path(file).name
subject_id = os.path.splitext(filename)[0]
with open(file,"rb") as f:
metadata = pickle.load(f)
self.output_df.loc[subject_id, list(metadata.keys())] = list(metadata.values())
self.output_df.to_csv(self.output_df_path)
-shutil.rmtree(os.path.join(self.output_directory, ".temp"))
+shutil.rmtree(pathlib.Path(self.output_directory, ".temp").as_posix())

def run(self):
"""Execute the pipeline, possibly in parallel.
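Incidentally, the exists()/mkdir() pair for the `.temp` directory above could be collapsed with pathlib itself. A sketch of that alternative, not what this commit does (the directory value is hypothetical):

```python
import pathlib

output_directory = "output"  # hypothetical value, for illustration

# One call replaces the os.path.exists()/os.mkdir() pair: parents=True creates
# missing intermediate directories, exist_ok=True makes the call idempotent.
pathlib.Path(output_directory, ".temp").mkdir(parents=True, exist_ok=True)
```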
6 changes: 3 additions & 3 deletions imgtools/io/common.py
@@ -1,4 +1,4 @@
-import os
+import os, pathlib
from typing import Dict

from pydicom.misc import is_dicom
@@ -28,11 +28,11 @@ def find_dicom_paths(root_path: str, yield_directories: bool = False) -> str:
# TODO add some filtering options
for root, _, files in os.walk(root_path):
if yield_directories:
-if any((is_dicom(os.path.join(root, f)) for f in files)):
+if any((is_dicom(pathlib.Path(root, f).as_posix()) for f in files)):
yield root
else:
for f in files:
-fpath = os.path.join(root, f)
+fpath = pathlib.Path(root, f).as_posix()
if is_dicom(fpath):
yield fpath

7 changes: 3 additions & 4 deletions imgtools/io/dataset.py
@@ -1,6 +1,5 @@
from genericpath import exists
-import os
-import pathlib
+import os, pathlib
from typing import List, Sequence, Optional, Callable, Iterable, Dict,Tuple
from tqdm import tqdm

@@ -41,7 +40,7 @@ def load_from_nrrd(
path: Path to the output directory passed to the autopipeline script. The output directory should have all the user mentioned modalities processed and present in their folder. The directory
should additionally have dataset.csv which stores all the metadata
"""
-path_metadata = os.path.join(path,"dataset.csv")
+path_metadata = pathlib.Path(path,"dataset.csv").as_posix()
if not os.path.exists(path_metadata):
raise ValueError("The specified path has no file name {}".format(path_metadata))
df_metadata = pd.read_csv(path_metadata,index_col=0)
@@ -71,7 +70,7 @@ def load_from_nrrd(
else:
extension = file_names[col]
extra = ""
-path_mod = os.path.join(path,extension.split(".")[0],f"{subject_id}_{extra}{extension}.nrrd")
+path_mod = pathlib.Path(path,extension.split(".")[0],f"{subject_id}_{extra}{extension}.nrrd").as_posix()
#All modalities except RTSTRUCT should be of type torchIO.ScalarImage
if os.path.exists(path_mod):
if col.split("_")[0]!="RTSTRUCT":
6 changes: 3 additions & 3 deletions imgtools/io/loaders.py
@@ -191,7 +191,7 @@ def __init__(self,
self.exclude_paths = []
for path in exclude_paths:
if not path.startswith(self.root_directory):
-full_paths = glob.glob(os.path.join(root_directory, path))
+full_paths = glob.glob(pathlib.Path(root_directory, path).as_posix())
self.exclude_paths.extend(full_paths)
else:
full_path = path
@@ -207,15 +207,15 @@ def _generate_paths(self):
continue
subject_dir_path = f.path
if self.subdir_path:
-full_path = os.path.join(subject_dir_path, self.subdir_path)
+full_path = pathlib.Path(subject_dir_path, self.subdir_path).as_posix()
else:
full_path = subject_dir_path
try:
full_path = glob.glob(full_path)[0]
except IndexError:
continue
if os.path.isdir(full_path):
-full_path = os.path.join(full_path, "")
+full_path = pathlib.Path(full_path, "").as_posix()
subject_dir_name = os.path.basename(os.path.normpath(subject_dir_path))
subject_id = self._extract_subject_id_from_path(full_path, subject_dir_name)
paths[subject_id] = full_path
8 changes: 4 additions & 4 deletions imgtools/io/writers.py
@@ -1,4 +1,4 @@
-import os
+import os, pathlib
import json
import csv
import pickle
@@ -36,7 +36,7 @@ def _get_path_from_subject_id(self, subject_id, **kwargs):
time=time,
date_time=date_time,
**kwargs)
-out_path = os.path.join(self.root_directory, out_filename)
+out_path = pathlib.Path(self.root_directory, out_filename).as_posix()
out_dir = os.path.dirname(out_path)
if self.create_dirs and not os.path.exists(out_dir):
os.makedirs(out_dir, exist_ok=True) # create subdirectories if specified in filename_format
@@ -68,7 +68,7 @@ def _get_path_from_subject_id(self, subject_id, **kwargs):
# out_filename = self.filename_format.format(subject_id=subject_id, **kwargs)
self.root_directory = self.root_directory.format(subject_id=subject_id,
**kwargs)
-out_path = os.path.join(self.root_directory, self.filename_format)
+out_path = pathlib.Path(self.root_directory, self.filename_format).as_posix()
out_dir = os.path.dirname(out_path)
if self.create_dirs and not os.path.exists(out_dir):
os.makedirs(out_dir, exist_ok=True) # create subdirectories if specified in filename_format
@@ -205,7 +205,7 @@ def __init__(self, root_directory, filename_format="{subject_id}.json", create_d
raise ValueError(f"File format {self.file_format} not supported. Supported formats: JSON (.json), CSV (.csv), Pickle (.pkl).")

if self.file_format == "csv" and self.remove_existing:
-out_path = os.path.join(self.root_directory, self.filename_format)
+out_path = pathlib.Path(self.root_directory, self.filename_format).as_posix()
if os.path.exists(out_path):
os.remove(out_path) # remove existing CSV instead of appending

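For context, writers in this module assemble the final output path by formatting templated directory and filename strings before joining them. A simplified sketch of that flow under hypothetical template values (the real writers also mix in date/time fields and **kwargs):

```python
import pathlib

root_directory = "output/{subject_id}"      # hypothetical directory template
filename_format = "{subject_id}_mask.nrrd"  # hypothetical filename template

subject_id = "HN-001"
root = root_directory.format(subject_id=subject_id)
out_path = pathlib.Path(root, filename_format.format(subject_id=subject_id)).as_posix()
# out_path == "output/HN-001/HN-001_mask.nrrd"
```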
5 changes: 2 additions & 3 deletions imgtools/modules/datagraph.py
@@ -1,10 +1,9 @@
-import os, time
+import os, time, pathlib
from typing import List
from functools import reduce
import numpy as np
import pandas as pd
from tqdm import tqdm
-import pathlib


class DataGraph:
@@ -119,7 +118,7 @@ def visualize_graph(self):
node["value"] = len(neigbour_map[node['id']])


-vis_path = os.path.join(os.path.dirname(self.edge_path),"datanet.html")
+vis_path = pathlib.Path(os.path.dirname(self.edge_path),"datanet.html").as_posix()
data_net.show(vis_path)

def _form_edge_study(self, df, all_study, study_id):
4 changes: 2 additions & 2 deletions imgtools/modules/dose.py
@@ -1,4 +1,4 @@
-import os
+import os, pathlib
import warnings

import numpy as np
@@ -34,7 +34,7 @@ def from_dicom_rtdose(cls, path):
# dose = sitk.ReadImage(glob.glob(path + "/*"))

#Get the metadata
-dcm_path = os.path.join(path, os.listdir(path)[0])
+dcm_path = pathlib.Path(path, os.listdir(path)[0]).as_posix()
df = dcmread(dcm_path)

#Convert to SUV
4 changes: 2 additions & 2 deletions imgtools/modules/pet.py
@@ -1,4 +1,4 @@
-import os
+import os, pathlib
import warnings
import datetime
from typing import Optional
@@ -39,7 +39,7 @@ def from_dicom_pet(cls, path,series_id=None,type="SUV"):
have some error.
'''
pet = read_image(path,series_id)
-path_one = os.path.join(path,os.listdir(path)[0])
+path_one = pathlib.Path(path,os.listdir(path)[0]).as_posix()
df = dcmread(path_one)
calc = False
try:
12 changes: 3 additions & 9 deletions imgtools/ops/ops.py
@@ -85,7 +85,7 @@ def __init__(self,
####### CRAWLER ############
# Checks if dataset has already been indexed
# To be changed later
-path_crawl = os.path.join(self.parent, ".imgtools", f"imgtools_{self.dataset_name}.csv")
+path_crawl = pathlib.Path(self.parent, ".imgtools", f"imgtools_{self.dataset_name}.csv").as_posix()
if not os.path.exists(path_crawl):
print("Couldn't find the dataset index CSV. Indexing the dataset...")
db = crawl(self.dir_path, n_jobs=n_jobs)
@@ -95,7 +95,7 @@ def __init__(self,

####### GRAPH ##########
# Form the graph
-edge_path = os.path.join(self.parent,".imgtools",f"imgtools_{self.dataset_name}_edges.csv")
+edge_path = pathlib.Path(self.parent,".imgtools",f"imgtools_{self.dataset_name}_edges.csv").as_posix()
graph = DataGraph(path_crawl=path_crawl, edge_path=edge_path, visualize=visualize)
print(f"Forming the graph based on the given modalities: {self.modalities}")
self.df_combined = graph.parser(self.modalities)
@@ -334,16 +334,10 @@ def __init__(self,
# Not considering colnames ending with alphanumeric
colname_process = ("_").join([item for item in colname.split("_") if item.isnumeric()==False])
extension = self.file_name[colname_process]
-self.output[colname_process] = ImageSubjectFileOutput(os.path.join(root_directory,"{subject_id}",extension.split(".")[0]),
+self.output[colname_process] = ImageSubjectFileOutput(pathlib.Path(root_directory,"{subject_id}",extension.split(".")[0]).as_posix(),
filename_format=colname_process+"{}.nii.gz".format(extension))
# self.output[colname_process] = ImageFileOutput(os.path.join(root_directory,extension.split(".")[0]),
# filename_format="{subject_id}_"+"{}.nrrd".format(extension))
-# if not is_mask:
-# self.output[colname_process] = ImageSubjectFileOutput(os.path.join(root_directory,"{subject_id}",extension.split(".")[0]),
-# filename_format=colname_process+"{}.nii.gz".format(extension))
-# else:
-# self.output[colname_process] = ImageSubjectFileOutput(os.path.join(root_directory,"{subject_id}",extension.split(".")[0]),
-# filename_format=mask_label+"{}.nii.gz".format(extension),)

def __call__(self,
subject_id: str,
12 changes: 6 additions & 6 deletions imgtools/utils/crawl.py
@@ -13,7 +13,7 @@ def crawl_one(folder):
database = {}
for path, _, _ in os.walk(folder):
# find dicoms
-dicoms = glob.glob(os.path.join(path, "*.dcm"))
+dicoms = glob.glob(pathlib.Path(path, "*.dcm").as_posix())

# instance (slice) information
for dcm in dicoms:
@@ -107,9 +107,9 @@ def crawl(top,
n_jobs: int = -1):
#top is the input directory in the argument parser from autotest.py
database_list = []
-folders = glob.glob(os.path.join(top, "*"))
+folders = glob.glob(pathlib.Path(top, "*").as_posix())

-database_list = Parallel(n_jobs=n_jobs)(delayed(crawl_one)(os.path.join(top, folder)) for folder in tqdm(folders))
+database_list = Parallel(n_jobs=n_jobs)(delayed(crawl_one)(pathlib.Path(top, folder).as_posix()) for folder in tqdm(folders))

# convert list to dictionary
database_dict = {}
@@ -120,18 +120,18 @@
# save one level above imaging folders
parent, dataset = os.path.split(top)

-parent_imgtools = os.path.join(parent, ".imgtools")
+parent_imgtools = pathlib.Path(parent, ".imgtools").as_posix()

if not os.path.exists(parent_imgtools):
os.makedirs(parent_imgtools)

# save as json
-with open(os.path.join(parent_imgtools, f'imgtools_{dataset}.json'), 'w') as f:
+with open(pathlib.Path(parent_imgtools, f'imgtools_{dataset}.json').as_posix(), 'w') as f:
json.dump(database_dict, f, indent=4)

# save as dataframe
df = to_df(database_dict)
-df_path = os.path.join(parent_imgtools, f'imgtools_{dataset}.csv')
+df_path = pathlib.Path(parent_imgtools, f'imgtools_{dataset}.csv').as_posix()
df.to_csv(df_path)

return database_dict
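A side note on the glob calls above: pathlib can glob directly, which avoids the string round-trip when Path objects are acceptable downstream. A sketch, not part of the commit (the directory name is hypothetical):

```python
import pathlib

top = "data"  # hypothetical input directory

# Path.glob yields Path objects; as_posix() converts only where strings are needed.
folders = [p.as_posix() for p in pathlib.Path(top).glob("*")]
```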
3 changes: 1 addition & 2 deletions tests/test_components.py
@@ -1,6 +1,5 @@
-import os
+import os, pathlib
import shutil
-import pathlib
import urllib.request as request
from zipfile import ZipFile
import torchio as tio
3 changes: 1 addition & 2 deletions tests/test_modalities.py
@@ -3,11 +3,10 @@
'''


-import os
+import os, pathlib
from posixpath import dirname
import shutil
import warnings
-import pathlib
from multiprocessing import cpu_count

import numpy as np
