Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Increase validate paths usage #796

Merged
merged 7 commits into from
Oct 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 14 additions & 90 deletions ark/phenotyping/cell_cluster_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import scipy.stats as stats

from ark.analysis import visualize
from ark.utils import misc_utils
from ark.utils import misc_utils, io_utils


def compute_cell_cluster_count_avg(cell_cluster_path, pixel_cluster_col_prefix,
Expand Down Expand Up @@ -96,10 +96,7 @@ def compute_cell_cluster_channel_avg(fovs, channels, base_dir,
"""

# verify the cell table actually exists
if not os.path.exists(os.path.join(base_dir, weighted_cell_channel_name)):
raise FileNotFoundError(
"Weighted cell table %s not found in %s" % (weighted_cell_channel_name, base_dir)
)
io_utils.validate_paths(os.path.join(base_dir, weighted_cell_channel_name))

# verify the cell cluster col specified is valid
misc_utils.verify_in_list(
Expand Down Expand Up @@ -445,15 +442,8 @@ def train_cell_som(fovs, channels, base_dir, pixel_data_dir, cell_table_path,
cluster_counts_norm_path = os.path.join(base_dir, cluster_counts_norm_name)
weights_path = os.path.join(base_dir, weights_name)

# if the cell table path does not exist
if not os.path.exists(cell_table_path):
raise FileNotFoundError('Cell table path %s does not exist' %
cell_table_path)

# if the pixel data with the SOM and meta labels path does not exist
if not os.path.exists(pixel_data_path):
raise FileNotFoundError('Pixel data dir %s does not exist in base_dir %s' %
(pixel_data_path, base_dir))
# check the cell table path and pixel data path exist
io_utils.validate_paths([cell_table_path, pixel_data_path])

# verify the cluster_col provided is valid
misc_utils.verify_in_list(
Expand Down Expand Up @@ -550,17 +540,8 @@ def cluster_cells(base_dir, cluster_counts_norm_name='cluster_counts_norm.feathe
weights_path = os.path.join(base_dir, weights_name)
cell_data_path = os.path.join(base_dir, cell_data_name)

# if the path to the normalized pixel cluster counts per cell doesn't exist
if not os.path.exists(cluster_counts_norm_path):
raise FileNotFoundError(
'Normalized pixel cluster counts per cell file %s does not exist in base_dir %s' %
(cluster_counts_norm_name, base_dir)
)

# if the path to the weights file does not exist
if not os.path.exists(weights_path):
raise FileNotFoundError('Weights file %s does not exist in base_dir %s' %
(weights_name, base_dir))
# check that the paths to the normalized pixel cluster counts per cell and the weights file exist
io_utils.validate_paths([cluster_counts_norm_path, weights_path])

# verify the pixel_cluster_col_prefix provided is valid
misc_utils.verify_in_list(
Expand Down Expand Up @@ -678,26 +659,8 @@ def cell_consensus_cluster(fovs, channels, base_dir, pixel_cluster_col, max_k=20
weighted_channel_path = os.path.join(base_dir, weighted_cell_channel_name)
clust_to_meta_path = os.path.join(base_dir, clust_to_meta_name)

# if the path to the SOM clustered data doesn't exist
if not os.path.exists(cell_data_path):
raise FileNotFoundError(
'Cell data file %s does not exist in base_dir %s' %
(cell_data_name, base_dir)
)

# if the path to the average pixel cluster counts per cell cluster doesn't exist
if not os.path.exists(som_cluster_counts_avg_path):
raise FileNotFoundError(
'Average pix clust count per cell SOM cluster file %s does not exist in base_dir %s' %
(cell_som_cluster_count_avgs_name, base_dir)
)

# if the path to the weighted channel data doesn't exist
if not os.path.exists(weighted_channel_path):
raise FileNotFoundError(
'Weighted channel table %s does not exist in base_dir %s' %
(weighted_cell_channel_name, base_dir)
)
# check paths
io_utils.validate_paths([cell_data_path, som_cluster_counts_avg_path, weighted_channel_path])

# verify the pixel_cluster_col provided is valid
misc_utils.verify_in_list(
Expand Down Expand Up @@ -866,41 +829,10 @@ def apply_cell_meta_cluster_remapping(fovs, channels, base_dir, cell_consensus_n
meta_cluster_channel_avgs_path = os.path.join(base_dir, cell_meta_cluster_channel_avg_name)

# file path validation
if not os.path.exists(cell_consensus_path):
raise FileNotFoundError('Cell consensus file %s does not exist in base_dir %s' %
(cell_consensus_name, base_dir))

if not os.path.exists(cell_remapped_path):
raise FileNotFoundError('Cell remapping file %s does not exist in base_dir %s' %
(cell_remapped_name, base_dir))

if not os.path.exists(som_cluster_counts_avgs_path):
raise FileNotFoundError(
'Average pix clust count per cell SOM cluster file %s does not exist in base_dir %s' %
(cell_som_cluster_count_avgs_name, base_dir)
)

if not os.path.exists(meta_cluster_counts_avgs_path):
raise FileNotFoundError(
'Average pix clust count per cell meta cluster file %s does not exist in base_dir %s' %
(cell_meta_cluster_count_avgs_name, base_dir)
)

if not os.path.exists(weighted_channel_path):
raise FileNotFoundError('Weighted channel table %s does not exist in base_dir %s' %
(weighted_cell_channel_name, base_dir))

if not os.path.exists(som_cluster_channel_avgs_path):
raise FileNotFoundError(
'Average weighted chan per cell SOM cluster file %s does not exist in base_dir %s' %
(cell_som_cluster_channel_avg_name, base_dir)
)

if not os.path.exists(meta_cluster_channel_avgs_path):
raise FileNotFoundError(
'Average weighted chan per cell meta cluster file %s does not exist in base_dir %s' %
(cell_meta_cluster_channel_avg_name, base_dir)
)
io_utils.validate_paths([cell_consensus_path, cell_remapped_path,
som_cluster_counts_avgs_path, meta_cluster_counts_avgs_path,
weighted_channel_path, som_cluster_channel_avgs_path,
meta_cluster_channel_avgs_path])

# verify the pixel_cluster_col provided is valid
misc_utils.verify_in_list(
Expand Down Expand Up @@ -1061,9 +993,7 @@ def generate_weighted_channel_avg_heatmap(cell_cluster_channel_avg_path, cell_cl
"""

# file path validation
if not os.path.exists(cell_cluster_channel_avg_path):
raise FileNotFoundError('Channel average path %s does not exist' %
cell_cluster_channel_avg_path)
io_utils.validate_paths(cell_cluster_channel_avg_path)

# verify the cell_cluster_col provided is valid
misc_utils.verify_in_list(
Expand Down Expand Up @@ -1138,13 +1068,7 @@ def add_consensus_labels_cell_table(base_dir, cell_table_path, cell_data_name):
cell_data_path = os.path.join(base_dir, cell_data_name)

# file path validation
if not os.path.exists(cell_table_path):
raise FileNotFoundError('Cell table file %s does not exist' %
cell_table_path)

if not os.path.exists(cell_data_path):
raise FileNotFoundError('Cell data file %s does not exist in base_dir %s' %
(cell_data_name, base_dir))
io_utils.validate_paths([cell_table_path, cell_data_path])

# read in the data, ensure sorted by FOV column just in case
cell_table = pd.read_csv(cell_table_path)
Expand Down
74 changes: 11 additions & 63 deletions ark/phenotyping/pixel_cluster_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,7 @@ def filter_with_nuclear_mask(fovs, tiff_dir, seg_dir, channel,
return

# raise an error if the provided seg_dir does not exist
if not os.path.exists(seg_dir):
raise FileNotFoundError('seg_dir %s does not exist' % seg_dir)
io_utils.validate_paths(seg_dir)

# convert to path-compatible format
if img_sub_folder is None:
Expand Down Expand Up @@ -603,17 +602,8 @@ def create_pixel_matrix(fovs, channels, base_dir, tiff_dir, seg_dir,
if subset_proportion <= 0 or subset_proportion > 1:
raise ValueError('Invalid subset percentage entered: must be in (0, 1]')

# if the base directory doesn't exist
if not os.path.exists(base_dir):
raise FileNotFoundError("base_dir %s does not exist" % base_dir)

# if the tiff dir doesn't exist
if not os.path.exists(tiff_dir):
raise FileNotFoundError("tiff_dir %s does not exist" % tiff_dir)

# if the pixel output dir doesn't exist
if not os.path.exists(os.path.join(base_dir, pixel_output_dir)):
raise FileNotFoundError("pixel_output_dir %s does not exist" % pixel_output_dir)
# path validation
io_utils.validate_paths([base_dir, tiff_dir, os.path.join(base_dir, pixel_output_dir)])

# create data_dir if it doesn't already exist
if not os.path.exists(os.path.join(base_dir, data_dir)):
Expand Down Expand Up @@ -794,9 +784,7 @@ def find_fovs_missing_col(base_dir, data_dir, missing_col):
temp_path = os.path.join(base_dir, data_dir + '_temp')

# verify the data path exists
if not os.path.exists(data_path):
raise FileNotFoundError('Data directory %s does not exist in base_dir %s' %
(data_dir, base_dir))
io_utils.validate_paths(data_path)

# if the temp path does not exist, either all the FOVs need to be run or none of them do
if not os.path.exists(temp_path):
Expand Down Expand Up @@ -883,9 +871,7 @@ def train_pixel_som(fovs, channels, base_dir,
return

# if path to the subsetted file does not exist
if not os.path.exists(subsetted_path):
raise FileNotFoundError('Pixel subsetted directory %s does not exist in base_dir %s' %
(subset_dir, base_dir))
io_utils.validate_paths(subsetted_path)

# verify that all provided fovs exist in the folder
files = io_utils.list_files(subsetted_path, substrs='.feather')
Expand Down Expand Up @@ -959,20 +945,8 @@ def cluster_pixels(fovs, channels, base_dir, data_dir='pixel_mat_data',
norm_vals_path = os.path.join(base_dir, norm_vals_name)
weights_path = os.path.join(base_dir, weights_name)

# if path to the preprocessed directory does not exist
if not os.path.exists(data_path):
raise FileNotFoundError('Pixel data directory %s does not exist in base_dir %s' %
(data_dir, base_dir))

# if path to the normalized values file does not exist
if not os.path.exists(norm_vals_path):
raise FileNotFoundError('Normalized values file %s does not exist in base_dir %s' %
(norm_vals_path, base_dir))

# if path to the weights file does not exist
if not os.path.exists(weights_path):
raise FileNotFoundError('Weights file %s does not exist in base_dir %s' %
(weights_name, base_dir))
# path validation
io_utils.validate_paths([data_path, norm_vals_path, weights_path])

# verify that all provided fovs exist in the folder
# NOTE: remove the channel and pixel normalization files as those are not pixel data
Expand Down Expand Up @@ -1126,19 +1100,8 @@ def pixel_consensus_cluster(fovs, channels, base_dir, max_k=20, cap=3,
som_cluster_avg_path = os.path.join(base_dir, pc_chan_avg_som_cluster_name)
clust_to_meta_path = os.path.join(base_dir, clust_to_meta_name)

# if the path to the SOM clustered data doesn't exist
if not os.path.exists(data_path):
raise FileNotFoundError(
'Data dir %s does not exist in base_dir %s' %
(data_dir, base_dir)
)

# if the path to the average channel expression per SOM cluster doesn't exist
if not os.path.exists(som_cluster_avg_path):
raise FileNotFoundError(
'Channel avg per SOM cluster file %s does not exist in base_dir %s' %
(pc_chan_avg_som_cluster_name, base_dir)
)
# path validation
io_utils.validate_paths([data_path, som_cluster_avg_path])

# if the path mapping SOM to meta clusters exists, don't re-run consensus clustering
if os.path.exists(clust_to_meta_path):
Expand Down Expand Up @@ -1318,23 +1281,8 @@ def apply_pixel_meta_cluster_remapping(fovs, channels, base_dir,
meta_cluster_avg_path = os.path.join(base_dir, pc_chan_avg_meta_cluster_name)

# file path validation
if not os.path.exists(pixel_data_path):
raise FileNotFoundError('Pixel data dir %s does not exist in base_dir %s' %
(pixel_data_dir, base_dir))

if not os.path.exists(pixel_remapped_path):
raise FileNotFoundError('Pixel remapping file %s does not exist in base_dir %s' %
(pixel_remapped_name, base_dir))

if not os.path.exists(som_cluster_avg_path):
raise FileNotFoundError(
'Channel average per SOM cluster file %s does not exist in base_dir %s' %
(pc_chan_avg_meta_cluster_name, base_dir))

if not os.path.exists(meta_cluster_avg_path):
raise FileNotFoundError(
'Channel average per meta cluster file %s does not exist in base_dir %s' %
(pc_chan_avg_meta_cluster_name, base_dir))
io_utils.validate_paths([pixel_data_path, pixel_remapped_path, som_cluster_avg_path,
meta_cluster_avg_path])

# read in the remapping
pixel_remapped_data = pd.read_csv(pixel_remapped_path)
Expand Down
6 changes: 3 additions & 3 deletions ark/segmentation/fiber_segmentation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_plot_fiber_segmentation_steps():
shutil.rmtree(os.path.join(temp_dir, 'image_data', fov))

# bad directory should raise an error
with pytest.raises(ValueError):
with pytest.raises(FileNotFoundError):
_, _ = fiber_segmentation.plot_fiber_segmentation_steps('bad_dir', 'fov1', 'Collagen1')

# bad channel should raise an error
Expand Down Expand Up @@ -46,10 +46,10 @@ def test_run_fiber_segmentation():
os.makedirs(out_dir)

# bad directories should raise an error
with pytest.raises(ValueError):
with pytest.raises(FileNotFoundError):
_ = fiber_segmentation.run_fiber_segmentation('bad_path', 'Collagen1', out_dir)

with pytest.raises(ValueError):
with pytest.raises(FileNotFoundError):
_ = fiber_segmentation.run_fiber_segmentation(img_dir, 'Collagen1', 'bad_path')

# bad subdirectory should raise an error
Expand Down
24 changes: 5 additions & 19 deletions ark/utils/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ def save_fov_mask(fov, data_dir, mask_data, sub_dir=None, name_suffix=''):
"""

# data_dir validation
if not os.path.exists(data_dir):
raise FileNotFoundError("data_dir %s does not exist" % data_dir)
io_utils.validate_paths(data_dir)

# ensure None is handled correctly in file path generation
if sub_dir is None:
Expand Down Expand Up @@ -161,12 +160,8 @@ def generate_cell_cluster_mask(fov, base_dir, seg_dir, cell_data_name,
"""

# path checking
if not os.path.exists(seg_dir):
raise FileNotFoundError("seg_dir %s does not exist" % seg_dir)

if not os.path.exists(os.path.join(base_dir, cell_data_name)):
raise FileNotFoundError(
"Cell data file %s does not exist in base_dir %s" % (cell_data_name, base_dir))
cell_data_path = os.path.join(os.path.join(base_dir, cell_data_name))
io_utils.validate_paths([seg_dir, cell_data_path])

# verify the cluster_col provided is valid
verify_in_list(
Expand Down Expand Up @@ -277,17 +272,8 @@ def generate_pixel_cluster_mask(fov, base_dir, tiff_dir, chan_file_path,
"""

# path checking
if not os.path.exists(tiff_dir):
raise FileNotFoundError("tiff_dir %s does not exist")

if not os.path.exists(os.path.join(tiff_dir, chan_file_path)):
raise FileNotFoundError("chan_file_path %s does not exist in tiff_dir %s"
% (chan_file_path, tiff_dir))

if not os.path.exists(os.path.join(base_dir, pixel_data_dir)):
raise FileNotFoundError(
"Pixel data dir %s does not exist in base_dir %s" % (pixel_data_dir, base_dir)
)
io_utils.validate_paths([tiff_dir, os.path.join(tiff_dir, chan_file_path),
os.path.join(base_dir, pixel_data_dir)])

# verify the pixel_cluster_col provided is valid
verify_in_list(
Expand Down
2 changes: 1 addition & 1 deletion ark/utils/data_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,7 +686,7 @@ def test_stitch_images_by_shape(segmentation, clustering, subdir, fovs):
os.makedirs(data_dir)

# invalid directory is provided
with pytest.raises(ValueError):
with pytest.raises(FileNotFoundError):
data_utils.stitch_images_by_shape('not_a_dir', stitched_dir)

# no fov dirs should raise an error
Expand Down
6 changes: 3 additions & 3 deletions ark/utils/io_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,18 @@ def validate_paths(paths, data_prefix=False):
for path in paths:
# check data prefix
if data_prefix and not str(path).startswith('../data'):
raise ValueError(
raise FileNotFoundError(
f'The path, {path}, is not prefixed with \'../data\'.\n'
f'Be sure to add all images/files/data to the \'data\' folder, '
f'and to reference as \'../data/path_to_data/myfile.tif\'')

if not os.path.exists(path):
for parent in reversed(pathlib.Path(path).parents):
if not os.path.exists(parent):
raise ValueError(
raise FileNotFoundError(
f'A bad path, {path}, was provided.\n'
f'The folder, {parent.name}, could not be found...')
raise ValueError(
raise FileNotFoundError(
f'The file/path, {pathlib.Path(path).name}, could not be found...')


Expand Down
Loading