Skip to content

Commit

Permalink
Added fix to io_utils.list_files function for instances where column … (
Browse files Browse the repository at this point in the history
#42)

* Added fix to io_utils.list_files function for instances where column numbers go past 0-9.

* Forgot to add latest version of fix. Now updated.

* Added double quotations for string in pattern search.

* Updated pre-commit

* Updated substr matching to account for a list of substrs

* Updated matches variable in list_files.

* Additionally updated list_folders substr matching with same fix in list_files.

* Changed list_files/folder substr to use set matching instead of pure pattern recognition.

* Added a step to regex split to filter out empty tokens.

* Updated load_folders functionality to more closely match subsetting by strings.

* Add more encompassing test cases

* Fix formatting

---------

Co-authored-by: alex-l-kong <alkong@ucdavis.edu>
  • Loading branch information
bryjcannon and alex-l-kong authored Jan 8, 2024
1 parent 32ea69f commit e2e45e6
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 3 deletions.
17 changes: 15 additions & 2 deletions src/alpineer/io_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import itertools
import os
import pathlib
import re
import warnings
from typing import List

Expand Down Expand Up @@ -81,7 +82,13 @@ def list_files(dir_name, substrs=None, exact_match=False, ignore_hidden=True):
if any([substr == os.path.splitext(file)[0] for substr in substrs])
]
else:
matches = [file for file in files if any([substr in file for substr in substrs])]
matches = []
for substr in substrs:
substr_pattern = list(filter(bool, re.split("[^a-zA-Z0-9]", substr)))
for file in files:
file_pattern = list(filter(bool, re.split("[^a-zA-Z0-9]", file)))
if set(substr_pattern).issubset(file_pattern):
matches.append(file)

return matches

Expand Down Expand Up @@ -226,6 +233,12 @@ def list_folders(dir_name, substrs=None, exact_match=False, ignore_hidden=True):
if any([substr == os.path.splitext(folder)[0] for substr in substrs])
]
else:
matches = [folder for folder in folders if any([substr in folder for substr in substrs])]
matches = []
for substr in substrs:
substr_pattern = list(filter(bool, re.split("[^a-zA-Z0-9]", substr)))
for folder in folders:
folder_pattern = list(filter(bool, re.split("[^a-zA-Z0-9]", folder)))
if set(substr_pattern).issubset(folder_pattern):
matches.append(folder)

return matches
58 changes: 57 additions & 1 deletion tests/io_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,32 @@ def test_list_files():
)
assert sorted(get_hidden_files) == [".chan-metadata.tiff"]

# test delimiter functionality of substr matching
with tempfile.TemporaryDirectory() as temp_dir:
filenames = [
"fov1.tiff",
"fov1_test.tiff",
"fov10.tiff",
"fov2.tiff",
"fov2_test.tiff",
"fov20.tiff",
"fov3.tiff",
"fov3_test.tiff",
"fov30.tiff",
]
for filename in filenames:
pathlib.Path(os.path.join(temp_dir, filename)).touch()

# test substrs is not list (single string)
get_txt = io_utils.list_files(temp_dir, substrs="fov1")
assert sorted(get_txt) == sorted(["fov1.tiff", "fov1_test.tiff"])

# test substrs is list
get_test_and_other = io_utils.list_files(temp_dir, substrs=["fov1", "fov2"])
assert sorted(get_test_and_other) == sorted(
["fov1.tiff", "fov1_test.tiff", "fov2.tiff", "fov2_test.tiff"]
)


def test_remove_file_extensions():
# test a mixture of file paths and extensions
Expand Down Expand Up @@ -206,7 +232,7 @@ def test_list_folders():
temp_dir, substrs=["test_", "other"], exact_match=False
)
assert sorted(get_test_and_other) == sorted(
["Ntest_csv", "test_csv", "test_csv1", "test_csv2", "test_out", "othertf_txt"]
["test_csv", "test_csv1", "test_csv2", "test_out"]
)

# Test hidden files
Expand Down Expand Up @@ -243,3 +269,33 @@ def test_list_folders():
temp_dir, substrs=".hidden_dir", exact_match=True, ignore_hidden=False
)
assert get_hidden_dirs == [".hidden_dir"]

# test delimiter functionality of substr matching
with tempfile.TemporaryDirectory() as temp_dir:
dirnames = [
"test1",
"test1_folder",
"test10",
"test2",
"test2_folder",
"test20",
"test3",
"test3_folder",
"test30",
]

dirnames.sort()
for dirname in dirnames:
os.mkdir(os.path.join(temp_dir, dirname))

# test substrs is not list (single string)
get_txt = io_utils.list_folders(temp_dir, substrs="test1", exact_match=False)
assert sorted(get_txt) == sorted(["test1", "test1_folder"])

# test substrs is list
get_test_and_other = io_utils.list_folders(
temp_dir, substrs=["test1", "test2"], exact_match=False
)
assert sorted(get_test_and_other) == sorted(
["test1", "test1_folder", "test2", "test2_folder"]
)

0 comments on commit e2e45e6

Please sign in to comment.