Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] minor improvements to Index tests #1900

Merged
merged 52 commits into from
Mar 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
cc0db3a
add -d/--debug to various commands
ctb Jan 4, 2022
906ef0b
initial implementation of StandaloneManifestIndex
ctb Mar 22, 2022
72e9523
support prefix if not abspath
ctb Mar 22, 2022
0d79fb6
clean up
ctb Mar 23, 2022
b65e428
some standalone manifests tests - incl CLI
ctb Mar 23, 2022
56a31ad
iterate over internal locations instead
ctb Mar 23, 2022
1cfaab8
switch to picklist API
ctb Mar 23, 2022
da27a1b
aaaaand swap out for load_file_as_index :tada:
ctb Mar 23, 2022
a031d57
remove unnecessary spaces
ctb Mar 23, 2022
9a939a1
more tests
ctb Mar 23, 2022
47b1f40
more better prefix test
ctb Mar 23, 2022
a75815e
remove unnec space
ctb Mar 24, 2022
8a77943
Merge branch 'latest' of https://github.com/sourmash-bio/sourmash int…
ctb Mar 25, 2022
e1a1975
upgrade output error messages
ctb Mar 25, 2022
da682b2
Merge branch 'add/debug' into add/manifestindex
ctb Mar 25, 2022
6104a18
fix SBT subdir loading error
ctb Mar 25, 2022
d9d3bff
add message about using --debug
ctb Mar 25, 2022
112dd3b
Merge branch 'add/debug' into add/manifestindex
ctb Mar 25, 2022
e34bd1a
Merge branch 'add/test_sbt_load_fail' into add/manifestindex
ctb Mar 25, 2022
87b72b8
doc etc
ctb Mar 25, 2022
f4546de
rationalize _signatures_with_internal
ctb Mar 25, 2022
cd9e670
test describe and fileinfo on manifests
ctb Mar 25, 2022
5147dc4
think through more manifest stuff
ctb Mar 25, 2022
50e87c2
Merge branch 'latest' of https://github.com/sourmash-bio/sourmash int…
ctb Mar 25, 2022
59cfe9f
fix descr
ctb Mar 25, 2022
84b200b
rationalize _signatures_with_internal
ctb Mar 25, 2022
bdff48b
Merge branch 'refactor/mf_internal' into add/manifestindex
ctb Mar 25, 2022
3b591b8
fix docstring
ctb Mar 25, 2022
7e6caa9
add heading anchors config; fix napoleon package ref
ctb Mar 25, 2022
785e7c9
pin versions for doc building
ctb Mar 25, 2022
3e6872a
fix internal refs
ctb Mar 25, 2022
e8763b9
fix one last ref target
ctb Mar 25, 2022
6ead927
add docs
ctb Mar 25, 2022
85b2c12
clarify language
ctb Mar 25, 2022
de0b7b2
add docs
ctb Mar 25, 2022
b03ba2f
add more/better tests for lazy loading
ctb Mar 25, 2022
c1ada69
clarify
ctb Mar 25, 2022
1bd133d
a few more tests
ctb Mar 25, 2022
ab882cc
Merge branch 'fix/docs' into add/manifestindex
ctb Mar 26, 2022
c6a7e24
update docs
ctb Mar 26, 2022
38ece63
cleanup and comment on index code
ctb Mar 26, 2022
2924ede
minor improvements to Index tests
ctb Mar 26, 2022
cc1598e
add explicit test for lazy-loading prefetch on StandaloneManifestIndex
ctb Mar 26, 2022
38593b6
add explicit test for lazy-loading prefetch on StandaloneManifestIndex
ctb Mar 26, 2022
c126437
Merge branch 'latest' of https://github.com/sourmash-bio/sourmash int…
ctb Mar 27, 2022
a463fd0
update comments/docstrings
ctb Mar 27, 2022
d89bc40
Merge branch 'add/manifestindex' into cleanup/index
ctb Mar 27, 2022
f2aca85
Merge branch 'cleanup/index' into cleanup/index_2
ctb Mar 27, 2022
841ebf1
Merge branch 'latest' of https://github.com/sourmash-bio/sourmash int…
ctb Mar 28, 2022
d1d7042
Merge branch 'cleanup/index' into cleanup/index_2
ctb Mar 28, 2022
93d38ef
Merge branch 'latest' of https://github.com/sourmash-bio/sourmash int…
ctb Mar 29, 2022
68c7edb
Apply suggestions from code review
ctb Mar 29, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/sourmash/index/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -938,7 +938,7 @@ def sigloc_iter():
for ss in idx.signatures():
yield ss, iloc

# build manifest; note, signatures are stored in memory.
# build manifest; note, ALL signatures are stored in memory.
# CTB: could do this on demand?
# CTB: should we use get_manifest functionality?
# CTB: note here that the manifest is created by iteration
Expand Down Expand Up @@ -976,11 +976,11 @@ def load_from_directory(cls, pathname, *, force=False):

rel = os.path.relpath(thisfile, pathname)
source_list.append(rel)
except (IOError, sourmash.exceptions.SourmashError):
except (IOError, sourmash.exceptions.SourmashError) as exc:
if force:
continue # ignore error
else:
raise # stop loading!
raise ValueError(exc) # stop loading!

# did we load anything? if not, error
if not index_list:
Expand Down
30 changes: 26 additions & 4 deletions tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1312,8 +1312,21 @@ def test_multi_index_load_from_directory_2():
assert len(sigs) == 7


def test_multi_index_load_from_directory_3_simple_bad_file(runtmp):
    """Loading a directory with force=False must fail on a non-JSON file.

    A file named 'badsig.sig' holding plain text is written into the
    temporary location; MultiIndex.load_from_directory is then expected
    to raise ValueError rather than skip the file.
    """
    bad_sig_path = runtmp.output('badsig.sig')
    with open(bad_sig_path, 'wt') as out:
        out.write('bad content.')

    # force=False: the unreadable file should stop loading with an error.
    with pytest.raises(ValueError):
        MultiIndex.load_from_directory(runtmp.location, force=False)


def test_multi_index_load_from_directory_3(runtmp):
# check that force works ok on a directory
# check that force=False fails properly when confronted with non-JSON
# files that are legit sourmash files...
c = runtmp

dirname = utils.get_test_data('prot')
Expand All @@ -1327,12 +1340,13 @@ def test_multi_index_load_from_directory_3(runtmp):
shutil.copyfile(fullname, copyto)
count += 1

with pytest.raises(sourmash.exceptions.SourmashError):
with pytest.raises(ValueError):
mi = MultiIndex.load_from_directory(c.location, force=False)


def test_multi_index_load_from_directory_3_yield_all_true(runtmp):
# check that force works ok on a directory w/force=True
# Note here that only .sig/.sig.gz files are loaded.
c = runtmp

dirname = utils.get_test_data('prot')
Expand All @@ -1353,7 +1367,8 @@ def test_multi_index_load_from_directory_3_yield_all_true(runtmp):


def test_multi_index_load_from_directory_3_yield_all_true_subdir(runtmp):
# check that force works ok on subdirectories
# check that force works ok on subdirectories.
# Note here that only .sig/.sig.gz files are loaded.
c = runtmp
dirname = utils.get_test_data('prot')

Expand All @@ -1371,6 +1386,9 @@ def test_multi_index_load_from_directory_3_yield_all_true_subdir(runtmp):

mi = MultiIndex.load_from_directory(c.location, force=True)

locations = set([ row['internal_location'] for row in mi.manifest.rows ])
print(locations)

sigs = list(mi.signatures())
assert len(sigs) == 8

Expand Down Expand Up @@ -1466,6 +1484,7 @@ def test_multi_index_load_from_pathlist_1(runtmp):

def test_multi_index_load_from_pathlist_2(runtmp):
# create a pathlist file with _all_ files under dir, and try to load it.
# this will fail on one of several CSV or .sh files in there.

# CTB note: if you create extra files under this directory,
# it will fail :)
Expand All @@ -1479,9 +1498,12 @@ def test_multi_index_load_from_pathlist_2(runtmp):
with open(file_list, 'wt') as fp:
print("\n".join(files), file=fp)

with pytest.raises(ValueError):
with pytest.raises(ValueError) as exc:
mi = MultiIndex.load_from_pathlist(file_list)

print(str(exc))
assert 'Error while reading signatures from' in str(exc)


def test_multi_index_load_from_pathlist_3_zipfile(runtmp):
# can we load zipfiles in a pathlist? yes please.
Expand Down