From a78361dbd342f5610904f21b1ed36da93b2a69d7 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Mon, 28 Mar 2022 17:57:41 -0700 Subject: [PATCH] [MRG] minor improvements to `Index` tests (#1900) * add -d/--debug to various commands * initial implementation of StandaloneManifestIndex * support prefix if not abspath * clean up * some standalone manifests tests - incl CLI * iterate over internal locations instead * switch to picklist API * aaaaand swap out for load_file_as_index :tada: * remove unnecessary spaces * more tests * more better prefix test * remove unnec space * upgrade output error messages * fix SBT subdir loading error * add message about using --debug * doc etc * rationalize _signatures_with_internal * test describe and fileinfo on manifests * think through more manifest stuff * fix descr * rationalize _signatures_with_internal * fix docstring * add heading anchors config; fix napoleon package ref * pin versions for doc building * fix internal refs * fix one last ref target * add docs * clarify language * add docs * add more/better tests for lazy loading * clarify * a few more tests * update docs * cleanup and comment on index code * minor improvements to Index tests * add explicit test for lazy-loading prefetch on StandaloneManifestIndex * add explicit test for lazy-loading prefetch on StandaloneManifestIndex * update comments/docstrings * Apply suggestions from code review Co-authored-by: Tessa Pierce Ward Co-authored-by: Tessa Pierce Ward --- src/sourmash/index/__init__.py | 6 +++--- tests/test_index.py | 30 ++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/src/sourmash/index/__init__.py b/src/sourmash/index/__init__.py index 5ae2f2c921..60f1940c36 100644 --- a/src/sourmash/index/__init__.py +++ b/src/sourmash/index/__init__.py @@ -938,7 +938,7 @@ def sigloc_iter(): for ss in idx.signatures(): yield ss, iloc - # build manifest; note, signatures are stored in memory. + # build manifest; note, ALL signatures are stored in memory. # CTB: could do this on demand? # CTB: should we use get_manifest functionality? # CTB: note here that the manifest is created by iteration @@ -976,11 +976,11 @@ def load_from_directory(cls, pathname, *, force=False): rel = os.path.relpath(thisfile, pathname) source_list.append(rel) - except (IOError, sourmash.exceptions.SourmashError): + except (IOError, sourmash.exceptions.SourmashError) as exc: if force: continue # ignore error else: - raise # stop loading! + raise ValueError(exc) # stop loading! # did we load anything? if not, error if not index_list: diff --git a/tests/test_index.py b/tests/test_index.py index 919d079112..2ac1ab42e4 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -1312,8 +1312,21 @@ def test_multi_index_load_from_directory_2(): assert len(sigs) == 7 +def test_multi_index_load_from_directory_3_simple_bad_file(runtmp): + # check that force=False fails properly when confronted with non-JSON + # files. + c = runtmp + + with open(runtmp.output('badsig.sig'), 'wt') as fp: + fp.write('bad content.') + + with pytest.raises(ValueError): + mi = MultiIndex.load_from_directory(runtmp.location, force=False) + + def test_multi_index_load_from_directory_3(runtmp): - # check that force works ok on a directory + # check that force=False fails properly when confronted with non-JSON + # files that are legit sourmash files... c = runtmp dirname = utils.get_test_data('prot') @@ -1327,12 +1340,13 @@ def test_multi_index_load_from_directory_3(runtmp): shutil.copyfile(fullname, copyto) count += 1 - with pytest.raises(sourmash.exceptions.SourmashError): + with pytest.raises(ValueError): mi = MultiIndex.load_from_directory(c.location, force=False) def test_multi_index_load_from_directory_3_yield_all_true(runtmp): # check that force works ok on a directory w/force=True + # Note here that only .sig/.sig.gz files are loaded. c = runtmp dirname = utils.get_test_data('prot') @@ -1353,7 +1367,8 @@ def test_multi_index_load_from_directory_3_yield_all_true(runtmp): def test_multi_index_load_from_directory_3_yield_all_true_subdir(runtmp): - # check that force works ok on subdirectories + # check that force works ok on subdirectories. + # Note here that only .sig/.sig.gz files are loaded. c = runtmp dirname = utils.get_test_data('prot') @@ -1371,6 +1386,9 @@ def test_multi_index_load_from_directory_3_yield_all_true_subdir(runtmp): mi = MultiIndex.load_from_directory(c.location, force=True) + locations = set([ row['internal_location'] for row in mi.manifest.rows ]) + print(locations) + sigs = list(mi.signatures()) assert len(sigs) == 8 @@ -1466,6 +1484,7 @@ def test_multi_index_load_from_pathlist_1(runtmp): def test_multi_index_load_from_pathlist_2(runtmp): # create a pathlist file with _all_ files under dir, and try to load it. + # this will fail on one of several CSV or .sh files in there. # CTB note: if you create extra files under this directory, # it will fail :) @@ -1479,9 +1498,12 @@ def test_multi_index_load_from_pathlist_2(runtmp): with open(file_list, 'wt') as fp: print("\n".join(files), file=fp) - with pytest.raises(ValueError): + with pytest.raises(ValueError) as exc: mi = MultiIndex.load_from_pathlist(file_list) + print(str(exc)) + assert 'Error while reading signatures from' in str(exc) + def test_multi_index_load_from_pathlist_3_zipfile(runtmp): # can we load zipfiles in a pathlist? yes please.