diff --git a/.ci/install_cargo.sh b/.ci/install_cargo.sh index 94ec2b9beb..8635836ef6 100755 --- a/.ci/install_cargo.sh +++ b/.ci/install_cargo.sh @@ -1,5 +1,10 @@ #! /bin/sh -curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable +curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain=stable +rustup show export PATH="$HOME/.cargo/bin:$PATH" rustc -V rustup target add aarch64-apple-darwin + +# update crates.io index without updating Cargo.lock +export CARGO_NET_GIT_FETCH_WITH_CLI=true +cargo update --dry-run diff --git a/.github/workflows/build_wheel_all_archs.yml b/.github/workflows/build_wheel_all_archs.yml index 4ee93748b1..fbf00af36e 100644 --- a/.github/workflows/build_wheel_all_archs.yml +++ b/.github/workflows/build_wheel_all_archs.yml @@ -1,6 +1,7 @@ -name: cibuildwheel +name: cibuildwheel_ubuntu on: + #pull_request: # use for testing modifications to this action push: branches: [latest] tags: v* @@ -70,6 +71,7 @@ jobs: CIBW_ENVIRONMENT_MACOS: ${{ matrix.macos_target }} CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: ${{ matrix.arch }} + CARGO_NET_GIT_FETCH_WITH_CLI: true - uses: actions/upload-artifact@v3 with: diff --git a/Cargo.lock b/Cargo.lock index 701fd6ece9..799f84f204 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -138,9 +138,9 @@ dependencies = [ [[package]] name = "capnp" -version = "0.14.5" +version = "0.14.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16c262726f68118392269a3f7a5546baf51dcfe5cb3c3f0957b502106bf1a065" +checksum = "2dca085c2c7d9d65ad749d450b19b551efaa8e3476a439bdca07aca8533097f3" [[package]] name = "cast" @@ -1087,9 +1087,9 @@ checksum = "a4a3381e03edd24287172047536f20cabde766e2cd3e65e6b00fb3af51c4f38d" [[package]] name = "serde" -version = "1.0.148" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e53f64bb4ba0191d6d0676e1b141ca55047d83b74f5607e6d8eb88126c52c2dc" +checksum = "256b9932320c590e707b94576e3cc1f7c9024d0ee6612dfbcf1cb106cbe8e055" dependencies = [ "serde_derive", ] @@ -1106,9 +1106,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.148" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a55492425aa53521babf6137309e7d34c20bbfbbfcfe2c7f3a047fd1f6b92c0c" +checksum = "b4eae9b04cbffdfd550eb462ed33bc6a1b68c935127d008b27444d08380f94e4" dependencies = [ "proc-macro2", "quote", diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml index 20af5a36e7..f9926a8e2c 100644 --- a/src/core/Cargo.toml +++ b/src/core/Cargo.toml @@ -38,7 +38,7 @@ nohash-hasher = "0.2.0" num-iter = "0.1.43" once_cell = "1.3.1" # once_cell 1.14+ requires Rust 1.56+ rayon = { version = "1.6.0", optional = true } -serde = { version = "1.0.148", features = ["derive"] } +serde = { version = "1.0.149", features = ["derive"] } serde_json = "1.0.89" primal-check = "0.3.1" thiserror = "1.0" diff --git a/src/sourmash/index/sqlite_index.py b/src/sourmash/index/sqlite_index.py index 26b26e25ae..16020b17e2 100644 --- a/src/sourmash/index/sqlite_index.py +++ b/src/sourmash/index/sqlite_index.py @@ -405,7 +405,7 @@ def find(self, search_fn, query, **kwargs): if picklist is None or subj in picklist: yield IndexSearchResult(score, subj, self.location) - def select(self, *, num=0, track_abundance=False, **kwargs): + def _select(self, *, num=0, track_abundance=False, **kwargs): "Run a select! This just modifies the manifest." # check SqliteIndex specific conditions on the 'select' if num: @@ -421,9 +421,14 @@ def select(self, *, num=0, track_abundance=False, **kwargs): # modify manifest manifest = manifest.select_to_manifest(**kwargs) + return manifest + + def select(self, *args, **kwargs): + sqlite_manifest = self._select(*args, **kwargs) + # return a new SqliteIndex with a new manifest, but same old conn. return SqliteIndex(self.dbfile, - sqlite_manifest=manifest, + sqlite_manifest=sqlite_manifest, conn=self.conn) # @@ -921,10 +926,10 @@ class LCA_SqliteDatabase(SqliteIndex): """ is_database = True - def __init__(self, dbfile, *, lineage_db=None): + def __init__(self, dbfile, *, lineage_db=None, sqlite_manifest=None): # CTB note: we need to let SqliteIndex open dbfile here, so can't # just pass in a conn. - super().__init__(dbfile) + super().__init__(dbfile, sqlite_manifest=sqlite_manifest) c = self.conn.cursor() @@ -1033,6 +1038,14 @@ def _build_index(self): def insert(self, *args, **kwargs): raise NotImplementedError + # return correct type on select + def select(self, *args, **kwargs): + sqlite_manifest = self._select(*args, **kwargs) + + return LCA_SqliteDatabase(self.dbfile, + sqlite_manifest=sqlite_manifest, + lineage_db=self.lineage_db) + ### LCA_Database API/protocol. def downsample_scaled(self, scaled): diff --git a/src/sourmash/sourmash_args.py b/src/sourmash/sourmash_args.py index 62932d9a76..3a465b18c1 100644 --- a/src/sourmash/sourmash_args.py +++ b/src/sourmash/sourmash_args.py @@ -926,19 +926,14 @@ def get_manifest(idx, *, require=True, rebuild=False): # def _get_signatures_from_rust(siglist): - for ss in siglist: - try: - ss.md5sum() - yield ss - except sourmash.exceptions.Panic: - # this deals with a disconnect between the way Rust - # and Python handle signatures; Python expects one - # minhash (and hence one md5sum) per signature, while - # Rust supports multiple. For now, go through serializing - # and deserializing the signature! See issue #1167 for more. - json_str = sourmash.save_signatures([ss]) - for ss in sourmash.load_signatures(json_str): - yield ss + # this deals with a disconnect between the way Rust + # and Python handle signatures; Python expects one + # minhash (and hence one md5sum) per signature, while + # Rust supports multiple. For now, go through serializing + # and deserializing the signature! See issue #1167 for more. + json_str = sourmash.save_signatures(siglist) + for ss in sourmash.load_signatures(json_str): + yield ss class _BaseSaveSignaturesToLocation: diff --git a/tests/conftest.py b/tests/conftest.py index f92db587d4..3281133cd5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -75,6 +75,10 @@ def lca_db_format(request): def manifest_db_format(request): return request.param +@pytest.fixture(params=['sig', 'sig.gz', 'zip', '.d/', '.sqldb']) +def sig_save_extension(request): + return request.param + # --- BEGIN - Only run tests using a particular fixture --- # # Cribbed from: http://pythontesting.net/framework/pytest/pytest-run-tests-using-particular-fixture/ diff --git a/tests/test_sourmash_sketch.py b/tests/test_sourmash_sketch.py index fd299a82ed..fc4cb2373a 100644 --- a/tests/test_sourmash_sketch.py +++ b/tests/test_sourmash_sketch.py @@ -218,6 +218,33 @@ def test_dna_defaults(): sig.minhash +def test_dna_multiple_ksize(): + factory = _signatures_for_sketch_factory(['k=21,k=31,k=51'], 'dna') + params_list = list(factory.get_compute_params()) + + assert len(params_list) == 1 + params = params_list[0] + + assert params.ksizes == [21,31,51] + assert params.num_hashes == 0 + assert params.scaled == 1000 + assert not params.track_abundance + assert params.seed == 42 + assert params.dna + assert not params.dayhoff + assert not params.hp + assert not params.protein + + from sourmash.sourmash_args import _get_signatures_from_rust + + siglist = factory() + ksizes = set() + for ss in _get_signatures_from_rust(siglist): + ksizes.add(ss.minhash.ksize) + + assert ksizes == {21, 31, 51} + + def test_dna_override_1(): factory = _signatures_for_sketch_factory(['k=21,scaled=2000,abund'], 'dna') @@ -272,6 +299,7 @@ def test_dna_override_bad_2(): with pytest.raises(ValueError): factory = _signatures_for_sketch_factory(['k=21,protein'], 'dna') + def test_protein_defaults(): factory = _signatures_for_sketch_factory([], 'protein') params_list = list(factory.get_compute_params()) @@ -338,6 +366,7 @@ def test_dayhoff_override_bad_2(): with pytest.raises(ValueError): factory = _signatures_for_sketch_factory(['k=21,dna'], 'dayhoff') + def test_hp_defaults(): factory = _signatures_for_sketch_factory([], 'hp') params_list = list(factory.get_compute_params()) @@ -853,6 +882,22 @@ def test_do_sourmash_sketchdna_multik(runtmp): assert 31 in ksizes +def test_do_sourmash_sketchdna_multik_output(runtmp, sig_save_extension): + testdata1 = utils.get_test_data('short.fa') + outfile = runtmp.output(f'out.{sig_save_extension}') + runtmp.sourmash('sketch', 'dna', '-p', 'k=31,k=21', testdata1, + '-o', outfile) + + print("saved to file/path with extension:", outfile) + assert os.path.exists(outfile) + + siglist = list(sourmash.load_file_as_signatures(outfile)) + assert len(siglist) == 2 + ksizes = set([ x.minhash.ksize for x in siglist ]) + assert 21 in ksizes + assert 31 in ksizes + + def test_do_sketch_dna_override_protein_fail(runtmp): testdata1 = utils.get_test_data('short.fa') diff --git a/tests/test_sqlite_index.py b/tests/test_sqlite_index.py index f0c32ddeab..74c4692c06 100644 --- a/tests/test_sqlite_index.py +++ b/tests/test_sqlite_index.py @@ -795,6 +795,17 @@ def test_sqlite_lca_db_load_existing(): assert len(siglist) == 2 +def test_sqlite_lca_db_select(): + # try loading an existing sqlite index + filename = utils.get_test_data('sqlite/lca.sqldb') + sqlidx = sourmash.load_file_as_index(filename) + assert isinstance(sqlidx, LCA_SqliteDatabase) + + sqlidx2 = sqlidx.select(ksize=31) + x = list(sqlidx2.hashvals) # only on LCA_SqliteDatabase + assert isinstance(sqlidx2, LCA_SqliteDatabase) + + def test_sqlite_lca_db_create_load_existing(runtmp): # try creating (from CLI) then loading (from API) an LCA db filename = runtmp.output('lca.sqldb')