From 6f25fbfe121270b1c52ca4321e131cbbb75e35a7 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Thu, 1 Dec 2022 15:35:51 -0800 Subject: [PATCH 1/8] [MRG] fix return type of `LCA_SqliteDatabase.select` (#2382) Fixes https://github.com/sourmash-bio/sourmash/issues/2380 --- src/sourmash/index/sqlite_index.py | 21 +++++++++++++++++---- tests/test_sqlite_index.py | 11 +++++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/sourmash/index/sqlite_index.py b/src/sourmash/index/sqlite_index.py index 26b26e25ae..16020b17e2 100644 --- a/src/sourmash/index/sqlite_index.py +++ b/src/sourmash/index/sqlite_index.py @@ -405,7 +405,7 @@ def find(self, search_fn, query, **kwargs): if picklist is None or subj in picklist: yield IndexSearchResult(score, subj, self.location) - def select(self, *, num=0, track_abundance=False, **kwargs): + def _select(self, *, num=0, track_abundance=False, **kwargs): "Run a select! This just modifies the manifest." # check SqliteIndex specific conditions on the 'select' if num: @@ -421,9 +421,14 @@ def select(self, *, num=0, track_abundance=False, **kwargs): # modify manifest manifest = manifest.select_to_manifest(**kwargs) + return manifest + + def select(self, *args, **kwargs): + sqlite_manifest = self._select(*args, **kwargs) + # return a new SqliteIndex with a new manifest, but same old conn. return SqliteIndex(self.dbfile, - sqlite_manifest=manifest, + sqlite_manifest=sqlite_manifest, conn=self.conn) # @@ -921,10 +926,10 @@ class LCA_SqliteDatabase(SqliteIndex): """ is_database = True - def __init__(self, dbfile, *, lineage_db=None): + def __init__(self, dbfile, *, lineage_db=None, sqlite_manifest=None): # CTB note: we need to let SqliteIndex open dbfile here, so can't # just pass in a conn. 
- super().__init__(dbfile) + super().__init__(dbfile, sqlite_manifest=sqlite_manifest) c = self.conn.cursor() @@ -1033,6 +1038,14 @@ def _build_index(self): def insert(self, *args, **kwargs): raise NotImplementedError + # return correct type on select + def select(self, *args, **kwargs): + sqlite_manifest = self._select(*args, **kwargs) + + return LCA_SqliteDatabase(self.dbfile, + sqlite_manifest=sqlite_manifest, + lineage_db=self.lineage_db) + ### LCA_Database API/protocol. def downsample_scaled(self, scaled): diff --git a/tests/test_sqlite_index.py b/tests/test_sqlite_index.py index f0c32ddeab..74c4692c06 100644 --- a/tests/test_sqlite_index.py +++ b/tests/test_sqlite_index.py @@ -795,6 +795,17 @@ def test_sqlite_lca_db_load_existing(): assert len(siglist) == 2 +def test_sqlite_lca_db_select(): + # try loading an existing sqlite index + filename = utils.get_test_data('sqlite/lca.sqldb') + sqlidx = sourmash.load_file_as_index(filename) + assert isinstance(sqlidx, LCA_SqliteDatabase) + + sqlidx2 = sqlidx.select(ksize=31) + x = list(sqlidx2.hashvals) # only on LCA_SqliteDatabase + assert isinstance(sqlidx2, LCA_SqliteDatabase) + + def test_sqlite_lca_db_create_load_existing(runtmp): # try creating (from CLI) then loading (from API) an LCA db filename = runtmp.output('lca.sqldb') From fa463074121a46fea55cd74d831c5c3565af620c Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Thu, 1 Dec 2022 22:02:44 -0800 Subject: [PATCH 2/8] [MRG] fixing cross-platform cibuildwheel action for release (#2384) --- .ci/install_cargo.sh | 6 +++++- .github/workflows/build_wheel_all_archs.yml | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.ci/install_cargo.sh b/.ci/install_cargo.sh index 94ec2b9beb..a29d5c82cb 100755 --- a/.ci/install_cargo.sh +++ b/.ci/install_cargo.sh @@ -1,5 +1,9 @@ #! 
/bin/sh -curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable +curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain=stable +rustup show export PATH="$HOME/.cargo/bin:$PATH" rustc -V rustup target add aarch64-apple-darwin + +export CARGO_NET_GIT_FETCH_WITH_CLI=true +cargo update diff --git a/.github/workflows/build_wheel_all_archs.yml b/.github/workflows/build_wheel_all_archs.yml index 4ee93748b1..98a2ecf279 100644 --- a/.github/workflows/build_wheel_all_archs.yml +++ b/.github/workflows/build_wheel_all_archs.yml @@ -1,6 +1,7 @@ -name: cibuildwheel +name: cibuildwheel_ubuntu on: + pull_request: # CTB remove before merge! push: branches: [latest] tags: v* @@ -70,6 +71,7 @@ jobs: CIBW_ENVIRONMENT_MACOS: ${{ matrix.macos_target }} CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: ${{ matrix.arch }} + CARGO_NET_GIT_FETCH_WITH_CLI: true - uses: actions/upload-artifact@v3 with: From 281ad98ec7ee6c6b9003166e75b5ff2ece8b957f Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Fri, 2 Dec 2022 05:24:40 -0800 Subject: [PATCH 3/8] [WIP] remove cibuildwheel action from PRs (#2385) --- .github/workflows/build_wheel_all_archs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_wheel_all_archs.yml b/.github/workflows/build_wheel_all_archs.yml index 98a2ecf279..fbf00af36e 100644 --- a/.github/workflows/build_wheel_all_archs.yml +++ b/.github/workflows/build_wheel_all_archs.yml @@ -1,7 +1,7 @@ name: cibuildwheel_ubuntu on: - pull_request: # CTB remove before merge! + #pull_request: # use for testing modifications to this action push: branches: [latest] tags: v* From fe7e417483a8062d9eaf89fee85b6de0442cdc16 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Fri, 2 Dec 2022 08:02:51 -0800 Subject: [PATCH 4/8] [MRG] change `cargo update` to do a dry-run only (#2388) This PR continues the cibuildwheel exploration that started with a fix for building the wheels (https://github.com/sourmash-bio/sourmash/pull/2384 and #2385). The saga continues like so: When building the release candidates, we don't get "clean" rcN wheels. I think this is because when I run `cargo update` in the cibuildwheel action it's actually updating the Rust packages and we're getting a version bump from that. Yay? (See screenshot of releases page with 'dev' yada in wheel names.) [screenshot: "Screen Shot 2022-12-02 at 7 15 52 AM"] This PR changes `cargo update` to `cargo update --dry-run` which should update the crates.io index without actually changing `Cargo.lock` and pushing a version bump. We Shall See! --- .ci/install_cargo.sh | 3 ++- .github/workflows/build_wheel_all_archs.yml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.ci/install_cargo.sh b/.ci/install_cargo.sh index a29d5c82cb..8635836ef6 100755 --- a/.ci/install_cargo.sh +++ b/.ci/install_cargo.sh @@ -5,5 +5,6 @@ export PATH="$HOME/.cargo/bin:$PATH" rustc -V rustup target add aarch64-apple-darwin +# update crates.io index without updating Cargo.lock export CARGO_NET_GIT_FETCH_WITH_CLI=true -cargo update +cargo update --dry-run diff --git a/.github/workflows/build_wheel_all_archs.yml b/.github/workflows/build_wheel_all_archs.yml index fbf00af36e..993b162fb3 100644 --- a/.github/workflows/build_wheel_all_archs.yml +++ b/.github/workflows/build_wheel_all_archs.yml @@ -1,7 +1,7 @@ name: cibuildwheel_ubuntu on: - #pull_request: # use for testing modifications to this action + pull_request: # use for testing modifications to this action push: branches: [latest] tags: v* From 5e7794d3dd7e3de69eefe2c96373f997274b258f Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sat, 3 Dec 2022 11:03:22 -0800 Subject: [PATCH 5/8] [MRG] disable cibuildwheel on PRs again (#2389) Fixes leftover config from testing https://github.com/sourmash-bio/sourmash/pull/2388 --- .github/workflows/build_wheel_all_archs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_wheel_all_archs.yml b/.github/workflows/build_wheel_all_archs.yml index 993b162fb3..fbf00af36e 100644 --- a/.github/workflows/build_wheel_all_archs.yml +++ b/.github/workflows/build_wheel_all_archs.yml @@ -1,7 +1,7 @@ name: cibuildwheel_ubuntu on: - pull_request: # use for testing modifications to this action + #pull_request: # use for testing modifications to this action push: branches: [latest] tags: v* From 724ec1229023212656b9d815fa24a12e64993cd1 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 3 Dec 2022 11:45:04 -0800 Subject: [PATCH 6/8] [MRG] fix `sourmash sketch ... -o .zip` bug introduced in #2329 and released in v4.6.0 (#2391) This PR fixes a bug introduced in #2329 and released in v4.6.0. It also adds tests so that this bug does not trouble us again... The easiest fix involves serializing _every_ signature into JSON format, and then deserializing it. This may add a slight performance regression that I will work to fix in a different PR, once I get v4.6.1 out. Fixes #2390. 
--- src/sourmash/sourmash_args.py | 21 +++++++--------- tests/conftest.py | 4 ++++ tests/test_sourmash_sketch.py | 45 +++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 13 deletions(-) diff --git a/src/sourmash/sourmash_args.py b/src/sourmash/sourmash_args.py index 62932d9a76..3a465b18c1 100644 --- a/src/sourmash/sourmash_args.py +++ b/src/sourmash/sourmash_args.py @@ -926,19 +926,14 @@ def get_manifest(idx, *, require=True, rebuild=False): # def _get_signatures_from_rust(siglist): - for ss in siglist: - try: - ss.md5sum() - yield ss - except sourmash.exceptions.Panic: - # this deals with a disconnect between the way Rust - # and Python handle signatures; Python expects one - # minhash (and hence one md5sum) per signature, while - # Rust supports multiple. For now, go through serializing - # and deserializing the signature! See issue #1167 for more. - json_str = sourmash.save_signatures([ss]) - for ss in sourmash.load_signatures(json_str): - yield ss + # this deals with a disconnect between the way Rust + # and Python handle signatures; Python expects one + # minhash (and hence one md5sum) per signature, while + # Rust supports multiple. For now, go through serializing + # and deserializing the signature! See issue #1167 for more. 
+ json_str = sourmash.save_signatures(siglist) + for ss in sourmash.load_signatures(json_str): + yield ss class _BaseSaveSignaturesToLocation: diff --git a/tests/conftest.py b/tests/conftest.py index f92db587d4..3281133cd5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -75,6 +75,10 @@ def lca_db_format(request): def manifest_db_format(request): return request.param +@pytest.fixture(params=['sig', 'sig.gz', 'zip', '.d/', '.sqldb']) +def sig_save_extension(request): + return request.param + # --- BEGIN - Only run tests using a particular fixture --- # # Cribbed from: http://pythontesting.net/framework/pytest/pytest-run-tests-using-particular-fixture/ diff --git a/tests/test_sourmash_sketch.py b/tests/test_sourmash_sketch.py index fd299a82ed..fc4cb2373a 100644 --- a/tests/test_sourmash_sketch.py +++ b/tests/test_sourmash_sketch.py @@ -218,6 +218,33 @@ def test_dna_defaults(): sig.minhash +def test_dna_multiple_ksize(): + factory = _signatures_for_sketch_factory(['k=21,k=31,k=51'], 'dna') + params_list = list(factory.get_compute_params()) + + assert len(params_list) == 1 + params = params_list[0] + + assert params.ksizes == [21,31,51] + assert params.num_hashes == 0 + assert params.scaled == 1000 + assert not params.track_abundance + assert params.seed == 42 + assert params.dna + assert not params.dayhoff + assert not params.hp + assert not params.protein + + from sourmash.sourmash_args import _get_signatures_from_rust + + siglist = factory() + ksizes = set() + for ss in _get_signatures_from_rust(siglist): + ksizes.add(ss.minhash.ksize) + + assert ksizes == {21, 31, 51} + + def test_dna_override_1(): factory = _signatures_for_sketch_factory(['k=21,scaled=2000,abund'], 'dna') @@ -272,6 +299,7 @@ def test_dna_override_bad_2(): with pytest.raises(ValueError): factory = _signatures_for_sketch_factory(['k=21,protein'], 'dna') + def test_protein_defaults(): factory = _signatures_for_sketch_factory([], 'protein') params_list = list(factory.get_compute_params()) @@ 
-338,6 +366,7 @@ def test_dayhoff_override_bad_2(): with pytest.raises(ValueError): factory = _signatures_for_sketch_factory(['k=21,dna'], 'dayhoff') + def test_hp_defaults(): factory = _signatures_for_sketch_factory([], 'hp') params_list = list(factory.get_compute_params()) @@ -853,6 +882,22 @@ def test_do_sourmash_sketchdna_multik(runtmp): assert 31 in ksizes +def test_do_sourmash_sketchdna_multik_output(runtmp, sig_save_extension): + testdata1 = utils.get_test_data('short.fa') + outfile = runtmp.output(f'out.{sig_save_extension}') + runtmp.sourmash('sketch', 'dna', '-p', 'k=31,k=21', testdata1, + '-o', outfile) + + print("saved to file/path with extension:", outfile) + assert os.path.exists(outfile) + + siglist = list(sourmash.load_file_as_signatures(outfile)) + assert len(siglist) == 2 + ksizes = set([ x.minhash.ksize for x in siglist ]) + assert 21 in ksizes + assert 31 in ksizes + + def test_do_sketch_dna_override_protein_fail(runtmp): testdata1 = utils.get_test_data('short.fa') From d424579553f0e467dc01fbc631fac5148968cd00 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Dec 2022 10:42:19 -0800 Subject: [PATCH 7/8] Bump capnp from 0.14.5 to 0.14.11 (#2396) Bumps [capnp](https://github.com/capnproto/capnproto-rust) from 0.14.5 to 0.14.11.
Commits
  • c1780f4 prepare for capnp-v0.14.11 release
  • 634a386 remove list pointer munging and add tests
  • 03c4338 prepare for capnp-v0.14.10 release
  • e342ce2 add comment on message::Builder::new_default()
  • 551dcdf add some comments on allocate_segment() methods
  • 8c9d41b [HeapAllocator::allocate_segment] handle case when alloc_zeroed() returns null
  • 8b8c5ec Replace some get calls by iterators or try_get method in codegen
  • 471626e Set length of vector after actually having written to it
  • 62d6a90 Remove useless ref mut on let binding
  • 88d3e03 Use a slice instead of a reference to a vector
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=capnp&package-manager=cargo&previous-version=0.14.5&new-version=0.14.11)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) You can trigger a rebase of this PR by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/sourmash-bio/sourmash/network/alerts).
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 701fd6ece9..ba2c4799fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -138,9 +138,9 @@ dependencies = [ [[package]] name = "capnp" -version = "0.14.5" +version = "0.14.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16c262726f68118392269a3f7a5546baf51dcfe5cb3c3f0957b502106bf1a065" +checksum = "2dca085c2c7d9d65ad749d450b19b551efaa8e3476a439bdca07aca8533097f3" [[package]] name = "cast" From b2d29804f9165b1121ae05ac0a6668789a62feeb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Dec 2022 17:33:34 -0800 Subject: [PATCH 8/8] Bump serde from 1.0.148 to 1.0.149 (#2397) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [serde](https://github.com/serde-rs/serde) from 1.0.148 to 1.0.149.
Release notes

Sourced from serde's releases.

v1.0.149

  • Relax some trait bounds from the Serialize impl of BinaryHeap, BTreeSet, and HashSet (#2333, thanks @​jonasbb)
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=serde&package-manager=cargo&previous-version=1.0.148&new-version=1.0.149)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) You can trigger a rebase of this PR by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 8 ++++---- src/core/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ba2c4799fa..799f84f204 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1087,9 +1087,9 @@ checksum = "a4a3381e03edd24287172047536f20cabde766e2cd3e65e6b00fb3af51c4f38d" [[package]] name = "serde" -version = "1.0.148" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e53f64bb4ba0191d6d0676e1b141ca55047d83b74f5607e6d8eb88126c52c2dc" +checksum = "256b9932320c590e707b94576e3cc1f7c9024d0ee6612dfbcf1cb106cbe8e055" dependencies = [ "serde_derive", ] @@ -1106,9 +1106,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.148" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a55492425aa53521babf6137309e7d34c20bbfbbfcfe2c7f3a047fd1f6b92c0c" +checksum = "b4eae9b04cbffdfd550eb462ed33bc6a1b68c935127d008b27444d08380f94e4" dependencies = [ "proc-macro2", "quote", diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml index 20af5a36e7..f9926a8e2c 100644 --- a/src/core/Cargo.toml +++ b/src/core/Cargo.toml @@ -38,7 +38,7 @@ nohash-hasher = "0.2.0" num-iter = "0.1.43" once_cell = "1.3.1" # once_cell 1.14+ requires Rust 1.56+ rayon = { version = "1.6.0", optional = true } -serde = { version = "1.0.148", features = ["derive"] } +serde = { version = "1.0.149", features = ["derive"] } serde_json = "1.0.89" primal-check = "0.3.1" thiserror = "1.0"