Skip to content

Commit

Permalink
Merge branch 'latest' into debias-everything
Browse files Browse the repository at this point in the history
  • Loading branch information
ctb committed Dec 7, 2022
2 parents f4b7c14 + b2d2980 commit 0a80d9f
Show file tree
Hide file tree
Showing 9 changed files with 101 additions and 26 deletions.
7 changes: 6 additions & 1 deletion .ci/install_cargo.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
#! /bin/sh
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain=stable
rustup show
export PATH="$HOME/.cargo/bin:$PATH"
rustc -V
rustup target add aarch64-apple-darwin

# update crates.io index without updating Cargo.lock
export CARGO_NET_GIT_FETCH_WITH_CLI=true
cargo update --dry-run
4 changes: 3 additions & 1 deletion .github/workflows/build_wheel_all_archs.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
name: cibuildwheel
name: cibuildwheel_ubuntu

on:
#pull_request: # use for testing modifications to this action
push:
branches: [latest]
tags: v*
Expand Down Expand Up @@ -70,6 +71,7 @@ jobs:
CIBW_ENVIRONMENT_MACOS: ${{ matrix.macos_target }}
CIBW_ARCHS_LINUX: ${{ matrix.arch }}
CIBW_ARCHS_MACOS: ${{ matrix.arch }}
CARGO_NET_GIT_FETCH_WITH_CLI: true

- uses: actions/upload-artifact@v3
with:
Expand Down
12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ nohash-hasher = "0.2.0"
num-iter = "0.1.43"
once_cell = "1.3.1" # once_cell 1.14+ requires Rust 1.56+
rayon = { version = "1.6.0", optional = true }
serde = { version = "1.0.148", features = ["derive"] }
serde = { version = "1.0.149", features = ["derive"] }
serde_json = "1.0.89"
primal-check = "0.3.1"
thiserror = "1.0"
Expand Down
21 changes: 17 additions & 4 deletions src/sourmash/index/sqlite_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ def find(self, search_fn, query, **kwargs):
if picklist is None or subj in picklist:
yield IndexSearchResult(score, subj, self.location)

def select(self, *, num=0, track_abundance=False, **kwargs):
def _select(self, *, num=0, track_abundance=False, **kwargs):
"Run a select! This just modifies the manifest."
# check SqliteIndex specific conditions on the 'select'
if num:
Expand All @@ -421,9 +421,14 @@ def select(self, *, num=0, track_abundance=False, **kwargs):
# modify manifest
manifest = manifest.select_to_manifest(**kwargs)

return manifest

def select(self, *args, **kwargs):
    """Return a new SqliteIndex narrowed by the given selection.

    All arguments are forwarded unchanged to `_select`, which performs the
    SqliteIndex-specific checks and returns the selected manifest.
    """
    sqlite_manifest = self._select(*args, **kwargs)

    # return a new SqliteIndex with a new manifest, but same old conn.
    return SqliteIndex(self.dbfile,
                       sqlite_manifest=sqlite_manifest,
                       conn=self.conn)

#
Expand Down Expand Up @@ -921,10 +926,10 @@ class LCA_SqliteDatabase(SqliteIndex):
"""
is_database = True

def __init__(self, dbfile, *, lineage_db=None):
def __init__(self, dbfile, *, lineage_db=None, sqlite_manifest=None):
# CTB note: we need to let SqliteIndex open dbfile here, so can't
# just pass in a conn.
super().__init__(dbfile)
super().__init__(dbfile, sqlite_manifest=sqlite_manifest)

c = self.conn.cursor()

Expand Down Expand Up @@ -1033,6 +1038,14 @@ def _build_index(self):
def insert(self, *args, **kwargs):
    """Inserting is not supported here; always raises NotImplementedError."""
    raise NotImplementedError

# return correct type on select
def select(self, *args, **kwargs):
    """Return a new LCA_SqliteDatabase narrowed by the given selection.

    Arguments are forwarded unchanged to `_select`. The result is built
    from this object's `dbfile` and `lineage_db`; no `conn` is passed to
    the constructor.
    """
    selected_manifest = self._select(*args, **kwargs)

    return LCA_SqliteDatabase(
        self.dbfile,
        sqlite_manifest=selected_manifest,
        lineage_db=self.lineage_db,
    )

### LCA_Database API/protocol.

def downsample_scaled(self, scaled):
Expand Down
21 changes: 8 additions & 13 deletions src/sourmash/sourmash_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -926,19 +926,14 @@ def get_manifest(idx, *, require=True, rebuild=False):
#

def _get_signatures_from_rust(siglist):
    """Yield the signatures in `siglist` after a save/load round trip.

    The whole of `siglist` is serialized with `sourmash.save_signatures`
    and the resulting JSON is read back with `sourmash.load_signatures`;
    the reloaded signatures are what gets yielded.
    """
    # this deals with a disconnect between the way Rust
    # and Python handle signatures; Python expects one
    # minhash (and hence one md5sum) per signature, while
    # Rust supports multiple. For now, go through serializing
    # and deserializing the signature! See issue #1167 for more.
    json_str = sourmash.save_signatures(siglist)
    yield from sourmash.load_signatures(json_str)


class _BaseSaveSignaturesToLocation:
Expand Down
4 changes: 4 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ def lca_db_format(request):
def manifest_db_format(request):
return request.param

@pytest.fixture(params=['sig', 'sig.gz', 'zip', '.d/', '.sqldb'])
def sig_save_extension(request):
    """Parametrized fixture: one signature-output suffix per test run.

    NOTE: '.d/' and '.sqldb' already begin with a dot, so a caller that
    builds f'out.{sig_save_extension}' gets 'out..d/' and 'out..sqldb'.
    """
    suffix = request.param
    return suffix


# --- BEGIN - Only run tests using a particular fixture --- #
# Cribbed from: http://pythontesting.net/framework/pytest/pytest-run-tests-using-particular-fixture/
Expand Down
45 changes: 45 additions & 0 deletions tests/test_sourmash_sketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,33 @@ def test_dna_defaults():
sig.minhash


def test_dna_multiple_ksize():
    """A single 'k=21,k=31,k=51' DNA param string gives one parameter set
    holding all three ksizes, and each ksize is seen again after passing
    the sketches through `_get_signatures_from_rust`."""
    sketch_factory = _signatures_for_sketch_factory(['k=21,k=31,k=51'],
                                                    'dna')
    all_params = list(sketch_factory.get_compute_params())

    # the three ksizes share one parameter set
    assert len(all_params) == 1
    p = all_params[0]

    assert p.ksizes == [21,31,51]

    # settings not named in the param string
    assert p.num_hashes == 0
    assert p.scaled == 1000
    assert not p.track_abundance
    assert p.seed == 42

    # molecule type: DNA only
    assert p.dna
    assert not p.dayhoff
    assert not p.hp
    assert not p.protein

    from sourmash.sourmash_args import _get_signatures_from_rust

    sketches = sketch_factory()
    seen_ksizes = {
        sig.minhash.ksize for sig in _get_signatures_from_rust(sketches)
    }

    assert seen_ksizes == {21, 31, 51}


def test_dna_override_1():
factory = _signatures_for_sketch_factory(['k=21,scaled=2000,abund'],
'dna')
Expand Down Expand Up @@ -272,6 +299,7 @@ def test_dna_override_bad_2():
with pytest.raises(ValueError):
factory = _signatures_for_sketch_factory(['k=21,protein'], 'dna')


def test_protein_defaults():
factory = _signatures_for_sketch_factory([], 'protein')
params_list = list(factory.get_compute_params())
Expand Down Expand Up @@ -338,6 +366,7 @@ def test_dayhoff_override_bad_2():
with pytest.raises(ValueError):
factory = _signatures_for_sketch_factory(['k=21,dna'], 'dayhoff')


def test_hp_defaults():
factory = _signatures_for_sketch_factory([], 'hp')
params_list = list(factory.get_compute_params())
Expand Down Expand Up @@ -853,6 +882,22 @@ def test_do_sourmash_sketchdna_multik(runtmp):
assert 31 in ksizes


def test_do_sourmash_sketchdna_multik_output(runtmp, sig_save_extension):
    """`sketch dna -p k=31,k=21` saves two loadable signatures, one per
    ksize, for each output suffix supplied by `sig_save_extension`."""
    fasta = utils.get_test_data('short.fa')
    out_path = runtmp.output(f'out.{sig_save_extension}')

    runtmp.sourmash('sketch', 'dna', '-p', 'k=31,k=21', fasta,
                    '-o', out_path)

    print("saved to file/path with extension:", out_path)
    assert os.path.exists(out_path)

    # reload whatever was written and check one signature per ksize
    loaded = list(sourmash.load_file_as_signatures(out_path))
    assert len(loaded) == 2

    ksizes = {sig.minhash.ksize for sig in loaded}
    assert 21 in ksizes
    assert 31 in ksizes


def test_do_sketch_dna_override_protein_fail(runtmp):
testdata1 = utils.get_test_data('short.fa')

Expand Down
11 changes: 11 additions & 0 deletions tests/test_sqlite_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,17 @@ def test_sqlite_lca_db_load_existing():
assert len(siglist) == 2


def test_sqlite_lca_db_select():
    """select() on an LCA_SqliteDatabase must return an LCA_SqliteDatabase."""
    # load an existing LCA sqlite database
    filename = utils.get_test_data('sqlite/lca.sqldb')
    sqlidx = sourmash.load_file_as_index(filename)
    assert isinstance(sqlidx, LCA_SqliteDatabase)

    sqlidx2 = sqlidx.select(ksize=31)

    # check the type first, so a regression fails on this assertion and
    # not with an AttributeError from the 'hashvals' access below
    assert isinstance(sqlidx2, LCA_SqliteDatabase)

    # 'hashvals' is only on LCA_SqliteDatabase; consume it to make sure
    # it can be read from the selected database
    list(sqlidx2.hashvals)


def test_sqlite_lca_db_create_load_existing(runtmp):
# try creating (from CLI) then loading (from API) an LCA db
filename = runtmp.output('lca.sqldb')
Expand Down

0 comments on commit 0a80d9f

Please sign in to comment.