Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] alias sig fileinfo to sig summarize; minor docs, test cleanup #1863

Merged
merged 7 commits into from
Mar 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions doc/command-line.md
Original file line number Diff line number Diff line change
Expand Up @@ -1026,6 +1026,9 @@ databases, LCA databases, and directory hierarchies.
`sourmash sig fileinfo` provides optional JSON and YAML output, and
those formats are under semantic versioning.

Note: `sourmash signature summarize` is an alias for `fileinfo`; they are
the same command.

### `sourmash signature split` - split signatures into individual files

Split each signature in the input file(s) into individual files, with
Expand Down Expand Up @@ -1315,8 +1318,14 @@ sourmash sig manifest tests/test-data/prot/all.zip -o manifest.csv
will create a CSV file, `manifest.csv`, in the internal sourmash
manifest format. The manifest will contain an entry for every
signature in the file, database, or collection. This format is largely
meant for internal use, but it can serve as a picklist pickfile for
subsetting large collections.
meant for internal use, but it can serve as a
[picklist pickfile](#using-picklists-to-subset-large-collections-of-signatures)
for subsetting large collections.

By default, `sourmash sig manifest` will rebuild the manifest by
iterating over the signatures in the input file. This can be slow for
large collections. Use `--no-rebuild-manifest` to load an existing
manifest if it is available.

## Advanced command-line usage

Expand Down
1 change: 1 addition & 0 deletions src/sourmash/cli/sig/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from . import filter
from . import flatten
from . import fileinfo
from . import fileinfo as summarize
from . import kmers
from . import intersect
from . import manifest
Expand Down
17 changes: 16 additions & 1 deletion src/sourmash/cli/sig/fileinfo.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,23 @@
"""provide summary information on the given file"""

# Usage/help text for this subcommand; subparser() below hands it to
# add_parser() through the `usage=` keyword, alongside the 'summarize' alias.
usage="""

sourmash sig fileinfo <filename>

This will provide a summary of the sketch contents in the given file.

JSON output can be generated in place of the normal human-readable output
with '--json-out'.

'sig summarize' and 'sig fileinfo' are aliases for the same command.

"""



def subparser(subparsers):
subparser = subparsers.add_parser('fileinfo')
subparser = subparsers.add_parser('fileinfo', aliases=['summarize'],
usage=usage)
subparser.add_argument('path')
subparser.add_argument(
'-q', '--quiet', action='store_true',
Expand Down
17 changes: 16 additions & 1 deletion src/sourmash/cli/sig/manifest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,23 @@
"""create a manifest for a collection of signatures"""

# Usage/help text for this subcommand; subparser() below hands it to
# add_parser() through the `usage=` keyword.
usage="""

sourmash sig manifest <filename> -o manifest.csv

This will output a sourmash manifest in CSV format. This manifest
can be used as a picklist with --picklist manifest.csv::manifest.

The manifest will be rebuilt by iterating over the signatures in the
file unless --no-rebuild-manifest is specified; for large
collections, rebuilding the manifest can take a long time!

See also the 'describe' and 'fileinfo' commands under 'sourmash sig'.

"""


def subparser(subparsers):
subparser = subparsers.add_parser('manifest')
subparser = subparsers.add_parser('manifest', usage=usage)
subparser.add_argument('location')
subparser.add_argument(
'-q', '--quiet', action='store_true',
Expand Down
5 changes: 4 additions & 1 deletion src/sourmash/sig/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,15 +284,18 @@ def manifest(args):

rebuild = True
if args.no_rebuild_manifest:
debug("sig manifest: not forcing rebuild.")
rebuild = False
else:
debug("sig manifest: forcing rebuild.")

manifest = sourmash_args.get_manifest(loader, require=True,
rebuild=rebuild)

with open(args.output, "w", newline='') as csv_fp:
manifest.write_to_csv(csv_fp, write_header=True)

notify(f"built manifest for {len(manifest)} signatures total.")
notify(f"manifest contains {len(manifest)} signatures total.")
notify(f"wrote manifest to '{args.output}'")


Expand Down
4 changes: 2 additions & 2 deletions tests/test_cmd_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -1231,7 +1231,7 @@ def test_sig_extract_1(runtmp):
assert actual_extract_sig == test_extract_sig


def test_sig_extract_1(runtmp):
def test_sig_extract_1_from_file(runtmp):
# run sig extract with --from-file
c = runtmp

Expand Down Expand Up @@ -2366,7 +2366,7 @@ def test_sig_flatten_1(runtmp):
assert test_flattened.minhash == siglist[0].minhash


def test_sig_flatten_1(runtmp):
def test_sig_flatten_1_from_file(runtmp):
c = runtmp

# extract matches to several names from among several signatures & flatten
Expand Down
34 changes: 26 additions & 8 deletions tests/test_cmd_signature_fileinfo.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,13 @@
"""
Tests for the 'sourmash signature fileinfo' command line.
"""
import csv
import shutil
import os
import glob

import pytest
import screed
import json

import sourmash_tst_utils as utils
import sourmash
from sourmash.signature import load_signatures
from sourmash.manifest import CollectionManifest
from sourmash_tst_utils import SourmashCommandFailed

## command line tests
Expand Down Expand Up @@ -43,6 +37,30 @@ def test_fileinfo_1_sig(runtmp):
assert line.strip() in out


def test_fileinfo_1_sig_summarize(runtmp):
    """Run 'sig summarize' (alias for 'sig fileinfo') on a single signature.

    The test signature '47.fa.sig' is copied to 'sig47.sig' via
    runtmp.output() (presumably a path inside the test's temp dir), the
    command is run on it, and every expected report line must show up in
    the captured stdout.
    """
    # each line below is checked (after stripping) as a substring of stdout
    expected_output = """\
path filetype: MultiIndex
location: sig47.sig
is database? no
has manifest? yes
num signatures: 1
total hashes: 5177
summary of sketches:
1 sketches with DNA, k=31, scaled=1000 5177
""".splitlines()

    source_sig = utils.get_test_data('47.fa.sig')
    shutil.copyfile(source_sig, runtmp.output('sig47.sig'))

    # use the 'summarize' spelling rather than 'fileinfo'
    runtmp.run_sourmash('sig', 'summarize', 'sig47.sig')

    out = runtmp.last_result.out
    print(out)

    for expected_line in expected_output:
        assert expected_line.strip() in out


def test_fileinfo_1_sig_abund(runtmp):
# get basic info on a signature with abundance
sig47 = utils.get_test_data('47.abunds.fa.sig')
Expand Down Expand Up @@ -126,7 +144,7 @@ def test_fileinfo_4_zip(runtmp):
print(runtmp.last_result.out)

# 'location' will be fully resolved, ignore it for now
expected_output = f"""\
expected_output = """\
path filetype: ZipFileLinearIndex
is database? yes
has manifest? yes
Expand Down Expand Up @@ -187,7 +205,7 @@ def test_fileinfo_4_zip_rebuild(runtmp):
# CTB: note we're missing one of the 8 in the rebuilt manifest, dna-sig.noext,
# because it is not automatically included unless you load the zipfile
# with traverse. This is intentional.
expected_output = f"""\
expected_output = """\
path filetype: ZipFileLinearIndex
is database? yes
has manifest? yes
Expand Down
6 changes: 3 additions & 3 deletions tests/test_sourmash_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ def test_save_signatures_to_location_3_zip_add_with_manifest(runtmp):

# construct & save manifest
mf = manifest.CollectionManifest([row])
mf_name = f"SOURMASH-MANIFEST.csv"
mf_name = "SOURMASH-MANIFEST.csv"

manifest_fp = io.StringIO()
mf.write_to_csv(manifest_fp, write_header=True)
Expand Down Expand Up @@ -461,8 +461,8 @@ def _signatures_with_internal(self):
assert m.rows[0]['internal_location'] == "fakeiloc"


def test_get_manifest_3_build():
# check that manifest is building
def test_get_manifest_3_build_2():
# check that manifest is building, but only when asked
sig47 = utils.get_test_data('47.fa.sig')
ss47 = sourmash.load_one_signature(sig47)

Expand Down