diff --git a/sourmash/__init__.py b/sourmash/__init__.py index e021d61e94..719b83745b 100644 --- a/sourmash/__init__.py +++ b/sourmash/__init__.py @@ -28,7 +28,9 @@ from . import sbtmh from . import sbt_storage from . import signature +from . import sig from . import cli +from . import commands from pkg_resources import get_distribution, DistributionNotFound diff --git a/sourmash/__main__.py b/sourmash/__main__.py index 54fcc247c2..70428bb349 100644 --- a/sourmash/__main__.py +++ b/sourmash/__main__.py @@ -11,7 +11,7 @@ def main(arglist=None): else: mod = getattr(sourmash.cli, args.cmd) mainmethod = getattr(mod, 'main') - mainmethod(args) + return mainmethod(args) if __name__ == '__main__': diff --git a/sourmash/cli/__init__.py b/sourmash/cli/__init__.py index 148beef3f4..2a8308e4f9 100644 --- a/sourmash/cli/__init__.py +++ b/sourmash/cli/__init__.py @@ -1,24 +1,28 @@ from argparse import ArgumentParser import sys +import sourmash + from . import utils # Commands +from . import categorize from . import compare from . import compute from . import gather +from . import import_csv from . import info +from . import index +from . import multigather from . import plot from . import search +from . import watch # Subcommand groups from . import lca from . import sbt -from . import signature - - -DEFAULT_LOAD_K = 31 -VERSION = '2.2.0' +from . import sig +from . import storage class SourmashParser(ArgumentParser): @@ -36,77 +40,66 @@ def print_citation(self): notify("== Please cite Brown and Irber (2016), doi:10.21105/joss.00027. ==\n") self._citation_printed = True + def _subparser_from_name(self, name): + """Given a name, get the subparser instance registered with this parser.""" + container = self._actions + if name is None: + return None + for action in container: + if action.choices is None: + continue + elif name in action.choices: + return action.choices[name] + def parse_args(self, args=None, namespace=None): if (args is None and len(sys.argv) == 1) or (args is not None and len(args) == 0): self.print_help() - raise SystemExit(0) + raise SystemExit(1) args = super(SourmashParser, self).parse_args(args=args, namespace=namespace) if ('quiet' not in args or not args.quiet) and self.citation: self.print_citation() + if 'subcmd' in args and args.subcmd is None: + self._subparser_from_name(args.cmd).print_help() + raise SystemExit(1) + # BEGIN: dirty hacks to simultaneously support new and previous interface if hasattr(args, 'subcmd') and args.subcmd == 'import': args.subcmd = 'ingest' if hasattr(args, 'cmd') and args.cmd == 'sbt_combine': args.cmd = 'sbt' args.subcmd = 'combine' - if hasattr(args, 'cmd') and args.cmd == 'index': - args.cmd = 'sbt' - args.subcmd = 'index' - if hasattr(args, 'cmd') and args.cmd == 'categorize': + if hasattr(args, 'cmd') and args.cmd == 'migrate': args.cmd = 'sbt' - args.subcmd = 'categorize' - if hasattr(args, 'cmd') and args.cmd == 'watch': - args.subcmd = 'sbt' - args.subcmd = 'watch' + args.subcmd = 'migrate' if hasattr(args, 'subcmd') and args.subcmd == 'compare_csv': args.subcmd = 'compare' # END: dirty hacks to simultaneously support new and previous interface return args -def add_moltype_args(parser): - parser.add_argument( - '--protein', dest='protein', action='store_true', - help='choose a protein signature; by default, a nucleotide signature is used' - ) - parser.add_argument( - '--dayhoff', dest='dayhoff', action='store_true', - help='build Dayhoff-encoded amino acid signatures' - ) - parser.add_argument( - '--hp', '--hydrophobic-polar', dest='hp', action='store_true', - help='build hydrophobic-polar-encoded amino acid signatures' - ) - - -def add_ksize_arg(parser, default=21): - parser.add_argument( - '-k', '--ksize', metavar='K', default=None, type=int, - help='k-mer size; default={d}'.format(d=default) - ) - - def get_parser(): - commands = ['compute', 'compare', 'search', 'plot', 'gather', 'lca', 'sbt', 'info', 'signature'] + commands = ['compute', 'compare', 'search', 'plot', 'gather', 'index', + 'lca', 'sbt', 'info', 'sig', 'categorize', 'watch', 'storage', + 'multigather', 'migrate', 'sbt_combine', 'import_csv'] commandstr = ' -- '.join(sorted(commands)) desc = 'Compute, compare, manipulate, and analyze MinHash sketches of DNA sequences.' parser = SourmashParser(prog='sourmash', description=desc) parser._optionals.title = 'Options' - parser.add_argument('-v', '--version', action='version', version='sourmash '+ VERSION) + parser.add_argument('-v', '--version', action='version', version='sourmash '+ sourmash.VERSION) parser.add_argument('-q', '--quiet', action='store_true', help='don\'t print citation information') sub = parser.add_subparsers( title='Commands', dest='cmd', metavar='cmd', help=commandstr, description='Invoke "sourmash --help" for more details on executing each command.' ) for cmd in commands: + if cmd in ('migrate', 'sbt_combine'): + continue getattr(sys.modules[__name__], cmd).subparser(sub) # BEGIN: dirty hacks to simultaneously support new and previous interface - sbt.categorize.subparser(sub) sbt.combine.alt_subparser(sub) - sbt.index.subparser(sub) - sbt.watch.subparser(sub) + sbt.migrate.subparser(sub) # END: dirty hacks to simultaneously support new and previous interface parser._action_groups.reverse() return parser diff --git a/sourmash/cli/sbt/categorize.py b/sourmash/cli/categorize.py similarity index 70% rename from sourmash/cli/sbt/categorize.py rename to sourmash/cli/categorize.py index 11bf79149d..a15de4c36e 100644 --- a/sourmash/cli/sbt/categorize.py +++ b/sourmash/cli/categorize.py @@ -1,5 +1,7 @@ -import sourmash -from sourmash.cli.utils import add_ksize_arg +import argparse + +from sourmash.cli.utils import add_ksize_arg, add_moltype_args + def subparser(subparsers): subparser = subparsers.add_parser('categorize') @@ -22,7 +24,13 @@ def subparser(subparsers): '--ignore-abundance', action='store_true', help='do NOT use k-mer abundances if present' ) + add_moltype_args(subparser) + + # TODO: help messages in these + subparser.add_argument('--csv', type=argparse.FileType('at')) + subparser.add_argument('--load-csv', default=None) def main(args): - print(args) + import sourmash + return sourmash.commands.categorize(args) diff --git a/sourmash/cli/compare.py b/sourmash/cli/compare.py index e55b925763..8b2dc54853 100644 --- a/sourmash/cli/compare.py +++ b/sourmash/cli/compare.py @@ -1,7 +1,8 @@ from argparse import FileType -import sourmash + from sourmash.cli.utils import add_ksize_arg, add_moltype_args + def subparser(subparsers): subparser = subparsers.add_parser('compare') subparser.add_argument( @@ -36,4 +37,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.commands.compare(args) diff --git a/sourmash/cli/compute.py b/sourmash/cli/compute.py index 6ec5a42667..2351f851b7 100644 --- a/sourmash/cli/compute.py +++ b/sourmash/cli/compute.py @@ -1,7 +1,11 @@ from argparse import FileType -import sourmash + from sourmash._minhash import get_minhash_default_seed -from sourmash.cli.utils import add_ksize_arg, add_moltype_args +from sourmash.cli.utils import ( + add_ksize_arg, + add_moltype_args, + add_construct_moltype_args +) def subparser(subparsers): @@ -24,7 +28,7 @@ def subparser(subparsers): '--scaled', type=float, default=0, help='choose number of hashes as 1 in FRACTION of input k-mers' ) - add_moltype_args(sketch_args) + add_construct_moltype_args(sketch_args) sketch_args.add_argument( '--input-is-protein', action='store_true', help='Consume protein sequences - no translation needed.' @@ -123,4 +127,5 @@ def subparser(subparsers): def main(args): - print(args) + from sourmash.command_compute import compute + return compute(args) diff --git a/sourmash/cli/gather.py b/sourmash/cli/gather.py index 50228ff433..e34881f449 100644 --- a/sourmash/cli/gather.py +++ b/sourmash/cli/gather.py @@ -1,6 +1,8 @@ from argparse import FileType + from sourmash.cli.utils import add_ksize_arg, add_moltype_args + def subparser(subparsers): subparser = subparsers.add_parser('gather') subparser.add_argument('query', help='query signature') @@ -50,4 +52,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.commands.gather(args) diff --git a/sourmash/cli/import_csv.py b/sourmash/cli/import_csv.py new file mode 100644 index 0000000000..52846196e1 --- /dev/null +++ b/sourmash/cli/import_csv.py @@ -0,0 +1,17 @@ +from argparse import FileType +import sys + + +def subparser(subparsers): + subparser = subparsers.add_parser('import_csv') + subparser.add_argument('mash_csvfile', help='CSV file with mash sketches') + subparser.add_argument( + '-o', '--output', type=FileType('wt'), + default=sys.stdout, + help='save signature generated from data here' + ) + + +def main(args): + import sourmash + return sourmash.commands.import_csv(args) diff --git a/sourmash/cli/sbt/index.py b/sourmash/cli/index.py similarity index 96% rename from sourmash/cli/sbt/index.py rename to sourmash/cli/index.py index 07df8eed74..eaab6c269a 100644 --- a/sourmash/cli/sbt/index.py +++ b/sourmash/cli/index.py @@ -1,6 +1,6 @@ -import sourmash from sourmash.cli.utils import add_moltype_args, add_ksize_arg + def subparser(subparsers): subparser = subparsers.add_parser('index') subparser.add_argument('sbt_name', help='name to save SBT into') @@ -46,4 +46,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.commands.index(args) diff --git a/sourmash/cli/info.py b/sourmash/cli/info.py index 630c017823..e429600de0 100644 --- a/sourmash/cli/info.py +++ b/sourmash/cli/info.py @@ -7,7 +7,7 @@ def subparser(subparsers): subparser = subparsers.add_parser('info') subparser.add_argument( - '--verbose', action='store_true', + '-v', '--verbose', action='store_true', help='report versions of khmer and screed' ) diff --git a/sourmash/cli/lca/classify.py b/sourmash/cli/lca/classify.py index db3ca55f1f..eabe1d5d8b 100644 --- a/sourmash/cli/lca/classify.py +++ b/sourmash/cli/lca/classify.py @@ -1,5 +1,6 @@ from argparse import FileType + def subparser(subparsers): subparser = subparsers.add_parser('classify') subparser.add_argument('--db', nargs='+', action='append') @@ -26,4 +27,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.lca.command_classify.classify(args) diff --git a/sourmash/cli/lca/compare.py b/sourmash/cli/lca/compare.py index d7c0e57ff7..21b54227d2 100644 --- a/sourmash/cli/lca/compare.py +++ b/sourmash/cli/lca/compare.py @@ -1,56 +1,33 @@ def subparser(subparsers): - subparser = subparsers.add_parser('compare') - subparser.add_argument('csv1', help='taxonomy spreadsheet output by classify') - subparser.add_argument('csv2', help='custom taxonomy spreadsheet') - subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' - ) - subparser.add_argument( - '-d', '--debug', action='store_true', - help='output debugging output' - ) - subparser.add_argument( - '-C', '--start-column', metavar='C', default=2, type=int, - help='column at which taxonomic assignments start; default=2' - ) - subparser.add_argument( - '--tabs', action='store_true', - help='input spreadsheet is tab-delimited; default is commas' - ) - subparser.add_argument( - '--no-headers', action='store_true', - help='no headers present in taxonomy spreadsheet' - ) - subparser.add_argument('-f', '--force', action='store_true') - # Dirty hack to simultaneously support new and previous interface # If desired, this function can be removed with a major version bump. - subparser = subparsers.add_parser('compare_csv') - subparser.add_argument('csv1', help='taxonomy spreadsheet output by classify') - subparser.add_argument('csv2', help='custom taxonomy spreadsheet') - subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' - ) - subparser.add_argument( - '-d', '--debug', action='store_true', - help='output debugging output' - ) - subparser.add_argument( - '-C', '--start-column', metavar='C', default=2, type=int, - help='column at which taxonomic assignments start; default=2' - ) - subparser.add_argument( - '--tabs', action='store_true', - help='input spreadsheet is tab-delimited; default is commas' - ) - subparser.add_argument( - '--no-headers', action='store_true', - help='no headers present in taxonomy spreadsheet' - ) - subparser.add_argument('-f', '--force', action='store_true') + for cmd in ('compare', 'compare_csv'): + subparser = subparsers.add_parser(cmd) + subparser.add_argument('csv1', help='taxonomy spreadsheet output by classify') + subparser.add_argument('csv2', help='custom taxonomy spreadsheet') + subparser.add_argument( + '-q', '--quiet', action='store_true', + help='suppress non-error output' + ) + subparser.add_argument( + '-d', '--debug', action='store_true', + help='output debugging output' + ) + subparser.add_argument( + '-C', '--start-column', metavar='C', default=2, type=int, + help='column at which taxonomic assignments start; default=2' + ) + subparser.add_argument( + '--tabs', action='store_true', + help='input spreadsheet is tab-delimited; default is commas' + ) + subparser.add_argument( + '--no-headers', action='store_true', + help='no headers present in taxonomy spreadsheet' + ) + subparser.add_argument('-f', '--force', action='store_true') def main(args): - print(args) + import sourmash + return sourmash.lca.command_compare_csv.compare_csv(args) diff --git a/sourmash/cli/lca/gather.py b/sourmash/cli/lca/gather.py index f8ca65930f..410889472e 100644 --- a/sourmash/cli/lca/gather.py +++ b/sourmash/cli/lca/gather.py @@ -1,5 +1,6 @@ from argparse import FileType + def subparser(subparsers): subparser = subparsers.add_parser('gather') subparser.add_argument('query') @@ -27,4 +28,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.lca.gather_main(args) diff --git a/sourmash/cli/lca/index.py b/sourmash/cli/lca/index.py index 3673045782..1a947d985d 100644 --- a/sourmash/cli/lca/index.py +++ b/sourmash/cli/lca/index.py @@ -1,6 +1,6 @@ -import sourmash from sourmash.cli.utils import add_ksize_arg + def subparser(subparsers): subparser = subparsers.add_parser('index') subparser.add_argument('csv', help='taxonomy spreadsheet') @@ -45,7 +45,12 @@ def subparser(subparsers): subparser.add_argument( '--report', help='output a report on anomalies, if any' ) + subparser.add_argument( + '--require-taxonomy', action='store_true', + help='ignore signatures with no taxonomy entry' + ) def main(args): - print(args) + import sourmash + return sourmash.lca.command_index.index(args) diff --git a/sourmash/cli/lca/rankinfo.py b/sourmash/cli/lca/rankinfo.py index eb21c6956d..b1a01758cf 100644 --- a/sourmash/cli/lca/rankinfo.py +++ b/sourmash/cli/lca/rankinfo.py @@ -10,7 +10,12 @@ def subparser(subparsers): help='output debugging output' ) subparser.add_argument('--scaled', metavar='FLOAT', type=float) + subparser.add_argument( + '--minimum-num', type=int, default=0, + help='Minimum number of different lineages a k-mer must be in to be counted' + ) def main(args): - print(args) + import sourmash + return sourmash.lca.command_rankinfo.rankinfo_main(args) diff --git a/sourmash/cli/lca/summarize.py b/sourmash/cli/lca/summarize.py index 8086f2a2aa..345788e41a 100644 --- a/sourmash/cli/lca/summarize.py +++ b/sourmash/cli/lca/summarize.py @@ -1,5 +1,6 @@ from argparse import FileType + def subparser(subparsers): subparser = subparsers.add_parser('summarize') subparser.add_argument('--db', nargs='+', action='append') @@ -25,4 +26,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.lca.command_summarize.summarize_main(args) diff --git a/sourmash/cli/multigather.py b/sourmash/cli/multigather.py new file mode 100644 index 0000000000..df29ab2e82 --- /dev/null +++ b/sourmash/cli/multigather.py @@ -0,0 +1,45 @@ +from argparse import FileType + +from sourmash.cli.utils import add_ksize_arg, add_moltype_args + + +def subparser(subparsers): + subparser = subparsers.add_parser('multigather') + subparser.add_argument( + '--query', nargs='+', action='append', + help='query signature' + ) + subparser.add_argument( + '--db', nargs='+', action='append', + help='signatures/SBTs to search', + ) + subparser.add_argument( + '-q', '--quiet', action='store_true', + help='suppress non-error output' + ) + subparser.add_argument( + '-d', '--debug', action='store_true' + ) + subparser.add_argument( + '--traverse-directory', action='store_true', + help='search all signatures underneath directories' + ) + subparser.add_argument( + '--threshold-bp', metavar='REAL', type=float, default=5e4, + help='threshold (in bp) for reporting results (default=50,000)' + ) + subparser.add_argument( + '--scaled', metavar='FLOAT', type=float, default=0, + help='downsample query to the specified scaled factor' + ) + subparser.add_argument( + '--ignore-abundance', action='store_true', + help='do NOT use k-mer abundances if present' + ) + add_ksize_arg(subparser, 31) + add_moltype_args(subparser) + + +def main(args): + import sourmash + return sourmash.commands.multigather(args) diff --git a/sourmash/cli/plot.py b/sourmash/cli/plot.py index 77c1b084c4..4461d0bb7e 100644 --- a/sourmash/cli/plot.py +++ b/sourmash/cli/plot.py @@ -41,4 +41,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.commands.plot(args) diff --git a/sourmash/cli/sbt/__init__.py b/sourmash/cli/sbt/__init__.py index 819f99caa0..e2b2716166 100644 --- a/sourmash/cli/sbt/__init__.py +++ b/sourmash/cli/sbt/__init__.py @@ -1,10 +1,9 @@ -from . import index -from . import combine -from . import categorize -from . import watch import sys -subcommands = ['index', 'combine', 'categorize', 'watch'] +from . import combine +from . import migrate + +subcommands = ['combine', 'migrate'] subcommandstr = ' -- '.join(sorted(subcommands)) def subparser(subparsers): diff --git a/sourmash/cli/sbt/combine.py b/sourmash/cli/sbt/combine.py index 0242a79ded..025cb69f39 100644 --- a/sourmash/cli/sbt/combine.py +++ b/sourmash/cli/sbt/combine.py @@ -47,4 +47,5 @@ def alt_subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.commands.sbt_combine(args) diff --git a/sourmash/cli/sbt/migrate.py b/sourmash/cli/sbt/migrate.py new file mode 100644 index 0000000000..4f1eeaf7e5 --- /dev/null +++ b/sourmash/cli/sbt/migrate.py @@ -0,0 +1,8 @@ +def subparser(subparsers): + subparser = subparsers.add_parser('migrate') + subparser.add_argument('sbt_name', help='name to save SBT into') + + +def main(args): + import sourmash + return sourmash.commands.migrate(args) diff --git a/sourmash/cli/search.py b/sourmash/cli/search.py index 223791438e..26bde72f5d 100644 --- a/sourmash/cli/search.py +++ b/sourmash/cli/search.py @@ -1,6 +1,8 @@ from argparse import FileType + from sourmash.cli.utils import add_ksize_arg, add_moltype_args + def subparser(subparsers): subparser = subparsers.add_parser('search') subparser.add_argument( @@ -56,4 +58,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.commands.search(args) diff --git a/sourmash/cli/signature/__init__.py b/sourmash/cli/sig/__init__.py similarity index 73% rename from sourmash/cli/signature/__init__.py rename to sourmash/cli/sig/__init__.py index 1bdf08a4e1..c698612f54 100644 --- a/sourmash/cli/signature/__init__.py +++ b/sourmash/cli/sig/__init__.py @@ -1,6 +1,7 @@ from . import describe from . import downsample from . import extract +from . import filter from . import flatten from . import intersect from . import merge @@ -12,16 +13,16 @@ import sys subcommands = [ - 'describe', 'downsample', 'extract', 'flatten', 'intersect', 'merge', + 'describe', 'downsample', 'extract', 'filter', 'flatten', 'intersect', 'merge', 'rename', 'subtract', 'ingest', 'export', 'overlap' ] subcommandstr = ' -- '.join(sorted(subcommands)) def subparser(subparsers): - subparser = subparsers.add_parser('signature') + subparser = subparsers.add_parser('sig') s = subparser.add_subparsers( title='Subcommands', dest='subcmd', metavar='subcmd', help=subcommandstr, - description='Invoke "sourmash signature --help" for more details on executing each subcommand.' + description='Invoke "sourmash sig --help" for more details on executing each subcommand.' ) for subcmd in subcommands: getattr(sys.modules[__name__], subcmd).subparser(s) diff --git a/sourmash/cli/signature/describe.py b/sourmash/cli/sig/describe.py similarity index 97% rename from sourmash/cli/signature/describe.py rename to sourmash/cli/sig/describe.py index 029ba8d73c..38ce97f6b9 100644 --- a/sourmash/cli/signature/describe.py +++ b/sourmash/cli/sig/describe.py @@ -1,6 +1,9 @@ from argparse import FileType +import csv + import sourmash -from sourmash.logging import notify, print_results +from sourmash.logging import notify, print_results, error + def subparser(subparsers): subparser = subparsers.add_parser('describe') diff --git a/sourmash/cli/signature/downsample.py b/sourmash/cli/sig/downsample.py similarity index 88% rename from sourmash/cli/signature/downsample.py rename to sourmash/cli/sig/downsample.py index f7b7186ef0..419aa12bd9 100644 --- a/sourmash/cli/signature/downsample.py +++ b/sourmash/cli/sig/downsample.py @@ -1,7 +1,9 @@ from argparse import FileType -import sourmash +import sys + from sourmash.cli.utils import add_moltype_args, add_ksize_arg + def subparser(subparsers): subparser = subparsers.add_parser('downsample') subparser.add_argument('signatures', nargs="+") @@ -19,6 +21,7 @@ def subparser(subparsers): ) subparser.add_argument( '-o', '--output', metavar='FILE', type=FileType('wt'), + default=sys.stdout, help='output signature to this file' ) add_ksize_arg(subparser, 31) @@ -26,4 +29,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.sig.__main__.downsample(args) diff --git a/sourmash/cli/signature/export.py b/sourmash/cli/sig/export.py similarity index 83% rename from sourmash/cli/signature/export.py rename to sourmash/cli/sig/export.py index 78b95e50a1..82578fd6c9 100644 --- a/sourmash/cli/signature/export.py +++ b/sourmash/cli/sig/export.py @@ -1,7 +1,9 @@ from argparse import FileType -import sourmash +import sys + from sourmash.cli.utils import add_ksize_arg, add_moltype_args + def subparser(subparsers): subparser = subparsers.add_parser('export') subparser.add_argument('filename') @@ -11,6 +13,7 @@ def subparser(subparsers): ) subparser.add_argument( '-o', '--output', metavar='FILE', type=FileType('wt'), + default=sys.stdout, help='output signature to this file' ) add_ksize_arg(subparser, 31) @@ -18,4 +21,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.sig.__main__.export(args) diff --git a/sourmash/cli/signature/extract.py b/sourmash/cli/sig/extract.py similarity index 88% rename from sourmash/cli/signature/extract.py rename to sourmash/cli/sig/extract.py index 7bfde69858..c91dbb35b6 100644 --- a/sourmash/cli/signature/extract.py +++ b/sourmash/cli/sig/extract.py @@ -1,7 +1,9 @@ from argparse import FileType -import sourmash +import sys + from sourmash.cli.utils import add_moltype_args, add_ksize_arg + def subparser(subparsers): subparser = subparsers.add_parser('extract') subparser.add_argument('signatures', nargs='+') @@ -11,6 +13,7 @@ def subparser(subparsers): ) subparser.add_argument( '-o', '--output', metavar='FILE', type=FileType('wt'), + default=sys.stdout, help='output signature to this file' ) subparser.add_argument( @@ -26,4 +29,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.sig.__main__.extract(args) diff --git a/sourmash/cli/sig/filter.py b/sourmash/cli/sig/filter.py new file mode 100644 index 0000000000..500f6a771b --- /dev/null +++ b/sourmash/cli/sig/filter.py @@ -0,0 +1,41 @@ +from argparse import FileType +import sys + +from sourmash.cli.utils import add_moltype_args, add_ksize_arg + + +def subparser(subparsers): + subparser = subparsers.add_parser('filter') + subparser.add_argument('signatures', nargs='+') + subparser.add_argument( + '-q', '--quiet', action='store_true', + help='suppress non-error output' + ) + subparser.add_argument( + '-o', '--output', metavar='FILE', type=FileType('wt'), + default=sys.stdout, + help='output signature to this file' + ) + subparser.add_argument( + '--md5', type=str, default=None, + help='select signatures whose md5 contains this substring' + ) + subparser.add_argument( + '--name', type=str, default=None, + help='select signatures whose name contains this substring' + ) + subparser.add_argument( + '-m', '--min-abundance', type=int, default=1, + help='keep hashes >= this minimum abundance' + ) + subparser.add_argument( + '-M', '--max-abundance', type=int, default=None, + help='keep hashes <= this maximum abundance' + ) + add_ksize_arg(subparser, 31) + add_moltype_args(subparser) + + +def main(args): + import sourmash + return sourmash.sig.__main__.filter(args) diff --git a/sourmash/cli/signature/flatten.py b/sourmash/cli/sig/flatten.py similarity index 88% rename from sourmash/cli/signature/flatten.py rename to sourmash/cli/sig/flatten.py index 7e8600863f..7fc65592b8 100644 --- a/sourmash/cli/signature/flatten.py +++ b/sourmash/cli/sig/flatten.py @@ -1,7 +1,9 @@ from argparse import FileType -import sourmash +import sys + from sourmash.cli.utils import add_moltype_args, add_ksize_arg + def subparser(subparsers): subparser = subparsers.add_parser('flatten') subparser.add_argument('signatures', nargs='+') @@ -11,6 +13,7 @@ def subparser(subparsers): ) subparser.add_argument( '-o', '--output', metavar='FILE', type=FileType('wt'), + default=sys.stdout, help='output signature to this file' ) subparser.add_argument( @@ -26,4 +29,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.sig.__main__.flatten(args) diff --git a/sourmash/cli/sig/ingest.py b/sourmash/cli/sig/ingest.py new file mode 100644 index 0000000000..80070607d8 --- /dev/null +++ b/sourmash/cli/sig/ingest.py @@ -0,0 +1,24 @@ +from argparse import FileType +import sys + + +def subparser(subparsers): + # Dirty hack to simultaneously support new and previous interface + # If desired, this function can be removed with a major version bump. + for cmd in ('ingest', 'import'): + subparser = subparsers.add_parser(cmd) + subparser.add_argument('filenames', nargs='+') + subparser.add_argument( + '-q', '--quiet', action='store_true', + help='suppress non-error output' + ) + subparser.add_argument( + '-o', '--output', metavar='FILE', type=FileType('wt'), + default=sys.stdout, + help='output signature to this file' + ) + + +def main(args): + import sourmash + return sourmash.sig.__main__.sig_import(args) diff --git a/sourmash/cli/signature/intersect.py b/sourmash/cli/sig/intersect.py similarity index 86% rename from sourmash/cli/signature/intersect.py rename to sourmash/cli/sig/intersect.py index 75102bd44d..f0fd1a677a 100644 --- a/sourmash/cli/signature/intersect.py +++ b/sourmash/cli/sig/intersect.py @@ -1,7 +1,9 @@ from argparse import FileType -import sourmash +import sys + from sourmash.cli.utils import add_moltype_args, add_ksize_arg + def subparser(subparsers): subparser = subparsers.add_parser('intersect') subparser.add_argument('signatures', nargs='+') @@ -11,6 +13,7 @@ def subparser(subparsers): ) subparser.add_argument( '-o', '--output', metavar='FILE', type=FileType('wt'), + default=sys.stdout, help='output signature to this file' ) subparser.add_argument( @@ -22,4 +25,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.sig.__main__.intersect(args) diff --git a/sourmash/cli/signature/merge.py b/sourmash/cli/sig/merge.py similarity index 86% rename from sourmash/cli/signature/merge.py rename to sourmash/cli/sig/merge.py index ae7c782d1c..54f4d36675 100644 --- a/sourmash/cli/signature/merge.py +++ b/sourmash/cli/sig/merge.py @@ -1,7 +1,9 @@ from argparse import FileType -import sourmash +import sys + from sourmash.cli.utils import add_moltype_args, add_ksize_arg + def subparser(subparsers): subparser = subparsers.add_parser('merge') subparser.add_argument('signatures', nargs='+') @@ -11,6 +13,7 @@ def subparser(subparsers): ) subparser.add_argument( '-o', '--output', metavar='FILE', type=FileType('wt'), + default=sys.stdout, help='output signature to this file' ) subparser.add_argument( @@ -22,4 +25,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.sig.__main__.merge(args) diff --git a/sourmash/cli/signature/overlap.py b/sourmash/cli/sig/overlap.py similarity index 86% rename from sourmash/cli/signature/overlap.py rename to sourmash/cli/sig/overlap.py index 3a3b899612..783327bcfe 100644 --- a/sourmash/cli/signature/overlap.py +++ b/sourmash/cli/sig/overlap.py @@ -1,6 +1,6 @@ -import sourmash from sourmash.cli.utils import add_moltype_args, add_ksize_arg + def subparser(subparsers): subparser = subparsers.add_parser('overlap') subparser.add_argument('signature1') @@ -14,4 +14,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.sig.__main__.overlap(args) diff --git a/sourmash/cli/signature/rename.py b/sourmash/cli/sig/rename.py similarity index 89% rename from sourmash/cli/signature/rename.py rename to sourmash/cli/sig/rename.py index 8b07e05b5a..c8765690af 100644 --- a/sourmash/cli/signature/rename.py +++ b/sourmash/cli/sig/rename.py @@ -1,6 +1,8 @@ -import sourmash +import sys + from sourmash.cli.utils import add_ksize_arg, add_moltype_args + def subparser(subparsers): subparser = subparsers.add_parser('rename') subparser.add_argument('sigfiles', nargs='+') @@ -21,4 +23,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.sig.__main__.rename(args) diff --git a/sourmash/cli/signature/subtract.py b/sourmash/cli/sig/subtract.py similarity index 87% rename from sourmash/cli/signature/subtract.py rename to sourmash/cli/sig/subtract.py index a73ce3a7c2..4746cda911 100644 --- a/sourmash/cli/signature/subtract.py +++ b/sourmash/cli/sig/subtract.py @@ -1,7 +1,9 @@ from argparse import FileType -import sourmash +import sys + from sourmash.cli.utils import add_moltype_args, add_ksize_arg + def subparser(subparsers): subparser = subparsers.add_parser('subtract') subparser.add_argument('signature_from') @@ -12,6 +14,7 @@ def subparser(subparsers): ) subparser.add_argument( '-o', '--output', metavar='FILE', type=FileType('wt'), + default=sys.stdout, help='output signature to this file' ) subparser.add_argument( @@ -23,4 +26,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.sig.__main__.subtract(args) diff --git a/sourmash/cli/signature/ingest.py b/sourmash/cli/signature/ingest.py deleted file mode 100644 index 6852a2bb95..0000000000 --- a/sourmash/cli/signature/ingest.py +++ /dev/null @@ -1,30 +0,0 @@ -from argparse import FileType - -def subparser(subparsers): - subparser = subparsers.add_parser('ingest') - subparser.add_argument('filenames', nargs='+') - subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' - ) - subparser.add_argument( - '-o', '--output', metavar='FILE', type=FileType('wt'), - help='output signature to this file' - ) - - # Dirty hack to simultaneously support new and previous interface - # If desired, this function can be removed with a major version bump. - subparser = subparsers.add_parser('import') - subparser.add_argument('filenames', nargs='+') - subparser.add_argument( - '-q', '--quiet', action='store_true', - help='suppress non-error output' - ) - subparser.add_argument( - '-o', '--output', metavar='FILE', type=FileType('wt'), - help='output signature to this file' - ) - - -def main(args): - print(args) diff --git a/sourmash/cli/storage/__init__.py b/sourmash/cli/storage/__init__.py new file mode 100644 index 0000000000..e2c1e0862e --- /dev/null +++ b/sourmash/cli/storage/__init__.py @@ -0,0 +1,18 @@ +import sys + +from . import convert + +subcommands = ['convert'] +subcommandstr = ' -- '.join(sorted(subcommands)) + + +def subparser(subparsers): + subparser = subparsers.add_parser('storage') + s = subparser.add_subparsers( + title='Subcommands', dest='subcmd', metavar='subcmd', help=subcommandstr, + description='Invoke "sourmash storage --help" for more details on executing each subcommand.' + ) + for subcmd in subcommands: + getattr(sys.modules[__name__], subcmd).subparser(s) + subparser._action_groups.reverse() + subparser._optionals.title = 'Options' diff --git a/sourmash/cli/storage/convert.py b/sourmash/cli/storage/convert.py new file mode 100644 index 0000000000..d269ff02ae --- /dev/null +++ b/sourmash/cli/storage/convert.py @@ -0,0 +1,14 @@ +def subparser(subparsers): + subparser = subparsers.add_parser('convert') + subparser.add_argument( + 'sbt', help='name to save SBT into' + ) + subparser.add_argument( + '-b', '--backend', type=str, + help='Backend to convert to' + ) + + +def main(args): + import sourmash + return sourmash.sbt.convert_cmd(args.sbt, args.backend) diff --git a/sourmash/cli/utils.py b/sourmash/cli/utils.py index f94d3abbb0..3a21767ffd 100644 --- a/sourmash/cli/utils.py +++ b/sourmash/cli/utils.py @@ -3,14 +3,41 @@ def add_moltype_args(parser): '--protein', dest='protein', action='store_true', help='choose a protein signature; by default, a nucleotide signature is used' ) + parser.add_argument( + '--no-protein', dest='protein', action='store_false', + help='do not choose a protein signature') + parser.set_defaults(protein=False) + parser.add_argument( '--dayhoff', dest='dayhoff', action='store_true', help='build Dayhoff-encoded amino acid signatures' ) + parser.add_argument( + '--no-dayhoff', dest='dayhoff', action='store_false', + help='do not build Dayhoff-encoded amino acid signatures') + parser.set_defaults(dayhoff=False) + parser.add_argument( '--hp', '--hydrophobic-polar', dest='hp', action='store_true', help='build hydrophobic-polar-encoded amino acid signatures' ) + parser.add_argument( + '--no-hp', '--no-hydrophobic-polar', dest='hp', action='store_false', + help='do not build hydrophobic-polar-encoded amino acid signatures') + parser.set_defaults(hp=False) + + parser.add_argument( + '--dna', '--rna', dest='dna', default=None, action='store_true', + help='choose a nucleotide signature (default: True)') + parser.add_argument( + '--no-dna', '--no-rna', dest='dna', action='store_false', + help='do not choose a nucleotide signature') + parser.set_defaults(dna=None) + + +def add_construct_moltype_args(parser): + add_moltype_args(parser) + parser.set_defaults(dna=True) def add_ksize_arg(parser, default=31): diff --git a/sourmash/cli/sbt/watch.py b/sourmash/cli/watch.py similarity index 95% rename from sourmash/cli/sbt/watch.py rename to sourmash/cli/watch.py index 0812b62118..0d1d3b5409 100644 --- a/sourmash/cli/sbt/watch.py +++ b/sourmash/cli/watch.py @@ -1,7 +1,8 @@ from argparse import FileType -import sourmash + from sourmash.cli.utils import add_ksize_arg, add_moltype_args + def subparser(subparsers): subparser = subparsers.add_parser('watch') subparser.add_argument('sbt_name', help='name of SBT to search') @@ -35,4 +36,5 @@ def subparser(subparsers): def main(args): - print(args) + import sourmash + return sourmash.commands.watch(args) diff --git a/sourmash/command_compute.py b/sourmash/command_compute.py index 77a7c7f695..daaf9133b7 100644 --- a/sourmash/command_compute.py +++ b/sourmash/command_compute.py @@ -35,74 +35,6 @@ def compute(args): => creates one output file file.sig, with all sequences from file1.fa and file2.fa combined into one signature. """ - parser = SourmashArgumentParser() - parser.add_argument('filenames', nargs='+', - help='file(s) of sequences') - - sourmash_args.add_construct_moltype_args(parser) - - parser.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - parser.add_argument('--input-is-protein', action='store_true', - help='Consume protein sequences - no translation needed.') - parser.add_argument('-k', '--ksizes', - default=DEFAULT_COMPUTE_K, - help='comma-separated list of k-mer sizes (default: %(default)s)') - parser.add_argument('-n', '--num-hashes', type=int, - default=DEFAULT_N, - help='number of hashes to use in each sketch (default: %(default)i)') - parser.add_argument('--check-sequence', action='store_true', - help='complain if input sequence is invalid (default: False)') - parser.add_argument('-f', '--force', action='store_true', - help='recompute signatures even if the file exists (default: False)') - parser.add_argument('-o', '--output', type=argparse.FileType('wt'), - help='output computed signatures to this file') - parser.add_argument('--singleton', action='store_true', - help='compute a signature for each sequence record individually (default: False)') - parser.add_argument('--merge', '--name', type=str, default='', metavar="MERGED", - help="merge all input files into one signature named this") - parser.add_argument('--name-from-first', action='store_true', - help="name the signature generated from each file after the first record in the file (default: False)") - parser.add_argument('--input-is-10x', action='store_true', - help="Input is 10x single cell output folder (default: False)") - parser.add_argument('--count-valid-reads', default=0, type=int, - help="For 10x input only (i.e input-is-10x flag is True), " - "A barcode is only considered a valid barcode read " - "and its signature is written if number of umis are greater " - "than count-valid-reads. It is used to weed out cell barcodes " - "with few umis that might have been due to false rna enzyme reactions") - parser.add_argument('--write-barcode-meta-csv', type=str, - help="For 10x input only (i.e input-is-10x flag is True), for each of the unique barcodes, " - "Write to a given path, number of reads and number of umis per barcode.") - parser.add_argument('-p', '--processes', default=2, type=int, - help='For 10x input only (i.e input-is-10x flag is True, ' - 'Number of processes to use for reading 10x bam file') - parser.add_argument('--save-fastas', default="", type=str, - help='For 10x input only (i.e input-is-10x flag is True), ' - 'save merged fastas for all the unique barcodes to {CELL_BARCODE}.fasta ' - 'in the absolute path given by this flag, By default, fastas are not saved') - parser.add_argument('--line-count', type=int, - help='For 10x input only (i.e input-is-10x flag is True), line count for each bam shard', - default=DEFAULT_LINE_COUNT) - parser.add_argument('--track-abundance', action='store_true', - help='track k-mer abundances in the generated signature (default: False)') - parser.add_argument('--scaled', type=float, default=0, - help='choose number of hashes as 1 in FRACTION of input k-mers') - parser.add_argument('--seed', type=int, - help='seed used by MurmurHash (default: 42)', - default=DEFAULT_SEED) - parser.add_argument('--randomize', action='store_true', - help='shuffle the list of input filenames randomly') - parser.add_argument('--license', default='CC0', type=str, - help='signature license. Currently only CC0 is supported.') - parser.add_argument('--rename-10x-barcodes', type=str, - help="Tab-separated file mapping 10x barcode name " - "to new name, e.g. with channel or cell " - "annotation label", required=False) - parser.add_argument('--barcodes-file', type=str, - help="Barcodes file if the input is unfiltered 10x bam file", required=False) - - args = parser.parse_args(args) set_quiet(args.quiet) if args.license != 'CC0': diff --git a/sourmash/commands.py b/sourmash/commands.py index ac81f816b5..2b3fcc812e 100644 --- a/sourmash/commands.py +++ b/sourmash/commands.py @@ -52,22 +52,6 @@ def compare(args): "Compare multiple signature files and create a distance matrix." import numpy - parser = SourmashArgumentParser() - parser.add_argument('signatures', nargs='+', help='list of signatures') - parser.add_argument('-o', '--output') - parser.add_argument('--ignore-abundance', action='store_true', - help='do NOT use k-mer abundances if present') - sourmash_args.add_ksize_arg(parser, DEFAULT_LOAD_K) - sourmash_args.add_moltype_args(parser) - parser.add_argument('--traverse-directory', action='store_true', - help='compare all signatures underneath directories.') - parser.add_argument('--csv', type=argparse.FileType('w'), - help='save matrix in CSV format (with column headers)') - parser.add_argument('-p', '--processes', type=int, - help='Number of processes to use to calculate similarity') - parser.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - args = parser.parse_args(args) set_quiet(args.quiet) moltype = sourmash_args.calculate_moltype(args) @@ -187,29 +171,6 @@ def plot(args): import scipy.cluster.hierarchy as sch from . import fig as sourmash_fig - # set up cmd line arguments - parser = SourmashArgumentParser() - parser.add_argument('distances', help="output from 'sourmash compare'") - parser.add_argument('--pdf', action='store_true', - help='output PDF, not PNG.') - parser.add_argument('--labels', action='store_true', - help='show sample labels on dendrogram/matrix') - parser.add_argument('--indices', action='store_false', - help='show sample indices but not labels') - parser.add_argument('--vmax', default=1.0, type=float, - help='upper limit of heatmap scale; (default: %(default)f)') - parser.add_argument('--vmin', default=0.0, type=float, - help='lower limit of heatmap scale; (default: %(default)f)') - parser.add_argument("--subsample", type=int, - help="randomly downsample to this many samples, max.") - parser.add_argument("--subsample-seed", type=int, default=1, - help="random seed for --subsample; default=1") - parser.add_argument('-f', '--force', action='store_true', - help='forcibly plot non-distance matrices') - parser.add_argument('--output-dir', help='directory for output plots') - - args = parser.parse_args(args) - # load files D_filename = args.distances labelfilename = D_filename + '.labels.txt' @@ -298,11 +259,6 @@ def plot(args): def import_csv(args): "Import a CSV file full of signatures/hashes." - p = SourmashArgumentParser() - p.add_argument('mash_csvfile') - p.add_argument('-o', '--output', type=argparse.FileType('wt'), - default=sys.stdout, help='(default: stdout)') - args = p.parse_args(args) with open(args.mash_csvfile, 'r') as fp: reader = csv.reader(fp) @@ -352,16 +308,6 @@ def dump(args): def sbt_combine(args): - parser = SourmashArgumentParser() - parser.add_argument('sbt_name', help='name to save SBT into') - parser.add_argument('sbts', nargs='+', - help='SBTs to combine to a new SBT') - parser.add_argument('-x', '--bf-size', type=float, default=1e5) - - sourmash_args.add_moltype_args(parser) - - args = parser.parse_args(args) - inp_files = list(args.sbts) notify('combining {} SBTs', len(inp_files)) @@ -380,32 +326,6 @@ def index(args): """ Build an Sequence Bloom Tree index of the given signatures. """ - parser = SourmashArgumentParser() - parser.add_argument('sbt_name', help='name to save SBT into') - parser.add_argument('signatures', nargs='+', - help='signatures to load into SBT') - parser.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - parser.add_argument('-k', '--ksize', type=int, default=None, - help='k-mer size for which to build the SBT.') - parser.add_argument('-d', '--n_children', type=int, default=2, - help='Number of children for internal nodes') - parser.add_argument('--traverse-directory', action='store_true', - help='load all signatures underneath any directories.') - parser.add_argument('--append', action='store_true', default=False, - help='add signatures to an existing SBT.') - parser.add_argument('-x', '--bf-size', type=float, default=1e5, - help='Bloom filter size used for internal nodes.') - parser.add_argument('-f', '--force', action='store_true', - help='Try loading all files with --traverse-directory') - parser.add_argument('-s', '--sparseness', type=float, default=.0, - help='What percentage of internal nodes will not be saved. ' - 'Ranges from 0.0 (save all nodes) to 1.0 (no nodes saved)') - parser.add_argument('--scaled', type=float, default=0, - help='downsample signatures to this scaled factor') - sourmash_args.add_moltype_args(parser) - - args = parser.parse_args(args) set_quiet(args.quiet) moltype = sourmash_args.calculate_moltype(args) @@ -487,36 +407,6 @@ def index(args): def search(args): from .search import search_databases - parser = SourmashArgumentParser() - parser.add_argument('query', help='query signature') - parser.add_argument('databases', help='signatures/SBTs to search', - nargs='+') - parser.add_argument('--traverse-directory', action='store_true', - help='search all signatures underneath directories.') - parser.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - parser.add_argument('--threshold', default=0.08, type=float, - help='minimum threshold for reporting matches (default=0.08)') - parser.add_argument('--save-matches', type=argparse.FileType('wt'), - help='output matching signatures to this file.') - parser.add_argument('--best-only', action='store_true', - help='report only the best match (with greater speed).') - parser.add_argument('-n', '--num-results', default=3, type=int, - help='number of results to report') - parser.add_argument('--containment', action='store_true', - help='evaluate containment rather than similarity') - parser.add_argument('--ignore-abundance', action='store_true', - help='do NOT use k-mer abundances if present. Note: ' - 'has no effect if --containment is specified') - parser.add_argument('--scaled', type=float, default=0, - help='downsample query to this scaled factor (yields greater speed)') - parser.add_argument('-o', '--output', type=argparse.FileType('wt'), - help='output CSV containing matches to this file') - - sourmash_args.add_ksize_arg(parser, DEFAULT_LOAD_K) - sourmash_args.add_moltype_args(parser) - - args = parser.parse_args(args) set_quiet(args.quiet) moltype = sourmash_args.calculate_moltype(args) @@ -598,25 +488,6 @@ def search(args): def categorize(args): - parser = SourmashArgumentParser() - parser.add_argument('sbt_name', help='name of SBT to load') - parser.add_argument('queries', nargs='+', - help='list of signatures to categorize') - parser.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - parser.add_argument('-k', '--ksize', type=int, default=None) - parser.add_argument('--threshold', default=0.08, type=float, - help='minimum threshold for reporting matches (default=0.08)') - parser.add_argument('--traverse-directory', action="store_true") - parser.add_argument('--ignore-abundance', action='store_true', - help='do NOT use k-mer abundances if present') - - sourmash_args.add_moltype_args(parser) - - parser.add_argument('--csv', type=argparse.FileType('at')) - parser.add_argument('--load-csv', default=None) - - args = parser.parse_args(args) set_quiet(args.quiet) moltype = sourmash_args.calculate_moltype(args) @@ -680,32 +551,6 @@ def categorize(args): def gather(args): from .search import gather_databases, format_bp - parser = SourmashArgumentParser() - parser.add_argument('query', help='query signature') - parser.add_argument('databases', help='signatures/SBTs to search', - nargs='+') - parser.add_argument('--traverse-directory', action='store_true', - help='search all signatures underneath directories.') - parser.add_argument('-o', '--output', type=argparse.FileType('wt'), - help='output CSV containing matches to this file') - parser.add_argument('--save-matches', type=argparse.FileType('wt'), - help='save the matched signatures from the database to this file.') - parser.add_argument('--threshold-bp', type=float, default=5e4, - help='threshold (in bp) for reporting results (default=50,000)') - parser.add_argument('--output-unassigned', type=argparse.FileType('wt'), - help='output unassigned portions of the query as a signature to this file') - parser.add_argument('--scaled', type=float, default=0, - help='downsample query to this scaled factor') - parser.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - parser.add_argument('--ignore-abundance', action='store_true', - help='do NOT use k-mer abundances if present') - parser.add_argument('-d', '--debug', action='store_true') - - sourmash_args.add_ksize_arg(parser, DEFAULT_LOAD_K) - sourmash_args.add_moltype_args(parser) - - args = parser.parse_args(args) set_quiet(args.quiet, args.debug) moltype = sourmash_args.calculate_moltype(args) @@ -820,25 +665,6 @@ def gather(args): def multigather(args): from .search import gather_databases, format_bp - parser = SourmashArgumentParser() - parser.add_argument('--db', nargs='+', action='append') - parser.add_argument('--query', nargs='+', action='append') - parser.add_argument('--traverse-directory', action='store_true', - help='search all signatures underneath directories.') - parser.add_argument('--threshold-bp', type=float, default=5e4, - help='threshold (in bp) for reporting results') - parser.add_argument('--scaled', type=float, default=0, - help='downsample query to this scaled factor') - parser.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - parser.add_argument('--ignore-abundance', action='store_true', - help='do NOT use k-mer abundances if present') - parser.add_argument('-d', '--debug', action='store_true') - - sourmash_args.add_ksize_arg(parser, DEFAULT_LOAD_K) - sourmash_args.add_moltype_args(parser) - - args = parser.parse_args(args) set_quiet(args.quiet) moltype = sourmash_args.calculate_moltype(args) @@ -971,26 +797,6 @@ def multigather(args): def watch(args): "Build a signature from raw FASTA/FASTQ coming in on stdin, search." - - parser = SourmashArgumentParser() - parser.add_argument('sbt_name', help='name of SBT to search') - parser.add_argument('inp_file', nargs='?', default='/dev/stdin') - parser.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - parser.add_argument('-o', '--output', type=argparse.FileType('wt'), - help='save signature generated from data here') - parser.add_argument('--threshold', default=0.05, type=float, - help='minimum threshold for matches (default=0.05)') - parser.add_argument('--input-is-protein', action='store_true', - help='Consume protein sequences - no translation needed') - sourmash_args.add_construct_moltype_args(parser) - parser.add_argument('-n', '--num-hashes', type=int, - default=DEFAULT_N, - help='number of hashes to use in each sketch (default: %(default)i)') - parser.add_argument('--name', type=str, default='stdin', - help='name to use for generated signature') - sourmash_args.add_ksize_arg(parser, DEFAULT_LOAD_K) - args = parser.parse_args(args) set_quiet(args.quiet) if args.input_is_protein and args.dna: @@ -1080,32 +886,7 @@ def do_search(): sig.save_signatures([streamsig], args.output) -def storage(args): - from .sbt import convert_cmd - - parser = SourmashArgumentParser() - parser.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - - subparsers = parser.add_subparsers() - convert_parser = subparsers.add_parser('convert') - convert_parser.add_argument('sbt', help='SBT to convert') - convert_parser.add_argument('-b', "--backend", type=str, - help='Backend to convert to') - convert_parser.set_defaults(command='convert') - - args = parser.parse_args(args) - set_quiet(args.quiet) - if args.command == 'convert': - convert_cmd(args.sbt, args.backend) - - def migrate(args): - parser = SourmashArgumentParser() - parser.add_argument('sbt_name', help='name to save SBT into') - - args = parser.parse_args(args) - tree = load_sbt_index(args.sbt_name, print_version_warning=False) notify('saving SBT under "{}".', args.sbt_name) diff --git a/sourmash/lca/command_classify.py b/sourmash/lca/command_classify.py index 8c3be86b29..4049a3d43e 100644 --- a/sourmash/lca/command_classify.py +++ b/sourmash/lca/command_classify.py @@ -79,21 +79,6 @@ def classify(args): """ main single-genome classification function. """ - p = SourmashArgumentParser(prog="sourmash lca classify") - p.add_argument('--db', nargs='+', action='append') - p.add_argument('--query', nargs='+', action='append') - p.add_argument('--threshold', type=int, default=DEFAULT_THRESHOLD) - p.add_argument('-o', '--output', type=argparse.FileType('wt'), - help='output CSV to this file instead of stdout') - p.add_argument('--scaled', type=float) - p.add_argument('--traverse-directory', action='store_true', - help='load all signatures underneath directories.') - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - p.add_argument('-d', '--debug', action='store_true', - help='output debugging output') - args = p.parse_args(args) - if not args.db: error('Error! must specify at least one LCA database with --db') sys.exit(-1) diff --git a/sourmash/lca/command_compare_csv.py b/sourmash/lca/command_compare_csv.py index 6f06adca89..8e3d4944f1 100644 --- a/sourmash/lca/command_compare_csv.py +++ b/sourmash/lca/command_compare_csv.py @@ -14,22 +14,6 @@ def compare_csv(args): - p = SourmashArgumentParser(prog="sourmash lca compare_csv") - p.add_argument('csv1', help='taxonomy spreadsheet output by classify') - p.add_argument('csv2', help='custom taxonomy spreadsheet') - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - p.add_argument('-d', '--debug', action='store_true', - help='output debugging output') - p.add_argument('-C', '--start-column', default=2, type=int, - help='column at which taxonomic assignments start') - p.add_argument('--tabs', action='store_true', - help='input spreadsheet is tab-delimited (default: commas)') - p.add_argument('--no-headers', action='store_true', - help='no headers present in taxonomy spreadsheet') - p.add_argument('-f', '--force', action='store_true') - args = p.parse_args(args) - if args.start_column < 2: error('error, --start-column cannot be less than 2') sys.exit(-1) diff --git a/sourmash/lca/command_gather.py b/sourmash/lca/command_gather.py index 1611d22097..4778b8ac25 100644 --- a/sourmash/lca/command_gather.py +++ b/sourmash/lca/command_gather.py @@ -184,21 +184,6 @@ def gather_main(args): full lineage information for each known hash, as opposed to storing only the least-common-ancestor information for it. """ - p = SourmashArgumentParser(prog="sourmash lca gather") - p.add_argument('query') - p.add_argument('db', nargs='+') - p.add_argument('-o', '--output', type=argparse.FileType('wt'), - help='output CSV containing matches to this file') - p.add_argument('--output-unassigned', type=argparse.FileType('wt'), - help='output unassigned portions of the query as a signature to this file') - p.add_argument('--ignore-abundance', action='store_true', - help='do NOT use k-mer abundances if present') - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - p.add_argument('-d', '--debug', action='store_true', - help='output debugging output') - args = p.parse_args(args) - set_quiet(args.quiet, args.debug) if not check_files_exist(args.query, *args.db): diff --git a/sourmash/lca/command_index.py b/sourmash/lca/command_index.py index 56e7db5c12..eab83d5eba 100644 --- a/sourmash/lca/command_index.py +++ b/sourmash/lca/command_index.py @@ -11,7 +11,7 @@ from ..logging import notify, error, debug, set_quiet from . import lca_utils from .lca_utils import LineagePair -from ..sourmash_args import SourmashArgumentParser +from ..sourmash_args import SourmashArgumentParser, DEFAULT_LOAD_K def load_taxonomy_assignments(filename, delimiter=',', start_column=2, @@ -130,33 +130,6 @@ def index(args): """ main function for building an LCA database. """ - p = SourmashArgumentParser(prog="sourmash lca index") - p.add_argument('csv', help='taxonomy spreadsheet') - p.add_argument('lca_db_out', help='name to save database to') - p.add_argument('signatures', nargs='+', - help='one or more sourmash signatures') - p.add_argument('--scaled', default=10000, type=float) - p.add_argument('-k', '--ksize', default=31, type=int) - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - p.add_argument('-d', '--debug', action='store_true', - help='output debugging output') - p.add_argument('-C', '--start-column', default=2, type=int, - help='column at which taxonomic assignments start') - p.add_argument('--tabs', action='store_true', - help='input spreadsheet is tab-delimited (default: commas)') - p.add_argument('--no-headers', action='store_true', - help='no headers present in taxonomy spreadsheet') - p.add_argument('--split-identifiers', action='store_true', - help='split names in signatures on whitspace and period') - p.add_argument('-f', '--force', action='store_true') - p.add_argument('--traverse-directory', action='store_true', - help='load all signatures underneath directories.') - p.add_argument('--report', help='output a report on anomalies, if any.') - p.add_argument('--require-taxonomy', action='store_true', - help='ignore signatures with no taxonomy entry') - args = p.parse_args(args) - if args.start_column < 2: error('error, --start-column cannot be less than 2') sys.exit(-1) @@ -165,6 +138,9 @@ def index(args): args.scaled = int(args.scaled) + if args.ksize is None: + args.ksize = DEFAULT_LOAD_K + # first, load taxonomy spreadsheet delimiter = ',' if args.tabs: @@ -353,7 +329,7 @@ def get_lineage_id(lineage, arg_d=arg_d): db.lineage_to_lid = lineage_to_lid db.lid_to_lineage = lid_to_lineage db.hashval_to_idx = hashval_to_idx - + db.ksize = int(args.ksize) db.scaled = int(args.scaled) diff --git a/sourmash/lca/command_rankinfo.py b/sourmash/lca/command_rankinfo.py index e9faa9b5ba..55d7ca930d 100644 --- a/sourmash/lca/command_rankinfo.py +++ b/sourmash/lca/command_rankinfo.py @@ -52,17 +52,6 @@ def rankinfo_main(args): """ rankinfo! """ - p = SourmashArgumentParser(prog="sourmash lca rankinfo") - p.add_argument('db', nargs='+') - p.add_argument('--scaled', type=float) - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - p.add_argument('-d', '--debug', action='store_true', - help='output debugging output') - p.add_argument('--minimum-num', type=int, default=0, - help='Minimum number of different lineages a k-mer must be in to be counted') - args = p.parse_args(args) - if not args.db: error('Error! must specify at least one LCA database with --db') sys.exit(-1) diff --git a/sourmash/lca/command_summarize.py b/sourmash/lca/command_summarize.py index 24d979cb92..9ad4829391 100644 --- a/sourmash/lca/command_summarize.py +++ b/sourmash/lca/command_summarize.py @@ -60,21 +60,6 @@ def summarize_main(args): """ main summarization function. """ - p = SourmashArgumentParser(prog="sourmash lca summarize") - p.add_argument('--db', nargs='+', action='append') - p.add_argument('--query', nargs='+', action='append') - p.add_argument('--threshold', type=int, default=DEFAULT_THRESHOLD) - p.add_argument('--traverse-directory', action='store_true', - help='load all signatures underneath directories.') - p.add_argument('-o', '--output', type=argparse.FileType('wt'), - help='CSV output') - p.add_argument('--scaled', type=float) - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - p.add_argument('-d', '--debug', action='store_true', - help='output debugging output') - args = p.parse_args(args) - if not args.db: error('Error! must specify at least one LCA database with --db') sys.exit(-1) diff --git a/sourmash/sig/__main__.py b/sourmash/sig/__main__.py index 10c9765107..2029557257 100644 --- a/sourmash/sig/__main__.py +++ b/sourmash/sig/__main__.py @@ -143,15 +143,6 @@ def overlap(args): """ provide detailed comparison of two signatures """ - p = SourmashArgumentParser(prog='sourmash signature overlap') - p.add_argument('signature1') - p.add_argument('signature2') - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - - sourmash_args.add_ksize_arg(p, DEFAULT_LOAD_K) - sourmash_args.add_moltype_args(p) - args = p.parse_args(args) set_quiet(args.quiet) moltype = sourmash_args.calculate_moltype(args) @@ -231,19 +222,6 @@ def merge(args): """ merge one or more signatures. """ - p = SourmashArgumentParser(prog='sourmash signature merge') - p.add_argument('signatures', nargs='+') - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - p.add_argument('-o', '--output', type=argparse.FileType('wt'), - default=sys.stdout, - help='output signature to this file') - p.add_argument('--flatten', action='store_true', - help='Remove abundances from all signatures.') - sourmash_args.add_ksize_arg(p, DEFAULT_LOAD_K) - sourmash_args.add_moltype_args(p) - - args = p.parse_args(args) set_quiet(args.quiet) moltype = sourmash_args.calculate_moltype(args) @@ -302,18 +280,6 @@ def intersect(args): This function always removes abundances. """ - p = SourmashArgumentParser(prog='sourmash signature intersect') - p.add_argument('signatures', nargs='+') - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - p.add_argument('-o', '--output', type=argparse.FileType('wt'), - default=sys.stdout, - help='output signature to this file') - p.add_argument('-A', '--abundances-from', - help='intersect with & take abundances from this signature') - sourmash_args.add_ksize_arg(p, DEFAULT_LOAD_K) - sourmash_args.add_moltype_args(p) - args = p.parse_args(args) set_quiet(args.quiet) moltype = sourmash_args.calculate_moltype(args) @@ -371,19 +337,6 @@ def subtract(args): """ subtract one or more signatures from another """ - p = SourmashArgumentParser(prog='sourmash signature subtract') - p.add_argument('signature_from') - p.add_argument('subtraction_sigs', nargs='+') - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - p.add_argument('-o', '--output', type=argparse.FileType('wt'), - default=sys.stdout, - help='output signature to this file') - p.add_argument('--flatten', action='store_true', - help='remove abundance from signatures before subtracting') - sourmash_args.add_ksize_arg(p, DEFAULT_LOAD_K) - sourmash_args.add_moltype_args(p) - args = p.parse_args(args) set_quiet(args.quiet) moltype = sourmash_args.calculate_moltype(args) @@ -433,17 +386,6 @@ def rename(args): """ rename one or more signatures. """ - p = SourmashArgumentParser(prog='sourmash signature rename') - p.add_argument('sigfiles', nargs='+') - p.add_argument('name') - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - p.add_argument('-d', '--debug', action='store_true', - help='output debugging output') - p.add_argument('-o', '--output', help='output to this file') - sourmash_args.add_ksize_arg(p, DEFAULT_LOAD_K) - sourmash_args.add_moltype_args(p) - args = p.parse_args(args) set_quiet(args.quiet, args.quiet) moltype = sourmash_args.calculate_moltype(args) @@ -473,21 +415,6 @@ def extract(args): """ extract signatures. """ - p = SourmashArgumentParser(prog='sourmash signature extract') - p.add_argument('signatures', nargs='+') - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - p.add_argument('-o', '--output', type=argparse.FileType('wt'), - default=sys.stdout, - help='output signature to this file') - p.add_argument('--md5', default=None, - help='select signatures whose md5 contains this substring') - p.add_argument('--name', default=None, - help='select signatures whose name contains this substring') - - sourmash_args.add_ksize_arg(p, DEFAULT_LOAD_K) - sourmash_args.add_moltype_args(p) - args = p.parse_args(args) set_quiet(args.quiet) moltype = sourmash_args.calculate_moltype(args) @@ -525,26 +452,6 @@ def filter(args): """ filter hashes by abundance in all of the signatures """ - p = SourmashArgumentParser(prog='sourmash signature flatten') - p.add_argument('signatures', nargs='+') - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - p.add_argument('-o', '--output', type=argparse.FileType('wt'), - default=sys.stdout, - help='output signature to this file') - p.add_argument('--md5', default=None, - help='select signatures whose md5 contains this substring') - p.add_argument('--name', default=None, - help='select signatures whose name contains this substring') - - p.add_argument('-m', '--min-abundance', type=int, default=1, - help='keep hashes >= this minimum abundance') - p.add_argument('-M', '--max-abundance', type=int, default=None, - help='keep hashes <= this maximum abundance') - - sourmash_args.add_ksize_arg(p, DEFAULT_LOAD_K) - sourmash_args.add_moltype_args(p) - args = p.parse_args(args) set_quiet(args.quiet) moltype = sourmash_args.calculate_moltype(args) @@ -598,21 +505,6 @@ def flatten(args): """ flatten a signature, removing abundances. """ - p = SourmashArgumentParser(prog='sourmash signature flatten') - p.add_argument('signatures', nargs='+') - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - p.add_argument('-o', '--output', type=argparse.FileType('wt'), - default=sys.stdout, - help='output signature to this file') - p.add_argument('--md5', default=None, - help='select signatures whose md5 contains this substring') - p.add_argument('--name', default=None, - help='select signatures whose name contains this substring') - - sourmash_args.add_ksize_arg(p, DEFAULT_LOAD_K) - sourmash_args.add_moltype_args(p) - args = p.parse_args(args) set_quiet(args.quiet) moltype = sourmash_args.calculate_moltype(args) @@ -653,20 +545,6 @@ def downsample(args): """ downsample a scaled signature. """ - p = SourmashArgumentParser(prog='sourmash signature downsample') - p.add_argument('signatures', nargs="+") - p.add_argument('--scaled', type=int, default=0, - help='scaled value to downsample to') - p.add_argument('--num', type=int, default=0, - help='num value to downsample to') - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - p.add_argument('-o', '--output', type=argparse.FileType('wt'), - default=sys.stdout, - help='output signature to this file') - sourmash_args.add_ksize_arg(p, DEFAULT_LOAD_K) - sourmash_args.add_moltype_args(p) - args = p.parse_args(args) set_quiet(args.quiet) moltype = sourmash_args.calculate_moltype(args) @@ -724,14 +602,6 @@ def sig_import(args): """ import a signature into sourmash format. """ - p = SourmashArgumentParser(prog='sourmash signature import') - p.add_argument('filenames', nargs='+') - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - p.add_argument('-o', '--output', type=argparse.FileType('wt'), - default=sys.stdout, - help='output signature to this file') - args = p.parse_args(args) set_quiet(args.quiet) siglist = [] @@ -762,16 +632,6 @@ def export(args): """ export a signature to mash format """ - p = SourmashArgumentParser(prog='sourmash signature export') - p.add_argument('filename') - p.add_argument('-q', '--quiet', action='store_true', - help='suppress non-error output') - p.add_argument('-o', '--output', type=argparse.FileType('wt'), - default=sys.stdout, - help='output signature to this file') - sourmash_args.add_ksize_arg(p, DEFAULT_LOAD_K) - sourmash_args.add_moltype_args(p) - args = p.parse_args(args) set_quiet(args.quiet) moltype = sourmash_args.calculate_moltype(args) diff --git a/sourmash/sourmash_args.py b/sourmash/sourmash_args.py index e496173737..ff6bd70d15 100644 --- a/sourmash/sourmash_args.py +++ b/sourmash/sourmash_args.py @@ -47,73 +47,6 @@ def citation(): _citation_printed = True -def add_moltype_args(parser): - parser.add_argument('--protein', dest='protein', action='store_true', - help='choose a protein signature (default: False)') - parser.add_argument('--no-protein', dest='protein', - action='store_false', - help='do not choose a protein signature') - parser.set_defaults(protein=False) - - parser.add_argument('--dayhoff', dest='dayhoff', action='store_true', - help='build Dayhoff-encoded amino acid signatures (default: False)') - parser.add_argument('--no-dayhoff', dest='dayhoff', - action='store_false', - help='do not build Dayhoff-encoded amino acid signatures') - parser.set_defaults(dayhoff=False) - - parser.add_argument('--hp', '--hydrophobic-polar', dest='hp', action='store_true', - help='build hydrophobic-polar-encoded amino acid signatures (default: False)') - parser.add_argument('--no-hp', '--no-hydrophobic-polar', dest='hp', - action='store_false', - help='do not build hydrophobic-polar-encoded amino acid signatures') - parser.set_defaults(hp=False) - - parser.add_argument('--dna', '--rna', dest='dna', default=None, - action='store_true', - help='choose a nucleotide signature (default: True)') - parser.add_argument('--no-dna', '--no-rna', dest='dna', - action='store_false', - help='do not choose a nucleotide signature') - parser.set_defaults(dna=None) - - -def add_construct_moltype_args(parser): - parser.add_argument('--protein', dest='protein', action='store_true', - help='build protein signatures (default: False)') - parser.add_argument('--no-protein', dest='protein', - action='store_false', - help='do not build protein signatures') - parser.set_defaults(protein=False) - - parser.add_argument('--dayhoff', dest='dayhoff', action='store_true', - help='build Dayhoff-encoded amino acid signatures (default: False)') - parser.add_argument('--no-dayhoff', dest='dayhoff', - action='store_false', - help='do not build Dayhoff-encoded amino acid signatures') - parser.set_defaults(dayhoff=False) - - parser.add_argument('--hp', dest='hp', action='store_true', - help='build hp-encoded amino acid signatures (default: False)') - parser.add_argument('--no-hp', dest='hp', - action='store_false', - help='do not build hp-encoded amino acid signatures') - parser.set_defaults(hp=False) - - parser.add_argument('--dna', '--rna', dest='dna', default=None, - action='store_true', - help='build nucleotide signatures (default: True)') - parser.add_argument('--no-dna', '--no-rna', dest='dna', - action='store_false', - help='do not build nucleotide signatures') - parser.set_defaults(dna=True) - - -def add_ksize_arg(parser, default): - parser.add_argument('-k', '--ksize', default=None, type=int, - help='k-mer size (default: {d})'.format(d=default)) - - def get_moltype(sig, require=False): if sig.minhash.is_molecule_type('DNA'): moltype = 'DNA' diff --git a/tests/test_sourmash.py b/tests/test_sourmash.py index c9073d4e4e..fce694ec77 100644 --- a/tests/test_sourmash.py +++ b/tests/test_sourmash.py @@ -34,7 +34,15 @@ def test_run_sourmash(): def test_run_sourmash_badcmd(): status, out, err = utils.runscript('sourmash', ['foobarbaz'], fail_ok=True) assert status != 0 # bad arg! - assert "Unrecognized command" in err + assert "cmd: invalid choice" in err + + +def test_run_sourmash_subcmd_help(): + status, out, err = utils.runscript('sourmash', ['sbt'], fail_ok=True) + assert status != 0 # should fail + assert "usage: sourmash sbt" in out # assert error was printed + assert "Traceback" not in out # should not have printed a Traceback + def test_sourmash_info(): status, out, err = utils.runscript('sourmash', ['info'], fail_ok=False) @@ -523,7 +531,7 @@ def test_search_query_sig_does_not_exist(): print(status, out, err) assert status == -1 assert 'Cannot open file' in err - assert len(err.splitlines()) < 5 + assert len(err.split('\n\r')) < 5 def test_search_subject_sig_does_not_exist():