Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement improved & consistent argument parsing #785

Merged
merged 34 commits into from
Jan 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
2264186
Add CLI demo
standage Dec 5, 2019
86c9c07
Extend ArgumentParser subclass
standage Dec 6, 2019
57649a3
Reorg
standage Dec 6, 2019
b74836a
More CLI work
standage Dec 6, 2019
2e8b61e
Moving things around; implement "sourmash compare" CLI completely
standage Dec 6, 2019
cd185a1
Do the compute command
standage Dec 6, 2019
d583eed
Clean it up with argument groups
standage Dec 6, 2019
5dde288
Gather command
standage Dec 6, 2019
f97c829
sourmash info and plot
standage Dec 6, 2019
14b05a0
sourmash search
standage Dec 6, 2019
ab15a39
All "sourmash lca" subcommands
standage Dec 6, 2019
f5ba2b9
All "sourmash sbt" subcommands
standage Dec 6, 2019
fd7438d
All "sourmash signature" subcommands
standage Dec 6, 2019
2ef507e
CLI complete, 1 command and 1 subcommand are functional
standage Dec 6, 2019
61c72bb
Weirdness to support "import" subcommand
standage Dec 6, 2019
39f21ac
Support both old and new interface
standage Dec 6, 2019
e6f70fd
implementation for the new CLI PR (#811)
luizirber Dec 27, 2019
e7c56b2
sigh, py27...
luizirber Dec 27, 2019
be2491b
remove unused imports
luizirber Dec 27, 2019
ada75ec
use new main methods for lca and sig
luizirber Dec 27, 2019
14ed6eb
remove unused import
luizirber Dec 27, 2019
004e31b
Reformat usage
standage Jan 2, 2020
6e27265
More concise "sourmash --help" output
standage Jan 2, 2020
e6d4fc8
Clean up usage statements
standage Jan 2, 2020
a121aa1
Marked some subcommands as expert
standage Jan 2, 2020
630e89c
Make multigather elitist, drop alt_subparsers
standage Jan 3, 2020
6e06496
Sigh
standage Jan 3, 2020
a74cdea
fix test behavior for py27
luizirber Jan 3, 2020
db576db
Print citation
standage Jan 7, 2020
0975271
Add module docstrings, fix descriptors
standage Jan 7, 2020
aaab5ad
Add module docstrings
standage Jan 7, 2020
f97e5f7
Module docstrings
standage Jan 7, 2020
02d83ee
add docstrings to cli modules and some of the commands
ctb Jan 7, 2020
219429d
print citation only once
luizirber Jan 8, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions sourmash/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
from . import sbtmh
from . import sbt_storage
from . import signature
from . import sig
from . import cli
from . import commands

from pkg_resources import get_distribution, DistributionNotFound

Expand Down
90 changes: 11 additions & 79 deletions sourmash/__main__.py
Original file line number Diff line number Diff line change
@@ -1,86 +1,18 @@
"""
sourmash command line.
"""
from __future__ import print_function
import sys
import argparse
import sourmash

from .logging import error, set_quiet

from .commands import (categorize, compare, compute, dump, import_csv,
gather, index, sbt_combine, search,
plot, watch, info, storage, migrate, multigather)
from .lca import main as lca_main
from .sig import main as sig_main
def main(arglist=None):
    """Parse the command line and run the selected sourmash command.

    The parsed ``cmd`` attribute names a module under ``sourmash.cli``.
    When the parsed arguments also carry a ``subcmd`` attribute, the
    sub-module of that name is used instead. The chosen module's
    ``main`` function is called with the parsed arguments and its
    result is returned.
    """
    args = sourmash.cli.get_parser().parse_args(arglist)

    # Start at the command module, then descend one level for grouped
    # commands (those whose parser recorded a ``subcmd``).
    target = getattr(sourmash.cli, args.cmd)
    if hasattr(args, 'subcmd'):
        target = getattr(target, args.subcmd)

    return target.main(args)

# Top-level help text; main() prints it when no command, or an
# unrecognized command, is given on the command line.
usage='''
sourmash <command> [<args>]

** Commands include:

compute <filenames> Compute MinHash signatures for sequences in files.
compare <filenames.sig> Compute similarity matrix for multiple signatures.
search <query> <against> Search a signature against a list of signatures.
plot <matrix> Plot a distance matrix made by 'compare'.
gather Search a metagenome signature for multiple
non-overlapping matches.

** Taxonomic classification utilities:

Run 'sourmash lca' for the taxonomic classification routines.

** Sequence Bloom Tree (SBT) utilities:

index Index a collection of signatures for fast searching.
sbt_combine Combine multiple SBTs into a new one.
categorize Identify best matches for many signatures using an SBT.
watch Classify a stream of sequences.

** Other commands:

info Display sourmash version and other information.
signature Sourmash signature manipulation utilities.

Use '-h' to get subcommand-specific help, e.g.

sourmash compute -h

** Documentation is available at https://sourmash.readthedocs.io/
'''


def main():
    """Dispatch ``sourmash <command>`` to the matching command function.

    Only the first command-line token is parsed here; the remaining
    tokens (``sys.argv[2:]``) are handed to the selected command
    untouched. If no command is given, or the command is not
    recognized, the usage text is printed and the process exits with
    status 1.
    """
    set_quiet(False)

    # Command name -> callable taking the remaining argument list.
    commands = {
        'search': search,
        'compute': compute,
        'compare': compare,
        'plot': plot,
        'import_csv': import_csv,
        'dump': dump,
        'index': index,
        'categorize': categorize,
        'gather': gather,
        'watch': watch,
        'sbt_combine': sbt_combine,
        'info': info,
        'storage': storage,
        'lca': lca_main,
        'migrate': migrate,
        'multigather': multigather,
        'sig': sig_main,
        'signature': sig_main,
    }

    parser = argparse.ArgumentParser(
        description='work with compressed biological sequence representations')
    parser.add_argument('command', nargs='?')
    requested = parser.parse_args(sys.argv[1:2]).command

    if requested:
        handler = commands.get(requested)
        if handler is not None:
            handler(sys.argv[2:])
            return
        error('Unrecognized command')

    # Reached for a missing command as well as an unknown one.
    print(usage)
    sys.exit(1)

# Run the command-line entry point when this module is executed as a script.
if __name__ == '__main__':
    main()
1 change: 1 addition & 0 deletions sourmash/cli/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__pycache__/
125 changes: 125 additions & 0 deletions sourmash/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
"""Define the top-level command line interface for sourmash

This module handles user input when sourmash is invoked from the command line.
A top-level parser is defined for the `sourmash` command, and subparsers are
defined for each subcommand. Some sourmash operations are grouped together
using the `sourmash <subcmd> <subsubcmd>` pattern, and these are organized in
their own CLI submodules, each with a dedicated directory.
"""

from argparse import ArgumentParser, RawDescriptionHelpFormatter, SUPPRESS
import os
import sys

import sourmash

from . import utils

# Commands
from . import categorize
from . import compare
from . import compute
from . import dump
from . import gather
from . import import_csv
from . import info
from . import index
from . import migrate
from . import multigather
from . import plot
from . import sbt_combine
from . import search
from . import watch

# Subcommand groups
from . import lca
from . import sig
from . import storage


class SourmashParser(ArgumentParser):
_citation_printed = False

def __init__(self, citation=True, **kwargs):
    """Create the parser, forwarding ``kwargs`` to ``ArgumentParser``.

    ``citation`` is stored on the instance; ``parse_args`` reads it when
    deciding whether to print the citation notice.
    """
    super(SourmashParser, self).__init__(**kwargs)
    self.citation = citation

@classmethod
def print_citation(cls):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made print_citation a class method, and storing the _citation_printed at the class level lets us assert that it is only printed once across all commands (because all commands are subclasses of SourmashParser).

(the classmethod trick is straight out of snakemake, btw)

if cls._citation_printed:
return
from sourmash.logging import notify
notify("\n== This is sourmash version {version}. ==", version=sourmash.VERSION)
notify("== Please cite Brown and Irber (2016), doi:10.21105/joss.00027. ==\n")
cls._citation_printed = True

def _subparser_from_name(self, name):
    """Given a name, get the subparser instance registered with this parser.

    Returns ``None`` when ``name`` is ``None`` or when no registered
    subparser has that name.
    """
    if name is None:
        return None
    for action in self._actions:
        choices = action.choices
        # argparse's sub-command action keeps its parsers in a
        # name -> parser mapping. An ordinary argument may declare
        # ``choices`` as a list or other plain container, which cannot be
        # indexed by name, so only mapping-style choices are consulted.
        if isinstance(choices, dict) and name in choices:
            return choices[name]
    return None

def print_help(self, file=None):
    """Print the citation notice, then the regular argparse help text.

    ``file`` is passed through to ``ArgumentParser.print_help`` so that
    callers can direct the help text to a stream of their choice, as the
    base class allows; ``None`` keeps the base-class default destination.
    """
    self.print_citation()
    super(SourmashParser, self).print_help(file)


def parse_args(self, args=None, namespace=None):
    """Parse arguments, with sourmash-specific handling around argparse.

    * With no arguments at all (an empty ``args`` list, or ``args`` left
      as ``None`` while ``sys.argv`` holds only the program name), print
      the help text and exit with status 1.
    * Print the citation notice unless ``quiet`` was parsed as true or
      this parser was created with ``citation`` false.
    * If a ``subcmd`` attribute was parsed but is ``None``, print the help
      of the subparser named by ``cmd`` and exit with status 1.
    * Rename the ``import`` subcommand to ``ingest``.

    Returns the parsed namespace.
    """
    if args is None:
        nothing_given = len(sys.argv) == 1
    else:
        nothing_given = len(args) == 0
    if nothing_given:
        self.print_help()
        raise SystemExit(1)

    parsed = super(SourmashParser, self).parse_args(
        args=args, namespace=namespace
    )

    quiet = 'quiet' in parsed and parsed.quiet
    if self.citation and not quiet:
        self.print_citation()

    if 'subcmd' in parsed and parsed.subcmd is None:
        self._subparser_from_name(parsed.cmd).print_help()
        raise SystemExit(1)

    # BEGIN: dirty hacks to simultaneously support new and previous interface
    if getattr(parsed, 'subcmd', None) == 'import':
        parsed.subcmd = 'ingest'
    # END: dirty hacks to simultaneously support new and previous interface
    return parsed


def get_parser():
    """Build and return the top-level ``sourmash`` argument parser.

    The parser description contains a usage overview assembled from the
    module docstrings of the non-expert commands plus one entry per
    command-group directory found next to this file. Every command and
    command group then registers its own subparser through its module's
    ``subparser`` function.
    """
    module_descs = {
        'lca': 'Taxonomic operations',
        'sig': 'Manipulate signature files',
        'storage': 'Operations on storage',
    }
    # Commands that are registered but left out of the usage overview.
    expert = {
        'categorize', 'dump', 'import_csv', 'migrate', 'multigather',
        'sbt_combine', 'watch',
    }

    this_module = sys.modules[__name__]
    clidir = os.path.dirname(__file__)
    basic_ops = utils.command_list(clidir)

    pieces = [' Basic operations\n']
    for op in basic_ops:
        if op in expert:
            continue
        docstring = getattr(this_module, op).__doc__
        helpstring = 'sourmash {op:s} --help'.format(op=op)
        pieces.append(
            ' {hs:25s} {ds:s}\n'.format(hs=helpstring, ds=docstring)
        )

    # Immediate sub-directories of the CLI package, filtered and sorted.
    subdirs = next(os.walk(clidir))[1]
    cmd_group_dirs = sorted(filter(utils.opfilter, subdirs))
    for dirpath in cmd_group_dirs:
        pieces.append('\n ' + module_descs[dirpath] + '\n')
        pieces.append(' sourmash {gd:s} --help\n'.format(gd=dirpath))
    usage = ''.join(pieces)

    desc = (
        'Compute, compare, manipulate, and analyze MinHash sketches of DNA '
        'sequences.\n\nUsage instructions:\n' + usage
    )
    parser = SourmashParser(
        prog='sourmash',
        description=desc,
        formatter_class=RawDescriptionHelpFormatter,
        usage=SUPPRESS,
    )
    parser._optionals.title = 'Options'
    parser.add_argument(
        '-v', '--version', action='version',
        version='sourmash ' + sourmash.VERSION
    )
    parser.add_argument(
        '-q', '--quiet', action='store_true',
        help='don\'t print citation information'
    )
    sub = parser.add_subparsers(
        title='Instructions', dest='cmd', metavar='cmd', help=SUPPRESS,
    )
    for op in basic_ops + cmd_group_dirs:
        getattr(this_module, op).subparser(sub)
    parser._action_groups.reverse()
    return parser
38 changes: 38 additions & 0 deletions sourmash/cli/categorize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"'sourmash categorize' - query an SBT for best match, with many signatures."

import argparse

from sourmash.cli.utils import add_ksize_arg, add_moltype_args


def subparser(subparsers):
    """Register the ``categorize`` command and its arguments on ``subparsers``."""
    parser = subparsers.add_parser('categorize')

    # Positional arguments.
    parser.add_argument('sbt_name', help='name of SBT to load')
    parser.add_argument(
        'queries', nargs='+', help='list of signatures to categorize'
    )

    # Options.
    parser.add_argument(
        '-q', '--quiet', action='store_true', help='suppress non-error output'
    )
    add_ksize_arg(parser, 31)
    parser.add_argument(
        '--threshold', type=float, default=0.08,
        help='minimum threshold for reporting matches; default=0.08'
    )
    parser.add_argument('--traverse-directory', action='store_true')
    parser.add_argument(
        '--ignore-abundance', action='store_true',
        help='do NOT use k-mer abundances if present'
    )
    add_moltype_args(parser)

    # TODO: help messages in these
    parser.add_argument('--csv', type=argparse.FileType('at'))
    parser.add_argument('--load-csv', default=None)


def main(args):
    """Run ``sourmash.commands.categorize`` on the parsed ``args`` and return its result."""
    # NOTE(review): the import is local to the function, presumably to defer
    # loading the package until the command runs - confirm.
    import sourmash
    return sourmash.commands.categorize(args)
43 changes: 43 additions & 0 deletions sourmash/cli/compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""compare genomes"""

from argparse import FileType

from sourmash.cli.utils import add_ksize_arg, add_moltype_args


def subparser(subparsers):
    """Register the ``compare`` command and its arguments on ``subparsers``."""
    parser = subparsers.add_parser('compare')

    parser.add_argument(
        'signatures', nargs='+',
        help='list of signatures to compare'
    )
    parser.add_argument(
        '-q', '--quiet', action='store_true',
        help='suppress non-error output'
    )
    add_ksize_arg(parser)
    add_moltype_args(parser)

    # Output destination and comparison behaviour.
    parser.add_argument(
        '-o', '--output', metavar='F',
        help='file to which output will be written; default is terminal '
             '(standard output)'
    )
    parser.add_argument(
        '--ignore-abundance', action='store_true',
        help='do NOT use k-mer abundances even if present'
    )
    parser.add_argument(
        '--traverse-directory', action='store_true',
        help='compare all signatures underneath directories'
    )
    parser.add_argument(
        '--csv', metavar='F', type=FileType('w'),
        help='write matrix to specified file in CSV format (with column '
             'headers)'
    )
    parser.add_argument(
        '-p', '--processes', metavar='N', type=int, default=None,
        help='Number of processes to use to calculate similarity'
    )


def main(args):
    """Run ``sourmash.commands.compare`` on the parsed ``args`` and return its result."""
    # NOTE(review): the import is local to the function, presumably to defer
    # loading the package until the command runs - confirm.
    import sourmash
    return sourmash.commands.compare(args)
Loading