Skip to content

Commit

Permalink
upgrade the command line interfaces.
Browse files: browse the repository at this point in the history
  • Loading branch information
meren committed Aug 31, 2015
1 parent 3bf6fdb commit ef78ee4
Show file tree
Hide file tree
Showing 30 changed files with 826 additions and 583 deletions.
568 changes: 568 additions & 0 deletions anvio/__init__.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion anvio/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def __init__(self, args, run = run, progress = progress):
self.progress = progress
self.run = run

self.sample_id = args.sample_id
self.sample_id = args.sample_name
self.merged_sample_ids = []
self.input_runinfo_dicts = {}
self.input_runinfo_paths = args.input
Expand Down
8 changes: 4 additions & 4 deletions anvio/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,17 @@ def __init__(self, args = None):
if args:
self.args = args
self.input_file_path = args.input_file
self.contigs_db_path = args.contigs_db_path
self.serialized_profile_path = args.profile
self.output_directory = args.output_directory
self.contigs_db_path = args.contigs_db
self.serialized_profile_path = args.serialized_profile
self.output_directory = args.output_dir
self.list_contigs_and_exit = args.list_contigs
self.min_contig_length = args.min_contig_length
self.min_mean_coverage = args.min_mean_coverage
self.min_coverage_for_variability = args.min_coverage_for_variability
self.contigs_shall_be_clustered = args.cluster_contigs
self.number_of_threads = 4
self.no_trehading = True
self.sample_id = args.sample_id
self.sample_id = args.sample_name
self.report_variability_full = args.report_variability_full
self.overwrite_output_destinations = args.overwrite_output_destinations

Expand Down
2 changes: 1 addition & 1 deletion anvio/summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def __init__(self, args = None, r = run, p = progress):
sys.exit()

self.collection_id = args.collection_id
self.output_directory = args.output_directory
self.output_directory = args.output_dir
self.debug = args.debug

self.sanity_check()
Expand Down
26 changes: 9 additions & 17 deletions bin/anvi-cluster-with-concoct
Original file line number Diff line number Diff line change
Expand Up @@ -19,29 +19,21 @@ __license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"
__status__ = "Development"


if __name__ == '__main__':
import argparse

parser = argparse.ArgumentParser(description="A script to call CONCOCT clustering on a merged anvi'o profile")
parser.add_argument('-p', '--profile-db', metavar = "PROFILE_DB", required = True,
help = 'Profile database.')
parser.add_argument('-c', '--contigs-db', required = True, metavar = 'CONTIGS_DB',
help = 'anvio contigs database.')
parser.add_argument('-o', '--output-file', metavar = 'OUTPUT.txt', default = None,
help = 'Store results as a TAB-delimited file')
parser.add_argument('--skip-db', default = False, action = 'store_true',
help = 'By default, results are stored in the profile database that is given as a parameter\
Use this flag if you would like to skip that step')
parser.add_argument('--source-identifier', default = 'CONCOCT', metavar = 'SOURCE',
help = "The source identifier when results are stored in the profile database. The default id\
is '%(default)s'. If there is another entry for '%(default)s', it will be overwrotten\
with new results. Using this parameter you can avoid that.")
parser.add_argument('--debug', action='store_true', help = 'Print out debug info.')
args = parser.parse_args()

parser.add_argument(*anvio.A('profile-db'), **anvio.K('profile-db'))
parser.add_argument(*anvio.A('contigs-db'), **anvio.K('contigs-db'))
parser.add_argument(*anvio.A('output-file'), **anvio.K('output-file'))
parser.add_argument(*anvio.A('skip-store-in-db'), **anvio.K('skip-store-in-db'))
parser.add_argument(*anvio.A('source-identifier'), **anvio.K('source-identifier', {'default': 'CONCOCT'}))
parser.add_argument(*anvio.A('debug'), **anvio.K('debug'))

args = parser.parse_args()

try:
source = args.source_identifier.strip()
Expand All @@ -63,7 +55,7 @@ if __name__ == '__main__':

if args.output_file:
c.store_clusters_as_TAB_delimited_text(args.output_file)
if not args.skip_db:
if not args.skip_store_in_db:
c.store_clusters_in_db(source = source)

except ConfigError, e:
Expand Down
18 changes: 5 additions & 13 deletions bin/anvi-compute-completeness
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ __license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"
__status__ = "Development"


run = terminal.Run()
Expand All @@ -29,18 +28,11 @@ progress = terminal.Progress()

if __name__ == '__main__':
parser = argparse.ArgumentParser(description='A script to generate completeness info for a given list of _splits_')
parser.add_argument('splits_txt', metavar = 'SPLITS_TXT',
help = 'File with split names.')
parser.add_argument('contigs_db', metavar = 'CONTIGS_DB',
help = 'Contigs database to read from.')
parser.add_argument('-e', '--min-e-value', default=1e-15, type=float, metavar = 'E-VALUE',
help = 'Minimum significance score of an HMM find to be considered as a valid hit.\
Default is %(default)g.')
parser.add_argument('--list-sources', action='store_true', default=False,
help = 'Show available single-copy gene search results and exit.')
parser.add_argument('--source', default=None,
help = 'Source to focus on. If none declared, all single-copy gene sources\
are going to be listed.')
parser.add_argument(*anvio.A('splits-of-interest'), **anvio.K('splits-of-interest'))
parser.add_argument(*anvio.A('contigs-db'), **anvio.K('contigs-db'))
parser.add_argument(*anvio.A('min-e-value'), **anvio.K('min-e-value'))
parser.add_argument(*anvio.A('list-completeness-sources'), **anvio.K('list-completeness-sources'))
parser.add_argument(*anvio.A('completeness-source'), **anvio.K('completeness-source'))

args = parser.parse_args()

Expand Down
29 changes: 8 additions & 21 deletions bin/anvi-experimental-organization
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ __license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"
__status__ = "Development"


run = terminal.Run()
Expand Down Expand Up @@ -76,26 +75,14 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser(description='why yes we do stuff here.')
parser.add_argument('config_file', metavar = 'PATH', default = None, type=str,
help = 'Config file for clustering of contigs. See documentation for help.')
parser.add_argument('-i', '--input-directory', metavar = 'INPUT_DIR', default = None, type=str,
help = 'Input directory where the input files addressed from the configuration\
file can be found (i.e., the profile database, if PROFILE.db::TABLE\
notation is used in the configuration file).')
parser.add_argument('-c', '--contigs-db', metavar = 'CONTIGS_DB',
help = 'anvio contigs database.', required = True)
parser.add_argument('-p', '--profile-db', default = None, metavar = 'PROFILE_DB',
help = 'anvio contigs database.', required = False)
parser.add_argument('-N', '--name', default = None, metavar = 'NAME',
help = "The name to use when storing the resulting clustering in the database.\
This name will appear in the interactive interface and other relevant\
interfaces. Please consider using a short and descriptive single-word\
(if you do not do that you will make anvi'o complain).")
parser.add_argument('--skip-store-in-db', default = False, action = 'store_true',
help = 'By defaut, the resulting tree is stored in the profile database.\
When declared, this flag skips that step.')
parser.add_argument('-o', '--output-file', metavar = 'FILE', default = None, type=str,
help = 'To store the newick output.')
parser.add_argument('-D', '--dry-run', default = False, action = 'store_true',
help = 'Do not do anything, just print out the configuration.')

parser.add_argument(*anvio.A('profile-db'), **anvio.K('profile-db', {'required': False}))
parser.add_argument(*anvio.A('contigs-db'), **anvio.K('contigs-db'))
parser.add_argument(*anvio.A('experimental-org-input-dir'), **anvio.K('experimental-org-input-dir'))
parser.add_argument(*anvio.A('clustering-name'), **anvio.K('clustering-name'))
parser.add_argument(*anvio.A('skip-store-in-db'), **anvio.K('skip-store-in-db'))
parser.add_argument(*anvio.A('output-file'), **anvio.K('output-file'))
parser.add_argument(*anvio.A('dry-run'), **anvio.K('dry-run'))

args = parser.parse_args()

Expand Down
14 changes: 6 additions & 8 deletions bin/anvi-export-genes-table
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,24 @@ __license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"
__status__ = "Development"


run = terminal.Run()

def main(args):
contigs_db = dbops.ContigsDatabase(args.db_path)
contigs_db = dbops.ContigsDatabase(args.contigs_db)
genes_contigs_table = contigs_db.db.get_table_as_dict(t.genes_contigs_table_name)
genes_contigs_table_headers = contigs_db.db.get_table_structure(t.genes_contigs_table_name)
utils.store_dict_as_TAB_delimited_file(genes_contigs_table, args.output, genes_contigs_table_headers)
run.info('Recovered matrix', os.path.abspath(args.output))
utils.store_dict_as_TAB_delimited_file(genes_contigs_table, args.output_file, genes_contigs_table_headers)
run.info('Recovered matrix', os.path.abspath(args.output_file))

if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(description='Generate a TAB-delimited matrix file from genes tables found in an\
contigs database')
parser.add_argument('db_path', metavar = 'DB_PATH', default = None,
help = 'Path to the contigs database.')
parser.add_argument('-o', '--output', default = "CONTIGS.txt",
help = 'Output file path. Default is %(default)s.')

parser.add_argument(*anvio.A('contigs-db'), **anvio.K('contigs-db'))
parser.add_argument(*anvio.A('output-file'), **anvio.K('output-file'))

args = parser.parse_args()

Expand Down
28 changes: 12 additions & 16 deletions bin/anvi-export-splits-and-coverages
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,21 @@ __license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"
__status__ = "Development"


run = terminal.Run()

def main(args):
merged_profile_db = db.DB(args.merged_profile_db, anvio.__profile__version__)
merged_profile_db = db.DB(args.profile_db, anvio.__profile__version__)
contigs_db = db.DB(args.contigs_db, anvio.__contigs__version__)

if(merged_profile_db.get_meta_value('merged') != True):
raise ConfigError, "'%s' does not seem to be a merged profile database :/" % args.merged_profile_db
raise ConfigError, "'%s' does not seem to be a merged profile database :/" % args.profile_db

if args.output_directory:
filesnpaths.gen_output_directory(args.output_directory)
if args.output_dir:
filesnpaths.gen_output_dir(args.output_dir)
else:
args.output_directory = os.path.dirname(os.path.abspath(args.merged_profile_db))
args.output_dir = os.path.dirname(os.path.abspath(args.profile_db))

if not args.output_file_prefix:
args.output_file_prefix = merged_profile_db.get_meta_value('sample_id')
Expand All @@ -58,8 +57,8 @@ def main(args):
merged_profile_db.disconnect()
contigs_db.disconnect()

coverages_file = os.path.join(args.output_directory, args.output_file_prefix + '-COVs.txt')
splits_fasta = os.path.join(args.output_directory, args.output_file_prefix + '-SPLITS.fa')
coverages_file = os.path.join(args.output_dir, args.output_file_prefix + '-COVs.txt')
splits_fasta = os.path.join(args.output_dir, args.output_file_prefix + '-SPLITS.fa')

utils.store_dict_as_TAB_delimited_file(coverages, coverages_file, ['contig'] + samples)

Expand All @@ -78,14 +77,11 @@ def main(args):

if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Export splits and the coverage table from database')
parser.add_argument('-p', '---merged-profile-db', metavar = "MERGED PROFILE DB", required = True,
help = 'Profile database.')
parser.add_argument('-c', '--contigs-db', required = True, metavar = 'CONTIGS DB',
help = 'anvio contigs database.')
parser.add_argument('-o', '--output-directory', default = None, metavar = 'OUTPUT_DIR',
help = 'Output directory for files to be stored')
parser.add_argument('-O', '--output-file-prefix', default = None, metavar = 'FILENAME_PREFIX',
help = 'A prefix to name output files.')

parser.add_argument(*anvio.A('profile-db'), **anvio.K('profile-db'))
parser.add_argument(*anvio.A('contigs-db'), **anvio.K('contigs-db'))
parser.add_argument(*anvio.A('output-dir'), **anvio.K('output-dir'))
parser.add_argument(*anvio.A('output-file-prefix'), **anvio.K('output-file-prefix'))

args = parser.parse_args()

Expand Down
31 changes: 6 additions & 25 deletions bin/anvi-gen-contigs-database
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ __license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"
__status__ = "Development"


run = terminal.Run()
Expand All @@ -27,34 +26,16 @@ progress = terminal.Progress()
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(description='Generate a new anvio contigs database.')
parser.add_argument('-f', '--contigs-fasta', metavar = 'FASTA', required = True,
help = 'The FASTA file that contains reference sequences you mapped your samples against. This\
could be a reference genome, or contigs from your assembler. Contig names in this file\
must match to those in other input files. If there is a problem, anvio will gracefully\
complain about it.')
parser.add_argument('-L', '--split-length', metavar = 'INTEGER', default = 20000, type=int,
help = 'Splitting very large contigs into multiple pieces improves\
the efficacy of the visualization step. The default value\
is (%(default)d). If you are not sure, we advise you to not\
go below 10,000. The lower you go, the more complicated the\
tree will be, and will take more time and computational\
resources to finish the analysis. Also this is not a case\
of "the smaller the split size the more sensitive the results". If you do\
not want your contigs to be split, you can either enter a very\
large integer, or "-1".')
parser.add_argument('-K', '--kmer-size', metavar = 'INTEGER', default = 4, type=int,
help = 'K-mer size for k-mer frequency calculations. The default k-mer size for composition-based\
analyses is 4, historically. Although tetra-nucleotide frequencies seem to offer the\
the sweet spot of sensitivity, information density, and manageable number of dimensions\
for clustering approaches, you are welcome to experiment (but maybe you should leave\
it as is for your first set of analyses).')
parser.add_argument('-o', '--db-path', default = "CONTIGS.db",
help = 'Output file path for the new contigs database to be generated.')

parser.add_argument(*anvio.A('contigs-fasta'), **anvio.K('contigs-fasta'))
parser.add_argument(*anvio.A('split-length'), **anvio.K('split-length'))
parser.add_argument(*anvio.A('kmer-size'), **anvio.K('kmer-size'))
parser.add_argument(*anvio.A('output-db-path'), **anvio.K('output-db-path', {'default': 'CONTIGS.db'}))

args = parser.parse_args()

try:
a = dbops.ContigsDatabase(args.db_path, run, progress, quiet=False)
a = dbops.ContigsDatabase(args.output_db_path, run, progress, quiet=False)
a.create(args.contigs_fasta, args.split_length, args.kmer_size)
except ConfigError, e:
print e
Expand Down
8 changes: 3 additions & 5 deletions bin/anvi-gen-network
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ __license__ = "GPL 3.0"
__version__ = anvio.__version__
__maintainer__ = "A. Murat Eren"
__email__ = "a.murat.eren@gmail.com"
__status__ = "Development"


run = terminal.Run()
Expand Down Expand Up @@ -219,10 +218,9 @@ if __name__ == '__main__':
import argparse

parser = argparse.ArgumentParser(description='Generate a network description file')
parser.add_argument('runinfo', metavar = 'RUNINFO',
help = 'anvio RUNINFO file')
parser.add_argument('contigs_db', metavar = 'CONTIGS',
help = 'Contigs database that has been used for the run described in RUNINFO file')

parser.add_argument(*anvio.A('runinfo'), **anvio.K('runinfo'))
parser.add_argument(*anvio.A('contigs-db'), **anvio.K('contigs-db'))

args = parser.parse_args()

Expand Down
20 changes: 5 additions & 15 deletions bin/anvi-gen-samples-info-database
Original file line number Diff line number Diff line change
Expand Up @@ -26,25 +26,15 @@ progress = terminal.Progress()
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(description="Create a new anvi'o samples information database.")
parser.add_argument('-D', '--samples-information', metavar = "FILE", default = None,
help = "A TAB-delimited file with information about samples in your dataset (which also correspond)\
to 'view layers'. Each row in this file must correspond to a sample name. Each column must\
contain a unique attribute. Please refer to the documentation to learn more about the \
structure and purpose of this file.")
parser.add_argument('-R', '--samples-order', metavar = "FILE", default = None,
help = "A TAB-delimited file with three columns: 'attribute', 'basic', 'newick'. For each attribute,\
the order of samples must be defined either in the 'basic' form or via a 'newick'-formatted\
tree structurei that describes the organization of each sample. Anvi'o will look for a\
comma-separated list of sample names for the 'basic' form. Please refer to the online docs\
for more info. Also you shouldn't hesitate to try to find the right file format until you get\
it working. There are stringent checks on this file, and you will not break anything while trying!.")
parser.add_argument('-o', '--db-path', default = "SAMPLES.db",
help = 'Output file path for the new contigs database to be generated.')

parser.add_argument(*anvio.A('samples-information'), **anvio.K('samples-information'))
parser.add_argument(*anvio.A('samples-order'), **anvio.K('samples-order'))
parser.add_argument(*anvio.A('output-db-path'), **anvio.K('output-db-path', {'default': 'SAMPLES.db'}))

args = parser.parse_args()

try:
s = dbops.SamplesInformationDatabase(args.db_path, run, progress, quiet=False)
s = dbops.SamplesInformationDatabase(args.output_db_path, run, progress, quiet=False)
s.create(args.samples_information, args.samples_order)
except ConfigError, e:
print e
Expand Down
Loading

0 comments on commit ef78ee4

Please sign in to comment.