Skip to content

Commit

Permalink
Merge pull request #102 from Ecogenomics/master
Browse files Browse the repository at this point in the history
Merge master branch for release 0.2.1
  • Loading branch information
pchaumeil authored Mar 8, 2019
2 parents 447d5d2 + 2945a67 commit 6295f03
Show file tree
Hide file tree
Showing 103 changed files with 144,979 additions and 5,231 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ __pycache__/
*$py.class

#Config file
src/gtdbtk/config/Config.py
src/gtdbtk/config/ConfigMetadata.py
tests/data/results/

# C extensions
*.so
Expand Down
6 changes: 3 additions & 3 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
global-exclude config.py
global-include pfam_search.pl
global-include *.pm
include genome_1.fna genome_2.fna genome_3.fna
global-exclude genome_*_protein.fna

519 changes: 274 additions & 245 deletions README.md

Large diffs are not rendered by default.

32 changes: 26 additions & 6 deletions bin/gtdbtk
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),'..')))
from gtdbtk import version
from gtdbtk.main import OptionsParser

from biolib.logger import logger_setup
from biolib.misc.custom_help_formatter import CustomHelpFormatter
from gtdbtk.biolib_lite.logger import logger_setup
from gtdbtk.biolib_lite.custom_help_formatter import CustomHelpFormatter


def printHelp():
Expand All @@ -61,6 +61,7 @@ def printHelp():
decorate -> Decorate tree with GTDB taxonomy [In Development]
Tools:
test -> Test the classify_wf pipeline with 3 archaeal genomes
trim_msa -> Trim an untrimmed MSA file based on a mask.
check_install -> Verify is all gtdb data files are present.
Expand Down Expand Up @@ -91,8 +92,8 @@ if __name__ == '__main__':
mutex_group.add_argument('--ar122_ms', action='store_true', help='use 122 archaeal marker genes')

required_denovo_wf = denovo_wf_parser.add_argument_group('required named arguments')
#required_denovo_wf.add_argument('--outgroup_taxon', required=True,
# help="taxon to use as outgroup (e.g., p__Patescibacteria)")
required_denovo_wf.add_argument('--outgroup_taxon', required=True,
help="taxon to use as outgroup (e.g., p__Patescibacteria)")
required_denovo_wf.add_argument('--out_dir', required=True,
help="directory to output files")

Expand Down Expand Up @@ -120,8 +121,9 @@ if __name__ == '__main__':
help='maximum percentage of the same amino acid required to retain column (exclusive bound)')
optional_denovo_wf.add_argument('--min_perc_taxa', type=float, default=50,
help='minimum percentage of taxa required to retain column (inclusive bound)')


optional_denovo_wf.add_argument('--rnd_seed', type=int, default=None,
help='random seed to use for selecting columns.')

optional_denovo_wf.add_argument('--prot_model', choices=['JTT', 'WAG', 'LG'],
help='protein substitution model for tree inference', default='WAG')
optional_denovo_wf.add_argument('--no_support', action="store_true",
Expand Down Expand Up @@ -161,6 +163,7 @@ if __name__ == '__main__':
help='desired prefix for output files')
optional_classify_wf.add_argument('--cpus', default=1, type=int,
help='number of CPUs to use')
optional_classify_wf.add_argument('--scratch_dir', help='Reduce memory usage by writing to disk (slower).')
optional_classify_wf.add_argument('--debug', action="store_true",
help='create intermediate files for debugging purposes.')
optional_classify_wf.add_argument('-h', '--help', action="help",
Expand Down Expand Up @@ -224,6 +227,9 @@ if __name__ == '__main__':
help='maximum percentage of the same amino acid required to retain column (exclusive bound)')
optional_align.add_argument('--min_perc_taxa', type=float, default=50,
help='minimum percentage of taxa required to retain column (inclusive bound)')
optional_align.add_argument('--rnd_seed', type=int, default=None,
help='random seed to use for selecting columns.')

optional_align.add_argument('--prefix', required=False, default='gtdbtk',
help='desired prefix for output files')
optional_align.add_argument('--cpus', default=1, type=int,
Expand Down Expand Up @@ -281,6 +287,7 @@ if __name__ == '__main__':
help='desired prefix for output files')
optional_classify.add_argument('--cpus', default=1, type=int,
help='number of CPUs to use')
optional_classify.add_argument('--scratch_dir', help='Reduce memory usage by writing to disk (slower).')
optional_classify.add_argument('--debug', action="store_true",
help='create intermediate files for debugging purposes.')
optional_classify.add_argument('-h', '--help', action="help",
Expand Down Expand Up @@ -318,6 +325,19 @@ if __name__ == '__main__':
optional_decorate.add_argument('-h', '--help', action="help",
help="show help message")

# test
test_parser = subparsers.add_parser('test', conflict_handler='resolve',
formatter_class=CustomHelpFormatter,
help='Test the classify_wf pipeline with 3 archaeal genomes.')
required_test = test_parser.add_argument_group('required named arguments')
required_test.add_argument('--out_dir', required=True,
help='directory to output files')
optional_test = test_parser.add_argument_group('optional arguments')
optional_test.add_argument('--cpus', default=1, type=int,
help='number of CPUs to use')
optional_test.add_argument('-h', '--help', action="help",
help="show help message")

# trim MSA
msa_parser = subparsers.add_parser('trim_msa', conflict_handler='resolve',
formatter_class=CustomHelpFormatter,
Expand Down
10 changes: 10 additions & 0 deletions gtdbtk/VERSION
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
0.2.1
- remove Perl dependencies
- remove Biolib, mpld3, jinja dependencies
- add support for writing to a scratch file using pplacer (--mmap-file options)
- add random seed to trim MSA step to allow for reproducible results.
- use GTDBTK_DATA_PATH variable to set data directory.
- species classification based on ANI radii.
- new columns ( aa_percent,red_values,fastani_reference_radius,warnings )
- bug fixing
- gtdbtk test options
0.1.6
- align step in classify_wf and de_novo_wf function has been fixed.
- improve summary file output.
Expand Down
Empty file added gtdbtk/biolib_lite/__init__.py
Empty file.
103 changes: 103 additions & 0 deletions gtdbtk/biolib_lite/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
###############################################################################
# #
# This program is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program. If not, see <http://www.gnu.org/licenses/>. #
# #
###############################################################################

__author__ = 'Donovan Parks'
__copyright__ = 'Copyright 2014'
__credits__ = ['Donovan Parks']
__license__ = 'GPL3'
__maintainer__ = 'Donovan Parks'
__email__ = 'donovan.parks@gmail.com'

import os
import errno
import sys
import logging
import ntpath
import re
import gzip


def is_float(s):
    """Check if a value can be converted to a float.

    Parameters
    ----------
    s : str
        String to evaluate.

    Returns
    -------
    boolean
        True if the value can be converted, else False.
    """

    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: malformed numeric string (e.g. 'abc').
        # TypeError: input float() does not accept at all (e.g. None, a list).
        return False

    return True


def check_file_exists(input_file):
    """Check if file exists.

    Logs an error on the 'timestamp' logger and terminates the program
    with a non-zero exit status if `input_file` is not an existing
    regular file.

    Parameters
    ----------
    input_file : str
        Path of the file to check.

    Raises
    ------
    SystemExit
        If `input_file` does not exist or is not a file.
    """
    # os.path.isfile() is already False for non-existent paths, so a
    # separate os.path.exists() test is unnecessary.
    if not os.path.isfile(input_file):
        logger = logging.getLogger('timestamp')
        logger.error('Input file does not exists: ' + input_file + '\n')
        # Exit with a failure status; a bare sys.exit() reports success (0).
        sys.exit(1)


def check_dir_exists(input_dir):
    """Check if directory exists.

    Logs an error on the 'timestamp' logger and terminates the program
    with a non-zero exit status if `input_dir` is not an existing
    directory.

    Parameters
    ----------
    input_dir : str
        Path of the directory to check.

    Raises
    ------
    SystemExit
        If `input_dir` does not exist or is not a directory.
    """
    # os.path.isdir() is already False for non-existent paths, so a
    # separate os.path.exists() test is unnecessary.
    if not os.path.isdir(input_dir):
        logger = logging.getLogger('timestamp')
        logger.error('Input directory does not exists: ' + input_dir + '\n')
        # Exit with a failure status; a bare sys.exit() reports success (0).
        sys.exit(1)


def make_sure_path_exists(path):
    """Create directory if it does not exist.

    Intermediate directories are created as needed. An empty `path` is
    accepted and ignored.

    Parameters
    ----------
    path : str
        Directory to create.

    Raises
    ------
    SystemExit
        If the directory cannot be created, including the case where
        `path` already exists but is not a directory.
    """

    if not path:
        # lack of a path qualifier is acceptable as this
        # simply specifies the current directory
        return

    try:
        os.makedirs(path)
    except OSError as exception:
        # EEXIST is only harmless when the existing entry really is a
        # directory; an existing regular file at `path` is a failure.
        if exception.errno == errno.EEXIST and os.path.isdir(path):
            return

        logger = logging.getLogger('timestamp')
        logger.error('Specified path could not be created: ' + path + '\n')
        # Exit with a failure status; a bare sys.exit() reports success (0).
        sys.exit(1)


def remove_extension(filename, extension=None):
    """Remove extension from filename.

    Any leading directory components are dropped first. A specific
    extension can be specified, otherwise the extension is taken as
    all characters after the last period.

    Parameters
    ----------
    filename : str
        File name or path to process.
    extension : str, optional
        Extension to strip, with or without its leading period.

    Returns
    -------
    str
        Base name of the file without its extension.
    """
    f = ntpath.basename(filename)

    if extension and f.endswith(extension):
        f = f[0:f.rfind(extension)]
    else:
        f = os.path.splitext(f)[0]

    # An extension given without its period (e.g. 'fna') leaves a trailing
    # '.'; endswith() is used instead of f[-1] so an empty name is safe.
    if f.endswith('.'):
        f = f[0:-1]

    return f
102 changes: 102 additions & 0 deletions gtdbtk/biolib_lite/custom_help_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
###############################################################################
# #
# This program is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program. If not, see <http://www.gnu.org/licenses/>. #
# #
###############################################################################

__author__ = 'Donovan Parks'
__copyright__ = 'Copyright 2014'
__credits__ = ['Donovan Parks', 'Connor Skennerton']
__license__ = 'GPL3'
__maintainer__ = 'Donovan Parks'
__email__ = 'donovan.parks@gmail.com'

import os
import types
import tempfile
import argparse


class ChangeTempAction(argparse.Action):
    """Argparse action that redirects temporary files to a given directory.

    Example:
    <parse>.add_argument('--tmpdir', action=ChangeTempAction, default=tempfile.gettempdir(), help="specify alternative directory for temporary files")
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Store `values` and set it as `tempfile.tempdir`.

        Raises argparse.ArgumentTypeError when `values` is not an
        existing directory.
        """
        # Guard clause: reject anything that is not a directory up front.
        if not os.path.isdir(values):
            message = 'The value of %s must be a valid directory' % option_string
            raise argparse.ArgumentTypeError(message)

        tempfile.tempdir = values
        setattr(namespace, self.dest, values)


class CustomHelpFormatter(argparse.HelpFormatter):
    """Provide a customized format for help output.

    Appends the default value to an argument's help text and prints the
    metavar only once for options that have several option strings.

    http://stackoverflow.com/questions/9642692/argparse-help-without-duplicate-allcaps
    """

    def _get_help_string(self, action):
        """Place default value in help string.

        The default is appended unless the help text already references
        '%(default)', or the default is empty, None, a boolean or
        argparse.SUPPRESS.
        """
        h = action.help
        if '%(default)' in action.help:
            return h

        default = action.default
        # isinstance(..., bool) replaces the Python 2-only types.BooleanType,
        # which raises AttributeError on Python 3.
        if default == '' or default == [] or default is None or isinstance(default, bool):
            return h
        if default is argparse.SUPPRESS:
            return h

        defaulting_nargs = [argparse.OPTIONAL, argparse.ZERO_OR_MORE]
        if action.option_strings or action.nargs in defaulting_nargs:
            if '\n' in h:
                # multi-line help: attach the default to the first line only
                lines = h.splitlines()
                lines[0] += ' (default: %(default)s)'
                h = '\n'.join(lines)
            else:
                h += ' (default: %(default)s)'
        return h

    def _format_action_invocation(self, action):
        """Removes duplicate ALLCAPS with positional arguments."""
        if not action.option_strings:
            default = self._get_default_metavar_for_positional(action)
            metavar, = self._metavar_formatter(action, default)(1)
            return metavar

        # if the Optional doesn't take a value, format is:
        #    -s, --long
        if action.nargs == 0:
            return ', '.join(action.option_strings)

        # if the Optional takes a value, format is:
        #    -s, --long ARGS
        # i.e. the metavar is printed once, after the last option string
        default = self._get_default_metavar_for_optional(action)
        args_string = self._format_args(action, default)
        return '%s %s' % (', '.join(action.option_strings), args_string)

    def _get_default_metavar_for_optional(self, action):
        """Metavar for an optional argument: its upper-cased destination."""
        return action.dest.upper()

    def _get_default_metavar_for_positional(self, action):
        """Metavar for a positional argument: its destination as-is."""
        return action.dest
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,14 @@
# #
###############################################################################

DEFAULT_DOMAIN_THRESHOLD = 10.0
__author__ = 'Donovan Parks'
__copyright__ = 'Copyright 2015'
__credits__ = ['Donovan Parks']
__license__ = 'GPL3'
__maintainer__ = 'Donovan Parks'
__email__ = 'donovan.parks@gmail.com'


class BioLibError(Exception):
    """Exception subclass constructed from a single message."""

    def __init__(self, msg):
        """Initialise the exception with `msg` as its only argument."""
        super(BioLibError, self).__init__(msg)
Loading

0 comments on commit 6295f03

Please sign in to comment.