-
Notifications
You must be signed in to change notification settings - Fork 0
/
alwork.py
112 lines (88 loc) · 5.69 KB
/
alwork.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
"""
-----------------------------------------------------------------------------------
Script alwork.py
-----------------------------------------------------------------------------------
This program is necessary for various manipulations with alignments in fasta format.
Available commands: convert - converts aligned sequences to consensus,
consensus - calculates a new consensus based on alignment of complete genomes and
shorter sequences, unite - combines alignments into a single file containing
consensus sequences.
Using convert:
python3 alwork.py convert –i align.fasta –o out_dir –p prefix
-i, --input (align.fasta) - the result of alignment of sequences in fasta format;
-o, --output (out_dir) - the name of the directory for storing the calculation
results;
-p, --prefix (prefix) - prefix file names in the results.
Using consensus:
python3 alwork.py consensus –i align.fasta –s seqs.fasta –o output –f format
-i, --input (align.fasta) - the result of alignment of sequences in fasta format;
-s, --seqs (seqs.fasta) - file with short sequences in fasta format;
-o, --output (output) - name of the output directory containing the new consensus;
Using unite:
python3 alwork.py unite –i input –o output –fl False –ig True –il 0.9
-i, --input (input) - name of the directory containing files with alignments
in fasta format;
-o, --output (output) - the name of the output file, with consensus sequences;
-fl, --flength - (optional parameter, default True) True or False, whether
the ends of sequences consisting only of gaps (gaps) will be taken into account
to calculate the occurrence of a nucleotide at a given alignment position;
-ig - (optional parameter, default False) True or False, ignoring gaps when
drawing up a consensus sequence;
-il - (optional parameter, default 0.9) number, occurrence level above which
gaps are ignored.
"""
from Clss.Controller.AlignController import AlignController
import argparse
version = "1.0.0"
def create_parser():
parser = argparse.ArgumentParser(
prog="alwork",
description="One of the parts fstage program. Serves for working with aligned sequences",
epilog="(c) Maximato 2019."
)
parser.add_argument("--version", action='version', help="print version of program",
version='%(prog)s {}'.format(version))
subparsers = parser.add_subparsers(dest="command", description="Commands for first argument")
# 1. add parser of converting aligned sequences
convert_parser = subparsers.add_parser("convert", help="convert in different combinations aligned sequences "
"stored in fasta format into consensus",
description="Convert in different combinations aligned sequences "
"stored in fasta format into consensus")
convert_parser.add_argument("-i", "--input", help="Input align file", metavar="input")
convert_parser.add_argument("-o", "--output", help="Output directory", metavar="output")
convert_parser.add_argument("-p", "--prefix", help="Prefix for output files", metavar="prefix")
# 2. add parser of creating consensus based on aligned sequences of genome and short sequences (not complete genome)
consensus_parser = subparsers.add_parser("consensus",
help="create consensus based on aligned sequences of complete genome "
"and short sequences (not complete genome)",
description="Create consensus based on aligned sequences of complete "
"genome and short sequences (not complete genome)")
consensus_parser.add_argument("-i", "--input", help="Input align file", metavar="input")
consensus_parser.add_argument("-s", "--seqs", help="Fasta file with sequences", metavar="seqs")
consensus_parser.add_argument("-o", "--output", help="Output directory", metavar="output")
# 3. add parser of uniting aligns into one file
unite_parser = subparsers.add_parser("unite", help="unite aligns from fasta files into one as consensuses",
description="Unite aligns from fasta files into one as consensuses")
unite_parser.add_argument("-i", "--input", help="Input directory with aligns", metavar="input")
unite_parser.add_argument("-o", "--output", help="Output file with consensuses", metavar="output")
unite_parser.add_argument("-fl", "--flength",
help="Count consensus for full length of aligned sequences (fl=True) or "
"only for content (fl=False)", default=True, type=bool, metavar="flength")
unite_parser.add_argument("-ig", help="Boolean, ignore gaps in consensus", default=False, type=bool, metavar="ig")
unite_parser.add_argument("-il", help="From 0 to 1, level of ignoring gaps in consensus. If "
"confidence of gap in position more or equal ignore level, "
"gap will be ignored. Work only if ignore gaps is True",
default=0.9, type=float, metavar="il")
return parser
if __name__ == "__main__":
ac = AlignController()
parser = create_parser()
ns = parser.parse_args()
if ns.command == "convert":
ac.convert_in_all_combinations(ns.input, ns.output, ns.prefix)
elif ns.command == "consensus":
ac.consensus_with(ns.input, ns.seqs, ns.output)
elif ns.command == "unite":
ac.unite_aligns(ns.input, ns.output, ns.flength, ns.ig, ns.il)
else:
parser.print_help()