-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathretaxdump
executable file
·140 lines (129 loc) · 4.52 KB
/
retaxdump
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env python3
#
# Copyright (C) 2017–2024, Jose Manuel Martí Martínez
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""
Get needed taxdump files from NCBI.
"""
import argparse
import sys
import time
import urllib.request
from ftplib import FTP, error_temp, error_proto, error_reply
from zipfile import ZipFile
from recentrifuge import __version__, __author__, __date__
from recentrifuge.config import gray, red, green, yellow
from recentrifuge.config import LICENSE, NODES_FILE, NAMES_FILE, ZIPFILE
from recentrifuge.config import TAXDUMP_PATH
# Seconds to wait before each download attempt (0 means try right away):
# every second from 0 to 10, then progressively longer pauses.
RETRY_TIMES = list(range(11)) + [15, 30, 60, 120, 300]
# Exceptions treated as a recoverable download problem (triggers a retry)
EXCEPTS = (
    OSError,
    EOFError,
    error_temp,
    error_proto,
    error_reply,
)
# NCBI host and the directory on it holding the taxonomy dump
FTP_SERVER = 'ftp.ncbi.nlm.nih.gov'
PATH_NCBI = '/pub/taxonomy/'
# Connection timeout, in seconds
CONN_TIMEOUT = 30
# Bytes requested per read while streaming the HTTPS download (1 MiB)
BUFF_SIZE = 1 << 20
def _quit_ftp(ftp):
    """End an FTP session politely, falling back to a plain close."""
    try:
        ftp.quit()
    except EXCEPTS:
        try:
            ftp.close()
        except EXCEPTS:
            # Deliberate best effort: the download already succeeded, so a
            # failure while tearing down the connection is not a problem.
            pass


def _fetch_via_ftp():
    """Download ZIPFILE from the NCBI FTP server to the working directory.

    Raises whatever the FTP or file operations raise; the caller decides
    which exceptions are recoverable.
    """
    ftp = FTP(FTP_SERVER, timeout=CONN_TIMEOUT)
    try:
        ftp.login()
        ftp.cwd(PATH_NCBI)
        # The context manager guarantees the archive is flushed and closed
        # before anybody tries to read it back.
        with open(ZIPFILE, 'wb') as zipfile:
            ftp.retrbinary('RETR ' + ZIPFILE, zipfile.write)
    except BaseException:
        # Connection state is unknown here: drop it without the QUIT
        # handshake so a failed attempt does not leak the socket.
        ftp.close()
        raise
    _quit_ftp(ftp)


def _fetch_via_https():
    """Download ZIPFILE from the NCBI web server to the working directory.

    Raises whatever the HTTPS or file operations raise; the caller decides
    which exceptions are recoverable.
    """
    url = 'https://' + FTP_SERVER + PATH_NCBI + ZIPFILE
    with urllib.request.urlopen(url, timeout=CONN_TIMEOUT) as https, \
            open(ZIPFILE, 'wb') as zipfile:
        # Stream in BUFF_SIZE chunks; read() returns b'' at end of data
        for buff in iter(lambda: https.read(BUFF_SIZE), b''):
            zipfile.write(buff)


def _attempt_download():
    """Try FTP first and HTTPS as fallback, reporting problems on stdout.

    Returns:
        None if the archive was downloaded, otherwise the exception that
        made the HTTPS fallback fail (the last problem of the attempt).
    """
    try:
        _fetch_via_ftp()
    except EXCEPTS:
        print(yellow(' PROBLEM!'))
        print(gray(' Retrying with HTTPS...'), end='', flush=True)
        try:
            _fetch_via_https()
        except EXCEPTS as err:
            print(yellow(' PROBLEM!'))
            return err
    return None


def main():
    """Main entry point to script.

    Parses the command line, downloads ZIPFILE from NCBI into the current
    working directory (FTP first, HTTPS as fallback, with the waits given
    by RETRY_TIMES between attempts) and extracts NODES_FILE and NAMES_FILE
    into the directory given by -n/--nodespath.

    Exits with status 5 when every attempt failed and with status 9 when
    the user interrupts the download.
    """
    # Argument Parser Configuration
    parser = argparse.ArgumentParser(
        description='Get needed taxdump files from NCBI servers',
        epilog=f'%(prog)s - Release {__version__} - {__date__}' + LICENSE,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        '-V', '--version',
        action='version',
        version=f'%(prog)s release {__version__} ({__date__})'
    )
    parser.add_argument(
        '-n', '--nodespath',
        action='store',
        metavar='PATH',
        default=TAXDUMP_PATH,
        help=('path for the nodes information files (nodes.dmp and names.dmp' +
              ' from NCBI)')
    )

    # Parse arguments
    args = parser.parse_args()

    # Program header
    print(f'\n=-= {sys.argv[0]} =-= v{__version__} - {__date__}'
          f' =-= by {__author__} =-=\n')
    sys.stdout.flush()

    # Download the taxdump archive, retrying with increasing waits
    print(f'\033[90mDownloading {ZIPFILE} from NCBI FTP...\033[0m', end='')
    sys.stdout.flush()
    error = None
    try:
        for retry_time in RETRY_TIMES:
            if retry_time:
                print(gray(f' Retrying in {retry_time} seconds...'),
                      end='', flush=True)
                time.sleep(retry_time)
            error = _attempt_download()
            if error is None:
                break
        else:  # Too many problems, quit
            print(red(' FAILED!'),
                  gray('Exceeded number of attempts!'))
            print(gray('Error message:'), error)
            sys.exit(5)
    except KeyboardInterrupt:
        # Covers the waits between attempts as well as the transfers
        print(gray(' User'), yellow('interrupted!'))
        sys.exit(9)
    print(green(' OK!'))

    # Extract the needed files from the downloaded archive
    with ZipFile(ZIPFILE) as filezip:
        for filename in [NODES_FILE, NAMES_FILE]:
            print(gray(f'Extracting {filename}... '), end='')
            try:
                filezip.extract(filename, path=args.nodespath)
            except KeyError:  # The archive has no member with that name
                print(red('ERROR!'))
            else:
                print(green('OK!'))
# Run main() only when this file is executed as a script, not when imported
if __name__ == '__main__':
    main()