-
Notifications
You must be signed in to change notification settings - Fork 2
/
crude_db_harmonisation.sh
36 lines (27 loc) · 1.2 KB
/
crude_db_harmonisation.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/bin/bash
#
# Crude harmonisation of AMR databases against CARD.
#
# Downloads the ResFinder, NCBI AMRFinderPlus (AMRProt) and CARD databases,
# runs RGI over the ResFinder and NCBI sequences, hands the RGI results to
# reconcile.py, and finally concatenates the per-database ARO mapping tables
# into resfinder_ncbi_ARO_mapping.tsv in the current directory.
#
# Requires on PATH: git, wget, tar, rgi, python, awk.
# Must be run from the directory containing reconcile.py.

# Abort on the first failed step so that later stages never run on missing
# or stale inputs (e.g. a failed download followed by `rgi load`).
set -euo pipefail

# set up dirs
mkdir -p dbs mapping

# get the resfinder database (skipping pointfinder for now)
# A checkout left over from an aborted run would make `git clone` fail.
rm -rf resfinder_db
git clone https://bitbucket.org/genomicepidemiology/resfinder_db
cat resfinder_db/*.fsa > dbs/resfinder.fna
rm -rf resfinder_db

# get the NCBI databases
# Write straight to the destination: a plain `wget` would save the download as
# AMRProt.1 if an old AMRProt already existed, and the stale file would then be
# the one that gets used.
wget -O dbs/ncbi_amr.faa https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/AMRProt

# get latest CARD database
wget -O dbs/card.tar.bz2 https://card.mcmaster.ca/latest/data
tar -xvf dbs/card.tar.bz2 -C dbs

# load latest card into rgi
rgi load -i dbs/card.json

# run rgi on both of these databases
# ResFinder is supplied as nucleotide FASTA, so it is run in contig mode;
# the NCBI set is protein FASTA and is run in protein mode.
rgi main -i dbs/resfinder.fna -o mapping/resfinder_rgi -t contig -a BLAST --clean
rgi main -i dbs/ncbi_amr.faa -o mapping/ncbi_rgi -t protein -a BLAST --clean

# reconcile the databases
python reconcile.py -f dbs/resfinder.fna -r mapping/resfinder_rgi.txt -d resfinder
python reconcile.py -f dbs/ncbi_amr.faa -r mapping/ncbi_rgi.txt -d ncbi

# tidy up
# NOTE(review): reconcile.py appears to write <db>_ARO_mapping.tsv into the
# current directory -- confirm. The tables must be in mapping/ *before* they
# are combined below; the existence check keeps this step harmless if
# reconcile.py already wrote them there.
for mapping_table in resfinder_ARO_mapping.tsv ncbi_ARO_mapping.tsv; do
  if [[ -f "${mapping_table}" ]]; then
    mv -- "${mapping_table}" mapping/
  fi
done

# combine outputs
# Keep the header line from the first table only (NR == 1) and every
# non-header line from both tables (FNR > 1).
awk -F $'\t' 'NR == 1 || FNR > 1' \
  mapping/resfinder_ARO_mapping.tsv \
  mapping/ncbi_ARO_mapping.tsv \
  > resfinder_ncbi_ARO_mapping.tsv