Skip to content

Commit

Permalink
Add option to download VFDB in db_setup workflow.
Browse files Browse the repository at this point in the history
  • Loading branch information
njohner committed Feb 9, 2024
1 parent 9f25f49 commit 7f97dcc
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 0 deletions.
5 changes: 5 additions & 0 deletions bin/zdb
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@ elif [[ "$1" == "setup" ]]; then
db_setup_args="${db_setup_args} --pfam"
shift
;;
--vfdb)
db_setup_args="${db_setup_args} --vfdb"
shift
;;
--singularity_dir=*)
singularity_dir=${i#*=}
shift
Expand All @@ -135,6 +139,7 @@ elif [[ "$1" == "setup" ]]; then
echo " --ko: downloads and setups the hmm profiles of the ko database"
echo " --pfam: downloads and setups up the hmm profiles of the PFAM protein domains"
echo " --swissprot: downloads and indexes the swissprot database "
echo " --vfdb: downloads the virulence factor database "
echo ""
echo "Other parameters:"
echo " --dir: directory where to store the reference databases (defaults zdb_ref in the current directory)"
Expand Down
25 changes: 25 additions & 0 deletions db_setup.nf
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,24 @@ process prepare_swissprot {
"""
}

/*
 * Fetch the two VFDB archives (VFDB_setB_nt.fas.gz and VFs.xls.gz),
 * decompress them to vfdb.fasta and VFs.xls, and delete the archives so
 * that only the decompressed files remain in the task directory.
 * The two declared outputs are published by being moved into params.vf_db.
 */
process download_vfdb {

    publishDir "$params.vf_db", mode: "move"

    output:
        tuple path("vfdb.fasta"), path("VFs.xls")

    script:
        """
        wget http://www.mgc.ac.cn/VFs/Down/VFDB_setB_nt.fas.gz
        gunzip -c VFDB_setB_nt.fas.gz > vfdb.fasta
        rm VFDB_setB_nt.fas.gz

        wget http://www.mgc.ac.cn/VFs/Down/VFs.xls.gz
        gunzip -c VFs.xls.gz > VFs.xls
        rm VFs.xls.gz
        """
}

workflow setup_cogg_db {
download_cog_cdd() | setup_cog_cdd
}
Expand All @@ -174,6 +192,10 @@ workflow setup_swissprot_db {
download_swissprot() | prepare_swissprot
}

// Sub-workflow for the VFDB reference data: it only runs the download_vfdb
// process, which takes no input channel.
workflow setup_vfdb {
download_vfdb()
}

workflow {
if( params.cog )
setup_cogg_db()
Expand All @@ -189,4 +211,7 @@ workflow {

if ( params.blast_swissprot )
setup_swissprot_db()

if ( params.vfdb )
setup_vfdb()
}
2 changes: 2 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ params.pfam_db = "${params.base_db}/pfam/"
params.cog_db = "${params.base_db}/cog/"
params.ko_db = "${params.base_db}/kegg/"
params.refseq_db = "${params.base_db}/refseq/"
params.vf_db = "${params.base_db}/vfdb/"

params.results_dir = "zdb/results"

Expand All @@ -22,6 +23,7 @@ params.cog = false
params.ko = false
params.pfam = false
params.amr = false
params.vfdb = false

params.checkm_args = "domain Bacteria"

Expand Down
17 changes: 17 additions & 0 deletions testing/pipelines/test_db_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,20 @@ def test_creating_swissprot_db(self):
self.assertItemsEqual(
expected_files,
os.listdir(os.path.join(self.ref_db_dir, "uniprot", "swissprot")))

def test_creating_vf_db(self):
    """Run the pipeline with the vfdb option and check the resulting files.

    Verifies that the only executed process is ``setup_vfdb:download_vfdb``
    and that ``vfdb.fasta`` and ``VFs.xls`` are the entries found in the
    ``vfdb`` sub-directory of the reference database directory.
    """
    self.nf_params["vfdb"] = "true"
    run = self.execute_pipeline()
    self.assert_success(run)

    executed_names = [process.name for process in run.process_executions]
    self.assertEqual(executed_names, ["setup_vfdb:download_vfdb"])

    vfdb_download = run.process_executions[0]
    # Files are moved to zdb_ref/vfdb, hence the empty list of files
    # expected for the process itself.
    self.assert_created_files(vfdb_download, [])

    published_dir = os.path.join(self.ref_db_dir, "vfdb")
    self.assertItemsEqual(["vfdb.fasta", "VFs.xls"],
                          os.listdir(published_dir))

0 comments on commit 7f97dcc

Please sign in to comment.