Skip to content

Commit

Permalink
Deeptmhmm (#3981)
Browse files Browse the repository at this point in the history
* deeptmhmm main.nf

* deeptmhmm meta.yml

* deeptmhmm tests main

* deeptmhmm tests validation
  • Loading branch information
vagkaratzas authored Oct 13, 2023
1 parent 063f81a commit 709fdc9
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 0 deletions.
62 changes: 62 additions & 0 deletions modules/nf-core/deeptmhmm/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Runs the DTU/DeepTMHMM application through the BioLib command-line client
// on a (optionally gzip-compressed) FASTA file.
//
// Input : tuple val(meta), path(fasta)
// Output: files written below `biolib_results/` (gff3, line3 and md are
//         required; csv and png are optional) plus `versions.yml`.
process DEEPTMHMM {
    tag "$meta.id"
    label 'process_medium'

    conda "bioconda::pybiolib=1.1.1393"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/pybiolib:1.1.1393--pyhdfd78af_0':
        'biocontainers/pybiolib:1.1.1393--pyhdfd78af_0' }"

    input:
    tuple val(meta), path(fasta)

    output:
    tuple val(meta), path("biolib_results/TMRs.gff3")                 , emit: gff3
    tuple val(meta), path("biolib_results/predicted_topologies.3line"), emit: line3
    tuple val(meta), path("biolib_results/deeptmhmm_results.md")      , emit: md
    tuple val(meta), path("biolib_results/*_probs.csv")               , optional: true, emit: csv
    tuple val(meta), path("biolib_results/plot.png")                  , optional: true, emit: png
    path "versions.yml"                                               , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args          = task.ext.args ?: ''
    def is_compressed = fasta.name.endsWith(".gz")
    // Remove only the trailing ".gz"; a plain replace() would also delete any
    // ".gz" that occurs in the middle of the file name.
    def fasta_name    = fasta.name.replaceAll(/\.gz$/, '')

    """
    if [ "$is_compressed" == "true" ]; then
        gzip -c -d $fasta > $fasta_name
    fi

    biolib \\
        run \\
        DTU/DeepTMHMM \\
        --fasta ${fasta_name} \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        biolib: \$(echo \$(biolib --version) | sed -n 's/.*version \\([0-9.]*\\).*/\\1/p' )
    END_VERSIONS
    """

    stub:
    // Creates empty placeholders for every declared output, including the
    // optional ones, without running the prediction itself.
    """
    mkdir biolib_results
    touch biolib_results/TMRs.gff3
    touch biolib_results/predicted_topologies.3line
    touch biolib_results/deeptmhmm_results.md
    touch biolib_results/MX_probs.csv
    touch biolib_results/plot.png

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        biolib: \$(echo \$(biolib --version) | sed -n 's/.*version \\([0-9.]*\\).*/\\1/p' )
    END_VERSIONS
    """
}
67 changes: 67 additions & 0 deletions modules/nf-core/deeptmhmm/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
# Module metadata for the DEEPTMHMM process: tool description, the input
# channel elements and every emitted output channel.
name: deeptmhmm
description: A Deep Learning Model for Transmembrane Topology Prediction and Classification
keywords:
  - transmembrane
  - protein
  - classification
tools:
  - deeptmhmm:
      description: Deep Learning model for Transmembrane Helices protein domain prediction through the BioLib Python Client
      homepage: https://dtu.biolib.com/DeepTMHMM
      documentation: https://dtu.biolib.com/DeepTMHMM
      doi: "10.1101/2022.04.08.487609"
      licence: ["MIT"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fasta:
      type: file
      description: Database of sequences in FASTA format
      pattern: "*.{fasta,fa,fasta.gz,fa.gz}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - gff3:
      type: file
      description: Predicted topologies (inside, outside, TMhelix) in General Feature Format version 3 (GFF3)
      pattern: "biolib_results/TMRs.gff3"

  - line3:
      type: file
      description: Predicted topologies and information of protein sequences in three lines (name, sequence, topology)
      pattern: "biolib_results/predicted_topologies.3line"

  - md:
      type: file
      description: Markdown results file
      pattern: "biolib_results/deeptmhmm_results.md"

  # csv and png are declared `optional: true` in the process outputs.
  - csv:
      type: file
      description: CSV file with per-residue predictions for the likelihood of each amino acid being in structural regions such as Beta-sheet, Periplasm, Membrane, Inside, Outside or Signal (only when querying against genomic fasta)
      pattern: "biolib_results/*_probs.csv"

  - png:
      type: file
      description: Most likely topology probability line plots (only when querying against genomic fasta)
      pattern: "biolib_results/plot.png"

  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@vagkaratzas"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,10 @@ deepcell/mesmer:
- modules/nf-core/deepcell/mesmer/**
- tests/modules/nf-core/deepcell/mesmer/**

# Path globs listed under the `deeptmhmm` tag: the module sources and its tests.
deeptmhmm:
- modules/nf-core/deeptmhmm/**
- tests/modules/nf-core/deeptmhmm/**

deeptools/bamcoverage:
- modules/nf-core/deeptools/bamcoverage/**
- tests/modules/nf-core/deeptools/bamcoverage/**
Expand Down
19 changes: 19 additions & 0 deletions tests/modules/nf-core/deeptmhmm/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { DEEPTMHMM } from '../../../../modules/nf-core/deeptmhmm/main.nf'

// Runs DEEPTMHMM on the uncompressed sarscov2 `proteome_fasta` test file.
workflow test_deeptmhmm {

    // [ meta map, [ fasta file ] ] -- the file stays wrapped in a single-element list
    input = [
        [ id:'test' ],
        [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
    ]

    DEEPTMHMM ( input )
}

// Runs DEEPTMHMM on the gzip-compressed sarscov2 `genome_fasta_gz` test file,
// which exercises the decompression branch of the module script.
workflow test_deeptmhmm_gz {

    // [ meta map, [ gzipped fasta file ] ] -- the file stays wrapped in a single-element list
    input = [
        [ id:'test' ],
        [ file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true) ]
    ]

    DEEPTMHMM ( input )
}
5 changes: 5 additions & 0 deletions tests/modules/nf-core/deeptmhmm/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
process {

    // Publish into <params.outdir>/<name>, where <name> is the text after the
    // last ':' of task.process, cut at its first '_' and lower-cased.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }

}
29 changes: 29 additions & 0 deletions tests/modules/nf-core/deeptmhmm/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Uncompressed FASTA input: only the three required result files are checked.
- name: deeptmhmm test_deeptmhmm
  command: nextflow run ./tests/modules/nf-core/deeptmhmm -entry test_deeptmhmm -c ./tests/config/nextflow.config
  tags:
    - deeptmhmm
  files:
    - path: output/deeptmhmm/biolib_results/TMRs.gff3
      md5sum: 563b7cf9659f955c3353ab7ba2ed4c41
    - path: output/deeptmhmm/biolib_results/deeptmhmm_results.md
      md5sum: 14867a44faf50cab3f56874b47ff21ec
    - path: output/deeptmhmm/biolib_results/predicted_topologies.3line
      md5sum: 50c853fd0dc0f28aff0bdb6d35d08ca2
    # Existence check only: no md5sum is recorded for versions.yml.
    - path: output/deeptmhmm/versions.yml

# Gzipped FASTA input: the optional probability CSV and plot are checked as well.
- name: deeptmhmm test_deeptmhmm_gz
  command: nextflow run ./tests/modules/nf-core/deeptmhmm -entry test_deeptmhmm_gz -c ./tests/config/nextflow.config
  tags:
    - deeptmhmm
  files:
    - path: output/deeptmhmm/biolib_results/MT192765.1_probs.csv
      md5sum: ac50b0378d09761e9c2931073ee92436
    - path: output/deeptmhmm/biolib_results/TMRs.gff3
      md5sum: f338e8a8280d62112ed6d84087dc9f82
    - path: output/deeptmhmm/biolib_results/deeptmhmm_results.md
      md5sum: 2873f7df7582bef408ca8e51f5119dd9
    - path: output/deeptmhmm/biolib_results/plot.png
      md5sum: 5e9e47bfc5c07b81c1056293d3329920
    - path: output/deeptmhmm/biolib_results/predicted_topologies.3line
      md5sum: 95b7fedc0a110dd20ab3e265727c2c62
    # Existence check only: no md5sum is recorded for versions.yml.
    - path: output/deeptmhmm/versions.yml

0 comments on commit 709fdc9

Please sign in to comment.