Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nf-test for local module blat_mirna #387

Merged
merged 6 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [[#382]](https://github.com/nf-core/smrnaseq/pull/382) - Add [collapse_mirtop.R](https://github.com/nf-core/smrnaseq/issues/174) - Add nf-tests for local modules using custom R scripts.
- [[#383]](https://github.com/nf-core/smrnaseq/pull/383) - Fix [parameter `--skip_fastp` throws an error](https://github.com/nf-core/smrnaseq/issues/263) - Fix parameter --skip_fastp.
- [[#384]](https://github.com/nf-core/smrnaseq/pull/384) - Fix [filter status bug fix](https://github.com/nf-core/smrnaseq/issues/360) - Fix filter stats module and add filter contaminants test profile.
- [[#387]](https://github.com/nf-core/smrnaseq/pull/387) - Add nf-test for local module `blat_mirna` and fix [contaminant filter failure because the Docker image for BLAT cannot be pulled](https://github.com/nf-core/smrnaseq/issues/354). Add a small test profile to test contaminant filter results.

## v2.3.1 - 2024-04-18 - Gray Zinc Dalmation Patch

Expand Down
41 changes: 41 additions & 0 deletions conf/test_contamination.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test profile is almost identical to test_full_filter_contamination isn't it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes! The input samplesheet is smaller, though

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.

Use as follows:
nextflow run nf-core/smrnaseq -profile test_contamination,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

params {
    // Profile identification
    config_profile_name        = 'Test profile'
    config_profile_description = 'Minimal test dataset to check pipeline function with contamination filter'

    // Resource ceilings, kept small so that the profile can run on GitHub Actions
    max_cpus   = 2
    max_memory = '6.GB'
    max_time   = '6.h'

    // Input data: samplesheet and reference genome
    input = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/samplesheet/v2.0/samplesheet.csv'
    fasta = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/genome.fa'

    // Pipeline options
    mirtrace_species         = 'hsa'
    skip_mirdeep             = true
    save_merged              = false
    save_aligned_mirna_quant = false

    // Contamination filter: switched on, with one reference FASTA per contaminant class
    filter_contamination = true
    cdna  = "https://huggingface.co/datasets/nf-core/smrnaseq/resolve/main/GRCh37/Homo_sapiens.GRCh37.cdna.all.fa"
    ncrna = "https://huggingface.co/datasets/nf-core/smrnaseq/resolve/main/GRCh37/Homo_sapiens.GRCh37.ncrna.fa"
    trna  = "https://huggingface.co/datasets/nf-core/smrnaseq/resolve/main/GRCh37/hg19-tRNAs.fa"
}

// Include illumina config to run test without additional profiles

includeConfig 'protocol_illumina.config'
60 changes: 60 additions & 0 deletions modules/local/blat_mirna/blat_mirna.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
 * BLAT_MIRNA
 *
 * Removes from a contaminant FASTA every record that BLAT aligns to the miRNA
 * reference, so that true miRNA sequences are not later treated as contaminants.
 *
 * Inputs:
 *   db_type      - selects the pre-filter applied to the contaminant headers:
 *                  exactly "cdna"  -> drop records whose 6th header field contains
 *                                     "transcript_biotype:miRNA";
 *                  exactly "ncrna" -> drop records whose 6th header field contains
 *                                     "transcript_biotype:rRNA" or "transcript_biotype:miRNA";
 *                  anything else   -> no pre-filter.
 *                  The comparison is case-sensitive.
 *   mirna        - FASTA used as the BLAT database.
 *   contaminants - FASTA of candidate contaminant sequences, used as the BLAT query.
 *
 * Outputs:
 *   filtered_set - filtered.fa, the (pre-filtered) contaminants minus every record
 *                  whose ID produced a BLAT hit with blast8 column 11 (e-value) < 1e-5.
 *   versions     - versions.yml holding the BLAT version string parsed from `blat` usage output.
 */
process BLAT_MIRNA {
    // Label each task with the contaminant file it screens. The previous tag
    // interpolated `fasta`, which is not an input of this process.
    tag "$contaminants"
    label 'process_medium'

    // Keep the conda environment on the same package and release as the container.
    conda 'bioconda::ucsc-blat=445'
    container 'community.wave.seqera.io/library/ucsc-blat:445--32730933d3c2c916'

    input:
    val db_type
    path mirna
    path contaminants

    output:
    path 'filtered.fa'  , emit: filtered_set
    path "versions.yml" , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Every branch runs the same three steps:
    //   1. (cdna / ncrna only) awk pre-filter on header field 6 -> subset.fa
    //   2. blat in blast8 format; keep query IDs with column 11 < 1e-5 -> mirnahit.txt
    //   3. awk prints every record whose ">ID" is not listed in mirnahit.txt -> filtered.fa
    // `uniq` only collapses adjacent duplicates; that is sufficient here because
    // mirnahit.txt is only used as a lookup set in step 3.
    // NOTE(review): the three-argument match() used to parse the version is a gawk
    // extension - confirm the awk available in the container/conda env supports it.
    if ( db_type == "cdna" )
        """
        echo $db_type
        awk '/^>/ { x=index(\$6, "transcript_biotype:miRNA") } { if(!x) print }' $contaminants > subset.fa
        blat -out=blast8 $mirna subset.fa /dev/stdout | awk 'BEGIN{FS="\t"}{if(\$11 < 1e-5)print \$1;}' | uniq > mirnahit.txt
        awk 'BEGIN { while((getline<"mirnahit.txt")>0) l[">"\$1]=1 } /^>/ {x = l[\$1]} {if(!x) print }' subset.fa > filtered.fa

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            blat: \$(echo \$(blat) | grep Standalone | awk '{ if (match(\$0,/[0-9]*[0-9]/,m)) print m[0] }')
        END_VERSIONS
        """

    else if ( db_type == "ncrna" )
        """
        echo $db_type
        awk '/^>/ { x=(index(\$6, "transcript_biotype:rRNA") || index(\$6, "transcript_biotype:miRNA")) } { if(!x) print }' $contaminants > subset.fa
        blat -out=blast8 $mirna subset.fa /dev/stdout | awk 'BEGIN{FS="\t"}{if(\$11 < 1e-5)print \$1;}' | uniq > mirnahit.txt
        awk 'BEGIN { while((getline<"mirnahit.txt")>0) l[">"\$1]=1 } /^>/ {x = l[\$1]} {if(!x) print }' subset.fa > filtered.fa

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            blat: \$(echo \$(blat) | grep Standalone | awk '{ if (match(\$0,/[0-9]*[0-9]/,m)) print m[0] }')
        END_VERSIONS
        """

    else
        // Generic branch: no biotype pre-filter, BLAT runs on the contaminant file directly.
        """
        echo $db_type
        blat -out=blast8 $mirna $contaminants /dev/stdout | awk 'BEGIN{FS="\t"}{if(\$11 < 1e-5)print \$1;}' | uniq > mirnahit.txt
        awk 'BEGIN { while((getline<"mirnahit.txt")>0) l[">"\$1]=1 } /^>/ {x = l[\$1]} {if(!x) print }' $contaminants > filtered.fa

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            blat: \$(echo \$(blat) | grep Standalone | awk '{ if (match(\$0,/[0-9]*[0-9]/,m)) print m[0] }')
        END_VERSIONS
        """

}
114 changes: 114 additions & 0 deletions modules/local/blat_mirna/tests/blat_mirna.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// nf-test specification for the local BLAT_MIRNA process.
// Each test feeds one contaminant database through the process and checks that
// the run succeeds, that the outputs match the stored snapshot, and that the
// filtered FASTA no longer contains the records that must have been removed.
nextflow_process {

    name "Test Process BLAT_MIRNA"
    script "../blat_mirna.nf"
    process "BLAT_MIRNA"
    tag "modules"
    tag "modules_local"
    tag "blat_mirna"

    // Exercises the "cdna" branch (miRNA biotype pre-filter + BLAT hit removal).
    test("cDNA BLAT - Human") {

        when {
            params {
                outdir = "${outputDir}"
            }
            process {
                """
                input[0] = "cdna"
                input[1] = [file("https://github.com/nf-core/test-datasets/raw/smrnaseq/miRBase/hairpin.fa", checkIfExists: true)]
                input[2] = [file("https://huggingface.co/datasets/nf-core/smrnaseq/resolve/main/GRCh37/Homo_sapiens.GRCh37.cdna.all.fa", checkIfExists: true)]
                """
            }
        }

        then {
            assert process.success
            assert snapshot(process.out).match()

            with(process.out.filtered_set) {
                with(get(0)) {
                    assert get(0).endsWith("filtered.fa")

                    // Check for AWK filtering of specific biotype
                    def lines = path(get(0)).readLines()
                    assert !lines.any { it.contains("transcript_biotype:miRNA") }

                    // Check for exclusion of miRNAs hits
                    assert !lines.any { it.contains("ENST00000564740.1") }
                }
            }
        }

    }

    // Exercises the "ncrna" branch (rRNA/miRNA biotype pre-filter + BLAT hit removal).
    test("ncRNA BLAT - Human") {

        when {
            params {
                outdir = "${outputDir}"
            }
            process {
                // The process selects its branch with an exact, lowercase comparison
                // (db_type == "ncrna"). Passing "ncRNA" silently ran the generic branch,
                // which applies no biotype pre-filter.
                // The filtered.fa checksum stored for this test in blat_mirna.nf.test.snap
                // was recorded from that generic branch and must be regenerated
                // (nf-test test --update-snapshot).
                """
                input[0] = "ncrna"
                input[1] = [file("https://github.com/nf-core/test-datasets/raw/smrnaseq/miRBase/hairpin.fa", checkIfExists: true)]
                input[2] = [file("https://huggingface.co/datasets/nf-core/smrnaseq/resolve/main/GRCh37/Homo_sapiens.GRCh37.ncrna.fa", checkIfExists: true)]
                """
            }
        }

        then {
            assert process.success
            assert snapshot(process.out).match()

            with(process.out.filtered_set) {
                with(get(0)) {
                    assert get(0).endsWith("filtered.fa")

                    // Check for AWK filtering of specific biotypes
                    def lines = path(get(0)).readLines()
                    assert !lines.any { it.contains("transcript_biotype:rRNA") }
                    assert !lines.any { it.contains("transcript_biotype:miRNA") }

                    // Check for exclusion of miRNAs hits
                    assert !lines.any { it.contains("ENST00000564740.1") }
                }
            }
        }

    }

    // Exercises the generic branch: any db_type other than "cdna"/"ncrna"
    // skips the biotype pre-filter and only removes BLAT hits.
    test("tRNA BLAT - Human") {

        when {
            params {
                outdir = "${outputDir}"
            }
            process {
                """
                input[0] = "tRNA"
                input[1] = [file("https://github.com/nf-core/test-datasets/raw/smrnaseq/miRBase/hairpin.fa", checkIfExists: true)]
                input[2] = [file("https://huggingface.co/datasets/nf-core/smrnaseq/resolve/main/GRCh37/hg19-tRNAs.fa", checkIfExists: true)]
                """
            }
        }

        then {
            assert process.success
            assert snapshot(process.out).match()

            with(process.out.filtered_set) {
                with(get(0)) {
                    assert get(0).endsWith("filtered.fa")

                    // Check for exclusion of miRNAs hits
                    def lines = path(get(0)).readLines()
                    assert !lines.any { it.contains("ENST00000564740.1") }
                }
            }
        }

    }

}
71 changes: 71 additions & 0 deletions modules/local/blat_mirna/tests/blat_mirna.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{
"ncRNA BLAT - Human": {
"content": [
{
"0": [
"filtered.fa:md5,6bc8a430400e2e78cf7f474981230811"
],
"1": [
"versions.yml:md5,e2957df2cc8f0410101564c8e65d1761"
],
"filtered_set": [
"filtered.fa:md5,6bc8a430400e2e78cf7f474981230811"
],
"versions": [
"versions.yml:md5,e2957df2cc8f0410101564c8e65d1761"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.4"
},
"timestamp": "2024-08-23T17:53:35.313580289"
},
"tRNA BLAT - Human": {
"content": [
{
"0": [
"filtered.fa:md5,6b54e95ca5418d7d9c4d331ca3b2c96f"
],
"1": [
"versions.yml:md5,e2957df2cc8f0410101564c8e65d1761"
],
"filtered_set": [
"filtered.fa:md5,6b54e95ca5418d7d9c4d331ca3b2c96f"
],
"versions": [
"versions.yml:md5,e2957df2cc8f0410101564c8e65d1761"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.4"
},
"timestamp": "2024-08-23T17:57:58.238216453"
},
"cDNA BLAT - Human": {
"content": [
{
"0": [
"filtered.fa:md5,8fd42894e815999b4278b08297720aae"
],
"1": [
"versions.yml:md5,e2957df2cc8f0410101564c8e65d1761"
],
"filtered_set": [
"filtered.fa:md5,8fd42894e815999b4278b08297720aae"
],
"versions": [
"versions.yml:md5,e2957df2cc8f0410101564c8e65d1761"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.4"
},
"timestamp": "2024-08-23T17:53:16.735132971"
}
}
3 changes: 2 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,8 @@ profiles {
test_index { includeConfig 'conf/test_index.config' }
test_technical_repeats { includeConfig 'conf/test_technical_repeats.config' }
test_mirgenedb { includeConfig 'conf/test_mirgenedb.config' }
test_skipfastp { includeConfig 'conf/test_skipfastp.config' }
test_contamination { includeConfig 'conf/test_contamination.config' }
test_skipfastp { includeConfig 'conf/test_skipfastp.config' }


//Protocol specific profiles
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/contaminant_filter.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
include { BLAT_MIRNA as BLAT_CDNA
BLAT_MIRNA as BLAT_NCRNA
BLAT_MIRNA as BLAT_PIRNA
BLAT_MIRNA as BLAT_OTHER } from '../../modules/local/blat_mirna'
BLAT_MIRNA as BLAT_OTHER } from '../../modules/local/blat_mirna/blat_mirna'

include { INDEX_CONTAMINANTS as INDEX_RRNA
INDEX_CONTAMINANTS as INDEX_TRNA
Expand Down
Loading