Skip to content

Commit

Permalink
Merge pull request #386 from nf-core/trim3p_nextflex
Browse files Browse the repository at this point in the history
  • Loading branch information
apeltzer authored Sep 7, 2024
2 parents 874ef64 + 7a15447 commit f7cd5ba
Show file tree
Hide file tree
Showing 8 changed files with 327 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [[#382]](https://github.com/nf-core/smrnaseq/pull/382) - Add [collapse_mirtop.R](https://github.com/nf-core/smrnaseq/issues/174) - Add nf-tests for local modules using custom R scripts.
- [[#383]](https://github.com/nf-core/smrnaseq/pull/383) - Fix [parameter `--skip_fastp` throws an error](https://github.com/nf-core/smrnaseq/issues/263) - Fix parameter --skip_fastp.
- [[#384]](https://github.com/nf-core/smrnaseq/pull/384) - Fix [filter status bug fix](https://github.com/nf-core/smrnaseq/issues/360) - Fix filter stats module and add filter contaminants test profile.
- [[#386]](https://github.com/nf-core/smrnaseq/pull/386) - Fix [Nextflex trimming support](https://github.com/nf-core/smrnaseq/issues/365) - Fix Nextflex trimming support.
- [[#387]](https://github.com/nf-core/smrnaseq/pull/387) - Add [contaminant filter failure because the Docker image for BLAT cannot be pulled](https://github.com/nf-core/smrnaseq/issues/354) - Add nf-test to local module `blat_mirna` and fix the contaminant filter failure caused by the BLAT Docker image that could not be pulled. Add a small test profile to test contaminant filter results.
- [[#388]](https://github.com/nf-core/smrnaseq/pull/388) - Fix [igenomes fix](https://github.com/nf-core/smrnaseq/issues/360) - Fix workflow scripts so that they can use igenome parameters.
- [[#391]](https://github.com/nf-core/smrnaseq/pull/391) - Fix [error because of large chromosomes](https://github.com/nf-core/smrnaseq/issues/132) - Replace the `.bai` index with a `.csi` index in `samtools_index` to fix the error caused by large chromosomes.
Expand Down
26 changes: 25 additions & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ process {
ext.args = [ "",
params.trim_fastq ? "" : "--disable_adapter_trimming",
params.clip_r1 > 0 ? "--trim_front1 ${params.clip_r1}" : "", // Remove bp from the 5' end of read 1.
params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : "", // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed.
params.fastp_min_length > 0 ? "-l ${params.fastp_min_length}" : "",
params.fastp_max_length > 0 ? "--max_len1 ${params.fastp_max_length}" : "",
params.three_prime_adapter == "auto-detect" ? "" : "--adapter_sequence ${params.three_prime_adapter}"
Expand Down Expand Up @@ -79,6 +78,31 @@ process {
]
]
}
//
// FASTP3
// Settings applied to every process whose fully-qualified name ends in ':FASTP3'.
// Adapter trimming and quality filtering are switched off for this process; only the
// 3' clipping option and the read-length options are forwarded to the tool.
//
withName: '.*:FASTP3' {
// Output prefix is "<meta.id>.fastp3".
ext.prefix = { "${meta.id}.fastp3" }
// Options that are not enabled contribute an empty string to the list; the list is
// joined with single spaces and trim() strips the blanks left at either end.
ext.args = [ "",
"--disable_adapter_trimming",
"--disable_quality_filtering",
params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : "", // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed.
// Passed only when params.fastp_min_length is greater than 0.
params.fastp_min_length > 0 ? "-l ${params.fastp_min_length}" : "",
// Passed only when params.fastp_max_length is greater than 0.
params.fastp_max_length > 0 ? "--max_len1 ${params.fastp_max_length}" : "",
].join(" ").trim()
// JSON/HTML reports go to <outdir>/fastp/on_raw and *.log files to <outdir>/fastp/on_raw/log.
// NOTE(review): the directory is named "on_raw" although this process presumably runs on
// already-trimmed reads — confirm this is the intended publish location.
publishDir = [
[
path: { "${params.outdir}/fastp/on_raw" },
mode: params.publish_dir_mode,
pattern: "*.{json,html}"
],
[
path: { "${params.outdir}/fastp/on_raw/log" },
mode: params.publish_dir_mode,
pattern: "*.log"
]
]
}
withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_RAW' {
//the prefix is required for multiqc to pickup the files separately from the other fastqc instances
ext.prefix = { "${meta.id}.raw" }
Expand Down
36 changes: 36 additions & 0 deletions conf/test_nextflex.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
========================================================================================
Nextflow config file for running minimal tests
========================================================================================
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/smrnaseq -profile test_nextflex,<docker/singularity>
----------------------------------------------------------------------------------------
*/
// This test profile tests nextflex without genome

// Parameters for the Nextflex test profile: resource caps, the test samplesheet and
// the miRNA reference files. No genome reference is set in this block.
params {
// Name and description attached to this profile
config_profile_name = 'Nextflex Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'

// Input data
// Samplesheet plus mature/hairpin FASTA and miRNA GFF3 references, all fetched from nf-core/test-datasets
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/smrnaseq/samplesheet/v2.0/samplesheet_test_nextflex.csv'
mature = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/mature.fa'
hairpin = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hairpin.fa'
mirna_gtf = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hsa.gff3'
mirtrace_species = 'hsa'

// NOTE(review): mirdeep is skipped, presumably because this profile supplies no genome — confirm
skip_mirdeep = true

}

// Include nextflex config to run test without additional profiles

includeConfig 'protocol_nextflex.config'
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ profiles {
test_contamination { includeConfig 'conf/test_contamination.config' }
test_contamination_tech_reps { includeConfig 'conf/test_contamination_tech_reps.config' }
test_skipfastp { includeConfig 'conf/test_skipfastp.config' }
test_nextflex { includeConfig 'conf/test_nextflex.config' }


//Protocol specific profiles
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/prepare_genome/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ workflow PREPARE_GENOME {

ch_mirtrace_species = val_mirtrace_species ? Channel.value(val_mirtrace_species) : Channel.empty()
mirna_gtf_from_species = val_mirtrace_species ? (val_mirtrace_species == 'hsa' ? "https://github.com/nf-core/test-datasets/raw/smrnaseq/miRBase/hsa.gff3" : "https://mirbase.org/download/CURRENT/genomes/${val_mirtrace_species}.gff3") : false
ch_mirna_gtf = val_mirna_gtf ? Channel.empty() : ( mirna_gtf_from_species ? Channel.fromPath(mirna_gtf_from_species, checkIfExists: true).collect() : Channel.empty() ) //TODO for ch_mirna_gtf, shouldn't it try to build a channel.fromPath with params.mirna_gtf, if true? (instead of setting it to empty). Is this parameter used for non mirgenedb runs?
ch_mirna_gtf = val_mirna_gtf ? Channel.fromPath(val_mirna_gtf, checkIfExists: true) : ( mirna_gtf_from_species ? Channel.fromPath(mirna_gtf_from_species, checkIfExists: true).collect() : Channel.empty() )
ch_mirna_adapters = params.with_umi ? [] : Channel.fromPath(val_fastp_known_mirna_adapters, checkIfExists: true).collect()

ch_rrna = val_rrna ? Channel.fromPath(val_rrna) : Channel.empty()
Expand Down
122 changes: 122 additions & 0 deletions tests/test_nextflex.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@

// Pipeline-level nf-test: runs main.nf with the "test_nextflex" profile and compares
// selected outputs against stored snapshots.
nextflow_pipeline {

name "Test Workflow main.nf - test_nextflex"
script "main.nf"
profile "test_nextflex"
tag "test_nextflex"
tag "pipeline"

test("test_nextflex") {

when {
params {
// Write all pipeline results into the nf-test managed output directory
outdir = "$outputDir"
}
}

then {
// Arguments wrapped in path(...) alone are snapshotted as files; arguments ending in
// .exists() contribute only a boolean, so only the presence of those files is checked.
// Do not reorder the arguments: their order is part of the stored snapshot.
assertAll(
{ assert workflow.success },
// NOTE(review): UTILS.removeNextflowVersion is defined outside this file; presumably it
// strips the Nextflow version from the software versions output — confirm.
{ assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") },
// Expected number of successfully completed tasks for this profile; needs updating
// whenever processes are added to or removed from the tested path.
{ assert workflow.trace.succeeded().size() == 65 },

// Alignment statistics (idxstats/flagstat/stats) for the mature and hairpin BAM files
{ assert snapshot(
path("$outputDir/mirna_quant/bam/mature/sample2_mature.sorted.idxstats"),
path("$outputDir/mirna_quant/bam/mature/sample1_mature.sorted.flagstat"),
path("$outputDir/mirna_quant/bam/mature/sample2_mature.sorted.stats"),
path("$outputDir/mirna_quant/bam/mature/sample3_mature.sorted.stats"),
path("$outputDir/mirna_quant/bam/mature/sample3_mature.sorted.flagstat"),
path("$outputDir/mirna_quant/bam/mature/sample3_mature.sorted.idxstats"),
path("$outputDir/mirna_quant/bam/mature/sample1_mature.sorted.stats"),
path("$outputDir/mirna_quant/bam/mature/sample2_mature.sorted.flagstat"),
path("$outputDir/mirna_quant/bam/mature/sample1_mature.sorted.idxstats"),
path("$outputDir/mirna_quant/bam/hairpin/sample3_mature_hairpin.sorted.stats"),
path("$outputDir/mirna_quant/bam/hairpin/sample2_mature_hairpin.sorted.flagstat"),
path("$outputDir/mirna_quant/bam/hairpin/sample1_mature_hairpin.sorted.flagstat"),
path("$outputDir/mirna_quant/bam/hairpin/sample1_mature_hairpin.sorted.idxstats"),
path("$outputDir/mirna_quant/bam/hairpin/sample3_mature_hairpin.sorted.flagstat"),
path("$outputDir/mirna_quant/bam/hairpin/sample2_mature_hairpin.sorted.stats"),
path("$outputDir/mirna_quant/bam/hairpin/sample1_mature_hairpin.sorted.stats"),
path("$outputDir/mirna_quant/bam/hairpin/sample3_mature_hairpin.sorted.idxstats"),
path("$outputDir/mirna_quant/bam/hairpin/sample2_mature_hairpin.sorted.idxstats")
).match("mirna_quant_bam") },

// edgeR QC outputs: presence only (every entry uses .exists())
{ assert snapshot(
path("$outputDir/mirna_quant/edger_qc/hairpin_counts.csv").exists(),
path("$outputDir/mirna_quant/edger_qc/hairpin_edgeR_MDS_plot_coordinates.txt").exists(),
path("$outputDir/mirna_quant/edger_qc/hairpin_edgeR_MDS_distance_matrix.txt").exists(),
path("$outputDir/mirna_quant/edger_qc/hairpin_log2CPM_sample_distances.txt").exists(),
path("$outputDir/mirna_quant/edger_qc/hairpin_logtpm.csv").exists(),
path("$outputDir/mirna_quant/edger_qc/hairpin_logtpm.txt").exists(),
path("$outputDir/mirna_quant/edger_qc/hairpin_normalized_CPM.txt").exists(),
path("$outputDir/mirna_quant/edger_qc/mature_edgeR_MDS_plot_coordinates.txt").exists(),
path("$outputDir/mirna_quant/edger_qc/mature_edgeR_MDS_distance_matrix.txt").exists(),
path("$outputDir/mirna_quant/edger_qc/mature_counts.csv").exists(),
path("$outputDir/mirna_quant/edger_qc/mature_log2CPM_sample_distances.txt").exists(),
path("$outputDir/mirna_quant/edger_qc/hairpin_unmapped_read_counts.txt").exists(),
path("$outputDir/mirna_quant/edger_qc/mature_normalized_CPM.txt").exists(),
path("$outputDir/mirna_quant/edger_qc/mature_logtpm.csv").exists(),
path("$outputDir/mirna_quant/edger_qc/mature_logtpm.txt").exists(),
path("$outputDir/mirna_quant/edger_qc/mature_unmapped_read_counts.txt").exists()
).match("mirna_quant_edger_qc") },

// miRTrace outputs: the HTML report is checked for presence only, the TSV tables by content
{ assert snapshot(
path("$outputDir/mirtrace/mirtrace-report.html").exists(),
path("$outputDir/mirtrace/mirtrace-stats-contamination_basic.tsv"),
path("$outputDir/mirtrace/mirtrace-stats-mirna-complexity.tsv"),
path("$outputDir/mirtrace/mirtrace-stats-phred.tsv"),
path("$outputDir/mirtrace/mirtrace-stats-length.tsv"),
path("$outputDir/mirtrace/mirtrace-stats-contamination_detailed.tsv"),
path("$outputDir/mirtrace/mirtrace-stats-qcstatus.tsv"),
path("$outputDir/mirtrace/mirtrace-stats-rnatype.tsv")
).match("mirtrace") },

// MultiQC data tables: the overrepresented-sequences tables and multiqc_sources.txt
// are checked for presence only, everything else by content
{ assert snapshot(
path("$outputDir/multiqc/multiqc_data/fastqc-status-check-heatmap.txt"),
path("$outputDir/multiqc/multiqc_data/fastp_filtered_reads_plot.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_overrepresented_sequences_plot.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table-1.txt").exists(),
path("$outputDir/multiqc/multiqc_data/fastqc_sequence_counts_plot-1.txt"),
path("$outputDir/multiqc/multiqc_data/mirtrace_complexity_plot.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot-1_Percentages.txt"),
path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"),
path("$outputDir/multiqc/multiqc_data/samtools-stats-dp.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt"),
path("$outputDir/multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_Before_filtering.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot-1.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot-1.txt"),
path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_per_base_n_content_plot-1.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt"),
path("$outputDir/multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_After_filtering.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt"),
path("$outputDir/multiqc/multiqc_data/mirtrace_qc_plot.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot-1.txt"),
path("$outputDir/multiqc/multiqc_data/mirtrace_length_plot.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt").exists(),
path("$outputDir/multiqc/multiqc_data/fastqc-status-check-heatmap-1.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_sequence_counts_plot.txt"),
path("$outputDir/multiqc/multiqc_data/mirtrace_rna_categories_plot.txt"),
path("$outputDir/multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_Before_filtering.txt"),
path("$outputDir/multiqc/multiqc_data/samtools_alignment_plot.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt"),
path("$outputDir/multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_After_filtering.txt"),
path("$outputDir/multiqc/multiqc_data/mirtrace_contamination_check_plot.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_adapter_content_plot.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot-1_Counts.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_adapter_content_plot-1.txt"),
path("$outputDir/multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_Before_filtering.txt"),
path("$outputDir/multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt"),
path("$outputDir/multiqc/multiqc_data/multiqc_sources.txt").exists(),
path("$outputDir/multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_After_filtering.txt")
).match("multiqc_multiqc_data") },

)
}

}

}
Loading

0 comments on commit f7cd5ba

Please sign in to comment.