diff --git a/CHANGELOG.md b/CHANGELOG.md index b3a3d255..3cb43abe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#382]](https://github.com/nf-core/smrnaseq/pull/382) - Add [collapse_mirtop.R](https://github.com/nf-core/smrnaseq/issues/174) - Add nf-tests for local modules using custom R scripts. - [[#383]](https://github.com/nf-core/smrnaseq/pull/383) - Fix [parameter `--skip_fastp` throws an error](https://github.com/nf-core/smrnaseq/issues/263) - Fix parameter --skip_fastp. - [[#384]](https://github.com/nf-core/smrnaseq/pull/384) - Fix [filter status bug fix](https://github.com/nf-core/smrnaseq/issues/360) - Fix filter stats module and add filter contaminants test profile. +- [[#386]](https://github.com/nf-core/smrnaseq/pull/386) - Fix [Nextflex trimming support](https://github.com/nf-core/smrnaseq/issues/365) - Fix Nextflex trimming support. - [[#387]](https://github.com/nf-core/smrnaseq/pull/387) - Add [contaminant filter failure because the Docker image for BLAT cannot be pulled](https://github.com/nf-core/smrnaseq/issues/354) - Add nf-test to local module `blat_mirna` and fixes . Adds a small test profile to test contaminant filter results. - [[#388]](https://github.com/nf-core/smrnaseq/pull/388) - Fix [igenomes fix](https://github.com/nf-core/smrnaseq/issues/360) - Fix workflow scripts so that they can use igenome parameters. - [[#391]](https://github.com/nf-core/smrnaseq/pull/391) - Fix [error because of large chromosomes](https://github.com/nf-core/smrnaseq/issues/132) - Change `.bai` index for `.csi` index in `samtools_index` to fix . diff --git a/conf/modules.config b/conf/modules.config index f6d797de..e379e211 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -49,7 +49,6 @@ process { ext.args = [ "", params.trim_fastq ? "" : "--disable_adapter_trimming", params.clip_r1 > 0 ? 
"--trim_front1 ${params.clip_r1}" : "", // Remove bp from the 5' end of read 1. - params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : "", // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed. params.fastp_min_length > 0 ? "-l ${params.fastp_min_length}" : "", params.fastp_max_length > 0 ? "--max_len1 ${params.fastp_max_length}" : "", params.three_prime_adapter == "auto-detect" ? "" : "--adapter_sequence ${params.three_prime_adapter}" @@ -79,6 +78,31 @@ process { ] ] } + // + // FASTQ_FASTQC_UMITOOLS_FASTP + // + withName: '.*:FASTP3' { + ext.prefix = { "${meta.id}.fastp3" } + ext.args = [ "", + "--disable_adapter_trimming", + "--disable_quality_filtering", + params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : "", // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed. + params.fastp_min_length > 0 ? "-l ${params.fastp_min_length}" : "", + params.fastp_max_length > 0 ? 
"--max_len1 ${params.fastp_max_length}" : "", + ].join(" ").trim() + publishDir = [ + [ + path: { "${params.outdir}/fastp/on_raw" }, + mode: params.publish_dir_mode, + pattern: "*.{json,html}" + ], + [ + path: { "${params.outdir}/fastp/on_raw/log" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ] + ] + } withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_RAW' { //the prefix is required for multiqc to pickup the files separately from the other fastqc instances ext.prefix = { "${meta.id}.raw" } diff --git a/conf/test_nextflex.config b/conf/test_nextflex.config new file mode 100644 index 00000000..6967c933 --- /dev/null +++ b/conf/test_nextflex.config @@ -0,0 +1,36 @@ +/* +======================================================================================== + Nextflow config file for running minimal tests +======================================================================================== + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/smrnaseq -profile test_nextflex, + +---------------------------------------------------------------------------------------- +*/ +// This test profile tests nextflex without genome + +params { + config_profile_name = 'Nextflex Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/smrnaseq/samplesheet/v2.0/samplesheet_test_nextflex.csv' + mature = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/mature.fa' + hairpin = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hairpin.fa' + mirna_gtf = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hsa.gff3' + mirtrace_species = 'hsa' + + skip_mirdeep = true + +} + +// Include nextflex config to run test without 
additional profiles + +includeConfig 'protocol_nextflex.config' diff --git a/nextflow.config b/nextflow.config index bb588a50..bcdaa324 100644 --- a/nextflow.config +++ b/nextflow.config @@ -248,6 +248,7 @@ profiles { test_contamination { includeConfig 'conf/test_contamination.config' } test_contamination_tech_reps { includeConfig 'conf/test_contamination_tech_reps.config' } test_skipfastp { includeConfig 'conf/test_skipfastp.config' } + test_nextflex { includeConfig 'conf/test_nextflex.config' } //Protocol specific profiles diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 81943f7f..086c860e 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -34,7 +34,7 @@ workflow PREPARE_GENOME { ch_mirtrace_species = val_mirtrace_species ? Channel.value(val_mirtrace_species) : Channel.empty() mirna_gtf_from_species = val_mirtrace_species ? (val_mirtrace_species == 'hsa' ? "https://github.com/nf-core/test-datasets/raw/smrnaseq/miRBase/hsa.gff3" : "https://mirbase.org/download/CURRENT/genomes/${val_mirtrace_species}.gff3") : false - ch_mirna_gtf = val_mirna_gtf ? Channel.empty() : ( mirna_gtf_from_species ? Channel.fromPath(mirna_gtf_from_species, checkIfExists: true).collect() : Channel.empty() ) //TODO for ch_mirna_gtf, shouldn't it try to build a channel.fromPath with params.mirna_gtf, if true? (instead of setting it to empty). Is this parameter used for non mirgenedb runs? + ch_mirna_gtf = val_mirna_gtf ? Channel.fromPath(val_mirna_gtf, checkIfExists: true).collect() : ( mirna_gtf_from_species ? Channel.fromPath(mirna_gtf_from_species, checkIfExists: true).collect() : Channel.empty() ) // A user-supplied GTF takes precedence over the species-derived one; both branches use .collect() so the channel has the same shape either way ch_mirna_adapters = params.with_umi ? [] : Channel.fromPath(val_fastp_known_mirna_adapters, checkIfExists: true).collect() ch_rrna = val_rrna ? 
Channel.fromPath(val_rrna) : Channel.empty() diff --git a/tests/test_nextflex.nf.test b/tests/test_nextflex.nf.test new file mode 100644 index 00000000..5d8a1371 --- /dev/null +++ b/tests/test_nextflex.nf.test @@ -0,0 +1,122 @@ + +nextflow_pipeline { + + name "Test Workflow main.nf - test_nextflex" + script "main.nf" + profile "test_nextflex" + tag "test_nextflex" + tag "pipeline" + + test("test_nextflex") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, + { assert workflow.trace.succeeded().size() == 65 }, + + { assert snapshot( + path("$outputDir/mirna_quant/bam/mature/sample2_mature.sorted.idxstats"), + path("$outputDir/mirna_quant/bam/mature/sample1_mature.sorted.flagstat"), + path("$outputDir/mirna_quant/bam/mature/sample2_mature.sorted.stats"), + path("$outputDir/mirna_quant/bam/mature/sample3_mature.sorted.stats"), + path("$outputDir/mirna_quant/bam/mature/sample3_mature.sorted.flagstat"), + path("$outputDir/mirna_quant/bam/mature/sample3_mature.sorted.idxstats"), + path("$outputDir/mirna_quant/bam/mature/sample1_mature.sorted.stats"), + path("$outputDir/mirna_quant/bam/mature/sample2_mature.sorted.flagstat"), + path("$outputDir/mirna_quant/bam/mature/sample1_mature.sorted.idxstats"), + path("$outputDir/mirna_quant/bam/hairpin/sample3_mature_hairpin.sorted.stats"), + path("$outputDir/mirna_quant/bam/hairpin/sample2_mature_hairpin.sorted.flagstat"), + path("$outputDir/mirna_quant/bam/hairpin/sample1_mature_hairpin.sorted.flagstat"), + path("$outputDir/mirna_quant/bam/hairpin/sample1_mature_hairpin.sorted.idxstats"), + path("$outputDir/mirna_quant/bam/hairpin/sample3_mature_hairpin.sorted.flagstat"), + path("$outputDir/mirna_quant/bam/hairpin/sample2_mature_hairpin.sorted.stats"), + path("$outputDir/mirna_quant/bam/hairpin/sample1_mature_hairpin.sorted.stats"), + 
path("$outputDir/mirna_quant/bam/hairpin/sample3_mature_hairpin.sorted.idxstats"), + path("$outputDir/mirna_quant/bam/hairpin/sample2_mature_hairpin.sorted.idxstats") + ).match("mirna_quant_bam") }, + + { assert snapshot( + path("$outputDir/mirna_quant/edger_qc/hairpin_counts.csv").exists(), + path("$outputDir/mirna_quant/edger_qc/hairpin_edgeR_MDS_plot_coordinates.txt").exists(), + path("$outputDir/mirna_quant/edger_qc/hairpin_edgeR_MDS_distance_matrix.txt").exists(), + path("$outputDir/mirna_quant/edger_qc/hairpin_log2CPM_sample_distances.txt").exists(), + path("$outputDir/mirna_quant/edger_qc/hairpin_logtpm.csv").exists(), + path("$outputDir/mirna_quant/edger_qc/hairpin_logtpm.txt").exists(), + path("$outputDir/mirna_quant/edger_qc/hairpin_normalized_CPM.txt").exists(), + path("$outputDir/mirna_quant/edger_qc/mature_edgeR_MDS_plot_coordinates.txt").exists(), + path("$outputDir/mirna_quant/edger_qc/mature_edgeR_MDS_distance_matrix.txt").exists(), + path("$outputDir/mirna_quant/edger_qc/mature_counts.csv").exists(), + path("$outputDir/mirna_quant/edger_qc/mature_log2CPM_sample_distances.txt").exists(), + path("$outputDir/mirna_quant/edger_qc/hairpin_unmapped_read_counts.txt").exists(), + path("$outputDir/mirna_quant/edger_qc/mature_normalized_CPM.txt").exists(), + path("$outputDir/mirna_quant/edger_qc/mature_logtpm.csv").exists(), + path("$outputDir/mirna_quant/edger_qc/mature_logtpm.txt").exists(), + path("$outputDir/mirna_quant/edger_qc/mature_unmapped_read_counts.txt").exists() + ).match("mirna_quant_edger_qc") }, + + { assert snapshot( + path("$outputDir/mirtrace/mirtrace-report.html").exists(), + path("$outputDir/mirtrace/mirtrace-stats-contamination_basic.tsv"), + path("$outputDir/mirtrace/mirtrace-stats-mirna-complexity.tsv"), + path("$outputDir/mirtrace/mirtrace-stats-phred.tsv"), + path("$outputDir/mirtrace/mirtrace-stats-length.tsv"), + path("$outputDir/mirtrace/mirtrace-stats-contamination_detailed.tsv"), + 
path("$outputDir/mirtrace/mirtrace-stats-qcstatus.tsv"), + path("$outputDir/mirtrace/mirtrace-stats-rnatype.tsv") + ).match("mirtrace") }, + + { assert snapshot( + path("$outputDir/multiqc/multiqc_data/fastqc-status-check-heatmap.txt"), + path("$outputDir/multiqc/multiqc_data/fastp_filtered_reads_plot.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_overrepresented_sequences_plot.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table-1.txt").exists(), + path("$outputDir/multiqc/multiqc_data/fastqc_sequence_counts_plot-1.txt"), + path("$outputDir/multiqc/multiqc_data/mirtrace_complexity_plot.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot-1_Percentages.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), + path("$outputDir/multiqc/multiqc_data/samtools-stats-dp.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt"), + path("$outputDir/multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_Before_filtering.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot-1.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot-1.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_per_base_n_content_plot-1.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt"), + path("$outputDir/multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_After_filtering.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt"), + path("$outputDir/multiqc/multiqc_data/mirtrace_qc_plot.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot-1.txt"), + path("$outputDir/multiqc/multiqc_data/mirtrace_length_plot.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt").exists(), + 
path("$outputDir/multiqc/multiqc_data/fastqc-status-check-heatmap-1.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_sequence_counts_plot.txt"), + path("$outputDir/multiqc/multiqc_data/mirtrace_rna_categories_plot.txt"), + path("$outputDir/multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_Before_filtering.txt"), + path("$outputDir/multiqc/multiqc_data/samtools_alignment_plot.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt"), + path("$outputDir/multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_After_filtering.txt"), + path("$outputDir/multiqc/multiqc_data/mirtrace_contamination_check_plot.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_adapter_content_plot.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot-1_Counts.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_adapter_content_plot-1.txt"), + path("$outputDir/multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_Before_filtering.txt"), + path("$outputDir/multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_sources.txt").exists(), + path("$outputDir/multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_After_filtering.txt") + ).match("multiqc_multiqc_data") }, + + ) + } + + } + +} diff --git a/tests/test_nextflex.nf.test.snap b/tests/test_nextflex.nf.test.snap new file mode 100644 index 00000000..30262a2b --- /dev/null +++ b/tests/test_nextflex.nf.test.snap @@ -0,0 +1,129 @@ +{ + "mirtrace": { + "content": [ + true, + "mirtrace-stats-contamination_basic.tsv:md5,2cb713dcb906c5a44ab0587cef5848b5", + "mirtrace-stats-mirna-complexity.tsv:md5,50d36857f67ab0fb72e6717ffb3a7f5a", + "mirtrace-stats-phred.tsv:md5,eeacb4b6341174e87a527138936f60c5", + 
"mirtrace-stats-length.tsv:md5,7c5caaf25a8e7ce4d322985f3225e0cf", + "mirtrace-stats-contamination_detailed.tsv:md5,90bc8fc20fdb9e6360c347d3a741733a", + "mirtrace-stats-qcstatus.tsv:md5,c41d52e9f464f364297390267a4df426", + "mirtrace-stats-rnatype.tsv:md5,469de7788e4e95e6f89fb1ad9b7b1795" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.10.0" + }, + "timestamp": "2024-09-06T16:26:35.587317829" + }, + "software_versions": { + "content": [ + "{BOWTIE_MAP_HAIRPIN={bowtie=1.3.1, samtools=1.14}, BOWTIE_MAP_MATURE={bowtie=1.3.1, samtools=1.14}, BOWTIE_MAP_SEQCLUSTER={bowtie=1.3.1, samtools=1.14}, FASTP={fastp=0.23.4}, FASTQC_RAW={fastqc=0.12.1}, FASTQC_TRIM={fastqc=0.12.1}, FORMAT_HAIRPIN={fastx_toolkit=0.0.14}, FORMAT_MATURE={fastx_toolkit=0.0.14}, INDEX_HAIRPIN={bowtie=1.3.1}, INDEX_MATURE={bowtie=1.3.1}, MIRTOP_QUANT={mirtop=0.4.25}, MIRTRACE_RUN={mirtrace=1.0.1}, PARSE_HAIRPIN={seqkit=2.6.1}, PARSE_MATURE={seqkit=2.6.1}, SAMTOOLS_FLAGSTAT={samtools=1.2}, SAMTOOLS_IDXSTATS={samtools=1.2}, SAMTOOLS_INDEX={samtools=1.2}, SAMTOOLS_SORT={samtools=1.2}, SAMTOOLS_STATS={samtools=1.2}, SEQCLUSTER_SEQUENCES={seqcluster=1.2.9}, TABLE_MERGE={r-base=3.6.2}, Workflow={nf-core/smrnaseq=v2.3.2dev}}" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-06T21:13:17.662742597" + }, + "mirna_quant_bam": { + "content": [ + "sample2_mature.sorted.idxstats:md5,9688f02beeebf9a590dc81e49415ede9", + "sample1_mature.sorted.flagstat:md5,7d61ae305e545c7a66ef8d23a0c8be25", + "sample2_mature.sorted.stats:md5,897f3efa4ecf2757ecc489cd3860cb39", + "sample3_mature.sorted.stats:md5,f35902ad2475738e48bc7b472477ec64", + "sample3_mature.sorted.flagstat:md5,1aae00444143bce06cb0f8cf31deb8e4", + "sample3_mature.sorted.idxstats:md5,9688f02beeebf9a590dc81e49415ede9", + "sample1_mature.sorted.stats:md5,743586b54608b68ada3235f6473bc357", + "sample2_mature.sorted.flagstat:md5,1aae00444143bce06cb0f8cf31deb8e4", + 
"sample1_mature.sorted.idxstats:md5,6db0cfab41307285fe5c89dfe95b5d46", + "sample3_mature_hairpin.sorted.stats:md5,06eeda9757f859140ed63d94a074b376", + "sample2_mature_hairpin.sorted.flagstat:md5,4e201dd868164d0c53142888dd6ca238", + "sample1_mature_hairpin.sorted.flagstat:md5,7ed3ab444077ddf6c334845e9c4ce75e", + "sample1_mature_hairpin.sorted.idxstats:md5,7b7d142caee6cccbb6d83c8e6568a951", + "sample3_mature_hairpin.sorted.flagstat:md5,4e201dd868164d0c53142888dd6ca238", + "sample2_mature_hairpin.sorted.stats:md5,ecd5b0dc54a14dca7dcc66abfc6d3845", + "sample1_mature_hairpin.sorted.stats:md5,b5e867c324113e8ab805c27a5d4ef89a", + "sample3_mature_hairpin.sorted.idxstats:md5,8927231d0ea3100fb75a96b4e5317321", + "sample2_mature_hairpin.sorted.idxstats:md5,8927231d0ea3100fb75a96b4e5317321" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.10.0" + }, + "timestamp": "2024-09-06T16:26:35.460795492" + }, + "mirna_quant_edger_qc": { + "content": [ + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "23.10.0" + }, + "timestamp": "2024-09-06T16:26:35.55181047" + }, + "multiqc_multiqc_data": { + "content": [ + "fastqc-status-check-heatmap.txt:md5,949e6825a7ecc751aa9ba515de7dbd02", + "fastp_filtered_reads_plot.txt:md5,7d99da33e244ce77191b8bfeaec8180c", + "fastqc_overrepresented_sequences_plot.txt:md5,11b85c61ea97ca62a9e7c34fae9e575c", + true, + "fastqc_sequence_counts_plot-1.txt:md5,926304f6c18b8151a56692bb69db97e7", + "mirtrace_complexity_plot.txt:md5,b88b1ec0d2d0ee2f443b968c107dc45a", + "fastqc_per_sequence_gc_content_plot-1_Percentages.txt:md5,3f7fd27d4553da6a88f4f15dd4b6413b", + "multiqc_citations.txt:md5,3adbccd17a42d0d5d97ee7ebb476f433", + "samtools-stats-dp.txt:md5,1fa31e11ef6c82185d5c9dc2f40d61b2", + "fastqc_sequence_length_distribution_plot.txt:md5,130a5569ba830f7e7abb971d1c8da537", + 
"fastp-seq-content-n-plot_Read_1_Before_filtering.txt:md5,bd72bc8bfc907c6aab72f315917ab280", + "fastqc_sequence_duplication_levels_plot-1.txt:md5,a53f959bf59ad69d3bcbc53e8fe609b3", + "fastqc_per_base_sequence_quality_plot-1.txt:md5,2f85a658bcb8261328449f1642688086", + "multiqc_general_stats.txt:md5,a1b6656beb208450e6b619514545f42e", + "fastqc_per_base_n_content_plot-1.txt:md5,e3b4bb3ed98e87f2d8acb0c009485ecd", + "fastqc_per_base_n_content_plot.txt:md5,48710007bb055bae6f35f2fa8ea8f42b", + "fastp-seq-quality-plot_Read_1_After_filtering.txt:md5,2956382a3f2e855a4dce8e8246a57add", + "fastqc_per_sequence_quality_scores_plot.txt:md5,d2339b94d6c56616b6975086bf1e23c8", + "mirtrace_qc_plot.txt:md5,4f3f1e0a2b6e5dd5f9e0a7a7aea0e529", + "fastqc_per_sequence_quality_scores_plot-1.txt:md5,28ed13d328e755aa06a0f13f87c336eb", + "mirtrace_length_plot.txt:md5,f915d98a47a66ea9602ff0758a5e462f", + true, + "fastqc-status-check-heatmap-1.txt:md5,66af5433ebb61bc68905f8219d7419ab", + "fastqc_sequence_counts_plot.txt:md5,3bfb0974e5395d2d0044586c3e4547d1", + "mirtrace_rna_categories_plot.txt:md5,e39093f9a152845b6f4f44a239d9652d", + "fastp-seq-quality-plot_Read_1_Before_filtering.txt:md5,e5ea2bfd87e957a18fae5239137d6499", + "samtools_alignment_plot.txt:md5,73b963380d0fc2ae145c3cd39e26824a", + "fastqc_per_base_sequence_quality_plot.txt:md5,1208509fcaff06edcddc377c907dfdaf", + "fastp-seq-content-n-plot_Read_1_After_filtering.txt:md5,dd53a16aebc689109fc8065d08d8a6c7", + "mirtrace_contamination_check_plot.txt:md5,88cfa15add5e8a561761ee70d8a0da12", + "fastqc_adapter_content_plot.txt:md5,8aa2cbcf256bbb89c4a1d1fd18019c9b", + "fastqc_sequence_duplication_levels_plot.txt:md5,97a930f423f2cd365c2262b0a185f68a", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,5857a8a1980816cf70b34b7b318e1482", + "fastqc_per_sequence_gc_content_plot-1_Counts.txt:md5,d3ecffd88ebbdac463e297a2b98c8b3d", + "fastqc_adapter_content_plot-1.txt:md5,245d96a402988141cbe68b60a42db535", + 
"fastp-seq-content-gc-plot_Read_1_Before_filtering.txt:md5,9033ad6887da19d96fb9e2504d8de0a5", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,fbe1f23a76ed70b2568d553fc42adef2", + true, + "fastp-seq-content-gc-plot_Read_1_After_filtering.txt:md5,ed44d5035150f69bdeb7855c80271c21" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-06T21:13:17.86597983" + } +} \ No newline at end of file diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 007afb35..dc976797 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -8,6 +8,7 @@ include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' include { FASTQC } from '../modules/nf-core/fastqc/main' include { FASTP as FASTP_LENGTH_FILTER } from '../modules/nf-core/fastp' +include { FASTP as FASTP3 } from '../modules/nf-core/fastp' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { UMICOLLAPSE as UMICOLLAPSE_FASTQ } from '../modules/nf-core/umicollapse/main' include { UMITOOLS_EXTRACT } from '../modules/nf-core/umitools/extract/main' @@ -105,6 +106,17 @@ workflow NFCORE_SMRNASEQ { ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions) ch_reads_for_mirna = FASTQ_FASTQC_UMITOOLS_FASTP.out.reads + // Trim 3' end nucleotides after adapter is removed, otherwise they are not really trimmed + if (params.three_prime_clip_r1){ + FASTP3( + ch_reads_for_mirna, + [], + false, + false, + false + ) + ch_reads_for_mirna = FASTP3.out.reads + } // UMI Dedup for fastq input // This involves running on the sequencing adapter trimmed remnants of the entire reads