From bb44d5b79eed1791ebf143cd9a5818b0a18b7b25 Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Thu, 28 Apr 2022 11:54:21 +0200 Subject: [PATCH 01/12] ADD MIRGENEDB COMMANDS Add parameters to allow the use of MirGeneDB as database in the smranseq pipeline. The paths to the files are still temporary. --- nextflow.config | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nextflow.config b/nextflow.config index c5fa807d..a6a277c7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -26,6 +26,11 @@ params { mirtrace_protocol = 'illumina' mature = "https://mirbase.org/ftp/CURRENT/mature.fa.gz" hairpin = "https://mirbase.org/ftp/CURRENT/hairpin.fa.gz" + MirGeneDB = false + MirGeneDB_mature = "https://mirgenedb.org/fasta/ALL?mat=1" + MirGeneDB_hairpin = "https://mirgenedb.org/static/data/ALL/ALL-pre.fas" + MirGeneDB_gff = "https://mirgenedb.org/gff/ALL?sort=pos&all=1" + MirGeneDB_species = null // Trimming options clip_r1 = 0 From ed3e850d43d4529c3d7804934ecd2154f66226aa Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Thu, 28 Apr 2022 13:55:18 +0200 Subject: [PATCH 02/12] ASSIGN NEW MIRNA DB FILES Added a if statement to assign the new mature and hairpin fasta files if MirGeneDB is to be used in the analysis. --- workflows/smrnaseq.nf | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index ff8669bf..8079d4da 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -51,8 +51,13 @@ ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multi // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -if (params.mature) { reference_mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" } -if (params.hairpin) { reference_hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" } +if (!params.MirGeneDB) { + if (params.mature) { reference_mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" } + if (params.hairpin) { reference_hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" } +} else { + if (params.MirGeneDR_mature) { reference_mature = file(params.MirGeneDB_mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" } + if (params.MirGeneDB_hairpin) { reference_hairpin = file(params.MirGeneDB_hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" } +} include { INPUT_CHECK } from '../subworkflows/local/input_check' include { FASTQC_TRIMGALORE } from '../subworkflows/nf-core/fastqc_trimgalore' From 9aad3485f88268cee1c294441d26ae0d49443249 Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Thu, 28 Apr 2022 14:15:01 +0200 Subject: [PATCH 03/12] DISTINGUISH BETWEEN SPECIES IN PARSE_FASTA_MIRNA Added the option to use the mirGeneDB species to parse the fasta files as different styles in species naming result in mis-parsed files. 
--- modules/local/parse_fasta_mirna.nf | 2 ++ nextflow.config | 10 +++++----- workflows/smrnaseq.nf | 8 ++++---- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/modules/local/parse_fasta_mirna.nf b/modules/local/parse_fasta_mirna.nf index 8b4c21f8..717bf07f 100644 --- a/modules/local/parse_fasta_mirna.nf +++ b/modules/local/parse_fasta_mirna.nf @@ -9,6 +9,8 @@ process PARSE_FASTA_MIRNA { input: path fasta + if (!params.mirGeneDB) {species = params.mirtrace_species} else {species = params.mirGeneDB_species} + output: path '*_igenome.fa', emit: parsed_fasta path "versions.yml", emit: versions diff --git a/nextflow.config b/nextflow.config index a6a277c7..7f88fc01 100644 --- a/nextflow.config +++ b/nextflow.config @@ -26,11 +26,11 @@ params { mirtrace_protocol = 'illumina' mature = "https://mirbase.org/ftp/CURRENT/mature.fa.gz" hairpin = "https://mirbase.org/ftp/CURRENT/hairpin.fa.gz" - MirGeneDB = false - MirGeneDB_mature = "https://mirgenedb.org/fasta/ALL?mat=1" - MirGeneDB_hairpin = "https://mirgenedb.org/static/data/ALL/ALL-pre.fas" - MirGeneDB_gff = "https://mirgenedb.org/gff/ALL?sort=pos&all=1" - MirGeneDB_species = null + mirGeneDB = false + mirGeneDB_mature = "https://mirgenedb.org/fasta/ALL?mat=1" + mirGeneDB_hairpin = "https://mirgenedb.org/static/data/ALL/ALL-pre.fas" + mirGeneDB_gff = "https://mirgenedb.org/gff/ALL?sort=pos&all=1" + mirGeneDB_species = null // Trimming options clip_r1 = 0 diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 8079d4da..24c786f5 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -25,7 +25,7 @@ if (!params.mirtrace_species){ } // Genome options bt_index_from_species = params.genome ? params.genomes[ params.genome ].bowtie ?: false : false -bt_index = params.bt_indices ?: bt_index_from_species +bt_index = params.bt_indices ?: bt_index_from_species mirtrace_species_from_species = params.genome ? 
params.genomes[ params.genome ].mirtrace_species ?: false : false mirtrace_species = params.mirtrace_species ?: mirtrace_species_from_species fasta_from_species = params.genome ? params.genomes[ params.genome ].fasta ?: false : false @@ -51,12 +51,12 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -if (!params.MirGeneDB) { +if (!params.mirGeneDB) { if (params.mature) { reference_mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" } if (params.hairpin) { reference_hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" } } else { - if (params.MirGeneDR_mature) { reference_mature = file(params.MirGeneDB_mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" } - if (params.MirGeneDB_hairpin) { reference_hairpin = file(params.MirGeneDB_hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" } + if (params.mirGeneDR_mature) { reference_mature = file(params.mirGeneDB_mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" } + if (params.mirGeneDB_hairpin) { reference_hairpin = file(params.mirGeneDB_hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" } } include { INPUT_CHECK } from '../subworkflows/local/input_check' From b760c5ab56bdaf02b224df6f226a12896a913faa Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Thu, 28 Apr 2022 14:54:53 +0200 Subject: [PATCH 04/12] FIX TYPO --- workflows/smrnaseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 24c786f5..3f52a5e5 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -55,7 +55,7 @@ if (!params.mirGeneDB) { if 
(params.mature) { reference_mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" } if (params.hairpin) { reference_hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" } } else { - if (params.mirGeneDR_mature) { reference_mature = file(params.mirGeneDB_mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" } + if (params.mirGeneDB_mature) { reference_mature = file(params.mirGeneDB_mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" } if (params.mirGeneDB_hairpin) { reference_hairpin = file(params.mirGeneDB_hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" } } From 17bffbae540fdba2629c82e32386d3747ae08488 Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Tue, 10 May 2022 10:17:17 +0200 Subject: [PATCH 05/12] ADD LATEST CHANGES Commit of the latest testing changes --- conf/test.config | 2 +- modules/local/parse_fasta_mirna.nf | 2 +- nextflow.config | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/conf/test.config b/conf/test.config index 36333670..78c9b953 100644 --- a/conf/test.config +++ b/conf/test.config @@ -26,7 +26,7 @@ params { mature = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/mature.fa' hairpin = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hairpin.fa' mirna_gtf = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hsa.gff3' - mirtrace_species = "hsa" + mirtrace_species = "Hsa" skip_mirdeep = true } diff --git a/modules/local/parse_fasta_mirna.nf b/modules/local/parse_fasta_mirna.nf index 717bf07f..a471b246 100644 --- a/modules/local/parse_fasta_mirna.nf +++ b/modules/local/parse_fasta_mirna.nf @@ -9,7 +9,7 @@ process PARSE_FASTA_MIRNA { input: path fasta - if (!params.mirGeneDB) {species = 
params.mirtrace_species} else {species = params.mirGeneDB_species} + //if (!params.mirGeneDB) {params.species = params.mirtrace_species} else {params.species = params.mirGeneDB_species} output: path '*_igenome.fa', emit: parsed_fasta diff --git a/nextflow.config b/nextflow.config index 7f88fc01..97190f2d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -27,9 +27,9 @@ params { mature = "https://mirbase.org/ftp/CURRENT/mature.fa.gz" hairpin = "https://mirbase.org/ftp/CURRENT/hairpin.fa.gz" mirGeneDB = false - mirGeneDB_mature = "https://mirgenedb.org/fasta/ALL?mat=1" - mirGeneDB_hairpin = "https://mirgenedb.org/static/data/ALL/ALL-pre.fas" - mirGeneDB_gff = "https://mirgenedb.org/gff/ALL?sort=pos&all=1" + mirGeneDB_mature = "/Users/chriskub/Downloads/ALL-mat.fas" + mirGeneDB_hairpin = "/Users/chriskub/Downloads/ALL-pre.fas" + mirGeneDB_gff = "/Users/chriskub/Downloads/ALL.gff" mirGeneDB_species = null // Trimming options From 0939a32a5a2098da5e3af1261505645b14a73053 Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Wed, 11 May 2022 17:10:05 +0200 Subject: [PATCH 06/12] ADD NEW PARAMETERS AND CHECKS Enable the use of the mirGeneDB gff file in the mirtop analysis and replace the mirna_gtf file with it internally. 
--- conf/test.config | 2 +- workflows/smrnaseq.nf | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/conf/test.config b/conf/test.config index 78c9b953..36333670 100644 --- a/conf/test.config +++ b/conf/test.config @@ -26,7 +26,7 @@ params { mature = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/mature.fa' hairpin = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hairpin.fa' mirna_gtf = 'https://github.com/nf-core/test-datasets/raw/smrnaseq/reference/hsa.gff3' - mirtrace_species = "Hsa" + mirtrace_species = "hsa" skip_mirdeep = true } diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index 3f52a5e5..c2a7a1e6 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -55,8 +55,9 @@ if (!params.mirGeneDB) { if (params.mature) { reference_mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" } if (params.hairpin) { reference_hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" } } else { - if (params.mirGeneDB_mature) { reference_mature = file(params.mirGeneDB_mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" } - if (params.mirGeneDB_hairpin) { reference_hairpin = file(params.mirGeneDB_hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" } + if (params.mirGeneDB_mature) { reference_mature = file(params.mirGeneDB_mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mirGeneDB_mature}" } + if (params.mirGeneDB_hairpin) { reference_hairpin = file(params.mirGeneDB_hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.mirGeneDB_hairpin}" } + if (params.mirGeneDB_gff) { mirna_gtf = file(params.mirGeneDB_gff, checkIfExists: true) } else { exit 1, "MirGeneDB gff file not found: 
${params.mirGeneDB_gff}"} } include { INPUT_CHECK } from '../subworkflows/local/input_check' From 48fb232c2799627309f7d66e0c68bac18a0b2bf8 Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Thu, 12 May 2022 09:31:28 +0200 Subject: [PATCH 07/12] ENABLE THE USE OF MIRGENEDB SPECIES Added a new variable that either takes the mirGeneDB or the mirtrace species to filter the fasta files. --- modules/local/parse_fasta_mirna.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/parse_fasta_mirna.nf b/modules/local/parse_fasta_mirna.nf index a471b246..ee8a954d 100644 --- a/modules/local/parse_fasta_mirna.nf +++ b/modules/local/parse_fasta_mirna.nf @@ -9,7 +9,7 @@ process PARSE_FASTA_MIRNA { input: path fasta - //if (!params.mirGeneDB) {params.species = params.mirtrace_species} else {params.species = params.mirGeneDB_species} + if (!params.mirGeneDB) {params.filterSpecies = params.mirtrace_species} else {params.filterSpecies = params.mirGeneDB_species} output: path '*_igenome.fa', emit: parsed_fasta @@ -29,7 +29,7 @@ process PARSE_FASTA_MIRNA { # TODO perl -ane 's/[ybkmrsw]/N/ig;print;' \${FASTA}_parsed_tmp.fa > \${FASTA}_parsed.fa sed -i 's/\s.*//' \${FASTA}_parsed.fa - seqkit grep -r --pattern \".*${params.mirtrace_species}-.*\" \${FASTA}_parsed.fa > \${FASTA}_sps.fa + seqkit grep -r --pattern \".*${params.filterSpecies}-.*\" \${FASTA}_parsed.fa > \${FASTA}_sps.fa seqkit seq --rna2dna \${FASTA}_sps.fa > \${FASTA}_igenome.fa cat <<-END_VERSIONS > versions.yml From 1a08eb9b828906a6c287bc0bbf3feee2b3e29089 Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Thu, 12 May 2022 11:14:51 +0200 Subject: [PATCH 08/12] ADD NEW OPTIONS Add the new options and their description to the schema. 
--- nextflow_schema.json | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 027f1b37..b6337d19 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -62,12 +62,23 @@ "fa_icon": "fas fa-book", "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." }, + "mirGeneDB": { + "type": "boolean", + "description": "Boolean whether mirGeneDB should be used instead of miRBase", + "help_text": "This allows you to use mirGeneDB instead of miRBase as the database. \n Note that you will need to set the additional flags `--mirGeneDB_species`, `--mirGeneDB_gff`, `--mirGeneDB_mature` and `--mirGeneDB_hairpin`", + "default": "false" + }, "mirtrace_species": { "type": "string", "description": "Species for miRTrace.", "help_text": "This is automatically set when using `--genome`. Example values: `hsa`, `mmu`...\n Note that mirTrace relies on miRBase for its species reference. See available references [here](https://mirbase.org/ftp/CURRENT/genomes/).", "fa_icon": "fas fa-journal-whills" }, + "mirGeneDB_species": { + "type": "string", + "description": "Species of mirGeneDB.", + "help_text": "This replaces the value of `--mirtrace_species` if `--mirGeneDB` is used. \n Note the difference in case for species names used in MirGeneDB and miRBase." 
+ }, "fasta": { "type": "string", "fa_icon": "fas fa-font", @@ -80,6 +91,11 @@ "help_text": "miRBase `.gff3` file, typically downloaded from [`https://mirbase.org/ftp/CURRENT/genomes/`](https://mirbase.org/ftp/CURRENT/genomes/)\n\nIf using iGenomes with `--genome` this file will be downloaded from miRBase automatically during the pipeline run.\n\n", "fa_icon": "fas fa-address-book" }, + "mirGeneDB_gff": { + "type": "string", + "description": "GFF/GTF file with coordinates positions of precursor and miRNAs.", + "help_text": "mirGeneDB `.gff3` file, typically downloaded from [`https://mirgenedb.org/download`]. This replaces the value of --mirna_gff if --mirGeneDB is used." + }, "mature": { "type": "string", "description": "Path to FASTA file with mature miRNAs.", @@ -87,6 +103,11 @@ "help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\n\nDefaults to the current miRBase release URL, from which the file will be downloaded.", "default": "https://mirbase.org/ftp/CURRENT/mature.fa.gz" }, + "mirGeneDB_mature": { + "type": "string", + "description": "Path to FASTA file with mirGeneDB mature miRNAs.", + "help_text": "This file needs to be downloaded from [`https://mirgenedb.org/download`]. Can be given either as a plain text `.fa` file or a compressed `.gz` file." + }, "hairpin": { "type": "string", "description": "Path to FASTA file with miRNAs precursors.", @@ -94,6 +115,11 @@ "help_text": "Typically this will be the `mature.fa` file from miRBase. Can be given either as a plain text `.fa` file or a compressed `.gz` file.\n\nDefaults to the current miRBase release URL, from which the file will be downloaded.", "default": "https://mirbase.org/ftp/CURRENT/hairpin.fa.gz" }, + "mirGeneDB_hairpin": { + "type": "string", + "description": "Path to FASTA file with miRNAs precursors.", + "help_text": "This file needs to be downloaded from [`https://mirgenedb.org/download`]. 
Can be given either as a plain text `.fa` file or a compressed `.gz` file.\nNote that mirGeneDB does not have a dedicated hairpin file. The equivalent is the `Precursor sequences`." + }, "bt_indices": { "type": "string", "description": "Path to a Bowtie 1 index directory", From 8e0ba87867e03c020d61d3f34badc12a198385cf Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: Thu, 12 May 2022 11:16:18 +0200 Subject: [PATCH 09/12] MOVE VARIABLE DEFINITION Moved the definition of the filterSpecies variable to the smrnaseq workflow instead of the individual modules. --- modules/local/mirtop_quant.nf | 8 +++++--- modules/local/parse_fasta_mirna.nf | 2 +- workflows/smrnaseq.nf | 2 ++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/local/mirtop_quant.nf b/modules/local/mirtop_quant.nf index 07643e99..72c3ba74 100644 --- a/modules/local/mirtop_quant.nf +++ b/modules/local/mirtop_quant.nf @@ -11,6 +11,8 @@ process MIRTOP_QUANT { path hairpin path gtf + //if (!params.mirGeneDB) {params.filterSpecies = params.mirtrace_species} else {params.filterSpecies = params.mirGeneDB_species} + output: path "mirtop/mirtop.gff" path "mirtop/mirtop.tsv" , emit: mirtop_table @@ -20,9 +22,9 @@ process MIRTOP_QUANT { script: """ - mirtop gff --hairpin $hairpin --gtf $gtf -o mirtop --sps $params.mirtrace_species ./bams/* - mirtop counts --hairpin $hairpin --gtf $gtf -o mirtop --sps $params.mirtrace_species --add-extra --gff mirtop/mirtop.gff - mirtop export --format isomir --hairpin $hairpin --gtf $gtf --sps $params.mirtrace_species -o mirtop mirtop/mirtop.gff + mirtop gff --hairpin $hairpin --gtf $gtf -o mirtop --sps $params.filterSpecies ./bams/* + mirtop counts --hairpin $hairpin --gtf $gtf -o mirtop --sps $params.filterSpecies --add-extra --gff mirtop/mirtop.gff + mirtop export --format isomir --hairpin $hairpin --gtf $gtf --sps $params.filterSpecies -o mirtop mirtop/mirtop.gff mirtop stats mirtop/mirtop.gff --out mirtop/stats mv mirtop/stats/mirtop_stats.log 
mirtop/stats/full_mirtop_stats.log diff --git a/modules/local/parse_fasta_mirna.nf b/modules/local/parse_fasta_mirna.nf index ee8a954d..18b51066 100644 --- a/modules/local/parse_fasta_mirna.nf +++ b/modules/local/parse_fasta_mirna.nf @@ -9,7 +9,7 @@ process PARSE_FASTA_MIRNA { input: path fasta - if (!params.mirGeneDB) {params.filterSpecies = params.mirtrace_species} else {params.filterSpecies = params.mirGeneDB_species} + //if (!params.mirGeneDB) {params.filterSpecies = params.mirtrace_species} else {params.filterSpecies = params.mirGeneDB_species} output: path '*_igenome.fa', emit: parsed_fasta diff --git a/workflows/smrnaseq.nf b/workflows/smrnaseq.nf index c2a7a1e6..f59c85bc 100644 --- a/workflows/smrnaseq.nf +++ b/workflows/smrnaseq.nf @@ -54,10 +54,12 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi if (!params.mirGeneDB) { if (params.mature) { reference_mature = file(params.mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mature}" } if (params.hairpin) { reference_hairpin = file(params.hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.hairpin}" } + params.filterSpecies = params.mirtrace_species } else { if (params.mirGeneDB_mature) { reference_mature = file(params.mirGeneDB_mature, checkIfExists: true) } else { exit 1, "Mature miRNA fasta file not found: ${params.mirGeneDB_mature}" } if (params.mirGeneDB_hairpin) { reference_hairpin = file(params.mirGeneDB_hairpin, checkIfExists: true) } else { exit 1, "Hairpin miRNA fasta file not found: ${params.mirGeneDB_hairpin}" } if (params.mirGeneDB_gff) { mirna_gtf = file(params.mirGeneDB_gff, checkIfExists: true) } else { exit 1, "MirGeneDB gff file not found: ${params.mirGeneDB_gff}"} + params.filterSpecies = params.mirGeneDB_species } include { INPUT_CHECK } from '../subworkflows/local/input_check' From 26cb509de2d7dacf73f0148f1c95315ffde500a3 Mon Sep 17 00:00:00 2001 From: Christian Kubica Date: 
Thu, 12 May 2022 13:20:57 +0200 Subject: [PATCH 10/12] UPDATE DOCUMENTATION Added the documentation of new parameters and features to the appropriate files. --- CHANGELOG.md | 17 ++++++++++++----- README.md | 8 ++++---- docs/output.md | 2 +- docs/usage.md | 13 +++++++++++-- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e1b6875e..615b42f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,14 +21,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Other enhancements & fixes - [#134](https://github.com/nf-core/smrnaseq/issues/134) - Fixed colSum of zero issues for edgeR_miRBase.R script +- [#55](https://github.com/nf-core/smrnaseq/issues/12) - Enabled the use of `MirGeneDB` as an alternative database insted of `miRBase` ### Parameters -| Old parameter | New parameter | -| -------------------- | ---------------- | -| `--conda` | `--enable_conda` | -| `--clusterOptions` | | -| `--publish_dir_mode` | | +| Old parameter | New parameter | +| -------------------- | --------------------- | +| `--conda` | `--enable_conda` | +| `--clusterOptions` | | +| `--publish_dir_mode` | | +| | `--mirGeneDB` | +| | `--mirGeneDB_species` | +| | `--mirGeneDB_gff` | +| | `--mirGeneDB_mature` | +| | `--mirGeneDB_hairpin` | + > **NB:** Parameter has been **updated** if both old and new parameter information is present. > **NB:** Parameter has been **added** if just the new parameter information is present. diff --git a/README.md b/README.md index 6d3efa52..aec34f05 100644 --- a/README.md +++ b/README.md @@ -34,13 +34,13 @@ On release, automated continuous integration tests run the pipeline on a full-si 2. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) 1. Insert Size calculation 2. Collapse reads ([`seqcluster`](https://seqcluster.readthedocs.io/mirna_annotation.html#processing-of-reads)) -3. 
Alignment against miRBase mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) -4. Alignment against miRBase hairpin +3. Alignment against miRBase or MirGeneDB mature miRNA ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) +4. Alignment against miRBase or MirGeneDB hairpin 1. Unaligned reads from step 3 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) 2. Collapsed reads from step 2.2 ([`Bowtie1`](http://bowtie-bio.sourceforge.net/index.shtml)) -5. Post-alignment processing of miRBase hairpin +5. Post-alignment processing of miRBase, or MirGeneDB hairpin 1. Basic statistics from step 3 and step 4.1 ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) - 2. Analysis on miRBase hairpin counts ([`edgeR`](https://bioconductor.org/packages/release/bioc/html/edgeR.html)) + 2. Analysis on miRBase, or MirGeneDB hairpin counts ([`edgeR`](https://bioconductor.org/packages/release/bioc/html/edgeR.html)) - TMM normalization and a table of top expression hairpin - MDS plot clustering samples - Heatmap of sample similarities diff --git a/docs/output.md b/docs/output.md index ce1f8347..f3a23da8 100644 --- a/docs/output.md +++ b/docs/output.md @@ -60,7 +60,7 @@ This is an example of the output we can get: ## Bowtie -[Bowtie](http://bowtie-bio.sourceforge.net/index.shtml) is used for mapping adapter trimmed reads against the mature miRNAs and miRNA precursors (hairpins) in [miRBase](http://www.mirbase.org/). +[Bowtie](http://bowtie-bio.sourceforge.net/index.shtml) is used for mapping adapter trimmed reads against the mature miRNAs and miRNA precursors (hairpins) of the chosen database [miRBase](http://www.mirbase.org/) or [MirGeneDB](https://mirgenedb.org/). **Output directory: `results/samtools`** diff --git a/docs/usage.md b/docs/usage.md index f1304605..c85c2a4f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -16,16 +16,25 @@ This option indicates the experimental protocol used for the sample preparation. 
- 'cats': adapter (`GATCGGAAGAGCACACGTCTG), clip_r1(`3) - 'custom' (where the ser can indicate the `three_prime_adapter`, `clip_r1` and three_prime_clip_r1`) -### `mirtrace_species` +### `mirtrace_species` or `mirGeneDB_species` -It should point to the 3-letter species name used by `miRBase`. +It should point to the 3-letter species name used by `miRBase`, or `MirGeneDB`. Note the difference in case for the two databases. ### miRNA related files +Different parameters can be set for the two supported databases. By default `miRBase` will be used with the parameters below. + - `mirna_gtf`: If not supplied by the user, then `mirna_gtf` will point to the latest GFF3 file in miRbase: `https://mirbase.org/ftp/CURRENT/genomes/${params.mirtrace_species}.gff3` - `mature`: points to the FASTA file of mature miRNA sequences. `https://mirbase.org/ftp/CURRENT/mature.fa.gz` - `hairpin`: points to the FASTA file of precursor miRNA sequences. `https://mirbase.org/ftp/CURRENT/hairpin.fa.gz` +If `MirGeneDB` should be used instead it needs to be specified using `--mirGeneDB` and use the parameters below. + +- `mirGeneDB_gff`: The data can not be downloaded automatically, thus the user needs to supply the gff file for either his species, or all species downloaded from `https://mirgenedb.org/download`. The total set will automatically be subsetted to the species specified with `mirGeneDB_species`. +- `mirGeneDB_mature`: points to the FASTA file of mature miRNA sequences. Download from `https://mirgenedb.org/download`. +- `mirGeneDB_hairpin`: points to the FASTA file of precursor miRNA sequences. Download from `https://mirgenedb.org/download`. Note that `MirGeneDB` does not have a dedicated `hairpin` file, but the `Precursor sequences` are to be used. 
+ + ### Genome - `fasta`: the reference genome FASTA file From 9a9439a7a18b2dad5eaf919c367876bcf777f10a Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Fri, 10 Jun 2022 12:50:52 +0000 Subject: [PATCH 11/12] [automated] Fix linting with Prettier --- CHANGELOG.md | 15 ++++++++------- docs/usage.md | 1 - 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf70a0dc..f49f2e1d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,13 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes -| Old parameter | New parameter | -| -------------------- | --------------------- | -| | `--mirGeneDB` | -| | `--mirGeneDB_species` | -| | `--mirGeneDB_gff` | -| | `--mirGeneDB_mature` | -| | `--mirGeneDB_hairpin` | +| Old parameter | New parameter | +| ------------- | --------------------- | +| | `--mirGeneDB` | +| | `--mirGeneDB_species` | +| | `--mirGeneDB_gff` | +| | `--mirGeneDB_mature` | +| | `--mirGeneDB_hairpin` | + ### Parameters ## [v2.0.0](https://github.com/nf-core/smrnaseq/releases/tag/2.0.0) - 2022-05-31 Aqua Zinc Chihuahua diff --git a/docs/usage.md b/docs/usage.md index c85c2a4f..c55ad4a0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -34,7 +34,6 @@ If `MirGeneDB` should be used instead it needs to be specified using `--mirGeneD - `mirGeneDB_mature`: points to the FASTA file of mature miRNA sequences. Download from `https://mirgenedb.org/download`. - `mirGeneDB_hairpin`: points to the FASTA file of precursor miRNA sequences. Download from `https://mirgenedb.org/download`. Note that `MirGeneDB` does not have a dedicated `hairpin` file, but the `Precursor sequences` are to be used. 
- ### Genome - `fasta`: the reference genome FASTA file From 85fb55a9b4f00e5a3deac454613c350f4f0222d0 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 10 Jun 2022 14:51:39 +0200 Subject: [PATCH 12/12] Update CHANGELOG.md Move changelog up to dev --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f49f2e1d..b12442dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | | `--mirGeneDB_mature` | | | `--mirGeneDB_hairpin` | +### Other enhancements + +- [#55](https://github.com/nf-core/smrnaseq/issues/12) - Enabled the use of `MirGeneDB` as an alternative database instead of `miRBase` + ### Parameters ## [v2.0.0](https://github.com/nf-core/smrnaseq/releases/tag/2.0.0) - 2022-05-31 Aqua Zinc Chihuahua @@ -34,7 +38,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Other enhancements & fixes - [#134](https://github.com/nf-core/smrnaseq/issues/134) - Fixed colSum of zero issues for edgeR_miRBase.R script -- [#55](https://github.com/nf-core/smrnaseq/issues/12) - Enabled the use of `MirGeneDB` as an alternative database insted of `miRBase` - [#55](https://github.com/lpantano/seqcluster/pull/55) - update seqcluster to fix UMI-detecting bug ### Parameters