nf-core · maxulysse · Nov 1, 2023 · Oct 27, 2023 · Oct 27, 2023 · Oct 27, 2023
@@ -24,8 +24,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - [#1277](https://github.com/nf-core/sarek/pull/1277) - Fix null value issue for Mutect2 joint calling
 - [#1287](https://github.com/nf-core/sarek/pull/1287) - Adding label `process_single` to local modules
+- [#1298](https://github.com/nf-core/sarek/pull/1298) - Fix annotation cache usage
 - [#1301](https://github.com/nf-core/sarek/pull/1301) - Fix nf-prov usage
 
+### Removed
+
+- [#1298](https://github.com/nf-core/sarek/pull/1298) - Remove `--use_annotation_cache_keys` params
+
 ### Dependencies
 
 | Dependency | Old version | New version |

@@ -19,8 +19,7 @@ params {
 
     // Other params
     tools = 'strelka,mutect2,freebayes,ascat,manta,cnvkit,tiddit,controlfreec,vep'
-    split_fastq               = 20000000
-    intervals                 = 's3://ngi-igenomes/test-data/sarek/S07604624_Padded_Agilent_SureSelectXT_allexons_V6_UTR.bed'
-    wes                       = true
-    use_annotation_cache_keys = true
+    split_fastq = 20000000
+    intervals   = 's3://ngi-igenomes/test-data/sarek/S07604624_Padded_Agilent_SureSelectXT_allexons_V6_UTR.bed'
+    wes         = true
 }
@@ -19,6 +19,5 @@ params {
 
     // Other params
     tools = 'strelka,freebayes,haplotypecaller,deepvariant,manta,tiddit,cnvkit,vep'
-    split_fastq               = 50000000
-    use_annotation_cache_keys = true
+    split_fastq = 50000000
 }
@@ -308,7 +308,7 @@ See the [`input`](usage#input-sample-sheet-configurations) section in the usage
 ## Variant Calling
 
 The results regarding variant calling are collected in `{outdir}/variantcalling/`.
-If some results from a variant caller do not appear here, please check out the `--tools` section in the parameter [documentation](https://nf-co.re/sarek/3.0.1/parameters).
+If some results from a variant caller do not appear here, please check out the `--tools` section in the parameter [documentation](https://nf-co.re/sarek/latest/parameters).
 
 (Recalibrated) CRAM files can be used as an input to start the variant calling.
 

@@ -18,10 +18,16 @@ Sarek is designed to handle single samples, such as single-normal or single-tumo
 The typical command for running the pipeline is as follows:
 
 ```bash
-nextflow run nf-core/sarek --input ./samplesheet.csv --outdir ./results --genome GATK.GRCh38 --tools <TOOLS> -profile docker
+nextflow run nf-core/sarek -r <VERSION> -profile <PROFILE> --input ./samplesheet.csv --outdir ./results --genome GATK.GRCh38 --tools <TOOLS>
 ```
 
-This will launch the pipeline and perform variant calling with the tools specified in `--tools`, see the [parameter section](https://nf-co.re/sarek/3.2.3/parameters#tools) for details on variant calling tools.
+`-r <VERSION>` is optional but strongly recommended for reproducibility and should match the latest version.
+
+`-profile <PROFILE>` is mandatory and should reflect either your own institutional profile or any pipeline profile specified in the [profile section](#-profile).
+
+This documentation assumes that any `nextflow run nf-core/sarek` command is run with the appropriate `-r` and `-profile` options.
+
+This will launch the pipeline and perform variant calling with the tools specified in `--tools`, see the [parameter section](https://nf-co.re/sarek/latest/parameters#tools) for details on variant calling tools.
 In the above example, `<PROFILE>` stands for the configuration profile to use (for example `docker`). See below for more information about profiles.
 
 Note that the pipeline will create the following files in your working directory:
@@ -44,7 +50,7 @@ Do not use `-c <file>` to specify parameters as this will result in errors. Cust
 The above pipeline run specified with a params file in yaml format:
 
 ```bash
-nextflow run nf-core/sarek -profile docker -params-file params.yaml
+nextflow run nf-core/sarek -params-file params.yaml
 ```
 
 with `params.yaml` containing:
@@ -312,7 +318,7 @@ test,sample4_vs_sample3,manta,sample4_vs_sample3.somatic_sv.vcf.gz
 
 ## Updating the pipeline
 
-When you launch a pipeline from the command-line with `nextflow run nf-core/sarek -profile docker -params-file params.yaml`, Nextflow will automatically pull the pipeline code from GitHub and store it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
+When you launch a pipeline from the command-line with `nextflow run nf-core/sarek -params-file params.yaml`, Nextflow will automatically pull the pipeline code from GitHub and store it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
 
 ```bash
 nextflow pull nf-core/sarek
@@ -322,8 +328,8 @@ nextflow pull nf-core/sarek
 
 It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.
 
-First, go to the [nf-core/sarek releases page](https://github.com/nf-core/sarek/releases) and find the latest version number - numeric only (eg. `3.1.1`).
-Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 3.1.1`. Of course, you can switch to another version by changing the number after the `-r` flag.
+First, go to the [nf-core/sarek releases page](https://github.com/nf-core/sarek/releases) and find the latest version number - numeric only (eg. `3.3.2`).
+Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 3.3.2`. Of course, you can switch to another version by changing the number after the `-r` flag.
 
 This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports.
 
@@ -448,7 +454,7 @@ When using default parameters only, sarek runs preprocessing and `Strelka2`.
 This is reflected in the default test profile:
 
 ```bash
-nextflow run nf-core/sarek -r 3.2.1 -profile test,<container/institute> --outdir results
+nextflow run nf-core/sarek -profile test,<container/institute> --outdir results
 ```
 
 Expected run output:
@@ -525,7 +531,7 @@ For more extensive testing purpose, we have the `test_cache` profile that contai
 Annotation is generally tested separately from the remaining workflow, since we use references for `C.elegans`, while the remaining tests are run on downsampled human data.
 
 ```bash
-nextflow run nf-core/sarek -r 3.2.1 -profile test_cache,<container/institute> --outdir results --tools snpeff --step annotation
+nextflow run nf-core/sarek -profile test_cache,<container/institute> --outdir results --tools snpeff --step annotation
 ```
 
 If you are interested in any of the other tests that are run on every code change or would like to run them yourself, you can take a look at `tests/<filename>.yml`.
@@ -962,7 +968,7 @@ aws s3 --no-sign-request ls s3://annotation-cache/vep_cache/
 
 Since both Snpeff and VEP are internally figuring the path towards the specific cache version / species, `annotation-cache` is using an extra set of keys to specify the species and genome build.
 
-So if you are using this resource, please either set `--use_annotation_cache_keys` to use the AWS annotation cache, or point towards your own cache folder structure matching the expected structure.
+These keys are handled internally by Sarek.
 
 Please refer to the [annotation-cache documentation](https://annotation-cache.github.io) for more details.
 
@@ -988,13 +994,13 @@ Else, it will be downloaded in `cache/` in the specified `--outdir` location.
 This command could be used to download the cache for both tools in the specified `--outdir_cache` location:
 
 ```bash
-nextflow run nf-core/sarek -r 3.3.0 --outdir results --outdir_cache /path_to/my-own-cache --tools vep,snpeff --download_cache --build_only_index --input false
+nextflow run nf-core/sarek --outdir results --outdir_cache /path_to/my-own-cache --tools vep,snpeff --download_cache --build_only_index --input false
 ```
 
 This command could be used to point to the recently downloaded cache and run SnpEff and VEP:
 
 ```bash
-nextflow run nf-core/sarek -r 3.3.0 --outdir results --vep_cache /path_to/my-own-cache/vep_cache --snpeff_cache /path_to/my-own-cache/snpeff_cache --tools vep,snpeff --input samplesheet_vcf.csv
+nextflow run nf-core/sarek --outdir results --vep_cache /path_to/my-own-cache/vep_cache --snpeff_cache /path_to/my-own-cache/snpeff_cache --tools vep,snpeff --input samplesheet_vcf.csv
 ```
 
 ### Create containers with pre-downloaded cache
@@ -1097,15 +1103,28 @@ Sarek also contains the Sentieon functions [DnaScope](https://support.sentieon.c
 
 ### Basic usage of Sentieon functions
 
-To use Sentieon's aligner `bwa mem`, set the aligner option `sentieon-bwamem`. (This can, for example, be done by adding `--aligner sentieon-bwamem` to the nextflow run command.)
+To use Sentieon's aligner `bwa mem`, set the aligner option `sentieon-bwamem`.
+(This can, for example, be done by adding `--aligner sentieon-bwamem` to the `nextflow run` command.)
 
-To use Sentieon's function `Dedup`, specify `sentieon_dedup` as one of the tools. (This can, for example, be done by adding `--tools sentieon_dedup` to the nextflow run command.)
+To use Sentieon's function `Dedup`, specify `sentieon_dedup` as one of the tools.
+(This can, for example, be done by adding `--tools sentieon_dedup` to the `nextflow run` command.)
 
-To use Sentieon's function `DNAscope`, specify `sentieon_dnascope` as one of the tools. This can, for example, be done by adding `--tools sentieon_dnascope` to the nextflow run command. In order to skip Sentieon's variant-filter `DNAModelApply`, one may add `--skip_tools dnascope_filter` to the nextflow run command. Sarek also provides the option `sentieon_dnascope_emit_mode` which can be used to set the [emit-mode](https://support.sentieon.com/manual/usages/general/#dnascope-algorithm) of Sentieon's dnascope. Sentieon's dnascope can output both a vcf-file and a gvcf-file in the same run; this is achieved by setting `sentieon_dnascope_emit_mode` to `<vcf_emit_mode>,gvcf`, where `<vcf_emit_mode>` is `variant`, `confident` or `all`.
+To use Sentieon's function `DNAscope`, specify `sentieon_dnascope` as one of the tools.
+This can, for example, be done by adding `--tools sentieon_dnascope` to the `nextflow run` command.
+In order to skip Sentieon's variant-filter `DNAModelApply`, one may add `--skip_tools dnascope_filter` to the `nextflow run` command.
+Sarek also provides the option `sentieon_dnascope_emit_mode` which can be used to set the [emit-mode](https://support.sentieon.com/manual/usages/general/#dnascope-algorithm) of Sentieon's dnascope.
+Sentieon's dnascope can output both a vcf-file and a gvcf-file in the same run; this is achieved by setting `sentieon_dnascope_emit_mode` to `<vcf_emit_mode>,gvcf`, where `<vcf_emit_mode>` is `variant`, `confident` or `all`.
 
-Sentieon's function `Haplotyper` is used in much the same way as `DNAscope`. To use Sentieon's function `Haplotyper`, specify `sentieon_haplotyper` as one of the tools. This can, for example, be done by adding `--tools sentieon_haplotyper` to the nextflow run command. In order to skip the GATK-based variant-filter, one may add `--skip_tools haplotyper_filter` to the nextflow run command. Sarek also provides the option `sentieon_haplotyper_emit_mode` which can be used to set the [emit-mode](https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm) of Sentieon's haplotyper. Sentieon's haplotyper can output both a vcf-file and a gvcf-file in the same run; this is achieved by setting `sentieon_haplotyper_emit_mode` to `<vcf_emit_mode>,gvcf`, where `<vcf_emit_mode>` is `variant`, `confident` or `all`.
+Sentieon's function `Haplotyper` is used in much the same way as `DNAscope`.
+To use Sentieon's function `Haplotyper`, specify `sentieon_haplotyper` as one of the tools.
+This can, for example, be done by adding `--tools sentieon_haplotyper` to the `nextflow run` command.
+In order to skip the GATK-based variant-filter, one may add `--skip_tools haplotyper_filter` to the `nextflow run` command.
+Sarek also provides the option `sentieon_haplotyper_emit_mode` which can be used to set the [emit-mode](https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm) of Sentieon's haplotyper.
+Sentieon's haplotyper can output both a vcf-file and a gvcf-file in the same run; this is achieved by setting `sentieon_haplotyper_emit_mode` to `<vcf_emit_mode>,gvcf`, where `<vcf_emit_mode>` is `variant`, `confident` or `all`.
 
-To use Sentieon's function `GVCFtyper` along with Sention's version of VQSR (`VarCal` and `ApplyVarCal`) for joint-germline genotyping, specify `sentieon_haplotyper` as one of the tools, set the option `sentieon_haplotyper_emit_mode` to `gvcf`, and add the option `joint_germline`. This can, for example, be done by adding `--tools sentieon_haplotyper --joint_germline --sentieon_haplotyper_emit_mode gvcf` to the nextflow run command. If `sentieon_dnascope` is chosen instead of `sentieon_haplotyper`, then Sention's version of VQSR is skipped, as recommended by Sentieon.
+To use Sentieon's function `GVCFtyper` along with Sentieon's version of VQSR (`VarCal` and `ApplyVarCal`) for joint-germline genotyping, specify `sentieon_haplotyper` as one of the tools, set the option `sentieon_haplotyper_emit_mode` to `gvcf`, and add the option `joint_germline`.
+This can, for example, be done by adding `--tools sentieon_haplotyper --joint_germline --sentieon_haplotyper_emit_mode gvcf` to the `nextflow run` command.
+If `sentieon_dnascope` is chosen instead of `sentieon_haplotyper`, then Sentieon's version of VQSR is skipped, as recommended by Sentieon.
 
 ### Joint germline variant calling
 

@@ -15,7 +15,7 @@ params {
 
     // References
     genome           = 'GATK.GRCh38'
-    igenomes_base    = 's3://ngi-igenomes/igenomes'
+    igenomes_base    = 's3://ngi-igenomes/igenomes/'
     igenomes_ignore  = false
     save_reference   = false // Built references not saved
     build_only_index = false // Only build the reference indexes
@@ -86,7 +86,6 @@ params {
     spliceai_indel_tbi        = null // No spliceai_indel file index
     spliceai_snv              = null // No spliceai_snv file
     spliceai_snv_tbi          = null // No spliceai_snv file index
-    use_annotation_cache_keys = false
     vep_cache                 = 's3://annotation-cache/vep_cache/'
     vep_custom_args           = "--everything --filter_common --per_gene --total_length --offline --format vcf" // Default arguments for VEP
     vep_dbnsfp                = null // dbnsfp plugin disabled within VEP

@@ -547,12 +547,6 @@
                     "description": "Should reflect the VEP version used in the container.",
                     "help_text": "Used by the loftee plugin that needs the full path."
                 },
-                "use_annotation_cache_keys": {
-                    "type": "boolean",
-                    "fa_icon": "fas fa-toolbox",
-                    "description": "Use annotation cache keys for snpeff_cache and vep_cache.\nOnly when using annotation-cache or a similar structure.\nSee [here](https://annotation-cache.github.io/) for more information.",
-                    "hidden": true
-                },
                 "outdir_cache": {
                     "type": "string",
                     "format": "directory-path",
@@ -768,13 +762,13 @@
                     "type": "string",
                     "fa_icon": "fas fa-microscope",
                     "description": "snpEff genome.",
-                    "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the genome when using the container with pre-downloaded cache."
+                    "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the genome when looking for local cache, or cloud based cache."
                 },
                 "vep_genome": {
                     "type": "string",
                     "fa_icon": "fas fa-microscope",
                     "description": "VEP genome.",
-                    "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the genome when using the container with pre-downloaded cache."
+                    "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the genome when looking for local cache, or cloud based cache."
                 },
                 "vep_species": {
                     "type": "string",
@@ -804,7 +798,7 @@
                     "type": "boolean",
                     "fa_icon": "fas fa-download",
                     "description": "Download annotation cache.",
-                    "help_text": "Set this parameter, if you wish to download annotation cache."
+                    "help_text": "Set this parameter, if you wish to download annotation cache.\nUsing this parameter will download cache even if --snpeff_cache and --vep_cache are provided."
                 },
                 "igenomes_base": {
                     "type": "string",

@@ -1,5 +1,5 @@
 //
-// PREPARE CACHE
+// DOWNLOAD CACHE SNPEFF VEP
 //
 
 // Initialize channels based on params or indices that were just built
@@ -11,7 +11,7 @@
 include { ENSEMBLVEP_DOWNLOAD } from '../../../modules/nf-core/ensemblvep/download/main'
 include { SNPEFF_DOWNLOAD     } from '../../../modules/nf-core/snpeff/download/main'
 
-workflow PREPARE_CACHE {
+workflow DOWNLOAD_CACHE_SNPEFF_VEP {
     take:
     ensemblvep_info
     snpeff_info