From 216d1dd1df430227166ec50a2fe39ab17ca903a2 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 10 May 2022 16:30:33 +0200 Subject: [PATCH 01/10] add cnvkit_tumoronly [skip actions] --- conf/modules.config | 11 ++++++++ subworkflows/local/tumor_variant_calling.nf | 9 +++++++ .../variantcalling/cnvkit/tumoronly/main.nf | 26 +++++++++++++++++++ 3 files changed, 46 insertions(+) create mode 100644 subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf diff --git a/conf/modules.config b/conf/modules.config index a47053d42..5776fa8be 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -868,6 +868,17 @@ process{ } } + // CNVKIT_TUMORONLY + + withName: 'CNVKIT_TUMORONLY' { + ext.when = { params.tools && params.tools.contains('cnvkit') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.id}/cnvkit" }, + pattern: "*{bed,cnn,cnr,cns}" + ] + } + //MANTA withName: 'CONCAT_MANTA_SOMATIC' { ext.prefix = {"${meta.id}.somatic_sv"} diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index c3ebcf1fb..2c0ae283a 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -9,6 +9,7 @@ include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf- include { RUN_MANTA_TUMORONLY } from '../nf-core/variantcalling/manta/tumoronly/main.nf' include { RUN_STRELKA_SINGLE } from '../nf-core/variantcalling/strelka/single/main.nf' include { RUN_CONTROLFREEC_TUMORONLY } from '../nf-core/variantcalling/controlfreec/tumoronly/main.nf' +include { RUN_CNVKIT_TUMORONLY } from '../nf-core/variantcalling/cnvkit/tumoronly/main.nf' workflow TUMOR_ONLY_VARIANT_CALLING { take: @@ -88,6 +89,14 @@ workflow TUMOR_ONLY_VARIANT_CALLING { ch_versions = ch_versions.mix(RUN_CONTROLFREEC_TUMORONLY.out.versions) } + if(tools.contains('cnvkit')){ + cram_recalibrated_cnvkit = cram_recalibrated + .map(meta, cram, crai) -> + [meta, cram] + } 
+ RUN_CNVKIT_TUMORONLY (cram_recalibrated_cnvkit, [], [], reference) + + if (tools.contains('freebayes')){ // Remap channel for Freebayes cram_recalibrated_intervals_freebayes = cram_recalibrated_intervals diff --git a/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf new file mode 100644 index 000000000..16c3275b0 --- /dev/null +++ b/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf @@ -0,0 +1,26 @@ +// +// CNV calling TUMOR_ONLY +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include {CNVKIT_BATCH as CNVKIT_BATCH_TUMORONLY } from '../../../../../modules/nf-core/modules/cnvkit/batch/main' + +workflow RUN_CNVKIT_TUMORONLY { + take: + cram_recalibrated // channel: [mandatory] cram tumor + fasta // channel: [mandatory] fasta + targets // channel: [mandatory] bed + reference // channel: [mandatory] cnn + + main: + ch_versions = Channel.empty() + + CNVKIT_BATCH_TUMORONLY(cram_recalibrated, fasta, targets, reference) + + ch_versions = ch_versions.mix(CNVKIT_BATCH_TUMORONLY.out.versions.first()) + + emit: + versions = ch_versions // channel: [ versions.yml ] + +} From bfcaa9bc11107a41d94127a33a1f53c20b38b773 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Fri, 13 May 2022 13:09:13 +0200 Subject: [PATCH 02/10] added local modules for tumoronly [skip actions] --- conf/modules.config | 1 + modules/local/cnvkit/antitarget.nf | 36 +++++++++++++++++ modules/local/cnvkit/reference.nf | 39 +++++++++++++++++++ subworkflows/local/tumor_variant_calling.nf | 15 +++++-- .../variantcalling/cnvkit/tumoronly/main.nf | 17 +++++++- 5 files changed, 102 insertions(+), 6 deletions(-) create mode 100644 modules/local/cnvkit/antitarget.nf create mode 100644 modules/local/cnvkit/reference.nf diff --git a/conf/modules.config b/conf/modules.config index 5776fa8be..f82fd1feb 100644 --- a/conf/modules.config +++ 
b/conf/modules.config @@ -871,6 +871,7 @@ process{ // CNVKIT_TUMORONLY withName: 'CNVKIT_TUMORONLY' { + ext.args = { params.wes ? "--method hybrid --diagram --scatter" : "--method wgs --diagram --scatter" } ext.when = { params.tools && params.tools.contains('cnvkit') } publishDir = [ mode: params.publish_dir_mode, diff --git a/modules/local/cnvkit/antitarget.nf b/modules/local/cnvkit/antitarget.nf new file mode 100644 index 000000000..ca19a8ae1 --- /dev/null +++ b/modules/local/cnvkit/antitarget.nf @@ -0,0 +1,36 @@ +process CNVKIT_ANTITARGET { + label 'process_low' + + conda (params.enable_conda ? "bioconda::cnvkit=0.9.9" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.9--pyhdfd78af_0': + 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" + + input: + path targets + + output: + path("*.bed"), emit: BED + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "antitarget" + + """ + cnvkit.py \\ + antitarget \\ + $targets \\ + --output ${prefix}.bed \\ + $args + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ +} diff --git a/modules/local/cnvkit/reference.nf b/modules/local/cnvkit/reference.nf new file mode 100644 index 000000000..3aa8a1d8a --- /dev/null +++ b/modules/local/cnvkit/reference.nf @@ -0,0 +1,39 @@ +process CNVKIT_REFERENCE { + label 'process_low' + + conda (params.enable_conda ? "bioconda::cnvkit=0.9.9" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.9--pyhdfd78af_0': + 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" + + input: + path fasta + path targets + path antitargets + + output: + path("*.cnn"), emit: CNN + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "reference" + """ + cnvkit.py \\ + reference \\ + --fasta $fasta \\ + --targets $targets \\ + --antitargets $antitargets \\ + $args \\ + --output ${prefix}.cnn + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ +} diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 2c0ae283a..19ac0fa7f 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -91,11 +91,18 @@ workflow TUMOR_ONLY_VARIANT_CALLING { if(tools.contains('cnvkit')){ cram_recalibrated_cnvkit = cram_recalibrated - .map(meta, cram, crai) -> - [meta, cram] - } - RUN_CNVKIT_TUMORONLY (cram_recalibrated_cnvkit, [], [], reference) + .map{ meta, cram, crai -> + [meta, cram, []] + } + + RUN_CNVKIT_TUMORONLY ( cram_recalibrated_cnvkit, + fasta, + intervals_bed_combined, + [] ) + + ch_versions = ch_versions.mix(RUN_CNVKIT_TUMORONLY.out.versions) + } if (tools.contains('freebayes')){ // Remap channel for Freebayes diff --git a/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf index 16c3275b0..d90058f95 100644 --- a/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf +++ b/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf @@ -4,7 +4,10 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include {CNVKIT_BATCH as CNVKIT_BATCH_TUMORONLY } from 
'../../../../../modules/nf-core/modules/cnvkit/batch/main' + +include {CNVKIT_ANTITARGET } from '../../../../../modules/local/cnvkit/antitarget' +include {CNVKIT_REFERENCE } from '../../../../../modules/local/cnvkit/reference' +include {CNVKIT_BATCH as CNVKIT_BATCH_TUMORONLY } from '../../../../../modules/nf-core/modules/cnvkit/batch/main' workflow RUN_CNVKIT_TUMORONLY { take: @@ -16,8 +19,18 @@ workflow RUN_CNVKIT_TUMORONLY { main: ch_versions = Channel.empty() - CNVKIT_BATCH_TUMORONLY(cram_recalibrated, fasta, targets, reference) + // prepare a reference for tumor_only mode based on target_baits + + CNVKIT_ANTITARGET(targets) + + CNVKIT_REFERENCE(fasta, targets, CNVKIT_ANTITARGET.out.BED) + + // use reference for calling CNVs + + CNVKIT_BATCH_TUMORONLY(cram_recalibrated, [], [], CNVKIT_REFERENCE.out.CNN) + ch_versions = ch_versions.mix(CNVKIT_ANTITARGET.out.versions) + ch_versions = ch_versions.mix(CNVKIT_REFERENCE.out.versions) ch_versions = ch_versions.mix(CNVKIT_BATCH_TUMORONLY.out.versions.first()) emit: From 3d921d2d50625405bdff1fd81ac0e29b42670dc2 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Fri, 13 May 2022 13:12:35 +0200 Subject: [PATCH 03/10] rm old main [skip actions] --- subworkflows/nf-core/variantcalling/cnvkit/main.nf | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 subworkflows/nf-core/variantcalling/cnvkit/main.nf diff --git a/subworkflows/nf-core/variantcalling/cnvkit/main.nf b/subworkflows/nf-core/variantcalling/cnvkit/main.nf deleted file mode 100644 index e69de29bb..000000000 From 691a26e3dbe33662b61de86e6352524578449328 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Fri, 13 May 2022 13:43:50 +0200 Subject: [PATCH 04/10] adjusted space [skip actions] --- modules/local/cnvkit/antitarget.nf | 2 +- modules/local/cnvkit/reference.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/cnvkit/antitarget.nf b/modules/local/cnvkit/antitarget.nf index ca19a8ae1..38456942c 100644 --- 
a/modules/local/cnvkit/antitarget.nf +++ b/modules/local/cnvkit/antitarget.nf @@ -10,7 +10,7 @@ process CNVKIT_ANTITARGET { path targets output: - path("*.bed"), emit: BED + path("*.bed") , emit: BED path "versions.yml" , emit: versions when: diff --git a/modules/local/cnvkit/reference.nf b/modules/local/cnvkit/reference.nf index 3aa8a1d8a..05ff5ba20 100644 --- a/modules/local/cnvkit/reference.nf +++ b/modules/local/cnvkit/reference.nf @@ -12,7 +12,7 @@ process CNVKIT_REFERENCE { path antitargets output: - path("*.cnn"), emit: CNN + path("*.cnn") , emit: CNN path "versions.yml" , emit: versions when: From ef98d4a285c26df759285efc3c6e06a1d3e3c04b Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 24 May 2022 09:09:34 +0200 Subject: [PATCH 05/10] add tumoronly to modules.config --- conf/modules.config | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index 5e447ce78..c2f89f469 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -904,6 +904,17 @@ process{ } } + // CNVKIT_TUMORONLY + + withName: 'CNVKIT_TUMORONLY' { + ext.args = { params.wes ? 
"--method hybrid --diagram --scatter" : "--method wgs --diagram --scatter" } + ext.when = { params.tools && params.tools.contains('cnvkit') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.id}/cnvkit" }, + pattern: "*{bed,cnn,cnr,cns,pdf,png}" + ] + //FREEBAYES withName: 'NFCORE_SAREK:SAREK:PAIR_VARIANT_CALLING:RUN_FREEBAYES_SOMATIC:FREEBAYES' { ext.args = "--pooled-continuous \ From d257fb4df9f66cab9181575dafcd30b15f8b154e Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 24 May 2022 10:42:51 +0200 Subject: [PATCH 06/10] add new nf-core/cnvkit modules [skip_actions] --- conf/modules.config | 1 + modules.json | 8 +++- .../nf-core/modules/cnvkit/antitarget/main.nf | 36 ++++++++++++++ .../modules/cnvkit/antitarget/meta.yml | 44 +++++++++++++++++ modules/nf-core/modules/cnvkit/batch/main.nf | 47 ++++++++++++++----- modules/nf-core/modules/cnvkit/batch/meta.yml | 32 ++++--------- .../nf-core/modules/cnvkit/reference/main.nf | 39 +++++++++++++++ .../nf-core/modules/cnvkit/reference/meta.yml | 47 +++++++++++++++++++ subworkflows/local/tumor_variant_calling.nf | 1 - .../variantcalling/cnvkit/tumoronly/main.nf | 16 ++++--- 10 files changed, 227 insertions(+), 44 deletions(-) create mode 100644 modules/nf-core/modules/cnvkit/antitarget/main.nf create mode 100644 modules/nf-core/modules/cnvkit/antitarget/meta.yml create mode 100644 modules/nf-core/modules/cnvkit/reference/main.nf create mode 100644 modules/nf-core/modules/cnvkit/reference/meta.yml diff --git a/conf/modules.config b/conf/modules.config index c2f89f469..af2d56f93 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -914,6 +914,7 @@ process{ path: { "${params.outdir}/variant_calling/${meta.id}/cnvkit" }, pattern: "*{bed,cnn,cnr,cns,pdf,png}" ] + } //FREEBAYES withName: 'NFCORE_SAREK:SAREK:PAIR_VARIANT_CALLING:RUN_FREEBAYES_SOMATIC:FREEBAYES' { diff --git a/modules.json b/modules.json index 02497f14d..0959f26df 100644 --- a/modules.json +++ 
b/modules.json @@ -30,8 +30,14 @@ "cat/fastq": { "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" }, + "cnvkit/antitarget": { + "git_sha": "58c5ec2f4eb44eff1fb7cc9e02df07bd448c6aaf" + }, "cnvkit/batch": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "9096be5464d800156761b7c73c574f5a5bad2f72" + }, + "cnvkit/reference": { + "git_sha": "bbee9e3c3bea54e8cedc0d8ec68619b1d05dae2e" }, "controlfreec/assesssignificance": { "git_sha": "9ae34a01d1747019fd37753ff4cafb05aec35a2b" diff --git a/modules/nf-core/modules/cnvkit/antitarget/main.nf b/modules/nf-core/modules/cnvkit/antitarget/main.nf new file mode 100644 index 000000000..bf6461bdb --- /dev/null +++ b/modules/nf-core/modules/cnvkit/antitarget/main.nf @@ -0,0 +1,36 @@ +process CNVKIT_ANTITARGET { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::cnvkit=0.9.9" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.9--pyhdfd78af_0': + 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" + + input: + tuple val(meta), path(targets) + + output: + tuple val(meta), path("*.bed"), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + cnvkit.py \\ + antitarget \\ + $targets \\ + --output ${prefix}.antitarget.bed \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/cnvkit/antitarget/meta.yml b/modules/nf-core/modules/cnvkit/antitarget/meta.yml new file mode 100644 index 000000000..28a2bfa40 --- /dev/null +++ b/modules/nf-core/modules/cnvkit/antitarget/meta.yml @@ -0,0 +1,44 @@ +name: cnvkit_antitarget +description: +keywords: + - cvnkit + - antitarget +tools: + - cnvkit: + 
description: | + CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. + It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. + homepage: https://cnvkit.readthedocs.io/en/stable/index.html + documentation: https://cnvkit.readthedocs.io/en/stable/index.html + tool_dev_url: "https://github.com/etal/cnvkit" + doi: 10.1371/journal.pcbi.1004873 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - targets: + type: file + description: File containing genomic regions + pattern: "*.{bed}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: File containing off-target regions + pattern: "*.{bed}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@SusiJo" diff --git a/modules/nf-core/modules/cnvkit/batch/main.nf b/modules/nf-core/modules/cnvkit/batch/main.nf index 7c44d9f61..2e89088e4 100644 --- a/modules/nf-core/modules/cnvkit/batch/main.nf +++ b/modules/nf-core/modules/cnvkit/batch/main.nf @@ -2,10 +2,10 @@ process CNVKIT_BATCH { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? 'bioconda::cnvkit=0.9.9' : null) + conda (params.enable_conda ? 'bioconda::cnvkit=0.9.9 bioconda::samtools=1.15.1' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.9--pyhdfd78af_0' : - 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:304d1c5ab610f216e77c61420ebe85f1e7c5968a-0' : + 'quay.io/biocontainers/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:304d1c5ab610f216e77c61420ebe85f1e7c5968a-0' }" input: tuple val(meta), path(tumor), path(normal) @@ -18,6 +18,8 @@ process CNVKIT_BATCH { tuple val(meta), path("*.cnn"), emit: cnn, optional: true tuple val(meta), path("*.cnr"), emit: cnr, optional: true tuple val(meta), path("*.cns"), emit: cns, optional: true + tuple val(meta), path("*.pdf"), emit: pdf, optional: true + tuple val(meta), path("*.png"), emit: png, optional: true path "versions.yml" , emit: versions when: @@ -25,21 +27,40 @@ process CNVKIT_BATCH { script: def args = task.ext.args ?: '' - def normal_args = normal ? "--normal $normal" : "" - def fasta_args = fasta ? "--fasta $fasta" : "" - def reference_args = reference ? "--reference $reference" : "" + print args - def target_args = "" - if (args.contains("--method wgs") || args.contains("-m wgs")) { - target_args = targets ? "--targets $targets" : "" - } - else { - target_args = "--targets $targets" + // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow + // input pair is assumed to have same extension if both exist + def is_cram = tumor.Extension == "cram" ? true : false + def tumor_out = is_cram ? tumor.BaseName + ".bam" : "${tumor}" + + // do not run samtools on normal samples in tumor_only mode + def normal_exists = normal ? true: false + // tumor_only mode does not need fasta & target + // instead it requires a pre-computed reference.cnn which is built from fasta & target + def (normal_out, normal_args, fasta_args) = ["", "", ""] + + if (normal_exists){ + def normal_prefix = normal.BaseName + normal_out = is_cram ? 
"${normal_prefix}" + ".bam" : "${normal}" + normal_args = normal_prefix ? "--normal $normal_out" : "" + fasta_args = fasta ? "--fasta $fasta" : "" } + + def target_args = targets ? "--targets $targets" : "" + def reference_args = reference ? "--reference $reference" : "" + """ + if $is_cram; then + samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out + if $normal_exists; then + samtools view -T $fasta $normal -@ $task.cpus -o $normal_out + fi + fi + cnvkit.py \\ batch \\ - $tumor \\ + $tumor_out \\ $normal_args \\ $fasta_args \\ $reference_args \\ diff --git a/modules/nf-core/modules/cnvkit/batch/meta.yml b/modules/nf-core/modules/cnvkit/batch/meta.yml index 474c55f21..2cd675c77 100644 --- a/modules/nf-core/modules/cnvkit/batch/meta.yml +++ b/modules/nf-core/modules/cnvkit/batch/meta.yml @@ -11,27 +11,6 @@ tools: homepage: https://cnvkit.readthedocs.io/en/stable/index.html documentation: https://cnvkit.readthedocs.io/en/stable/index.html licence: ["Apache-2.0"] -params: - - outdir: - type: string - description: | - The pipeline's output directory. By default, the module will - output files into `$params.outdir/` - - publish_dir_mode: - type: string - description: | - Value for the Nextflow `publishDir` mode parameter. - Available: symlink, rellink, link, copy, copyNoFollow, move. - - enable_conda: - type: boolean - description: | - Run the module with Conda using the software specified - via the `conda` directive - - singularity_pull_docker_container: - type: boolean - description: | - Instead of directly downloading Singularity images for use with Singularity, - force the workflow to pull and convert Docker containers instead. 
input: - meta: type: map @@ -49,7 +28,7 @@ input: - fasta: type: file description: | - Input reference genome fasta file + Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided) - targetfile: type: file description: | @@ -80,6 +59,14 @@ output: type: file description: File containing copy number segment information pattern: "*.{cns}" + - pdf: + type: file + description: File with plot of copy numbers or segments on chromosomes + pattern: "*.{pdf}" + - png: + type: file + description: File with plot of bin-level log2 coverages and segmentation calls + pattern: "*.{png}" - versions: type: file description: File containing software versions @@ -91,3 +78,4 @@ authors: - "@drpatelh" - "@fbdtemme" - "@lassefolkersen" + - "@SusiJo" diff --git a/modules/nf-core/modules/cnvkit/reference/main.nf b/modules/nf-core/modules/cnvkit/reference/main.nf new file mode 100644 index 000000000..992d768f1 --- /dev/null +++ b/modules/nf-core/modules/cnvkit/reference/main.nf @@ -0,0 +1,39 @@ +process CNVKIT_REFERENCE { + tag "$fasta" + label 'process_low' + + conda (params.enable_conda ? "bioconda::cnvkit=0.9.9" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.9--pyhdfd78af_0': + 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" + + input: + path fasta + path targets + path antitargets + + output: + path("*.cnn") , emit: cnn + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + cnvkit.py \\ + reference \\ + --fasta $fasta \\ + --targets $targets \\ + --antitargets $antitargets \\ + --output reference.cnn \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/cnvkit/reference/meta.yml b/modules/nf-core/modules/cnvkit/reference/meta.yml new file mode 100644 index 000000000..2e0fef1aa --- /dev/null +++ b/modules/nf-core/modules/cnvkit/reference/meta.yml @@ -0,0 +1,47 @@ +name: cnvkit_reference +description: +keywords: + - cnvkit + - reference +tools: + - cnvkit: + description: | + CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. + It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. + homepage: https://cnvkit.readthedocs.io/en/stable/index.html + documentation: https://cnvkit.readthedocs.io/en/stable/index.html + tool_dev_url: https://github.com/etal/cnvkit + doi: 10.1371/journal.pcbi.1004873 + licence: ["Apache-2.0"] + +input: + - fasta: + type: file + description: File containing reference genome + pattern: "*.{fasta}" + - targets: + type: file + description: File containing genomic regions + pattern: "*.{bed}" + - antitargets: + type: file + description: File containing off-target genomic regions + pattern: "*.{bed}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reference: + type: file + description: File containing a copy-number reference (required for CNV calling in tumor_only mode) + pattern: "*.{cnn}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@SusiJo" diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index e0195a073..338a2a32c 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -90,7 +90,6 @@ workflow TUMOR_ONLY_VARIANT_CALLING { [meta, cram, []] } - RUN_CNVKIT_TUMORONLY ( cram_recalibrated_cnvkit, fasta, intervals_bed_combined, diff --git a/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf index d90058f95..3c5242d82 100644 --- a/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf +++ b/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf @@ -5,8 +5,8 @@ // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include {CNVKIT_ANTITARGET } from '../../../../../modules/local/cnvkit/antitarget' -include {CNVKIT_REFERENCE } from '../../../../../modules/local/cnvkit/reference' +include {CNVKIT_ANTITARGET } from '../../../../../modules/nf-core/modules/cnvkit/antitarget/main' +include {CNVKIT_REFERENCE } from '../../../../../modules/nf-core/modules/cnvkit/reference/main' include {CNVKIT_BATCH as CNVKIT_BATCH_TUMORONLY } from '../../../../../modules/nf-core/modules/cnvkit/batch/main' workflow RUN_CNVKIT_TUMORONLY { @@ -14,24 +14,26 @@ workflow RUN_CNVKIT_TUMORONLY { cram_recalibrated // channel: [mandatory] cram tumor fasta // channel: [mandatory] fasta targets // channel: [mandatory] bed - reference // channel: [mandatory] cnn + reference // channel: [] cnn main: ch_versions = Channel.empty() // prepare a reference for tumor_only mode based on target_baits + 
targets.view() - CNVKIT_ANTITARGET(targets) + CNVKIT_ANTITARGET(targets.map{ it -> [[id:it[0].baseName], it] }) - CNVKIT_REFERENCE(fasta, targets, CNVKIT_ANTITARGET.out.BED) + CNVKIT_REFERENCE(fasta, targets, CNVKIT_ANTITARGET.out.bed.map{ meta,bed -> [bed]} ) // use reference for calling CNVs + // cram_input needs the fasta reference genome for bam_conversion - CNVKIT_BATCH_TUMORONLY(cram_recalibrated, [], [], CNVKIT_REFERENCE.out.CNN) + CNVKIT_BATCH_TUMORONLY(cram_recalibrated, fasta, [], CNVKIT_REFERENCE.out.cnn) ch_versions = ch_versions.mix(CNVKIT_ANTITARGET.out.versions) ch_versions = ch_versions.mix(CNVKIT_REFERENCE.out.versions) - ch_versions = ch_versions.mix(CNVKIT_BATCH_TUMORONLY.out.versions.first()) + ch_versions = ch_versions.mix(CNVKIT_BATCH_TUMORONLY.out.versions) emit: versions = ch_versions // channel: [ versions.yml ] From c2467b8583a5cc66f7433aa4c2fed80b3cfdfd74 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 24 May 2022 16:01:08 +0200 Subject: [PATCH 07/10] updated cnvkit tumor_only --- CHANGELOG.md | 3 ++- conf/modules.config | 23 ++++++++++++++++++- modules.json | 2 +- modules/nf-core/modules/cnvkit/batch/main.nf | 1 - .../nf-core/modules/cnvkit/reference/main.nf | 13 ++++++----- .../variantcalling/cnvkit/tumoronly/main.nf | 1 - 6 files changed, 32 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4643bab21..350549bcd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#533](https://github.com/nf-core/sarek/pull/533) - Add param `--only_paired_variant_calling` to allow skipping of germline variantcalling for paired samples - [#536](https://github.com/nf-core/sarek/pull/536) - Add `--step markduplicates` to start from duplicate marking, `--step prepare_recalibration` now ONLY starts at process `BaseRecalibrator` & adding `bam` and `cram` input support for `--step` `markduplicates`, `prepare_recalibration`, `recalibrate`, 
and `variant_calling` - [#538](https://github.com/nf-core/sarek/pull/538) - Add param `--seq_platform`, default: `ILLUMINA` +- [#545](https://github.com/nf-core/sarek/pull/545) - Add modules and subworkflows for `cnvkit` tumor_only mode ### Changed @@ -73,7 +74,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#513](https://github.com/nf-core/sarek/pull/513), [#527](https://github.com/nf-core/sarek/pull/527) - CNV is back - [#529](https://github.com/nf-core/sarek/pull/529) - Do not save `versions.yml` files - [#524](https://github.com/nf-core/sarek/pull/524) - Fix intervals usage by counting the actual list of scatter/gather files produced and not overall number of intervals -- [#549](https://github.com/nf-core/sarek/pull/549) - Fix unique lanes required for Freebayes: issue [#311](https://github.com/nf-core/sarek/issues/311), replaces `meta.clone()` with actual copy of map to avoid issues with https://nfcore.slack.com/archives/C027CM7P08M/p1644241819942339 +- [#549](https://github.com/nf-core/sarek/pull/549) - Fix unique lanes required for Freebayes: issue [#311](https://github.com/nf-core/sarek/issues/311), replaces `meta.clone()` with actual copy of map to avoid the issues described in [this Slack thread](https://nfcore.slack.com/archives/C027CM7P08M/p1644241819942339) ### Deprecated diff --git a/conf/modules.config b/conf/modules.config index af2d56f93..5ef5f9430 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -906,7 +906,27 @@ process{ // CNVKIT_TUMORONLY - withName: 'CNVKIT_TUMORONLY' { + withName: 'CNVKIT_ANTITARGET' { + ext.when = { params.tools && params.tools.contains('cnvkit') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "*{bed}" + ] + } + + + withName: 'CNVKIT_REFERENCE' { + ext.prefix = "cnvkit" + ext.when = { params.tools && params.tools.contains('cnvkit') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "*{cnn}" + ] + } + + withName: 'CNVKIT_BATCH_TUMORONLY' { ext.args = { 
params.wes ? "--method hybrid --diagram --scatter" : "--method wgs --diagram --scatter" } ext.when = { params.tools && params.tools.contains('cnvkit') } publishDir = [ @@ -916,6 +936,7 @@ process{ ] } + //FREEBAYES withName: 'NFCORE_SAREK:SAREK:PAIR_VARIANT_CALLING:RUN_FREEBAYES_SOMATIC:FREEBAYES' { ext.args = "--pooled-continuous \ diff --git a/modules.json b/modules.json index 0959f26df..cfb9facba 100644 --- a/modules.json +++ b/modules.json @@ -37,7 +37,7 @@ "git_sha": "9096be5464d800156761b7c73c574f5a5bad2f72" }, "cnvkit/reference": { - "git_sha": "bbee9e3c3bea54e8cedc0d8ec68619b1d05dae2e" + "git_sha": "8208140d21f3a754fff2e177db7a0e570fa2af6e" }, "controlfreec/assesssignificance": { "git_sha": "9ae34a01d1747019fd37753ff4cafb05aec35a2b" diff --git a/modules/nf-core/modules/cnvkit/batch/main.nf b/modules/nf-core/modules/cnvkit/batch/main.nf index 2e89088e4..c1f9ef87b 100644 --- a/modules/nf-core/modules/cnvkit/batch/main.nf +++ b/modules/nf-core/modules/cnvkit/batch/main.nf @@ -27,7 +27,6 @@ process CNVKIT_BATCH { script: def args = task.ext.args ?: '' - print args // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow // input pair is assumed to have same extension if both exist diff --git a/modules/nf-core/modules/cnvkit/reference/main.nf b/modules/nf-core/modules/cnvkit/reference/main.nf index 992d768f1..10458f278 100644 --- a/modules/nf-core/modules/cnvkit/reference/main.nf +++ b/modules/nf-core/modules/cnvkit/reference/main.nf @@ -8,19 +8,20 @@ process CNVKIT_REFERENCE { 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" input: - path fasta - path targets - path antitargets + path fasta + path targets + path antitargets output: - path("*.cnn") , emit: cnn - path "versions.yml" , emit: versions + path "*.cnn" , emit: cnn + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: targets.BaseName """ cnvkit.py 
\\ @@ -28,7 +29,7 @@ process CNVKIT_REFERENCE { --fasta $fasta \\ --targets $targets \\ --antitargets $antitargets \\ - --output reference.cnn \\ + --output ${prefix}.reference.cnn \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf index 3c5242d82..f9ae82881 100644 --- a/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf +++ b/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf @@ -20,7 +20,6 @@ workflow RUN_CNVKIT_TUMORONLY { ch_versions = Channel.empty() // prepare a reference for tumor_only mode based on target_baits - targets.view() CNVKIT_ANTITARGET(targets.map{ it -> [[id:it[0].baseName], it] }) From abbc59cd1ae2965bcc7bddaed65c41b9e7ecc23b Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 24 May 2022 16:05:22 +0200 Subject: [PATCH 08/10] rm temporary local cnvkit modules --- modules/local/cnvkit/antitarget.nf | 36 --------------------------- modules/local/cnvkit/reference.nf | 39 ------------------------------ 2 files changed, 75 deletions(-) delete mode 100644 modules/local/cnvkit/antitarget.nf delete mode 100644 modules/local/cnvkit/reference.nf diff --git a/modules/local/cnvkit/antitarget.nf b/modules/local/cnvkit/antitarget.nf deleted file mode 100644 index 38456942c..000000000 --- a/modules/local/cnvkit/antitarget.nf +++ /dev/null @@ -1,36 +0,0 @@ -process CNVKIT_ANTITARGET { - label 'process_low' - - conda (params.enable_conda ? "bioconda::cnvkit=0.9.9" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.9--pyhdfd78af_0': - 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" - - input: - path targets - - output: - path("*.bed") , emit: BED - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "antitarget" - - """ - cnvkit.py \\ - antitarget \\ - $targets \\ - --output ${prefix}.bed \\ - $args - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") - END_VERSIONS - """ -} diff --git a/modules/local/cnvkit/reference.nf b/modules/local/cnvkit/reference.nf deleted file mode 100644 index 05ff5ba20..000000000 --- a/modules/local/cnvkit/reference.nf +++ /dev/null @@ -1,39 +0,0 @@ -process CNVKIT_REFERENCE { - label 'process_low' - - conda (params.enable_conda ? "bioconda::cnvkit=0.9.9" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.9--pyhdfd78af_0': - 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" - - input: - path fasta - path targets - path antitargets - - output: - path("*.cnn") , emit: CNN - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "reference" - """ - cnvkit.py \\ - reference \\ - --fasta $fasta \\ - --targets $targets \\ - --antitargets $antitargets \\ - $args \\ - --output ${prefix}.cnn - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") - END_VERSIONS - """ -} From d81aef09771415c1224d40d797e1ba334af55ee3 Mon Sep 17 00:00:00 2001 From: SusiJo <43847534+SusiJo@users.noreply.github.com> Date: Mon, 30 May 2022 09:16:16 +0200 Subject: [PATCH 09/10] rm line update conf/modules.config Co-authored-by: FriederikeHanssen --- conf/modules.config | 1 - 1 file changed, 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 5ef5f9430..7cdd29651 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -915,7 +915,6 @@ process{ ] } - withName: 'CNVKIT_REFERENCE' { ext.prefix = "cnvkit" ext.when = { params.tools && params.tools.contains('cnvkit') } From 918e8415c41b17e8b8930552278e7d8ad5c5dc33 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Mon, 30 May 2022 09:30:43 +0200 Subject: [PATCH 10/10] adjusted whitespaces --- conf/modules.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 7cdd29651..39d8b6d1d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -906,7 +906,7 @@ process{ // CNVKIT_TUMORONLY - withName: 'CNVKIT_ANTITARGET' { + withName: 'CNVKIT_ANTITARGET' { ext.when = { params.tools && params.tools.contains('cnvkit') } publishDir = [ mode: params.publish_dir_mode, @@ -915,7 +915,7 @@ process{ ] } - withName: 'CNVKIT_REFERENCE' { + 
withName: 'CNVKIT_REFERENCE' { ext.prefix = "cnvkit" ext.when = { params.tools && params.tools.contains('cnvkit') } publishDir = [