diff --git a/CHANGELOG.md b/CHANGELOG.md index 4643bab21..350549bcd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#533](https://github.com/nf-core/sarek/pull/533) - Add param `--only_paired_variant_calling` to allow skipping of germline variantcalling for paired samples - [#536](https://github.com/nf-core/sarek/pull/536) - Add `--step markduplicates` to start from duplicate marking, `--step prepare_recalibration` now ONLY starts at process `BaseRecalibrator` & adding `bam` and `cram` input support for `--step` `markduplicates`, `prepare_recalibration`, `recalibrate`, and `variant_calling` - [#538](https://github.com/nf-core/sarek/pull/538) - Add param `--seq_platform`, default: `ILLUMINA` +- [#545](https://github.com/nf-core/sarek/pull/545) - Add modules and subworkflows for `cnvkit` tumor_only mode ### Changed @@ -73,7 +74,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#513](https://github.com/nf-core/sarek/pull/513), [#527](https://github.com/nf-core/sarek/pull/527) - CNV is back - [#529](https://github.com/nf-core/sarek/pull/529) - Do not save `versions.yml` files - [#524](https://github.com/nf-core/sarek/pull/524) - Fix intervals usage by counting the actual list of scatter/gather files produced and not overall number of intervals -- [#549](https://github.com/nf-core/sarek/pull/549) - Fix unique lanes required for Freebayes: issue [#311](https://github.com/nf-core/sarek/issues/311), replaces `meta.clone()` with actual copy of map to avoid issues with https://nfcore.slack.com/archives/C027CM7P08M/p1644241819942339 +- [#549](https://github.com/nf-core/sarek/pull/549) - Fix unique lanes required for Freebayes: issue [#311](https://github.com/nf-core/sarek/issues/311), replaces `meta.clone()` with actual copy of map ### Deprecated diff --git a/conf/modules.config b/conf/modules.config index 
5e447ce78..39d8b6d1d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -904,6 +904,38 @@ process{ } } + // CNVKIT_TUMORONLY + + withName: 'CNVKIT_ANTITARGET' { + ext.when = { params.tools && params.tools.contains('cnvkit') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "*{bed}" + ] + } + + withName: 'CNVKIT_REFERENCE' { + ext.prefix = "cnvkit" + ext.when = { params.tools && params.tools.contains('cnvkit') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "*{cnn}" + ] + } + + withName: 'CNVKIT_BATCH_TUMORONLY' { + ext.args = { params.wes ? "--method hybrid --diagram --scatter" : "--method wgs --diagram --scatter" } + ext.when = { params.tools && params.tools.contains('cnvkit') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/${meta.id}/cnvkit" }, + pattern: "*{bed,cnn,cnr,cns,pdf,png}" + ] + } + + + //FREEBAYES withName: 'NFCORE_SAREK:SAREK:PAIR_VARIANT_CALLING:RUN_FREEBAYES_SOMATIC:FREEBAYES' { ext.args = "--pooled-continuous \ diff --git a/modules.json b/modules.json index 02497f14d..cfb9facba 100644 --- a/modules.json +++ b/modules.json @@ -30,8 +30,14 @@ "cat/fastq": { "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" }, + "cnvkit/antitarget": { + "git_sha": "58c5ec2f4eb44eff1fb7cc9e02df07bd448c6aaf" + }, "cnvkit/batch": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "9096be5464d800156761b7c73c574f5a5bad2f72" + }, + "cnvkit/reference": { + "git_sha": "8208140d21f3a754fff2e177db7a0e570fa2af6e" }, "controlfreec/assesssignificance": { "git_sha": "9ae34a01d1747019fd37753ff4cafb05aec35a2b" diff --git a/modules/nf-core/modules/cnvkit/antitarget/main.nf b/modules/nf-core/modules/cnvkit/antitarget/main.nf new file mode 100644 index 000000000..bf6461bdb --- /dev/null +++ b/modules/nf-core/modules/cnvkit/antitarget/main.nf @@ -0,0 +1,36 @@ +process CNVKIT_ANTITARGET { + tag 
"$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::cnvkit=0.9.9" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.9--pyhdfd78af_0': + 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" + + input: + tuple val(meta), path(targets) + + output: + tuple val(meta), path("*.bed"), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + cnvkit.py \\ + antitarget \\ + $targets \\ + --output ${prefix}.antitarget.bed \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/cnvkit/antitarget/meta.yml b/modules/nf-core/modules/cnvkit/antitarget/meta.yml new file mode 100644 index 000000000..28a2bfa40 --- /dev/null +++ b/modules/nf-core/modules/cnvkit/antitarget/meta.yml @@ -0,0 +1,44 @@ +name: cnvkit_antitarget +description: +keywords: + - cvnkit + - antitarget +tools: + - cnvkit: + description: | + CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. + It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. + homepage: https://cnvkit.readthedocs.io/en/stable/index.html + documentation: https://cnvkit.readthedocs.io/en/stable/index.html + tool_dev_url: "https://github.com/etal/cnvkit" + doi: 10.1371/journal.pcbi.1004873 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - targets: + type: file + description: File containing genomic regions + pattern: "*.{bed}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: File containing off-target regions + pattern: "*.{bed}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@SusiJo" diff --git a/modules/nf-core/modules/cnvkit/batch/main.nf b/modules/nf-core/modules/cnvkit/batch/main.nf index 7c44d9f61..c1f9ef87b 100644 --- a/modules/nf-core/modules/cnvkit/batch/main.nf +++ b/modules/nf-core/modules/cnvkit/batch/main.nf @@ -2,10 +2,10 @@ process CNVKIT_BATCH { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? 'bioconda::cnvkit=0.9.9' : null) + conda (params.enable_conda ? 'bioconda::cnvkit=0.9.9 bioconda::samtools=1.15.1' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.9--pyhdfd78af_0' : - 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:304d1c5ab610f216e77c61420ebe85f1e7c5968a-0' : + 'quay.io/biocontainers/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:304d1c5ab610f216e77c61420ebe85f1e7c5968a-0' }" input: tuple val(meta), path(tumor), path(normal) @@ -18,6 +18,8 @@ process CNVKIT_BATCH { tuple val(meta), path("*.cnn"), emit: cnn, optional: true tuple val(meta), path("*.cnr"), emit: cnr, optional: true tuple val(meta), path("*.cns"), emit: cns, optional: true + tuple val(meta), path("*.pdf"), emit: pdf, optional: true + tuple val(meta), path("*.png"), emit: png, optional: true path "versions.yml" , emit: versions when: @@ -25,21 +27,39 @@ process CNVKIT_BATCH { script: def args = task.ext.args ?: '' - def normal_args = normal ? 
"--normal $normal" : "" - def fasta_args = fasta ? "--fasta $fasta" : "" - def reference_args = reference ? "--reference $reference" : "" - def target_args = "" - if (args.contains("--method wgs") || args.contains("-m wgs")) { - target_args = targets ? "--targets $targets" : "" - } - else { - target_args = "--targets $targets" + // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow + // input pair is assumed to have same extension if both exist + def is_cram = tumor.Extension == "cram" ? true : false + def tumor_out = is_cram ? tumor.BaseName + ".bam" : "${tumor}" + + // do not run samtools on normal samples in tumor_only mode + def normal_exists = normal ? true: false + // tumor_only mode does not need fasta & target + // instead it requires a pre-computed reference.cnn which is built from fasta & target + def (normal_out, normal_args, fasta_args) = ["", "", ""] + + if (normal_exists){ + def normal_prefix = normal.BaseName + normal_out = is_cram ? "${normal_prefix}" + ".bam" : "${normal}" + normal_args = normal_prefix ? "--normal $normal_out" : "" + fasta_args = fasta ? "--fasta $fasta" : "" } + + def target_args = targets ? "--targets $targets" : "" + def reference_args = reference ? 
"--reference $reference" : "" + """ + if $is_cram; then + samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out + if $normal_exists; then + samtools view -T $fasta $normal -@ $task.cpus -o $normal_out + fi + fi + cnvkit.py \\ batch \\ - $tumor \\ + $tumor_out \\ $normal_args \\ $fasta_args \\ $reference_args \\ diff --git a/modules/nf-core/modules/cnvkit/batch/meta.yml b/modules/nf-core/modules/cnvkit/batch/meta.yml index 474c55f21..2cd675c77 100644 --- a/modules/nf-core/modules/cnvkit/batch/meta.yml +++ b/modules/nf-core/modules/cnvkit/batch/meta.yml @@ -11,27 +11,6 @@ tools: homepage: https://cnvkit.readthedocs.io/en/stable/index.html documentation: https://cnvkit.readthedocs.io/en/stable/index.html licence: ["Apache-2.0"] -params: - - outdir: - type: string - description: | - The pipeline's output directory. By default, the module will - output files into `$params.outdir/` - - publish_dir_mode: - type: string - description: | - Value for the Nextflow `publishDir` mode parameter. - Available: symlink, rellink, link, copy, copyNoFollow, move. - - enable_conda: - type: boolean - description: | - Run the module with Conda using the software specified - via the `conda` directive - - singularity_pull_docker_container: - type: boolean - description: | - Instead of directly downloading Singularity images for use with Singularity, - force the workflow to pull and convert Docker containers instead. 
input: - meta: type: map @@ -49,7 +28,7 @@ input: - fasta: type: file description: | - Input reference genome fasta file + Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided) - targetfile: type: file description: | @@ -80,6 +59,14 @@ output: type: file description: File containing copy number segment information pattern: "*.{cns}" + - pdf: + type: file + description: File with plot of copy numbers or segments on chromosomes + pattern: "*.{pdf}" + - png: + type: file + description: File with plot of bin-level log2 coverages and segmentation calls + pattern: "*.{png}" - versions: type: file description: File containing software versions @@ -91,3 +78,4 @@ authors: - "@drpatelh" - "@fbdtemme" - "@lassefolkersen" + - "@SusiJo" diff --git a/modules/nf-core/modules/cnvkit/reference/main.nf b/modules/nf-core/modules/cnvkit/reference/main.nf new file mode 100644 index 000000000..10458f278 --- /dev/null +++ b/modules/nf-core/modules/cnvkit/reference/main.nf @@ -0,0 +1,40 @@ +process CNVKIT_REFERENCE { + tag "$fasta" + label 'process_low' + + conda (params.enable_conda ? "bioconda::cnvkit=0.9.9" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.9--pyhdfd78af_0': + 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" + + input: + path fasta + path targets + path antitargets + + output: + path "*.cnn" , emit: cnn + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: targets.BaseName + + """ + cnvkit.py \\ + reference \\ + --fasta $fasta \\ + --targets $targets \\ + --antitargets $antitargets \\ + --output ${prefix}.reference.cnn \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/cnvkit/reference/meta.yml b/modules/nf-core/modules/cnvkit/reference/meta.yml new file mode 100644 index 000000000..2e0fef1aa --- /dev/null +++ b/modules/nf-core/modules/cnvkit/reference/meta.yml @@ -0,0 +1,47 @@ +name: cnvkit_reference +description: +keywords: + - cnvkit + - reference +tools: + - cnvkit: + description: | + CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. + It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. 
+ homepage: https://cnvkit.readthedocs.io/en/stable/index.html + documentation: https://cnvkit.readthedocs.io/en/stable/index.html + tool_dev_url: https://github.com/etal/cnvkit + doi: 10.1371/journal.pcbi.1004873 + licence: ["Apache-2.0"] + +input: + - fasta: + type: file + description: File containing reference genome + pattern: "*.{fasta}" + - targets: + type: file + description: File containing genomic regions + pattern: "*.{bed}" + - antitargets: + type: file + description: File containing off-target genomic regions + pattern: "*.{bed}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reference: + type: file + description: File containing a copy-number reference (required for CNV calling in tumor_only mode) + pattern: "*.{cnn}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@SusiJo" diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index 99aa57430..338a2a32c 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -8,6 +8,7 @@ include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf- include { RUN_MANTA_TUMORONLY } from '../nf-core/variantcalling/manta/tumoronly/main.nf' include { RUN_STRELKA_SINGLE } from '../nf-core/variantcalling/strelka/single/main.nf' include { RUN_CONTROLFREEC_TUMORONLY } from '../nf-core/variantcalling/controlfreec/tumoronly/main.nf' +include { RUN_CNVKIT_TUMORONLY } from '../nf-core/variantcalling/cnvkit/tumoronly/main.nf' workflow TUMOR_ONLY_VARIANT_CALLING { take: @@ -83,6 +84,20 @@ workflow TUMOR_ONLY_VARIANT_CALLING { ch_versions = ch_versions.mix(RUN_CONTROLFREEC_TUMORONLY.out.versions) } + if(tools.contains('cnvkit')){ + cram_recalibrated_cnvkit = cram_recalibrated + .map{ meta, cram, crai -> + [meta, cram, []] + } + + RUN_CNVKIT_TUMORONLY ( 
cram_recalibrated_cnvkit, + fasta, + intervals_bed_combined, + [] ) + + ch_versions = ch_versions.mix(RUN_CNVKIT_TUMORONLY.out.versions) + } + if (tools.contains('freebayes')){ // Remap channel for Freebayes cram_recalibrated_intervals_freebayes = cram_recalibrated_intervals diff --git a/subworkflows/nf-core/variantcalling/cnvkit/main.nf b/subworkflows/nf-core/variantcalling/cnvkit/main.nf deleted file mode 100644 index e69de29bb..000000000 diff --git a/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf new file mode 100644 index 000000000..f9ae82881 --- /dev/null +++ b/subworkflows/nf-core/variantcalling/cnvkit/tumoronly/main.nf @@ -0,0 +1,40 @@ +// +// CNVkit copy-number calling for TUMOR_ONLY samples +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + + +include {CNVKIT_ANTITARGET } from '../../../../../modules/nf-core/modules/cnvkit/antitarget/main' +include {CNVKIT_REFERENCE } from '../../../../../modules/nf-core/modules/cnvkit/reference/main' +include {CNVKIT_BATCH as CNVKIT_BATCH_TUMORONLY } from '../../../../../modules/nf-core/modules/cnvkit/batch/main' + +workflow RUN_CNVKIT_TUMORONLY { + take: + cram_recalibrated // channel: [mandatory] [ meta, tumor cram, [] ] as remapped by the caller (the normal slot is left empty) + fasta // channel: [mandatory] fasta + targets // channel: [mandatory] bed + reference // channel: [] cnn, NOTE: not used in main below, the reference passed to the batch step always comes from CNVKIT_REFERENCE + + main: + ch_versions = Channel.empty() + + // prepare a reference for tumor_only mode: derive antitarget regions from the target bed, then build the reference from fasta, targets and antitargets + + CNVKIT_ANTITARGET(targets.map{ it -> [[id:it[0].baseName], it] }) + + CNVKIT_REFERENCE(fasta, targets, CNVKIT_ANTITARGET.out.bed.map{ meta,bed -> [bed]} ) + + // use reference for 
calling CNVs + // cram_input needs the fasta reference genome for bam_conversion, targets are passed as [] (the pre-computed reference above is built from them) + + CNVKIT_BATCH_TUMORONLY(cram_recalibrated, fasta, [], CNVKIT_REFERENCE.out.cnn) + + ch_versions = ch_versions.mix(CNVKIT_ANTITARGET.out.versions) + ch_versions = 
ch_versions.mix(CNVKIT_REFERENCE.out.versions) + ch_versions = ch_versions.mix(CNVKIT_BATCH_TUMORONLY.out.versions) + + emit: + versions = ch_versions // channel: [ versions.yml ] + +}