diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index c6643b83ab..f20cbe761d 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -35,7 +35,7 @@ A clear and concise description of what you expected to happen. - Engine: - version: -- Image tag: +- Image tag: ## Additional context diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml index dea8627539..dbff1a684a 100644 --- a/.github/markdownlint.yml +++ b/.github/markdownlint.yml @@ -1,10 +1,6 @@ # Markdownlint configuration file default: true, line-length: false -no-multiple-blanks: 0 -blanks-around-headers: false -blanks-around-lists: false -header-increment: false no-duplicate-header: siblings_only: true no-inline-html: diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index b73045800a..76ce81f6aa 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -1,5 +1,6 @@ -name: sarek branch protection -# This workflow is triggered on PRs to master branch on the repository +name: nf-core branch protection +# This workflow is triggered on PRs to `master` branch on the repository +# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` on: pull_request: branches: @@ -7,9 +8,9 @@ on: jobs: test: - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest steps: # PRs are only ok if coming from an nf-core `dev` branch or a fork `patch` branch - name: Check PRs run: | - { [[ $(git remote get-url origin) == *nf-core/sarek ]] && [[ ${GITHUB_HEAD_REF} = "dev" ]]; } || [[ ${GITHUB_HEAD_REF} == "patch" ]] \ No newline at end of file + { [[ $(git remote get-url origin) == *nf-core/sarek ]] && [[ ${GITHUB_HEAD_REF} = "dev" ]]; } || [[ ${GITHUB_HEAD_REF} == "patch" ]] diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 256f046210..54fb21715e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,61 +1,74 @@ -name: sarek CI +name: nf-core CI # This workflow is triggered on pushes and PRs to the repository. -on: [push, pull_request] +# It runs the pipeline with the minimal test dataset to check that it completes without any syntax errors. 
+on: [push, pull_request] jobs: test: - runs-on: ubuntu-18.04 + env: + NXF_VER: ${{ matrix.nxf_ver }} + NXF_ANSI_LOG: false + runs-on: ubuntu-latest strategy: matrix: + # Nextflow versions: check pipeline minimum and current latest nxf_ver: ['19.10.0', ''] steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: Install Nextflow run: | - export NXF_VER=${{ matrix.nxf_ver }} wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ - - name: Download and tag image + - name: Pull docker image run: | - docker pull nfcore/sarek:dev && docker tag nfcore/sarek:dev nfcore/sarek:dev + docker pull nfcore/sarek:dev + docker tag nfcore/sarek:dev nfcore/sarek:dev - name: Run test - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker + run: nextflow run ${GITHUB_WORKSPACE} -profile test,docker + annotation: - runs-on: ubuntu-18.04 + env: + NXF_ANSI_LOG: false + runs-on: ubuntu-latest strategy: matrix: tools: [snpeff] species: [GRCh37] steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ env: + # Only check Nextflow pipeline minimum version for other CI tests NXF_VER: '19.10.0' - - name: Download and tag images + - name: Pull docker image run: | - docker pull nfcore/sarek:dev && docker tag nfcore/sarek:dev nfcore/sarek:dev + docker pull nfcore/sarek:dev + docker tag nfcore/sarek:dev nfcore/sarek:dev docker pull nfcore/sarek${{ matrix.tools }}:dev.${{ matrix.species }} docker tag nfcore/sarek${{ matrix.tools }}:dev.${{ matrix.species }} nfcore/sarek${{ matrix.tools }}:dev.${{ matrix.species }} - name: Run annotation test - run: | - nextflow run . -profile test_annotation,docker --verbose --tools ${{ matrix.tools }} + run: nextflow run . -profile test_annotation,docker --verbose --tools ${{ matrix.tools }} + germline: - runs-on: ubuntu-18.04 + env: + NXF_ANSI_LOG: false + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ env: + # Only check Nextflow pipeline minimum version for other CI tests NXF_VER: '19.10.0' - - name: Download and tag image + - name: Pull docker image run: | - docker pull nfcore/sarek:dev && docker tag nfcore/sarek:dev nfcore/sarek:dev + docker pull nfcore/sarek:dev + docker tag nfcore/sarek:dev nfcore/sarek:dev - name: Get test data run: | git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data @@ -64,61 +77,73 @@ jobs: nextflow run . -profile test,docker --input data/testdata/tiny/normal nextflow run . -profile test,docker --input=false --step recalibrate -resume nextflow run . -profile test,docker --input=false --step variantCalling + minimal: - runs-on: ubuntu-18.04 + env: + NXF_ANSI_LOG: false + runs-on: ubuntu-latest strategy: matrix: genome: [smallerGRCh37, minimalGRCh37] intervals: [--no_intervals, ''] steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ env: + # Only check Nextflow pipeline minimum version for other CI tests NXF_VER: '19.10.0' - - name: Download and tag image + - name: Pull docker image run: | - docker pull nfcore/sarek:dev && docker tag nfcore/sarek:dev nfcore/sarek:dev + docker pull nfcore/sarek:dev + docker tag nfcore/sarek:dev nfcore/sarek:dev - name: Run test for minimal genomes - run: | - nextflow run . 
-profile test,docker --skipQC all --verbose --genome ${{ matrix.genome }} ${{ matrix.intervals }} --tools Manta,mpileup,Strelka + run: nextflow run . -profile test,docker --skipQC all --verbose --genome ${{ matrix.genome }} ${{ matrix.intervals }} --tools Manta,mpileup,Strelka + profile: - runs-on: ubuntu-18.04 + env: + NXF_ANSI_LOG: false + runs-on: ubuntu-latest strategy: matrix: - profile: [test_splitfastq, test_targeted] + profile: [test_splitfastq, test_targeted, test_trimming] steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ env: + # Only check Nextflow pipeline minimum version for other CI tests NXF_VER: '19.10.0' - - name: Download and tag image + - name: Pull docker image run: | - docker pull nfcore/sarek:dev && docker tag nfcore/sarek:dev nfcore/sarek:dev + docker pull nfcore/sarek:dev + docker tag nfcore/sarek:dev nfcore/sarek:dev - name: Run ${{ matrix.profile }} test - run: | - nextflow run . -profile ${{ matrix.profile }},docker --verbose + run: nextflow run . -profile ${{ matrix.profile }},docker --verbose + tools: - runs-on: ubuntu-18.04 + env: + NXF_ANSI_LOG: false + runs-on: ubuntu-latest strategy: matrix: tool: [Haplotypecaller, Freebayes, Manta, mpileup, Strelka, TIDDIT] steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ env: + # Only check Nextflow pipeline minimum version for other CI tests NXF_VER: '19.10.0' - - name: Download and tag image + - name: Pull docker image run: | - docker pull nfcore/sarek:dev && docker tag nfcore/sarek:dev nfcore/sarek:dev + docker pull nfcore/sarek:dev + docker tag nfcore/sarek:dev nfcore/sarek:dev - name: Run ${{ matrix.tool }} test - run: | - nextflow run . -profile test_tool,docker --verbose --tools ${{ matrix.tool }} + run: nextflow run . -profile test_tool,docker --verbose --tools ${{ matrix.tool }} \ No newline at end of file diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8a1cb0f907..1e0827a800 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -1,38 +1,39 @@ -name: sarek linting +name: nf-core linting # This workflow is triggered on pushes and PRs to the repository. 
-on: [push, pull_request] +# It runs the `nf-core lint` and markdown lint tests to ensure that the code meets the nf-core guidelines +on: + push: + pull_request: + release: + types: [published] jobs: Markdown: - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - uses: actions/setup-node@v1 with: node-version: '10' - name: Install markdownlint - run: | - npm install -g markdownlint-cli + run: npm install -g markdownlint-cli - name: Run Markdownlint - run: | - markdownlint ${GITHUB_WORKSPACE} -c ${GITHUB_WORKSPACE}/.github/markdownlint.yml + run: markdownlint ${GITHUB_WORKSPACE} -c ${GITHUB_WORKSPACE}/.github/markdownlint.yml YAML: - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - uses: actions/setup-node@v1 with: node-version: '10' - - name: Install yamllint - run: | - npm install -g yaml-lint - - name: Run yamllint - run: | - yamllint $(find ${GITHUB_WORKSPACE} -type f -name "*.yml") + - name: Install yaml-lint + run: npm install -g yaml-lint + - name: Run yaml-lint + run: yamllint $(find ${GITHUB_WORKSPACE} -type f -name "*.yml") nf-core: - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -41,13 +42,9 @@ jobs: with: python-version: '3.6' architecture: 'x64' - - name: Install pip + - name: Install dependencies run: | - sudo apt install python3-pip - pip install --upgrade pip - - name: Install nf-core tools - run: | - pip install --upgrade --force-reinstall git+https://github.com/nf-core/tools.git@dev + python -m pip install --upgrade pip + pip install nf-core - name: Run nf-core lint - run: | - nf-core lint ${GITHUB_WORKSPACE} \ No newline at end of file + run: nf-core lint ${GITHUB_WORKSPACE} diff --git a/.gitignore b/.gitignore index 96675d0154..d4c81ee6b5 100644 --- a/.gitignore +++ b/.gitignore @@ -4,5 +4,6 @@ data/ references/ results/ .DS_Store -tests/test_data +tests/ +testing/ *.pyc diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c304296f7..2823a1c30e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ## dev ### `Added` + - [#117](https://github.com/nf-core/sarek/pull/117) - Add `Trim Galore` possibilities to Sarek - [#76](https://github.com/nf-core/sarek/pull/76) - Add `GATK Spark` possibilities to Sarek - [#87](https://github.com/nf-core/sarek/pull/87) - Add `GATK BaseRecalibrator` plot to `MultiQC` report @@ -22,12 +23,17 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - [#107](https://github.com/nf-core/sarek/pull/107) - Switch params to snake_case - [#109](https://github.com/nf-core/sarek/pull/109) - Update publication with F1000Research preprint - [#113](https://github.com/nf-core/sarek/pull/113) - Move social preview image +- [#120](https://github.com/nf-core/sarek/pull/120) - Sync TEMPLATE +- [#121](https://github.com/nf-core/sarek/pull/121) - Update `MultiQC` to `1.8` +- [#126](https://github.com/nf-core/sarek/pull/126) - Update docs ### `Fixed` - [#83](https://github.com/nf-core/sarek/pull/83) - Fix some typos in `docs/input.md` - [#107](https://github.com/nf-core/sarek/pull/107) - Fix linting - [#110](https://github.com/nf-core/sarek/pull/110) - Fix `snpEff` report issue cf [#106](https://github.com/nf-core/sarek/issues/106) +- [#126](https://github.com/nf-core/sarek/pull/126) - Fix `iGenomes` paths +- 
[#127](https://github.com/nf-core/sarek/pull/127), [#128](https://github.com/nf-core/sarek/pull/128) - Fix `ASCAT` ### `Deprecated` diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 09226d0d8d..cf930c8acf 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -34,7 +34,7 @@ This Code of Conduct applies both within project spaces and in public spaces whe ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-core-invite.herokuapp.com/). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-co.re/join/slack). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. diff --git a/Dockerfile b/Dockerfile index 350ec57cf7..8ab1767242 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,13 @@ -FROM nfcore/base:dev +FROM nfcore/base:1.9 LABEL authors="Maxime Garcia, Szilveszter Juhos" \ - description="Docker image containing all requirements for nf-core/sarek pipeline" + description="Docker image containing all software requirements for the nf-core/sarek pipeline" +# Install the conda environment COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a + +# Add conda installation dir to PATH (instead of doing 'conda activate') +ENV PATH /opt/conda/envs/nf-core-sarek-dev/bin:$PATH + +# Dump the details of the installed packages to a file for posterity RUN conda env export --name nf-core-sarek-dev > nf-core-sarek-dev.yml -ENV PATH /opt/conda/envs/nf-core-sarek-dev/bin:$PATH \ No newline at end of file diff --git a/README.md b/README.md index fc30d05aeb..5fefc7d0df 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,5 @@ # [![Sarek](docs/images/nf-core_sarek_logo.png "Sarek")](https://nf-co.re/sarek) -> **An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing** - [![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A519.10.0-brightgreen.svg)](https://www.nextflow.io/) [![nf-core](https://img.shields.io/badge/nf--core-pipeline-brightgreen.svg)](https://nf-co.re/) [![DOI](https://zenodo.org/badge/184289291.svg)](https://zenodo.org/badge/latestdoi/184289291) @@ -20,11 +18,8 @@ Sarek is a workflow designed to run analyses on whole genome or targeted sequencing data from regular samples or tumour / normal pairs and could include additional relapses. -It's built using [Nextflow](https://www.nextflow.io), -a domain specific language for workflow building, -across multiple compute infrastructures in a very portable manner. 
-Software dependencies are handled using [Conda](https://conda.io/), [Docker](https://www.docker.com) or [Singularity](https://www.sylabs.io/singularity/) - environment/container technologies that provide excellent reproducibility and ease of use. -Thus making installation trivial and results highly reproducible. +It's built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. +It comes with docker containers making installation trivial and results highly reproducible.

@@ -32,6 +27,30 @@ Thus making installation trivial and results highly reproducible. It's listed on the [Elixir - Tools and Data Services Registry](https://bio.tools/Sarek), [Dockstore](https://dockstore.org/workflows/github.com/nf-core/sarek) and [omicX - Bioinformatics tools](https://omictools.com/sarek-tool). +## Quick Start + +i. Install [`Nextflow`](https://nf-co.re/usage/installation) + +ii. Install either [`Docker`](https://docs.docker.com/engine/installation/) or [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) for full pipeline reproducibility (please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles)) + +iii. Download the pipeline and test it on a minimal dataset with a single command + +```bash +nextflow run nf-core/sarek -profile test,<docker/singularity/conda/institute> +``` + +> Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. +> If so, you can simply use `-profile <institute>` in your command. +> This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. + +iv. Start running your own analysis! + +```bash +nextflow run nf-core/sarek -profile <docker/singularity/conda/institute> --input '*.tsv' --genome GRCh38 +``` + +See [usage docs](docs/usage.md) for all of the available options when running the pipeline. + ## Documentation The nf-core/sarek pipeline comes with documentation about the pipeline, found in the `docs/` directory: @@ -88,7 +107,7 @@ For further information or help, don't hesitate to get in touch on [Slack](https * [CHANGELOG](CHANGELOG.md) -## Aknowledgements +## Acknowledgements [![Barntumörbanken](docs/images/BTB_logo.png)](https://ki.se/forskning/barntumorbanken-0) | [![SciLifeLab](docs/images/SciLifeLab_logo.png)](https://scilifelab.se) :-:|:-: diff --git a/assets/email_template.html b/assets/email_template.html index 20a5dd5da4..1d069c4254 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -11,6 +11,8 @@

+<img src="cid:nfcorepipelinelogo">
+
<h1>nf-core/sarek v${version}</h1>

<h2>Run Name: $runName</h2>

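The new `<img>` tag pulls the logo in by its MIME Content-ID, and the `sendmail_template.txt` change just below embeds that logo as an inline base64 part re-wrapped to 76-character lines. As a rough shell equivalent of that Groovy encoding chain (a sketch, assuming GNU coreutils `base64`):

```bash
# Base64-encode the bundled logo, wrapping at 76 columns per MIME line,
# which matches the collate( 76 ) step in the sendmail template below.
base64 -w 76 assets/nf-core-sarek_logo.png
```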
diff --git a/assets/email_template.txt b/assets/email_template.txt index 76e9ae7e57..2be647eac1 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -1,6 +1,12 @@ -======================================== - nf-core/sarek v${version} -======================================== +---------------------------------------------------- + ,--./,-. + ___ __ __ __ ___ /,-._.--~\\ + |\\ | |__ __ / ` / \\ |__) |__ } { + | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, + `._,._,' + nf-core/sarek v${version} +---------------------------------------------------- + Run Name: $runName <% if (success){ diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index 3aeb29ba10..b4974b12c4 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -7,8 +7,10 @@ report_comment: > analysis pipeline. For information about how to interpret these results, please see the documentation. report_section_order: - nf-core/sarek-software-versions: + software_versions: order: -1000 + nf-core-sarek-summary: + order: -1001 export_plots: true diff --git a/assets/nf-core-sarek_logo.png b/assets/nf-core-sarek_logo.png new file mode 100644 index 0000000000..8f8fcad5be Binary files /dev/null and b/assets/nf-core-sarek_logo.png differ diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index 2d67122006..40adeeb492 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -8,6 +8,23 @@ Content-Type: text/html; charset=utf-8 $email_html +--nfcoremimeboundary +Content-Type: image/png;name="nf-core-sarek_logo.png" +Content-Transfer-Encoding: base64 +Content-ID: <nfcorepipelinelogo> +Content-Disposition: inline; filename="nf-core-sarek_logo.png" + +<% out << new File("$baseDir/assets/nf-core-sarek_logo.png"). + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' ) %> + <% if (mqcFile){ def mqcFileObj = new File("$mqcFile") diff --git a/bin/markdown_to_html.py b/bin/markdown_to_html.py new file mode 100755 index 0000000000..57cc4263fe --- /dev/null +++ b/bin/markdown_to_html.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python +from __future__ import print_function +import argparse +import markdown +import os +import sys + +def convert_markdown(in_fn): + input_md = open(in_fn, mode="r", encoding="utf-8").read() + html = markdown.markdown( + "[TOC]\n" + input_md, + extensions = [ + 'pymdownx.extra', + 'pymdownx.b64', + 'pymdownx.highlight', + 'pymdownx.emoji', + 'pymdownx.tilde', + 'toc' + ], + extension_configs = { + 'pymdownx.b64': { + 'base_path': os.path.dirname(in_fn) + }, + 'pymdownx.highlight': { + 'noclasses': True + }, + 'toc': { + 'title': 'Table of Contents' + } + } + ) + return html + +def wrap_html(contents): + header = """ + <html> + <head> + <meta charset="utf-8"> + <meta name="viewport" content="width=device-width, initial-scale=1"> + </head> + <body> + <div class="container">
+ """ + footer = """ +
+ + + """ + return header + contents + footer + + +def parse_args(args=None): + parser = argparse.ArgumentParser() + parser.add_argument('mdfile', type=argparse.FileType('r'), nargs='?', + help='File to convert. Defaults to stdin.') + parser.add_argument('-o', '--out', type=argparse.FileType('w'), + default=sys.stdout, + help='Output file name. Defaults to stdout.') + return parser.parse_args(args) + +def main(args=None): + args = parse_args(args) + converted_md = convert_markdown(args.mdfile.name) + html = wrap_html(converted_md) + args.out.write(html) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/bin/markdown_to_html.r b/bin/markdown_to_html.r deleted file mode 100755 index abe1335070..0000000000 --- a/bin/markdown_to_html.r +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env Rscript - -# Command line argument processing -args = commandArgs(trailingOnly=TRUE) -if (length(args) < 2) { - stop("Usage: markdown_to_html.r ", call.=FALSE) -} -markdown_fn <- args[1] -output_fn <- args[2] - -# Load / install packages -if (!require("markdown")) { - install.packages("markdown", dependencies=TRUE, repos='http://cloud.r-project.org/') - library("markdown") -} - -base_css_fn <- getOption("markdown.HTML.stylesheet") -base_css <- readChar(base_css_fn, file.info(base_css_fn)$size) -custom_css <- paste(base_css, " -body { - padding: 3em; - margin-right: 350px; - max-width: 100%; -} -#toc { - position: fixed; - right: 20px; - width: 300px; - padding-top: 20px; - overflow: scroll; - height: calc(100% - 3em - 20px); -} -#toc_header { - font-size: 1.8em; - font-weight: bold; -} -#toc > ul { - padding-left: 0; - list-style-type: none; -} -#toc > ul ul { padding-left: 20px; } -#toc > ul > li > a { display: none; } -img { max-width: 800px; } -") - -markdownToHTML( - file = markdown_fn, - output = output_fn, - stylesheet = custom_css, - options = c('toc', 'base64_images', 'highlight_code') -) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index f5ab066ebd..ba53181640 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -51,14 +51,17 @@ # Search each file using its regex for k, v in regexes.items(): - with open(v[0]) as x: - versions = x.read() - match = re.search(v[1], versions) - if match: - results[k] = "v{}".format(match.group(1)) + try: + with open(v[0]) as x: + versions = x.read() + match = re.search(v[1], versions) + if match: + results[k] = "v{}".format(match.group(1)) + except IOError: + results[k] = False # Remove software set to false in results -for k in results: +for k in list(results): if not results[k]: del(results[k]) diff --git a/conf/base.config b/conf/base.config index 9a24833d8b..ee3bfa7604 100644 --- a/conf/base.config +++ b/conf/base.config @@ -15,7 +15,7 @@ process { time = {check_resource(24.h * task.attempt)} shell = ['/bin/bash', '-euo', 'pipefail'] - errorStrategy = {task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish'} + errorStrategy = {task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } maxErrors = '-1' maxRetries = 3 @@ -44,11 +44,14 @@ process { withLabel:memory_singleCPU_task_sq { memory = {check_resource((params.single_cpu_mem as nextflow.util.MemoryUnit) * task.attempt * task.attempt)} } - withLabel:memory_max { memory = {params.max_memory} } + withName:get_software_versions { + cache = false + } + withName:ConcatVCF { // For unknown reasons, ConcatVCF sometimes fails with SIGPIPE // (exit code 141). Rerunning the process will usually work. 
diff --git a/conf/igenomes.config b/conf/igenomes.config index 096c95e2fb..8b6277f40d 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -11,7 +11,7 @@ params { genomes { 'GRCh37' { ac_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/1000G_phase3_20130502_SNP_maf0.3.loci" - ac_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/1000G_phase3_20130502_SNP_maf0.3.loci.gc" + ac_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/1000G_phase3_20130502_SNP_maf0.3.loci.gc" bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/BWAIndex/human_g1k_v37_decoy.fasta.{amb,ann,bwt,pac,sa}" chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/Chromosomes" chr_length = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/Length/human_g1k_v37_decoy.len" @@ -31,7 +31,7 @@ } 'GRCh38' { ac_loci = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci" - ac_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci.gc" + ac_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci.gc" bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/Homo_sapiens_assembly38.fasta.64.{alt,amb,ann,bwt,pac,sa}" chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Chromosomes" chr_length = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Length/Homo_sapiens_assembly38.len" diff --git a/conf/test.config b/conf/test.config index e7144d8cd1..a21421087c 100644 --- a/conf/test.config +++ b/conf/test.config @@ -4,18 +4,21 @@ * ------------------------------------------------- * Defines bundled input files and everything required * to run a fast and simple test. Use as follows: - * nextflow run nf-core/sarek -profile test + * nextflow run nf-core/sarek -profile test,<docker/singularity> */ params { config_profile_description = 'Minimal test dataset to check pipeline function' config_profile_name = 'Test profile' + // Limit resources so that this can run on GitHub Actions max_cpus = 2 max_memory = 6.GB max_time = 48.h + // Input data input = 'https://github.com/nf-core/test-datasets/raw/sarek/testdata/tsv/tiny-manta-https.tsv' + // Small reference genome igenomes_ignore = true genome = 'smallGRCh37' diff --git a/conf/test_trimming.config b/conf/test_trimming.config new file mode 100644 index 0000000000..8192ee4b79 --- /dev/null +++ b/conf/test_trimming.config @@ -0,0 +1,18 @@ +/* + * ------------------------------------------------- + * Nextflow config file for running tests + * ------------------------------------------------- + * Defines bundled input files and everything required + * to run a fast and simple test. Use as follows: + * nextflow run nf-core/sarek -profile test + */ + +includeConfig 'test.config' + +params { + trim_fastq = true + clip_r1 = 1 + clip_r2 = 1 + three_prime_clip_r1 = 1 + three_prime_clip_r2 = 1 +} \ No newline at end of file diff --git a/docs/images/nf-core-sarek_logo.png b/docs/images/nf-core-sarek_logo.png new file mode 100644 index 0000000000..5c8a124aa1 Binary files /dev/null and b/docs/images/nf-core-sarek_logo.png differ diff --git a/docs/output.md b/docs/output.md index 1642d8b216..a915674114 100644 --- a/docs/output.md +++ b/docs/output.md @@ -4,7 +4,7 @@ This document describes the output produced by the pipeline. 
## Pipeline overview -The pipeline processes data using the following steps: +The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: - [Preprocessing](#preprocessing) - [Map to Reference](#map-to-reference) diff --git a/docs/usage.md b/docs/usage.md index 1c38de9bb9..9c9dd45a73 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,7 +6,7 @@ - [Reproducibility](#reproducibility) - [Main arguments](#main-arguments) - [-profile](#-profile) - - [--input](#--input) + - [`--reads`](#--reads) - [--split_fastq](#--split_fastq) - [--trim_fastq](#--trim_fastq) - [--clip_r1](#--clip_r1) @@ -40,6 +40,8 @@ - [--ac_loci_gc](#--ac_loci_gc) - [--acLociGC](#--aclocigc) - [--bwa](#--bwa) + - [--ascat_ploidy](#--ascat_ploidy) + - [--ascat_purity](#--ascat_purity) - [--bwaIndex](#--bwaindex) - [--chr_dir](#--chr_dir) - [--chrDir](#--chrdir) @@ -79,12 +81,15 @@ - [AWS Batch specific parameters](#aws-batch-specific-parameters) - [--awsqueue](#--awsqueue) - [--awsregion](#--awsregion) + - [--awscli](#--awscli) - [Other command line parameters](#other-command-line-parameters) - [--outdir](#--outdir) - [--publish_dir_mode](#--publish_dir_mode) - [--publishDirMode](#--publishdirmode) - [--sequencing_center](#--sequencing_center) - [--email](#--email) + - [--email_on_fail](#--email_on_fail) + - [--max_multiqc_email_size](#--max_multiqc_email_size) - [-name](#-name) - [-resume](#-resume) - [-c](#-c) @@ -98,8 +103,6 @@ - [--plaintext_email](#--plaintext_email) - [--monochrome_logs](#--monochrome_logs) - [--multiqc_config](#--multiqc_config) - - [--ascat_ploidy](#--ascat_ploidy) - - [--ascat_purity](#--ascat_purity) ## Introduction @@ -156,8 +159,8 @@ It's a good idea to specify a pipeline version when running the pipeline on your This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the [nf-core/sarek releases page](https://github.com/nf-core/sarek/releases) and find the latest version number - numeric only (eg. `2.5.0`). -Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 2.5.0`. +First, go to the [nf-core/sarek releases page](https://github.com/nf-core/sarek/releases) and find the latest version number - numeric only (eg. `2.5.2`). +Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 2.5.2`. This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. @@ -165,31 +168,36 @@ This version number will be logged in reports when you run the pipeline, so that ### -profile -Use this parameter to choose a configuration profile. -Profiles can give configuration presets for different compute environments. -Note that multiple profiles can be loaded, for example: `-profile docker` - the order of arguments is important! +Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -If `-profile` is not specified at all the pipeline will be run locally and expects all software to be installed and available on the `PATH`. +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Conda) - see below. 
+ +> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. + +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). + +Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! +They are loaded in sequence, so later profiles can overwrite earlier profiles. + +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. -- `awsbatch` - - A generic configuration profile to be used with AWS Batch. -- `conda` - - A generic configuration profile to be used with [conda](https://conda.io/docs/) - - Pulls most software from [Bioconda](https://bioconda.github.io/) - `docker` - A generic configuration profile to be used with [Docker](http://docker.com/) - Pulls software from dockerhub: [`nfcore/sarek`](http://hub.docker.com/r/nfcore/sarek/) - `singularity` - - A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) - Pulls software from DockerHub: [`nfcore/sarek`](http://hub.docker.com/r/nfcore/sarek/) +- `conda` + - Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker or Singularity. + - A generic configuration profile to be used with [conda](https://conda.io/docs/) + - Pulls most software from [Bioconda](https://bioconda.github.io/) - `test` - A profile with a complete configuration for automated testing - Includes links to test data so needs no other parameters -### --input +### `--reads` -Use this to specify the location of your input TSV file, on `mapping`, `recalibrate` and `variantcalling` steps. -For example: +Use this to specify the location of your input FastQ files. For example: ```bash --input sample.tsv @@ -223,25 +231,37 @@ For example: ``` ### --trim_fastq + Use this to perform adapter trimming [Trim Galore](https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md) ### --clip_r1 -Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads). This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end. + +Instructs Trim Galore to remove a number of bp from the 5' end of read 1 (or single-end reads). +This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end. ### --clip_r2 -Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only). This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end. + +Instructs Trim Galore to remove a number of bp from the 5' end of read 2 (paired-end reads only). +This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end. ### --three_prime_clip_r1 -Instructs Trim Galore to remove bp from the 3' end of read 1 (or single-end reads) AFTER adapter/quality trimming has been performed. 
This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality. + +Instructs Trim Galore to remove a number of bp from the 3' end of read 1 (or single-end reads) AFTER adapter/quality trimming has been performed. +This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality. ### --three_prime_clip_r2 + -Instructs Trim Galore to re move bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed. This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality. + +Instructs Trim Galore to remove a number of bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed. +This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality. ### --trim_nextseq + -This enables the option --nextseq-trim=3'CUTOFF within Cutadapt, which will set a quality cutoff (that is normally given with -q instead), but qualities of G bases are ignored. This trimming is in common for the NextSeq- and NovaSeq-platforms, where basecalls without any signal are called as high-quality G bases. + +This enables the option `--nextseq-trim=3'CUTOFF` within `Cutadapt`, which will set a quality cutoff (that is normally given with `-q` instead), but qualities of G bases are ignored. +This trimming is common on the NextSeq and NovaSeq platforms, where basecalls without any signal are called as high-quality G bases. ### --save_trimmed + -Option to keep trimmed fastqs +Option to keep trimmed FASTQs ### --sample @@ -750,12 +770,12 @@ If you are likely to be running `nf-core` pipelines regularly it may be a good i Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter (see definition below). You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. -If you have any questions or issues please send us a message on [Slack](https://nf-core-invite.herokuapp.com/). +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack). ## AWS Batch specific parameters Running the pipeline on AWS Batch requires a couple of specific parameters to be set according to your AWS Batch configuration. -Please use the `-awsbatch` profile and then specify all of the following parameters. +Please use [`-profile awsbatch`](https://github.com/nf-core/configs/blob/master/conf/awsbatch.config) and then specify all of the following parameters. ### --awsqueue The JobQueue that you intend to use on AWS Batch. ### --awsregion The AWS region to run your job in. Default is set to `eu-west-1` but can be adjusted to your needs. +### --awscli + +The [AWS CLI](https://www.nextflow.io/docs/latest/awscloud.html#aws-cli-installation) path in your custom AMI. Default: `/home/ec2-user/miniconda/bin/aws`. + Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to a S3 storage bucket of your choice - you'll get an error message notifying you if you didn't. 
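Putting the AWS Batch parameters together, a launch could look like the following sketch (queue name, region and bucket paths are placeholders):

```bash
# Run on AWS Batch with work dir and results on S3, as required above.
nextflow run nf-core/sarek -profile awsbatch \
    --awsqueue my-batch-queue \
    --awsregion eu-west-1 \
    --input 's3://my-bucket/input/samples.tsv' \
    --outdir 's3://my-bucket/results' \
    -w 's3://my-bucket/work'
```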
## Other command line parameters @@ -773,7 +797,7 @@ Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to a ### --outdir The output directory where the results will be saved. -Default: `results/ +Default: `results/` ## --publish_dir_mode @@ -795,6 +819,14 @@ The sequencing center that will be used in the BAM CN field Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run. +### --email_on_fail + +This works exactly as with `--email`, except emails are only sent if the workflow is not successful. + +### --max_multiqc_email_size + +Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB). + ### -name Name for the pipeline run. diff --git a/environment.yml b/environment.yml index ba7dc3eb7a..a268548910 100644 --- a/environment.yml +++ b/environment.yml @@ -6,25 +6,29 @@ channels: - bioconda - defaults dependencies: - - ascat=2.5.2 - - bcftools=1.9 - - bwa=0.7.17 - - cancerit-allelecount=4.0.2 - - control-freec=11.4 - - ensembl-vep=95.2 - - fastqc=0.11.8 - - freebayes=1.2.0 - - gatk4-spark=4.1.4.1 - - genesplicer=1.0 - - htslib=1.9 - - manta=1.5.0 - - multiqc=1.8 - - qualimap=2.2.2b - - samtools=1.9 - - snpeff=4.3.1t - - strelka=2.9.10 - - tiddit=2.7.1 - - trim-galore=0.6.5 - - conda-forge::pigz=2.3.4 - - vcfanno=0.3.1 - - vcftools=0.1.16 + - conda-forge::python=3.7.3 + - conda-forge::markdown=3.1.1 + - conda-forge::pymdown-extensions=6.0 + - conda-forge::pygments=2.5.2 + - bioconda::ascat=2.5.2 + - bioconda::bcftools=1.9 + - bioconda::bwa=0.7.17 + - bioconda::cancerit-allelecount=4.0.2 + - bioconda::control-freec=11.4 + - bioconda::ensembl-vep=95.2 + - bioconda::fastqc=0.11.8 + - bioconda::freebayes=1.2.0 + - bioconda::gatk4-spark=4.1.4.1 + - bioconda::genesplicer=1.0 + - bioconda::htslib=1.9 + - bioconda::manta=1.5.0 + - bioconda::multiqc=1.8 + - bioconda::qualimap=2.2.2b + - bioconda::samtools=1.9 + - bioconda::snpeff=4.3.1t + - bioconda::strelka=2.9.10 + - bioconda::tiddit=2.7.1 + - bioconda::trim-galore=0.6.5 + - bioconda::vcfanno=0.3.1 + - bioconda::vcftools=0.1.16 + - conda-forge::pigz=2.3.4 \ No newline at end of file diff --git a/main.nf b/main.nf index 69efaed427..d3f930be4a 100644 --- a/main.nf +++ b/main.nf @@ -30,98 +30,99 @@ def helpMessage() { nextflow run nf-core/sarek --input sample.tsv -profile docker Mandatory arguments: - --input Path to input TSV file on mapping, recalibrate and variantcalling steps + --input [file] Path to input TSV file on mapping, recalibrate and variantcalling steps Multiple TSV files can be specified with quotes Works also with the path to a directory on mapping step with a single germline sample only Alternatively, path to VCF input file on annotate step Multiple VCF files can be specified with quotes - -profile Configuration profile to use + -profile [str] Configuration profile to use Can use multiple (comma separated) Available: conda, docker, singularity, test and more + --genome [str] Name of iGenomes reference + --step [str] Specify starting step + Available: Mapping, Recalibrate, VariantCalling, Annotate + Default: Mapping Options: - --genome Name of iGenomes reference - --no_gvcf No g.vcf output from HaplotypeCaller - --no_strelka_bp Will not use Manta candidateSmallIndels for Strelka as Best Practice - --no_intervals 
Disable usage of intervals - --nucleotides_per_second To estimate interval size - Default: 1000.0 - --target_bed Target BED file for targeted or whole exome sequencing - --step Specify starting step - Available: Mapping, Recalibrate, VariantCalling, Annotate - Default: Mapping - --tools Specify tools to use for variant calling: - Available: ASCAT, ControlFREEC, FreeBayes, HaplotypeCaller - Manta, mpileup, Mutect2, Strelka, TIDDIT - and/or for annotation: - snpEff, VEP, merge - Default: None - --skip_qc Specify which QC tools to skip when running Sarek - Available: all, bamQC, BCFtools, FastQC, MultiQC, samtools, vcftools, versions - Default: None - --annotate_tools Specify from which tools Sarek will look for VCF files to annotate, only for step annotate - Available: HaplotypeCaller, Manta, Mutect2, Strelka, TIDDIT - Default: None - --sentieon If sentieon is available, will enable it for preprocessing, and variant calling - Adds the following tools for --tools: DNAseq, DNAscope and TNscope - --annotation_cache Enable the use of cache for annotation, to be used with --snpeff_cache and/or --vep_cache - --snpeff_cache Specity the path to snpEff cache, to be used with --annotation_cache - --vep_cache Specity the path to VEP cache, to be used with --annotation_cache - --pon panel-of-normals VCF (bgzipped, indexed). See: https://software.broadinstitute.org/gatk/documentation/tooldocs/current/org_broadinstitute_hellbender_tools_walkers_mutect_CreateSomaticPanelOfNormals.php - --pon_index index of pon panel-of-normals VCF + --no_gvcf [bool] No g.vcf output from HaplotypeCaller + --no_strelka_bp [bool] Will not use Manta candidateSmallIndels for Strelka as Best Practice + --no_intervals [bool] Disable usage of intervals + --nucleotides_per_second [int] To estimate interval size + Default: 1000.0 + --target_bed [file] Target BED file for targeted or whole exome sequencing + --tools [str] Specify tools to use for variant calling: + Available: ASCAT, ControlFREEC, FreeBayes, HaplotypeCaller + Manta, mpileup, Mutect2, Strelka, TIDDIT + and/or for annotation: + snpEff, VEP, merge + Default: None + --skip_qc [str] Specify which QC tools to skip when running Sarek + Available: all, bamQC, BCFtools, FastQC, MultiQC, samtools, vcftools, versions + Default: None + --annotate_tools [str] Specify from which tools Sarek will look for VCF files to annotate, only for step annotate + Available: HaplotypeCaller, Manta, Mutect2, Strelka, TIDDIT + Default: None + --sentieon [bool] If sentieon is available, will enable it for preprocessing, and variant calling + Adds the following tools for --tools: DNAseq, DNAscope and TNscope + --annotation_cache [bool] Enable the use of cache for annotation, to be used with --snpeff_cache and/or --vep_cache + --snpeff_cache [file] Specify the path to snpEff cache, to be used with --annotation_cache + --vep_cache [file] Specify the path to VEP cache, to be used with --annotation_cache + --pon [file] Panel-of-normals VCF (bgzipped, indexed). See: https://software.broadinstitute.org/gatk/documentation/tooldocs/current/org_broadinstitute_hellbender_tools_walkers_mutect_CreateSomaticPanelOfNormals.php + --pon_index [file] Index of the panel-of-normals VCF + --ascat_ploidy [float] Use this parameter together with --ascat_purity to overwrite default behavior from ASCAT regarding ploidy. Note: Also requires that --ascat_purity is set. + --ascat_purity [float] Use this parameter to overwrite default behavior from ASCAT regarding purity. Note: Also requires that --ascat_ploidy is set. 
Trimming: - --trim_fastq [bool] Run Trim Galore - --clip_r1 [int] Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads) - --clip_r2 [int] Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only) - --three_prime_clip_r1 [int] Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed - --three_prime_clip_r2 [int] Instructs Trim Galore to remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed - --trim_nextseq [int] Instructs Trim Galore to apply the --nextseq=X option, to trim based on quality after removing poly-G tails - --save_trimmed [bool] Save trimmed FastQ file intermediates - - References If not specified in the configuration file or you wish to overwrite any of the references. - --ac_loci acLoci file - --ac_loci_gc acLoci GC file - --bwa bwa indexes - If none provided, will be generated automatically from the fasta reference - --dbsnp dbsnp file - --dbsnp_index dbsnp index - If none provided, will be generated automatically if a dbsnp file is provided - --dict dict from the fasta reference - If none provided, will be generated automatically from the fasta reference - --fasta fasta reference - --fasta_fai reference index - If none provided, will be generated automatically from the fasta reference - --germline_resource Germline Resource File - --germline_resource_index Germline Resource Index - If none provided, will be generated automatically if a germlineResource file is provided - --intervals intervals - If none provided, will be generated automatically from the fasta reference - Use --no_intervals to disable automatic generation - --known_indels knownIndels file - --known_indels_index knownIndels index - If none provided, will be generated automatically if a knownIndels file is provided - --species species for VEP - --snpeff_db snpeffDb version - --vep_cache_version VEP Cache version + --trim_fastq [bool] Run Trim Galore + --clip_r1 [int] Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads) + --clip_r2 [int] Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only) + --three_prime_clip_r1 [int] Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed + --three_prime_clip_r2 [int] Instructs Trim Galore to remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed + --trim_nextseq [int] Instructs Trim Galore to apply the --nextseq=X option, to trim based on quality after removing poly-G tails + --save_trimmed [bool] Save trimmed FastQ file intermediates + + References If not specified in the configuration file or you wish to overwrite any of the references. 
+ --ac_loci [file] acLoci file + --ac_loci_gc [file] acLoci GC file + --bwa [file] bwa indexes + If none provided, will be generated automatically from the fasta reference + --dbsnp [file] dbsnp file + --dbsnp_index [file] dbsnp index + If none provided, will be generated automatically if a dbsnp file is provided + --dict [file] dict from the fasta reference + If none provided, will be generated automatically from the fasta reference + --fasta [file] fasta reference + --fasta_fai [file] reference index + If none provided, will be generated automatically from the fasta reference + --germline_resource [file] Germline Resource File + --germline_resource_index [file] Germline Resource Index + If none provided, will be generated automatically if a germlineResource file is provided + --intervals [file] intervals + If none provided, will be generated automatically from the fasta reference + Use --no_intervals to disable automatic generation + --known_indels [file] knownIndels file + --known_indels_index [file] knownIndels index + If none provided, will be generated automatically if a knownIndels file is provided + --species [str] Species for VEP + --snpeff_db [str] snpEff Database version + --vep_cache_version [str] VEP Cache version Other options: - --outdir The output directory where the results will be saved - --publish_dir_mode Mode of publishing data in the output directory. - Available: symlink, rellink, link, copy, copyNoFollow, move - Default: copy - --sequencing_center Name of sequencing center to be displayed in BAM file - --multiqc_config Specify a custom config file for MultiQC - --monochrome_logs Logs will be without colors - --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits - --max_multiqc_email_size Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) - -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic - --ascat_ploidy Use this parameter together with to overwrite default behavior from ASCAT regarding ploidy. Note: Also requires that --ascat_purity is set. - --ascat_purity Use this parameter to overwrite default behavior from ASCAT regarding purity. Note: Also requires that --ascat_ploidy is set. + --outdir [file] The output directory where the results will be saved + --publish_dir_mode [str] Mode of publishing data in the output directory. + Available: symlink, rellink, link, copy, copyNoFollow, move + Default: copy + --sequencing_center [str] Name of sequencing center to be displayed in BAM file + --multiqc_config [file] Specify a custom config file for MultiQC + --monochrome_logs [bool] Logs will be without colors + --email [str] Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits + --max_multiqc_email_size [str] Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) + -name [str] Name for the pipeline run. 
If not specified, Nextflow will automatically generate a random mnemonic AWSBatch options: - --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch - --awsregion The AWS Region for your AWS Batch job to run on + --awsqueue [str] The AWSBatch JobQueue that needs to be set when running on AWSBatch + --awsregion [str] The AWS Region for your AWS Batch job to run on + --awscli [str] Path to the AWS CLI tool """.stripIndent() } @@ -358,18 +359,20 @@ if ((params.ascat_ploidy && !params.ascat_purity) || (!params.ascat_ploidy && pa custom_runName = params.name if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) custom_runName = workflow.runName -if (workflow.profile == 'awsbatch') { +if (workflow.profile.contains('awsbatch')) { // AWSBatch sanity checking if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" // Check outdir paths to be S3 buckets if running on AWSBatch // related: https://github.com/nextflow-io/nextflow/issues/813 if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" // Prevent trace files to be stored on S3 since S3 does not support rolling files. - if (workflow.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." + if (params.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." } // Stage config files -ch_output_docs = Channel.fromPath("${baseDir}/docs/output.md") +ch_multiqc_config = file("$baseDir/assets/multiqc_config.yaml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() +ch_output_docs = file("$baseDir/docs/output.md", checkIfExists: true) tsvPath = null if (params.input && (hasExtension(params.input, "tsv") || hasExtension(params.input, "vcf") || hasExtension(params.input, "vcf.gz"))) tsvPath = params.input @@ -489,6 +492,7 @@ if (params.target_bed) summary['Target BED'] = params.target_bed if (step) summary['Step'] = step if (params.tools) summary['Tools'] = tools.join(', ') if (params.skip_qc) summary['QC tools skip'] = skipQC.join(', ') + if (params.trim_fastq) { summary['Fastq trim'] = "Fastq trim selected" summary['Trim R1'] = "$params.clip_r1 bp" @@ -498,9 +502,12 @@ if (params.trim_fastq) { summary["NextSeq Trim"] = "$params.trim_nextseq bp" summary['Saved Trimmed Fastq'] = params.saveTrimmed ? 'Yes' : 'No' } + if (params.no_intervals && step != 'annotate') summary['Intervals'] = 'Do not use' if ('haplotypecaller' in tools) summary['GVCF'] = params.no_gvcf ? 'No' : 'Yes' if ('strelka' in tools && 'manta' in tools ) summary['Strelka BP'] = params.no_strelka_bp ? 
'No' : 'Yes' +if (params.ascat_purity) summary['ASCAT purity'] = params.ascat_purity +if (params.ascat_ploidy) summary['ASCAT ploidy'] = params.ascat_ploidy if (params.sequencing_center) summary['Sequenced by'] = params.sequencing_center if (params.pon && 'mutect2' in tools) summary['Panel of normals'] = params.pon @@ -535,38 +542,56 @@ if (params.species) summary['species'] = params.sp if (params.snpeff_cache) summary['snpEff_cache'] = params.snpeff_cache if (params.vep_cache) summary['vep_cache'] = params.vep_cache -if (workflow.profile == 'awsbatch') { - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue +if (workflow.profile.contains('awsbatch')) { + summary['AWS Region'] = params.awsregion + summary['AWS Queue'] = params.awsqueue + summary['AWS CLI'] = params.awscli } + summary['Config Profile'] = workflow.profile -if (params.config_profile_description) summary['Config Description'] = params.config_profile_description -if (params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact -if (params.config_profile_url) summary['Config URL'] = params.config_profile_url -if (params.email) { - summary['E-mail Address'] = params.email - summary['MultiQC maxsize'] = params.max_multiqc_email_size +if (params.config_profile_description) summary['Config Description'] = params.config_profile_description +if (params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact +if (params.config_profile_url) summary['Config URL'] = params.config_profile_url +if (params.email || params.email_on_fail) { + summary['E-mail Address'] = params.email + summary['E-mail on failure'] = params.email_on_fail + summary['MultiQC maxsize'] = params.max_multiqc_email_size } -if (params.ascat_purity) summary['ASCAT purity'] = params.ascat_purity -if (params.ascat_ploidy) summary['ASCAT ploidy'] = params.ascat_ploidy log.info summary.collect { k, v -> "${k.padRight(18)}: $v" }.join("\n") if (params.monochrome_logs) log.info "----------------------------------------------------" -else log.info "\033[2m----------------------------------------------------\033[0m" +else log.info "-\033[2m--------------------------------------------------\033[0m-" if ('mutect2' in tools && !(params.pon)) log.warn "[nf-core/sarek] Mutect2 was requested, but as no panel of normals were given, results will not be optimal" // Check the hostnames against configured profiles checkHostname() -/* - * Parse software version numbers - */ -process GetSoftwareVersions { - publishDir path:"${params.outdir}/pipeline_info", mode: params.publish_dir_mode +Channel.from(summary.collect{ [it.key, it.value] }) + .map { k,v -> "
<dt>$k</dt><dd><samp>${v ?: 'N/A'}</samp></dd>" } + .reduce { a, b -> return [a, b].join("\n            ") } + .map { x -> """ + id: 'nf-core-sarek-summary' + description: " - this information is collected when the pipeline is started." + section_name: 'nf-core/sarek Workflow Summary' + section_href: 'https://github.com/nf-core/sarek' + plot_type: 'html' + data: | + <dl class="dl-horizontal"> + $x + </dl>
+ """.stripIndent() } + .set { ch_workflow_summary } + +// Parse software version numbers + +process Get_software_versions { + publishDir path:"${params.outdir}/pipeline_info", mode: params.publish_dir_mode, + saveAs: { it.indexOf(".csv") > 0 ? it : null } output: - file 'software_versions_mqc.yaml' into yamlSoftwareVersion + file 'software_versions_mqc.yaml' into ch_software_versions_yaml + file "software_versions.csv" when: !('versions' in skipQC) @@ -1004,13 +1029,12 @@ process TrimGalore { } } else { inputPairReadsTrimGalore - .set {outputPairReadsTrimGalore} + .set{outputPairReadsTrimGalore} trimGaloreReport = Channel.empty() } // STEP 1: MAPPING READS TO REFERENCE GENOME WITH BWA MEM - inputPairReads = outputPairReadsTrimGalore.mix(inputBam) inputPairReads = inputPairReads.dump(tag:'INPUT') @@ -1103,7 +1127,7 @@ process SentieonMapReads { sentieon bwa mem -K 100000000 -R \"${readGroup}\" ${extra} -t ${task.cpus} -M ${fasta} \ ${inputFile1} ${inputFile2} | \ sentieon util sort -r ${fasta} -o ${idSample}_${idRun}.bam -t ${task.cpus} --sam2bam -i - - """ + """ } bamMappedSentieon = bamMappedSentieon.dump(tag:'Sentieon Mapped BAM') @@ -1475,7 +1499,7 @@ process SentieonBQSR { output: set idPatient, idSample, file("${idSample}.recal.bam"), file("${idSample}.recal.bam.bai") into bamRecalSentieon - set idPatient, idSample into bamRecalSentieonTSV + set idPatient, idSample into bamRecalSentieonTSV file("${idSample}_recal_result.csv") into bamRecalSentieonQC when: params.sentieon @@ -2148,8 +2172,8 @@ process MergeMutect2Stats { when: 'mutect2' in tools - script: - stats = statsFiles.collect{ "-stats ${it} " }.join(' ') + script: + stats = statsFiles.collect{ "-stats ${it} " }.join(' ') """ gatk --java-options "-Xmx${task.memory.toGiga()}g" \ MergeMutectStats \ @@ -2186,11 +2210,11 @@ process ConcatVCF { script: if (variantCaller == 'HaplotypeCallerGVCF') - outputFile = "HaplotypeCaller_${idSample}.g.vcf" + outputFile = "HaplotypeCaller_${idSample}.g.vcf" else if (variantCaller == "Mutect2") - outputFile = "Mutect2_unfiltered_${idSample}.vcf" + outputFile = "Mutect2_unfiltered_${idSample}.vcf" else - outputFile = "${variantCaller}_${idSample}.vcf" + outputFile = "${variantCaller}_${idSample}.vcf" options = params.target_bed ? 
"-t ${targetBED}" : "" """ concatenateVCFs.sh -i ${fastaFai} -c ${task.cpus} -o ${outputFile} ${options} @@ -2279,14 +2303,14 @@ process CalculateContamination { input: set idPatient, idSampleNormal, idSampleTumor, file(bamNormal), file(baiNormal), file(bamTumor), file(baiTumor), file(mergedPileup) from pairBamCalculateContamination - - output: + + output: set idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("${idSampleTumor}_contamination.table") into contaminationTable when: 'mutect2' in tools - script: - """ + script: + """ # calculate contamination gatk --java-options "-Xmx${task.memory.toGiga()}g" \ CalculateContamination \ @@ -2317,8 +2341,8 @@ process FilterMutect2Calls { file(germlineResource) from ch_germline_resource file(germlineResourceIndex) from ch_germline_resource_tbi file(intervals) from ch_intervals - - output: + + output: set val("Mutect2"), idPatient, idSamplePair, file("Mutect2_filtered_${idSamplePair}.vcf.gz"), file("Mutect2_filtered_${idSamplePair}.vcf.gz.tbi"), file("Mutect2_filtered_${idSamplePair}.vcf.gz.filteringStats.tsv") into filteredMutect2Output when: 'mutect2' in tools @@ -2635,7 +2659,7 @@ process ConvertAlleleCounts { script: gender = genderMap[idPatient] """ - convertAlleleCounts.r ${idSampleTumor} ${alleleCountTumor} ${idSampleNormal} ${alleleCountNormal} ${gender} + Rscript ${workflow.projectDir}/bin/convertAlleleCounts.r ${idSampleTumor} ${alleleCountTumor} ${idSampleNormal} ${alleleCountNormal} ${gender} """ } @@ -2665,14 +2689,16 @@ process Ascat { ascat_ploidy=params.ascat_ploidy if (params.ascat_purity && params.ascat_ploidy) """ - for f in *BAF *LogR; do sed 's/chr//g' \$f > tmpFile; mv tmpFile \$f;done - run_ascat.r --tumorbaf ${bafTumor} --tumorlogr ${logrTumor} --normalbaf ${bafNormal} --normallogr ${logrNormal} --tumorname ${idSampleTumor} --basedir ${baseDir} --gcfile ${acLociGC} --gender ${gender} --purity ${ascat_purity} --ploidy ${ascat_ploidy} + for f in *BAF *LogR; do sed 's/chr//g' \$f > tmpFile; mv tmpFile \$f;done + Rscript ${workflow.projectDir}/bin/run_ascat.r --tumorbaf ${bafTumor} --tumorlogr ${logrTumor} --normalbaf ${bafNormal} --normallogr ${logrNormal} --tumorname ${idSampleTumor} --basedir ${baseDir} --gcfile ${acLociGC} --gender ${gender} --purity ${ascat_purity} --ploidy ${ascat_ploidy} """ else """ - for f in *BAF *LogR; do sed 's/chr//g' \$f > tmpFile; mv tmpFile \$f;done - run_ascat.r --tumorbaf ${bafTumor} --tumorlogr ${logrTumor} --normalbaf ${bafNormal} --normallogr ${logrNormal} --tumorname ${idSampleTumor} --basedir ${baseDir} --gcfile ${acLociGC} --gender ${gender} + for f in *BAF *LogR; do sed 's/chr//g' \$f > tmpFile; mv tmpFile \$f;done + Rscript ${workflow.projectDir}/bin/run_ascat.r --tumorbaf ${bafTumor} --tumorlogr ${logrTumor} --normalbaf ${bafNormal} --normallogr ${logrNormal} --tumorname ${idSampleTumor} --basedir ${baseDir} --gcfile ${acLociGC} --gender ${gender} """ + + } ascatOut.dump(tag:'ASCAT') @@ -3242,8 +3268,10 @@ process MultiQC { publishDir "${params.outdir}/Reports/MultiQC", mode: params.publish_dir_mode input: - file (multiqcConfig) from Channel.value(params.multiqc_config ? 
@@ -3242,8 +3268,10 @@ process MultiQC {
     publishDir "${params.outdir}/Reports/MultiQC", mode: params.publish_dir_mode

     input:
-        file (multiqcConfig) from Channel.value(params.multiqc_config ? file(params.multiqc_config) : "")
-        file (versions) from yamlSoftwareVersion
+        file (multiqcConfig) from ch_multiqc_config
+        file (mqc_custom_config) from ch_multiqc_custom_config.collect().ifEmpty([])
+        file (versions) from ch_software_versions_yaml.collect()
+        file workflow_summary from ch_workflow_summary.collectFile(name: "workflow_summary_mqc.yaml")
         file ('bamQC/*') from bamQCReport.collect().ifEmpty([])
         file ('BCFToolsStats/*') from bcftoolsReport.collect().ifEmpty([])
         file ('FastQC/*') from fastQCReport.collect().ifEmpty([])
@@ -3251,30 +3279,50 @@ process MultiQC {
         file ('DuplicateMarked/*.recal.table') from baseRecalibratorReport.collect().ifEmpty([])
         file ('SamToolsStats/*') from samtoolsStatsReport.collect().ifEmpty([])
         file ('snpEff/*') from snpeffReport.collect().ifEmpty([])
-        file ('TrimGalore/*') from trimGaloreReport.collect().ifEmpty([])
         file ('VCFTools/*') from vcftoolsReport.collect().ifEmpty([])

     output:
-        set file("*multiqc_report.html"), file("*multiqc_data") into multiQCOut
+        file "*multiqc_report.html" into ch_multiqc_report
+        file "*_data"
+        file "multiqc_plots"

     when: !('multiqc' in skipQC)

     script:
+    rtitle = custom_runName ? "--title \"$custom_runName\"" : ''
+    rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : ''
+    custom_config_file = params.multiqc_config ? "--config $mqc_custom_config" : ''
     """
-    multiqc -f -v .
+    multiqc -f ${rtitle} ${rfilename} ${custom_config_file} .
     """
 }

-multiQCOut.dump(tag:'MultiQC')
+ch_multiqc_report.dump(tag:'MultiQC')

-/*
- * Completion e-mail notification
- */
+// Output Description HTML
+process Output_documentation {
+    publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode
+
+    input:
+        file output_docs from ch_output_docs
+
+    output:
+        file "results_description.html"
+
+    script:
+    """
+    markdown_to_html.py $output_docs -o results_description.html
+    """
+}
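+// markdown_to_html.py is provided by the nf-core template's bin/ directory;
+// ch_output_docs is assumed to point at the pipeline's docs/output.md.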
+ log.warn "[{{ cookiecutter.name }}] Found multiple reports from process 'multiqc', will use only one" mqc_report = mqc_report[0] } } } catch (all) { - log.warn "[nf-core/sarek] Could not attach MultiQC report to summary email" + log.warn "[{{ cookiecutter.name }}] Could not attach MultiQC report to summary email" + } + + // Check if we are only sending emails on failure + email_address = params.email + if (!params.email && params.email_on_fail && !workflow.success) { + email_address = params.email_on_fail } // Render the TXT template @@ -3325,48 +3379,51 @@ workflow.onComplete { def email_html = html_template.toString() // Render the sendmail template - def smail_fields = [ email: params.email, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.max_multiqc_email_size.toBytes() ] + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.max_multiqc_email_size.toBytes() ] def sf = new File("$baseDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) def sendmail_html = sendmail_template.toString() // Send the HTML e-mail - if (params.email) { + if (email_address) { try { if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[nf-core/sarek] Sent summary e-mail to $params.email (sendmail)" + log.info "[{{ cookiecutter.name }}] Sent summary e-mail to $email_address (sendmail)" } catch (all) { // Catch failures and try with plaintext - [ 'mail', '-s', subject, params.email ].execute() << email_txt - log.info "[nf-core/sarek] Sent summary e-mail to $params.email (mail)" + [ 'mail', '-s', subject, email_address ].execute() << email_txt + log.info "[{{ cookiecutter.name }}] Sent summary e-mail to $email_address (mail)" } } // Write summary e-mail HTML to a file def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) output_d.mkdirs() + if (!output_d.exists()) { + output_d.mkdirs() + } def output_hf = new File(output_d, "pipeline_report.html") output_hf.withWriter { w -> w << email_html } def output_tf = new File(output_d, "pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } - c_reset = params.monochrome_logs ? '' : "\033[0m"; - c_red = params.monochrome_logs ? '' : "\033[0;31m"; c_green = params.monochrome_logs ? '' : "\033[0;32m"; c_purple = params.monochrome_logs ? '' : "\033[0;35m"; + c_red = params.monochrome_logs ? '' : "\033[0;31m"; + c_reset = params.monochrome_logs ? 
'' : "\033[0m"; if (workflow.stats.ignoredCount > 0 && workflow.success) { - log.info "${c_purple}Warning, pipeline completed, but with errored process(es)${c_reset}" - log.info "${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCountFmt}${c_reset}" - log.info "${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCountFmt}${c_reset}" + log.info "-${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}-" + log.info "-${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}-" + log.info "-${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}-" } - if (workflow.success) log.info "${c_purple}[nf-core/sarek]${c_green} Pipeline completed successfully${c_reset}" - else { + if (workflow.success) { + log.info "-${c_purple}[{{ cookiecutter.name }}]${c_green} Pipeline completed successfully${c_reset}-" + } else { checkHostname() - log.info "${c_purple}[nf-core/sarek]${c_red} Pipeline completed with errors${c_reset}" + log.info "-${c_purple}[{{ cookiecutter.name }}]${c_red} Pipeline completed with errors${c_reset}-" } } @@ -3395,18 +3452,16 @@ ${summary.collect { k, v -> "
@@ -3395,18 +3452,16 @@ ${summary.collect { k, v -> "<dt>$k</dt><dd><samp>${v ?: '