From ebd96a7763ca6805cd76589dd7be62e8a2bebef2 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 20 Feb 2024 15:28:57 +0000 Subject: [PATCH 01/21] Template update for nf-core/tools version 2.13 --- .editorconfig | 9 +- .github/workflows/awsfulltest.yml | 4 +- .github/workflows/awstest.yml | 4 +- .github/workflows/branch.yml | 2 +- .github/workflows/ci.yml | 7 +- .github/workflows/clean-up.yml | 2 +- .github/workflows/download_pipeline.yml | 17 +- .github/workflows/linting.yml | 12 +- .github/workflows/linting_comment.yml | 4 +- .github/workflows/release-announcements.yml | 11 +- CHANGELOG.md | 2 +- README.md | 5 +- assets/multiqc_config.yml | 2 + assets/schema_input.json | 21 +- bin/check_samplesheet.py | 259 ----------- conf/modules.config | 8 - lib/NfcoreTemplate.groovy | 356 -------------- lib/Utils.groovy | 47 -- lib/WorkflowMag.groovy | 122 ----- lib/WorkflowMain.groovy | 77 --- main.nf | 100 ++-- modules.json | 28 +- modules/local/samplesheet_check.nf | 31 -- .../dumpsoftwareversions/environment.yml | 7 - .../custom/dumpsoftwareversions/main.nf | 24 - .../custom/dumpsoftwareversions/meta.yml | 37 -- .../templates/dumpsoftwareversions.py | 102 ---- .../dumpsoftwareversions/tests/main.nf.test | 43 -- .../tests/main.nf.test.snap | 33 -- .../dumpsoftwareversions/tests/tags.yml | 2 - modules/nf-core/fastqc/tests/main.nf.test | 14 +- .../nf-core/fastqc/tests/main.nf.test.snap | 76 ++- modules/nf-core/multiqc/environment.yml | 2 +- modules/nf-core/multiqc/main.nf | 4 +- modules/nf-core/multiqc/tests/main.nf.test | 13 +- .../nf-core/multiqc/tests/main.nf.test.snap | 32 +- nextflow.config | 7 +- nextflow_schema.json | 1 + pyproject.toml | 8 +- subworkflows/local/input_check.nf | 44 -- .../local/utils_nfcore_mag_pipeline/main.nf | 247 ++++++++++ .../nf-core/utils_nextflow_pipeline/main.nf | 126 +++++ .../nf-core/utils_nextflow_pipeline/meta.yml | 38 ++ .../tests/main.function.nf.test | 54 +++ .../tests/main.function.nf.test.snap | 12 + .../tests/main.workflow.nf.test | 123 +++++ .../tests/nextflow.config | 9 + .../utils_nextflow_pipeline/tests/tags.yml | 2 + .../nf-core/utils_nfcore_pipeline/main.nf | 440 ++++++++++++++++++ .../nf-core/utils_nfcore_pipeline/meta.yml | 24 + .../tests/main.function.nf.test | 134 ++++++ .../tests/main.function.nf.test.snap | 138 ++++++ .../tests/main.workflow.nf.test | 29 ++ .../tests/main.workflow.nf.test.snap | 15 + .../tests/nextflow.config | 9 + .../utils_nfcore_pipeline/tests/tags.yml | 2 + .../nf-core/utils_nfvalidation_plugin/main.nf | 62 +++ .../utils_nfvalidation_plugin/meta.yml | 44 ++ .../tests/main.nf.test | 200 ++++++++ .../tests/nextflow_schema.json | 96 ++++ .../utils_nfvalidation_plugin/tests/tags.yml | 2 + workflows/mag.nf | 133 ++---- 62 files changed, 2110 insertions(+), 1408 deletions(-) delete mode 100755 bin/check_samplesheet.py delete mode 100755 lib/NfcoreTemplate.groovy delete mode 100644 lib/Utils.groovy delete mode 100755 lib/WorkflowMag.groovy delete mode 100755 lib/WorkflowMain.groovy delete mode 100644 modules/local/samplesheet_check.nf delete mode 100644 modules/nf-core/custom/dumpsoftwareversions/environment.yml delete mode 100644 modules/nf-core/custom/dumpsoftwareversions/main.nf delete mode 100644 modules/nf-core/custom/dumpsoftwareversions/meta.yml delete mode 100755 modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py delete mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test delete mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap delete mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml delete mode 100644 subworkflows/local/input_check.nf create mode 100644 subworkflows/local/utils_nfcore_mag_pipeline/main.nf create mode 100644 subworkflows/nf-core/utils_nextflow_pipeline/main.nf create mode 100644 subworkflows/nf-core/utils_nextflow_pipeline/meta.yml create mode 100644 subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test create mode 100644 subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap create mode 100644 subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test create mode 100644 subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config create mode 100644 subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml create mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/main.nf create mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/meta.yml create mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test create mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap create mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test create mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap create mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config create mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml create mode 100644 subworkflows/nf-core/utils_nfvalidation_plugin/main.nf create mode 100644 subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml create mode 100644 subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test create mode 100644 subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json create mode 100644 subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml diff --git a/.editorconfig b/.editorconfig index 9b990088..dd9ffa53 100644 --- a/.editorconfig +++ b/.editorconfig @@ -18,7 +18,12 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset [/assets/email*] indent_size = unset @@ -28,5 +33,5 @@ indent_size = unset indent_style = unset # ignore python -[*.{py}] +[*.{py,md}] indent_style = unset diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index fcfa7fc8..aafeed37 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v2 + uses: seqeralabs/action-tower-launch@922e5c8d5ac4e918107ec311d2ebbd65e5982b3d # v2 # TODO nf-core: You can customise AWS full pipeline tests as required # Add full size test data (but still relatively small datasets for few samples) # on the `test_full.config` test runs with only one set of parameters @@ -31,7 +31,7 @@ jobs: } profiles: test_full - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 24807559..1ce4b46d 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,7 +12,7 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v2 + uses: seqeralabs/action-tower-launch@922e5c8d5ac4e918107ec311d2ebbd65e5982b3d # v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} @@ -25,7 +25,7 @@ jobs: } profiles: test - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 323c2346..cdede3cc 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -19,7 +19,7 @@ jobs: # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v2 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `master` branch :x: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ca3f7ce7..036677cb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,13 +28,16 @@ jobs: - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@v4 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@b9f764e8ba5c76b712ace14ecbfcef0e40ae2dd8 # v1 with: version: "${{ matrix.NXF_VER }}" + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + - name: Run pipeline with test data # TODO nf-core: You can customise CI pipeline run tests as required # For example: adding multiple test runs with different parameters diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index e37cfda5..0b6b1f27 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@v9 + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 8611458a..f823210d 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -6,6 +6,11 @@ name: Test successful pipeline download with 'nf-core download' # - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. on: workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." + required: true + default: "dev" pull_request: types: - opened @@ -23,13 +28,13 @@ jobs: runs-on: ubuntu-latest steps: - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@b9f764e8ba5c76b712ace14ecbfcef0e40ae2dd8 # v1 - - uses: actions/setup-python@v5 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: python-version: "3.11" architecture: "x64" - - uses: eWaterCycle/setup-singularity@v7 + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 with: singularity-version: 3.8.3 @@ -42,13 +47,13 @@ jobs: run: | echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} - echo "REPO_BRANCH=${GITHUB_REF#refs/heads/}" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} - name: Download the pipeline env: NXF_SINGULARITY_CACHEDIR: ./ run: | - nf-core download ${{ env.REPO_LOWERCASE }} \ + nf-core download ${{ env.REPO_LOWERCASE }} \ --revision ${{ env.REPO_BRANCH }} \ --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ --compress "none" \ @@ -64,4 +69,4 @@ jobs: env: NXF_SINGULARITY_CACHEDIR: ./ NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 81cd098e..748b4311 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,10 +14,10 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Set up Python 3.11 - uses: actions/setup-python@v5 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: python-version: 3.11 cache: "pip" @@ -32,12 +32,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v4 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@b9f764e8ba5c76b712ace14ecbfcef0e40ae2dd8 # v1 - - uses: actions/setup-python@v5 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: python-version: "3.11" architecture: "x64" @@ -60,7 +60,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 147bcd10..b706875f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v3 + uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index 21ac3f06..c3674af2 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -9,6 +9,11 @@ jobs: toot: runs-on: ubuntu-latest steps: + - name: get topics and convert to hashtags + id: get_topics + run: | + curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ' > $GITHUB_OUTPUT + - uses: rzr/fediverse-action@master with: access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} @@ -20,11 +25,13 @@ jobs: Please see the changelog: ${{ github.event.release.html_url }} + ${{ steps.get_topics.outputs.GITHUB_OUTPUT }} #nfcore #openscience #nextflow #bioinformatics + send-tweet: runs-on: ubuntu-latest steps: - - uses: actions/setup-python@v5 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: python-version: "3.10" - name: Install dependencies @@ -56,7 +63,7 @@ jobs: bsky-post: runs-on: ubuntu-latest steps: - - uses: zentered/bluesky-post-action@v0.1.0 + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 with: post: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c9588ad..d3b56b3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.5.2dev - [date] +## v2.5.5dev - [date] Initial release of nf-core/mag, created with the [nf-core](https://nf-co.re/) template. diff --git a/README.md b/README.md index e980bd42..3f69b5f2 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,9 @@ nf-core/mag -[![GitHub Actions CI Status](https://github.com/nf-core/mag/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/mag/actions?query=workflow%3A%22nf-core+CI%22) -[![GitHub Actions Linting Status](https://github.com/nf-core/mag/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/mag/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mag/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) + +[![GitHub Actions CI Status](https://github.com/nf-core/mag/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/nf-core/mag/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mag/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index ce72cfbb..a4c656e2 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -11,3 +11,5 @@ report_section_order: order: -1002 export_plots: true + +disable_version_detection: true diff --git a/assets/schema_input.json b/assets/schema_input.json index 44d6d151..6f443034 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,25 +10,22 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] }, "fastq_1": { "type": "string", + "format": "file-path", + "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" } }, "required": ["sample", "fastq_1"] diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index 4a758fe0..00000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,259 +0,0 @@ -#!/usr/bin/env python - - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - VALID_FORMATS = ( - ".fq.gz", - ".fastq.gz", - ) - - def __init__( - self, - sample_col="sample", - first_col="fastq_1", - second_col="fastq_2", - single_col="single_end", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - sample_col (str): The name of the column that contains the sample name - (default "sample"). - first_col (str): The name of the column that contains the first (or only) - FASTQ file path (default "fastq_1"). - second_col (str): The name of the column that contains the second (if any) - FASTQ file path (default "fastq_2"). - single_col (str): The name of the new column that will be inserted and - records whether the sample contains single- or paired-end sequencing - reads (default "single_end"). - - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._first_col = first_col - self._second_col = second_col - self._single_col = single_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_sample(row) - self._validate_first(row) - self._validate_second(row) - self._validate_pair(row) - self._seen.add((row[self._sample_col], row[self._first_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - if len(row[self._sample_col]) <= 0: - raise AssertionError("Sample input is required.") - # Sanitize samples slightly. - row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_first(self, row): - """Assert that the first FASTQ entry is non-empty and has the right format.""" - if len(row[self._first_col]) <= 0: - raise AssertionError("At least the first FASTQ file is required.") - self._validate_fastq_format(row[self._first_col]) - - def _validate_second(self, row): - """Assert that the second FASTQ entry has the right format if it exists.""" - if len(row[self._second_col]) > 0: - self._validate_fastq_format(row[self._second_col]) - - def _validate_pair(self, row): - """Assert that read pairs have the same file extension. Report pair status.""" - if row[self._first_col] and row[self._second_col]: - row[self._single_col] = False - first_col_suffix = Path(row[self._first_col]).suffixes[-2:] - second_col_suffix = Path(row[self._second_col]).suffixes[-2:] - if first_col_suffix != second_col_suffix: - raise AssertionError("FASTQ pairs must have the same file extensions.") - else: - row[self._single_col] = True - - def _validate_fastq_format(self, filename): - """Assert that a given filename has one of the expected FASTQ extensions.""" - if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): - raise AssertionError( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_samples(self): - """ - Assert that the combination of sample name and FASTQ filename is unique. - - In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. - - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of sample name and FASTQ must be unique.") - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - row[self._sample_col] = f"{sample}_T{seen[sample]}" - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. _text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. - - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - This function checks that the samplesheet follows the following structure, - see also the `viral recon samplesheet`_:: - - sample,fastq_1,fastq_2 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, - - .. _viral recon samplesheet: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - - """ - required_columns = {"sample", "fastq_1", "fastq_2"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. - checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - header.insert(1, "single_end") - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/conf/modules.config b/conf/modules.config index d91c6aba..e3ea8fa6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,14 +18,6 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: SAMPLESHEET_CHECK { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: FASTQC { ext.args = '--quiet' } diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index e248e4c3..00000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,356 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput -import nextflow.extension.FilesEx - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") - sendmail_tf.withWriter { w -> w << sendmail_html } - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); - output_hf.delete() - - // Write summary e-mail TXT to a file - def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); - output_tf.delete() - } - - // - // Construct and send a notification to a web server as JSON - // e.g. Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Dump pipeline parameters in a json file - // - public static void dump_parameters(workflow, params) { - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def filename = "params_${timestamp}.json" - def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") - def jsonStr = JsonOutput.toJson(params) - temp_pf.text = JsonOutput.prettyPrint(jsonStr) - - FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") - temp_pf.delete() - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100644 index 8d030f4e..00000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. - def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean - - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } - - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowMag.groovy b/lib/WorkflowMag.groovy deleted file mode 100755 index 207d0653..00000000 --- a/lib/WorkflowMag.groovy +++ /dev/null @@ -1,122 +0,0 @@ -// -// This file holds several functions specific to the workflow/mag.nf in the nf-core/mag pipeline -// - -import nextflow.Nextflow -import groovy.text.SimpleTemplateEngine - -class WorkflowMag { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - - genomeExistsError(params, log) - - - if (!params.fasta) { - Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - } - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

$group

\n" - summary_section += "
\n" - for (param in group_params.keySet()) { - summary_section += "
$param
${group_params.get(param) ?: 'N/A'}
\n" - } - summary_section += "
\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - // - // Generate methods description for MultiQC - // - - public static String toolCitationText(params) { - - // TODO nf-core: Optionally add in-text citation tools to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() - - return citation_text - } - - public static String toolBibliographyText(params) { - - // TODO Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def reference_text = [ - "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " - ].join(' ').trim() - - return reference_text - } - - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { - // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = run_workflow.toMap() - meta["manifest_map"] = run_workflow.manifest.toMap() - - // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " - - // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" - - // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - //meta["tool_bibliography"] = toolBibliographyText(params) - - - def methods_text = mqc_methods_yaml.text - - def engine = new SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html - } - - // - // Exit pipeline if incorrect --genome key provided - // - private static void genomeExistsError(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - Nextflow.error(error_string) - } - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index bb2ec9e2..00000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,77 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the nf-core/mag pipeline -// - -import nextflow.Nextflow - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - // TODO nf-core: Add Zenodo DOI for pipeline after first release - //"* The pipeline\n" + - //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log, args) { - - // Print workflow version and exit on --version - if (params.version) { - String workflow_version = NfcoreTemplate.version(workflow) - log.info "${workflow.manifest.name} ${workflow_version}" - System.exit(0) - } - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - // Check that the profile doesn't contain spaces and doesn't end with a trailing comma - checkProfile(workflow.profile, args, log) - - // Check that conda channels are set-up correctly - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") - } - } - // - // Get attribute from genome config file e.g. fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } - - // - // Exit pipeline if --profile contains spaces - // - private static void checkProfile(profile, args, log) { - if (profile.endsWith(',')) { - Nextflow.error "Profile cannot end with a trailing comma. Please remove the comma from the end of the profile string.\nHint: A common mistake is to provide multiple values to `-profile` separated by spaces. Please use commas to separate profiles instead,e.g., `-profile docker,test`." - } - if (args[0]) { - log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${args[0]}` has been detected.\n Hint: A common mistake is to provide multiple values to `-profile` separated by spaces. Please use commas to separate profiles instead,e.g., `-profile docker,test`." - } - } -} diff --git a/main.nf b/main.nf index 6c8748a2..2c326431 100644 --- a/main.nf +++ b/main.nf @@ -13,66 +13,96 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// TODO nf-core: Remove this line if you don't need a FASTA file -// This is an example of how to use getGenomeAttribute() to fetch parameters -// from igenomes.config using `--genome` -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +include { MAG } from './workflows/mag' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_mag_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_mag_pipeline' + +include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_mag_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsHelp } from 'plugin/nf-validation' - -// Print help message if needed -if (params.help) { - def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) - def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) - System.exit(0) -} - -// Validate input parameters -if (params.validate_params) { - validateParameters() -} - -WorkflowMain.initialise(workflow, params, log, args) +// TODO nf-core: Remove this line if you don't need a FASTA file +// This is an example of how to use getGenomeAttribute() to fetch parameters +// from igenomes.config using `--genome` +params.fasta = getGenomeAttribute('fasta') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + NAMED WORKFLOWS FOR PIPELINE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { MAG } from './workflows/mag' - // -// WORKFLOW: Run main nf-core/mag analysis pipeline +// WORKFLOW: Run main analysis pipeline depending on type of input // workflow NFCORE_MAG { - MAG () -} + take: + samplesheet // channel: samplesheet read in from --input + + main: + + // + // WORKFLOW: Run pipeline + // + MAG ( + samplesheet + ) + + emit: + multiqc_report = MAG.out.multiqc_report // channel: /path/to/multiqc_report.html + +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { - NFCORE_MAG () + + main: + + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + params.input + ) + + // + // WORKFLOW: Run main workflow + // + NFCORE_MAG ( + PIPELINE_INITIALISATION.out.samplesheet + ) + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_MAG.out.multiqc_report + ) } /* diff --git a/modules.json b/modules.json index 2cb1473e..ef9a61e0 100644 --- a/modules.json +++ b/modules.json @@ -5,22 +5,36 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { - "custom/dumpsoftwareversions": { - "branch": "master", - "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", - "installed_by": ["modules"] - }, "fastqc": { "branch": "master", - "git_sha": "c9488585ce7bd35ccd2a30faa2371454c8112fb9", + "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", + "git_sha": "ccacf6f5de6df3bc6d73b665c1fd2933d8bbc290", "installed_by": ["modules"] } } + }, + "subworkflows": { + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "cd08c91373cd00a73255081340e4914485846ba1", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "262b17ed2aad591039f914951659177e6c39a8d8", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "cd08c91373cd00a73255081340e4914485846ba1", + "installed_by": ["subworkflows"] + } + } } } } diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index 22482cb4..00000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,31 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_single' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/mag/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml deleted file mode 100644 index 9b3272bc..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: custom_dumpsoftwareversions -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::multiqc=1.19 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf deleted file mode 100644 index f2187611..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_single' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : - 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" - - input: - path versions - - output: - path "software_versions.yml" , emit: yml - path "software_versions_mqc.yml", emit: mqc_yml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - template 'dumpsoftwareversions.py' -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml deleted file mode 100644 index 5f15a5fd..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ /dev/null @@ -1,37 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template -keywords: - - custom - - dump - - version -tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ["MIT"] -input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" -output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" -maintainers: - - "@drpatelh" - - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100755 index e55b8d43..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env python - - -"""Provide functions to merge multiple versions.yml files.""" - - -import platform -from textwrap import dedent - -import yaml - - -def _make_versions_html(versions): - """Generate a tabular HTML output of all versions for MultiQC.""" - html = [ - dedent( - """\\ - - - - - - - - - - """ - ) - ] - for process, tmp_versions in sorted(versions.items()): - html.append("") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - - - - - - """ - ) - ) - html.append("") - html.append("
    Process Name Software Version
    {process if (i == 0) else ''}{tool}{version}
    ") - return "\\n".join(html) - - -def main(): - """Load all version files and generate merged output.""" - versions_this_module = {} - versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, - } - - with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - - # aggregate versions by the module name (derived from fully-qualified process name) - versions_by_module = {} - for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - - versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", - } - - versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), - } - - with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) - with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - - with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) - - -if __name__ == "__main__": - main() diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test deleted file mode 100644 index b1e1630b..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test +++ /dev/null @@ -1,43 +0,0 @@ -nextflow_process { - - name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" - script "../main.nf" - process "CUSTOM_DUMPSOFTWAREVERSIONS" - tag "modules" - tag "modules_nfcore" - tag "custom" - tag "dumpsoftwareversions" - tag "custom/dumpsoftwareversions" - - test("Should run without failures") { - when { - process { - """ - def tool1_version = ''' - TOOL1: - tool1: 0.11.9 - '''.stripIndent() - - def tool2_version = ''' - TOOL2: - tool2: 1.9 - '''.stripIndent() - - input[0] = Channel.of(tool1_version, tool2_version).collectFile() - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.versions, - file(process.out.mqc_yml[0]).readLines()[0..10], - file(process.out.yml[0]).readLines()[0..7] - ).match() - } - ) - } - } -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap deleted file mode 100644 index 5f59a936..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap +++ /dev/null @@ -1,33 +0,0 @@ -{ - "Should run without failures": { - "content": [ - [ - "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" - ], - [ - "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", - " \\n\\n\\n \\n \\n\\", - " \\ \\n\\n\\n\\n \\n \\", - " \\ \\n \\n\\n\\n\\n\\", - " \\n\\n \\n \\n\\", - " \\ \\n\\n\\n\\n\\n\\n \\n\\", - " \\ \\n \\n\\n\\n\\n\\", - " \\n\\n \\n \\n\\" - ], - [ - "CUSTOM_DUMPSOFTWAREVERSIONS:", - " python: 3.11.7", - " yaml: 5.4.1", - "TOOL1:", - " tool1: 0.11.9", - "TOOL2:", - " tool2: '1.9'", - "Workflow:" - ] - ], - "timestamp": "2024-01-09T23:01:18.710682" - } -} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml deleted file mode 100644 index 405aa24a..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -custom/dumpsoftwareversions: - - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index 1f21c664..70edae4d 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -33,7 +33,7 @@ nextflow_process { { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, { assert path(process.out.html[0][1]).text.contains("") }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.versions).match("fastqc_versions_single") } ) } } @@ -63,7 +63,7 @@ nextflow_process { { assert path(process.out.html[0][1][0]).text.contains("") }, { assert path(process.out.html[0][1][1]).text.contains("") }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.versions).match("fastqc_versions_paired") } ) } } @@ -89,7 +89,7 @@ nextflow_process { { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, { assert path(process.out.html[0][1]).text.contains("") }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") } ) } } @@ -115,7 +115,7 @@ nextflow_process { { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, { assert path(process.out.html[0][1]).text.contains("") }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.versions).match("fastqc_versions_bam") } ) } } @@ -153,7 +153,7 @@ nextflow_process { { assert path(process.out.html[0][1][2]).text.contains("") }, { assert path(process.out.html[0][1][3]).text.contains("") }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.versions).match("fastqc_versions_multiple") } ) } } @@ -179,7 +179,7 @@ nextflow_process { { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, { assert path(process.out.html[0][1]).text.contains("") }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") } ) } } @@ -204,7 +204,7 @@ nextflow_process { { assert process.success }, { assert snapshot(process.out.html.collect { file(it[1]).getName() } + process.out.zip.collect { file(it[1]).getName() } + - process.out.versions ).match() } + process.out.versions ).match("fastqc_stub") } ) } } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 5d624bb8..86f7c311 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,5 +1,17 @@ { - "sarscov2 single-end [fastq] - stub": { + "fastqc_versions_interleaved": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:07.293713" + }, + "fastqc_stub": { "content": [ [ "test.html", @@ -7,14 +19,70 @@ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], - "timestamp": "2024-01-17T18:40:57.254299" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:31:01.425198" + }, + "fastqc_versions_multiple": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:55.797907" + }, + "fastqc_versions_bam": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:26.795862" + }, + "fastqc_versions_single": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:27.043675" + }, + "fastqc_versions_paired": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:47.584191" }, - "versions": { + "fastqc_versions_custom_prefix": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], - "timestamp": "2024-01-17T18:36:50.033627" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:41:14.576531" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index 7625b752..2212096a 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::multiqc=1.19 + - bioconda::multiqc=1.20 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1b9f7c43..354f4430 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : - 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' : + 'biocontainers/multiqc:1.20--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index d0438eda..f1c4242e 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -3,6 +3,7 @@ nextflow_process { name "Test Process MULTIQC" script "../main.nf" process "MULTIQC" + tag "modules" tag "modules_nfcore" tag "multiqc" @@ -12,7 +13,7 @@ nextflow_process { when { process { """ - input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) input[1] = [] input[2] = [] input[3] = [] @@ -25,7 +26,7 @@ nextflow_process { { assert process.success }, { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, { assert process.out.data[0] ==~ ".*/multiqc_data" }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.versions).match("multiqc_versions_single") } ) } @@ -36,7 +37,7 @@ nextflow_process { when { process { """ - input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) input[2] = [] input[3] = [] @@ -49,7 +50,7 @@ nextflow_process { { assert process.success }, { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, { assert process.out.data[0] ==~ ".*/multiqc_data" }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.versions).match("multiqc_versions_config") } ) } } @@ -61,7 +62,7 @@ nextflow_process { when { process { """ - input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) input[1] = [] input[2] = [] input[3] = [] @@ -75,7 +76,7 @@ nextflow_process { { assert snapshot(process.out.report.collect { file(it).getName() } + process.out.data.collect { file(it).getName() } + process.out.plots.collect { file(it).getName() } + - process.out.versions ).match() } + process.out.versions ).match("multiqc_stub") } ) } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index d37e7304..c204b488 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -1,21 +1,41 @@ { - "versions": { + "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + "versions.yml:md5,d320d4c37e349c5588e07e7a31cd4186" ] ], - "timestamp": "2024-01-09T23:02:49.911994" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T09:28:51.744211298" }, - "sarscov2 single-end [fastqc] - stub": { + "multiqc_stub": { "content": [ [ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + "versions.yml:md5,d320d4c37e349c5588e07e7a31cd4186" ] ], - "timestamp": "2024-01-09T23:03:14.524346" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T09:29:28.847433492" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,d320d4c37e349c5588e07e7a31cd4186" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T09:29:13.223621555" } } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 1f3037e8..912d4645 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,9 +16,7 @@ params { genome = null igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false - - - // MultiQC options + fasta = null// MultiQC options multiqc_config = null multiqc_title = null multiqc_logo = null @@ -43,7 +41,6 @@ params { custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" config_profile_contact = null config_profile_url = null - // Max resource options // Defaults only, expecting to be overwritten @@ -234,7 +231,7 @@ manifest { description = """Assembly, binning and annotation of metagenomes""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.5.2dev' + version = '2.5.5dev' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 68f9485e..6e20a381 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -16,6 +16,7 @@ "type": "string", "format": "file-path", "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", diff --git a/pyproject.toml b/pyproject.toml index 7d08e1c8..56110621 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,11 +3,13 @@ [tool.ruff] line-length = 120 target-version = "py38" -select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] cache-dir = "~/.cache/ruff" -[tool.ruff.isort] +[tool.ruff.lint] +select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] + +[tool.ruff.lint.isort] known-first-party = ["nf_core"] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "__init__.py" = ["E402", "F401"] diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index 0aecf87f..00000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,44 +0,0 @@ -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - - main: - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - .set { reads } - - emit: - reads // channel: [ val(meta), [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] -} - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() - - // add path(s) of the fastq file(s) to the meta map - def fastq_meta = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] - } else { - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" - } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] - } - return fastq_meta -} diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf new file mode 100644 index 00000000..dc90ad90 --- /dev/null +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -0,0 +1,247 @@ +// +// Subworkflow with functionality specific to the nf-core/pipeline pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + // + // Create channel from input file provided through params.input + // + Channel + .fromSamplesheet("input") + .map { + meta, fastq_1, fastq_2 -> + if (!fastq_2) { + return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + } else { + return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + } + } + .groupTuple() + .map { + validateInputSamplesheet(it) + } + .map { + meta, fastqs -> + return [ meta, fastqs.flatten() ] + } + .set { ch_samplesheet } + + emit: + samplesheet = ch_samplesheet + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() +}// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} +// +// Get attribute from genome config file e.g. fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +}// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..ac31f28f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..8ed4310c --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..db2030f8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,12 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "timestamp": "2024-01-19T11:32:36.031083" + }, + "Test Function checkCondaChannels": { + "content": null, + "timestamp": "2024-01-19T11:32:50.456" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..f7c54bc6 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,123 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + params { + outdir = "tests/results" + } + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + params { + outdir = "tests/results" + } + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + print_version = false + dump_parameters = true + outdir = params.outdir + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = params.outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + print_version = false + dump_parameters = true + outdir = params.outdir + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = null + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..53574ffe --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..a8b55d6f --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,440 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + " ${workflow.manifest.doi}\n\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    $group

    \n" + summary_section += "
    \n" + for (param in group_params.keySet()) { + summary_section += "
    $param
    ${group_params.get(param) ?: 'N/A'}
    \n" + } + summary_section += "
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 00000000..d08d2434 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..1dc317f8 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..10f948e6 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,138 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "timestamp": "2024-02-09T15:43:55.145717" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "timestamp": "2024-01-19T11:34:13.548431224" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "timestamp": "2024-01-19T11:34:38.840454873" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "timestamp": "2024-01-19T11:34:22.24352016" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "timestamp": "2024-01-19T11:35:04.418416984" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "timestamp": "2024-01-19T11:34:55.420000755" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "timestamp": "2024-01-19T11:35:13.436366565" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..d07ce54c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,15 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "timestamp": "2024-01-19T11:35:22.538940073" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 00000000..2585b65d --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 'plugin/nf-validation' + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default commmand used to run pipeline + pre_help_text // string: string to be printed before help text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 00000000..3d4a6b04 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. "nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 00000000..517ee54e --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('pre-help-text') } }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 00000000..7626c1c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 00000000..60b1cfff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/workflows/mag.nf b/workflows/mag.nf index 1988d9c2..218f3239 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -1,54 +1,15 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PRINT PARAMS SUMMARY + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -WorkflowMag.initialise(params, log) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { INPUT_CHECK } from '../subworkflows/local/input_check' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mag_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -56,50 +17,45 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow MAG { - ch_versions = Channel.empty() + take: + ch_samplesheet // channel: samplesheet read in from --input - // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files - // - INPUT_CHECK ( - file(params.input) - ) - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") - // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ - // ! There is currently no tooling to help you write a sample sheet schema + main: + + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() // // MODULE: Run FastQC // FASTQC ( - INPUT_CHECK.out.reads + ch_samplesheet ) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) + // + // Collate and save software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_collated_versions } // // MODULE: MultiQC // - workflow_summary = WorkflowMag.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = WorkflowMag.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) - ch_methods_description = Channel.value(methods_description) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) MULTIQC ( ch_multiqc_files.collect(), @@ -107,31 +63,10 @@ workflow MAG { ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList() ) - multiqc_report = MULTIQC.out.report.toList() -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.dump_parameters(workflow, params) - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) - } -} -workflow.onError { - if (workflow.errorReport.contains("Process requirement exceeds available memory")) { - println("🛑 Default resources exceed availability 🛑 ") - println("💡 See here on how to configure pipeline: https://nf-co.re/docs/usage/configuration#tuning-workflow-resources 💡") - } + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } /* From 21e24b260d008871701ebfc622c848c494415eed Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 29 Feb 2024 16:10:55 +0000 Subject: [PATCH 02/21] Template update for nf-core/tools version 2.13.1 --- .devcontainer/devcontainer.json | 10 +---- .github/CONTRIBUTING.md | 14 ++++--- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/workflows/awsfulltest.yml | 4 +- .github/workflows/awstest.yml | 4 +- .github/workflows/ci.yml | 2 +- .github/workflows/download_pipeline.yml | 2 +- .github/workflows/linting.yml | 2 +- .github/workflows/release-announcements.yml | 2 +- .gitpod.yml | 6 +-- README.md | 3 +- modules.json | 8 ++-- modules/nf-core/multiqc/environment.yml | 2 +- modules/nf-core/multiqc/main.nf | 4 +- .../nf-core/multiqc/tests/main.nf.test.snap | 12 +++--- .../local/utils_nfcore_mag_pipeline/main.nf | 10 +++-- .../tests/main.function.nf.test | 2 +- .../tests/main.function.nf.test.snap | 12 +++++- .../tests/main.workflow.nf.test | 20 ++------- .../tests/nextflow.config | 2 +- .../tests/main.function.nf.test.snap | 42 +++++++++++++++---- .../tests/main.workflow.nf.test.snap | 6 ++- .../tests/main.nf.test | 2 +- 23 files changed, 99 insertions(+), 74 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4ecfbfe3..b290e090 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -10,15 +10,7 @@ "vscode": { // Set *default* container specific settings.json values on container create. "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python", - "python.linting.enabled": true, - "python.linting.pylintEnabled": true, - "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", - "python.formatting.yapfPath": "/opt/conda/bin/yapf", - "python.linting.flake8Path": "/opt/conda/bin/flake8", - "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", - "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.defaultInterpreterPath": "/opt/conda/bin/python" }, // Add the IDs of extensions you want installed when the container is created. diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index a199d533..f9393180 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,9 +9,8 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -:::info -If you need help using or modifying nf-core/mag then the best place to ask is on the nf-core Slack [#mag](https://nfcore.slack.com/channels/mag) channel ([join our Slack here](https://nf-co.re/join/slack)). -::: +> [!NOTE] +> If you need help using or modifying nf-core/mag then the best place to ask is on the nf-core Slack [#mag](https://nfcore.slack.com/channels/mag) channel ([join our Slack here](https://nf-co.re/join/slack)). ## Contribution workflow @@ -27,8 +26,11 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests -You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to -receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir `. +You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. @@ -90,7 +92,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`. Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. -The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block. +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. ### Naming schemes diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 459141ac..3804f91e 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -18,7 +18,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/mag/ - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/mag/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/mag _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index aafeed37..fcfa7fc8 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@922e5c8d5ac4e918107ec311d2ebbd65e5982b3d # v2 + uses: seqeralabs/action-tower-launch@v2 # TODO nf-core: You can customise AWS full pipeline tests as required # Add full size test data (but still relatively small datasets for few samples) # on the `test_full.config` test runs with only one set of parameters @@ -31,7 +31,7 @@ jobs: } profiles: test_full - - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 + - uses: actions/upload-artifact@v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 1ce4b46d..24807559 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,7 +12,7 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@922e5c8d5ac4e918107ec311d2ebbd65e5982b3d # v2 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} @@ -25,7 +25,7 @@ jobs: } profiles: test - - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 + - uses: actions/upload-artifact@v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 036677cb..58750f67 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@b9f764e8ba5c76b712ace14ecbfcef0e40ae2dd8 # v1 + uses: nf-core/setup-nextflow@v1 with: version: "${{ matrix.NXF_VER }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index f823210d..08622fd5 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -28,7 +28,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Install Nextflow - uses: nf-core/setup-nextflow@b9f764e8ba5c76b712ace14ecbfcef0e40ae2dd8 # v1 + uses: nf-core/setup-nextflow@v1 - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 748b4311..073e1876 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -35,7 +35,7 @@ jobs: uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@b9f764e8ba5c76b712ace14ecbfcef0e40ae2dd8 # v1 + uses: nf-core/setup-nextflow@v1 - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index c3674af2..d468aeaa 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -12,7 +12,7 @@ jobs: - name: get topics and convert to hashtags id: get_topics run: | - curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ' > $GITHUB_OUTPUT + curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ' >> $GITHUB_OUTPUT - uses: rzr/fediverse-action@master with: diff --git a/.gitpod.yml b/.gitpod.yml index 363d5b1d..105a1821 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -10,13 +10,11 @@ tasks: vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting + # - nextflow.nextflow # Nextflow syntax highlighting - oderwat.indent-rainbow # Highlight indentation level - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/README.md b/README.md index 3f69b5f2..c1bed97f 100644 --- a/README.md +++ b/README.md @@ -7,12 +7,13 @@ [![GitHub Actions CI Status](https://github.com/nf-core/mag/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/ci.yml) [![GitHub Actions Linting Status](https://github.com/nf-core/mag/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mag/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/mag) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/mag) [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23mag-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/mag)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) diff --git a/modules.json b/modules.json index ef9a61e0..ad59084c 100644 --- a/modules.json +++ b/modules.json @@ -12,7 +12,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "ccacf6f5de6df3bc6d73b665c1fd2933d8bbc290", + "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", "installed_by": ["modules"] } } @@ -21,17 +21,17 @@ "nf-core": { "utils_nextflow_pipeline": { "branch": "master", - "git_sha": "cd08c91373cd00a73255081340e4914485846ba1", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "262b17ed2aad591039f914951659177e6c39a8d8", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", - "git_sha": "cd08c91373cd00a73255081340e4914485846ba1", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", "installed_by": ["subworkflows"] } } diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index 2212096a..ca39fb67 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::multiqc=1.20 + - bioconda::multiqc=1.21 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 354f4430..47ac352f 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' : - 'biocontainers/multiqc:1.20--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : + 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index c204b488..bfebd802 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,d320d4c37e349c5588e07e7a31cd4186" + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-14T09:28:51.744211298" + "timestamp": "2024-02-29T08:48:55.657331" }, "multiqc_stub": { "content": [ @@ -17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,d320d4c37e349c5588e07e7a31cd4186" + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-14T09:29:28.847433492" + "timestamp": "2024-02-29T08:49:49.071937" }, "multiqc_versions_config": { "content": [ [ - "versions.yml:md5,d320d4c37e349c5588e07e7a31cd4186" + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" ] ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-02-14T09:29:13.223621555" + "timestamp": "2024-02-29T08:49:25.457567" } } \ No newline at end of file diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index dc90ad90..a0389a60 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -1,5 +1,5 @@ // -// Subworkflow with functionality specific to the nf-core/pipeline pipeline +// Subworkflow with functionality specific to the nf-core/mag pipeline // /* @@ -152,7 +152,9 @@ workflow PIPELINE_COMPLETION { // def validateInputParameters() { genomeExistsError() -}// +} + +// // Validate channels from input samplesheet // def validateInputSamplesheet(input) { @@ -190,7 +192,9 @@ def genomeExistsError() { "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" error(error_string) } -}// +} + +// // Generate methods description for MultiQC // def toolCitationText() { diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test index 8ed4310c..68718e4f 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -51,4 +51,4 @@ nextflow_function { ) } } -} \ No newline at end of file +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap index db2030f8..e3f0baf4 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -3,10 +3,18 @@ "content": [ "v9.9.9" ], - "timestamp": "2024-01-19T11:32:36.031083" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" }, "Test Function checkCondaChannels": { "content": null, - "timestamp": "2024-01-19T11:32:50.456" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" } } \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test index f7c54bc6..ca964ce8 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -11,9 +11,6 @@ nextflow_workflow { test("Should run no inputs") { when { - params { - outdir = "tests/results" - } workflow { """ print_version = false @@ -39,9 +36,6 @@ nextflow_workflow { test("Should print version") { when { - params { - outdir = "tests/results" - } workflow { """ print_version = true @@ -68,19 +62,16 @@ nextflow_workflow { test("Should dump params") { when { - params { - outdir = "$outputDir" - } workflow { """ print_version = false dump_parameters = true - outdir = params.outdir + outdir = 'results' check_conda_channels = false input[0] = false input[1] = true - input[2] = params.outdir + input[2] = outdir input[3] = false """ } @@ -96,19 +87,16 @@ nextflow_workflow { test("Should not create params JSON if no output directory") { when { - params { - outdir = "$outputDir" - } workflow { """ print_version = false dump_parameters = true - outdir = params.outdir + outdir = null check_conda_channels = false input[0] = false input[1] = true - input[2] = null + input[2] = outdir input[3] = false """ } diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config index 53574ffe..d0a926bf 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -6,4 +6,4 @@ manifest { nextflowVersion = '!>=23.04.0' version = '9.9.9' doi = 'https://doi.org/10.5281/zenodo.5070524' -} \ No newline at end of file +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap index 10f948e6..1037232c 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -1,25 +1,41 @@ { "Test Function checkProfileProvided": { "content": null, - "timestamp": "2024-02-09T15:43:55.145717" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" }, "Test Function checkConfigProvided": { "content": [ true ], - "timestamp": "2024-01-19T11:34:13.548431224" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" }, "Test Function nfCoreLogo": { "content": [ "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" ], - "timestamp": "2024-01-19T11:34:38.840454873" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" }, "Test Function workflowCitation": { "content": [ "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" ], - "timestamp": "2024-01-19T11:34:22.24352016" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" }, "Test Function without logColours": { "content": [ @@ -73,13 +89,21 @@ "biwhite": "" } ], - "timestamp": "2024-01-19T11:35:04.418416984" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" }, "Test Function dashedLine": { "content": [ "-\u001b[2m----------------------------------------------------\u001b[0m-" ], - "timestamp": "2024-01-19T11:34:55.420000755" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" }, "Test Function with logColours": { "content": [ @@ -133,6 +157,10 @@ "biwhite": "\u001b[1;97m" } ], - "timestamp": "2024-01-19T11:35:13.436366565" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" } } \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap index d07ce54c..859d1030 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -10,6 +10,10 @@ ] } ], - "timestamp": "2024-01-19T11:35:22.538940073" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" } } \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test index 517ee54e..5784a33f 100644 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -197,4 +197,4 @@ nextflow_workflow { ) } } -} \ No newline at end of file +} From 304ae690550a5bdef42614b01ca20f6de1cf316e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 26 Mar 2024 15:58:23 +0100 Subject: [PATCH 03/21] Get basic input samplesheet validation in, however need to check fromSamplesheet/map function in utils_nfcore_mag_pipeline --- assets/schema_input.json | 27 +++++++++-- conf/test_single_end.config | 40 ---------------- main.nf | 12 +++-- nextflow.config | 1 - nextflow_schema.json | 6 --- .../local/utils_nfcore_mag_pipeline/main.nf | 23 ++++----- workflows/mag.nf | 48 +++++++++++++++---- 7 files changed, 81 insertions(+), 76 deletions(-) delete mode 100644 conf/test_single_end.config diff --git a/assets/schema_input.json b/assets/schema_input.json index 6f443034..f8ec75ee 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -13,21 +13,42 @@ "errorMessage": "Sample name must be provided and cannot contain spaces", "meta": ["id"] }, - "fastq_1": { + "run": { + "type": "string", + "pattern": "^\\S+$", + "meta": ["run"], + "errorMessage": "Column 'run' contains an empty field. Either remove column 'run' or fill each field with a value." + }, + "group": { + "type": "string", + "pattern": "^\\S+$", + "meta": ["group"], + "errorMessage": "Column 'group' contains an empty field. Either remove column 'run' or fill each field with a value." + }, + "short_reads_1": { "type": "string", "format": "file-path", "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, - "fastq_2": { + "short_reads_2": { "type": "string", "format": "file-path", "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "dependentRequired": ["short_reads_1"], "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "long_reads": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "dependentRequired": ["short_reads_1", "short_reads_2"], + "errorMessage": "FastQ file for long reads cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "short_reads_1"] } } diff --git a/conf/test_single_end.config b/conf/test_single_end.config deleted file mode 100644 index 08439cee..00000000 --- a/conf/test_single_end.config +++ /dev/null @@ -1,40 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. - - Use as follows: - nextflow run nf-core/mag -profile test_single_end, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Test single-end profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/samplesheet.single_end.csv' - single_end = true - centrifuge_db = "https://raw.githubusercontent.com/nf-core/test-datasets/mag/test_data/minigut_cf.tar.gz" - kraken2_db = "https://raw.githubusercontent.com/nf-core/test-datasets/mag/test_data/minigut_kraken.tgz" - skip_krona = true - megahit_fix_cpu_1 = true - spades_fix_cpus = 1 - bowtie2_fix_cpu_1 = true - maxbin2_fix_cpu_1 = true - binning_map_mode = 'own' - min_length_unbinned_contigs = 1000000 - max_unbinned_contigs = 2 - skip_gtdbtk = true - skip_concoct = true - skip_binqc = true - skip_gtdbtk = true - skip_prokka = true - skip_metaeuk = true -} diff --git a/main.nf b/main.nf index b05a87aa..b9a381fa 100644 --- a/main.nf +++ b/main.nf @@ -46,7 +46,9 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_mag_ workflow NFCORE_MAG { take: - samplesheet // channel: samplesheet read in from --input + raw_short_reads // channel: samplesheet read in from --input + raw_long_reads + input_assemblies main: @@ -54,7 +56,9 @@ workflow NFCORE_MAG { // WORKFLOW: Run pipeline // MAG ( - samplesheet + raw_short_reads, // channel: samplesheet read in from --input + raw_long_reads, + input_assemblies ) emit: @@ -88,7 +92,9 @@ workflow { // WORKFLOW: Run main workflow // NFCORE_MAG ( - PIPELINE_INITIALISATION.out.samplesheet + PIPELINE_INITIALISATION.out.raw_short_reads, + PIPELINE_INITIALISATION.out.raw_long_reads, + PIPELINE_INITIALISATION.out.input_assemblies ) // diff --git a/nextflow.config b/nextflow.config index 35944b81..aa69c727 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,7 +11,6 @@ params { // Input options input = null - single_end = false assembly_input = null // short read preprocessing options diff --git a/nextflow_schema.json b/nextflow_schema.json index 5cfe0612..36839dc4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -22,12 +22,6 @@ "help_text": "Use this to specify the location of your input FastQ files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq.gz'\n``` \n\nAlternatively, to assign different groups or to include long reads for hybrid assembly with metaSPAdes, you can specify a CSV samplesheet input file with 5 columns and the following header: sample,group,short_reads_1,short_reads_2,long_reads. See [usage docs](https://nf-co.re/mag/usage#input-specifications).", "fa_icon": "fas fa-file-csv" }, - "single_end": { - "type": "boolean", - "description": "Specifies that the input is single-end reads.", - "fa_icon": "fas fa-align-center", - "help_text": "By default, the pipeline expects paired-end data. If you have single-end data, you need to specify `--single_end` on the command line when you launch the pipeline. A normal glob pattern, enclosed in quotation marks, can then be used for `--input`. For example:\n\n```bash\n--single_end --input '*.fastq'\n```\n\nIt is not possible to run a mixture of single-end and paired-end files in one run." - }, "assembly_input": { "type": "string", "mimetype": "text/csv", diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index a0389a60..16fb313d 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -80,28 +80,21 @@ workflow PIPELINE_INITIALISATION { // // Create channel from input file provided through params.input // - Channel + ch_samplesheet = Channel .fromSamplesheet("input") - .map { - meta, fastq_1, fastq_2 -> - if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - } - } - .groupTuple() .map { validateInputSamplesheet(it) } - .map { - meta, fastqs -> - return [ meta, fastqs.flatten() ] + .dump(tag: 'ch_samplesheet') + .branch { + long_reads: it[2] != [] + short_reads: true } - .set { ch_samplesheet } emit: - samplesheet = ch_samplesheet + raw_short_reads = ch_samplesheet.short_reads.map{meta, r1, r2, lr -> [meta, r1, r2]} + raw_long_reads = ch_samplesheet.long_reads.map{meta, r1, r2, lr -> [meta, lr]} + input_assemblies = [] // ch_input_assemblies versions = ch_versions } diff --git a/workflows/mag.nf b/workflows/mag.nf index d8f7cd50..b922d2e4 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -9,7 +9,6 @@ include { paramsSummaryMap } from 'plugin/nf-validation' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mag_pipeline' -include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' // Check already if long reads are provided def hybrid = false @@ -54,6 +53,41 @@ include { PROKKA } from '../modules/nf-core/prok include { MMSEQS_DATABASES } from '../modules/nf-core/mmseqs/databases/main' include { METAEUK_EASYPREDICT } from '../modules/nf-core/metaeuk/easypredict/main' +// +// MODULE: Local to the pipeline +// +include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../modules/local/bowtie2_removal_build' +include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' +include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../modules/local/bowtie2_removal_build' +include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' +include { PORECHOP } from '../modules/local/porechop' +include { NANOLYSE } from '../modules/local/nanolyse' +include { FILTLONG } from '../modules/local/filtlong' +include { NANOPLOT as NANOPLOT_RAW } from '../modules/local/nanoplot' +include { NANOPLOT as NANOPLOT_FILTERED } from '../modules/local/nanoplot' +include { CENTRIFUGE_DB_PREPARATION } from '../modules/local/centrifuge_db_preparation' +include { CENTRIFUGE } from '../modules/local/centrifuge' +include { KRAKEN2_DB_PREPARATION } from '../modules/local/kraken2_db_preparation' +include { KRAKEN2 } from '../modules/local/kraken2' +include { KRONA_DB } from '../modules/local/krona_db' +include { KRONA } from '../modules/local/krona' +include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from '../modules/local/pool_single_reads' +include { POOL_PAIRED_READS } from '../modules/local/pool_paired_reads' +include { POOL_SINGLE_READS as POOL_LONG_READS } from '../modules/local/pool_single_reads' +include { MEGAHIT } from '../modules/local/megahit' +include { SPADES } from '../modules/local/spades' +include { SPADESHYBRID } from '../modules/local/spadeshybrid' +include { GUNZIP as GUNZIP_ASSEMBLIES } from '../modules/nf-core/gunzip' +include { QUAST } from '../modules/local/quast' +include { QUAST_BINS } from '../modules/local/quast_bins' +include { QUAST_BINS_SUMMARY } from '../modules/local/quast_bins_summary' +include { CAT_DB } from '../modules/local/cat_db' +include { CAT_DB_GENERATE } from '../modules/local/cat_db_generate' +include { CAT } from '../modules/local/cat' +include { CAT_SUMMARY } from "../modules/local/cat_summary" +include { BIN_SUMMARY } from '../modules/local/bin_summary' +include { COMBINE_TSV as COMBINE_SUMMARY_TSV } from '../modules/local/combine_tsv' + //////////////////////////////////////////////////// /* -- Create channel for reference databases -- */ //////////////////////////////////////////////////// @@ -158,7 +192,9 @@ def busco_failed_bins = [:] workflow MAG { take: - ch_samplesheet // channel: samplesheet read in from --input + ch_raw_short_reads // channel: samplesheet read in from --input + ch_raw_long_reads + ch_input_assemblies main: @@ -179,12 +215,8 @@ workflow MAG { ch_versions = ch_versions.mix(MMSEQS_DATABASES.out.versions) } - // - // Prepare channels based on the main input channel - // - ch_raw_short_reads = ch_samplesheet.out.raw_short_reads - ch_raw_long_reads = ch_samplesheet.out.raw_long_reads - ch_input_assemblies = ch_samplesheet.out.input_assemblies + + /* ================================================================================ From c66ad234dbb813446492d31aced6a3aaaa9fa285 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 26 Mar 2024 16:00:58 +0100 Subject: [PATCH 04/21] Get pipeline seemingly running again --- .../local/utils_nfcore_mag_pipeline/main.nf | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index 16fb313d..17b50052 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -82,9 +82,10 @@ workflow PIPELINE_INITIALISATION { // ch_samplesheet = Channel .fromSamplesheet("input") - .map { - validateInputSamplesheet(it) - } + .dump(tag: 'fromSamplesheet') + // .map { + // validateInputSamplesheet(it) + // } .dump(tag: 'ch_samplesheet') .branch { long_reads: it[2] != [] @@ -151,16 +152,9 @@ def validateInputParameters() { // Validate channels from input samplesheet // def validateInputSamplesheet(input) { - def (metas, fastqs) = input[1..2] - - // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end - def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 - if (!endedness_ok) { - error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") - } - return [ metas[0], fastqs ] } + // // Get attribute from genome config file e.g. fasta // From c48dbab3304af7e007711a5d1f90ec36ff1fe9ba Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 27 Mar 2024 07:05:37 +0000 Subject: [PATCH 05/21] Re-add single end as can't mix the two in one run --- nextflow.config | 1 + subworkflows/local/utils_nfcore_mag_pipeline/main.nf | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index aa69c727..52a06a7e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,6 +12,7 @@ params { // Input options input = null assembly_input = null + single_end = false // short read preprocessing options skip_clipping = false diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index 17b50052..17c5096e 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -93,7 +93,7 @@ workflow PIPELINE_INITIALISATION { } emit: - raw_short_reads = ch_samplesheet.short_reads.map{meta, r1, r2, lr -> [meta, r1, r2]} + raw_short_reads = ch_samplesheet.short_reads.map{meta, r1, r2, lr -> [meta, [r1, r2]]}.dump(tag: 'raw_short_reads') // TODO add if/else for single end data raw_long_reads = ch_samplesheet.long_reads.map{meta, r1, r2, lr -> [meta, lr]} input_assemblies = [] // ch_input_assemblies versions = ch_versions From f522c76cef2fbf9558c296c5c7893cd202004cde Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 27 Mar 2024 07:47:25 +0000 Subject: [PATCH 06/21] Get SR tests working, start adding assmebly --- assets/schema_assembly_input.json | 47 ++++++++++++ assets/schema_input.json | 1 + nextflow_schema.json | 16 ++++- .../local/utils_nfcore_mag_pipeline/main.nf | 72 ++++++++++++++++--- 4 files changed, 124 insertions(+), 12 deletions(-) create mode 100644 assets/schema_assembly_input.json diff --git a/assets/schema_assembly_input.json b/assets/schema_assembly_input.json new file mode 100644 index 00000000..fbef38b8 --- /dev/null +++ b/assets/schema_assembly_input.json @@ -0,0 +1,47 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/mag/master/assets/schema_input.json", + "title": "nf-core/mag pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "ID must be provided and cannot contain spaces", + "meta": ["id"] + }, + "group": { + "type": "string", + "pattern": "^\\S+$", + "meta": ["group"], + "errorMessage": "Column 'group' contains an empty field. Either remove column 'run' or fill each field with a value." + }, + "assembler": { + "type": "string", + "pattern": "MEGAHIT|SPAdes", + "meta": ["assembler"], + "errorMessage": "Column 'group' contains an empty field. Either remove column 'run' or fill each field with a value." + }, + "short_reads_2": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "dependentRequired": ["short_reads_1"], + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "long_reads": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "dependentRequired": ["short_reads_1", "short_reads_2"], + "errorMessage": "FastQ file for long reads cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + } + }, + "required": ["sample", "short_reads_1"] + } +} diff --git a/assets/schema_input.json b/assets/schema_input.json index f8ec75ee..ddfa02f6 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -17,6 +17,7 @@ "type": "string", "pattern": "^\\S+$", "meta": ["run"], + "unique": ["sample"], "errorMessage": "Column 'run' contains an empty field. Either remove column 'run' or fill each field with a value." }, "group": { diff --git a/nextflow_schema.json b/nextflow_schema.json index 36839dc4..1a0b75d1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -14,20 +14,30 @@ "properties": { "input": { "type": "string", + "mimetype": "text/csv", "format": "file-path-pattern", "exists": true, "schema": "assets/schema_input.json", - "pattern": "^\\S+\\.csv$|^\\S+\\.(fastq|fq).gz$", + "pattern": "^\\S+\\.csv$", "description": "Input FastQ files (gzip compressed) or CSV samplesheet file containing information about the samples in the experiment.", "help_text": "Use this to specify the location of your input FastQ files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq.gz'\n``` \n\nAlternatively, to assign different groups or to include long reads for hybrid assembly with metaSPAdes, you can specify a CSV samplesheet input file with 5 columns and the following header: sample,group,short_reads_1,short_reads_2,long_reads. See [usage docs](https://nf-co.re/mag/usage#input-specifications).", "fa_icon": "fas fa-file-csv" }, + "single_end": { + "type": "boolean", + "description": "Specifies that the input is single-end reads.", + "fa_icon": "fas fa-align-center", + "help_text": "By default, the pipeline expects paired-end data. If you have single-end data, you need to specify `--single_end` on the command line when you launch the pipeline. A normal glob pattern, enclosed in quotation marks, can then be used for `--input`. For example:\n\n```bash\n--single_end --input '*.fastq'\n```\n\nIt is not possible to run a mixture of single-end and paired-end files in one run." + }, "assembly_input": { "type": "string", "mimetype": "text/csv", - "format": "file-path", + "format": "file-path-pattern", + "exists": true, + "schema": "assets/schema_assembly_input.json", + "pattern": "^\\S+\\.csv$", "description": "Additional input CSV samplesheet containing information about pre-computed assemblies. When set, both read pre-processing and assembly are skipped and the pipeline begins at the binning stage.", - "help_text": "If you have pre-computed assemblies from another source, it is possible to jump straight to the binning stage of the pipeline by supplying these assemblies in a CSV file. This CSV file should have three columns and the following header: `id,group,assembler,fasta`. Short reads must still be supplied in to `--input` in CSV format. See [usage docs](https://nf-co.re/mag/usage#input-specifications) for further details.", + "help_text": "If you have pre-computed assemblies from another source, it is possible to jump straight to the binning stage of the pipeline by supplying these assemblies in a CSV file. This CSV file should have four columns and the following header: `id,group,assembler,fasta`. Short reads must still be supplied in to `--input` in CSV format. See [usage docs](https://nf-co.re/mag/usage#input-specifications) for further details.", "fa_icon": "fas fa-file-csv" }, "outdir": { diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index 17c5096e..da3d0565 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -1,4 +1,4 @@ -// + // Subworkflow with functionality specific to the nf-core/mag pipeline // @@ -78,23 +78,77 @@ workflow PIPELINE_INITIALISATION { validateInputParameters() // - // Create channel from input file provided through params.input + // Create channels from input file provided through params.input and params.assembly_input // + + // Validate FASTQ input ch_samplesheet = Channel .fromSamplesheet("input") .dump(tag: 'fromSamplesheet') - // .map { + // .map {t // validateInputSamplesheet(it) // } .dump(tag: 'ch_samplesheet') - .branch { - long_reads: it[2] != [] - short_reads: true - } + + // Prepare FASTQs channel and separate short and long reads and prepare + ch_raw_short_reads = ch_samplesheet + .map { meta, sr1, sr2, lr -> + meta.run = meta.run == null ? "0" : meta.run + meta.single_end = params.single_end + if (params.single_end) + return [ meta, [ sr1] ] + else + return [ meta, [ sr1, sr2 ] ] + } + + ch_raw_long_reads = ch_samplesheet + .map { meta, sr1, sr2, lr -> + if (lr) { + meta.run = meta.run == null ? "0" : meta.run + return [ meta, lr ] + } + } + + + if (params.assembly_input) { + + ch_input_assembly_rows = Channel + .fromSamplesheet("assembly_input") + .from(file(params.assembly_input)) + .splitCsv(header: true) + .map { row -> + if (row.size() == 4) { + def id = row.id + def group = row.group + def assembler = row.assembler ?: false + def assembly = row.fasta ? file(row.fasta, checkIfExists: true) : false + // Check if given combination is valid + if (!assembly) exit 1, "Invalid input assembly samplesheet: fasta can not be empty." + if (!assembler) exit 1, "Invalid input assembly samplesheet: assembler can not be empty." + return [ id, group, assembler, assembly ] + } else { + exit 1, "Input samplesheet contains row with ${row.size()} column(s). Expects 3." + } + } + + // build meta map + ch_input_assemblies = ch_input_assembly_rows + .map { id, group, assembler, fasta -> + def meta = [:] + meta.id = params.coassemble_group? "group-$group" : id + meta.group = group + meta.assembler = assembler + return [ meta, [ fasta ] ] + } + } else { + ch_input_assembly_rows = Channel.empty() + ch_input_assemblies = Channel.empty() + } + emit: - raw_short_reads = ch_samplesheet.short_reads.map{meta, r1, r2, lr -> [meta, [r1, r2]]}.dump(tag: 'raw_short_reads') // TODO add if/else for single end data - raw_long_reads = ch_samplesheet.long_reads.map{meta, r1, r2, lr -> [meta, lr]} + raw_short_reads = ch_raw_short_reads + raw_long_reads = ch_raw_long_reads input_assemblies = [] // ch_input_assemblies versions = ch_versions } From 7763ef0bebc7661a6c387bd19d5010a0eb75a9dc Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 27 Mar 2024 07:48:39 +0000 Subject: [PATCH 07/21] Re-add missing single end test --- conf/test_single_end.config | 40 +++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 conf/test_single_end.config diff --git a/conf/test_single_end.config b/conf/test_single_end.config new file mode 100644 index 00000000..08439cee --- /dev/null +++ b/conf/test_single_end.config @@ -0,0 +1,40 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/mag -profile test_single_end, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test single-end profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mag/samplesheets/samplesheet.single_end.csv' + single_end = true + centrifuge_db = "https://raw.githubusercontent.com/nf-core/test-datasets/mag/test_data/minigut_cf.tar.gz" + kraken2_db = "https://raw.githubusercontent.com/nf-core/test-datasets/mag/test_data/minigut_kraken.tgz" + skip_krona = true + megahit_fix_cpu_1 = true + spades_fix_cpus = 1 + bowtie2_fix_cpu_1 = true + maxbin2_fix_cpu_1 = true + binning_map_mode = 'own' + min_length_unbinned_contigs = 1000000 + max_unbinned_contigs = 2 + skip_gtdbtk = true + skip_concoct = true + skip_binqc = true + skip_gtdbtk = true + skip_prokka = true + skip_metaeuk = true +} From 09f45ad3670189ceba872917f550102f6e48c1d3 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 27 Mar 2024 08:00:48 +0000 Subject: [PATCH 08/21] Re-add single-end test config --- conf/test_single_end.config | 2 -- subworkflows/local/utils_nfcore_mag_pipeline/main.nf | 2 -- 2 files changed, 4 deletions(-) diff --git a/conf/test_single_end.config b/conf/test_single_end.config index 08439cee..8830c685 100644 --- a/conf/test_single_end.config +++ b/conf/test_single_end.config @@ -26,8 +26,6 @@ params { skip_krona = true megahit_fix_cpu_1 = true spades_fix_cpus = 1 - bowtie2_fix_cpu_1 = true - maxbin2_fix_cpu_1 = true binning_map_mode = 'own' min_length_unbinned_contigs = 1000000 max_unbinned_contigs = 2 diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index da3d0565..30c09e63 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -84,11 +84,9 @@ workflow PIPELINE_INITIALISATION { // Validate FASTQ input ch_samplesheet = Channel .fromSamplesheet("input") - .dump(tag: 'fromSamplesheet') // .map {t // validateInputSamplesheet(it) // } - .dump(tag: 'ch_samplesheet') // Prepare FASTQs channel and separate short and long reads and prepare ch_raw_short_reads = ch_samplesheet From 8a8ab91a7e909ad290b941630476ea8777c185c1 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 19 Apr 2024 14:23:16 +0200 Subject: [PATCH 09/21] All --input validation checks working --- .../local/utils_nfcore_mag_pipeline/main.nf | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index 30c09e63..e7d1e66f 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -75,7 +75,9 @@ workflow PIPELINE_INITIALISATION { // // Custom validation for pipeline parameters // - validateInputParameters() + validateInputParameters( + + ) // // Create channels from input file provided through params.input and params.assembly_input @@ -84,19 +86,21 @@ workflow PIPELINE_INITIALISATION { // Validate FASTQ input ch_samplesheet = Channel .fromSamplesheet("input") - // .map {t - // validateInputSamplesheet(it) - // } + .map { + validateInputSamplesheet(it) + } // Prepare FASTQs channel and separate short and long reads and prepare ch_raw_short_reads = ch_samplesheet .map { meta, sr1, sr2, lr -> meta.run = meta.run == null ? "0" : meta.run meta.single_end = params.single_end - if (params.single_end) - return [ meta, [ sr1] ] - else + + if (params.single_end) { + return [ meta, [ sr1 ] ] + } else { return [ meta, [ sr1, sr2 ] ] + } } ch_raw_long_reads = ch_samplesheet @@ -203,8 +207,12 @@ def validateInputParameters() { // // Validate channels from input samplesheet // -def validateInputSamplesheet(input) { +def validateInputSamplesheet(meta, sr1, sr2, lr) { + + if ( !sr2 && !params.single_end ) { error("[nf-core/mag] ERROR: Single-end data must be executed with `--single_end`. Note that it is not possible to mix single- and paired-end data in one run! Check input TSV for sample: ${meta.id}") } + if ( sr2 && params.single_end ) { error("[nf-core/mag] ERROR: Paired-end data must be executed without `--single_end`. Note that it is not possible to mix single- and paired-end data in one run! Check input TSV for sample: ${meta.id}") } + return [meta, sr1, sr2, lr] } // From 4d4ba13c2f4c7fc8de3da1d9eb7e7f2dc60517f9 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 19 Apr 2024 15:01:58 +0200 Subject: [PATCH 10/21] Move all input parameter code to utils_nfcore_mag subworkflow --- nextflow_schema.json | 11 +- .../local/utils_nfcore_mag_pipeline/main.nf | 134 +++++++++++++++++- 2 files changed, 134 insertions(+), 11 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 1a0b75d1..9510589c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -535,7 +535,7 @@ }, "gtdbtk_min_completeness": { "type": "number", - "default": 50.0, + "default": 50, "description": "Min. bin completeness (in %) required to apply GTDB-tk classification.", "help_text": "Completeness assessed with BUSCO analysis (100% - %Missing). Must be greater than 0 (min. 0.01) to avoid GTDB-tk errors. If too low, GTDB-tk classification results can be impaired due to not enough marker genes!", "minimum": 0.01, @@ -543,7 +543,7 @@ }, "gtdbtk_max_contamination": { "type": "number", - "default": 10.0, + "default": 10, "description": "Max. bin contamination (in %) allowed to apply GTDB-tk classification.", "help_text": "Contamination approximated based on BUSCO analysis (%Complete and duplicated). If too high, GTDB-tk classification results can be impaired due to contamination!", "minimum": 0, @@ -551,7 +551,7 @@ }, "gtdbtk_min_perc_aa": { "type": "number", - "default": 10.0, + "default": 10, "description": "Min. fraction of AA (in %) in the MSA for bins to be kept.", "minimum": 0, "maximum": 100 @@ -565,7 +565,7 @@ }, "gtdbtk_pplacer_cpus": { "type": "number", - "default": 1.0, + "default": 1, "description": "Number of CPUs used for the by GTDB-Tk run tool pplacer.", "help_text": "A low number of CPUs helps to reduce the memory required/reported by GTDB-Tk. See also the [GTDB-Tk documentation](https://ecogenomics.github.io/GTDBTk/faq.html#gtdb-tk-reaches-the-memory-limit-pplacer-crashes)." }, @@ -685,7 +685,8 @@ "type": "string", "default": "group", "description": "Defines mapping strategy to compute co-abundances for binning, i.e. which samples will be mapped against the assembly.", - "help_text": "Available: `all`, `group` or `own`. Note that `own` cannot be specified in combination with `--coassemble_group`.\n\nNote that specifying `all` without additionally specifying `--coassemble_group` results in `n^2` mapping processes for each assembly method, where `n` is the number of samples." + "help_text": "Available: `all`, `group` or `own`. Note that `own` cannot be specified in combination with `--coassemble_group`.\n\nNote that specifying `all` without additionally specifying `--coassemble_group` results in `n^2` mapping processes for each assembly method, where `n` is the number of samples.", + "enum": ["all", "group", "own"] }, "skip_binning": { "type": "boolean", diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index e7d1e66f..17590943 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -72,12 +72,10 @@ workflow PIPELINE_INITIALISATION { UTILS_NFCORE_PIPELINE ( nextflow_cli_args ) - // - // Custom validation for pipeline parameters - // - validateInputParameters( - ) + // Note: normally validateInputParameters() goes here, but + // as we need to use information from samplesheet from the input channel + // moved it below // // Create channels from input file provided through params.input and params.assembly_input @@ -111,6 +109,9 @@ workflow PIPELINE_INITIALISATION { } } + // Check already if long reads are provided + def hybrid =false + ch_raw_long_reads.map{if (it) hybrid = true} if (params.assembly_input) { @@ -147,6 +148,12 @@ workflow PIPELINE_INITIALISATION { ch_input_assemblies = Channel.empty() } + // + // Custom validation for pipeline parameters + // + validateInputParameters( + hybrid + ) emit: raw_short_reads = ch_raw_short_reads @@ -200,8 +207,123 @@ workflow PIPELINE_COMPLETION { // // Check and validate pipeline parameters // -def validateInputParameters() { +def validateInputParameters(hybrid) { genomeExistsError() + + // Check if binning mapping mode is valid + if (params.coassemble_group && params.binning_map_mode == 'own') { + error("Invalid combination of parameter '--binning_map_mode own' and parameter '--coassemble_group'. Select either 'all' or 'group' mapping mode when performing group-wise co-assembly.") + } + if (params.ancient_dna && params.binning_map_mode != 'own') { + error("Invalid combination of parameter '--binning_map_mode' and parameter '--ancient_dna'. Ancient DNA mode can only be executed with --binning_map_mode own. You supplied: --binning_map_mode ${params.binning_map_mode}") + } + + // Check if specified cpus for SPAdes are available + if ( params.spades_fix_cpus > params.max_cpus ) { + error("Invalid parameter '--spades_fix_cpus ${params.spades_fix_cpus}', max cpus are '${params.max_cpus}'.") + } + if ( params.spadeshybrid_fix_cpus > params.max_cpus ) { + error("Invalid parameter '--spadeshybrid_fix_cpus ${params.spadeshybrid_fix_cpus}', max cpus are '${params.max_cpus}'.") + } + // Check if settings concerning reproducibility of used tools are consistent and print warning if not + if (params.megahit_fix_cpu_1 || params.spades_fix_cpus != -1 || params.spadeshybrid_fix_cpus != -1) { + if (!params.skip_spades && params.spades_fix_cpus == -1) { + log.warn "At least one assembly process is run with a parameter to ensure reproducible results, but SPAdes not. Consider using the parameter '--spades_fix_cpus'." + } + if (hybrid && params.skip_spadeshybrid && params.spadeshybrid_fix_cpus == -1) { + log.warn "At least one assembly process is run with a parameter to ensure reproducible results, but SPAdes hybrid not. Consider using the parameter '--spadeshybrid_fix_cpus'." + } + if (!params.skip_megahit && !params.megahit_fix_cpu_1) { + log.warn "At least one assembly process is run with a parameter to ensure reproducible results, but MEGAHIT not. Consider using the parameter '--megahit_fix_cpu_1'." + } + if (!params.skip_binning && params.metabat_rng_seed == 0) { + log.warn "At least one assembly process is run with a parameter to ensure reproducible results, but for MetaBAT2 a random seed is specified ('--metabat_rng_seed 0'). Consider specifying a positive seed instead." + } + } + + // Check if SPAdes and single_end + if ( (!params.skip_spades || !params.skip_spadeshybrid) && params.single_end) { + log.warn 'metaSPAdes does not support single-end data. SPAdes will be skipped.' + } + + // Check if parameters for host contamination removal are valid + if ( params.host_fasta && params.host_genome) { + error('Both host fasta reference and iGenomes genome are specified to remove host contamination! Invalid combination, please specify either --host_fasta or --host_genome.') + } + if ( hybrid && (params.host_fasta || params.host_genome) ) { + log.warn 'Host read removal is only applied to short reads. Long reads might be filtered indirectly by Filtlong, which is set to use read qualities estimated based on k-mer matches to the short, already filtered reads.' + if ( params.longreads_length_weight > 1 ) { + log.warn "The parameter --longreads_length_weight is ${params.longreads_length_weight}, causing the read length being more important for long read filtering than the read quality. Set --longreads_length_weight to 1 in order to assign equal weights." + } + } + if ( params.host_genome ) { + if (!params.genomes) { + error('No config file containing genomes provided!') + } + // Check if host genome exists in the config file + if (!params.genomes.containsKey(params.host_genome)) { + error('=============================================================================\n' + + " Host genome '${params.host_genome}' not found in any config files provided to the pipeline.\n" + + ' Currently, the available genome keys are:\n' + + " ${params.genomes.keySet().join(', ')}\n" + + '===================================================================================') + } + if ( !params.genomes[params.host_genome].fasta ) { + error("No fasta file specified for the host genome ${params.host_genome}!") + } + if ( !params.genomes[params.host_genome].bowtie2 ) { + error("No Bowtie 2 index file specified for the host genome ${params.host_genome}!") + } + } + + // Check MetaBAT2 inputs + if ( !params.skip_metabat2 && params.min_contig_size < 1500 ) { + log.warn "Specified min. contig size under minimum for MetaBAT2. MetaBAT2 will be run with 1500 (other binners not affected). You supplied: --min_contig_size ${params.min_contig_size}" + } + + // Check more than one binner is run for bin refinement (required DAS by Tool) + // If the number of run binners (i.e., number of not-skipped) is more than one, otherwise throw an error + if ( params.refine_bins_dastool && !([ params.skip_metabat2, params.skip_maxbin2, params.skip_concoct ].count(false) > 1) ) { + error('Bin refinement with --refine_bins_dastool requires at least two binners to be running (not skipped). Check input.') + } + + // Check that bin refinement is actually turned on if any of the refined bins are requested for downstream + if (!params.refine_bins_dastool && params.postbinning_input != 'raw_bins_only') { + error("The parameter '--postbinning_input ${ params.postbinning_input }' for downstream steps can only be specified if bin refinement is activated with --refine_bins_dastool! Check input.") + } + + // Check if BUSCO parameters combinations are valid + if (params.skip_binqc && params.binqc_tool == 'checkm') { + error('Both --skip_binqc and --binqc_tool \'checkm\' are specified! Invalid combination, please specify either --skip_binqc or --binqc_tool.') + } + if (params.skip_binqc) { + if (params.busco_db) { + error('Both --skip_binqc and --busco_db are specified! Invalid combination, please specify either --skip_binqc or --binqc_tool \'busco\' with --busco_db.') + } + if (params.busco_auto_lineage_prok) { + error('Both --skip_binqc and --busco_auto_lineage_prok are specified! Invalid combination, please specify either --skip_binqc or --binqc_tool \'busco\' with --busco_auto_lineage_prok.') + } + } + + if (params.skip_binqc && !params.skip_gtdbtk) { + log.warn '--skip_binqc is specified, but --skip_gtdbtk is explictly set to run! GTDB-tk will be omitted because GTDB-tk bin classification requires bin filtering based on BUSCO or CheckM QC results to avoid GTDB-tk errors.' + } + + // Check if CAT parameters are valid + if (params.cat_db && params.cat_db_generate) { + error('Invalid combination of parameters --cat_db and --cat_db_generate is specified! Please specify either --cat_db or --cat_db_generate.') + } + if (params.save_cat_db && !params.cat_db_generate) { + error('Invalid parameter combination: parameter --save_cat_db specified, but not --cat_db_generate! Note also that the parameter --save_cat_db does not work in combination with --cat_db.') + } + + // Chech MetaEuk db paramaters + if (params.metaeuk_mmseqs_db && params.metaeuk_db) { + error('Invalid parameter combination: both --metaeuk_mmseqs_db and --metaeuk_db are specified! Please specify either --metaeuk_mmseqs_db or --metaeuk_db.') + } + if (params.save_mmseqs_db && !params.metaeuk_mmseqs_db) { + error('Invalid parameter combination: --save_mmseqs_db supplied but no database has been requested for download with --metaeuk_mmseqs_db!') + } } // From 672f59880aeea596d72b920f05b98bfe7ddba466 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 19 Apr 2024 16:18:11 +0200 Subject: [PATCH 11/21] Complete validation transfer --- assets/schema_assembly_input.json | 24 ++-- assets/schema_input.json | 2 +- docs/usage.md | 27 +--- nextflow.config | 4 +- nextflow_schema.json | 4 +- .../local/utils_nfcore_mag_pipeline/main.nf | 136 ++++++++++-------- workflows/mag.nf | 11 -- 7 files changed, 90 insertions(+), 118 deletions(-) diff --git a/assets/schema_assembly_input.json b/assets/schema_assembly_input.json index fbef38b8..c6712717 100644 --- a/assets/schema_assembly_input.json +++ b/assets/schema_assembly_input.json @@ -17,31 +17,23 @@ "type": "string", "pattern": "^\\S+$", "meta": ["group"], - "errorMessage": "Column 'group' contains an empty field. Either remove column 'run' or fill each field with a value." + "errorMessage": "Column 'group' contains an empty field. Either remove column 'group' or fill each field with a value." }, "assembler": { "type": "string", - "pattern": "MEGAHIT|SPAdes", + "pattern": "MEGAHIT|SPAdes|SPAdesHybrid", "meta": ["assembler"], - "errorMessage": "Column 'group' contains an empty field. Either remove column 'run' or fill each field with a value." + "errorMessage": "Only MEGAHIT or SPAdes assemblies are supported" }, - "short_reads_2": { + "fasta": { "type": "string", "format": "file-path", + "pattern": "^\\S+\\.(fasta|fas|fa|fna)(\\.gz)?$", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "dependentRequired": ["short_reads_1"], - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" - }, - "long_reads": { - "type": "string", - "format": "file-path", - "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "dependentRequired": ["short_reads_1", "short_reads_2"], - "errorMessage": "FastQ file for long reads cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "unique": true, + "errorMessage": "FastA file with pre-assembled contigs must be provided, cannot contain spaces and must have extension 'fasta', 'fas', 'fa', or 'fna', all optionally gzipped." } }, - "required": ["sample", "short_reads_1"] + "required": ["id", "assembler", "fasta"] } } diff --git a/assets/schema_input.json b/assets/schema_input.json index ddfa02f6..b8177803 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -24,7 +24,7 @@ "type": "string", "pattern": "^\\S+$", "meta": ["group"], - "errorMessage": "Column 'group' contains an empty field. Either remove column 'run' or fill each field with a value." + "errorMessage": "Column 'group' contains an empty field. Either remove column 'group' or fill each field with a value." }, "short_reads_1": { "type": "string", diff --git a/docs/usage.md b/docs/usage.md index 9927db47..2c5fea9d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,32 +6,11 @@ ## Input specifications -The input data can be passed to nf-core/mag in three possible ways, either using the `--input` or `--assembly_input` parameters. - -### Direct FASTQ input (short reads only) - -The easiest way is to specify directly the path (with wildcards) to your input FASTQ files. For example: - -```bash ---input 'path/to/data/sample_*_R{1,2}.fastq.gz' -``` - -This input method only works with short read data and will assign all files to the same group. By default, this group information is only used to compute co-abundances for the binning step, but not for group-wise co-assembly (see the parameter docs for [`--coassemble_group`](https://nf-co.re/mag/parameters#coassemble_group) and [`--binning_map_mode`](https://nf-co.re/mag/parameters#binning_map_mode) for more information about how this group information can be used). - -Please note the following additional requirements: - -- Files names must be unique -- Valid file extensions: `.fastq.gz`, `.fq.gz` (files must be compressed) -- The path must be enclosed in quotes -- The path must have at least one `*` wildcard character -- When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs -- To run single-end data you must additionally specify `--single_end` -- If left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz` -- Sample name and run combinations must be unique +The input data can be passed to nf-core/mag in two possible ways, either using the `--input` parameter of raw-reads alone or `--input` additionally with `--assembly_input` that specifies pre-built assemblies. ### Samplesheet input file -Alternatively, to assign different groups or to include long reads for hybrid assembly with metaSPAdes, you can specify a CSV samplesheet input file that contains the paths to your FASTQ files and additional metadata. Furthermore when a `run` column is present, the pipeline will also run perform run- or lane-wise concatenation, for cases where you may have a sample or library sequenced with the same sequencing configuration across multiple runs. The optional run merging happens after short read QC (adapter clipping, host/PhiX removal etc.), and prior to normalisation, taxonomic profiling, and assembly. +You can specify a CSV samplesheet input file that contains the paths to your FASTQ files and additional metadata. Furthermore when a `run` column is present, the pipeline will also run perform run- or lane-wise concatenation, for cases where you may have a sample or library sequenced with the same sequencing configuration across multiple runs. The optional run merging happens after short read QC (adapter clipping, host/PhiX removal etc.), and prior to normalisation, taxonomic profiling, and assembly. At a minimum CSV file should contain the following columns: @@ -66,7 +45,7 @@ sample3,1,0,data/sample3_R1.fastq.gz,data/sample3_R2.fastq.gz, Please note the following requirements: -- a minimum 5 of comma-seperated columns +- a minimum 5 of comma-separated columns - Valid file extension: `.csv` - Must contain the header `sample,group,short_reads_1,short_reads_2,long_reads` (where `run` can be optionally added) - Run IDs must be unique within a multi-run sample. A sample with multiple runs will be automatically concatenated. diff --git a/nextflow.config b/nextflow.config index 52a06a7e..be58b311 100644 --- a/nextflow.config +++ b/nextflow.config @@ -169,6 +169,8 @@ params { help = false version = false + monochromeLogs = null // TODO remove once nf-validation removes the bug + // Config options config_profile_name = null config_profile_description = null @@ -186,7 +188,7 @@ params { // Schema validation default options validationFailUnrecognisedParams = false validationLenientMode = false - validationSchemaIgnoreParams = 'genome,genomes,igenomes_base' + validationSchemaIgnoreParams = 'genome,genomes,igenomes_base,monochromeLogs' validationShowHiddenParams = false validate_params = true diff --git a/nextflow_schema.json b/nextflow_schema.json index 9510589c..5332238a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -19,8 +19,8 @@ "exists": true, "schema": "assets/schema_input.json", "pattern": "^\\S+\\.csv$", - "description": "Input FastQ files (gzip compressed) or CSV samplesheet file containing information about the samples in the experiment.", - "help_text": "Use this to specify the location of your input FastQ files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq.gz'\n``` \n\nAlternatively, to assign different groups or to include long reads for hybrid assembly with metaSPAdes, you can specify a CSV samplesheet input file with 5 columns and the following header: sample,group,short_reads_1,short_reads_2,long_reads. See [usage docs](https://nf-co.re/mag/usage#input-specifications).", + "description": "CSV samplesheet file containing information about the samples in the experiment.", + "help_text": "Use this to specify the location of your input FastQ files and their associated metadata. You can also use the CSV file to assign different groups or to include long reads for hybrid assembly with metaSPAdes. The CSV file must have at least 5 columns and the following header: sample,group,short_reads_1,short_reads_2,long_reads. See [usage docs](https://nf-co.re/mag/usage#input-specifications).", "fa_icon": "fas fa-file-csv" }, "single_end": { diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index 17590943..41c0143e 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -109,56 +109,61 @@ workflow PIPELINE_INITIALISATION { } } - // Check already if long reads are provided + // Check already if long reads are provided, for later parameter validation def hybrid =false ch_raw_long_reads.map{if (it) hybrid = true} - if (params.assembly_input) { + // + // Custom validation for pipeline parameters + // + validateInputParameters( + hybrid + ) - ch_input_assembly_rows = Channel + // Validate PRE-ASSEMBLED CONTIG input when supplied + if (params.assembly_input) { + ch_input_assemblies = Channel .fromSamplesheet("assembly_input") - .from(file(params.assembly_input)) - .splitCsv(header: true) - .map { row -> - if (row.size() == 4) { - def id = row.id - def group = row.group - def assembler = row.assembler ?: false - def assembly = row.fasta ? file(row.fasta, checkIfExists: true) : false - // Check if given combination is valid - if (!assembly) exit 1, "Invalid input assembly samplesheet: fasta can not be empty." - if (!assembler) exit 1, "Invalid input assembly samplesheet: assembler can not be empty." - return [ id, group, assembler, assembly ] - } else { - exit 1, "Input samplesheet contains row with ${row.size()} column(s). Expects 3." - } - } + } - // build meta map - ch_input_assemblies = ch_input_assembly_rows - .map { id, group, assembler, fasta -> - def meta = [:] - meta.id = params.coassemble_group? "group-$group" : id - meta.group = group - meta.assembler = assembler - return [ meta, [ fasta ] ] + // Prepare ASSEMBLY input channel + if (params.assembly_input) { + ch_input_assemblies + .map { meta, fasta -> + return [ meta + [id: params.coassemble_group ? "group-${meta.group}" : meta.id], [ fasta ] ] } } else { - ch_input_assembly_rows = Channel.empty() ch_input_assemblies = Channel.empty() } - // - // Custom validation for pipeline parameters - // - validateInputParameters( - hybrid - ) + // Cross validation of input assembly and read IDs: ensure groups are all represented between reads and assemblies + if (params.assembly_input) { + ch_read_ids = ch_samplesheet + .map { meta, sr1, sr2, lr -> params.coassemble_group ? meta.group : meta.id } + .unique() + .toList() + .sort() + + ch_assembly_ids = ch_input_assemblies + .map { meta, fasta -> params.coassemble_group ? meta.group : meta.id } + .unique() + .toList() + .sort() + + ch_read_ids.cross(ch_assembly_ids) + .map { ids1, ids2 -> + if (ids1.sort() != ids2.sort()) { + exit 1, "[nf-core/mag] ERROR: supplied IDs or Groups in read and assembly CSV files do not match!" + } + } + } + + emit: raw_short_reads = ch_raw_short_reads raw_long_reads = ch_raw_long_reads - input_assemblies = [] // ch_input_assemblies + input_assemblies = ch_input_assemblies versions = ch_versions } @@ -212,53 +217,50 @@ def validateInputParameters(hybrid) { // Check if binning mapping mode is valid if (params.coassemble_group && params.binning_map_mode == 'own') { - error("Invalid combination of parameter '--binning_map_mode own' and parameter '--coassemble_group'. Select either 'all' or 'group' mapping mode when performing group-wise co-assembly.") - } - if (params.ancient_dna && params.binning_map_mode != 'own') { - error("Invalid combination of parameter '--binning_map_mode' and parameter '--ancient_dna'. Ancient DNA mode can only be executed with --binning_map_mode own. You supplied: --binning_map_mode ${params.binning_map_mode}") + error("[nf-core/mag] ERROR: Invalid combination of parameter '--binning_map_mode own' and parameter '--coassemble_group'. Select either 'all' or 'group' mapping mode when performing group-wise co-assembly.") } // Check if specified cpus for SPAdes are available if ( params.spades_fix_cpus > params.max_cpus ) { - error("Invalid parameter '--spades_fix_cpus ${params.spades_fix_cpus}', max cpus are '${params.max_cpus}'.") + error("[nf-core/mag] ERROR: Invalid parameter '--spades_fix_cpus ${params.spades_fix_cpus}', max cpus are '${params.max_cpus}'.") } if ( params.spadeshybrid_fix_cpus > params.max_cpus ) { - error("Invalid parameter '--spadeshybrid_fix_cpus ${params.spadeshybrid_fix_cpus}', max cpus are '${params.max_cpus}'.") + error("[nf-core/mag] ERROR: Invalid parameter '--spadeshybrid_fix_cpus ${params.spadeshybrid_fix_cpus}', max cpus are '${params.max_cpus}'.") } // Check if settings concerning reproducibility of used tools are consistent and print warning if not if (params.megahit_fix_cpu_1 || params.spades_fix_cpus != -1 || params.spadeshybrid_fix_cpus != -1) { if (!params.skip_spades && params.spades_fix_cpus == -1) { - log.warn "At least one assembly process is run with a parameter to ensure reproducible results, but SPAdes not. Consider using the parameter '--spades_fix_cpus'." + log.warn "[nf-core/mag]: At least one assembly process is run with a parameter to ensure reproducible results, but SPAdes not. Consider using the parameter '--spades_fix_cpus'." } if (hybrid && params.skip_spadeshybrid && params.spadeshybrid_fix_cpus == -1) { - log.warn "At least one assembly process is run with a parameter to ensure reproducible results, but SPAdes hybrid not. Consider using the parameter '--spadeshybrid_fix_cpus'." + log.warn "[nf-core/mag]: At least one assembly process is run with a parameter to ensure reproducible results, but SPAdes hybrid not. Consider using the parameter '--spadeshybrid_fix_cpus'." } if (!params.skip_megahit && !params.megahit_fix_cpu_1) { - log.warn "At least one assembly process is run with a parameter to ensure reproducible results, but MEGAHIT not. Consider using the parameter '--megahit_fix_cpu_1'." + log.warn "[nf-core/mag]: At least one assembly process is run with a parameter to ensure reproducible results, but MEGAHIT not. Consider using the parameter '--megahit_fix_cpu_1'." } if (!params.skip_binning && params.metabat_rng_seed == 0) { - log.warn "At least one assembly process is run with a parameter to ensure reproducible results, but for MetaBAT2 a random seed is specified ('--metabat_rng_seed 0'). Consider specifying a positive seed instead." + log.warn "[nf-core/mag]: At least one assembly process is run with a parameter to ensure reproducible results, but for MetaBAT2 a random seed is specified ('--metabat_rng_seed 0'). Consider specifying a positive seed instead." } } // Check if SPAdes and single_end if ( (!params.skip_spades || !params.skip_spadeshybrid) && params.single_end) { - log.warn 'metaSPAdes does not support single-end data. SPAdes will be skipped.' + log.warn '[nf-core/mag]: metaSPAdes does not support single-end data. SPAdes will be skipped.' } // Check if parameters for host contamination removal are valid if ( params.host_fasta && params.host_genome) { - error('Both host fasta reference and iGenomes genome are specified to remove host contamination! Invalid combination, please specify either --host_fasta or --host_genome.') + error('[nf-core/mag] ERROR: Both host fasta reference and iGenomes genome are specified to remove host contamination! Invalid combination, please specify either --host_fasta or --host_genome.') } if ( hybrid && (params.host_fasta || params.host_genome) ) { - log.warn 'Host read removal is only applied to short reads. Long reads might be filtered indirectly by Filtlong, which is set to use read qualities estimated based on k-mer matches to the short, already filtered reads.' + log.warn '[nf-core/mag]: Host read removal is only applied to short reads. Long reads might be filtered indirectly by Filtlong, which is set to use read qualities estimated based on k-mer matches to the short, already filtered reads.' if ( params.longreads_length_weight > 1 ) { - log.warn "The parameter --longreads_length_weight is ${params.longreads_length_weight}, causing the read length being more important for long read filtering than the read quality. Set --longreads_length_weight to 1 in order to assign equal weights." + log.warn "[nf-core/mag]: The parameter --longreads_length_weight is ${params.longreads_length_weight}, causing the read length being more important for long read filtering than the read quality. Set --longreads_length_weight to 1 in order to assign equal weights." } } if ( params.host_genome ) { if (!params.genomes) { - error('No config file containing genomes provided!') + error('[nf-core/mag] ERROR: No config file containing genomes provided!') } // Check if host genome exists in the config file if (!params.genomes.containsKey(params.host_genome)) { @@ -269,60 +271,60 @@ def validateInputParameters(hybrid) { '===================================================================================') } if ( !params.genomes[params.host_genome].fasta ) { - error("No fasta file specified for the host genome ${params.host_genome}!") + error("[nf-core/mag] ERROR: No fasta file specified for the host genome ${params.host_genome}!") } if ( !params.genomes[params.host_genome].bowtie2 ) { - error("No Bowtie 2 index file specified for the host genome ${params.host_genome}!") + error("[nf-core/mag] ERROR: No Bowtie 2 index file specified for the host genome ${params.host_genome}!") } } // Check MetaBAT2 inputs if ( !params.skip_metabat2 && params.min_contig_size < 1500 ) { - log.warn "Specified min. contig size under minimum for MetaBAT2. MetaBAT2 will be run with 1500 (other binners not affected). You supplied: --min_contig_size ${params.min_contig_size}" + log.warn "[nf-core/mag]: Specified min. contig size under minimum for MetaBAT2. MetaBAT2 will be run with 1500 (other binners not affected). You supplied: --min_contig_size ${params.min_contig_size}" } // Check more than one binner is run for bin refinement (required DAS by Tool) // If the number of run binners (i.e., number of not-skipped) is more than one, otherwise throw an error if ( params.refine_bins_dastool && !([ params.skip_metabat2, params.skip_maxbin2, params.skip_concoct ].count(false) > 1) ) { - error('Bin refinement with --refine_bins_dastool requires at least two binners to be running (not skipped). Check input.') + error('[nf-core/mag] ERROR: Bin refinement with --refine_bins_dastool requires at least two binners to be running (not skipped). Check input.') } // Check that bin refinement is actually turned on if any of the refined bins are requested for downstream if (!params.refine_bins_dastool && params.postbinning_input != 'raw_bins_only') { - error("The parameter '--postbinning_input ${ params.postbinning_input }' for downstream steps can only be specified if bin refinement is activated with --refine_bins_dastool! Check input.") + error("[nf-core/mag] ERROR: The parameter '--postbinning_input ${ params.postbinning_input }' for downstream steps can only be specified if bin refinement is activated with --refine_bins_dastool! Check input.") } // Check if BUSCO parameters combinations are valid if (params.skip_binqc && params.binqc_tool == 'checkm') { - error('Both --skip_binqc and --binqc_tool \'checkm\' are specified! Invalid combination, please specify either --skip_binqc or --binqc_tool.') + error('[nf-core/mag] ERROR: Both --skip_binqc and --binqc_tool \'checkm\' are specified! Invalid combination, please specify either --skip_binqc or --binqc_tool.') } if (params.skip_binqc) { if (params.busco_db) { - error('Both --skip_binqc and --busco_db are specified! Invalid combination, please specify either --skip_binqc or --binqc_tool \'busco\' with --busco_db.') + error('[nf-core/mag] ERROR: Both --skip_binqc and --busco_db are specified! Invalid combination, please specify either --skip_binqc or --binqc_tool \'busco\' with --busco_db.') } if (params.busco_auto_lineage_prok) { - error('Both --skip_binqc and --busco_auto_lineage_prok are specified! Invalid combination, please specify either --skip_binqc or --binqc_tool \'busco\' with --busco_auto_lineage_prok.') + error('[nf-core/mag] ERROR: Both --skip_binqc and --busco_auto_lineage_prok are specified! Invalid combination, please specify either --skip_binqc or --binqc_tool \'busco\' with --busco_auto_lineage_prok.') } } if (params.skip_binqc && !params.skip_gtdbtk) { - log.warn '--skip_binqc is specified, but --skip_gtdbtk is explictly set to run! GTDB-tk will be omitted because GTDB-tk bin classification requires bin filtering based on BUSCO or CheckM QC results to avoid GTDB-tk errors.' + log.warn '[nf-core/mag]: --skip_binqc is specified, but --skip_gtdbtk is explictly set to run! GTDB-tk will be omitted because GTDB-tk bin classification requires bin filtering based on BUSCO or CheckM QC results to avoid GTDB-tk errors.' } // Check if CAT parameters are valid if (params.cat_db && params.cat_db_generate) { - error('Invalid combination of parameters --cat_db and --cat_db_generate is specified! Please specify either --cat_db or --cat_db_generate.') + error('[nf-core/mag] ERROR: Invalid combination of parameters --cat_db and --cat_db_generate is specified! Please specify either --cat_db or --cat_db_generate.') } if (params.save_cat_db && !params.cat_db_generate) { - error('Invalid parameter combination: parameter --save_cat_db specified, but not --cat_db_generate! Note also that the parameter --save_cat_db does not work in combination with --cat_db.') + error('[nf-core/mag] ERROR: Invalid parameter combination: parameter --save_cat_db specified, but not --cat_db_generate! Note also that the parameter --save_cat_db does not work in combination with --cat_db.') } // Chech MetaEuk db paramaters if (params.metaeuk_mmseqs_db && params.metaeuk_db) { - error('Invalid parameter combination: both --metaeuk_mmseqs_db and --metaeuk_db are specified! Please specify either --metaeuk_mmseqs_db or --metaeuk_db.') + error('[nf-core/mag] ERROR: Invalid parameter combination: both --metaeuk_mmseqs_db and --metaeuk_db are specified! Please specify either --metaeuk_mmseqs_db or --metaeuk_db.') } if (params.save_mmseqs_db && !params.metaeuk_mmseqs_db) { - error('Invalid parameter combination: --save_mmseqs_db supplied but no database has been requested for download with --metaeuk_mmseqs_db!') + error('[nf-core/mag] ERROR: Invalid parameter combination: --save_mmseqs_db supplied but no database has been requested for download with --metaeuk_mmseqs_db!') } } @@ -337,6 +339,14 @@ def validateInputSamplesheet(meta, sr1, sr2, lr) { return [meta, sr1, sr2, lr] } +def validateInputSamplesheet(meta, fasta) { + + if ( !sr2 && !params.single_end ) { error("[nf-core/mag] ERROR: Single-end data must be executed with `--single_end`. Note that it is not possible to mix single- and paired-end data in one run! Check input TSV for sample: ${meta.id}") } + if ( sr2 && params.single_end ) { error("[nf-core/mag] ERROR: Paired-end data must be executed without `--single_end`. Note that it is not possible to mix single- and paired-end data in one run! Check input TSV for sample: ${meta.id}") } + + return [meta, sr1, sr2, lr] +} + // // Get attribute from genome config file e.g. fasta // diff --git a/workflows/mag.nf b/workflows/mag.nf index b922d2e4..bee983d0 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -10,17 +10,6 @@ include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pi include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mag_pipeline' -// Check already if long reads are provided -def hybrid = false -if(file(params.input).extension == 'csv'){ - Channel - .from(file(params.input)) - .splitCsv(header: true) - .map { row -> - if (row.long_reads) hybrid = true - } -} - // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // From 6300614b97e80c51ac7fa9faf1161c6c38cce05a Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 19 Apr 2024 16:22:55 +0200 Subject: [PATCH 12/21] update CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bb58625..8958832e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- [#599](https://github.com/nf-core/mag/pull/599) Update to nf-core v2.13.1 `TEMPLATE` (by @jfy133) + ### `Fixed` - [#606](https://github.com/nf-core/mag/pull/606) - Prevent pipeline crash when premade mashdb given to or no alignments found with GTDB-TK_CLASSIFYWF (reported by @cedwardson4, fix by @jfy133) @@ -17,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Deprecated` +- [#599](https://github.com/nf-core/mag/pull/599) Direct 'fastq' input (`--input 'sample_{R1,R2}.fastq.gz'`) is no longer supported, all input must come via samplesheets (by @jfy133) + ## 2.5.4 - [2024-02-12] ### `Added` From 96e3937f938a6c63bc760835e7a86c2ea3745642 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 19 Apr 2024 16:24:44 +0200 Subject: [PATCH 13/21] Update nextflow_schema.json --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 5332238a..28dd6121 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -37,7 +37,7 @@ "schema": "assets/schema_assembly_input.json", "pattern": "^\\S+\\.csv$", "description": "Additional input CSV samplesheet containing information about pre-computed assemblies. When set, both read pre-processing and assembly are skipped and the pipeline begins at the binning stage.", - "help_text": "If you have pre-computed assemblies from another source, it is possible to jump straight to the binning stage of the pipeline by supplying these assemblies in a CSV file. This CSV file should have four columns and the following header: `id,group,assembler,fasta`. Short reads must still be supplied in to `--input` in CSV format. See [usage docs](https://nf-co.re/mag/usage#input-specifications) for further details.", + "help_text": "If you have pre-computed assemblies from another source, it is possible to jump straight to the binning stage of the pipeline by supplying these assemblies in a CSV file. This CSV file should have at minimum three columns and the following header: `id,group,assembler,fasta` (group is only required when `--coassemble_group). Short reads must still be supplied in to `--input` in CSV format. See [usage docs](https://nf-co.re/mag/usage#input-specifications) for further details.", "fa_icon": "fas fa-file-csv" }, "outdir": { From cf23136b37969b657de250d771ecea17b22b6694 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 19 Apr 2024 16:25:18 +0200 Subject: [PATCH 14/21] Apply suggestions from code review --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8958832e..15f5395c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Deprecated` -- [#599](https://github.com/nf-core/mag/pull/599) Direct 'fastq' input (`--input 'sample_{R1,R2}.fastq.gz'`) is no longer supported, all input must come via samplesheets (by @jfy133) +- [#599](https://github.com/nf-core/mag/pull/599) Direct reads input (`--input 'sample_{R1,R2}.fastq.gz'`) is no longer supported, all input must come via samplesheets (by @jfy133) ## 2.5.4 - [2024-02-12] From b83feda1c9d4a3efc218a350c35dd7f607667ad6 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 19 Apr 2024 16:38:06 +0200 Subject: [PATCH 15/21] Update nextflow_schema.json --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 28dd6121..66d104de 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -20,7 +20,7 @@ "schema": "assets/schema_input.json", "pattern": "^\\S+\\.csv$", "description": "CSV samplesheet file containing information about the samples in the experiment.", - "help_text": "Use this to specify the location of your input FastQ files and their associated metadata. You can also use the CSV file to assign different groups or to include long reads for hybrid assembly with metaSPAdes. The CSV file must have at least 5 columns and the following header: sample,group,short_reads_1,short_reads_2,long_reads. See [usage docs](https://nf-co.re/mag/usage#input-specifications).", + "help_text": "Use this to specify the location of your input FastQ files and their associated metadata. You can also use the CSV file to assign different groups or to include long reads for hybrid assembly with metaSPAdes. The CSV file must have at least two columns (sample, short_reads1) and with a maximum CSV sheet having the headers: sample,run,group,short_reads_1,short_reads_2,long_reads. See [usage docs](https://nf-co.re/mag/usage#input-specifications).", "fa_icon": "fas fa-file-csv" }, "single_end": { From be05dbb266386613271ad55b00e40fcf10987c59 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 19 Apr 2024 16:44:53 +0200 Subject: [PATCH 16/21] Remove duplication --- subworkflows/local/utils_nfcore_mag_pipeline/main.nf | 8 -------- 1 file changed, 8 deletions(-) diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index 41c0143e..1fb5bcae 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -339,14 +339,6 @@ def validateInputSamplesheet(meta, sr1, sr2, lr) { return [meta, sr1, sr2, lr] } -def validateInputSamplesheet(meta, fasta) { - - if ( !sr2 && !params.single_end ) { error("[nf-core/mag] ERROR: Single-end data must be executed with `--single_end`. Note that it is not possible to mix single- and paired-end data in one run! Check input TSV for sample: ${meta.id}") } - if ( sr2 && params.single_end ) { error("[nf-core/mag] ERROR: Paired-end data must be executed without `--single_end`. Note that it is not possible to mix single- and paired-end data in one run! Check input TSV for sample: ${meta.id}") } - - return [meta, sr1, sr2, lr] -} - // // Get attribute from genome config file e.g. fasta // From 2462baffda5c43d70d90b44b4f6cb202cf782b46 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 24 Apr 2024 08:06:32 +0200 Subject: [PATCH 17/21] Fix validateInputSamplesheet usage --- subworkflows/local/utils_nfcore_mag_pipeline/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index 1fb5bcae..2584fc77 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -85,7 +85,7 @@ workflow PIPELINE_INITIALISATION { ch_samplesheet = Channel .fromSamplesheet("input") .map { - validateInputSamplesheet(it) + validateInputSamplesheet(it[0], it[1], it[2], it[3]) } // Prepare FASTQs channel and separate short and long reads and prepare From 01758397cb26b2493d87903deb2b4bdb94731b7e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 6 May 2024 09:57:44 +0200 Subject: [PATCH 18/21] Fix zenodo ID --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a779d65a..a77b7a01 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ [![GitHub Actions CI Status](https://github.com/nf-core/mag/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/ci.yml) -[![GitHub Actions Linting Status](https://github.com/nf-core/mag/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mag/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![GitHub Actions Linting Status](https://github.com/nf-core/mag/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mag/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3589527-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3589527) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)[![Cite Publication](https://img.shields.io/badge/Cite%20Us!-Cite%20Publication-orange)](https://doi.org/10.1093/nargab/lqac007) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) From 5dac597e7d22d97826f84d76232260479fabaa62 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 6 May 2024 09:58:03 +0200 Subject: [PATCH 19/21] Apply suggestions from code review Co-authored-by: Maxime U Garcia --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a779d65a..a77b7a01 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ [![GitHub Actions CI Status](https://github.com/nf-core/mag/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/ci.yml) -[![GitHub Actions Linting Status](https://github.com/nf-core/mag/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mag/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![GitHub Actions Linting Status](https://github.com/nf-core/mag/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/mag/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/mag/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3589527-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3589527) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)[![Cite Publication](https://img.shields.io/badge/Cite%20Us!-Cite%20Publication-orange)](https://doi.org/10.1093/nargab/lqac007) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) From d89e9a494c3bb7ef753529537b3c70c8bb457204 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 6 May 2024 10:02:18 +0200 Subject: [PATCH 20/21] Fix linting error --- modules.json | 11 +- .../dumpsoftwareversions/environment.yml | 7 ++ .../custom/dumpsoftwareversions/main.nf | 24 +++++ .../custom/dumpsoftwareversions/meta.yml | 37 +++++++ .../templates/dumpsoftwareversions.py | 101 ++++++++++++++++++ .../dumpsoftwareversions/tests/main.nf.test | 43 ++++++++ .../tests/main.nf.test.snap | 33 ++++++ .../dumpsoftwareversions/tests/tags.yml | 2 + .../nf-core/utils_nfcore_pipeline/main.nf | 8 +- 9 files changed, 255 insertions(+), 11 deletions(-) create mode 100644 modules/nf-core/custom/dumpsoftwareversions/environment.yml create mode 100644 modules/nf-core/custom/dumpsoftwareversions/main.nf create mode 100644 modules/nf-core/custom/dumpsoftwareversions/meta.yml create mode 100755 modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml diff --git a/modules.json b/modules.json index 2becb55f..484cbff6 100644 --- a/modules.json +++ b/modules.json @@ -208,15 +208,6 @@ } } }, - "subworkflows": { - "nf-core": { - "fasta_binning_concoct": { - "branch": "master", - "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["subworkflows"] - } - } - }, "subworkflows": { "nf-core": { "utils_nextflow_pipeline": { @@ -226,7 +217,7 @@ }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 00000000..9b3272bc --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf new file mode 100644 index 00000000..f2187611 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -0,0 +1,24 @@ +process CUSTOM_DUMPSOFTWAREVERSIONS { + label 'process_single' + + // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" + + input: + path versions + + output: + path "software_versions.yml" , emit: yml + path "software_versions_mqc.yml", emit: mqc_yml + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + template 'dumpsoftwareversions.py' +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml new file mode 100644 index 00000000..5f15a5fd --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -0,0 +1,37 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: custom_dumpsoftwareversions +description: Custom module used to dump software versions within the nf-core pipeline template +keywords: + - custom + - dump + - version +tools: + - custom: + description: Custom module used to dump software versions within the nf-core pipeline template + homepage: https://github.com/nf-core/tools + documentation: https://github.com/nf-core/tools + licence: ["MIT"] +input: + - versions: + type: file + description: YML file containing software versions + pattern: "*.yml" +output: + - yml: + type: file + description: Standard YML file containing software versions + pattern: "software_versions.yml" + - mqc_yml: + type: file + description: MultiQC custom content YML file containing software versions + pattern: "software_versions_mqc.yml" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py new file mode 100755 index 00000000..da033408 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python + + +"""Provide functions to merge multiple versions.yml files.""" + + +import yaml +import platform +from textwrap import dedent + + +def _make_versions_html(versions): + """Generate a tabular HTML output of all versions for MultiQC.""" + html = [ + dedent( + """\\ + +
    Process Name \\", - " \\ Software Version
    CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
    yaml5.4.1
    TOOL1tool10.11.9
    TOOL2tool21.9
    WorkflowNextflow
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    File typeConventional base calls
    + + + + + + + + """ + ) + ] + for process, tmp_versions in sorted(versions.items()): + html.append("") + for i, (tool, version) in enumerate(sorted(tmp_versions.items())): + html.append( + dedent( + f"""\\ + + + + + + """ + ) + ) + html.append("") + html.append("
    Process Name Software Version
    {process if (i == 0) else ''}{tool}{version}
    ") + return "\\n".join(html) + + +def main(): + """Load all version files and generate merged output.""" + versions_this_module = {} + versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, + } + + with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + + # aggregate versions by the module name (derived from fully-qualified process name) + versions_by_module = {} + for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + + versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + } + + versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), + } + + with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + + with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 00000000..b1e1630b --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,43 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 00000000..5f59a936 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "Should run without failures": { + "content": [ + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] + ], + "timestamp": "2024-01-09T23:01:18.710682" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 00000000..405aa24a --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index a8b55d6f..14558c39 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -65,9 +65,15 @@ def checkProfileProvided(nextflow_cli_args) { // Citation string for pipeline // def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + - " ${workflow.manifest.doi}\n\n" + + temp_doi_ref + "\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + From 41e8956945d713fd83852a91cef266da062b9bbc Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Mon, 6 May 2024 08:51:03 +0000 Subject: [PATCH 21/21] Fix gunzipping of pre-supplied assmeblies --- workflows/mag.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflows/mag.nf b/workflows/mag.nf index bee983d0..1a33f8cf 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -618,8 +618,9 @@ workflow MAG { } } else { ch_assemblies_split = ch_input_assemblies + .dump(tag: 'ch_input_assemblies') .branch { meta, assembly -> - gzipped: assembly[0].getExtension() == "gz" + gzipped: assembly.getExtension() == "gz" ungzip: true }
    Process Name \\", + " \\ Software Version
    CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
    yaml5.4.1
    TOOL1tool10.11.9
    TOOL2tool21.9
    WorkflowNextflow