Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: pluggable workflow #175

Merged
merged 8 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changes

1. Updated all tests to use snapshots instead.
2. Made the pipeline pluggable so that it can be used inside a meta pipeline.

## v1.5.1 - Great Geraardsbergen - [March 7 2024]

Expand Down
6 changes: 6 additions & 0 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,11 @@
}
},
"required": ["sample"]
},
"dependentRequired": {
"truth_tbi": ["truth_vcf"],
"truth_bed": ["truth_vcf"],
"tbi": ["gvcf"],
"crai": ["cram"]
}
}
146 changes: 139 additions & 7 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,93 @@ nextflow.enable.dsl = 2

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
GENOME PARAMETER VALUES
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { GERMLINE } from './workflows/germline'
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_cmgg_germline_pipeline'
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_cmgg_germline_pipeline'

include { getGenomeAttribute } from './subworkflows/local/utils_cmgg_germline_pipeline'

// Set every genome-dependent reference parameter from getGenomeAttribute,
// called with the attribute name that matches the parameter name
// (the index parameters use the same name with a `_tbi` suffix).
// NOTE(review): presumably values already supplied on the command line or in a
// config file take precedence over these script-level assignments — confirm
// against Nextflow's parameter precedence rules.
params.fasta = getGenomeAttribute('fasta')
params.fai = getGenomeAttribute('fai')
params.dict = getGenomeAttribute('dict')
params.strtablefile = getGenomeAttribute('strtablefile')
params.sdf = getGenomeAttribute('sdf')
params.dbsnp = getGenomeAttribute('dbsnp')
params.dbsnp_tbi = getGenomeAttribute('dbsnp_tbi')
params.vep_cache = getGenomeAttribute('vep_cache')
params.dbnsfp = getGenomeAttribute('dbnsfp')
params.dbnsfp_tbi = getGenomeAttribute('dbnsfp_tbi')
params.spliceai_indel = getGenomeAttribute('spliceai_indel')
params.spliceai_indel_tbi = getGenomeAttribute('spliceai_indel_tbi')
params.spliceai_snv = getGenomeAttribute('spliceai_snv')
params.spliceai_snv_tbi = getGenomeAttribute('spliceai_snv_tbi')
params.mastermind = getGenomeAttribute('mastermind')
params.mastermind_tbi = getGenomeAttribute('mastermind_tbi')
params.eog = getGenomeAttribute('eog')
params.eog_tbi = getGenomeAttribute('eog_tbi')
params.alphamissense = getGenomeAttribute('alphamissense')
params.alphamissense_tbi = getGenomeAttribute('alphamissense_tbi')
params.vcfanno_resources = getGenomeAttribute('vcfanno_resources')
params.vcfanno_config = getGenomeAttribute('vcfanno_config')

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
GENOME PARAMETER VALUES
VALIDATE INPUTS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

//
// Check for dependencies between parameters
//

// A dbsnp index given without the dbsnp VCF itself is rejected
if(params.dbsnp_tbi && !params.dbsnp){
error("Please specify the dbsnp VCF with --dbsnp VCF")
}

// The checks in this block only apply when annotation is requested (--annotate)
if (params.annotate) {
// Check if a genome is given
if (!params.genome) { error("A genome should be supplied for annotation (use --genome)") }

// Check if the VEP versions were given
if (!params.vep_version) { error("A VEP version should be supplied for annotation (use --vep_version)") }
if (!params.vep_cache_version) { error("A VEP cache version should be supplied for annotation (use --vep_cache_version)") }

// Check if a species is entered
if (!params.species) { error("A species should be supplied for annotation (use --species)") }

// Check if all vcfanno files are supplied when vcfanno should be used
if (params.vcfanno && (!params.vcfanno_config || !params.vcfanno_resources)) {
error("A TOML file and resource files should be supplied when using vcfanno (use --vcfanno_config and --vcfanno_resources)")
}
}

// Split the comma-separated --callers value into a list and fail on the first
// entry that is not present in GlobalVariables.availableCallers
callers = params.callers.tokenize(",")
for(caller in callers) {
if(!(caller in GlobalVariables.availableCallers)) { error("\"${caller}\" is not a supported callers please use one or more of these instead: ${GlobalVariables.availableCallers}")}
}

// --output_suffix is only accepted when exactly one caller is requested
if (params.output_suffix && callers.size() > 1) {
error("Cannot use --output_suffix with more than one caller")
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CONFIG FILES
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// Use the logo given with --multiqc_logo, falling back to the CMGG logo under the project's assets directory
multiqc_logo = params.multiqc_logo ?: "$projectDir/assets/CMGG_logo.png"


/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { GERMLINE } from './workflows/germline'
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_cmgg_germline_pipeline'
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_cmgg_germline_pipeline'

//
// WORKFLOW: Run main analysis pipeline depending on type of input
Expand All @@ -44,7 +114,69 @@ workflow NFCMGG_GERMLINE {
// WORKFLOW: Run pipeline
//
GERMLINE (
samplesheet
// Input channels
samplesheet,

// File inputs
params.fasta,
params.fai,
params.dict,
params.strtablefile,
params.sdf,
params.dbsnp,
params.dbsnp_tbi,
params.vep_cache,
params.dbnsfp,
params.dbnsfp_tbi,
params.spliceai_indel,
params.spliceai_indel_tbi,
params.spliceai_snv,
params.spliceai_snv_tbi,
params.mastermind,
params.mastermind_tbi,
params.eog,
params.eog_tbi,
params.alphamissense,
params.alphamissense_tbi,
params.vcfanno_resources,
params.vcfanno_config,
params.multiqc_config,
multiqc_logo,
params.multiqc_methods_description,
params.roi,
params.somalier_sites,
params.vcfanno_lua,
params.updio_common_cnvs,
params.automap_repeats,
params.automap_panel,
params.outdir,

// Boolean inputs
params.dragstr,
params.annotate,
params.vcfanno,
params.only_call,
params.only_merge,
params.filter,
params.normalize,
params.add_ped,
params.gemini,
params.validate,
params.updio,
params.automap,
params.vep_dbnsfp,
params.vep_spliceai,
params.vep_mastermind,
params.vep_eog,
params.vep_alphamissense,

// Value inputs
params.genome,
params.species,
params.vep_cache_version,
params.vep_chunk_size,
params.scatter_count,
params.callers.tokenize(",")
)

emit:
Expand Down
39 changes: 0 additions & 39 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -265,32 +265,6 @@ if (!params.igenomes_ignore) {
params.genomes = [:]
}

// Get iGenomes reference path if it doesn't already exist
params {
fasta = fasta ?: getGenomeAttribute('fasta')
fai = fai ?: getGenomeAttribute('fai')
dict = dict ?: getGenomeAttribute('dict')
strtablefile = strtablefile ?: getGenomeAttribute('strtablefile')
sdf = sdf ?: getGenomeAttribute('sdf')
dbsnp = dbsnp ?: getGenomeAttribute('dbsnp')
dbsnp_tbi = dbsnp_tbi ?: getGenomeAttribute('dbsnp_tbi')
vep_cache = vep_cache ?: getGenomeAttribute('vep_cache')
dbnsfp = dbnsfp ?: getGenomeAttribute('dbnsfp')
dbnsfp_tbi = dbnsfp_tbi ?: getGenomeAttribute('dbnsfp_tbi')
spliceai_indel = spliceai_indel ?: getGenomeAttribute('spliceai_indel')
spliceai_indel_tbi = spliceai_indel_tbi ?: getGenomeAttribute('spliceai_indel_tbi')
spliceai_snv = spliceai_snv ?: getGenomeAttribute('spliceai_snv')
spliceai_snv_tbi = spliceai_snv_tbi ?: getGenomeAttribute('spliceai_snv_tbi')
mastermind = mastermind ?: getGenomeAttribute('mastermind')
mastermind_tbi = mastermind_tbi ?: getGenomeAttribute('mastermind_tbi')
eog = eog ?: getGenomeAttribute('eog')
eog_tbi = eog_tbi ?: getGenomeAttribute('eog_tbi')
alphamissense = alphamissense ?: getGenomeAttribute('alphamissense')
alphamissense_tbi = alphamissense_tbi ?: getGenomeAttribute('alphamissense_tbi')
vcfanno_resources = vcfanno_resources ?: getGenomeAttribute('vcfanno_resources')
vcfanno_config = vcfanno_config ?: getGenomeAttribute('vcfanno_config')
}

// Export these variables to prevent local Python/R libraries from conflicting with those in the container
// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
Expand Down Expand Up @@ -372,16 +346,3 @@ def check_max(obj, type) {
}
}
}

//
// Get attribute from genome config file e.g. fasta
//
def getGenomeAttribute(attribute) {
    // Without a genomes map, a selected genome, or an entry for that genome
    // there is nothing to look up
    def genomeKnown = params.genomes && params.genome && params.genomes.containsKey(params.genome)
    if (!genomeKnown) {
        return null
    }

    // Return the requested attribute of the selected genome, or null when
    // that genome does not define it
    def genomeEntry = params.genomes[ params.genome ]
    return genomeEntry.containsKey(attribute) ? genomeEntry[ attribute ] : null
}

24 changes: 0 additions & 24 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
"type": "string",
"format": "file-path",
"exists": true,
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml|yml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with samples, and a header row. See [usage docs](./usage.md).",
Expand Down Expand Up @@ -459,29 +458,6 @@
"default": true,
"fa_icon": "fas fa-check-square",
"hidden": true
},
"validationShowHiddenParams": {
"type": "boolean",
"fa_icon": "far fa-eye-slash",
"description": "Show all params when using `--help`",
"hidden": true,
"help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
},
"validationLenientMode": {
"type": "boolean",
"hidden": true,
"description": "Lenient mode for parameter validation"
},
"validationFailUnrecognisedParams": {
"type": "boolean",
"hidden": true,
"description": "Fail on unrecognised parameters"
},
"validationSchemaIgnoreParams": {
"type": "string",
"default": "genomes,igenomes_base,test_data",
"hidden": true,
"description": "Comma-separated list of parameters to ignore when validating against the schema"
}
}
},
Expand Down
17 changes: 9 additions & 8 deletions subworkflows/local/cram_call_gatk4/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@ include { VCF_CONCAT_BCFTOOLS } from '../vcf_concat_bcftools/main'

workflow CRAM_CALL_GATK4 {
take:
ch_input // channel: [mandatory] [ val(meta), path(cram), path(crai), path(bed) ] => sample CRAM files and their indexes with the split bed files
ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] => fasta reference
ch_fai // channel: [mandatory] [ val(meta), path(fai) ] => fasta reference index
ch_dict // channel: [mandatory] [ val(meta), path(dict) ] => sequence dictionary
ch_strtablefile // channel: [optional] [ val(meta), path(strtablefile) ] => STR table file
ch_dbsnp // channel: [optional] [ path(dbsnp) ] => The VCF containing the dbsnp variants
ch_dbsnp_tbi // channel: [optional] [ path(dbsnp_tbi) ] => The index of the dbsnp VCF
ch_input // channel: [mandatory] [ val(meta), path(cram), path(crai), path(bed) ] => sample CRAM files and their indexes with the split bed files
ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] => fasta reference
ch_fai // channel: [mandatory] [ val(meta), path(fai) ] => fasta reference index
ch_dict // channel: [mandatory] [ val(meta), path(dict) ] => sequence dictionary
ch_strtablefile // channel: [optional] [ val(meta), path(strtablefile) ] => STR table file
ch_dbsnp // channel: [optional] [ path(dbsnp) ] => The VCF containing the dbsnp variants
ch_dbsnp_tbi // channel: [optional] [ path(dbsnp_tbi) ] => The index of the dbsnp VCF
dragstr // boolean: create a DragSTR model and run haplotypecaller with it

main:

Expand All @@ -26,7 +27,7 @@ workflow CRAM_CALL_GATK4 {
// Generate DRAGSTR models (if --dragstr is specified)
//

if (params.dragstr) {
if (dragstr) {

ch_input
.map { meta, cram, crai, bed ->
Expand Down
34 changes: 21 additions & 13 deletions subworkflows/local/cram_call_genotype_gatk4/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,19 @@ include { VCF_FILTER_BCFTOOLS } from '../vcf_filter_bcftools/main'

workflow CRAM_CALL_GENOTYPE_GATK4 {
take:
ch_input // channel: [mandatory] [ val(meta), path(cram), path(crai), path(bed) ] => sample CRAM files and their indexes with the split bed files
ch_gvcfs // channel: [mandatory] [ val(meta), path(gvcf), path(tbi) ] => earlier called GVCFs with their indices
ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] => fasta reference
ch_fai // channel: [mandatory] [ val(meta), path(fai) ] => fasta reference index
ch_dict // channel: [mandatory] [ val(meta), path(dict) ] => sequence dictionary
ch_strtablefile // channel: [optional] [ path(strtablefile) ] => STR table file
ch_dbsnp // channel: [optional] [ path(dbsnp) ] => The VCF containing the dbsnp variants
ch_dbsnp_tbi // channel: [optional] [ path(dbsnp_tbi) ] => The index of the dbsnp VCF
ch_input // channel: [mandatory] [ val(meta), path(cram), path(crai), path(bed) ] => sample CRAM files and their indexes with the split bed files
ch_gvcfs // channel: [mandatory] [ val(meta), path(gvcf), path(tbi) ] => earlier called GVCFs with their indices
ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] => fasta reference
ch_fai // channel: [mandatory] [ val(meta), path(fai) ] => fasta reference index
ch_dict // channel: [mandatory] [ val(meta), path(dict) ] => sequence dictionary
ch_strtablefile // channel: [optional] [ path(strtablefile) ] => STR table file
ch_dbsnp // channel: [optional] [ path(dbsnp) ] => The VCF containing the dbsnp variants
ch_dbsnp_tbi // channel: [optional] [ path(dbsnp_tbi) ] => The index of the dbsnp VCF
dragstr // boolean: create a DragSTR model and run haplotypecaller with it
only_call // boolean: only run the variant calling
only_merge // boolean: run until the family merging
filter // boolean: filter the VCFs
scatter_count // integer: the number of times the VCFs should be scattered

main:

Expand All @@ -30,7 +35,8 @@ workflow CRAM_CALL_GENOTYPE_GATK4 {
ch_dict,
ch_strtablefile,
ch_dbsnp,
ch_dbsnp_tbi
ch_dbsnp_tbi,
dragstr
)
ch_versions = ch_versions.mix(CRAM_CALL_GATK4.out.versions)
ch_reports = ch_reports.mix(CRAM_CALL_GATK4.out.reports)
Expand All @@ -42,23 +48,25 @@ workflow CRAM_CALL_GENOTYPE_GATK4 {
}
.mix(CRAM_CALL_GATK4.out.gvcfs)

if(!params.only_call) {
if(!only_call) {

GVCF_JOINT_GENOTYPE_GATK4(
ch_gvcfs_ready,
ch_fasta,
ch_fai,
ch_dict,
ch_dbsnp,
ch_dbsnp_tbi
ch_dbsnp_tbi,
only_merge,
scatter_count
)
ch_versions = ch_versions.mix(GVCF_JOINT_GENOTYPE_GATK4.out.versions)

}

if(!params.only_call && !params.only_merge) {
if(!only_call && !only_merge) {

if(params.filter) {
if(filter) {
VCF_FILTER_BCFTOOLS(
GVCF_JOINT_GENOTYPE_GATK4.out.vcfs,
true
Expand Down
3 changes: 2 additions & 1 deletion subworkflows/local/cram_call_vardictjava/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ workflow CRAM_CALL_VARDICTJAVA {
ch_fai // channel: [mandatory] [ val(meta), path(fai) ] => fasta reference index
ch_dbsnp // channel: [optional] [ path(vcf) ] => the dbsnp vcf file
ch_dbsnp_tbi // channel: [optional] [ path(tbi) ] => the dbsnp vcf index file
filter // boolean: filter the VCFs

main:
ch_versions = Channel.empty()
Expand Down Expand Up @@ -106,7 +107,7 @@ workflow CRAM_CALL_VARDICTJAVA {
VCF_CONCAT_BCFTOOLS.out.vcfs.set { ch_dbsnp_annotated }
}

if(params.filter) {
if(filter) {
VCF_FILTER_BCFTOOLS(
ch_dbsnp_annotated,
false
Expand Down
Loading
Loading