From d117cc612ad4d0885d0d354f5849c352a9c6af46 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 13 Aug 2019 14:28:13 +0200 Subject: [PATCH 01/26] feat: remove unnecessary Channel.create() --- build.nf | 3 --- 1 file changed, 3 deletions(-) diff --git a/build.nf b/build.nf index 9c828f6f36..f2a65a2fe8 100644 --- a/build.nf +++ b/build.nf @@ -188,9 +188,6 @@ process DecompressFile { ch_decompressedFiles = ch_decompressedFiles.dump(tag:'DecompressedFile') ch_fastaFile = Channel.create() -ch_fastaForBWA = Channel.create() -ch_fastaReference = Channel.create() -ch_fastaForSAMTools = Channel.create() ch_otherFile = Channel.create() ch_vcfFile = Channel.create() From 58b692225871449ba8237ab9e8c5d4c5cdb413e8 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 14 Aug 2019 10:12:19 +0200 Subject: [PATCH 02/26] feat: replace --sample with --input, genomeFile with fasta... --- conf/genomes.config | 18 +++++++++--------- conf/igenomes.config | 12 ++++++------ conf/test.config | 2 +- docs/input.md | 10 +++++----- main.nf | 30 +++++++++++++++--------------- scripts/run_tests.sh | 14 +++++++------- 6 files changed, 43 insertions(+), 43 deletions(-) diff --git a/conf/genomes.config b/conf/genomes.config index 8e26207a99..4127832785 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -15,9 +15,9 @@ params { bwaIndex = "${params.genomes_base}/human_g1k_v37_decoy.fasta.{amb,ann,bwt,pac,sa}" dbsnp = "${params.genomes_base}/dbsnp_138.b37.vcf" dbsnpIndex = "${params.genomes_base}/dbsnp_138.b37.vcf.idx" - genomeDict = "${params.genomes_base}/human_g1k_v37_decoy.dict" - genomeFile = "${params.genomes_base}/human_g1k_v37_decoy.fasta" - genomeIndex = "${params.genomes_base}/human_g1k_v37_decoy.fasta.fai" + dict = "${params.genomes_base}/human_g1k_v37_decoy.dict" + fasta = "${params.genomes_base}/human_g1k_v37_decoy.fasta" + fastaFai = "${params.genomes_base}/human_g1k_v37_decoy.fasta.fai" intervals = "${params.genomes_base}/wgs_calling_regions_CAW.list" knownIndels = 
"${params.genomes_base}/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf" knownIndelsIndex = "${params.genomes_base}/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.idx" @@ -30,9 +30,9 @@ params { bwaIndex = "${params.genomes_base}/Homo_sapiens_assembly38.fasta.64.{alt,amb,ann,bwt,pac,sa}" dbsnp = "${params.genomes_base}/dbsnp_146.hg38.vcf.gz" dbsnpIndex = "${params.genomes_base}/dbsnp_146.hg38.vcf.gz.tbi" - genomeDict = "${params.genomes_base}/Homo_sapiens_assembly38.dict" - genomeFile = "${params.genomes_base}/Homo_sapiens_assembly38.fasta" - genomeIndex = "${params.genomes_base}/Homo_sapiens_assembly38.fasta.fai" + dict = "${params.genomes_base}/Homo_sapiens_assembly38.dict" + fasta = "${params.genomes_base}/Homo_sapiens_assembly38.fasta" + fastaFai = "${params.genomes_base}/Homo_sapiens_assembly38.fasta.fai" intervals = "${params.genomes_base}/wgs_calling_regions.hg38.bed" knownIndels = "${params.genomes_base}/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" knownIndelsIndex = "${params.genomes_base}/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" @@ -45,9 +45,9 @@ params { bwaIndex = "${params.genomes_base}/human_g1k_v37_decoy.small.fasta.{amb,ann,bwt,pac,sa}" dbsnp = "${params.genomes_base}/dbsnp_138.b37.small.vcf" dbsnpIndex = "${params.genomes_base}/dbsnp_138.b37.small.vcf.idx" - genomeDict = "${params.genomes_base}/human_g1k_v37_decoy.small.dict" - genomeFile = "${params.genomes_base}/human_g1k_v37_decoy.small.fasta" - genomeIndex = "${params.genomes_base}/human_g1k_v37_decoy.small.fasta.fai" + dict = "${params.genomes_base}/human_g1k_v37_decoy.small.dict" + fasta = "${params.genomes_base}/human_g1k_v37_decoy.small.fasta" + fastaFai = "${params.genomes_base}/human_g1k_v37_decoy.small.fasta.fai" intervals = "${params.genomes_base}/small.intervals" knownIndels = 
"${params.genomes_base}/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.small.vcf" knownIndelsIndex = "${params.genomes_base}/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.small.vcf.idx" diff --git a/conf/igenomes.config b/conf/igenomes.config index 2c936e7a59..e23cca636d 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -15,9 +15,9 @@ bwaIndex = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/BWAIndex/human_g1k_v37_decoy.fasta.{amb,ann,bwt,pac,sa}" dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf" dbsnpIndex = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.idx" - genomeDict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict" - genomeFile = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta" - genomeIndex = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai" + dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict" + fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta" + fastaFai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai" intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/intervals/wgs_calling_regions_CAW.list" knownIndels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf" knownIndelsIndex = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.idx" @@ -30,9 +30,9 @@ bwaIndex = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/Homo_sapiens_assembly38.fasta.64.{alt,amb,ann,bwt,pac,sa}" dbsnp = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" dbsnpIndex = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" - genomeDict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" - genomeFile = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta" - genomeIndex = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai" + dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" + fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta" + fastaFai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai" intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/intervals/wgs_calling_regions.hg38.bed" knownIndels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" knownIndelsIndex = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" diff --git a/conf/test.config b/conf/test.config index 8e4eaff3d8..849e725b61 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,7 +15,7 @@ params { max_memory = 7.GB max_time = 48.h // Input data - sample = 'https://github.com/nf-core/test-datasets/raw/sarek/testdata/tsv/tiny-manta-https.tsv' + input = 'https://github.com/nf-core/test-datasets/raw/sarek/testdata/tsv/tiny-manta-https.tsv' // Small reference genome // To be build with: `nextflow run build.nf --build -profile docker --outdir references` igenomesIgnore = true diff --git a/docs/input.md b/docs/input.md 
index 094ca38169..18e6695855 100644 --- a/docs/input.md +++ b/docs/input.md @@ -2,7 +2,7 @@ ## Information about the TSV files -Input files for Sarek can be specified using a TSV file given to the `--sample` command. +Input files for Sarek can be specified using a TSV file given to the `--input` command. The TSV file is a Tab Separated Value file with columns: - `subject gender status sample lane fastq1 fastq2` for step `mapping` with paired-end FASTQs @@ -49,10 +49,10 @@ G15511 XX 1 D0ENMT D0ENM_2 pathToFiles/D0ENMACXX111207.2_1.fastq. ## Path to a FASTQ directory for a single normal sample (step mapping) -Input files for Sarek can be specified using the path to a FASTQ directory given to the `--sample` command only with the `mapping` step. +Input files for Sarek can be specified using the path to a FASTQ directory given to the `--input` command only with the `mapping` step. ```bash -nextflow run nf-core/sarek --sample pathToDirectory ... +nextflow run nf-core/sarek --input pathToDirectory ... ``` ### Input FASTQ file name best practices @@ -128,9 +128,9 @@ G15511 XX 1 D0ENMT pathToFiles/G15511.D0ENMT.md.recal.bam pathToF ## VCF files for annotation -Input files for Sarek can be specified using the path to a VCF directory given to the `--sample` command only with the `annotate` step. +Input files for Sarek can be specified using the path to a VCF directory given to the `--input` command only with the `annotate` step. Multiple VCF files can be specified if the path is enclosed in quotes. ```bash -nextflow run nf-core/sarek --step annotate --sample "results/VariantCalling/*/.vcf.gz" ... +nextflow run nf-core/sarek --step annotate --input "results/VariantCalling/*/.vcf.gz" ... 
``` diff --git a/main.nf b/main.nf index b379084268..360bbe45a6 100644 --- a/main.nf +++ b/main.nf @@ -28,10 +28,10 @@ def helpMessage() { The typical command for running the pipeline is as follows: - nextflow run nf-core/sarek --sample sample.tsv -profile docker + nextflow run nf-core/sarek --input sample.tsv -profile docker Mandatory arguments: - --sample Path to input TSV file on mapping, recalibrate and variantcalling steps + --input Path to input TSV file on mapping, recalibrate and variantcalling steps Multiple TSV files can be specified with quotes Works also with the path to a directory on mapping step with a single germline sample only Alternatively, path to VCF input file on annotate step @@ -121,7 +121,7 @@ params.multiqc_config = null params.noGVCF = null params.noStrelkaBP = null params.nucleotidesPerSecond = 1000.0 -params.sample = null +params.input = null params.sequencing_center = null params.skip = null params.snpEff_cache = null @@ -170,12 +170,12 @@ if (workflow.profile == 'awsbatch') { ch_output_docs = Channel.fromPath("${baseDir}/docs/output.md") tsvPath = null -if (params.sample) if (hasExtension(params.sample,"tsv") || hasExtension(params.sample,"vcf") || hasExtension(params.sample,"vcf.gz")) tsvPath = params.sample -if (params.sample) if (hasExtension(params.sample,"vcf") || hasExtension(params.sample,"vcf.gz")) step = "annotate" +if (params.input) if (hasExtension(params.input,"tsv") || hasExtension(params.input,"vcf") || hasExtension(params.input,"vcf.gz")) tsvPath = params.input +if (params.input) if (hasExtension(params.input,"vcf") || hasExtension(params.input,"vcf.gz")) step = "annotate" // If no input file specified, trying to get TSV files corresponding to step in the TSV directory // only for steps recalibrate and variantCalling -if (!params.sample && step != 'mapping' && step != 'annotate') { +if (!params.input && step != 'mapping' && step != 'annotate') { tsvPath = step == 'recalibrate' ? 
"${params.outdir}/Preprocessing/TSV/duplicateMarked.tsv": "${params.outdir}/Preprocessing/TSV/recalibrated.tsv" } @@ -189,16 +189,16 @@ if (tsvPath) { case 'annotate': break default: exit 1, "Unknown step ${step}" } -} else if (params.sample) if (!hasExtension(params.sample,"tsv")) { +} else if (params.input) if (!hasExtension(params.input,"tsv")) { println "No TSV file" if (step != 'mapping') exit 1, 'No other step than "mapping" support a dir as an input' - println "Reading ${params.sample} directory" - inputSample = extractFastqFromDir(params.sample) + println "Reading ${params.input} directory" + inputSample = extractFastqFromDir(params.input) (inputSample, fastqTMP) = inputSample.into(2) fastqTMP.toList().subscribe onNext: { - if (it.size() == 0) exit 1, "No FASTQ files found in --sample directory '${params.sample}'" + if (it.size() == 0) exit 1, "No FASTQ files found in --input directory '${params.input}'" } - tsvFile = params.sample // used in the reports + tsvFile = params.input // used in the reports } else if (step == 'annotate') { println "Annotating ${tsvFile}" } else exit 1, 'No sample were defined, see --help' @@ -212,7 +212,7 @@ if (workflow.revision) summary['Pipeline Release'] = workflow.revision summary['Run Name'] = custom_runName ?: workflow.runName summary['Max Resources'] = "${params.max_memory} memory, ${params.max_cpus} cpus, ${params.max_time} time per job" if (workflow.containerEngine) summary['Container'] = "${workflow.containerEngine} - ${workflow.container}" -if (params.sample) summary['Sample'] = params.sample +if (params.input) summary['Input'] = params.input if (params.targetBED) summary['Target BED'] = params.targetBED if (params.step) summary['Step'] = params.step if (params.tools) summary['Tools'] = tools.join(', ') @@ -2280,9 +2280,9 @@ def checkReferenceMap(referenceMap) { // Define map of reference depending of tools and step def defineReferenceMap(step, tools) { def referenceMap = [ - 'genomeDict' : 
checkParamReturnFile("genomeDict"), - 'genomeFile' : checkParamReturnFile("genomeFile"), - 'genomeIndex' : checkParamReturnFile("genomeIndex"), + 'genomeDict' : checkParamReturnFile("dict"), + 'genomeFile' : checkParamReturnFile("fasta"), + 'genomeIndex' : checkParamReturnFile("fastaFai"), 'intervals' : checkParamReturnFile("intervals") ] if ('mapping' in step) { diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index 2e947e8a24..b8b34772df 100755 --- a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -103,21 +103,21 @@ esac case $TEST in ANNOTATE) - run_sarek --step annotate --tools ${ANNOTATOR} --sample ${PATHTOSAMPLE}/vcf/Strelka_1234N_variants.vcf.gz + run_sarek --step annotate --tools ${ANNOTATOR} --input ${PATHTOSAMPLE}/vcf/Strelka_1234N_variants.vcf.gz ;; GERMLINE) - run_sarek --tools=false --sample data/testdata/tiny/normal - run_sarek --tools=false --sample results/Preprocessing/TSV/duplicateMarked.tsv --step recalibrate - run_sarek --tools HaplotypeCaller --sample results/Preprocessing/TSV/recalibrated.tsv --step variantCalling + run_sarek --tools=false --input data/testdata/tiny/normal + run_sarek --tools=false --input results/Preprocessing/TSV/duplicateMarked.tsv --step recalibrate + run_sarek --tools HaplotypeCaller --input results/Preprocessing/TSV/recalibrated.tsv --step variantCalling ;; MULTIPLE) - run_sarek ${OPTIONS},snpEff,VEP,merge --sample ${PATHTOSAMPLE}/tsv/tiny-multiple${SUFFIX}.tsv + run_sarek ${OPTIONS},snpEff,VEP,merge --input ${PATHTOSAMPLE}/tsv/tiny-multiple${SUFFIX}.tsv ;; SOMATIC) - run_sarek ${OPTIONS} --sample ${PATHTOSAMPLE}/tsv/tiny-manta${SUFFIX}.tsv + run_sarek ${OPTIONS} --input ${PATHTOSAMPLE}/tsv/tiny-manta${SUFFIX}.tsv ;; TARGETED) - run_sarek ${OPTIONS} --sample ${PATHTOSAMPLE}/tsv/tiny-manta${SUFFIX}.tsv --targetBED ${PATHTOSAMPLE}/target.bed + run_sarek ${OPTIONS} --input ${PATHTOSAMPLE}/tsv/tiny-manta${SUFFIX}.tsv --targetBED ${PATHTOSAMPLE}/target.bed ;; esac From 8c9356b97e573acc954a53170c979bea506ba871 Mon 
Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 16 Aug 2019 10:59:16 +0200 Subject: [PATCH 03/26] feat: update comments --- main.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index 360bbe45a6..9371e5cc8d 100644 --- a/main.nf +++ b/main.nf @@ -56,7 +56,7 @@ def helpMessage() { snpEff, VEP, merge Default: None --skip Specify which QC tools to skip when running Sarek - Available: bamQC, BCFtools, FastQC, MultiQC, samtools, vcftools, versions + Available: all, bamQC, BCFtools, FastQC, MultiQC, samtools, vcftools, versions Default: None --annotateTools Specify from which tools Sarek will look for VCF files to annotate, only for step annotate Available: HaplotypeCaller, Manta, MuTect2, Strelka, TIDDIT @@ -71,9 +71,9 @@ def helpMessage() { --bwaIndex bwa indexes --dbsnp dbsnp file --dbsnpIndex dbsnp index - --genomeDict genome dict - --genomeFile genome file - --genomeIndex genome index + --dict dict from the fasta reference + --fasta fasta reference + --fastaFai reference index --intervals intervals --knownIndels knownIndels file --knownIndelsIndex knownIndels index From 5530bd23070b29d2b59e1e7603811fe131480ec3 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 16 Aug 2019 11:07:39 +0200 Subject: [PATCH 04/26] feat: Add TIDDIT version in MultiQC report --- bin/scrape_software_versions.py | 2 ++ main.nf | 1 + 2 files changed, 3 insertions(+) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 96281d68c5..1f9cccd7bb 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -22,6 +22,7 @@ 'samtools': ['v_samtools.txt', r"samtools (\S+)"], 'SnpEff': ['v_snpeff.txt', r"version SnpEff (\S+)"], 'Strelka': ['v_strelka.txt', r"([0-9.]+)"], + 'TIDDIT': ['v_tiddit.txt', r"TIDDIT-(\S+)"], 'vcftools': ['v_vcftools.txt', r"([0-9.]+)"], 'VEP': ['v_vep.txt', r"ensembl-vep : (\S+)"], } @@ -43,6 +44,7 @@ results['samtools'] = 'N/A' results['SnpEff'] = 'N/A' results['Strelka'] =
'N/A' +results['TIDDIT'] = 'N/A' results['vcftools'] = 'N/A' results['VEP'] = 'N/A' diff --git a/main.nf b/main.nf index 9371e5cc8d..84394b1b24 100644 --- a/main.nf +++ b/main.nf @@ -274,6 +274,7 @@ process GetSoftwareVersions { qualimap --version &> v_qualimap.txt 2>&1 || true R --version &> v_r.txt || true samtools --version &> v_samtools.txt 2>&1 || true + tiddit &> v_tiddit.txt 2>&1 || true vcftools --version &> v_vcftools.txt 2>&1 || true vep --help &> v_vep.txt 2>&1 || true From ea0e45e12892e84713c7896287dbcea3cd758ab1 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Mon, 19 Aug 2019 12:56:38 +0200 Subject: [PATCH 05/26] feat: polish code --- main.nf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 84394b1b24..23934b8c71 100644 --- a/main.nf +++ b/main.nf @@ -1122,8 +1122,10 @@ process FreeBayes { input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(intervalBed) from pairBamFreeBayes - file(genomeFile) from Channel.value(referenceMap.genomeFile) - file(genomeIndex) from Channel.value(referenceMap.genomeIndex) + set file(genomeFile), file(genomeIndex) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex + ]) output: set val("FreeBayes"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into vcfFreeBayes From 49bc9f0fbb1ad28fb3fe6433024776bb4a99f23c Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 27 Aug 2019 11:02:24 +0200 Subject: [PATCH 06/26] feat: update docs --- docs/usage.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 29b239294c..1a3e4aa021 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -11,7 +11,7 @@ * [Reproducibility](#reproducibility) * [Main arguments](#main-arguments) * [`-profile`](#-profile) - * [`--sample`](#--sample) + * [`--input`](#--input) * 
[`--noGVCF`](#--nogvcf) * [`--noReports`](#--noreports) * [`--nucleotidesPerSecond`](#--nucleotidespersecond) @@ -77,7 +77,7 @@ NXF_OPTS='-Xms1g -Xmx4g' The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/sarek --sample sample.tsv -profile docker +nextflow run nf-core/sarek --input sample.tsv -profile docker ``` This will launch the pipeline with the `docker` configuration profile. @@ -142,13 +142,13 @@ If `-profile` is not specified at all the pipeline will be run locally and expec * A profile with a complete configuration for automated testing * Includes links to test data so needs no other parameters -### `--sample` +### `--input` Use this to specify the location of your input TSV file, on `mapping`, `recalibrate` and `variantcalling` steps. For example: ```bash ---sample sample.tsv +--input sample.tsv ``` Multiple TSV files can be specified if the path must be enclosed in quotes @@ -157,14 +157,14 @@ Use this to specify the location to a directory on `mapping` step with a single For example: ```bash ---sample PathToDirectory +--input PathToDirectory ``` Use this to specify the location of your VCF input file on `annotate` step. 
For example: ```bash ---sample sample.vcf +--input sample.vcf ``` Multiple VCF files can be specified if the path must be enclosed in quotes From b418ab92eadbb04d5b0608cc67528ca95b446883 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Mon, 2 Sep 2019 16:39:00 +0200 Subject: [PATCH 07/26] rename genomeFile, genomeIndex and genomeDict in main.nf script --- main.nf | 150 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 75 insertions(+), 75 deletions(-) diff --git a/main.nf b/main.nf index 23934b8c71..a090cc629a 100644 --- a/main.nf +++ b/main.nf @@ -435,7 +435,7 @@ process MapReads { input: set idPatient, idSample, idRun, file(inputFile1), file(inputFile2) from inputReads - set file(genomeFile), file(bwaIndex) from Channel.value([referenceMap.genomeFile, referenceMap.bwaIndex]) + set file(fasta), file(bwaIndex) from Channel.value([referenceMap.fasta, referenceMap.bwaIndex]) output: set idPatient, idSample, idRun, file("${idRun}.bam") into bamMapped @@ -457,7 +457,7 @@ process MapReads { if (hasExtension(inputFile1,"fastq.gz") || hasExtension(inputFile1,"fq.gz")) """ bwa mem -K 100000000 -R \"${readGroup}\" ${extra} -t ${task.cpus} -M \ - ${genomeFile} ${inputFile1} ${inputFile2} | \ + ${fasta} ${inputFile1} ${inputFile2} | \ samtools sort --threads ${task.cpus} -m 2G - > ${idRun}.bam """ else if (hasExtension(inputFile1,"bam")) @@ -469,7 +469,7 @@ process MapReads { --INTERLEAVE=true \ --NON_PF=true \ | \ - bwa mem -K 100000000 -p -R \"${readGroup}\" ${extra} -t ${task.cpus} -M ${genomeFile} \ + bwa mem -K 100000000 -p -R \"${readGroup}\" ${extra} -t ${task.cpus} -M ${fasta} \ /dev/stdin - 2> >(tee ${inputFile1}.bwa.stderr.log >&2) \ | \ samtools sort --threads ${task.cpus} -m 2G - > ${idRun}.bam @@ -570,10 +570,10 @@ process BaseRecalibrator { input: set idPatient, idSample, file(bam), file(bai), file(intervalBed) from bamBaseRecalibrator - set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex), 
file(knownIndels), file(knownIndelsIndex) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex, - referenceMap.genomeDict, + set file(fasta), file(fastaFai), file(dict), file(dbsnp), file(dbsnpIndex), file(knownIndels), file(knownIndelsIndex) from Channel.value([ + referenceMap.fasta, + referenceMap.fastaFai, + referenceMap.dict, referenceMap.dbsnp, referenceMap.dbsnpIndex, referenceMap.knownIndels, @@ -594,7 +594,7 @@ process BaseRecalibrator { -I ${bam} \ -O ${intervalBed.baseName}_${idSample}.recal.table \ --tmp-dir /tmp \ - -R ${genomeFile} \ + -R ${fasta} \ -L ${intervalBed} \ --known-sites ${dbsnp} \ ${known} \ @@ -668,10 +668,10 @@ process ApplyBQSR { input: set idPatient, idSample, file(bam), file(bai), file(recalibrationReport), file(intervalBed) from bamApplyBQSR - set file(genomeFile), file(genomeIndex), file(genomeDict)from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex, - referenceMap.genomeDict + set file(fasta), file(fastaFai), file(dict)from Channel.value([ + referenceMap.fasta, + referenceMap.fastaFai, + referenceMap.dict ]) output: @@ -681,7 +681,7 @@ process ApplyBQSR { """ gatk --java-options -Xmx${task.memory.toGiga()}g \ ApplyBQSR \ - -R ${genomeFile} \ + -R ${fasta} \ --input ${bam} \ --output ${intervalBed.baseName}_${idSample}.recal.bam \ -L ${intervalBed} \ @@ -830,10 +830,10 @@ process HaplotypeCaller { input: set idPatient, idSample, file(bam), file(bai), file(intervalBed) from bamHaplotypeCaller - set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex, - referenceMap.genomeDict, + set file(fasta), file(fastaFai), file(dict), file(dbsnp), file(dbsnpIndex) from Channel.value([ + referenceMap.fasta, + referenceMap.fastaFai, + referenceMap.dict, referenceMap.dbsnp, referenceMap.dbsnpIndex ]) @@ -848,7 +848,7 @@ process HaplotypeCaller { """ gatk --java-options "-Xmx${task.memory.toGiga()}g 
-Xms6000m -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10" \ HaplotypeCaller \ - -R ${genomeFile} \ + -R ${fasta} \ -I ${bam} \ -L ${intervalBed} \ -D ${dbsnp} \ @@ -869,10 +869,10 @@ process GenotypeGVCFs { input: set idPatient, idSample, file(intervalBed), file(gvcf) from gvcfGenotypeGVCFs - set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex, - referenceMap.genomeDict, + set file(fasta), file(fastaFai), file(dict), file(dbsnp), file(dbsnpIndex) from Channel.value([ + referenceMap.fasta, + referenceMap.fastaFai, + referenceMap.dict, referenceMap.dbsnp, referenceMap.dbsnpIndex ]) @@ -890,7 +890,7 @@ process GenotypeGVCFs { gatk --java-options -Xmx${task.memory.toGiga()}g \ GenotypeGVCFs \ - -R ${genomeFile} \ + -R ${fasta} \ -L ${intervalBed} \ -D ${dbsnp} \ -V ${gvcf} \ @@ -913,9 +913,9 @@ process StrelkaSingle { input: set idPatient, idSample, file(bam), file(bai) from bamStrelkaSingle file(targetBED) from Channel.value(params.targetBED ? file(params.targetBED) : "null") - set file(genomeFile), file(genomeIndex) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex + set file(fasta), file(fastaFai) from Channel.value([ + referenceMap.fasta, + referenceMap.fastaFai ]) output: @@ -930,7 +930,7 @@ process StrelkaSingle { ${beforeScript} configureStrelkaGermlineWorkflow.py \ --bam ${bam} \ - --referenceFasta ${genomeFile} \ + --referenceFasta ${fasta} \ ${options} \ --runDir Strelka @@ -962,9 +962,9 @@ process MantaSingle { input: set idPatient, idSample, file(bam), file(bai) from bamMantaSingle file(targetBED) from Channel.value(params.targetBED ? 
file(params.targetBED) : "null") - set file(genomeFile), file(genomeIndex) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex + set file(fasta), file(fastaFai) from Channel.value([ + referenceMap.fasta, + referenceMap.fastaFai ]) output: @@ -982,7 +982,7 @@ process MantaSingle { ${beforeScript} configManta.py \ ${inputbam} ${bam} \ - --reference ${genomeFile} \ + --reference ${fasta} \ ${options} \ --runDir Manta @@ -1020,9 +1020,9 @@ process TIDDIT { input: set idPatient, idSample, file(bam), file(bai) from bamTIDDIT - set file(genomeFile), file(genomeIndex) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex + set file(fasta), file(fastaFai) from Channel.value([ + referenceMap.fasta, + referenceMap.fastaFai ]) output: @@ -1033,7 +1033,7 @@ process TIDDIT { script: """ - tiddit --sv -o TIDDIT_${idSample} --bam ${bam} --ref ${genomeFile} + tiddit --sv -o TIDDIT_${idSample} --bam ${bam} --ref ${fasta} mv TIDDIT_${idSample}.vcf TIDDIT_${idSample}.old.vcf @@ -1088,10 +1088,10 @@ process Mutect2 { input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(intervalBed) from pairBamMuTect2 - set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex, - referenceMap.genomeDict, + set file(fasta), file(fastaFai), file(dict), file(dbsnp), file(dbsnpIndex) from Channel.value([ + referenceMap.fasta, + referenceMap.fastaFai, + referenceMap.dict, referenceMap.dbsnp, referenceMap.dbsnpIndex ]) @@ -1105,7 +1105,7 @@ process Mutect2 { """ gatk --java-options "-Xmx${task.memory.toGiga()}g" \ Mutect2 \ - -R ${genomeFile}\ + -R ${fasta}\ -I ${bamTumor} -tumor ${idSampleTumor} \ -I ${bamNormal} -normal ${idSampleNormal} \ -L ${intervalBed} \ @@ -1122,9 +1122,9 @@ process FreeBayes { input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, 
file(bamTumor), file(baiTumor), file(intervalBed) from pairBamFreeBayes - set file(genomeFile), file(genomeIndex) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex + set file(fasta), file(fastaFai) from Channel.value([ + referenceMap.fasta, + referenceMap.fastaFai ]) output: @@ -1135,7 +1135,7 @@ process FreeBayes { script: """ freebayes \ - -f ${genomeFile} \ + -f ${fasta} \ --pooled-continuous \ --pooled-discrete \ --genotype-qualities \ @@ -1170,7 +1170,7 @@ process ConcatVCF { input: set variantCaller, idPatient, idSample, file(vcFiles) from vcfConcatenateVCFs - file(genomeIndex) from Channel.value(referenceMap.genomeIndex) + file(fastaFai) from Channel.value(referenceMap.fastaFai) file(targetBED) from Channel.value(params.targetBED ? file(params.targetBED) : "null") output: @@ -1184,7 +1184,7 @@ process ConcatVCF { else outputFile = "${variantCaller}_${idSample}.vcf" options = params.targetBED ? "-t ${targetBED}" : "" """ - concatenateVCFs.sh -i ${genomeIndex} -c ${task.cpus} -o ${outputFile} ${options} + concatenateVCFs.sh -i ${fastaFai} -c ${task.cpus} -o ${outputFile} ${options} """ } @@ -1203,10 +1203,10 @@ process Strelka { input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from pairBamStrelka file(targetBED) from Channel.value(params.targetBED ? 
file(params.targetBED) : "null") - set file(genomeFile), file(genomeIndex), file(genomeDict) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex, - referenceMap.genomeDict + set file(fasta), file(fastaFai), file(dict) from Channel.value([ + referenceMap.fasta, + referenceMap.fastaFai, + referenceMap.dict ]) output: @@ -1222,7 +1222,7 @@ process Strelka { configureStrelkaSomaticWorkflow.py \ --tumor ${bamTumor} \ --normal ${bamNormal} \ - --referenceFasta ${genomeFile} \ + --referenceFasta ${fasta} \ ${options} \ --runDir Strelka @@ -1254,9 +1254,9 @@ process Manta { input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from pairBamManta file(targetBED) from Channel.value(params.targetBED ? file(params.targetBED) : "null") - set file(genomeFile), file(genomeIndex) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex + set file(fasta), file(fastaFai) from Channel.value([ + referenceMap.fasta, + referenceMap.fastaFai ]) output: @@ -1273,7 +1273,7 @@ process Manta { configManta.py \ --normalBam ${bamNormal} \ --tumorBam ${bamTumor} \ - --reference ${genomeFile} \ + --reference ${fasta} \ ${options} \ --runDir Manta @@ -1322,10 +1322,10 @@ process StrelkaBP { input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(mantaCSI), file(mantaCSIi) from pairBamStrelkaBP file(targetBED) from Channel.value(params.targetBED ? 
file(params.targetBED) : "null") - set file(genomeFile), file(genomeIndex), file(genomeDict) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex, - referenceMap.genomeDict + set file(fasta), file(fastaFai), file(dict) from Channel.value([ + referenceMap.fasta, + referenceMap.fastaFai, + referenceMap.dict ]) output: @@ -1341,7 +1341,7 @@ process StrelkaBP { configureStrelkaSomaticWorkflow.py \ --tumor ${bamTumor} \ --normal ${bamNormal} \ - --referenceFasta ${genomeFile} \ + --referenceFasta ${fasta} \ --indelCandidates ${mantaCSI} \ ${options} \ --runDir Strelka @@ -1372,11 +1372,11 @@ process AlleleCounter { input: set idPatient, idSample, file(bam), file(bai) from bamAscat - set file(acLoci), file(genomeFile), file(genomeIndex), file(genomeDict) from Channel.value([ + set file(acLoci), file(fasta), file(fastaFai), file(dict) from Channel.value([ referenceMap.acLoci, - referenceMap.genomeFile, - referenceMap.genomeIndex, - referenceMap.genomeDict + referenceMap.fasta, + referenceMap.fastaFai, + referenceMap.dict ]) output: @@ -1388,7 +1388,7 @@ process AlleleCounter { """ alleleCounter \ -l ${acLoci} \ - -r ${genomeFile} \ + -r ${fasta} \ -b ${bam} \ -o ${idSample}.alleleCount; """ @@ -1473,9 +1473,9 @@ process Mpileup { input: set idPatient, idSample, file(bam), file(bai), file(intervalBed) from bamMpileup - set file(genomeFile), file(genomeIndex) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex + set file(fasta), file(fastaFai) from Channel.value([ + referenceMap.fasta, + referenceMap.fastaFai ]) output: @@ -1486,7 +1486,7 @@ process Mpileup { script: """ samtools mpileup \ - -f ${genomeFile} ${bam} \ + -f ${fasta} ${bam} \ -l ${intervalBed} \ | bgzip --threads ${task.cpus} -c > ${intervalBed.baseName}_${idSample}.pileup.gz """ @@ -1548,9 +1548,9 @@ process ControlFREEC { input: set idPatient, idSampleNormal, idSampleTumor, file(mpileupNormal), file(mpileupTumor) from mpileupOut - set file(genomeFile), 
file(genomeIndex), file(dbsnp), file(dbsnpIndex), file(chrDir), file(chrLength) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex, + set file(fasta), file(fastaFai), file(dbsnp), file(dbsnpIndex), file(chrDir), file(chrLength) from Channel.value([ + referenceMap.fasta, + referenceMap.fastaFai, referenceMap.dbsnp, referenceMap.dbsnpIndex, referenceMap.chrDir, @@ -2283,9 +2283,9 @@ def checkReferenceMap(referenceMap) { // Define map of reference depending of tools and step def defineReferenceMap(step, tools) { def referenceMap = [ - 'genomeDict' : checkParamReturnFile("dict"), - 'genomeFile' : checkParamReturnFile("fasta"), - 'genomeIndex' : checkParamReturnFile("fastaFai"), + 'dict' : checkParamReturnFile("dict"), + 'fasta' : checkParamReturnFile("fasta"), + 'fastaFai' : checkParamReturnFile("fastaFai"), 'intervals' : checkParamReturnFile("intervals") ] if ('mapping' in step) { From 0611d09cf8d973e3a427278138120e82f20728ca Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Mon, 2 Sep 2019 16:50:04 +0200 Subject: [PATCH 08/26] add deprecation message --- main.nf | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/main.nf b/main.nf index 5a2cf12703..7e7bb745be 100644 --- a/main.nf +++ b/main.nf @@ -102,6 +102,22 @@ def helpMessage() { // Show help message if (params.help) exit 0, helpMessage() +// Show deprecation message +if (params.sample) { + println "--sample is now deprecated, please use --input instead" + params.input = params.sample +} + +if (params.sampleDir) { + println "--sampleDir is now deprecated, please use --input instead" + params.input = params.sampleDir +} + +if (params.annotateVCF) { + println "--annotateVCF is now deprecated, please use --input instead" + params.input = params.annotateVCF +} + // Check if genome exists in the config file if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. 
Currently the available genomes are ${params.genomes.keySet().join(", ")}" From fab5387d0eacab70185bfaf59cfa7895a09cf5e9 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Mon, 2 Sep 2019 16:52:14 +0200 Subject: [PATCH 09/26] better deprecation message --- main.nf | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/main.nf b/main.nf index 7e7bb745be..f424112441 100644 --- a/main.nf +++ b/main.nf @@ -118,6 +118,21 @@ if (params.annotateVCF) { params.input = params.annotateVCF } +if (params.genomeDict) { + println "--genomeDict is now deprecated, please use --dict instead" + params.dict = params.genomeDict +} + +if (params.genomeFile) { + println "--genomeFile is now deprecated, please use --fasta instead" + params.fasta = params.genomeFile +} + +if (params.genomeIndex) { + println "--genomeIndex is now deprecated, please use --fastaFai instead" + params.fastaFai = params.genomeIndex +} + // Check if genome exists in the config file if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. 
Currently the available genomes are ${params.genomes.keySet().join(", ")}" From 1afdf58f9aa8633036cd72e62fd9728fc98358ba Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Mon, 2 Sep 2019 16:54:25 +0200 Subject: [PATCH 10/26] update docs --- CHANGELOG.md | 11 ++++++++--- docs/usage.md | 24 ++++++++++++------------ 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7f781d6dd..b21a5613ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,11 +45,11 @@ Initial release of `nf-core/sarek`, created with the [nf-core](http://nf-co.re/) ### `Changed` -- [#1](https://github.com/nf-core/sarek/pull/1), [#2](https://github.com/nf-core/sarek/pull/2), [#3](https://github.com/nf-core/sarek/pull/3), [#4](https://github.com/nf-core/sarek/pull/4), [#5](https://github.com/nf-core/sarek/pull/5), [#6](https://github.com/nf-core/sarek/pull/6), [#7](https://github.com/nf-core/sarek/pull/7), [#8](https://github.com/nf-core/sarek/pull/8), [#9](https://github.com/nf-core/sarek/pull/9), [#10](https://github.com/nf-core/sarek/pull/10), [#11](https://github.com/nf-core/sarek/pull/11), [#12](https://github.com/nf-core/sarek/pull/12), [#18](https://github.com/nf-core/sarek/pull/18), [#20](https://github.com/nf-core/sarek/pull/20), [#21](https://github.com/nf-core/sarek/pull/21), [#29](https://github.com/nf-core/sarek/pull/29) - Update docs +- [#1](https://github.com/nf-core/sarek/pull/1), [#2](https://github.com/nf-core/sarek/pull/2), [#3](https://github.com/nf-core/sarek/pull/3), [#4](https://github.com/nf-core/sarek/pull/4), [#5](https://github.com/nf-core/sarek/pull/5), [#6](https://github.com/nf-core/sarek/pull/6), [#7](https://github.com/nf-core/sarek/pull/7), [#8](https://github.com/nf-core/sarek/pull/8), [#9](https://github.com/nf-core/sarek/pull/9), [#10](https://github.com/nf-core/sarek/pull/10), [#11](https://github.com/nf-core/sarek/pull/11), [#12](https://github.com/nf-core/sarek/pull/12), [#18](https://github.com/nf-core/sarek/pull/18), 
[#20](https://github.com/nf-core/sarek/pull/20), [#21](https://github.com/nf-core/sarek/pull/21), [#23](https://github.com/nf-core/sarek/pull/23), [#29](https://github.com/nf-core/sarek/pull/29) - Update docs - [#4](https://github.com/nf-core/sarek/pull/4) - Update `cancerit-allelecount` from `2.1.2` to `4.0.2` - [#4](https://github.com/nf-core/sarek/pull/4) - Update `gatk4` from `4.1.1.0` to `4.1.2.0` -- [#7](https://github.com/nf-core/sarek/pull/7) - `--sampleDir` is now deprecated, use `--sample` instead -- [#7](https://github.com/nf-core/sarek/pull/8) - `--annotateVCF` is now deprecated, use `--sample` instead +- [#7](https://github.com/nf-core/sarek/pull/7), [#23](https://github.com/nf-core/sarek/pull/23) - `--sampleDir` is now deprecated, use `--input` instead +- [#7](https://github.com/nf-core/sarek/pull/8), [#23](https://github.com/nf-core/sarek/pull/23) - `--annotateVCF` is now deprecated, use `--input` instead - [#8](https://github.com/nf-core/sarek/pull/8), [#12](https://github.com/nf-core/sarek/pull/12) - Improve helper script `build.nf` for downloading and building reference files - [#9](https://github.com/nf-core/sarek/pull/9) - ApplyBQSR is now parallelized - [#9](https://github.com/nf-core/sarek/pull/9) - Fastq files are named following "${idRun}_R1.fastq.gz" in the FastQC output for easier reporting @@ -69,6 +69,11 @@ Initial release of `nf-core/sarek`, created with the [nf-core](http://nf-co.re/) - [#18](https://github.com/nf-core/sarek/pull/18), [#29](https://github.com/nf-core/sarek/pull/29) - `--noReports` is now `--skipQC all` - [#18](https://github.com/nf-core/sarek/pull/18), [#21](https://github.com/nf-core/sarek/pull/21) - Update logo - [#21](https://github.com/nf-core/sarek/pull/21) - Moved smallGRCh37 path to `genomes.config` +- [#23](https://github.com/nf-core/sarek/pull/23) - Rename `genomeFile`, `genomeIndex` and `genomeDict` by `fasta`, `fastaFai` and `dict` +- [#23](https://github.com/nf-core/sarek/pull/23) - `--sample` is now 
deprecated, use `--input` instead +- [#23](https://github.com/nf-core/sarek/pull/23) - `--genomeFile` is now deprecated, use `--fasta` instead +- [#23](https://github.com/nf-core/sarek/pull/23) - `--genomeIndex` is now deprecated, use `--fastaFai` instead +- [#23](https://github.com/nf-core/sarek/pull/23) - `--genomeDict` is now deprecated, use `--dict` instead ### `Removed` diff --git a/docs/usage.md b/docs/usage.md index 4e8925b01c..749ed00a18 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -26,9 +26,9 @@ * [`--bwaIndex`](#--bwaindex) * [`--dbsnp`](#--dbsnp) * [`--dbsnpIndex`](#--dbsnpindex) - * [`--genomeDict`](#--genomedict) - * [`--genomeFile`](#--genomefile) - * [`--genomeIndex`](#--genomeindex) + * [`--dict`](#--dict) + * [`--fasta`](#--fasta) + * [`--fastaFai`](#--fastafai) * [`--intervals`](#--intervals) * [`--knownIndels`](#--knownindels) * [`--knownIndelsIndex`](#--knownindelsindex) @@ -233,9 +233,9 @@ params { bwaIndex = '' dbsnp = '' dbsnpIndex = '' - genomeDict = '' - genomeFile = '' - genomeIndex = '' + dict = '' + fasta = '' + fastaFai = '' intervals = '' knownIndels = '' knownIndelsIndex = '' @@ -287,28 +287,28 @@ If you prefer, you can specify the full path to your reference genome when you r --dbsnpIndex '[path to the dbsnp index]' ``` -### `--genomeDict` +### `--dict` If you prefer, you can specify the full path to your reference genome when you run the pipeline: ```bash ---genomeDict '[path to the genomeDict file]' +--dict '[path to the dict file]' ``` -### `--genomeFile` +### `--fasta` If you prefer, you can specify the full path to your reference genome when you run the pipeline: ```bash ---genomeFile '[path to the genome file]' +--fasta '[path to the genome file]' ``` -### `--genomeIndex` +### `--fastaFai` If you prefer, you can specify the full path to your reference genome when you run the pipeline: ```bash ---genomeIndex '[path to the genome Index]' +--fastaFai '[path to the genome Index]' ``` ### `--intervals` From 
37719d27c62a687e354a8146ca5dd6db7b212174 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Mon, 2 Sep 2019 16:56:00 +0200 Subject: [PATCH 11/26] update docs --- docs/use_cases.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/use_cases.md b/docs/use_cases.md index c47ad93387..2491ad69e6 100644 --- a/docs/use_cases.md +++ b/docs/use_cases.md @@ -5,7 +5,7 @@ Using the `mapping` directive one will have a pair of mapped, deduplicated and r This is the usual option you have to give when you are starting from raw FASTQ data: ```bash -nextflow run nf-core/sarek/main.nf --sample mysample.tsv --tools +nextflow run nf-core/sarek/main.nf --input mysample.tsv --tools ``` `mapping` will start by default, you do not have to give any additional parameters, only the TSV file describing the sample (see below). @@ -20,7 +20,7 @@ Also, older version are renamed with incremented numbers. The workflow should be started in this case with the smallest set of options as written above: ```bash -nextflow run nf-core/sarek/main.nf --sample mysample.tsv --tools +nextflow run nf-core/sarek/main.nf --input mysample.tsv --tools ``` The TSV file should look like: @@ -33,22 +33,22 @@ See the [input files documentation](docs/input.md) for more information. ## Starting from raw FASTQ - a directory with normal sample only -The `--sample` option can be also used to point Sarek to a directory with FASTQ files: +The `--input` option can be also used to point Sarek to a directory with FASTQ files: ```bash -nextflow run nf-core/sarek/main.nf --sample path/to/FASTQ/files --tools +nextflow run nf-core/sarek/main.nf --input path/to/FASTQ/files --tools ``` The given directory is searched recursively for FASTQ files that are named `*_R1_*.fastq.gz`, and a matching pair with the same name except `_R2_` instead of `_R1_` is expected to exist alongside. All of the found FASTQ files are considered to belong to the sample. 
Each FASTQ file pair gets its own read group (`@RG`) in the resulting BAM file. -### Metadata when using `--sample` with a directory +### Metadata when using `--input` with a directory -When using `--sample` with a directory, the metadata about the sample that are written to the BAM header in the `@RG` tag are determined in the following way. +When using `--input` with a directory, the metadata about the sample that are written to the BAM header in the `@RG` tag are determined in the following way. -- The sample name (`SM`) is derived from the the last component of the path given to `--sample`. -That is, you should make sure that that directory has a meaningful name! For example, with `--sample=/my/fastqs/sample123`, the sample name will be `sample123`. +- The sample name (`SM`) is derived from the last component of the path given to `--input`. +That is, you should make sure that that directory has a meaningful name! For example, with `--input=/my/fastqs/sample123`, the sample name will be `sample123`. - The read group id is set to *flowcell.samplename.lane*. The flowcell id and lane number are auto-detected from the name of the first read in the FASTQ file. @@ -78,7 +78,7 @@ See the [input files documentation](docs/input.md) for more information. 
## Starting from recalibration ```bash -nextflow run nf-core/sarek/main.nf --sample mysample.tsv --step recalibrate --tools +nextflow run nf-core/sarek/main.nf --input mysample.tsv --step recalibrate --tools ``` And the corresponding TSV file should be like: @@ -121,5 +121,5 @@ It is adviced to pad the variant calling regions (exons or the target) to some e To add the target BED file configure the flow like: ```bash -nextflow run nf-core/sarek/main.nf --tools haplotypecaller,strelka,mutect2 --targetBED targets.bed --sample my_panel.tsv +nextflow run nf-core/sarek/main.nf --tools haplotypecaller,strelka,mutect2 --targetBED targets.bed --input my_panel.tsv ``` From 6714f8e982963c406d0ca8dc58c0360e1c084ef1 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 3 Sep 2019 13:23:48 +0200 Subject: [PATCH 12/26] Refactor deprecation for --sample, --sampleDir, --anotateVCF, --noReports, --genomeDict, --genomeFile, --genomeIndex - Add message when deprecated params are used - Deprecated params are still working - Fix usage of deprecated params - --- main.nf | 77 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/main.nf b/main.nf index f424112441..b5c5dd2f09 100644 --- a/main.nf +++ b/main.nf @@ -20,7 +20,6 @@ nf-core/sarek: */ def helpMessage() { - // TODO nf-core: Add to this help message with new command line parameters log.info nfcoreHeader() log.info""" @@ -102,36 +101,21 @@ def helpMessage() { // Show help message if (params.help) exit 0, helpMessage() -// Show deprecation message -if (params.sample) { - println "--sample is now deprecated, please use --input instead" - params.input = params.sample -} - -if (params.sampleDir) { - println "--sampleDir is now deprecated, please use --input instead" - params.input = params.sampleDir -} - -if (params.annotateVCF) { - println "--annotateVCF is now deprecated, please use --input instead" - params.input = params.annotateVCF -} - -if (params.genomeDict) { - 
println "--genomeDict is now deprecated, please use --dict instead" - params.dict = params.genomeDict -} - -if (params.genomeFile) { - println "--genomeFile is now deprecated, please use --fasta instead" - params.fasta = params.genomeFile -} - -if (params.genomeIndex) { - println "--genomeIndex is now deprecated, please use --fastaFai instead" - params.fastaFai = params.genomeIndex -} +// Handle deprecation +params.noReports = null +if (params.noReports) deprecationMessage("noReports", "skipQC all") +params.annotateVCF = null +if (params.annotateVCF) deprecationMessage("annotateVCF", "input") +params.genomeDict = null +if (params.genomeDict) deprecationMessage("genomeDict", "dict") +params.genomeFile = null +if (params.genomeFile) deprecationMessage("genomeFile", "fasta") +params.genomeIndex = null +if (params.genomeIndex) deprecationMessage("genomeIndex", "fastaFai") +params.sample = null +if (params.sample) deprecationMessage("sample", "input") +params.sampleDir = null +if (params.sampleDir) deprecationMessage("sampleDir", "input") // Check if genome exists in the config file if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { @@ -175,6 +159,9 @@ skipQClist = defineSkipQClist() skipQC = params.skipQC ? params.skipQC == 'all' ? skipQClist : params.skipQC.split(',').collect{it.trim().toLowerCase()} : [] if (!checkParameterList(skipQC,skipQClist)) exit 1, 'Unknown QC tool(s), see --help for more information' +// Handle deprecation +if (params.noReports) skipQC = skipQClist + annoList = defineAnnoList() annotateTools = params.annotateTools ? 
params.annotateTools.split(',').collect{it.trim().toLowerCase()} : [] if (!checkParameterList(annotateTools,annoList)) exit 1, 'Unknown tool(s) to annotate, see --help for more information' @@ -200,9 +187,13 @@ if (workflow.profile == 'awsbatch') { // Stage config files ch_output_docs = Channel.fromPath("${baseDir}/docs/output.md") -tsvPath = null -if (params.input) if (hasExtension(params.input,"tsv") || hasExtension(params.input,"vcf") || hasExtension(params.input,"vcf.gz")) tsvPath = params.input -if (params.input) if (hasExtension(params.input,"vcf") || hasExtension(params.input,"vcf.gz")) step = "annotate" +if (params.input && (hasExtension(params.input,"tsv") || hasExtension(params.input,"vcf") || hasExtension(params.input,"vcf.gz"))) tsvPath = params.input +if (params.input && (hasExtension(params.input,"vcf") || hasExtension(params.input,"vcf.gz"))) step = "annotate" + +// Handle deprecation +if (params.annotateVCF) tsvPath = params.annotateVCF +if (params.sample) tsvPath = params.sample +if (params.sampleDir) tsvPath = params.sampleDir // If no input file specified, trying to get TSV files corresponding to step in the TSV directory // only for steps recalibrate and variantCalling @@ -220,7 +211,7 @@ if (tsvPath) { case 'annotate': break default: exit 1, "Unknown step ${step}" } -} else if (params.input) if (!hasExtension(params.input,"tsv")) { +} else if (params.input && !hasExtension(params.input,"tsv")) { println "No TSV file" if (step != 'mapping') exit 1, 'No other step than "mapping" support a dir as an input' println "Reading ${params.input} directory" @@ -2286,6 +2277,11 @@ def checkParameterList(list, realList) { // Check if params.item exists and return params.genomes[params.genome].item otherwise def checkParamReturnFile(item) { + // Handle deprecation + if (params.genomeDict && item == "dict") return file(params.genomeDict) + if (params.genomeFile && item == "fasta") return file(params.genomeFile) + if (params.genomeIndex && item == "fastaFai") 
return file(params.genomeIndex) + params."${item}" = params.genomes[params.genome]."${item}" return file(params."${item}") } @@ -2314,9 +2310,9 @@ def checkReferenceMap(referenceMap) { // Define map of reference depending of tools and step def defineReferenceMap(step, tools) { def referenceMap = [ - 'dict' : checkParamReturnFile("dict"), - 'fasta' : checkParamReturnFile("fasta"), - 'fastaFai' : checkParamReturnFile("fastaFai"), + 'dict' : checkParamReturnFile("dict"), + 'fasta' : checkParamReturnFile("fasta"), + 'fastaFai' : checkParamReturnFile("fastaFai"), 'intervals' : checkParamReturnFile("intervals") ] if ('mapping' in step) { @@ -2400,6 +2396,11 @@ def defineToolList() { ] } +// Print deprecation message +def deprecationMessage(oldParams, newParams) { + log.warn "The params `--${oldParams}` is deprecated, please use `--${newParams}` instead -- it will be removed in a future release" +} + // Channeling the TSV file containing BAM. // Format is: "subject gender status sample bam bai" def extractBam(tsvFile) { From acb59b4d287a6edb4102cc4c923eec17aa89e9f7 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 3 Sep 2019 15:22:40 +0200 Subject: [PATCH 13/26] move modifications to another branch --- bin/scrape_software_versions.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 1f9cccd7bb..96281d68c5 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -22,7 +22,6 @@ 'samtools': ['v_samtools.txt', r"samtools (\S+)"], 'SnpEff': ['v_snpeff.txt', r"version SnpEff (\S+)"], 'Strelka': ['v_strelka.txt', r"([0-9.]+)"], - 'TIDDIT': ['v_tiddit.txt', r"TIDDIT-(\S+)"], 'vcftools': ['v_vcftools.txt', r"([0-9.]+)"], 'VEP': ['v_vep.txt', r"ensembl-vep : (\S+)"], } @@ -44,7 +43,6 @@ results['samtools'] = 'N/A' results['SnpEff'] = 'N/A' results['Strelka'] = 'N/A' -results['TIDDIT'] = 'N/A' results['vcftools'] = 'N/A' results['VEP'] = 'N/A' From 
7f5637baf5384ce6705ba8923e383aae63149127 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 3 Sep 2019 15:22:44 +0200 Subject: [PATCH 14/26] Fix --input for germline sample directory - tsvPath is now defined as null by default --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index b5c5dd2f09..5c2f43405b 100644 --- a/main.nf +++ b/main.nf @@ -202,6 +202,7 @@ if (!params.input && step != 'mapping' && step != 'annotate') { } inputSample = Channel.empty() +tsvPath = null if (tsvPath) { tsvFile = file(tsvPath) switch (step) { @@ -296,7 +297,6 @@ process GetSoftwareVersions { qualimap --version &> v_qualimap.txt 2>&1 || true R --version &> v_r.txt || true samtools --version &> v_samtools.txt 2>&1 || true - tiddit &> v_tiddit.txt 2>&1 || true vcftools --version &> v_vcftools.txt 2>&1 || true vep --help &> v_vep.txt 2>&1 || true From cc6750d80c6f88753ce5fa5b6b59fc33978ce3bf Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Tue, 3 Sep 2019 15:29:34 +0200 Subject: [PATCH 15/26] Add GitHubActions (#27) * feat: Add GitHubActions * feat: reduce linting time * feat: add --memory for script * feat: use --max_memory to limit memory usage to 6.GB on GitHub actions * install nextflow directly * try matrix * use export to get correct nextflow version * add linting tests * fix with node version indent * rename jobs * code polish * feat: add badges for GitHub Actions * test branch protection --- .github/workflows/branch.yml | 14 ++++++++++++++ .github/workflows/ci.yml | 27 +++++++++++++++++++++++++++ .github/workflows/linting.yml | 18 ++++++++++++++++++ .travis.yml | 2 +- CHANGELOG.md | 1 + README.md | 2 ++ scripts/build_reference.sh | 14 ++++++++++---- scripts/download_image.sh | 2 +- scripts/run_tests.sh | 12 +++++++++--- 9 files changed, 83 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/branch.yml create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/linting.yml diff --git 
a/.github/workflows/branch.yml b/.github/workflows/branch.yml new file mode 100644 index 0000000000..05b94b0daa --- /dev/null +++ b/.github/workflows/branch.yml @@ -0,0 +1,14 @@ +name: nf-core branch protection +# This workflow is triggered on PRs to master branch on the repository +on: + pull_request: + branches: + - master + +jobs: + test: + runs-on: ubuntu-latest + steps: + # PRs are only ok if coming from an nf-core dev branch + - name: Check PRs + run: [ ${GITHUB_ACTOR} = "nf-core" ] && [ ${GITHUB_HEAD_REF} = "dev" ] \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000..a42622b711 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,27 @@ +name: nf-core CI +# This workflow is triggered on pushes and PRs to the repository. +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + test: [ANNOTATESNPEFF, ANNOTATEVEP, GERMLINE, SOMATIC, TARGETED] + nxf_ver: ['19.04.0', ''] + steps: + - uses: actions/checkout@v1 + - name: Install Nextflow + run: | + export NXF_VER=${{ matrix.nxf_ver }} + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + - name: Download image + run: | + ${GITHUB_WORKSPACE}/scripts/download_image.sh -n docker --test ${{ matrix.test }} + - name: Build References + run: | + ${GITHUB_WORKSPACE}/scripts/build_reference.sh --test ${{ matrix.test }} --verbose --memory 6.GB + - name: Run test + run: | + ${GITHUB_WORKSPACE}/scripts/run_tests.sh --test ${{ matrix.test }} --verbose --memory 6.GB \ No newline at end of file diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 0000000000..0bb9aadd17 --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,18 @@ +name: nf-core linting +# This workflow is triggered on pushes and PRs to the repository. 
+on: [push, pull_request] + +jobs: + Markdown: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-node@v1 + with: + node-version: '10' + - name: Install markdownlint + run: | + npm install -g markdownlint-cli + - name: Run Markdownlint + run: | + markdownlint ${GITHUB_WORKSPACE} -c ${GITHUB_WORKSPACE}/.github/markdownlint.yml \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index cbb65a0da6..7fb126f18c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -58,6 +58,6 @@ jobs: include: - stage: "Linting" name: "Markdown" - env: TEST=GERMLINE NXF_VER=19.04.0 + env: TEST=LINT NXF_VER=19.04.0 script: - markdownlint ${TRAVIS_BUILD_DIR} -c ${TRAVIS_BUILD_DIR}/.github/markdownlint.yml \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index b21a5613ed..f10181235c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,7 @@ Initial release of `nf-core/sarek`, created with the [nf-core](http://nf-co.re/) - [#20](https://github.com/nf-core/sarek/pull/20) - Add `markdownlint` config file - [#21](https://github.com/nf-core/sarek/pull/21) - Add tests for latest Nextflow version as well - [#21](https://github.com/nf-core/sarek/pull/21) - Add `genomes.config` for genomes without AWS iGenomes +- [#XXX](https://github.com/nf-core/sarek/pull/XXX) - Use Github actions for CI ### `Changed` diff --git a/README.md b/README.md index 058a01b5ba..dd52d8cea4 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,8 @@ [![nf-core][nf-core-badge]](https://nf-co.re/) [![Travis build status][travis-badge]](https://travis-ci.com/nf-core/sarek/) +[![GitHub Actions CI Status](https://github.com/nf-core/sarek/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/sarek/actions) +[![GitHub Actions Linting Status](https://github.com/nf-core/sarek/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/sarek/actions) [![CircleCi build status][circleci-badge]](https://circleci.com/gh/nf-core/sarek/) [![Install with 
bioconda][bioconda-badge]](https://bioconda.github.io/) diff --git a/scripts/build_reference.sh b/scripts/build_reference.sh index 2e34a04d0d..636b161628 100755 --- a/scripts/build_reference.sh +++ b/scripts/build_reference.sh @@ -4,14 +4,15 @@ set -xeuo pipefail # This script build small reference for sarek tests # https://github.com/nf-core/test-datasets/raw/sarek -usage() { echo "Usage: $0 <-p profile> <-t test> <-v>" 1>&2; exit 1; } +usage() { echo "Usage: $0 <-p profile> <-t test> <-v> <-m memory>" 1>&2; exit 1; } +MEMORY='7.GB' NXF_SINGULARITY_CACHEDIR=${NXF_SINGULARITY_CACHEDIR:-work/singularity/.} OFFLINE='' PROFILE=docker TEST=ALL -TRAVIS_BUILD_DIR=${TRAVIS_BUILD_DIR:-.} TRAVIS=${TRAVIS:-false} +TRAVIS_BUILD_DIR=${TRAVIS_BUILD_DIR:-.} VERBOSE='' while [[ $# -gt 0 ]] @@ -23,6 +24,11 @@ do shift # past argument shift # past value ;; + -m|--memory) + MEMORY=$2 + shift # past argument + shift # past value + ;; --offline) OFFLINE="--offline" shift # past value @@ -44,9 +50,9 @@ do done # Build references for smallGRCh37 -if ! [[ ANNOTATESNPEFF,ANNOTATEVEP =~ $TEST ]] +if ! [[ ANNOTATEBOTH,ANNOTATESNPEFF,ANNOTATEVEP,LINT =~ $TEST ]] then rm -rf references - nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile test,${PROFILE} --build --outdir references ${VERBOSE} ${OFFLINE} + nextflow run ${TRAVIS_BUILD_DIR}/build.nf -profile test,${PROFILE} --build --outdir references ${VERBOSE} ${OFFLINE} --max_memory ${MEMORY} rm -rf .nextflow* references/pipeline_info work fi diff --git a/scripts/download_image.sh b/scripts/download_image.sh index 862f265033..959e8fb843 100755 --- a/scripts/download_image.sh +++ b/scripts/download_image.sh @@ -74,7 +74,7 @@ then get_image sarekvep ${VERSION}.${SOURCEGENOME} ${VERSION}.${GENOME} fi -if ! [[ ANNOTATEBOTH,ANNOTATESNPEFF,ANNOTATEVEP,SNPEFF,VEP =~ $TEST ]] +if ! 
[[ ANNOTATEBOTH,ANNOTATESNPEFF,ANNOTATEVEP,LINT,SNPEFF,VEP =~ $TEST ]] then get_image sarek ${VERSION} ${VERSION} fi diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index d034648df3..462c270de9 100755 --- a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -4,14 +4,15 @@ set -xeuo pipefail # This script run sarek tests # https://github.com/nf-core/test-datasets/raw/sarek -usage() { echo "Usage: $0 <-p profile> <-t test> <-c cpus> <-n> <-v>" 1>&2; exit 1; } +usage() { echo "Usage: $0 <-p profile> <-t test> <-c cpus> <-n> <-v> <-m memory>" 1>&2; exit 1; } CPUS=2 LOGS='' -REPORTS='' +MEMORY='7.GB' NXF_SINGULARITY_CACHEDIR=${NXF_SINGULARITY_CACHEDIR:-work/singularity/.} OFFLINE=false PROFILE=docker +REPORTS='' TEST=MULTIPLE TRAVIS=${TRAVIS:-false} TRAVIS_BUILD_DIR=${TRAVIS_BUILD_DIR:-.} @@ -25,6 +26,11 @@ do CPUS=$2 shift # past value ;; + -m|--memory) + MEMORY=$2 + shift # past argument + shift # past value + ;; -n|--no-logs) LOGS=true shift # past value @@ -66,7 +72,7 @@ function manage_logs() { } function run_sarek() { - nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile test,${PROFILE} ${VERBOSE} --monochrome_logs ${REPORTS} $@ + nextflow run ${TRAVIS_BUILD_DIR}/main.nf -profile test,${PROFILE} ${VERBOSE} --monochrome_logs ${REPORTS} --max_memory ${MEMORY} $@ } if [[ $OFFLINE == false ]] From 22dff1bdde5381c3ff7ed33625e4dbce62fe8463 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 3 Sep 2019 15:38:06 +0200 Subject: [PATCH 16/26] fix syntax hopefully --- .github/workflows/branch.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 05b94b0daa..e3c557ce13 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -11,4 +11,5 @@ jobs: steps: # PRs are only ok if coming from an nf-core dev branch - name: Check PRs - run: [ ${GITHUB_ACTOR} = "nf-core" ] && [ ${GITHUB_HEAD_REF} = "dev" ] \ No newline at end of file + run: | + [ ${GITHUB_ACTOR} = "nf-core" 
] && [ ${GITHUB_HEAD_REF} = "dev" ] \ No newline at end of file From 348474f1882678ae24b496ed6a325c2630c4081b Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 3 Sep 2019 15:40:37 +0200 Subject: [PATCH 17/26] undo changes --- .github/workflows/branch.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index baf742fcfd..05b94b0daa 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -11,4 +11,4 @@ jobs: steps: # PRs are only ok if coming from an nf-core dev branch - name: Check PRs - run: [ ${GITHUB_ACTOR} = "nf-core" ] && [ ${GITHUB_HEAD_REF} = "dev" ] + run: [ ${GITHUB_ACTOR} = "nf-core" ] && [ ${GITHUB_HEAD_REF} = "dev" ] \ No newline at end of file From 0e217b4586aff619994b1003f0a59600bb5fdf07 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 3 Sep 2019 16:03:38 +0200 Subject: [PATCH 18/26] set tsvPAth to null at the right place... --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 5c2f43405b..f546e3cc91 100644 --- a/main.nf +++ b/main.nf @@ -187,6 +187,7 @@ if (workflow.profile == 'awsbatch') { // Stage config files ch_output_docs = Channel.fromPath("${baseDir}/docs/output.md") +tsvPath = null if (params.input && (hasExtension(params.input,"tsv") || hasExtension(params.input,"vcf") || hasExtension(params.input,"vcf.gz"))) tsvPath = params.input if (params.input && (hasExtension(params.input,"vcf") || hasExtension(params.input,"vcf.gz"))) step = "annotate" @@ -202,7 +203,6 @@ if (!params.input && step != 'mapping' && step != 'annotate') { } inputSample = Channel.empty() -tsvPath = null if (tsvPath) { tsvFile = file(tsvPath) switch (step) { From 34a1a84e799bfdf237514df88612c172a66cea8a Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 4 Sep 2019 13:16:30 +0200 Subject: [PATCH 19/26] update CHANGELOG --- CHANGELOG.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index f10181235c..1f0e131173 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,7 +42,7 @@ Initial release of `nf-core/sarek`, created with the [nf-core](http://nf-co.re/) - [#20](https://github.com/nf-core/sarek/pull/20) - Add `markdownlint` config file - [#21](https://github.com/nf-core/sarek/pull/21) - Add tests for latest Nextflow version as well - [#21](https://github.com/nf-core/sarek/pull/21) - Add `genomes.config` for genomes without AWS iGenomes -- [#XXX](https://github.com/nf-core/sarek/pull/XXX) - Use Github actions for CI +- [#27](https://github.com/nf-core/sarek/pull/27), [#30](https://github.com/nf-core/sarek/pull/30) - Use Github actions for CI, linting and branch protection ### `Changed` @@ -75,6 +75,7 @@ Initial release of `nf-core/sarek`, created with the [nf-core](http://nf-co.re/) - [#23](https://github.com/nf-core/sarek/pull/23) - `--genomeFile` is now deprecated, use `--fasta` instead - [#23](https://github.com/nf-core/sarek/pull/23) - `--genomeIndex` is now deprecated, use `--fastaFai` instead - [#23](https://github.com/nf-core/sarek/pull/23) - `--genomeDict` is now deprecated, use `--dict` instead +- [#30](https://github.com/nf-core/sarek/pull/30) - Simplify code for `MapReads` process ### `Removed` @@ -86,7 +87,7 @@ Initial release of `nf-core/sarek`, created with the [nf-core](http://nf-co.re/) ### `Fixed` - [#3](https://github.com/nf-core/sarek/pull/3) - Fix Docker ownership -- [#11](https://github.com/nf-core/sarek/pull/11) - Fix MergeMpileup PublishDir +- [#11](https://github.com/nf-core/sarek/pull/11) - Fix `MergeMpileup` PublishDir - [#13](https://github.com/nf-core/sarek/pull/13) - Fix merge in annotation - [#14](https://github.com/nf-core/sarek/pull/14) - Fix output name for vcf files - [#16](https://github.com/nf-core/sarek/pull/16) - Fix path to Rscript @@ -94,7 +95,8 @@ Initial release of `nf-core/sarek`, created with the [nf-core](http://nf-co.re/) - [#18](https://github.com/nf-core/sarek/pull/18) - Use same font 
for nf-core and sarek in ascii art - [#20](https://github.com/nf-core/sarek/pull/20) - Use new logo in README - [#20](https://github.com/nf-core/sarek/pull/20) - Fix path to references genomes -- [#22](https://github.com/nf-core/sarek/pull/22) - Fix --singleCPUMem issue +- [#22](https://github.com/nf-core/sarek/pull/22) - Fix `--singleCPUMem` issue +- [#30](https://github.com/nf-core/sarek/pull/30) - fix choice between `inputPairReadsFastQC` and `inputBAMFastQC` channels ## [2.3.FIX1] - 2019-03-04 From 580d5f68b8e317e66dc78cfcdd2189c1ced3fc2b Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 5 Sep 2019 14:29:51 +0200 Subject: [PATCH 20/26] better deprecation message --- main.nf | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/main.nf b/main.nf index f29ecafdbb..1d2fed5fca 100644 --- a/main.nf +++ b/main.nf @@ -103,19 +103,19 @@ if (params.help) exit 0, helpMessage() // Handle deprecation params.noReports = null -if (params.noReports) deprecationMessage("noReports", "skipQC all") +if (params.noReports) log.warn "The params `--noReports` is deprecated -- it will be removed in a future release.\n\tPlease check: https://github.com/nf-core/sarek/blob/master/docs/usage.md#--skipQC" params.annotateVCF = null -if (params.annotateVCF) deprecationMessage("annotateVCF", "input") +if (params.annotateVCF) log.warn "The params `--annotateVCF` is deprecated -- it will be removed in a future release.\n\tPlease check: https://github.com/nf-core/sarek/blob/master/docs/usage.md#--input" params.genomeDict = null -if (params.genomeDict) deprecationMessage("genomeDict", "dict") +if (params.genomeDict) log.warn "The params `--genomeDict` is deprecated -- it will be removed in a future release.\n\tPlease check: https://github.com/nf-core/sarek/blob/master/docs/usage.md#--dict" params.genomeFile = null -if (params.genomeFile) deprecationMessage("genomeFile", "fasta") +if (params.genomeFile) log.warn "The params `--genomeFile` is deprecated -- it will 
be removed in a future release.\n\tPlease check: https://github.com/nf-core/sarek/blob/master/docs/usage.md#--fasta" params.genomeIndex = null -if (params.genomeIndex) deprecationMessage("genomeIndex", "fastaFai") +if (params.genomeIndex) log.warn "The params `--genomeIndex` is deprecated -- it will be removed in a future release.\n\tPlease check: https://github.com/nf-core/sarek/blob/master/docs/usage.md#--fastaFai" params.sample = null -if (params.sample) deprecationMessage("sample", "input") +if (params.sample) log.warn "The params `--sample` is deprecated -- it will be removed in a future release.\n\tPlease check: https://github.com/nf-core/sarek/blob/master/docs/usage.md#--input" params.sampleDir = null -if (params.sampleDir) deprecationMessage("sampleDir", "input") +if (params.sampleDir) log.warn "The params `--sampleDir` is deprecated -- it will be removed in a future release.\n\tPlease check: https://github.com/nf-core/sarek/blob/master/docs/usage.md#--input" // Check if genome exists in the config file if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { @@ -2385,8 +2385,9 @@ def defineToolList() { } // Print deprecation message -def deprecationMessage(oldParams, newParams) { - log.warn "The params `--${oldParams}` is deprecated, please use `--${newParams}` instead -- it will be removed in a future release" +def deprecationMessage(oldItem, newItem = null) { + extra = newItem == null ? "": ", please use `${newItem}` instead" + log.warn "The ${oldItem} is deprecated${extra} -- it will be removed in a future release" } // Channeling the TSV file containing BAM. 
From 1af9c374e846225e284cc858137cf740b539d598 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 6 Sep 2019 10:46:03 +0200 Subject: [PATCH 21/26] update docs --- docs/usage.md | 93 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 2 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 749ed00a18..f705309135 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -169,6 +169,62 @@ For example: Multiple VCF files can be specified if the path must be enclosed in quotes +### `--sample` + +> :warning: This params is deprecated -- it will be removed in a future release. +> Please check: [`--input`](#--input) + +Use this to specify the location of your input TSV file, on `mapping`, `recalibrate` and `variantcalling` steps. +For example: + +```bash +--sample sample.tsv +``` + +Multiple TSV files can be specified if the path must be enclosed in quotes + +Use this to specify the location to a directory on `mapping` step with a single germline sample only. +For example: + +```bash +--sample PathToDirectory +``` + +Use this to specify the location of your VCF input file on `annotate` step. +For example: + +```bash +--sample sample.vcf +``` + +Multiple VCF files can be specified if the path must be enclosed in quotes + +### `--sampleDir` + +> :warning: This params is deprecated -- it will be removed in a future release. +> Please check: [`--input`](#--input) + +Use this to specify the location to a directory on `mapping` step with a single germline sample only. +For example: + +```bash +--sampleDir PathToDirectory +``` + +### `--annotateVCF` + +> :warning: This params is deprecated -- it will be removed in a future release. +> Please check: [`--input`](#--input) + +Use this to specify the location of your VCF input file on `annotate` step. +For example: + +```bash +--annotateVCF sample.vcf +``` + +Multiple VCF files can be specified if the path must be enclosed in quotes + ### `--noGVCF` Use this to disable g.vcf from `HaplotypeCaller`. 
@@ -300,7 +356,7 @@ If you prefer, you can specify the full path to your reference genome when you r If you prefer, you can specify the full path to your reference genome when you run the pipeline: ```bash ---fasta '[path to the genome file]' +--fasta '[path to the reference fasta file]' ``` ### `--fastaFai` @@ -308,7 +364,40 @@ If you prefer, you can specify the full path to your reference genome when you r If you prefer, you can specify the full path to your reference genome when you run the pipeline: ```bash ---fastaFai '[path to the genome Index]' +--fastaFai '[path to the reference index]' +``` + +### `--genomeDict` + +> :warning: This params is deprecated -- it will be removed in a future release. +> Please check: [`--dict`](#--dict) + +If you prefer, you can specify the full path to your reference genome when you run the pipeline: + +```bash +--dict '[path to the dict file]' +``` + +### `--genomeFile` + +> :warning: This params is deprecated -- it will be removed in a future release. +> Please check: [`--fasta`](#--fasta) + +If you prefer, you can specify the full path to your reference genome when you run the pipeline: + +```bash +--fasta '[path to the reference fasta file]' +``` + +### `--genomeIndex` + +> :warning: This params is deprecated -- it will be removed in a future release. 
+> Please check: [`--fastaFai`](#--fastaFai) + +If you prefer, you can specify the full path to your reference genome when you run the pipeline: + +```bash +--fastaFai '[path to the reference index]' ``` ### `--intervals` From 9363fecb4ce3415b45958c4222803e129efff83b Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 6 Sep 2019 13:55:45 +0200 Subject: [PATCH 22/26] fix typo in params.fastaFai --- conf/genomes.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/genomes.config b/conf/genomes.config index fe68e0eaa7..c22824a932 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -17,7 +17,7 @@ params { dbsnpIndex = "${params.genomes_base}/dbsnp_138.b37.vcf.idx" dict = "${params.genomes_base}/human_g1k_v37_decoy.dict" fasta = "${params.genomes_base}/human_g1k_v37_decoy.fasta" - fastafai = "${params.genomes_base}/human_g1k_v37_decoy.fasta.fai" + fastaFai = "${params.genomes_base}/human_g1k_v37_decoy.fasta.fai" intervals = "${params.genomes_base}/wgs_calling_regions_Sarek.list" knownIndels = "${params.genomes_base}/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf" knownIndelsIndex = "${params.genomes_base}/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.idx" @@ -34,7 +34,7 @@ params { germlineResourceIndex = "${params.genomes_base}/GCF_000001405.38.AUTOSOMESXY.COMMON.BIALLELIC.SNPs.with.AF.vcf.gz.tbi" dict = "${params.genomes_base}/Homo_sapiens_assembly38.dict" fasta = "${params.genomes_base}/Homo_sapiens_assembly38.fasta" - fastafai = "${params.genomes_base}/Homo_sapiens_assembly38.fasta.fai" + fastaFai = "${params.genomes_base}/Homo_sapiens_assembly38.fasta.fai" intervals = "${params.genomes_base}/wgs_calling_regions.hg38.bed" knownIndels = "${params.genomes_base}/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" knownIndelsIndex = "${params.genomes_base}/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" @@ -51,7 
+51,7 @@ params { germlineResourceIndex = "${params.genomes_base}/dbsnp_138.b37.small.vcf.idx" dict = "${params.genomes_base}/human_g1k_v37_decoy.small.dict" fasta = "${params.genomes_base}/human_g1k_v37_decoy.small.fasta" - fastafai = "${params.genomes_base}/human_g1k_v37_decoy.small.fasta.fai" + fastaFai = "${params.genomes_base}/human_g1k_v37_decoy.small.fasta.fai" intervals = "${params.genomes_base}/small.intervals" knownIndels = "${params.genomes_base}/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.small.vcf" knownIndelsIndex = "${params.genomes_base}/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.small.vcf.idx" From d20c7c38572d159a9ff79370f42ebcc1d48792c1 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 6 Sep 2019 13:57:40 +0200 Subject: [PATCH 23/26] fix typo in channel pairBamMutect2 --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 5d297f22ab..70d8965702 100644 --- a/main.nf +++ b/main.nf @@ -1103,7 +1103,7 @@ process Mutect2 { label 'cpus_1' input: - set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(intervalBed) from pairBamMuTect2 + set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(intervalBed) from pairBamMutect2 set file(fasta), file(fastaFai), file(dict), file(intervals), file(germlineResource), file(germlineResourceIndex) from Channel.value([ referenceMap.fasta, referenceMap.fastaFai, From f65c1681bfe9156dc90c376b45d91fd592274fa8 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Mon, 9 Sep 2019 09:59:53 +0200 Subject: [PATCH 24/26] code polishing --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 686593b934..ec1ca9b05c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -100,7 +100,7 @@ dag { manifest { name = 'nf-core/sarek' - author = 'Maxime Garcia' + author = 'Maxime 
Garcia, Szilveszter Juhos' homePage = 'https://github.com/nf-core/sarek' description = 'An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing' mainScript = 'main.nf' From ed815b60d89ec9183d95ab4fc44068a610ceef27 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Mon, 9 Sep 2019 10:45:50 +0200 Subject: [PATCH 25/26] MD linting --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a488dd5778..313e89c4ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,7 +42,7 @@ Initial release of `nf-core/sarek`, created with the [nf-core](http://nf-co.re/) - [#20](https://github.com/nf-core/sarek/pull/20) - Add `markdownlint` config file - [#21](https://github.com/nf-core/sarek/pull/21) - Add tests for latest Nextflow version as well - [#21](https://github.com/nf-core/sarek/pull/21) - Add `genomes.config` for genomes without AWS iGenomes -- [#24](https://github.com/nf-core/sarek/pull/24) - Added GATK4 Mutect2 calling and filtering +- [#24](https://github.com/nf-core/sarek/pull/24) - Added GATK4 Mutect2 calling and filtering - [#27](https://github.com/nf-core/sarek/pull/27), [#30](https://github.com/nf-core/sarek/pull/30) - Use Github actions for CI, linting and branch protection - [#31](https://github.com/nf-core/sarek/pull/31) - Add nf-core lint - [#31](https://github.com/nf-core/sarek/pull/31) - Add extra CI to GitHub Actions nf-core extra CI From 3487912606db2544a8efdfa478908d460d49f258 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Mon, 9 Sep 2019 16:10:00 +0200 Subject: [PATCH 26/26] add deprecation info in docs about --noReports --- docs/usage.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index 399ccb59ec..6d179111e4 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -17,6 +17,7 @@ * [`--annotateVCF`](#--annotatevcf) * [`--noGVCF`](#--nogvcf) * [`--skipQC`](#--skipqc) + * [`--noReports`](#--noreports) * 
[`--nucleotidesPerSecond`](#--nucleotidespersecond) * [`--step`](#--step) * [`--tools`](#--tools) @@ -244,6 +245,13 @@ Use this to disable specific QC and Reporting tools. Available: `all`, `bamQC`, `BCFtools`, `FastQC`, `MultiQC`, `samtools`, `vcftools`, `versions` Default: `None` +### `--noReports` + +> :warning: This params is deprecated -- it will be removed in a future release. +> Please check: [`--skipQC`](#--skipqc) + +Use this to disable all QC and Reporting tools. + ### `--nucleotidesPerSecond` Use this to estimate of how many seconds it will take to call variants on any interval, the default value is `1000` is it's not specified in the `.bed` file.