From f8a9ddd6d435c914fb20870b102bed228fc4c475 Mon Sep 17 00:00:00 2001 From: beboche Date: Fri, 17 Aug 2018 12:02:20 +0200 Subject: [PATCH] modifs for callcaching behaviour, removed useless files, modified final outputs --- modules/bwaSamtools.wdl | 5 +++-- modules/cleanUpPanelCaptureTmpDirs.wdl | 16 +++++++++------- modules/computeCoverage.wdl | 4 ++-- modules/computeCoverageClamms.wdl | 4 ++-- modules/computePoorCoverage.wdl | 4 ++-- modules/gatkHaplotypeCaller.wdl | 4 +++- modules/multiqc.wdl | 4 ++-- panelCapture.wdl | 22 +++++++++++++++++----- 8 files changed, 40 insertions(+), 23 deletions(-) diff --git a/modules/bwaSamtools.wdl b/modules/bwaSamtools.wdl index 1885d09..d41f306 100644 --- a/modules/bwaSamtools.wdl +++ b/modules/bwaSamtools.wdl @@ -12,7 +12,8 @@ task bwaSamtools { String BwaExe String Platform File RefFasta - File RefFai + #File RefFai + #RefFai useless for bwa #index files for bwa File RefAmb File RefAnn @@ -32,4 +33,4 @@ task bwaSamtools { File sortedBam = "${OutDir}${SampleID}/${WorkflowType}/${SampleID}.bam" #File sortedBamIndex = "${OutDir}${SampleID}/${WorkflowType}/${SampleID}.bam.bai" } -} \ No newline at end of file +} diff --git a/modules/cleanUpPanelCaptureTmpDirs.wdl b/modules/cleanUpPanelCaptureTmpDirs.wdl index 04582da..cbffa9d 100644 --- a/modules/cleanUpPanelCaptureTmpDirs.wdl +++ b/modules/cleanUpPanelCaptureTmpDirs.wdl @@ -6,8 +6,10 @@ task cleanUpPanelCaptureTmpDirs { String WorkflowType File FinalVcf Array[String] BamArray - String FinalBam - String FinalBamIndex + #String FinalBam + #String FinalBamIndex + #String FinalCram + #String FinalCramIndex Array[String] VcfArray command { if [ -d "${OutDir}${SampleID}/${WorkflowType}/splitted_intervals" ];then \ @@ -24,12 +26,12 @@ task cleanUpPanelCaptureTmpDirs { fi rm ${sep=" " BamArray} rm ${sep=" " VcfArray} - #mv "${FinalBam}" "${OutDir}${SampleID}/${WorkflowType}/${SampleID}.bam" - #mv "${FinalBamIndex}" "${OutDir}${SampleID}/${WorkflowType}/${SampleID}.bam.bai" } output { -
File finalBam = "${FinalBam}" - File finalBamIndex = "${FinalBamIndex}" + #File finalBam = "${FinalBam}" + #File finalBamIndex = "${FinalBamIndex}" + #File finalCram = "${FinalCram}" + #File finalCramIndex = "${FinalCramIndex}" File finalVcf = "${FinalVcf}" } -} \ No newline at end of file +} diff --git a/modules/computeCoverage.wdl b/modules/computeCoverage.wdl index 6203225..9ef1fa8 100644 --- a/modules/computeCoverage.wdl +++ b/modules/computeCoverage.wdl @@ -11,9 +11,9 @@ task computeCoverage { command <<< ${SortExe} -k1,1 -k2,2n -k3,3n ${BedCovFile} \ | ${AwkExe} 'BEGIN {OFS="\t"}{a=($3-$2+1);b=($7/a);print $1,$2,$3,$4,b,"+","+"}' \ - > "${OutDir}/${SampleID}/${WorkflowType}/coverage/${SampleID}_coverage.tsv" + > "${OutDir}${SampleID}/${WorkflowType}/coverage/${SampleID}_coverage.tsv" >>> output { - File TsvCoverageFile = "${OutDir}/${SampleID}/${WorkflowType}/coverage/${SampleID}_coverage.tsv" + File TsvCoverageFile = "${OutDir}${SampleID}/${WorkflowType}/coverage/${SampleID}_coverage.tsv" } } diff --git a/modules/computeCoverageClamms.wdl b/modules/computeCoverageClamms.wdl index bae9993..7b5da72 100644 --- a/modules/computeCoverageClamms.wdl +++ b/modules/computeCoverageClamms.wdl @@ -11,9 +11,9 @@ task computeCoverageClamms { command <<< ${SortExe} -k1,1 -k2,2n -k3,3n ${BedCovFile} \ | awk '{ printf "%s\t%d\t%d\t%.6g\n", $1, $2, $3, $NF/($3-$2); }' \ - > "${OutDir}/${SampleID}/${WorkflowType}/coverage/${SampleID}_coverage.bed" + > "${OutDir}${SampleID}/${WorkflowType}/coverage/${SampleID}_coverage.bed" >>> output { - File ClammsCoverageFile = "${OutDir}/${SampleID}/${WorkflowType}/coverage/${SampleID}_coverage.bed" + File ClammsCoverageFile = "${OutDir}${SampleID}/${WorkflowType}/coverage/${SampleID}_coverage.bed" } } diff --git a/modules/computePoorCoverage.wdl b/modules/computePoorCoverage.wdl index 3e2c6be..d40e202 100644 --- a/modules/computePoorCoverage.wdl +++ b/modules/computePoorCoverage.wdl @@ -21,9 +21,9 @@ task computePoorCoverage { | 
${BedToolsExe} merge -c 4 -o distinct -i - \ | ${AwkExe} -v small_intervall="${BedToolsSmallInterval}" \ 'BEGIN {OFS="\t";print "#chr","start","end","region","size bp","type","UCSC link"} {a=($3-$2+1);if(a "${OutDir}/${SampleID}/${WorkflowType}/coverage/${SampleID}_poor_coverage.tsv" + > "${OutDir}${SampleID}/${WorkflowType}/coverage/${SampleID}_poor_coverage.tsv" >>> output { - File poorCoverageFile = "${OutDir}/${SampleID}/${WorkflowType}/coverage/${SampleID}_poor_coverage.tsv" + File poorCoverageFile = "${OutDir}${SampleID}/${WorkflowType}/coverage/${SampleID}_poor_coverage.tsv" } } diff --git a/modules/gatkHaplotypeCaller.wdl b/modules/gatkHaplotypeCaller.wdl index ba4ab10..a2d211c 100644 --- a/modules/gatkHaplotypeCaller.wdl +++ b/modules/gatkHaplotypeCaller.wdl @@ -15,6 +15,8 @@ task gatkHaplotypeCaller { String intervalName = basename("${GatkInterval}", ".intervals") File BamFile File BamIndex + #when callcaching on, seem to keep Bam and index in the same directory for HC execution + Pair[File, File] Bam = (BamFile, BamIndex) String SwMode command { ${SrunLow} ${GatkExe} HaplotypeCaller \ @@ -28,4 +30,4 @@ task gatkHaplotypeCaller { output { File hcVcf = "${OutDir}${SampleID}/${WorkflowType}/vcfs/${SampleID}.${intervalName}.vcf" } -} \ No newline at end of file +} diff --git a/modules/multiqc.wdl b/modules/multiqc.wdl index c80e4db..c12bc09 100644 --- a/modules/multiqc.wdl +++ b/modules/multiqc.wdl @@ -6,9 +6,9 @@ task multiqc { String MultiqcExe File Vcf command { - ${SrunLow} ${MultiqcExe} -o "${OutDir}${SampleID}/${WorkflowType}/" -n "${SampleID}_multiqc" "${OutDir}${SampleID}/${WorkflowType}/" + ${SrunLow} ${MultiqcExe} -o "${OutDir}${SampleID}/${WorkflowType}/" -n "${SampleID}_multiqc" "${OutDir}${SampleID}/${WorkflowType}/" -f } output { File multiqcHtml = "${OutDir}${SampleID}/${WorkflowType}/${SampleID}_multiqc.html" } -} \ No newline at end of file +} diff --git a/panelCapture.wdl b/panelCapture.wdl index bd55901..554f5c1 100644 --- 
a/panelCapture.wdl +++ b/panelCapture.wdl @@ -38,6 +38,11 @@ import "modules/cleanUpPanelCaptureTmpDirs.wdl" as runCleanUpPanelCaptureTmpDirs import "modules/multiqc.wdl" as runMultiqc workflow panelCapture { + meta { + author: "David Baux" + email: "david.baux(at)inserm.fr" + } + #variables declarations #global String srunHigh String srunLow @@ -145,7 +150,7 @@ workflow panelCapture { BwaExe = bwaExe, Platform = platform, RefFasta = refFasta, - RefFai = refFai, + #RefFai = refFai, RefAmb = refAmb, RefAnn = refAnn, RefBwt = refBwt, @@ -558,8 +563,10 @@ workflow panelCapture { WorkflowType = workflowType, FinalVcf = compressIndexVcf.bgZippedVcf, BamArray = ["${dataPath}" + basename(sambambaMarkDup.markedBam), "${dataPath}" + basename(sambambaMarkDup.markedBamIndex), "${dataPath}" + basename(gatkGatherBQSRReports.gatheredRecalTable), "${dataPath}" + basename(gatkGatherBamFiles.gatheredBam)], - FinalBam = "${dataPath}" + basename(samtoolsSort.sortedBam), - FinalBamIndex = "${dataPath}" + basename(finalIndexing.bamIndex), + #FinalBam = "${dataPath}" + basename(samtoolsSort.sortedBam), + #FinalBamIndex = "${dataPath}" + basename(finalIndexing.bamIndex), + #FinalCram = "${dataPath}" + basename(samtoolsCramConvert.cram), + #FinalCramIndex = "${dataPath}" + basename(samtoolsCramIndex.cramIndex), VcfArray = ["${dataPath}" + basename(gatkGatherVcfs.gatheredHcVcf), "${dataPath}" + basename(gatkGatherVcfs.gatheredHcVcfIndex), "${dataPath}" + basename(jvarkitVcfPolyX.polyxedVcf), "${dataPath}" + basename(jvarkitVcfPolyX.polyxedVcfIndex), "${dataPath}" + basename(gatkSplitVcfs.snpVcf), "${dataPath}" + basename(gatkSplitVcfs.snpVcfIndex), "${dataPath}" + basename(gatkSplitVcfs.indelVcf), "${dataPath}" + basename(gatkSplitVcfs.indelVcfIndex), "${dataPath}" + basename(gatkVariantFiltrationSnp.filteredSnpVcf), "${dataPath}" + basename(gatkVariantFiltrationSnp.filteredSnpVcfIndex), "${dataPath}" + basename(gatkVariantFiltrationIndel.filteredIndelVcf), "${dataPath}" + 
basename(gatkVariantFiltrationIndel.filteredIndelVcfIndex), "${dataPath}" + basename(gatkMergeVcfs.mergedVcf), "${dataPath}" + basename(gatkMergeVcfs.mergedVcfIndex), "${dataPath}" + basename(gatkSortVcf.sortedVcf), "${dataPath}" + basename(gatkSortVcf.sortedVcfIndex)] } call runMultiqc.multiqc { @@ -571,6 +578,11 @@ workflow panelCapture { MultiqcExe = multiqcExe, Vcf = cleanUpPanelCaptureTmpDirs.finalVcf } - - + output { + File FinalVcf = cleanUpPanelCaptureTmpDirs.finalVcf + #FinalBam = "${dataPath}" + basename(samtoolsSort.sortedBam), + #FinalBamIndex = "${dataPath}" + basename(finalIndexing.bamIndex), + File FinalCram = samtoolsCramConvert.cram + File FinalCramIndex = samtoolsCramIndex.cramIndex + } }