From c9b2cd91214c5f37bc53cc8bea0b2e2aea091bfb Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Thu, 5 Jul 2018 09:21:10 +0200 Subject: [PATCH 01/38] QR for posters --- doc/images/GitHub.QR.png | Bin 0 -> 2228 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 doc/images/GitHub.QR.png diff --git a/doc/images/GitHub.QR.png b/doc/images/GitHub.QR.png new file mode 100644 index 0000000000000000000000000000000000000000..f20bee47faa7666cd911fe613a3c1730da673cc2 GIT binary patch literal 2228 zcmeAS@N?(olHy`uVBq!ia0y~yVAKI&4mO}jWo=(6kYY)9^mSxl*x1kgCy^D%XDkkK zcVbv~PUa<$qmb+oH9cpCNftuUEy5>`3@c?cy z9=*8uc=;q&WS;^p!!QNt5V#=;KnEDz*iqM*@axyl30hz)EbEbi1Im2Pj|f7T<~1#> z-s}G!sI03788{CTuIEACElRNE2Zi-#uc@FevLZw{9fnBy2mi5?6uqm_Z z0412Ffg*-8;(*Ww77z`JDb)lDn<=4RC z0Tx_%>^r~|#vXBAiM{dp}>4^@+F5dv%+aA~};Vv&b>ZXva(T zOTVPRR;Jbhqlldg>{S_!*W6$yUjrujq6Lu5)3|9L*wEBk@dHu+FQolyEIgR}tvwGE zvxsy?fSFn#((#=={W5>fFV6ozXufH{I`LTu)j*_CfQX6ShHS<)Uk@Jq_VxeQ|NCo64pp%OUHdE6{$bkxm0N_AL;x%# zTMq^{&or{&f~PuQ$pH))SZDwp3oblBr3zSV%{Ts)aqM4zJ;?d}OWKDGqXs|ua}UQPObVf34% zSFo|z7FcgHsFt`!l%yn~>+BeM_#3o8S2D-&aF0|P4qgDYm? 
zJt!J-^HVa@DsgLAn*APRg9gZk;<9wBq{QM>-O{2=hP2F_R4aXb{gT`Q{oKU#%;ap{ bNXw)Uui!v;cI60tpdJQKS3j3^P6 Date: Thu, 5 Jul 2018 09:52:55 +0200 Subject: [PATCH 02/38] Dockerand singularity images for vcfanno --- containers/vcfanno/Dockerfile | 15 +++++++++++++++ containers/vcfanno/environment.yml | 9 +++++++++ containers/vcfanno/to_build | 5 +++++ 3 files changed, 29 insertions(+) create mode 100644 containers/vcfanno/Dockerfile create mode 100644 containers/vcfanno/environment.yml create mode 100644 containers/vcfanno/to_build diff --git a/containers/vcfanno/Dockerfile b/containers/vcfanno/Dockerfile new file mode 100644 index 0000000000..80a6473d02 --- /dev/null +++ b/containers/vcfanno/Dockerfile @@ -0,0 +1,15 @@ +FROM nfcore/base:latest + +LABEL \ + author="Szilveszter Juhos" \ + description="vcfanno Image for Sarek" \ + maintainer="szilveszter.juhos@scilifelab.se" + +COPY environment.yml / + +RUN \ + conda env create -f /environment.yml && \ + conda clean -a + + # Export PATH +ENV PATH /opt/conda/envs/sarek-vcfanno-2.0/bin:$PATH diff --git a/containers/vcfanno/environment.yml b/containers/vcfanno/environment.yml new file mode 100644 index 0000000000..bd8418268f --- /dev/null +++ b/containers/vcfanno/environment.yml @@ -0,0 +1,9 @@ +# You can use this file to create a conda environment: +# conda env create -f environment.yml +name: sarek-vcfanno-2.0 +channels: + - bioconda + - conda-forge + - defaults +dependencies: + - vcfanno=0.2.8 diff --git a/containers/vcfanno/to_build b/containers/vcfanno/to_build new file mode 100644 index 0000000000..c2d06a77f9 --- /dev/null +++ b/containers/vcfanno/to_build @@ -0,0 +1,5 @@ +docker login szilvajuhos +docker build -t szilvajuhos/sarek-vcfanno:latest . 
+docker images +docker push szilvajuhos/sarek-vcfanno:latest +singularity pull docker://szilvajuhos/sarek-vcfanno:latest From 6f438adc792710c9c226790e479634965d666acf Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Fri, 6 Jul 2018 10:45:49 +0200 Subject: [PATCH 03/38] removing superfluous routines, adding comments --- annotate.nf | 12 ++++++++---- configuration/genomes.config | 3 +++ somaticVC.nf | 28 ++-------------------------- 3 files changed, 13 insertions(+), 30 deletions(-) diff --git a/annotate.nf b/annotate.nf index c1e085f633..23c6b92790 100644 --- a/annotate.nf +++ b/annotate.nf @@ -73,23 +73,23 @@ vcfToAnnotate = Channel.create() vcfNotToAnnotate = Channel.create() if (annotateVCF == []) { +// by default we annotate both germline and somatic results that we can find in the VariantCalling directory Channel.empty().mix( Channel.fromPath("${directoryMap.haplotypecaller}/*.vcf.gz") .flatten().map{vcf -> ['haplotypecaller', vcf]}, Channel.fromPath("${directoryMap.manta}/*SV.vcf.gz") .flatten().map{vcf -> ['manta', vcf]}, - Channel.fromPath("${directoryMap.mutect1}/*.vcf.gz") - .flatten().map{vcf -> ['mutect1', vcf]}, Channel.fromPath("${directoryMap.mutect2}/*.vcf.gz") .flatten().map{vcf -> ['mutect2', vcf]}, - Channel.fromPath("${directoryMap.strelka}/*{somatic,variants}*.vcf.gz") + Channel.fromPath("${directoryMap.strelka}/*{somatic,variants}*.vcf.gz") // Strelka only .flatten().map{vcf -> ['strelka', vcf]}, - Channel.fromPath("${directoryMap.strelkabp}/*{somatic,variants}*.vcf.gz") + Channel.fromPath("${directoryMap.strelkabp}/*{somatic,variants}*.vcf.gz") // Strelka with Manta indel candidates .flatten().map{vcf -> ['strelkabp', vcf]} ).choice(vcfToAnnotate, vcfNotToAnnotate) { annotateTools == [] || (annotateTools != [] && it[0] in annotateTools) ? 
0 : 1 } } else if (annotateTools == []) { +// alternatively, annotate user-submitted VCFs list = "" annotateVCF.each{ list += ",${it}" } list = list.substring(1) @@ -101,6 +101,10 @@ if (annotateVCF == []) { vcfNotToAnnotate.close() +// as now have the list of VCFs to annotate, the first step is to annotate with allele frequencies, if there are any + + + (vcfForBCFtools, vcfForVCFtools, vcfForSnpeff, vcfForVep) = vcfToAnnotate.into(4) vcfForVep = vcfForVep.map { diff --git a/configuration/genomes.config b/configuration/genomes.config index 56097e23fe..1653879a08 100644 --- a/configuration/genomes.config +++ b/configuration/genomes.config @@ -42,6 +42,9 @@ params { knownIndels = "${params.genome_base}/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" knownIndelsIndex = "${params.genome_base}/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" snpeffDb = "GRCh38.86" + // This a nasty-looking list of allele-frequencies files. 
Add/remove files to match to your sets + AF_files = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf" + AF_indexes = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf.idx" } 'smallGRCh37' { acLoci = "${params.genome_base}/1000G_phase3_20130502_SNP_maf0.3.small.loci" diff --git a/somaticVC.nf b/somaticVC.nf index c282b8467e..b9a0da1352 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -70,7 +70,7 @@ referenceMap = defineReferenceMap() toolList = defineToolList() if (!SarekUtils.checkReferenceMap(referenceMap)) exit 1, 'Missing Reference file(s), see --help for more information' -if (!checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' +if (!SarekUtils.checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' if (params.test && params.genome in ['GRCh37', 'GRCh38']) { referenceMap.intervals = file("$workflow.projectDir/repeats/tiny_${params.genome}.list") @@ -620,6 +620,7 @@ if (params.verbose) singleMantaOutput = singleMantaOutput.view { Index : ${it[4].fileName}" } +// Running Strelka Best Practice with Manta indel candidates // For easier joining, remaping channels to idPatient, idSampleNormal, idSampleTumor... 
bamsForStrelkaBP = bamsForStrelkaBP.map { @@ -932,36 +933,11 @@ def checkParameterExistence(it, list) { return true } -def checkParameterList(list, realList) { - // Loop through all parameters to check their existence and spelling - return list.every{ checkParameterExistence(it, realList) } -} - def checkParamReturnFile(item) { params."${item}" = params.genomes[params.genome]."${item}" return file(params."${item}") } -def checkReferenceMap(referenceMap) { - // Loop through all the references files to check their existence - referenceMap.every { - referenceFile, fileToCheck -> - checkRefExistence(referenceFile, fileToCheck) - } -} - -def checkRefExistence(referenceFile, fileToCheck) { - if (fileToCheck instanceof List) return fileToCheck.every{ checkRefExistence(referenceFile, it) } - def f = file(fileToCheck) - // this is an expanded wildcard: we can assume all files exist - if (f instanceof List && f.size() > 0) return true - else if (!f.exists()) { - log.info "Missing references: ${referenceFile} ${fileToCheck}" - return false - } - return true -} - def checkUppmaxProject() { // check if UPPMAX project number is specified return !(workflow.profile == 'slurm' && !params.project) From e00872b0b726730711132c50690e6caef6cd4a23 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Fri, 6 Jul 2018 21:53:51 +0200 Subject: [PATCH 04/38] logging improvements --- lib/SarekUtils.groovy | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/SarekUtils.groovy b/lib/SarekUtils.groovy index 076a5300f9..33716f2297 100644 --- a/lib/SarekUtils.groovy +++ b/lib/SarekUtils.groovy @@ -1,6 +1,9 @@ import static nextflow.Nextflow.file import nextflow.Channel +import groovy.util.logging.Slf4j +@Grab('ch.qos.logback:logback-classic:1.2.1') +@Slf4j class SarekUtils { // Check file extension @@ -100,13 +103,14 @@ class SarekUtils { } // Loop through all the references files to check their existence + static def checkRefExistence(referenceFile, fileToCheck) { if 
(fileToCheck instanceof List) return fileToCheck.every{ SarekUtils.checkRefExistence(referenceFile, it) } def f = file(fileToCheck) // this is an expanded wildcard: we can assume all files exist if (f instanceof List && f.size() > 0) return true else if (!f.exists()) { - this.log.info "Missing references: ${referenceFile} ${fileToCheck}" + log.info "Missing references: ${referenceFile} ${fileToCheck}" return false } return true From f6c60bfd4f1413801d2dfd1575f07df26134f99b Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Fri, 6 Jul 2018 21:54:55 +0200 Subject: [PATCH 05/38] nfcore based image file containing vcfanno and qc modules also --- containers/sarek/Dockerfile | 21 +++++++++------------ containers/sarek/environment.yml | 22 ++++++++++++++++++++++ 2 files changed, 31 insertions(+), 12 deletions(-) create mode 100644 containers/sarek/environment.yml diff --git a/containers/sarek/Dockerfile b/containers/sarek/Dockerfile index 7ef82bbd8a..fd4102563b 100644 --- a/containers/sarek/Dockerfile +++ b/containers/sarek/Dockerfile @@ -1,18 +1,15 @@ -FROM debian:8.9 +FROM nfcore/base:latest LABEL \ - author="Maxime Garcia" \ + author="Szilveszter Juhos" \ description="Image with tools used in Sarek" \ - maintainer="maxime.garcia@scilifelab.se" + maintainer="szilveszter.juhos@scilifelab.se" -# Export PATH -ENV \ - MANTA_INSTALL_PATH=/opt/manta \ - PATH=/opt/bcftools/bin:/opt/htslib/bin:/opt/samtools/bin:$PATH \ - STRELKA_INSTALL_PATH=/opt/strelka +COPY environment.yml / -# Build container -ADD \ - build.sh /usr/bin/ RUN \ - build.sh + conda env create -f /environment.yml && \ + conda clean -a + + # Export PATH +ENV PATH /opt/conda/envs/sarek-core/bin:$PATH diff --git a/containers/sarek/environment.yml b/containers/sarek/environment.yml new file mode 100644 index 0000000000..c0b720030f --- /dev/null +++ b/containers/sarek/environment.yml @@ -0,0 +1,22 @@ +# You can use this file to create a conda environment for this pipeline: +# conda env create -f environment.yml 
+name: sarek-core +channels: + - bioconda + - conda-forge + - defaults + +dependencies: + - conda-forge::openjdk=8.0.144 # Needed for FastQC docker - see bioconda/bioconda-recipes#5026 + - bwa=0.7.17 + - fastqc=0.11.7 + - samtools=1.8 + - gatk4=4.0.3.0 + - qualimap=2.2.2a + - multiqc=1.5 + - manta=1.3.0 + - strelka=2.9.3 + - bcftools=1.8 + - htslib=1.5 + - vcftools=0.1.15 + - vcfanno=0.2.8 From a3f102daaf091c56be6519328608d26fe0f6d990 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Mon, 9 Jul 2018 20:06:09 +0200 Subject: [PATCH 06/38] on a way to unify to nf-core and GATK4 --- configuration/containers.config | 16 +++++------ configuration/singularity-path.config | 38 +++++++++++++-------------- lib/QC.groovy | 2 +- main.nf | 31 +++++++--------------- 4 files changed, 37 insertions(+), 50 deletions(-) diff --git a/configuration/containers.config b/configuration/containers.config index 07b862c441..780a5a47a4 100644 --- a/configuration/containers.config +++ b/configuration/containers.config @@ -15,20 +15,20 @@ process { $CompressVCF.container = "${params.repository}/sarek:${params.tag}" $ConcatVCF.container = "${params.repository}/sarek:${params.tag}" $CreateRecalibrationTable.container = "${params.repository}/gatk:${params.tag}" - $GetVersionAll.container = "${params.repository}/qctools:${params.tag}" + $GetVersionAll.container = "${params.repository}/sarek:${params.tag}" $GetVersionAlleleCount.container = "${params.repository}/runallelecount:${params.tag}" $GetVersionASCAT.container = "${params.repository}/r-base:${params.tag}" - $GetVersionBamQC.container = "${params.repository}/qctools:${params.tag}" + $GetVersionBamQC.container = "${params.repository}/sarek:${params.tag}" $GetVersionBCFtools.container = "${params.repository}/sarek:${params.tag}" $GetVersionBWAsamtools.container = "${params.repository}/sarek:${params.tag}" - $GetVersionFastQC.container = "${params.repository}/qctools:${params.tag}" + $GetVersionFastQC.container = 
"${params.repository}/sarek:${params.tag}" $GetVersionFreeBayes.container = "${params.repository}/freebayes:${params.tag}" $GetVersionGATK.container = "${params.repository}/gatk:${params.tag}" $GetVersionManta.container = "${params.repository}/sarek:${params.tag}" $GetVersionPicard.container = "${params.repository}/picard:${params.tag}" $GetVersionSnpeff.container = {params.genome == 'GRCh38' ? "${params.repository}/snpeffgrch38:${params.tag}" : "${params.repository}/snpeffgrch37:${params.tag}"} $GetVersionStrelka.container = "${params.repository}/sarek:${params.tag}" - $GetVersionVCFtools.container = "${params.repository}/qctools:${params.tag}" + $GetVersionVCFtools.container = "${params.repository}/sarek:${params.tag}" $GetVersionVEP.container = {params.genome == 'GRCh38' ? "${params.repository}/vepgrch38:${params.tag}" : "${params.repository}/vepgrch37:${params.tag}"} $IndelRealigner.container = "${params.repository}/gatk:${params.tag}" $MapReads.container = "${params.repository}/sarek:${params.tag}" @@ -38,15 +38,15 @@ process { $RecalibrateBam.container = "${params.repository}/gatk:${params.tag}" $RunAlleleCount.container = "${params.repository}/runallelecount:${params.tag}" $RunAscat.container = "${params.repository}/r-base:${params.tag}" - $RunBamQC.container = "${params.repository}/qctools:${params.tag}" + $RunBamQC.container = "${params.repository}/sarek:${params.tag}" $RunBcftoolsStats.container = "${params.repository}/sarek:${params.tag}" $RunConvertAlleleCounts.container = "${params.repository}/r-base:${params.tag}" - $RunFastQC.container = "${params.repository}/qctools:${params.tag}" + $RunFastQC.container = "${params.repository}/sarek:${params.tag}" $RunFreeBayes.container = "${params.repository}/freebayes:${params.tag}" $RunGenotypeGVCFs.container = "${params.repository}/gatk:${params.tag}" $RunHaplotypecaller.container = "${params.repository}/gatk:${params.tag}" $RunManta.container = "${params.repository}/sarek:${params.tag}" - $RunMultiQC.container 
= "${params.repository}/qctools:${params.tag}" + $RunMultiQC.container = "${params.repository}/sarek:${params.tag}" $RunMutect1.container = "${params.repository}/mutect1:${params.tag}" $RunMutect2.container = "${params.repository}/gatk:${params.tag}" $RunSamtoolsStats.container = "${params.repository}/sarek:${params.tag}" @@ -55,6 +55,6 @@ process { $RunSnpeff.container = {params.genome == 'GRCh38' ? "${params.repository}/snpeffgrch38:${params.tag}" : "${params.repository}/snpeffgrch37:${params.tag}"} $RunStrelka.container = "${params.repository}/sarek:${params.tag}" $RunStrelkaBP.container = "${params.repository}/sarek:${params.tag}" - $RunVcftools.container = "${params.repository}/qctools:${params.tag}" + $RunVcftools.container = "${params.repository}/sarek:${params.tag}" $RunVEP.container = {params.genome == 'GRCh38' ? "${params.repository}/vepgrch38:${params.tag}" : "${params.repository}/vepgrch37:${params.tag}"} } diff --git a/configuration/singularity-path.config b/configuration/singularity-path.config index 7efea0292f..d67138e4d2 100644 --- a/configuration/singularity-path.config +++ b/configuration/singularity-path.config @@ -14,52 +14,50 @@ singularity { process { $BuildBWAindexes.container = "${params.containerPath}/sarek-${params.tag}.img" - $BuildPicardIndex.container = "${params.containerPath}/picard-${params.tag}.img" + $BuildPicardIndex.container = "${params.containerPath}/sarek-${params.tag}.img" $BuildSAMToolsIndex.container = "${params.containerPath}/sarek-${params.tag}.img" $BuildVCFIndex.container = "${params.containerPath}/igvtools-${params.tag}.img" $CompressVCF.container = "${params.containerPath}/sarek-${params.tag}.img" $ConcatVCF.container = "${params.containerPath}/sarek-${params.tag}.img" - $CreateRecalibrationTable.container = "${params.containerPath}/gatk-${params.tag}.img" - $GetVersionAll.container = "${params.containerPath}/qctools-${params.tag}.img" + $CreateRecalibrationTable.container = 
"${params.containerPath}/sarek-${params.tag}.img" + $GetVersionAll.container = "${params.containerPath}/sarek-${params.tag}.img" $GetVersionAlleleCount.container = "${params.containerPath}/runallelecount-${params.tag}.img" $GetVersionASCAT.container = "${params.containerPath}/r-base-${params.tag}.img" - $GetVersionBamQC.container = "${params.containerPath}/qctools-${params.tag}.img" + $GetVersionBamQC.container = "${params.containerPath}/sarek-${params.tag}.img" $GetVersionBCFtools.container = "${params.containerPath}/sarek-${params.tag}.img" $GetVersionBWAsamtools.container = "${params.containerPath}/sarek-${params.tag}.img" - $GetVersionFastQC.container = "${params.containerPath}/qctools-${params.tag}.img" + $GetVersionFastQC.container = "${params.containerPath}/sarek-${params.tag}.img" $GetVersionFreeBayes.container = "${params.containerPath}/freebayes-${params.tag}.img" - $GetVersionGATK.container = "${params.containerPath}/gatk-${params.tag}.img" + $GetVersionGATK.container = "${params.containerPath}/sarek-${params.tag}.img" $GetVersionManta.container = "${params.containerPath}/sarek-${params.tag}.img" - $GetVersionPicard.container = "${params.containerPath}/picard-${params.tag}.img" $GetVersionSnpeff.container = {params.genome == 'GRCh38' ? "${params.containerPath}/snpeffgrch38-${params.tag}.img" : "${params.containerPath}/snpeffgrch37-${params.tag}.img"} $GetVersionStrelka.container = "${params.containerPath}/sarek-${params.tag}.img" - $GetVersionVCFtools.container = "${params.containerPath}/qctools-${params.tag}.img" + $GetVersionVCFtools.container = "${params.containerPath}/sarek-${params.tag}.img" $GetVersionVEP.container = {params.genome == 'GRCh38' ? 
"${params.containerPath}/vepgrch38-${params.tag}.img" : "${params.containerPath}/vepgrch37-${params.tag}.img"} - $IndelRealigner.container = "${params.containerPath}/gatk-${params.tag}.img" + $IndelRealigner.container = "${params.containerPath}/sarek-${params.tag}.img" $MapReads.container = "${params.containerPath}/sarek-${params.tag}.img" - $MarkDuplicates.container = "${params.containerPath}/picard-${params.tag}.img" + $MarkDuplicates.container = "${params.containerPath}/sarek-${params.tag}.img" $MergeBams.container = "${params.containerPath}/sarek-${params.tag}.img" - $RealignerTargetCreator.container = "${params.containerPath}/gatk-${params.tag}.img" - $RecalibrateBam.container = "${params.containerPath}/gatk-${params.tag}.img" + $RealignerTargetCreator.container = "${params.containerPath}/sarek-${params.tag}.img" + $RecalibrateBam.container = "${params.containerPath}/sarek-${params.tag}.img" $RunAlleleCount.container = "${params.containerPath}/runallelecount-${params.tag}.img" $RunAscat.container = "${params.containerPath}/r-base-${params.tag}.img" - $RunBamQC.container = "${params.containerPath}/qctools-${params.tag}.img" + $RunBamQC.container = "${params.containerPath}/sarek-${params.tag}.img" $RunBcftoolsStats.container = "${params.containerPath}/sarek-${params.tag}.img" $RunConvertAlleleCounts.container = "${params.containerPath}/r-base-${params.tag}.img" - $RunFastQC.container = "${params.containerPath}/qctools-${params.tag}.img" + $RunFastQC.container = "${params.containerPath}/sarek-${params.tag}.img" $RunFreeBayes.container = "${params.containerPath}/freebayes-${params.tag}.img" - $RunGenotypeGVCFs.container = "${params.containerPath}/gatk-${params.tag}.img" - $RunHaplotypecaller.container = "${params.containerPath}/gatk-${params.tag}.img" + $RunGenotypeGVCFs.container = "${params.containerPath}/sarek-${params.tag}.img" + $RunHaplotypecaller.container = "${params.containerPath}/sarek-${params.tag}.img" $RunManta.container = 
"${params.containerPath}/sarek-${params.tag}.img" - $RunMultiQC.container = "${params.containerPath}/qctools-${params.tag}.img" - $RunMutect1.container = "${params.containerPath}/mutect1-${params.tag}.img" - $RunMutect2.container = "${params.containerPath}/gatk-${params.tag}.img" + $RunMultiQC.container = "${params.containerPath}/sarek-${params.tag}.img" + $RunMutect2.container = "${params.containerPath}/sarek-${params.tag}.img" $RunSamtoolsStats.container = "${params.containerPath}/sarek-${params.tag}.img" $RunSingleManta.container = "${params.containerPath}/sarek-${params.tag}.img" $RunSingleStrelka.container = "${params.containerPath}/sarek-${params.tag}.img" $RunSnpeff.container = {params.genome == 'GRCh38' ? "${params.containerPath}/snpeffgrch38-${params.tag}.img" : "${params.containerPath}/snpeffgrch37-${params.tag}.img"} $RunStrelka.container = "${params.containerPath}/sarek-${params.tag}.img" $RunStrelkaBP.container = "${params.containerPath}/sarek-${params.tag}.img" - $RunVcftools.container = "${params.containerPath}/qctools-${params.tag}.img" + $RunVcftools.container = "${params.containerPath}/sarek-${params.tag}.img" $RunVEP.container = {params.genome == 'GRCh38' ? 
"${params.containerPath}/vepgrch38-${params.tag}.img" : "${params.containerPath}/vepgrch37-${params.tag}.img"} } diff --git a/lib/QC.groovy b/lib/QC.groovy index 2b228a8b68..c4474fa3ed 100644 --- a/lib/QC.groovy +++ b/lib/QC.groovy @@ -59,7 +59,7 @@ class QC { // Get GATK version static def getVersionGATK() { """ - echo "GATK version"\$(java -jar \$GATK_HOME/GenomeAnalysisTK.jar --version 2>&1) > v_gatk.txt + gatk-launch WhyCantWeHaveVersionEasily 2>&1| awk -F/ '/java/{for(i=1;i<=NF;i++){if(\$i~/gatk4/){sub("gatk4-","",\$i);print \$i>"v_gatk.txt"}}}' """ } diff --git a/main.nf b/main.nf index f683d397b6..dd38b36910 100644 --- a/main.nf +++ b/main.nf @@ -29,7 +29,7 @@ kate: syntax groovy; space-indent on; indent-width 2; - RunFastQC - Run FastQC for QC on fastq files - MapReads - Map reads with BWA - MergeBams - Merge BAMs if multilane samples - - MarkDuplicates - Mark Duplicates with Picard + - MarkDuplicates - Mark Duplicates with GATK4 - RealignerTargetCreator - Create realignment target intervals - IndelRealigner - Realign BAMs as T/N pair - CreateRecalibrationTable - Create Recalibration Table with BaseRecalibrator @@ -262,15 +262,14 @@ process MarkDuplicates { script: """ - java -Xmx${task.memory.toGiga()}g \ - -jar \$PICARD_HOME/picard.jar MarkDuplicates \ - INPUT=${bam} \ - METRICS_FILE=${bam}.metrics \ - TMP_DIR=. \ - ASSUME_SORTED=true \ - VALIDATION_STRINGENCY=LENIENT \ - CREATE_INDEX=TRUE \ - OUTPUT=${idSample}_${status}.md.bam + gatk-launch --java-options -Xmx${task.memory.toGiga()}g \ + MarkDuplicates \ + --INPUT ${bam} \ + --METRICS_FILE ${bam}.metrics \ + --TMP_DIR . 
\ + --ASSUME_SORT_ORDER coordinate \ + --CREATE_INDEX true \ + --OUTPUT ${idSample}_${status}.md.bam """ } @@ -617,22 +616,12 @@ process GetVersionFastQC { process GetVersionGATK { publishDir directoryMap.version, mode: 'link' + errorStrategy 'ignore' // there is no error-free way to get version from GATK4 output: file("v_*.txt") when: !params.onlyQC script: QC.getVersionGATK() } -process GetVersionPicard { - publishDir directoryMap.version, mode: 'link' - output: file("v_*.txt") - when: step == 'mapping' && !params.onlyQC - - script: - """ - echo "Picard version:"\$(java -jar \$PICARD_HOME/picard.jar MarkDuplicates --version 2>&1) > v_picard.txt - """ -} - /* ================================================================================ = F U N C T I O N S = From 576c1713527fb3c23bece1b70b1dc11d2647252e Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Mon, 9 Jul 2018 21:04:00 +0200 Subject: [PATCH 07/38] It works without indelrealignment until CreateRecalibrationTable --- main.nf | 390 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 201 insertions(+), 189 deletions(-) diff --git a/main.nf b/main.nf index dd38b36910..407d337dbb 100644 --- a/main.nf +++ b/main.nf @@ -254,7 +254,7 @@ process MarkDuplicates { set idPatient, status, idSample, file(bam) from mergedBam output: - set idPatient, file("${idSample}_${status}.md.bam"), file("${idSample}_${status}.md.bai") into duplicates + set idPatient, file("${idSample}_${status}.md.bam"), file("${idSample}_${status}.md.bai") into duplicateMarkedBams set idPatient, status, idSample, val("${idSample}_${status}.md.bam"), val("${idSample}_${status}.md.bai") into markDuplicatesTSV file ("${bam}.metrics") into markDuplicatesReport @@ -281,130 +281,144 @@ markDuplicatesTSV.map { idPatient, status, idSample, bam, bai -> name: 'nonRealigned.tsv', sort: true, storeDir: directoryMap.nonRealigned ) -// Create intervals for realignement using both tumor+normal as input -// Group the marked duplicates 
BAMs for intervals and realign by idPatient -// Grouping also by gender, to make a nicer channel -duplicatesGrouped = Channel.empty() -if (step == 'mapping') duplicatesGrouped = duplicates.groupTuple() -else if (step == 'realign') duplicatesGrouped = bamFiles.map{ - idPatient, status, idSample, bam, bai -> - [idPatient, bam, bai] -}.groupTuple() - -// The duplicatesGrouped channel is duplicated -// one copy goes to the RealignerTargetCreator process -// and the other to the IndelRealigner process -(duplicatesInterval, duplicatesRealign) = duplicatesGrouped.into(2) - -if (params.verbose) duplicatesInterval = duplicatesInterval.view { - "BAMs for RealignerTargetCreator:\n\ - ID : ${it[0]}\n\ - Files : ${it[1].fileName}\n\ - Files : ${it[2].fileName}" -} - -if (params.verbose) duplicatesRealign = duplicatesRealign.view { - "BAMs to join:\n\ - ID : ${it[0]}\n\ - Files : ${it[1].fileName}\n\ - Files : ${it[2].fileName}" -} - -if (params.verbose) markDuplicatesReport = markDuplicatesReport.view { - "MarkDuplicates report:\n\ - File : [${it.fileName}]" -} - -// VCF indexes are added so they will be linked, and not re-created on the fly -// -L "1:131941-141339" \ - -process RealignerTargetCreator { - tag {idPatient} - - input: - set idPatient, file(bam), file(bai) from duplicatesInterval - set file(genomeFile), file(genomeIndex), file(genomeDict), file(knownIndels), file(knownIndelsIndex), file(intervals) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex, - referenceMap.genomeDict, - referenceMap.knownIndels, - referenceMap.knownIndelsIndex, - referenceMap.intervals - ]) - - output: - set idPatient, file("${idPatient}.intervals") into intervals - - when: ( step == 'mapping' || step == 'realign' ) && !params.onlyQC - - script: - bams = bam.collect{"-I ${it}"}.join(' ') - known = knownIndels.collect{"-known ${it}"}.join(' ') - """ - java -Xmx${task.memory.toGiga()}g \ - -jar \$GATK_HOME/GenomeAnalysisTK.jar \ - -T RealignerTargetCreator \ - ${bams} \ 
- -R ${genomeFile} \ - ${known} \ - -nt ${task.cpus} \ - -L ${intervals} \ - -o ${idPatient}.intervals - """ -} - -if (params.verbose) intervals = intervals.view { - "Intervals to join:\n\ - ID : ${it[0]}\n\ - File : [${it[1].fileName}]" -} - -bamsAndIntervals = duplicatesRealign.join(intervals) - -if (params.verbose) bamsAndIntervals = bamsAndIntervals.view { - "BAMs and Intervals joined for IndelRealigner:\n\ - ID : ${it[0]}\n\ - Files : ${it[1].fileName}\n\ - Files : ${it[2].fileName}\n\ - File : [${it[3].fileName}]" -} - -// use nWayOut to split into T/N pair again -process IndelRealigner { - tag {idPatient} - - publishDir directoryMap.nonRecalibrated, mode: 'link' - - input: - set idPatient, file(bam), file(bai), file(intervals) from bamsAndIntervals - set file(genomeFile), file(genomeIndex), file(genomeDict), file(knownIndels), file(knownIndelsIndex) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex, - referenceMap.genomeDict, - referenceMap.knownIndels, - referenceMap.knownIndelsIndex]) - - output: - set idPatient, file("*.real.bam"), file("*.real.bai") into realignedBam mode flatten - - when: ( step == 'mapping' || step == 'realign' ) && !params.onlyQC - - script: - bams = bam.collect{"-I ${it}"}.join(' ') - known = knownIndels.collect{"-known ${it}"}.join(' ') - """ - java -Xmx${task.memory.toGiga()}g \ - -jar \$GATK_HOME/GenomeAnalysisTK.jar \ - -T IndelRealigner \ - ${bams} \ - -R ${genomeFile} \ - -targetIntervals ${intervals} \ - ${known} \ - -nWayOut '.real.bam' - """ -} - -realignedBam = realignedBam.map { +//// Create intervals for realignement using both tumor+normal as input +//// Group the marked duplicates BAMs for intervals and realign by idPatient +//// Grouping also by gender, to make a nicer channel +//duplicatesGrouped = Channel.empty() +//if (step == 'mapping') duplicatesGrouped = duplicates.groupTuple() +//else if (step == 'realign') duplicatesGrouped = bamFiles.map{ +// idPatient, status, idSample, bam, bai -> 
+// [idPatient, bam, bai] +//}.groupTuple() +// +//// The duplicatesGrouped channel is duplicated +//// one copy goes to the RealignerTargetCreator process +//// and the other to the IndelRealigner process +//(duplicatesInterval, duplicatesRealign) = duplicatesGrouped.into(2) +// +//if (params.verbose) duplicatesInterval = duplicatesInterval.view { +// "BAMs for RealignerTargetCreator:\n\ +// ID : ${it[0]}\n\ +// Files : ${it[1].fileName}\n\ +// Files : ${it[2].fileName}" +//} +// +//if (params.verbose) duplicatesRealign = duplicatesRealign.view { +// "BAMs to join:\n\ +// ID : ${it[0]}\n\ +// Files : ${it[1].fileName}\n\ +// Files : ${it[2].fileName}" +//} +// +//if (params.verbose) markDuplicatesReport = markDuplicatesReport.view { +// "MarkDuplicates report:\n\ +// File : [${it.fileName}]" +//} +// +//// VCF indexes are added so they will be linked, and not re-created on the fly +//// -L "1:131941-141339" \ +// +//process RealignerTargetCreator { +// tag {idPatient} +// +// input: +// set idPatient, file(bam), file(bai) from duplicatesInterval +// set file(genomeFile), file(genomeIndex), file(genomeDict), file(knownIndels), file(knownIndelsIndex), file(intervals) from Channel.value([ +// referenceMap.genomeFile, +// referenceMap.genomeIndex, +// referenceMap.genomeDict, +// referenceMap.knownIndels, +// referenceMap.knownIndelsIndex, +// referenceMap.intervals +// ]) +// +// output: +// set idPatient, file("${idPatient}.intervals") into intervals +// +// when: ( step == 'mapping' || step == 'realign' ) && !params.onlyQC +// +// script: +// bams = bam.collect{"-I ${it}"}.join(' ') +// known = knownIndels.collect{"-known ${it}"}.join(' ') +// """ +// gatk-launch --java-options -Xmx${task.memory.toGiga()}g \ +// RealignerTargetCreator \ +// ${bams} \ +// -R ${genomeFile} \ +// ${known} \ +// -nt ${task.cpus} \ +// -L ${intervals} \ +// -o ${idPatient}.intervals +// """ +//} +// +//if (params.verbose) intervals = intervals.view { +// "Intervals to join:\n\ +// ID : 
${it[0]}\n\ +// File : [${it[1].fileName}]" +//} +// +//bamsAndIntervals = duplicatesRealign.join(intervals) +// +//if (params.verbose) bamsAndIntervals = bamsAndIntervals.view { +// "BAMs and Intervals joined for IndelRealigner:\n\ +// ID : ${it[0]}\n\ +// Files : ${it[1].fileName}\n\ +// Files : ${it[2].fileName}\n\ +// File : [${it[3].fileName}]" +//} +// +//// use nWayOut to split into T/N pair again +//process IndelRealigner { +// tag {idPatient} +// +// publishDir directoryMap.nonRecalibrated, mode: 'link' +// +// input: +// set idPatient, file(bam), file(bai), file(intervals) from bamsAndIntervals +// set file(genomeFile), file(genomeIndex), file(genomeDict), file(knownIndels), file(knownIndelsIndex) from Channel.value([ +// referenceMap.genomeFile, +// referenceMap.genomeIndex, +// referenceMap.genomeDict, +// referenceMap.knownIndels, +// referenceMap.knownIndelsIndex]) +// +// output: +// set idPatient, file("*.real.bam"), file("*.real.bai") into realignedBam mode flatten +// +// when: ( step == 'mapping' || step == 'realign' ) && !params.onlyQC +// +// script: +// bams = bam.collect{"-I ${it}"}.join(' ') +// known = knownIndels.collect{"-known ${it}"}.join(' ') +// """ +// java -Xmx${task.memory.toGiga()}g \ +// -jar \$GATK_HOME/GenomeAnalysisTK.jar \ +// -T IndelRealigner \ +// ${bams} \ +// -R ${genomeFile} \ +// -targetIntervals ${intervals} \ +// ${known} \ +// -nWayOut '.real.bam' +// """ +//} +// +//realignedBam = realignedBam.map { +// idPatient, bam, bai -> +// tag = bam.baseName.tokenize('.')[0] +// status = tag[-1..-1].toInteger() +// idSample = tag.take(tag.length()-2) +// [idPatient, status, idSample, bam, bai] +//} +// +//if (params.verbose) realignedBam = realignedBam.view { +// "Realigned BAM to CreateRecalibrationTable:\n\ +// ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ +// Files : [${it[3].fileName}, ${it[4].fileName}]" +//} +// +//(realignedBam, realignedBamToJoin) = realignedBam.into(2) +duplicateMarkedBams = 
duplicateMarkedBams.map { idPatient, bam, bai -> tag = bam.baseName.tokenize('.')[0] status = tag[-1..-1].toInteger() @@ -412,13 +426,13 @@ realignedBam = realignedBam.map { [idPatient, status, idSample, bam, bai] } -if (params.verbose) realignedBam = realignedBam.view { +if (params.verbose) duplicateMarkedBams = duplicateMarkedBams.view { "Realigned BAM to CreateRecalibrationTable:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ Files : [${it[3].fileName}, ${it[4].fileName}]" } -(realignedBam, realignedBamToJoin) = realignedBam.into(2) +(mdBam, mdBamToJoin) = duplicateMarkedBams.into(2) process CreateRecalibrationTable { tag {idPatient + "-" + idSample} @@ -426,7 +440,7 @@ process CreateRecalibrationTable { publishDir directoryMap.nonRecalibrated, mode: 'link', overwrite: false input: - set idPatient, status, idSample, file(bam), file(bai) from realignedBam + set idPatient, status, idSample, file(bam), file(bai) from mdBam // realignedBam set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex), file(knownIndels), file(knownIndelsIndex), file(intervals) from Channel.value([ referenceMap.genomeFile, referenceMap.genomeIndex, @@ -445,23 +459,21 @@ process CreateRecalibrationTable { when: ( step == 'mapping' || step == 'realign' ) && !params.onlyQC script: - known = knownIndels.collect{ "-knownSites ${it}" }.join(' ') + known = knownIndels.collect{ "--known-sites ${it}" }.join(' ') """ - java -Xmx${task.memory.toGiga()}g \ - -Djava.io.tmpdir="/tmp" \ - -jar \$GATK_HOME/GenomeAnalysisTK.jar \ - -T BaseRecalibrator \ + gatk-launch --java-options -Xmx${task.memory.toGiga()}g \ + BaseRecalibrator \ + --input ${bam} \ + --output ${idSample}.recal.table \ + --TMP_DIR /tmp \ -R ${genomeFile} \ - -I ${bam} \ -L ${intervals} \ - --disable_auto_index_creation_and_locking_when_reading_rods \ - -knownSites ${dbsnp} \ + --known-sites ${dbsnp} \ ${known} \ - -nct ${task.cpus} \ - -l INFO \ - -o ${idSample}.recal.table + --verbosity INFO """ 
} + // Create a TSV file to restart from this step recalibrationTableTSV.map { idPatient, status, idSample, bam, bai, recalTable -> gender = patientGenders[idPatient] @@ -470,7 +482,7 @@ recalibrationTableTSV.map { idPatient, status, idSample, bam, bai, recalTable -> name: 'nonRecalibrated.tsv', sort: true, storeDir: directoryMap.nonRecalibrated ) -recalibrationTable = realignedBamToJoin.join(recalibrationTable, by:[0,1,2]) +recalibrationTable = mdBamToJoin.join(recalibrationTable, by:[0,1,2]) if (step == 'recalibrate') recalibrationTable = bamFiles @@ -488,55 +500,55 @@ bamForBamQC = bamForBamQC.map { it[0..4] } bamForSamToolsStats = bamForSamToolsStats.map{ it[0..4] } recalTables = recalTables.map { [it[0]] + it[2..-1] } // remove status - -process RecalibrateBam { - tag {idPatient + "-" + idSample} - - publishDir directoryMap.recalibrated, mode: 'link' - - input: - set idPatient, status, idSample, file(bam), file(bai), file(recalibrationReport) from recalibrationTable - set file(genomeFile), file(genomeIndex), file(genomeDict), file(intervals) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex, - referenceMap.genomeDict, - referenceMap.intervals, - ]) - - output: - set idPatient, status, idSample, file("${idSample}.recal.bam"), file("${idSample}.recal.bai") into recalibratedBam, recalibratedBamForStats - set idPatient, status, idSample, val("${idSample}.recal.bam"), val("${idSample}.recal.bai") into recalibratedBamTSV - - // HaplotypeCaller can do BQSR on the fly, so do not create a - // recalibrated BAM explicitly. 
- when: params.explicitBqsrNeeded && !params.onlyQC - - script: - """ - java -Xmx${task.memory.toGiga()}g \ - -jar \$GATK_HOME/GenomeAnalysisTK.jar \ - -T PrintReads \ - -R ${genomeFile} \ - -I ${bam} \ - -L ${intervals} \ - --BQSR ${recalibrationReport} \ - -o ${idSample}.recal.bam - """ -} -// Creating a TSV file to restart from this step -recalibratedBamTSV.map { idPatient, status, idSample, bam, bai -> - gender = patientGenders[idPatient] - "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.recalibrated}/${bam}\t${directoryMap.recalibrated}/${bai}\n" -}.collectFile( - name: 'recalibrated.tsv', sort: true, storeDir: directoryMap.recalibrated -) - -if (params.verbose) recalibratedBam = recalibratedBam.view { - "Recalibrated BAM for variant Calling:\n\ - ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ - Files : [${it[3].fileName}, ${it[4].fileName}]" -} - +// +//process RecalibrateBam { +// tag {idPatient + "-" + idSample} +// +// publishDir directoryMap.recalibrated, mode: 'link' +// +// input: +// set idPatient, status, idSample, file(bam), file(bai), file(recalibrationReport) from recalibrationTable +// set file(genomeFile), file(genomeIndex), file(genomeDict), file(intervals) from Channel.value([ +// referenceMap.genomeFile, +// referenceMap.genomeIndex, +// referenceMap.genomeDict, +// referenceMap.intervals, +// ]) +// +// output: +// set idPatient, status, idSample, file("${idSample}.recal.bam"), file("${idSample}.recal.bai") into recalibratedBam, recalibratedBamForStats +// set idPatient, status, idSample, val("${idSample}.recal.bam"), val("${idSample}.recal.bai") into recalibratedBamTSV +// +// // HaplotypeCaller can do BQSR on the fly, so do not create a +// // recalibrated BAM explicitly. 
+// when: params.explicitBqsrNeeded && !params.onlyQC +// +// script: +// """ +// java -Xmx${task.memory.toGiga()}g \ +// -jar \$GATK_HOME/GenomeAnalysisTK.jar \ +// -T PrintReads \ +// -R ${genomeFile} \ +// -I ${bam} \ +// -L ${intervals} \ +// --BQSR ${recalibrationReport} \ +// -o ${idSample}.recal.bam +// """ +//} +//// Creating a TSV file to restart from this step +//recalibratedBamTSV.map { idPatient, status, idSample, bam, bai -> +// gender = patientGenders[idPatient] +// "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.recalibrated}/${bam}\t${directoryMap.recalibrated}/${bai}\n" +//}.collectFile( +// name: 'recalibrated.tsv', sort: true, storeDir: directoryMap.recalibrated +//) +// +//if (params.verbose) recalibratedBam = recalibratedBam.view { +// "Recalibrated BAM for variant Calling:\n\ +// ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ +// Files : [${it[3].fileName}, ${it[4].fileName}]" +//} +// process RunSamtoolsStats { tag {idPatient + "-" + idSample} From 0aea380b8b9ba7a35e9e22defc5b27c48d6eaaa4 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Mon, 9 Jul 2018 22:07:41 +0200 Subject: [PATCH 08/38] new target directories for Prerocessing, works to the end of ApplyBQSR --- lib/QC.groovy | 2 +- lib/SarekUtils.groovy | 3 +- main.nf | 251 ++++++++++-------------------------------- 3 files changed, 58 insertions(+), 198 deletions(-) diff --git a/lib/QC.groovy b/lib/QC.groovy index c4474fa3ed..5ee03b83cf 100644 --- a/lib/QC.groovy +++ b/lib/QC.groovy @@ -59,7 +59,7 @@ class QC { // Get GATK version static def getVersionGATK() { """ - gatk-launch WhyCantWeHaveVersionEasily 2>&1| awk -F/ '/java/{for(i=1;i<=NF;i++){if(\$i~/gatk4/){sub("gatk4-","",\$i);print \$i>"v_gatk.txt"}}}' + gatk-launch ApplyBQSR --help 2>&1| awk -F/ '/java/{for(i=1;i<=NF;i++){if(\$i~/gatk4/){sub("gatk4-","",\$i);print \$i>"v_gatk.txt"}}}' """ } diff --git a/lib/SarekUtils.groovy b/lib/SarekUtils.groovy index 33716f2297..a21ec442bb 100644 --- 
a/lib/SarekUtils.groovy +++ b/lib/SarekUtils.groovy @@ -119,8 +119,7 @@ class SarekUtils { // Define map of directories static def defineDirectoryMap(outDir) { return [ - 'nonRealigned' : "${outDir}/Preprocessing/NonRealigned", - 'nonRecalibrated' : "${outDir}/Preprocessing/NonRecalibrated", + 'duplicateMarked' : "${outDir}/Preprocessing/DuplicateMarked", 'recalibrated' : "${outDir}/Preprocessing/Recalibrated", 'ascat' : "${outDir}/VariantCalling/Ascat", 'freebayes' : "${outDir}/VariantCalling/FreeBayes", diff --git a/main.nf b/main.nf index 407d337dbb..f2e137fb13 100644 --- a/main.nf +++ b/main.nf @@ -83,8 +83,7 @@ if (params.sample) tsvPath = params.sample if (!params.sample && !params.sampleDir) { tsvPaths = [ 'mapping': "${workflow.projectDir}/Sarek-data/testdata/tsv/tiny.tsv", - 'realign': "${directoryMap.nonRealigned}/nonRealigned.tsv", - 'recalibrate': "${directoryMap.nonRecalibrated}/nonRecalibrated.tsv" + 'recalibrate': "${directoryMap.duplicateMarked}/duplicateMarked.tsv" ] if (params.test || step != 'mapping') tsvPath = tsvPaths[step] } @@ -247,7 +246,7 @@ process MarkDuplicates { publishDir params.outDir, mode: 'link', saveAs: { if (it == "${bam}.metrics") "${directoryMap.markDuplicatesQC}/${it}" - else "${directoryMap.nonRealigned}/${it}" + else "${directoryMap.duplicateMarked}/${it}" } input: @@ -276,148 +275,11 @@ process MarkDuplicates { // Creating a TSV file to restart from this step markDuplicatesTSV.map { idPatient, status, idSample, bam, bai -> gender = patientGenders[idPatient] - "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.nonRealigned}/${bam}\t${directoryMap.nonRealigned}/${bai}\n" + "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.duplicateMarked}/${bam}\t${directoryMap.duplicateMarked}/${bai}\n" }.collectFile( - name: 'nonRealigned.tsv', sort: true, storeDir: directoryMap.nonRealigned + name: 'duplicateMarked.tsv', sort: true, storeDir: directoryMap.duplicateMarked ) -//// Create intervals for 
realignement using both tumor+normal as input -//// Group the marked duplicates BAMs for intervals and realign by idPatient -//// Grouping also by gender, to make a nicer channel -//duplicatesGrouped = Channel.empty() -//if (step == 'mapping') duplicatesGrouped = duplicates.groupTuple() -//else if (step == 'realign') duplicatesGrouped = bamFiles.map{ -// idPatient, status, idSample, bam, bai -> -// [idPatient, bam, bai] -//}.groupTuple() -// -//// The duplicatesGrouped channel is duplicated -//// one copy goes to the RealignerTargetCreator process -//// and the other to the IndelRealigner process -//(duplicatesInterval, duplicatesRealign) = duplicatesGrouped.into(2) -// -//if (params.verbose) duplicatesInterval = duplicatesInterval.view { -// "BAMs for RealignerTargetCreator:\n\ -// ID : ${it[0]}\n\ -// Files : ${it[1].fileName}\n\ -// Files : ${it[2].fileName}" -//} -// -//if (params.verbose) duplicatesRealign = duplicatesRealign.view { -// "BAMs to join:\n\ -// ID : ${it[0]}\n\ -// Files : ${it[1].fileName}\n\ -// Files : ${it[2].fileName}" -//} -// -//if (params.verbose) markDuplicatesReport = markDuplicatesReport.view { -// "MarkDuplicates report:\n\ -// File : [${it.fileName}]" -//} -// -//// VCF indexes are added so they will be linked, and not re-created on the fly -//// -L "1:131941-141339" \ -// -//process RealignerTargetCreator { -// tag {idPatient} -// -// input: -// set idPatient, file(bam), file(bai) from duplicatesInterval -// set file(genomeFile), file(genomeIndex), file(genomeDict), file(knownIndels), file(knownIndelsIndex), file(intervals) from Channel.value([ -// referenceMap.genomeFile, -// referenceMap.genomeIndex, -// referenceMap.genomeDict, -// referenceMap.knownIndels, -// referenceMap.knownIndelsIndex, -// referenceMap.intervals -// ]) -// -// output: -// set idPatient, file("${idPatient}.intervals") into intervals -// -// when: ( step == 'mapping' || step == 'realign' ) && !params.onlyQC -// -// script: -// bams = bam.collect{"-I 
${it}"}.join(' ') -// known = knownIndels.collect{"-known ${it}"}.join(' ') -// """ -// gatk-launch --java-options -Xmx${task.memory.toGiga()}g \ -// RealignerTargetCreator \ -// ${bams} \ -// -R ${genomeFile} \ -// ${known} \ -// -nt ${task.cpus} \ -// -L ${intervals} \ -// -o ${idPatient}.intervals -// """ -//} -// -//if (params.verbose) intervals = intervals.view { -// "Intervals to join:\n\ -// ID : ${it[0]}\n\ -// File : [${it[1].fileName}]" -//} -// -//bamsAndIntervals = duplicatesRealign.join(intervals) -// -//if (params.verbose) bamsAndIntervals = bamsAndIntervals.view { -// "BAMs and Intervals joined for IndelRealigner:\n\ -// ID : ${it[0]}\n\ -// Files : ${it[1].fileName}\n\ -// Files : ${it[2].fileName}\n\ -// File : [${it[3].fileName}]" -//} -// -//// use nWayOut to split into T/N pair again -//process IndelRealigner { -// tag {idPatient} -// -// publishDir directoryMap.nonRecalibrated, mode: 'link' -// -// input: -// set idPatient, file(bam), file(bai), file(intervals) from bamsAndIntervals -// set file(genomeFile), file(genomeIndex), file(genomeDict), file(knownIndels), file(knownIndelsIndex) from Channel.value([ -// referenceMap.genomeFile, -// referenceMap.genomeIndex, -// referenceMap.genomeDict, -// referenceMap.knownIndels, -// referenceMap.knownIndelsIndex]) -// -// output: -// set idPatient, file("*.real.bam"), file("*.real.bai") into realignedBam mode flatten -// -// when: ( step == 'mapping' || step == 'realign' ) && !params.onlyQC -// -// script: -// bams = bam.collect{"-I ${it}"}.join(' ') -// known = knownIndels.collect{"-known ${it}"}.join(' ') -// """ -// java -Xmx${task.memory.toGiga()}g \ -// -jar \$GATK_HOME/GenomeAnalysisTK.jar \ -// -T IndelRealigner \ -// ${bams} \ -// -R ${genomeFile} \ -// -targetIntervals ${intervals} \ -// ${known} \ -// -nWayOut '.real.bam' -// """ -//} -// -//realignedBam = realignedBam.map { -// idPatient, bam, bai -> -// tag = bam.baseName.tokenize('.')[0] -// status = tag[-1..-1].toInteger() -// idSample = 
tag.take(tag.length()-2) -// [idPatient, status, idSample, bam, bai] -//} -// -//if (params.verbose) realignedBam = realignedBam.view { -// "Realigned BAM to CreateRecalibrationTable:\n\ -// ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ -// Files : [${it[3].fileName}, ${it[4].fileName}]" -//} -// -//(realignedBam, realignedBamToJoin) = realignedBam.into(2) duplicateMarkedBams = duplicateMarkedBams.map { idPatient, bam, bai -> tag = bam.baseName.tokenize('.')[0] @@ -437,7 +299,7 @@ if (params.verbose) duplicateMarkedBams = duplicateMarkedBams.view { process CreateRecalibrationTable { tag {idPatient + "-" + idSample} - publishDir directoryMap.nonRecalibrated, mode: 'link', overwrite: false + publishDir directoryMap.duplicateMarked, mode: 'link', overwrite: false input: set idPatient, status, idSample, file(bam), file(bai) from mdBam // realignedBam @@ -477,9 +339,9 @@ process CreateRecalibrationTable { // Create a TSV file to restart from this step recalibrationTableTSV.map { idPatient, status, idSample, bam, bai, recalTable -> gender = patientGenders[idPatient] - "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.nonRecalibrated}/${bam}\t${directoryMap.nonRecalibrated}/${bai}\t${directoryMap.nonRecalibrated}/${recalTable}\n" + "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.duplicateMarked}/${bam}\t${directoryMap.duplicateMarked}/${bai}\t${directoryMap.duplicateMarked}/${recalTable}\n" }.collectFile( - name: 'nonRecalibrated.tsv', sort: true, storeDir: directoryMap.nonRecalibrated + name: 'duplicateMarked.tsv', sort: true, storeDir: directoryMap.duplicateMarked ) recalibrationTable = mdBamToJoin.join(recalibrationTable, by:[0,1,2]) @@ -500,55 +362,55 @@ bamForBamQC = bamForBamQC.map { it[0..4] } bamForSamToolsStats = bamForSamToolsStats.map{ it[0..4] } recalTables = recalTables.map { [it[0]] + it[2..-1] } // remove status -// -//process RecalibrateBam { -// tag {idPatient + "-" + idSample} -// -// publishDir 
directoryMap.recalibrated, mode: 'link' -// -// input: -// set idPatient, status, idSample, file(bam), file(bai), file(recalibrationReport) from recalibrationTable -// set file(genomeFile), file(genomeIndex), file(genomeDict), file(intervals) from Channel.value([ -// referenceMap.genomeFile, -// referenceMap.genomeIndex, -// referenceMap.genomeDict, -// referenceMap.intervals, -// ]) -// -// output: -// set idPatient, status, idSample, file("${idSample}.recal.bam"), file("${idSample}.recal.bai") into recalibratedBam, recalibratedBamForStats -// set idPatient, status, idSample, val("${idSample}.recal.bam"), val("${idSample}.recal.bai") into recalibratedBamTSV -// -// // HaplotypeCaller can do BQSR on the fly, so do not create a -// // recalibrated BAM explicitly. -// when: params.explicitBqsrNeeded && !params.onlyQC -// -// script: -// """ -// java -Xmx${task.memory.toGiga()}g \ -// -jar \$GATK_HOME/GenomeAnalysisTK.jar \ -// -T PrintReads \ -// -R ${genomeFile} \ -// -I ${bam} \ -// -L ${intervals} \ -// --BQSR ${recalibrationReport} \ -// -o ${idSample}.recal.bam -// """ -//} -//// Creating a TSV file to restart from this step -//recalibratedBamTSV.map { idPatient, status, idSample, bam, bai -> -// gender = patientGenders[idPatient] -// "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.recalibrated}/${bam}\t${directoryMap.recalibrated}/${bai}\n" -//}.collectFile( -// name: 'recalibrated.tsv', sort: true, storeDir: directoryMap.recalibrated -//) -// -//if (params.verbose) recalibratedBam = recalibratedBam.view { -// "Recalibrated BAM for variant Calling:\n\ -// ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ -// Files : [${it[3].fileName}, ${it[4].fileName}]" -//} -// + +process RecalibrateBam { + tag {idPatient + "-" + idSample} + + publishDir directoryMap.recalibrated, mode: 'link' + + input: + set idPatient, status, idSample, file(bam), file(bai), file(recalibrationReport) from recalibrationTable + set file(genomeFile), file(genomeIndex), 
file(genomeDict), file(intervals) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex, + referenceMap.genomeDict, + referenceMap.intervals, + ]) + + output: + set idPatient, status, idSample, file("${idSample}.recal.bam"), file("${idSample}.recal.bai") into recalibratedBam, recalibratedBamForStats + set idPatient, status, idSample, val("${idSample}.recal.bam"), val("${idSample}.recal.bai") into recalibratedBamTSV + + // HaplotypeCaller can do BQSR on the fly, so do not create a + // recalibrated BAM explicitly. + when: params.explicitBqsrNeeded && !params.onlyQC + + script: + """ + gatk-launch --java-options -Xmx${task.memory.toGiga()}g \ + ApplyBQSR \ + -R ${genomeFile} \ + --input ${bam} \ + --output ${idSample}.recal.bam \ + -L ${intervals} \ + --create-output-bam-index true \ + --bqsr-recal-file ${recalibrationReport} + """ +} +// Creating a TSV file to restart from this step +recalibratedBamTSV.map { idPatient, status, idSample, bam, bai -> + gender = patientGenders[idPatient] + "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.recalibrated}/${bam}\t${directoryMap.recalibrated}/${bai}\n" +}.collectFile( + name: 'recalibrated.tsv', sort: true, storeDir: directoryMap.recalibrated +) + +if (params.verbose) recalibratedBam = recalibratedBam.view { + "Recalibrated BAM for variant Calling:\n\ + ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ + Files : [${it[3].fileName}, ${it[4].fileName}]" +} + process RunSamtoolsStats { tag {idPatient + "-" + idSample} @@ -628,7 +490,6 @@ process GetVersionFastQC { process GetVersionGATK { publishDir directoryMap.version, mode: 'link' - errorStrategy 'ignore' // there is no error-free way to get version from GATK4 output: file("v_*.txt") when: !params.onlyQC script: QC.getVersionGATK() From 72a1b5012798ec623361a2e468aacd035a893fab Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Tue, 10 Jul 2018 21:40:01 +0200 Subject: [PATCH 09/38] HaplotypeCaller works with GATK4 --- germlineVC.nf 
| 35 +++++++++++++++-------------------- main.nf | 4 ++-- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/germlineVC.nf b/germlineVC.nf index ea3ab75700..9124d30395 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -310,20 +310,15 @@ process RunHaplotypecaller { when: 'haplotypecaller' in tools && !params.onlyQC script: - BQSR = (recalTable != null) ? "--BQSR $recalTable" : '' """ - java -Xmx${task.memory.toGiga()}g \ - -jar \$GATK_HOME/GenomeAnalysisTK.jar \ - -T HaplotypeCaller \ - --emitRefConfidence GVCF \ - -pairHMM LOGLESS_CACHING \ - -R ${genomeFile} \ - --dbsnp ${dbsnp} \ - ${BQSR} \ - -I ${bam} \ - -L ${intervalBed} \ - --disable_auto_index_creation_and_locking_when_reading_rods \ - -o ${intervalBed.baseName}_${idSample}.g.vcf + gatk-launch --java-options "-Xmx${task.memory.toGiga()}g -Xms6000m -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10" \ + HaplotypeCaller \ + -R ${genomeFile} \ + -I ${bam} \ + -L ${intervalBed} \ + --dbsnp ${dbsnp} \ + -O ${intervalBed.baseName}_${idSample}.g.vcf \ + --emit-ref-confidence GVCF """ } hcGenomicVCF = hcGenomicVCF.groupTuple(by:[0,1,2,3]) @@ -349,17 +344,17 @@ process RunGenotypeGVCFs { when: 'haplotypecaller' in tools && !params.onlyQC script: - // Using -L is important for speed + // Using -L is important for speed and we have to index the interval files also """ - java -Xmx${task.memory.toGiga()}g \ - -jar \$GATK_HOME/GenomeAnalysisTK.jar \ - -T GenotypeGVCFs \ + gatk-launch IndexFeatureFile -F ${gvcf} + + gatk-launch --java-options -Xmx${task.memory.toGiga()}g \ + GenotypeGVCFs \ -R ${genomeFile} \ -L ${intervalBed} \ --dbsnp ${dbsnp} \ - --variant ${gvcf} \ - --disable_auto_index_creation_and_locking_when_reading_rods \ - -o ${intervalBed.baseName}_${idSample}.vcf + -V ${gvcf} \ + -O ${intervalBed.baseName}_${idSample}.vcf """ } hcGenotypedVCF = hcGenotypedVCF.groupTuple(by:[0,1,2,3]) diff --git a/main.nf b/main.nf index f2e137fb13..a452c26550 100644 --- a/main.nf +++ b/main.nf @@ -381,9 +381,9 @@ process 
RecalibrateBam { set idPatient, status, idSample, file("${idSample}.recal.bam"), file("${idSample}.recal.bai") into recalibratedBam, recalibratedBamForStats set idPatient, status, idSample, val("${idSample}.recal.bam"), val("${idSample}.recal.bai") into recalibratedBamTSV - // HaplotypeCaller can do BQSR on the fly, so do not create a + // GATK4 HaplotypeCaller can not do BQSR on the fly, so we have to create a // recalibrated BAM explicitly. - when: params.explicitBqsrNeeded && !params.onlyQC + when: !params.onlyQC script: """ From c6137bf07cc909a8362f2ff6dc226a35fcbf3485 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Tue, 10 Jul 2018 22:49:51 +0200 Subject: [PATCH 10/38] path and version fixed for both manta and strelka --- germlineVC.nf | 4 ++-- lib/QC.groovy | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/germlineVC.nf b/germlineVC.nf index 9124d30395..99017563cd 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -457,7 +457,7 @@ process RunSingleStrelka { script: """ - \$STRELKA_INSTALL_PATH/bin/configureStrelkaGermlineWorkflow.py \ + configureStrelkaGermlineWorkflow.py \ --bam ${bam} \ --referenceFasta ${genomeFile} \ --runDir Strelka @@ -501,7 +501,7 @@ process RunSingleManta { script: """ - \$MANTA_INSTALL_PATH/bin/configManta.py \ + configManta.py \ --bam ${bam} \ --reference ${genomeFile} \ --runDir Manta diff --git a/lib/QC.groovy b/lib/QC.groovy index 5ee03b83cf..e3c6416f76 100644 --- a/lib/QC.groovy +++ b/lib/QC.groovy @@ -66,7 +66,7 @@ class QC { // Get Manta version static def getVersionManta() { """ - cat \$MANTA_INSTALL_PATH/lib/python/configBuildTimeInfo.py | grep workflowVersion > v_manta.txt + configManta.py --version > v_manta.txt """ } @@ -80,7 +80,7 @@ class QC { // Get Strelka version static def getVersionStrelka() { """ - cat \$STRELKA_INSTALL_PATH/lib/python/configBuildTimeInfo.py | grep workflowVersion > v_strelka.txt + configureStrelkaGermlineWorkflow.py --version > v_strelka.txt """ } From 
bba73f12cee5b86d5012cb8c140bdab7eb6203bb Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Wed, 11 Jul 2018 19:02:48 +0200 Subject: [PATCH 11/38] Mutect2 GATK4 initial dumb version --- somaticVC.nf | 87 +++++++++++++--------------------------------------- 1 file changed, 21 insertions(+), 66 deletions(-) diff --git a/somaticVC.nf b/somaticVC.nf index b9a0da1352..da7f17363d 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -29,10 +29,9 @@ kate: syntax groovy; space-indent on; indent-width 2; - RunSamtoolsStats - Run Samtools stats on recalibrated BAM files - RunBamQC - Run qualimap BamQC on recalibrated BAM files - CreateIntervalBeds - Create and sort intervals into bed files - - RunMutect1 - Run MuTect1 for Variant Calling (Parallelized processes) - RunMutect2 - Run MuTect2 for Variant Calling (Parallelized processes) - RunFreeBayes - Run FreeBayes for Variant Calling (Parallelized processes) - - ConcatVCF - Merge results from MuTect1 and MuTect2 + - ConcatVCF - Merge results from paralellized variant callers - RunStrelka - Run Strelka for Variant Calling - RunManta - Run Manta for Structural Variant Calling - RunSingleManta - Run Manta for Single Structural Variant Calling @@ -76,9 +75,6 @@ if (params.test && params.genome in ['GRCh37', 'GRCh38']) { referenceMap.intervals = file("$workflow.projectDir/repeats/tiny_${params.genome}.list") } -// TODO -// MuTect and Mutect2 could be run without a recalibrated BAM (they support -// the --BQSR option), but this is not implemented, yet. // TODO // FreeBayes does not need recalibrated BAMs, but we need to test whether // the channels are set up correctly when we disable it @@ -200,10 +196,9 @@ bamsTumor = bamsTumor.map { idPatient, status, idSample, bam, bai -> [idPatient, // We know that MuTect2 (and other somatic callers) are notoriously slow. // To speed them up we are chopping the reference into smaller pieces. -// (see repeats/centromeres.list). // Do variant calling by this intervals, and re-merge the VCFs. 
-// Since we are on a cluster, this can parallelize the variant call processes. -// And push down the variant call wall clock time significanlty. +// Since we are on a cluster or a multi-CPU machine, this can parallelize the +// variant call processes and push down the variant call wall clock time significanlty. process CreateIntervalBeds { tag {intervals.fileName} @@ -285,48 +280,10 @@ bamsAll = bamsAll.map { bamsTumorNormalIntervals = bamsAll.spread(bedIntervals) -// MuTect1, MuTect2, FreeBayes -(bamsFMT1, bamsFMT2, bamsFFB) = bamsTumorNormalIntervals.into(3) - -process RunMutect1 { - tag {idSampleTumor + "_vs_" + idSampleNormal + "-" + intervalBed.baseName} - - input: - set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(intervalBed) from bamsFMT1 - set file(genomeFile), file(genomeIndex), file(genomeDict), file(dbsnp), file(dbsnpIndex), file(cosmic), file(cosmicIndex) from Channel.value([ - referenceMap.genomeFile, - referenceMap.genomeIndex, - referenceMap.genomeDict, - referenceMap.dbsnp, - referenceMap.dbsnpIndex, - referenceMap.cosmic, - referenceMap.cosmicIndex - ]) - - output: - set val("mutect1"), idPatient, idSampleNormal, idSampleTumor, file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into mutect1Output - - when: 'mutect1' in tools && !params.onlyQC - - script: - """ - java -Xmx${task.memory.toGiga()}g \ - -jar \$MUTECT_HOME/muTect.jar \ - -T MuTect \ - -R ${genomeFile} \ - --cosmic ${cosmic} \ - --dbsnp ${dbsnp} \ - -I:normal ${bamNormal} \ - -I:tumor ${bamTumor} \ - -L ${intervalBed} \ - --disable_auto_index_creation_and_locking_when_reading_rods \ - --out ${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.call_stats.out \ - --vcf ${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf - """ -} - -mutect1Output = mutect1Output.groupTuple(by:[0,1,2,3]) +// MuTect2, FreeBayes +( bamsFMT2, bamsFFB) = bamsTumorNormalIntervals.into(3) +// This 
will give as a list of unfiltered calls for MuTect2. process RunMutect2 { tag {idSampleTumor + "_vs_" + idSampleNormal + "-" + intervalBed.baseName} @@ -349,19 +306,18 @@ process RunMutect2 { script: """ - java -Xmx${task.memory.toGiga()}g \ - -jar \$GATK_HOME/GenomeAnalysisTK.jar \ - -T MuTect2 \ - -R ${genomeFile} \ - --cosmic ${cosmic} \ - --dbsnp ${dbsnp} \ - -I:normal ${bamNormal} \ - -I:tumor ${bamTumor} \ - --disable_auto_index_creation_and_locking_when_reading_rods \ - -L ${intervalBed} \ - -o ${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf + gatk-launch --java-options "-Xmx${task.memory.toGiga()}g" \ + Mutect2 \ + -R ${genomeFile}\ + -I ${bamTumor} -tumor ${idSampleTumor} \ + -I ${bamNormal} -normal ${idSampleNormal} \ + -L ${intervalBed} \ + -O ${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf """ } +// --germline_resource af-only-gnomad.vcf.gz \ +// --normal_panel pon.vcf.gz \ +// --dbsnp ${dbsnp} \ mutect2Output = mutect2Output.groupTuple(by:[0,1,2,3]) @@ -400,7 +356,7 @@ freebayesOutput = freebayesOutput.groupTuple(by:[0,1,2,3]) // we are merging the VCFs that are called separatelly for different intervals // so we can have a single sorted VCF containing all the calls for a given caller -vcfsToMerge = mutect1Output.mix(mutect2Output, freebayesOutput) +vcfsToMerge = mutect2Output.mix(freebayesOutput) if (params.verbose) vcfsToMerge = vcfsToMerge.view { "VCFs To be merged:\n\ Tool : ${it[0]}\tID : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\ @@ -492,7 +448,7 @@ process RunStrelka { script: """ - \$STRELKA_INSTALL_PATH/bin/configureStrelkaSomaticWorkflow.py \ + configureStrelkaSomaticWorkflow.py \ --tumor ${bamTumor} \ --normal ${bamNormal} \ --referenceFasta ${genomeFile} \ @@ -538,7 +494,7 @@ process RunManta { script: """ - \$MANTA_INSTALL_PATH/bin/configManta.py \ + configManta.py \ --normalBam ${bamNormal} \ --tumorBam ${bamTumor} \ --reference ${genomeFile} \ @@ -591,7 +547,7 @@ process RunSingleManta { script: """ - 
\$MANTA_INSTALL_PATH/bin/configManta.py \ + configManta.py \ --tumorBam ${bam} \ --reference ${genomeFile} \ --runDir Manta @@ -651,7 +607,7 @@ process RunStrelkaBP { script: """ - \$STRELKA_INSTALL_PATH/bin/configureStrelkaSomaticWorkflow.py \ + configureStrelkaSomaticWorkflow.py \ --tumor ${bamTumor} \ --normal ${bamNormal} \ --referenceFasta ${genomeFile} \ @@ -1003,7 +959,6 @@ def helpMessage() { log.info " Option to configure which tools to use in the workflow." log.info " Different tools to be separated by commas." log.info " Possible values are:" - log.info " mutect1 (use MuTect1 for VC)" log.info " mutect2 (use MuTect2 for VC)" log.info " freebayes (use FreeBayes for VC)" log.info " strelka (use Strelka for VC)" From 70a35842b7fc29322b764790b885516d04cba1bb Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Thu, 12 Jul 2018 01:26:12 +0200 Subject: [PATCH 12/38] Added genome index file --- somaticVC.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/somaticVC.nf b/somaticVC.nf index da7f17363d..f8d231582f 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -327,6 +327,7 @@ process RunFreeBayes { input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(intervalBed) from bamsFFB file(genomeFile) from Channel.value(referenceMap.genomeFile) + file(genomeIndex) from Channel.value(referenceMap.genomeIndex) output: set val("freebayes"), idPatient, idSampleNormal, idSampleTumor, file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into freebayesOutput From a8360571d60467014326cb680f8f05050ca5ed0c Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Thu, 12 Jul 2018 01:29:59 +0200 Subject: [PATCH 13/38] Changed Dockerfile to follow https://github.com/nf-core/methylseq/pull/29/files --- containers/sarek/Dockerfile | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/containers/sarek/Dockerfile b/containers/sarek/Dockerfile index fd4102563b..28db2cb0ee 
100644 --- a/containers/sarek/Dockerfile +++ b/containers/sarek/Dockerfile @@ -1,15 +1,9 @@ FROM nfcore/base:latest LABEL \ - author="Szilveszter Juhos" \ + authors="Maxime.Gracia@scilifelab.se, Szilveszter.Juhos@scilifelab.se" \ description="Image with tools used in Sarek" \ - maintainer="szilveszter.juhos@scilifelab.se" + maintainers="Maxime.Gracia@scilifelab.se, Szilveszter.Juhos@scilifelab.se" COPY environment.yml / - -RUN \ - conda env create -f /environment.yml && \ - conda clean -a - - # Export PATH -ENV PATH /opt/conda/envs/sarek-core/bin:$PATH +RUN conda env update -n root -f /environment.yml && conda clean -a From 733e2c47ff22205789cdd2b788fa793d25cea5cb Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Thu, 12 Jul 2018 01:31:02 +0200 Subject: [PATCH 14/38] removed -Xmx4g because it was ignored anyway and made a syntax/workflow error. Maybe NXF bug? --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 9fdfe3ea38..4f6196a9fe 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,7 +14,7 @@ manifest { } env { - NXF_OPTS="-Xms1g -Xmx4g" + NXF_OPTS="-Xms1g" } profiles { From 449f93906767589a6fd424bc5e1f2c02a71285aa Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Thu, 12 Jul 2018 01:31:43 +0200 Subject: [PATCH 15/38] alphabetical order and added freebayes and new htslib version --- containers/sarek/environment.yml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/containers/sarek/environment.yml b/containers/sarek/environment.yml index c0b720030f..7187785ab9 100644 --- a/containers/sarek/environment.yml +++ b/containers/sarek/environment.yml @@ -7,16 +7,18 @@ channels: - defaults dependencies: - - conda-forge::openjdk=8.0.144 # Needed for FastQC docker - see bioconda/bioconda-recipes#5026 + - bcftools=1.8 - bwa=0.7.17 + - conda-forge::openjdk=8.0.144 # Needed for FastQC docker - see bioconda/bioconda-recipes#5026 - fastqc=0.11.7 - - 
samtools=1.8 + - freebayes=1.2.0 - gatk4=4.0.3.0 - - qualimap=2.2.2a - - multiqc=1.5 + - htslib=1.7 + - igvtools=2.3.93 - manta=1.3.0 + - multiqc=1.5 + - qualimap=2.2.2a + - samtools=1.8 - strelka=2.9.3 - - bcftools=1.8 - - htslib=1.5 - - vcftools=0.1.15 - vcfanno=0.2.8 + - vcftools=0.1.15 From 3fb6dc6fd50d03d24317e421934cdfe40562fe00 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Thu, 12 Jul 2018 01:32:54 +0200 Subject: [PATCH 16/38] freebayes and igvtools in the core container --- configuration/singularity-path.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configuration/singularity-path.config b/configuration/singularity-path.config index d67138e4d2..5392f30717 100644 --- a/configuration/singularity-path.config +++ b/configuration/singularity-path.config @@ -16,7 +16,7 @@ process { $BuildBWAindexes.container = "${params.containerPath}/sarek-${params.tag}.img" $BuildPicardIndex.container = "${params.containerPath}/sarek-${params.tag}.img" $BuildSAMToolsIndex.container = "${params.containerPath}/sarek-${params.tag}.img" - $BuildVCFIndex.container = "${params.containerPath}/igvtools-${params.tag}.img" + $BuildVCFIndex.container = "${params.containerPath}/sarek-${params.tag}.img" $CompressVCF.container = "${params.containerPath}/sarek-${params.tag}.img" $ConcatVCF.container = "${params.containerPath}/sarek-${params.tag}.img" $CreateRecalibrationTable.container = "${params.containerPath}/sarek-${params.tag}.img" @@ -27,7 +27,7 @@ process { $GetVersionBCFtools.container = "${params.containerPath}/sarek-${params.tag}.img" $GetVersionBWAsamtools.container = "${params.containerPath}/sarek-${params.tag}.img" $GetVersionFastQC.container = "${params.containerPath}/sarek-${params.tag}.img" - $GetVersionFreeBayes.container = "${params.containerPath}/freebayes-${params.tag}.img" + $GetVersionFreeBayes.container = "${params.containerPath}/sarek-${params.tag}.img" $GetVersionGATK.container = 
"${params.containerPath}/sarek-${params.tag}.img" $GetVersionManta.container = "${params.containerPath}/sarek-${params.tag}.img" $GetVersionSnpeff.container = {params.genome == 'GRCh38' ? "${params.containerPath}/snpeffgrch38-${params.tag}.img" : "${params.containerPath}/snpeffgrch37-${params.tag}.img"} @@ -46,7 +46,7 @@ process { $RunBcftoolsStats.container = "${params.containerPath}/sarek-${params.tag}.img" $RunConvertAlleleCounts.container = "${params.containerPath}/r-base-${params.tag}.img" $RunFastQC.container = "${params.containerPath}/sarek-${params.tag}.img" - $RunFreeBayes.container = "${params.containerPath}/freebayes-${params.tag}.img" + $RunFreeBayes.container = "${params.containerPath}/sarek-${params.tag}.img" $RunGenotypeGVCFs.container = "${params.containerPath}/sarek-${params.tag}.img" $RunHaplotypecaller.container = "${params.containerPath}/sarek-${params.tag}.img" $RunManta.container = "${params.containerPath}/sarek-${params.tag}.img" From 0c71f0adad1cebcd6469154822bc386dd18be573 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Thu, 12 Jul 2018 19:24:29 +0200 Subject: [PATCH 17/38] set -euo pipefail needed also for germline --- germlineVC.nf | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/germlineVC.nf b/germlineVC.nf index 99017563cd..dc10956f9d 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -31,7 +31,7 @@ kate: syntax groovy; space-indent on; indent-width 2; - CreateIntervalBeds - Create and sort intervals into bed files - RunHaplotypecaller - Run HaplotypeCaller for Germline Variant Calling (Parallelized processes) - RunGenotypeGVCFs - Run HaplotypeCaller for Germline Variant Calling (Parallelized processes) - - ConcatVCF - Merge results from HaplotypeCaller, MuTect1 and MuTect2 + - ConcatVCF - Merge results from HaplotypeCaller, MuTect2 and other paralellized callers - RunSingleStrelka - Run Strelka for Germline Variant Calling - RunSingleManta - Run Manta for Single Structural Variant Calling - 
RunBcftoolsStats - Run BCFTools stats on vcf files @@ -71,9 +71,6 @@ if (params.test && params.genome in ['GRCh37', 'GRCh38']) { referenceMap.intervals = file("$workflow.projectDir/repeats/tiny_${params.genome}.list") } -// TODO -// MuTect and Mutect2 could be run without a recalibrated BAM (they support -// the --BQSR option), but this is not implemented, yet. // TODO // FreeBayes does not need recalibrated BAMs, but we need to test whether // the channels are set up correctly when we disable it @@ -287,8 +284,8 @@ bamsAll = bamsAll.map { bamsTumorNormalIntervals = bamsAll.spread(bedIntervals) -// MuTect1, MuTect2, FreeBayes -(bamsFMT1, bamsFMT2, bamsFFB) = bamsTumorNormalIntervals.into(3) +// MuTect2, FreeBayes +(bamsFMT2, bamsFFB) = bamsTumorNormalIntervals.into(2) process RunHaplotypecaller { tag {idSample + "-" + intervalBed.baseName} @@ -382,7 +379,7 @@ process ConcatVCF { set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfConcatenated - when: ( 'haplotypecaller' in tools || 'mutect1' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !params.onlyQC + when: ( 'haplotypecaller' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !params.onlyQC script: if (variantCaller == 'haplotypecaller') outputFile = "${variantCaller}_${idSampleNormal}.vcf" @@ -390,6 +387,7 @@ process ConcatVCF { else outputFile = "${variantCaller}_${idSampleTumor}_vs_${idSampleNormal}.vcf" """ + set -euo pipefail # first make a header from one of the VCF intervals # get rid of interval information only from the GATK command-line, but leave the rest FIRSTVCF=\$(ls *.vcf | head -n 1) @@ -665,7 +663,6 @@ def defineToolList() { 'freebayes', 'haplotypecaller', 'manta', - 'mutect1', 'mutect2', 'strelka' ] From da3be400aea1663ff5361857ebf2802e8d8ebcef Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos <szilveszter.juhos@scilifelab.se> Date: Thu, 12 Jul 2018 19:24:53 +0200 Subject: [PATCH 18/38] die mutect1 --- annotate.nf | 1 - buildContainers.nf | 4 +---
configuration/containers.config | 23 +++++++++----------- configuration/uppmax-localhost.config | 4 ---- configuration/uppmax-slurm.config | 6 ------ containers/freebayes/Dockerfile | 30 --------------------------- containers/mutect1/Dockerfile | 25 ---------------------- lib/SarekUtils.groovy | 1 - scripts/do_all.sh | 4 ++-- scripts/test.sh | 2 +- somaticVC.nf | 4 ++-- 11 files changed, 16 insertions(+), 88 deletions(-) delete mode 100644 containers/freebayes/Dockerfile delete mode 100644 containers/mutect1/Dockerfile diff --git a/annotate.nf b/annotate.nf index 23c6b92790..8d45ae3fff 100644 --- a/annotate.nf +++ b/annotate.nf @@ -350,7 +350,6 @@ def helpMessage() { log.info " Possible values are:" log.info " haplotypecaller (Annotate HaplotypeCaller output)" log.info " manta (Annotate Manta output)" - log.info " mutect1 (Annotate MuTect1 output)" log.info " mutect2 (Annotate MuTect2 output)" log.info " strelka (Annotate Strelka output)" log.info " --annotateVCF" diff --git a/buildContainers.nf b/buildContainers.nf index ba6d96ff32..7122075e7b 100644 --- a/buildContainers.nf +++ b/buildContainers.nf @@ -181,7 +181,6 @@ def defineContainersList(){ 'gatk', 'gatk4', 'igvtools', - 'mutect1', 'picard', 'qctools', 'r-base', @@ -208,8 +207,7 @@ def helpMessage() { log.info " --containers: Choose which containers to build" log.info " Default: all" log.info " Possible values:" - log.info " all, freebayes, gatk, gatk4, igvtools, mutect1, picard" - log.info " qctools, r-base, runallelecount, sarek, snpeff" + log.info " all, r-base, runallelecount, sarek, snpeff" log.info " snpeffgrch37, snpeffgrch38, vepgrch37, vepgrch38" log.info " --docker: Build containers using Docker" log.info " --help" diff --git a/configuration/containers.config b/configuration/containers.config index 780a5a47a4..195c240326 100644 --- a/configuration/containers.config +++ b/configuration/containers.config @@ -11,10 +11,10 @@ process { $BuildBWAindexes.container = 
"${params.repository}/sarek:${params.tag}" $BuildPicardIndex.container = "${params.repository}/picard:${params.tag}" $BuildSAMToolsIndex.container = "${params.repository}/sarek:${params.tag}" - $BuildVCFIndex.container = "${params.repository}/igvtools:${params.tag}" + $BuildVCFIndex.container = "${params.repository}/sarek:${params.tag}" $CompressVCF.container = "${params.repository}/sarek:${params.tag}" $ConcatVCF.container = "${params.repository}/sarek:${params.tag}" - $CreateRecalibrationTable.container = "${params.repository}/gatk:${params.tag}" + $CreateRecalibrationTable.container = "${params.repository}/sarek:${params.tag}" $GetVersionAll.container = "${params.repository}/sarek:${params.tag}" $GetVersionAlleleCount.container = "${params.repository}/runallelecount:${params.tag}" $GetVersionASCAT.container = "${params.repository}/r-base:${params.tag}" @@ -22,33 +22,30 @@ process { $GetVersionBCFtools.container = "${params.repository}/sarek:${params.tag}" $GetVersionBWAsamtools.container = "${params.repository}/sarek:${params.tag}" $GetVersionFastQC.container = "${params.repository}/sarek:${params.tag}" - $GetVersionFreeBayes.container = "${params.repository}/freebayes:${params.tag}" - $GetVersionGATK.container = "${params.repository}/gatk:${params.tag}" + $GetVersionFreeBayes.container = "${params.repository}/sarek:${params.tag}" + $GetVersionGATK.container = "${params.repository}/sarek:${params.tag}" $GetVersionManta.container = "${params.repository}/sarek:${params.tag}" $GetVersionPicard.container = "${params.repository}/picard:${params.tag}" $GetVersionSnpeff.container = {params.genome == 'GRCh38' ? "${params.repository}/snpeffgrch38:${params.tag}" : "${params.repository}/snpeffgrch37:${params.tag}"} $GetVersionStrelka.container = "${params.repository}/sarek:${params.tag}" $GetVersionVCFtools.container = "${params.repository}/sarek:${params.tag}" $GetVersionVEP.container = {params.genome == 'GRCh38' ? 
"${params.repository}/vepgrch38:${params.tag}" : "${params.repository}/vepgrch37:${params.tag}"} - $IndelRealigner.container = "${params.repository}/gatk:${params.tag}" $MapReads.container = "${params.repository}/sarek:${params.tag}" - $MarkDuplicates.container = "${params.repository}/picard:${params.tag}" + $MarkDuplicates.container = "${params.repository}/sarek:${params.tag}" $MergeBams.container = "${params.repository}/sarek:${params.tag}" - $RealignerTargetCreator.container = "${params.repository}/gatk:${params.tag}" - $RecalibrateBam.container = "${params.repository}/gatk:${params.tag}" + $RecalibrateBam.container = "${params.repository}/sarek:${params.tag}" $RunAlleleCount.container = "${params.repository}/runallelecount:${params.tag}" $RunAscat.container = "${params.repository}/r-base:${params.tag}" $RunBamQC.container = "${params.repository}/sarek:${params.tag}" $RunBcftoolsStats.container = "${params.repository}/sarek:${params.tag}" $RunConvertAlleleCounts.container = "${params.repository}/r-base:${params.tag}" $RunFastQC.container = "${params.repository}/sarek:${params.tag}" - $RunFreeBayes.container = "${params.repository}/freebayes:${params.tag}" - $RunGenotypeGVCFs.container = "${params.repository}/gatk:${params.tag}" - $RunHaplotypecaller.container = "${params.repository}/gatk:${params.tag}" + $RunFreeBayes.container = "${params.repository}/sarek:${params.tag}" + $RunGenotypeGVCFs.container = "${params.repository}/sarek:${params.tag}" + $RunHaplotypecaller.container = "${params.repository}/sarek:${params.tag}" $RunManta.container = "${params.repository}/sarek:${params.tag}" $RunMultiQC.container = "${params.repository}/sarek:${params.tag}" - $RunMutect1.container = "${params.repository}/mutect1:${params.tag}" - $RunMutect2.container = "${params.repository}/gatk:${params.tag}" + $RunMutect2.container = "${params.repository}/sarek:${params.tag}" $RunSamtoolsStats.container = "${params.repository}/sarek:${params.tag}" $RunSingleManta.container = 
"${params.repository}/sarek:${params.tag}" $RunSingleStrelka.container = "${params.repository}/sarek:${params.tag}" diff --git a/configuration/uppmax-localhost.config b/configuration/uppmax-localhost.config index 1f4cd5bb56..58597c59b6 100644 --- a/configuration/uppmax-localhost.config +++ b/configuration/uppmax-localhost.config @@ -117,10 +117,6 @@ process { } $RunMultiQC { } - $RunMutect1 { - cpus = 1 - memory = {params.singleCPUMem * task.attempt} - } $RunMutect2 { cpus = 1 memory = {params.singleCPUMem * task.attempt} diff --git a/configuration/uppmax-slurm.config b/configuration/uppmax-slurm.config index de1639964f..d4de89d0fb 100644 --- a/configuration/uppmax-slurm.config +++ b/configuration/uppmax-slurm.config @@ -116,12 +116,6 @@ process { $RunMultiQC { errorStrategy = { task.exitStatus == 143 ? 'retry' : 'ignore' } } - $RunMutect1 { - queue = 'core' - time = {params.runTime * task.attempt} - cpus = 1 - memory = {params.singleCPUMem * task.attempt} - } $RunMutect2 { cpus = 1 memory = {params.singleCPUMem * task.attempt} diff --git a/containers/freebayes/Dockerfile b/containers/freebayes/Dockerfile deleted file mode 100644 index efacdde8ee..0000000000 --- a/containers/freebayes/Dockerfile +++ /dev/null @@ -1,30 +0,0 @@ -FROM debian:8.9 - -LABEL \ - author="Maxime Garcia" \ - description="FreeBayes image for use in Sarek" \ - maintainer="maxime.garcia@scilifelab.se" - -# Install libraries -RUN \ - apt-get update && apt-get install -y --no-install-recommends \ - build-essential \ - ca-certificates \ - cmake \ - git \ - wget \ - zlib1g-dev \ - && rm -rf /var/lib/apt/lists/* - -# Setup ENV variables -ENV FREEBAYES_VERSION=1.1.0 - -# Install BCFTools -RUN \ - git clone --recursive git://github.com/ekg/freebayes.git freebayes \ - --branch v${FREEBAYES_VERSION} \ - && cd freebayes \ - && make \ - && make install \ - && cd .. 
\ -&& rm -rf freebayes diff --git a/containers/mutect1/Dockerfile b/containers/mutect1/Dockerfile deleted file mode 100644 index 7068fb963c..0000000000 --- a/containers/mutect1/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -FROM openjdk:7-slim - -LABEL \ - author="Maxime Garcia" \ - description="MuTect 1.1.5 image for use in Sarek" \ - maintainer="maxime.garcia@scilifelab.se" - -# Install libraries -RUN \ - apt-get update && apt-get install -y --no-install-recommends \ - wget \ - && rm -rf /var/lib/apt/lists/* - -# Setup ENV variables -ENV \ - MUTECT_HOME=/opt/mutect-1.1.5 \ - MUTECT_VERSION=1.1.5 - -# Install MuTect1 -RUN \ - wget --quiet -O muTect-${MUTECT_VERSION}-bin.zip \ - https://github.com/broadinstitute/mutect/releases/download/${MUTECT_VERSION}/muTect-${MUTECT_VERSION}-bin.zip \ - && unzip muTect-${MUTECT_VERSION}-bin.zip -d ${MUTECT_HOME} \ - && rm muTect-${MUTECT_VERSION}-bin.zip \ -&& mv ${MUTECT_HOME}/muTect-${MUTECT_VERSION}.jar ${MUTECT_HOME}/muTect.jar diff --git a/lib/SarekUtils.groovy b/lib/SarekUtils.groovy index a21ec442bb..54f41ca965 100644 --- a/lib/SarekUtils.groovy +++ b/lib/SarekUtils.groovy @@ -126,7 +126,6 @@ class SarekUtils { 'gvcf-hc' : "${outDir}/VariantCalling/HaplotypeCallerGVCF", 'haplotypecaller' : "${outDir}/VariantCalling/HaplotypeCaller", 'manta' : "${outDir}/VariantCalling/Manta", - 'mutect1' : "${outDir}/VariantCalling/MuTect1", 'mutect2' : "${outDir}/VariantCalling/MuTect2", 'strelka' : "${outDir}/VariantCalling/Strelka", 'strelkabp' : "${outDir}/VariantCalling/StrelkaBP", diff --git a/scripts/do_all.sh b/scripts/do_all.sh index e4780958a1..d0e693ff57 100755 --- a/scripts/do_all.sh +++ b/scripts/do_all.sh @@ -57,8 +57,8 @@ function toLower() { if [[ $TOOL = docker ]] && [[ GRCh37,GRCh38 =~ $GENOME ]] then - nextflow run buildContainers.nf -profile ${PROFILE} --verbose --docker ${PUSH} --repository ${REPOSITORY} --tag ${TAG} --containers freebayes,gatk,gatk4,igvtools,mutect1,picard,qctools,r-base,runallelecount,sarek,snpeff + 
nextflow run buildContainers.nf -profile ${PROFILE} --verbose --docker ${PUSH} --repository ${REPOSITORY} --tag ${TAG} --containers r-base,runallelecount,sarek,snpeff nextflow run buildContainers.nf -profile ${PROFILE} --verbose --docker ${PUSH} --repository ${REPOSITORY} --tag ${TAG} --containers snpeff$(toLower ${GENOME}),vep$(toLower ${GENOME}) else - nextflow run buildContainers.nf -profile ${PROFILE} --verbose --singularity --repository ${REPOSITORY} --tag ${TAG} --containerPath containers/ --containers freebayes,gatk,gatk4,igvtools,mutect1,picard,qctools,r-base,runallelecount,sarek,snpeff$(toLower ${GENOME}),vep$(toLower ${GENOME}) + nextflow run buildContainers.nf -profile ${PROFILE} --verbose --singularity --repository ${REPOSITORY} --tag ${TAG} --containerPath containers/ --containers r-base,runallelecount,sarek,snpeff$(toLower ${GENOME}),vep$(toLower ${GENOME}) fi diff --git a/scripts/test.sh b/scripts/test.sh index a9f340d7c4..0d9eefa3c6 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -104,7 +104,7 @@ fi if [[ ALL,TOOLS =~ $TEST ]] then - run_wrapper --somatic --sample $SAMPLE --variantCalling --tools FreeBayes,HaplotypeCaller,MuTect1,MuTect2 + run_wrapper --somatic --sample $SAMPLE --variantCalling --tools FreeBayes,HaplotypeCaller,MuTect2 fi if [[ ALL,MANTA =~ $TEST ]] diff --git a/somaticVC.nf b/somaticVC.nf index f8d231582f..d39ac23b5c 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -376,12 +376,13 @@ process ConcatVCF { output: set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfConcatenated - when: ('mutect1' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !params.onlyQC + when: ( 'mutect2' in tools || 'freebayes' in tools ) && !params.onlyQC script: outputFile = "${variantCaller}_${idSampleTumor}_vs_${idSampleNormal}.vcf" """ + set -euo pipefail # first make a header from one of the VCF intervals # get rid of interval information only from the GATK command-line, but leave 
the rest FIRSTVCF=\$(ls *.vcf | head -n 1) @@ -927,7 +928,6 @@ def defineToolList() { 'freebayes', 'haplotypecaller', 'manta', - 'mutect1', 'mutect2', 'strelka' ] From 4db0d5d580b780ece815ef572e441a698b8da143 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Tue, 17 Jul 2018 20:15:37 +0200 Subject: [PATCH 19/38] commenting out AFs as it will be for a new sprint --- configuration/genomes.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configuration/genomes.config b/configuration/genomes.config index 1653879a08..53ceecf9cf 100644 --- a/configuration/genomes.config +++ b/configuration/genomes.config @@ -43,8 +43,8 @@ params { knownIndelsIndex = "${params.genome_base}/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" snpeffDb = "GRCh38.86" // This a nasty-looking list of allele-frequencies files. Add/remove files to match to your sets - AF_files = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf" - AF_indexes = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf.idx" + //AF_files = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf" + //AF_indexes = "${params.genome_base}/{00-All.dbsnp_151.hg38.CAF.TOPMED.alternate.allele.freq,hapmap_3.3_grch38_pop_stratified_af.HMAF,SweGen_hg38_stratified.SWAF}.vcf.idx" } 'smallGRCh37' { acLoci = "${params.genome_base}/1000G_phase3_20130502_SNP_maf0.3.small.loci" From efd11c67d815141fa756c64ded71b523763c4acf Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Tue, 17 Jul 2018 21:29:07 +0200 Subject: [PATCH 20/38] vcfanno is merged to the core --- containers/vcfanno/Dockerfile | 15 --------------- containers/vcfanno/environment.yml | 9 --------- 
containers/vcfanno/to_build | 5 ----- 3 files changed, 29 deletions(-) delete mode 100644 containers/vcfanno/Dockerfile delete mode 100644 containers/vcfanno/environment.yml delete mode 100644 containers/vcfanno/to_build diff --git a/containers/vcfanno/Dockerfile b/containers/vcfanno/Dockerfile deleted file mode 100644 index 80a6473d02..0000000000 --- a/containers/vcfanno/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM nfcore/base:latest - -LABEL \ - author="Szilveszter Juhos" \ - description="vcfanno Image for Sarek" \ - maintainer="szilveszter.juhos@scilifelab.se" - -COPY environment.yml / - -RUN \ - conda env create -f /environment.yml && \ - conda clean -a - - # Export PATH -ENV PATH /opt/conda/envs/sarek-vcfanno-2.0/bin:$PATH diff --git a/containers/vcfanno/environment.yml b/containers/vcfanno/environment.yml deleted file mode 100644 index bd8418268f..0000000000 --- a/containers/vcfanno/environment.yml +++ /dev/null @@ -1,9 +0,0 @@ -# You can use this file to create a conda environment: -# conda env create -f environment.yml -name: sarek-vcfanno-2.0 -channels: - - bioconda - - conda-forge - - defaults -dependencies: - - vcfanno=0.2.8 diff --git a/containers/vcfanno/to_build b/containers/vcfanno/to_build deleted file mode 100644 index c2d06a77f9..0000000000 --- a/containers/vcfanno/to_build +++ /dev/null @@ -1,5 +0,0 @@ -docker login szilvajuhos -docker build -t szilvajuhos/sarek-vcfanno:latest . 
-docker images -docker push szilvajuhos/sarek-vcfanno:latest -singularity pull docker://szilvajuhos/sarek-vcfanno:latest From e81e10035f3abf3095c46f90a971710ea7bc1b40 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Tue, 17 Jul 2018 21:44:29 +0200 Subject: [PATCH 21/38] removed igvtools (as went to core) --- buildContainers.nf | 6 ------ buildReferences.nf | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/buildContainers.nf b/buildContainers.nf index 7122075e7b..92294ff432 100644 --- a/buildContainers.nf +++ b/buildContainers.nf @@ -177,12 +177,6 @@ def grabRevision() { def defineContainersList(){ // Return list of authorized containers return [ - 'freebayes', - 'gatk', - 'gatk4', - 'igvtools', - 'picard', - 'qctools', 'r-base', 'runallelecount', 'sarek', diff --git a/buildReferences.nf b/buildReferences.nf index eb185ce271..caa47761f0 100644 --- a/buildReferences.nf +++ b/buildReferences.nf @@ -196,7 +196,7 @@ process BuildVCFIndex { script: """ - \$IGVTOOLS_HOME/igvtools index ${f_reference} + igvtools index ${f_reference} """ } From 7cf77b607eac835aa5bbb3b3a3e3f7ecd6e14ae4 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Mon, 30 Jul 2018 11:53:07 +0200 Subject: [PATCH 22/38] Not using the log util, as it is not working --- lib/SarekUtils.groovy | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/SarekUtils.groovy b/lib/SarekUtils.groovy index 884efe8e11..43287f0388 100644 --- a/lib/SarekUtils.groovy +++ b/lib/SarekUtils.groovy @@ -1,9 +1,6 @@ import static nextflow.Nextflow.file import nextflow.Channel -import groovy.util.logging.Slf4j -@Grab('ch.qos.logback:logback-classic:1.2.1') -@Slf4j class SarekUtils { // Check file extension @@ -109,8 +106,8 @@ class SarekUtils { def f = file(fileToCheck) // this is an expanded wildcard: we can assume all files exist if (f instanceof List && f.size() > 0) return true - else if (!f.exists()) { - log.info "Missing references: ${referenceFile} ${fileToCheck}" + else if 
(!f.exists()) { + println "Missing references: ${referenceFile} ${fileToCheck}" return false } return true From e2cacd7075304be41042875c353478369d4d8074 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Tue, 31 Jul 2018 18:22:34 +0200 Subject: [PATCH 23/38] fixed indent --- germlineVC.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/germlineVC.nf b/germlineVC.nf index dc10956f9d..4ffa66ae97 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -313,9 +313,9 @@ process RunHaplotypecaller { -R ${genomeFile} \ -I ${bam} \ -L ${intervalBed} \ - --dbsnp ${dbsnp} \ - -O ${intervalBed.baseName}_${idSample}.g.vcf \ - --emit-ref-confidence GVCF + --dbsnp ${dbsnp} \ + -O ${intervalBed.baseName}_${idSample}.g.vcf \ + --emit-ref-confidence GVCF """ } hcGenomicVCF = hcGenomicVCF.groupTuple(by:[0,1,2,3]) From 735463b1b0b73f6d5569951c9499249c8459e0e6 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Tue, 31 Jul 2018 18:42:59 +0200 Subject: [PATCH 24/38] gatk-launch rename --- germlineVC.nf | 6 +++--- main.nf | 6 +++--- somaticVC.nf | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/germlineVC.nf b/germlineVC.nf index 4ffa66ae97..4dd71fb528 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -308,7 +308,7 @@ process RunHaplotypecaller { script: """ - gatk-launch --java-options "-Xmx${task.memory.toGiga()}g -Xms6000m -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10" \ + gatk --java-options "-Xmx${task.memory.toGiga()}g -Xms6000m -XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10" \ HaplotypeCaller \ -R ${genomeFile} \ -I ${bam} \ @@ -343,9 +343,9 @@ process RunGenotypeGVCFs { script: // Using -L is important for speed and we have to index the interval files also """ - gatk-launch IndexFeatureFile -F ${gvcf} + gatk IndexFeatureFile -F ${gvcf} - gatk-launch --java-options -Xmx${task.memory.toGiga()}g \ + gatk --java-options -Xmx${task.memory.toGiga()}g \ GenotypeGVCFs \ -R ${genomeFile} \ -L ${intervalBed} \ diff --git a/main.nf b/main.nf index 
a452c26550..ac7b102b24 100644 --- a/main.nf +++ b/main.nf @@ -261,7 +261,7 @@ process MarkDuplicates { script: """ - gatk-launch --java-options -Xmx${task.memory.toGiga()}g \ + gatk --java-options -Xmx${task.memory.toGiga()}g \ MarkDuplicates \ --INPUT ${bam} \ --METRICS_FILE ${bam}.metrics \ @@ -323,7 +323,7 @@ process CreateRecalibrationTable { script: known = knownIndels.collect{ "--known-sites ${it}" }.join(' ') """ - gatk-launch --java-options -Xmx${task.memory.toGiga()}g \ + gatk --java-options -Xmx${task.memory.toGiga()}g \ BaseRecalibrator \ --input ${bam} \ --output ${idSample}.recal.table \ @@ -387,7 +387,7 @@ process RecalibrateBam { script: """ - gatk-launch --java-options -Xmx${task.memory.toGiga()}g \ + gatk --java-options -Xmx${task.memory.toGiga()}g \ ApplyBQSR \ -R ${genomeFile} \ --input ${bam} \ diff --git a/somaticVC.nf b/somaticVC.nf index d39ac23b5c..0e2347bb0e 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -306,7 +306,7 @@ process RunMutect2 { script: """ - gatk-launch --java-options "-Xmx${task.memory.toGiga()}g" \ + gatk --java-options "-Xmx${task.memory.toGiga()}g" \ Mutect2 \ -R ${genomeFile}\ -I ${bamTumor} -tumor ${idSampleTumor} \ From a64dd3c87e74835e715fe7c8f2a4ce683666faf2 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Tue, 31 Jul 2018 18:44:42 +0200 Subject: [PATCH 25/38] MarkDuplicates got all the memory --- configuration/uppmax-localhost.config | 4 ++-- configuration/uppmax-slurm.config | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/configuration/uppmax-localhost.config b/configuration/uppmax-localhost.config index 58597c59b6..92466106ae 100644 --- a/configuration/uppmax-localhost.config +++ b/configuration/uppmax-localhost.config @@ -14,7 +14,7 @@ env { params { genome_base = params.genome == 'GRCh37' ? '/sw/data/uppnex/ToolBox/ReferenceAssemblies/hg38make/bundle/2.8/b37' : params.genome == 'GRCh38' ? 
'/sw/data/uppnex/ToolBox/hg38bundle' : 'References/smallGRCh37' singleCPUMem = 8.GB - totalMemory = 104.GB // change to 240 on irma + totalMemory = 92.GB // change to 240 on irma } executor { @@ -70,7 +70,7 @@ process { memory = {params.totalMemory} } $MarkDuplicates { - memory = {params.singleCPUMem * 2 * task.attempt} + // for deep sequencing we do need all the memory } $MergeBams { cpus = 16 diff --git a/configuration/uppmax-slurm.config b/configuration/uppmax-slurm.config index d4de89d0fb..9858ca35cd 100644 --- a/configuration/uppmax-slurm.config +++ b/configuration/uppmax-slurm.config @@ -52,10 +52,7 @@ process { time = {params.runTime * task.attempt} } $MarkDuplicates { - cpus = 1 - memory = {params.singleCPUMem * 8 * task.attempt} - queue = 'core' - time = {params.runTime * task.attempt} + // looks like overkill, but for deep sequencing we do need all the memory } $MergeBams { memory = {params.singleCPUMem * task.attempt} From 5ab4491ad9a6048a389b086369e1dd34a1b028e2 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Wed, 1 Aug 2018 17:19:31 +0200 Subject: [PATCH 26/38] Latest versions --- containers/sarek/Dockerfile | 5 +++-- containers/sarek/environment.yml | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/containers/sarek/Dockerfile b/containers/sarek/Dockerfile index 28db2cb0ee..340e699e71 100644 --- a/containers/sarek/Dockerfile +++ b/containers/sarek/Dockerfile @@ -1,9 +1,10 @@ FROM nfcore/base:latest LABEL \ - authors="Maxime.Gracia@scilifelab.se, Szilveszter.Juhos@scilifelab.se" \ + authors="Maxime.Garcia@scilifelab.se, Szilveszter.Juhos@scilifelab.se" \ description="Image with tools used in Sarek" \ - maintainers="Maxime.Gracia@scilifelab.se, Szilveszter.Juhos@scilifelab.se" + maintainer="Maxime Garcia , Szilveszter Juhos " COPY environment.yml / RUN conda env update -n root -f /environment.yml && conda clean -a +ENV PATH /opt/conda/bin:$PATH diff --git a/containers/sarek/environment.yml b/containers/sarek/environment.yml 
index 7187785ab9..25f08613f0 100644 --- a/containers/sarek/environment.yml +++ b/containers/sarek/environment.yml @@ -12,10 +12,10 @@ dependencies: - conda-forge::openjdk=8.0.144 # Needed for FastQC docker - see bioconda/bioconda-recipes#5026 - fastqc=0.11.7 - freebayes=1.2.0 - - gatk4=4.0.3.0 - - htslib=1.7 + - gatk4=4.0.6.0 + - htslib=1.9 - igvtools=2.3.93 - - manta=1.3.0 + - manta=1.4.0 - multiqc=1.5 - qualimap=2.2.2a - samtools=1.8 From 537faa83656a291588bf9719901f04c88f96dc9b Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Wed, 1 Aug 2018 17:21:04 +0200 Subject: [PATCH 27/38] MarkDuplicates MAX_RECORDS_IN_RAM decreased to 50000 --- main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/main.nf b/main.nf index ac7b102b24..44f8384ccf 100644 --- a/main.nf +++ b/main.nf @@ -263,6 +263,7 @@ process MarkDuplicates { """ gatk --java-options -Xmx${task.memory.toGiga()}g \ MarkDuplicates \ + --MAX_RECORDS_IN_RAM 50000 \ --INPUT ${bam} \ --METRICS_FILE ${bam}.metrics \ --TMP_DIR . \ From 24b1ffb1bef4ef02117ca97dd20fa87140250597 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Wed, 1 Aug 2018 17:21:41 +0200 Subject: [PATCH 28/38] Picard removed from buildReferences.nf --- buildReferences.nf | 23 +++++++++++------------ configuration/containers.config | 3 +-- configuration/singularity-path.config | 2 +- configuration/uppmax-localhost.config | 2 +- configuration/uppmax-slurm.config | 2 +- 5 files changed, 15 insertions(+), 17 deletions(-) diff --git a/buildReferences.nf b/buildReferences.nf index caa47761f0..cfc7a567a2 100644 --- a/buildReferences.nf +++ b/buildReferences.nf @@ -29,7 +29,7 @@ kate: syntax groovy; space-indent on; indent-width 2; - ProcessReference - Download all references if needed - DecompressFile - Extract files if needed - BuildBWAindexes - Build indexes for BWA - - BuildPicardIndex - Build index with Picard + - BuildReferenceIndex - Build index for FASTA refs - BuildSAMToolsIndex - Build index with SAMTools - BuildVCFIndex - Build 
index for VCF files ================================================================================ @@ -98,7 +98,7 @@ if (params.verbose) ch_decompressedFiles = ch_decompressedFiles.view { ch_fastaFile = Channel.create() ch_fastaForBWA = Channel.create() -ch_fastaForPicard = Channel.create() +ch_fastaReference = Channel.create() ch_fastaForSAMTools = Channel.create() ch_otherFile = Channel.create() ch_vcfFile = Channel.create() @@ -108,7 +108,7 @@ ch_decompressedFiles it =~ ".fasta" ? 0 : it =~ ".vcf" ? 1 : 2} -(ch_fastaForBWA, ch_fastaForPicard, ch_fastaForSAMTools, ch_fastaFileToKeep) = ch_fastaFile.into(4) +(ch_fastaForBWA, ch_fastaReference, ch_fastaForSAMTools, ch_fastaFileToKeep) = ch_fastaFile.into(4) (ch_vcfFile, ch_vcfFileToKeep) = ch_vcfFile.into(2) ch_notCompressedfiles @@ -137,29 +137,28 @@ if (params.verbose) bwaIndexes.flatten().view { "BWA index : ${it.fileName}" } -process BuildPicardIndex { +process BuildReferenceIndex { tag {f_reference} publishDir params.outDir, mode: 'link' input: - file(f_reference) from ch_fastaForPicard + file(f_reference) from ch_fastaReference output: - file("*.dict") into ch_picardIndex + file("*.dict") into ch_referenceIndex script: """ - java -Xmx${task.memory.toGiga()}g \ - -jar \$PICARD_HOME/picard.jar \ + gatk --java-options "-Xmx${task.memory.toGiga()}g" \ CreateSequenceDictionary \ - REFERENCE=${f_reference} \ - OUTPUT=${f_reference.baseName}.dict + --REFERENCE ${f_reference} \ + --OUTPUT ${f_reference.baseName}.dict """ } -if (params.verbose) ch_picardIndex.view { - "Picard index : ${it.fileName}" +if (params.verbose) ch_referenceIndex.view { + "Reference index : ${it.fileName}" } process BuildSAMToolsIndex { diff --git a/configuration/containers.config b/configuration/containers.config index 195c240326..5639a291fa 100644 --- a/configuration/containers.config +++ b/configuration/containers.config @@ -9,7 +9,7 @@ process { $BuildBWAindexes.container = "${params.repository}/sarek:${params.tag}" - 
$BuildPicardIndex.container = "${params.repository}/picard:${params.tag}" + $BuildReferenceIndex.container = "${params.repository}/sarek:${params.tag}" $BuildSAMToolsIndex.container = "${params.repository}/sarek:${params.tag}" $BuildVCFIndex.container = "${params.repository}/sarek:${params.tag}" $CompressVCF.container = "${params.repository}/sarek:${params.tag}" @@ -25,7 +25,6 @@ process { $GetVersionFreeBayes.container = "${params.repository}/sarek:${params.tag}" $GetVersionGATK.container = "${params.repository}/sarek:${params.tag}" $GetVersionManta.container = "${params.repository}/sarek:${params.tag}" - $GetVersionPicard.container = "${params.repository}/picard:${params.tag}" $GetVersionSnpeff.container = {params.genome == 'GRCh38' ? "${params.repository}/snpeffgrch38:${params.tag}" : "${params.repository}/snpeffgrch37:${params.tag}"} $GetVersionStrelka.container = "${params.repository}/sarek:${params.tag}" $GetVersionVCFtools.container = "${params.repository}/sarek:${params.tag}" diff --git a/configuration/singularity-path.config b/configuration/singularity-path.config index 5392f30717..9b51a46318 100644 --- a/configuration/singularity-path.config +++ b/configuration/singularity-path.config @@ -14,7 +14,7 @@ singularity { process { $BuildBWAindexes.container = "${params.containerPath}/sarek-${params.tag}.img" - $BuildPicardIndex.container = "${params.containerPath}/sarek-${params.tag}.img" + $BuildReferenceIndex.container = "${params.containerPath}/sarek-${params.tag}.img" $BuildSAMToolsIndex.container = "${params.containerPath}/sarek-${params.tag}.img" $BuildVCFIndex.container = "${params.containerPath}/sarek-${params.tag}.img" $CompressVCF.container = "${params.containerPath}/sarek-${params.tag}.img" diff --git a/configuration/uppmax-localhost.config b/configuration/uppmax-localhost.config index 92466106ae..4e6abb1f64 100644 --- a/configuration/uppmax-localhost.config +++ b/configuration/uppmax-localhost.config @@ -43,7 +43,7 @@ process { $BuildBWAindexes { 
memory = {params.totalMemory} // TODO This is likely too high } - $BuildPicardIndex { + $BuildReferenceIndex { memory = {params.totalMemory} // TODO This is likely too high } $BuildSAMToolsIndex { diff --git a/configuration/uppmax-slurm.config b/configuration/uppmax-slurm.config index 9858ca35cd..07f95a74be 100644 --- a/configuration/uppmax-slurm.config +++ b/configuration/uppmax-slurm.config @@ -27,7 +27,7 @@ process { $BuildBWAindexes { } - $BuildPicardIndex { + $BuildReferenceIndex { } $BuildSAMToolsIndex { } From a5b3f7c08036d9f651d5130bc941cc28bc460763 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Wed, 1 Aug 2018 17:32:03 +0200 Subject: [PATCH 29/38] adding all the cpus and mem to MarkDuplicates --- configuration/uppmax-localhost.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configuration/uppmax-localhost.config b/configuration/uppmax-localhost.config index 4e6abb1f64..c83bab64ea 100644 --- a/configuration/uppmax-localhost.config +++ b/configuration/uppmax-localhost.config @@ -71,6 +71,8 @@ process { } $MarkDuplicates { // for deep sequencing we do need all the memory + cpus = 16 + memory = {params.totalMemory} } $MergeBams { cpus = 16 From 6ebed7ab113b8db041d14860ab62166a5e9881fd Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Thu, 2 Aug 2018 15:29:34 +0200 Subject: [PATCH 30/38] update submodule Sarek-data a.k.a. 
#609 --- Sarek-data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sarek-data b/Sarek-data index d717855502..ba299d76c8 160000 --- a/Sarek-data +++ b/Sarek-data @@ -1 +1 @@ -Subproject commit d717855502f2e7501e124f2fe116733fc01ae114 +Subproject commit ba299d76c851dc916c051ef5f77d7a4ab39dcc9f From 58bab1363b4d97e27fa8e7372803138698b334ae Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Thu, 2 Aug 2018 15:30:34 +0200 Subject: [PATCH 31/38] s/gatk-launch/gatk/ --- lib/QC.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/QC.groovy b/lib/QC.groovy index e3c6416f76..2f6885d84b 100644 --- a/lib/QC.groovy +++ b/lib/QC.groovy @@ -59,7 +59,7 @@ class QC { // Get GATK version static def getVersionGATK() { """ - gatk-launch ApplyBQSR --help 2>&1| awk -F/ '/java/{for(i=1;i<=NF;i++){if(\$i~/gatk4/){sub("gatk4-","",\$i);print \$i>"v_gatk.txt"}}}' + gatk ApplyBQSR --help 2>&1| awk -F/ '/java/{for(i=1;i<=NF;i++){if(\$i~/gatk4/){sub("gatk4-","",\$i);print \$i>"v_gatk.txt"}}}' """ } From 12826fddae3b96a1251f262f0aebf569c8a5032e Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Thu, 2 Aug 2018 15:30:56 +0200 Subject: [PATCH 32/38] removing realign parts --- main.nf | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/main.nf b/main.nf index 44f8384ccf..85f22d8a8e 100644 --- a/main.nf +++ b/main.nf @@ -30,7 +30,6 @@ kate: syntax groovy; space-indent on; indent-width 2; - MapReads - Map reads with BWA - MergeBams - Merge BAMs if multilane samples - MarkDuplicates - Mark Duplicates with GATK4 - - RealignerTargetCreator - Create realignment target intervals - IndelRealigner - Realign BAMs as T/N pair - CreateRecalibrationTable - Create Recalibration Table with BaseRecalibrator - RecalibrateBam - Recalibrate Bam with PrintReads @@ -96,7 +95,6 @@ if (tsvPath) { tsvFile = file(tsvPath) switch (step) { case 'mapping': fastqFiles = extractFastq(tsvFile); break - case 'realign': bamFiles = 
SarekUtils.extractBams(tsvFile, "somatic"); break case 'recalibrate': bamFiles = extractRecal(tsvFile); break default: exit 1, "Unknown step ${step}" } @@ -317,9 +315,9 @@ process CreateRecalibrationTable { output: set idPatient, status, idSample, file("${idSample}.recal.table") into recalibrationTable - set idPatient, status, idSample, val("${idSample}_${status}.md.real.bam"), val("${idSample}_${status}.md.real.bai"), val("${idSample}.recal.table") into recalibrationTableTSV + set idPatient, status, idSample, val("${idSample}_${status}.md.bam"), val("${idSample}_${status}.md.bai"), val("${idSample}.recal.table") into recalibrationTableTSV - when: ( step == 'mapping' || step == 'realign' ) && !params.onlyQC + when: ( step == 'mapping' ) && !params.onlyQC script: known = knownIndels.collect{ "--known-sites ${it}" }.join(' ') @@ -542,7 +540,6 @@ def defineReferenceMap() { def defineStepList() { return [ 'mapping', - 'realign', 'recalibrate' ] } @@ -675,7 +672,6 @@ def helpMessage() { log.info " Option to start workflow" log.info " Possible values are:" log.info " mapping (default, will start workflow with FASTQ files)" - log.info " realign (will start workflow with non-realigned BAM files)" log.info " recalibrate (will start workflow with non-recalibrated BAM files)" log.info " --noReports" log.info " Disable QC tools and MultiQC to generate a HTML report" From b21dc688a1bb7e8c25c65aa9835a0c43a72a7c24 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Thu, 2 Aug 2018 15:31:56 +0200 Subject: [PATCH 33/38] removing realign parts --- scripts/test.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/test.sh b/scripts/test.sh index c57abbcabd..6fda73b9a5 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -91,7 +91,6 @@ fi if [[ ALL,STEP =~ $TEST ]] then run_wrapper --germline --sampleDir Sarek-data/testdata/tiny/normal - run_wrapper --germline --step realign --noReports run_wrapper --germline --step recalibrate --noReports clean_repo fi @@ 
-104,7 +103,7 @@ fi if [[ ALL,TOOLS =~ $TEST ]] then - run_wrapper --somatic --sample $SAMPLE --variantCalling --tools FreeBayes,HaplotypeCaller,MuTect2 + run_wrapper --somatic --sample $SAMPLE --variantCalling --tools FreeBayes,HaplotypeCaller,Mutect2 fi if [[ ALL,MANTA =~ $TEST ]] From db137ce23d851439f5d7b7b353cac9ca9298a8f5 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Thu, 2 Aug 2018 15:38:09 +0200 Subject: [PATCH 34/38] small review changes --- annotate.nf | 2 +- configuration/uppmax-localhost.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/annotate.nf b/annotate.nf index 8d45ae3fff..a87f252fbc 100644 --- a/annotate.nf +++ b/annotate.nf @@ -73,7 +73,7 @@ vcfToAnnotate = Channel.create() vcfNotToAnnotate = Channel.create() if (annotateVCF == []) { -// by default we annotate both germline and somatic results that we can find in the VariantCalling directory +// we annote all available vcfs by default that we can find in the VariantCalling directory Channel.empty().mix( Channel.fromPath("${directoryMap.haplotypecaller}/*.vcf.gz") .flatten().map{vcf -> ['haplotypecaller', vcf]}, diff --git a/configuration/uppmax-localhost.config b/configuration/uppmax-localhost.config index 4e6abb1f64..d10f5a57e1 100644 --- a/configuration/uppmax-localhost.config +++ b/configuration/uppmax-localhost.config @@ -14,7 +14,7 @@ env { params { genome_base = params.genome == 'GRCh37' ? '/sw/data/uppnex/ToolBox/ReferenceAssemblies/hg38make/bundle/2.8/b37' : params.genome == 'GRCh38' ? 
'/sw/data/uppnex/ToolBox/hg38bundle' : 'References/smallGRCh37' singleCPUMem = 8.GB - totalMemory = 92.GB // change to 240 on irma + totalMemory = 104.GB // change to 240 on irma } executor { From 97d3fc5c991b9ce0b5ce9ce0d79425cc2800618a Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Sat, 4 Aug 2018 02:27:41 +0200 Subject: [PATCH 35/38] adding singularty environments for tests --- scripts/test.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/test.sh b/scripts/test.sh index 6fda73b9a5..bcd017d1f1 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -9,6 +9,18 @@ SAMPLE=Sarek-data/testdata/tsv/tiny.tsv TEST=ALL TRAVIS=${TRAVIS:-false} +TMPDIR=`pwd`/tmp +mkdir -p $TMPDIR +export NXF_SINGULARITY_CACHEDIR=$TMPDIR +export NXF_TEMP=$TMPDIR + +export SINGULARITY_TMPDIR=$TMPDIR +export SINGULARITY_CACHEDIR=$TMPDIR + + +# remove Reference directory +rm -rf References + while [[ $# -gt 0 ]] do key=$1 From 98cf879e1375d10ca75f72daa6cc2e4af203274d Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Sat, 4 Aug 2018 02:28:13 +0200 Subject: [PATCH 36/38] VEP fancy parametrisation --- annotate.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/annotate.nf b/annotate.nf index a87f252fbc..d3571a15cd 100644 --- a/annotate.nf +++ b/annotate.nf @@ -228,11 +228,12 @@ process RunVEP { finalannotator = annotator == "snpeff" ? 'merge' : 'vep' genome = params.genome == 'smallGRCh37' ? 
'GRCh37' : params.genome """ - vep \ + vep --dir /opt/vep/.vep/ \ -i ${vcf} \ -o ${vcf.simpleName}_VEP.ann.vcf \ --assembly ${genome} \ --cache \ + --cache_version 91 \ --database \ --everything \ --filter_common \ From 393da8b72d4e58f439008696d628222e9f729e7e Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Tue, 14 Aug 2018 20:27:06 +0200 Subject: [PATCH 37/38] memory adjustments for samtools and MarkDuplicates --- configuration/uppmax-localhost.config | 4 ++-- configuration/uppmax-slurm.config | 4 +++- main.nf | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/configuration/uppmax-localhost.config b/configuration/uppmax-localhost.config index bb234a8762..f5907f6181 100644 --- a/configuration/uppmax-localhost.config +++ b/configuration/uppmax-localhost.config @@ -70,9 +70,9 @@ process { memory = {params.totalMemory} } $MarkDuplicates { - // for deep sequencing we do need all the memory + // Actually the -Xmx value should be kept lower cpus = 16 - memory = {params.totalMemory} + memory = {2 * params.singleCPUMem} } $MergeBams { cpus = 16 diff --git a/configuration/uppmax-slurm.config b/configuration/uppmax-slurm.config index 07f95a74be..821a3855d8 100644 --- a/configuration/uppmax-slurm.config +++ b/configuration/uppmax-slurm.config @@ -52,7 +52,9 @@ process { time = {params.runTime * task.attempt} } $MarkDuplicates { - // looks like overkill, but for deep sequencing we do need all the memory + // Actually the -Xmx value should be kept lower + cpus = 16 + memory = {2 * params.singleCPUMem} } $MergeBams { memory = {params.singleCPUMem * task.attempt} diff --git a/main.nf b/main.nf index 85f22d8a8e..ecd26d3461 100644 --- a/main.nf +++ b/main.nf @@ -179,7 +179,7 @@ process MapReads { """ bwa mem -R \"${readGroup}\" ${extra} -t ${task.cpus} -M \ ${genomeFile} ${fastqFile1} ${fastqFile2} | \ - samtools sort --threads ${task.cpus} -m 4G - > ${idRun}.bam + samtools sort --threads ${task.cpus} -m 2G - > ${idRun}.bam """ } From 
9d20c68e5298d380485586b40f28deb65d7d7bf9 Mon Sep 17 00:00:00 2001 From: Szilveszter Juhos Date: Wed, 15 Aug 2018 00:26:37 +0200 Subject: [PATCH 38/38] s/sarek-core/sarek/ also deleting build.sh --- containers/sarek/build.sh | 103 ------------------------------- containers/sarek/environment.yml | 2 +- 2 files changed, 1 insertion(+), 104 deletions(-) delete mode 100755 containers/sarek/build.sh diff --git a/containers/sarek/build.sh b/containers/sarek/build.sh deleted file mode 100755 index fbe57b5aee..0000000000 --- a/containers/sarek/build.sh +++ /dev/null @@ -1,103 +0,0 @@ -#!/bin/sh -set -eu pipefail - -# Local ENV variables -BCFTOOLS_VERSION=1.5 -BWA_VERSION=0.7.16 -HTSLIB_VERSION=1.5 -MANTA_VERSION=1.1.1 -SAMTOOLS_VERSION=1.5 -STRELKA_VERSION=2.8.2 - -# Install libraries -apt-get update && apt-get install -y --no-install-recommends \ - build-essential \ - bzip2 \ - ca-certificates \ - g++ \ - gcc \ - git \ - libbz2-dev \ - liblzma-dev \ - libncurses5-dev \ - libncursesw5-dev \ - make \ - python \ - python3 \ - unzip \ - wget \ - zlib1g-dev - -# Install tools -mkdir /build - -# Install BWA -cd /build -git clone http://github.com/lh3/bwa.git bwa \ - --branch v${BWA_VERSION} -cd bwa -make -cp bwa /usr/local/bin/bwa - -# Install HTSlib -cd /build -wget --quiet -O htslib-${HTSLIB_VERSION}.tar.bz2 \ - https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2 -tar xfj htslib-${HTSLIB_VERSION}.tar.bz2 -rm htslib-${HTSLIB_VERSION}.tar.bz2 -cd htslib-${HTSLIB_VERSION} -./configure --prefix=/opt/htslib -make && make install - -# Install BCFtools -cd /build -wget --quiet -O bcftools-${BCFTOOLS_VERSION}.tar.bz2 \ - https://github.com/samtools/bcftools/releases/download/${BCFTOOLS_VERSION}/bcftools-${BCFTOOLS_VERSION}.tar.bz2 -tar xfj bcftools-${BCFTOOLS_VERSION}.tar.bz2 -rm bcftools-${BCFTOOLS_VERSION}.tar.bz2 -cd bcftools-${BCFTOOLS_VERSION} -./configure --prefix=/opt/bcftools -make && make install - -# Install Samtools -cd 
/build -wget --quiet -O samtools-${SAMTOOLS_VERSION}.tar.bz2 \ - https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2 -tar xfj samtools-${SAMTOOLS_VERSION}.tar.bz2 -rm samtools-${SAMTOOLS_VERSION}.tar.bz2 -cd samtools-${SAMTOOLS_VERSION} -./configure --prefix=/opt/samtools -make && make install - -# Install Manta -cd /build -wget --quiet -O manta-${MANTA_VERSION}.centos5_x86_64.tar.bz2 \ - https://github.com/Illumina/manta/releases/download/v${MANTA_VERSION}/manta-${MANTA_VERSION}.centos5_x86_64.tar.bz2 -tar xvjf manta-${MANTA_VERSION}.centos5_x86_64.tar.bz2 -mv manta-${MANTA_VERSION}.centos5_x86_64 $MANTA_INSTALL_PATH -rm manta-${MANTA_VERSION}.centos5_x86_64.tar.bz2 - -# Install Strelka -cd /build -wget --quiet -O strelka-${STRELKA_VERSION}.centos5_x86_64.tar.bz2 \ - https://github.com/Illumina/strelka/releases/download/v${STRELKA_VERSION}/strelka-${STRELKA_VERSION}.centos5_x86_64.tar.bz2 -tar xvjf strelka-${STRELKA_VERSION}.centos5_x86_64.tar.bz2 -mv strelka-${STRELKA_VERSION}.centos5_x86_64 $STRELKA_INSTALL_PATH -rm strelka-${STRELKA_VERSION}.centos5_x86_64.tar.bz2 - -# Clean up install -cd / -apt-get remove -y \ - build-essential \ - ca-certificates \ - gcc \ - git \ - libbz2-dev \ - liblzma-dev \ - libncurses5-dev \ - libncursesw5-dev \ - unzip \ - wget \ - zlib1g-dev -apt-get clean -rm -rf /build /var/lib/apt/lists/* /opt/get-pip.py diff --git a/containers/sarek/environment.yml b/containers/sarek/environment.yml index 25f08613f0..af3ea825f4 100644 --- a/containers/sarek/environment.yml +++ b/containers/sarek/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: sarek-core +name: sarek channels: - bioconda - conda-forge