From 334b873d14a75b743688ff364a54e381856ee475 Mon Sep 17 00:00:00 2001 From: RoanKanninga Date: Fri, 23 Nov 2018 12:07:12 +0100 Subject: [PATCH 1/3] skip NC_001422.1 chromosome in gnomAD check since it doesnot exist --- conf/exoom.cfg | 57 ++++++ parameters_exoom.txt | 57 ++++++ protocols/AnnotateVcf.sh | 4 +- protocols/CreateInhouseProjects.sh.save | 252 ++++++++++++++++++++++++ templates/generate_template_new.sh | 109 ++++++++++ 5 files changed, 477 insertions(+), 2 deletions(-) create mode 100644 conf/exoom.cfg create mode 100644 parameters_exoom.txt create mode 100644 protocols/CreateInhouseProjects.sh.save create mode 100755 templates/generate_template_new.sh diff --git a/conf/exoom.cfg b/conf/exoom.cfg new file mode 100644 index 00000000..21a3c8e9 --- /dev/null +++ b/conf/exoom.cfg @@ -0,0 +1,57 @@ +protocols/AnnotateVcf.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=4 +protocols/BaseRecalibrator.sh #MOLGENIS walltime=23:59:00 mem=10gb ppn=8 +protocols/BwaAlignAndSortSam.sh #MOLGENIS walltime=23:59:00 nodes=1 ppn=4 mem=13gb +protocols/CartegeniaFiltering.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 +protocols/CartegeniaTree.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 +protocols/CheckOutput.sh #MOLGENIS walltime=00:30:00 mem=1gb +protocols/CmdLineAnnotator.sh #MOLGENIS walltime=05:59:00 mem=12gb ppn=2 +protocols/CollectBamIndexMetrics.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=3 +protocols/CollectGCBiasMetrics.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=3 +protocols/CollectHSMetrics.sh #MOLGENIS walltime=05:59:00 mem=5gb ppn=2 +protocols/CollectMultipleMetrics.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=3 +protocols/CollectWgsMetrics.sh #MOLGENIS walltime=11:59:00 mem=6gb ppn=3 +protocols/CompressingFinalVcf.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=1 +protocols/ConcordanceCheck.sh #MOLGENIS walltime=05:59:59 mem=10gb ppn=1 +protocols/Convading.sh #MOLGENIS walltime=05:59:00 mem=4gb ppn=1 +protocols/CopyGvcfToPrm.sh #MOLGENIS walltime=02:00:00 mem=4gb queue=duo-ds +protocols/CopyPrmTmpData.sh #MOLGENIS walltime=02:00:00 mem=4gb +protocols/CopyResultsGavinStandAlone.sh #MOLGENIS walltime=01:59:00 mem=1gb +protocols/CopyToResultsDir.sh #MOLGENIS walltime=05:59:00 nodes=1 cores=1 mem=4gb +protocols/CountAllFinishedFiles.sh #MOLGENIS walltime=00:05:00 mem=1gb +protocols/CoverageCalculations.sh #MOLGENIS walltime=05:59:00 mem=12gb nodes=1 ppn=1 +protocols/CramConversion.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=9 +protocols/CreateExternSamplesProjects.sh #MOLGENIS walltime=02:00:00 mem=4gb +protocols/CreateInhouseProjects.sh #MOLGENIS walltime=02:00:00 mem=4gb +protocols/DecisionTree.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 +protocols/DetermineTrio.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 +protocols/FastQC.sh #MOLGENIS ppn=1 mem=2gb walltime=05:59:00 +protocols/FlagstatMetrics.sh #MOLGENIS walltime=03:00:00 mem=30gb ppn=5 +protocols/Gavin.sh #MOLGENIS walltime=05:59:00 mem=6gb +protocols/GenderCalculate.sh #MOLGENIS ppn=4 mem=6gb walltime=03:00:00 +protocols/GenderCheck.sh #MOLGENIS ppn=4 mem=6gb walltime=00:30:00 +protocols/IndelFiltration.sh #MOLGENIS walltime=05:59:00 mem=10gb +protocols/InSilicoConcordance.sh #MOLGENIS ppn=1 mem=5gb walltime=00:20:00 +protocols/MakeDedupBamMd5.sh #MOLGENIS walltime=01:00:00 mem=4gb +protocols/MantaAnnotation.sh #MOLGENIS walltime=23:59:00 mem=6gb ppn=2 +protocols/Manta.sh #MOLGENIS walltime=16:00:00 mem=30gb ppn=8 +protocols/MarkDuplicates.sh #MOLGENIS walltime=16:00:00 mem=30gb ppn=5 +protocols/MergeBam.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 +protocols/MergeBatches.sh #MOLGENIS walltime=05:59:00 mem=13gb ppn=2 +protocols/MergeIndelsAndSnps.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=1 +protocols/MultiQC.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 +protocols/PrepareFastQ.sh #MOLGENIS ppn=4 mem=8gb walltime=07:00:00 +protocols/PrepareVcf.sh #MOLGENIS ppn=4 mem=8gb walltime=07:00:00 +protocols/QCReport.sh #MOLGENIS walltime=00:20:00 mem=4gb ppn=1 +protocols/QCStats.sh #MOLGENIS ppn=1 mem=8gb walltime=01:00:00 +protocols/SnpEff.sh #MOLGENIS walltime=23:59:00 mem=5gb ppn=2 +protocols/SnpFiltration.sh #MOLGENIS walltime=05:59:00 mem=10gb +protocols/SplitIndelsAndSNPs.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=2 +protocols/StartPipeline.sh #MOLGENIS walltime=02:00:00 mem=4gb +protocols/Template.sh #MOLGENIS walltime=23:59:00 mem=5gb ppn=10 +protocols/VariantCalling.sh #MOLGENIS walltime=23:59:00 mem=13gb ppn=1 +protocols/VariantCombine.sh #MOLGENIS walltime=23:59:00 mem=32gb ppn=4 +protocols/VariantConcordanceGenotyping.sh #MOLGENIS walltime=23:59:00 mem=17gb ppn=2 +protocols/VariantGenotyping.sh #MOLGENIS walltime=23:59:00 mem=17gb ppn=2 +protocols/VcfToTable.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=1 +protocols/VEP.sh #MOLGENIS walltime=23:59:00 mem=6gb ppn=8 +protocols/XHMM.sh #MOLGENIS walltime=05:59:00 mem=4gb ppn=1 diff --git a/parameters_exoom.txt b/parameters_exoom.txt new file mode 100644 index 00000000..21a3c8e9 --- /dev/null +++ b/parameters_exoom.txt @@ -0,0 +1,57 @@ +protocols/AnnotateVcf.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=4 +protocols/BaseRecalibrator.sh #MOLGENIS walltime=23:59:00 mem=10gb ppn=8 +protocols/BwaAlignAndSortSam.sh #MOLGENIS walltime=23:59:00 nodes=1 ppn=4 mem=13gb +protocols/CartegeniaFiltering.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 +protocols/CartegeniaTree.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 +protocols/CheckOutput.sh #MOLGENIS walltime=00:30:00 mem=1gb +protocols/CmdLineAnnotator.sh #MOLGENIS walltime=05:59:00 mem=12gb ppn=2 +protocols/CollectBamIndexMetrics.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=3 +protocols/CollectGCBiasMetrics.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=3 +protocols/CollectHSMetrics.sh #MOLGENIS walltime=05:59:00 mem=5gb ppn=2 +protocols/CollectMultipleMetrics.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=3 +protocols/CollectWgsMetrics.sh #MOLGENIS walltime=11:59:00 mem=6gb ppn=3 +protocols/CompressingFinalVcf.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=1 +protocols/ConcordanceCheck.sh #MOLGENIS walltime=05:59:59 mem=10gb ppn=1 +protocols/Convading.sh #MOLGENIS walltime=05:59:00 mem=4gb ppn=1 +protocols/CopyGvcfToPrm.sh #MOLGENIS walltime=02:00:00 mem=4gb queue=duo-ds +protocols/CopyPrmTmpData.sh #MOLGENIS walltime=02:00:00 mem=4gb +protocols/CopyResultsGavinStandAlone.sh #MOLGENIS walltime=01:59:00 mem=1gb +protocols/CopyToResultsDir.sh #MOLGENIS walltime=05:59:00 nodes=1 cores=1 mem=4gb +protocols/CountAllFinishedFiles.sh #MOLGENIS walltime=00:05:00 mem=1gb +protocols/CoverageCalculations.sh #MOLGENIS walltime=05:59:00 mem=12gb nodes=1 ppn=1 +protocols/CramConversion.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=9 +protocols/CreateExternSamplesProjects.sh #MOLGENIS walltime=02:00:00 mem=4gb +protocols/CreateInhouseProjects.sh #MOLGENIS walltime=02:00:00 mem=4gb +protocols/DecisionTree.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 +protocols/DetermineTrio.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 +protocols/FastQC.sh #MOLGENIS ppn=1 mem=2gb walltime=05:59:00 +protocols/FlagstatMetrics.sh #MOLGENIS walltime=03:00:00 mem=30gb ppn=5 +protocols/Gavin.sh #MOLGENIS walltime=05:59:00 mem=6gb +protocols/GenderCalculate.sh #MOLGENIS ppn=4 mem=6gb walltime=03:00:00 +protocols/GenderCheck.sh #MOLGENIS ppn=4 mem=6gb walltime=00:30:00 +protocols/IndelFiltration.sh #MOLGENIS walltime=05:59:00 mem=10gb +protocols/InSilicoConcordance.sh #MOLGENIS ppn=1 mem=5gb walltime=00:20:00 +protocols/MakeDedupBamMd5.sh #MOLGENIS walltime=01:00:00 mem=4gb +protocols/MantaAnnotation.sh #MOLGENIS walltime=23:59:00 mem=6gb ppn=2 +protocols/Manta.sh #MOLGENIS walltime=16:00:00 mem=30gb ppn=8 +protocols/MarkDuplicates.sh #MOLGENIS walltime=16:00:00 mem=30gb ppn=5 +protocols/MergeBam.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 +protocols/MergeBatches.sh #MOLGENIS walltime=05:59:00 mem=13gb ppn=2 +protocols/MergeIndelsAndSnps.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=1 +protocols/MultiQC.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 +protocols/PrepareFastQ.sh #MOLGENIS ppn=4 mem=8gb walltime=07:00:00 +protocols/PrepareVcf.sh #MOLGENIS ppn=4 mem=8gb walltime=07:00:00 +protocols/QCReport.sh #MOLGENIS walltime=00:20:00 mem=4gb ppn=1 +protocols/QCStats.sh #MOLGENIS ppn=1 mem=8gb walltime=01:00:00 +protocols/SnpEff.sh #MOLGENIS walltime=23:59:00 mem=5gb ppn=2 +protocols/SnpFiltration.sh #MOLGENIS walltime=05:59:00 mem=10gb +protocols/SplitIndelsAndSNPs.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=2 +protocols/StartPipeline.sh #MOLGENIS walltime=02:00:00 mem=4gb +protocols/Template.sh #MOLGENIS walltime=23:59:00 mem=5gb ppn=10 +protocols/VariantCalling.sh #MOLGENIS walltime=23:59:00 mem=13gb ppn=1 +protocols/VariantCombine.sh #MOLGENIS walltime=23:59:00 mem=32gb ppn=4 +protocols/VariantConcordanceGenotyping.sh #MOLGENIS walltime=23:59:00 mem=17gb ppn=2 +protocols/VariantGenotyping.sh #MOLGENIS walltime=23:59:00 mem=17gb ppn=2 +protocols/VcfToTable.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=1 +protocols/VEP.sh #MOLGENIS walltime=23:59:00 mem=6gb ppn=8 +protocols/XHMM.sh #MOLGENIS walltime=05:59:00 mem=4gb ppn=1 diff --git a/protocols/AnnotateVcf.sh b/protocols/AnnotateVcf.sh index c3f56c81..887a5cee 100644 --- a/protocols/AnnotateVcf.sh +++ b/protocols/AnnotateVcf.sh @@ -75,9 +75,9 @@ then then echo -e "\n[[annotation]]\nfile=\"${gnomADGenomesAnnotation}/gnomad.genomes.r2.0.2.sites.chrX.normalized.vcf.gz\"\nfields=[\"AF_POPMAX\",\"segdup\"]\nnames=[\"gnomAD_genome_AF_MAX\",\"gnomAD_genome_RF_Filter\"]\nops=[\"self\",\"self\"]" >> "${vcfAnnoGnomadGenomesConf}" echo -e "\n[[annotation]]\nfile=\"${gonlAnnotation}/gonl.chrX.release4.gtc.vcf.gz\"\nfields=[\"AC\",\"AN\"]\nnames=[\"GoNL_AC\",\"GoNL_AN\"]\nops=[\"self\",\"first\"]" >> "${vcfAnnoGnomadGenomesConf}" - elif [[ ${batchID} == *"Y"* || ${batchID} == *"MT"* ]] + elif [[ ${batchID} == *"Y"* || ${batchID} == *"NC_001422.1"* || ${batchID} == *"MT"* ]] then - echo "chromosome Y/MT is not in gnomAD, do nothing" + echo "chromosome Y/MT/NC_001422.1 is not in gnomAD, do nothing" else echo -e "\n[[annotation]]\nfile=\"${gnomADGenomesAnnotation}/gnomad.genomes.r2.0.2.sites.chr${batchID}.normalized.vcf.gz\"\nfields=[\"AF_POPMAX\",\"segdup\"]\nnames=[\"gnomAD_genome_AF_MAX\",\"gnomAD_genome_RF_Filter\"]\nops=[\"self\",\"self\"]" >> "${vcfAnnoGnomadGenomesConf}" echo -e "\n[[annotation]]\nfile=\"${gonlAnnotation}/gonl.chrCombined.snps_indels.r5.vcf.gz\"\nfields=[\"AC\",\"AN\"]\nnames=[\"GoNL_AC\",\"GoNL_AN\"]\nops=[\"self\",\"first\"]" >> "${vcfAnnoGnomadGenomesConf}" diff --git a/protocols/CreateInhouseProjects.sh.save b/protocols/CreateInhouseProjects.sh.save new file mode 100644 index 00000000..cec5a27d --- /dev/null +++ b/protocols/CreateInhouseProjects.sh.save @@ -0,0 +1,252 @@ +#MOLGENIS walltime=02:00:00 mem=4gb + +#string tmpName +#list seqType +#string project +#string projectRawArrayTmpDataDir +#string projectRawTmpDataDir +#string projectJobsDir +#string projectLogsDir +#string intermediateDir +#string projectResultsDir +#string projectQcDir +#string computeVersion +#string group_parameters +#string groupname + +#list sequencingStartDate +#list sequencer +#list run +#list flowcell +#list barcode +#list lane +#list externalSampleID + +#string mainParameters +#string worksheet +#string outputdir +#string workflowpath +#string tmpdir_parameters +#string environment_parameters +#string ngsversion +#string ngsUtilsVersion + +#string dataDir + +#string coveragePerBaseDir +#string coveragePerTargetDir + +#string project +#string logsDir + +umask 0007 +module load "${ngsUtilsVersion}" +module load "${ngsversion}" + +array_contains () { + local array="$1[@]" + local seeking="${2}" + local in=1 + rejected="false" + for element in "${!array-}"; do + if [[ "${element}" == "${seeking}" ]]; then + in=0 + rejected="true" + continue + fi + done +} + +# +# Create project dirs. +# +mkdir -p "${projectRawArrayTmpDataDir}" +mkdir -p "${projectRawTmpDataDir}" +mkdir -p "${projectJobsDir}" +mkdir -p "${projectLogsDir}" +mkdir -p "${intermediateDir}" +mkdir -p "${projectResultsDir}/alignment/" +mkdir -p "${projectResultsDir}/qc/statistics/" +mkdir -p "${projectResultsDir}/variants/cnv/" +mkdir -p "${projectResultsDir}/variants/gVCF/" +mkdir -p "${projectResultsDir}/variants/GAVIN/" +mkdir -p "${projectQcDir}" +mkdir -p "${intermediateDir}/GeneNetwork/" +mkdir -p -m 2770 "${logsDir}/${project}/" +# +# Create symlinks to the raw data required to analyse this project. +# Do this for each sequence file and it's accompanying MD5 checksum. +# (There may be multiple sequence files per sample) +# +rocketPoint=$(pwd) +cd "${projectRawTmpDataDir}" +max_index=${#externalSampleID[@]}-1 + +for ((samplenumber = 0; samplenumber <= max_index; samplenumber++)) +do + if [[ "${seqType[samplenumber]}" == 'SR' ]] + then + if [[ "${barcode[samplenumber]}" == 'None' ]] + then + ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}.fq.gz" \ + "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}.fq.gz" + ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}.fq.gz.md5" \ + "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}.fq.gz.md5" + else + ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}.fq.gz" \ + "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}.fq.gz" + ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}.fq.gz.md5" \ + "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}.fq.gz.md5" + fi + elif [[ "${seqType[samplenumber]}" == 'PE' ]] + then + if [[ "${barcode[samplenumber]}" == 'None' ]] + then + ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_1.fq.gz" \ + "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_1.fq.gz" + ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_2.fq.gz" \ + "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_2.fq.gz" + ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_1.fq.gz.md5" \ + "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_1.fq.gz.md5" + ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_2.fq.gz.md5" \ + "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_2.fq.gz.md5" + else + array_contains arrayRejected "${barcode[samplenumber]}" + if [ "${rejected}" == "false" ] + then + + ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_1.fq.gz" \ + "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_1.fq.gz" + ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_2.fq.gz" \ + "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_2.fq.gz" + ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_1.fq.gz.md5" \ + "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_1.fq.gz.md5" + ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_2.fq.gz.md5" \ + "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_2.fq.gz.md5" + else + echo -e "\n############ barcode: ${barcode[samplenumber]} IS REJECTED#######################\n" + fi + fi + fi +done + +# +# Create subset of samples for this project. +# +extract_samples_from_GAF_list.pl --i "${worksheet}" --o "${projectJobsDir}/${project}.csv" --c project --q "${project}" +sampleSheetCsv="${projectJobsDir}/${project}.csv" +perl -pi -e 's/\r(?!\n)//g' "${sampleSheetCsv}" +barcodesGrepCommand="" + +# +# Execute MOLGENIS/compute to create job scripts to analyse this project. +# + + +cd "${rocketPoint}" +rm -f "${projectJobsDir}/${project}.filteredRejected.csv" +rm -f "${intermediateDir}/${project}.filteredBarcodes.csv" + +if [ -f "rejectedBarcodes.txt" ] +then + size=$(cat "rejectedBarcodes.txt" | wc -l) + teller=1 + + while read line + do + if [[ "${teller}" -lt "${size}" ]] + then + barcodesGrepCommand+="${line}|" + elif [ "${teller}" == ${size} ] + then + echo "last line" + barcodesGrepCommand+="${line}" + fi + teller=$((teller+1)) + done "${projectJobsDir}/${project}.filteredRejected.csv" + grep -E "${barcodesGrepCommand}" "${sampleSheetCsv}" > "${intermediateDir}/${project}.filteredBarcodes.csv" + cp "${sampleSheetCsv}" "${projectJobsDir}/${project}.original.csv" + samplesheetCsv="${projectJobsDir}/${project}.filteredRejected.csv" +fi +if [[ -f .compute.properties ]] +then + rm .compute.properties +fi + +batching="_small" + +capturingKitProject=$(python "${EBROOTNGS_DNA}/scripts/getCapturingKit.py" "${sampleSheetCsv}" | sed 's|\\||') +captKit=$(echo "${capturingKitProject}" | awk 'BEGIN {FS="/"}{print $2}') + +if [ ! -d "${dataDir}/${capturingKitProject}" ] +then + echo "Bedfile does not exist! Exiting" + echo "ls ${dataDir}/${capturingKitProject}" + exit 1 +fi + +if [[ "${capturingKitProject,,}" == *"exoom"* || "${capturingKitProject,,}" == *"exome"* || "${capturingKitProject,,}" == *"all_exon_v1"* || "${capturingKitProject,,}" == *"wgs"* ]] +then + batching="_chr" + if [ ! -e "${coveragePerTargetDir}/${captKit}/${captKit}" ] + then + echo "Bedfile in ${coveragePerTargetDir} does not exist! Exiting" + echo "ls ${coveragePerTargetDir}/${captKit}/${captKit}" + exit 1 + fi +else + if [ ! -e "${coveragePerBaseDir}/${captKit}/${captKit}" ] + then + echo "Bedfile in ${coveragePerBaseDir} does not exist! Exiting" + echo "ls ${coveragePerTargetDir}/${captKit}/${captKit}" + exit 1 + fi +fi + +if [ "${captKit}" == *"ONCO"* ] +then + if [ ! -f ${dataDir}/${capturingKitProject}/human_g1k_v37/GSA_SNPS.bed + then + echo "cannot do concordance check later on since ${dataDir}/${capturingKitProject}/human_g1k_v37/GSA_SNPS.bed is missing! EXIT!" + exit 1 + fi +fi + +if [[ ]] + +cd ../tmp/ +head -1 protocols/*.sh > parameters_exoom.txt + +echo "BATCHIDLIST=${EBROOTNGS_DNA}/batchIDList${batching}.csv" + +sh "${EBROOTMOLGENISMINCOMPUTE}/molgenis_compute.sh" \ +-p "${mainParameters}" \ +-p "${EBROOTNGS_DNA}/batchIDList${batching}.csv" \ +-p "${sampleSheetCsv}" \ +-p "${environment_parameters}" \ +-p "${group_parameters}" \ +-p "${tmpdir_parameters}" \ +-rundir "${projectJobsDir}" \ +--header "${EBROOTNGS_DNA}/templates/slurm/header_tnt.ftl" \ +--footer "${EBROOTNGS_DNA}/templates/slurm/footer_tnt.ftl" \ +--submit "${EBROOTNGS_DNA}/templates/slurm/submit.ftl" \ +-w "${workflowpath}" \ +-b slurm \ +-g \ +-weave \ +-runid "${runid}" \ +-o "ngsversion=${ngsversion};\ +batchIDList=${EBROOTNGS_DNA}/batchIDList${batching}.csv;\ +groupname=${groupname}" + + +if [ -f "${intermediateDir}/${project}.filteredBarcodes.csv" ] +then + echo -e "\n################### THE FOLLOWING LINES ARE REJECTED BECAUSE OF TOO LOW PERCENTAGE READS ###############\n" + cat "${intermediateDir}/${project}.filteredBarcodes.csv" + cat "${intermediateDir}/${project}.filteredBarcodes.csv" > "${logsDir}/${project}/${runid}.pipeline.rejectedsamples" +fi diff --git a/templates/generate_template_new.sh b/templates/generate_template_new.sh new file mode 100755 index 00000000..9b405369 --- /dev/null +++ b/templates/generate_template_new.sh @@ -0,0 +1,109 @@ +#!/bin/bash + +module load NGS_DNA/3.5.1 +module list +host=$(hostname -s) +environmentParameters="parameters_${host}" + +function showHelp() { + # + # Display commandline help on STDOUT. + # + cat < ${i%.*} ; rm "${i}" ;done + +build="b37" +species="homo_sapiens" + +if [ -s build.txt ]; then build=$(cat build.txt);fi +if [ -s species.txt ];then species=$(cat species.txt); fi + +sampleSize=$(cat externalSampleIDs.txt | wc -l) ; echo "Samplesize is ${sampleSize}" + +if [ $sampleSize -gt 199 ];then workflow=${EBROOTNGS_DNA}/workflow_samplesize_bigger_than_200.csv ; else workflow=${EBROOTNGS_DNA}/workflow.csv ;fi + +### Converting parameters to compute parameters +echo "tmpName,${tmpDirectory}" > ${genScripts}/tmpdir_parameters.csv +perl "${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl" "${genScripts}/tmpdir_parameters.csv" > "${genScripts}/parameters_tmpdir_converted.csv" +perl "${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl" "${EBROOTNGS_DNA}/parameters.csv" > "${genScripts}/parameters_converted.csv" +perl "${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl" "${EBROOTNGS_DNA}/parameters_${group}.csv" > "${genScripts}/parameters_group_converted.csv" +perl "${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl" "${EBROOTNGS_DNA}/${environmentParameters}.csv" > "${genScripts}/parameters_environment_converted.csv" + +## has to be set, otherwise it will crash due to parameters which are not set, this variable will be updated in the next step +batching="_small" + +## make a copy of the pipeline to get correct resources depending on which run (exome, panel or WGS) and the number of samples +mkdir tmp/ +cp -r ${EBROOTNGS_DNA} tmp/ + +sh "${EBROOTMOLGENISMINCOMPUTE}/molgenis_compute.sh" \ +-p "${genScripts}/parameters_converted.csv" \ +-p "${genScripts}/parameters_tmpdir_converted.csv" \ +-p "${EBROOTNGS_DNA}/batchIDList${batching}.csv" \ +-p "${genScripts}/parameters_group_converted.csv" \ +-p "${genScripts}/parameters_environment_converted.csv" \ +-p "${genScripts}/${filePrefix}.csv" \ +-w "${EBROOTNGS_DNA}/create_in-house_ngs_projects_workflow.csv" \ +-rundir "${genScripts}/scripts" \ +--runid "${runID}" \ +-o workflowpath="${workflow};\ +outputdir=scripts/jobs;mainParameters=${genScripts}/parameters_converted.csv;\ +group_parameters=${genScripts}/parameters_group_converted.csv;\ +groupname=${group};\ +ngsversion=$(module list | grep -o -P 'NGS_DNA(.+)');\ +environment_parameters=${genScripts}/parameters_environment_converted.csv;\ +tmpdir_parameters=${genScripts}/parameters_tmpdir_converted.csv;\ +worksheet=${genScripts}/${filePrefix}.csv" \ +-weave \ +--generate From d6bc566c2d7b320e2c4bacc10526434043e9309f Mon Sep 17 00:00:00 2001 From: RoanKanninga Date: Fri, 23 Nov 2018 12:15:33 +0100 Subject: [PATCH 2/3] removed stuff that should not be in the repo already --- conf/exoom.cfg | 57 --------------- parameters_exoom.txt | 57 --------------- templates/generate_template_new.sh | 109 ----------------------------- 3 files changed, 223 deletions(-) delete mode 100644 conf/exoom.cfg delete mode 100644 parameters_exoom.txt delete mode 100755 templates/generate_template_new.sh diff --git a/conf/exoom.cfg b/conf/exoom.cfg deleted file mode 100644 index 21a3c8e9..00000000 --- a/conf/exoom.cfg +++ /dev/null @@ -1,57 +0,0 @@ -protocols/AnnotateVcf.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=4 -protocols/BaseRecalibrator.sh #MOLGENIS walltime=23:59:00 mem=10gb ppn=8 -protocols/BwaAlignAndSortSam.sh #MOLGENIS walltime=23:59:00 nodes=1 ppn=4 mem=13gb -protocols/CartegeniaFiltering.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 -protocols/CartegeniaTree.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 -protocols/CheckOutput.sh #MOLGENIS walltime=00:30:00 mem=1gb -protocols/CmdLineAnnotator.sh #MOLGENIS walltime=05:59:00 mem=12gb ppn=2 -protocols/CollectBamIndexMetrics.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=3 -protocols/CollectGCBiasMetrics.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=3 -protocols/CollectHSMetrics.sh #MOLGENIS walltime=05:59:00 mem=5gb ppn=2 -protocols/CollectMultipleMetrics.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=3 -protocols/CollectWgsMetrics.sh #MOLGENIS walltime=11:59:00 mem=6gb ppn=3 -protocols/CompressingFinalVcf.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=1 -protocols/ConcordanceCheck.sh #MOLGENIS walltime=05:59:59 mem=10gb ppn=1 -protocols/Convading.sh #MOLGENIS walltime=05:59:00 mem=4gb ppn=1 -protocols/CopyGvcfToPrm.sh #MOLGENIS walltime=02:00:00 mem=4gb queue=duo-ds -protocols/CopyPrmTmpData.sh #MOLGENIS walltime=02:00:00 mem=4gb -protocols/CopyResultsGavinStandAlone.sh #MOLGENIS walltime=01:59:00 mem=1gb -protocols/CopyToResultsDir.sh #MOLGENIS walltime=05:59:00 nodes=1 cores=1 mem=4gb -protocols/CountAllFinishedFiles.sh #MOLGENIS walltime=00:05:00 mem=1gb -protocols/CoverageCalculations.sh #MOLGENIS walltime=05:59:00 mem=12gb nodes=1 ppn=1 -protocols/CramConversion.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=9 -protocols/CreateExternSamplesProjects.sh #MOLGENIS walltime=02:00:00 mem=4gb -protocols/CreateInhouseProjects.sh #MOLGENIS walltime=02:00:00 mem=4gb -protocols/DecisionTree.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 -protocols/DetermineTrio.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 -protocols/FastQC.sh #MOLGENIS ppn=1 mem=2gb walltime=05:59:00 -protocols/FlagstatMetrics.sh #MOLGENIS walltime=03:00:00 mem=30gb ppn=5 -protocols/Gavin.sh #MOLGENIS walltime=05:59:00 mem=6gb -protocols/GenderCalculate.sh #MOLGENIS ppn=4 mem=6gb walltime=03:00:00 -protocols/GenderCheck.sh #MOLGENIS ppn=4 mem=6gb walltime=00:30:00 -protocols/IndelFiltration.sh #MOLGENIS walltime=05:59:00 mem=10gb -protocols/InSilicoConcordance.sh #MOLGENIS ppn=1 mem=5gb walltime=00:20:00 -protocols/MakeDedupBamMd5.sh #MOLGENIS walltime=01:00:00 mem=4gb -protocols/MantaAnnotation.sh #MOLGENIS walltime=23:59:00 mem=6gb ppn=2 -protocols/Manta.sh #MOLGENIS walltime=16:00:00 mem=30gb ppn=8 -protocols/MarkDuplicates.sh #MOLGENIS walltime=16:00:00 mem=30gb ppn=5 -protocols/MergeBam.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 -protocols/MergeBatches.sh #MOLGENIS walltime=05:59:00 mem=13gb ppn=2 -protocols/MergeIndelsAndSnps.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=1 -protocols/MultiQC.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 -protocols/PrepareFastQ.sh #MOLGENIS ppn=4 mem=8gb walltime=07:00:00 -protocols/PrepareVcf.sh #MOLGENIS ppn=4 mem=8gb walltime=07:00:00 -protocols/QCReport.sh #MOLGENIS walltime=00:20:00 mem=4gb ppn=1 -protocols/QCStats.sh #MOLGENIS ppn=1 mem=8gb walltime=01:00:00 -protocols/SnpEff.sh #MOLGENIS walltime=23:59:00 mem=5gb ppn=2 -protocols/SnpFiltration.sh #MOLGENIS walltime=05:59:00 mem=10gb -protocols/SplitIndelsAndSNPs.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=2 -protocols/StartPipeline.sh #MOLGENIS walltime=02:00:00 mem=4gb -protocols/Template.sh #MOLGENIS walltime=23:59:00 mem=5gb ppn=10 -protocols/VariantCalling.sh #MOLGENIS walltime=23:59:00 mem=13gb ppn=1 -protocols/VariantCombine.sh #MOLGENIS walltime=23:59:00 mem=32gb ppn=4 -protocols/VariantConcordanceGenotyping.sh #MOLGENIS walltime=23:59:00 mem=17gb ppn=2 -protocols/VariantGenotyping.sh #MOLGENIS walltime=23:59:00 mem=17gb ppn=2 -protocols/VcfToTable.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=1 -protocols/VEP.sh #MOLGENIS walltime=23:59:00 mem=6gb ppn=8 -protocols/XHMM.sh #MOLGENIS walltime=05:59:00 mem=4gb ppn=1 diff --git a/parameters_exoom.txt b/parameters_exoom.txt deleted file mode 100644 index 21a3c8e9..00000000 --- a/parameters_exoom.txt +++ /dev/null @@ -1,57 +0,0 @@ -protocols/AnnotateVcf.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=4 -protocols/BaseRecalibrator.sh #MOLGENIS walltime=23:59:00 mem=10gb ppn=8 -protocols/BwaAlignAndSortSam.sh #MOLGENIS walltime=23:59:00 nodes=1 ppn=4 mem=13gb -protocols/CartegeniaFiltering.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 -protocols/CartegeniaTree.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 -protocols/CheckOutput.sh #MOLGENIS walltime=00:30:00 mem=1gb -protocols/CmdLineAnnotator.sh #MOLGENIS walltime=05:59:00 mem=12gb ppn=2 -protocols/CollectBamIndexMetrics.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=3 -protocols/CollectGCBiasMetrics.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=3 -protocols/CollectHSMetrics.sh #MOLGENIS walltime=05:59:00 mem=5gb ppn=2 -protocols/CollectMultipleMetrics.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=3 -protocols/CollectWgsMetrics.sh #MOLGENIS walltime=11:59:00 mem=6gb ppn=3 -protocols/CompressingFinalVcf.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=1 -protocols/ConcordanceCheck.sh #MOLGENIS walltime=05:59:59 mem=10gb ppn=1 -protocols/Convading.sh #MOLGENIS walltime=05:59:00 mem=4gb ppn=1 -protocols/CopyGvcfToPrm.sh #MOLGENIS walltime=02:00:00 mem=4gb queue=duo-ds -protocols/CopyPrmTmpData.sh #MOLGENIS walltime=02:00:00 mem=4gb -protocols/CopyResultsGavinStandAlone.sh #MOLGENIS walltime=01:59:00 mem=1gb -protocols/CopyToResultsDir.sh #MOLGENIS walltime=05:59:00 nodes=1 cores=1 mem=4gb -protocols/CountAllFinishedFiles.sh #MOLGENIS walltime=00:05:00 mem=1gb -protocols/CoverageCalculations.sh #MOLGENIS walltime=05:59:00 mem=12gb nodes=1 ppn=1 -protocols/CramConversion.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=9 -protocols/CreateExternSamplesProjects.sh #MOLGENIS walltime=02:00:00 mem=4gb -protocols/CreateInhouseProjects.sh #MOLGENIS walltime=02:00:00 mem=4gb -protocols/DecisionTree.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 -protocols/DetermineTrio.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 -protocols/FastQC.sh #MOLGENIS ppn=1 mem=2gb walltime=05:59:00 -protocols/FlagstatMetrics.sh #MOLGENIS walltime=03:00:00 mem=30gb ppn=5 -protocols/Gavin.sh #MOLGENIS walltime=05:59:00 mem=6gb -protocols/GenderCalculate.sh #MOLGENIS ppn=4 mem=6gb walltime=03:00:00 -protocols/GenderCheck.sh #MOLGENIS ppn=4 mem=6gb walltime=00:30:00 -protocols/IndelFiltration.sh #MOLGENIS walltime=05:59:00 mem=10gb -protocols/InSilicoConcordance.sh #MOLGENIS ppn=1 mem=5gb walltime=00:20:00 -protocols/MakeDedupBamMd5.sh #MOLGENIS walltime=01:00:00 mem=4gb -protocols/MantaAnnotation.sh #MOLGENIS walltime=23:59:00 mem=6gb ppn=2 -protocols/Manta.sh #MOLGENIS walltime=16:00:00 mem=30gb ppn=8 -protocols/MarkDuplicates.sh #MOLGENIS walltime=16:00:00 mem=30gb ppn=5 -protocols/MergeBam.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 -protocols/MergeBatches.sh #MOLGENIS walltime=05:59:00 mem=13gb ppn=2 -protocols/MergeIndelsAndSnps.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=1 -protocols/MultiQC.sh #MOLGENIS walltime=05:59:00 mem=10gb ppn=10 -protocols/PrepareFastQ.sh #MOLGENIS ppn=4 mem=8gb walltime=07:00:00 -protocols/PrepareVcf.sh #MOLGENIS ppn=4 mem=8gb walltime=07:00:00 -protocols/QCReport.sh #MOLGENIS walltime=00:20:00 mem=4gb ppn=1 -protocols/QCStats.sh #MOLGENIS ppn=1 mem=8gb walltime=01:00:00 -protocols/SnpEff.sh #MOLGENIS walltime=23:59:00 mem=5gb ppn=2 -protocols/SnpFiltration.sh #MOLGENIS walltime=05:59:00 mem=10gb -protocols/SplitIndelsAndSNPs.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=2 -protocols/StartPipeline.sh #MOLGENIS walltime=02:00:00 mem=4gb -protocols/Template.sh #MOLGENIS walltime=23:59:00 mem=5gb ppn=10 -protocols/VariantCalling.sh #MOLGENIS walltime=23:59:00 mem=13gb ppn=1 -protocols/VariantCombine.sh #MOLGENIS walltime=23:59:00 mem=32gb ppn=4 -protocols/VariantConcordanceGenotyping.sh #MOLGENIS walltime=23:59:00 mem=17gb ppn=2 -protocols/VariantGenotyping.sh #MOLGENIS walltime=23:59:00 mem=17gb ppn=2 -protocols/VcfToTable.sh #MOLGENIS walltime=05:59:00 mem=6gb ppn=1 -protocols/VEP.sh #MOLGENIS walltime=23:59:00 mem=6gb ppn=8 -protocols/XHMM.sh #MOLGENIS walltime=05:59:00 mem=4gb ppn=1 diff --git a/templates/generate_template_new.sh b/templates/generate_template_new.sh deleted file mode 100755 index 9b405369..00000000 --- a/templates/generate_template_new.sh +++ /dev/null @@ -1,109 +0,0 @@ -#!/bin/bash - -module load NGS_DNA/3.5.1 -module list -host=$(hostname -s) -environmentParameters="parameters_${host}" - -function showHelp() { - # - # Display commandline help on STDOUT. - # - cat < ${i%.*} ; rm "${i}" ;done - -build="b37" -species="homo_sapiens" - -if [ -s build.txt ]; then build=$(cat build.txt);fi -if [ -s species.txt ];then species=$(cat species.txt); fi - -sampleSize=$(cat externalSampleIDs.txt | wc -l) ; echo "Samplesize is ${sampleSize}" - -if [ $sampleSize -gt 199 ];then workflow=${EBROOTNGS_DNA}/workflow_samplesize_bigger_than_200.csv ; else workflow=${EBROOTNGS_DNA}/workflow.csv ;fi - -### Converting parameters to compute parameters -echo "tmpName,${tmpDirectory}" > ${genScripts}/tmpdir_parameters.csv -perl "${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl" "${genScripts}/tmpdir_parameters.csv" > "${genScripts}/parameters_tmpdir_converted.csv" -perl "${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl" "${EBROOTNGS_DNA}/parameters.csv" > "${genScripts}/parameters_converted.csv" -perl "${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl" "${EBROOTNGS_DNA}/parameters_${group}.csv" > "${genScripts}/parameters_group_converted.csv" -perl "${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl" "${EBROOTNGS_DNA}/${environmentParameters}.csv" > "${genScripts}/parameters_environment_converted.csv" - -## has to be set, otherwise it will crash due to parameters which are not set, this variable will be updated in the next step -batching="_small" - -## make a copy of the pipeline to get correct resources depending on which run (exome, panel or WGS) and the number of samples -mkdir tmp/ -cp -r ${EBROOTNGS_DNA} tmp/ - -sh "${EBROOTMOLGENISMINCOMPUTE}/molgenis_compute.sh" \ --p "${genScripts}/parameters_converted.csv" \ --p "${genScripts}/parameters_tmpdir_converted.csv" \ --p "${EBROOTNGS_DNA}/batchIDList${batching}.csv" \ --p "${genScripts}/parameters_group_converted.csv" \ --p "${genScripts}/parameters_environment_converted.csv" \ --p "${genScripts}/${filePrefix}.csv" \ --w "${EBROOTNGS_DNA}/create_in-house_ngs_projects_workflow.csv" \ --rundir "${genScripts}/scripts" \ ---runid "${runID}" \ --o workflowpath="${workflow};\ -outputdir=scripts/jobs;mainParameters=${genScripts}/parameters_converted.csv;\ -group_parameters=${genScripts}/parameters_group_converted.csv;\ -groupname=${group};\ -ngsversion=$(module list | grep -o -P 'NGS_DNA(.+)');\ -environment_parameters=${genScripts}/parameters_environment_converted.csv;\ -tmpdir_parameters=${genScripts}/parameters_tmpdir_converted.csv;\ -worksheet=${genScripts}/${filePrefix}.csv" \ --weave \ ---generate From 21ab1f63661219b5a17f9a70933a5f1bda5de63c Mon Sep 17 00:00:00 2001 From: RoanKanninga Date: Fri, 23 Nov 2018 12:16:29 +0100 Subject: [PATCH 3/3] removed .save fike --- protocols/CreateInhouseProjects.sh.save | 252 ------------------------ 1 file changed, 252 deletions(-) delete mode 100644 protocols/CreateInhouseProjects.sh.save diff --git a/protocols/CreateInhouseProjects.sh.save b/protocols/CreateInhouseProjects.sh.save deleted file mode 100644 index cec5a27d..00000000 --- a/protocols/CreateInhouseProjects.sh.save +++ /dev/null @@ -1,252 +0,0 @@ -#MOLGENIS walltime=02:00:00 mem=4gb - -#string tmpName -#list seqType -#string project -#string projectRawArrayTmpDataDir -#string projectRawTmpDataDir -#string projectJobsDir -#string projectLogsDir -#string intermediateDir -#string projectResultsDir -#string projectQcDir -#string computeVersion -#string group_parameters -#string groupname - -#list sequencingStartDate -#list sequencer -#list run -#list flowcell -#list barcode -#list lane -#list externalSampleID - -#string mainParameters -#string worksheet -#string outputdir -#string workflowpath -#string tmpdir_parameters -#string environment_parameters -#string ngsversion -#string ngsUtilsVersion - -#string dataDir - -#string coveragePerBaseDir -#string coveragePerTargetDir - -#string project -#string logsDir - -umask 0007 -module load "${ngsUtilsVersion}" -module load "${ngsversion}" - -array_contains () { - local array="$1[@]" - local seeking="${2}" - local in=1 - rejected="false" - for element in "${!array-}"; do - if [[ "${element}" == "${seeking}" ]]; then - in=0 - rejected="true" - continue - fi - done -} - -# -# Create project dirs. -# -mkdir -p "${projectRawArrayTmpDataDir}" -mkdir -p "${projectRawTmpDataDir}" -mkdir -p "${projectJobsDir}" -mkdir -p "${projectLogsDir}" -mkdir -p "${intermediateDir}" -mkdir -p "${projectResultsDir}/alignment/" -mkdir -p "${projectResultsDir}/qc/statistics/" -mkdir -p "${projectResultsDir}/variants/cnv/" -mkdir -p "${projectResultsDir}/variants/gVCF/" -mkdir -p "${projectResultsDir}/variants/GAVIN/" -mkdir -p "${projectQcDir}" -mkdir -p "${intermediateDir}/GeneNetwork/" -mkdir -p -m 2770 "${logsDir}/${project}/" -# -# Create symlinks to the raw data required to analyse this project. -# Do this for each sequence file and it's accompanying MD5 checksum. -# (There may be multiple sequence files per sample) -# -rocketPoint=$(pwd) -cd "${projectRawTmpDataDir}" -max_index=${#externalSampleID[@]}-1 - -for ((samplenumber = 0; samplenumber <= max_index; samplenumber++)) -do - if [[ "${seqType[samplenumber]}" == 'SR' ]] - then - if [[ "${barcode[samplenumber]}" == 'None' ]] - then - ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}.fq.gz" \ - "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}.fq.gz" - ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}.fq.gz.md5" \ - "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}.fq.gz.md5" - else - ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}.fq.gz" \ - "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}.fq.gz" - ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}.fq.gz.md5" \ - "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}.fq.gz.md5" - fi - elif [[ "${seqType[samplenumber]}" == 'PE' ]] - then - if [[ "${barcode[samplenumber]}" == 'None' ]] - then - ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_1.fq.gz" \ - "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_1.fq.gz" - ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_2.fq.gz" \ - "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_2.fq.gz" - ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_1.fq.gz.md5" \ - "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_1.fq.gz.md5" - ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_2.fq.gz.md5" \ - "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_2.fq.gz.md5" - else - array_contains arrayRejected "${barcode[samplenumber]}" - if [ "${rejected}" == "false" ] - then - - ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_1.fq.gz" \ - "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_1.fq.gz" - ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_2.fq.gz" \ - "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_2.fq.gz" - ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_1.fq.gz.md5" \ - "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_1.fq.gz.md5" - ln -sf "../../../../../rawdata/ngs/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_2.fq.gz.md5" \ - "${projectRawTmpDataDir}/${sequencingStartDate[samplenumber]}_${sequencer[samplenumber]}_${run[samplenumber]}_${flowcell[samplenumber]}_L${lane[samplenumber]}_${barcode[samplenumber]}_2.fq.gz.md5" - else - echo -e "\n############ barcode: ${barcode[samplenumber]} IS REJECTED#######################\n" - fi - fi - fi -done - -# -# Create subset of samples for this project. -# -extract_samples_from_GAF_list.pl --i "${worksheet}" --o "${projectJobsDir}/${project}.csv" --c project --q "${project}" -sampleSheetCsv="${projectJobsDir}/${project}.csv" -perl -pi -e 's/\r(?!\n)//g' "${sampleSheetCsv}" -barcodesGrepCommand="" - -# -# Execute MOLGENIS/compute to create job scripts to analyse this project. -# - - -cd "${rocketPoint}" -rm -f "${projectJobsDir}/${project}.filteredRejected.csv" -rm -f "${intermediateDir}/${project}.filteredBarcodes.csv" - -if [ -f "rejectedBarcodes.txt" ] -then - size=$(cat "rejectedBarcodes.txt" | wc -l) - teller=1 - - while read line - do - if [[ "${teller}" -lt "${size}" ]] - then - barcodesGrepCommand+="${line}|" - elif [ "${teller}" == ${size} ] - then - echo "last line" - barcodesGrepCommand+="${line}" - fi - teller=$((teller+1)) - done "${projectJobsDir}/${project}.filteredRejected.csv" - grep -E "${barcodesGrepCommand}" "${sampleSheetCsv}" > "${intermediateDir}/${project}.filteredBarcodes.csv" - cp "${sampleSheetCsv}" "${projectJobsDir}/${project}.original.csv" - samplesheetCsv="${projectJobsDir}/${project}.filteredRejected.csv" -fi -if [[ -f .compute.properties ]] -then - rm .compute.properties -fi - -batching="_small" - -capturingKitProject=$(python "${EBROOTNGS_DNA}/scripts/getCapturingKit.py" "${sampleSheetCsv}" | sed 's|\\||') -captKit=$(echo "${capturingKitProject}" | awk 'BEGIN {FS="/"}{print $2}') - -if [ ! -d "${dataDir}/${capturingKitProject}" ] -then - echo "Bedfile does not exist! Exiting" - echo "ls ${dataDir}/${capturingKitProject}" - exit 1 -fi - -if [[ "${capturingKitProject,,}" == *"exoom"* || "${capturingKitProject,,}" == *"exome"* || "${capturingKitProject,,}" == *"all_exon_v1"* || "${capturingKitProject,,}" == *"wgs"* ]] -then - batching="_chr" - if [ ! -e "${coveragePerTargetDir}/${captKit}/${captKit}" ] - then - echo "Bedfile in ${coveragePerTargetDir} does not exist! Exiting" - echo "ls ${coveragePerTargetDir}/${captKit}/${captKit}" - exit 1 - fi -else - if [ ! -e "${coveragePerBaseDir}/${captKit}/${captKit}" ] - then - echo "Bedfile in ${coveragePerBaseDir} does not exist! Exiting" - echo "ls ${coveragePerTargetDir}/${captKit}/${captKit}" - exit 1 - fi -fi - -if [ "${captKit}" == *"ONCO"* ] -then - if [ ! -f ${dataDir}/${capturingKitProject}/human_g1k_v37/GSA_SNPS.bed - then - echo "cannot do concordance check later on since ${dataDir}/${capturingKitProject}/human_g1k_v37/GSA_SNPS.bed is missing! EXIT!" - exit 1 - fi -fi - -if [[ ]] - -cd ../tmp/ -head -1 protocols/*.sh > parameters_exoom.txt - -echo "BATCHIDLIST=${EBROOTNGS_DNA}/batchIDList${batching}.csv" - -sh "${EBROOTMOLGENISMINCOMPUTE}/molgenis_compute.sh" \ --p "${mainParameters}" \ --p "${EBROOTNGS_DNA}/batchIDList${batching}.csv" \ --p "${sampleSheetCsv}" \ --p "${environment_parameters}" \ --p "${group_parameters}" \ --p "${tmpdir_parameters}" \ --rundir "${projectJobsDir}" \ ---header "${EBROOTNGS_DNA}/templates/slurm/header_tnt.ftl" \ ---footer "${EBROOTNGS_DNA}/templates/slurm/footer_tnt.ftl" \ ---submit "${EBROOTNGS_DNA}/templates/slurm/submit.ftl" \ --w "${workflowpath}" \ --b slurm \ --g \ --weave \ --runid "${runid}" \ --o "ngsversion=${ngsversion};\ -batchIDList=${EBROOTNGS_DNA}/batchIDList${batching}.csv;\ -groupname=${groupname}" - - -if [ -f "${intermediateDir}/${project}.filteredBarcodes.csv" ] -then - echo -e "\n################### THE FOLLOWING LINES ARE REJECTED BECAUSE OF TOO LOW PERCENTAGE READS ###############\n" - cat "${intermediateDir}/${project}.filteredBarcodes.csv" - cat "${intermediateDir}/${project}.filteredBarcodes.csv" > "${logsDir}/${project}/${runid}.pipeline.rejectedsamples" -fi