From d562a8bdb411679deedb8ba6db3b6d7c31e23789 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Sat, 9 Nov 2013 15:35:41 +0100 Subject: [PATCH 1/2] public rna seq --- .../protocols/ConvertSNVMixToGen.ftl | 66 +++++++++---------- .../protocols/SNVMix.ftl | 5 +- .../parameters.csv | 2 +- .../protocols/FluxCapacitor.ftl | 2 +- .../protocols/MakeExpressionTable.ftl | 2 +- .../parameters.csv | 8 +-- .../protocols/HTSeq-count.ftl | 35 ---------- .../protocols/HTSeq_count.ftl | 30 ++++++--- .../protocols/MakeExpressionTable.ftl | 19 ++++-- compute4/STAR_RNA-seq_mapping/parameters.csv | 4 +- .../protocols/STARMapping.ftl | 19 +++--- .../protocols/SortFilterBam.ftl | 2 +- 12 files changed, 88 insertions(+), 106 deletions(-) delete mode 100644 compute4/RNA-seq_quantify_gene_level/protocols/HTSeq-count.ftl diff --git a/compute4/RNA-seq_genotype_calling/protocols/ConvertSNVMixToGen.ftl b/compute4/RNA-seq_genotype_calling/protocols/ConvertSNVMixToGen.ftl index 0bb8de0d..b5d88731 100644 --- a/compute4/RNA-seq_genotype_calling/protocols/ConvertSNVMixToGen.ftl +++ b/compute4/RNA-seq_genotype_calling/protocols/ConvertSNVMixToGen.ftl @@ -48,34 +48,34 @@ do done -# ${JAVA_HOME}/bin/java \ - # -Xmx4g \ - # -jar /target/gpfs2/gcc/home/dasha/scripts/genotyping/GenotypeCalling/dist/GenotypeCalling.jar \ - # --mode SNVMixToGen \ - # --fileList ${genotypeFolder}/fileList.txt \ - # --p-value 0.8 \ - # --out ${genotypeFolder}/___tmp___chr - -# returnCode=$? -# echo "Return code ${returnCode}" - -# if [ "${returnCode}" -eq "0" ] -# then + ${JAVA_HOME}/bin/java \ + -Xmx4g \ + -jar /target/gpfs2/gcc/home/dasha/scripts/genotyping/GenotypeCalling/dist/GenotypeCalling.jar \ + --mode SNVMixToGen \ + --fileList ${genotypeFolder}/fileList.txt \ + --p-value 0.8 \ + --out ${genotypeFolder}/___tmp___chr + + returnCode=$? 
+ echo "Return code ${returnCode}" + + if [ "${returnCode}" -eq "0" ] + then - # echo "Moving temp files: ${genotypeFolder}/___tmp___chr* to ${genotypeFolder}/chr*" - # tmpFiles="${genotypeFolder}/___tmp___chr*" - # for f in $tmpFiles - # do - # mv $f ${f//___tmp___/} - # done + echo "Moving temp files: ${genotypeFolder}/___tmp___chr* to ${genotypeFolder}/chr*" + tmpFiles="${genotypeFolder}/___tmp___chr*" + for f in $tmpFiles + do + mv $f ${f//___tmp___/} + done -# else + else - # echo -e "\nNon zero return code not making files final. Existing temp files are kept for debugging purposes\n\n" - Return non zero return code - # exit 1 + echo -e "\nNon zero return code not making files final. Existing temp files are kept for debugging purposes\n\n" + #Return non zero return code + exit 1 -# fi + fi chrTriTyperDirs="" @@ -97,18 +97,18 @@ do genFileSorted=${genFile//.gen/.sorted.gen} - # sort -k3,3n ${genFile} > ${genFileSorted} + sort -k3,3n ${genFile} > ${genFileSorted} genFileSortedFiltered=${genFile//.gen/_CR0.8_maf0.01.gen} - # /target/gpfs2/gcc/tools/qctool/qctool_v1.3-linux-x86_64/qctool \ - # -g $genFileSorted \ - # -s ${sampleFile} \ - # -og ${genFileSortedFiltered} \ - # -maf 0.01 1 \ - # -hwe 4 \ - # -snp-missing-rate 0.8 \ - # -omit-chromosome + /target/gpfs2/gcc/tools/qctool/qctool_v1.3-linux-x86_64/qctool \ + -g $genFileSorted \ + -s ${sampleFile} \ + -og ${genFileSortedFiltered} \ + -maf 0.01 1 \ + -hwe 4 \ + -snp-missing-rate 0.8 \ + -omit-chromosome trityperFolder=${genFile%.gen} mkdir -p ${trityperFolder} diff --git a/compute4/RNA-seq_genotype_calling/protocols/SNVMix.ftl b/compute4/RNA-seq_genotype_calling/protocols/SNVMix.ftl index 8cd97315..a9d3066d 100644 --- a/compute4/RNA-seq_genotype_calling/protocols/SNVMix.ftl +++ b/compute4/RNA-seq_genotype_calling/protocols/SNVMix.ftl @@ -82,7 +82,10 @@ returnCode=$? 
echo "return code snvMix ${returnCode}" -if [ $returnCode -eq 0 ] + +count=`cut -f 1 -d ":" ${snvmixOut}___tmp___ | uniq | wc -l` + +if [ $count -ge 22 ] then echo "Moving temp file: ${snvmixOut}___tmp___ to $snvmixOut" diff --git a/compute4/RNA-seq_quantify_fluxCapacitor/parameters.csv b/compute4/RNA-seq_quantify_fluxCapacitor/parameters.csv index 43aa9c91..d9a92458 100644 --- a/compute4/RNA-seq_quantify_fluxCapacitor/parameters.csv +++ b/compute4/RNA-seq_quantify_fluxCapacitor/parameters.csv @@ -19,7 +19,7 @@ run,,,, baseFolder,,,, sortedBam,${baseFolder}/${studyEna}/${sample}/${run}/${run}Aligned.out.sorted.bam,,, expressionFolder,${baseFolder}/${mergedStudy}/expressionData/,,, -gtfExpression,"${baseFolder}/${studyEna}/${sample}/${sample}.flux.gtf",,, +gtfExpression,"${baseFolder}/${studyEna}/${sample}/${run}/${run}.flux.gtf",,, expressionTable,${expressionFolder}/expression_table.transcr.v71.flux.txt,,, #,,,, annotationGtf,/target/gpfs2/gcc/home/dasha/resources/hg19/v71/Homo_sapiens.GRCh37.71.cut.sorted.gtf,,, diff --git a/compute4/RNA-seq_quantify_fluxCapacitor/protocols/FluxCapacitor.ftl b/compute4/RNA-seq_quantify_fluxCapacitor/protocols/FluxCapacitor.ftl index 62498df5..1dc206cb 100644 --- a/compute4/RNA-seq_quantify_fluxCapacitor/protocols/FluxCapacitor.ftl +++ b/compute4/RNA-seq_quantify_fluxCapacitor/protocols/FluxCapacitor.ftl @@ -1,4 +1,4 @@ -#MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=6 +#MOLGENIS walltime=24:00:00 nodes=1 cores=2 mem=7 bamToBed="${bamToBed}" sortedBam="${sortedBam}" diff --git a/compute4/RNA-seq_quantify_fluxCapacitor/protocols/MakeExpressionTable.ftl b/compute4/RNA-seq_quantify_fluxCapacitor/protocols/MakeExpressionTable.ftl index 90f970a4..8294c697 100644 --- a/compute4/RNA-seq_quantify_fluxCapacitor/protocols/MakeExpressionTable.ftl +++ b/compute4/RNA-seq_quantify_fluxCapacitor/protocols/MakeExpressionTable.ftl @@ -1,4 +1,4 @@ -#MOLGENIS walltime=6:00:00 nodes=1 cores=1 mem=4 +#MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=4 
#FOREACH mergedStudy mkdir -p ${expressionFolder} diff --git a/compute4/RNA-seq_quantify_gene_level/parameters.csv b/compute4/RNA-seq_quantify_gene_level/parameters.csv index 65c122f2..f14ed02d 100644 --- a/compute4/RNA-seq_quantify_gene_level/parameters.csv +++ b/compute4/RNA-seq_quantify_gene_level/parameters.csv @@ -1,8 +1,8 @@ Name,defaultValue,description,dataType,hasOne_name clusterQueue,gcc,,, scheduler,PBS,,, -mem,4,Memory in GB,, -walltime,6:00:00,,, +mem,6,Memory in GB,, +walltime,24:00:00,,, cores,1,,, defaultInterpreter,#!/bin/bash,,, jobname,jobname,,string, @@ -19,8 +19,8 @@ run,,,, baseFolder,,,, sortedBam,${baseFolder}/${studyEna}/${sample}/${run}/${run}Aligned.out.sorted.bam,,, expressionFolder,${baseFolder}/${mergedStudy}/expressionData/,,, -txtExpression,"${baseFolder}/${studyEna}/${sample}/${sample}.htseq.txt",,, -expressionTable,${expressionFolder}/expression_table.transcr.v71.htseq.txt,,, +txtExpression,"${baseFolder}/${studyEna}/${sample}/${run}/${run}.htseq.txt",,, +expressionTable,${expressionFolder}/expression_table.genelevel.v71.htseq.txt,,, #,,,, annotationGtf,/target/gpfs2/gcc/home/dasha/resources/hg19/v71/Homo_sapiens.GRCh37.71.cut.sorted.gtf,,, geneAnnotationTxt,/target/gpfs2/gcc/home/dasha/resources/hg19/annotations/v71/annotation_geneIds_v71.txt.gz,,, diff --git a/compute4/RNA-seq_quantify_gene_level/protocols/HTSeq-count.ftl b/compute4/RNA-seq_quantify_gene_level/protocols/HTSeq-count.ftl deleted file mode 100644 index e2a6a7eb..00000000 --- a/compute4/RNA-seq_quantify_gene_level/protocols/HTSeq-count.ftl +++ /dev/null @@ -1,35 +0,0 @@ -#MOLGENIS walltime=20:00:00 nodes=1 cores=1 mem=6 - -sortedBam="${sortedBam}" -htseq-count="${htseq-count}" -annotationGtf="${annotationGtf}" -txtExpression="${txtExpression}" -samtools=${samtools} - -<#noparse> - -echo -e "sortedBam=${sortedBam}\nannotationGtf=${annotationGtf}\ntxtExpression=${txtExpression}" - - -alloutputsexist ${sortedByName} -echo "Sorting bam file by name" - -${samtools} sort 
\ --n \ -${sortedBam} \ -${sortedBam%bam}byName - - -alloutputsexist ${txtExpression} -echo -e "\nQuantifying expression" - -/target/gpfs2/gcc/tools/Python-2.7.3/bin/python \ -${htseq-count} \ - -m union \ - -s no \ - ${sortedBam%bam}byName.bam \ - ${annotationGtf} | \ -head -n -5 \ -> ${txtExpression} - - \ No newline at end of file diff --git a/compute4/RNA-seq_quantify_gene_level/protocols/HTSeq_count.ftl b/compute4/RNA-seq_quantify_gene_level/protocols/HTSeq_count.ftl index 4b1f66a9..3fccd400 100644 --- a/compute4/RNA-seq_quantify_gene_level/protocols/HTSeq_count.ftl +++ b/compute4/RNA-seq_quantify_gene_level/protocols/HTSeq_count.ftl @@ -1,4 +1,4 @@ -#MOLGENIS walltime=20:00:00 nodes=1 cores=1 mem=6 +#MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=6 sortedBam="${sortedBam}" htseq_count="${htseq_count}" @@ -10,28 +10,38 @@ samtools=${samtools} echo -e "sortedBam=${sortedBam}\nannotationGtf=${annotationGtf}\ntxtExpression=${txtExpression}" +alloutputsexist ${txtExpression} echo "Sorting bam file by name" -${samtools} sort \ --n \ -${sortedBam} \ -${sortedBam%bam}byName +${samtools} \ + sort \ + -n \ + ${sortedBam} \ + ${sortedBam%bam}byName echo -e "\nQuantifying expression" -${samtools} \ +if ${samtools} \ view -h \ ${sortedBam%bam}byName.bam | \ -/target/gpfs2/gcc/tools/Python-2.7.3/bin/python \ -${htseq_count} \ + /target/gpfs2/gcc/tools/Python-2.7.3/bin/python \ + ${htseq_count} \ -m union \ -s no \ - \ ${annotationGtf} | \ -head -n -5 \ -> ${txtExpression} + head -n -5 \ + > ${txtExpression}___tmp___; +then + echo "Gene count succesfull" + mv ${txtExpression}___tmp___ ${txtExpression} +else + echo "Genecount failed" +fi + +rm ${sortedBam%bam}byName echo "Finished!" 
diff --git a/compute4/RNA-seq_quantify_gene_level/protocols/MakeExpressionTable.ftl b/compute4/RNA-seq_quantify_gene_level/protocols/MakeExpressionTable.ftl index c596d23e..a034c816 100644 --- a/compute4/RNA-seq_quantify_gene_level/protocols/MakeExpressionTable.ftl +++ b/compute4/RNA-seq_quantify_gene_level/protocols/MakeExpressionTable.ftl @@ -1,20 +1,25 @@ -#MOLGENIS walltime=6:00:00 nodes=1 cores=1 mem=4 +#MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=6 #FOREACH mergedStudy mkdir -p ${expressionFolder} rm -f ${expressionFolder}/fileList.txt -<#assign samples=sample?size - 1> -<#list 0..samples as i> - echo -e "${sample[i]}\t${txtExpression[i]}" >> ${expressionFolder}/fileList.txt +<#assign runs=run?size - 1> +<#list 0..runs as i> + echo -e "${run[i]}\t${txtExpression[i]}" >> ${expressionFolder}/fileList.txt -${JAVA_HOME}/bin/java \ +if ${JAVA_HOME}/bin/java \ -Xmx4g \ -jar ${processReadCountsJar} \ --mode makeExpressionTable \ --fileList ${expressionFolder}/fileList.txt \ --annot ${geneAnnotationTxt} \ - --out ${expressionTable} - + --out ${expressionTable}___tmp___ +then + echo "table create succesfull" + mv ${expressionTable}___tmp___ ${expressionTable} +else + echo "table create failed" +fi diff --git a/compute4/STAR_RNA-seq_mapping/parameters.csv b/compute4/STAR_RNA-seq_mapping/parameters.csv index fb664c6a..014a43a1 100644 --- a/compute4/STAR_RNA-seq_mapping/parameters.csv +++ b/compute4/STAR_RNA-seq_mapping/parameters.csv @@ -1,7 +1,7 @@ Name,defaultValue,description,dataType,hasOne_name -clusterQueue,ss,,, +clusterQueue,gcc,,, mem,4,Memory in GB,, -walltime,6:00:00,,, +walltime,24:00:00,,, cores,1,,, defaultInterpreter,#!/bin/bash,,, jobname,jobname,,string, diff --git a/compute4/STAR_RNA-seq_mapping/protocols/STARMapping.ftl b/compute4/STAR_RNA-seq_mapping/protocols/STARMapping.ftl index 4e4b598a..228901fe 100644 --- a/compute4/STAR_RNA-seq_mapping/protocols/STARMapping.ftl +++ b/compute4/STAR_RNA-seq_mapping/protocols/STARMapping.ftl @@ -1,4 +1,4 @@ 
-#MOLGENIS walltime=6:00:00 nodes=1 cores=8 mem=30 +#MOLGENIS walltime=6:00:00 nodes=1 cores=8 mem=40 fastq1="${fastq1}" fastq2="${fastq2}" @@ -13,21 +13,20 @@ echo -e "fastq1=${fastq1}\nfastq2=${fastq2}\noutputFolder=${outputFolder}\nprefi mkdir -p ${outputFolder} -alloutputsexist ${outputFolder}/${prefix}Aligned.out.sam - +if [[ -f ${outputFolder}/${prefix}Aligned.out.sorted.bam && -f ${outputFolder}/${prefix}Aligned.out.sorted.bam.bai ]]; then + echo "skipping, next step already has output" + rm -f ${outputFolder}/${outputPrefix}Aligned.out.sam + exit 0 +fi -#Output of this step is removed at the end of next step. Only run this step if output of next step is not present -if [ -f ${outputFolder}/${outputPrefix}Aligned.out.sorted.bam ] && [ -f ${outputFolder}/${outputPrefix}Aligned.out.sorted.bam.bai ] -then - echo "skipping" - exit 0 +alloutputsexist ${outputFolder}/${prefix}Aligned.out.sam -fi inputs ${fastq1} -seq=`head -2 ${fastq1} | tail -1` +seq=`zcat ${fastq1} | head -2 | tail -1` +echo "seq used to determine read length: ${seq}" readLength="${#seq}" if [ $readLength -ge 90 ]; then diff --git a/compute4/STAR_RNA-seq_mapping/protocols/SortFilterBam.ftl b/compute4/STAR_RNA-seq_mapping/protocols/SortFilterBam.ftl index eab3d287..55dea9ca 100644 --- a/compute4/STAR_RNA-seq_mapping/protocols/SortFilterBam.ftl +++ b/compute4/STAR_RNA-seq_mapping/protocols/SortFilterBam.ftl @@ -1,4 +1,4 @@ -#MOLGENIS walltime=6:00:00 nodes=1 cores=1 mem=4 +#MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=6 outputFolder=${outputFolder} outputPrefix=${outputPrefix} From 2fddcf5cbfe90d69a69646200c6e5951fed80fe3 Mon Sep 17 00:00:00 2001 From: Patrick Deelen Date: Wed, 22 Jan 2014 14:10:45 +0100 Subject: [PATCH 2/2] compute rnaseq stuff --- .../RNA-seq_genotype_calling/parameters.csv | 2 +- .../protocols/CreateSNVMixFileList.ftl | 48 ++ .../protocols/SNVMix.ftl | 2 +- .../RNA-seq_genotype_calling/workflowCall.csv | 2 + .../workflowCreateFileList.csv | 2 + .../workflowMerge.csv | 2 + 
.../parameters.csv | 5 +- .../protocols/HTSeq_count.ftl | 5 +- .../b37AllChrBins.csv | 590 ++++++++++++++++++ .../example_worksheet.csv | 1 + .../RNA-seq_sample_imputation/parameters.csv | 40 ++ .../protocols/impute.ftl | 274 ++++++++ .../templates/DataTransfer.sh.ftl | 26 + .../templates/Footer.ftl | 0 .../templates/Header.ftl | 58 ++ .../templates/Submit.sh.ftl | 8 + .../templates/templ-cluster.ftl | 18 + .../templates/templ-download-grid.ftl | 3 + .../templates/templ-errorfile.ftl | 1 + .../templates/templ-exe-grid-dir.ftl | 1 + .../templates/templ-exe-grid.ftl | 7 + .../templates/templ-extrafile.ftl | 1 + .../templates/templ-jdl-grid.ftl | 19 + .../templates/templ-logfile.ftl | 1 + .../templates/templ-outfile.ftl | 1 + .../templates/templ-submit.ftl | 4 + .../templates/templ-upload-grid.ftl | 6 + .../templates/test_template.ftl | 18 + .../RNA-seq_sample_imputation/workflow.csv | 2 + .../workflowCall.csv | 2 + .../workflowCreateFileList.csv | 2 + .../workflowMerge.csv | 2 + .../example_worksheet.csv | 1 + .../RNA-seq_sample_phasing/parameters.csv | 36 ++ .../protocols/phase.ftl | 115 ++++ .../templates/DataTransfer.sh.ftl | 26 + .../templates/Footer.ftl | 0 .../templates/Header.ftl | 58 ++ .../templates/Submit.sh.ftl | 8 + .../templates/templ-cluster.ftl | 18 + .../templates/templ-download-grid.ftl | 3 + .../templates/templ-errorfile.ftl | 1 + .../templates/templ-exe-grid-dir.ftl | 1 + .../templates/templ-exe-grid.ftl | 7 + .../templates/templ-extrafile.ftl | 1 + .../templates/templ-jdl-grid.ftl | 19 + .../templates/templ-logfile.ftl | 1 + .../templates/templ-outfile.ftl | 1 + .../templates/templ-submit.ftl | 4 + .../templates/templ-upload-grid.ftl | 6 + .../templates/test_template.ftl | 18 + compute4/RNA-seq_sample_phasing/workflow.csv | 2 + .../RNA-seq_sample_phasing/workflowCall.csv | 2 + .../workflowCreateFileList.csv | 2 + .../RNA-seq_sample_phasing/workflowMerge.csv | 2 + compute4/STAR_RNA-seq_mapping/parameters.csv | 13 +- .../protocols/STARMapping.ftl | 
44 +- .../protocols/SortFilterBam.ftl | 11 +- .../STAR_RNA-seq_mapping/templates/Header.ftl | 2 +- compute4/STAR_RNA-seq_mapping/workflow.csv | 2 +- .../STAR_RNA-seq_mapping/workflowStep1.csv | 2 + .../STAR_RNA-seq_mapping/workflowStep2.csv | 2 + 62 files changed, 1534 insertions(+), 27 deletions(-) create mode 100644 compute4/RNA-seq_genotype_calling/protocols/CreateSNVMixFileList.ftl create mode 100644 compute4/RNA-seq_genotype_calling/workflowCall.csv create mode 100644 compute4/RNA-seq_genotype_calling/workflowCreateFileList.csv create mode 100644 compute4/RNA-seq_genotype_calling/workflowMerge.csv create mode 100644 compute4/RNA-seq_sample_imputation/b37AllChrBins.csv create mode 100644 compute4/RNA-seq_sample_imputation/example_worksheet.csv create mode 100644 compute4/RNA-seq_sample_imputation/parameters.csv create mode 100644 compute4/RNA-seq_sample_imputation/protocols/impute.ftl create mode 100644 compute4/RNA-seq_sample_imputation/templates/DataTransfer.sh.ftl create mode 100644 compute4/RNA-seq_sample_imputation/templates/Footer.ftl create mode 100644 compute4/RNA-seq_sample_imputation/templates/Header.ftl create mode 100644 compute4/RNA-seq_sample_imputation/templates/Submit.sh.ftl create mode 100644 compute4/RNA-seq_sample_imputation/templates/templ-cluster.ftl create mode 100644 compute4/RNA-seq_sample_imputation/templates/templ-download-grid.ftl create mode 100644 compute4/RNA-seq_sample_imputation/templates/templ-errorfile.ftl create mode 100644 compute4/RNA-seq_sample_imputation/templates/templ-exe-grid-dir.ftl create mode 100644 compute4/RNA-seq_sample_imputation/templates/templ-exe-grid.ftl create mode 100644 compute4/RNA-seq_sample_imputation/templates/templ-extrafile.ftl create mode 100644 compute4/RNA-seq_sample_imputation/templates/templ-jdl-grid.ftl create mode 100644 compute4/RNA-seq_sample_imputation/templates/templ-logfile.ftl create mode 100644 compute4/RNA-seq_sample_imputation/templates/templ-outfile.ftl create mode 100644 
compute4/RNA-seq_sample_imputation/templates/templ-submit.ftl create mode 100644 compute4/RNA-seq_sample_imputation/templates/templ-upload-grid.ftl create mode 100644 compute4/RNA-seq_sample_imputation/templates/test_template.ftl create mode 100644 compute4/RNA-seq_sample_imputation/workflow.csv create mode 100644 compute4/RNA-seq_sample_imputation/workflowCall.csv create mode 100644 compute4/RNA-seq_sample_imputation/workflowCreateFileList.csv create mode 100644 compute4/RNA-seq_sample_imputation/workflowMerge.csv create mode 100644 compute4/RNA-seq_sample_phasing/example_worksheet.csv create mode 100644 compute4/RNA-seq_sample_phasing/parameters.csv create mode 100644 compute4/RNA-seq_sample_phasing/protocols/phase.ftl create mode 100644 compute4/RNA-seq_sample_phasing/templates/DataTransfer.sh.ftl create mode 100644 compute4/RNA-seq_sample_phasing/templates/Footer.ftl create mode 100644 compute4/RNA-seq_sample_phasing/templates/Header.ftl create mode 100644 compute4/RNA-seq_sample_phasing/templates/Submit.sh.ftl create mode 100644 compute4/RNA-seq_sample_phasing/templates/templ-cluster.ftl create mode 100644 compute4/RNA-seq_sample_phasing/templates/templ-download-grid.ftl create mode 100644 compute4/RNA-seq_sample_phasing/templates/templ-errorfile.ftl create mode 100644 compute4/RNA-seq_sample_phasing/templates/templ-exe-grid-dir.ftl create mode 100644 compute4/RNA-seq_sample_phasing/templates/templ-exe-grid.ftl create mode 100644 compute4/RNA-seq_sample_phasing/templates/templ-extrafile.ftl create mode 100644 compute4/RNA-seq_sample_phasing/templates/templ-jdl-grid.ftl create mode 100644 compute4/RNA-seq_sample_phasing/templates/templ-logfile.ftl create mode 100644 compute4/RNA-seq_sample_phasing/templates/templ-outfile.ftl create mode 100644 compute4/RNA-seq_sample_phasing/templates/templ-submit.ftl create mode 100644 compute4/RNA-seq_sample_phasing/templates/templ-upload-grid.ftl create mode 100644 compute4/RNA-seq_sample_phasing/templates/test_template.ftl 
create mode 100644 compute4/RNA-seq_sample_phasing/workflow.csv create mode 100644 compute4/RNA-seq_sample_phasing/workflowCall.csv create mode 100644 compute4/RNA-seq_sample_phasing/workflowCreateFileList.csv create mode 100644 compute4/RNA-seq_sample_phasing/workflowMerge.csv create mode 100644 compute4/STAR_RNA-seq_mapping/workflowStep1.csv create mode 100644 compute4/STAR_RNA-seq_mapping/workflowStep2.csv diff --git a/compute4/RNA-seq_genotype_calling/parameters.csv b/compute4/RNA-seq_genotype_calling/parameters.csv index 65cd04a5..cc51032e 100644 --- a/compute4/RNA-seq_genotype_calling/parameters.csv +++ b/compute4/RNA-seq_genotype_calling/parameters.csv @@ -32,4 +32,4 @@ SNVMix,/target/gpfs2/gcc/home/dasha/tools/SNVMix2-0.11.8-r5/SNVMix2,,, GenotypeCallingJar,/target/gpfs2/gcc/home/dasha/scripts/genotyping/GenotypeCalling/dist/GenotypeCalling.jar,,, snpList,/target/gpfs2/gcc/home/dasha/resources/hg19/genotypes/1000G/all_snp_positions.txt,,, shapeitversion,v2.644,,, -shapeitBin,${tooldir}/Shapeit-${shapeitversion}/shapeit.v2.r644.linux.x86_64,,string, \ No newline at end of file +shapeitBin,${tooldir}/Shapeit-${shapeitversion}/shapeit.v2.r644.linux.x86_64,,string, diff --git a/compute4/RNA-seq_genotype_calling/protocols/CreateSNVMixFileList.ftl b/compute4/RNA-seq_genotype_calling/protocols/CreateSNVMixFileList.ftl new file mode 100644 index 00000000..180bde99 --- /dev/null +++ b/compute4/RNA-seq_genotype_calling/protocols/CreateSNVMixFileList.ftl @@ -0,0 +1,48 @@ +#MOLGENIS walltime=1:00:00 nodes=1 cores=1 mem=4 + +#FOREACH mergedStudy + +genotypeFolder="${genotypeFolder}" +JAVA_HOME="${JAVA_HOME}" + +declare -a samples=(${ssvQuoted(sample)}) +declare -a snvmixOuts=(${ssvQuoted(snvmixOut)}) +<#noparse> + +mkdir -p ${genotypeFolder} + +echo "genotypeFolder=${genotypeFolder}" +echo "snvMixOuts=${snvmixOuts[*]}" +echo "samples=${samples[*]}" + +rm -f ${genotypeFolder}/fileList.txt + + + +declare -a samplesProcessed=() + +for (( i = 0 ; i < ${#samples[@]} ; i++ )) 
+do + + for processedSample in ${samplesProcessed[@]} + do + if [ $processedSample == ${samples[$i]} ] + then + continue 2 + fi + done + + samplesProcessed=("${samplesProcessed[@]}" "${samples[$i]}") + echo -e "sample:${samples[$i]}\tgenotype file:${snvmixOuts[$i]}" + + if [ -f ${snvmixOuts[$i]} ] + then + echo -e "${samples[$i]}\t${snvmixOuts[$i]}" >> ${genotypeFolder}/fileList.txt + else + echo "Skipping sample ${samples[$i]} no snvmix output" + fi + + +done + + diff --git a/compute4/RNA-seq_genotype_calling/protocols/SNVMix.ftl b/compute4/RNA-seq_genotype_calling/protocols/SNVMix.ftl index a9d3066d..d570084d 100644 --- a/compute4/RNA-seq_genotype_calling/protocols/SNVMix.ftl +++ b/compute4/RNA-seq_genotype_calling/protocols/SNVMix.ftl @@ -83,7 +83,7 @@ returnCode=$? echo "return code snvMix ${returnCode}" -count=`cut -f 1 -d ":" ${snvmixOut}___tmp___ | uniq | wc -l` + count=`cut -f 1 -d ":" ${snvmixOut}___tmp___ | uniq | wc -l` if [ $count -ge 22 ] then diff --git a/compute4/RNA-seq_genotype_calling/workflowCall.csv b/compute4/RNA-seq_genotype_calling/workflowCall.csv new file mode 100644 index 00000000..ca8bc0fa --- /dev/null +++ b/compute4/RNA-seq_genotype_calling/workflowCall.csv @@ -0,0 +1,2 @@ +name,protocol_name,PreviousSteps_name +SNVMix,SNVMix, diff --git a/compute4/RNA-seq_genotype_calling/workflowCreateFileList.csv b/compute4/RNA-seq_genotype_calling/workflowCreateFileList.csv new file mode 100644 index 00000000..985b4ec0 --- /dev/null +++ b/compute4/RNA-seq_genotype_calling/workflowCreateFileList.csv @@ -0,0 +1,2 @@ +name,protocol_name,PreviousSteps_name +CreateSNVMixFileList,CreateSNVMixFileList, diff --git a/compute4/RNA-seq_genotype_calling/workflowMerge.csv b/compute4/RNA-seq_genotype_calling/workflowMerge.csv new file mode 100644 index 00000000..c7c2d4a5 --- /dev/null +++ b/compute4/RNA-seq_genotype_calling/workflowMerge.csv @@ -0,0 +1,2 @@ +name,protocol_name,PreviousSteps_name +ConvertSNVMixToGen,ConvertSNVMixToGen, diff --git 
a/compute4/RNA-seq_quantify_gene_level/parameters.csv b/compute4/RNA-seq_quantify_gene_level/parameters.csv index f14ed02d..40db80b8 100644 --- a/compute4/RNA-seq_quantify_gene_level/parameters.csv +++ b/compute4/RNA-seq_quantify_gene_level/parameters.csv @@ -9,8 +9,8 @@ jobname,jobname,,string, #,,,, home,/target/gpfs2/gcc/home/dasha/,,, root,/target/gpfs2/gcc/,the root to your tools and data,string, -bashrc,${root}/gcc.bashrc,,, -toolDir,${root}tools/,root Dir for tools,string, +bashrc,/gcc/groups/gcc/home/gcc.bashrc,,, +toolDir,/gcc/tools/,root Dir for tools,string, #,,,, studyEna,,,, mergedStudy,,,,expressionFolder @@ -30,3 +30,4 @@ JAVA_HOME,${toolDir}/jdk/,,, samtools,${toolDir}samtools-0.1.18/samtools,,, htseq_count,/target/gpfs2/gcc/home/dasha/tools/HTSeq-0.5.4p3/HTSeq/scripts/count.py,,, processReadCountsJar,/target/gpfs2/gcc/home/dasha/scripts/processReadCounts/ProcessReadCounts/dist/ProcessReadCounts.jar,,, +python,python,,, diff --git a/compute4/RNA-seq_quantify_gene_level/protocols/HTSeq_count.ftl b/compute4/RNA-seq_quantify_gene_level/protocols/HTSeq_count.ftl index 3fccd400..6fd1fa8e 100644 --- a/compute4/RNA-seq_quantify_gene_level/protocols/HTSeq_count.ftl +++ b/compute4/RNA-seq_quantify_gene_level/protocols/HTSeq_count.ftl @@ -5,6 +5,7 @@ htseq_count="${htseq_count}" annotationGtf="${annotationGtf}" txtExpression="${txtExpression}" samtools=${samtools} +python=${python} <#noparse> @@ -18,14 +19,14 @@ ${samtools} \ sort \ -n \ ${sortedBam} \ - ${sortedBam%bam}byName + ${TMPDIR}/nameSorted.bam echo -e "\nQuantifying expression" if ${samtools} \ view -h \ - ${sortedBam%bam}byName.bam | \ + ${TMPDIR}/nameSorted.bam | \ /target/gpfs2/gcc/tools/Python-2.7.3/bin/python \ ${htseq_count} \ -m union \ diff --git a/compute4/RNA-seq_sample_imputation/b37AllChrBins.csv b/compute4/RNA-seq_sample_imputation/b37AllChrBins.csv new file mode 100644 index 00000000..a9d0ed6f --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/b37AllChrBins.csv @@ -0,0 +1,590 @@ 
+chr fromChrPos toChrPos +1 1 5000000 +1 5000001 10000000 +1 10000001 15000000 +1 15000001 20000000 +1 20000001 25000000 +1 25000001 30000000 +1 30000001 35000000 +1 35000001 40000000 +1 40000001 45000000 +1 45000001 50000000 +1 50000001 55000000 +1 55000001 60000000 +1 60000001 65000000 +1 65000001 70000000 +1 70000001 75000000 +1 75000001 80000000 +1 80000001 85000000 +1 85000001 90000000 +1 90000001 95000000 +1 95000001 100000000 +1 100000001 105000000 +1 105000001 110000000 +1 110000001 115000000 +1 115000001 120000000 +1 120000001 125000000 +1 125000001 130000000 +1 130000001 135000000 +1 135000001 140000000 +1 140000001 145000000 +1 145000001 150000000 +1 150000001 155000000 +1 155000001 160000000 +1 160000001 165000000 +1 165000001 170000000 +1 170000001 175000000 +1 175000001 180000000 +1 180000001 185000000 +1 185000001 190000000 +1 190000001 195000000 +1 195000001 200000000 +1 200000001 205000000 +1 205000001 210000000 +1 210000001 215000000 +1 215000001 220000000 +1 220000001 225000000 +1 225000001 230000000 +1 230000001 235000000 +1 235000001 240000000 +1 240000001 245000000 +1 245000001 250000000 +2 1 5000000 +2 5000001 10000000 +2 10000001 15000000 +2 15000001 20000000 +2 20000001 25000000 +2 25000001 30000000 +2 30000001 35000000 +2 35000001 40000000 +2 40000001 45000000 +2 45000001 50000000 +2 50000001 55000000 +2 55000001 60000000 +2 60000001 65000000 +2 65000001 70000000 +2 70000001 75000000 +2 75000001 80000000 +2 80000001 85000000 +2 85000001 90000000 +2 90000001 95000000 +2 95000001 100000000 +2 100000001 105000000 +2 105000001 110000000 +2 110000001 115000000 +2 115000001 120000000 +2 120000001 125000000 +2 125000001 130000000 +2 130000001 135000000 +2 135000001 140000000 +2 140000001 145000000 +2 145000001 150000000 +2 150000001 155000000 +2 155000001 160000000 +2 160000001 165000000 +2 165000001 170000000 +2 170000001 175000000 +2 175000001 180000000 +2 180000001 185000000 +2 185000001 190000000 +2 190000001 195000000 +2 195000001 200000000 
+2 200000001 205000000 +2 205000001 210000000 +2 210000001 215000000 +2 215000001 220000000 +2 220000001 225000000 +2 225000001 230000000 +2 230000001 235000000 +2 235000001 240000000 +2 240000001 245000000 +3 1 5000000 +3 5000001 10000000 +3 10000001 15000000 +3 15000001 20000000 +3 20000001 25000000 +3 25000001 30000000 +3 30000001 35000000 +3 35000001 40000000 +3 40000001 45000000 +3 45000001 50000000 +3 50000001 55000000 +3 55000001 60000000 +3 60000001 65000000 +3 65000001 70000000 +3 70000001 75000000 +3 75000001 80000000 +3 80000001 85000000 +3 85000001 90000000 +3 90000001 95000000 +3 95000001 100000000 +3 100000001 105000000 +3 105000001 110000000 +3 110000001 115000000 +3 115000001 120000000 +3 120000001 125000000 +3 125000001 130000000 +3 130000001 135000000 +3 135000001 140000000 +3 140000001 145000000 +3 145000001 150000000 +3 150000001 155000000 +3 155000001 160000000 +3 160000001 165000000 +3 165000001 170000000 +3 170000001 175000000 +3 175000001 180000000 +3 180000001 185000000 +3 185000001 190000000 +3 190000001 195000000 +3 195000001 200000000 +4 1 5000000 +4 5000001 10000000 +4 10000001 15000000 +4 15000001 20000000 +4 20000001 25000000 +4 25000001 30000000 +4 30000001 35000000 +4 35000001 40000000 +4 40000001 45000000 +4 45000001 50000000 +4 50000001 55000000 +4 55000001 60000000 +4 60000001 65000000 +4 65000001 70000000 +4 70000001 75000000 +4 75000001 80000000 +4 80000001 85000000 +4 85000001 90000000 +4 90000001 95000000 +4 95000001 100000000 +4 100000001 105000000 +4 105000001 110000000 +4 110000001 115000000 +4 115000001 120000000 +4 120000001 125000000 +4 125000001 130000000 +4 130000001 135000000 +4 135000001 140000000 +4 140000001 145000000 +4 145000001 150000000 +4 150000001 155000000 +4 155000001 160000000 +4 160000001 165000000 +4 165000001 170000000 +4 170000001 175000000 +4 175000001 180000000 +4 180000001 185000000 +4 185000001 190000000 +4 190000001 195000000 +5 1 5000000 +5 5000001 10000000 +5 10000001 15000000 +5 15000001 
20000000 +5 20000001 25000000 +5 25000001 30000000 +5 30000001 35000000 +5 35000001 40000000 +5 40000001 45000000 +5 45000001 50000000 +5 50000001 55000000 +5 55000001 60000000 +5 60000001 65000000 +5 65000001 70000000 +5 70000001 75000000 +5 75000001 80000000 +5 80000001 85000000 +5 85000001 90000000 +5 90000001 95000000 +5 95000001 100000000 +5 100000001 105000000 +5 105000001 110000000 +5 110000001 115000000 +5 115000001 120000000 +5 120000001 125000000 +5 125000001 130000000 +5 130000001 135000000 +5 135000001 140000000 +5 140000001 145000000 +5 145000001 150000000 +5 150000001 155000000 +5 155000001 160000000 +5 160000001 165000000 +5 165000001 170000000 +5 170000001 175000000 +5 175000001 180000000 +5 180000001 185000000 +6 1 5000000 +6 5000001 10000000 +6 10000001 15000000 +6 15000001 20000000 +6 20000001 25000000 +6 25000001 30000000 +6 30000001 35000000 +6 35000001 40000000 +6 40000001 45000000 +6 45000001 50000000 +6 50000001 55000000 +6 55000001 60000000 +6 60000001 65000000 +6 65000001 70000000 +6 70000001 75000000 +6 75000001 80000000 +6 80000001 85000000 +6 85000001 90000000 +6 90000001 95000000 +6 95000001 100000000 +6 100000001 105000000 +6 105000001 110000000 +6 110000001 115000000 +6 115000001 120000000 +6 120000001 125000000 +6 125000001 130000000 +6 130000001 135000000 +6 135000001 140000000 +6 140000001 145000000 +6 145000001 150000000 +6 150000001 155000000 +6 155000001 160000000 +6 160000001 165000000 +6 165000001 170000000 +6 170000001 175000000 +7 1 5000000 +7 5000001 10000000 +7 10000001 15000000 +7 15000001 20000000 +7 20000001 25000000 +7 25000001 30000000 +7 30000001 35000000 +7 35000001 40000000 +7 40000001 45000000 +7 45000001 50000000 +7 50000001 55000000 +7 55000001 60000000 +7 60000001 65000000 +7 65000001 70000000 +7 70000001 75000000 +7 75000001 80000000 +7 80000001 85000000 +7 85000001 90000000 +7 90000001 95000000 +7 95000001 100000000 +7 100000001 105000000 +7 105000001 110000000 +7 110000001 115000000 +7 115000001 120000000 
+7 120000001 125000000 +7 125000001 130000000 +7 130000001 135000000 +7 135000001 140000000 +7 140000001 145000000 +7 145000001 150000000 +7 150000001 155000000 +7 155000001 160000000 +8 1 5000000 +8 5000001 10000000 +8 10000001 15000000 +8 15000001 20000000 +8 20000001 25000000 +8 25000001 30000000 +8 30000001 35000000 +8 35000001 40000000 +8 40000001 45000000 +8 45000001 50000000 +8 50000001 55000000 +8 55000001 60000000 +8 60000001 65000000 +8 65000001 70000000 +8 70000001 75000000 +8 75000001 80000000 +8 80000001 85000000 +8 85000001 90000000 +8 90000001 95000000 +8 95000001 100000000 +8 100000001 105000000 +8 105000001 110000000 +8 110000001 115000000 +8 115000001 120000000 +8 120000001 125000000 +8 125000001 130000000 +8 130000001 135000000 +8 135000001 140000000 +8 140000001 145000000 +8 145000001 150000000 +9 1 5000000 +9 5000001 10000000 +9 10000001 15000000 +9 15000001 20000000 +9 20000001 25000000 +9 25000001 30000000 +9 30000001 35000000 +9 35000001 40000000 +9 40000001 45000000 +9 45000001 50000000 +9 50000001 55000000 +9 55000001 60000000 +9 60000001 65000000 +9 65000001 70000000 +9 70000001 75000000 +9 75000001 80000000 +9 80000001 85000000 +9 85000001 90000000 +9 90000001 95000000 +9 95000001 100000000 +9 100000001 105000000 +9 105000001 110000000 +9 110000001 115000000 +9 115000001 120000000 +9 120000001 125000000 +9 125000001 130000000 +9 130000001 135000000 +9 135000001 140000000 +9 140000001 145000000 +10 1 5000000 +10 5000001 10000000 +10 10000001 15000000 +10 15000001 20000000 +10 20000001 25000000 +10 25000001 30000000 +10 30000001 35000000 +10 35000001 40000000 +10 40000001 45000000 +10 45000001 50000000 +10 50000001 55000000 +10 55000001 60000000 +10 60000001 65000000 +10 65000001 70000000 +10 70000001 75000000 +10 75000001 80000000 +10 80000001 85000000 +10 85000001 90000000 +10 90000001 95000000 +10 95000001 100000000 +10 100000001 105000000 +10 105000001 110000000 +10 110000001 115000000 +10 115000001 120000000 +10 120000001 125000000 
+10 125000001 130000000 +10 130000001 135000000 +10 135000001 140000000 +11 1 5000000 +11 5000001 10000000 +11 10000001 15000000 +11 15000001 20000000 +11 20000001 25000000 +11 25000001 30000000 +11 30000001 35000000 +11 35000001 40000000 +11 40000001 45000000 +11 45000001 50000000 +11 50000001 55000000 +11 55000001 60000000 +11 60000001 65000000 +11 65000001 70000000 +11 70000001 75000000 +11 75000001 80000000 +11 80000001 85000000 +11 85000001 90000000 +11 90000001 95000000 +11 95000001 100000000 +11 100000001 105000000 +11 105000001 110000000 +11 110000001 115000000 +11 115000001 120000000 +11 120000001 125000000 +11 125000001 130000000 +11 130000001 135000000 +11 135000001 140000000 +12 1 5000000 +12 5000001 10000000 +12 10000001 15000000 +12 15000001 20000000 +12 20000001 25000000 +12 25000001 30000000 +12 30000001 35000000 +12 35000001 40000000 +12 40000001 45000000 +12 45000001 50000000 +12 50000001 55000000 +12 55000001 60000000 +12 60000001 65000000 +12 65000001 70000000 +12 70000001 75000000 +12 75000001 80000000 +12 80000001 85000000 +12 85000001 90000000 +12 90000001 95000000 +12 95000001 100000000 +12 100000001 105000000 +12 105000001 110000000 +12 110000001 115000000 +12 115000001 120000000 +12 120000001 125000000 +12 125000001 130000000 +12 130000001 135000000 +13 1 5000000 +13 5000001 10000000 +13 10000001 15000000 +13 15000001 20000000 +13 20000001 25000000 +13 25000001 30000000 +13 30000001 35000000 +13 35000001 40000000 +13 40000001 45000000 +13 45000001 50000000 +13 50000001 55000000 +13 55000001 60000000 +13 60000001 65000000 +13 65000001 70000000 +13 70000001 75000000 +13 75000001 80000000 +13 80000001 85000000 +13 85000001 90000000 +13 90000001 95000000 +13 95000001 100000000 +13 100000001 105000000 +13 105000001 110000000 +13 110000001 115000000 +13 115000001 120000000 +14 1 5000000 +14 5000001 10000000 +14 10000001 15000000 +14 15000001 20000000 +14 20000001 25000000 +14 25000001 30000000 +14 30000001 35000000 +14 35000001 40000000 +14 
40000001 45000000 +14 45000001 50000000 +14 50000001 55000000 +14 55000001 60000000 +14 60000001 65000000 +14 65000001 70000000 +14 70000001 75000000 +14 75000001 80000000 +14 80000001 85000000 +14 85000001 90000000 +14 90000001 95000000 +14 95000001 100000000 +14 100000001 105000000 +14 105000001 110000000 +15 1 5000000 +15 5000001 10000000 +15 10000001 15000000 +15 15000001 20000000 +15 20000001 25000000 +15 25000001 30000000 +15 30000001 35000000 +15 35000001 40000000 +15 40000001 45000000 +15 45000001 50000000 +15 50000001 55000000 +15 55000001 60000000 +15 60000001 65000000 +15 65000001 70000000 +15 70000001 75000000 +15 75000001 80000000 +15 80000001 85000000 +15 85000001 90000000 +15 90000001 95000000 +15 95000001 100000000 +15 100000001 105000000 +16 1 5000000 +16 5000001 10000000 +16 10000001 15000000 +16 15000001 20000000 +16 20000001 25000000 +16 25000001 30000000 +16 30000001 35000000 +16 35000001 40000000 +16 40000001 45000000 +16 45000001 50000000 +16 50000001 55000000 +16 55000001 60000000 +16 60000001 65000000 +16 65000001 70000000 +16 70000001 75000000 +16 75000001 80000000 +16 80000001 85000000 +16 85000001 90000000 +16 90000001 95000000 +17 1 5000000 +17 5000001 10000000 +17 10000001 15000000 +17 15000001 20000000 +17 20000001 25000000 +17 25000001 30000000 +17 30000001 35000000 +17 35000001 40000000 +17 40000001 45000000 +17 45000001 50000000 +17 50000001 55000000 +17 55000001 60000000 +17 60000001 65000000 +17 65000001 70000000 +17 70000001 75000000 +17 75000001 80000000 +17 80000001 85000000 +18 1 5000000 +18 5000001 10000000 +18 10000001 15000000 +18 15000001 20000000 +18 20000001 25000000 +18 25000001 30000000 +18 30000001 35000000 +18 35000001 40000000 +18 40000001 45000000 +18 45000001 50000000 +18 50000001 55000000 +18 55000001 60000000 +18 60000001 65000000 +18 65000001 70000000 +18 70000001 75000000 +18 75000001 80000000 +19 1 5000000 +19 5000001 10000000 +19 10000001 15000000 +19 15000001 20000000 +19 20000001 25000000 +19 25000001 
30000000 +19 30000001 35000000 +19 35000001 40000000 +19 40000001 45000000 +19 45000001 50000000 +19 50000001 55000000 +19 55000001 60000000 +20 1 5000000 +20 5000001 10000000 +20 10000001 15000000 +20 15000001 20000000 +20 20000001 25000000 +20 25000001 30000000 +20 30000001 35000000 +20 35000001 40000000 +20 40000001 45000000 +20 45000001 50000000 +20 50000001 55000000 +20 55000001 60000000 +20 60000001 65000000 +21 1 5000000 +21 5000001 10000000 +21 10000001 15000000 +21 15000001 20000000 +21 20000001 25000000 +21 25000001 30000000 +21 30000001 35000000 +21 35000001 40000000 +21 40000001 45000000 +21 45000001 50000000 +22 1 5000000 +22 5000001 10000000 +22 10000001 15000000 +22 15000001 20000000 +22 20000001 25000000 +22 25000001 30000000 +22 30000001 35000000 +22 35000001 40000000 +22 40000001 45000000 +22 45000001 50000000 +22 50000001 55000000 diff --git a/compute4/RNA-seq_sample_imputation/example_worksheet.csv b/compute4/RNA-seq_sample_imputation/example_worksheet.csv new file mode 100644 index 00000000..0162d1cf --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/example_worksheet.csv @@ -0,0 +1 @@ +studyEna,mergedStudy,sample,run,baseFolder diff --git a/compute4/RNA-seq_sample_imputation/parameters.csv b/compute4/RNA-seq_sample_imputation/parameters.csv new file mode 100644 index 00000000..fb797dd5 --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/parameters.csv @@ -0,0 +1,40 @@ +Name,defaultValue,description,dataType,hasOne_name +clusterQueue,gcc,,, +scheduler,PBS,,, +mem,4,Memory in GB,, +walltime,23:59:00,,, +cores,1,,, +defaultInterpreter,#!/bin/bash,,, +jobname,jobname,,string, +#,,,, +home,/target/gpfs2/gcc/home/dasha/,,, +root,/target/gpfs2/gcc/,the root to your tools and data,string, +bashrc,${root}/gcc.bashrc,,, +tooldir,${root}tools/,root Dir for tools,string, +#,,,, +studyEna,,,, +mergedStudy,,,, +sample,,,,"mergedBam,snvmixOut,mergedStudy,mpileupFile,genotypeFolder,phasedFolder,chunkFile,imputationFolder" 
+run,,,,"sortedBam,sample,mergedBam,snvmixOut" +baseFolder,,,, +mpileupFile,${baseFolder}/${studyEna}/${sample}/${sample}MergedRuns.mpileup,,, +snvmixOut,${mpileupFile}.snvmix,,,sample +sortedBam,${baseFolder}/${studyEna}/${sample}/${run}/${run}Aligned.out.sorted.bam,,, +genotypeFolder,${baseFolder}/${studyEna}/${sample}/gen/,,, +mergedBam,${baseFolder}/${studyEna}/${sample}/${sample}MergedRuns.bam,,, +#,,,, +faFile,/target/gpfs2/gcc/home/dasha/resources/hg19/indices/human_g1k_v37.fa,,, +#,,,, +JAVA_HOME,${tooldir}/jdk/,,, +#,,,, +samtools,${tooldir}samtools-0.1.18/samtools,,, +SNVMix,/target/gpfs2/gcc/home/dasha/tools/SNVMix2-0.11.8-r5/SNVMix2,,, +GenotypeCallingJar,/target/gpfs2/gcc/home/dasha/scripts/genotyping/GenotypeCalling/dist/GenotypeCalling.jar,,, +snpList,/target/gpfs2/gcc/home/dasha/resources/hg19/genotypes/1000G/all_snp_positions.txt,,, +shapeitversion,v2.644,,, +shapeitBin,${tooldir}/Shapeit-${shapeitversion}/shapeit.v2.r644.linux.x86_64,,string, +phasedFolder,${baseFolder}/${studyEna}/${sample}/phased/,,, +chunkFile,/target/gpfs2/gcc/home/pdeelen/Projects/RnaSeqEqtl/compute-pipelines/RNA-seq_sample_imputation/b37AllChrBins.csv,,, +imputationFolder,${baseFolder}/${studyEna}/${sample}/imputed/,,, +impute2Bin,${tooldir}/impute_${impute2version}/impute2,,string, +impute2version,v2.3.0_x86_64_static,,, \ No newline at end of file diff --git a/compute4/RNA-seq_sample_imputation/protocols/impute.ftl b/compute4/RNA-seq_sample_imputation/protocols/impute.ftl new file mode 100644 index 00000000..3670a2ca --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/protocols/impute.ftl @@ -0,0 +1,274 @@ +#MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=2 + +#FOREACH sample + + +phasedFolder="${phasedFolder}" +imputationFolder="${imputationFolder}" +JAVA_HOME="${JAVA_HOME}" +tooldir="${tooldir}" +chunkFile="${chunkFile}" +impute2Bin="${impute2Bin}" + +sample="${sample}" +snvmixOut="${snvmixOut}" + +<#noparse> + + + +echo "phasedFolder=${phasedFolder}" +echo 
"sample=${sample}" + +additonalImpute2Param="-Ne 20000 -k_hap 1500" + +imputationIntermediatesFolder="${imputationFolder}/imputationChunks/" + + +mkdir -p $imputationIntermediatesFolder + +IFS=$'\r\n' + +chunkFileLines=($(cat ${chunkFile})) + +unset IFS + +#remove header +unset chunkFileLines[0] + +#loop over the chunkFileLines +for chunkLine in "${chunkFileLines[@]}" +do + + + chunkElements=(${chunkLine}) + + chr=${chunkElements[0]} + fromChrPos=${chunkElements[1]} + toChrPos=${chunkElements[2]} + + echo "chr: ${chr}" + echo "fromChrPos: ${fromChrPos}" + echo "toChrPos: ${toChrPos}" + + tmpOutput="${imputationIntermediatesFolder}/~chr${chr}_${fromChrPos}-${toChrPos}" + finalOutput="${imputationIntermediatesFolder}/chr${chr}_${fromChrPos}-${toChrPos}" + + #Skip if chunk is imputed + if [ -f "${finalOutput}" ] && [ -f "${finalOutput}_info" ] + then + echo "skipping chunk" + continue + fi + + known_haps_g="${phasedFolder}/${sample}chr${chr}.haps" + m="/target/gpfs2/gcc/resources/geneticMap/hapmapPhase2/b37/genetic_map_chr${chr}_combined_b37.txt" + h="/target/gpfs2/gcc/resources/impute2Reference/gonl5/chr${chr}.hap.gz" + l="/target/gpfs2/gcc/resources/impute2Reference/gonl5/chr${chr}.legend.gz" + + # + # + ## + ### Start old imputation script + ## + # + # + + inputs $known_haps_g + inputs $m + inputs $h + inputs $l + + $impute2Bin \ + -known_haps_g $known_haps_g \ + -m $m \ + -h $h \ + -l $l \ + -int $fromChrPos $toChrPos \ + -o $tmpOutput \ + -use_prephased_g \ + $additonalImpute2Param + + #Get return code from last program call + returnCode=$? + + echo "returnCode impute2: ${returnCode}" + + if [ $returnCode -eq 0 ] + then + + #If there are no SNPs in this bin we will create empty files + if [ ! -f ${tmpOutput}_info ] + then + + echo "Impute2 did not output files. 
Usually this means there where no SNPs in this region so, generate empty files" + echo "Touching file: ${tmpOutput}" + echo "Touching file: ${tmpOutput}_info" + echo "Touching file: ${tmpOutput}_info_by_sample" + + touch ${tmpOutput} + touch ${tmpOutput}_info + touch ${tmpOutput}_info_by_sample + + fi + + + + echo -e "\nMoving temp files to final files\n\n" + + for tempFile in ${tmpOutput}* ; do + finalFile=`echo $tempFile | sed -e "s/~//g"` + echo "Moving temp file: ${tempFile} to ${finalFile}" + mv $tempFile $finalFile + putFile $finalFile + done + + elif [ `grep "ERROR: There are no type 2 SNPs after applying the command-line settings for this run" ${tmpOutput}_summary | wc -l | awk '{print $1}'` == 1 ] + then + + if [ ! -f ${tmpOutput}_info ] + then + echo "Impute2 found no type 2 SNPs in this region. We now create empty output" + echo "Touching file: ${tmpOutput}" + echo "Touching file: ${tmpOutput}_info" + echo "Touching file: ${tmpOutput}_info_by_sample" + + touch ${tmpOutput} + touch ${tmpOutput}_info + touch ${tmpOutput}_info_by_sample + + fi + + echo -e "\nMoving temp files to final files\n\n" + + for tempFile in ${tmpOutput}* ; do + finalFile=`echo $tempFile | sed -e "s/~//g"` + echo "Moving temp file: ${tempFile} to ${finalFile}" + mv $tempFile $finalFile + putFile $finalFile + done + + + else + + echo -e "\nNon zero return code not making files final. 
Existing temp files are kept for debugging purposes\n\n" + #Return non zero return code + exit 1 + + fi + + # + # + ## + ### End old imputation script + ## + # + # + + +done + + +# +# +## +### Start Concat +## +# +# + +# Delete old chunk files +for chr in {1..22} +do + rm -f ${imputationFolder}/~chr${chr} + rm -f ${imputationFolder}/~chr${chr}_info + rm -f ${imputationFolder}/chr${chr} + rm -f ${imputationFolder}/chr${chr}_info +done + +# Header set is false +for chr in {1..22} +do + headerSet[${chr}]="false" +done + +#loop over the chunkFileLines +for chunkLine in "${chunkFileLines[@]}" +do + + chunkElements=(${chunkLine}) + + chr=${chunkElements[0]} + fromChrPos=${chunkElements[1]} + toChrPos=${chunkElements[2]} + + echo "chr: ${chr}" + echo "fromChrPos: ${fromChrPos}" + echo "toChrPos: ${toChrPos}" + + cat ${imputationIntermediatesFolder}/chr${chr}_${fromChrPos}-${toChrPos} >> ${imputationFolder}/~chr${chr} + + returnCode=$? + if [ $returnCode -ne 0 ] + then + echo "Failed to append gen${imputationIntermediatesFolder}/chr${chr}_${fromChrPos}-${toChrPos} to ${imputationFolder}/~chr${chr}" >&2 + exit -1 + fi + + chunkInfoFile="${imputationIntermediatesFolder}/chr${chr}_${fromChrPos}-${toChrPos}_info" + + #Skip empty files + lineCount=`wc -l ${chunkInfoFile} | awk '{print $1}'` + echo "linecount ${lineCount} in: ${chunkInfoFile}" + if [ "$lineCount" -eq "0" ] + then + echo "skipping empty info file: ${chunkInfoFile}" + continue + fi + + #Print header if not yet done needed + if [ "${headerSet[$chr]}" == "false" ] + then + echo "print header from: ${chunkInfoFile}" + head -n 1 < $chunkInfoFile >> ${imputationFolder}/~chr${chr}_info + + returnCode=$? + if [ $returnCode -ne 0 ] + then + echo "Failed to print header of info file ${chunkInfoFile} to ${imputationFolder}/~chr${chr}_info" >&2 + exit -1 + fi + + headerSet[${chr}]="true" + fi + + #Cat without header + tail -n +2 < $chunkInfoFile >> ${imputationFolder}/~chr${chr}_info + + returnCode=$? 
+ if [ $returnCode -ne 0 ] + then + echo "Failed to append info file ${chunkInfoFile} to ${imputationFolder}/~chr${chr}_info" >&2 + exit -1 + fi + + +done + +for chr in {1..22} +do + mv ${imputationFolder}/~chr${chr} ${imputationFolder}/chr${chr} + mv ${imputationFolder}/~chr${chr}_info ${imputationFolder}/chr${chr}_info +done + +# +# +## +### End Concat +## +# +# + + + + diff --git a/compute4/RNA-seq_sample_imputation/templates/DataTransfer.sh.ftl b/compute4/RNA-seq_sample_imputation/templates/DataTransfer.sh.ftl new file mode 100644 index 00000000..6866a5d2 --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/templates/DataTransfer.sh.ftl @@ -0,0 +1,26 @@ +<#noparse>#!/bin/bash +getFile() +{ + ARGS=($@) + NUMBER="${#ARGS[@]}"; +if [ "$NUMBER" -eq "1" ] + then + myFile=${ARGS[0]} + + if test ! -e $myFile; + then + echo "WARNING in getFile/putFile: $myFile is missing" 1>&2 + fi + + else + echo "Example usage: getData \"\$TMPDIR/datadir/myfile.txt\"" + fi +} + +putFile() +{ + `getFile $@` +} + +export -f getFile +export -f putFile \ No newline at end of file diff --git a/compute4/RNA-seq_sample_imputation/templates/Footer.ftl b/compute4/RNA-seq_sample_imputation/templates/Footer.ftl new file mode 100644 index 00000000..e69de29b diff --git a/compute4/RNA-seq_sample_imputation/templates/Header.ftl b/compute4/RNA-seq_sample_imputation/templates/Header.ftl new file mode 100644 index 00000000..f105b577 --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/templates/Header.ftl @@ -0,0 +1,58 @@ +#!/bin/bash +#PBS -N ${jobname} +#PBS -q ${clusterQueue} +#PBS -l nodes=1:ppn=${cores} +#PBS -l walltime=${walltime} +#PBS -l mem=${mem} +#PBS -e ${jobname}.err +#PBS -o ${jobname}.out +#PBS -W umask=0057 + +# Configures the GCC bash environment +. 
${root}/gcc.bashrc + +<#function ssvQuoted items> + <#local result = ""> + <#list items as item> + <#if item_index != 0> + <#local result = result + " "> + + <#local result = result + "\"" + item + "\""> + + <#return result> + + + +inputs() +{ + for name in $@ + do + if test ! -e $name; + then + echo "$name is missing" 1>&2 + exit 1; + fi + done +} + +alloutputsexist() +{ + all_exist=true + for name in $@ + do + if test ! -e $name; + then + all_exist=false + fi + done + if $all_exist; + then + echo "skipped" + echo "skipped" 1>&2 + sleep 30 + exit 0; + else + return; + fi +} + diff --git a/compute4/RNA-seq_sample_imputation/templates/Submit.sh.ftl b/compute4/RNA-seq_sample_imputation/templates/Submit.sh.ftl new file mode 100644 index 00000000..c9f3598a --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/templates/Submit.sh.ftl @@ -0,0 +1,8 @@ + +<#foreach j in jobs> +#${j.name} +${j.name}=$(qsub -N ${j.name}<#if j.prevSteps_Name?size > 0> -W depend=afterok<#foreach d in j.prevSteps_Name>:$${d} ${j.name}.sh) +echo $${j.name} +sleep 0 + + diff --git a/compute4/RNA-seq_sample_imputation/templates/templ-cluster.ftl b/compute4/RNA-seq_sample_imputation/templates/templ-cluster.ftl new file mode 100644 index 00000000..1308393e --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/templates/templ-cluster.ftl @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -q ${clusterqueue} +#PBS -l nodes=1:ppn=${cores} +#PBS -l walltime=${walltime} +#PBS -l mem=${memory}gb +#PBS -e ${location}/err/err_${scriptID}.err +#PBS -o ${location}/out/out_${scriptID}.out +mkdir -p ${location}/err +mkdir -p ${location}/out +printf "${scriptID}_started " >>${location}/log_${jobID}.txt +date "+DATE: %m/%d/%y%tTIME: %H:%M:%S" >>${location}/log_${jobID}.txt +date "+start time: %m/%d/%y%t %H:%M:%S" >>${location}/extra/${scriptID}.txt +echo running on node: `hostname` >>${location}/extra/${scriptID}.txt +${actualcommand} +${verificationcommand} +printf "${scriptID}_finished " >>${location}/log_${jobID}.txt +date 
"+finish time: %m/%d/%y%t %H:%M:%S" >>${location}/extra/${scriptID}.txt +date "+DATE: %m/%d/%y%tTIME: %H:%M:%S" >>${location}/log_${jobID}.txt diff --git a/compute4/RNA-seq_sample_imputation/templates/templ-download-grid.ftl b/compute4/RNA-seq_sample_imputation/templates/templ-download-grid.ftl new file mode 100644 index 00000000..8313d257 --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/templates/templ-download-grid.ftl @@ -0,0 +1,3 @@ +#download input data +lcg-cp lfn://grid/lsgrid/${srm_name} \ +file:///$TMPDIR/${just_name} diff --git a/compute4/RNA-seq_sample_imputation/templates/templ-errorfile.ftl b/compute4/RNA-seq_sample_imputation/templates/templ-errorfile.ftl new file mode 100644 index 00000000..cfe02a7a --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/templates/templ-errorfile.ftl @@ -0,0 +1 @@ +${location}/err/err_${scriptID}.err \ No newline at end of file diff --git a/compute4/RNA-seq_sample_imputation/templates/templ-exe-grid-dir.ftl b/compute4/RNA-seq_sample_imputation/templates/templ-exe-grid-dir.ftl new file mode 100644 index 00000000..e88240ae --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/templates/templ-exe-grid-dir.ftl @@ -0,0 +1 @@ +$TMPDIR/ \ No newline at end of file diff --git a/compute4/RNA-seq_sample_imputation/templates/templ-exe-grid.ftl b/compute4/RNA-seq_sample_imputation/templates/templ-exe-grid.ftl new file mode 100644 index 00000000..8a215101 --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/templates/templ-exe-grid.ftl @@ -0,0 +1,7 @@ +#download executable +lcg-cp lfn://grid/lsgrid/${srm_name} \ +file:///$TMPDIR/${just_name} +chmod 755 $TMPDIR/${just_name} + +/bin/hostname + diff --git a/compute4/RNA-seq_sample_imputation/templates/templ-extrafile.ftl b/compute4/RNA-seq_sample_imputation/templates/templ-extrafile.ftl new file mode 100644 index 00000000..f8e1aa39 --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/templates/templ-extrafile.ftl @@ -0,0 +1 @@ +${location}/extra/${scriptID}.txt \ No 
newline at end of file diff --git a/compute4/RNA-seq_sample_imputation/templates/templ-jdl-grid.ftl b/compute4/RNA-seq_sample_imputation/templates/templ-jdl-grid.ftl new file mode 100644 index 00000000..fd157e20 --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/templates/templ-jdl-grid.ftl @@ -0,0 +1,19 @@ +Type="Job"; +JobType="Normal"; + +Executable = "/bin/sh"; +Arguments = "${script_name}.sh"; + +StdError = "${error_log}"; +StdOutput = "${output_log}"; + +InputSandbox = {"${script_location}/${script_name}.sh${extra_inputs}"}; +OutputSandbox = {"${error_log}","${output_log}"${extra_outputs}}; + +Requirements = other.GlueCEUniqueID == "ce.grid.rug.nl:8443/cream-pbs-medium" +|| other.GlueCEUniqueID =="gb-ce-tud.ewi.tudelft.nl:8443/cream-pbs-medium" +|| other.GlueCEUniqueID =="gb-ce-nki.els.sara.nl:8443/cream-pbs-medium" +|| other.GlueCEUniqueID =="gb-ce-lumc.lumc.nl:8443/cream-pbs-medium" +|| other.GlueCEUniqueID =="gb-ce-rug.sara.usor.nl:8443/cream-pbs-medium" +|| other.GlueCEUniqueID =="gb-ce-ams.els.sara.nl:8443/cream-pbs-medium" +|| other.GlueCEUniqueID =="creamce.grid.rug.nl:8443/cream-pbs-medium"; diff --git a/compute4/RNA-seq_sample_imputation/templates/templ-logfile.ftl b/compute4/RNA-seq_sample_imputation/templates/templ-logfile.ftl new file mode 100644 index 00000000..4efcfb97 --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/templates/templ-logfile.ftl @@ -0,0 +1 @@ +${location}/log_${jobID}.txt \ No newline at end of file diff --git a/compute4/RNA-seq_sample_imputation/templates/templ-outfile.ftl b/compute4/RNA-seq_sample_imputation/templates/templ-outfile.ftl new file mode 100644 index 00000000..8a88f9a6 --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/templates/templ-outfile.ftl @@ -0,0 +1 @@ +${location}/out/out_${scriptID}.out \ No newline at end of file diff --git a/compute4/RNA-seq_sample_imputation/templates/templ-submit.ftl b/compute4/RNA-seq_sample_imputation/templates/templ-submit.ftl new file mode 100644 index 
00000000..d5a52954 --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/templates/templ-submit.ftl @@ -0,0 +1,4 @@ +#job_${submitID} +job_${submitID}=$(qsub -N ${scriptID} ${dependancy} ${scriptID}.sh) +echo $job_${submitID} +sleep 8 diff --git a/compute4/RNA-seq_sample_imputation/templates/templ-upload-grid.ftl b/compute4/RNA-seq_sample_imputation/templates/templ-upload-grid.ftl new file mode 100644 index 00000000..abcdb97d --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/templates/templ-upload-grid.ftl @@ -0,0 +1,6 @@ +#upload result data; +#created an empty file first to avoid uploading error if file is not created +echo -n "" >> file:///$TMPDIR/${just_name} +lcg-cr -l lfn://grid/lsgrid/${srm_name} \ +file:///$TMPDIR/${just_name} + diff --git a/compute4/RNA-seq_sample_imputation/templates/test_template.ftl b/compute4/RNA-seq_sample_imputation/templates/test_template.ftl new file mode 100644 index 00000000..817bc310 --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/templates/test_template.ftl @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -q ${clusterqueue} +#PBS -l nodes=1:ppn=${cores} +#PBS -l walltime=${walltime} +#PBS -l mem=${memory}gb +#PBS -e ${location}/err/err_${scriptID}.err +#PBS -o ${location}/out/out_${scriptID}.out +mkdir -p ${location}/err +mkdir -p ${location}/out +printf "${scriptID}_started " >>${location}/log_${jobID}.txt +date "+DATE: %m/%d/%y%tTIME: %H:%M:%S" >>${location}/log_${jobID}.txt +date "+start time: %m/%d/%y%t %H:%M:%S" >>${location}/extra/${scriptID}.txt +echo running on node: `hostname` >>${location}/extra/${scriptID}.txt +${actualcommand} +${verificationcommand} +printf "${scriptID}_finished " >>${location}/log_${jobID}.txt +date "+finish time: %m/%d/%y%t %H:%M:%S" >>${location}/extra/${scriptID}.txt +date "+DATE: %m/%d/%y%tTIME: %H:%M:%S" >>${location}/log_${jobID}.txt \ No newline at end of file diff --git a/compute4/RNA-seq_sample_imputation/workflow.csv b/compute4/RNA-seq_sample_imputation/workflow.csv new file mode 100644 
index 00000000..4339c812 --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/workflow.csv @@ -0,0 +1,2 @@ +name,protocol_name,PreviousSteps_name +impute,impute, diff --git a/compute4/RNA-seq_sample_imputation/workflowCall.csv b/compute4/RNA-seq_sample_imputation/workflowCall.csv new file mode 100644 index 00000000..ca8bc0fa --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/workflowCall.csv @@ -0,0 +1,2 @@ +name,protocol_name,PreviousSteps_name +SNVMix,SNVMix, diff --git a/compute4/RNA-seq_sample_imputation/workflowCreateFileList.csv b/compute4/RNA-seq_sample_imputation/workflowCreateFileList.csv new file mode 100644 index 00000000..985b4ec0 --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/workflowCreateFileList.csv @@ -0,0 +1,2 @@ +name,protocol_name,PreviousSteps_name +CreateSNVMixFileList,CreateSNVMixFileList, diff --git a/compute4/RNA-seq_sample_imputation/workflowMerge.csv b/compute4/RNA-seq_sample_imputation/workflowMerge.csv new file mode 100644 index 00000000..c7c2d4a5 --- /dev/null +++ b/compute4/RNA-seq_sample_imputation/workflowMerge.csv @@ -0,0 +1,2 @@ +name,protocol_name,PreviousSteps_name +ConvertSNVMixToGen,ConvertSNVMixToGen, diff --git a/compute4/RNA-seq_sample_phasing/example_worksheet.csv b/compute4/RNA-seq_sample_phasing/example_worksheet.csv new file mode 100644 index 00000000..0162d1cf --- /dev/null +++ b/compute4/RNA-seq_sample_phasing/example_worksheet.csv @@ -0,0 +1 @@ +studyEna,mergedStudy,sample,run,baseFolder diff --git a/compute4/RNA-seq_sample_phasing/parameters.csv b/compute4/RNA-seq_sample_phasing/parameters.csv new file mode 100644 index 00000000..50b3071e --- /dev/null +++ b/compute4/RNA-seq_sample_phasing/parameters.csv @@ -0,0 +1,36 @@ +Name,defaultValue,description,dataType,hasOne_name +clusterQueue,gcc,,, +scheduler,PBS,,, +mem,4,Memory in GB,, +walltime,23:59:00,,, +cores,1,,, +defaultInterpreter,#!/bin/bash,,, +jobname,jobname,,string, +#,,,, +home,/target/gpfs2/gcc/home/dasha/,,, +root,/target/gpfs2/gcc/,the 
root to your tools and data,string, +bashrc,${root}/gcc.bashrc,,, +tooldir,${root}tools/,root Dir for tools,string, +#,,,, +studyEna,,,, +mergedStudy,,,, +sample,,,,"mergedBam,snvmixOut,mergedStudy,mpileupFile,genotypeFolder,phasedFolder" +run,,,,"sortedBam,sample,mergedBam,snvmixOut" +baseFolder,,,, +mpileupFile,${baseFolder}/${studyEna}/${sample}/${sample}MergedRuns.mpileup,,, +snvmixOut,${mpileupFile}.snvmix,,,sample +sortedBam,${baseFolder}/${studyEna}/${sample}/${run}/${run}Aligned.out.sorted.bam,,, +genotypeFolder,${baseFolder}/${studyEna}/${sample}/gen/,,, +mergedBam,${baseFolder}/${studyEna}/${sample}/${sample}MergedRuns.bam,,, +#,,,, +faFile,/target/gpfs2/gcc/home/dasha/resources/hg19/indices/human_g1k_v37.fa,,, +#,,,, +JAVA_HOME,${tooldir}/jdk/,,, +#,,,, +samtools,${tooldir}samtools-0.1.18/samtools,,, +SNVMix,/target/gpfs2/gcc/home/dasha/tools/SNVMix2-0.11.8-r5/SNVMix2,,, +GenotypeCallingJar,/target/gpfs2/gcc/home/dasha/scripts/genotyping/GenotypeCalling/dist/GenotypeCalling.jar,,, +snpList,/target/gpfs2/gcc/home/dasha/resources/hg19/genotypes/1000G/all_snp_positions.txt,,, +shapeitversion,v2.644,,, +shapeitBin,${tooldir}/Shapeit-${shapeitversion}/shapeit.v2.r644.linux.x86_64,,string, +phasedFolder,${baseFolder}/${studyEna}/${sample}/phased/,,, \ No newline at end of file diff --git a/compute4/RNA-seq_sample_phasing/protocols/phase.ftl b/compute4/RNA-seq_sample_phasing/protocols/phase.ftl new file mode 100644 index 00000000..51687963 --- /dev/null +++ b/compute4/RNA-seq_sample_phasing/protocols/phase.ftl @@ -0,0 +1,115 @@ +#MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=2 + +#FOREACH sample + +genotypeFolder="${genotypeFolder}" +phasedFolder="${phasedFolder}" +shapeitBin="${shapeitBin}" +JAVA_HOME="${JAVA_HOME}" +tooldir="${tooldir}" + +sample="${sample}" +snvmixOut="${snvmixOut}" + +<#noparse> + +mkdir -p ${genotypeFolder} +mkdir -p ${phasedFolder} + +echo "genotypeFolder=${genotypeFolder}" +echo "phasedFolder=${phasedFolder}" +echo 
"snvMixOuts=${snvmixOut}" +echo "samples=${sample}" + +rm -f ${genotypeFolder}/fileList.txt + + + + +echo -e "${sample}\t${snvmixOut}" >> ${genotypeFolder}/fileList.txt + + + ${JAVA_HOME}/bin/java \ + -Xmx2g \ + -jar /target/gpfs2/gcc/home/dasha/scripts/genotyping/GenotypeCalling/dist/GenotypeCalling.jar \ + --mode SNVMixToGen \ + --fileList ${genotypeFolder}/fileList.txt \ + --p-value 0.8 \ + --out ${genotypeFolder}/___tmp___${sample}chr + + returnCode=$? + echo "Return code ${returnCode}" + + if [ "${returnCode}" -eq "0" ] + then + + echo "Moving temp files: ${genotypeFolder}/___tmp___${sample}chr* to ${genotypeFolder}/${sample}chr*" + tmpFiles="${genotypeFolder}/___tmp___${sample}chr*" + for f in $tmpFiles + do + mv $f ${f//___tmp___/} + done + + else + + echo -e "\nNon zero return code not making files final. Existing temp files are kept for debugging purposes\n\n" + #Return non zero return code + exit 1 + + fi + + +for chr in {1..22} +do + + sort -k3,3n ${genotypeFolder}/${sample}chr${chr}.gen > ${genotypeFolder}/${sample}chr${chr}.sorted.gen + + /target/gpfs2/gcc/tools/Shapeit-v2.644/shapeit.v2.r644.linux.x86_64 \ + -check \ + --input-gen ${genotypeFolder}/${sample}chr${chr}.sorted.gen ${genotypeFolder}/${sample}chr.sample \ + --input-ref /target/gpfs2/gcc/resources/impute2Reference/gonl5/chr${chr}.hap.gz /target/gpfs2/gcc/resources/impute2Reference/gonl5/chr${chr}.legend.gz /target/gpfs2/gcc/resources/impute2Reference/gonl5/gonl5.sample\ + --output-log ${genotypeFolder}/${sample}chr${chr}Check \ + --input-thr 0.8 \ + + + + + /target/gpfs2/gcc/tools/Shapeit-v2.644/shapeit.v2.r644.linux.x86_64 \ + --input-gen ${genotypeFolder}/${sample}chr${chr}.sorted.gen ${genotypeFolder}/${sample}chr.sample \ + --input-ref /target/gpfs2/gcc/resources/impute2Reference/gonl5/chr${chr}.hap.gz /target/gpfs2/gcc/resources/impute2Reference/gonl5/chr${chr}.legend.gz /target/gpfs2/gcc/resources/impute2Reference/gonl5/gonl5.sample\ + --output-log 
${phasedFolder}/___tmp___${sample}chr${chr} \ + --output-max ${phasedFolder}/___tmp___${sample}chr${chr} \ + --input-map /target/gpfs2/gcc/resources/geneticMap/hapmapPhase2/b37/genetic_map_chr${chr}_combined_b37.txt \ + --input-thr 0.8 \ + --exclude-snp ${genotypeFolder}/${sample}chr${chr}Check.snp.strand.exclude \ + --no-mcmc \ + --thread 1 + + + returnCode=$? + echo "Return code ${returnCode}" + + if [ "${returnCode}" -eq "0" ] + then + + echo "Moving temp files: ${phasedFolder}/___tmp___${sample}chr${chr}* to ${phasedFolder}/${sample}chr${chr}*" + tmpFiles="${phasedFolder}/___tmp___${sample}chr${chr}*" + for f in $tmpFiles + do + mv $f ${f//___tmp___/} + done + + else + + echo -e "\nNon zero return code not making files final. Existing temp files are kept for debugging purposes\n\n" + #Return non zero return code + exit 1 + + fi + + +done + + + + diff --git a/compute4/RNA-seq_sample_phasing/templates/DataTransfer.sh.ftl b/compute4/RNA-seq_sample_phasing/templates/DataTransfer.sh.ftl new file mode 100644 index 00000000..6866a5d2 --- /dev/null +++ b/compute4/RNA-seq_sample_phasing/templates/DataTransfer.sh.ftl @@ -0,0 +1,26 @@ +<#noparse>#!/bin/bash +getFile() +{ + ARGS=($@) + NUMBER="${#ARGS[@]}"; +if [ "$NUMBER" -eq "1" ] + then + myFile=${ARGS[0]} + + if test ! 
-e $myFile; + then + echo "WARNING in getFile/putFile: $myFile is missing" 1>&2 + fi + + else + echo "Example usage: getData \"\$TMPDIR/datadir/myfile.txt\"" + fi +} + +putFile() +{ + `getFile $@` +} + +export -f getFile +export -f putFile \ No newline at end of file diff --git a/compute4/RNA-seq_sample_phasing/templates/Footer.ftl b/compute4/RNA-seq_sample_phasing/templates/Footer.ftl new file mode 100644 index 00000000..e69de29b diff --git a/compute4/RNA-seq_sample_phasing/templates/Header.ftl b/compute4/RNA-seq_sample_phasing/templates/Header.ftl new file mode 100644 index 00000000..f105b577 --- /dev/null +++ b/compute4/RNA-seq_sample_phasing/templates/Header.ftl @@ -0,0 +1,58 @@ +#!/bin/bash +#PBS -N ${jobname} +#PBS -q ${clusterQueue} +#PBS -l nodes=1:ppn=${cores} +#PBS -l walltime=${walltime} +#PBS -l mem=${mem} +#PBS -e ${jobname}.err +#PBS -o ${jobname}.out +#PBS -W umask=0057 + +# Configures the GCC bash environment +. ${root}/gcc.bashrc + +<#function ssvQuoted items> + <#local result = ""> + <#list items as item> + <#if item_index != 0> + <#local result = result + " "> + + <#local result = result + "\"" + item + "\""> + + <#return result> + + + +inputs() +{ + for name in $@ + do + if test ! -e $name; + then + echo "$name is missing" 1>&2 + exit 1; + fi + done +} + +alloutputsexist() +{ + all_exist=true + for name in $@ + do + if test ! 
-e $name; + then + all_exist=false + fi + done + if $all_exist; + then + echo "skipped" + echo "skipped" 1>&2 + sleep 30 + exit 0; + else + return; + fi +} + diff --git a/compute4/RNA-seq_sample_phasing/templates/Submit.sh.ftl b/compute4/RNA-seq_sample_phasing/templates/Submit.sh.ftl new file mode 100644 index 00000000..c9f3598a --- /dev/null +++ b/compute4/RNA-seq_sample_phasing/templates/Submit.sh.ftl @@ -0,0 +1,8 @@ + +<#foreach j in jobs> +#${j.name} +${j.name}=$(qsub -N ${j.name}<#if j.prevSteps_Name?size > 0> -W depend=afterok<#foreach d in j.prevSteps_Name>:$${d} ${j.name}.sh) +echo $${j.name} +sleep 0 + + diff --git a/compute4/RNA-seq_sample_phasing/templates/templ-cluster.ftl b/compute4/RNA-seq_sample_phasing/templates/templ-cluster.ftl new file mode 100644 index 00000000..1308393e --- /dev/null +++ b/compute4/RNA-seq_sample_phasing/templates/templ-cluster.ftl @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -q ${clusterqueue} +#PBS -l nodes=1:ppn=${cores} +#PBS -l walltime=${walltime} +#PBS -l mem=${memory}gb +#PBS -e ${location}/err/err_${scriptID}.err +#PBS -o ${location}/out/out_${scriptID}.out +mkdir -p ${location}/err +mkdir -p ${location}/out +printf "${scriptID}_started " >>${location}/log_${jobID}.txt +date "+DATE: %m/%d/%y%tTIME: %H:%M:%S" >>${location}/log_${jobID}.txt +date "+start time: %m/%d/%y%t %H:%M:%S" >>${location}/extra/${scriptID}.txt +echo running on node: `hostname` >>${location}/extra/${scriptID}.txt +${actualcommand} +${verificationcommand} +printf "${scriptID}_finished " >>${location}/log_${jobID}.txt +date "+finish time: %m/%d/%y%t %H:%M:%S" >>${location}/extra/${scriptID}.txt +date "+DATE: %m/%d/%y%tTIME: %H:%M:%S" >>${location}/log_${jobID}.txt diff --git a/compute4/RNA-seq_sample_phasing/templates/templ-download-grid.ftl b/compute4/RNA-seq_sample_phasing/templates/templ-download-grid.ftl new file mode 100644 index 00000000..8313d257 --- /dev/null +++ b/compute4/RNA-seq_sample_phasing/templates/templ-download-grid.ftl @@ -0,0 +1,3 @@ 
+#download input data
+lcg-cp lfn://grid/lsgrid/${srm_name} \
+file:///$TMPDIR/${just_name}
diff --git a/compute4/RNA-seq_sample_phasing/templates/templ-errorfile.ftl b/compute4/RNA-seq_sample_phasing/templates/templ-errorfile.ftl
new file mode 100644
index 00000000..cfe02a7a
--- /dev/null
+++ b/compute4/RNA-seq_sample_phasing/templates/templ-errorfile.ftl
@@ -0,0 +1 @@
+${location}/err/err_${scriptID}.err
\ No newline at end of file
diff --git a/compute4/RNA-seq_sample_phasing/templates/templ-exe-grid-dir.ftl b/compute4/RNA-seq_sample_phasing/templates/templ-exe-grid-dir.ftl
new file mode 100644
index 00000000..e88240ae
--- /dev/null
+++ b/compute4/RNA-seq_sample_phasing/templates/templ-exe-grid-dir.ftl
@@ -0,0 +1 @@
+$TMPDIR/
\ No newline at end of file
diff --git a/compute4/RNA-seq_sample_phasing/templates/templ-exe-grid.ftl b/compute4/RNA-seq_sample_phasing/templates/templ-exe-grid.ftl
new file mode 100644
index 00000000..8a215101
--- /dev/null
+++ b/compute4/RNA-seq_sample_phasing/templates/templ-exe-grid.ftl
@@ -0,0 +1,7 @@
+#download executable
+lcg-cp lfn://grid/lsgrid/${srm_name} \
+file:///$TMPDIR/${just_name}
+chmod 755 $TMPDIR/${just_name}
+
+/bin/hostname
+
diff --git a/compute4/RNA-seq_sample_phasing/templates/templ-extrafile.ftl b/compute4/RNA-seq_sample_phasing/templates/templ-extrafile.ftl
new file mode 100644
index 00000000..f8e1aa39
--- /dev/null
+++ b/compute4/RNA-seq_sample_phasing/templates/templ-extrafile.ftl
@@ -0,0 +1 @@
+${location}/extra/${scriptID}.txt
\ No newline at end of file
diff --git a/compute4/RNA-seq_sample_phasing/templates/templ-jdl-grid.ftl b/compute4/RNA-seq_sample_phasing/templates/templ-jdl-grid.ftl
new file mode 100644
index 00000000..fd157e20
--- /dev/null
+++ b/compute4/RNA-seq_sample_phasing/templates/templ-jdl-grid.ftl
@@ -0,0 +1,19 @@
+Type="Job";
+JobType="Normal";
+
+Executable = "/bin/sh";
+Arguments = "${script_name}.sh";
+
+StdError = "${error_log}";
+StdOutput = "${output_log}";
+
+InputSandbox = {"${script_location}/${script_name}.sh${extra_inputs}"};
+OutputSandbox = {"${error_log}","${output_log}"${extra_outputs}};
+
+Requirements = other.GlueCEUniqueID == "ce.grid.rug.nl:8443/cream-pbs-medium"
+|| other.GlueCEUniqueID =="gb-ce-tud.ewi.tudelft.nl:8443/cream-pbs-medium"
+|| other.GlueCEUniqueID =="gb-ce-nki.els.sara.nl:8443/cream-pbs-medium"
+|| other.GlueCEUniqueID =="gb-ce-lumc.lumc.nl:8443/cream-pbs-medium"
+|| other.GlueCEUniqueID =="gb-ce-rug.sara.usor.nl:8443/cream-pbs-medium"
+|| other.GlueCEUniqueID =="gb-ce-ams.els.sara.nl:8443/cream-pbs-medium"
+|| other.GlueCEUniqueID =="creamce.grid.rug.nl:8443/cream-pbs-medium";
diff --git a/compute4/RNA-seq_sample_phasing/templates/templ-logfile.ftl b/compute4/RNA-seq_sample_phasing/templates/templ-logfile.ftl
new file mode 100644
index 00000000..4efcfb97
--- /dev/null
+++ b/compute4/RNA-seq_sample_phasing/templates/templ-logfile.ftl
@@ -0,0 +1 @@
+${location}/log_${jobID}.txt
\ No newline at end of file
diff --git a/compute4/RNA-seq_sample_phasing/templates/templ-outfile.ftl b/compute4/RNA-seq_sample_phasing/templates/templ-outfile.ftl
new file mode 100644
index 00000000..8a88f9a6
--- /dev/null
+++ b/compute4/RNA-seq_sample_phasing/templates/templ-outfile.ftl
@@ -0,0 +1 @@
+${location}/out/out_${scriptID}.out
\ No newline at end of file
diff --git a/compute4/RNA-seq_sample_phasing/templates/templ-submit.ftl b/compute4/RNA-seq_sample_phasing/templates/templ-submit.ftl
new file mode 100644
index 00000000..d5a52954
--- /dev/null
+++ b/compute4/RNA-seq_sample_phasing/templates/templ-submit.ftl
@@ -0,0 +1,4 @@
+#job_${submitID}
+job_${submitID}=$(qsub -N ${scriptID} ${dependancy} ${scriptID}.sh)
+echo $job_${submitID}
+sleep 8
diff --git a/compute4/RNA-seq_sample_phasing/templates/templ-upload-grid.ftl b/compute4/RNA-seq_sample_phasing/templates/templ-upload-grid.ftl
new file mode 100644
index 00000000..abcdb97d
--- /dev/null
+++ b/compute4/RNA-seq_sample_phasing/templates/templ-upload-grid.ftl
@@ -0,0 +1,6 @@
+#upload result data;
+#create an empty placeholder first so the upload does not fail when the job produced no file (must be a plain local path: shell redirection cannot write to a file:// URL)
+touch "$TMPDIR/${just_name}"
+lcg-cr -l lfn://grid/lsgrid/${srm_name} \
+file:///$TMPDIR/${just_name}
+
diff --git a/compute4/RNA-seq_sample_phasing/templates/test_template.ftl b/compute4/RNA-seq_sample_phasing/templates/test_template.ftl
new file mode 100644
index 00000000..817bc310
--- /dev/null
+++ b/compute4/RNA-seq_sample_phasing/templates/test_template.ftl
@@ -0,0 +1,18 @@
+#!/bin/bash
+#PBS -q ${clusterqueue}
+#PBS -l nodes=1:ppn=${cores}
+#PBS -l walltime=${walltime}
+#PBS -l mem=${memory}gb
+#PBS -e ${location}/err/err_${scriptID}.err
+#PBS -o ${location}/out/out_${scriptID}.out
+mkdir -p ${location}/err
+mkdir -p ${location}/out
+printf "${scriptID}_started " >>${location}/log_${jobID}.txt
+date "+DATE: %m/%d/%y%tTIME: %H:%M:%S" >>${location}/log_${jobID}.txt
+date "+start time: %m/%d/%y%t %H:%M:%S" >>${location}/extra/${scriptID}.txt
+echo running on node: `hostname` >>${location}/extra/${scriptID}.txt
+${actualcommand}
+${verificationcommand}
+printf "${scriptID}_finished " >>${location}/log_${jobID}.txt
+date "+finish time: %m/%d/%y%t %H:%M:%S" >>${location}/extra/${scriptID}.txt
+date "+DATE: %m/%d/%y%tTIME: %H:%M:%S" >>${location}/log_${jobID}.txt
\ No newline at end of file
diff --git a/compute4/RNA-seq_sample_phasing/workflow.csv b/compute4/RNA-seq_sample_phasing/workflow.csv
new file mode 100644
index 00000000..cfea3307
--- /dev/null
+++ b/compute4/RNA-seq_sample_phasing/workflow.csv
@@ -0,0 +1,2 @@
+name,protocol_name,PreviousSteps_name
+phase,phase,
diff --git a/compute4/RNA-seq_sample_phasing/workflowCall.csv b/compute4/RNA-seq_sample_phasing/workflowCall.csv
new file mode 100644
index 00000000..ca8bc0fa
--- /dev/null
+++ b/compute4/RNA-seq_sample_phasing/workflowCall.csv
@@ -0,0 +1,2 @@
+name,protocol_name,PreviousSteps_name
+SNVMix,SNVMix,
diff --git a/compute4/RNA-seq_sample_phasing/workflowCreateFileList.csv
b/compute4/RNA-seq_sample_phasing/workflowCreateFileList.csv new file mode 100644 index 00000000..985b4ec0 --- /dev/null +++ b/compute4/RNA-seq_sample_phasing/workflowCreateFileList.csv @@ -0,0 +1,2 @@ +name,protocol_name,PreviousSteps_name +CreateSNVMixFileList,CreateSNVMixFileList, diff --git a/compute4/RNA-seq_sample_phasing/workflowMerge.csv b/compute4/RNA-seq_sample_phasing/workflowMerge.csv new file mode 100644 index 00000000..c7c2d4a5 --- /dev/null +++ b/compute4/RNA-seq_sample_phasing/workflowMerge.csv @@ -0,0 +1,2 @@ +name,protocol_name,PreviousSteps_name +ConvertSNVMixToGen,ConvertSNVMixToGen, diff --git a/compute4/STAR_RNA-seq_mapping/parameters.csv b/compute4/STAR_RNA-seq_mapping/parameters.csv index 014a43a1..b260b2e4 100644 --- a/compute4/STAR_RNA-seq_mapping/parameters.csv +++ b/compute4/STAR_RNA-seq_mapping/parameters.csv @@ -7,17 +7,18 @@ defaultInterpreter,#!/bin/bash,,, jobname,jobname,,string, #,,,, root,/target/gpfs2/gcc/,the root to your tools and data,string, -bashrc,${root}/gcc.bashrc,,, -toolDir,${root}tools/,root Dir for tools,string, -JAVA_HOME,${toolDir}/jdk/,,, +bashrc,/gcc/groups/gcc/home/gcc.bashrc,,, +toolDir,/gcc/tools/,root Dir for tools,string, +JAVA_HOME,${toolDir}/jdk1.7.0_25/,,, #,,,, fastq1,,,, fastq2,,,, outputFolder,,,, outputPrefix,,,, #,,,, -STARindex,/target/gpfs2/gcc/home/dasha/resources/hg19/indices/STAR_index_masked1000G/,,, -annotationGtf,/target/gpfs2/gcc/home/dasha/resources/hg19/v71/Homo_sapiens.GRCh37.71.cut.sorted.gtf,,, +STARindex,/gcc/groups/gcc/tmp02/pdeelen/Projects/RnaSeqEqtl/STAR_index_masked1000G/,,, +annotationGtf,/gcc/groups/gcc/tmp02/pdeelen/Projects/RnaSeqEqtl/Homo_sapiens.GRCh37.71.cut.sorted.gtf,,, #,,,, samtools,${toolDir}samtools-0.1.18/samtools,,, -STAR,/target/gpfs2/gcc/home/dasha/tools/STAR_2.3.1l/STAR,,, \ No newline at end of file +STAR,/gcc/groups/gcc/tmp02/pdeelen/Projects/RnaSeqEqtl/STAR_2.3.1l/STAR,,, +picardTools,${toolDir}/picard-tools-1.102/,,, diff --git 
a/compute4/STAR_RNA-seq_mapping/protocols/STARMapping.ftl b/compute4/STAR_RNA-seq_mapping/protocols/STARMapping.ftl index 228901fe..cd33349b 100644 --- a/compute4/STAR_RNA-seq_mapping/protocols/STARMapping.ftl +++ b/compute4/STAR_RNA-seq_mapping/protocols/STARMapping.ftl @@ -1,4 +1,4 @@ -#MOLGENIS walltime=6:00:00 nodes=1 cores=8 mem=40 +#MOLGENIS walltime=24:00:00 nodes=1 cores=8 mem=40 fastq1="${fastq1}" fastq2="${fastq2}" @@ -6,6 +6,8 @@ outputFolder="${outputFolder}" prefix="${outputPrefix}" STAR="${STAR}" STARindex="${STARindex}" +picardTools="${picardTools}" +JAVA_HOME="${JAVA_HOME}" <#noparse> @@ -13,14 +15,7 @@ echo -e "fastq1=${fastq1}\nfastq2=${fastq2}\noutputFolder=${outputFolder}\nprefi mkdir -p ${outputFolder} -if [[ -f ${outputFolder}/${prefix}Aligned.out.sorted.bam && -f ${outputFolder}/${prefix}Aligned.out.sorted.bam.bai ]]; then - echo "skipping, next step already has output" - rm -f ${outputFolder}/${outputPrefix}Aligned.out.sam - exit 0 -fi - - -alloutputsexist ${outputFolder}/${prefix}Aligned.out.sam +alloutputsexist ${outputFolder}/${prefix}Aligned.out.sorted.bam inputs ${fastq1} @@ -47,7 +42,7 @@ then echo "Mapping single-end reads" echo "Allowing $numMism mismatches" ${STAR} \ - --outFileNamePrefix ${outputFolder}/${prefix}___tmp___ \ + --outFileNamePrefix ${TMPDIR}/${prefix}___tmp___ \ --readFilesIn ${fastq1} \ --readFilesCommand zcat \ --genomeDir ${STARindex} \ @@ -66,7 +61,7 @@ else let numMism=$numMism*2 echo "Allowing $numMism mismatches" ${STAR} \ - --outFileNamePrefix ${outputFolder}/${prefix}___tmp___ \ + --outFileNamePrefix ${TMPDIR}/${prefix}___tmp___ \ --readFilesIn ${fastq1} ${fastq2} \ --readFilesCommand zcat \ --genomeDir ${STARindex} \ @@ -82,6 +77,29 @@ fi echo "STAR return code: ${starReturnCode}" if [ $starReturnCode -eq 0 ] +then + + for tempFile in ${TMPDIR}/${prefix}___tmp___* ; do + finalFile=`echo $tempFile | sed -e "s/___tmp___//g"` + echo "Moving temp file: ${tempFile} to ${finalFile}" + mv $tempFile $finalFile + 
done + +else + + echo -e "\nNon zero return code not making files final. Existing temp files are kept for debugging purposes\n\n" + #Return non zero return code + exit 1 + +fi + +${JAVA_HOME}/bin/java -Xmx40g -Xms40g -jar ${picardTools}/SortSam.jar I=${TMPDIR}/${prefix}Aligned.out.sam O=${outputFolder}/${prefix}___tmp___Aligned.out.sorted.bam SO=coordinate TMP_DIR=${TMPDIR} CREATE_MD5_FILE=true CREATE_INDEX=true + +returnCode=$? + +echo "Picard return code: ${returnCode}" + +if [ $returnCode -eq 0 ] then for tempFile in ${outputFolder}/${prefix}___tmp___* ; do @@ -90,6 +108,10 @@ then mv $tempFile $finalFile done + cp ${TMPDIR}/${prefix}Log.out ${outputFolder}/${prefix}Log.out + cp ${TMPDIR}/${prefix}Log.final.out ${outputFolder}/${prefix}Log.final.out + gzip -c ${TMPDIR}/${prefix}SJ.out.tab > ${outputFolder}/${prefix}SJ.out.tab.gz + else echo -e "\nNon zero return code not making files final. Existing temp files are kept for debugging purposes\n\n" diff --git a/compute4/STAR_RNA-seq_mapping/protocols/SortFilterBam.ftl b/compute4/STAR_RNA-seq_mapping/protocols/SortFilterBam.ftl index 55dea9ca..bc755144 100644 --- a/compute4/STAR_RNA-seq_mapping/protocols/SortFilterBam.ftl +++ b/compute4/STAR_RNA-seq_mapping/protocols/SortFilterBam.ftl @@ -42,6 +42,8 @@ then exit 1 fi +rm ${outputFolder}/${outputPrefix}Aligned.out.bam + echo "bam file sorted" ${samtools} index \ @@ -58,6 +60,11 @@ then mv $tempFile $finalFile done + md5sum ${outputFolder}/${outputPrefix}Aligned.out.sorted.bam ${outputFolder}/${outputPrefix}Aligned.out.sorted.bam.bai > ${outputFolder}/${outputPrefix}Aligned.out.sorted.bam.md5 + + + rm ${outputFolder}/${outputPrefix}Aligned.out.sam + else echo -e "\nNon zero return code not making files final. 
Existing temp files are kept for debugging purposes\n\n" @@ -67,8 +74,8 @@ else fi -rm ${outputFolder}/${outputPrefix}Aligned.out.bam -rm ${outputFolder}/${outputPrefix}Aligned.out.sam + + \ No newline at end of file diff --git a/compute4/STAR_RNA-seq_mapping/templates/Header.ftl b/compute4/STAR_RNA-seq_mapping/templates/Header.ftl index e00d9bff..fb8365cb 100644 --- a/compute4/STAR_RNA-seq_mapping/templates/Header.ftl +++ b/compute4/STAR_RNA-seq_mapping/templates/Header.ftl @@ -9,7 +9,7 @@ #PBS -W umask=0007 # Configures the GCC bash environment -. ${root}/gcc.bashrc +. ${bashrc} echo "Running on: " `hostname` diff --git a/compute4/STAR_RNA-seq_mapping/workflow.csv b/compute4/STAR_RNA-seq_mapping/workflow.csv index c75cad64..eda74905 100644 --- a/compute4/STAR_RNA-seq_mapping/workflow.csv +++ b/compute4/STAR_RNA-seq_mapping/workflow.csv @@ -1,3 +1,3 @@ name,protocol_name,PreviousSteps_name STARMapping,STARMapping, -SortFilterBam,SortFilterBam,STARMapping + diff --git a/compute4/STAR_RNA-seq_mapping/workflowStep1.csv b/compute4/STAR_RNA-seq_mapping/workflowStep1.csv new file mode 100644 index 00000000..5339fcbe --- /dev/null +++ b/compute4/STAR_RNA-seq_mapping/workflowStep1.csv @@ -0,0 +1,2 @@ +name,protocol_name,PreviousSteps_name +STARMapping,STARMapping, diff --git a/compute4/STAR_RNA-seq_mapping/workflowStep2.csv b/compute4/STAR_RNA-seq_mapping/workflowStep2.csv new file mode 100644 index 00000000..9c4bf559 --- /dev/null +++ b/compute4/STAR_RNA-seq_mapping/workflowStep2.csv @@ -0,0 +1,2 @@ +name,protocol_name,PreviousSteps_name +SortFilterBam,SortFilterBam,