Skip to content

Commit

Permalink
Merge pull request #99 from PatrickDeelen/master
Browse files Browse the repository at this point in the history
public rna seq
  • Loading branch information
freerkvandijk committed Feb 7, 2014
2 parents 9bed2c6 + 2fddcf5 commit decbaae
Show file tree
Hide file tree
Showing 68 changed files with 1,613 additions and 124 deletions.
2 changes: 1 addition & 1 deletion compute4/RNA-seq_genotype_calling/parameters.csv
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,4 @@ SNVMix,/target/gpfs2/gcc/home/dasha/tools/SNVMix2-0.11.8-r5/SNVMix2,,,
GenotypeCallingJar,/target/gpfs2/gcc/home/dasha/scripts/genotyping/GenotypeCalling/dist/GenotypeCalling.jar,,,
snpList,/target/gpfs2/gcc/home/dasha/resources/hg19/genotypes/1000G/all_snp_positions.txt,,,
shapeitversion,v2.644,,,
shapeitBin,${tooldir}/Shapeit-${shapeitversion}/shapeit.v2.r644.linux.x86_64,,string,
shapeitBin,${tooldir}/Shapeit-${shapeitversion}/shapeit.v2.r644.linux.x86_64,,string,
66 changes: 33 additions & 33 deletions compute4/RNA-seq_genotype_calling/protocols/ConvertSNVMixToGen.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -48,34 +48,34 @@ do
done


# ${JAVA_HOME}/bin/java \
# -Xmx4g \
# -jar /target/gpfs2/gcc/home/dasha/scripts/genotyping/GenotypeCalling/dist/GenotypeCalling.jar \
# --mode SNVMixToGen \
# --fileList ${genotypeFolder}/fileList.txt \
# --p-value 0.8 \
# --out ${genotypeFolder}/___tmp___chr

# returnCode=$?
# echo "Return code ${returnCode}"

# if [ "${returnCode}" -eq "0" ]
# then
${JAVA_HOME}/bin/java \
-Xmx4g \
-jar /target/gpfs2/gcc/home/dasha/scripts/genotyping/GenotypeCalling/dist/GenotypeCalling.jar \
--mode SNVMixToGen \
--fileList ${genotypeFolder}/fileList.txt \
--p-value 0.8 \
--out ${genotypeFolder}/___tmp___chr

returnCode=$?
echo "Return code ${returnCode}"

if [ "${returnCode}" -eq "0" ]
then

# echo "Moving temp files: ${genotypeFolder}/___tmp___chr* to ${genotypeFolder}/chr*"
# tmpFiles="${genotypeFolder}/___tmp___chr*"
# for f in $tmpFiles
# do
# mv $f ${f//___tmp___/}
# done
echo "Moving temp files: ${genotypeFolder}/___tmp___chr* to ${genotypeFolder}/chr*"
tmpFiles="${genotypeFolder}/___tmp___chr*"
for f in $tmpFiles
do
mv $f ${f//___tmp___/}
done

# else
else

# echo -e "\nNon zero return code not making files final. Existing temp files are kept for debugging purposes\n\n"
Return non zero return code
# exit 1
echo -e "\nNon zero return code not making files final. Existing temp files are kept for debugging purposes\n\n"
#Return non zero return code
exit 1

# fi
fi

chrTriTyperDirs=""

Expand All @@ -97,18 +97,18 @@ do
genFileSorted=${genFile//.gen/.sorted.gen}


# sort -k3,3n ${genFile} > ${genFileSorted}
sort -k3,3n ${genFile} > ${genFileSorted}

genFileSortedFiltered=${genFile//.gen/_CR0.8_maf0.01.gen}

# /target/gpfs2/gcc/tools/qctool/qctool_v1.3-linux-x86_64/qctool \
# -g $genFileSorted \
# -s ${sampleFile} \
# -og ${genFileSortedFiltered} \
# -maf 0.01 1 \
# -hwe 4 \
# -snp-missing-rate 0.8 \
# -omit-chromosome
/target/gpfs2/gcc/tools/qctool/qctool_v1.3-linux-x86_64/qctool \
-g $genFileSorted \
-s ${sampleFile} \
-og ${genFileSortedFiltered} \
-maf 0.01 1 \
-hwe 4 \
-snp-missing-rate 0.8 \
-omit-chromosome

trityperFolder=${genFile%.gen}
mkdir -p ${trityperFolder}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#MOLGENIS walltime=1:00:00 nodes=1 cores=1 mem=4

#FOREACH mergedStudy

# Purpose: write the file list (fileList.txt in the genotype folder) with one
# tab-separated line per unique sample: sample name, then the path of its
# SNVMix output. Samples whose SNVMix output file does not exist are reported
# and left out of the list.
#
# The assignments below are filled in by the template engine before the
# script runs; everything inside the noparse section is plain bash.
# NOTE(review): ssvQuoted presumably expands to space-separated, quoted
# values so each array element stays one word - confirm against the
# template helper.

genotypeFolder="${genotypeFolder}"
JAVA_HOME="${JAVA_HOME}"

declare -a samples=(${ssvQuoted(sample)})
declare -a snvmixOuts=(${ssvQuoted(snvmixOut)})
<#noparse>

fileList="${genotypeFolder}/fileList.txt"

mkdir -p "${genotypeFolder}"

echo "genotypeFolder=${genotypeFolder}"
echo "snvMixOuts=${snvmixOuts[*]}"
echo "samples=${samples[*]}"

# Start from a clean list; lines are appended one sample at a time below.
rm -f "${fileList}"



# Samples already handled; used to skip repeated sample names.
declare -a samplesProcessed=()

for (( i = 0 ; i < ${#samples[@]} ; i++ ))
do

	# Only the first occurrence of a sample name is used; on a repeat,
	# "continue 2" jumps to the next iteration of the outer loop.
	for processedSample in "${samplesProcessed[@]}"
	do
		if [[ "${processedSample}" == "${samples[$i]}" ]]
		then
			continue 2
		fi
	done

	samplesProcessed+=("${samples[$i]}")
	echo -e "sample:${samples[$i]}\tgenotype file:${snvmixOuts[$i]}"

	# The path must be quoted: with an empty or missing entry an unquoted
	# test collapses to "[ -f ]", which is true, and a line with an empty
	# path would be written to the list.
	if [[ -f "${snvmixOuts[$i]}" ]]
	then
		echo -e "${samples[$i]}\t${snvmixOuts[$i]}" >> "${fileList}"
	else
		echo "Skipping sample ${samples[$i]} no snvmix output"
	fi


done

</#noparse>
5 changes: 4 additions & 1 deletion compute4/RNA-seq_genotype_calling/protocols/SNVMix.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,10 @@ returnCode=$?

echo "return code snvMix ${returnCode}"

if [ $returnCode -eq 0 ]

count=`cut -f 1 -d ":" ${snvmixOut}___tmp___ | uniq | wc -l`

if [ $count -ge 22 ]
then

echo "Moving temp file: ${snvmixOut}___tmp___ to $snvmixOut"
Expand Down
2 changes: 2 additions & 0 deletions compute4/RNA-seq_genotype_calling/workflowCall.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name,protocol_name,PreviousSteps_name
SNVMix,SNVMix,
2 changes: 2 additions & 0 deletions compute4/RNA-seq_genotype_calling/workflowCreateFileList.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name,protocol_name,PreviousSteps_name
CreateSNVMixFileList,CreateSNVMixFileList,
2 changes: 2 additions & 0 deletions compute4/RNA-seq_genotype_calling/workflowMerge.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name,protocol_name,PreviousSteps_name
ConvertSNVMixToGen,ConvertSNVMixToGen,
2 changes: 1 addition & 1 deletion compute4/RNA-seq_quantify_fluxCapacitor/parameters.csv
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ run,,,,
baseFolder,,,,
sortedBam,${baseFolder}/${studyEna}/${sample}/${run}/${run}Aligned.out.sorted.bam,,,
expressionFolder,${baseFolder}/${mergedStudy}/expressionData/,,,
gtfExpression,"${baseFolder}/${studyEna}/${sample}/${sample}.flux.gtf",,,
gtfExpression,"${baseFolder}/${studyEna}/${sample}/${run}/${run}.flux.gtf",,,
expressionTable,${expressionFolder}/expression_table.transcr.v71.flux.txt,,,
#,,,,
annotationGtf,/target/gpfs2/gcc/home/dasha/resources/hg19/v71/Homo_sapiens.GRCh37.71.cut.sorted.gtf,,,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=6
#MOLGENIS walltime=24:00:00 nodes=1 cores=2 mem=7

bamToBed="${bamToBed}"
sortedBam="${sortedBam}"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#MOLGENIS walltime=6:00:00 nodes=1 cores=1 mem=4
#MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=4

#FOREACH mergedStudy
mkdir -p ${expressionFolder}
Expand Down
13 changes: 7 additions & 6 deletions compute4/RNA-seq_quantify_gene_level/parameters.csv
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
Name,defaultValue,description,dataType,hasOne_name
clusterQueue,gcc,,,
scheduler,PBS,,,
mem,4,Memory in GB,,
walltime,6:00:00,,,
mem,6,Memory in GB,,
walltime,24:00:00,,,
cores,1,,,
defaultInterpreter,#!/bin/bash,,,
jobname,jobname,,string,
#,,,,
home,/target/gpfs2/gcc/home/dasha/,,,
root,/target/gpfs2/gcc/,the root to your tools and data,string,
bashrc,${root}/gcc.bashrc,,,
toolDir,${root}tools/,root Dir for tools,string,
bashrc,/gcc/groups/gcc/home/gcc.bashrc,,,
toolDir,/gcc/tools/,root Dir for tools,string,
#,,,,
studyEna,,,,
mergedStudy,,,,expressionFolder
Expand All @@ -19,8 +19,8 @@ run,,,,
baseFolder,,,,
sortedBam,${baseFolder}/${studyEna}/${sample}/${run}/${run}Aligned.out.sorted.bam,,,
expressionFolder,${baseFolder}/${mergedStudy}/expressionData/,,,
txtExpression,"${baseFolder}/${studyEna}/${sample}/${sample}.htseq.txt",,,
expressionTable,${expressionFolder}/expression_table.transcr.v71.htseq.txt,,,
txtExpression,"${baseFolder}/${studyEna}/${sample}/${run}/${run}.htseq.txt",,,
expressionTable,${expressionFolder}/expression_table.genelevel.v71.htseq.txt,,,
#,,,,
annotationGtf,/target/gpfs2/gcc/home/dasha/resources/hg19/v71/Homo_sapiens.GRCh37.71.cut.sorted.gtf,,,
geneAnnotationTxt,/target/gpfs2/gcc/home/dasha/resources/hg19/annotations/v71/annotation_geneIds_v71.txt.gz,,,
Expand All @@ -30,3 +30,4 @@ JAVA_HOME,${toolDir}/jdk/,,,
samtools,${toolDir}samtools-0.1.18/samtools,,,
htseq_count,/target/gpfs2/gcc/home/dasha/tools/HTSeq-0.5.4p3/HTSeq/scripts/count.py,,,
processReadCountsJar,/target/gpfs2/gcc/home/dasha/scripts/processReadCounts/ProcessReadCounts/dist/ProcessReadCounts.jar,,,
python,python,,,
35 changes: 0 additions & 35 deletions compute4/RNA-seq_quantify_gene_level/protocols/HTSeq-count.ftl

This file was deleted.

33 changes: 22 additions & 11 deletions compute4/RNA-seq_quantify_gene_level/protocols/HTSeq_count.ftl
Original file line number Diff line number Diff line change
@@ -1,37 +1,48 @@
#MOLGENIS walltime=20:00:00 nodes=1 cores=1 mem=6
#MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=6

sortedBam="${sortedBam}"
htseq_count="${htseq_count}"
annotationGtf="${annotationGtf}"
txtExpression="${txtExpression}"
samtools=${samtools}
python=${python}

<#noparse>

echo -e "sortedBam=${sortedBam}\nannotationGtf=${annotationGtf}\ntxtExpression=${txtExpression}"

alloutputsexist ${txtExpression}

echo "Sorting bam file by name"

${samtools} sort \
-n \
${sortedBam} \
${sortedBam%bam}byName
${samtools} \
sort \
-n \
${sortedBam} \
${TMPDIR}/nameSorted.bam


echo -e "\nQuantifying expression"

${samtools} \
if ${samtools} \
view -h \
${sortedBam%bam}byName.bam | \
/target/gpfs2/gcc/tools/Python-2.7.3/bin/python \
${htseq_count} \
${TMPDIR}/nameSorted.bam | \
/target/gpfs2/gcc/tools/Python-2.7.3/bin/python \
${htseq_count} \
-m union \
-s no \
- \
${annotationGtf} | \
head -n -5 \
> ${txtExpression}
head -n -5 \
> ${txtExpression}___tmp___;
then
echo "Gene count succesfull"
mv ${txtExpression}___tmp___ ${txtExpression}
else
echo "Genecount failed"
fi

rm ${sortedBam%bam}byName

echo "Finished!"
</#noparse>
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
#MOLGENIS walltime=6:00:00 nodes=1 cores=1 mem=4
#MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=6

#FOREACH mergedStudy
mkdir -p ${expressionFolder}

rm -f ${expressionFolder}/fileList.txt

<#assign samples=sample?size - 1>
<#list 0..samples as i>
echo -e "${sample[i]}\t${txtExpression[i]}" >> ${expressionFolder}/fileList.txt
<#assign runs=run?size - 1>
<#list 0..runs as i>
echo -e "${run[i]}\t${txtExpression[i]}" >> ${expressionFolder}/fileList.txt
</#list>

${JAVA_HOME}/bin/java \
if ${JAVA_HOME}/bin/java \
-Xmx4g \
-jar ${processReadCountsJar} \
--mode makeExpressionTable \
--fileList ${expressionFolder}/fileList.txt \
--annot ${geneAnnotationTxt} \
--out ${expressionTable}

--out ${expressionTable}___tmp___
then
echo "table create succesfull"
mv ${expressionTable}___tmp___ ${expressionTable}
else
echo "table create failed"
fi
Loading

0 comments on commit decbaae

Please sign in to comment.