Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

public rna seq #99

Merged
merged 2 commits into from
Feb 7, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion compute4/RNA-seq_genotype_calling/parameters.csv
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,4 @@ SNVMix,/target/gpfs2/gcc/home/dasha/tools/SNVMix2-0.11.8-r5/SNVMix2,,,
GenotypeCallingJar,/target/gpfs2/gcc/home/dasha/scripts/genotyping/GenotypeCalling/dist/GenotypeCalling.jar,,,
snpList,/target/gpfs2/gcc/home/dasha/resources/hg19/genotypes/1000G/all_snp_positions.txt,,,
shapeitversion,v2.644,,,
shapeitBin,${tooldir}/Shapeit-${shapeitversion}/shapeit.v2.r644.linux.x86_64,,string,
shapeitBin,${tooldir}/Shapeit-${shapeitversion}/shapeit.v2.r644.linux.x86_64,,string,
66 changes: 33 additions & 33 deletions compute4/RNA-seq_genotype_calling/protocols/ConvertSNVMixToGen.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -48,34 +48,34 @@ do
done


# ${JAVA_HOME}/bin/java \
# -Xmx4g \
# -jar /target/gpfs2/gcc/home/dasha/scripts/genotyping/GenotypeCalling/dist/GenotypeCalling.jar \
# --mode SNVMixToGen \
# --fileList ${genotypeFolder}/fileList.txt \
# --p-value 0.8 \
# --out ${genotypeFolder}/___tmp___chr

# returnCode=$?
# echo "Return code ${returnCode}"

# if [ "${returnCode}" -eq "0" ]
# then
${JAVA_HOME}/bin/java \
-Xmx4g \
-jar /target/gpfs2/gcc/home/dasha/scripts/genotyping/GenotypeCalling/dist/GenotypeCalling.jar \
--mode SNVMixToGen \
--fileList ${genotypeFolder}/fileList.txt \
--p-value 0.8 \
--out ${genotypeFolder}/___tmp___chr

returnCode=$?
echo "Return code ${returnCode}"

if [ "${returnCode}" -eq "0" ]
then

# echo "Moving temp files: ${genotypeFolder}/___tmp___chr* to ${genotypeFolder}/chr*"
# tmpFiles="${genotypeFolder}/___tmp___chr*"
# for f in $tmpFiles
# do
# mv $f ${f//___tmp___/}
# done
echo "Moving temp files: ${genotypeFolder}/___tmp___chr* to ${genotypeFolder}/chr*"
tmpFiles="${genotypeFolder}/___tmp___chr*"
for f in $tmpFiles
do
mv $f ${f//___tmp___/}
done

# else
else

# echo -e "\nNon zero return code not making files final. Existing temp files are kept for debugging purposes\n\n"
Return non zero return code
# exit 1
echo -e "\nNon zero return code not making files final. Existing temp files are kept for debugging purposes\n\n"
#Return non zero return code
exit 1

# fi
fi

chrTriTyperDirs=""

Expand All @@ -97,18 +97,18 @@ do
genFileSorted=${genFile//.gen/.sorted.gen}


# sort -k3,3n ${genFile} > ${genFileSorted}
sort -k3,3n ${genFile} > ${genFileSorted}

genFileSortedFiltered=${genFile//.gen/_CR0.8_maf0.01.gen}

# /target/gpfs2/gcc/tools/qctool/qctool_v1.3-linux-x86_64/qctool \
# -g $genFileSorted \
# -s ${sampleFile} \
# -og ${genFileSortedFiltered} \
# -maf 0.01 1 \
# -hwe 4 \
# -snp-missing-rate 0.8 \
# -omit-chromosome
/target/gpfs2/gcc/tools/qctool/qctool_v1.3-linux-x86_64/qctool \
-g $genFileSorted \
-s ${sampleFile} \
-og ${genFileSortedFiltered} \
-maf 0.01 1 \
-hwe 4 \
-snp-missing-rate 0.8 \
-omit-chromosome

trityperFolder=${genFile%.gen}
mkdir -p ${trityperFolder}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#MOLGENIS walltime=1:00:00 nodes=1 cores=1 mem=4

#FOREACH mergedStudy

# Writes fileList.txt in the genotype folder: one "sample<TAB>snvmix output"
# line for every distinct sample whose SNVMix output file exists on disk.
# The two header lines above are kept verbatim; they are presumably consumed
# by the workflow generator rather than by bash (TODO confirm).

# The values below are substituted by the template engine before bash runs.
genotypeFolder="${genotypeFolder}"
# NOTE(review): JAVA_HOME is not used anywhere else in this script; kept so the
# generated script stays identical in what it defines.
JAVA_HOME="${JAVA_HOME}"

# Parallel arrays: samples[i] is paired with snvmixOuts[i] by index below.
declare -a samples=(${ssvQuoted(sample)})
declare -a snvmixOuts=(${ssvQuoted(snvmixOut)})
<#noparse>

mkdir -p "${genotypeFolder}"

echo "genotypeFolder=${genotypeFolder}"
echo "snvMixOuts=${snvmixOuts[*]}"
echo "samples=${samples[*]}"

# Start from an empty list: the loop below appends, so a leftover file from a
# previous run would otherwise accumulate duplicate lines.
rm -f "${genotypeFolder}/fileList.txt"

# Samples seen so far; a sample name that occurs more than once is listed once.
declare -a samplesProcessed=()

for (( i = 0 ; i < ${#samples[@]} ; i++ ))
do

  # Skip this index when the sample name was already handled earlier.
  for processedSample in "${samplesProcessed[@]}"
  do
    if [ "${processedSample}" == "${samples[$i]}" ]
    then
      continue 2
    fi
  done

  samplesProcessed=("${samplesProcessed[@]}" "${samples[$i]}")
  echo -e "sample:${samples[$i]}\tgenotype file:${snvmixOuts[$i]}"

  # The quotes matter: with an empty or missing path an unquoted test becomes
  # the one-argument form `[ -f ]`, which is true, and an entry with an empty
  # file name would be written to the list.
  if [ -f "${snvmixOuts[$i]}" ]
  then
    echo -e "${samples[$i]}\t${snvmixOuts[$i]}" >> "${genotypeFolder}/fileList.txt"
  else
    echo "Skipping sample ${samples[$i]} no snvmix output"
  fi

done

</#noparse>
5 changes: 4 additions & 1 deletion compute4/RNA-seq_genotype_calling/protocols/SNVMix.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,10 @@ returnCode=$?

echo "return code snvMix ${returnCode}"

if [ $returnCode -eq 0 ]

count=`cut -f 1 -d ":" ${snvmixOut}___tmp___ | uniq | wc -l`

if [ $count -ge 22 ]
then

echo "Moving temp file: ${snvmixOut}___tmp___ to $snvmixOut"
Expand Down
2 changes: 2 additions & 0 deletions compute4/RNA-seq_genotype_calling/workflowCall.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name,protocol_name,PreviousSteps_name
SNVMix,SNVMix,
2 changes: 2 additions & 0 deletions compute4/RNA-seq_genotype_calling/workflowCreateFileList.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name,protocol_name,PreviousSteps_name
CreateSNVMixFileList,CreateSNVMixFileList,
2 changes: 2 additions & 0 deletions compute4/RNA-seq_genotype_calling/workflowMerge.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name,protocol_name,PreviousSteps_name
ConvertSNVMixToGen,ConvertSNVMixToGen,
2 changes: 1 addition & 1 deletion compute4/RNA-seq_quantify_fluxCapacitor/parameters.csv
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ run,,,,
baseFolder,,,,
sortedBam,${baseFolder}/${studyEna}/${sample}/${run}/${run}Aligned.out.sorted.bam,,,
expressionFolder,${baseFolder}/${mergedStudy}/expressionData/,,,
gtfExpression,"${baseFolder}/${studyEna}/${sample}/${sample}.flux.gtf",,,
gtfExpression,"${baseFolder}/${studyEna}/${sample}/${run}/${run}.flux.gtf",,,
expressionTable,${expressionFolder}/expression_table.transcr.v71.flux.txt,,,
#,,,,
annotationGtf,/target/gpfs2/gcc/home/dasha/resources/hg19/v71/Homo_sapiens.GRCh37.71.cut.sorted.gtf,,,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=6
#MOLGENIS walltime=24:00:00 nodes=1 cores=2 mem=7

bamToBed="${bamToBed}"
sortedBam="${sortedBam}"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#MOLGENIS walltime=6:00:00 nodes=1 cores=1 mem=4
#MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=4

#FOREACH mergedStudy
mkdir -p ${expressionFolder}
Expand Down
13 changes: 7 additions & 6 deletions compute4/RNA-seq_quantify_gene_level/parameters.csv
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
Name,defaultValue,description,dataType,hasOne_name
clusterQueue,gcc,,,
scheduler,PBS,,,
mem,4,Memory in GB,,
walltime,6:00:00,,,
mem,6,Memory in GB,,
walltime,24:00:00,,,
cores,1,,,
defaultInterpreter,#!/bin/bash,,,
jobname,jobname,,string,
#,,,,
home,/target/gpfs2/gcc/home/dasha/,,,
root,/target/gpfs2/gcc/,the root to your tools and data,string,
bashrc,${root}/gcc.bashrc,,,
toolDir,${root}tools/,root Dir for tools,string,
bashrc,/gcc/groups/gcc/home/gcc.bashrc,,,
toolDir,/gcc/tools/,root Dir for tools,string,
#,,,,
studyEna,,,,
mergedStudy,,,,expressionFolder
Expand All @@ -19,8 +19,8 @@ run,,,,
baseFolder,,,,
sortedBam,${baseFolder}/${studyEna}/${sample}/${run}/${run}Aligned.out.sorted.bam,,,
expressionFolder,${baseFolder}/${mergedStudy}/expressionData/,,,
txtExpression,"${baseFolder}/${studyEna}/${sample}/${sample}.htseq.txt",,,
expressionTable,${expressionFolder}/expression_table.transcr.v71.htseq.txt,,,
txtExpression,"${baseFolder}/${studyEna}/${sample}/${run}/${run}.htseq.txt",,,
expressionTable,${expressionFolder}/expression_table.genelevel.v71.htseq.txt,,,
#,,,,
annotationGtf,/target/gpfs2/gcc/home/dasha/resources/hg19/v71/Homo_sapiens.GRCh37.71.cut.sorted.gtf,,,
geneAnnotationTxt,/target/gpfs2/gcc/home/dasha/resources/hg19/annotations/v71/annotation_geneIds_v71.txt.gz,,,
Expand All @@ -30,3 +30,4 @@ JAVA_HOME,${toolDir}/jdk/,,,
samtools,${toolDir}samtools-0.1.18/samtools,,,
htseq_count,/target/gpfs2/gcc/home/dasha/tools/HTSeq-0.5.4p3/HTSeq/scripts/count.py,,,
processReadCountsJar,/target/gpfs2/gcc/home/dasha/scripts/processReadCounts/ProcessReadCounts/dist/ProcessReadCounts.jar,,,
python,python,,,
35 changes: 0 additions & 35 deletions compute4/RNA-seq_quantify_gene_level/protocols/HTSeq-count.ftl

This file was deleted.

33 changes: 22 additions & 11 deletions compute4/RNA-seq_quantify_gene_level/protocols/HTSeq_count.ftl
Original file line number Diff line number Diff line change
@@ -1,37 +1,48 @@
#MOLGENIS walltime=20:00:00 nodes=1 cores=1 mem=6
#MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=6

sortedBam="${sortedBam}"
htseq_count="${htseq_count}"
annotationGtf="${annotationGtf}"
txtExpression="${txtExpression}"
samtools=${samtools}
python=${python}

<#noparse>

echo -e "sortedBam=${sortedBam}\nannotationGtf=${annotationGtf}\ntxtExpression=${txtExpression}"

alloutputsexist ${txtExpression}

echo "Sorting bam file by name"

${samtools} sort \
-n \
${sortedBam} \
${sortedBam%bam}byName
${samtools} \
sort \
-n \
${sortedBam} \
${TMPDIR}/nameSorted.bam


echo -e "\nQuantifying expression"

${samtools} \
if ${samtools} \
view -h \
${sortedBam%bam}byName.bam | \
/target/gpfs2/gcc/tools/Python-2.7.3/bin/python \
${htseq_count} \
${TMPDIR}/nameSorted.bam | \
/target/gpfs2/gcc/tools/Python-2.7.3/bin/python \
${htseq_count} \
-m union \
-s no \
- \
${annotationGtf} | \
head -n -5 \
> ${txtExpression}
head -n -5 \
> ${txtExpression}___tmp___;
then
echo "Gene count succesfull"
mv ${txtExpression}___tmp___ ${txtExpression}
else
echo "Genecount failed"
fi

rm ${sortedBam%bam}byName

echo "Finished!"
</#noparse>
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
#MOLGENIS walltime=6:00:00 nodes=1 cores=1 mem=4
#MOLGENIS walltime=24:00:00 nodes=1 cores=1 mem=6

#FOREACH mergedStudy
mkdir -p ${expressionFolder}

rm -f ${expressionFolder}/fileList.txt

<#assign samples=sample?size - 1>
<#list 0..samples as i>
echo -e "${sample[i]}\t${txtExpression[i]}" >> ${expressionFolder}/fileList.txt
<#assign runs=run?size - 1>
<#list 0..runs as i>
echo -e "${run[i]}\t${txtExpression[i]}" >> ${expressionFolder}/fileList.txt
</#list>

${JAVA_HOME}/bin/java \
if ${JAVA_HOME}/bin/java \
-Xmx4g \
-jar ${processReadCountsJar} \
--mode makeExpressionTable \
--fileList ${expressionFolder}/fileList.txt \
--annot ${geneAnnotationTxt} \
--out ${expressionTable}

--out ${expressionTable}___tmp___
then
echo "table create succesfull"
mv ${expressionTable}___tmp___ ${expressionTable}
else
echo "table create failed"
fi
Loading