Skip to content

Commit

Permalink
Merge pull request #236 from npklein/master
Browse files Browse the repository at this point in the history
Make PROseq pipeline folder
  • Loading branch information
npklein committed Jun 16, 2015
2 parents 47f84cd + b8f6327 commit d77ab11
Show file tree
Hide file tree
Showing 19 changed files with 1,213 additions and 0 deletions.
66 changes: 66 additions & 0 deletions compute5/PROseq/protocols/AddOrReplaceReadGroups.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#MOLGENIS walltime=23:59:00 mem=6gb nodes=1 ppn=4

# Runs Picard AddOrReplaceReadGroups on ${starAlignmentPassTwoDir}/Aligned.out.sam,
# writing a coordinate-sorted BAM to ${addOrReplaceGroupsBam} (an index is
# requested via CREATE_INDEX=true).
# getFile, putFile, ${stage} and ${checkStage} are not defined in this script;
# they are expected to be provided by the surrounding environment.
# The script exits non-zero when Picard fails so a failed job is not reported
# as successful.

#Parameter mapping #why not string foo,bar? instead of string foo\nstring bar
#string stage
#string checkStage
#string starVersion
#string WORKDIR
#string projectDir

#string picardVersion
#string starAlignmentPassTwoDir
#string sampleName
#string internalId


#string addOrReplaceGroupsDir
#string addOrReplaceGroupsBam
#string addOrReplaceGroupsBai



echo "## "$(date)" ## $0 Started "

getFile "${starAlignmentPassTwoDir}/Aligned.out.sam"

# ${stage} and ${checkStage} are left unquoted on purpose: they may expand to a
# multi-word command.
${stage} picard-tools/${picardVersion}
${checkStage}

set -x
set -e

mkdir -p "${addOrReplaceGroupsDir}"

echo "## "$(date)" Start $0"

# NOTE(review): RGPU contains ${internalId} twice - confirm this is intended.
if java -Xmx6g -XX:ParallelGCThreads=4 -jar "${PICARD_HOME}/AddOrReplaceReadGroups.jar" \
  INPUT="${starAlignmentPassTwoDir}/Aligned.out.sam" \
  OUTPUT="${addOrReplaceGroupsBam}" \
  SORT_ORDER=coordinate \
  RGID="${internalId}" \
  RGLB="${sampleName}_${internalId}" \
  RGPL=ILLUMINA \
  RGPU="${sampleName}_${internalId}_${internalId}" \
  RGSM="${sampleName}" \
  RGDT="$(date --rfc-3339=date)" \
  CREATE_INDEX=true \
  MAX_RECORDS_IN_RAM=4000000 \
  TMP_DIR="${addOrReplaceGroupsDir}"
then
  echo "returncode: $?"

  putFile "${addOrReplaceGroupsBam}"
  putFile "${addOrReplaceGroupsBai}"

  echo "succes moving files"
else
  # In the else branch $? still holds the exit status of the failed command.
  rc=$?
  echo "returncode: ${rc}"
  echo "fail"
  exit "${rc}"
fi

echo "## "$(date)" ## $0 Done "
65 changes: 65 additions & 0 deletions compute5/PROseq/protocols/CollectMultipleMetrics.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#MOLGENIS walltime=23:59:00 mem=4gb nodes=1 ppn=4

# Runs Picard CollectMultipleMetrics on ${sortedBam} and stores the resulting
# metric files. CollectInsertSizeMetrics is only added when ${reads2FqGz} is
# non-empty.
# getFile, putFile, ${stage} and ${checkStage} are not defined in this script;
# they are expected to be provided by the surrounding environment.
# The script exits non-zero when Picard fails so a failed job is not reported
# as successful.

#string stage
#string checkStage
#string picardVersion
#string RVersion
#string reads2FqGz
#string collectMultipleMetricsDir
#string collectMultipleMetricsPrefix
#string onekgGenomeFasta
#string sortedBam
#string sortedBai
#string toolDir


getFile "${sortedBam}"
getFile "${sortedBai}"
getFile "${onekgGenomeFasta}"


#load modules
# ${stage} and ${checkStage} are left unquoted on purpose: they may expand to a
# multi-word command.
${stage} picard/${picardVersion}

#Check modules
${checkStage}

# NOTE(review): the directory created here carries a "_QC" suffix while
# TMP_DIR below uses ${collectMultipleMetricsDir} without it - confirm.
mkdir -p "${collectMultipleMetricsDir}_QC"

echo "## "$(date)" Start $0"

# Optional extra argument, kept in an array so that "no argument" expands to
# nothing instead of an empty string.
insertSizeMetrics=()
if [[ -n "${reads2FqGz}" ]]; then
  insertSizeMetrics=("PROGRAM=CollectInsertSizeMetrics")
fi

#Run Picard CollectAlignmentSummaryMetrics, CollectInsertSizeMetrics, QualityScoreDistribution and MeanQualityByCycle
# JVM options go before -jar, matching the other protocols.
if java -Xmx4g -XX:ParallelGCThreads=4 -jar "${toolDir}picard/${picardVersion}/CollectMultipleMetrics.jar" \
  I="${sortedBam}" \
  O="${collectMultipleMetricsPrefix}" \
  R="${onekgGenomeFasta}" \
  PROGRAM=CollectAlignmentSummaryMetrics \
  PROGRAM=QualityScoreDistribution \
  PROGRAM=MeanQualityByCycle \
  "${insertSizeMetrics[@]}" \
  TMP_DIR="${collectMultipleMetricsDir}"
then
  echo "returncode: $?"
  putFile "${collectMultipleMetricsPrefix}.alignment_summary_metrics"
  putFile "${collectMultipleMetricsPrefix}.quality_by_cycle_metrics"
  putFile "${collectMultipleMetricsPrefix}.quality_by_cycle.pdf"
  putFile "${collectMultipleMetricsPrefix}.quality_distribution_metrics"
  putFile "${collectMultipleMetricsPrefix}.quality_distribution.pdf"

  if [[ -n "${reads2FqGz}" ]]; then
    putFile "${collectMultipleMetricsPrefix}.insert_size_histogram.pdf"
    putFile "${collectMultipleMetricsPrefix}.insert_size_metrics"
  fi
  echo "succes moving files"
else
  # In the else branch $? still holds the exit status of the failed command.
  rc=$?
  echo "returncode: ${rc}"
  echo "fail"
  exit "${rc}"
fi


echo "## "$(date)" ## $0 Done "
54 changes: 54 additions & 0 deletions compute5/PROseq/protocols/CollectRnaSeqMetrics.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#MOLGENIS walltime=23:59:00 mem=8gb nodes=1 ppn=4

# Runs Picard CollectRnaSeqMetrics on ${sortedBam}, writing the metrics to
# ${collectRnaSeqMetrics} and the chart to ${collectRnaSeqMetricsChart}.
# getFile, putFile, ${stage} and ${checkStage} are not defined in this script;
# they are expected to be provided by the surrounding environment.
# The script exits non-zero when Picard fails so a failed job is not reported
# as successful.

#string stage
#string checkStage
#string picardVersion
#string sortedBam
#string sortedBai
#string collectRnaSeqMetricsDir
#string collectRnaSeqMetrics
#string collectRnaSeqMetricsChart
#string genesRefFlat
#string rRnaIntervalList
#string onekgGenomeFasta
#string toolDir
#string RVersion

getFile "${sortedBam}"
getFile "${sortedBai}"

#Load module
# ${stage} and ${checkStage} are left unquoted on purpose: they may expand to a
# multi-word command.
${stage} picard/${picardVersion}
${stage} R/${RVersion}

#Check modules
${checkStage}

mkdir -p "${collectRnaSeqMetricsDir}"

echo "## "$(date)" ## $0 Started "

if java -Xmx8g -XX:ParallelGCThreads=4 -jar "${toolDir}picard/${picardVersion}/CollectRnaSeqMetrics.jar" \
  INPUT="${sortedBam}" \
  OUTPUT="${collectRnaSeqMetrics}" \
  CHART_OUTPUT="${collectRnaSeqMetricsChart}" \
  METRIC_ACCUMULATION_LEVEL=SAMPLE \
  METRIC_ACCUMULATION_LEVEL=READ_GROUP \
  REFERENCE_SEQUENCE="${onekgGenomeFasta}" \
  REF_FLAT="${genesRefFlat}" \
  RIBOSOMAL_INTERVALS="${rRnaIntervalList}" \
  STRAND_SPECIFICITY=NONE \
  TMP_DIR="${collectRnaSeqMetricsDir}"
then
  echo "returncode: $?"
  putFile "${collectRnaSeqMetrics}"
  putFile "${collectRnaSeqMetricsChart}"

  echo "succes moving files"
else
  # In the else branch $? still holds the exit status of the failed command.
  rc=$?
  echo "returncode: ${rc}"
  echo "fail"
  exit "${rc}"
fi

echo "## "$(date)" ## $0 Done "
58 changes: 58 additions & 0 deletions compute5/PROseq/protocols/CombineBedFiles.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#MOLGENIS walltime=23:59:00 mem=6gb ppn=4

# Merges the binary PLINK filesets listed in genotypeHarminzerOutput into one
# fileset named ${combinedBEDDir}combinedFiles. The first list entry is passed
# to plink as --bfile; every remaining entry goes into the --merge-list file.
# getFile, putFile, ${stage} and ${checkStage} are not defined in this script;
# they are expected to be provided by the surrounding environment.
# The script exits non-zero when plink fails so a failed job is not reported
# as successful.

#string stage
#string checkStage
#string projectDir
#list genotypeHarminzerOutput
#string combinedBEDDir
#string plinkVersion
#string genotypeHarmonizerDir



# Fetch the files of every sample; a bare ${genotypeHarminzerOutput} would
# only expand to the first element of the list.
for samplePrefix in "${genotypeHarminzerOutput[@]}"; do
  getFile "${samplePrefix}.bed"
  getFile "${samplePrefix}.bim"
  getFile "${samplePrefix}.fam"
  getFile "${samplePrefix}.log"
done

#Load module
# ${stage} and ${checkStage} are left unquoted on purpose: they may expand to a
# multi-word command.
${stage} PLINK/${plinkVersion}

#Check staging of module
${checkStage}

mkdir -p "${combinedBEDDir}"


echo "## "$(date)" ## $0 Started "

# Build the merge list: one "<prefix>.bed <prefix>.bim <prefix>.fam" line per
# sample. The first sample is skipped because it is the --bfile input to which
# the other samples are merged. Each line is built from a single prefix so the
# three files on a line always belong to the same sample.
mergeList="${combinedBEDDir}combinedFiles.txt"
for samplePrefix in "${genotypeHarminzerOutput[@]:1}"; do
  printf '%s.bed %s.bim %s.fam\n' "${samplePrefix}" "${samplePrefix}" "${samplePrefix}"
done > "${mergeList}"

if plink \
  --bfile "${genotypeHarminzerOutput[0]}" \
  --merge-list "${mergeList}" \
  --make-bed \
  --out "${combinedBEDDir}combinedFiles"
then
  echo "returncode: $?"
  putFile "${combinedBEDDir}combinedFiles.txt"
  putFile "${combinedBEDDir}combinedFiles.log"
  putFile "${combinedBEDDir}combinedFiles.bed"
  putFile "${combinedBEDDir}combinedFiles.bim"
  putFile "${combinedBEDDir}combinedFiles.fam"
  putFile "${combinedBEDDir}combinedFiles.nosex"

  echo "succes moving files"
else
  # In the else branch $? still holds the exit status of the failed command.
  rc=$?
  echo "returncode: ${rc}"
  echo "fail"
  exit "${rc}"
fi

echo "## "$(date)" ## $0 Done "
103 changes: 103 additions & 0 deletions compute5/PROseq/protocols/Fastqc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#MOLGENIS nodes=1 ppn=1 mem=1gb walltime=10:00:00

# Runs FastQC on ${reads1FqGz} and, when ${reads2FqGz} is non-empty, also on
# ${reads2FqGz}; each resulting zip is copied to its target name and stored
# with putFile.
# getFile, putFile, alloutputsexist, ${stage} and ${checkStage} are not defined
# in this script; they are expected to be provided by the surrounding
# environment.
# The script exits non-zero when FastQC or the copy fails so a failed job is
# not reported as successful.

#string stage
#string checkStage
#string fastqcVersion
#string WORKDIR
#string projectDir
#string fastqcDir
#string fastqcZipExt
#string pairedEndfastqcZip1
#string pairedEndfastqcZip2
#string reads1FqGz
#string reads2FqGz
#string sampleName
# NOTE(review): ${singleEndfastqcZip} is used by the single-end branch but is
# not declared in the parameter list above - confirm where it is defined.

echo -e "test ${reads1FqGz} ${reads2FqGz} 1: $(basename ${reads1FqGz} .gz)${fastqcZipExt} \n2: $(basename ${reads2FqGz} .gz)${fastqcZipExt} "

# ${stage} and ${checkStage} are left unquoted on purpose: they may expand to a
# multi-word command.
${stage} FastQC/${fastqcVersion}
${checkStage}

set -e

#######################################
# Runs FastQC on one FASTQ file and copies the produced zip to its target.
# Globals:   fastqcDir (read), fastqcZipExt (read)
# Arguments: $1 - FASTQ file to analyse
#            $2 - destination path for the copied FastQC zip
# Returns:   0 on success, the status of the failing command otherwise
#######################################
run_fastqc() {
  local fastq=$1
  local target=$2
  # Explicit "|| return": set -e is not honoured inside a function whose
  # status is being tested by the caller.
  fastqc --noextract "${fastq}" --outdir "${fastqcDir}" || return
  echo
  cp -v "${fastqcDir}/$(basename "${fastq}" .gz)${fastqcZipExt}" "${target}" || return
}

echo "## "$(date)" ## $0 Started "

if [[ -z "${reads2FqGz}" ]]; then

  echo "## "$(date)" Started single end fastqc"
  # alloutputsexist is defined outside this script; presumably it ends the job
  # early when all listed outputs are already present - TODO confirm.
  alloutputsexist \
    "${fastqcDir}/$(basename "${reads1FqGz}" .gz)${fastqcZipExt}" \
    "${singleEndfastqcZip}"

  getFile "${reads1FqGz}"

  mkdir -p "${fastqcDir}"
  cd "${fastqcDir}"

  ##################################################################
  echo
  echo "## "$(date)" reads1FqGz"
  # Capture the status of the actual work instead of testing a command list
  # whose last command (cd) always succeeds.
  rc=0
  run_fastqc "${reads1FqGz}" "${singleEndfastqcZip}" || rc=$?

  ##################################################################

  cd "${OLDPWD}"

  if [[ "${rc}" -eq 0 ]]; then
    echo "returncode: ${rc}"
    putFile "${fastqcDir}/$(basename "${reads1FqGz}" .gz)${fastqcZipExt}"
    putFile "${singleEndfastqcZip}"
    echo "succes moving files"
  else
    echo "returncode: ${rc}"
    echo "fail"
    exit "${rc}"
  fi


else
  echo "## "$(date)" Started paired end fastqc"

  # alloutputsexist is defined outside this script; presumably it ends the job
  # early when all listed outputs are already present - TODO confirm.
  alloutputsexist \
    "${fastqcDir}/$(basename "${reads1FqGz}" .gz)${fastqcZipExt}" \
    "${fastqcDir}/$(basename "${reads2FqGz}" .gz)${fastqcZipExt}" \
    "${pairedEndfastqcZip1}" \
    "${pairedEndfastqcZip2}"

  getFile "${reads1FqGz}"
  getFile "${reads2FqGz}"

  mkdir -p "${fastqcDir}"
  cd "${fastqcDir}"

  ##################################################################
  echo
  echo "## "$(date)" reads1FqGz"
  rc=0
  run_fastqc "${reads1FqGz}" "${pairedEndfastqcZip1}" || rc=$?

  # Only process the second read file when the first one succeeded.
  if [[ "${rc}" -eq 0 ]]; then
    echo
    echo "## "$(date)" reads2FqGz"
    run_fastqc "${reads2FqGz}" "${pairedEndfastqcZip2}" || rc=$?
  fi

  ##################################################################
  cd "${OLDPWD}"

  if [[ "${rc}" -eq 0 ]]; then
    echo "returncode: ${rc}"
    putFile "${fastqcDir}/$(basename "${reads1FqGz}" .gz)${fastqcZipExt}"
    putFile "${fastqcDir}/$(basename "${reads2FqGz}" .gz)${fastqcZipExt}"
    putFile "${pairedEndfastqcZip1}"
    putFile "${pairedEndfastqcZip2}"

    echo "succes moving files"
  else
    echo "returncode: ${rc}"
    echo "fail"
    exit "${rc}"
  fi
fi


echo "## "$(date)" ## $0 Done "
Loading

0 comments on commit d77ab11

Please sign in to comment.