-
Notifications
You must be signed in to change notification settings - Fork 43
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #236 from npklein/master
Make PROseq pipeline folder
- Loading branch information
Showing
19 changed files
with
1,213 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#MOLGENIS walltime=23:59:00 mem=6gb nodes=1 ppn=4

# Purpose: run Picard AddOrReplaceReadGroups on the STAR pass-two alignment
# (Aligned.out.sam) and write a coordinate-sorted BAM with read-group tags;
# CREATE_INDEX=true asks Picard to write an index alongside it.
# NOTE(review): getFile, putFile, ${stage}, ${checkStage} and PICARD_HOME are
# supplied from outside this script (presumably by the workflow framework and
# the loaded module) -- confirm.

#Parameter mapping #why not string foo,bar? instead of string foo\nstring bar
#string stage
#string checkStage
#string starVersion
#string WORKDIR
#string projectDir

#string picardVersion
#string starAlignmentPassTwoDir
#string sampleName
#string internalId

#string addOrReplaceGroupsDir
#string addOrReplaceGroupsBam
#string addOrReplaceGroupsBai

echo "## "$(date)" ## $0 Started "

getFile "${starAlignmentPassTwoDir}/Aligned.out.sam"

# ${stage} and ${checkStage} are deliberately left unquoted: they are expanded
# as commands and may consist of more than one word.
${stage} picard-tools/${picardVersion}
${checkStage}

set -x
set -e

mkdir -p "${addOrReplaceGroupsDir}"

echo "## "$(date)" Start $0"

# set -e does not apply to the command tested by "if", so the failure branch
# has to terminate the script explicitly.
if java -Xmx6g -XX:ParallelGCThreads=4 -jar "${PICARD_HOME}/AddOrReplaceReadGroups.jar" \
  INPUT="${starAlignmentPassTwoDir}/Aligned.out.sam" \
  OUTPUT="${addOrReplaceGroupsBam}" \
  SORT_ORDER=coordinate \
  RGID="${internalId}" \
  RGLB="${sampleName}_${internalId}" \
  RGPL=ILLUMINA \
  RGPU="${sampleName}_${internalId}_${internalId}" \
  RGSM="${sampleName}" \
  RGDT="$(date --rfc-3339=date)" \
  CREATE_INDEX=true \
  MAX_RECORDS_IN_RAM=4000000 \
  TMP_DIR="${addOrReplaceGroupsDir}"
then
  echo "returncode: $?";

  putFile "${addOrReplaceGroupsBam}"
  putFile "${addOrReplaceGroupsBai}"

  echo "succes moving files";
else
  echo "returncode: $?";
  echo "fail";
  # Without a non-zero exit the step would be reported as successful even
  # though no BAM was produced.
  exit 1
fi

echo "## "$(date)" ## $0 Done "
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#MOLGENIS walltime=23:59:00 mem=4gb nodes=1 ppn=4

# Purpose: run Picard CollectMultipleMetrics (alignment summary, quality score
# distribution, mean quality by cycle and, when a second reads file is set,
# insert size metrics) on the sorted BAM and hand the result files to putFile.
# NOTE(review): getFile, putFile, ${stage} and ${checkStage} are supplied from
# outside this script -- presumably by the workflow framework; confirm.

#string stage
#string checkStage
#string picardVersion
#string RVersion
#string reads2FqGz
#string collectMultipleMetricsDir
#string collectMultipleMetricsPrefix
#string onekgGenomeFasta
#string sortedBam
#string sortedBai
#string toolDir

getFile "${sortedBam}"
getFile "${sortedBai}"
getFile "${onekgGenomeFasta}"

#load modules
# ${stage} / ${checkStage} stay unquoted: they are expanded as commands and may
# consist of more than one word.
${stage} picard/${picardVersion}
# RVersion is declared above and *.pdf outputs are expected below; presumably
# Picard renders those charts through R, so load it here as the RNA-seq
# metrics protocol does.
${stage} R/${RVersion}

#Check modules
${checkStage}

# NOTE(review): this creates "<dir>_QC" while TMP_DIR below points at "<dir>"
# without the suffix -- confirm which of the two is intended.
mkdir -p "${collectMultipleMetricsDir}_QC"

echo "## "$(date)" Start $0"

# Insert size metrics are only requested when a second reads file is set.
insertSizeMetrics=""
if [ ${#reads2FqGz} -ne 0 ]; then
  insertSizeMetrics="PROGRAM=CollectInsertSizeMetrics"
fi

#Run Picard CollectAlignmentSummaryMetrics, CollectInsertSizeMetrics, QualityScoreDistribution and MeanQualityByCycle
# $insertSizeMetrics is intentionally unquoted so that an empty value adds no
# argument at all.
if java -jar -Xmx4g -XX:ParallelGCThreads=4 "${toolDir}picard/${picardVersion}/CollectMultipleMetrics.jar" \
  I="${sortedBam}" \
  O="${collectMultipleMetricsPrefix}" \
  R="${onekgGenomeFasta}" \
  PROGRAM=CollectAlignmentSummaryMetrics \
  PROGRAM=QualityScoreDistribution \
  PROGRAM=MeanQualityByCycle \
  $insertSizeMetrics \
  TMP_DIR="${collectMultipleMetricsDir}"
then
  echo "returncode: $?";
  putFile "${collectMultipleMetricsPrefix}.alignment_summary_metrics"
  putFile "${collectMultipleMetricsPrefix}.quality_by_cycle_metrics"
  putFile "${collectMultipleMetricsPrefix}.quality_by_cycle.pdf"
  putFile "${collectMultipleMetricsPrefix}.quality_distribution_metrics"
  putFile "${collectMultipleMetricsPrefix}.quality_distribution.pdf"

  if [ ${#reads2FqGz} -ne 0 ]; then
    putFile "${collectMultipleMetricsPrefix}.insert_size_histogram.pdf"
    putFile "${collectMultipleMetricsPrefix}.insert_size_metrics"
  fi
  echo "succes moving files";
else
  echo "returncode: $?";
  echo "fail";
  # Propagate the failure; otherwise the step ends with exit status 0.
  exit 1
fi

echo "## "$(date)" ## $0 Done "
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#MOLGENIS walltime=23:59:00 mem=8gb nodes=1 ppn=4

# Purpose: run Picard CollectRnaSeqMetrics on the sorted BAM, accumulating
# metrics per sample and per read group, and hand the metrics file and chart
# to putFile.
# NOTE(review): getFile, putFile, ${stage} and ${checkStage} are supplied from
# outside this script -- presumably by the workflow framework; confirm.

#string stage
#string checkStage
#string picardVersion
#string sortedBam
#string sortedBai
#string collectRnaSeqMetricsDir
#string collectRnaSeqMetrics
#string collectRnaSeqMetricsChart
#string genesRefFlat
#string rRnaIntervalList
#string onekgGenomeFasta
#string toolDir
#string RVersion

getFile "${sortedBam}"
getFile "${sortedBai}"

#Load module
# ${stage} / ${checkStage} stay unquoted: they are expanded as commands and may
# consist of more than one word.
${stage} picard/${picardVersion}
${stage} R/${RVersion}

#Check modules
${checkStage}

mkdir -p "${collectRnaSeqMetricsDir}"

echo "## "$(date)" ## $0 Started "

if java -Xmx8g -XX:ParallelGCThreads=4 -jar "${toolDir}picard/${picardVersion}/CollectRnaSeqMetrics.jar" \
  INPUT="${sortedBam}" \
  OUTPUT="${collectRnaSeqMetrics}" \
  CHART_OUTPUT="${collectRnaSeqMetricsChart}" \
  METRIC_ACCUMULATION_LEVEL=SAMPLE \
  METRIC_ACCUMULATION_LEVEL=READ_GROUP \
  REFERENCE_SEQUENCE="${onekgGenomeFasta}" \
  REF_FLAT="${genesRefFlat}" \
  RIBOSOMAL_INTERVALS="${rRnaIntervalList}" \
  STRAND_SPECIFICITY=NONE \
  TMP_DIR="${collectRnaSeqMetricsDir}"
then
  echo "returncode: $?";
  putFile "${collectRnaSeqMetrics}"
  putFile "${collectRnaSeqMetricsChart}"

  echo "succes moving files";
else
  echo "returncode: $?";
  echo "fail";
  # Propagate the failure; otherwise the step ends with exit status 0.
  exit 1
fi

echo "## "$(date)" ## $0 Done "
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
#MOLGENIS walltime=23:59:00 mem=6gb ppn=4

# Purpose: merge the per-sample PLINK binary file sets listed in
# genotypeHarminzerOutput into one combined file set with plink --merge-list.
# The first list element is the --bfile input; every other element becomes one
# row of the merge list.
# NOTE(review): getFile, putFile, ${stage} and ${checkStage} are supplied from
# outside this script -- presumably by the workflow framework; confirm.
# NOTE(review): ${combinedBEDDir} is concatenated with file names without a
# separator, so it presumably ends in "/" -- confirm.

#string stage
#string checkStage
#string projectDir
#list genotypeHarminzerOutput
#string combinedBEDDir
#string plinkVersion
#string genotypeHarmonizerDir

# Fetch the file set of every sample. A bare ${genotypeHarminzerOutput} only
# expands to element 0, which would leave the other samples unfetched.
for samplePrefix in "${genotypeHarminzerOutput[@]}"; do
  getFile "${samplePrefix}.bed"
  getFile "${samplePrefix}.bim"
  getFile "${samplePrefix}.fam"
  getFile "${samplePrefix}.log"
done

#Load module
# ${stage} / ${checkStage} stay unquoted: they are expanded as commands and may
# consist of more than one word.
${stage} PLINK/${plinkVersion}

#Check staging of module
${checkStage}

mkdir -p "${combinedBEDDir}"

echo "## "$(date)" ## $0 Started "

# One row per sample: "<prefix>.bed <prefix>.bim <prefix>.fam".
# The first sample is skipped because it is passed to plink via --bfile.
# Each prefix is passed three times to a single printf call so that the three
# columns of a row always belong to the same sample.
mergeList="${combinedBEDDir}combinedFiles.txt"
for samplePrefix in "${genotypeHarminzerOutput[@]:1}"; do
  printf '%s.bed %s.bim %s.fam\n' "${samplePrefix}" "${samplePrefix}" "${samplePrefix}"
done > "${mergeList}"

if plink \
  --bfile "${genotypeHarminzerOutput[0]}" \
  --merge-list "${mergeList}" \
  --make-bed \
  --out "${combinedBEDDir}combinedFiles"
then
  echo "returncode: $?";
  putFile "${combinedBEDDir}combinedFiles.txt"
  putFile "${combinedBEDDir}combinedFiles.log"
  putFile "${combinedBEDDir}combinedFiles.bed"
  putFile "${combinedBEDDir}combinedFiles.bim"
  putFile "${combinedBEDDir}combinedFiles.fam"
  putFile "${combinedBEDDir}combinedFiles.nosex"

  echo "succes moving files";
else
  echo "returncode: $?";
  echo "fail";
  # Propagate the failure; otherwise the step ends with exit status 0.
  exit 1
fi

echo "## "$(date)" ## $0 Done "
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#MOLGENIS nodes=1 ppn=1 mem=1gb walltime=10:00:00

# Purpose: run FastQC (--noextract) on the reads file(s) of one sample and copy
# each resulting zip to its configured destination. Single-end mode is used
# when reads2FqGz is empty, paired-end mode otherwise.
# NOTE(review): getFile, putFile, alloutputsexist, ${stage} and ${checkStage}
# are supplied from outside this script -- presumably by the workflow
# framework; confirm.
# NOTE(review): ${singleEndfastqcZip} is used in the single-end branch but is
# not among the parameters declared below -- confirm where it is defined.

#string stage
#string checkStage
#string fastqcVersion
#string WORKDIR
#string projectDir
#string fastqcDir
#string fastqcZipExt
#string pairedEndfastqcZip1
#string pairedEndfastqcZip2
#string reads1FqGz
#string reads2FqGz
#string sampleName

echo -e "test ${reads1FqGz} ${reads2FqGz} 1: $(basename ${reads1FqGz} .gz)${fastqcZipExt} \n2: $(basename ${reads2FqGz} .gz)${fastqcZipExt} "

# ${stage} / ${checkStage} stay unquoted: they are expanded as commands and may
# consist of more than one word.
${stage} FastQC/${fastqcVersion}
${checkStage}

set -e

#######################################
# Run FastQC on one reads file and copy the produced zip.
# Globals:   fastqcDir, fastqcZipExt (read)
# Arguments: $1 - label printed in the log line
#            $2 - reads file to analyse
#            $3 - destination for the copy of the FastQC zip
# Returns:   0 when fastqc and cp both succeed, otherwise the failing status
#######################################
runFastqc() {
  local label=$1
  local readsFile=$2
  local zipCopy=$3

  echo
  echo "## "$(date)" ${label}"
  # set -e is not honoured while this function runs as part of a tested
  # command, so the fastqc status is checked explicitly.
  fastqc --noextract "${readsFile}" --outdir "${fastqcDir}" || return
  echo
  cp -v "${fastqcDir}/$(basename "${readsFile}" .gz)${fastqcZipExt}" "${zipCopy}"
}

echo "## "$(date)" ## $0 Started "

if [ ${#reads2FqGz} -eq 0 ]; then

  echo "## "$(date)" Started single end fastqc"
  alloutputsexist \
    "${fastqcDir}/$(basename "${reads1FqGz}" .gz)${fastqcZipExt}" \
    "${singleEndfastqcZip}"

  getFile "${reads1FqGz}"

  mkdir -p "${fastqcDir}"
  cd "${fastqcDir}"

  # Record the status instead of testing a command list: the status of a list
  # is that of its last command, which would hide a fastqc or cp failure.
  fastqcStatus=0
  runFastqc "reads1FqGz" "${reads1FqGz}" "${singleEndfastqcZip}" || fastqcStatus=$?

  cd "${OLDPWD}"

  if [ "${fastqcStatus}" -eq 0 ]; then
    echo "returncode: ${fastqcStatus}";
    putFile "${fastqcDir}/$(basename "${reads1FqGz}" .gz)${fastqcZipExt}"
    putFile "${singleEndfastqcZip}"
    echo "succes moving files";
  else
    echo "returncode: ${fastqcStatus}";
    echo "fail";
    exit 1
  fi

else
  echo "## "$(date)" Started paired end fastqc"

  alloutputsexist \
    "${fastqcDir}/$(basename "${reads1FqGz}" .gz)${fastqcZipExt}" \
    "${fastqcDir}/$(basename "${reads2FqGz}" .gz)${fastqcZipExt}" \
    "${pairedEndfastqcZip1}" \
    "${pairedEndfastqcZip2}"

  getFile "${reads1FqGz}"
  getFile "${reads2FqGz}"

  mkdir -p "${fastqcDir}"
  cd "${fastqcDir}"

  # The second file is only processed when the first one succeeded; any
  # failure is recorded in fastqcStatus.
  fastqcStatus=0
  runFastqc "reads1FqGz" "${reads1FqGz}" "${pairedEndfastqcZip1}" \
    && runFastqc "reads2FqGz" "${reads2FqGz}" "${pairedEndfastqcZip2}" \
    || fastqcStatus=$?

  cd "${OLDPWD}"

  if [ "${fastqcStatus}" -eq 0 ]; then
    echo "returncode: ${fastqcStatus}";
    putFile "${fastqcDir}/$(basename "${reads1FqGz}" .gz)${fastqcZipExt}"
    putFile "${fastqcDir}/$(basename "${reads2FqGz}" .gz)${fastqcZipExt}"
    putFile "${pairedEndfastqcZip1}"
    putFile "${pairedEndfastqcZip2}"

    echo "succes moving files";
  else
    echo "returncode: ${fastqcStatus}";
    echo "fail";
    exit 1
  fi
fi

echo "## "$(date)" ## $0 Done "
Oops, something went wrong.