diff --git a/README.md b/README.md index e1b07997..10affd22 100755 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@

NGS_DNA pipeline

+

Manual

Find manual on installation and use at https://molgenis.gitbooks.io/molgenis-pipelines/ @@ -7,6 +8,7 @@ The sequencer is producing reads (in FastQ format) and are aligned to the hg19 r Sambamba (Tarasov et al.2) is processing the aligned reads and then we applied GATK (McKenna et al. 3) duplicate removal, performed SNP and INDEL discovery and genotyping using standard hard filtering parameters to GATK Best Practices recommendations (Van der Auwera et al.4) +

References

1. Li Durbin, Fast and accurate short read alignment with Burrows-Wheeler transform. 2. Sambamba: Fast processing of NGS alignment formats diff --git a/generate_template.sh b/generate_template.sh index a51ee452..c5d6b4a0 100755 --- a/generate_template.sh +++ b/generate_template.sh @@ -1,94 +1,109 @@ #!/bin/bash module load NGS_DNA/3.4.1 -module list -HOST=$(hostname -s) -thisDir=$(pwd) +module list +host=$(hostname -s) +environmentParameters="parameters_${host}" + +function showHelp() { + # + # Display commandline help on STDOUT. + # + cat < ${GENSCRIPTS}/tmpdir_parameters.csv - -perl ${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl ${GENSCRIPTS}/tmpdir_parameters.csv > \ -${GENSCRIPTS}/tmpdir_parameters_converted.csv - -perl ${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl ${EBROOTNGS_DNA}/parameters.csv > \ -${GENSCRIPTS}/out.csv - -perl ${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl ${EBROOTNGS_DNA}/parameters_${GROUP}.csv > \ -${GENSCRIPTS}/group_parameters.csv - -perl ${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl ${EBROOTNGS_DNA}/${ENVIRONMENT_PARAMETERS} > \ -${GENSCRIPTS}/environment_parameters.csv - - -sh $EBROOTMOLGENISMINCOMPUTE/molgenis_compute.sh \ --p ${GENSCRIPTS}/out.csv \ --p ${GENSCRIPTS}/group_parameters.csv \ --p ${GENSCRIPTS}/environment_parameters.csv \ --p ${GENSCRIPTS}/tmpdir_parameters_converted.csv \ --p ${EBROOTNGS_DNA}/batchIDList${BATCH}.csv \ --p ${GENSCRIPTS}/${PROJECT}.csv \ --w ${EBROOTNGS_DNA}/create_in-house_ngs_projects_workflow.csv \ --rundir ${GENSCRIPTS}/scripts \ ---runid ${RUNID} \ --o "workflowpath=${WORKFLOW};\ -outputdir=scripts/jobs;mainParameters=${GENSCRIPTS}/out.csv;\ -group_parameters=${GENSCRIPTS}/group_parameters.csv;\ -groupname=${GROUP};\ +if [ -f "${genScripts}/out.csv" ];then rm -rf "${genScripts}/out.csv" ; fi + +echo "tmpName,${tmpDirectory}" > ${genScripts}/tmpdir_parameters.csv +perl "${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl" "${genScripts}/tmpdir_parameters.csv" > "${genScripts}/tmpdir_parameters_converted.csv" +perl "${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl" "${EBROOTNGS_DNA}/parameters.csv" > "${genScripts}/out.csv" +perl "${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl" "${EBROOTNGS_DNA}/parameters_${group}.csv" > "${genScripts}/group_parameters.csv" +perl "${EBROOTNGS_DNA}/scripts/convertParametersGitToMolgenis.pl" "${EBROOTNGS_DNA}/${environmentParameters}.csv" > "${genScripts}/environment_parameters.csv" + +echo "BATCHIDLIST=${EBROOTNGS_DNA}/batchIDList${batch}.csv" + +sh "${EBROOTMOLGENISMINCOMPUTE}/molgenis_compute.sh" \ +-p "${genScripts}/out.csv" \ +-p "${genScripts}/group_parameters.csv" \ +-p "${genScripts}/environment_parameters.csv" \ +-p "${genScripts}/tmpdir_parameters_converted.csv" \ +-p "${EBROOTNGS_DNA}/batchIDList${batch}.csv" \ +-p "${genScripts}/${project}.csv" \ +-w "${EBROOTNGS_DNA}/create_in-house_ngs_projects_workflow.csv" \ +-rundir "${genScripts}/scripts" \ +--runid "${runID}" \ +-o workflowpath="${workflow};\ +outputdir=scripts/jobs;mainParameters=${genScripts}/out.csv;\ +group_parameters=${genScripts}/group_parameters.csv;\ +groupname=${group};\ ngsversion=$(module list | grep -o -P 'NGS_DNA(.+)');\ -environment_parameters=${GENSCRIPTS}/environment_parameters.csv;\ -tmpdir_parameters=${GENSCRIPTS}/tmpdir_parameters_converted.csv;\ -batchIDList=${EBROOTNGS_DNA}/batchIDList${BATCH}.csv;\ -worksheet=${GENSCRIPTS}/${PROJECT}.csv" \ +environment_parameters=${genScripts}/environment_parameters.csv;\ +tmpdir_parameters=${genScripts}/tmpdir_parameters_converted.csv;\ +batchIDList=${EBROOTNGS_DNA}/batchIDList${batch}.csv;\ +worksheet=${genScripts}/${project}.csv" \ -weave \ --generate - diff --git a/protocols/CreateExternSamplesProjects.sh b/protocols/CreateExternSamplesProjects.sh index b298fe7a..972a19b9 100755 --- a/protocols/CreateExternSamplesProjects.sh +++ b/protocols/CreateExternSamplesProjects.sh @@ -38,8 +38,8 @@ set -e set -u umask 0007 -module load $ngsUtilsVersion -module load $ngsversion +module load ${ngsUtilsVersion} +module load ${ngsversion} module list # diff --git a/test/test_pipeline.sh b/test/test_pipeline.sh index 2eaebacc..29e7b134 100644 --- a/test/test_pipeline.sh +++ b/test/test_pipeline.sh @@ -57,23 +57,28 @@ cp generate_template.sh ${workfolder}/generatedscripts/PlatinumSubset/generate_t fgrep "computeVersion," parameters.csv > ${workfolder}/generatedscripts/PlatinumSubset/mcVersion.txt NGS_DNA_VERSION=NGS_DNA/3.4.1 -module load $NGS_DNA_VERSION -perl -pi -e "s|module load $NGS_DNA_VERSION|EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/|" ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh -perl -pi -e 's|PROJECT=projectXX|PROJECT=PlatinumSubset|' ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh -perl -pi -e 's|RUNID=runXX|RUNID=run01|' ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh +module load ${NGS_DNA_VERSION} +EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/ + +perl -pi -e "s|module load ${NGS_DNA_VERSION}|EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/|" ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh +echo "perl -pi -e |module load ${NGS_DNA_VERSION}|EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/| ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh" perl -pi -e 's|ngsversion=.*|ngsversion="test";\\|' ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh perl -pi -e 's|create_in-house_ngs_projects_workflow.csv|create_external_samples_ngs_projects_workflow.csv|' ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh perl -pi -e 's|sh \$EBROOTMOLGENISMINCOMPUTE/molgenis_compute.sh|module load Molgenis-Compute/dummy\nsh \$EBROOTMOLGENISMINCOMPUTE/molgenis_compute.sh|' ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh perl -pi -e "s|module load Molgenis-Compute/dummy|module load Molgenis-Compute/\$mcVersion|" ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh -perl -pi -e 's|WORKFLOW=\${EBROOTNGS_DNA}/workflow.csv|WORKFLOW=\${EBROOTNGS_DNA}/test_workflow.csv|' ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh + +perl -pi -e 's|workflow=\${EBROOTNGS_DNA}/workflow.csv|workflow=${EBROOTNGS_DNA}/test_workflow.csv|" ${workfolder}/generatedscripts/PlatinumSubset/generate_template.sh cp test/PlatinumSubset.csv ${workfolder}/generatedscripts/PlatinumSubset/ cd ${workfolder}/generatedscripts/PlatinumSubset/ -sh generate_template.sh +sh generate_template.sh cd scripts -perl -pi -e 's|module load \$ngsversion|EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/\n|' *.sh +###### Load a version of molgenis compute +perl -pi -e "s|module load test|module load ${NGS_DNA_VERSION}| +###### +perl -pi -e "s|/apps/software/${NGS_DNA_VERSION}/|/groups/umcg-gaf/tmp04/tmp/NGS_DNA/|g" *.sh sh submit.sh cd ${workfolder}/projects/PlatinumSubset/run01/jobs/ @@ -88,8 +93,8 @@ for i in $(ls s*_GenderCheck_1.sh); do touch $i.finished ; touch ${i%.*}.env; ch for i in $(ls s*_GenderCalculate_1.sh); do touch $i.finished ; touch ${i%.*}.env; chmod 755 ${i%.*}.env ;done printf "This is a male\n" > //groups/umcg-gaf//tmp04//tmp//PlatinumSubset/run01//PlatinumSample_NA12891.chosenSex.txt printf "Male\n" >> //groups/umcg-gaf//tmp04//tmp//PlatinumSubset/run01//PlatinumSample_NA12891.chosenSex.txt -perl -pi -e 's|module load test|EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/|' s*_QCStats_*.sh -perl -pi -e 's|module load test|EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/|' s*_DecisionTree_*.sh +perl -pi -e 's|module load test|EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/|' s*_QCStats_*.sh +perl -pi -e 's|module load test|EBROOTNGS_DNA=/groups/umcg-gaf/tmp04/tmp/NGS_DNA/|' s*_DecisionTree_*.sh perl -pi -e 's|module load test|#|' s*_QCReport_0.sh perl -pi -e 's|countShScripts-3\)\)|countShScripts-4))|' s*_CountAllFinishedFiles_0.sh perl -pi -e 's|--time=16:00:00|--time=05:59:00|' *.sh