Skip to content

Commit

Permalink
added the stabilizing changes for pooled VCF formation
Browse files Browse the repository at this point in the history
  • Loading branch information
lskatz committed Jan 22, 2016
1 parent 1f8efe0 commit f242115
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 8 deletions.
3 changes: 2 additions & 1 deletion scripts/launch_set.pl
Original file line number Diff line number Diff line change
Expand Up @@ -703,8 +703,9 @@ sub indexAndCompressVcf{
my($vcf,$holdjid,$settings)=@_;
my $j={};
eval{
# removing fixVcf because it is done internally in launch_vcf.pl
#set_fixVcf.pl --fail-sites --fail-samples --min_alt_frac $$settings{min_alt_frac} --min_coverage $$settings{min_coverage} '$vcf' > $vcf.reevaluated && mv '$vcf.reevaluated' '$vcf' && \
$j=$sge->pleaseExecute("
set_fixVcf.pl --min_alt_frac $$settings{min_alt_frac} --min_coverage $$settings{min_coverage} '$vcf' > $vcf.reevaluated && mv '$vcf.reevaluated' '$vcf' && \
vcf-sort < '$vcf' > '$vcf.sorted.tmp' && mv '$vcf.sorted.tmp' '$vcf' && \
bgzip -f '$vcf' && tabix '$vcf.gz'
",{qsubxopts=>"-hold_jid $holdjid",jobname=>"sortAndCompress",numcpus=>1});
Expand Down
4 changes: 3 additions & 1 deletion scripts/launch_varscan.pl
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,9 @@ sub varscanWorker{
system("bgzip $vcf.tmp.vcf && tabix $vcf.tmp.vcf.gz");
die if $?;

# Fix the VCF and rename the sample
# Fix the VCF and rename the sample.
# Just do one CPU since this is already being
# called in a worker thread.
system("set_fixVcf.pl --numcpus 1 --min_coverage $$settings{coverage} --min_alt_frac $$settings{altFreq} --fail-samples --fail-sites --rename-sample $samplename $vcf.tmp.vcf.gz > $vcf");
die if $?;

Expand Down
18 changes: 16 additions & 2 deletions scripts/mergeVcf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ if [ "$TEMPDIR" == "" ]; then
TEMPDIR=$(mktemp --directory /tmp/mergeVcf.XXXXXX);
fi
mkdir -pv "$TEMPDIR"; # just in case
export TEMPDIR;

REGIONSFILE="$TEMPDIR/regions.txt";
touch $REGIONSFILE; #make sure this file is accessible
Expand Down Expand Up @@ -75,7 +76,14 @@ echo "$script: temporary directory is $TEMPDIR";
echo "$script: Running bcftools merge";
export IN;
export script;
echo "$REGION" | xargs -P $NUMCPUS -n 1 -I {} bash -c 'echo "$script: merging SNPs in {}"; out='$TEMPDIR'/merged.$$.vcf; bcftools merge --merge all --regions "{}" --force-samples -o $out $IN && bgzip $out && tabix $out.gz && echo "$script: finished with region {}";'
echo "$REGION" | xargs -P $NUMCPUS -n 1 -I {} bash -c '
echo "$script: merging SNPs in {}";
out='$TEMPDIR'/merged.$$.vcf;
bcftools merge --merge all --regions "{}" --force-samples -o $out $IN && \
bgzip $out && \
tabix $out.gz && \
echo "$script: finished with region {}";
'
if [ $? -gt 0 ]; then
echo "$script: ERROR with bcftools merge"
rm -rvf $TEMPDIR;
Expand All @@ -91,6 +99,12 @@ if [ $? -gt 0 ]; then
exit 1;
fi

# Run fixVcf.pl so that if all the samples in a site failed,
# then the site fails.
#bgzip $TEMPDIR/concat.vcf && tabix $TEMPDIR/concat.vcf.gz && \
#set_fixVcf.pl $TEMPDIR/concat.vcf > $TEMPDIR/fixed.vcf && \
#mv $TEMPDIR/fixed.vcf $TEMPDIR/concat.vcf

# Generate a SNPs-only merged file, if requested
if [ "$ALSOSNPS" == 1 ]; then
echo "$script: parsing $TEMPDIR/concat.vcf for SNPs"
Expand All @@ -111,7 +125,7 @@ for VCF in $TEMPDIR/concat.vcf $TEMPDIR/hqPos.vcf; do
fi;

echo "$script: Sorting $VCF and removing indels";
bcftools annotate --include '%TYPE!="indel" && %FILTER!="isIndel"' < $VCF > $VCF.tmp && mv $VCF.tmp $VCF;
bcftools annotate --include '%TYPE!="indel"' < $VCF > $VCF.tmp && mv $VCF.tmp $VCF;

echo "$script: compressing $VCF";
bgzip $VCF
Expand Down
21 changes: 17 additions & 4 deletions scripts/set_fixVcf.pl
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,12 @@ sub fixVcfLine{
# Innocent until proven guilty
if($$settings{'pass-until-fail'}){
$y{FILTER}=['PASS'];
} else {
# If we are not assuming innocent until guilty,
# and if there is no value set yet, then it is FAIL
if(!defined($y{FILTER}[0]) || $y{FILTER}[0] eq '.'){
$y{FILTER}=['FAIL'];
}
}

# Fix up the samples' tags
Expand All @@ -290,7 +296,7 @@ sub fixVcfLine{
if(!$y{gtypes}{$samplename}{FT}){
push(@{ $y{FORMAT} }, 'FT');
# must set a default value in case it doesn't get set
$y{gtypes}{$samplename}{FT}='.';
$y{gtypes}{$samplename}{FT}='FAIL';
}
# Convert FILTER to FT tag
if($y{gtypes}{$samplename}{FT} eq '.'){
Expand All @@ -300,6 +306,12 @@ sub fixVcfLine{
# Innocent until proven guilty
if($$settings{'pass-until-fail'}){
$y{gtypes}{$samplename}{FT}='PASS';
}else{
# If we are not assuming innocent until guilty,
# and if there is no value set yet, then it is FAIL
if($y{gtypes}{$samplename}{FT} eq '.'){
$y{gtypes}{$samplename}{FT}='FAIL';
}
}
}

Expand Down Expand Up @@ -334,9 +346,10 @@ sub fixVcfLine{
}

sub usage{
"Fixes a given VCF by adding, removing, or editing fields. This script
will also reevaluate sites on whether they have passed filters using the FT tag.
However, the FILTER field will simply be set to 'PASS'.
"Fixes a given VCF by adding, removing, or editing fields.
Without any options, useless INFO tags and ALT nts are
removed. Additionally, each site will be failed if all samples
have failed in the FT tag field
Usage: $0 file.vcf.gz > fixed.vcf
--numcpus 1 Num CPUS currently has no effect on this script.
--tempdir /tmp/ Choose a temporary directory (optional)
Expand Down

0 comments on commit f242115

Please sign in to comment.