-
Notifications
You must be signed in to change notification settings - Fork 593
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Enabled GVCF type filtering support in SelectVariants (#7193)
- Added optional argument `--ignore-non-ref-in-types` to support correct handling of VariantContexts that contain a NON_REF allele - Default behavior does not change - Note that this only enables correct handling of GVCF input. The filtered output files are VCF (not GVCF) files, since reference blocks are not extended when a variant is filtered out - Added integration test
- Loading branch information
1 parent
a88ac4e
commit 692e7a3
Showing
5 changed files
with
104 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
29 changes: 29 additions & 0 deletions
29
...s/variantutils/SelectVariants/expected/testSelectVariants_VariantTypeSelectionForGVCF.vcf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
##fileformat=VCFv4.2 | ||
##ALT=<ID=NON_REF,Description="Represents any possible alternative allele at this location"> | ||
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block"> | ||
##FORMAT=<ID=MIN_GQ,Number=1,Type=Integer,Description="Minimum GQ observed within the GVCF block"> | ||
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> | ||
##FORMAT=<ID=SB,Number=4,Type=Integer,Description="Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias."> | ||
##GVCFBlock=minGQ=0(inclusive),maxGQ=5(exclusive) | ||
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> | ||
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> | ||
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> | ||
##INFO=<ID=BLOCK_SIZE,Number=1,Type=Integer,Description="Size of the homozygous reference GVCF block"> | ||
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities"> | ||
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered"> | ||
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval"> | ||
##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes"> | ||
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality"> | ||
##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads"> | ||
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities"> | ||
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias"> | ||
##contig=<ID=chr20,length=64444167> | ||
##source=SelectVariants | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA1 | ||
chr20 69511 . A G,<NON_REF> 2253.77 . BaseQRankSum=1.169;DP=82;MQ=31.05;MQ0=0;MQRankSum=-0.866;ReadPosRankSum=1.689 GT:AD:DP:GQ:PL:SB 1/1:1,79,0:80:99:2284,207,0,2287,237,2316:0,1,46,33 | ||
chr20 69635 . A T,<NON_REF> 60.77 . BaseQRankSum=0.937;DP=7;MQ=34.15;MQ0=0;MQRankSum=1.300;ReadPosRankSum=1.754 GT:AD:DP:GQ:PL:SB 0/1:4,3,0:7:10:10,0,119,101,128,229:0,4,0,3 | ||
chr20 69785 . T G,<NON_REF> 2253.77 . BaseQRankSum=1.169;DP=82;MQ=31.05;MQ0=0;MQRankSum=-0.866;ReadPosRankSum=1.689 GT:DP:GQ:PL:SB 1/1:80:99:2284,207,0,2287,237,2316:0,1,46,33 |
35 changes: 35 additions & 0 deletions
35
...org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/gvcfExample.g.vcf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
##fileformat=VCFv4.1 | ||
##ALT=<ID=NON_REF,Description="Represents any possible alternative allele at this location"> | ||
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block"> | ||
##FORMAT=<ID=MIN_GQ,Number=1,Type=Integer,Description="Minimum GQ observed within the GVCF block"> | ||
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> | ||
##FORMAT=<ID=SB,Number=4,Type=Integer,Description="Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias."> | ||
##GVCFBlock=minGQ=0(inclusive),maxGQ=5(exclusive) | ||
##INFO=<ID=BLOCK_SIZE,Number=1,Type=Integer,Description="Size of the homozygous reference GVCF block"> | ||
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities"> | ||
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered"> | ||
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval"> | ||
##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes"> | ||
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality"> | ||
##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads"> | ||
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities"> | ||
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias"> | ||
##contig=<ID=chr20,length=64444167,assembly=Homo_sapiens_assembly38.fasta> | ||
##source=SelectVariants | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA1 | ||
chr20 69485 . G <NON_REF> . . BLOCK_SIZE=20;END=69510 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:94:99:82:99:0,120,1800 | ||
chr20 69511 . A G,<NON_REF> 2253.77 . BaseQRankSum=1.169;DP=82;MQ=31.05;MQ0=0;MQRankSum=-0.866;ReadPosRankSum=1.689 GT:AD:DP:GQ:PL:SB 1/1:1,79,0:80:99:2284,207,0,2287,237,2316:0,1,46,33 | ||
chr20 69512 . A <NON_REF> . . BLOCK_SIZE=10;END=69521 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:96:99:82:99:0,120,1800 | ||
chr20 69522 . A <NON_REF> . . BLOCK_SIZE=27;END=69548 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:95:0:95:0:0,0,0 | ||
chr20 69549 . T <NON_REF> . . BLOCK_SIZE=86;END=69634 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:6:16:5:16:0,16,90 | ||
chr20 69635 . A T,<NON_REF> 60.77 . BaseQRankSum=0.937;DP=7;MQ=34.15;MQ0=0;MQRankSum=1.300;ReadPosRankSum=1.754 GT:AD:DP:GQ:PL:SB 0/1:4,3,0:7:10:10,0,119,101,128,229:0,4,0,3 | ||
chr20 69762 . A <NON_REF> . . BLOCK_SIZE=1;END=69762 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:18:7:18:0,18,270 | ||
chr20 69763 . A <NON_REF> . . BLOCK_SIZE=4;END=69766 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:21:7:21:0,21,253 | ||
chr20 69767 . A <NON_REF> . . BLOCK_SIZE=4;END=69770 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:12:7:12:0,12,180 | ||
chr20 69771 . TAAAAA T,<NON_REF> 60.77 . BaseQRankSum=0.937;DP=7;MQ=34.15;MQ0=0;MQRankSum=1.300;ReadPosRankSum=1.754 GT:AD:DP:GQ:PL:SB 0/1:0,0,0:0:20:20,0,119,101,128,229:0,4,0,3 | ||
chr20 69777 . G <NON_REF> . . BLOCK_SIZE=10;END=69783 GT:DP:GQ:MIN_DP:MIN_GQ:PL 0/0:7:0:3:0:0,0,0 | ||
chr20 69785 . T G,<NON_REF> 2253.77 . BaseQRankSum=1.169;DP=82;MQ=31.05;MQ0=0;MQRankSum=-0.866;ReadPosRankSum=1.689 GT:DP:GQ:PL:SB 1/1:80:99:2284,207,0,2287,237,2316:0,1,46,33 |