Skip to content

Commit

Permalink
Replace --allowMissingData with --errorIfMissingData (gives opposite …
Browse files Browse the repository at this point in the history
…default behavior as previously) and print NA for null object in VariantsToTable
  • Loading branch information
ronlevine committed Jul 28, 2017
1 parent 3426b82 commit 71a9b07
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
*
* <h3>Caveats</h3>
* <ul>
* <li>Some annotations cannot be applied to all variant sites, so VCFs typically contain records where some annotation values are missing. By default this tool throws an error if you request export of an annotation for which not all records have values. You can override this behavior by setting `--allowMissingData` in the command line. As a result, the tool will emit the special value NA for the missing annotations in those records.</li>
* <li>Some annotations cannot be applied to all variant sites, so VCFs typically contain records where some annotation values are missing. By default, when you request export of an annotation for which not all records have values, this tool emits the special value NA for the missing annotations. You can override this behavior by setting --errorIfMissingData on the command line; the tool will then throw an error if a record is missing a value.</li>
* <li>When you request export of FORMAT/sample-level annotations (such as GT), the annotations will be identified per-sample. If multiple samples are present in the VCF, the columns will be ordered alphabetically by sample name (SM tag).</li>
* </ul>
*/
Expand Down Expand Up @@ -136,18 +136,14 @@ public final class VariantsToTable extends VariantWalker {
private boolean moltenizeOutput = false;

/**
* By default, this tool throws a UserException error when it encounters a record that does not contain a value for one of the requested fields. This
* is generally useful when you mistype -F CHRM, so that you get a friendly warning about CHROM not being
* found before the tool runs through 40M records. However, in some cases you genuinely want to disable this behavior, for example to allow the use
* of fields for which not all records have a value (e.g., AC not being calculated for filtered records, if included). When provided, this argument
* will cause VariantsToTable to write out NA values for missing fields instead of throwing an error.
* Note that this flag only applies to standard columns (CHROM, ID, QUAL) and the INFO field and it does not apply to the genotype field.
* By default, this tool will write out NA values indicating missing data when it encounters a field without a value in a record.
* If this flag is added to the command, the tool will instead exit with an error if missing data is encountered.
*/
@Advanced
@Argument(fullName="allowMissingData", shortName="AMD", doc="If provided, we will not require every record to contain every field", optional=true)
private boolean allowMissingData = false;
private static final String MISSING_DATA = "NA";
@Argument(fullName="errorIfMissingData", shortName="EMD", doc="If provided, we will require every record to contain every field", optional=true)
public boolean errorIfMissingData = false;

private static final String MISSING_DATA = "NA";

private SortedSet<String> samples;
private long nRecords = 0L;
Expand Down Expand Up @@ -282,36 +278,50 @@ private List<List<String>> extractFields(final VariantContext vc) {
final String val = wildVals.isEmpty() ? MISSING_DATA : Utils.join(",", wildVals);

addFieldValue(val, records);
} else if ( ! allowMissingData ) {
throw new UserException(String.format("Missing field %s in vc %s at %s", field, vc.getSource(), vc));
} else {
addFieldValue(MISSING_DATA, records);
handleMissingData(errorIfMissingData, field, records, vc);
}
}

if ( addGenotypeFields ) {
addGenotypeFieldsToRecords(vc, records);
addGenotypeFieldsToRecords(vc, records, errorIfMissingData);
}

return records;
}

private void addGenotypeFieldsToRecords(final VariantContext vc, final List<List<String>> records) {
private void addGenotypeFieldsToRecords(final VariantContext vc, final List<List<String>> records, final boolean errorIfMissingData) {
for ( final String sample : samples ) {
for ( final String gf : genotypeFieldsToTake ) {
if ( vc.hasGenotype(sample) && vc.getGenotype(sample).hasAnyAttribute(gf) ) {
if (VCFConstants.GENOTYPE_KEY.equals(gf)) {
addFieldValue(vc.getGenotype(sample).getGenotypeString(true), records);
} else {
addFieldValue(vc.getGenotype(sample).getAnyAttribute(gf), records);
}
/**
* TODO - If gf == "FT" and the GT record is not filtered, Genotype.getAnyAttribute == null. Genotype.hasAnyAttribute should be changed so it
* returns false for this condition. Presently, it always returns true. Once this is fixed, then only the "addFieldValue" statement will
* remain in the following logic block.
*/
if (vc.getGenotype(sample).getAnyAttribute(gf) != null) {
addFieldValue(vc.getGenotype(sample).getAnyAttribute(gf), records);
} else {
handleMissingData(errorIfMissingData, gf, records, vc);
} }
} else {
addFieldValue(MISSING_DATA, records);
handleMissingData(errorIfMissingData, gf, records, vc);
}
}
}
}

/**
 * Applies the missing-data policy for a single requested field.
 *
 * When {@code errorIfMissingData} is false, the {@code MISSING_DATA} placeholder is appended to
 * {@code records} via {@code addFieldValue}. When it is true, nothing is appended and a
 * {@link UserException} naming the field, the variant source and the variant is thrown instead.
 *
 * @param errorIfMissingData whether a missing value should abort with an error rather than be recorded
 * @param field name of the field whose value is missing (used only in the error message)
 * @param records the output records that receive the placeholder value
 * @param vc the variant being processed (used only in the error message)
 * @throws UserException if {@code errorIfMissingData} is true
 */
private static void handleMissingData(final boolean errorIfMissingData, final String field, final List<List<String>> records, final VariantContext vc) {
    // Tolerant mode: record the placeholder and carry on.
    if (!errorIfMissingData) {
        addFieldValue(MISSING_DATA, records);
        return;
    }

    // Strict mode: report which field was absent and where.
    throw new UserException(String.format("Missing field %s in vc %s at %s", field, vc.getSource(), vc));
}

private static void addFieldValue(final Object val, final List<List<String>> result) {
final int numResultRecords = result.size();

Expand All @@ -336,6 +346,10 @@ else if ( (val instanceof List) && ((List)val).size() == numResultRecords ) {
}

private static String prettyPrintObject(final Object val) {
if ( val == null ) {
return "";
}

if ( val instanceof List ) {
return prettyPrintObject(((List) val).toArray());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,19 @@
import java.util.Arrays;

public final class VariantsToTableIntegrationTest extends CommandLineProgramTest {
private String variantsToTableCmd(String moreArgs) {
private String variantsToTableCmd(final String moreArgs) {
return " --variant " + getToolTestDataDir() + "soap_gatk_annotated.noChr_lines.vcf" +
" -F CHROM -F POS -F ID -F REF -F ALT -F QUAL -F FILTER -F TRANSITION -F DP -F SB -F set -F RankSumP -F refseq.functionalClass*" +
" -O %s" + moreArgs;
" -O %s " + moreArgs;
}

private String variantsToTableMultiAllelicCmd(String moreArgs) {
private String variantsToTableMultiAllelicCmd(final String moreArgs) {
return " --variant " + getToolTestDataDir() + "multiallelic.vcf" +
" -F CHROM -F POS -F ID -F REF -F ALT -F QUAL -F MULTI-ALLELIC -F AC -F AF" +
" -O %s" + moreArgs;
}

private String variantsToTableCmdNoSamples(String moreArgs) {
private String variantsToTableCmdNoSamples(final String moreArgs) {
return " --variant " + getToolTestDataDir() + "vcfexample.noSamples.vcf" +
" -O %s" + moreArgs;
}
Expand All @@ -46,11 +46,22 @@ public void testOutputFileFail() throws IOException {
@Test
public void testComplexVariantsToTableFail() throws IOException {
final IntegrationTestSpec spec = new IntegrationTestSpec(
variantsToTableCmd(""),
variantsToTableCmd("--errorIfMissingData"),
1, UserException.class);
spec.executeTest("testComplexVariantsToTable-FAIL", this);
}

/**
 * Runs the tool on vcfexample2.vcf requesting the RD and FT genotype fields with
 * --errorIfMissingData, and expects the run to fail with a {@link UserException}.
 *
 * NOTE(review): the literal 1 passed to IntegrationTestSpec is presumably the expected number of
 * output files -- confirm against the IntegrationTestSpec constructor.
 */
@Test
public void testUnfilteredGenotypeFieldsFail() throws IOException {
    final String inputVcf = getToolTestDataDir() + "vcfexample2.vcf";
    final String commandLine = " --variant " + inputVcf
            + " -GF RD -GF FT --errorIfMissingData"
            + " -O %s";

    final IntegrationTestSpec failingSpec = new IntegrationTestSpec(commandLine, 1, UserException.class);
    failingSpec.executeTest("testUnfilteredGenotypeFields-FAIL", this);
}

@Test
public void testNoSamples() throws IOException {
final IntegrationTestSpec spec = new IntegrationTestSpec(
Expand All @@ -71,8 +82,8 @@ public void testNoSamplesSoNoGenotypes() throws IOException {
@Test
public void testComplexVariantsToTable() throws IOException {
final IntegrationTestSpec spec = new IntegrationTestSpec(
variantsToTableCmd(" -AMD"),
Arrays.asList(getToolTestDataDir() + "expected.soap_gatk_annotated.noChr_lines.AMD.table"));
variantsToTableCmd(""),
Arrays.asList(getToolTestDataDir() + "expected.soap_gatk_annotated.noChr_lines.table"));
spec.executeTest("testComplexVariantsToTable", this);
}

Expand Down Expand Up @@ -102,6 +113,16 @@ public void testGenotypeFields() throws IOException {
spec.executeTest("testGenotypeFields", this);
}

/**
 * Runs the tool on vcfexample2.vcf requesting the RD and FT genotype fields without
 * --errorIfMissingData, and compares the output against expected.vcfexample2.GF_RD.FT.table.
 */
@Test
public void testUnfilteredGenotypeFields() throws IOException {
    final String inputVcf = getToolTestDataDir() + "vcfexample2.vcf";
    final String commandLine = " --variant " + inputVcf
            + " -GF RD -GF FT"
            + " -O %s";
    final String expectedTable = getToolTestDataDir() + "expected.vcfexample2.GF_RD.FT.table";

    final IntegrationTestSpec passingSpec = new IntegrationTestSpec(commandLine, Arrays.asList(expectedTable));
    passingSpec.executeTest("testUnfilteredGenotypeFields", this);
}

@Test
public void testMultiallelicGenotypeFields() throws IOException {
final IntegrationTestSpec spec = new IntegrationTestSpec(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
NA06985.RD NA06985.FT NA06986.RD NA06986.FT NA06994.RD NA06994.FT NA07000.RD NA07000.FT NA07037.RD NA07037.FT NA07051.RD NA07051.FT NA07346.RD NA07346.FT NA07347.RD NA07347.FT NA07357.RD NA07357.FT NA10847.RD NA10847.FT NA10851.RD NA10851.FT NA11829.RD NA11829.FT NA11830.RD NA11830.FT NA11831.RD NA11831.FT NA11832.RD NA11832.FT NA11840.RD NA11840.FT NA11881.RD NA11881.FT NA11894.RD NA11894.FT NA11918.RD NA11918.FT NA11919.RD NA11919.FT NA11920.RD NA11920.FT NA11931.RD NA11931.FT NA11992.RD NA11992.FT NA11993.RD NA11993.FT NA11994.RD NA11994.FT NA11995.RD NA11995.FT NA12003.RD NA12003.FT NA12004.RD NA12004.FT NA12005.RD NA12005.FT NA12006.RD NA12006.FT NA12043.RD NA12043.FT NA12044.RD NA12044.FT NA12045.RD NA12045.FT NA12144.RD NA12144.FT NA12154.RD NA12154.FT NA12155.RD NA12155.FT NA12156.RD NA12156.FT NA12234.RD NA12234.FT NA12249.RD NA12249.FT NA12287.RD NA12287.FT NA12414.RD NA12414.FT NA12489.RD NA12489.FT NA12716.RD NA12716.FT NA12717.RD NA12717.FT NA12749.RD NA12749.FT NA12750.RD NA12750.FT NA12751.RD NA12751.FT NA12760.RD NA12760.FT NA12761.RD NA12761.FT NA12762.RD NA12762.FT NA12763.RD NA12763.FT NA12776.RD NA12776.FT NA12812.RD NA12812.FT NA12813.RD NA12813.FT NA12814.RD NA12814.FT NA12815.RD NA12815.FT NA12828.RD NA12828.FT NA12872.RD NA12872.FT NA12873.RD NA12873.FT NA12874.RD NA12874.FT
NA NA 10 NA 2 NA 5 NA 1 NA 9 NA 3 NA 3 NA 3 NA 4 NA 9 NA 2 NA 4 NA 3 NA 3 NA 3 NA 2 NA 6 NA NA NA 4 NA 5 NA 3 NA 6 NA 3 NA 2 NA 1 NA 1 NA 1 NA 1 NA 1 NA 5 NA 1 NA 4 NA 9 NA 2 NA 3 NA 1 NA 1 NA 5 NA 4 NA 4 NA 1 NA 6 NA 3 NA 3 NA 8 NA 2 NA 1 NA 3 NA 2 NA 2 NA 1 NA 5 NA 2 NA 3 NA 2 NA 2 NA 2 NA 4 NA 2 NA
NA NA 8 NA 1 NA 6 NA 3 NA 8 NA 4 NA 3 NA 3 NA 3 NA 9 NA 1 NA 3 NA 2 NA 3 NA 3 NA 3 NA 8 NA NA NA 2 NA 7 NA 3 NA 4 NA 1 NA 2 NA 1 NA 1 NA 1 NA 1 NA 1 NA 5 NA 2 NA 6 NA 4 NA 1 NA 3 NA NA NA 1 NA 5 NA 5 NA 3 NA 1 NA 6 NA 3 NA 4 NA 5 NA 2 NA NA NA 1 NA 2 NA 3 NA NA NA 5 NA NA NA 3 NA 2 NA 3 NA 3 NA 4 NA 2 NA
NA NA 6 NA 1 NA 5 NA 3 NA 5 NA 4 NA 3 NA 4 NA 1 NA 6 NA 1 NA NA NA 1 NA 2 NA 3 NA 3 NA 7 NA NA NA NA NA 7 NA 3 NA 3 NA 1 NA 2 NA 2 NA 1 NA 3 NA 1 NA 2 NA 5 NA 3 NA 4 NA 4 NA 4 NA 2 NA 3 NA 1 NA 4 NA 5 NA 3 NA 1 NA 8 NA 3 NA 1 NA 4 NA 1 NA NA NA 1 NA 1 NA 3 NA NA NA 4 NA 3 NA 3 NA 2 NA 4 NA 3 NA 4 NA 2 NA
NA NA 8 NA NA NA 3 NA 3 NA 3 NA 4 NA 2 NA 2 NA 2 NA 11 NA NA NA 1 NA 2 NA 1 NA 3 NA 2 NA 7 NA NA NA 2 NA 2 NA 5 NA 5 NA NA NA NA NA 1 NA NA NA 4 NA 1 NA 2 NA 3 NA 3 NA 5 NA 5 NA 5 NA 1 NA 2 NA 1 NA 4 NA 8 NA 3 NA 6 NA 5 NA 2 NA 2 NA 2 NA 2 NA 2 NA 1 NA NA NA 4 NA 1 NA 4 NA 3 NA 4 NA 2 NA 3 NA 3 NA 4 NA 2 NA
NA NA 7 NA NA NA 3 NA 3 NA 3 NA 4 NA 2 NA 2 NA 2 NA 11 NA NA NA 1 NA 2 NA 1 NA 3 NA 1 NA 6 NA NA NA 2 NA NA NA 6 NA 5 NA NA NA NA NA 1 NA NA NA 4 NA 1 NA 2 NA 3 NA 5 NA 5 NA 4 NA 1 NA 1 NA 2 NA 1 NA 4 NA 8 NA 3 NA 6 NA 5 NA 2 NA 2 NA 1 NA 2 NA 2 NA NA NA NA NA 4 NA 1 NA 3 NA 2 NA 4 NA 2 NA 3 NA 3 NA 4 NA 1 NA
NA NA 7 NA NA NA 1 NA 3 NA 4 NA 3 NA 2 NA 2 NA 4 NA 7 NA NA NA 1 NA NA NA NA NA 3 NA 1 NA 5 NA NA NA 2 NA 1 NA 6 NA 2 NA NA NA NA NA NA NA NA NA 2 NA 3 NA 2 NA 3 NA 5 NA 5 NA 5 NA 1 NA 2 NA 1 NA 1 NA 5 NA 7 NA 3 NA 5 NA 5 NA 2 NA NA NA 1 NA 2 NA 1 NA NA NA NA NA 4 NA 2 NA 2 NA 1 NA 4 NA 2 NA 2 NA 3 NA 4 NA 1 NA
NA NA 8 NA 1 NA 2 NA 3 NA 4 NA 3 NA 3 NA 2 NA 5 NA 6 NA NA NA 1 NA NA NA 1 NA 3 NA 1 NA 5 NA 1 NA 2 NA 1 NA 6 NA 3 NA NA NA NA NA 1 NA 1 NA 2 NA 3 NA NA NA 3 NA 3 NA 5 NA 3 NA 2 NA 1 NA 1 NA 1 NA 4 NA 7 NA 3 NA 5 NA 4 NA 2 NA NA NA 1 NA 2 NA NA NA NA NA NA NA 3 NA 2 NA 2 NA 1 NA 4 NA 2 NA 1 NA 3 NA 4 NA 1 NA
NA NA 8 NA 1 NA 2 NA 3 NA 4 NA 3 NA 3 NA 2 NA 5 NA 6 NA NA NA 1 NA NA NA 1 NA 3 NA 1 NA 5 NA 1 NA 2 NA 2 NA 6 NA 3 NA NA NA NA NA 1 NA 1 NA 2 NA 3 NA NA NA 3 NA 3 NA 5 NA 2 NA 2 NA 1 NA 1 NA 1 NA 3 NA 7 NA 2 NA 5 NA 3 NA 2 NA NA NA NA NA 1 NA NA NA NA NA NA NA 3 NA 2 NA 2 NA 1 NA 4 NA 2 NA 1 NA 3 NA 4 NA 1 NA
NA NA 9 NA 1 NA 1 NA 4 NA 5 NA 3 NA 3 NA 2 NA 7 NA 3 NA NA NA 2 NA NA NA 1 NA 3 NA 4 NA 5 NA 1 NA 4 NA 3 NA 7 NA 6 NA NA NA 1 NA 2 NA 2 NA 1 NA 2 NA 2 NA 4 NA 1 NA 4 NA 5 NA 7 NA 1 NA 1 NA 1 NA 4 NA 5 NA NA NA 3 NA 3 NA 1 NA 2 NA NA NA 1 NA NA NA NA NA NA NA 2 NA 2 NA 2 NA 2 NA 3 NA 1 NA 3 NA 4 NA 4 NA 2 NA
2 NA 9 NA 1 NA 1 NA 4 NA 9 NA 3 NA 4 NA 3 NA 7 NA 2 NA NA NA 3 NA 2 NA 1 NA 4 NA 6 NA 5 NA 1 NA 4 NA 6 NA 6 NA 6 NA 1 NA 1 NA 3 NA 3 NA 3 NA 3 NA 2 NA 4 NA 2 NA 5 NA 5 NA 9 NA 1 NA 1 NA 1 NA 4 NA 4 NA NA NA 4 NA 1 NA 1 NA 8 NA NA NA 2 NA 1 NA NA NA NA NA 1 NA 2 NA 2 NA 1 NA 3 NA 1 NA 4 NA 4 NA 4 NA 2 NA
3 NA 16 NA 3 NA 5 NA 4 NA 10 NA 3 NA 4 NA 3 NA 6 NA 5 NA 2 NA 5 NA 4 NA NA NA 6 NA 7 NA 7 NA 1 NA 4 NA 5 NA 5 NA 8 NA 2 NA 1 NA 3 NA 5 NA 5 NA 4 NA 5 NA 4 NA 4 NA 6 NA 8 NA 7 NA 1 NA 3 NA 1 NA 4 NA 7 NA NA NA 5 NA 4 NA 1 NA 10 NA NA NA 5 NA 3 NA NA NA NA NA NA NA NA NA 2 NA 2 NA 4 NA 2 NA 4 NA 4 NA 4 NA 3 NA
3 NA 16 NA 3 NA 5 NA 4 NA 10 NA 3 NA 4 NA 3 NA 6 NA 5 NA 2 NA 5 NA 4 NA NA NA 6 NA 7 NA 7 NA 1 NA 4 NA 5 NA 5 NA 8 NA 2 NA 1 NA 3 NA 5 NA 5 NA 4 NA 4 NA 4 NA 4 NA 6 NA 6 NA 7 NA 1 NA 3 NA 1 NA 4 NA 7 NA NA NA 5 NA 4 NA 1 NA 10 NA NA NA 6 NA 3 NA NA NA NA NA NA NA NA NA 2 NA 2 NA 4 NA 2 NA 4 NA 4 NA 4 NA 3 NA
3 NA 16 NA 5 NA 11 NA 4 NA 10 NA 3 NA 4 NA 4 NA 7 NA 10 NA 2 NA 4 NA 4 NA NA NA 6 NA 7 NA 8 NA 2 NA 5 NA 11 NA 4 NA 7 NA 4 NA 3 NA 2 NA 4 NA 6 NA 5 NA 9 NA 3 NA 6 NA 5 NA 11 NA 8 NA 1 NA 3 NA 1 NA 6 NA 8 NA 2 NA 2 NA 5 NA 1 NA 9 NA 1 NA 6 NA 4 NA NA NA 1 NA 1 NA 1 NA 2 NA 5 NA 5 NA 2 NA 4 NA 4 NA 4 NA 3 NA
2 NA 11 NA 6 NA 10 NA 5 NA 5 NA 6 NA 2 NA 4 NA 6 NA 22 NA 3 NA 3 NA 2 NA 5 NA 6 NA 8 NA 5 NA 3 NA 1 NA 14 NA 3 NA 10 NA 3 NA 3 NA 7 NA 4 NA 4 NA 2 NA 4 NA 2 NA 6 NA 2 NA 11 NA 4 NA NA NA 10 NA NA NA 3 NA 1 NA 6 NA 5 NA 8 NA 8 NA 1 NA 5 NA 5 NA 4 NA NA NA 1 NA 3 NA 1 NA 4 NA 9 NA 3 NA 1 NA 3 NA 4 NA 3 NA 3 NA

0 comments on commit 71a9b07

Please sign in to comment.