From 5ae634fffc2153f0e8e76e8ef1518df6633c0dbc Mon Sep 17 00:00:00 2001 From: Ted Sharpe Date: Thu, 7 Apr 2022 11:50:40 -0400 Subject: [PATCH] address final reviewer comments --- .../hellbender/engine/MultiFeatureWalker.java | 73 +++++++++++++------ .../examples/ExampleMultiFeatureWalker.java | 2 + .../hellbender/tools/sv/BafEvidence.java | 4 + .../hellbender/tools/sv/DepthEvidence.java | 9 +++ .../tools/sv/DiscordantPairEvidence.java | 5 ++ .../tools/sv/SplitReadEvidence.java | 6 +- 6 files changed, 76 insertions(+), 23 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/MultiFeatureWalker.java b/src/main/java/org/broadinstitute/hellbender/engine/MultiFeatureWalker.java index 512a5607d18..69343278391 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/MultiFeatureWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/MultiFeatureWalker.java @@ -25,8 +25,8 @@ */ public abstract class MultiFeatureWalker extends WalkerBase { - SAMSequenceDictionary dictionary; - final Set samples = new TreeSet<>(); + private SAMSequenceDictionary dictionary; + private final Set samples = new TreeSet<>(); @Override public boolean requiresFeatures(){ @@ -84,6 +84,8 @@ public void traverse() { * * @param feature Current Feature being processed. * @param header Header object for the source from which the feature was drawn (may be null) + * @param readsContext An object that allows querying for the reads the overlap the feature + * @param referenceContext An object that allows querying for the reference sequence associated with the feature */ public abstract void apply( final F feature, final Object header, @@ -98,7 +100,7 @@ public abstract void apply( final F feature, /** * Get the list of sample names we accumulated */ - public Set getSampleNames() { return samples; } + public Set getSampleNames() { return Collections.unmodifiableSet(samples); } /** * Choose the most comprehensive dictionary available (see betterDictionary method below), @@ -111,46 +113,55 @@ public abstract void apply( final F feature, * dictionaries available.) */ private void setDictionaryAndSamples() { - dictionary = getMasterSequenceDictionary(); + DictSource dictSource = new DictSource(getMasterSequenceDictionary(), + StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME); if ( hasReference() ) { - dictionary = betterDictionary(reference.getSequenceDictionary(), dictionary); + final DictSource refDictSource = new DictSource(reference.getSequenceDictionary(), + StandardArgumentDefinitions.REFERENCE_LONG_NAME); + dictSource = betterDictionary(refDictSource, dictSource); } if ( hasReads() ) { - dictionary = betterDictionary(reads.getSequenceDictionary(), dictionary); + final DictSource readsDictSource = new DictSource(reads.getSequenceDictionary(), "read-source"); + dictSource = betterDictionary(readsDictSource, dictSource); } for ( final FeatureInput input : features.getAllInputs() ) { final Object header = features.getHeader(input); if ( header instanceof SVFeaturesHeader ) { final SVFeaturesHeader svFeaturesHeader = (SVFeaturesHeader)header; - dictionary = betterDictionary(svFeaturesHeader.getDictionary(), dictionary); + final DictSource featureDictSource = new DictSource(svFeaturesHeader.getDictionary(), + input.getName()); + dictSource = betterDictionary(featureDictSource, dictSource); final List sampleNames = svFeaturesHeader.getSampleNames(); if ( sampleNames != null ) { samples.addAll(svFeaturesHeader.getSampleNames()); } } else if (header instanceof VCFHeader ) { final VCFHeader vcfHeader = (VCFHeader)header; - dictionary = betterDictionary(vcfHeader.getSequenceDictionary(), dictionary); + final DictSource featureDictSource = new DictSource(vcfHeader.getSequenceDictionary(), + input.getName()); + dictSource = betterDictionary(featureDictSource, dictSource); samples.addAll(vcfHeader.getSampleNamesInOrder()); } } - if ( dictionary == null ) { + if ( dictSource.getDictionary() == null ) { throw new UserException("No dictionary found. Provide one as --" + StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME + " or --" + StandardArgumentDefinitions.REFERENCE_LONG_NAME + "."); } + dictionary = dictSource.getDictionary(); } /** * Makes sure that the two dictionaries are consistent with regard to contig names and order. * Returns the more comprehensive (larger) dictionary if they're consistent. */ - private static SAMSequenceDictionary betterDictionary( final SAMSequenceDictionary newDict, - final SAMSequenceDictionary curDict ) { - if ( curDict == null ) return newDict; - if ( newDict == null ) return curDict; - final SAMSequenceDictionary smallDict; - final SAMSequenceDictionary largeDict; - if ( newDict.size() <= curDict.size() ) { + private static DictSource betterDictionary( final DictSource newDict, + final DictSource curDict ) { + if ( curDict.getDictionary() == null ) return newDict; + if ( newDict.getDictionary() == null ) return curDict; + final DictSource smallDict; + final DictSource largeDict; + if ( newDict.getDictionary().size() <= curDict.getDictionary().size() ) { smallDict = newDict; largeDict = curDict; } else { @@ -158,21 +169,39 @@ private static SAMSequenceDictionary betterDictionary( final SAMSequenceDictiona largeDict = newDict; } int lastIdx = -1; - for ( final SAMSequenceRecord rec : smallDict.getSequences() ) { - final int newIdx = largeDict.getSequenceIndex(rec.getContig()); + final SAMSequenceDictionary largeDictionary = largeDict.getDictionary(); + for ( final SAMSequenceRecord rec : smallDict.getDictionary().getSequences() ) { + final int newIdx = largeDictionary.getSequenceIndex(rec.getContig()); if ( newIdx == -1 ) { - throw new UserException("Contig " + rec.getContig() + - " not found in the larger dictionary"); + throw new UserException("Contig " + rec.getContig() + " in the dictionary read from " + + smallDict.getSource() + " does not appear in the larger dictionary read from " + + largeDict.getSource()); } if ( newIdx <= lastIdx ) { - throw new UserException("Contig " + rec.getContig() + - " not in same order as in larger dictionary"); + final String prevContig = largeDictionary.getSequence(lastIdx).getContig(); + throw new UserException("Contigs out of order: Contig " + rec.getContig() + + " comes before contig " + prevContig + " in the dictionary read from " + + largeDict.getSource() + ", but follows it in the dictionary read from " + + smallDict.getSource()); } lastIdx = newIdx; } return largeDict; } + public static final class DictSource { + private final SAMSequenceDictionary dictionary; + private final String source; + + public DictSource( final SAMSequenceDictionary dictionary, final String source ) { + this.dictionary = dictionary; + this.source = source; + } + + public SAMSequenceDictionary getDictionary() { return dictionary; } + public String getSource() { return source; } + } + public static final class MergingIterator implements Iterator> { final SAMSequenceDictionary dictionary; final PriorityQueue> priorityQueue; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleMultiFeatureWalker.java b/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleMultiFeatureWalker.java index c1b48a6c403..97d0711ae9d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleMultiFeatureWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleMultiFeatureWalker.java @@ -42,5 +42,7 @@ public class ExampleMultiFeatureWalker extends MultiFeatureWalker { final ReferenceContext referenceContext ) { // We'll just keep track of the Features we see, in the order that we see them. features.add(feature); + // And print them + System.out.println(feature); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java index ed150c95e62..730c1dfda6d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java @@ -67,4 +67,8 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(sample, contig, position, value); } + + @Override public String toString() { + return contig + "\t" + position + "\t" + sample + "\t" + value; + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/DepthEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/DepthEvidence.java index cf129d42057..1ce973ed951 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/DepthEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/DepthEvidence.java @@ -75,4 +75,13 @@ public int hashCode() { result = 31 * result + Arrays.hashCode(counts); return result; } + + @Override public String toString() { + final StringBuilder sb = new StringBuilder(contig + "\t" + start + "\t" + end); + for ( final int count : counts ) { + sb.append("\t"); + sb.append(count); + } + return sb.toString(); + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/DiscordantPairEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/DiscordantPairEvidence.java index 4cece44e223..50548307047 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/DiscordantPairEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/DiscordantPairEvidence.java @@ -98,4 +98,9 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(sample, startContig, endContig, start, end, startStrand, endStrand); } + + @Override public String toString() { + return startContig + "\t" + start + "\t" + end + "\t" + sample + "\t" + endContig + "\t" + + startStrand + "\t" + endStrand; + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/SplitReadEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/SplitReadEvidence.java index fdf68f283bb..ff35b6701c7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/SplitReadEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/SplitReadEvidence.java @@ -82,4 +82,8 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(sample, contig, position, count, strand); } -} \ No newline at end of file + + @Override public String toString() { + return contig + "\t" + position + "\t" + sample + "\t" + count + "\t" + strand; + } +}