Skip to content

Commit

Permalink
address final reviewer comments
Browse files Browse the repository at this point in the history
  • Loading branch information
tedsharpe committed Apr 7, 2022
1 parent 16c7af7 commit 5ae634f
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
*/
public abstract class MultiFeatureWalker<F extends Feature> extends WalkerBase {

SAMSequenceDictionary dictionary;
final Set<String> samples = new TreeSet<>();
private SAMSequenceDictionary dictionary;
private final Set<String> samples = new TreeSet<>();

@Override
public boolean requiresFeatures(){
Expand Down Expand Up @@ -84,6 +84,8 @@ public void traverse() {
*
* @param feature Current Feature being processed.
* @param header Header object for the source from which the feature was drawn (may be null)
* @param readsContext An object that allows querying for the reads that overlap the feature
* @param referenceContext An object that allows querying for the reference sequence associated with the feature
*/
public abstract void apply( final F feature,
final Object header,
Expand All @@ -98,7 +100,7 @@ public abstract void apply( final F feature,
/**
* Get the set of sample names accumulated from the headers of the feature inputs.
* The names are held in a TreeSet, so iteration is in sorted order.
*/
public Set<String> getSampleNames() { return samples; }
public Set<String> getSampleNames() { return Collections.unmodifiableSet(samples); }

/**
* Choose the most comprehensive dictionary available (see betterDictionary method below),
Expand All @@ -111,68 +113,95 @@ public abstract void apply( final F feature,
* dictionaries available.)
*/
private void setDictionaryAndSamples() {
// Start from the master sequence dictionary; every other dictionary found below is
// reconciled against the running choice via betterDictionary.
dictionary = getMasterSequenceDictionary();
DictSource dictSource = new DictSource(getMasterSequenceDictionary(),
StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME);
// The reference, when one was supplied, contributes its dictionary.
if ( hasReference() ) {
dictionary = betterDictionary(reference.getSequenceDictionary(), dictionary);
final DictSource refDictSource = new DictSource(reference.getSequenceDictionary(),
StandardArgumentDefinitions.REFERENCE_LONG_NAME);
dictSource = betterDictionary(refDictSource, dictSource);
}
// So does the reads source, when one was supplied.
if ( hasReads() ) {
dictionary = betterDictionary(reads.getSequenceDictionary(), dictionary);
final DictSource readsDictSource = new DictSource(reads.getSequenceDictionary(), "read-source");
dictSource = betterDictionary(readsDictSource, dictSource);
}
// Each feature input may contribute a dictionary and sample names through its header.
// Only SVFeaturesHeader and VCFHeader are recognized; other header types are ignored.
for ( final FeatureInput<? extends Feature> input : features.getAllInputs() ) {
final Object header = features.getHeader(input);
if ( header instanceof SVFeaturesHeader ) {
final SVFeaturesHeader svFeaturesHeader = (SVFeaturesHeader)header;
dictionary = betterDictionary(svFeaturesHeader.getDictionary(), dictionary);
// The input's name is recorded as the dictionary's source for use in error messages.
final DictSource featureDictSource = new DictSource(svFeaturesHeader.getDictionary(),
input.getName());
dictSource = betterDictionary(featureDictSource, dictSource);
// The header's sample-name list is checked for null before it is accumulated.
final List<String> sampleNames = svFeaturesHeader.getSampleNames();
if ( sampleNames != null ) {
samples.addAll(svFeaturesHeader.getSampleNames());
}
} else if (header instanceof VCFHeader ) {
final VCFHeader vcfHeader = (VCFHeader)header;
dictionary = betterDictionary(vcfHeader.getSequenceDictionary(), dictionary);
final DictSource featureDictSource = new DictSource(vcfHeader.getSequenceDictionary(),
input.getName());
dictSource = betterDictionary(featureDictSource, dictSource);
samples.addAll(vcfHeader.getSampleNamesInOrder());
}
}
// No input supplied a dictionary: report it as a user error naming the two arguments
// that can provide one.
if ( dictionary == null ) {
if ( dictSource.getDictionary() == null ) {
throw new UserException("No dictionary found. Provide one as --" +
StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME + " or --" +
StandardArgumentDefinitions.REFERENCE_LONG_NAME + ".");
}
dictionary = dictSource.getDictionary();
}

/**
* Makes sure that the two dictionaries are consistent with regard to contig names and order.
* Returns the more comprehensive (larger) dictionary if they're consistent.
*
* Consistent means that every contig of the smaller dictionary appears in the larger one,
* and that those contigs occur in the same relative order in both.
*
* @param newDict the newly encountered dictionary (may be absent, i.e. null)
* @param curDict the best dictionary found so far (may be absent, i.e. null)
* @return the other argument if one dictionary is absent; otherwise the larger of the two,
*         with the current dictionary winning when the sizes are equal
* @throws UserException if a contig of the smaller dictionary is missing from the larger one,
*         or if the contigs are not in the same order
*/
private static SAMSequenceDictionary betterDictionary( final SAMSequenceDictionary newDict,
final SAMSequenceDictionary curDict ) {
if ( curDict == null ) return newDict;
if ( newDict == null ) return curDict;
final SAMSequenceDictionary smallDict;
final SAMSequenceDictionary largeDict;
if ( newDict.size() <= curDict.size() ) {
private static DictSource betterDictionary( final DictSource newDict,
final DictSource curDict ) {
// A missing dictionary on either side leaves nothing to compare: keep the other one.
if ( curDict.getDictionary() == null ) return newDict;
if ( newDict.getDictionary() == null ) return curDict;
final DictSource smallDict;
final DictSource largeDict;
// Order the pair by size; on a tie the current dictionary is treated as the larger.
if ( newDict.getDictionary().size() <= curDict.getDictionary().size() ) {
smallDict = newDict;
largeDict = curDict;
} else {
smallDict = curDict;
largeDict = newDict;
}
// lastIdx is the larger-dictionary index of the previously visited contig (-1 before the first).
int lastIdx = -1;
for ( final SAMSequenceRecord rec : smallDict.getSequences() ) {
final int newIdx = largeDict.getSequenceIndex(rec.getContig());
final SAMSequenceDictionary largeDictionary = largeDict.getDictionary();
// Walk the smaller dictionary's contigs: each must be found in the larger dictionary,
// at an index strictly greater than that of the contig before it.
for ( final SAMSequenceRecord rec : smallDict.getDictionary().getSequences() ) {
final int newIdx = largeDictionary.getSequenceIndex(rec.getContig());
// An index of -1 is treated as "contig not present in the larger dictionary".
if ( newIdx == -1 ) {
throw new UserException("Contig " + rec.getContig() +
" not found in the larger dictionary");
throw new UserException("Contig " + rec.getContig() + " in the dictionary read from " +
smallDict.getSource() + " does not appear in the larger dictionary read from " +
largeDict.getSource());
}
// A non-increasing index means the two dictionaries disagree about contig order.
if ( newIdx <= lastIdx ) {
throw new UserException("Contig " + rec.getContig() +
" not in same order as in larger dictionary");
// Name the previously visited contig so the message identifies the conflicting pair.
final String prevContig = largeDictionary.getSequence(lastIdx).getContig();
throw new UserException("Contigs out of order: Contig " + rec.getContig() +
" comes before contig " + prevContig + " in the dictionary read from " +
largeDict.getSource() + ", but follows it in the dictionary read from " +
smallDict.getSource());
}
lastIdx = newIdx;
}
return largeDict;
}

/**
 * Immutable pairing of a sequence dictionary with a description of where it was read from.
 * The description is what the dictionary-consistency error messages report to the user.
 * The dictionary itself may be null when the named source did not supply one.
 */
public static final class DictSource {
    private final String source;
    private final SAMSequenceDictionary dictionary;

    /**
     * @param dictionary the sequence dictionary obtained from the source (may be null)
     * @param source a name identifying where the dictionary came from
     */
    public DictSource( final SAMSequenceDictionary dictionary, final String source ) {
        this.source = source;
        this.dictionary = dictionary;
    }

    /** @return the name identifying where the dictionary came from */
    public String getSource() {
        return source;
    }

    /** @return the wrapped sequence dictionary, exactly as it was passed to the constructor */
    public SAMSequenceDictionary getDictionary() {
        return dictionary;
    }
}

public static final class MergingIterator<F extends Feature> implements Iterator<PQEntry<F>> {
final SAMSequenceDictionary dictionary;
final PriorityQueue<PQEntry<F>> priorityQueue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,7 @@ public class ExampleMultiFeatureWalker extends MultiFeatureWalker<Feature> {
final ReferenceContext referenceContext ) {
// We'll just keep track of the Features we see, in the order that we see them.
features.add(feature);
// And print them
System.out.println(feature);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,8 @@ public boolean equals(Object o) {
public int hashCode() {
    // Fold sample, contig, position and value into a single hash value.
    final int combined = Objects.hash(sample, contig, position, value);
    return combined;
}

/**
 * Renders this object as one tab-separated line: contig, position, sample, value.
 */
@Override
public String toString() {
    final String tab = "\t";
    return contig + tab + position + tab + sample + tab + value;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,13 @@ public int hashCode() {
result = 31 * result + Arrays.hashCode(counts);
return result;
}

/**
 * Renders this object as one tab-separated line: contig, start, end,
 * followed by each element of counts in order.
 */
@Override
public String toString() {
    final String tab = "\t";
    final StringBuilder line = new StringBuilder(contig + tab + start + tab + end);
    for ( final int count : counts ) {
        line.append(tab).append(count);
    }
    return line.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,9 @@ public boolean equals(Object o) {
public int hashCode() {
    // Fold the sample, both contigs, both coordinates and both strands into one hash value.
    final int combined =
            Objects.hash(sample, startContig, endContig, start, end, startStrand, endStrand);
    return combined;
}

/**
 * Renders this object as one tab-separated line:
 * startContig, start, end, sample, endContig, startStrand, endStrand.
 */
@Override
public String toString() {
    final String tab = "\t";
    final String startPart = startContig + tab + start + tab + end + tab + sample;
    final String endPart = endContig + tab + startStrand + tab + endStrand;
    return startPart + tab + endPart;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,8 @@ public boolean equals(Object o) {
public int hashCode() {
    // Fold sample, contig, position, count and strand into a single hash value.
    final int combined = Objects.hash(sample, contig, position, count, strand);
    return combined;
}
}

/**
 * Renders this object as one tab-separated line: contig, position, sample, count, strand.
 */
@Override
public String toString() {
    final String tab = "\t";
    return contig + tab + position + tab + sample + tab + count + tab + strand;
}
}

0 comments on commit 5ae634f

Please sign in to comment.