Skip to content

Commit

Permalink
[ADAM-1083] Cleaning up org.bdgenomics.adam.models.
Browse files Browse the repository at this point in the history
Along with #1263 and #1264, this resolves #1083.

* Removing unused org.bdgenomics.adam.models.ReadBucket class.
* Move org.bdgenomics.adam.models.ReferencePositionPair and
  org.bdgenomics.adam.models.SingleReadBucket into org.bdgenomics.adam.rdd.read
  and make package private.
* Clean up duplicated methods and methods that were incorrectly in companion
  singleton for SequenceDictionary and ReadGroupDictionary.
* Removed all SamReader references.
* Make writable file headers private to ADAM.
* Eliminated manual VCF parsing code in SnpTable.
* Cleaned up scaladoc for all classes and singleton objects.
* Moved `NonoverlappingRegions` test code out of `InnerBroadcastRegionJoinSuite`.
  • Loading branch information
fnothaft authored and heuermh committed Nov 16, 2016
1 parent f5cd15e commit c1b4b7d
Show file tree
Hide file tree
Showing 26 changed files with 771 additions and 458 deletions.
25 changes: 19 additions & 6 deletions adam-core/src/main/scala/org/bdgenomics/adam/models/Alphabet.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,7 @@ package org.bdgenomics.adam.models
import scala.util.Try

/**
* Created by bryan on 4/17/15.
*
* An alphabet of symbols and related operations
*
*/
trait Alphabet {

Expand All @@ -45,11 +42,16 @@ trait Alphabet {
symbols.flatMap(symbol => Seq(symbol.label.toLower -> symbol, symbol.label.toUpper -> symbol)).toMap

/**
* Reverses the string and complements each residue.
*
* Fails if a residue has no complement.
*
* @param s Each char in this string represents a symbol on the alphabet.
* If the char is not in the alphabet then an IllegalArgumentException is thrown.
* @return the reversed complement of the given string.
* @throws IllegalArgumentException if the string contains a symbol which is not in the alphabet
*
* @see reverseComplement
*/
def reverseComplementExact(s: String): String = {
reverseComplement(
Expand All @@ -59,30 +61,38 @@ trait Alphabet {
}

/**
* Reverses the string and complements each residue.
*
* If a residue has no known complement, that residue is replaced with a
* placeholder "not-found" value.
*
* @param s Each char in this string represents a symbol on the alphabet.
* @param notFound If the char is not in the alphabet then this function is called.
* The default behavior is to return a new Symbol whose label and complement are
* both the unknown character, so that the unknown char is treated as its own
* complement.
* @return the reversed complement of the given string.
*
* @see reverseComplementExact
*/
def reverseComplement(s: String, notFound: (Char => Symbol) = ((c: Char) => Symbol(c, c))): String = {
  // Only the alphabet lookup is wrapped in Try; an exception thrown by
  // notFound itself is not caught here and propagates to the caller.
  s.map(x => Try(apply(x)).getOrElse(notFound(x)).complement).reverse
}

/** number of symbols in the alphabet */
/**
* The number of symbols in the alphabet.
*
* @return the size of this alphabet's symbol collection.
*/
def size: Int = symbols.size

/**
* Looks up the symbol that this alphabet holds for a character.
*
* @param c char to look up as a symbol in this alphabet
* @return the symbol that the alphabet's symbol map holds for the given char
*/
def apply(c: Char): Symbol = {
  symbolMap(c)
}

}

/**
* A symbol in an alphabet
* A symbol in an alphabet.
*
* @param label a character which represents the symbol
* @param complement a character which represents the complement of the symbol
*/
Expand All @@ -103,6 +113,9 @@ class DNAAlphabet extends Alphabet {
)
}

/**
* Companion singleton holding a shared reference to every supported alphabet.
*/
object Alphabet {
  // The shared DNA alphabet instance.
  val dna: DNAAlphabet = new DNAAlphabet()
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,20 @@
package org.bdgenomics.adam.models

/**
* A wrapper around the attrTuple (key) and value pair. Includes the attrTuple-type explicitly, rather than
* embedding the corresponding information in the type of 'value', because otherwise it'd be difficult
* to extract the correct type for Byte and NumericSequence values.
* A wrapper around the attrTuple (key) and value pair seen in many formats.
*
* Roughly analogous to Picards SAMTagAndValue.
* Includes the attrTuple-type explicitly, rather than embedding the
* corresponding information in the type of 'value', because otherwise it'd be
* difficult to extract the correct type for Byte and NumericSequence values.
*
* This class is roughly analogous to htsjdk's SAMTagAndValue.
*
* @param tag The string key associated with this pair.
* @param tagType An enumerated value representing the type of the 'value' parameter.
* @param value The 'value' half of the pair.
*/
case class Attribute(tag: String, tagType: TagType.Value, value: Any) {

override def toString: String = {
val byteSequenceTypes = Array(TagType.NumericByteSequence, TagType.NumericUnsignedByteSequence)
val intSequenceTypes = Array(TagType.NumericIntSequence, TagType.NumericUnsignedIntSequence)
Expand All @@ -47,25 +50,82 @@ case class Attribute(tag: String, tagType: TagType.Value, value: Any) {
}
}

/**
* An enumeration that describes the different data types that can be stored in
* an attribute.
*/
object TagType extends Enumeration {

/**
* A representation of the type of data stored in a tagged field.
*
* @param abbreviation A string describing the data type underlying the
* attribute. The string values that are stored with the attribute come from
* the SAM file format spec: http://samtools.sourceforge.net/SAMv1.pdf
*/
class TypeVal(val abbreviation: String) extends Val(nextId, abbreviation) {
override def toString(): String = abbreviation
}
def TypeValue(abbreviation: String): Val = new TypeVal(abbreviation)

// These String values come from the SAM file format spec: http://samtools.sourceforge.net/SAMv1.pdf
private def TypeValue(abbreviation: String): Val = new TypeVal(abbreviation)

/**
* An attribute storing a character. SAM "A".
*/
val Character = TypeValue("A")

/**
* An attribute storing an integer. SAM "i".
*/
val Integer = TypeValue("i")

/**
* An attribute storing a floating point value. SAM "f".
*/
val Float = TypeValue("f")

/**
* An attribute storing a string. SAM "Z".
*/
val String = TypeValue("Z")

/**
* An attribute storing hex formatted bytes. SAM "H".
*/
val ByteSequence = TypeValue("H")

/**
* An attribute storing a numeric array of signed bytes. SAM "B:c".
*/
val NumericByteSequence = TypeValue("B:c")

/**
* An attribute storing a numeric array of signed ints. SAM "B:i".
*/
val NumericIntSequence = TypeValue("B:i")

/**
* An attribute storing a numeric array of signed short ints. SAM "B:s".
*/
val NumericShortSequence = TypeValue("B:s")

/**
* An attribute storing a numeric array of unsigned bytes. SAM "B:C".
*/
val NumericUnsignedByteSequence = TypeValue("B:C")

/**
* An attribute storing a numeric array of unsigned ints. SAM "B:I".
*/
val NumericUnsignedIntSequence = TypeValue("B:I")

/**
* An attribute storing a numeric array of unsigned short ints. SAM "B:S".
*/
val NumericUnsignedShortSequence = TypeValue("B:S")
val NumericFloatSequence = TypeValue("B:f")

/**
* An attribute storing a numeric array of floats. SAM "B:f".
*/
val NumericFloatSequence = TypeValue("B:f")
}
42 changes: 24 additions & 18 deletions adam-core/src/main/scala/org/bdgenomics/adam/models/Coverage.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ import org.apache.spark.rdd.RDD
import org.bdgenomics.formats.avro.Feature

/**
* Converts from avro Feature to Coverage.
* Singleton object for converting from Avro Feature to Coverage.
*/
object Coverage {
private[adam] object Coverage {

/**
* Creates Coverage from ReferenceRegion and coverage count in that ReferenceRegion.
Expand All @@ -32,7 +32,7 @@ object Coverage {
* @param count Coverage count for each base pair in region
* @return Coverage spanning the specified ReferenceRegion
*/
private[adam] def apply(region: ReferenceRegion, count: Double): Coverage = {
def apply(region: ReferenceRegion, count: Double): Coverage = {
Coverage(region.referenceName, region.start, region.end, count)
}

Expand All @@ -42,8 +42,11 @@ object Coverage {
* @param feature Feature to create coverage from
* @return Coverage spanning the specified feature
*/
private[adam] def apply(feature: Feature): Coverage = {
Coverage(feature.getContigName, feature.getStart, feature.getEnd, feature.getScore)
def apply(feature: Feature): Coverage = {
Coverage(feature.getContigName,
feature.getStart,
feature.getEnd,
feature.getScore)
}

/**
Expand All @@ -52,20 +55,23 @@ object Coverage {
* @param rdd RDD of Features to extract Coverage from
* @return RDD of Coverage spanning all features in rdd
*/
private[adam] def apply(rdd: RDD[Feature]): RDD[Coverage] = {
def apply(rdd: RDD[Feature]): RDD[Coverage] = {
rdd.map(f => Coverage(f))
}
}

/**
* Coverage record for CoverageRDD.
* Contains Region indexed by contig name, start and end, as well as count of coverage at
* each base pair in that region.
*
* @param contigName Specifies chromosomal location of coverage
* @param start Specifies start position of coverage
* @param end Specifies end position of coverage
* @param count Specifies count of coverage at location
* Contains Region indexed by contig name, start and end, as well as the average
* coverage at each base pair in that region.
*
* @param contigName The chromosome that this coverage was observed on.
* @param start The start coordinate of the region where this coverage value was
* observed.
* @param end The end coordinate of the region where this coverage value was
* observed.
* @param count The average coverage across this region.
*/
case class Coverage(contigName: String, start: Long, end: Long, count: Double) {

Expand All @@ -75,12 +81,12 @@ case class Coverage(contigName: String, start: Long, end: Long, count: Double) {
* @return Feature built from Coverage
*/
def toFeature: Feature = {
val fb = Feature.newBuilder()
fb.setContigName(contigName)
fb.setStart(start)
fb.setEnd(end)
fb.setScore(count)
fb.build()
Feature.newBuilder()
.setContigName(contigName)
.setStart(start)
.setEnd(end)
.setScore(count)
.build()
}
}

Loading

0 comments on commit c1b4b7d

Please sign in to comment.