[ADAM-1083] Cleaning up org.bdgenomics.adam.models.

Along with #1263 and #1264, this resolves #1083. * Removed the unused org.bdgenomics.adam.models.ReadBucket class. * Moved org.bdgenomics.adam.models.ReferencePositionPair and org.bdgenomics.adam.models.SingleReadBucket into org.bdgenomics.adam.rdd.read and made them package private. * Cleaned up duplicated methods and methods that were incorrectly placed in the companion singletons for SequenceDictionary and ReadGroupDictionary. * Removed all SamReader references. * Made writable file headers private to ADAM. * Eliminated manual VCF parsing code in SnpTable. * Cleaned up scaladoc for all classes and singleton objects. * Moved `NonoverlappingRegions` test code out of `InnerBroadcastRegionJoinSuite`.
bigdatagenomics · Nov 16, 2016 · c1b4b7d · c1b4b7d
1 parent f5cd15e
commit c1b4b7d
Show file tree

Hide file tree

Showing 26 changed files with 771 additions and 458 deletions.
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/Alphabet.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/Alphabet.scala
@@ -20,10 +20,7 @@ package org.bdgenomics.adam.models
 import scala.util.Try
 
 /**
- * Created by bryan on 4/17/15.
- *
  * An alphabet of symbols and related operations
- *
  */
 trait Alphabet {
 
@@ -45,11 +42,16 @@ trait Alphabet {
       symbols.flatMap(symbol => Seq(symbol.label.toLower -> symbol, symbol.label.toUpper -> symbol)).toMap
 
   /**
+   * Reverses the string and complements each residue.
+   *
+   * Fails if a residue has no complement.
    *
    * @param s Each char in this string represents a symbol on the alphabet.
    *          If the char is not in the alphabet then a NoSuchElementException is thrown
    * @return the reversed complement of the given string.
    * @throws IllegalArgumentException if the string contains a symbol which is not in the alphabet
+   *
+   * @see reverseComplement
    */
   def reverseComplementExact(s: String): String = {
     reverseComplement(
@@ -59,30 +61,38 @@ trait Alphabet {
   }
 
   /**
+   * Reverses the string and complements each residue.
+   *
+   * If a residue has no known complement, that residue is replaced with a
+   * placeholder "not-found" value.
    *
    * @param s Each char in this string represents a symbol on the alphabet.
    * @param notFound If the char is not in the alphabet then this function is called.
    *                 default behavior is to return a new Symbol representing the unknown character,
    *                 so that the unknown char is treated as the complement
    * @return the reversed complement of the given string.
+   *
+   * @see reverseComplementExact
    */
   def reverseComplement(s: String, notFound: (Char => Symbol) = ((c: Char) => Symbol(c, c))) = {
     s.map(x => Try(apply(x)).getOrElse(notFound(x)).complement).reverse
   }
 
-  /** number of symbols in the alphabet */
+  /**
+   * The number of symbols in the alphabet.
+   */
   def size = symbols.size
 
   /**
    * @param c char to lookup as a symbol in this alphabet
    * @return the symbol in this alphabet that the given char represents
    */
   def apply(c: Char): Symbol = symbolMap(c)
-
 }
 
 /**
- * A symbol in an alphabet
+ * A symbol in an alphabet.
+ *
  * @param label a character which represents the symbol
  * @param complement a character which represents the complement of the symbol
  */
@@ -103,6 +113,9 @@ class DNAAlphabet extends Alphabet {
   )
 }
 
+/**
+ * Singleton object with references to all supported alphabets.
+ */
 object Alphabet {
   val dna = new DNAAlphabet
 }
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/Attribute.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/Attribute.scala
@@ -18,17 +18,20 @@
 package org.bdgenomics.adam.models
 
 /**
- * A wrapper around the attrTuple (key) and value pair.  Includes the attrTuple-type explicitly, rather than
- * embedding the corresponding information in the type of 'value', because otherwise it'd be difficult
- * to extract the correct type for Byte and NumericSequence values.
+ * A wrapper around the attrTuple (key) and value pair seen in many formats.
  *
- * Roughly analogous to Picards SAMTagAndValue.
+ * Includes the attrTuple-type explicitly, rather than embedding the
+ * corresponding information in the type of 'value', because otherwise it'd be
+ * difficult to extract the correct type for Byte and NumericSequence values.
+ *
+ * This class is roughly analogous to htsjdk's SAMTagAndValue.
  *
  * @param tag The string key associated with this pair.
  * @param tagType An enumerated value representing the type of the 'value' parameter.
  * @param value The 'value' half of the pair.
  */
 case class Attribute(tag: String, tagType: TagType.Value, value: Any) {
+
   override def toString: String = {
     val byteSequenceTypes = Array(TagType.NumericByteSequence, TagType.NumericUnsignedByteSequence)
     val intSequenceTypes = Array(TagType.NumericIntSequence, TagType.NumericUnsignedIntSequence)
@@ -47,25 +50,82 @@ case class Attribute(tag: String, tagType: TagType.Value, value: Any) {
   }
 }
 
+/**
+ * An enumeration that describes the different data types that can be stored in
+ * an attribute.
+ */
 object TagType extends Enumeration {
 
+  /**
+   * A representation of the type of data stored in a tagged field.
+   *
+   * @param abbreviation A string describing the data type underlying the
+   *   attribute. The string values that are stored with the attribute come from
+   *   the SAM file format spec: http://samtools.sourceforge.net/SAMv1.pdf
+   */
   class TypeVal(val abbreviation: String) extends Val(nextId, abbreviation) {
     override def toString(): String = abbreviation
   }
-  def TypeValue(abbreviation: String): Val = new TypeVal(abbreviation)
 
-  // These String values come from the SAM file format spec: http://samtools.sourceforge.net/SAMv1.pdf
+  private def TypeValue(abbreviation: String): Val = new TypeVal(abbreviation)
+
+  /**
+   * An attribute storing a character. SAM "A".
+   */
   val Character = TypeValue("A")
+
+  /**
+   * An attribute storing an integer. SAM "i".
+   */
   val Integer = TypeValue("i")
+
+  /**
+   * An attribute storing a floating point value. SAM "f".
+   */
   val Float = TypeValue("f")
+
+  /**
+   * An attribute storing a string. SAM "Z".
+   */
   val String = TypeValue("Z")
+
+  /**
+   * An attribute storing hex formatted bytes. SAM "H".
+   */
   val ByteSequence = TypeValue("H")
+
+  /**
+   * An attribute storing a numeric array of signed bytes. SAM "B:c".
+   */
   val NumericByteSequence = TypeValue("B:c")
+
+  /**
+   * An attribute storing a numeric array of signed ints. SAM "B:i".
+   */
   val NumericIntSequence = TypeValue("B:i")
+
+  /**
+   * An attribute storing a numeric array of signed short ints. SAM "B:s".
+   */
   val NumericShortSequence = TypeValue("B:s")
+
+  /**
+   * An attribute storing a numeric array of unsigned bytes. SAM "B:C".
+   */
   val NumericUnsignedByteSequence = TypeValue("B:C")
+
+  /**
+   * An attribute storing a numeric array of unsigned ints. SAM "B:I".
+   */
   val NumericUnsignedIntSequence = TypeValue("B:I")
+
+  /**
+   * An attribute storing a numeric array of unsigned short ints. SAM "B:S".
+   */
   val NumericUnsignedShortSequence = TypeValue("B:S")
-  val NumericFloatSequence = TypeValue("B:f")
 
+  /**
+   * An attribute storing a numeric array of floats. SAM "B:f".
+   */
+  val NumericFloatSequence = TypeValue("B:f")
 }
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/Coverage.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/Coverage.scala
@@ -21,9 +21,9 @@ import org.apache.spark.rdd.RDD
 import org.bdgenomics.formats.avro.Feature
 
 /**
- * Converts from avro Feature to Coverage.
+ * Singleton object for converting from Avro Feature to Coverage.
  */
-object Coverage {
+private[adam] object Coverage {
 
   /**
    * Creates Coverage from ReferenceRegion and coverage count in that ReferenceRegion.
@@ -32,7 +32,7 @@ object Coverage {
    * @param count Coverage count for each base pair in region
    * @return Coverage spanning the specified ReferenceRegion
    */
-  private[adam] def apply(region: ReferenceRegion, count: Double): Coverage = {
+  def apply(region: ReferenceRegion, count: Double): Coverage = {
     Coverage(region.referenceName, region.start, region.end, count)
   }
 
@@ -42,8 +42,11 @@ object Coverage {
    * @param feature Feature to create coverage from
    * @return Coverage spanning the specified feature
    */
-  private[adam] def apply(feature: Feature): Coverage = {
-    Coverage(feature.getContigName, feature.getStart, feature.getEnd, feature.getScore)
+  def apply(feature: Feature): Coverage = {
+    Coverage(feature.getContigName,
+      feature.getStart,
+      feature.getEnd,
+      feature.getScore)
   }
 
   /**
@@ -52,20 +55,23 @@ object Coverage {
    * @param rdd RDD of Features to extract Coverage from
    * @return RDD of Coverage spanning all features in rdd
    */
-  private[adam] def apply(rdd: RDD[Feature]): RDD[Coverage] = {
+  def apply(rdd: RDD[Feature]): RDD[Coverage] = {
     rdd.map(f => Coverage(f))
   }
 }
 
 /**
  * Coverage record for CoverageRDD.
- * Contains Region indexed by contig name, start and end, as well as count of coverage at
- * each base pair in that region.
  *
- * @param contigName Specifies chromosomal location of coverage
- * @param start Specifies start position of coverage
- * @param end  Specifies end position of coverage
- * @param count Specifies count of coverage at location
+ * Contains Region indexed by contig name, start and end, as well as the average
+ * coverage at each base pair in that region.
+ *
+ * @param contigName The chromosome that this coverage was observed on.
+ * @param start The start coordinate of the region where this coverage value was
+ *   observed.
+ * @param end The end coordinate of the region where this coverage value was
+ *   observed.
+ * @param count The average coverage across this region.
  */
 case class Coverage(contigName: String, start: Long, end: Long, count: Double) {
 
@@ -75,12 +81,12 @@ case class Coverage(contigName: String, start: Long, end: Long, count: Double) {
    * @return Feature built from Coverage
    */
   def toFeature: Feature = {
-    val fb = Feature.newBuilder()
-    fb.setContigName(contigName)
-    fb.setStart(start)
-    fb.setEnd(end)
-    fb.setScore(count)
-    fb.build()
+    Feature.newBuilder()
+      .setContigName(contigName)
+      .setStart(start)
+      .setEnd(end)
+      .setScore(count)
+      .build()
   }
 }