bigdatagenomics · fnothaft · May 9, 2014 · May 8, 2014
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/ADAMVCFOutputFormat.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/ADAMVCFOutputFormat.scala
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2014. Regents of the University of California
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.bdgenomics.adam.rdd.variation
+
+import org.broadinstitute.variant.vcf.{ VCFHeaderLine, VCFHeader }
+import org.bdgenomics.adam.converters.VariantAnnotationConverter
+import fi.tkk.ics.hadoop.bam.{ VCFFormat, KeyIgnoringVCFOutputFormat }
+import scala.collection.JavaConversions._
+
+/** Singleton holder for the VCF header shared by ADAMVCFOutputFormat instances. */
+object ADAMVCFOutputFormat {
+  private var header: Option[VCFHeader] = None
+
+  /** Returns the stored header; if none has been set, creates and stores one with no samples. */
+  def getHeader: VCFHeader = header.getOrElse(setHeader(Seq()))
+
+  /** Builds a header from the converter's info and format lines plus `samples`, stores and returns it. */
+  def setHeader(samples: Seq[String]): VCFHeader = {
+    val lines: Set[VCFHeaderLine] = (VariantAnnotationConverter.infoHeaderLines ++ VariantAnnotationConverter.formatHeaderLines).toSet
+    val built = new VCFHeader(lines, samples)
+    header = Some(built)
+    built
+  }
+}
+
+/**
+ * Wrapper for Hadoop-BAM to work around its requirement for a no-args constructor. The header is
+ * read from the ADAMVCFOutputFormat object when an instance is constructed, so that object holds
+ * the global state (such as samples) and should have setHeader called on it beforehand.
+ * @tparam K key type forwarded to KeyIgnoringVCFOutputFormat
+ */
+class ADAMVCFOutputFormat[K] extends KeyIgnoringVCFOutputFormat[K](VCFFormat.VCF) {
+  setHeader(ADAMVCFOutputFormat.getHeader)
+}
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/ADAMVariationContext.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/ADAMVariationContext.scala
@@ -17,46 +17,17 @@
 package org.bdgenomics.adam.rdd.variation
 
 import org.bdgenomics.adam.avro.{ ADAMDatabaseVariantAnnotation, ADAMGenotype }
-import org.bdgenomics.adam.converters.{ VariantAnnotationConverter, VariantContextConverter }
+import org.bdgenomics.adam.converters.VariantContextConverter
 import org.bdgenomics.adam.models.{ ADAMVariantContext, SequenceDictionary }
 import org.bdgenomics.adam.rdd.variation.ADAMVariationContext._
 import fi.tkk.ics.hadoop.bam._
 import org.apache.hadoop.io.LongWritable
-import org.apache.hadoop.mapreduce.Job
 import org.apache.spark.{ SparkContext, Logging }
 import org.apache.spark.SparkContext._
 import org.apache.spark.rdd.RDD
-import org.broadinstitute.variant.vcf.{ VCFHeaderLine, VCFHeader }
 import parquet.hadoop.util.ContextUtil
-import scala.collection.JavaConversions._
 import org.bdgenomics.adam.util.HadoopUtil
 
-private object ADAMVCFOutputFormat {
-  private var header: Option[VCFHeader] = None
-
-  def getHeader: VCFHeader = header match {
-    case Some(h) => h
-    case None    => setHeader(Seq())
-  }
-
-  def setHeader(samples: Seq[String]): VCFHeader = {
-    header = Some(new VCFHeader(
-      (VariantAnnotationConverter.infoHeaderLines ++ VariantAnnotationConverter.formatHeaderLines).toSet: Set[VCFHeaderLine],
-      samples))
-    header.get
-  }
-}
-
-/**
- * Wrapper for Hadoop-BAM to work around requirement for no-args constructor. Depends on
- * ADAMVCFOutputFormat object to maintain global state (such as samples)
- *
- * @tparam K
- */
-private class ADAMVCFOutputFormat[K] extends KeyIgnoringVCFOutputFormat[K](VCFFormat.VCF) {
-  setHeader(ADAMVCFOutputFormat.getHeader)
-}
-
 object ADAMVariationContext {
   implicit def sparkContextToADAMVariationContext(sc: SparkContext): ADAMVariationContext = new ADAMVariationContext(sc)
   implicit def rddToADAMVariantContextRDD(rdd: RDD[ADAMVariantContext]) = new ADAMVariantContextRDDFunctions(rdd)
@@ -100,7 +71,7 @@ class ADAMVariationContext(sc: SparkContext) extends Serializable with Logging {
     log.info("Writing %s file to %s".format(vcfFormat, filePath))
 
     // Initialize global header object required by Hadoop VCF Writer
-    ADAMVCFOutputFormat.setHeader(variants.adamGetCallsetSamples)
+    ADAMVCFOutputFormat.setHeader(variants.adamGetCallsetSamples())
 
     // TODO: Sort variants according to sequence dictionary (if supplied)
     val converter = new VariantContextConverter(dict)
@@ -117,7 +88,7 @@ class ADAMVariationContext(sc: SparkContext) extends Serializable with Logging {
       classOf[LongWritable], classOf[VariantContextWritable], classOf[ADAMVCFOutputFormat[LongWritable]],
       conf)
 
-    log.info("Write %d records".format(gatkVCs.count))
+    log.info("Write %d records".format(gatkVCs.count()))
   }
 }
 
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/ADAMVCFOutputFormat.scala b/adam-core/src/main/scala/org/bdgenomics/adam/util/ADAMVCFOutputFormat.scala