
Commit

updating hadoop-bam dependency version to 7.5.0-SNAPSHOT, htsjdk to 2.2.1 and minimum JDK to JDK8
heuermh committed Apr 27, 2016
1 parent 70eaa70 commit 760270c
Showing 11 changed files with 71 additions and 26 deletions.
2 changes: 2 additions & 0 deletions .travis.yml
@@ -1 +1,3 @@
 language: java
+jdk:
+- oraclejdk8
adam-core/src/main/scala/org/bdgenomics/adam/io/BAMFilteredRecordReader.scala
@@ -18,7 +18,6 @@

package org.bdgenomics.adam.io

-import hbparquet.hadoop.util.ContextUtil
import htsjdk.samtools.BAMRecordCodec
import htsjdk.samtools.ValidationStringency
import htsjdk.samtools.SAMRecord
@@ -72,7 +71,7 @@ class BAMFilteredRecordReader extends BAMRecordReader {
    }
    isInitialized = true

-   val conf: Configuration = ContextUtil.getConfiguration(ctx)
+   val conf: Configuration = ctx.getConfiguration()

    val split: FileVirtualSplit = spl.asInstanceOf[FileVirtualSplit]
    val file: Path = split.getPath()
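For context on the ContextUtil removals in this commit: hadoop-bam 7.x targets Hadoop 2.x, where TaskAttemptContext is an interface that exposes getConfiguration() directly, so the hbparquet ContextUtil compatibility shim is no longer needed. A minimal sketch (the helper name configOf is illustrative, not part of the commit):

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapreduce.TaskAttemptContext

// Under Hadoop 1.x, TaskAttemptContext was a concrete class and ContextUtil
// papered over the 1.x/2.x binary incompatibility; under Hadoop 2.x the
// interface itself provides the configuration.
def configOf(ctx: TaskAttemptContext): Configuration = ctx.getConfiguration()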
adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala
@@ -56,7 +56,7 @@ import org.bdgenomics.utils.instrumentation.Metrics
import org.bdgenomics.utils.io.LocalFileByteAccess
import org.bdgenomics.utils.misc.HadoopUtil
import org.seqdoop.hadoop_bam._
-import org.seqdoop.hadoop_bam.util.SAMHeaderReader
+import org.seqdoop.hadoop_bam.util.{ BGZFCodec, SAMHeaderReader }
import scala.collection.JavaConversions._
import scala.collection.Map
import scala.reflect.ClassTag
@@ -578,6 +578,7 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Logging
  def loadVcf(filePath: String, sd: Option[SequenceDictionary]): RDD[VariantContext] = {
    val job = HadoopUtil.newJob(sc)
    val vcc = new VariantContextConverter(sd)
+   job.getConfiguration().set("io.compression.codecs", classOf[BGZFCodec].getCanonicalName())
    val records = sc.newAPIHadoopFile(
      filePath,
      classOf[VCFInputFormat], classOf[LongWritable], classOf[VariantContextWritable],
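For context, a minimal standalone sketch of what the added codec registration does (assumes Spark 1.x and hadoop-bam 7.5.0 on the classpath; readVcfRecords is an illustrative name, not part of the commit): Hadoop resolves a compression codec for each input file from the io.compression.codecs setting, and BGZFCodec handles the blocked-gzip (BGZF) framing used by bgzip'd VCF and BCF files, which is what lets the same newAPIHadoopFile call read the compressed fixtures added below.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.LongWritable
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.seqdoop.hadoop_bam.{ VCFInputFormat, VariantContextWritable }
import org.seqdoop.hadoop_bam.util.BGZFCodec

// Mirrors the registration performed in loadVcf above, on a copy of the
// SparkContext's Hadoop configuration.
def readVcfRecords(sc: SparkContext, path: String): RDD[(LongWritable, VariantContextWritable)] = {
  val conf = new Configuration(sc.hadoopConfiguration)
  conf.set("io.compression.codecs", classOf[BGZFCodec].getCanonicalName())
  sc.newAPIHadoopFile(
    path,
    classOf[VCFInputFormat], classOf[LongWritable], classOf[VariantContextWritable],
    conf)
}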
adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMBAMOutputFormat.scala
@@ -18,7 +18,6 @@
package org.bdgenomics.adam.rdd.read

import htsjdk.samtools.SAMFileHeader
-import hbparquet.hadoop.util.ContextUtil
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.{ OutputFormat, RecordWriter, TaskAttemptContext }
import org.apache.spark.rdd.InstrumentedOutputFormat
@@ -85,7 +84,7 @@ class ADAMBAMOutputFormatHeaderLess[K]
  setWriteHeader(false)

  override def getRecordWriter(context: TaskAttemptContext): RecordWriter[K, SAMRecordWritable] = {
-   val conf = ContextUtil.getConfiguration(context)
+   val conf = context.getConfiguration()

    // where is our header file?
    val path = new Path(conf.get("org.bdgenomics.adam.rdd.read.bam_header_path"))
adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMSAMOutputFormat.scala
@@ -18,7 +18,6 @@
package org.bdgenomics.adam.rdd.read

import htsjdk.samtools.SAMFileHeader
-import hbparquet.hadoop.util.ContextUtil
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.{ OutputFormat, RecordWriter, TaskAttemptContext }
import org.apache.spark.rdd.InstrumentedOutputFormat
@@ -86,7 +85,7 @@ class ADAMSAMOutputFormatHeaderLess[K]
  setWriteHeader(false)

  override def getRecordWriter(context: TaskAttemptContext): RecordWriter[K, SAMRecordWritable] = {
-   val conf = ContextUtil.getConfiguration(context)
+   val conf = context.getConfiguration()

    // where is our header file?
    val path = new Path(conf.get("org.bdgenomics.adam.rdd.read.bam_header_path"))
Binary file added adam-core/src/test/resources/test.compressed.bcf
Binary file added adam-core/src/test/resources/test.uncompressed.bcf
Binary file added adam-core/src/test/resources/test.vcf.bgzf.gz
Binary file added adam-core/src/test/resources/test.vcf.gz
adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala
@@ -281,6 +281,30 @@ class ADAMContextSuite extends ADAMFunSuite {
    assert(gt.getReadDepth === 20)
  }

+  sparkTest("can read a gzipped .vcf file") {
+    val path = resourcePath("test.vcf.gz")
+    val vcs = sc.loadVcf(path, None)
+    assert(vcs.count === 6)
+  }
+
+  sparkTest("can read a BGZF gzipped .vcf file") {
+    val path = resourcePath("test.vcf.bgzf.gz")
+    val vcs = sc.loadVcf(path, None)
+    assert(vcs.count === 6)
+  }
+
+  sparkTest("can read an uncompressed BCFv2.1 file") {
+    val path = resourcePath("test.uncompressed.bcf")
+    val vcs = sc.loadVcf(path, None)
+    assert(vcs.count === 6)
+  }
+
+  sparkTest("can read a BGZF compressed BCFv2.1 file") {
+    val path = resourcePath("test.compressed.bcf")
+    val vcs = sc.loadVcf(path, None)
+    assert(vcs.count === 6)
+  }
+
  (1 to 4) foreach { testNumber =>
    val inputName = "interleaved_fastq_sample%d.ifq".format(testNumber)
    val path = ClassLoader.getSystemClassLoader.getResource(inputName).getFile
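For context, the new fixtures distinguish plain gzip from BGZF: BGZF output is gzip-compatible but written as a series of independently compressed blocks, terminated by an end-of-file marker. A hedged sketch (helper name bgzfCompress is illustrative; assumes htsjdk 2.2.x) of how such a fixture can be produced with htsjdk's BlockCompressedOutputStream:

import java.io.{ File, FileInputStream }
import htsjdk.samtools.util.BlockCompressedOutputStream

// Copies a plain-text VCF into a BGZF container, as in test.vcf.bgzf.gz;
// a plain java.util.zip.GZIPOutputStream would produce the test.vcf.gz
// variant instead.
def bgzfCompress(in: File, out: File): Unit = {
  val os = new BlockCompressedOutputStream(out)
  val is = new FileInputStream(in)
  val buf = new Array[Byte](8192)
  Iterator.continually(is.read(buf)).takeWhile(_ != -1).foreach { n =>
    os.write(buf, 0, n)
  }
  is.close()
  os.close() // close() appends the BGZF end-of-file marker block
}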
59 changes: 40 additions & 19 deletions pom.xml
@@ -17,17 +17,19 @@
<url>http://bdgenomics.org/</url>

<properties>
-<java.version>1.7</java.version>
-<avro.version>1.7.7</avro.version>
+<java.version>1.8</java.version>
+<avro.version>1.7.7</avro.version> <!-- note: 1.8.0 is available -->
<!-- informative only, used in about template substitution -->
<scala.version>2.10</scala.version>
<spark.version>1.5.2</spark.version>
<parquet.version>1.8.1</parquet.version>
<!-- Edit the following line to configure the Hadoop (HDFS) version. -->
<hadoop.version>2.6.0</hadoop.version>
+<hadoop-bam.version>7.5.0-SNAPSHOT</hadoop-bam.version> <!-- note: currently builds against hadoop 2.2.0 -->
<scoverage.version>1.1.1</scoverage.version>
+<slf4j.version>1.7.21</slf4j.version>
<utils.version>0.2.4</utils.version>
-<htsjdk.version>1.139</htsjdk.version>
+<htsjdk.version>2.2.1</htsjdk.version>
</properties>

<modules>
@@ -112,7 +114,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
-<version>3.3</version>
+<version>3.5.1</version>
<configuration>
<source>${java.version}</source>
<target>${java.version}</target>
@@ -140,8 +142,8 @@
<message>ADAM requires Maven 3.1.1 or greater</message>
</requireMavenVersion>
<requireJavaVersion>
-<version>[1.7,)</version>
-<message>ADAM requires Java 1.7 or greater</message>
+<version>[1.8,)</version>
+<message>ADAM requires Java 1.8 or greater</message>
</requireJavaVersion>
</rules>
</configuration>
@@ -166,17 +168,17 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
-<version>2.4.1</version>
+<version>2.4.3</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
-<version>2.18.1</version>
+<version>2.19.1</version>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>templating-maven-plugin</artifactId>
-<version>1.0-alpha-3</version>
+<version>1.0.0</version>
</plugin>
<plugin>
<groupId>org.scalatest</groupId>
@@ -186,7 +188,7 @@
<plugin>
<groupId>pl.project13.maven</groupId>
<artifactId>git-commit-id-plugin</artifactId>
-<version>2.2.0</version>
+<version>2.2.1</version>
<configuration>
<gitDescribe>
<always>true</always>
@@ -222,7 +224,12 @@
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
-<version>1.9.1</version>
+<version>1.10</version>
</plugin>
+<plugin>
+<groupId>org.scalariform</groupId>
+<artifactId>scalariform-maven-plugin</artifactId>
+<version>0.1.4</version>
+</plugin>
</plugins>
</pluginManagement>
@@ -282,7 +289,6 @@
<plugin>
<groupId>org.scalariform</groupId>
<artifactId>scalariform-maven-plugin</artifactId>
-<version>0.1.4</version>
<executions>
<execution>
<id>default-cli</id>
@@ -310,7 +316,7 @@
</dependency>
<dependency>
<groupId>org.scoverage</groupId>
-<artifactId>scalac-scoverage-plugin_2.10</artifactId>
+<artifactId>scalac-scoverage-plugin_2.10</artifactId> <!-- note: this shows up as compile scope -->
<version>${scoverage.version}</version>
</dependency>
<dependency>
@@ -463,7 +469,7 @@
<dependency>
<groupId>org.seqdoop</groupId>
<artifactId>hadoop-bam</artifactId>
-<version>7.1.0</version>
+<version>${hadoop-bam.version}</version>
<exclusions>
<exclusion>
<groupId>org.seqdoop</groupId>
@@ -484,23 +490,38 @@
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_2.10</artifactId>
-<version>2.2.5</version>
+<version>2.2.6</version>
<scope>test</scope>
</dependency>
+<dependency>
+<groupId>org.slf4j</groupId>
+<artifactId>jcl-over-slf4j</artifactId>
+<version>${slf4j.version}</version>
+</dependency>
+<dependency>
+<groupId>org.slf4j</groupId>
+<artifactId>jul-to-slf4j</artifactId>
+<version>${slf4j.version}</version>
+</dependency>
+<dependency>
+<groupId>org.slf4j</groupId>
+<artifactId>slf4j-api</artifactId>
+<version>${slf4j.version}</version>
+</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
-<version>1.7.12</version>
+<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
-<version>4.5.1</version>
+<version>4.5.2</version>
</dependency>
<dependency>
<groupId>com.netflix.servo</groupId>
<artifactId>servo-core</artifactId>
-<version>0.10.0</version> <!-- note: version 0.11.0 is jdk8-only -->
+<version>0.10.0</version> <!-- note: versions 0.11.0+ (currently 0.12.3) are jdk8-only -->
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
@@ -522,7 +543,7 @@
<dependency>
<groupId>net.codingwell</groupId>
<artifactId>scala-guice_2.10</artifactId>
-<version>4.0.0</version>
+<version>4.0.1</version>
</dependency>
</dependencies>
</dependencyManagement>
