Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Squashed #907 and ran format-sources #908

Merged
merged 1 commit into from
Jan 7, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,12 @@ class JavaAlignmentRecordRDD(val jrdd: JavaRDD[AlignmentRecord]) extends Seriali
* @param compressCodec Name of the compression codec to use.
* @param disableDictionaryEncoding Whether or not to disable bit-packing.
*/
def adamSave(filePath: java.lang.String,
blockSize: java.lang.Integer,
pageSize: java.lang.Integer,
compressCodec: CompressionCodecName,
disableDictionaryEncoding: java.lang.Boolean) {
def adamSave(
filePath: java.lang.String,
blockSize: java.lang.Integer,
pageSize: java.lang.Integer,
compressCodec: CompressionCodecName,
disableDictionaryEncoding: java.lang.Boolean) {
jrdd.rdd.adamParquetSave(
filePath,
blockSize,
Expand All @@ -62,8 +63,9 @@ class JavaAlignmentRecordRDD(val jrdd: JavaRDD[AlignmentRecord]) extends Seriali
* @param filePath Path to save the file at.
* @param asSam If true, saves as SAM. If false, saves as BAM.
*/
def adamSAMSave(filePath: java.lang.String,
asSam: java.lang.Boolean) {
def adamSAMSave(
filePath: java.lang.String,
asSam: java.lang.Boolean) {
jrdd.rdd.adamSAMSave(filePath, asSam)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,14 @@ object AlleleCountHelper extends Serializable {
}

def countAlleles(adamVariants: RDD[Genotype], args: AlleleCountArgs) {
val usefulData = adamVariants.map(p => (p.getVariant.getContig.getContigName,
val usefulData = adamVariants.map(p => (
p.getVariant.getContig.getContigName,
p.getVariant.getStart,
p.getVariant.getReferenceAllele,
p.getVariant.getAlternateAllele,
p.getAlleles.get(0),
p.getAlleles.get(1)))
p.getAlleles.get(1)
))
val reduced_Variants = usefulData.flatMap(p => Seq((p._1, p._2, p._3, p._4, p._5), (p._1, p._2, p._3, p._4, p._6)))
val alleles = reduced_Variants.flatMap(chooseAllele)
alleles.groupBy(identity).map { case (a, b) => "%s\t%s\t%s\t%d".format(a._1, a._2, a._3, b.size) }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ class CalculateDepth(protected val args: CalculateDepthArgs) extends BDGSparkCom
println("%20s\t%15s\t% 5d".format(
"%s:%d".format(region.referenceName, region.start),
variantNames(region),
count))
count
))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ class CountReadKmers(protected val args: CountReadKmersArgs) extends BDGSparkCom
// read from disk
var adamRecords: RDD[AlignmentRecord] = sc.loadAlignments(
args.inputPath,
projection = Some(Projection(AlignmentRecordField.sequence)))
projection = Some(Projection(AlignmentRecordField.sequence))
)

if (args.repartition != -1) {
log.info("Repartitioning reads to '%d' partitions".format(args.repartition))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ class FlagStat(protected val args: FlagStatArgs) extends BDGSparkCommand[FlagSta
AlignmentRecordField.secondOfPair,
AlignmentRecordField.properPair,
AlignmentRecordField.mapq,
AlignmentRecordField.failedVendorQualityChecks)
AlignmentRecordField.failedVendorQualityChecks
)

val adamFile: RDD[AlignmentRecord] = sc.loadAlignments(args.inputPath, projection = Some(projection))

Expand Down Expand Up @@ -113,7 +114,8 @@ class FlagStat(protected val args: FlagStatArgs) extends BDGSparkCommand[FlagSta
percent(passedVendorQuality.singleton, passedVendorQuality.total),
percent(failedVendorQuality.singleton, failedVendorQuality.total),
passedVendorQuality.withMateMappedToDiffChromosome, failedVendorQuality.withMateMappedToDiffChromosome,
passedVendorQuality.withMateMappedToDiffChromosomeMapQ5, failedVendorQuality.withMateMappedToDiffChromosomeMapQ5)
passedVendorQuality.withMateMappedToDiffChromosomeMapQ5, failedVendorQuality.withMateMappedToDiffChromosomeMapQ5
)

Option(args.outputPath) match {
case Some(outputPath) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class Flatten(val args: FlattenArgs) extends BDGSparkCommand[FlattenArgs] with L
args.pageSize,
args.compressionCodec,
args.disableDictionaryEncoding,
Some(flatSchema))
Some(flatSchema)
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class PrintGenes(protected val args: PrintGenesArgs)
transcript.region.referenceName,
transcript.region.start, transcript.region.end,
if (transcript.strand) "+" else "-",
transcript.exons.size)
transcript.exons.size
)
}
}
21 changes: 13 additions & 8 deletions adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ class Transform(protected val args: TransformArgs) extends BDGSparkCommand[Trans
log.info("Locally realigning indels.")
val consensusGenerator = Option(args.knownIndelsFile)
.fold(new ConsensusGeneratorFromReads().asInstanceOf[ConsensusGenerator])(
new ConsensusGeneratorFromKnowns(_, sc).asInstanceOf[ConsensusGenerator])
new ConsensusGeneratorFromKnowns(_, sc).asInstanceOf[ConsensusGenerator]
)

adamRecords = oldRdd.adamRealignIndels(
consensusGenerator,
Expand Down Expand Up @@ -227,7 +228,8 @@ class Transform(protected val args: TransformArgs) extends BDGSparkCommand[Trans
if ((args.useAlignedReadPredicate || args.limitProjection) &&
(args.forceLoadBam || args.forceLoadFastq || args.forceLoadIFastq)) {
throw new IllegalArgumentException(
"-aligned_read_predicate and -limit_projection only apply to Parquet files, but a non-Parquet force load flag was passed.")
"-aligned_read_predicate and -limit_projection only apply to Parquet files, but a non-Parquet force load flag was passed."
)
}

val rdd =
Expand All @@ -246,7 +248,8 @@ class Transform(protected val args: TransformArgs) extends BDGSparkCommand[Trans
None
}
val proj = if (args.limitProjection) {
Some(Projection(AlignmentRecordField.contig,
Some(Projection(
AlignmentRecordField.contig,
AlignmentRecordField.start,
AlignmentRecordField.end,
AlignmentRecordField.mapq,
Expand All @@ -265,13 +268,16 @@ class Transform(protected val args: TransformArgs) extends BDGSparkCommand[Trans
AlignmentRecordField.duplicateRead,
AlignmentRecordField.mismatchingPositions,
AlignmentRecordField.secondaryAlignment,
AlignmentRecordField.supplementaryAlignment))
AlignmentRecordField.supplementaryAlignment
))
} else {
None
}
sc.loadParquetAlignments(args.inputPath,
sc.loadParquetAlignments(
args.inputPath,
predicate = pred,
projection = proj)
projection = proj
)
} else {
sc.loadAlignments(
args.inputPath,
Expand All @@ -297,8 +303,7 @@ class Transform(protected val args: TransformArgs) extends BDGSparkCommand[Trans
concatFilename,
recordGroupOpt = Option(args.fastqRecordGroup)
)
}
)
})

this.apply(concatRddOpt match {
case Some(concatRdd) => rdd ++ concatRdd
Expand Down
21 changes: 13 additions & 8 deletions adam-cli/src/main/scala/org/bdgenomics/adam/cli/View.scala
Original file line number Diff line number Diff line change
Expand Up @@ -36,41 +36,47 @@ class ViewArgs extends Args4jBase with ParquetArgs with ADAMSaveAnyArgs {
required = false,
name = "-f",
metaVar = "N",
usage = "Restrict to reads that match all of the bits in <N>")
usage = "Restrict to reads that match all of the bits in <N>"
)
var matchAllBits: Int = 0

@Args4jOption(
required = false,
name = "-F",
metaVar = "N",
usage = "Restrict to reads that match none of the bits in <N>")
usage = "Restrict to reads that match none of the bits in <N>"
)
var mismatchAllBits: Int = 0

@Args4jOption(
required = false,
name = "-g",
metaVar = "N",
usage = "Restrict to reads that match any of the bits in <N>")
usage = "Restrict to reads that match any of the bits in <N>"
)
var matchSomeBits: Int = 0

@Args4jOption(
required = false,
name = "-G",
metaVar = "N",
usage = "Restrict to reads that mismatch at least one of the bits in <N>")
usage = "Restrict to reads that mismatch at least one of the bits in <N>"
)
var mismatchSomeBits: Int = 0

@Args4jOption(
required = false,
name = "-c",
usage = "Print count of matching records, instead of the records themselves")
usage = "Print count of matching records, instead of the records themselves"
)
var printCount: Boolean = false

@Args4jOption(
required = false,
name = "-o",
metaVar = "<FILE>",
usage = "Output to <FILE>; can also pass <FILE> as the second argument")
usage = "Output to <FILE>; can also pass <FILE> as the second argument"
)
var outputPathArg: String = null

@Args4jOption(required = false, name = "-sort_fastq_output", usage = "Sets whether to sort the FASTQ output, if saving as FASTQ. False by default. Ignored if not saving as FASTQ.")
Expand Down Expand Up @@ -148,8 +154,7 @@ class View(val args: ViewArgs) extends BDGSparkCommand[ViewArgs] {
reads.filter(read =>
allFilters.forall(_(read)) &&
(matchSomeFilters.isEmpty || matchSomeFilters.exists(_(read))) &&
(mismatchSomeFilters.isEmpty || mismatchSomeFilters.exists(_(read)))
)
(mismatchSomeFilters.isEmpty || mismatchSomeFilters.exists(_(read))))
} else
reads
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class IndexedBamInputFormat extends BAMInputFormat {
override def createRecordReader(split: InputSplit, ctx: TaskAttemptContext): RecordReader[LongWritable, SAMRecordWritable] = {
val rr: RecordReader[LongWritable, SAMRecordWritable] = new BAMFilteredRecordReader()
assert(IndexedBamInputFormat.optViewRegion.isDefined)
BAMFilteredRecordReader.setRegion(IndexedBamInputFormat.optViewRegion.get)
IndexedBamInputFormat.optViewRegion.foreach { (refReg) => BAMFilteredRecordReader.setRegion(refReg) }
rr.initialize(split, ctx)
rr
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,10 @@ abstract class ConsensusGenerator extends Serializable {
* @param reads Reads to preprocess.
* @return Preprocessed reads.
*/
def preprocessReadsForRealignment(reads: Iterable[RichAlignmentRecord],
reference: String,
region: ReferenceRegion): Iterable[RichAlignmentRecord]
def preprocessReadsForRealignment(
reads: Iterable[RichAlignmentRecord],
reference: String,
region: ReferenceRegion): Iterable[RichAlignmentRecord]

/**
* For all reads in this region, generates the list of consensus sequences for realignment.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ class ConsensusGeneratorFromKnowns(file: String, @transient sc: SparkContext) ex
* @param reads Reads to preprocess.
* @return Preprocessed reads.
*/
def preprocessReadsForRealignment(reads: Iterable[RichAlignmentRecord],
reference: String,
region: ReferenceRegion): Iterable[RichAlignmentRecord] = {
def preprocessReadsForRealignment(
reads: Iterable[RichAlignmentRecord],
reference: String,
region: ReferenceRegion): Iterable[RichAlignmentRecord] = {
reads
}

Expand All @@ -67,8 +68,8 @@ class ConsensusGeneratorFromKnowns(file: String, @transient sc: SparkContext) ex
val table = indelTable.value

// get region
val start = reads.map(_.record.getStart.toLong).reduce(_ min _)
val end = reads.map(_.getEnd.toLong).reduce(_ max _)
val start = reads.map(_.record.getStart).min
val end = reads.map(_.getEnd).max
val refId = reads.head.record.getContig.getContigName

val region = ReferenceRegion(refId, start, end + 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,10 @@ class ConsensusGeneratorFromReads extends ConsensusGenerator {
* @param reads Reads to process.
* @return Reads with indels normalized if they contain a single indel.
*/
def preprocessReadsForRealignment(reads: Iterable[RichAlignmentRecord],
reference: String,
region: ReferenceRegion): Iterable[RichAlignmentRecord] = {
def preprocessReadsForRealignment(
reads: Iterable[RichAlignmentRecord],
reference: String,
region: ReferenceRegion): Iterable[RichAlignmentRecord] = {
reads.map(r => {
// if there are two alignment blocks (sequence matches) then there is a single indel in the read
if (r.samtoolsCigar.numAlignmentBlocks == 2) {
Expand Down Expand Up @@ -74,10 +75,14 @@ class ConsensusGeneratorFromReads extends ConsensusGenerator {
.flatMap(r => {
// try to generate a consensus alignment - if a consensus exists, add it to our
// list of consensuses to test
Consensus.generateAlternateConsensus(r.getSequence,
ReferencePosition(r.getContig.getContigName,
r.getStart),
r.samtoolsCigar)
Consensus.generateAlternateConsensus(
r.getSequence,
ReferencePosition(
r.getContig.getContigName,
r.getStart
),
r.samtoolsCigar
)
})
.toSeq
.distinct
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ import org.bdgenomics.adam.rich.RichCigar._
import org.bdgenomics.adam.util.MdTag
import org.bdgenomics.formats.avro.AlignmentRecord

class ConsensusGeneratorFromSmithWaterman(wMatch: Double,
wMismatch: Double,
wInsert: Double,
wDelete: Double) extends ConsensusGeneratorFromReads {
class ConsensusGeneratorFromSmithWaterman(
wMatch: Double,
wMismatch: Double,
wInsert: Double,
wDelete: Double) extends ConsensusGeneratorFromReads {

/**
* Attempts realignment of all reads using Smith-Waterman. Accepts all realignments that have one
Expand All @@ -37,25 +38,30 @@ class ConsensusGeneratorFromSmithWaterman(wMatch: Double,
* @param reads Reads to process.
* @return Reads with indels normalized if they contain a single indel.
*/
override def preprocessReadsForRealignment(reads: Iterable[RichAlignmentRecord],
reference: String,
region: ReferenceRegion): Iterable[RichAlignmentRecord] = {
override def preprocessReadsForRealignment(
reads: Iterable[RichAlignmentRecord],
reference: String,
region: ReferenceRegion): Iterable[RichAlignmentRecord] = {
val rds: Iterable[RichAlignmentRecord] = reads.map(r => {

val sw = new SmithWatermanConstantGapScoring(r.record.getSequence.toString,
val sw = new SmithWatermanConstantGapScoring(
r.record.getSequence,
reference,
wMatch,
wMismatch,
wInsert,
wDelete)
wDelete
)
println("for " + r.record.getReadName + " sw to " + sw.xStart + " with " + sw.cigarX)

// if we realign with fewer than three alignment blocks, then take the new alignment
if (sw.cigarX.numAlignmentBlocks <= 2) {
val mdTag = MdTag(r.record.getSequence.toString,
val mdTag = MdTag(
r.record.getSequence,
reference.drop(sw.xStart),
sw.cigarX,
region.start)
region.start
)

val newRead: RichAlignmentRecord = AlignmentRecord.newBuilder(r)
.setStart(sw.xStart + region.start)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,12 @@ abstract class SmithWaterman(xSequence: String, ySequence: String) extends Seria
*
* @see buildScoringMatrix
*/
@tailrec private[smithwaterman] final def move(matrix: Array[Array[Char]],
i: Int,
j: Int,
cX: String,
cY: String): (String, String, Int, Int) = {
@tailrec private[smithwaterman] final def move(
matrix: Array[Array[Char]],
i: Int,
j: Int,
cX: String,
cY: String): (String, String, Int, Int) = {
if (matrix(i)(j) == 'T') {
// return if told to terminate
(cigarFromRNNCigar(cX), cigarFromRNNCigar(cY), i, j)
Expand All @@ -160,8 +161,9 @@ abstract class SmithWaterman(xSequence: String, ySequence: String) extends Seria
* @param moveMatrix Move matrix to track back on.
* @return Tuple of Cigar for X, Y.
*/
private[smithwaterman] def trackback(scoreMatrix: Array[Array[Double]],
moveMatrix: Array[Array[Char]]): (Cigar, Cigar, Int, Int) = {
private[smithwaterman] def trackback(
scoreMatrix: Array[Array[Double]],
moveMatrix: Array[Array[Char]]): (Cigar, Cigar, Int, Int) = {
assert(scoreMatrix.length == xSequence.length + 1)
assert(scoreMatrix.forall(_.length == ySequence.length + 1))
assert(moveMatrix.length == xSequence.length + 1)
Expand Down
Loading