Skip to content

Commit

Permalink
Merge pull request #908 from fnothaft/huitseeker-BDGLinting
Browse files Browse the repository at this point in the history
Squashed #907 and ran format-sources
  • Loading branch information
heuermh committed Jan 7, 2016
2 parents 630cc1a + 82838b7 commit af778e0
Show file tree
Hide file tree
Showing 76 changed files with 892 additions and 746 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,12 @@ class JavaAlignmentRecordRDD(val jrdd: JavaRDD[AlignmentRecord]) extends Seriali
* @param compressCodec Name of the compression codec to use.
* @param disableDictionaryEncoding Whether or not to disable bit-packing.
*/
def adamSave(filePath: java.lang.String,
blockSize: java.lang.Integer,
pageSize: java.lang.Integer,
compressCodec: CompressionCodecName,
disableDictionaryEncoding: java.lang.Boolean) {
def adamSave(
filePath: java.lang.String,
blockSize: java.lang.Integer,
pageSize: java.lang.Integer,
compressCodec: CompressionCodecName,
disableDictionaryEncoding: java.lang.Boolean) {
jrdd.rdd.adamParquetSave(
filePath,
blockSize,
Expand All @@ -62,8 +63,9 @@ class JavaAlignmentRecordRDD(val jrdd: JavaRDD[AlignmentRecord]) extends Seriali
* @param filePath Path to save the file at.
* @param asSam If true, saves as SAM. If false, saves as BAM.
*/
def adamSAMSave(filePath: java.lang.String,
asSam: java.lang.Boolean) {
def adamSAMSave(
filePath: java.lang.String,
asSam: java.lang.Boolean) {
jrdd.rdd.adamSAMSave(filePath, asSam)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,14 @@ object AlleleCountHelper extends Serializable {
}

def countAlleles(adamVariants: RDD[Genotype], args: AlleleCountArgs) {
val usefulData = adamVariants.map(p => (p.getVariant.getContig.getContigName,
val usefulData = adamVariants.map(p => (
p.getVariant.getContig.getContigName,
p.getVariant.getStart,
p.getVariant.getReferenceAllele,
p.getVariant.getAlternateAllele,
p.getAlleles.get(0),
p.getAlleles.get(1)))
p.getAlleles.get(1)
))
val reduced_Variants = usefulData.flatMap(p => Seq((p._1, p._2, p._3, p._4, p._5), (p._1, p._2, p._3, p._4, p._6)))
val alleles = reduced_Variants.flatMap(chooseAllele)
alleles.groupBy(identity).map { case (a, b) => "%s\t%s\t%s\t%d".format(a._1, a._2, a._3, b.size) }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ class CalculateDepth(protected val args: CalculateDepthArgs) extends BDGSparkCom
println("%20s\t%15s\t% 5d".format(
"%s:%d".format(region.referenceName, region.start),
variantNames(region),
count))
count
))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ class CountReadKmers(protected val args: CountReadKmersArgs) extends BDGSparkCom
// read from disk
var adamRecords: RDD[AlignmentRecord] = sc.loadAlignments(
args.inputPath,
projection = Some(Projection(AlignmentRecordField.sequence)))
projection = Some(Projection(AlignmentRecordField.sequence))
)

if (args.repartition != -1) {
log.info("Repartitioning reads to '%d' partitions".format(args.repartition))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ class FlagStat(protected val args: FlagStatArgs) extends BDGSparkCommand[FlagSta
AlignmentRecordField.secondOfPair,
AlignmentRecordField.properPair,
AlignmentRecordField.mapq,
AlignmentRecordField.failedVendorQualityChecks)
AlignmentRecordField.failedVendorQualityChecks
)

val adamFile: RDD[AlignmentRecord] = sc.loadAlignments(args.inputPath, projection = Some(projection))

Expand Down Expand Up @@ -113,7 +114,8 @@ class FlagStat(protected val args: FlagStatArgs) extends BDGSparkCommand[FlagSta
percent(passedVendorQuality.singleton, passedVendorQuality.total),
percent(failedVendorQuality.singleton, failedVendorQuality.total),
passedVendorQuality.withMateMappedToDiffChromosome, failedVendorQuality.withMateMappedToDiffChromosome,
passedVendorQuality.withMateMappedToDiffChromosomeMapQ5, failedVendorQuality.withMateMappedToDiffChromosomeMapQ5)
passedVendorQuality.withMateMappedToDiffChromosomeMapQ5, failedVendorQuality.withMateMappedToDiffChromosomeMapQ5
)

Option(args.outputPath) match {
case Some(outputPath) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class Flatten(val args: FlattenArgs) extends BDGSparkCommand[FlattenArgs] with L
args.pageSize,
args.compressionCodec,
args.disableDictionaryEncoding,
Some(flatSchema))
Some(flatSchema)
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class PrintGenes(protected val args: PrintGenesArgs)
transcript.region.referenceName,
transcript.region.start, transcript.region.end,
if (transcript.strand) "+" else "-",
transcript.exons.size)
transcript.exons.size
)
}
}
21 changes: 13 additions & 8 deletions adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ class Transform(protected val args: TransformArgs) extends BDGSparkCommand[Trans
log.info("Locally realigning indels.")
val consensusGenerator = Option(args.knownIndelsFile)
.fold(new ConsensusGeneratorFromReads().asInstanceOf[ConsensusGenerator])(
new ConsensusGeneratorFromKnowns(_, sc).asInstanceOf[ConsensusGenerator])
new ConsensusGeneratorFromKnowns(_, sc).asInstanceOf[ConsensusGenerator]
)

adamRecords = oldRdd.adamRealignIndels(
consensusGenerator,
Expand Down Expand Up @@ -227,7 +228,8 @@ class Transform(protected val args: TransformArgs) extends BDGSparkCommand[Trans
if ((args.useAlignedReadPredicate || args.limitProjection) &&
(args.forceLoadBam || args.forceLoadFastq || args.forceLoadIFastq)) {
throw new IllegalArgumentException(
"-aligned_read_predicate and -limit_projection only apply to Parquet files, but a non-Parquet force load flag was passed.")
"-aligned_read_predicate and -limit_projection only apply to Parquet files, but a non-Parquet force load flag was passed."
)
}

val rdd =
Expand All @@ -246,7 +248,8 @@ class Transform(protected val args: TransformArgs) extends BDGSparkCommand[Trans
None
}
val proj = if (args.limitProjection) {
Some(Projection(AlignmentRecordField.contig,
Some(Projection(
AlignmentRecordField.contig,
AlignmentRecordField.start,
AlignmentRecordField.end,
AlignmentRecordField.mapq,
Expand All @@ -265,13 +268,16 @@ class Transform(protected val args: TransformArgs) extends BDGSparkCommand[Trans
AlignmentRecordField.duplicateRead,
AlignmentRecordField.mismatchingPositions,
AlignmentRecordField.secondaryAlignment,
AlignmentRecordField.supplementaryAlignment))
AlignmentRecordField.supplementaryAlignment
))
} else {
None
}
sc.loadParquetAlignments(args.inputPath,
sc.loadParquetAlignments(
args.inputPath,
predicate = pred,
projection = proj)
projection = proj
)
} else {
sc.loadAlignments(
args.inputPath,
Expand All @@ -297,8 +303,7 @@ class Transform(protected val args: TransformArgs) extends BDGSparkCommand[Trans
concatFilename,
recordGroupOpt = Option(args.fastqRecordGroup)
)
}
)
})

this.apply(concatRddOpt match {
case Some(concatRdd) => rdd ++ concatRdd
Expand Down
21 changes: 13 additions & 8 deletions adam-cli/src/main/scala/org/bdgenomics/adam/cli/View.scala
Original file line number Diff line number Diff line change
Expand Up @@ -36,41 +36,47 @@ class ViewArgs extends Args4jBase with ParquetArgs with ADAMSaveAnyArgs {
required = false,
name = "-f",
metaVar = "N",
usage = "Restrict to reads that match all of the bits in <N>")
usage = "Restrict to reads that match all of the bits in <N>"
)
var matchAllBits: Int = 0

@Args4jOption(
required = false,
name = "-F",
metaVar = "N",
usage = "Restrict to reads that match none of the bits in <N>")
usage = "Restrict to reads that match none of the bits in <N>"
)
var mismatchAllBits: Int = 0

@Args4jOption(
required = false,
name = "-g",
metaVar = "N",
usage = "Restrict to reads that match any of the bits in <N>")
usage = "Restrict to reads that match any of the bits in <N>"
)
var matchSomeBits: Int = 0

@Args4jOption(
required = false,
name = "-G",
metaVar = "N",
usage = "Restrict to reads that mismatch at least one of the bits in <N>")
usage = "Restrict to reads that mismatch at least one of the bits in <N>"
)
var mismatchSomeBits: Int = 0

@Args4jOption(
required = false,
name = "-c",
usage = "Print count of matching records, instead of the records themselves")
usage = "Print count of matching records, instead of the records themselves"
)
var printCount: Boolean = false

@Args4jOption(
required = false,
name = "-o",
metaVar = "<FILE>",
usage = "Output to <FILE>; can also pass <FILE> as the second argument")
usage = "Output to <FILE>; can also pass <FILE> as the second argument"
)
var outputPathArg: String = null

@Args4jOption(required = false, name = "-sort_fastq_output", usage = "Sets whether to sort the FASTQ output, if saving as FASTQ. False by default. Ignored if not saving as FASTQ.")
Expand Down Expand Up @@ -148,8 +154,7 @@ class View(val args: ViewArgs) extends BDGSparkCommand[ViewArgs] {
reads.filter(read =>
allFilters.forall(_(read)) &&
(matchSomeFilters.isEmpty || matchSomeFilters.exists(_(read))) &&
(mismatchSomeFilters.isEmpty || mismatchSomeFilters.exists(_(read)))
)
(mismatchSomeFilters.isEmpty || mismatchSomeFilters.exists(_(read))))
} else
reads
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class IndexedBamInputFormat extends BAMInputFormat {
override def createRecordReader(split: InputSplit, ctx: TaskAttemptContext): RecordReader[LongWritable, SAMRecordWritable] = {
val rr: RecordReader[LongWritable, SAMRecordWritable] = new BAMFilteredRecordReader()
assert(IndexedBamInputFormat.optViewRegion.isDefined)
BAMFilteredRecordReader.setRegion(IndexedBamInputFormat.optViewRegion.get)
IndexedBamInputFormat.optViewRegion.foreach { (refReg) => BAMFilteredRecordReader.setRegion(refReg) }
rr.initialize(split, ctx)
rr
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,10 @@ abstract class ConsensusGenerator extends Serializable {
* @param reads Reads to preprocess.
* @return Preprocessed reads.
*/
def preprocessReadsForRealignment(reads: Iterable[RichAlignmentRecord],
reference: String,
region: ReferenceRegion): Iterable[RichAlignmentRecord]
def preprocessReadsForRealignment(
reads: Iterable[RichAlignmentRecord],
reference: String,
region: ReferenceRegion): Iterable[RichAlignmentRecord]

/**
* For all reads in this region, generates the list of consensus sequences for realignment.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ class ConsensusGeneratorFromKnowns(file: String, @transient sc: SparkContext) ex
* @param reads Reads to preprocess.
* @return Preprocessed reads.
*/
def preprocessReadsForRealignment(reads: Iterable[RichAlignmentRecord],
reference: String,
region: ReferenceRegion): Iterable[RichAlignmentRecord] = {
def preprocessReadsForRealignment(
reads: Iterable[RichAlignmentRecord],
reference: String,
region: ReferenceRegion): Iterable[RichAlignmentRecord] = {
reads
}

Expand All @@ -67,8 +68,8 @@ class ConsensusGeneratorFromKnowns(file: String, @transient sc: SparkContext) ex
val table = indelTable.value

// get region
val start = reads.map(_.record.getStart.toLong).reduce(_ min _)
val end = reads.map(_.getEnd.toLong).reduce(_ max _)
val start = reads.map(_.record.getStart).min
val end = reads.map(_.getEnd).max
val refId = reads.head.record.getContig.getContigName

val region = ReferenceRegion(refId, start, end + 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,10 @@ class ConsensusGeneratorFromReads extends ConsensusGenerator {
* @param reads Reads to process.
* @return Reads with indels normalized if they contain a single indel.
*/
def preprocessReadsForRealignment(reads: Iterable[RichAlignmentRecord],
reference: String,
region: ReferenceRegion): Iterable[RichAlignmentRecord] = {
def preprocessReadsForRealignment(
reads: Iterable[RichAlignmentRecord],
reference: String,
region: ReferenceRegion): Iterable[RichAlignmentRecord] = {
reads.map(r => {
// if there are two alignment blocks (sequence matches) then there is a single indel in the read
if (r.samtoolsCigar.numAlignmentBlocks == 2) {
Expand Down Expand Up @@ -74,10 +75,14 @@ class ConsensusGeneratorFromReads extends ConsensusGenerator {
.flatMap(r => {
// try to generate a consensus alignment - if a consensus exists, add it to our
// list of consensuses to test
Consensus.generateAlternateConsensus(r.getSequence,
ReferencePosition(r.getContig.getContigName,
r.getStart),
r.samtoolsCigar)
Consensus.generateAlternateConsensus(
r.getSequence,
ReferencePosition(
r.getContig.getContigName,
r.getStart
),
r.samtoolsCigar
)
})
.toSeq
.distinct
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ import org.bdgenomics.adam.rich.RichCigar._
import org.bdgenomics.adam.util.MdTag
import org.bdgenomics.formats.avro.AlignmentRecord

class ConsensusGeneratorFromSmithWaterman(wMatch: Double,
wMismatch: Double,
wInsert: Double,
wDelete: Double) extends ConsensusGeneratorFromReads {
class ConsensusGeneratorFromSmithWaterman(
wMatch: Double,
wMismatch: Double,
wInsert: Double,
wDelete: Double) extends ConsensusGeneratorFromReads {

/**
* Attempts realignment of all reads using Smith-Waterman. Accepts all realignments that have one
Expand All @@ -37,25 +38,30 @@ class ConsensusGeneratorFromSmithWaterman(wMatch: Double,
* @param reads Reads to process.
* @return Reads with indels normalized if they contain a single indel.
*/
override def preprocessReadsForRealignment(reads: Iterable[RichAlignmentRecord],
reference: String,
region: ReferenceRegion): Iterable[RichAlignmentRecord] = {
override def preprocessReadsForRealignment(
reads: Iterable[RichAlignmentRecord],
reference: String,
region: ReferenceRegion): Iterable[RichAlignmentRecord] = {
val rds: Iterable[RichAlignmentRecord] = reads.map(r => {

val sw = new SmithWatermanConstantGapScoring(r.record.getSequence.toString,
val sw = new SmithWatermanConstantGapScoring(
r.record.getSequence,
reference,
wMatch,
wMismatch,
wInsert,
wDelete)
wDelete
)
println("for " + r.record.getReadName + " sw to " + sw.xStart + " with " + sw.cigarX)

// if we realign with fewer than three alignment blocks, then take the new alignment
if (sw.cigarX.numAlignmentBlocks <= 2) {
val mdTag = MdTag(r.record.getSequence.toString,
val mdTag = MdTag(
r.record.getSequence,
reference.drop(sw.xStart),
sw.cigarX,
region.start)
region.start
)

val newRead: RichAlignmentRecord = AlignmentRecord.newBuilder(r)
.setStart(sw.xStart + region.start)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,12 @@ abstract class SmithWaterman(xSequence: String, ySequence: String) extends Seria
*
* @see buildScoringMatrix
*/
@tailrec private[smithwaterman] final def move(matrix: Array[Array[Char]],
i: Int,
j: Int,
cX: String,
cY: String): (String, String, Int, Int) = {
@tailrec private[smithwaterman] final def move(
matrix: Array[Array[Char]],
i: Int,
j: Int,
cX: String,
cY: String): (String, String, Int, Int) = {
if (matrix(i)(j) == 'T') {
// return if told to terminate
(cigarFromRNNCigar(cX), cigarFromRNNCigar(cY), i, j)
Expand All @@ -160,8 +161,9 @@ abstract class SmithWaterman(xSequence: String, ySequence: String) extends Seria
* @param moveMatrix Move matrix to track back on.
* @return Tuple of Cigar for X, Y.
*/
private[smithwaterman] def trackback(scoreMatrix: Array[Array[Double]],
moveMatrix: Array[Array[Char]]): (Cigar, Cigar, Int, Int) = {
private[smithwaterman] def trackback(
scoreMatrix: Array[Array[Double]],
moveMatrix: Array[Array[Char]]): (Cigar, Cigar, Int, Int) = {
assert(scoreMatrix.length == xSequence.length + 1)
assert(scoreMatrix.forall(_.length == ySequence.length + 1))
assert(moveMatrix.length == xSequence.length + 1)
Expand Down
Loading

0 comments on commit af778e0

Please sign in to comment.