diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/ADAMPredicate.scala b/adam-core/src/main/scala/org/bdgenomics/adam/predicates/ADAMPredicate.scala deleted file mode 100644 index 1a52d702eb..0000000000 --- a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/ADAMPredicate.scala +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* -* Copyright (c) 2014. Mount Sinai School of Medicine -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package org.bdgenomics.adam.predicates - -import parquet.filter.{ RecordFilter, UnboundRecordFilter } -import java.lang.Iterable -import parquet.column.ColumnReader -import org.apache.spark.rdd.RDD - -/** - * - * ADAMPredicate: Classes derived from ADAMPredicate can be used to set ParquetInputFormat.setUnboundRecordFilter - * for predicate pushdown, or alternatively, filter an already loaded RDD - * - */ -trait ADAMPredicate[T] extends UnboundRecordFilter { - val recordCondition: RecordCondition[T] - - final def apply(rdd: RDD[T]): RDD[T] = { - rdd.filter(recordCondition.filter) - } - - override def bind(readers: Iterable[ColumnReader]): RecordFilter = recordCondition.recordFilter.bind(readers) -} diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/AlignmentRecordConditions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/predicates/AlignmentRecordConditions.scala deleted file mode 100644 index a46674422b..0000000000 --- a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/AlignmentRecordConditions.scala +++ /dev/null @@ -1,43 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.bdgenomics.adam.predicates - -import org.bdgenomics.adam.projections.AlignmentRecordField -import org.bdgenomics.formats.avro.AlignmentRecord - -object AlignmentRecordConditions { - - def apply(field: AlignmentRecordField.Value, bool: Boolean = true): RecordCondition[AlignmentRecord] = { - RecordCondition[AlignmentRecord](FieldCondition(field, bool)) - } - - def apply(field: AlignmentRecordField.Value, filter: Int => Boolean): RecordCondition[AlignmentRecord] = { - RecordCondition[AlignmentRecord](FieldCondition(field.toString, filter)) - } - - val isMapped = apply(AlignmentRecordField.readMapped) - val isUnique = apply(AlignmentRecordField.duplicateRead, false) - - val isPrimaryAlignment = apply(AlignmentRecordField.primaryAlignment) - - val passedVendorQualityChecks = apply(AlignmentRecordField.failedVendorQualityChecks, false) - - def isHighQuality(minQuality: Int) = apply(AlignmentRecordField.mapq, (x: Int) => x > minQuality) - -} diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/FieldCondition.scala b/adam-core/src/main/scala/org/bdgenomics/adam/predicates/FieldCondition.scala deleted file mode 100644 index 1348cb53a8..0000000000 --- a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/FieldCondition.scala +++ /dev/null @@ -1,89 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* -* Copyright (c) 2014. Mount Sinai School of Medicine -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package org.bdgenomics.adam.predicates - -import parquet.filter.ColumnPredicates.Predicate -import parquet.column.ColumnReader -import parquet.filter.UnboundRecordFilter -import parquet.filter.ColumnRecordFilter._ -import org.bdgenomics.adam.predicates.ColumnReaderInput.ColumnReaderInput -import org.bdgenomics.adam.projections.AlignmentRecordField -import scala.Predef._ - -object ColumnReaderInput extends Serializable { - trait ColumnReaderInput[T] extends Serializable { - def convert(x: ColumnReader): T - } - implicit object ColumnReaderInputInt extends ColumnReaderInput[Int] { - def convert(input: ColumnReader): Int = input.getInteger - } - implicit object ColumnReaderInputLong extends ColumnReaderInput[Long] { - def convert(input: ColumnReader): Long = input.getLong - } - implicit object ColumnReaderInputString extends ColumnReaderInput[String] { - def convert(input: ColumnReader): String = input.getBinary.toStringUsingUTF8 - } - implicit object ColumnReaderInputDouble extends ColumnReaderInput[Double] { - def convert(input: ColumnReader): Double = input.getDouble - } - implicit object ColumnReaderInputFloat extends ColumnReaderInput[Float] { - def convert(input: ColumnReader): Float = input.getFloat - } - implicit object ColumnReaderInputBoolean extends ColumnReaderInput[Boolean] { - def convert(input: ColumnReader): Boolean = input.getBoolean - } -} - -case class FieldCondition[T](fieldName: String, filter: T => Boolean)(implicit converter: ColumnReaderInput[T]) - extends Predicate { - - def apply(input: Any): Boolean = { - filter(input.asInstanceOf[T]) - } - - override def apply(input: ColumnReader): Boolean = { - filter(converter.convert(input)) - } - - def columnFilter: UnboundRecordFilter = column(fieldName, this) - -} - -object FieldCondition { - - def apply(field: AlignmentRecordField.Value, - filterValue: Boolean)(implicit converter: ColumnReaderInput[Boolean]): FieldCondition[Boolean] = { - FieldCondition(field.toString, PredicateUtils(filterValue)) - } -} diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/GenotypeConditions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/predicates/GenotypeConditions.scala deleted file mode 100644 index 2bc4fb7635..0000000000 --- a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/GenotypeConditions.scala +++ /dev/null @@ -1,29 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.bdgenomics.adam.predicates - -import org.bdgenomics.formats.avro.Genotype - -object GenotypeConditions { - - val isPassing = RecordCondition[Genotype](FieldCondition("variantCallingAnnotations.variantIsPassing", PredicateUtils(true))) - - def hasMinReadDepth(minReadDepth: Int) = RecordCondition[Genotype](FieldCondition("variantCallingAnnotations.readDepth", (x: Int) => x > minReadDepth)) - -} diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/GenotypePredicates.scala b/adam-core/src/main/scala/org/bdgenomics/adam/predicates/GenotypePredicates.scala deleted file mode 100644 index b495d4119a..0000000000 --- a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/GenotypePredicates.scala +++ /dev/null @@ -1,27 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.bdgenomics.adam.predicates - -import org.bdgenomics.formats.avro.Genotype - -class GenotypeRecordPASSPredicate extends ADAMPredicate[Genotype] { - - override val recordCondition = RecordCondition[Genotype](FieldCondition("variantCallingAnnotations.variantIsPassing", PredicateUtils(true))) - -} - diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/HighQualityReadPredicate.scala b/adam-core/src/main/scala/org/bdgenomics/adam/predicates/HighQualityReadPredicate.scala deleted file mode 100644 index 77341aae68..0000000000 --- a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/HighQualityReadPredicate.scala +++ /dev/null @@ -1,26 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.bdgenomics.adam.predicates - -import org.bdgenomics.formats.avro.AlignmentRecord - -class HighQualityReadPredicate extends ADAMPredicate[AlignmentRecord] { - - override val recordCondition = AlignmentRecordConditions.isMapped && AlignmentRecordConditions.isHighQuality(30) - -} diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/LocusPredicate.scala b/adam-core/src/main/scala/org/bdgenomics/adam/predicates/LocusPredicate.scala deleted file mode 100644 index 0b9836801f..0000000000 --- a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/LocusPredicate.scala +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.bdgenomics.adam.predicates - -import parquet.filter.{ RecordFilter, UnboundRecordFilter } - -import java.lang.Iterable -import parquet.column.ColumnReader -import parquet.filter.AndRecordFilter.and -import parquet.filter.ColumnPredicates.equalTo -import parquet.filter.ColumnRecordFilter.column -import org.bdgenomics.adam.projections.AlignmentRecordField - -class LocusPredicate extends UnboundRecordFilter { - - def bind(readers: Iterable[ColumnReader]): RecordFilter = { - and(column(AlignmentRecordField.readMapped.toString(), equalTo(true)), - and(column(AlignmentRecordField.primaryAlignment.toString(), equalTo(true)), - and(column(AlignmentRecordField.failedVendorQualityChecks.toString(), equalTo(false)), - column(AlignmentRecordField.duplicateRead.toString(), equalTo(false))))).bind(readers) - } -} diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/PredicateUtils.scala b/adam-core/src/main/scala/org/bdgenomics/adam/predicates/PredicateUtils.scala deleted file mode 100644 index 9f2ee30756..0000000000 --- a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/PredicateUtils.scala +++ /dev/null @@ -1,22 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.bdgenomics.adam.predicates - -object PredicateUtils { - def apply(b: Boolean) = (x: Boolean) => (x == b) -} diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/RecordCondition.scala b/adam-core/src/main/scala/org/bdgenomics/adam/predicates/RecordCondition.scala deleted file mode 100644 index 180e0c78d4..0000000000 --- a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/RecordCondition.scala +++ /dev/null @@ -1,115 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/* -* Copyright (c) 2014. Mount Sinai School of Medicine -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package org.bdgenomics.adam.predicates - -import org.apache.avro.specific.SpecificRecord -import org.apache.avro.Schema -import org.apache.avro.Schema.Field -import parquet.filter.{ AndRecordFilter, OrRecordFilter, UnboundRecordFilter } - -import org.bdgenomics.adam.util.ImplicitJavaConversions._ -import scala.annotation.tailrec - -object RecordCondition { - - // Convert predicate on single field to predicate on record - def getRecordPredicate[T <: SpecificRecord: Manifest, U](condition: FieldCondition[U]): T => Boolean = { - @tailrec - def getFieldValue(record: SpecificRecord, fieldPath: Seq[String]): Any = { - val schema = record.getSchema - val field: Field = schema.getField(fieldPath.head) - val fieldType = field.schema.getTypes.filter(_.getType != Schema.Type.NULL)(0) - if (fieldType.getType == Schema.Type.RECORD) { - getFieldValue(record.get(field.pos).asInstanceOf[SpecificRecord], fieldPath.tail) - } else { - record.get(field.pos) - } - } - - (record: T) => { - val fieldName = condition.fieldName - val filter: Any => Boolean = condition.apply - val fieldValue = getFieldValue(record, fieldName.split("\\.")) - filter(fieldValue) - } - } - - // Create a record predicate from many individual field predicates - def apply[T <: SpecificRecord: Manifest](conditions: FieldCondition[_]*): RecordCondition[T] = { - conditions.map(c => { - val fieldPredicate = getRecordPredicate(c) - new RecordCondition(fieldPredicate, c.columnFilter) - }).reduce(_ && _) - } -} - -/** - * - * A RecordCondition is a filter on any Avro defined records and - * contains an UnboundRecordFilter that can be used for predicate pushdown - * with Parquet stored files - * - */ -class RecordCondition[T <% SpecificRecord: Manifest](val filter: T => Boolean, val recordFilter: UnboundRecordFilter) - extends Serializable { - - // Combine two predicates through an AND - def and(other: RecordCondition[T]): RecordCondition[T] = &&(other) - def &&(other: RecordCondition[T]): RecordCondition[T] = { - - // Local variables to avoid serialization - val thisFilter = filter - val otherFilter = other.filter - - new RecordCondition[T](filter = (r: T) => thisFilter(r) && otherFilter(r), - recordFilter = AndRecordFilter.and(recordFilter, other.recordFilter)) - } - - // Combine two predicats through an OR - def or(other: RecordCondition[T]): RecordCondition[T] = ||(other) - def ||(other: RecordCondition[T]): RecordCondition[T] = { - - // Local variables to avoid serialization - val thisFilter = filter - val otherFilter = other.filter - - new RecordCondition[T](filter = (r: T) => thisFilter(r) || otherFilter(r), - recordFilter = OrRecordFilter.or(recordFilter, other.recordFilter)) - } - - // Apply the predicate on a record - def apply(record: T): Boolean = { - filter(record) - } -} diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/UniqueMappedReadPredicate.scala b/adam-core/src/main/scala/org/bdgenomics/adam/predicates/UniqueMappedReadPredicate.scala deleted file mode 100644 index 87214cb0b9..0000000000 --- a/adam-core/src/main/scala/org/bdgenomics/adam/predicates/UniqueMappedReadPredicate.scala +++ /dev/null @@ -1,28 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.bdgenomics.adam.predicates - -import org.bdgenomics.formats.avro.AlignmentRecord - -class UniqueMappedReadPredicate extends ADAMPredicate[AlignmentRecord] { - - override val recordCondition = AlignmentRecordConditions.isMapped && AlignmentRecordConditions.isUnique && - AlignmentRecordConditions.isPrimaryAlignment && AlignmentRecordConditions.passedVendorQualityChecks - -} - diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/predicates/GenotypePredicatesSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/predicates/GenotypePredicatesSuite.scala deleted file mode 100644 index 89753761ca..0000000000 --- a/adam-core/src/test/scala/org/bdgenomics/adam/predicates/GenotypePredicatesSuite.scala +++ /dev/null @@ -1,110 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.bdgenomics.adam.predicates - -import com.google.common.io.Files -import java.util.logging.Level -import java.io.File -import org.apache.commons.io.FileUtils -import org.apache.spark.rdd.RDD -import org.bdgenomics.adam.rdd.ADAMContext._ -import org.bdgenomics.adam.util.{ ParquetLogger, ADAMFunSuite } -import org.bdgenomics.formats.avro.{ - Contig, - Genotype, - Variant, - VariantCallingAnnotations -} - -class GenotypePredicatesSuite extends ADAMFunSuite { - - sparkTest("Load only only PASSing records") { - ParquetLogger.hadoopLoggerLevel(Level.SEVERE) - - val v0 = Variant.newBuilder - .setContig(Contig.newBuilder.setContigName("chr11").build) - .setStart(17409571) - .setReferenceAllele("T") - .setAlternateAllele("C") - .build - - val passFilterAnnotation = - VariantCallingAnnotations.newBuilder().setVariantIsPassing(true).build() - val failFilterAnnotation = - VariantCallingAnnotations.newBuilder().setVariantIsPassing(false).build() - - val genotypes = sc.parallelize(List( - Genotype.newBuilder() - .setVariant(v0) - .setVariantCallingAnnotations(passFilterAnnotation) - .setSampleId("NA12878") - .build(), - Genotype.newBuilder() - .setVariant(v0) - .setVariantCallingAnnotations(failFilterAnnotation) - .setSampleId("NA12878") - .build())) - - val genotypesParquetFile = new File(Files.createTempDir(), "genotypes.adam") - genotypes.adamParquetSave(genotypesParquetFile.getAbsolutePath) - - val gts1: RDD[Genotype] = - sc.loadParquetGenotypes(genotypesParquetFile.getAbsolutePath) - .filter(_.getVariantCallingAnnotations.getVariantIsPassing) - assert(gts1.count === 1) - - FileUtils.deleteDirectory(genotypesParquetFile.getParentFile) - } - - sparkTest("Load all records and filter to only PASSing records") { - ParquetLogger.hadoopLoggerLevel(Level.SEVERE) - - val v0 = Variant.newBuilder - .setContig(Contig.newBuilder.setContigName("11").build) - .setStart(17409571) - .setReferenceAllele("T") - .setAlternateAllele("C") - .build - - val passFilterAnnotation = - VariantCallingAnnotations.newBuilder().setVariantIsPassing(true).build() - val failFilterAnnotation = - VariantCallingAnnotations.newBuilder().setVariantIsPassing(false).build() - - val genotypes = sc.parallelize(List( - Genotype.newBuilder().setVariant(v0) - .setSampleId("ignored") - .setVariantCallingAnnotations(passFilterAnnotation).build(), - Genotype.newBuilder() - .setSampleId("ignored") - .setVariant(v0) - .setVariantCallingAnnotations(failFilterAnnotation).build())) - - val genotypesParquetFile = new File(Files.createTempDir(), "genotypes.adam") - genotypes.adamParquetSave(genotypesParquetFile.getAbsolutePath) - - val gts: RDD[Genotype] = sc.loadParquetGenotypes(genotypesParquetFile.getAbsolutePath) - assert(gts.count === 2) - - val predicate = new GenotypeRecordPASSPredicate - val filtered = predicate(gts) - assert(filtered.count === 1) - - FileUtils.deleteDirectory(genotypesParquetFile.getParentFile) - } -} diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/predicates/RecordConditionSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/predicates/RecordConditionSuite.scala deleted file mode 100644 index 692c3f4470..0000000000 --- a/adam-core/src/test/scala/org/bdgenomics/adam/predicates/RecordConditionSuite.scala +++ /dev/null @@ -1,213 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.bdgenomics.adam.predicates - -import org.scalatest.FunSuite -import org.bdgenomics.formats.avro._ -import org.bdgenomics.adam.projections.AlignmentRecordField - -class RecordConditionSuite extends FunSuite { - - test("create record condition from simple field condition") { - val mappedReadCondition = RecordCondition[AlignmentRecord]( - FieldCondition(AlignmentRecordField.readMapped.toString(), (x: Boolean) => x)) - - val mappedRead = AlignmentRecord.newBuilder - .setReadMapped(true) - .build - assert(mappedReadCondition(mappedRead)) - - val unmappedRead = AlignmentRecord.newBuilder - .setReadMapped(false) - .build - - assert(!mappedReadCondition(unmappedRead)) - - val underspecifiedRead = AlignmentRecord.newBuilder - .setMapq(30) - .build - - assert(!mappedReadCondition(underspecifiedRead)) - - } - - test("create record condition from nested field condition") { - val v0 = Variant.newBuilder - .setContig(Contig.newBuilder.setContigName("11").build) - .setStart(17409571) - .setReferenceAllele("T") - .setAlternateAllele("C") - .build - - val passFilterAnnotation = - VariantCallingAnnotations.newBuilder().setVariantIsPassing(true).build() - val failFilterAnnotation = - VariantCallingAnnotations.newBuilder().setVariantIsPassing(false).build() - - val passGenotype = - Genotype.newBuilder().setVariant(v0) - .setSampleId("ignored") - .setVariantCallingAnnotations(passFilterAnnotation) - .build - val failGenotype = Genotype.newBuilder() - .setSampleId("ignored") - .setVariant(v0) - .setVariantCallingAnnotations(failFilterAnnotation) - .build - - val isPassing = RecordCondition[Genotype](FieldCondition("variantCallingAnnotations.variantIsPassing", PredicateUtils(true))) - - assert(isPassing(passGenotype)) - assert(!isPassing(failGenotype)) - - } - - test("create record condition from multiple field conditions") { - val mappedReadCondition = RecordCondition[AlignmentRecord]( - FieldCondition(AlignmentRecordField.readMapped.toString(), (x: Boolean) => x), - FieldCondition(AlignmentRecordField.primaryAlignment.toString(), (x: Boolean) => x), - FieldCondition(AlignmentRecordField.failedVendorQualityChecks.toString(), (x: Boolean) => !x), - FieldCondition(AlignmentRecordField.duplicateRead.toString(), (x: Boolean) => !x)) - - val mappedRead = AlignmentRecord.newBuilder - .setReadMapped(true) - .setPrimaryAlignment(true) - .setFailedVendorQualityChecks(false) - .setDuplicateRead(false) - .build - - assert(mappedReadCondition(mappedRead)) - - val mappedDuplicateRead = AlignmentRecord.newBuilder - .setReadMapped(true) - .setPrimaryAlignment(true) - .setFailedVendorQualityChecks(false) - .setDuplicateRead(true) - .build - - assert(!mappedReadCondition(mappedDuplicateRead)) - - val mappedSecondaryAlignmentRead = AlignmentRecord.newBuilder - .setReadMapped(true) - .setPrimaryAlignment(false) - .setFailedVendorQualityChecks(false) - .setDuplicateRead(false) - .build - - assert(!mappedReadCondition(mappedSecondaryAlignmentRead)) - - val unmappedRead = AlignmentRecord.newBuilder - .setReadMapped(false) - .build - - assert(!mappedReadCondition(unmappedRead)) - } - - test("create record condition from non-equality field conditions") { - val highQualityReadCondition = RecordCondition[AlignmentRecord]( - FieldCondition(AlignmentRecordField.readMapped.toString(), PredicateUtils(true)), - FieldCondition(AlignmentRecordField.mapq.toString(), (x: Int) => x > 10)) - - val highQualityRead = AlignmentRecord.newBuilder - .setReadMapped(true) - .setMapq(30) - .build - - assert(highQualityReadCondition(highQualityRead)) - - val lowQualityRead = AlignmentRecord.newBuilder - .setReadMapped(true) - .setMapq(5) - .build - - assert(!highQualityReadCondition(lowQualityRead)) - } - - test("create record condition OR of record conditions") { - - val sample1Conditon = RecordCondition[AlignmentRecord]( - FieldCondition(AlignmentRecordField.recordGroupSample.toString(), (x: String) => x == "sample1")) - - val sample2Conditon = RecordCondition[AlignmentRecord]( - FieldCondition(AlignmentRecordField.recordGroupSample.toString(), (x: String) => x == "sample2")) - - val sample1ORsample2 = sample1Conditon || sample2Conditon - - val sample1Read = AlignmentRecord.newBuilder - .setRecordGroupSample("sample1") - .build - - val sample2Read = AlignmentRecord.newBuilder - .setRecordGroupSample("sample2") - .build - - val sample3Read = AlignmentRecord.newBuilder - .setRecordGroupSample("sample3") - .build - - assert(sample1ORsample2(sample1Read)) - assert(sample1ORsample2(sample2Read)) - assert(!sample1ORsample2(sample3Read)) - } - - test("high quality adam read condition") { - - val highQualityReadCondition = AlignmentRecordConditions.isHighQuality(10) - val highQualityRead = AlignmentRecord.newBuilder - .setReadMapped(true) - .setMapq(30) - .build - - assert(highQualityReadCondition(highQualityRead)) - - val lowQualityRead = AlignmentRecord.newBuilder - .setReadMapped(true) - .setMapq(5) - .build - - assert(!highQualityReadCondition(lowQualityRead)) - } - - test("passing genotype record condition") { - val v0 = Variant.newBuilder - .setContig(Contig.newBuilder.setContigName("11").build) - .setStart(17409571) - .setReferenceAllele("T") - .setAlternateAllele("C") - .build - - val passFilterAnnotation = - VariantCallingAnnotations.newBuilder().setVariantIsPassing(true).build() - val failFilterAnnotation = - VariantCallingAnnotations.newBuilder().setVariantIsPassing(false).build() - - val genotypes = Seq[Genotype]( - Genotype.newBuilder().setVariant(v0) - .setSampleId("ignored") - .setVariantCallingAnnotations(passFilterAnnotation).build(), - Genotype.newBuilder() - .setSampleId("ignored") - .setVariant(v0) - .setVariantCallingAnnotations(failFilterAnnotation).build()) - - val filtered = genotypes.filter(GenotypeConditions.isPassing.filter) - - assert(filtered.size == 1) - } - -}