Skip to content

Commit

Permalink
REFACTOR: Update importance API test cases to reflect changes (#237)
Browse files: browse the repository at this point in the history
  • Loading branch information
NickEdwards7502 committed Sep 11, 2024
1 parent e08f12a commit ddc5912
Showing 1 changed file with 7 additions and 20 deletions.
27 changes: 7 additions & 20 deletions src/test/scala/au/csiro/variantspark/api/ImportanceApiTest.scala
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,33 +5,20 @@ import au.csiro.variantspark.test.SparkTest
import org.junit.Test
import org.junit.Assert._
import au.csiro.variantspark.api._
import au.csiro.variantspark.algo.RandomForestParams
import org.apache.spark.sql.{SparkSession, SQLContext}
import org.apache.spark.SparkConf

class ImportanceApiTest extends SparkTest {
/**
 * Runs the legacy `ImportanceAnalysis(...)` entry point against the chr22 sample data.
 *
 * Checks that the feature source and the label source both report 1092 samples, then
 * builds the analysis with 200 trees, a batch size of 50 and seed 17, and verifies that
 * the ten highest-importance rows are returned with `22_16050678_C_T` ranked first.
 */
@Test
def testImportanceAnalysisLegacyApi() {
  implicit val vsContext = VSContext(spark)

  // Input data: features from the VCF file, labels from the matching CSV column.
  val vcfFeatures = vsContext.featureSource("data/chr22_1000.vcf")
  assertEquals(1092, vcfFeatures.sampleNames.size)
  val phenotype = vsContext.loadLabel("data/chr22-labels.csv", "22_16050678")
  assertEquals(1092, phenotype.getLabels(vcfFeatures.sampleNames).length)

  val analysis =
    ImportanceAnalysis(vcfFeatures, phenotype, nTrees = 200, batchSize = 50, seed = Some(17L))
  val importanceFrame = analysis.variableImportance
  import importanceFrame.sqlContext._
  importanceFrame.cache()

  // Rank by the "importance" column, highest first, and keep the first ten rows.
  val ranked = importanceFrame.orderBy(desc("importance"))
  val topRows = ranked.limit(10).collect()
  for (row <- topRows) println(row)

  assertEquals(10, topRows.size)
  assertEquals("22_16050678_C_T", topRows.head.getString(0))
}

@Test
def testImportanceAnalysisNewApi() {
implicit val vsContext = VSContext(spark)
implicit val sqlContext = spark.sqlContext
val features = vsContext.importVCF("data/chr22_1000.vcf")
val label = vsContext.loadLabel("data/chr22-labels.csv", "22_16050678")
val impAnalysis =
features.importanceAnalysis(label, nTrees = 200, batchSize = 50, seed = Some(17L))
val params = RandomForestParams(seed = 17L)
val rfModel = RFModelTrainer.trainModel(features, label, params, 200, 50)
val impAnalysis = new ImportanceAnalysis(sqlContext, features, rfModel)
val top10Variables = impAnalysis.importantVariables(10)
assertEquals(10, top10Variables.size)
assertEquals("22_16050678_C_T", top10Variables.head._1)
Expand Down

0 comments on commit ddc5912

Please sign in to comment.