Skip to content

Commit

Permalink
Merge pull request apache#30 from shivaram/string-tests
Browse files Browse the repository at this point in the history
Add tests for partitioning with string keys
  • Loading branch information
concretevitamin committed Feb 27, 2014
2 parents aacd726 + 21fa2d8 commit c5bce07
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 5 deletions.
2 changes: 2 additions & 0 deletions pkg/R/RDD.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ setMethod("getJRDD", signature(rdd = "PipelinedRDD"),
as.character(.sparkREnv[["libname"]]),
broadcastArr,
prev_jrdd$classTag())
# The RDD is serialized after we create a RRDD
rdd@env$serialized <- TRUE
rdd@env$jrdd_val <- rddRef$asJavaRDD()
rdd@env$jrdd_val
})
Expand Down
23 changes: 19 additions & 4 deletions pkg/inst/tests/test_shuffle.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,8 @@ numPairs <- list(list(1L, 100), list(2L, 200), list(4L, -1), list(3L, 1),
list(3L, 0))
numPairsRdd <- parallelize(sc, numPairs, length(numPairs))

strList <- list("Dexter Morgan: Blood. Sometimes it sets my teeth on edge, ",
"other times it helps me control the chaos.",
"Dexter Morgan: Harry and Dorris Morgan did a wonderful job ",
"raising me. But they're both dead now. I didn't kill them. Honest.")
strList <- list("Dexter Morgan: Blood. Sometimes it sets my teeth on edge and ",
"Dexter Morgan: Harry and Dorris Morgan did a wonderful job ")
strListRDD <- parallelize(sc, strList, 4)

test_that("groupByKey for integers", {
Expand Down Expand Up @@ -104,3 +102,20 @@ test_that("partitionBy works with dependencies", {
expect_equal(actual_first, expected_first)
expect_equal(actual_second, expected_second)
})

test_that("test partitionBy with string keys", {
  # Break each line into space-separated tokens, then turn every token into a
  # (word, 1L) pair so that the word acts as the key.
  tokens <- flatMap(strListRDD, function(line) {
    strsplit(line, " ")[[1]]
  })
  keyedTokens <- lapply(tokens, function(word) {
    list(word, 1L)
  })

  partitioned <- partitionBy(keyedTokens, 2L)

  # Keep only the pairs of a collected partition whose key equals `key`.
  pairsWithKey <- function(pairs, key) {
    Filter(function(item) { item[[1]] == key }, pairs)
  }

  # The test expects both "Dexter" pairs in partition 0 and both "and" pairs
  # in partition 1.
  dexterPairs <- pairsWithKey(collectPartition(partitioned, 0L), "Dexter")
  andPairs <- pairsWithKey(collectPartition(partitioned, 1L), "and")

  expect_equal(dexterPairs, list(list("Dexter", 1), list("Dexter", 1)))
  expect_equal(andPairs, list(list("and", 1), list("and", 1)))
})
17 changes: 16 additions & 1 deletion pkg/inst/tests/test_textFile.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,22 @@ test_that("textFile() followed by a collect() returns the same content", {
unlink(fileName)
})

test_that("textFile() word count works as expected", {
  # Write the mock lines to a scratch file that textFile() reads back.
  fileName <- tempfile(pattern="spark-test", fileext=".tmp")
  writeLines(mockFile, fileName)

  rdd <- textFile(sc, fileName)

  # Split every line on single spaces and pair each word with a count of 1L.
  words <- flatMap(rdd, function(line) { strsplit(line, " ")[[1]] })
  wordCount <- lapply(words, function(word) { list(word, 1L) })

  # Combine the per-word counts with "+" and pull the result back into R.
  counts <- reduceByKey(wordCount, "+", 2L)
  output <- collect(counts)
  expected <- list(list("pretty.", 1), list("is", 2), list("awesome.", 1),
                   list("Spark", 2))
  expect_equal(output, expected)

  # Remove the scratch file so the test does not leave it behind; the other
  # textFile() tests in this file clean up the same way.
  unlink(fileName)
})

test_that("several transformations on RDD created by textFile()", {
fileName <- tempfile(pattern="spark-test", fileext=".tmp")
writeLines(mockFile, fileName)
Expand All @@ -40,4 +56,3 @@ test_that("several transformations on RDD created by textFile()", {

unlink(fileName)
})

0 comments on commit c5bce07

Please sign in to comment.