Skip to content

Commit

Permalink
[SPARK-20585][SPARKR] R generic hint support
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

Adds support for generic hints on `SparkDataFrame`.

## How was this patch tested?

Unit tests, `check-cran.sh`

Author: zero323 <zero323@users.noreply.github.com>

Closes #17851 from zero323/SPARK-20585.
  • Loading branch information
zero323 authored and Felix Cheung committed May 4, 2017
1 parent b8302cc commit 9c36aa2
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 0 deletions.
1 change: 1 addition & 0 deletions R/pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ exportMethods("arrange",
"group_by",
"groupBy",
"head",
"hint",
"insertInto",
"intersect",
"isLocal",
Expand Down
30 changes: 30 additions & 0 deletions R/pkg/R/DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -3715,3 +3715,33 @@ setMethod("rollup",
sgd <- callJMethod(x@sdf, "rollup", jcol)
groupedData(sgd)
})

#' hint
#'
#' Specifies an execution plan hint and returns a new SparkDataFrame.
#'
#' @param x a SparkDataFrame.
#' @param name a name of the hint.
#' @param ... optional parameters for the hint; every parameter must be of type character.
#' @return A SparkDataFrame.
#' @family SparkDataFrame functions
#' @aliases hint,SparkDataFrame,character-method
#' @rdname hint
#' @name hint
#' @export
#' @examples
#' \dontrun{
#' df <- createDataFrame(mtcars)
#' avg_mpg <- mean(groupBy(createDataFrame(mtcars), "cyl"), "mpg")
#'
#' head(join(df, hint(avg_mpg, "broadcast"), df$cyl == avg_mpg$cyl))
#' }
#' @note hint since 2.2.0
setMethod("hint",
          signature(x = "SparkDataFrame", name = "character"),
          function(x, name, ...) {
            parameters <- list(...)
            # vapply() is type-stable: it always yields a logical vector, even when
            # no hint parameters are supplied, whereas sapply() would return list().
            stopifnot(all(vapply(parameters, is.character, logical(1))))
            # Forward the hint name and its parameters to the JVM-side "hint" method
            # and wrap the resulting Java object as a SparkDataFrame.
            jdf <- callJMethod(x@sdf, "hint", name, parameters)
            dataFrame(jdf)
          })
4 changes: 4 additions & 0 deletions R/pkg/R/generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,10 @@ setGeneric("group_by", function(x, ...) { standardGeneric("group_by") })
#' @export
setGeneric("groupBy", function(x, ...) { standardGeneric("groupBy") })

#' @rdname hint
#' @export
setGeneric("hint", function(x, name, ...) {
  # Hand off to whichever method is registered for the supplied arguments.
  standardGeneric("hint")
})

#' @rdname insertInto
#' @export
setGeneric("insertInto", function(x, tableName, ...) { standardGeneric("insertInto") })
Expand Down
12 changes: 12 additions & 0 deletions R/pkg/inst/tests/testthat/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -2182,6 +2182,18 @@ test_that("join(), crossJoin() and merge() on a DataFrame", {

unlink(jsonPath2)
unlink(jsonPath3)

# Join with broadcast hint
df1 <- sql("SELECT * FROM range(10e10)")
df2 <- sql("SELECT * FROM range(10e10)")

execution_plan <- capture.output(explain(join(df1, df2, df1$id == df2$id)))
expect_false(any(grepl("BroadcastHashJoin", execution_plan)))

execution_plan_hint <- capture.output(
explain(join(df1, hint(df2, "broadcast"), df1$id == df2$id))
)
expect_true(any(grepl("BroadcastHashJoin", execution_plan_hint)))
})

test_that("toJSON() on DataFrame", {
Expand Down

0 comments on commit 9c36aa2

Please sign in to comment.