Skip to content

Commit

Permalink
Merge pull request apache#189 from palantir/rk/upstream
Browse files Browse the repository at this point in the history
Merge from upstream
  • Loading branch information
robert3005 authored May 20, 2017
2 parents c458265 + cb40201 commit 7f39dff
Show file tree
Hide file tree
Showing 205 changed files with 2,907 additions and 996 deletions.
1 change: 1 addition & 0 deletions R/pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ exportClasses("SparkDataFrame")
exportMethods("arrange",
"as.data.frame",
"attach",
"broadcast",
"cache",
"checkpoint",
"coalesce",
Expand Down
29 changes: 29 additions & 0 deletions R/pkg/R/DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -3769,3 +3769,32 @@ setMethod("alias",
sdf <- callJMethod(object@sdf, "alias", data)
dataFrame(sdf)
})

#' broadcast
#'
#' Mark a SparkDataFrame as small enough to be used in broadcast joins and return
#' the marked SparkDataFrame.
#'
#' This is equivalent to calling \code{hint(x, "broadcast")}.
#'
#' @param x a SparkDataFrame.
#' @return a SparkDataFrame.
#'
#' @aliases broadcast,SparkDataFrame-method
#' @family SparkDataFrame functions
#' @rdname broadcast
#' @name broadcast
#' @export
#' @examples
#' \dontrun{
#' df <- createDataFrame(mtcars)
#' avg_mpg <- mean(groupBy(createDataFrame(mtcars), "cyl"), "mpg")
#'
#' head(join(df, broadcast(avg_mpg), df$cyl == avg_mpg$cyl))
#' }
#' @note broadcast since 2.3.0
setMethod("broadcast",
          signature(x = "SparkDataFrame"),
          function(x) {
            # Call the static "broadcast" method of org.apache.spark.sql.functions
            # on the underlying sdf slot and wrap the result with dataFrame().
            dataFrame(
              callJStatic("org.apache.spark.sql.functions", "broadcast", x@sdf)
            )
          })
4 changes: 2 additions & 2 deletions R/pkg/R/context.R
Original file line number Diff line number Diff line change
Expand Up @@ -258,15 +258,15 @@ includePackage <- function(sc, pkg) {
#'
#' # Large Matrix object that we want to broadcast
#' randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000))
#' randomMatBr <- broadcast(sc, randomMat)
#' randomMatBr <- broadcastRDD(sc, randomMat)
#'
#' # Use the broadcast variable inside the function
#' useBroadcast <- function(x) {
#' sum(value(randomMatBr) * x)
#' }
#' sumRDD <- lapply(rdd, useBroadcast)
#'}
broadcast <- function(sc, object) {
broadcastRDD <- function(sc, object) {
objName <- as.character(substitute(object))
serializedObj <- serialize(object, connection = NULL)

Expand Down
Loading

0 comments on commit 7f39dff

Please sign in to comment.