diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 47d92f141cc7d..0414cbdfee14b 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -45,6 +45,9 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {
 
 #' @rdname DataFrame
 #' @export
+#'
+#' @param sdf A Java object reference to the backing Scala DataFrame
+#' @param isCached TRUE if the dataFrame is cached
 dataFrame <- function(sdf, isCached = FALSE) {
   new("DataFrame", sdf, isCached)
 }
@@ -244,7 +247,7 @@ setMethod("columns",
           })
 
 #' @rdname columns
-#' @export
+#' @aliases names,DataFrame,function-method
 setMethod("names",
           signature(x = "DataFrame"),
           function(x) {
@@ -399,23 +402,23 @@ setMethod("repartition",
             dataFrame(sdf)
           })
 
-#' toJSON
-#'
-#' Convert the rows of a DataFrame into JSON objects and return an RDD where
-#' each element contains a JSON string.
-#'
-#' @param x A SparkSQL DataFrame
-#' @return A StringRRDD of JSON objects
-#' @rdname tojson
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlCtx, path)
-#' newRDD <- toJSON(df)
-#'}
+# toJSON
+#
+# Convert the rows of a DataFrame into JSON objects and return an RDD where
+# each element contains a JSON string.
+#
+# @param x A SparkSQL DataFrame
+# @return A StringRRDD of JSON objects
+# @rdname tojson
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# path <- "path/to/file.json"
+# df <- jsonFile(sqlCtx, path)
+# newRDD <- toJSON(df)
+#}
 setMethod("toJSON",
           signature(x = "DataFrame"),
           function(x) {
@@ -578,8 +581,8 @@ setMethod("limit",
             dataFrame(res)
           })
 
-# Take the first NUM rows of a DataFrame and return a the results as a data.frame
-
+#' Take the first NUM rows of a DataFrame and return the results as a data.frame
+#'
 #' @rdname take
 #' @export
 #' @examples
@@ -644,22 +647,22 @@ setMethod("first",
             take(x, 1)
           })
 
-#' toRDD()
-#'
-#' Converts a Spark DataFrame to an RDD while preserving column names.
-#'
-#' @param x A Spark DataFrame
-#'
-#' @rdname DataFrame
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlCtx, path)
-#' rdd <- toRDD(df)
-#' }
+# toRDD()
+#
+# Converts a Spark DataFrame to an RDD while preserving column names.
+#
+# @param x A Spark DataFrame
+#
+# @rdname DataFrame
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# path <- "path/to/file.json"
+# df <- jsonFile(sqlCtx, path)
+# rdd <- toRDD(df)
+# }
 setMethod("toRDD",
           signature(x = "DataFrame"),
           function(x) {
@@ -706,6 +709,7 @@ setMethod("groupBy",
 #'
 #' Compute aggregates by specifying a list of columns
 #'
+#' @param x a DataFrame
 #' @rdname DataFrame
 #' @export
 setMethod("agg",
@@ -721,7 +725,7 @@ setMethod("agg",
 # the requested map function.
 #
 ###################################################################################
 
-#' @rdname lapply
+# @rdname lapply
 setMethod("lapply",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
@@ -729,14 +733,14 @@ setMethod("lapply",
             lapply(rdd, FUN)
           })
 
-#' @rdname lapply
+# @rdname lapply
 setMethod("map",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
             lapply(X, FUN)
           })
 
-#' @rdname flatMap
+# @rdname flatMap
 setMethod("flatMap",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
@@ -744,7 +748,7 @@ setMethod("flatMap",
             flatMap(rdd, FUN)
           })
 
-#' @rdname lapplyPartition
+# @rdname lapplyPartition
 setMethod("lapplyPartition",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
@@ -752,14 +756,14 @@ setMethod("lapplyPartition",
             lapplyPartition(rdd, FUN)
           })
 
-#' @rdname lapplyPartition
+# @rdname lapplyPartition
 setMethod("mapPartitions",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
             lapplyPartition(X, FUN)
          })
 
-#' @rdname foreach
+# @rdname foreach
 setMethod("foreach",
           signature(x = "DataFrame", func = "function"),
           function(x, func) {
@@ -767,7 +771,7 @@ setMethod("foreach",
             foreach(rdd, func)
           })
 
-#' @rdname foreach
+# @rdname foreach
 setMethod("foreachPartition",
           signature(x = "DataFrame", func = "function"),
           function(x, func) {
@@ -1009,7 +1013,7 @@ setMethod("sortDF",
           })
 
 #' @rdname sortDF
-#' @export
+#' @aliases orderBy,DataFrame,function-method
 setMethod("orderBy",
           signature(x = "DataFrame", col = "characterOrColumn"),
           function(x, col) {
@@ -1046,7 +1050,7 @@ setMethod("filter",
           })
 
 #' @rdname filter
-#' @export
+#' @aliases where,DataFrame,function-method
 setMethod("where",
           signature(x = "DataFrame", condition = "characterOrColumn"),
           function(x, condition) {
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 4f05ba524a01a..cae06e6af2bff 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -150,21 +150,21 @@ createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) {
   dataFrame(sdf)
 }
 
-#' toDF
-#'
-#' Converts an RDD to a DataFrame by infer the types.
-#'
-#' @param x An RDD
-#'
-#' @rdname DataFrame
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
-#' df <- toDF(rdd)
-#' }
+# toDF
+#
+# Converts an RDD to a DataFrame by inferring the types.
+#
+# @param x An RDD
+#
+# @rdname DataFrame
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
+# df <- toDF(rdd)
+# }
 setGeneric("toDF", function(x, ...) { standardGeneric("toDF") })
 
@@ -207,23 +207,23 @@ jsonFile <- function(sqlCtx, path) {
 }
 
-#' JSON RDD
-#'
-#' Loads an RDD storing one JSON object per string as a DataFrame.
-#'
-#' @param sqlCtx SQLContext to use
-#' @param rdd An RDD of JSON string
-#' @param schema A StructType object to use as schema
-#' @param samplingRatio The ratio of simpling used to infer the schema
-#' @return A DataFrame
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' rdd <- texFile(sc, "path/to/json")
-#' df <- jsonRDD(sqlCtx, rdd)
-#' }
+# JSON RDD
+#
+# Loads an RDD storing one JSON object per string as a DataFrame.
+#
+# @param sqlCtx SQLContext to use
+# @param rdd An RDD of JSON string
+# @param schema A StructType object to use as schema
+# @param samplingRatio The ratio of sampling used to infer the schema
+# @return A DataFrame
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# rdd <- textFile(sc, "path/to/json")
+# df <- jsonRDD(sqlCtx, rdd)
+# }
 # TODO: support schema
 jsonRDD <- function(sqlCtx, rdd, schema = NULL, samplingRatio = 1.0) {
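For context, the user-facing DataFrame operations that this patch keeps exported and documented can be exercised roughly as follows. This is a minimal sketch in the spirit of the package's own \dontrun examples; it assumes a local Spark installation, the SparkR build from this branch, a JSON file at the placeholder path used in the examples above, and a hypothetical "age" field in that file.

library(SparkR)

# Set up a SparkContext and SQLContext, as in the roxygen examples above.
sc <- sparkR.init(master = "local")
sqlCtx <- sparkRSQL.init(sc)

# Load a DataFrame from a JSON file (placeholder path, as in the examples).
df <- jsonFile(sqlCtx, "path/to/file.json")

# Exported, documented operations touched by this patch.
names(df)                        # documented under the same @rdname as columns()
adults <- where(df, df$age > 21) # where() is an alias of filter()
sorted <- orderBy(adults, "age") # orderBy() is an alias of sortDF()
take(sorted, 5)                  # first 5 rows returned as an R data.frame

RDD-level helpers such as toJSON, toRDD, toDF, jsonRDD, and the map/foreach family are demoted here from roxygen (#') to plain comments (#), so they no longer appear in the generated user documentation.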