forked from apache/spark
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request apache#168 from sun-rui/SPARKR-153_2
[SPARKR-153] phase 2: implement aggregateByKey() and foldByKey().
- Loading branch information
Showing
5 changed files
with
243 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
% Generated by roxygen2 (4.0.2): do not edit by hand | ||
\docType{methods} | ||
\name{aggregateByKey} | ||
\alias{aggregateByKey} | ||
\alias{aggregateByKey,RDD,ANY,ANY,ANY,integer-method} | ||
\title{Aggregate a pair RDD by each key.} | ||
\usage{ | ||
aggregateByKey(rdd, zeroValue, seqOp, combOp, numPartitions) | ||
|
||
\S4method{aggregateByKey}{RDD,ANY,ANY,ANY,integer}(rdd, zeroValue, seqOp, | ||
combOp, numPartitions) | ||
} | ||
\arguments{ | ||
\item{rdd}{An RDD.} | ||
|
||
\item{zeroValue}{A neutral "zero value".} | ||
|
||
\item{seqOp}{A function to aggregate the values of each key. It may return | ||
a different result type from the type of the values.} | ||
|
||
\item{combOp}{A function to aggregate results of seqOp.} | ||
} | ||
\value{ | ||
An RDD containing the aggregation result. | ||
} | ||
\description{ | ||
Aggregate the values of each key in an RDD, using given combine functions | ||
and a neutral "zero value". This function can return a different result type, | ||
U, than the type of the values in this RDD, V. Thus, we need one operation | ||
for merging a V into a U and one operation for merging two U's. The former | ||
operation is used for merging values within a partition, and the latter is | ||
used for merging values between partitions. To avoid memory allocation, both | ||
of these functions are allowed to modify and return their first argument | ||
instead of creating a new U. | ||
} | ||
\examples{ | ||
\dontrun{ | ||
sc <- sparkR.init() | ||
rdd <- parallelize(sc, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4))) | ||
zeroValue <- list(0, 0) | ||
seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) } | ||
combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) } | ||
aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L) | ||
# list(list(1, list(3, 2)), list(2, list(7, 2))) | ||
} | ||
} | ||
\seealso{ | ||
foldByKey, combineByKey | ||
} | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
% Generated by roxygen2 (4.0.2): do not edit by hand | ||
\docType{methods} | ||
\name{foldByKey} | ||
\alias{foldByKey} | ||
\alias{foldByKey,RDD,ANY,ANY,integer-method} | ||
\title{Fold a pair RDD by each key.} | ||
\usage{ | ||
foldByKey(rdd, zeroValue, func, numPartitions) | ||
|
||
\S4method{foldByKey}{RDD,ANY,ANY,integer}(rdd, zeroValue, func, numPartitions) | ||
} | ||
\arguments{ | ||
\item{rdd}{An RDD.} | ||
|
||
\item{zeroValue}{A neutral "zero value".} | ||
|
||
\item{func}{An associative function for folding values of each key.} | ||
} | ||
\value{ | ||
An RDD containing the aggregation result. | ||
} | ||
\description{ | ||
Aggregate the values of each key in an RDD, using an associative function "func" | ||
and a neutral "zero value" which may be added to the result an arbitrary | ||
number of times, and must not change the result (e.g., 0 for addition, or | ||
1 for multiplication). | ||
} | ||
\examples{ | ||
\dontrun{ | ||
sc <- sparkR.init() | ||
rdd <- parallelize(sc, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4))) | ||
foldByKey(rdd, 0, "+", 2L) # list(list(1, 3), list(2, 7)) | ||
} | ||
} | ||
\seealso{ | ||
aggregateByKey, combineByKey | ||
} | ||
|