[SPARK-1766] sorted functions to meet pedantic requirements
Pedantry is underrated

Author: Chris Cope <ccope@resilientscience.com>

Closes #1859 from copester/master and squashes the following commits:

0fb4499 [Chris Cope] [SPARK-1766] sorted functions to meet pedantic requirements
Chris Cope authored and pwendell committed Aug 10, 2014
1 parent b431e67 commit e45daf2
Showing 1 changed file with 19 additions and 19 deletions: core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -237,6 +237,25 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
     combineByKey[V]((v: V) => v, func, func, partitioner)
   }
 
+  /**
+   * Merge the values for each key using an associative reduce function. This will also perform
+   * the merging locally on each mapper before sending results to a reducer, similarly to a
+   * "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
+   */
+  def reduceByKey(func: (V, V) => V, numPartitions: Int): RDD[(K, V)] = {
+    reduceByKey(new HashPartitioner(numPartitions), func)
+  }
+
+  /**
+   * Merge the values for each key using an associative reduce function. This will also perform
+   * the merging locally on each mapper before sending results to a reducer, similarly to a
+   * "combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/
+   * parallelism level.
+   */
+  def reduceByKey(func: (V, V) => V): RDD[(K, V)] = {
+    reduceByKey(defaultPartitioner(self), func)
+  }
+
   /**
    * Merge the values for each key using an associative reduce function, but return the results
    * immediately to the master as a Map. This will also perform the merging locally on each mapper
@@ -374,15 +393,6 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
     countApproxDistinctByKey(relativeSD, defaultPartitioner(self))
   }
 
-  /**
-   * Merge the values for each key using an associative reduce function. This will also perform
-   * the merging locally on each mapper before sending results to a reducer, similarly to a
-   * "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
-   */
-  def reduceByKey(func: (V, V) => V, numPartitions: Int): RDD[(K, V)] = {
-    reduceByKey(new HashPartitioner(numPartitions), func)
-  }
-
   /**
    * Group the values for each key in the RDD into a single sequence. Allows controlling the
    * partitioning of the resulting key-value pair RDD by passing a Partitioner.
@@ -482,16 +492,6 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
     combineByKey(createCombiner, mergeValue, mergeCombiners, defaultPartitioner(self))
   }
 
-  /**
-   * Merge the values for each key using an associative reduce function. This will also perform
-   * the merging locally on each mapper before sending results to a reducer, similarly to a
-   * "combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/
-   * parallelism level.
-   */
-  def reduceByKey(func: (V, V) => V): RDD[(K, V)] = {
-    reduceByKey(defaultPartitioner(self), func)
-  }
-
   /**
    * Group the values for each key in the RDD into a single sequence. Hash-partitions the
    * resulting RDD with the existing partitioner/parallelism level.
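The commit only relocates the two reduceByKey overloads so that the file's methods are sorted; their behavior is unchanged. Both are thin wrappers around reduceByKey(partitioner, func). For context, here is a minimal, self-contained sketch of how the two relocated overloads are typically called; the object name, the local SparkContext setup, and the sample data are illustrative assumptions, not part of this commit. It targets the Spark 1.x era of this commit, where the SparkContext._ import supplies the PairRDDFunctions implicits.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.SparkContext._  // brings PairRDDFunctions into scope (pre-1.3 style)

object ReduceByKeyExample {
  def main(args: Array[String]): Unit = {
    // Local SparkContext purely for illustration.
    val sc = new SparkContext(new SparkConf().setAppName("reduceByKey-demo").setMaster("local[2]"))

    val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))

    // Overload with numPartitions: hash-partitions the output into 4 partitions.
    val counted = pairs.reduceByKey(_ + _, 4)

    // Overload without a partitioner argument: defaultPartitioner(self) decides.
    val countedDefault = pairs.reduceByKey(_ + _)

    println(counted.collect().toMap)         // Map(a -> 4, b -> 2)
    println(countedDefault.collect().toMap)  // Map(a -> 4, b -> 2)

    sc.stop()
  }
}

In both calls the reduce function must be associative, since partial merging happens on each mapper before the shuffle, as the Scaladoc above notes.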
