From 555731d74a0ff5dc2ef38f8552df1e1891579a6f Mon Sep 17 00:00:00 2001 From: Alain Date: Mon, 20 Apr 2015 13:54:44 -0700 Subject: [PATCH] [PYSPARK] Fix doc of "fold" function in rdd.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the discussion in #5587, it’s necessary to point out that the lambda function passed to “fold” takes its arguments in the opposite order. --- python/pyspark/rdd.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index d0f2c62da33d5..7d200f7cc5f2a 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -820,6 +820,9 @@ def fold(self, zeroValue, op): as its result value to avoid object allocation; however, it should not modify C{t2}. + Note that the provided lambda function should take the opposite order, + which means C{t1} needs to be elements and C{t2} be the "zero value." + >>> from operator import add >>> sc.parallelize([1, 2, 3, 4, 5]).fold(0, add) 15 @@ -827,7 +830,7 @@ def fold(self, zeroValue, op): def func(iterator): acc = zeroValue for obj in iterator: - acc = op(acc, obj) + acc = op(obj, acc) yield acc vals = self.mapPartitions(func).collect() return reduce(op, vals, zeroValue)