From 555731d74a0ff5dc2ef38f8552df1e1891579a6f Mon Sep 17 00:00:00 2001
From: Alain <aihe@usc.edu>
Date: Mon, 20 Apr 2015 13:54:44 -0700
Subject: [PATCH] [PYSPARK] Fix doc of "fold" function in rdd.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Following the discussion in #5587, point out in the docstring that the
function passed to “fold” takes its arguments in the opposite order:
the element comes first and the “zero value” comes second.
---
 python/pyspark/rdd.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index d0f2c62da33d5..7d200f7cc5f2a 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -820,6 +820,9 @@ def fold(self, zeroValue, op):
         as its result value to avoid object allocation; however, it should not
         modify C{t2}.
 
+        Note that C{op} receives its arguments in the opposite order: C{t1}
+        is an element of the RDD and C{t2} is the running "zero value".
+
         >>> from operator import add
         >>> sc.parallelize([1, 2, 3, 4, 5]).fold(0, add)
         15
@@ -827,7 +830,7 @@ def fold(self, zeroValue, op):
         def func(iterator):
             acc = zeroValue
             for obj in iterator:
-                acc = op(acc, obj)
+                acc = op(obj, acc)
             yield acc
         vals = self.mapPartitions(func).collect()
         return reduce(op, vals, zeroValue)