Skip to content

Commit

Permalink
[SPARK-4672][Core]Checkpoint() should clear f to shorten the serializ…
Browse files Browse the repository at this point in the history
…ation chain

The related JIRA is https://issues.apache.org/jira/browse/SPARK-4672

The f closure of `PartitionsRDD(ZippedPartitionsRDD2)` contains a `$outer` that references EdgeRDD/VertexRDD, which causes task's serialization chain become very long in iterative GraphX applications. As a result, StackOverflow error will occur. If we set "f = null" in `clearDependencies()`, checkpoint() can cut off the long serialization chain. More details and explanation can be found in the JIRA.

Author: JerryLead <JerryLead@163.com>
Author: Lijie Xu <csxulijie@gmail.com>

Closes apache#3545 from JerryLead/my_core and squashes the following commits:

f7faea5 [JerryLead] checkpoint() should clear the f to avoid StackOverflow error
c0169da [JerryLead] Merge branch 'master' of https://github.com/apache/spark
52799e3 [Lijie Xu] Merge pull request #1 from apache/master
  • Loading branch information
JerryLead authored and ankurdave committed Dec 3, 2014
1 parent 17c162f commit 77be8b9
Showing 1 changed file with 6 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ private[spark] abstract class ZippedPartitionsBaseRDD[V: ClassTag](

private[spark] class ZippedPartitionsRDD2[A: ClassTag, B: ClassTag, V: ClassTag](
sc: SparkContext,
f: (Iterator[A], Iterator[B]) => Iterator[V],
var f: (Iterator[A], Iterator[B]) => Iterator[V],
var rdd1: RDD[A],
var rdd2: RDD[B],
preservesPartitioning: Boolean = false)
Expand All @@ -92,13 +92,14 @@ private[spark] class ZippedPartitionsRDD2[A: ClassTag, B: ClassTag, V: ClassTag]
super.clearDependencies()
rdd1 = null
rdd2 = null
f = null
}
}

private[spark] class ZippedPartitionsRDD3
[A: ClassTag, B: ClassTag, C: ClassTag, V: ClassTag](
sc: SparkContext,
f: (Iterator[A], Iterator[B], Iterator[C]) => Iterator[V],
var f: (Iterator[A], Iterator[B], Iterator[C]) => Iterator[V],
var rdd1: RDD[A],
var rdd2: RDD[B],
var rdd3: RDD[C],
Expand All @@ -117,13 +118,14 @@ private[spark] class ZippedPartitionsRDD3
rdd1 = null
rdd2 = null
rdd3 = null
f = null
}
}

private[spark] class ZippedPartitionsRDD4
[A: ClassTag, B: ClassTag, C: ClassTag, D:ClassTag, V: ClassTag](
sc: SparkContext,
f: (Iterator[A], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V],
var f: (Iterator[A], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V],
var rdd1: RDD[A],
var rdd2: RDD[B],
var rdd3: RDD[C],
Expand All @@ -145,5 +147,6 @@ private[spark] class ZippedPartitionsRDD4
rdd2 = null
rdd3 = null
rdd4 = null
f = null
}
}

0 comments on commit 77be8b9

Please sign in to comment.