SPARK-1019: pyspark RDD take() throws an NPE
pwendell committed Mar 10, 2014
1 parent b9be160 commit daae80e
Showing 2 changed files with 10 additions and 1 deletion.
3 changes: 2 additions & 1 deletion core/src/main/scala/org/apache/spark/TaskContext.scala
@@ -46,6 +46,7 @@ class TaskContext(
   }

   def executeOnCompleteCallbacks() {
-    onCompleteCallbacks.foreach{_()}
+    // Process complete callbacks in the reverse order of registration
+    onCompleteCallbacks.reverse.foreach{_()}
   }
 }
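
The change above makes executeOnCompleteCallbacks run the registered callbacks in last-registered-first (LIFO) order instead of registration order. Below is a minimal, self-contained sketch of that behavior, assuming nothing beyond the pattern in the patch; CallbackOrderDemo and the printed messages are illustrative stand-ins, not the real TaskContext API:

import scala.collection.mutable.ArrayBuffer

// Minimal sketch (illustrative names, not the real TaskContext): callbacks are
// registered in order but executed in reverse, i.e. last-registered-first.
object CallbackOrderDemo {
  private val onCompleteCallbacks = new ArrayBuffer[() => Unit]

  def addOnCompleteCallback(f: () => Unit): Unit = {
    onCompleteCallbacks += f
  }

  def executeOnCompleteCallbacks(): Unit = {
    // Same pattern as the patch above: iterate in reverse registration order
    onCompleteCallbacks.reverse.foreach { _() }
  }

  def main(args: Array[String]): Unit = {
    addOnCompleteCallback(() => println("close worker resources"))  // registered first
    addOnCompleteCallback(() => println("stop reading stdout"))     // registered last
    executeOnCompleteCallbacks()  // prints "stop reading stdout" before "close worker resources"
  }
}

LIFO execution means a callback registered later runs before cleanup that was registered earlier; the commit pairs this with the PythonRDD change below, whose comment notes that other completion callbacks might invalidate the input stream.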
8 changes: 8 additions & 0 deletions core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -100,6 +100,14 @@ private[spark] class PythonRDD[T: ClassTag](
       }
     }.start()

+    /*
+     * Partial fix for SPARK-1019: Attempts to stop reading the input stream since
+     * other completion callbacks might invalidate the input. Because interruption
+     * is not synchronous this still leaves a potential race where the interruption is
+     * processed only after the stream becomes invalid.
+     */
+    context.addOnCompleteCallback(() => context.interrupted = true)
+
     // Return an iterator that reads lines from the process's stdout
     val stream = new DataInputStream(new BufferedInputStream(worker.getInputStream, bufferSize))
     val stdoutIterator = new Iterator[Array[Byte]] {
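
The callback added above sets context.interrupted so that the code reading the Python worker's stdout can stop pulling from a stream that other completion callbacks may be about to invalidate. The diff does not show how stdoutIterator consumes that flag, so the following is only a sketch of the cooperative-interruption pattern the comment describes; InterruptibleLineReader, its length-prefixed framing, and the local interrupted field are hypothetical stand-ins for context.interrupted and the real iterator:

import java.io.{BufferedInputStream, DataInputStream, EOFException, InputStream}

// Hypothetical sketch of cooperative interruption: a volatile flag stands in for
// context.interrupted, and the iterator checks it before each read so that a
// completion callback flipping the flag stops further reads.
class InterruptibleLineReader(in: InputStream) extends Iterator[Array[Byte]] {
  @volatile var interrupted = false

  private val stream = new DataInputStream(new BufferedInputStream(in))
  private var nextRecord: Array[Byte] = read()

  private def read(): Array[Byte] = {
    if (interrupted) {
      null                              // stop once the completion callback has fired
    } else {
      try {
        val length = stream.readInt()   // example framing: length-prefixed records
        val bytes = new Array[Byte](length)
        stream.readFully(bytes)
        bytes
      } catch {
        case _: EOFException => null    // normal end of stream
      }
    }
  }

  override def hasNext: Boolean = nextRecord != null

  override def next(): Array[Byte] = {
    val current = nextRecord
    nextRecord = read()
    current
  }
}

Checking the flag only between reads is exactly why the commit comment calls this a partial fix: a read already blocked inside readInt or readFully can still race with the stream being invalidated by another callback.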
