diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
index 89f3fd47724fe..0d6751f3fa6d2 100644
--- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
@@ -57,7 +57,6 @@ object PythonRunner {
     val builder = new ProcessBuilder(Seq(pythonExec, "-u", formattedPythonFile) ++ otherArgs)
     val env = builder.environment()
     env.put("PYTHONPATH", pythonPath)
-    env.put("PYSPARK_PYTHON", pythonExec)
     env.put("PYSPARK_GATEWAY_PORT", "" + gatewayServer.getListeningPort)
     builder.redirectErrorStream(true) // Ugly but needed for stdout and stderr to synchronize
     val process = builder.start()
diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index 4e1ad472a6d2c..099cc1db0ba52 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -86,11 +86,12 @@ def run(self):
     java_import(gateway.jvm, "org.apache.spark.api.python.*")
     java_import(gateway.jvm, "org.apache.spark.streaming.api.java.*")
     java_import(gateway.jvm, "org.apache.spark.streaming.api.python.*")
-    java_import(gateway.jvm, "org.apache.spark.streaming.*")  # for Duration and Time
+    java_import(gateway.jvm, "org.apache.spark.streaming.*")
     java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
     java_import(gateway.jvm, "org.apache.spark.sql.SQLContext")
     java_import(gateway.jvm, "org.apache.spark.sql.hive.HiveContext")
     java_import(gateway.jvm, "org.apache.spark.sql.hive.LocalHiveContext")
     java_import(gateway.jvm, "org.apache.spark.sql.hive.TestHiveContext")
     java_import(gateway.jvm, "scala.Tuple2")
+    return gateway
diff --git a/python/pyspark/streaming/dstream.py b/python/pyspark/streaming/dstream.py
index b1d1b0d8dc165..696de900b0a69 100644
--- a/python/pyspark/streaming/dstream.py
+++ b/python/pyspark/streaming/dstream.py
@@ -412,7 +412,7 @@ def get_output(rdd, time):
 
 # TODO: implement countByWindow
 # TODO: implement reduceByWindow
-# transform Operation
+# Transform Operation
 # TODO: implement transform
 # TODO: implement transformWith
 # Following operation has dependency with transform
@@ -421,7 +421,7 @@ def get_output(rdd, time):
 # TODO: implement cogroup
 # TODO: implement join
 # TODO: implement leftOuterJoin
-# TODO: implemtnt rightOuterJoin
+# TODO: implement rightOuterJoin
 
 
 class PipelinedDStream(DStream):
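
Note on the java_gateway.py hunk: the added `return gateway` is the functional fix here. Without it, launch_gateway() falls off the end of the function and returns None, so every caller's gateway handle is None. A minimal sketch of the failure mode follows; the caller shown is a hypothetical simplification (in PySpark the real call site is SparkContext._ensure_initialized):

    from pyspark.java_gateway import launch_gateway

    gateway = launch_gateway()   # None before this fix, a py4j JavaGateway after it
    jvm = gateway.jvm            # AttributeError on None without `return gateway`
    # With the fix, classes registered via java_import above are reachable, e.g.:
    jssc_class = jvm.org.apache.spark.streaming.api.java.JavaStreamingContext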