Skip to content
This repository has been archived by the owner on Nov 30, 2019. It is now read-only.

Commit

Permalink
SPARK-5361, add in test case
Browse files Browse the repository at this point in the history
  • Loading branch information
Winston Chen committed Jan 22, 2015
1 parent 9f1a097 commit 4cf1187
Showing 1 changed file with 27 additions and 7 deletions.
34 changes: 27 additions & 7 deletions python/pyspark/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ def test_compressed_serializer(self):
self.assertEqual(["abc", u"123", range(5)] + range(1000), list(ser.load_stream(io)))



class PySparkTestCase(unittest.TestCase):

def setUp(self):
Expand Down Expand Up @@ -714,6 +715,25 @@ def test_sample(self):
wr_s21 = rdd.sample(True, 0.4, 21).collect()
self.assertNotEqual(set(wr_s11), set(wr_s21))

def test_multiple_python_java_RDD_conversions(self):
    """Regression test for SPARK-5361.

    Converting an RDD Python -> Java -> Python repeatedly used to raise;
    verify two consecutive round-trips preserve the element count.
    """
    from pyspark.rdd import RDD

    records = [
        (u'1', {u'director': u'David Lean'}),
        (u'2', {u'director': u'Andrew Dominik'})
    ]

    def round_trip(rdd):
        # Serialize to a Java object RDD, then wrap the result back
        # into a Python RDD via the JVM-side SerDe helper.
        java_rdd = rdd._to_java_object_rdd()
        python_rdd = self.sc._jvm.SerDe.javaToPython(java_rdd)
        return RDD(python_rdd, self.sc)

    converted = round_trip(self.sc.parallelize(records))
    self.assertEqual(2, converted.count())

    # The second conversion between Python and Java RDDs is the one
    # that threw exceptions before the SPARK-5361 fix.
    converted = round_trip(converted)
    self.assertEqual(2, converted.count())


class ProfilerTests(PySparkTestCase):

Expand Down Expand Up @@ -1105,19 +1125,19 @@ def test_oldhadoop(self):

def test_newhadoop(self):
    """Read data through the new Hadoop MapReduce API.

    Exercises both entry points: ``newAPIHadoopFile`` with an explicit
    path argument, and ``newAPIHadoopRDD`` with the input path supplied
    via a job configuration dict.
    """
    # NOTE(review): the captured diff contained both the pre- and
    # post-change versions of the newAPIHadoopFile call (duplicate,
    # syntactically invalid lines); kept a single copy. Backslash
    # continuations are redundant inside parentheses, so implicit
    # line joining is used instead (PEP 8).
    basepath = self.tempdir.name
    ints = sorted(self.sc.newAPIHadoopFile(
        basepath + "/sftestdata/sfint/",
        "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat",
        "org.apache.hadoop.io.IntWritable",
        "org.apache.hadoop.io.Text").collect())
    ei = [(1, u'aa'), (1, u'aa'), (2, u'aa'), (2, u'bb'), (2, u'bb'), (3, u'cc')]
    self.assertEqual(ints, ei)

    hellopath = os.path.join(SPARK_HOME, "python/test_support/hello.txt")
    newconf = {"mapred.input.dir": hellopath}
    hello = self.sc.newAPIHadoopRDD(
        "org.apache.hadoop.mapreduce.lib.input.TextInputFormat",
        "org.apache.hadoop.io.LongWritable",
        "org.apache.hadoop.io.Text",
        conf=newconf).collect()
    result = [(0, u'Hello World!')]
    self.assertEqual(hello, result)
Expand Down

0 comments on commit 4cf1187

Please sign in to comment.