Skip to content

Commit

Permalink
[SPARK-4578] fix asDict() with nested Row()
Browse files Browse the repository at this point in the history
The Row object is created on the fly once the field is accessed, so we should access the fields by getattr() in asDict().

Author: Davies Liu <davies@databricks.com>

Closes #3434 from davies/fix_asDict and squashes the following commits:

b20f1e7 [Davies Liu] fix asDict() with nested Row()
  • Loading branch information
Davies Liu authored and pwendell committed Nov 25, 2014
1 parent b660de7 commit 050616b
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 4 deletions.
2 changes: 1 addition & 1 deletion python/pyspark/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -1178,7 +1178,7 @@ class Row(tuple):

def asDict(self):
""" Return as a dict """
return dict(zip(self.__FIELDS__, self))
return dict((n, getattr(self, n)) for n in self.__FIELDS__)

def __repr__(self):
# call collect __repr__ for nested objects
Expand Down
7 changes: 4 additions & 3 deletions python/pyspark/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,7 @@ def setUpClass(cls):
@classmethod
def tearDownClass(cls):
ReusedPySparkTestCase.tearDownClass()
shutil.rmtree(cls.tempdir.name)
shutil.rmtree(cls.tempdir.name, ignore_errors=True)

def setUp(self):
self.sqlCtx = SQLContext(self.sc)
Expand Down Expand Up @@ -930,8 +930,9 @@ def test_convert_row_to_dict(self):
rdd = self.sc.parallelize([row])
srdd = self.sqlCtx.inferSchema(rdd)
srdd.registerTempTable("test")
row = self.sqlCtx.sql("select l[0].a AS la from test").first()
self.assertEqual(1, row.asDict()["la"])
row = self.sqlCtx.sql("select l, d from test").first()
self.assertEqual(1, row.asDict()["l"][0].a)
self.assertEqual(1.0, row.asDict()['d']['key'].c)

def test_infer_schema_with_udt(self):
from pyspark.tests import ExamplePoint, ExamplePointUDT
Expand Down

0 comments on commit 050616b

Please sign in to comment.