Skip to content

Commit

Permalink
changed conversion to use Iterator[InternalRow] instead of Array
Browse files Browse the repository at this point in the history
arrow conversion done at partition by executors

some cleanup of APIs, made tests complete for non-complex data types

closes apache#23
  • Loading branch information
BryanCutler committed Feb 23, 2017
1 parent f44e6d7 commit e0bf11b
Show file tree
Hide file tree
Showing 6 changed files with 326 additions and 186 deletions.
15 changes: 10 additions & 5 deletions python/pyspark/sql/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,11 +393,11 @@ def collect(self):
@ignore_unicode_prefix
@since(2.0)
def collectAsArrow(self):
"""Returns all the records as an ArrowRecordBatch
"""Returns all records as list of deserialized ArrowPayloads
"""
with SCCallSiteSync(self._sc) as css:
port = self._jdf.collectAsArrowToPython()
return list(_load_from_socket(port, ArrowSerializer()))[0]
return list(_load_from_socket(port, ArrowSerializer()))

@ignore_unicode_prefix
@since(2.0)
Expand Down Expand Up @@ -1611,6 +1611,9 @@ def toPandas(self, useArrow=False):
This is only available if Pandas is installed and available.
:param useArrow: Make use of Apache Arrow for conversion, pyarrow must be installed
on the calling Python process.
.. note:: This method should only be used if the resulting Pandas's DataFrame is expected
to be small, as all the data is loaded into the driver's memory.
Expand All @@ -1619,11 +1622,13 @@ def toPandas(self, useArrow=False):
0 2 Alice
1 5 Bob
"""
import pandas as pd

if useArrow:
return self.collectAsArrow().to_pandas()
from pyarrow.table import concat_tables
tables = self.collectAsArrow()
table = concat_tables(tables)
return table.to_pandas()
else:
import pandas as pd
return pd.DataFrame.from_records(self.collect(), columns=self.columns)

##########################################################################################
Expand Down
Loading

0 comments on commit e0bf11b

Please sign in to comment.