Skip to content

Commit

Permalink
Address review comments with minor cleanups.
Browse files (browse the repository at this point in the history)
  • Loading branch information
pythonspeed committed Jan 26, 2024
1 parent e02eeae commit 2df6625
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 14 deletions.
28 changes: 18 additions & 10 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2282,7 +2282,16 @@ def to_pandas(
return self._to_pandas_without_object_columns(
self, use_pyarrow_extension_array, **kwargs
)
else:
return self._to_pandas_with_object_columns(
use_pyarrow_extension_array, **kwargs
)

def _to_pandas_with_object_columns(
self,
use_pyarrow_extension_array: bool = False, # noqa: FBT001
**kwargs: Any,
) -> pd.DataFrame:
# Find which columns are of type pl.Object, and which aren't:
object_columns = []
not_object_columns = []
Expand All @@ -2293,12 +2302,13 @@ def to_pandas(
not_object_columns.append(i)

# Export columns that aren't pl.Object, in the same order:
df_without_objects = self.select(
[col(self.columns[i]) for i in not_object_columns]
)
pandas_df = self._to_pandas_without_object_columns(
df_without_objects, use_pyarrow_extension_array, **kwargs
)
if not_object_columns:
df_without_objects = self[:, not_object_columns]
pandas_df = self._to_pandas_without_object_columns(
df_without_objects, use_pyarrow_extension_array, **kwargs
)
else:
pandas_df = pd.DataFrame()

# Add columns that are pl.Object, using Series' custom to_pandas()
# logic for this case. We do this in order, so the original index for
Expand All @@ -2307,7 +2317,7 @@ def to_pandas(
# missing columns to the inserted column's left.
for i in object_columns:
name = self.columns[i]
pandas_df.insert(i, name, self.get_column(name).to_pandas())
pandas_df.insert(i, name, self.to_series(i).to_pandas())

return pandas_df

Expand Down Expand Up @@ -3674,9 +3684,7 @@ def unpack_table_name(name: str) -> tuple[str | None, str | None, str]:

# ensure conversion to pandas uses the pyarrow extension array option
# so that we can make use of the sql/db export *without* copying data
res: int | None = self.to_pandas(
use_pyarrow_extension_array=True,
).to_sql(
res: int | None = self.to_pandas(use_pyarrow_extension_array=True,).to_sql(
name=unpacked_table_name,
schema=db_schema,
con=engine_sa,
Expand Down
8 changes: 4 additions & 4 deletions py-polars/tests/unit/interop/test_to_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,9 @@ def test_to_pandas_datetime() -> None:
@pytest.mark.parametrize("use_pyarrow_extension_array", [True, False])
def test_object_to_pandas_series(use_pyarrow_extension_array: bool) -> None:
values = [object(), [1, 2, 3]]
assert (
pd.testing.assert_series_equal(
pl.Series("a", values, dtype=pl.Object).to_pandas(
use_pyarrow_extension_array=use_pyarrow_extension_array
)
== pd.Series(values, dtype=object)
).all()
),
pd.Series(values, dtype=object, name="a"),
)

0 comments on commit 2df6625

Please sign in to comment.