From 2df6625e3bbfe4b4c02d1970bb1aec825e4f2646 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 26 Jan 2024 08:23:33 -0500 Subject: [PATCH] Address review comments with minor cleanups. --- py-polars/polars/dataframe/frame.py | 28 ++++++++++++------- .../tests/unit/interop/test_to_pandas.py | 8 +++--- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index d3c4d921315c..9f3c6639acd2 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -2282,7 +2282,16 @@ def to_pandas( return self._to_pandas_without_object_columns( self, use_pyarrow_extension_array, **kwargs ) + else: + return self._to_pandas_with_object_columns( + use_pyarrow_extension_array, **kwargs + ) + def _to_pandas_with_object_columns( + self, + use_pyarrow_extension_array: bool = False, # noqa: FBT001 + **kwargs: Any, + ) -> pd.DataFrame: # Find which columns are of type pl.Object, and which aren't: object_columns = [] not_object_columns = [] @@ -2293,12 +2302,13 @@ def to_pandas( not_object_columns.append(i) # Export columns that aren't pl.Object, in the same order: - df_without_objects = self.select( - [col(self.columns[i]) for i in not_object_columns] - ) - pandas_df = self._to_pandas_without_object_columns( - df_without_objects, use_pyarrow_extension_array, **kwargs - ) + if not_object_columns: + df_without_objects = self[:, not_object_columns] + pandas_df = self._to_pandas_without_object_columns( + df_without_objects, use_pyarrow_extension_array, **kwargs + ) + else: + pandas_df = pd.DataFrame() # Add columns that are pl.Object, using Series' custom to_pandas() # logic for this case. We do this in order, so the original index for @@ -2307,7 +2317,7 @@ def to_pandas( # missing columns to the inserted column's left. for i in object_columns: name = self.columns[i] - pandas_df.insert(i, name, self.get_column(name).to_pandas()) + pandas_df.insert(i, name, self.to_series(i).to_pandas()) return pandas_df @@ -3674,9 +3684,7 @@ def unpack_table_name(name: str) -> tuple[str | None, str | None, str]: # ensure conversion to pandas uses the pyarrow extension array option # so that we can make use of the sql/db export *without* copying data - res: int | None = self.to_pandas( - use_pyarrow_extension_array=True, - ).to_sql( + res: int | None = self.to_pandas(use_pyarrow_extension_array=True,).to_sql( name=unpacked_table_name, schema=db_schema, con=engine_sa, diff --git a/py-polars/tests/unit/interop/test_to_pandas.py b/py-polars/tests/unit/interop/test_to_pandas.py index bc9f5dc5c661..2d15c16dcd7f 100644 --- a/py-polars/tests/unit/interop/test_to_pandas.py +++ b/py-polars/tests/unit/interop/test_to_pandas.py @@ -173,9 +173,9 @@ def test_to_pandas_datetime() -> None: @pytest.mark.parametrize("use_pyarrow_extension_array", [True, False]) def test_object_to_pandas_series(use_pyarrow_extension_array: bool) -> None: values = [object(), [1, 2, 3]] - assert ( + pd.testing.assert_series_equal( pl.Series("a", values, dtype=pl.Object).to_pandas( use_pyarrow_extension_array=use_pyarrow_extension_array - ) - == pd.Series(values, dtype=object) - ).all() + ), + pd.Series(values, dtype=object, name="a"), + )