From 5de08c728b0fa667e8baabb4806cb6807d6cd469 Mon Sep 17 00:00:00 2001
From: Jim Crist-Harif
Date: Thu, 25 Jan 2024 14:46:30 -0600
Subject: [PATCH] fix(pandas): support non-string categorical columns

---
 ibis/formats/pandas.py | 4 +++-
 ibis/formats/tests/test_pandas.py | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py
index 7fa0e4f37137..7badf372de8a 100644
--- a/ibis/formats/pandas.py
+++ b/ibis/formats/pandas.py
@@ -33,7 +33,9 @@ def to_ibis(cls, typ, nullable=True):
         elif pdt.is_datetime64_dtype(typ):
             return dt.Timestamp(nullable=nullable)
         elif isinstance(typ, pdt.CategoricalDtype):
-            return dt.String(nullable=nullable)
+            if typ.categories is None or pdt.is_string_dtype(typ.categories):
+                return dt.String(nullable=nullable)
+            return cls.to_ibis(typ.categories.dtype, nullable=nullable)
         elif pdt.is_extension_array_dtype(typ):
             if _has_arrow_dtype and isinstance(typ, pd.ArrowDtype):
                 return PyArrowType.to_ibis(typ.pyarrow_dtype, nullable=nullable)
diff --git a/ibis/formats/tests/test_pandas.py b/ibis/formats/tests/test_pandas.py
index 5bba865f0be5..546eaa5be65c 100644
--- a/ibis/formats/tests/test_pandas.py
+++ b/ibis/formats/tests/test_pandas.py
@@ -103,6 +103,8 @@ def test_dtype_from_pandas_arrow_list_dtype():
             dt.Timestamp("US/Eastern"),
         ),
         (pd.CategoricalDtype(), dt.String()),
+        (pd.CategoricalDtype(["a", "b", "c"]), dt.String()),
+        (pd.CategoricalDtype(np.array([1, 2, 3], dtype="int64")), dt.int64),
         (pd.Series([], dtype="string").dtype, dt.String()),
     ],
     ids=str,