Skip to content

Commit

Permalink
fix(pandas): make case work for non-RangeIndex dataframes (#9083)
Browse files Browse the repository at this point in the history
## Description of changes

This PR makes `PandasExecutor` create the `Series` with an index that
matches the incoming data. Currently, when the incoming data does not
use a `RangeIndex`, the output index is a union of a `RangeIndex` and
the incoming data index.
  • Loading branch information
dlovell authored Apr 30, 2024
1 parent 1960d54 commit 73dd685
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 1 deletion.
3 changes: 2 additions & 1 deletion ibis/backends/pandas/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,10 @@ def visit(
if base is not None:
cases = tuple(base == case for case in cases)
cases, _ = cls.asframe(cases, concat=False)
index = cases[0].index
results, _ = cls.asframe(results, concat=False)
out = np.select(cases, results, default)
return pd.Series(out)
return pd.Series(out, index=index)

@classmethod
def visit(cls, op: ops.TimestampTruncate | ops.DateTruncate, arg, unit):
Expand Down
24 changes: 24 additions & 0 deletions ibis/backends/pandas/tests/test_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,30 @@ def test_simple_case_column(batting, batting_df):
tm.assert_series_equal(result, expected)


def test_non_range_index():
    """Check that a ``cases`` expression preserves a non-contiguous index.

    The input frame is indexed by the labels (0, 1, 2, 4); after replacing
    column ``A`` through ``cases`` and executing the expression, the result
    must carry exactly the same index as the input frame.
    """

    def label_values(col):
        # Map 1 -> "one" and 2 -> "two"; anything else falls back to "unk".
        branches = ((1, "one"), (2, "two"))
        return col.cases(branches, default="unk")

    # Label 3 is skipped on purpose so the index is not a plain 0..n-1 range.
    labels = (0, 1, 2, 4)
    column_a = pd.Series({label: label % 3 for label in labels})
    df = pd.DataFrame({"A": column_a, "B": 0})

    con = ibis.pandas.connect({"t": df})
    table = con.table("t")
    expr = table.mutate(A=lambda t: t["A"].pipe(label_values))

    result = expr.execute()
    assert df.index.equals(result.index)


def test_table_distinct(t, df):
expr = t[["dup_strings"]].distinct()
result = expr.execute()
Expand Down

0 comments on commit 73dd685

Please sign in to comment.