Skip to content

Commit

Permalink
depr(selectors): deprecate `c` and `r` selectors in favor of `cols` and `index`
Browse files Browse the repository at this point in the history
  • Loading branch information
jcrist committed Sep 10, 2024
1 parent 1cf5439 commit 29b865e
Show file tree
Hide file tree
Showing 14 changed files with 187 additions and 112 deletions.

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions docs/_freeze/posts/selectors/index/execute-results/html.json

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -572,14 +572,15 @@ quartodoc:
- matches
- any_of
- all_of
- c
- cols
- across
- if_any
- if_all
- r
- index
- first
- last
- all
- none

- title: Type System
desc: "Data types and schemas"
Expand Down
2 changes: 1 addition & 1 deletion docs/how-to/visualization/matplotlib.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ grouped = t.group_by("species").aggregate(count=ibis._.count())
grouped = grouped.mutate(row_number=ibis.row_number().over()).select(
"row_number",
(
~s.c("row_number") & s.all()
~s.cols("row_number") & s.all()
), # see https://github.com/ibis-project/ibis/issues/6803
)
grouped
Expand Down
2 changes: 1 addition & 1 deletion docs/posts/ibis-to-file/index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ import ibis.selectors as s
expr = (
t.group_by("species")
.mutate(s.across(s.numeric() & ~s.c("year"), (_ - _.mean()) / _.std()))
.mutate(s.across(s.numeric() & ~s.cols("year"), (_ - _.mean()) / _.std()))
)
expr
```
Expand Down
17 changes: 9 additions & 8 deletions docs/posts/selectors/index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,11 @@ sense.
We can exclude `year` from the normalization using another selector:

```{python}
t.mutate(s.across(s.numeric() & ~s.c("year"), (_ - _.mean()) / _.std()))
t.mutate(s.across(s.numeric() & ~s.cols("year"), (_ - _.mean()) / _.std()))
```

`c` is short for "column" and the `~` means "negate". Combining those we get "not the year column"!
`cols` selects one or more columns, and the `~` means "negate". Combining those
we get "every column except for 'year'"!

Pretty neat, right?

Expand All @@ -65,7 +66,7 @@ With selectors, all you need to do is slap a `.group_by("species")` onto `t`:

```{python}
t.group_by("species").mutate(
s.across(s.numeric() & ~s.c("year"), (_ - _.mean()) / _.std())
s.across(s.numeric() & ~s.cols("year"), (_ - _.mean()) / _.std())
)
```

Expand All @@ -81,7 +82,7 @@ Grouped min/max normalization? Easy:

```{python}
t.group_by("species").mutate(
s.across(s.numeric() & ~s.c("year"), (_ - _.min()) / (_.max() - _.min()))
s.across(s.numeric() & ~s.cols("year"), (_ - _.min()) / (_.max() - _.min()))
)
```

Expand All @@ -107,7 +108,7 @@ What if I want to compute multiple things? Heck yeah!
```{python}
t.group_by("sex").mutate(
s.across(
s.numeric() & ~s.c("year"),
s.numeric() & ~s.cols("year"),
dict(centered=_ - _.mean(), zscore=(_ - _.mean()) / _.std()),
)
).select("sex", s.endswith(("_centered", "_zscore")))
Expand Down Expand Up @@ -144,14 +145,14 @@ t.select(s.startswith("bill")).mutate(
We've seen lots of mutate use, but selectors also work with `.agg`:

```{python}
t.group_by("year").agg(s.across(s.numeric() & ~s.c("year"), _.mean())).order_by("year")
t.group_by("year").agg(s.across(s.numeric() & ~s.cols("year"), _.mean())).order_by("year")
```

Naturally, selectors work in grouping keys too, for even more convenience:

```{python}
t.group_by(~s.numeric() | s.c("year")).mutate(
s.across(s.numeric() & ~s.c("year"), dict(centered=_ - _.mean(), std=_.std()))
t.group_by(~s.numeric() | s.cols("year")).mutate(
s.across(s.numeric() & ~s.cols("year"), dict(centered=_ - _.mean(), std=_.std()))
).select("species", s.endswith(("_centered", "_std")))
```

Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1345,7 +1345,7 @@ def test_memtable_column_naming_mismatch(con, monkeypatch, df, columns):
def test_pivot_longer(backend):
diamonds = backend.diamonds
df = diamonds.execute()
res = diamonds.pivot_longer(s.c("x", "y", "z"), names_to="pos", values_to="xyz")
res = diamonds.pivot_longer(s.cols("x", "y", "z"), names_to="pos", values_to="xyz")
assert res.schema().names == (
"carat",
"cut",
Expand Down Expand Up @@ -2469,7 +2469,7 @@ def test_union_generates_predictable_aliases(con):
assert len(df) == 2


@pytest.mark.parametrize("id_cols", [s.none(), [], s.c()])
@pytest.mark.parametrize("id_cols", [s.none(), [], s.cols()])
def test_pivot_wider_empty_id_columns(con, backend, id_cols, monkeypatch):
monkeypatch.setattr(ibis.options, "default_backend", con)
data = pd.DataFrame(
Expand Down
30 changes: 17 additions & 13 deletions ibis/backends/tests/tpc/ds/test_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -1341,7 +1341,7 @@ def test_24(store_sales, store_returns, store, item, customer, customer_address)
.group_by(_.c_last_name, _.c_first_name, _.s_store_name)
.having(_.netpaid.sum() > ssales.netpaid.mean().as_scalar() * 0.05)
.agg(paid=_.netpaid.sum())
.order_by(~s.c("paid"))
.order_by(~s.cols("paid"))
)


Expand Down Expand Up @@ -1497,17 +1497,17 @@ def test_28(store_sales):
def test_29(store_sales, store_returns, catalog_sales, date_dim, store, item):
d1 = (
date_dim.filter(_.d_moy == 9, _.d_year == 1999)
.drop(~s.c("d_date_sk"))
.drop(~s.cols("d_date_sk"))
.rename(d1_date_sk="d_date_sk")
)
d2 = (
date_dim.filter(_.d_moy.between(9, 9 + 3), _.d_year == 1999)
.drop(~s.c("d_date_sk"))
.drop(~s.cols("d_date_sk"))
.rename(d2_date_sk="d_date_sk")
)
d3 = (
date_dim.filter(_.d_year.isin((1999, 1999 + 1, 1999 + 2)))
.drop(~s.c("d_date_sk"))
.drop(~s.cols("d_date_sk"))
.rename(d3_date_sk="d_date_sk")
)
return (
Expand Down Expand Up @@ -1864,7 +1864,7 @@ def test_35(
.relocate("cd_dep_employed_count", before="cnt2")
.relocate("cd_dep_college_count", before="cnt3")
.order_by(
s.across(s.startswith("cd_") | s.c("ca_state"), _.asc(nulls_first=True))
s.across(s.startswith("cd_") | s.cols("ca_state"), _.asc(nulls_first=True))
)
.limit(100)
)
Expand Down Expand Up @@ -1894,7 +1894,7 @@ def test_36(store_sales, date_dim, item, store):
g_category=lit(0),
g_class=lit(0),
)
.relocate(s.c("i_category", "i_class"), after="gross_margin")
.relocate(s.cols("i_category", "i_class"), after="gross_margin")
)
return (
results.select(
Expand Down Expand Up @@ -2035,7 +2035,9 @@ def test_39(inventory, item, warehouse, date_dim):
)
.order_by(
s.across(
s.c("wsk1", "isk1", "dmoy1", "mean1", "cov1", "d_moy", "mean", "cov"),
s.cols(
"wsk1", "isk1", "dmoy1", "mean1", "cov1", "d_moy", "mean", "cov"
),
_.asc(nulls_first=True),
)
)
Expand Down Expand Up @@ -2169,7 +2171,7 @@ def test_42(date_dim, store_sales, item):
.join(item.filter(_.i_manager_id == 1), [("ss_item_sk", "i_item_sk")])
.group_by(_.d_year, _.i_category_id, _.i_category)
.agg(total_sales=_.ss_ext_sales_price.sum())
.order_by(_.total_sales.desc(), ~s.c("total_sales"))
.order_by(_.total_sales.desc(), ~s.cols("total_sales"))
.limit(100)
)

Expand Down Expand Up @@ -2268,7 +2270,7 @@ def test_45(web_sales, customer, customer_address, date_dim, item):
)
.group_by(_.ca_zip, _.ca_city)
.agg(total_web_sales=_.ws_sales_price.sum())
.order_by(~s.c("total_web_sales"))
.order_by(~s.cols("total_web_sales"))
.limit(100)
)

Expand Down Expand Up @@ -2318,7 +2320,7 @@ def test_46(
_.amt,
_.profit,
)
.order_by(s.across(~s.c("amt", "profit"), _.asc(nulls_first=True)))
.order_by(s.across(~s.cols("amt", "profit"), _.asc(nulls_first=True)))
.limit(100)
)

Expand Down Expand Up @@ -2346,7 +2348,7 @@ def test_47(item, store_sales, date_dim, store):
.mutate(
avg_monthly_sales=_.sum_sales.mean().over(
# TODO: add support for selectors in window over specification
# group_by=~s.c("sum_sales", "d_moy")
# group_by=~s.cols("sum_sales", "d_moy")
group_by=(
_.i_category,
_.i_brand,
Expand Down Expand Up @@ -2966,7 +2968,9 @@ def test_57(item, catalog_sales, date_dim, call_center):
)
> 0.1,
)
.order_by((_.sum_sales - _.avg_monthly_sales).asc(nulls_first=True), s.r[1:10])
.order_by(
(_.sum_sales - _.avg_monthly_sales).asc(nulls_first=True), s.index[1:10]
)
.limit(100)
)

Expand Down Expand Up @@ -4885,7 +4889,7 @@ def test_89(item, store_sales, date_dim, store):
.order_by(
_.sum_sales - _.avg_monthly_sales,
_.s_store_name,
s.r[:9] & ~s.c("s_store_name"),
s.index[:9] & ~s.cols("s_store_name"),
)
).limit(100)

Expand Down
26 changes: 9 additions & 17 deletions ibis/expr/types/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -1881,7 +1881,7 @@ def mutate(self, *exprs: Sequence[ir.Expr] | None, **mutations: ir.Value) -> Tab
Mutate across multiple columns
>>> t.mutate(s.across(s.numeric() & ~s.c("year"), _ - _.mean())).head()
>>> t.mutate(s.across(s.numeric() & ~s.cols("year"), _ - _.mean())).head()
┏━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━┓
┃ species ┃ year ┃ bill_length_mm ┃
┡━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━┩
Expand Down Expand Up @@ -2051,7 +2051,7 @@ def select(
Projection with a selector
>>> import ibis.selectors as s
>>> t.select(s.numeric() & ~s.c("year")).head()
>>> t.select(s.numeric() & ~s.cols("year")).head()
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
Expand All @@ -2067,7 +2067,7 @@ def select(
Projection + aggregation across multiple columns
>>> from ibis import _
>>> t.select(s.across(s.numeric() & ~s.c("year"), _.mean())).head()
>>> t.select(s.across(s.numeric() & ~s.cols("year"), _.mean())).head()
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
Expand Down Expand Up @@ -2161,7 +2161,7 @@ def rename(
>>> import ibis
>>> import ibis.selectors as s
>>> ibis.options.interactive = True
>>> first3 = s.r[:3] # first 3 columns
>>> first3 = s.index[:3] # first 3 columns
>>> t = ibis.examples.penguins_raw_raw.fetch().select(first3)
>>> t
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
Expand Down Expand Up @@ -3597,7 +3597,7 @@ def pivot_longer(
Here we select the columns not matching the selector for the `religion` column
and convert those column names into values
>>> relig_income.pivot_longer(~s.c("religion"), names_to="income", values_to="count")
>>> relig_income.pivot_longer(~s.cols("religion"), names_to="income", values_to="count")
┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┓
┃ religion ┃ income ┃ count ┃
┡━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━┩
Expand Down Expand Up @@ -3718,7 +3718,7 @@ def pivot_longer(
>>> len(who.columns)
60
>>> who.pivot_longer(
... s.r["new_sp_m014":"newrel_f65"],
... s.index["new_sp_m014":"newrel_f65"],
... names_to=["diagnosis", "gender", "age"],
... names_pattern="new_?(.*)_(.)(.*)",
... values_to="count",
Expand Down Expand Up @@ -3749,7 +3749,7 @@ def pivot_longer(
Let's recode gender and age to numeric values using a mapping
>>> who.pivot_longer(
... s.r["new_sp_m014":"newrel_f65"],
... s.index["new_sp_m014":"newrel_f65"],
... names_to=["diagnosis", "gender", "age"],
... names_pattern="new_?(.*)_(.)(.*)",
... names_transform=dict(
Expand Down Expand Up @@ -3784,7 +3784,7 @@ def pivot_longer(
The number of match groups in `names_pattern` must match the length of `names_to`
>>> who.pivot_longer( # quartodoc: +EXPECTED_FAILURE
... s.r["new_sp_m014":"newrel_f65"],
... s.index["new_sp_m014":"newrel_f65"],
... names_to=["diagnosis", "gender", "age"],
... names_pattern="new_?(.*)_.(.*)",
... )
Expand All @@ -3795,7 +3795,7 @@ def pivot_longer(
`names_transform` must be a mapping or callable
>>> who.pivot_longer(
... s.r["new_sp_m014":"newrel_f65"], names_transform="upper"
... s.index["new_sp_m014":"newrel_f65"], names_transform="upper"
... ) # quartodoc: +EXPECTED_FAILURE
Traceback (most recent call last):
...
Expand Down Expand Up @@ -4429,14 +4429,6 @@ def relocate(
├────────┼────────┼────────┼───────┼───────┼───────┤
│ a │ a │ a │ 1 │ 1 │ 1 │
└────────┴────────┴────────┴───────┴───────┴───────┘
>>> t.relocate(s.any_of(s.c(*"ae")))
┏━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
┃ a ┃ e ┃ b ┃ c ┃ d ┃ f ┃
┡━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
│ int64 │ string │ int64 │ int64 │ string │ string │
├───────┼────────┼───────┼───────┼────────┼────────┤
│ 1 │ a │ 1 │ 1 │ a │ a │
└───────┴────────┴───────┴───────┴────────┴────────┘
When multiple columns are selected with `before` or `after`, those
selected columns are moved before and after the `selectors` input
Expand Down
Loading

0 comments on commit 29b865e

Please sign in to comment.