depr(selectors): deprecate `c` and `r` selectors in favor of `cols` and `index`
jcrist · Sep 10, 2024 · 53d2304 · 53d2304
1 parent ed7244f
commit 53d2304
Show file tree

Hide file tree

Showing 14 changed files with 183 additions and 108 deletions.
diff --git a/docs/_freeze/posts/ibis-to-file/index/execute-results/html.json b/docs/_freeze/posts/ibis-to-file/index/execute-results/html.json
diff --git a/docs/_freeze/posts/selectors/index/execute-results/html.json b/docs/_freeze/posts/selectors/index/execute-results/html.json
diff --git a/docs/_quarto.yml b/docs/_quarto.yml
@@ -572,14 +572,15 @@ quartodoc:
             - matches
             - any_of
             - all_of
-            - c
+            - cols
             - across
             - if_any
             - if_all
-            - r
+            - index
             - first
             - last
             - all
+            - none
 
     - title: Type System
       desc: "Data types and schemas"

diff --git a/docs/how-to/visualization/matplotlib.qmd b/docs/how-to/visualization/matplotlib.qmd
@@ -24,7 +24,7 @@ grouped = t.group_by("species").aggregate(count=ibis._.count())
 grouped = grouped.mutate(row_number=ibis.row_number().over()).select(
     "row_number",
     (
-        ~s.c("row_number") & s.all()
+        ~s.cols("row_number") & s.all()
     ),  # see https://github.com/ibis-project/ibis/issues/6803
 )
 grouped

diff --git a/docs/posts/ibis-to-file/index.qmd b/docs/posts/ibis-to-file/index.qmd
@@ -38,7 +38,7 @@ import ibis.selectors as s
 
 expr = (
     t.group_by("species")
-     .mutate(s.across(s.numeric() & ~s.c("year"), (_ - _.mean()) / _.std()))
+     .mutate(s.across(s.numeric() & ~s.cols("year"), (_ - _.mean()) / _.std()))
 )
 expr
 ```

diff --git a/docs/posts/selectors/index.qmd b/docs/posts/selectors/index.qmd
@@ -49,10 +49,11 @@ sense.
 We can exclude `year` from the normalization using another selector:
 
 ```{python}
-t.mutate(s.across(s.numeric() & ~s.c("year"), (_ - _.mean()) / _.std()))
+t.mutate(s.across(s.numeric() & ~s.cols("year"), (_ - _.mean()) / _.std()))
 ```
 
-`c` is short for "column" and the `~` means "negate". Combining those we get "not the year column"!
+`cols` selects one or more columns, and the `~` means "negate". Combining those
+we get "every column except for 'year'"!
 
 Pretty neat right?
 
@@ -65,7 +66,7 @@ With selectors, all you need to do is slap a `.group_by("species")` onto `t`:
 
 ```{python}
 t.group_by("species").mutate(
-    s.across(s.numeric() & ~s.c("year"), (_ - _.mean()) / _.std())
+    s.across(s.numeric() & ~s.cols("year"), (_ - _.mean()) / _.std())
 )
 ```
 
@@ -81,7 +82,7 @@ Grouped min/max normalization? Easy:
 
 ```{python}
 t.group_by("species").mutate(
-    s.across(s.numeric() & ~s.c("year"), (_ - _.min()) / (_.max() - _.min()))
+    s.across(s.numeric() & ~s.cols("year"), (_ - _.min()) / (_.max() - _.min()))
 )
 ```
 
@@ -107,7 +108,7 @@ What if I want to compute multiple things? Heck yeah!
 ```{python}
 t.group_by("sex").mutate(
     s.across(
-        s.numeric() & ~s.c("year"),
+        s.numeric() & ~s.cols("year"),
         dict(centered=_ - _.mean(), zscore=(_ - _.mean()) / _.std()),
     )
 ).select("sex", s.endswith(("_centered", "_zscore")))
@@ -144,14 +145,14 @@ t.select(s.startswith("bill")).mutate(
 We've seen lots of mutate use, but selectors also work with `.agg`:
 
 ```{python}
-t.group_by("year").agg(s.across(s.numeric() & ~s.c("year"), _.mean())).order_by("year")
+t.group_by("year").agg(s.across(s.numeric() & ~s.cols("year"), _.mean())).order_by("year")
 ```
 
 Naturally, selectors work in grouping keys too, for even more convenience:
 
 ```{python}
-t.group_by(~s.numeric() | s.c("year")).mutate(
-    s.across(s.numeric() & ~s.c("year"), dict(centered=_ - _.mean(), std=_.std()))
+t.group_by(~s.numeric() | s.cols("year")).mutate(
+    s.across(s.numeric() & ~s.cols("year"), dict(centered=_ - _.mean(), std=_.std()))
 ).select("species", s.endswith(("_centered", "_std")))
 ```
 

diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py
@@ -1350,7 +1350,7 @@ def test_memtable_column_naming_mismatch(con, monkeypatch, df, columns):
 def test_pivot_longer(backend):
     diamonds = backend.diamonds
     df = diamonds.execute()
-    res = diamonds.pivot_longer(s.c("x", "y", "z"), names_to="pos", values_to="xyz")
+    res = diamonds.pivot_longer(s.cols("x", "y", "z"), names_to="pos", values_to="xyz")
     assert res.schema().names == (
         "carat",
         "cut",
@@ -2474,7 +2474,7 @@ def test_union_generates_predictable_aliases(con):
     assert len(df) == 2
 
 
-@pytest.mark.parametrize("id_cols", [s.none(), [], s.c()])
+@pytest.mark.parametrize("id_cols", [s.none(), [], s.cols()])
 def test_pivot_wider_empty_id_columns(con, backend, id_cols, monkeypatch):
     monkeypatch.setattr(ibis.options, "default_backend", con)
     data = pd.DataFrame(

diff --git a/ibis/backends/tests/tpc/ds/test_queries.py b/ibis/backends/tests/tpc/ds/test_queries.py
@@ -1341,7 +1341,7 @@ def test_24(store_sales, store_returns, store, item, customer, customer_address)
         .group_by(_.c_last_name, _.c_first_name, _.s_store_name)
         .having(_.netpaid.sum() > ssales.netpaid.mean().as_scalar() * 0.05)
         .agg(paid=_.netpaid.sum())
-        .order_by(~s.c("paid"))
+        .order_by(~s.cols("paid"))
     )
 
 
@@ -1497,17 +1497,17 @@ def test_28(store_sales):
 def test_29(store_sales, store_returns, catalog_sales, date_dim, store, item):
     d1 = (
         date_dim.filter(_.d_moy == 9, _.d_year == 1999)
-        .drop(~s.c("d_date_sk"))
+        .drop(~s.cols("d_date_sk"))
         .rename(d1_date_sk="d_date_sk")
     )
     d2 = (
         date_dim.filter(_.d_moy.between(9, 9 + 3), _.d_year == 1999)
-        .drop(~s.c("d_date_sk"))
+        .drop(~s.cols("d_date_sk"))
         .rename(d2_date_sk="d_date_sk")
     )
     d3 = (
         date_dim.filter(_.d_year.isin((1999, 1999 + 1, 1999 + 2)))
-        .drop(~s.c("d_date_sk"))
+        .drop(~s.cols("d_date_sk"))
         .rename(d3_date_sk="d_date_sk")
     )
     return (
@@ -1864,7 +1864,7 @@ def test_35(
         .relocate("cd_dep_employed_count", before="cnt2")
         .relocate("cd_dep_college_count", before="cnt3")
         .order_by(
-            s.across(s.startswith("cd_") | s.c("ca_state"), _.asc(nulls_first=True))
+            s.across(s.startswith("cd_") | s.cols("ca_state"), _.asc(nulls_first=True))
         )
         .limit(100)
     )
@@ -1894,7 +1894,7 @@ def test_36(store_sales, date_dim, item, store):
             g_category=lit(0),
             g_class=lit(0),
         )
-        .relocate(s.c("i_category", "i_class"), after="gross_margin")
+        .relocate(s.cols("i_category", "i_class"), after="gross_margin")
     )
     return (
         results.select(
@@ -2035,7 +2035,9 @@ def test_39(inventory, item, warehouse, date_dim):
         )
         .order_by(
             s.across(
-                s.c("wsk1", "isk1", "dmoy1", "mean1", "cov1", "d_moy", "mean", "cov"),
+                s.cols(
+                    "wsk1", "isk1", "dmoy1", "mean1", "cov1", "d_moy", "mean", "cov"
+                ),
                 _.asc(nulls_first=True),
             )
         )
@@ -2169,7 +2171,7 @@ def test_42(date_dim, store_sales, item):
         .join(item.filter(_.i_manager_id == 1), [("ss_item_sk", "i_item_sk")])
         .group_by(_.d_year, _.i_category_id, _.i_category)
         .agg(total_sales=_.ss_ext_sales_price.sum())
-        .order_by(_.total_sales.desc(), ~s.c("total_sales"))
+        .order_by(_.total_sales.desc(), ~s.cols("total_sales"))
         .limit(100)
     )
 
@@ -2268,7 +2270,7 @@ def test_45(web_sales, customer, customer_address, date_dim, item):
         )
         .group_by(_.ca_zip, _.ca_city)
         .agg(total_web_sales=_.ws_sales_price.sum())
-        .order_by(~s.c("total_web_sales"))
+        .order_by(~s.cols("total_web_sales"))
         .limit(100)
     )
 
@@ -2318,7 +2320,7 @@ def test_46(
             _.amt,
             _.profit,
         )
-        .order_by(s.across(~s.c("amt", "profit"), _.asc(nulls_first=True)))
+        .order_by(s.across(~s.cols("amt", "profit"), _.asc(nulls_first=True)))
         .limit(100)
     )
 
@@ -2346,7 +2348,7 @@ def test_47(item, store_sales, date_dim, store):
         .mutate(
             avg_monthly_sales=_.sum_sales.mean().over(
                 # TODO: add support for selectors in window over specification
-                # group_by=~s.c("sum_sales", "d_moy")
+                # group_by=~s.cols("sum_sales", "d_moy")
                 group_by=(
                     _.i_category,
                     _.i_brand,
@@ -2966,7 +2968,9 @@ def test_57(item, catalog_sales, date_dim, call_center):
             )
             > 0.1,
         )
-        .order_by((_.sum_sales - _.avg_monthly_sales).asc(nulls_first=True), s.r[1:10])
+        .order_by(
+            (_.sum_sales - _.avg_monthly_sales).asc(nulls_first=True), s.index[1:10]
+        )
         .limit(100)
     )
 
@@ -4884,7 +4888,7 @@ def test_89(item, store_sales, date_dim, store):
         .order_by(
             _.sum_sales - _.avg_monthly_sales,
             _.s_store_name,
-            s.r[:9] & ~s.c("s_store_name"),
+            s.index[:9] & ~s.cols("s_store_name"),
         )
     ).limit(100)
 

diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py
@@ -778,7 +778,7 @@ def __getitem__(self, what):
 
         Selectors
 
-        >>> t[~s.numeric() | (s.numeric() & ~s.c("year"))].head()
+        >>> t[~s.numeric() | (s.numeric() & ~s.cols("year"))].head()
         ┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━┓
         ┃ species ┃ island    ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ … ┃
         ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━┩
@@ -2015,7 +2015,7 @@ def mutate(self, *exprs: Sequence[ir.Expr] | None, **mutations: ir.Value) -> Tab
 
         Mutate across multiple columns
 
-        >>> t.mutate(s.across(s.numeric() & ~s.c("year"), _ - _.mean())).head()
+        >>> t.mutate(s.across(s.numeric() & ~s.cols("year"), _ - _.mean())).head()
         ┏━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━┓
         ┃ species ┃ year  ┃ bill_length_mm ┃
         ┡━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━┩
@@ -2185,7 +2185,7 @@ def select(
         Projection with a selector
 
         >>> import ibis.selectors as s
-        >>> t.select(s.numeric() & ~s.c("year")).head()
+        >>> t.select(s.numeric() & ~s.cols("year")).head()
         ┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
         ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃
         ┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
@@ -2201,7 +2201,7 @@ def select(
         Projection + aggregation across multiple columns
 
         >>> from ibis import _
-        >>> t.select(s.across(s.numeric() & ~s.c("year"), _.mean())).head()
+        >>> t.select(s.across(s.numeric() & ~s.cols("year"), _.mean())).head()
         ┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
         ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃
         ┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
@@ -3731,7 +3731,7 @@ def pivot_longer(
         Here we convert column names not matching the selector for the `religion` column
         and convert those names into values
 
-        >>> relig_income.pivot_longer(~s.c("religion"), names_to="income", values_to="count")
+        >>> relig_income.pivot_longer(~s.cols("religion"), names_to="income", values_to="count")
         ┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┓
         ┃ religion ┃ income             ┃ count ┃
         ┡━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━┩
@@ -4563,14 +4563,6 @@ def relocate(
         ├────────┼────────┼────────┼───────┼───────┼───────┤
         │ a      │ a      │ a      │     1 │     1 │     1 │
         └────────┴────────┴────────┴───────┴───────┴───────┘
-        >>> t.relocate(s.any_of(s.c(*"ae")))
-        ┏━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
-        ┃ a     ┃ e      ┃ b     ┃ c     ┃ d      ┃ f      ┃
-        ┡━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
-        │ int64 │ string │ int64 │ int64 │ string │ string │
-        ├───────┼────────┼───────┼───────┼────────┼────────┤
-        │     1 │ a      │     1 │     1 │ a      │ a      │
-        └───────┴────────┴───────┴───────┴────────┴────────┘
 
         When multiple columns are selected with `before` or `after`, those
         selected columns are moved before and after the `selectors` input