diff --git a/ibis/backends/bigquery/client.py b/ibis/backends/bigquery/client.py index bb8c326da5a3..fa33502a5954 100644 --- a/ibis/backends/bigquery/client.py +++ b/ibis/backends/bigquery/client.py @@ -159,7 +159,7 @@ def rename_partitioned_column(table_expr, bq_table, partition_col): # is not _PARTITIONTIME if partition_col is None or partition_field != NATIVE_PARTITION_COL: return table_expr - return table_expr.relabel({NATIVE_PARTITION_COL: partition_col}) + return table_expr.rename({partition_col: NATIVE_PARTITION_COL}) def parse_project_and_dataset(project: str, dataset: str = "") -> tuple[str, str, str]: diff --git a/ibis/backends/dask/tests/execution/test_join.py b/ibis/backends/dask/tests/execution/test_join.py index dd2200d65dad..e9805c74c142 100644 --- a/ibis/backends/dask/tests/execution/test_join.py +++ b/ibis/backends/dask/tests/execution/test_join.py @@ -201,7 +201,7 @@ def test_join_with_post_expression_filter(how, left): def test_multi_join_with_post_expression_filter(how, left, df1): lhs = left[["key", "key2"]] rhs = left[["key2", "value"]] - rhs2 = left[["key2", "value"]].relabel({"value": "value2"}) + rhs2 = left[["key2", "value"]].rename(value2="value") joined = lhs.join(rhs, "key2", how=how) projected = joined[lhs, rhs.value] diff --git a/ibis/backends/impala/tests/conftest.py b/ibis/backends/impala/tests/conftest.py index 1027a060c4b6..3654aeedcb9c 100644 --- a/ibis/backends/impala/tests/conftest.py +++ b/ibis/backends/impala/tests/conftest.py @@ -157,7 +157,7 @@ def _get_original_column_names(self, tablename: str) -> list[str]: def _get_renamed_table(self, tablename: str) -> ir.Table: t = self.connection.table(tablename) original_names = self._get_original_column_names(tablename) - return t.relabel(dict(zip(t.columns, original_names))) + return t.rename(dict(zip(original_names, t.columns))) @property def batting(self) -> ir.Table: diff --git a/ibis/backends/impala/tests/test_sql.py b/ibis/backends/impala/tests/test_sql.py index 
ce72f3d14b97..53b811273a90 100644 --- a/ibis/backends/impala/tests/test_sql.py +++ b/ibis/backends/impala/tests/test_sql.py @@ -9,16 +9,6 @@ from ibis.backends.impala.tests.mocks import MockImpalaConnection -def test_relabel_projection(snapshot): - # GH #551 - types = ["int32", "string", "double"] - table = ibis.table(zip(["foo", "bar", "baz"], types), name="table") - relabeled = table.relabel({"foo": "one", "baz": "three"}) - - result = ImpalaCompiler.to_sql(relabeled) - snapshot.assert_match(result, "out.sql") - - @pytest.fixture(scope="module") def con(): return MockImpalaConnection() diff --git a/ibis/backends/pandas/tests/execution/test_join.py b/ibis/backends/pandas/tests/execution/test_join.py index 78eac1735116..8fd990ea86e1 100644 --- a/ibis/backends/pandas/tests/execution/test_join.py +++ b/ibis/backends/pandas/tests/execution/test_join.py @@ -146,7 +146,7 @@ def test_join_with_post_expression_filter(how, left): def test_multi_join_with_post_expression_filter(how, left, df1): lhs = left[["key", "key2"]] rhs = left[["key2", "value"]] - rhs2 = left[["key2", "value"]].relabel({"value": "value2"}) + rhs2 = left[["key2", "value"]].rename(value2="value") joined = lhs.join(rhs, "key2", how=how) projected = joined[lhs, rhs.value] diff --git a/ibis/backends/pyspark/tests/test_basic.py b/ibis/backends/pyspark/tests/test_basic.py index 14fe67789c9d..31974cdf5306 100644 --- a/ibis/backends/pyspark/tests/test_basic.py +++ b/ibis/backends/pyspark/tests/test_basic.py @@ -200,11 +200,11 @@ def test_alias_after_select(con): # new id != t['id'] (lambda t: t.mutate(id=t["id"] + 1), 0, False), # new column id is selections[0], OK to replace since - # new id == t['id'] (relabel is a no-op) - (lambda t: t.relabel({"id": "id"}), 0, True), + # new id == t['id'] (rename is a no-op) + (lambda t: t.rename({"id": "id"}), 0, True), # new column id2 is selections[0], cannot be replaced since # id2 does not exist in the table - (lambda t: t.relabel({"id": "id2"}), 0, False), + (lambda 
t: t.rename({"id2": "id"}), 0, False), ], ) def test_can_be_replaced_by_column_name(selection_fn, selection_idx, expected): diff --git a/ibis/backends/trino/tests/conftest.py b/ibis/backends/trino/tests/conftest.py index 3b89b8e51354..81451dde5123 100644 --- a/ibis/backends/trino/tests/conftest.py +++ b/ibis/backends/trino/tests/conftest.py @@ -110,8 +110,8 @@ def connect(*, tmpdir, worker_id, **kw): def _remap_column_names(self, table_name: str) -> dict[str, str]: table = self.connection.table(table_name) - return table.relabel( - dict(zip(table.schema().names, TEST_TABLES[table_name].names)) + return table.rename( + dict(zip(TEST_TABLES[table_name].names, table.schema().names)) ) @property diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index ba250653961e..d9e620f8aa44 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -1728,24 +1728,46 @@ def relabel( | Callable[[str], str | None] | str | Literal["snake_case", "ALL_CAPS"], + ) -> Table: + """Deprecated in favor of `Table.rename`""" + if isinstance(substitutions, Mapping): + substitutions = {new: old for old, new in substitutions.items()} + return self.rename(substitutions) + + def rename( + self, + method: str + | Callable[[str], str | None] + | Literal["snake_case", "ALL_CAPS"] + | Mapping[str, str] + | None = None, + /, + **substitutions: str, ) -> Table: """Rename columns in the table. Parameters ---------- + method + An optional method for renaming columns. May be one of: + + - A format string to use to rename all columns, like + ``"prefix_{name}"``. + - A function from old name to new name. If the function returns + ``None`` the old name is used. + - The literal strings ``"snake_case"`` or ``"ALL_CAPS"`` to + rename all columns using a ``snake_case`` or ``"ALL_CAPS"`` + naming convention respectively. + - A mapping from new name to old name. Existing columns not present + in the mapping will pass through with their original name.
substitutions - A mapping, function, or format string mapping old to new column - names. If a column isn't in the mapping (or if the callable returns - None) it is left with its original name. May also pass a format - string to rename all columns, like ``"prefix_{name}"``. Also - accepts the literal string ``"snake_case"`` or ``"ALL_CAPS"`` which - will relabel all columns to use a ``snake_case`` or ``"ALL_CAPS"`` - naming convention. + Columns to be explicitly renamed, expressed as ``new_name=old_name`` + keyword arguments. Returns ------- Table - A relabeled table expression + A renamed table expression Examples -------- @@ -1773,9 +1795,10 @@ def relabel( │ … │ … │ … │ └───────────┴───────────────┴─────────────────────────────────────┘ - Relabel column names using a mapping from old name to new name + Rename specific columns by passing keyword arguments like + ``new_name="old_name"`` - >>> t.relabel({"studyName": "study_name"}).head(1) + >>> t.rename(study_name="studyName").head(1) ┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ study_name ┃ Sample Number ┃ Species ┃ ┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ @@ -1784,9 +1807,20 @@ def relabel( │ PAL0708 │ 1 │ Adelie Penguin (Pygoscelis adeliae) │ └────────────┴───────────────┴─────────────────────────────────────┘ - Relabel column names using a snake_case convention + Rename all columns using a format string - >>> t.relabel("snake_case").head(1) + >>> t.rename("p_{name}").head(1) + ┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ p_studyName ┃ p_Sample Number ┃ p_Species ┃ + ┡━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ string │ int64 │ string │ + ├─────────────┼─────────────────┼─────────────────────────────────────┤ + │ PAL0708 │ 1 │ Adelie Penguin (Pygoscelis adeliae) │ + └─────────────┴─────────────────┴─────────────────────────────────────┘ + + Rename all columns using a snake_case convention + + >>> 
t.rename("snake_case").head(1) ┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ study_name ┃ sample_number ┃ species ┃ ┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ @@ -1795,9 +1829,9 @@ def relabel( │ PAL0708 │ 1 │ Adelie Penguin (Pygoscelis adeliae) │ └────────────┴───────────────┴─────────────────────────────────────┘ - Relabel column names using a ALL_CAPS convention + Rename all columns using an ALL_CAPS convention - >>> t.relabel("ALL_CAPS").head(1) + >>> t.rename("ALL_CAPS").head(1) ┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ STUDY_NAME ┃ SAMPLE_NUMBER ┃ SPECIES ┃ ┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ @@ -1806,20 +1840,9 @@ def relabel( │ PAL0708 │ 1 │ Adelie Penguin (Pygoscelis adeliae) │ └────────────┴───────────────┴─────────────────────────────────────┘ - Relabel columns using a format string + Rename all columns using a callable - >>> t.relabel("p_{name}").head(1) - ┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ p_studyName ┃ p_Sample Number ┃ p_Species ┃ - ┡━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ - │ string │ int64 │ string │ - ├─────────────┼─────────────────┼─────────────────────────────────────┤ - │ PAL0708 │ 1 │ Adelie Penguin (Pygoscelis adeliae) │ - └─────────────┴─────────────────┴─────────────────────────────────────┘ - - Relabel column names using a callable - - >>> t.relabel(str.upper).head(1) + >>> t.rename(str.upper).head(1) ┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ STUDYNAME ┃ SAMPLE NUMBER ┃ SPECIES ┃ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ @@ -1828,33 +1851,52 @@ def relabel( │ PAL0708 │ 1 │ Adelie Penguin (Pygoscelis adeliae) │ └───────────┴───────────────┴─────────────────────────────────────┘ """ - observed = set() + if isinstance(method, Mapping): + substitutions.update(method) + method = None + + # A mapping from 
old_name -> renamed expr + renamed = {} + + if substitutions: + schema = self.schema() + for new_name, old_name in substitutions.items(): + col = self[old_name] + if old_name not in renamed: + renamed[old_name] = col.name(new_name) + else: + raise ValueError( + f"duplicate new names passed for renaming {old_name!r}" + ) - if isinstance(substitutions, Mapping): - rename = substitutions.get - elif substitutions in {"snake_case", "ALL_CAPS"}: + if method is None: + + def rename(c): + return None + + elif isinstance(method, str) and method in {"snake_case", "ALL_CAPS"}: def rename(c): c = c.strip() if " " in c: # Handle "space case possibly with-hyphens" - if substitutions == "snake_case": + if method == "snake_case": return "_".join(c.lower().split()).replace("-", "_") - elif substitutions == "ALL_CAPS": + elif method == "ALL_CAPS": return "_".join(c.upper().split()).replace("-", "_") # Handle PascalCase, camelCase, and kebab-case c = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", c) c = re.sub(r"([a-z\d])([A-Z])", r"\1_\2", c) c = c.replace("-", "_") - if substitutions == "snake_case": + if method == "snake_case": return c.lower() - elif substitutions == "ALL_CAPS": + elif method == "ALL_CAPS": return c.upper() - elif isinstance(substitutions, str): + elif isinstance(method, str): def rename(name): - return substitutions.format(name=name) + return method.format(name=name) # Detect the case of missing or extra format string parameters try: @@ -1866,21 +1908,18 @@ def rename(name): if invalid: raise ValueError("Format strings must take a single parameter `name`") else: - rename = substitutions + rename = method exprs = [] for c in self.columns: - expr = self[c] - if (name := rename(c)) is not None: - expr = expr.name(name) - observed.add(c) + if c in renamed: + expr = renamed[c] + else: + expr = self[c] + if (name := rename(c)) is not None: + expr = expr.name(name) exprs.append(expr) - if isinstance(substitutions, Mapping): - for c in substitutions: - if c not in observed: -
raise KeyError(f"{c!r} is not an existing column") - return self.select(exprs) def drop(self, *fields: str | Selector) -> Table: diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index 863f6a366053..f459eeac8077 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -128,7 +128,7 @@ def test_typo_method_name_recommendation(table): # Existing columns take precedence over raising an error # for a common method typo - table2 = table.relabel({"a": "sort"}) + table2 = table.rename(sort="a") assert isinstance(table2.sort, Column) @@ -374,68 +374,103 @@ def test_filter_fusion_distinct_table_objects(con): assert_equal(expr, expr4) -def test_column_relabel(): +def test_relabel(): table = api.table({"x": "int32", "y": "string", "z": "double"}) - sol = sch.schema({"x_1": "int32", "y_1": "string", "z": "double"}) # Using a mapping res = table.relabel({"x": "x_1", "y": "y_1"}).schema() + sol = sch.schema({"x_1": "int32", "y_1": "string", "z": "double"}) assert_equal(res, sol) # Using a function res = table.relabel(lambda x: None if x == "z" else f"{x}_1").schema() assert_equal(res, sol) + # Using a format string + res = table.relabel("_{name}_") + sol = table.relabel({"x": "_x_", "y": "_y_", "z": "_z_"}) + assert_equal(res, sol) + # Mapping with unknown columns errors - with pytest.raises(KeyError, match="is not an existing column"): + with pytest.raises(com.IbisTypeError, match="'missing' is not found in table"): table.relabel({"missing": "oops"}) -def test_relabel_format_string(): +def test_rename(): + table = api.table({"x": "int32", "y": "string", "z": "double"}) + sol = sch.schema({"x_1": "int32", "y_1": "string", "z": "double"}) + + # Using kwargs + res = table.rename(x_1="x", y_1="y").schema() + assert_equal(res, sol) + + # Using a mapping + res = table.rename({"x_1": "x", "y_1": "y"}).schema() + assert_equal(res, sol) + + # Using a mix + res = table.rename({"x_1": "x"}, y_1="y").schema() + assert_equal(res, sol) + + 
+def test_rename_function(): + table = api.table({"x": "int32", "y": "string", "z": "double"}) + + res = table.rename(lambda x: None if x == "z" else f"{x}_1").schema() + sol = sch.schema({"x_1": "int32", "y_1": "string", "z": "double"}) + assert_equal(res, sol) + + # Explicit rename takes precedence + res = table.rename(lambda x: f"{x}_1", z_2="z").schema() + sol = sch.schema({"x_1": "int32", "y_1": "string", "z_2": "double"}) + assert_equal(res, sol) + + +def test_rename_format_string(): t = ibis.table({"x": "int", "y": "int", "z": "int"}) - res = t.relabel("_{name}_") - sol = t.relabel({"x": "_x_", "y": "_y_", "z": "_z_"}) + res = t.rename("_{name}_") + sol = t.rename({"_x_": "x", "_y_": "y", "_z_": "z"}) assert_equal(res, sol) with pytest.raises(ValueError, match="Format strings must"): - t.relabel("no format string parameter") + t.rename("no format string parameter") with pytest.raises(ValueError, match="Format strings must"): - t.relabel("{unknown} format string parameter") + t.rename("{unknown} format string parameter") -def test_relabel_snake_case(): +def test_rename_snake_case(): cases = [ ("cola", "cola"), - ("ColB", "col_b"), - ("colC", "col_c"), - ("col-d", "col_d"), + ("col_b", "ColB"), + ("col_c", "colC"), + ("col_d", "col-d"), ("col_e", "col_e"), - (" Column F ", "column_f"), - ("Column G-with-hyphens", "column_g_with_hyphens"), - ("Col H notCamelCase", "col_h_notcamelcase"), + ("column_f", " Column F "), + ("column_g_with_hyphens", "Column G-with-hyphens"), + ("col_h_notcamelcase", "Col H notCamelCase"), ] - t = ibis.table({c: "int" for c, _ in cases}) - res = t.relabel("snake_case") - sol = t.relabel(dict(cases)) + t = ibis.table({c: "int" for _, c in cases}) + res = t.rename("snake_case") + sol = t.rename(dict(cases)) assert_equal(res, sol) -def test_relabel_all_caps(): +def test_rename_all_caps(): cases = [ - ("cola", "COLA"), - ("ColB", "COL_B"), - ("colC", "COL_C"), - ("col-d", "COL_D"), - ("col_e", "COL_E"), - (" Column F ", "COLUMN_F"), - 
("Column G-with-hyphens", "COLUMN_G_WITH_HYPHENS"), - ("Col H notCamelCase", "COL_H_NOTCAMELCASE"), + ("COLA", "cola"), + ("COL_B", "ColB"), + ("COL_C", "colC"), + ("COL_D", "col-d"), + ("COL_E", "col_e"), + ("COLUMN_F", " Column F "), + ("COLUMN_G_WITH_HYPHENS", "Column G-with-hyphens"), + ("COL_H_NOTCAMELCASE", "Col H notCamelCase"), ] - t = ibis.table({c: "int" for c, _ in cases}) - res = t.relabel("ALL_CAPS") - sol = t.relabel(dict(cases)) + t = ibis.table({c: "int" for _, c in cases}) + res = t.rename("ALL_CAPS") + sol = t.rename(dict(cases)) assert_equal(res, sol) @@ -1536,7 +1571,7 @@ def test_merge_as_of_allows_overlapping_columns(): signal_one = signal_one[ "value", "timestamp_received", "field" ] # select columns we care about - signal_one = signal_one.relabel({"value": "current", "field": "signal_one"}) + signal_one = signal_one.rename(current="value", signal_one="field") signal_two = table[ table["field"].contains("signal_two") & table["field"].contains("voltage") @@ -1544,7 +1579,7 @@ def test_merge_as_of_allows_overlapping_columns(): signal_two = signal_two[ "value", "timestamp_received", "field" ] # select columns we care about - signal_two = signal_two.relabel({"value": "voltage", "field": "signal_two"}) + signal_two = signal_two.rename(voltage="value", signal_two="field") merged = ibis.api.asof_join(signal_one, signal_two, "timestamp_received") assert merged.columns == [ diff --git a/ibis/tests/sql/test_compiler.py b/ibis/tests/sql/test_compiler.py index 582807db6320..e78db023542e 100644 --- a/ibis/tests/sql/test_compiler.py +++ b/ibis/tests/sql/test_compiler.py @@ -159,7 +159,7 @@ def test_agg_filter_with_alias(snapshot): def test_table_drop_with_filter(snapshot): left = ibis.table( [("a", "int64"), ("b", "string"), ("c", "timestamp")], name="t" - ).relabel({"c": "C"}) + ).rename(C="c") left = left.filter(left.C == datetime.datetime(2018, 1, 1)) left = left.drop("C") left = left.mutate(the_date=datetime.datetime(2018, 1, 1))