Skip to content

Commit

Permalink
feat(api): add Table.rename, with support for renaming via keyword arguments
Browse files Browse the repository at this point in the history
  • Loading branch information
jcrist authored and cpcloud committed Aug 30, 2023
1 parent f6faf89 commit 917d7ec
Show file tree
Hide file tree
Showing 10 changed files with 164 additions and 100 deletions.
2 changes: 1 addition & 1 deletion ibis/backends/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def rename_partitioned_column(table_expr, bq_table, partition_col):
# is not _PARTITIONTIME
if partition_col is None or partition_field != NATIVE_PARTITION_COL:
return table_expr
return table_expr.relabel({NATIVE_PARTITION_COL: partition_col})
return table_expr.rename({partition_col: NATIVE_PARTITION_COL})


def parse_project_and_dataset(project: str, dataset: str = "") -> tuple[str, str, str]:
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/dask/tests/execution/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def test_join_with_post_expression_filter(how, left):
def test_multi_join_with_post_expression_filter(how, left, df1):
lhs = left[["key", "key2"]]
rhs = left[["key2", "value"]]
rhs2 = left[["key2", "value"]].relabel({"value": "value2"})
rhs2 = left[["key2", "value"]].rename(value2="value")

joined = lhs.join(rhs, "key2", how=how)
projected = joined[lhs, rhs.value]
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/impala/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def _get_original_column_names(self, tablename: str) -> list[str]:
def _get_renamed_table(self, tablename: str) -> ir.Table:
t = self.connection.table(tablename)
original_names = self._get_original_column_names(tablename)
return t.relabel(dict(zip(t.columns, original_names)))
return t.rename(dict(zip(original_names, t.columns)))

@property
def batting(self) -> ir.Table:
Expand Down
10 changes: 0 additions & 10 deletions ibis/backends/impala/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,6 @@
from ibis.backends.impala.tests.mocks import MockImpalaConnection


def test_relabel_projection(snapshot):
# GH #551
types = ["int32", "string", "double"]
table = ibis.table(zip(["foo", "bar", "baz"], types), name="table")
relabeled = table.relabel({"foo": "one", "baz": "three"})

result = ImpalaCompiler.to_sql(relabeled)
snapshot.assert_match(result, "out.sql")


@pytest.fixture(scope="module")
def con():
return MockImpalaConnection()
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/pandas/tests/execution/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def test_join_with_post_expression_filter(how, left):
def test_multi_join_with_post_expression_filter(how, left, df1):
lhs = left[["key", "key2"]]
rhs = left[["key2", "value"]]
rhs2 = left[["key2", "value"]].relabel({"value": "value2"})
rhs2 = left[["key2", "value"]].rename(value2="value")

joined = lhs.join(rhs, "key2", how=how)
projected = joined[lhs, rhs.value]
Expand Down
6 changes: 3 additions & 3 deletions ibis/backends/pyspark/tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,11 +200,11 @@ def test_alias_after_select(con):
# new id != t['id']
(lambda t: t.mutate(id=t["id"] + 1), 0, False),
# new column id is selections[0], OK to replace since
# new id == t['id'] (relabel is a no-op)
(lambda t: t.relabel({"id": "id"}), 0, True),
# new id == t['id'] (rename is a no-op)
(lambda t: t.rename({"id": "id"}), 0, True),
# new column id2 is selections[0], cannot be replaced since
# id2 does not exist in the table
(lambda t: t.relabel({"id": "id2"}), 0, False),
(lambda t: t.rename({"id2": "id"}), 0, False),
],
)
def test_can_be_replaced_by_column_name(selection_fn, selection_idx, expected):
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/trino/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ def connect(*, tmpdir, worker_id, **kw):

def _remap_column_names(self, table_name: str) -> dict[str, str]:
table = self.connection.table(table_name)
return table.relabel(
dict(zip(table.schema().names, TEST_TABLES[table_name].names))
return table.rename(
dict(zip(TEST_TABLES[table_name].names, table.schema().names))
)

@property
Expand Down
133 changes: 86 additions & 47 deletions ibis/expr/types/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -1728,24 +1728,46 @@ def relabel(
| Callable[[str], str | None]
| str
| Literal["snake_case", "ALL_CAPS"],
) -> Table:
"""Deprecated in favor of `Table.rename`"""
if isinstance(substitutions, Mapping):
substitutions = {new: old for old, new in substitutions.items()}
return self.rename(substitutions)

def rename(
self,
method: str
| Callable[[str], str | None]
| Literal["snake_case", "ALL_CAPS"]
| Mapping[str, str]
| None = None,
/,
**substitutions: str,
) -> Table:
"""Rename columns in the table.
Parameters
----------
method
An optional method for renaming columns. May be one of:
- A format string to use to rename all columns, like
``"prefix_{name}"``.
- A function from old name to new name. If the function returns
``None`` the old name is used.
- The literal strings ``"snake_case"`` or ``"ALL_CAPS"`` to
rename all columns using a ``snake_case`` or ``ALL_CAPS``
naming convention, respectively.
- A mapping from new name to old name. Existing columns not present
in the mapping will pass through with their original names.
substitutions
A mapping, function, or format string mapping old to new column
names. If a column isn't in the mapping (or if the callable returns
None) it is left with its original name. May also pass a format
string to rename all columns, like ``"prefix_{name}"``. Also
accepts the literal string ``"snake_case"`` or ``"ALL_CAPS"`` which
will relabel all columns to use a ``snake_case`` or ``"ALL_CAPS"``
naming convention.
Columns to be explicitly renamed, expressed as ``new_name=old_name``
keyword arguments.
Returns
-------
Table
A relabeled table expression
A renamed table expression
Examples
--------
Expand Down Expand Up @@ -1773,9 +1795,10 @@ def relabel(
│ … │ … │ … │
└───────────┴───────────────┴─────────────────────────────────────┘
Relabel column names using a mapping from old name to new name
Rename specific columns by passing keyword arguments like
``new_name="old_name"``
>>> t.relabel({"studyName": "study_name"}).head(1)
>>> t.rename(study_name="studyName").head(1)
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ study_name ┃ Sample Number ┃ Species ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
Expand All @@ -1784,9 +1807,20 @@ def relabel(
│ PAL0708 │ 1 │ Adelie Penguin (Pygoscelis adeliae) │
└────────────┴───────────────┴─────────────────────────────────────┘
Relabel column names using a snake_case convention
Rename all columns using a format string
>>> t.relabel("snake_case").head(1)
>>> t.rename("p_{name}").head(1)
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ p_studyName ┃ p_Sample Number ┃ p_Species ┃
┡━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ string │ int64 │ string │
├─────────────┼─────────────────┼─────────────────────────────────────┤
│ PAL0708 │ 1 │ Adelie Penguin (Pygoscelis adeliae) │
└─────────────┴─────────────────┴─────────────────────────────────────┘
Rename all columns using a snake_case convention
>>> t.rename("snake_case").head(1)
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ study_name ┃ sample_number ┃ species ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
Expand All @@ -1795,9 +1829,9 @@ def relabel(
│ PAL0708 │ 1 │ Adelie Penguin (Pygoscelis adeliae) │
└────────────┴───────────────┴─────────────────────────────────────┘
Relabel column names using a ALL_CAPS convention
Rename all columns using an ALL_CAPS convention
>>> t.relabel("ALL_CAPS").head(1)
>>> t.rename("ALL_CAPS").head(1)
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ STUDY_NAME ┃ SAMPLE_NUMBER ┃ SPECIES ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
Expand All @@ -1806,20 +1840,9 @@ def relabel(
│ PAL0708 │ 1 │ Adelie Penguin (Pygoscelis adeliae) │
└────────────┴───────────────┴─────────────────────────────────────┘
Relabel columns using a format string
Rename all columns using a callable
>>> t.relabel("p_{name}").head(1)
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ p_studyName ┃ p_Sample Number ┃ p_Species ┃
┡━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ string │ int64 │ string │
├─────────────┼─────────────────┼─────────────────────────────────────┤
│ PAL0708 │ 1 │ Adelie Penguin (Pygoscelis adeliae) │
└─────────────┴─────────────────┴─────────────────────────────────────┘
Relabel column names using a callable
>>> t.relabel(str.upper).head(1)
>>> t.rename(str.upper).head(1)
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ STUDYNAME ┃ SAMPLE NUMBER ┃ SPECIES ┃
┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
Expand All @@ -1828,33 +1851,52 @@ def relabel(
│ PAL0708 │ 1 │ Adelie Penguin (Pygoscelis adeliae) │
└───────────┴───────────────┴─────────────────────────────────────┘
"""
observed = set()
if isinstance(method, Mapping):
substitutions.update(method)
method = None

# A mapping from old_name -> renamed expr
renamed = {}

if substitutions:
schema = self.schema()
for new_name, old_name in substitutions.items():
col = self[old_name]
if old_name not in renamed:
renamed[old_name] = col.name(new_name)
else:
raise ValueError(
"duplicate new names passed for renaming {old_name!r}"
)

if isinstance(substitutions, Mapping):
rename = substitutions.get
elif substitutions in {"snake_case", "ALL_CAPS"}:
if method is None:

def rename(c):
return None

elif isinstance(method, str) and method in {"snake_case", "ALL_CAPS"}:

def rename(c):
c = c.strip()
if " " in c:
# Handle "space case possibly with-hyphens"
if substitutions == "snake_case":
if method == "snake_case":
return "_".join(c.lower().split()).replace("-", "_")
elif substitutions == "ALL_CAPS":
elif method == "ALL_CAPS":
return "_".join(c.upper().split()).replace("-", "_")
# Handle PascalCase, camelCase, and kebab-case
c = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", c)
c = re.sub(r"([a-z\d])([A-Z])", r"\1_\2", c)
c = c.replace("-", "_")
if substitutions == "snake_case":
if method == "snake_case":
return c.lower()
elif substitutions == "ALL_CAPS":
elif method == "ALL_CAPS":
return c.upper()

elif isinstance(substitutions, str):
elif isinstance(method, str):

def rename(name):
return substitutions.format(name=name)
return method.format(name=name)

# Detect the case of missing or extra format string parameters
try:
Expand All @@ -1866,21 +1908,18 @@ def rename(name):
if invalid:
raise ValueError("Format strings must take a single parameter `name`")
else:
rename = substitutions
rename = method

exprs = []
for c in self.columns:
expr = self[c]
if (name := rename(c)) is not None:
expr = expr.name(name)
observed.add(c)
if c in renamed:
expr = renamed[c]
else:
expr = self[c]
if (name := rename(c)) is not None:
expr = expr.name(name)
exprs.append(expr)

if isinstance(substitutions, Mapping):
for c in substitutions:
if c not in observed:
raise KeyError(f"{c!r} is not an existing column")

return self.select(exprs)

def drop(self, *fields: str | Selector) -> Table:
Expand Down
Loading

0 comments on commit 917d7ec

Please sign in to comment.