Skip to content

Commit

Permalink
feat(api): add builtin support for relabeling columns to snake case
Browse files Browse the repository at this point in the history
  • Loading branch information
jcrist authored and cpcloud committed Jan 26, 2023
1 parent fc7d103 commit 1157273
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 3 deletions.
23 changes: 21 additions & 2 deletions ibis/expr/types/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import contextlib
import functools
import itertools
import re
import sys
import warnings
from keyword import iskeyword
Expand Down Expand Up @@ -744,7 +745,10 @@ def select(
projection = select

def relabel(
self, substitutions: Mapping[str, str] | Callable[[str], str | None]
self,
substitutions: Mapping[str, str]
| Callable[[str], str | None]
| Literal["snake_case"],
) -> Table:
"""Rename columns in the table.
Expand All @@ -753,7 +757,9 @@ def relabel(
substitutions
A mapping or function from old to new column names. If a column
isn't in the mapping (or if the callable returns None) it is left
with its original name.
with its original name. May also pass the string ``"snake_case"``,
which will relabel all columns to use a ``snake_case`` naming
convention.
Returns
-------
Expand All @@ -764,6 +770,19 @@ def relabel(

if isinstance(substitutions, Mapping):
rename = substitutions.get
elif substitutions == "snake_case":

def rename(c):
    """Return the ``snake_case`` form of the column name ``c``.

    Surrounding whitespace is stripped first. Names that still contain a
    space are lowercased and their whitespace-separated words joined with
    underscores. Other names get an underscore inserted at PascalCase and
    camelCase word boundaries before being lowercased. Hyphens become
    underscores in both cases.
    """
    name = c.strip()
    if " " not in name:
        # Single token: PascalCase, camelCase, or kebab-case.
        # Split a run of capitals from a following capitalised word,
        # e.g. "HTTPServer" -> "HTTP_Server".
        name = re.sub(r"([A-Z]+)([A-Z][a-z])", r'\1_\2', name)
        # Split a lowercase letter or digit from a following capital,
        # e.g. "colB" -> "col_B".
        name = re.sub(r"([a-z\d])([A-Z])", r'\1_\2', name)
        return name.replace("-", "_").lower()
    # "space case possibly with-hyphens": the words are only lowercased
    # and joined, so camelCase inside a word is not split.
    words = name.lower().split()
    return "_".join(words).replace("-", "_")

else:
rename = substitutions

Expand Down
19 changes: 18 additions & 1 deletion ibis/tests/expr/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ def test_filter_fusion_distinct_table_objects(con):
assert_equal(expr, expr4)


def test_column_relabel(table):
def test_column_relabel():
table = api.table({"x": "int32", "y": "string", "z": "double"})
sol = sch.schema({"x_1": "int32", "y_1": "string", "z": "double"})

Expand All @@ -387,6 +387,23 @@ def test_column_relabel(table):
table.relabel({"missing": "oops"})


def test_relabel_snake_case():
    """Check ``relabel("snake_case")`` against an explicit name mapping."""
    # Original column name -> expected snake_case name.
    expected = {
        "cola": "cola",
        "ColB": "col_b",
        "colC": "col_c",
        "col-d": "col_d",
        "col_e": "col_e",
        " Column F ": "column_f",
        "Column G-with-hyphens": "column_g_with_hyphens",
        "Col H notCamelCase": "col_h_notcamelcase",
    }
    t = ibis.table(dict.fromkeys(expected, "int"))
    # Relabeling by name convention must match relabeling with the mapping.
    res = t.relabel("snake_case")
    sol = t.relabel(expected)
    assert_equal(res, sol)


def test_limit(table):
limited = table.limit(10, offset=5)
assert limited.op().n == 10
Expand Down

0 comments on commit 1157273

Please sign in to comment.