Skip to content

Commit

Permalink
refactor(python): make structify behaviour experimental, while also…
Browse files Browse the repository at this point in the history
… extending it to aliased expressions (pola-rs#6615)
  • Loading branch information
alexander-beedie authored and vincent committed Feb 9, 2023
1 parent b5de827 commit 1ac67c3
Show file tree
Hide file tree
Showing 5 changed files with 179 additions and 134 deletions.
24 changes: 11 additions & 13 deletions py-polars/polars/cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import os
import sys
from types import TracebackType
from typing import Any

if sys.version_info >= (3, 8):
from typing import Literal
Expand All @@ -23,6 +22,7 @@
# which 'POLARS_' environment variables are recognised, as there are other lower-level
# and/or experimental settings that should not be saved or reset with the Config vars.
POLARS_CFG_ENV_VARS = {
"POLARS_AUTO_STRUCTIFY",
"POLARS_FMT_MAX_COLS",
"POLARS_FMT_MAX_ROWS",
"POLARS_FMT_STR_LEN",
Expand All @@ -38,9 +38,6 @@
"POLARS_TABLE_WIDTH",
"POLARS_VERBOSE",
}
# register Config-local attributes (with their defaults) here,
# eg: => {"misc_config_attr":True, "other_config_attr":False, etc}
POLARS_CFG_LOCAL_VARS: dict[str, Any] = {}


class Config:
Expand Down Expand Up @@ -74,10 +71,6 @@ def __exit__(
"""Reset any Config options that were set within the scope."""
self.restore_defaults().load(self._original_state)

# note: class-local attributes can be used for options that don't have
# a Rust component (so, no need to register environment variables).
# eg: misc_config_attr:bool = True

@classmethod
def load(cls, cfg: str) -> type[Config]:
"""
Expand All @@ -104,7 +97,7 @@ def restore_defaults(cls) -> type[Config]:
Notes
-----
This method operates by removing all Config options from the environment,
and then setting any class-local flags back to their default value.
and then setting any local (non-env) options back to their default value.
Examples
--------
Expand All @@ -113,8 +106,6 @@ def restore_defaults(cls) -> type[Config]:
"""
for var in POLARS_CFG_ENV_VARS:
os.environ.pop(var, None)
for flag, value in POLARS_CFG_LOCAL_VARS.items():
setattr(cls, flag, value)
cls.set_fmt_float()
return cls

Expand All @@ -133,9 +124,10 @@ def save(cls) -> str:
for key in sorted(POLARS_CFG_ENV_VARS)
if (key in os.environ)
}
config_vars = {attr: getattr(cls, attr) for attr in POLARS_CFG_LOCAL_VARS}
# note: 'local' vars are unused; preserved here for
# backwards-compatibility with previously-saved configs
return json.dumps(
{"environment": environment_vars, "local": config_vars},
{"environment": environment_vars, "local": {}},
separators=(",", ":"),
)

Expand Down Expand Up @@ -189,6 +181,12 @@ def set_ascii_tables(cls, active: bool = True) -> type[Config]:
os.environ["POLARS_FMT_TABLE_FORMATTING"] = fmt
return cls

@classmethod
def set_auto_structify(cls, active: bool = False) -> type[Config]:
"""Allow multi-output expressions to be automatically turned into Structs."""
os.environ["POLARS_AUTO_STRUCTIFY"] = str(int(active))
return cls

@classmethod
def set_fmt_str_lengths(cls, n: int) -> type[Config]:
"""
Expand Down
24 changes: 16 additions & 8 deletions py-polars/polars/internals/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5630,11 +5630,16 @@ def select(
│ 10 │
└─────────┘
Note that, when using kwargs syntax, expressions with multiple
outputs are automatically instantiated as Struct columns:
Expressions with multiple outputs can be automatically instantiated as Structs
by enabling the experimental setting ``Config.set_auto_structify(True)``:
>>> from polars.datatypes import INTEGER_DTYPES
>>> df.select(is_odd=(pl.col(INTEGER_DTYPES) % 2).suffix("_is_odd"))
>>> with pl.Config() as cfg:
... cfg.set_auto_structify(True) # doctest: +IGNORE_RESULT
... df.select(
... is_odd=(pl.col(INTEGER_DTYPES) % 2).suffix("_is_odd"),
... )
...
shape: (3, 1)
┌───────────┐
│ is_odd │
Expand Down Expand Up @@ -5751,12 +5756,15 @@ def with_columns(
│ 4 ┆ 13.0 ┆ true ┆ 52.0 ┆ false │
└─────┴──────┴───────┴──────┴───────┘
Note that, when using kwargs syntax, expressions with multiple
outputs are automatically instantiated as Struct columns:
Expressions with multiple outputs can be automatically instantiated as Structs
by enabling the experimental setting ``Config.set_auto_structify(True)``:
>>> df.drop("c").with_columns(
... diffs=pl.col(["a", "b"]).diff().suffix("_diff"),
... )
>>> with pl.Config() as cfg:
... cfg.set_auto_structify(True) # doctest: +IGNORE_RESULT
... df.drop("c").with_columns(
... diffs=pl.col(["a", "b"]).diff().suffix("_diff"),
... )
...
shape: (4, 3)
┌─────┬──────┬─────────────┐
│ a ┆ b ┆ diffs │
Expand Down
32 changes: 25 additions & 7 deletions py-polars/polars/internals/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,25 @@ def selection_to_pyexpr_list(
| pli.WhenThen
| pli.WhenThenThen
]
| None
),
structify: bool = False,
) -> list[PyExpr]:
if isinstance(exprs, (str, Expr, pli.Series)):
if exprs is None:
exprs = []
elif isinstance(exprs, (str, Expr, pli.Series)):
exprs = [exprs]
return [
expr_to_lit_or_expr(e, str_to_lit=False, structify=structify)._pyexpr
for e in exprs
]


return [expr_to_lit_or_expr(e, str_to_lit=False)._pyexpr for e in exprs]
def expr_output_name(expr: pli.Expr) -> str | None:
    """
    Return the output name of the given expression, if it can be determined.

    Parameters
    ----------
    expr
        Expression whose ``meta.output_name()`` is queried.

    Returns
    -------
    The value returned by ``expr.meta.output_name()``, or None if that
    lookup raises any exception.

    """
    try:
        output_name = expr.meta.output_name()
    except Exception:
        # best-effort lookup: any failure is reported as "no name" rather
        # than being propagated to the caller
        output_name = None
    return output_name


def expr_to_lit_or_expr(
Expand All @@ -91,6 +104,7 @@ def expr_to_lit_or_expr(
),
str_to_lit: bool = True,
structify: bool = False,
name: str | None = None,
) -> Expr:
"""
Convert args to expressions.
Expand All @@ -100,11 +114,13 @@ def expr_to_lit_or_expr(
expr
Any argument.
str_to_lit
If True string argument `"foo"` will be converted to `lit("foo")`,
If False it will be converted to `col("foo")`
If True string argument `"foo"` will be converted to `lit("foo")`.
If False it will be converted to `col("foo")`.
structify
If the final unaliased expression has multiple output names,
automagically convert it to struct
automatically convert it to struct.
name
Apply the given name as an alias to the resulting expression.
Returns
-------
Expand Down Expand Up @@ -133,9 +149,11 @@ def expr_to_lit_or_expr(
if structify:
unaliased_expr = expr.meta.undo_aliases()
if unaliased_expr.meta.has_multiple_outputs():
expr = cast(Expr, pli.struct(expr))
expr_name = expr_output_name(expr)
expr = cast(Expr, pli.struct(expr if expr_name is None else unaliased_expr))
name = name or expr_name

return expr
return expr if name is None else expr.alias(name)


def wrap_expr(pyexpr: PyExpr) -> Expr:
Expand Down
84 changes: 47 additions & 37 deletions py-polars/polars/internals/lazyframe/frame.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import os
import subprocess
import sys
import typing
Expand Down Expand Up @@ -56,7 +57,7 @@
)

try:
from polars.polars import PyLazyFrame
from polars.polars import PyExpr, PyLazyFrame

_DOCUMENTING = False
except ImportError:
Expand Down Expand Up @@ -1575,14 +1576,14 @@ def select(
Examples
--------
>>> df = pl.DataFrame(
>>> ldf = pl.DataFrame(
... {
... "foo": [1, 2, 3],
... "bar": [6, 7, 8],
... "ham": ["a", "b", "c"],
... }
... ).lazy()
>>> df.select("foo").collect()
>>> ldf.select("foo").collect()
shape: (3, 1)
┌─────┐
│ foo │
Expand All @@ -1593,7 +1594,7 @@ def select(
│ 2 │
│ 3 │
└─────┘
>>> df.select(["foo", "bar"]).collect()
>>> ldf.select(["foo", "bar"]).collect()
shape: (3, 2)
┌─────┬─────┐
│ foo ┆ bar │
Expand All @@ -1605,7 +1606,7 @@ def select(
│ 3 ┆ 8 │
└─────┴─────┘
>>> df.select(pl.col("foo") + 1).collect()
>>> ldf.select(pl.col("foo") + 1).collect()
shape: (3, 1)
┌─────┐
│ foo │
Expand All @@ -1617,7 +1618,7 @@ def select(
│ 4 │
└─────┘
>>> df.select([pl.col("foo") + 1, pl.col("bar") + 1]).collect()
>>> ldf.select([pl.col("foo") + 1, pl.col("bar") + 1]).collect()
shape: (3, 2)
┌─────┬─────┐
│ foo ┆ bar │
Expand All @@ -1629,23 +1630,30 @@ def select(
│ 4 ┆ 9 │
└─────┴─────┘
>>> df.select(pl.when(pl.col("foo") > 2).then(10).otherwise(0)).collect()
>>> ldf.select(
... value=pl.when(pl.col("foo") > 2).then(10).otherwise(0),
... ).collect()
shape: (3, 1)
┌─────────┐
│ literal │
│ ---     │
│ i32     │
╞═════════╡
│ 0       │
│ 0       │
│ 10      │
└─────────┘
Note that, when using kwargs syntax, expressions with multiple
outputs are automatically instantiated as Struct columns:
┌───────┐
│ value │
│ ---   │
│ i32   │
╞═══════╡
│ 0     │
│ 0     │
│ 10    │
└───────┘
Expressions with multiple outputs can be automatically instantiated as Structs
by enabling the experimental setting ``Config.set_auto_structify(True)``:
>>> from polars.datatypes import INTEGER_DTYPES
>>> df.select(is_odd=(pl.col(INTEGER_DTYPES) % 2).suffix("_is_odd")).collect()
>>> with pl.Config() as cfg:
... cfg.set_auto_structify(True) # doctest: +IGNORE_RESULT
... ldf.select(
... is_odd=(pl.col(INTEGER_DTYPES) % 2).suffix("_is_odd"),
... ).collect()
...
shape: (3, 1)
┌───────────┐
│ is_odd │
Expand All @@ -1663,9 +1671,12 @@ def select(
elif exprs is None:
exprs = []

exprs = pli.selection_to_pyexpr_list(exprs)
structify = bool(int(os.environ.get("POLARS_AUTO_STRUCTIFY", 0)))
exprs = pli.selection_to_pyexpr_list(exprs, structify=structify)
exprs.extend(
pli.expr_to_lit_or_expr(expr, structify=True)._pyexpr.alias(name)
pli.expr_to_lit_or_expr(
expr, structify=structify, name=name, str_to_lit=False
)._pyexpr
for name, expr in named_exprs.items()
)
return self._from_pyldf(self._ldf.select(exprs))
Expand Down Expand Up @@ -2547,12 +2558,15 @@ def with_columns(
│ 4 ┆ 13.0 ┆ true ┆ 52.0 ┆ false │
└─────┴──────┴───────┴──────┴───────┘
Note that, when using kwargs syntax, expressions with multiple
outputs are automatically instantiated as Struct columns:
Expressions with multiple outputs can be automatically instantiated as Structs
by enabling the experimental setting ``Config.set_auto_structify(True)``:
>>> ldf.drop("c").with_columns(
... diffs=pl.col(["a", "b"]).diff().suffix("_diff"),
... ).collect()
>>> with pl.Config() as cfg:
... cfg.set_auto_structify(True) # doctest: +IGNORE_RESULT
... ldf.drop("c").with_columns(
... diffs=pl.col(["a", "b"]).diff().suffix("_diff"),
... ).collect()
...
shape: (4, 3)
┌─────┬──────┬─────────────┐
│ a ┆ b ┆ diffs │
Expand All @@ -2568,22 +2582,18 @@ def with_columns(
"""
if exprs is None and not named_exprs:
raise ValueError("Expected at least one of 'exprs' or **named_exprs")
elif exprs is None:
exprs = []
elif isinstance(exprs, pli.Expr):
exprs = [exprs]
elif isinstance(exprs, pli.Series):
exprs = [pli.lit(exprs)]
else:
exprs = list(exprs)

structify = bool(int(os.environ.get("POLARS_AUTO_STRUCTIFY", 0)))
exprs = pli.selection_to_pyexpr_list(exprs, structify=structify)
exprs.extend(
pli.expr_to_lit_or_expr(expr, structify=True).alias(name)
pli.expr_to_lit_or_expr(expr, structify=structify, name=name)
for name, expr in named_exprs.items()
)
pyexprs = []
for e in exprs:
if isinstance(e, pli.Expr):
if isinstance(e, PyExpr):
pyexprs.append(e)
elif isinstance(e, pli.Expr):
pyexprs.append(e._pyexpr)
elif isinstance(e, pli.Series):
pyexprs.append(pli.lit(e)._pyexpr)
Expand Down
Loading

0 comments on commit 1ac67c3

Please sign in to comment.