Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(RFC): Adds agg, field utility classes #3505

Draft
wants to merge 28 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
fe7e77e
feat(RFC): Adds `agg`, `field` utility classes
dangotbanned Jul 26, 2024
c418066
test: add tests for `agg`, `field`
dangotbanned Jul 26, 2024
c92ede0
style(ruff): Format docstrings
dangotbanned Jul 26, 2024
662a137
fix(typing): Add missing `str` for `field.one_of`
dangotbanned Jul 26, 2024
35360b7
refactor(typing): Add `OneOfType` alias
dangotbanned Jul 26, 2024
7a4451a
feat: Support non-variadic input to `field.one_of`
dangotbanned Jul 26, 2024
951f2a1
test: Update/ add tests for `agg`, `field`
dangotbanned Jul 26, 2024
766d1bf
chore: Add `agg`, `field` to `__all__`
dangotbanned Jul 26, 2024
5929a5e
fix: Support already parsed shorthand in `FieldChannelMixin`
dangotbanned Jul 26, 2024
297f695
Merge branch 'main' into shorthand-namespace
dangotbanned Jul 29, 2024
208ac8b
wip
dangotbanned Jul 30, 2024
07dbcc2
fix: botched commit
dangotbanned Jul 31, 2024
bd4defe
Merge branch 'shorthand-namespace' of https://github.com/dangotbanned…
dangotbanned Jul 31, 2024
5da8485
Merge remote-tracking branch 'origin/main' into shorthand-namespace
dangotbanned Jul 31, 2024
83c3cfe
revert: Remove more of unfinished commit
dangotbanned Jul 31, 2024
23405ba
revert: Remove _ChannelValueNamespace
dangotbanned Aug 1, 2024
d4cf757
refactor(typing): Annotate `field_into` return types
dangotbanned Aug 1, 2024
e449796
chore: tidy up wip `field_into`
dangotbanned Aug 1, 2024
daeb895
Merge remote-tracking branch 'upstream/main' into shorthand-namespace
dangotbanned Aug 13, 2024
6570912
Merge branch 'main' into shorthand-namespace
dangotbanned Aug 16, 2024
4e02ac4
Merge remote-tracking branch 'upstream/HEAD' into shorthand-namespace
dangotbanned Aug 27, 2024
b472612
Merge branch 'main' into shorthand-namespace
dangotbanned Sep 1, 2024
f981130
Merge branch 'main' into shorthand-namespace
dangotbanned Oct 3, 2024
4c9851b
Merge remote-tracking branch 'upstream/main' into shorthand-namespace
dangotbanned Nov 4, 2024
a8e1ea1
refactor(ruff): Lint for `3.9`
dangotbanned Nov 4, 2024
f890feb
refactor: Factor out `SelectionPredicateComposition`
dangotbanned Nov 4, 2024
106d76f
refactor(typing): Update & rename aliases
dangotbanned Nov 4, 2024
0e806ce
feat: Permit `field.eq("field", None)`
dangotbanned Nov 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions altair/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,7 @@
"YOffsetDatum",
"YOffsetValue",
"YValue",
"agg",
"api",
"binding",
"binding_checkbox",
Expand All @@ -609,6 +610,7 @@
"default_data_transformer",
"display",
"expr",
"field",
"graticule",
"hconcat",
"is_chart_type",
Expand Down
1 change: 1 addition & 0 deletions altair/vegalite/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# ruff: noqa: F403
from .v5 import *
from .v5._api_rfc import agg as agg, field as field
372 changes: 372 additions & 0 deletions altair/vegalite/v5/_api_rfc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,372 @@
"""
Request for comment on additions to `api.py`.

Ideally these would be introduced *after* cleaning up the top-level namespace.

Actual runtime dependencies:
- altair.utils.core
- altair.utils.schemapi

The rest are to define aliases only.
"""

from __future__ import annotations

from typing import TYPE_CHECKING, Any, Dict, Literal, Mapping, Sequence, Union

from typing_extensions import TypeAlias

from altair.utils.core import TYPECODE_MAP as _TYPE_CODE
from altair.utils.core import parse_shorthand as _parse
from altair.utils.schemapi import Optional, SchemaBase, Undefined
from altair.vegalite.v5.api import Parameter, SelectionPredicateComposition
from altair.vegalite.v5.schema._typing import (
BinnedTimeUnit_T,
MultiTimeUnit_T,
SingleTimeUnit_T,
Type_T,
)
from altair.vegalite.v5.schema.core import (
FieldEqualPredicate,
FieldGTEPredicate,
FieldGTPredicate,
FieldLTEPredicate,
FieldLTPredicate,
FieldOneOfPredicate,
FieldRangePredicate,
FieldValidPredicate,
)

if TYPE_CHECKING:
from altair.utils.core import DataFrameLike
from altair.vegalite.v5.schema._typing import AggregateOp_T
from altair.vegalite.v5.schema.core import Predicate

__all__ = ["agg", "field"]

# Accepted spellings of an encoding type: a `Type_T` value, one of the
# single-letter codes, or `None` when no type is given.
EncodeType: TypeAlias = Union[Type_T, Literal["O", "N", "Q", "T", "G"], None]
# Union of the three time unit literal groups imported from `schema._typing`.
AnyTimeUnit: TypeAlias = Union[MultiTimeUnit_T, BinnedTimeUnit_T, SingleTimeUnit_T]
# Annotation for the `timeUnit` keyword of the `field` predicate constructors.
# NOTE: `Optional` is the one imported from `altair.utils.schemapi`, not
# `typing.Optional` -- presumably it admits `Undefined` (the default used for
# `timeUnit` in this module) rather than `None`; confirm against `schemapi`.
TimeUnitType: TypeAlias = Optional[Union[Dict[str, Any], SchemaBase, AnyTimeUnit]]
# Annotation for the `value` parameter of `field.range`.
RangeType: TypeAlias = Union[
    Dict[str, Any],
    Parameter,
    SchemaBase,
    Sequence[Union[Dict[str, Any], None, float, Parameter, SchemaBase]],
]
# Annotation for the `value` parameter of `field.eq`/`lt`/`lte`/`gt`/`gte`.
ValueType: TypeAlias = Union[str, bool, float, Dict[str, Any], Parameter, SchemaBase]
# A single element passed to `field.one_of`; same as `ValueType` minus `Parameter`.
OneOfType: TypeAlias = Union[str, bool, float, Dict[str, Any], SchemaBase]


_ENCODINGS = frozenset(
(
"ordinal",
"O",
"nominal",
"N",
"quantitative",
"Q",
"temporal",
"T",
"geojson",
"G",
None,
)
)


def _parse_aggregate(
    aggregate: AggregateOp_T, name: str | None, encode_type: EncodeType, /
) -> dict[str, Any]:
    """
    Assemble an ``aggregate(name):type`` shorthand string and parse it.

    Parameters
    ----------
    aggregate
        Aggregate operation written before the parentheses.
    name
        Column name written inside the parentheses; a falsy value leaves
        them empty.
    encode_type
        Short- or long-form encoding type. A falsy value omits the
        ``:type`` suffix.

    Returns
    -------
    The result of parsing the assembled shorthand string.

    Raises
    ------
    TypeError
        If ``encode_type`` is not a member of ``_ENCODINGS``.
    """
    if encode_type not in _ENCODINGS:
        options = ", ".join(sorted(f"{e!r}" for e in _ENCODINGS))
        msg = (
            f"Expected a short/long-form encoding type, but got {encode_type!r}.\n\n"
            f"Try passing one of the following to `type`:\n"
            f"{options}."
        )
        raise TypeError(msg)
    # Look the type up in ``_TYPE_CODE``; values without an entry are used as-is.
    suffix = f":{_TYPE_CODE.get(encode_type, encode_type)}" if encode_type else ""
    column = name or ""
    return _parse(f"{aggregate}({column}){suffix}")


def _wrap_composition(predicate: Predicate, /) -> SelectionPredicateComposition:
    """
    Wrap a predicate in a ``SelectionPredicateComposition``.

    The predicate is first converted with its ``to_dict()`` method, and that
    result is what gets wrapped.
    """
    as_dict = predicate.to_dict()
    return SelectionPredicateComposition(as_dict)


def _one_of_flatten(
values: tuple[OneOfType, ...] | tuple[Sequence[OneOfType]] | tuple[Any, ...], /
) -> Sequence[OneOfType]:
if (
len(values) == 1
and not isinstance(values[0], (str, bool, float, int, Mapping, SchemaBase))
and isinstance(values[0], Sequence)
):
return values[0]
elif len(values) > 1:
return values
else:
msg = (
f"Expected `values` to be either a single `Sequence` "
f"or used variadically, but got: {values!r}."
)
raise TypeError(msg)


def _one_of_variance(val_1: Any, *rest: OneOfType) -> Sequence[Any]:
# Required that all elements are the same type
tp = type(val_1)
if all(isinstance(v, tp) for v in rest):
return (val_1, *rest)
else:
msg = (
f"Expected all `values` to be of the same type, but got:\n"
f"{tuple(f'{type(v).__name__}' for v in (val_1, *rest))!r}"
)
raise TypeError(msg)


class agg:
    """
    Namespace of aggregate helpers, one classmethod per operation.

    Each helper is a functional alternative to writing the shorthand
    mini-language by hand, and gives editors something to autocomplete.
    Calling the class itself parses a shorthand string or dict directly.
    """

    def __new__(  # type: ignore[misc]
        cls, shorthand: dict[str, Any] | str, /, data: DataFrameLike | None = None
    ) -> dict[str, Any]:
        """Parse ``shorthand`` and return the result; no ``agg`` instance is made."""
        return _parse(shorthand=shorthand, data=data)

    @classmethod
    def argmin(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``argmin(col_name)``."""
        return _parse_aggregate("argmin", col_name, type)

    @classmethod
    def argmax(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``argmax(col_name)``."""
        return _parse_aggregate("argmax", col_name, type)

    @classmethod
    def average(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``average(col_name)``."""
        return _parse_aggregate("average", col_name, type)

    @classmethod
    def count(
        cls, col_name: str | None = None, /, type: EncodeType = "Q"
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``count(col_name)``; ``type`` defaults to ``"Q"``."""
        return _parse_aggregate("count", col_name, type)

    @classmethod
    def distinct(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``distinct(col_name)``."""
        return _parse_aggregate("distinct", col_name, type)

    @classmethod
    def max(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``max(col_name)``."""
        return _parse_aggregate("max", col_name, type)

    @classmethod
    def mean(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``mean(col_name)``."""
        return _parse_aggregate("mean", col_name, type)

    @classmethod
    def median(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``median(col_name)``."""
        return _parse_aggregate("median", col_name, type)

    @classmethod
    def min(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``min(col_name)``."""
        return _parse_aggregate("min", col_name, type)

    @classmethod
    def missing(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``missing(col_name)``."""
        return _parse_aggregate("missing", col_name, type)

    @classmethod
    def product(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``product(col_name)``."""
        return _parse_aggregate("product", col_name, type)

    @classmethod
    def q1(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``q1(col_name)``."""
        return _parse_aggregate("q1", col_name, type)

    @classmethod
    def q3(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``q3(col_name)``."""
        return _parse_aggregate("q3", col_name, type)

    @classmethod
    def ci0(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``ci0(col_name)``."""
        return _parse_aggregate("ci0", col_name, type)

    @classmethod
    def ci1(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``ci1(col_name)``."""
        return _parse_aggregate("ci1", col_name, type)

    @classmethod
    def stderr(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``stderr(col_name)``."""
        return _parse_aggregate("stderr", col_name, type)

    @classmethod
    def stdev(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``stdev(col_name)``."""
        return _parse_aggregate("stdev", col_name, type)

    @classmethod
    def stdevp(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``stdevp(col_name)``."""
        return _parse_aggregate("stdevp", col_name, type)

    @classmethod
    def sum(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``sum(col_name)``."""
        return _parse_aggregate("sum", col_name, type)

    @classmethod
    def valid(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``valid(col_name)``."""
        return _parse_aggregate("valid", col_name, type)

    @classmethod
    def values(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``values(col_name)``."""
        return _parse_aggregate("values", col_name, type)

    @classmethod
    def variance(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``variance(col_name)``."""
        return _parse_aggregate("variance", col_name, type)

    @classmethod
    def variancep(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``variancep(col_name)``."""
        return _parse_aggregate("variancep", col_name, type)

    @classmethod
    def exponential(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``exponential(col_name)``."""
        return _parse_aggregate("exponential", col_name, type)

    @classmethod
    def exponentialb(
        cls, col_name: str | None = None, /, type: EncodeType = None
    ) -> dict[str, Any]:
        """Return the parsed shorthand for ``exponentialb(col_name)``."""
        return _parse_aggregate("exponentialb", col_name, type)


class field:
"""
Utility class for field predicates and shorthand parsing.

Examples
--------
>>> field("Origin")
{'field': 'Origin'}

>>> field("Origin:N")
{'field': 'Origin', 'type': 'nominal'}

>>> field.one_of("Origin", "Japan", "Europe")
SelectionPredicateComposition({'field': 'Origin', 'oneOf': ['Japan', 'Europe']})
"""

def __new__( # type: ignore[misc]
Copy link
Member Author

@dangotbanned dangotbanned Jul 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wanted to highlight this current design quirk.

import altair as alt

>>> alt.field("Origin:N")
{'field': 'Origin', 'type': 'nominal'}

>>> alt.agg("Origin:N")
{'field': 'Origin', 'type': 'nominal'}
  1. I'm anticipating users would expect alt.field() to work this way, despite shorthand being unrelated.
  2. I also think using alt.agg() without specifying an aggregation wouldn't seem an intuitive way to define a field.
  3. The other choices for what to return all have drawbacks:
    • Each wrapped Field...Predicate already has a constructor
    • the faux-parent Predicate class is just a Union
    • the unrelated Field class seems to be wrapping a str (not a {"field": "..."})

cls, shorthand: dict[str, Any] | str, /, data: DataFrameLike | None = None
) -> dict[str, Any]:
return _parse(shorthand=shorthand, data=data)

@classmethod
def one_of(
    cls,
    field: str,
    /,
    *values: OneOfType | Sequence[OneOfType],
    timeUnit: TimeUnitType = Undefined,
) -> SelectionPredicateComposition:
    """
    Build a ``FieldOneOfPredicate`` and wrap it via ``_wrap_composition``.

    Parameters
    ----------
    field
        Name of the field to test.
    *values
        Candidate values, passed either variadically or as one ``Sequence``.
    timeUnit
        Forwarded unchanged to ``FieldOneOfPredicate``.

    Raises
    ------
    TypeError
        If ``values`` is not a single sequence or a variadic list, if the
        sequence is empty, or if the values are not all of the same type.
    """
    candidates = _one_of_flatten(values)
    # An empty sequence would otherwise be unpacked into zero arguments below
    # and surface as a confusing "missing positional argument" error.
    if len(candidates) == 0:
        msg = f"Expected at least one value for `oneOf`, but got: {candidates!r}."
        raise TypeError(msg)
    members = _one_of_variance(*candidates)
    p = FieldOneOfPredicate(field=field, oneOf=members, timeUnit=timeUnit)
    return _wrap_composition(p)

@classmethod
def eq(
    cls, field: str, value: ValueType, /, *, timeUnit: TimeUnitType = Undefined
) -> SelectionPredicateComposition:
    """Build a ``FieldEqualPredicate`` (``equal=value``) and wrap it via ``_wrap_composition``."""
    return _wrap_composition(
        FieldEqualPredicate(field=field, equal=value, timeUnit=timeUnit)
    )

@classmethod
def lt(
    cls, field: str, value: ValueType, /, *, timeUnit: TimeUnitType = Undefined
) -> SelectionPredicateComposition:
    """Build a ``FieldLTPredicate`` (``lt=value``) and wrap it via ``_wrap_composition``."""
    return _wrap_composition(
        FieldLTPredicate(field=field, lt=value, timeUnit=timeUnit)
    )

@classmethod
def lte(
    cls, field: str, value: ValueType, /, *, timeUnit: TimeUnitType = Undefined
) -> SelectionPredicateComposition:
    """Build a ``FieldLTEPredicate`` (``lte=value``) and wrap it via ``_wrap_composition``."""
    return _wrap_composition(
        FieldLTEPredicate(field=field, lte=value, timeUnit=timeUnit)
    )

@classmethod
def gt(
    cls, field: str, value: ValueType, /, *, timeUnit: TimeUnitType = Undefined
) -> SelectionPredicateComposition:
    """Build a ``FieldGTPredicate`` (``gt=value``) and wrap it via ``_wrap_composition``."""
    return _wrap_composition(
        FieldGTPredicate(field=field, gt=value, timeUnit=timeUnit)
    )

@classmethod
def gte(
    cls, field: str, value: ValueType, /, *, timeUnit: TimeUnitType = Undefined
) -> SelectionPredicateComposition:
    """Build a ``FieldGTEPredicate`` (``gte=value``) and wrap it via ``_wrap_composition``."""
    return _wrap_composition(
        FieldGTEPredicate(field=field, gte=value, timeUnit=timeUnit)
    )

@classmethod
def valid(
    cls, field: str, value: bool, /, *, timeUnit: TimeUnitType = Undefined
) -> SelectionPredicateComposition:
    """Build a ``FieldValidPredicate`` (``valid=value``) and wrap it via ``_wrap_composition``."""
    return _wrap_composition(
        FieldValidPredicate(field=field, valid=value, timeUnit=timeUnit)
    )

@classmethod
def range(
    cls, field: str, value: RangeType, /, *, timeUnit: TimeUnitType = Undefined
) -> SelectionPredicateComposition:
    """Build a ``FieldRangePredicate`` (``range=value``) and wrap it via ``_wrap_composition``."""
    return _wrap_composition(
        FieldRangePredicate(field=field, range=value, timeUnit=timeUnit)
    )
Loading