From 546b4e0aacb3af4efd4bf4d2b390d28f89c19101 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 26 May 2024 12:34:39 +0100 Subject: [PATCH 01/31] refactor: Remove `toolz.curried.pipe` dependency in `vegalite.v5.api` - Made `SupportsGeoInterface` usable for runtime checking - Simplifies type narrowing logic and aligns with `DataFrameLike` - Adds `is_data_type` - Used to reduce code duplication, specifically where `pipe` was previously referenced. - Avoids the `None` case introduced by `DataTransformerType | None`. --- altair/utils/data.py | 27 +++++++++++++++++++++++---- altair/vegalite/v5/api.py | 12 ++++-------- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/altair/utils/data.py b/altair/utils/data.py index 871b43092..f3bf12b67 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -3,7 +3,22 @@ import random import hashlib import warnings -from typing import Union, MutableMapping, Optional, Dict, Sequence, TYPE_CHECKING, List +from typing import ( + Union, + MutableMapping, + Optional, + Dict, + Sequence, + TYPE_CHECKING, + List, + TypeVar, + Protocol, + TypedDict, + Literal, + overload, + runtime_checkable, + Any, +) import pandas as pd from toolz import curried @@ -15,14 +30,16 @@ from .deprecation import AltairDeprecationWarning from .plugin_registry import PluginRegistry - -from typing import Protocol, TypedDict, Literal - +if sys.version_info >= (3, 13): + from typing import TypeIs +else: + from typing_extensions import TypeIs if TYPE_CHECKING: import pyarrow.lib +@runtime_checkable class SupportsGeoInterface(Protocol): __geo_interface__: MutableMapping @@ -32,6 +49,8 @@ class SupportsGeoInterface(Protocol): VegaLiteDataDict = Dict[str, Union[str, dict, List[dict]]] ToValuesReturnType = Dict[str, Union[dict, List[dict]]] +def is_data_type(obj: Any) -> TypeIs[DataType]: + return isinstance(obj, (dict, pd.DataFrame, DataFrameLike, SupportsGeoInterface)) # ============================================================================== diff --git a/altair/vegalite/v5/api.py b/altair/vegalite/v5/api.py index 37a13c2ae..6425c77fe 100644 --- a/altair/vegalite/v5/api.py +++ b/altair/vegalite/v5/api.py @@ -4,8 +4,6 @@ import io import json import jsonschema -import pandas as pd -from toolz.curried import pipe as _pipe import itertools import sys import pathlib @@ -29,7 +27,7 @@ compile_with_vegafusion as _compile_with_vegafusion, ) from ...utils.core import DataFrameLike -from ...utils.data import DataType +from ...utils.data import DataType, is_data_type as _is_data_type from ...utils.deprecation import AltairDeprecationWarning if sys.version_info >= (3, 13): @@ -114,16 +112,14 @@ def _prepare_data(data, context=None): return data # convert dataframes or objects with __geo_interface__ to dict - elif isinstance(data, pd.DataFrame) or hasattr(data, "__geo_interface__"): - data = _pipe(data, data_transformers.get()) + elif not isinstance(data, dict) and _is_data_type(data): + if func := data_transformers.get(): + data = func(data) # convert string input to a URLData elif isinstance(data, str): data = core.UrlData(data) - elif isinstance(data, DataFrameLike): - data = _pipe(data, data_transformers.get()) - # consolidate inline data to top-level datasets if context is not None and data_transformers.consolidate_datasets: data = _consolidate_data(data, context) From 3caf7fff05c55323103e69a1d1542289b0817c88 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 26 May 2024 12:51:32 +0100 Subject: [PATCH 02/31] refactor: Remove unneeded `@curried.curry` from `to_values` Currying a single argument function is equivalent to referencing the function itself. This only would've added performance overhead. --- altair/utils/data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/altair/utils/data.py b/altair/utils/data.py index f3bf12b67..73954b432 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -230,7 +230,6 @@ def to_csv( return {"url": os.path.join(urlpath, filename), "format": {"type": "csv"}} -@curried.curry def to_values(data: DataType) -> ToValuesReturnType: """Replace a DataFrame by a data model with values.""" check_data_type(data) From d36a2c4df8b7b8c1831a9e7ceef4871f7878b386 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 26 May 2024 12:58:04 +0100 Subject: [PATCH 03/31] refactor: Remove `toolz.curried.pipe` dependency in `_magics` NOTE: Unsure why this differs from `vegalite.v5.api._prepare_data`. Have only removed the dependency. --- altair/_magics.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/altair/_magics.py b/altair/_magics.py index bac190aa3..246ea2a1f 100644 --- a/altair/_magics.py +++ b/altair/_magics.py @@ -10,7 +10,6 @@ import IPython from IPython.core import magic_arguments import pandas as pd -from toolz import curried from altair.vegalite import v5 as vegalite_v5 @@ -41,7 +40,9 @@ def _prepare_data(data, data_transformers): if data is None or isinstance(data, dict): return data elif isinstance(data, pd.DataFrame): - return curried.pipe(data, data_transformers.get()) + if func := data_transformers.get(): + data = func(data) + return data elif isinstance(data, str): return {"url": data} else: From d6dce190b3bc4020c219b1f60018443a51d429ad Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 26 May 2024 13:28:56 +0100 Subject: [PATCH 04/31] refactor: Replaced attribute checks with instance checks for `SupportsGeoInterface` Now possible due to `@runtime_checkable` --- altair/utils/data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/altair/utils/data.py b/altair/utils/data.py index 73954b432..186fd730a 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -233,7 +233,7 @@ def to_csv( def to_values(data: DataType) -> ToValuesReturnType: """Replace a DataFrame by a data model with values.""" check_data_type(data) - if hasattr(data, "__geo_interface__"): + if isinstance(data, SupportsGeoInterface): if isinstance(data, pd.DataFrame): data = sanitize_dataframe(data) # Maybe the type could be further clarified here that it is @@ -276,7 +276,7 @@ def _compute_data_hash(data_str: str) -> str: def _data_to_json_string(data: DataType) -> str: """Return a JSON string representation of the input data""" check_data_type(data) - if hasattr(data, "__geo_interface__"): + if isinstance(data, SupportsGeoInterface): if isinstance(data, pd.DataFrame): data = sanitize_dataframe(data) # Maybe the type could be further clarified here that it is @@ -302,7 +302,7 @@ def _data_to_json_string(data: DataType) -> str: def _data_to_csv_string(data: Union[dict, pd.DataFrame, DataFrameLike]) -> str: """return a CSV string representation of the input data""" check_data_type(data) - if hasattr(data, "__geo_interface__"): + if isinstance(data, SupportsGeoInterface): raise NotImplementedError( "to_csv does not work with data that " "contains the __geo_interface__ attribute" From aae983a3b45d4293a30e47a69118a43c490a9dbb Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 26 May 2024 13:36:13 +0100 Subject: [PATCH 05/31] refactor: Simplified `check_data_type` Added missing `sys` import from earlier commit --- altair/utils/data.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/altair/utils/data.py b/altair/utils/data.py index 186fd730a..37d7d6792 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -3,6 +3,7 @@ import random import hashlib import warnings +import sys from typing import ( Union, MutableMapping, @@ -256,9 +257,7 @@ def to_values(data: DataType) -> ToValuesReturnType: def check_data_type(data: DataType) -> None: - if not isinstance(data, (dict, pd.DataFrame, DataFrameLike)) and not any( - hasattr(data, attr) for attr in ["__geo_interface__"] - ): + if not is_data_type(data): raise TypeError( "Expected dict, DataFrame or a __geo_interface__ attribute, got: {}".format( type(data) From b9dc070c30a23f09adac2d3686114ac3fc9c5cc9 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 26 May 2024 13:48:34 +0100 Subject: [PATCH 06/31] refactor: Remove `@curried.curry` dependency from `limit_rows` This pattern is what I've used in most cases. It works and type checks correctly, but could be improved as a decorator. Would have the benefit of preserving the return type, if one were to inspect the intermediate function. Also replacing the `TypeVar` with a `Union` seems to have satisfied `mypy`. --- altair/utils/data.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/altair/utils/data.py b/altair/utils/data.py index 37d7d6792..3d72a9a66 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -1,3 +1,4 @@ +from functools import partial import json import os import random @@ -89,12 +90,19 @@ class MaxRowsError(Exception): pass -@curried.curry -def limit_rows(data: TDataType, max_rows: Optional[int] = 5000) -> TDataType: +@overload +def limit_rows(data: Literal[None] = ..., max_rows: Optional[int] = ...) -> partial: ... +@overload +def limit_rows(data: DataType, max_rows: Optional[int]) -> DataType: ... +def limit_rows( + data: Optional[DataType] = None, max_rows: Optional[int] = 5000 +) -> Union[partial, DataType]: """Raise MaxRowsError if the data model has more than max_rows. If max_rows is None, then do not perform any check. """ + if data is None: + return partial(limit_rows, max_rows=max_rows) check_data_type(data) def raise_max_rows_error(): @@ -111,7 +119,7 @@ def raise_max_rows_error(): "on how to plot large datasets." ) - if hasattr(data, "__geo_interface__"): + if isinstance(data, SupportsGeoInterface): if data.__geo_interface__["type"] == "FeatureCollection": values = data.__geo_interface__["features"] else: @@ -122,9 +130,7 @@ def raise_max_rows_error(): if "values" in data: values = data["values"] else: - # mypy gets confused as it doesn't see Dict[Any, Any] - # as equivalent to TDataType - return data # type: ignore[return-value] + return data elif isinstance(data, DataFrameLike): pa_table = arrow_table_from_dfi_dataframe(data) if max_rows is not None and pa_table.num_rows > max_rows: From c33433f1a8895cb81cc470e20426bdc60f38d1fb Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 26 May 2024 13:53:52 +0100 Subject: [PATCH 07/31] refactor: Remove `@curried.curry` dependency from `sample` Same pattern as `limit_rows`. Also aliased the return type, aligning with the other transformers. --- altair/utils/data.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/altair/utils/data.py b/altair/utils/data.py index 3d72a9a66..ffcd161af 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -51,6 +51,11 @@ class SupportsGeoInterface(Protocol): VegaLiteDataDict = Dict[str, Union[str, dict, List[dict]]] ToValuesReturnType = Dict[str, Union[dict, List[dict]]] +SampleReturnType = Optional[ + Union[pd.DataFrame, Dict[str, Sequence], "pyarrow.lib.Table"] +] + + def is_data_type(obj: Any) -> TypeIs[DataType]: return isinstance(obj, (dict, pd.DataFrame, DataFrameLike, SupportsGeoInterface)) @@ -145,11 +150,22 @@ def raise_max_rows_error(): return data -@curried.curry +@overload def sample( - data: DataType, n: Optional[int] = None, frac: Optional[float] = None -) -> Optional[Union[pd.DataFrame, Dict[str, Sequence], "pyarrow.lib.Table"]]: + data: Literal[None] = ..., n: Optional[int] = ..., frac: Optional[float] = ... +) -> partial: ... +@overload +def sample( + data: DataType, n: Optional[int], frac: Optional[float] +) -> SampleReturnType: ... +def sample( + data: Optional[DataType] = None, + n: Optional[int] = None, + frac: Optional[float] = None, +) -> Union[partial, SampleReturnType]: """Reduce the size of the data model by sampling without replacement.""" + if data is None: + return partial(sample, n=n, frac=frac) check_data_type(data) if isinstance(data, pd.DataFrame): return data.sample(n=n, frac=frac) From 8a9d59389df6d1a270f389346cbe2dbabf2e67ad Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 26 May 2024 14:39:08 +0100 Subject: [PATCH 08/31] refactor: Remove `@curried.curry` dependency from `to_json`, `to_csv` - Not super happy with the added length, but was difficult to avoid given the number of arguments. - Moved all the generic parts to new function `_to_text`. - Replaced `os.path` with `pathlib.Path`. Can be identified (but not autofixed) with [PTH](https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth). - Although this might be better handled by [urllib](https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin) --- altair/utils/data.py | 104 ++++++++++++++++++++++++++++++++----------- 1 file changed, 78 insertions(+), 26 deletions(-) diff --git a/altair/utils/data.py b/altair/utils/data.py index ffcd161af..69d7dda74 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -1,10 +1,10 @@ from functools import partial import json -import os import random import hashlib import warnings import sys +from pathlib import Path from typing import ( Union, MutableMapping, @@ -48,6 +48,7 @@ class SupportsGeoInterface(Protocol): DataType = Union[dict, pd.DataFrame, SupportsGeoInterface, DataFrameLike] TDataType = TypeVar("TDataType", bound=DataType) +NonGeoDataType = Union[dict, pd.DataFrame, DataFrameLike] VegaLiteDataDict = Dict[str, Union[str, dict, List[dict]]] ToValuesReturnType = Dict[str, Union[dict, List[dict]]] @@ -199,58 +200,109 @@ def sample( return None -class _JsonFormatDict(TypedDict): - type: Literal["json"] +_FormatType = Literal["csv", "json"] -class _CsvFormatDict(TypedDict): - type: Literal["csv"] +class _FormatDict(TypedDict): + type: _FormatType -class _ToJsonReturnUrlDict(TypedDict): +class _ToFormatReturnUrlDict(TypedDict): url: str - format: _JsonFormatDict + format: _FormatDict -class _ToCsvReturnUrlDict(TypedDict): - url: str - format: _CsvFormatDict +@overload +def to_json( + data: Literal[None] = ..., + prefix: str = ..., + extension: str = ..., + filename: str = ..., + urlpath: str = ..., +) -> partial: ... -@curried.curry +@overload def to_json( data: DataType, + prefix: str, + extension: str, + filename: str, + urlpath: str, +) -> _ToFormatReturnUrlDict: ... + + +def to_json( + data: Optional[DataType] = None, prefix: str = "altair-data", extension: str = "json", filename: str = "{prefix}-{hash}.{extension}", urlpath: str = "", -) -> _ToJsonReturnUrlDict: +) -> Union[partial, _ToFormatReturnUrlDict]: """ Write the data model to a .json file and return a url based data model. """ - data_json = _data_to_json_string(data) - data_hash = _compute_data_hash(data_json) - filename = filename.format(prefix=prefix, hash=data_hash, extension=extension) - with open(filename, "w") as f: - f.write(data_json) - return {"url": os.path.join(urlpath, filename), "format": {"type": "json"}} + kwds = _to_text_kwds(prefix, extension, filename, urlpath) + if data is None: + return partial(to_json, **kwds) + else: + data_str = _data_to_json_string(data) + return _to_text(data_str, **kwds, format=_FormatDict(type="json")) -@curried.curry +@overload def to_csv( - data: Union[dict, pd.DataFrame, DataFrameLike], + data: Literal[None] = ..., + prefix: str = ..., + extension: str = ..., + filename: str = ..., + urlpath: str = ..., +) -> partial: ... + + +@overload +def to_csv( + data: NonGeoDataType, + prefix: str, + extension: str, + filename: str, + urlpath: str, +) -> _ToFormatReturnUrlDict: ... + + +def to_csv( + data: Optional[NonGeoDataType] = None, prefix: str = "altair-data", extension: str = "csv", filename: str = "{prefix}-{hash}.{extension}", urlpath: str = "", -) -> _ToCsvReturnUrlDict: +) -> Union[partial, _ToFormatReturnUrlDict]: """Write the data model to a .csv file and return a url based data model.""" - data_csv = _data_to_csv_string(data) - data_hash = _compute_data_hash(data_csv) + kwds = _to_text_kwds(prefix, extension, filename, urlpath) + if data is None: + return partial(to_csv, **kwds) + else: + data_str = _data_to_csv_string(data) + return _to_text(data_str, **kwds, format=_FormatDict(type="csv")) + + +def _to_text( + data: str, + prefix: str, + extension: str, + filename: str, + urlpath: str, + format: _FormatDict, +) -> _ToFormatReturnUrlDict: + data_hash = _compute_data_hash(data) filename = filename.format(prefix=prefix, hash=data_hash, extension=extension) - with open(filename, "w") as f: - f.write(data_csv) - return {"url": os.path.join(urlpath, filename), "format": {"type": "csv"}} + Path(filename).write_text(data) + url = str(Path(urlpath, filename)) + return _ToFormatReturnUrlDict({"url": url, "format": format}) + + +def _to_text_kwds(prefix: str, extension: str, filename: str, urlpath: str, /) -> Dict[str, str]: # fmt: skip + return {"prefix": prefix, "extension": extension, "filename": filename, "urlpath": urlpath} # fmt: skip def to_values(data: DataType) -> ToValuesReturnType: From bf99d725d0c607e9ee668928baf85ee96a9a285e Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 26 May 2024 14:45:35 +0100 Subject: [PATCH 09/31] refactor: Remove top-level `toolz` dependency from `utils.data` The nested imports are purely for a deprecation warning introduced in [4.10.0](https://github.com/vega/altair/commit/af4148d99fe73f9e5c40df2a237d7bdd489987a9). 4 years on and over a major version later, would these be due for removal? --- altair/utils/data.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/altair/utils/data.py b/altair/utils/data.py index 69d7dda74..f129edafe 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -23,8 +23,6 @@ ) import pandas as pd -from toolz import curried -from typing import TypeVar from ._importers import import_pyarrow_interchange from .core import sanitize_dataframe, sanitize_arrow_table, DataFrameLike @@ -414,7 +412,9 @@ def pipe(data, *funcs): AltairDeprecationWarning, stacklevel=1, ) - return curried.pipe(data, *funcs) + from toolz.curried import pipe + + return pipe(data, *funcs) def curry(*args, **kwargs): @@ -428,7 +428,9 @@ def curry(*args, **kwargs): AltairDeprecationWarning, stacklevel=1, ) - return curried.curry(*args, **kwargs) + from toolz.curried import curry + + return curry(*args, **kwargs) def arrow_table_from_dfi_dataframe(dfi_df: DataFrameLike) -> "pyarrow.lib.Table": From 26e77c43c2f259896eda4d88a47aad95137591b2 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 26 May 2024 15:26:31 +0100 Subject: [PATCH 10/31] refactor: Remove `toolz` dependency in `vegalite.data` - Both `@curried.curry` and `curried.pipe` no longer needed - Originally rewrote a generic curried pipe, but given that the two functions are explicitly named here - it seemed excessive. --- altair/vegalite/data.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/altair/vegalite/data.py b/altair/vegalite/data.py index fbeda0fee..ed94b223a 100644 --- a/altair/vegalite/data.py +++ b/altair/vegalite/data.py @@ -1,4 +1,3 @@ -from toolz import curried from ..utils.core import sanitize_dataframe from ..utils.data import ( MaxRowsError, @@ -14,13 +13,31 @@ from ..utils.data import DataTransformerRegistry as _DataTransformerRegistry from ..utils.data import DataType, ToValuesReturnType from ..utils.plugin_registry import PluginEnabler +from typing import Optional, Literal, Union, overload +from collections.abc import Callable -@curried.curry +@overload def default_data_transformer( - data: DataType, max_rows: int = 5000 -) -> ToValuesReturnType: - return curried.pipe(data, limit_rows(max_rows=max_rows), to_values) + data: Literal[None] = ..., max_rows: int = ... +) -> Callable[[DataType], ToValuesReturnType]: ... +@overload +def default_data_transformer( + data: DataType, max_rows: int = ... +) -> ToValuesReturnType: ... +def default_data_transformer( + data: Optional[DataType] = None, max_rows: int = 5000 +) -> Union[Callable[[DataType], ToValuesReturnType], ToValuesReturnType]: + if data is None: + + def pipe(data: DataType, /) -> ToValuesReturnType: + data = limit_rows(data, max_rows=max_rows) + return to_values(data) + + return pipe + + else: + return to_values(limit_rows(data, max_rows=max_rows)) class DataTransformerRegistry(_DataTransformerRegistry): From 3800fc52bfb66e269fe72de8e211f4d9f31c77ff Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 26 May 2024 15:37:50 +0100 Subject: [PATCH 11/31] refactor: Remove `@curried.curry` dependency in `utils._vegafusion_data` - Required a new alias `NonLikeDataType` to resolve the overloads. - Merged the two default branches - `max_rows` should probably be removed since it is never used --- altair/utils/_vegafusion_data.py | 49 +++++++++++++++++++++++++------- altair/utils/data.py | 1 + 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/altair/utils/_vegafusion_data.py b/altair/utils/_vegafusion_data.py index ce30e8d6d..7d108450d 100644 --- a/altair/utils/_vegafusion_data.py +++ b/altair/utils/_vegafusion_data.py @@ -1,8 +1,10 @@ -from toolz import curried import uuid from weakref import WeakValueDictionary from typing import ( + Any, + Literal, + Optional, Union, Dict, Set, @@ -10,11 +12,18 @@ TypedDict, Final, TYPE_CHECKING, + overload, ) +from collections.abc import Callable from altair.utils._importers import import_vegafusion from altair.utils.core import DataFrameLike -from altair.utils.data import DataType, ToValuesReturnType, MaxRowsError +from altair.utils.data import ( + DataType, + ToValuesReturnType, + MaxRowsError, + NonLikeDataType, +) from altair.vegalite.data import default_data_transformer if TYPE_CHECKING: @@ -36,21 +45,41 @@ class _ToVegaFusionReturnUrlDict(TypedDict): url: str -@curried.curry +_VegaFusionReturnType = Union[_ToVegaFusionReturnUrlDict, ToValuesReturnType] + + +@overload +def vegafusion_data_transformer( + data: Literal[None] = ..., max_rows: int = ... +) -> Callable[..., Any]: ... + + +@overload +def vegafusion_data_transformer( + data: DataFrameLike, max_rows: int +) -> ToValuesReturnType: ... + + +@overload def vegafusion_data_transformer( - data: DataType, max_rows: int = 100000 -) -> Union[_ToVegaFusionReturnUrlDict, ToValuesReturnType]: + data: NonLikeDataType, max_rows: int +) -> _VegaFusionReturnType: ... + + +def vegafusion_data_transformer( + data: Optional[DataType] = None, max_rows: int = 100000 +) -> Union[Callable[..., Any], _VegaFusionReturnType]: """VegaFusion Data Transformer""" - if hasattr(data, "__geo_interface__"): - # Use default transformer for geo interface objects - # # (e.g. a geopandas GeoDataFrame) - return default_data_transformer(data) + if data is None: + return vegafusion_data_transformer elif isinstance(data, DataFrameLike): table_name = f"table_{uuid.uuid4()}".replace("-", "_") extracted_inline_tables[table_name] = data return {"url": VEGAFUSION_PREFIX + table_name} else: - # Use default transformer if we don't recognize data type + # Use default transformer for geo interface objects + # # (e.g. a geopandas GeoDataFrame) + # Or if we don't recognize data type return default_data_transformer(data) diff --git a/altair/utils/data.py b/altair/utils/data.py index f129edafe..3d50a0062 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -47,6 +47,7 @@ class SupportsGeoInterface(Protocol): DataType = Union[dict, pd.DataFrame, SupportsGeoInterface, DataFrameLike] TDataType = TypeVar("TDataType", bound=DataType) NonGeoDataType = Union[dict, pd.DataFrame, DataFrameLike] +NonLikeDataType = Union[dict, pd.DataFrame, SupportsGeoInterface] VegaLiteDataDict = Dict[str, Union[str, dict, List[dict]]] ToValuesReturnType = Dict[str, Union[dict, List[dict]]] From b706a94bf822fcd03b170a11dc299c8218de9a39 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 26 May 2024 15:47:28 +0100 Subject: [PATCH 12/31] fix: Update `DataTransformerType` to describe currying The original describes a function before currying, but not the defered variant. --- altair/utils/data.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/altair/utils/data.py b/altair/utils/data.py index 3d50a0062..cc95edf38 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -72,8 +72,15 @@ def is_data_type(obj: Any) -> TypeIs[DataType]: # form. # ============================================================================== class DataTransformerType(Protocol): - def __call__(self, data: DataType, **kwargs) -> VegaLiteDataDict: - pass + @overload + def __call__( + self, data: Literal[None] = None, **kwargs + ) -> "DataTransformerType": ... + @overload + def __call__(self, data: DataType, **kwargs) -> VegaLiteDataDict: ... + def __call__( + self, data: Optional[DataType] = None, **kwargs + ) -> Union["DataTransformerType", VegaLiteDataDict]: ... class DataTransformerRegistry(PluginRegistry[DataTransformerType]): From cf54ec278548ae3c38a3bc36917982d315f1c3d0 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 26 May 2024 16:02:37 +0100 Subject: [PATCH 13/31] refactor: Remove `toolz.curry` dependency in `utils.plugin_registry` Switching from `curry` -> `functools.partial` is trivial. Everything else I'm not too sure on. Had to battle `mypy` to arrive at this and not really happy with the result. The change to `plugin_type`'s default should mean the assert in `register` is somewhat more useful. --- altair/utils/plugin_registry.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/altair/utils/plugin_registry.py b/altair/utils/plugin_registry.py index f6281ed14..44217a09a 100644 --- a/altair/utils/plugin_registry.py +++ b/altair/utils/plugin_registry.py @@ -1,10 +1,9 @@ -from typing import Any, Dict, List, Optional, Generic, TypeVar, cast +from functools import partial +from typing import Any, Dict, List, Optional, Generic, TypeVar, Union, cast from types import TracebackType - +from collections.abc import Callable from importlib.metadata import entry_points -from toolz import curry - PluginType = TypeVar("PluginType") @@ -71,7 +70,7 @@ class PluginRegistry(Generic[PluginType]): # in the registry rather than passed to the plugins _global_settings = {} # type: Dict[str, Any] - def __init__(self, entry_point_group: str = "", plugin_type: type = object): + def __init__(self, entry_point_group: str = "", plugin_type: type[Any] = Callable): """Create a PluginRegistry for a named entry point group. Parameters @@ -90,7 +89,9 @@ def __init__(self, entry_point_group: str = "", plugin_type: type = object): self._options = {} # type: Dict[str, Any] self._global_settings = self.__class__._global_settings.copy() # type: dict - def register(self, name: str, value: Optional[PluginType]) -> Optional[PluginType]: + def register( + self, name: str, value: Union[Optional[PluginType], Any] + ) -> Optional[PluginType]: """Register a plugin by name and value. This method is used for explicit registration of a plugin and shouldn't be @@ -199,10 +200,15 @@ def options(self) -> Dict[str, Any]: """Return the current options dictionary""" return self._options - def get(self) -> Optional[PluginType]: + def get(self) -> Optional[Union[PluginType, Callable[..., Any]]]: """Return the currently active plugin.""" if self._options: - return curry(self._active, **self._options) + if func := self._active: + # NOTE: Fully do not understand this one + # error: Argument 1 to "partial" has incompatible type "PluginType"; expected "Callable[..., Never]" + return partial(func, **self._options) # type: ignore[arg-type] + else: + raise TypeError("Unclear what this meant by passing to curry.") else: return self._active From 78d1a7c9484bab495753d5227047982795ca016f Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 26 May 2024 18:51:08 +0100 Subject: [PATCH 14/31] fix: Ignore special form not a type This is valid for the `assert`. An alternative would be using `callable` from builtins, or reusing `PluginType`'s bounds/constraints. --- altair/utils/plugin_registry.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/altair/utils/plugin_registry.py b/altair/utils/plugin_registry.py index 44217a09a..9f4d793fc 100644 --- a/altair/utils/plugin_registry.py +++ b/altair/utils/plugin_registry.py @@ -6,6 +6,7 @@ PluginType = TypeVar("PluginType") +T = TypeVar("T", bound=Callable) class NoSuchEntryPoint(Exception): @@ -70,7 +71,7 @@ class PluginRegistry(Generic[PluginType]): # in the registry rather than passed to the plugins _global_settings = {} # type: Dict[str, Any] - def __init__(self, entry_point_group: str = "", plugin_type: type[Any] = Callable): + def __init__(self, entry_point_group: str = "", plugin_type: type[T] = Callable): # type: ignore[assignment] """Create a PluginRegistry for a named entry point group. Parameters From 8cd40e2df811d35ced8edccf3b06c9a957977878 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 26 May 2024 18:54:59 +0100 Subject: [PATCH 15/31] fix: drop `type` subscript, not available in earlier versions --- altair/utils/plugin_registry.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/altair/utils/plugin_registry.py b/altair/utils/plugin_registry.py index 9f4d793fc..7f6bf594a 100644 --- a/altair/utils/plugin_registry.py +++ b/altair/utils/plugin_registry.py @@ -6,7 +6,6 @@ PluginType = TypeVar("PluginType") -T = TypeVar("T", bound=Callable) class NoSuchEntryPoint(Exception): @@ -71,7 +70,7 @@ class PluginRegistry(Generic[PluginType]): # in the registry rather than passed to the plugins _global_settings = {} # type: Dict[str, Any] - def __init__(self, entry_point_group: str = "", plugin_type: type[T] = Callable): # type: ignore[assignment] + def __init__(self, entry_point_group: str = "", plugin_type: type = Callable): # type: ignore[assignment] """Create a PluginRegistry for a named entry point group. Parameters From 8bfaf418de8a22bbfe97db14f81e44550176b848 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 26 May 2024 19:25:59 +0100 Subject: [PATCH 16/31] fix: Use 3.8 compatible `typing.Callable` over `collections.abc` --- altair/utils/_vegafusion_data.py | 3 +-- altair/utils/plugin_registry.py | 3 +-- altair/vegalite/data.py | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/altair/utils/_vegafusion_data.py b/altair/utils/_vegafusion_data.py index 7d108450d..770f796a2 100644 --- a/altair/utils/_vegafusion_data.py +++ b/altair/utils/_vegafusion_data.py @@ -1,6 +1,5 @@ import uuid from weakref import WeakValueDictionary - from typing import ( Any, Literal, @@ -13,8 +12,8 @@ Final, TYPE_CHECKING, overload, + Callable, ) -from collections.abc import Callable from altair.utils._importers import import_vegafusion from altair.utils.core import DataFrameLike diff --git a/altair/utils/plugin_registry.py b/altair/utils/plugin_registry.py index 7f6bf594a..b1cce92ec 100644 --- a/altair/utils/plugin_registry.py +++ b/altair/utils/plugin_registry.py @@ -1,7 +1,6 @@ from functools import partial -from typing import Any, Dict, List, Optional, Generic, TypeVar, Union, cast +from typing import Any, Dict, List, Optional, Generic, TypeVar, Union, cast, Callable from types import TracebackType -from collections.abc import Callable from importlib.metadata import entry_points diff --git a/altair/vegalite/data.py b/altair/vegalite/data.py index ed94b223a..8e8c5be0e 100644 --- a/altair/vegalite/data.py +++ b/altair/vegalite/data.py @@ -13,8 +13,7 @@ from ..utils.data import DataTransformerRegistry as _DataTransformerRegistry from ..utils.data import DataType, ToValuesReturnType from ..utils.plugin_registry import PluginEnabler -from typing import Optional, Literal, Union, overload -from collections.abc import Callable +from typing import Optional, Literal, Union, overload, Callable @overload From f34eb6f46a45764ea621c48a0aa63c08bda37547 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 6 Jun 2024 18:52:05 +0100 Subject: [PATCH 17/31] test: Remove `toolz.pipe` dependency in `test_data` --- tests/utils/test_data.py | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/tests/utils/test_data.py b/tests/utils/test_data.py index 0d746e79d..d74c9afad 100644 --- a/tests/utils/test_data.py +++ b/tests/utils/test_data.py @@ -1,8 +1,7 @@ import os - +from typing import Any, Callable import pytest import pandas as pd -from toolz import pipe from altair.utils.data import ( limit_rows, @@ -14,6 +13,14 @@ ) +def _pipe(data: Any, *funcs: Callable[..., Any]) -> Any: + # Redefined to maintain existing tests + # Originally part of `toolz` dependency + for func in funcs: + data = func(data) + return data + + def _create_dataframe(N): data = pd.DataFrame({"x": range(N), "y": range(N)}) return data @@ -30,9 +37,9 @@ def test_limit_rows(): result = limit_rows(data, max_rows=20) assert data is result with pytest.raises(MaxRowsError): - pipe(data, limit_rows(max_rows=5)) + _pipe(data, limit_rows(max_rows=5)) data = _create_data_with_values(10) - result = pipe(data, limit_rows(max_rows=20)) + result = _pipe(data, limit_rows(max_rows=20)) assert data is result with pytest.raises(MaxRowsError): limit_rows(data, max_rows=5) @@ -41,7 +48,7 @@ def test_limit_rows(): def test_sample(): """Test the sample data transformer.""" data = _create_dataframe(20) - result = pipe(data, sample(n=10)) + result = _pipe(data, sample(n=10)) assert len(result) == 10 assert isinstance(result, pd.DataFrame) data = _create_data_with_values(20) @@ -50,7 +57,7 @@ def test_sample(): assert "values" in result assert len(result["values"]) == 10 data = _create_dataframe(20) - result = pipe(data, sample(frac=0.5)) + result = _pipe(data, sample(frac=0.5)) assert len(result) == 10 assert isinstance(result, pd.DataFrame) data = _create_data_with_values(20) @@ -63,7 +70,7 @@ def test_sample(): def test_to_values(): """Test the to_values data transformer.""" data = _create_dataframe(10) - result = pipe(data, to_values) + result = _pipe(data, to_values) assert result == {"values": data.to_dict(orient="records")} @@ -71,7 +78,7 @@ def test_type_error(): """Ensure that TypeError is raised for types other than dict/DataFrame.""" for f in (sample, limit_rows, to_values): with pytest.raises(TypeError): - pipe(0, f) + _pipe(0, f) def test_dataframe_to_json(): @@ -81,8 +88,8 @@ def test_dataframe_to_json(): """ data = _create_dataframe(10) try: - result1 = pipe(data, to_json) - result2 = pipe(data, to_json) + result1 = _pipe(data, to_json) + result2 = _pipe(data, to_json) filename = result1["url"] output = pd.read_json(filename) finally: @@ -99,8 +106,8 @@ def test_dict_to_json(): """ data = _create_data_with_values(10) try: - result1 = pipe(data, to_json) - result2 = pipe(data, to_json) + result1 = _pipe(data, to_json) + result2 = _pipe(data, to_json) filename = result1["url"] output = pd.read_json(filename).to_dict(orient="records") finally: @@ -117,8 +124,8 @@ def test_dataframe_to_csv(): """ data = _create_dataframe(10) try: - result1 = pipe(data, to_csv) - result2 = pipe(data, to_csv) + result1 = _pipe(data, to_csv) + result2 = _pipe(data, to_csv) filename = result1["url"] output = pd.read_csv(filename) finally: @@ -135,8 +142,8 @@ def test_dict_to_csv(): """ data = _create_data_with_values(10) try: - result1 = pipe(data, to_csv) - result2 = pipe(data, to_csv) + result1 = _pipe(data, to_csv) + result2 = _pipe(data, to_csv) filename = result1["url"] output = pd.read_csv(filename).to_dict(orient="records") finally: From 48eb2aaac08ba91326181f5b9fb890028b8ec7d9 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 6 Jun 2024 19:08:47 +0100 Subject: [PATCH 18/31] build: drop `toolz` dependency --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a7ad687f7..6196d4790 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,6 @@ dependencies = [ "numpy", # If you update the minimum required pandas version, also update it in build.yml "pandas>=0.25", - "toolz", "packaging" ] description = "Vega-Altair: A declarative statistical visualization library for Python." From 1e8167b51413ba8b0162d6d75dc60b6939ad304e Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 6 Jun 2024 21:11:26 +0100 Subject: [PATCH 19/31] refactor: Implement optional `toolz` imports This preserves the existing `AltairDeprecationWarning` but removes the final traces of `toolz` dependency in `altair`. --- altair/utils/_importers.py | 29 ++++++++++++++++++++++++++++- altair/utils/data.py | 24 +++--------------------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/altair/utils/_importers.py b/altair/utils/_importers.py index b7fa8a958..cc407a45d 100644 --- a/altair/utils/_importers.py +++ b/altair/utils/_importers.py @@ -1,6 +1,12 @@ +import warnings +from importlib import import_module +from importlib.metadata import version as importlib_version from types import ModuleType +from typing import Any, Callable + from packaging.version import Version -from importlib.metadata import version as importlib_version + +from altair.utils.deprecation import AltairDeprecationWarning def import_vegafusion() -> ModuleType: @@ -95,3 +101,24 @@ def pyarrow_available() -> bool: return True except (ImportError, RuntimeError): return False + + +def import_toolz_function( + function: str, module: str = "toolz.curried" +) -> Callable[..., Any]: + msg = ( + f"alt.{function}() is deprecated, and will be removed in a future release. " + f"Use {module}.{function}() instead." + ) + warnings.warn(msg, AltairDeprecationWarning, stacklevel=1) + msg_start = f"Usage of deprecated alt.{function}() requires {module}\n\n" + msg = f"{msg_start}ImportError: " + try: + toolz_curried = import_module(module) + except ImportError as err: + msg = f"{msg} {err.args[0]}" + raise ImportError(msg) from err + if func := getattr(toolz_curried, function, None): + return func + else: + raise ImportError(msg_start) diff --git a/altair/utils/data.py b/altair/utils/data.py index cc95edf38..8ef55b083 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -2,7 +2,6 @@ import json import random import hashlib -import warnings import sys from pathlib import Path from typing import ( @@ -24,10 +23,9 @@ import pandas as pd -from ._importers import import_pyarrow_interchange +from ._importers import import_pyarrow_interchange, import_toolz_function from .core import sanitize_dataframe, sanitize_arrow_table, DataFrameLike from .core import sanitize_geo_interface -from .deprecation import AltairDeprecationWarning from .plugin_registry import PluginRegistry if sys.version_info >= (3, 13): @@ -414,15 +412,7 @@ def pipe(data, *funcs): Deprecated: use toolz.curried.pipe() instead. """ - warnings.warn( - "alt.pipe() is deprecated, and will be removed in a future release. " - "Use toolz.curried.pipe() instead.", - AltairDeprecationWarning, - stacklevel=1, - ) - from toolz.curried import pipe - - return pipe(data, *funcs) + return import_toolz_function("pipe")(data, *funcs) def curry(*args, **kwargs): @@ -430,15 +420,7 @@ def curry(*args, **kwargs): Deprecated: use toolz.curried.curry() instead. """ - warnings.warn( - "alt.curry() is deprecated, and will be removed in a future release. " - "Use toolz.curried.curry() instead.", - AltairDeprecationWarning, - stacklevel=1, - ) - from toolz.curried import curry - - return curry(*args, **kwargs) + return import_toolz_function("curry")(*args, **kwargs) def arrow_table_from_dfi_dataframe(dfi_df: DataFrameLike) -> "pyarrow.lib.Table": From 2f5aa5605a6e5d9dfd8a355f7760fb99092b1533 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 6 Jun 2024 21:14:58 +0100 Subject: [PATCH 20/31] test: adds `test_toolz` covering both `pipe`, `curry` --- tests/utils/test_data.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tests/utils/test_data.py b/tests/utils/test_data.py index d74c9afad..9678da6a8 100644 --- a/tests/utils/test_data.py +++ b/tests/utils/test_data.py @@ -2,7 +2,6 @@ from typing import Any, Callable import pytest import pandas as pd - from altair.utils.data import ( limit_rows, MaxRowsError, @@ -10,7 +9,11 @@ to_values, to_json, to_csv, + curry, + pipe, ) +from altair.utils._importers import import_toolz_function +from altair.utils.deprecation import AltairDeprecationWarning def _pipe(data: Any, *funcs: Callable[..., Any]) -> Any: @@ -151,3 +154,24 @@ def test_dict_to_csv(): assert result1 == result2 assert data == {"values": output} + + +def test_toolz(): + expected_msg = r"Usage.+ requires" + data = _create_data_with_values(10) + try: + with pytest.warns(AltairDeprecationWarning, match="toolz.curried.pipe"): + result1 = pipe(data, to_values) + assert isinstance(result1, dict) + kwds = {"prefix": "dummy"} + with pytest.warns(AltairDeprecationWarning, match="toolz.curried.curry"): + result2 = curry(to_csv, **kwds) + assert "curry" in type(result2).__name__ + assert result2.func_name == to_csv.__name__ + assert result2.keywords == kwds + except ImportError as err: + assert expected_msg in err.msg + with pytest.raises(ImportError, match=expected_msg): + dummy = "fake_function_name" + with pytest.warns(AltairDeprecationWarning, match=f"toolz.curried.{dummy}"): + func = import_toolz_function(dummy) # noqa: F841 From 60b8231c4ff785c1fd61918097caff58f8be2cc2 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 6 Jun 2024 21:28:57 +0100 Subject: [PATCH 21/31] test: fix assert match on `test_toolz` Forgot I replaced a literal string with regex --- tests/utils/test_data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/utils/test_data.py b/tests/utils/test_data.py index 9678da6a8..e37cb402f 100644 --- a/tests/utils/test_data.py +++ b/tests/utils/test_data.py @@ -1,4 +1,5 @@ import os +import re from typing import Any, Callable import pytest import pandas as pd @@ -170,7 +171,7 @@ def test_toolz(): assert result2.func_name == to_csv.__name__ assert result2.keywords == kwds except ImportError as err: - assert expected_msg in err.msg + assert re.search(expected_msg, err.msg) with pytest.raises(ImportError, match=expected_msg): dummy = "fake_function_name" with pytest.warns(AltairDeprecationWarning, match=f"toolz.curried.{dummy}"): From a8e537f1547a9547e1dfc75e6848574829fcdd8a Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 13 Jun 2024 15:16:46 +0100 Subject: [PATCH 22/31] refactor(typing): Replace `Literal[None]` -> `None https://github.com/vega/altair/pull/3426#discussion_r1638015988 --- altair/utils/_vegafusion_data.py | 3 +-- altair/utils/data.py | 12 +++++------- altair/vegalite/data.py | 4 ++-- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/altair/utils/_vegafusion_data.py b/altair/utils/_vegafusion_data.py index 770f796a2..40117851f 100644 --- a/altair/utils/_vegafusion_data.py +++ b/altair/utils/_vegafusion_data.py @@ -2,7 +2,6 @@ from weakref import WeakValueDictionary from typing import ( Any, - Literal, Optional, Union, Dict, @@ -49,7 +48,7 @@ class _ToVegaFusionReturnUrlDict(TypedDict): @overload def vegafusion_data_transformer( - data: Literal[None] = ..., max_rows: int = ... + data: None = ..., max_rows: int = ... ) -> Callable[..., Any]: ... diff --git a/altair/utils/data.py b/altair/utils/data.py index 8ef55b083..f8f10eac1 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -71,9 +71,7 @@ def is_data_type(obj: Any) -> TypeIs[DataType]: # ============================================================================== class DataTransformerType(Protocol): @overload - def __call__( - self, data: Literal[None] = None, **kwargs - ) -> "DataTransformerType": ... + def __call__(self, data: None = None, **kwargs) -> "DataTransformerType": ... @overload def __call__(self, data: DataType, **kwargs) -> VegaLiteDataDict: ... def __call__( @@ -101,7 +99,7 @@ class MaxRowsError(Exception): @overload -def limit_rows(data: Literal[None] = ..., max_rows: Optional[int] = ...) -> partial: ... +def limit_rows(data: None = ..., max_rows: Optional[int] = ...) -> partial: ... @overload def limit_rows(data: DataType, max_rows: Optional[int]) -> DataType: ... def limit_rows( @@ -157,7 +155,7 @@ def raise_max_rows_error(): @overload def sample( - data: Literal[None] = ..., n: Optional[int] = ..., frac: Optional[float] = ... + data: None = ..., n: Optional[int] = ..., frac: Optional[float] = ... ) -> partial: ... @overload def sample( @@ -218,7 +216,7 @@ class _ToFormatReturnUrlDict(TypedDict): @overload def to_json( - data: Literal[None] = ..., + data: None = ..., prefix: str = ..., extension: str = ..., filename: str = ..., @@ -256,7 +254,7 @@ def to_json( @overload def to_csv( - data: Literal[None] = ..., + data: None = ..., prefix: str = ..., extension: str = ..., filename: str = ..., diff --git a/altair/vegalite/data.py b/altair/vegalite/data.py index 8e8c5be0e..84f30fbaa 100644 --- a/altair/vegalite/data.py +++ b/altair/vegalite/data.py @@ -13,12 +13,12 @@ from ..utils.data import DataTransformerRegistry as _DataTransformerRegistry from ..utils.data import DataType, ToValuesReturnType from ..utils.plugin_registry import PluginEnabler -from typing import Optional, Literal, Union, overload, Callable +from typing import Optional, Union, overload, Callable @overload def default_data_transformer( - data: Literal[None] = ..., max_rows: int = ... + data: None = ..., max_rows: int = ... ) -> Callable[[DataType], ToValuesReturnType]: ... @overload def default_data_transformer( From b0d0e5c6607339db655ef1c5387910adce6a2c46 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 13 Jun 2024 16:45:05 +0100 Subject: [PATCH 23/31] refactor(typing): Replace single-use `NonLikeDataType` alias with `Union` See [review](https://github.com/vega/altair/pull/3426#discussion_r1638030144) The name was not descriptive, and upon further thought, was less helpful than spelling out the type in full. --- altair/utils/_vegafusion_data.py | 6 ++++-- altair/utils/data.py | 1 - 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/altair/utils/_vegafusion_data.py b/altair/utils/_vegafusion_data.py index 40117851f..32c36b44d 100644 --- a/altair/utils/_vegafusion_data.py +++ b/altair/utils/_vegafusion_data.py @@ -1,3 +1,4 @@ +from __future__ import annotations import uuid from weakref import WeakValueDictionary from typing import ( @@ -20,11 +21,12 @@ DataType, ToValuesReturnType, MaxRowsError, - NonLikeDataType, + SupportsGeoInterface, ) from altair.vegalite.data import default_data_transformer if TYPE_CHECKING: + import pandas as pd from vegafusion.runtime import ChartState # type: ignore # Temporary storage for dataframes that have been extracted @@ -60,7 +62,7 @@ def vegafusion_data_transformer( @overload def vegafusion_data_transformer( - data: NonLikeDataType, max_rows: int + data: Union[dict, pd.DataFrame, SupportsGeoInterface], max_rows: int ) -> _VegaFusionReturnType: ... diff --git a/altair/utils/data.py b/altair/utils/data.py index f8f10eac1..342967e06 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -45,7 +45,6 @@ class SupportsGeoInterface(Protocol): DataType = Union[dict, pd.DataFrame, SupportsGeoInterface, DataFrameLike] TDataType = TypeVar("TDataType", bound=DataType) NonGeoDataType = Union[dict, pd.DataFrame, DataFrameLike] -NonLikeDataType = Union[dict, pd.DataFrame, SupportsGeoInterface] VegaLiteDataDict = Dict[str, Union[str, dict, List[dict]]] ToValuesReturnType = Dict[str, Union[dict, List[dict]]] From 64841611dad4f01715296fb34526a5c54a65982f Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 14 Jun 2024 15:52:54 +0100 Subject: [PATCH 24/31] feat: Remove `alt.curry`, `alt.pipe` https://github.com/vega/altair/pull/3426#issuecomment-2167599495 --- altair/__init__.py | 2 -- altair/utils/data.py | 19 +------------------ altair/vegalite/data.py | 4 ---- altair/vegalite/v5/__init__.py | 2 -- altair/vegalite/v5/data.py | 4 ---- tests/utils/test_data.py | 3 +-- 6 files changed, 2 insertions(+), 32 deletions(-) diff --git a/altair/__init__.py b/altair/__init__.py index 55feb3cdb..2f9e4acad 100644 --- a/altair/__init__.py +++ b/altair/__init__.py @@ -572,7 +572,6 @@ "concat", "condition", "core", - "curry", "data", "data_transformers", "datum", @@ -591,7 +590,6 @@ "overload", "param", "parse_shorthand", - "pipe", "renderers", "repeat", "sample", diff --git a/altair/utils/data.py b/altair/utils/data.py index 342967e06..269d962eb 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -23,7 +23,7 @@ import pandas as pd -from ._importers import import_pyarrow_interchange, import_toolz_function +from ._importers import import_pyarrow_interchange from .core import sanitize_dataframe, sanitize_arrow_table, DataFrameLike from .core import sanitize_geo_interface from .plugin_registry import PluginRegistry @@ -403,23 +403,6 @@ def _data_to_csv_string(data: Union[dict, pd.DataFrame, DataFrameLike]) -> str: ) -def pipe(data, *funcs): - """ - Pipe a value through a sequence of functions - - Deprecated: use toolz.curried.pipe() instead. - """ - return import_toolz_function("pipe")(data, *funcs) - - -def curry(*args, **kwargs): - """Curry a callable function - - Deprecated: use toolz.curried.curry() instead. - """ - return import_toolz_function("curry")(*args, **kwargs) - - def arrow_table_from_dfi_dataframe(dfi_df: DataFrameLike) -> "pyarrow.lib.Table": """Convert a DataFrame Interchange Protocol compatible object to an Arrow Table""" import pyarrow as pa diff --git a/altair/vegalite/data.py b/altair/vegalite/data.py index 84f30fbaa..5b7a90a3d 100644 --- a/altair/vegalite/data.py +++ b/altair/vegalite/data.py @@ -1,9 +1,7 @@ from ..utils.core import sanitize_dataframe from ..utils.data import ( MaxRowsError, - curry, limit_rows, - pipe, sample, to_csv, to_json, @@ -52,11 +50,9 @@ def disable_max_rows(self) -> PluginEnabler: __all__ = ( "DataTransformerRegistry", "MaxRowsError", - "curry", "sanitize_dataframe", "default_data_transformer", "limit_rows", - "pipe", "sample", "to_csv", "to_json", diff --git a/altair/vegalite/v5/__init__.py b/altair/vegalite/v5/__init__.py index 8c75d5cc2..17c61a6e5 100644 --- a/altair/vegalite/v5/__init__.py +++ b/altair/vegalite/v5/__init__.py @@ -9,8 +9,6 @@ from .data import ( MaxRowsError, - pipe, - curry, limit_rows, sample, to_json, diff --git a/altair/vegalite/v5/data.py b/altair/vegalite/v5/data.py index 1e47db526..841c1d8bb 100644 --- a/altair/vegalite/v5/data.py +++ b/altair/vegalite/v5/data.py @@ -1,9 +1,7 @@ from ..data import ( MaxRowsError, - curry, default_data_transformer, limit_rows, - pipe, sample, to_csv, to_json, @@ -34,10 +32,8 @@ __all__ = ( "MaxRowsError", - "curry", "default_data_transformer", "limit_rows", - "pipe", "sample", "to_csv", "to_json", diff --git a/tests/utils/test_data.py b/tests/utils/test_data.py index e37cb402f..cc1aefd8a 100644 --- a/tests/utils/test_data.py +++ b/tests/utils/test_data.py @@ -10,8 +10,6 @@ to_values, to_json, to_csv, - curry, - pipe, ) from altair.utils._importers import import_toolz_function from altair.utils.deprecation import AltairDeprecationWarning @@ -157,6 +155,7 @@ def test_dict_to_csv(): assert data == {"values": output} +@pytest.mark.skip def test_toolz(): expected_msg = r"Usage.+ requires" data = _create_data_with_values(10) From 3e4edcb28fcbbc81f256c1ff93c596a61ca524dc Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 14 Jun 2024 15:56:23 +0100 Subject: [PATCH 25/31] test: add `F821` ignores in `test_toolz` --- tests/utils/test_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/utils/test_data.py b/tests/utils/test_data.py index cc1aefd8a..d2bfa9579 100644 --- a/tests/utils/test_data.py +++ b/tests/utils/test_data.py @@ -161,11 +161,11 @@ def test_toolz(): data = _create_data_with_values(10) try: with pytest.warns(AltairDeprecationWarning, match="toolz.curried.pipe"): - result1 = pipe(data, to_values) + result1 = pipe(data, to_values) # noqa: F821 assert isinstance(result1, dict) kwds = {"prefix": "dummy"} with pytest.warns(AltairDeprecationWarning, match="toolz.curried.curry"): - result2 = curry(to_csv, **kwds) + result2 = curry(to_csv, **kwds) # noqa: F821 assert "curry" in type(result2).__name__ assert result2.func_name == to_csv.__name__ assert result2.keywords == kwds From 650ee0e31060d25eae15690bbd62d45443c17a8d Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 14 Jun 2024 16:04:41 +0100 Subject: [PATCH 26/31] maint: Remove `test_toolz` --- tests/utils/test_data.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/tests/utils/test_data.py b/tests/utils/test_data.py index d2bfa9579..889e919be 100644 --- a/tests/utils/test_data.py +++ b/tests/utils/test_data.py @@ -1,5 +1,4 @@ import os -import re from typing import Any, Callable import pytest import pandas as pd @@ -11,8 +10,6 @@ to_json, to_csv, ) -from altair.utils._importers import import_toolz_function -from altair.utils.deprecation import AltairDeprecationWarning def _pipe(data: Any, *funcs: Callable[..., Any]) -> Any: @@ -153,25 +150,3 @@ def test_dict_to_csv(): assert result1 == result2 assert data == {"values": output} - - -@pytest.mark.skip -def test_toolz(): - expected_msg = r"Usage.+ requires" - data = _create_data_with_values(10) - try: - with pytest.warns(AltairDeprecationWarning, match="toolz.curried.pipe"): - result1 = pipe(data, to_values) # noqa: F821 - assert isinstance(result1, dict) - kwds = {"prefix": "dummy"} - with pytest.warns(AltairDeprecationWarning, match="toolz.curried.curry"): - result2 = curry(to_csv, **kwds) # noqa: F821 - assert "curry" in type(result2).__name__ - assert result2.func_name == to_csv.__name__ - assert result2.keywords == kwds - except ImportError as err: - assert re.search(expected_msg, err.msg) - with pytest.raises(ImportError, match=expected_msg): - dummy = "fake_function_name" - with pytest.warns(AltairDeprecationWarning, match=f"toolz.curried.{dummy}"): - func = import_toolz_function(dummy) # noqa: F841 From 7b6e0d78a4c322844e3fd658114c2d087fad9897 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 18 Jun 2024 12:13:04 +0100 Subject: [PATCH 27/31] chore: merge main --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6196d4790..ba6de061a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ dependencies = [ "jinja2", # If you update the minimum required jsonschema version, also update it in build.yml "jsonschema>=3.0", - "numpy", + "numpy<2.0.0", # If you update the minimum required pandas version, also update it in build.yml "pandas>=0.25", "packaging" From 88fbd1141b39ca74b47106d254969661e77bdabf Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 18 Jun 2024 12:40:35 +0100 Subject: [PATCH 28/31] revert: remove now-uneeded `import_toolz_function` --- altair/utils/_importers.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/altair/utils/_importers.py b/altair/utils/_importers.py index cc407a45d..9f7f567e4 100644 --- a/altair/utils/_importers.py +++ b/altair/utils/_importers.py @@ -1,13 +1,8 @@ -import warnings -from importlib import import_module from importlib.metadata import version as importlib_version from types import ModuleType -from typing import Any, Callable from packaging.version import Version -from altair.utils.deprecation import AltairDeprecationWarning - def import_vegafusion() -> ModuleType: min_version = "1.5.0" @@ -101,24 +96,3 @@ def pyarrow_available() -> bool: return True except (ImportError, RuntimeError): return False - - -def import_toolz_function( - function: str, module: str = "toolz.curried" -) -> Callable[..., Any]: - msg = ( - f"alt.{function}() is deprecated, and will be removed in a future release. " - f"Use {module}.{function}() instead." - ) - warnings.warn(msg, AltairDeprecationWarning, stacklevel=1) - msg_start = f"Usage of deprecated alt.{function}() requires {module}\n\n" - msg = f"{msg_start}ImportError: " - try: - toolz_curried = import_module(module) - except ImportError as err: - msg = f"{msg} {err.args[0]}" - raise ImportError(msg) from err - if func := getattr(toolz_curried, function, None): - return func - else: - raise ImportError(msg_start) From bd949a964a195b8325ff5ba1c1b6dc8a60fe7ea8 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 18 Jun 2024 18:44:55 +0100 Subject: [PATCH 29/31] fix(typing): add missing `...` for some overloads Spotted while testing a solution for [comment](https://github.com/vega/altair/pull/3426#issuecomment-2176461593). This isn't a solution for that, but ensures the overloads solve correctly. --- altair/utils/data.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/altair/utils/data.py b/altair/utils/data.py index 269d962eb..2b429d760 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -100,7 +100,7 @@ class MaxRowsError(Exception): @overload def limit_rows(data: None = ..., max_rows: Optional[int] = ...) -> partial: ... @overload -def limit_rows(data: DataType, max_rows: Optional[int]) -> DataType: ... +def limit_rows(data: DataType, max_rows: Optional[int] = ...) -> DataType: ... def limit_rows( data: Optional[DataType] = None, max_rows: Optional[int] = 5000 ) -> Union[partial, DataType]: @@ -226,10 +226,10 @@ def to_json( @overload def to_json( data: DataType, - prefix: str, - extension: str, - filename: str, - urlpath: str, + prefix: str = ..., + extension: str = ..., + filename: str = ..., + urlpath: str = ..., ) -> _ToFormatReturnUrlDict: ... @@ -264,10 +264,10 @@ def to_csv( @overload def to_csv( data: NonGeoDataType, - prefix: str, - extension: str, - filename: str, - urlpath: str, + prefix: str = ..., + extension: str = ..., + filename: str = ..., + urlpath: str = ..., ) -> _ToFormatReturnUrlDict: ... From 5780f2111d47cf06c4e6b5e8bfc30b4649ada2bb Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 19 Jun 2024 09:15:07 +0100 Subject: [PATCH 30/31] docs(typing): Remove `NonGeoDataType` and amend error message in `_data_to_csv_string` See https://github.com/vega/altair/pull/3426#issuecomment-2177601705 See https://github.com/vega/altair/issues/3441 --- altair/utils/data.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/altair/utils/data.py b/altair/utils/data.py index 2b429d760..41b5e71aa 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -44,7 +44,6 @@ class SupportsGeoInterface(Protocol): DataType = Union[dict, pd.DataFrame, SupportsGeoInterface, DataFrameLike] TDataType = TypeVar("TDataType", bound=DataType) -NonGeoDataType = Union[dict, pd.DataFrame, DataFrameLike] VegaLiteDataDict = Dict[str, Union[str, dict, List[dict]]] ToValuesReturnType = Dict[str, Union[dict, List[dict]]] @@ -263,7 +262,7 @@ def to_csv( @overload def to_csv( - data: NonGeoDataType, + data: Union[dict, pd.DataFrame, DataFrameLike], prefix: str = ..., extension: str = ..., filename: str = ..., @@ -272,7 +271,7 @@ def to_csv( def to_csv( - data: Optional[NonGeoDataType] = None, + data: Optional[Union[dict, pd.DataFrame, DataFrameLike]] = None, prefix: str = "altair-data", extension: str = "csv", filename: str = "{prefix}-{hash}.{extension}", @@ -378,8 +377,9 @@ def _data_to_csv_string(data: Union[dict, pd.DataFrame, DataFrameLike]) -> str: check_data_type(data) if isinstance(data, SupportsGeoInterface): raise NotImplementedError( - "to_csv does not work with data that " - "contains the __geo_interface__ attribute" + f"to_csv does not yet work with data that " + f"is of type {type(SupportsGeoInterface).__name__!r}.\n" + f"See https://github.com/vega/altair/issues/3441" ) elif isinstance(data, pd.DataFrame): data = sanitize_dataframe(data) From 9f0d58ae46932c2af0f06245bb00bbddfba00ece Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 19 Jun 2024 09:35:08 +0100 Subject: [PATCH 31/31] fix: ensure `SupportsGeoInterface` branch is triggered in `vegafusion_data_transformer` Fixes: https://github.com/vega/altair/pull/3426#discussion_r1645668115 --- altair/utils/_vegafusion_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/altair/utils/_vegafusion_data.py b/altair/utils/_vegafusion_data.py index 32c36b44d..cfd8a5760 100644 --- a/altair/utils/_vegafusion_data.py +++ b/altair/utils/_vegafusion_data.py @@ -72,7 +72,7 @@ def vegafusion_data_transformer( """VegaFusion Data Transformer""" if data is None: return vegafusion_data_transformer - elif isinstance(data, DataFrameLike): + elif isinstance(data, DataFrameLike) and not isinstance(data, SupportsGeoInterface): table_name = f"table_{uuid.uuid4()}".replace("-", "_") extracted_inline_tables[table_name] = data return {"url": VEGAFUSION_PREFIX + table_name}