
wip-feat: pandas as soft dependency #3384

Closed
wants to merge 10 commits
6 changes: 4 additions & 2 deletions altair/_magics.py
@@ -9,7 +9,7 @@

import IPython
from IPython.core import magic_arguments
-import pandas as pd
+from altair.utils.core import DataFrameLike, _is_pandas_dataframe
from toolz import curried

from altair.vegalite import v5 as vegalite_v5
@@ -40,10 +40,12 @@ def _prepare_data(data, data_transformers):
"""Convert input data to data for use within schema"""
if data is None or isinstance(data, dict):
return data
elif isinstance(data, pd.DataFrame):
elif isinstance(data, DataFrameLike):
return curried.pipe(data, data_transformers.get())
elif isinstance(data, str):
return {"url": data}
elif _is_pandas_dataframe(data):
Contributor:

Is my understanding correct that this line is only reached if it's an old pandas version which does not support the dataframe interchange protocol? Otherwise it would already stop at line 43, right?

If yes, could you add a comment about this?

+        return curried.pipe(data, data_transformers.get())
    else:
        warnings.warn("data of type {} not recognized".format(type(data)), stacklevel=1)
        return data
24 changes: 24 additions & 0 deletions altair/utils/_importers.py
@@ -95,3 +95,27 @@ def pyarrow_available() -> bool:
        return True
    except ImportError:
        return False


+def import_pandas() -> ModuleType:
+    min_version = "0.25"
Contributor:

Could you add a comment in pyproject.toml, next to the pandas requirement, noting that if the pandas version is updated there, it also needs to be changed here? Although I'm realizing now that that file needs to be changed anyway to make pandas optional.

+    try:
+        version = importlib_version("pandas")
+        if Version(version) < Version(min_version):
+            raise RuntimeError(
+                f"The pandas package must be version {min_version} or greater. "
+                f"Found version {version}"
+            )
+        import pandas as pd
+
+        return pd
+    except ImportError as err:
+        raise ImportError(
+            f"Serialization of the DataFrame requires\n"
Contributor:

Suggested change:
-            f"Serialization of the DataFrame requires\n"
+            f"Serialization of this data requires\n"

It can also be a dict, as in data.py: _data_to_csv_string. Furthermore, if it's a DataFrame, it's already a given that pandas is installed.

f"version {min_version} or greater of the 'pandas' package. \n"
f"This can be installed with pip using:\n"
f' pip install "pandas>={min_version}"\n'
"or conda:\n"
f' conda install -c conda-forge "pandas>={min_version}"\n\n'
f"ImportError: {err.args[0]}"
) from err
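
For context, a hypothetical usage sketch (not part of the diff; the to_records helper is invented for illustration): call sites defer the pandas import until a pandas-specific code path actually runs, so merely importing altair no longer requires pandas.

from altair.utils._importers import import_pandas

def to_records(data: dict) -> list:
    # Raises a descriptive ImportError (or RuntimeError for a too-old
    # version) only when a pandas code path is actually exercised.
    pd = import_pandas()
    return pd.DataFrame.from_dict(data).to_dict(orient="records")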
24 changes: 17 additions & 7 deletions altair/utils/core.py
@@ -25,9 +25,7 @@
from types import ModuleType

import jsonschema
-import pandas as pd
import numpy as np
-from pandas.api.types import infer_dtype

from altair.utils.schemapi import SchemaBase
from altair.utils._dfi_types import Column, DtypeKind, DataFrame as DfiDataFrame
@@ -40,6 +38,7 @@
from typing import Literal, Protocol, TYPE_CHECKING, runtime_checkable

if TYPE_CHECKING:
+    import pandas as pd
    from pandas.core.interchange.dataframe_protocol import Column as PandasColumn

V = TypeVar("V")
@@ -53,6 +52,11 @@ def __dataframe__(
    ) -> DfiDataFrame: ...


+def _is_pandas_dataframe(obj: Any) -> bool:
Contributor:

Could this function be a simple isinstance(obj, pd.DataFrame)?

Contributor (PR author):

Thanks for starting to review this PR @binste! I don't think I can do this without importing pandas first.

I tried setting up a function I could use for duck typing:

def instance(obj):
    return type(obj).__name__

but found that both polars and pandas name their dataframe class DataFrame, so the type name alone doesn't distinguish them.

Contributor:

Maybe I'm missing something, but couldn't we call the pandas import function you created here? If it raises an ImportError, we know it's not a pandas DataFrame anyway.
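
A sketch of that idea (assuming the import_pandas helper added in this PR; this is not code from the diff):

def _is_pandas_dataframe(obj) -> bool:
    from altair.utils._importers import import_pandas

    try:
        pd = import_pandas()
    except ImportError:
        # pandas is not installed, so obj cannot be a pandas DataFrame.
        return False
    return isinstance(obj, pd.DataFrame)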

Contributor (PR author):

It's pragmatic, I admit. But that would trigger an unnecessary import of pandas when it is installed in the environment but the data object is something else entirely.
I wish we could sniff the type without importing modules first.

Contributor:

Here's the optional import logic I added to plotly.py a while back: https://github.com/plotly/plotly.py/blob/master/packages/python/plotly/_plotly_utils/optional_imports.py. If should_load is False, it won't perform the import even if the library is installed. This was used with isinstance checks: if pandas hasn't been loaded yet, you know the object you're dealing with isn't a pandas DataFrame, even if pandas is installed.
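
Roughly this pattern (a simplified sketch of the idea, not the actual plotly code):

import importlib
import sys
from types import ModuleType
from typing import Optional

def get_module(name: str, should_load: bool = True) -> Optional[ModuleType]:
    """Return the named module, importing it only if allowed."""
    if name in sys.modules:
        return sys.modules[name]
    if not should_load:
        # Never trigger an import: if the module was never loaded, an
        # isinstance check against its classes can short-circuit to False.
        return None
    try:
        return importlib.import_module(name)
    except ImportError:
        return None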

@MarcoGorelli (Contributor), Jun 25, 2024:

A trick I learned from scikit-learn is to check if pandas is in sys.modules before doing the isinstance check, something like

if (pd := sys.modules.get('pandas')) is not None and isinstance(df, pd.DataFrame):
    ...

If pandas was never imported, then df is definitely not pandas

(this is also what we do in Narwhals, where pandas/polars/etc. are never explicitly imported)
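
Spelled out as a self-contained helper, the trick would look something like this (a sketch, not code from the diff):

import sys
from typing import Any

def _is_pandas_dataframe(obj: Any) -> bool:
    # If pandas has never been imported in this process, obj cannot be
    # a pandas DataFrame, so the check avoids importing pandas entirely.
    pd = sys.modules.get("pandas")
    return pd is not None and isinstance(obj, pd.DataFrame)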

Contributor (PR author):

I just saw your response here @MarcoGorelli! I also made this observation recently; see the comment I just added: #3384 (comment)...

"""Check if the object is an instance of a pandas DataFrame."""
return all(attr in dir(obj) for attr in ["iloc", "columns", "index"])


TYPECODE_MAP = {
"ordinal": "O",
"nominal": "N",
@@ -208,7 +212,10 @@ def infer_vegalite_type(
    ----------
    data: object
    """
-    typ = infer_dtype(data, skipna=False)
+    from altair.utils._importers import import_pandas
+
+    pd = import_pandas()
+    typ = pd.api.types.infer_dtype(data, skipna=False)

    if typ in [
        "floating",
@@ -299,7 +306,7 @@ def numpy_is_subtype(dtype: Any, subtype: Any) -> bool:
    return False


-def sanitize_dataframe(df: pd.DataFrame) -> pd.DataFrame:  # noqa: C901
+def sanitize_dataframe(df: "pd.DataFrame") -> "pd.DataFrame":  # noqa: C901
    """Sanitize a DataFrame to prepare it for serialization.

    * Make a copy
@@ -316,6 +323,9 @@ def sanitize_dataframe(df: "pd.DataFrame") -> "pd.DataFrame":  # noqa: C901
    * convert dedicated string column to objects and replace NaN with None
    * Raise a ValueError for TimeDelta dtypes
    """
+    from altair.utils._importers import import_pandas
+
+    pd = import_pandas()
    df = df.copy()

    if isinstance(df.columns, pd.RangeIndex):
@@ -448,7 +458,7 @@ def sanitize_arrow_table(pa_table):

def parse_shorthand(
    shorthand: Union[Dict[str, Any], str],
-    data: Optional[Union[pd.DataFrame, DataFrameLike]] = None,
+    data: Optional[Union[DataFrameLike, "pd.DataFrame"]] = None,
    parse_aggregates: bool = True,
    parse_window_ops: bool = False,
    parse_timeunits: bool = True,
@@ -601,15 +611,15 @@ def parse_shorthand(
            # Fall back to pandas-based inference.
            # Note: The AttributeError catch is a workaround for
            # https://github.com/pandas-dev/pandas/issues/55332
-            if isinstance(data, pd.DataFrame):
+            if _is_pandas_dataframe(data):
                attrs["type"] = infer_vegalite_type(data[unescaped_field])
            else:
                raise

    if isinstance(attrs["type"], tuple):
        attrs["sort"] = attrs["type"][1]
        attrs["type"] = attrs["type"][0]
-    elif isinstance(data, pd.DataFrame):
+    elif _is_pandas_dataframe(data):
        # Fallback if pyarrow is not installed or if pandas is older than 1.5
        #
        # Remove escape sequences so that types can be inferred for columns with special characters
41 changes: 25 additions & 16 deletions altair/utils/data.py
@@ -5,12 +5,16 @@
import warnings
from typing import Union, MutableMapping, Optional, Dict, Sequence, TYPE_CHECKING, List

-import pandas as pd
from toolz import curried
from typing import TypeVar

from ._importers import import_pyarrow_interchange
-from .core import sanitize_dataframe, sanitize_arrow_table, DataFrameLike
+from .core import (
+    sanitize_dataframe,
+    sanitize_arrow_table,
+    DataFrameLike,
+    _is_pandas_dataframe,
+)
from .core import sanitize_geo_interface
from .deprecation import AltairDeprecationWarning
from .plugin_registry import PluginRegistry
@@ -21,13 +25,14 @@

if TYPE_CHECKING:
    import pyarrow.lib
+    import pandas as pd


class SupportsGeoInterface(Protocol):
    __geo_interface__: MutableMapping


-DataType = Union[dict, pd.DataFrame, SupportsGeoInterface, DataFrameLike]
+DataType = Union[dict, DataFrameLike, SupportsGeoInterface, "pd.DataFrame"]
TDataType = TypeVar("TDataType", bound=DataType)

VegaLiteDataDict = Dict[str, Union[str, dict, List[dict]]]
@@ -96,7 +101,7 @@ def raise_max_rows_error():
            values = data.__geo_interface__["features"]
        else:
            values = data.__geo_interface__
-    elif isinstance(data, pd.DataFrame):
+    elif _is_pandas_dataframe(data):
        values = data
    elif isinstance(data, dict):
        if "values" in data:
@@ -122,10 +127,10 @@
@curried.curry
def sample(
    data: DataType, n: Optional[int] = None, frac: Optional[float] = None
-) -> Optional[Union[pd.DataFrame, Dict[str, Sequence], "pyarrow.lib.Table"]]:
+) -> Optional[Union["pd.DataFrame", Dict[str, Sequence], "pyarrow.lib.Table"]]:
    """Reduce the size of the data model by sampling without replacement."""
    check_data_type(data)
-    if isinstance(data, pd.DataFrame):
+    if _is_pandas_dataframe(data):
        return data.sample(n=n, frac=frac)
    elif isinstance(data, dict):
        if "values" in data:
@@ -196,7 +201,7 @@ def to_json(

@curried.curry
def to_csv(
-    data: Union[dict, pd.DataFrame, DataFrameLike],
+    data: Union[dict, DataFrameLike, "pd.DataFrame"],
    prefix: str = "altair-data",
    extension: str = "csv",
    filename: str = "{prefix}-{hash}.{extension}",
@@ -216,13 +221,13 @@ def to_values(data: DataType) -> ToValuesReturnType:
"""Replace a DataFrame by a data model with values."""
check_data_type(data)
if hasattr(data, "__geo_interface__"):
if isinstance(data, pd.DataFrame):
if _is_pandas_dataframe(data):
data = sanitize_dataframe(data)
# Maybe the type could be further clarified here that it is
# SupportGeoInterface and then the ignore statement is not needed?
data_sanitized = sanitize_geo_interface(data.__geo_interface__) # type: ignore[arg-type]
return {"values": data_sanitized}
elif isinstance(data, pd.DataFrame):
elif _is_pandas_dataframe(data):
data = sanitize_dataframe(data)
return {"values": data.to_dict(orient="records")}
elif isinstance(data, dict):
@@ -238,8 +243,10 @@


def check_data_type(data: DataType) -> None:
-    if not isinstance(data, (dict, pd.DataFrame, DataFrameLike)) and not any(
-        hasattr(data, attr) for attr in ["__geo_interface__"]
+    if (
+        not isinstance(data, (dict, DataFrameLike))
+        and not _is_pandas_dataframe(data)
+        and not any(hasattr(data, attr) for attr in ["__geo_interface__"])
    ):
        raise TypeError(
            "Expected dict, DataFrame or a __geo_interface__ attribute, got: {}".format(
@@ -259,13 +266,13 @@ def _data_to_json_string(data: DataType) -> str:
"""Return a JSON string representation of the input data"""
check_data_type(data)
if hasattr(data, "__geo_interface__"):
if isinstance(data, pd.DataFrame):
if _is_pandas_dataframe(data):
data = sanitize_dataframe(data)
# Maybe the type could be further clarified here that it is
# SupportGeoInterface and then the ignore statement is not needed?
data = sanitize_geo_interface(data.__geo_interface__) # type: ignore[arg-type]
return json.dumps(data)
elif isinstance(data, pd.DataFrame):
elif _is_pandas_dataframe(data):
data = sanitize_dataframe(data)
return data.to_json(orient="records", double_precision=15)
elif isinstance(data, dict):
Expand All @@ -281,23 +288,25 @@ def _data_to_json_string(data: DataType) -> str:
)


-def _data_to_csv_string(data: Union[dict, pd.DataFrame, DataFrameLike]) -> str:
+def _data_to_csv_string(data: Union[dict, DataFrameLike, "pd.DataFrame"]) -> str:
    """return a CSV string representation of the input data"""
    check_data_type(data)
    if hasattr(data, "__geo_interface__"):
        raise NotImplementedError(
            "to_csv does not work with data that "
            "contains the __geo_interface__ attribute"
        )
-    elif isinstance(data, pd.DataFrame):
+    elif _is_pandas_dataframe(data):
        data = sanitize_dataframe(data)
        return data.to_csv(index=False)
    elif isinstance(data, dict):
+        from altair.utils._importers import import_pandas
+
        if "values" not in data:
            raise KeyError("values expected in data dict, but not present")
+        pd = import_pandas()
        return pd.DataFrame.from_dict(data["values"]).to_csv(index=False)
    elif isinstance(data, DataFrameLike):
        # experimental interchange dataframe support
        import pyarrow as pa
        import pyarrow.csv as pa_csv

17 changes: 14 additions & 3 deletions altair/utils/schemapi.py
@@ -22,6 +22,7 @@
    overload,
    Literal,
    TypeVar,
+    TYPE_CHECKING,
)
from itertools import zip_longest
from importlib.metadata import version as importlib_version
@@ -31,7 +32,6 @@
import jsonschema.exceptions
import jsonschema.validators
import numpy as np
-import pandas as pd
from packaging.version import Version

# This leads to circular imports with the vegalite module. Currently, this works
@@ -44,6 +44,15 @@
else:
    from typing_extensions import Self

+if TYPE_CHECKING:
+    pass
+
Comment on lines +47 to +49
Contributor:

Suggested change (remove these lines):
-if TYPE_CHECKING:
-    pass

Aware that it's just a wip PR, thought I'd just note it anyway :)


+class _PandasTimestamp:
+    def isoformat(self):
+        return "dummy_isoformat"  # Return a dummy ISO format string
Comment on lines +51 to +53
Contributor:

I think this should inherit from a Protocol, as a pd.Timestamp is not an instance of _PandasTimestamp. You'll then also need to add the @runtime_checkable decorator from typing. Also, we could directly test for a pandas timestamp in a function similar to _is_pandas_dataframe, to keep these approaches consistent?
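
Something along these lines (a sketch; the datetime caveat is my own note, not from the thread):

from typing import Protocol, runtime_checkable

@runtime_checkable
class _PandasTimestamp(Protocol):
    # isinstance() against a runtime_checkable Protocol only checks that
    # the method exists, so a real pd.Timestamp matches without pandas
    # being imported here. Note that datetime.datetime also defines
    # isoformat() and would match as well.
    def isoformat(self) -> str: ...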



TSchemaBase = TypeVar("TSchemaBase", bound=Type["SchemaBase"])

ValidationErrorList = List[jsonschema.exceptions.ValidationError]
@@ -477,7 +486,9 @@ def _todict(obj: Any, context: Optional[Dict[str, Any]]) -> Any:
        return obj.to_dict()
    elif isinstance(obj, np.number):
        return float(obj)
-    elif isinstance(obj, (pd.Timestamp, np.datetime64)):
+    elif isinstance(obj, (_PandasTimestamp, np.datetime64)):
+        import pandas as pd
+
        return pd.Timestamp(obj).isoformat()
    else:
        return obj
@@ -936,7 +947,7 @@ def to_dict(
        # parsed_shorthand is removed from context if it exists so that it is
        # not passed to child to_dict function calls
        parsed_shorthand = context.pop("parsed_shorthand", {})
-        # Prevent that pandas categorical data is automatically sorted
+        # Prevent that categorical data is automatically sorted
        # when a non-ordinal data type is specifed manually
        # or if the encoding channel does not support sorting
        if "sort" in parsed_shorthand and (
9 changes: 4 additions & 5 deletions altair/vegalite/v5/api.py
@@ -4,7 +4,6 @@
import io
import json
import jsonschema
-import pandas as pd
from toolz.curried import pipe as _pipe
import itertools
import sys
@@ -26,7 +25,7 @@
    using_vegafusion as _using_vegafusion,
    compile_with_vegafusion as _compile_with_vegafusion,
)
-from ...utils.core import DataFrameLike
+from ...utils.core import DataFrameLike, _is_pandas_dataframe
from ...utils.data import DataType

if sys.version_info >= (3, 11):
@@ -106,15 +105,15 @@ def _prepare_data(data, context=None):
    if data is Undefined:
        return data

-    # convert dataframes or objects with __geo_interface__ to dict
-    elif isinstance(data, pd.DataFrame) or hasattr(data, "__geo_interface__"):
+    # convert dataframes or objects with __geo_interface__ to dict
+    elif isinstance(data, DataFrameLike) or hasattr(data, "__geo_interface__"):
        data = _pipe(data, data_transformers.get())

    # convert string input to a URLData
    elif isinstance(data, str):
        data = core.UrlData(data)

-    elif isinstance(data, DataFrameLike):
+    elif _is_pandas_dataframe(data):
        data = _pipe(data, data_transformers.get())

    # consolidate inline data to top-level datasets
4 changes: 2 additions & 2 deletions altair/vegalite/v5/schema/channels.py
@@ -11,7 +11,7 @@

import sys
from . import core
-import pandas as pd
+from altair.utils.core import DataFrameLike
from altair.utils.schemapi import Undefined, UndefinedType, with_property_setters
from altair.utils import parse_shorthand
from typing import Any, overload, Sequence, List, Literal, Union, Optional
@@ -60,7 +60,7 @@ def to_dict(
        # We still parse it out of the shorthand, but drop it here.
        parsed.pop("type", None)
    elif not (type_in_shorthand or type_defined_explicitly):
-        if isinstance(context.get("data", None), pd.DataFrame):
+        if isinstance(context.get("data", None), DataFrameLike):
            raise ValueError(
                'Unable to determine data type for the field "{}";'
                " verify that the field name is not misspelled."
2 changes: 1 addition & 1 deletion tests/utils/test_core.py
@@ -4,11 +4,11 @@

import numpy as np
import pandas as pd
-from pandas.api.types import infer_dtype
Contributor:

Let's make the tests also run without pandas installed, so that we can run the whole test suite once with pandas and once without. That prevents us from accidentally reintroducing a hard dependency in the future.
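
One way to do that at the module level (a sketch using pytest's importorskip, which skips a module's tests when the import fails):

import pytest

# Skip the pandas-dependent tests in this module when pandas is not
# installed, so the rest of the suite still runs in a pandas-free env.
pd = pytest.importorskip("pandas")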

import pytest

import altair as alt
from altair.utils.core import parse_shorthand, update_nested, infer_encoding_types
+from altair.utils.core import infer_dtype

json_schema_specification = alt.load_schema()["$schema"]
json_schema_dict_str = f'{{"$schema": "{json_schema_specification}"}}'