diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py
index 40e3a1578..72576e608 100644
--- a/google/cloud/bigquery/__init__.py
+++ b/google/cloud/bigquery/__init__.py
@@ -202,6 +202,7 @@
     # Custom exceptions
     "LegacyBigQueryStorageError",
     "LegacyPyarrowError",
+    "LegacyPandasError",
 ]
 
 
diff --git a/google/cloud/bigquery/_versions_helpers.py b/google/cloud/bigquery/_versions_helpers.py
index ce529b76e..4ff4b9700 100644
--- a/google/cloud/bigquery/_versions_helpers.py
+++ b/google/cloud/bigquery/_versions_helpers.py
@@ -24,6 +24,7 @@
 _MIN_PYARROW_VERSION = packaging.version.Version("3.0.0")
 _MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0")
 _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0")
+_MIN_PANDAS_VERSION = packaging.version.Version("1.1.0")
 
 
 class PyarrowVersions:
@@ -171,3 +172,69 @@ def try_import(self, raise_if_error: bool = False) -> Any:
 
 
 BQ_STORAGE_VERSIONS = BQStorageVersions()
+
+
+class PandasVersions:
+    """Version comparisons for pandas package."""
+
+    def __init__(self):
+        self._installed_version = None
+
+    @property
+    def installed_version(self) -> packaging.version.Version:
+        """Return the parsed version of pandas."""
+        if self._installed_version is None:
+            import pandas  # type: ignore
+
+            self._installed_version = packaging.version.parse(
+                # Use 0.0.0, since it is earlier than any released version.
+                # Legacy versions also have the same property, but
+                # creating a LegacyVersion has been deprecated.
+                # https://github.com/pypa/packaging/issues/321
+                getattr(pandas, "__version__", "0.0.0")
+            )
+
+        return self._installed_version
+
+    def try_import(self, raise_if_error: bool = False) -> Any:
+        """Verify that a recent enough version of pandas extra is installed.
+
+        The function assumes that pandas extra is installed, and should thus
+        be used in places where this assumption holds.
+
+        Because `pip` can install an outdated version of this extra despite
+        the constraints in `setup.py`, the calling code can use this helper
+        to verify the version compatibility at runtime.
+
+        Returns:
+            The ``pandas`` module or ``None``.
+
+        Raises:
+            exceptions.LegacyPandasError:
+                If the pandas package is missing or outdated and
+                ``raise_if_error`` is ``True``.
+        """
+        try:
+            import pandas
+        except ImportError as exc:  # pragma: NO COVER
+            if raise_if_error:
+                raise exceptions.LegacyPandasError(
+                    "pandas package not found. Install pandas version >="
+                    f" {_MIN_PANDAS_VERSION}"
+                ) from exc
+            return None
+
+        if self.installed_version < _MIN_PANDAS_VERSION:
+            if raise_if_error:
+                msg = (
+                    "Dependency pandas is outdated, please upgrade"
+                    f" it to version >= {_MIN_PANDAS_VERSION}"
+                    f" (version found: {self.installed_version})."
+                )
+                raise exceptions.LegacyPandasError(msg)
+            return None
+
+        return pandas
+
+
+PANDAS_VERSIONS = PandasVersions()
diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py
index 496015b21..d4a759ba4 100644
--- a/google/cloud/bigquery/client.py
+++ b/google/cloud/bigquery/client.py
@@ -115,6 +115,9 @@
 from google.cloud.bigquery.table import RowIterator
 
 pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()
+pandas = (
+    _versions_helpers.PANDAS_VERSIONS.try_import()
+)  # mypy check fails because pandas import is outside module, there are type: ignore comments related to this
 
 TimeoutType = Union[float, None]
 ResumableTimeoutType = Union[
@@ -124,7 +127,6 @@
 if typing.TYPE_CHECKING:  # pragma: NO COVER
     # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition.
     PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]]
-    import pandas  # type: ignore
     import requests  # required by api-core
 
 _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024  # 100 MB
@@ -2488,7 +2490,7 @@ def load_table_from_file(
 
     def load_table_from_dataframe(
         self,
-        dataframe: "pandas.DataFrame",
+        dataframe: "pandas.DataFrame",  # type: ignore
         destination: Union[Table, TableReference, str],
         num_retries: int = _DEFAULT_NUM_RETRIES,
         job_id: Optional[str] = None,
diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py
index e94a6c832..62e0d540c 100644
--- a/google/cloud/bigquery/exceptions.py
+++ b/google/cloud/bigquery/exceptions.py
@@ -29,3 +29,7 @@ class BigQueryStorageNotFoundError(BigQueryError):
     """Raised when BigQuery Storage extra is not installed when trying to
     import it.
     """
+
+
+class LegacyPandasError(BigQueryError):
+    """Raised when too old a version of pandas package is detected at runtime."""
diff --git a/tests/unit/test__versions_helpers.py b/tests/unit/test__versions_helpers.py
index 144f14b7c..afe170e7a 100644
--- a/tests/unit/test__versions_helpers.py
+++ b/tests/unit/test__versions_helpers.py
@@ -26,6 +26,11 @@
 except ImportError:  # pragma: NO COVER
     bigquery_storage = None
 
+try:
+    import pandas  # type: ignore
+except ImportError:  # pragma: NO COVER
+    pandas = None
+
 from google.cloud.bigquery import _versions_helpers
 from google.cloud.bigquery import exceptions
 
@@ -173,3 +178,49 @@ def test_bqstorage_is_read_session_optional_false():
     bqstorage_versions = _versions_helpers.BQStorageVersions()
     with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"):
         assert not bqstorage_versions.is_read_session_optional
+
+
+@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
+@pytest.mark.parametrize("version", ["1.5.0", "2.0.0", "2.1.0"])
+def test_try_import_raises_no_error_w_recent_pandas(version):
+    versions = _versions_helpers.PandasVersions()
+    with mock.patch("pandas.__version__", new=version):
+        try:
+            pandas = versions.try_import(raise_if_error=True)
+            assert pandas is not None
+        except exceptions.LegacyPandasError:  # pragma: NO COVER
+            pytest.fail("Legacy error raised with a non-legacy dependency version.")
+
+
+@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
+def test_try_import_returns_none_w_legacy_pandas():
+    versions = _versions_helpers.PandasVersions()
+    with mock.patch("pandas.__version__", new="1.0.0"):
+        pandas = versions.try_import()
+        assert pandas is None
+
+
+@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
+def test_try_import_raises_error_w_legacy_pandas():
+    versions = _versions_helpers.PandasVersions()
+    with mock.patch("pandas.__version__", new="1.0.0"):
+        with pytest.raises(exceptions.LegacyPandasError):
+            versions.try_import(raise_if_error=True)
+
+
+@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
+def test_installed_pandas_version_returns_cached():
+    versions = _versions_helpers.PandasVersions()
+    versions._installed_version = object()
+    assert versions.installed_version is versions._installed_version
+
+
+@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
+def test_installed_pandas_version_returns_parsed_version():
+    versions = _versions_helpers.PandasVersions()
+    with mock.patch("pandas.__version__", new="1.1.0"):
+        version = versions.installed_version
+
+    assert version.major == 1
+    assert version.minor == 1
+    assert version.micro == 0