Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: refactor version checks for pandas library #1711

Merged
merged 8 commits into from
Nov 7, 2023
1 change: 1 addition & 0 deletions google/cloud/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@
# Custom exceptions
"LegacyBigQueryStorageError",
"LegacyPyarrowError",
"LegacyPandasError",
]


Expand Down
63 changes: 63 additions & 0 deletions google/cloud/bigquery/_versions_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0")
_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0")
_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0")
_MIN_PANDAS_VERSION = packaging.version.Version("1.1.0")


class PyarrowVersions:
Expand Down Expand Up @@ -171,3 +172,65 @@ def try_import(self, raise_if_error: bool = False) -> Any:


BQ_STORAGE_VERSIONS = BQStorageVersions()


class PandasVersions:
    """Version comparisons for pandas package."""

    def __init__(self):
        # Parsed pandas version; filled in lazily by ``installed_version``.
        self._installed_version = None

    @property
    def installed_version(self) -> packaging.version.Version:
        """Return the parsed version of pandas.

        The version is parsed on first access and cached on this instance,
        so later accesses return the same object without re-importing.
        """
        if self._installed_version is None:
            import pandas  # type: ignore

            self._installed_version = packaging.version.parse(
                # Use 0.0.0, since it is earlier than any released version.
                # Legacy versions also have the same property, but
                # creating a LegacyVersion has been deprecated.
                # https://github.com/pypa/packaging/issues/321
                getattr(pandas, "__version__", "0.0.0")
            )

        return self._installed_version

    def try_import(self, raise_if_error: bool = False) -> Any:
        """Verify that a recent enough version of pandas extra is installed.

        The function assumes that pandas extra is installed, and should thus
        be used in places where this assumption holds.

        Because `pip` can install an outdated version of this extra despite
        the constraints in `setup.py`, the calling code can use this helper
        to verify the version compatibility at runtime.

        Args:
            raise_if_error:
                If ``True``, raise instead of returning ``None`` when pandas
                is missing or older than the minimum supported version.

        Returns:
            The ``pandas`` module or ``None``.

        Raises:
            exceptions.LegacyPandasError:
                If the pandas package is missing or outdated and
                ``raise_if_error`` is ``True``.
        """
        try:
            import pandas
        except ImportError as exc:  # pragma: NO COVER
            if raise_if_error:
                raise exceptions.LegacyPandasError(
                    "pandas package not found. Install pandas version >="
                    f" {_MIN_PANDAS_VERSION}"
                ) from exc
            return None

        if self.installed_version < _MIN_PANDAS_VERSION:
            if raise_if_error:
                # The message must name pandas: that is the dependency being
                # compared against _MIN_PANDAS_VERSION here.
                msg = (
                    "Dependency pandas is outdated, please upgrade"
                    f" it to version >= {_MIN_PANDAS_VERSION}"
                    f" (version found: {self.installed_version})."
                )
                raise exceptions.LegacyPandasError(msg)
            return None

        return pandas


# Module-level PandasVersions instance; its parsed version is cached after first use.
PANDAS_VERSIONS = PandasVersions()
6 changes: 4 additions & 2 deletions google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@
from google.cloud.bigquery.table import RowIterator

pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()
pandas = (
_versions_helpers.PANDAS_VERSIONS.try_import()
) # mypy check fails because pandas import is outside module, there are type: ignore comments related to this

TimeoutType = Union[float, None]
ResumableTimeoutType = Union[
Expand All @@ -124,7 +127,6 @@
if typing.TYPE_CHECKING: # pragma: NO COVER
# os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition.
PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]]
import pandas # type: ignore
import requests # required by api-core

_DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB
Expand Down Expand Up @@ -2488,7 +2490,7 @@ def load_table_from_file(

def load_table_from_dataframe(
self,
dataframe: "pandas.DataFrame",
dataframe: "pandas.DataFrame", # type: ignore
destination: Union[Table, TableReference, str],
num_retries: int = _DEFAULT_NUM_RETRIES,
job_id: Optional[str] = None,
Expand Down
4 changes: 4 additions & 0 deletions google/cloud/bigquery/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,7 @@ class BigQueryStorageNotFoundError(BigQueryError):
"""Raised when BigQuery Storage extra is not installed when trying to
import it.
"""


class LegacyPandasError(BigQueryError):
    """Raised when the pandas package detected at runtime is too old."""
50 changes: 50 additions & 0 deletions tests/unit/test__versions_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@
except ImportError: # pragma: NO COVER
bigquery_storage = None

try:
import pandas # type: ignore
except ImportError: # pragma: NO COVER
pandas = None

from google.cloud.bigquery import _versions_helpers
from google.cloud.bigquery import exceptions

Expand Down Expand Up @@ -173,3 +178,48 @@ def test_bqstorage_is_read_session_optional_false():
bqstorage_versions = _versions_helpers.BQStorageVersions()
with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"):
assert not bqstorage_versions.is_read_session_optional


@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
def test_try_import_raises_no_error_w_recent_pandas():
    """A supported pandas version must import without a legacy error."""
    versions = _versions_helpers.PandasVersions()
    with mock.patch("pandas.__version__", new="1.5.0"):
        try:
            pandas_module = versions.try_import(raise_if_error=True)
        except exceptions.LegacyPandasError:  # pragma: NO COVER
            # Raising a bare string is itself a TypeError; fail the test
            # explicitly so the intended message is what gets reported.
            pytest.fail("Legacy error raised with a non-legacy dependency version.")
        assert pandas_module is not None


@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
def test_try_import_returns_none_w_legacy_pandas():
    """With an outdated pandas and no raise requested, ``None`` comes back."""
    helper = _versions_helpers.PandasVersions()
    with mock.patch("pandas.__version__", new="1.0.0"):
        imported = helper.try_import()
    assert imported is None


@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
def test_try_import_raises_error_w_legacy_pandas():
    """With an outdated pandas and raise requested, the legacy error fires."""
    helper = _versions_helpers.PandasVersions()
    legacy_version = mock.patch("pandas.__version__", new="1.0.0")
    with legacy_version, pytest.raises(exceptions.LegacyPandasError):
        helper.try_import(raise_if_error=True)


@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
def test_installed_pandas_version_returns_cached():
    """A pre-populated cache is returned as-is by ``installed_version``."""
    helper = _versions_helpers.PandasVersions()
    sentinel = object()
    helper._installed_version = sentinel
    assert helper.installed_version is sentinel


@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
def test_installed_pandas_version_returns_parsed_version():
    """``installed_version`` parses the patched ``pandas.__version__``."""
    helper = _versions_helpers.PandasVersions()
    with mock.patch("pandas.__version__", new="1.1.0"):
        parsed = helper.installed_version

    assert (parsed.major, parsed.minor, parsed.micro) == (1, 1, 0)