Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(deps): raise exception when pandas is installed but db-dtypes is not #1191

Merged
merged 9 commits into from
Mar 30, 2022
33 changes: 26 additions & 7 deletions google/cloud/bigquery/_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,25 @@

try:
import pandas # type: ignore
except ImportError: # pragma: NO COVER

pandas_import_exception = None
except ImportError as exc: # pragma: NO COVER
pandas = None
date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype
pandas_import_exception = exc
else:
import numpy

from db_dtypes import DateDtype, TimeDtype # type: ignore
try:
import db_dtypes # type: ignore

date_dtype_name = db_dtypes.DateDtype.name
time_dtype_name = db_dtypes.TimeDtype.name
db_dtypes_import_exception = None
except ImportError as exc: # pragma: NO COVER
db_dtypes = None
db_dtypes_import_exception = exc
date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype

date_dtype_name = DateDtype.name
time_dtype_name = TimeDtype.name

import pyarrow # type: ignore
import pyarrow.parquet # type: ignore
Expand Down Expand Up @@ -84,6 +93,9 @@ def _to_wkb(v):

_MAX_QUEUE_SIZE_DEFAULT = object() # max queue size sentinel for BQ Storage downloads

# Messages for the ValueError raised when an optional dependency needed by the
# pandas-based helpers in this module is not available.
_NO_PANDAS_ERROR = "Please install the 'pandas' package to use this function."
_NO_DB_TYPES_ERROR = "Please install the 'db-dtypes' package to use this function."

_PANDAS_DTYPE_TO_BQ = {
"bool": "BOOLEAN",
"datetime64[ns, UTC]": "TIMESTAMP",
Expand Down Expand Up @@ -290,13 +302,13 @@ def types_mapper(arrow_data_type):
not date_as_object
and pyarrow.types.is_date(arrow_data_type)
):
return DateDtype()
return db_dtypes.DateDtype()

elif pyarrow.types.is_integer(arrow_data_type):
return pandas.Int64Dtype()

elif pyarrow.types.is_time(arrow_data_type):
return TimeDtype()
return db_dtypes.TimeDtype()

return types_mapper

Expand Down Expand Up @@ -970,3 +982,10 @@ def dataframe_to_json_generator(dataframe):
output[column] = value

yield output


def verify_pandas_imports():
    """Ensure the optional pandas dependencies are importable.

    The module-level ``pandas`` name is checked first, then ``db_dtypes``;
    the first one found to be ``None`` triggers the error, so ``db_dtypes``
    is never inspected when ``pandas`` is missing.

    Raises:
        ValueError:
            If the module-level ``pandas`` or ``db_dtypes`` name is ``None``.
            The error is chained (``raise ... from``) to the import exception
            recorded for the corresponding package.
    """
    if pandas is None:
        raise ValueError(_NO_PANDAS_ERROR) from pandas_import_exception

    # Guard clause: nothing left to verify once db_dtypes is present.
    if db_dtypes is not None:
        return

    raise ValueError(_NO_DB_TYPES_ERROR) from db_dtypes_import_exception
20 changes: 6 additions & 14 deletions google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@
import pandas # type: ignore
except ImportError: # pragma: NO COVER
pandas = None
else:
import db_dtypes # type: ignore # noqa

import pyarrow # type: ignore

Expand Down Expand Up @@ -69,10 +67,6 @@
from google.cloud.bigquery.dataset import DatasetReference


_NO_PANDAS_ERROR = (
"The pandas library is not installed, please install "
"pandas to use the to_dataframe() function."
)
_NO_GEOPANDAS_ERROR = (
"The geopandas library is not installed, please install "
"geopandas to use the to_geodataframe() function."
Expand Down Expand Up @@ -1818,8 +1812,8 @@ def to_dataframe_iterable(
ValueError:
If the :mod:`pandas` library cannot be imported.
"""
if pandas is None:
raise ValueError(_NO_PANDAS_ERROR)
_pandas_helpers.verify_pandas_imports()

if dtypes is None:
dtypes = {}

Expand Down Expand Up @@ -1928,8 +1922,8 @@ def to_dataframe(
:mod:`shapely` library cannot be imported.

"""
if pandas is None:
raise ValueError(_NO_PANDAS_ERROR)
_pandas_helpers.verify_pandas_imports()

if geography_as_object and shapely is None:
raise ValueError(_NO_SHAPELY_ERROR)

Expand Down Expand Up @@ -2181,8 +2175,7 @@ def to_dataframe(
Returns:
pandas.DataFrame: An empty :class:`~pandas.DataFrame`.
"""
if pandas is None:
raise ValueError(_NO_PANDAS_ERROR)
_pandas_helpers.verify_pandas_imports()
return pandas.DataFrame()

def to_geodataframe(
Expand Down Expand Up @@ -2238,8 +2231,7 @@ def to_dataframe_iterable(
ValueError:
If the :mod:`pandas` library cannot be imported.
"""
if pandas is None:
raise ValueError(_NO_PANDAS_ERROR)
_pandas_helpers.verify_pandas_imports()
return iter((pandas.DataFrame(),))

def to_arrow_iterable(
Expand Down
12 changes: 12 additions & 0 deletions tests/unit/test__pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1751,3 +1751,15 @@ def test_bq_to_arrow_field_metadata(module_under_test, field_type, metadata):
).metadata
== metadata
)


def test_verify_pandas_imports_no_pandas(module_under_test, monkeypatch):
    """verify_pandas_imports() raises ValueError when ``pandas`` is None."""
    expected_message = "Please install the 'pandas' package"

    # Simulate a missing pandas by nulling out the module-level name.
    monkeypatch.setattr(module_under_test, "pandas", None)

    with pytest.raises(ValueError, match=expected_message):
        module_under_test.verify_pandas_imports()


def test_verify_pandas_imports_no_db_dtypes(module_under_test, monkeypatch):
    """verify_pandas_imports() raises ValueError when ``db_dtypes`` is None."""
    expected_message = "Please install the 'db-dtypes' package"

    # Simulate a missing db-dtypes by nulling out the module-level name.
    monkeypatch.setattr(module_under_test, "db_dtypes", None)

    with pytest.raises(ValueError, match=expected_message):
        module_under_test.verify_pandas_imports()
8 changes: 4 additions & 4 deletions tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1836,7 +1836,7 @@ def test_to_arrow_iterable(self):
self.assertEqual(record_batch.num_rows, 0)
self.assertEqual(record_batch.num_columns, 0)

@mock.patch("google.cloud.bigquery.table.pandas", new=None)
@mock.patch("google.cloud.bigquery._pandas_helpers.pandas", new=None)
def test_to_dataframe_error_if_pandas_is_none(self):
row_iterator = self._make_one()
with self.assertRaises(ValueError):
Expand All @@ -1849,7 +1849,7 @@ def test_to_dataframe(self):
self.assertIsInstance(df, pandas.DataFrame)
self.assertEqual(len(df), 0) # verify the number of rows

@mock.patch("google.cloud.bigquery.table.pandas", new=None)
@mock.patch("google.cloud.bigquery._pandas_helpers.pandas", new=None)
def test_to_dataframe_iterable_error_if_pandas_is_none(self):
row_iterator = self._make_one()
with self.assertRaises(ValueError):
Expand Down Expand Up @@ -2967,7 +2967,7 @@ def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self):
assert isinstance(dataframes[0], pandas.DataFrame)
assert isinstance(dataframes[1], pandas.DataFrame)

@mock.patch("google.cloud.bigquery.table.pandas", new=None)
@mock.patch("google.cloud.bigquery._pandas_helpers.pandas", new=None)
def test_to_dataframe_iterable_error_if_pandas_is_none(self):
from google.cloud.bigquery.schema import SchemaField

Expand Down Expand Up @@ -3339,7 +3339,7 @@ def test_to_dataframe_datetime_objects(self):
self.assertEqual(df["ts"][0].date(), datetime.date(1336, 3, 23))
self.assertEqual(df["date"][0], datetime.date(1111, 1, 1))

@mock.patch("google.cloud.bigquery.table.pandas", new=None)
@mock.patch("google.cloud.bigquery._pandas_helpers.pandas", new=None)
def test_to_dataframe_error_if_pandas_is_none(self):
from google.cloud.bigquery.schema import SchemaField

Expand Down