From 06c357d19802b943637560cb724fbcba859f07c2 Mon Sep 17 00:00:00 2001
From: Gaurang Shah
Date: Wed, 21 Feb 2024 12:55:26 -0500
Subject: [PATCH] fix: throw exception for data type mismatch for load_table_from_dataframe api

Wrap the pandas-to-Arrow conversion in bq_to_arrow_array so that a
pyarrow.lib.ArrowInvalid raised for a value that does not fit the
declared schema type is re-raised as a ValueError that names the
offending column. The original Arrow error is kept as the explicit
cause of the new exception.

---
 google/cloud/bigquery/_pandas_helpers.py | 15 ++++++++++-----
 tests/unit/test_client.py                | 26 ++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py
index e97dda7e57..ea8ab4256d 100644
--- a/google/cloud/bigquery/_pandas_helpers.py
+++ b/google/cloud/bigquery/_pandas_helpers.py
@@ -302,11 +302,16 @@ def bq_to_arrow_array(series, bq_field):
 
     field_type_upper = bq_field.field_type.upper() if bq_field.field_type else ""
 
-    if bq_field.mode.upper() == "REPEATED":
-        return pyarrow.ListArray.from_pandas(series, type=arrow_type)
-    if field_type_upper in schema._STRUCT_TYPES:
-        return pyarrow.StructArray.from_pandas(series, type=arrow_type)
-    return pyarrow.Array.from_pandas(series, type=arrow_type)
+    try:
+        if bq_field.mode.upper() == "REPEATED":
+            return pyarrow.ListArray.from_pandas(series, type=arrow_type)
+        if field_type_upper in schema._STRUCT_TYPES:
+            return pyarrow.StructArray.from_pandas(series, type=arrow_type)
+
+        return pyarrow.Array.from_pandas(series, type=arrow_type)
+    except pyarrow.lib.ArrowInvalid as ae:
+        # Name the offending column and keep the pyarrow error as the cause.
+        raise ValueError(f"{ae} for column {bq_field.name}") from ae
 
 
 def get_column_or_index(dataframe, name):
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index d20712a8a5..1c34d42455 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -8963,6 +8963,32 @@ def test_load_table_from_dataframe_w_higher_scale_decimal128_datatype(self):
             SchemaField("x", "BIGNUMERIC", "NULLABLE", None),
         )
 
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_load_table_from_dataframe_w_datatype_mismatch(self):
+        from google.cloud.bigquery.schema import SchemaField
+        from google.cloud.bigquery import job
+
+        client = self._make_client()
+        dataframe = pandas.DataFrame({"x": [1, 2, "three"]})
+        schema = [SchemaField("x", "INTEGER")]
+        job_config = job.LoadJobConfig(schema=schema)
+
+        get_table_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.get_table", autospec=True
+        )
+        with get_table_patch, pytest.raises(ValueError) as e:
+            client.load_table_from_dataframe(
+                dataframe, self.TABLE_REF, location=self.LOCATION, job_config=job_config
+            )
+
+        assert (
+            str(e.value)
+            == "Could not convert 'three' with type str: tried to convert to int64 for column x"
+        )
+        # The original pyarrow error must stay attached as the explicit cause.
+        assert isinstance(e.value.__cause__, pyarrow.lib.ArrowInvalid)
+
     # With autodetect specified, we pass the value as is. For more info, see
     # https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
     def test_load_table_from_json_basic_use(self):