
Commit

fix: throw exception for data type mismatch for load_table_from_dataframe api
Gaurang033 committed Mar 18, 2024
1 parent dc93241 commit 3a16f37
Showing 2 changed files with 33 additions and 5 deletions.
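
The fix makes load_table_from_dataframe raise a ValueError that names the offending column when a DataFrame value cannot be converted to the declared schema type. A minimal usage sketch of the new behavior, assuming a configured client and placeholder project/dataset/table names:

import pandas
from google.cloud import bigquery

client = bigquery.Client()
dataframe = pandas.DataFrame({"x": [1, 2, "three"]})  # "three" cannot be converted to INTEGER
job_config = bigquery.LoadJobConfig(schema=[bigquery.SchemaField("x", "INTEGER")])

try:
    client.load_table_from_dataframe(
        dataframe, "my-project.my_dataset.my_table", job_config=job_config
    )
except ValueError as exc:
    # With this change the message also names the column, e.g.
    # "Could not convert 'three' with type str: tried to convert to int64 for column x"
    print(exc)
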
14 changes: 9 additions & 5 deletions google/cloud/bigquery/_pandas_helpers.py
@@ -302,11 +302,15 @@ def bq_to_arrow_array(series, bq_field):
 
     field_type_upper = bq_field.field_type.upper() if bq_field.field_type else ""
 
-    if bq_field.mode.upper() == "REPEATED":
-        return pyarrow.ListArray.from_pandas(series, type=arrow_type)
-    if field_type_upper in schema._STRUCT_TYPES:
-        return pyarrow.StructArray.from_pandas(series, type=arrow_type)
-    return pyarrow.Array.from_pandas(series, type=arrow_type)
+    try:
+        if bq_field.mode.upper() == "REPEATED":
+            return pyarrow.ListArray.from_pandas(series, type=arrow_type)
+        if field_type_upper in schema._STRUCT_TYPES:
+            return pyarrow.StructArray.from_pandas(series, type=arrow_type)
+
+        return pyarrow.Array.from_pandas(series, type=arrow_type)
+    except pyarrow.lib.ArrowInvalid as ae:
+        raise ValueError(f"{str(ae)} for column {bq_field.name}")
 
 
 def get_column_or_index(dataframe, name):
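
For context, pyarrow already raises ArrowInvalid when a value cannot be coerced, but its message does not say which DataFrame column failed; the try/except above re-raises it as a ValueError tagged with the BigQuery field name. A minimal sketch of the underlying pyarrow behavior, independent of BigQuery:

import pandas
import pyarrow

series = pandas.Series([1, 2, "three"])
try:
    pyarrow.Array.from_pandas(series, type=pyarrow.int64())
except pyarrow.lib.ArrowInvalid as exc:
    # pyarrow reports the bad value and the target type, but not the column name, e.g.
    # "Could not convert 'three' with type str: tried to convert to int64"
    print(exc)
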
24 changes: 24 additions & 0 deletions tests/unit/test_client.py
@@ -8963,6 +8963,30 @@ def test_load_table_from_dataframe_w_higher_scale_decimal128_datatype(self):
             SchemaField("x", "BIGNUMERIC", "NULLABLE", None),
         )
 
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_load_table_from_dataframe_w_datatype_mismatch(self):
+        from google.cloud.bigquery.schema import SchemaField
+        from google.cloud.bigquery import job
+        import re
+
+        client = self._make_client()
+        dataframe = pandas.DataFrame({"x": [1, 2, "three"]})
+        schema = [SchemaField("x", "INTEGER")]
+        job_config = job.LoadJobConfig(schema=schema)
+
+        get_table_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.get_table", autospec=True
+        )
+        with get_table_patch, pytest.raises(ValueError) as e:
+            client.load_table_from_dataframe(
+                dataframe, self.TABLE_REF, location=self.LOCATION, job_config=job_config
+            )
+        assert re.match(
+            r"Could not convert '?three'? with type str: tried to convert to int\d{0,2} for column x",
+            str(e.value),
+        )
+
     # With autodetect specified, we pass the value as is. For more info, see
     # https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
     def test_load_table_from_json_basic_use(self):
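
The new test can be run on its own with pytest's keyword filter, for example:

pytest tests/unit/test_client.py -k test_load_table_from_dataframe_w_datatype_mismatch
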

