diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dfda1470413b7..a939c7e33bb17 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1466,7 +1466,7 @@ def to_gbq( Behavior when the destination table exists. Value can be one of: ``'fail'`` - If table exists, do nothing. + If table exists raise pandas_gbq.gbq.TableCreationError. ``'replace'`` If table exists, drop it, recreate it, and insert data. ``'append'`` diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index b6309860d06f7..48c8923dab7cd 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -1,6 +1,9 @@ +from contextlib import ExitStack as does_not_raise from datetime import datetime import os import platform +import random +import string import numpy as np import pytest @@ -18,11 +21,6 @@ PRIVATE_KEY_JSON_PATH = None PRIVATE_KEY_JSON_CONTENTS = None -DATASET_ID = "pydata_pandas_bq_testing_py3" - -TABLE_ID = "new_test" -DESTINATION_TABLE = "{0}.{1}".format(DATASET_ID + "1", TABLE_ID) - VERSION = platform.python_version() @@ -149,34 +147,33 @@ def mock_read_gbq(sql, **kwargs): @pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyPath: - @classmethod - def setup_class(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *BEFORE* - # executing *ALL* tests described below. - + @pytest.fixture() + def gbq_dataset(self): + # Setup Dataset _skip_if_no_project_id() _skip_if_no_private_key_path() - cls.client = _get_client() - cls.dataset = cls.client.dataset(DATASET_ID + "1") + dataset_id = "pydata_pandas_bq_testing_py31" + + self.client = _get_client() + self.dataset = self.client.dataset(dataset_id) try: # Clean-up previous test runs. - cls.client.delete_dataset(cls.dataset, delete_contents=True) + self.client.delete_dataset(self.dataset, delete_contents=True) except api_exceptions.NotFound: pass # It's OK if the dataset doesn't already exist. - cls.client.create_dataset(bigquery.Dataset(cls.dataset)) + self.client.create_dataset(bigquery.Dataset(self.dataset)) + + table_name = "".join(random.choices(string.ascii_lowercase, k=10)) + destination_table = f"{dataset_id}.{table_name}" + yield destination_table - @classmethod - def teardown_class(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *AFTER* - # executing all tests. - cls.client.delete_dataset(cls.dataset, delete_contents=True) + # Teardown Dataset + self.client.delete_dataset(self.dataset, delete_contents=True) - def test_roundtrip(self): - destination_table = DESTINATION_TABLE + "1" + def test_roundtrip(self, gbq_dataset): + destination_table = gbq_dataset test_size = 20001 df = make_mixed_dataframe_v2(test_size) @@ -189,21 +186,26 @@ def test_roundtrip(self): ) result = pd.read_gbq( - "SELECT COUNT(*) AS num_rows FROM {0}".format(destination_table), + f"SELECT COUNT(*) AS num_rows FROM {destination_table}", project_id=_get_project_id(), credentials=_get_credentials(), dialect="standard", ) assert result["num_rows"][0] == test_size - @pytest.mark.xfail(reason="Test breaking master", strict=False) @pytest.mark.parametrize( - "if_exists, expected_num_rows", - [("append", 300), ("fail", 200), ("replace", 100)], + "if_exists, expected_num_rows, expectation", + [ + ("append", 300, does_not_raise()), + ("fail", 200, pytest.raises(pandas_gbq.gbq.TableCreationError)), + ("replace", 100, does_not_raise()), + ], ) - def test_gbq_if_exists(self, if_exists, expected_num_rows): + def test_gbq_if_exists( + self, if_exists, expected_num_rows, expectation, gbq_dataset + ): # GH 29598 - destination_table = DESTINATION_TABLE + "2" + destination_table = gbq_dataset test_size = 200 df = make_mixed_dataframe_v2(test_size) @@ -215,13 +217,14 @@ def test_gbq_if_exists(self, if_exists, expected_num_rows): credentials=_get_credentials(), ) - df.iloc[:100].to_gbq( - destination_table, - _get_project_id(), - if_exists=if_exists, - chunksize=None, - credentials=_get_credentials(), - ) + with expectation: + df.iloc[:100].to_gbq( + destination_table, + _get_project_id(), + if_exists=if_exists, + chunksize=None, + credentials=_get_credentials(), + ) result = pd.read_gbq( f"SELECT COUNT(*) AS num_rows FROM {destination_table}",