From 3e124a28c95a7ba6cbaba0495799eafbe6ff490b Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 26 Dec 2019 02:31:02 +0000 Subject: [PATCH 1/7] Fixtures for GBQ Tests --- pandas/tests/io/test_gbq.py | 62 ++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index f040dc2d0a70a..4a92986adc5e4 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -1,8 +1,10 @@ +from contextlib import ExitStack as does_not_raise from datetime import datetime import os import platform import numpy as np +from pandas_gbq.gbq import TableCreationError import pytest import pytz @@ -21,7 +23,7 @@ DATASET_ID = "pydata_pandas_bq_testing_py3" TABLE_ID = "new_test" -DESTINATION_TABLE = "{0}.{1}".format(DATASET_ID + "1", TABLE_ID) +DESTINATION_TABLE = f"{DATASET_ID + '1'}.{TABLE_ID}" VERSION = platform.python_version() @@ -149,33 +151,28 @@ def mock_read_gbq(sql, **kwargs): @pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyPath: - @classmethod - def setup_class(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *BEFORE* - # executing *ALL* tests described below. - + @pytest.fixture() + def gbq_dataset(self): + # Setup Dataset _skip_if_no_project_id() _skip_if_no_private_key_path() - cls.client = _get_client() - cls.dataset = cls.client.dataset(DATASET_ID + "1") + self.client = _get_client() + self.dataset = self.client.dataset(DATASET_ID + "1") try: # Clean-up previous test runs. - cls.client.delete_dataset(cls.dataset, delete_contents=True) + self.client.delete_dataset(self.dataset, delete_contents=True) except api_exceptions.NotFound: pass # It's OK if the dataset doesn't already exist. 
- cls.client.create_dataset(bigquery.Dataset(cls.dataset)) + self.client.create_dataset(bigquery.Dataset(self.dataset)) + + yield - @classmethod - def teardown_class(cls): - # - GLOBAL CLASS FIXTURES - - # put here any instruction you want to execute only *ONCE* *AFTER* - # executing all tests. - cls.client.delete_dataset(cls.dataset, delete_contents=True) + # Teardown Dataset + self.client.delete_dataset(self.dataset, delete_contents=True) - def test_roundtrip(self): + def test_roundtrip(self, gbq_dataset): destination_table = DESTINATION_TABLE + "1" test_size = 20001 @@ -189,31 +186,38 @@ def test_roundtrip(self): ) result = pd.read_gbq( - "SELECT COUNT(*) AS num_rows FROM {0}".format(destination_table), + f"SELECT COUNT(*) AS num_rows FROM {destination_table}", project_id=_get_project_id(), credentials=_get_credentials(), dialect="standard", ) assert result["num_rows"][0] == test_size - @pytest.mark.xfail(reason="Test breaking master") + @pytest.mark.xfail(reason="Test breaking master", strict=False) @pytest.mark.parametrize( - "if_exists, expected_num_rows", - [("append", 300), ("fail", 200), ("replace", 100)], + "if_exists, expected_num_rows, expectation", + [ + ("append", 300, does_not_raise()), + ("fail", 200, pytest.raises(TableCreationError)), + ("replace", 100, does_not_raise()), + ], ) - def test_gbq_if_exists(self, if_exists, expected_num_rows): + def test_gbq_if_exists( + self, if_exists, expected_num_rows, expectation, gbq_dataset + ): # GH 29598 destination_table = DESTINATION_TABLE + "2" test_size = 200 df = make_mixed_dataframe_v2(test_size) - df.to_gbq( - destination_table, - _get_project_id(), - chunksize=None, - credentials=_get_credentials(), - ) + with expectation: + df.to_gbq( + destination_table, + _get_project_id(), + chunksize=None, + credentials=_get_credentials(), + ) df.iloc[:100].to_gbq( destination_table, From 51f51718e7ca6303f07ceeb31bd94e722acc5785 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 26 Dec 2019 02:35:21 +0000 
Subject: [PATCH 2/7] Fix import --- pandas/tests/io/test_gbq.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 4a92986adc5e4..5f7df121afc24 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -4,7 +4,6 @@ import platform import numpy as np -from pandas_gbq.gbq import TableCreationError import pytest import pytz @@ -198,7 +197,7 @@ def test_roundtrip(self, gbq_dataset): "if_exists, expected_num_rows, expectation", [ ("append", 300, does_not_raise()), - ("fail", 200, pytest.raises(TableCreationError)), + ("fail", 200, pytest.raises(pandas_gbq.gbq.TableCreationError)), ("replace", 100, does_not_raise()), ], ) From e69f9a1e3ef687f118d928d033891f413befc4aa Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 26 Dec 2019 16:16:30 +0000 Subject: [PATCH 3/7] Update as per comments --- pandas/tests/io/test_gbq.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 5f7df121afc24..83eead64562da 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -2,6 +2,7 @@ from datetime import datetime import os import platform +import uuid import numpy as np import pytest @@ -19,11 +20,6 @@ PRIVATE_KEY_JSON_PATH = None PRIVATE_KEY_JSON_CONTENTS = None -DATASET_ID = "pydata_pandas_bq_testing_py3" - -TABLE_ID = "new_test" -DESTINATION_TABLE = f"{DATASET_ID + '1'}.{TABLE_ID}" - VERSION = platform.python_version() @@ -156,8 +152,10 @@ def gbq_dataset(self): _skip_if_no_project_id() _skip_if_no_private_key_path() + dataset_id = "pydata_pandas_bq_testing_py31" + self.client = _get_client() - self.dataset = self.client.dataset(DATASET_ID + "1") + self.dataset = self.client.dataset(dataset_id) try: # Clean-up previous test runs. 
self.client.delete_dataset(self.dataset, delete_contents=True) @@ -166,13 +164,15 @@ def gbq_dataset(self): self.client.create_dataset(bigquery.Dataset(self.dataset)) - yield + table_id = str(uuid.uuid1()) + destination_table = f"{dataset_id}.{table_id}" + yield destination_table # Teardown Dataset self.client.delete_dataset(self.dataset, delete_contents=True) def test_roundtrip(self, gbq_dataset): - destination_table = DESTINATION_TABLE + "1" + destination_table = gbq_dataset test_size = 20001 df = make_mixed_dataframe_v2(test_size) @@ -205,7 +205,7 @@ def test_gbq_if_exists( self, if_exists, expected_num_rows, expectation, gbq_dataset ): # GH 29598 - destination_table = DESTINATION_TABLE + "2" + destination_table = gbq_dataset test_size = 200 df = make_mixed_dataframe_v2(test_size) From d266ca0ce57a0934d74e0606c2c786250a2e823f Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 26 Dec 2019 16:31:23 +0000 Subject: [PATCH 4/7] Update doc --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dfda1470413b7..a939c7e33bb17 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1466,7 +1466,7 @@ def to_gbq( Behavior when the destination table exists. Value can be one of: ``'fail'`` - If table exists, do nothing. + If table exists, raise pandas_gbq.gbq.TableCreationError. ``'replace'`` If table exists, drop it, recreate it, and insert data. 
``'append'`` From add8e88e311fd4c2fc1c965a7c532da19b932ab7 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 26 Dec 2019 16:34:49 +0000 Subject: [PATCH 5/7] Remove xfail --- pandas/tests/io/test_gbq.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 83eead64562da..4bc4b7a0b1f2e 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -192,7 +192,6 @@ def test_roundtrip(self, gbq_dataset): ) assert result["num_rows"][0] == test_size - @pytest.mark.xfail(reason="Test breaking master", strict=False) @pytest.mark.parametrize( "if_exists, expected_num_rows, expectation", [ From 59a9dcde75e7a21c955ecc3518b81d06eb8b7f2c Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 26 Dec 2019 20:18:46 +0000 Subject: [PATCH 6/7] Table name must be alphanumeric --- pandas/tests/io/test_gbq.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 4bc4b7a0b1f2e..5a0cb49535d51 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -2,7 +2,8 @@ from datetime import datetime import os import platform -import uuid +import random +import string import numpy as np import pytest @@ -164,8 +165,8 @@ def gbq_dataset(self): self.client.create_dataset(bigquery.Dataset(self.dataset)) - table_id = str(uuid.uuid1()) - destination_table = f"{dataset_id}.{table_id}" + table_name = "".join(random.choices(string.ascii_lowercase, k=10)) + destination_table = f"{dataset_id}.{table_name}" yield destination_table # Teardown Dataset From f4ecffbc855d62cb988422e0cce3138228e27842 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Thu, 26 Dec 2019 22:58:11 +0000 Subject: [PATCH 7/7] Put expectation on correct to_gbq call --- pandas/tests/io/test_gbq.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 
5a0cb49535d51..48c8923dab7cd 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -210,22 +210,22 @@ def test_gbq_if_exists( test_size = 200 df = make_mixed_dataframe_v2(test_size) + df.to_gbq( + destination_table, + _get_project_id(), + chunksize=None, + credentials=_get_credentials(), + ) + with expectation: - df.to_gbq( + df.iloc[:100].to_gbq( destination_table, _get_project_id(), + if_exists=if_exists, chunksize=None, credentials=_get_credentials(), ) - df.iloc[:100].to_gbq( - destination_table, - _get_project_id(), - if_exists=if_exists, - chunksize=None, - credentials=_get_credentials(), - ) - result = pd.read_gbq( f"SELECT COUNT(*) AS num_rows FROM {destination_table}", project_id=_get_project_id(),