Add options to ignore errors when creating/deleting datasets/tables. #7491

Merged 1 commit on Mar 6, 2019.
bigquery/google/cloud/bigquery/client.py (54 additions, 16 deletions)

@@ -32,6 +32,7 @@
from google.resumable_media.requests import MultipartUpload
from google.resumable_media.requests import ResumableUpload

+import google.api_core.exceptions
from google.api_core import page_iterator
import google.cloud._helpers
from google.cloud import exceptions
@@ -297,7 +298,7 @@ def dataset(self, dataset_id, project=None):

        return DatasetReference(project, dataset_id)

-    def create_dataset(self, dataset):
+    def create_dataset(self, dataset, exists_ok=False, retry=DEFAULT_RETRY):
        """API call: create the dataset via a POST request.

        See
@@ -312,6 +313,11 @@ def create_dataset(self, dataset):
                A :class:`~google.cloud.bigquery.dataset.Dataset` to create.
                If ``dataset`` is a reference, an empty dataset is created
                with the specified ID and client's default location.
+            exists_ok (bool):
+                Defaults to ``False``. If ``True``, ignore "already exists"
+                errors when creating the dataset.
+            retry (google.api_core.retry.Retry):
+                Optional. How to retry the RPC.

        Returns:
            google.cloud.bigquery.dataset.Dataset:
@@ -338,11 +344,15 @@ def create_dataset(self, dataset):
        if data.get("location") is None and self.location is not None:
            data["location"] = self.location

-        api_response = self._connection.api_request(method="POST", path=path, data=data)
-
-        return Dataset.from_api_repr(api_response)
+        try:
+            api_response = self._call_api(retry, method="POST", path=path, data=data)
+            return Dataset.from_api_repr(api_response)
+        except google.api_core.exceptions.Conflict:
+            if not exists_ok:
+                raise
+            return self.get_dataset(dataset.reference, retry=retry)

-    def create_table(self, table):
+    def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY):
        """API call: create a table via a PUT request

        See
@@ -358,6 +368,11 @@ def create_table(self, table):
                If ``table`` is a reference, an empty table is created
                with the specified ID. The dataset that the table belongs to
                must already exist.
+            exists_ok (bool):
+                Defaults to ``False``. If ``True``, ignore "already exists"
+                errors when creating the table.
+            retry (google.api_core.retry.Retry):
+                Optional. How to retry the RPC.

        Returns:
            google.cloud.bigquery.table.Table:
@@ -369,10 +384,14 @@ def create_table(self, table):
            table = Table(table)

        path = "/projects/%s/datasets/%s/tables" % (table.project, table.dataset_id)
-        api_response = self._connection.api_request(
-            method="POST", path=path, data=table.to_api_repr()
-        )
-        return Table.from_api_repr(api_response)
+        data = table.to_api_repr()
+        try:
+            api_response = self._call_api(retry, method="POST", path=path, data=data)
+            return Table.from_api_repr(api_response)
+        except google.api_core.exceptions.Conflict:
+            if not exists_ok:
+                raise
+            return self.get_table(table.reference, retry=retry)

    def _call_api(self, retry, **kwargs):
        call = functools.partial(self._connection.api_request, **kwargs)
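For readers skimming the diff, a minimal usage sketch of the new create-side flag follows. This is illustrative only: the client construction and the dataset/table names are placeholders, not part of this change.

from google.cloud import bigquery

client = bigquery.Client()

# Before this change, a second create_dataset() call raised
# google.api_core.exceptions.Conflict (HTTP 409). With exists_ok=True the
# Conflict is caught and the existing dataset is fetched and returned.
dataset = client.create_dataset("my_scratch_dataset", exists_ok=True)

# create_table() behaves the same way: on a 409 it falls back to get_table()
# and returns the already-existing table.
table = client.create_table(
    bigquery.Table(dataset.table("my_table")), exists_ok=True
)

Either call is now safe to repeat, which makes setup code safe to re-run.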
@@ -563,7 +582,9 @@ def list_tables(
        result.dataset = dataset
        return result

-    def delete_dataset(self, dataset, delete_contents=False, retry=DEFAULT_RETRY):
+    def delete_dataset(
+        self, dataset, delete_contents=False, retry=DEFAULT_RETRY, not_found_ok=False
+    ):
        """Delete a dataset.

        See
@@ -579,12 +600,15 @@ def delete_dataset(self, dataset, delete_contents=False, retry=DEFAULT_RETRY):
                in, this method attempts to create a dataset reference from a
                string using
                :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`.
-            retry (:class:`google.api_core.retry.Retry`):
-                (Optional) How to retry the RPC.
            delete_contents (boolean):
                (Optional) If True, delete all the tables in the dataset. If
                False and the dataset contains tables, the request will fail.
                Default is False.
+            retry (:class:`google.api_core.retry.Retry`):
+                (Optional) How to retry the RPC.
+            not_found_ok (bool):
+                Defaults to ``False``. If ``True``, ignore "not found" errors
+                when deleting the dataset.
        """
        if isinstance(dataset, str):
            dataset = DatasetReference.from_string(
@@ -598,9 +622,15 @@ def delete_dataset(self, dataset, delete_contents=False, retry=DEFAULT_RETRY):
        if delete_contents:
            params["deleteContents"] = "true"

-        self._call_api(retry, method="DELETE", path=dataset.path, query_params=params)
+        try:
+            self._call_api(
+                retry, method="DELETE", path=dataset.path, query_params=params
+            )
+        except google.api_core.exceptions.NotFound:
+            if not not_found_ok:
+                raise

-    def delete_table(self, table, retry=DEFAULT_RETRY):
+    def delete_table(self, table, retry=DEFAULT_RETRY, not_found_ok=False):
        """Delete a table

        See
@@ -618,13 +648,21 @@ def delete_table(self, table, retry=DEFAULT_RETRY):
                :func:`google.cloud.bigquery.table.TableReference.from_string`.
            retry (:class:`google.api_core.retry.Retry`):
                (Optional) How to retry the RPC.
+            not_found_ok (bool):
+                Defaults to ``False``. If ``True``, ignore "not found" errors
+                when deleting the table.
        """
        if isinstance(table, str):
            table = TableReference.from_string(table, default_project=self.project)

        if not isinstance(table, (Table, TableReference)):
            raise TypeError("table must be a Table or a TableReference")
-        self._call_api(retry, method="DELETE", path=table.path)
+
+        try:
+            self._call_api(retry, method="DELETE", path=table.path)
+        except google.api_core.exceptions.NotFound:
+            if not not_found_ok:
+                raise

    def _get_query_results(
        self, job_id, retry, project=None, timeout_ms=None, location=None
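The delete-side counterpart, again as an illustrative sketch with placeholder names:

from google.cloud import bigquery

client = bigquery.Client()

# Without not_found_ok, deleting a missing resource raises
# google.api_core.exceptions.NotFound (HTTP 404). With not_found_ok=True,
# cleanup succeeds whether or not the resources still exist.
client.delete_table("my_scratch_dataset.my_table", not_found_ok=True)
client.delete_dataset("my_scratch_dataset", delete_contents=True, not_found_ok=True)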
bigquery/tests/unit/test_client.py (189 additions, 0 deletions)

@@ -36,6 +36,7 @@
except (ImportError, AttributeError):  # pragma: NO COVER
    pyarrow = None

+import google.api_core.exceptions
from google.cloud.bigquery.dataset import DatasetReference


@@ -804,6 +805,61 @@ def test_create_dataset_w_string(self):
            },
        )

    def test_create_dataset_alreadyexists_w_exists_ok_false(self):
        creds = _make_credentials()
        client = self._make_one(
            project=self.PROJECT, credentials=creds, location=self.LOCATION
        )
        client._connection = _make_connection(
            google.api_core.exceptions.AlreadyExists("dataset already exists")
        )

        with pytest.raises(google.api_core.exceptions.AlreadyExists):
            client.create_dataset(self.DS_ID)

    def test_create_dataset_alreadyexists_w_exists_ok_true(self):
        post_path = "/projects/{}/datasets".format(self.PROJECT)
        get_path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID)
        resource = {
            "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
            "etag": "etag",
            "id": "{}:{}".format(self.PROJECT, self.DS_ID),
            "location": self.LOCATION,
        }
        creds = _make_credentials()
        client = self._make_one(
            project=self.PROJECT, credentials=creds, location=self.LOCATION
        )
        conn = client._connection = _make_connection(
            google.api_core.exceptions.AlreadyExists("dataset already exists"), resource
        )

        dataset = client.create_dataset(self.DS_ID, exists_ok=True)

        self.assertEqual(dataset.dataset_id, self.DS_ID)
        self.assertEqual(dataset.project, self.PROJECT)
        self.assertEqual(dataset.etag, resource["etag"])
        self.assertEqual(dataset.full_dataset_id, resource["id"])
        self.assertEqual(dataset.location, self.LOCATION)

        conn.api_request.assert_has_calls(
            [
                mock.call(
                    method="POST",
                    path=post_path,
                    data={
                        "datasetReference": {
                            "projectId": self.PROJECT,
                            "datasetId": self.DS_ID,
                        },
                        "labels": {},
                        "location": self.LOCATION,
                    },
                ),
                mock.call(method="GET", path=get_path),
            ]
        )
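These tests pass exception instances directly to _make_connection, a helper defined elsewhere in test_client.py and not shown in this diff. A plausible minimal sketch of such a helper is below; the autospec target and exact signature are assumptions, not code from this PR.

import mock

import google.cloud.bigquery._http


def _make_connection(*responses):
    # Hypothetical reconstruction: mock raises any exception instance it
    # encounters in side_effect, so a mix of AlreadyExists/NotFound instances
    # and plain dict resources scripts the outcome of each successive
    # api_request() call.
    conn = mock.create_autospec(google.cloud.bigquery._http.Connection, instance=True)
    conn.api_request.side_effect = list(responses)
    return conn

This is how test_create_dataset_alreadyexists_w_exists_ok_true makes the first api_request() raise Conflict (AlreadyExists is a Conflict subclass) and the second return the GET resource.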

    def test_create_table_w_day_partition(self):
        from google.cloud.bigquery.table import Table
        from google.cloud.bigquery.table import TimePartitioning
Expand Down Expand Up @@ -1177,6 +1233,79 @@ def test_create_table_w_string(self):
        )
        self.assertEqual(got.table_id, self.TABLE_ID)

    def test_create_table_alreadyexists_w_exists_ok_false(self):
        post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID)
        creds = _make_credentials()
        client = self._make_one(
            project=self.PROJECT, credentials=creds, location=self.LOCATION
        )
        conn = client._connection = _make_connection(
            google.api_core.exceptions.AlreadyExists("table already exists")
        )

        with pytest.raises(google.api_core.exceptions.AlreadyExists):
            client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID))

        conn.api_request.assert_called_once_with(
            method="POST",
            path=post_path,
            data={
                "tableReference": {
                    "projectId": self.PROJECT,
                    "datasetId": self.DS_ID,
                    "tableId": self.TABLE_ID,
                },
                "labels": {},
            },
        )

    def test_create_table_alreadyexists_w_exists_ok_true(self):
        post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID)
        get_path = "/projects/{}/datasets/{}/tables/{}".format(
            self.PROJECT, self.DS_ID, self.TABLE_ID
        )
        resource = {
            "id": "%s:%s:%s" % (self.PROJECT, self.DS_ID, self.TABLE_ID),
            "tableReference": {
                "projectId": self.PROJECT,
                "datasetId": self.DS_ID,
                "tableId": self.TABLE_ID,
            },
        }
        creds = _make_credentials()
        client = self._make_one(
            project=self.PROJECT, credentials=creds, location=self.LOCATION
        )
        conn = client._connection = _make_connection(
            google.api_core.exceptions.AlreadyExists("table already exists"), resource
        )

        got = client.create_table(
            "{}.{}".format(self.DS_ID, self.TABLE_ID), exists_ok=True
        )

        self.assertEqual(got.project, self.PROJECT)
        self.assertEqual(got.dataset_id, self.DS_ID)
        self.assertEqual(got.table_id, self.TABLE_ID)

        conn.api_request.assert_has_calls(
            [
                mock.call(
                    method="POST",
                    path=post_path,
                    data={
                        "tableReference": {
                            "projectId": self.PROJECT,
                            "datasetId": self.DS_ID,
                            "tableId": self.TABLE_ID,
                        },
                        "labels": {},
                    },
                ),
                mock.call(method="GET", path=get_path),
            ]
        )

    def test_get_table(self):
        path = "projects/%s/datasets/%s/tables/%s" % (
            self.PROJECT,
@@ -1804,6 +1933,33 @@ def test_delete_dataset_wrong_type(self):
        with self.assertRaises(TypeError):
            client.delete_dataset(client.dataset(self.DS_ID).table("foo"))

    def test_delete_dataset_w_not_found_ok_false(self):
        path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID)
        creds = _make_credentials()
        http = object()
        client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
        conn = client._connection = _make_connection(
            google.api_core.exceptions.NotFound("dataset not found")
        )

        with self.assertRaises(google.api_core.exceptions.NotFound):
            client.delete_dataset(self.DS_ID)

        conn.api_request.assert_called_with(method="DELETE", path=path, query_params={})

    def test_delete_dataset_w_not_found_ok_true(self):
        path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID)
        creds = _make_credentials()
        http = object()
        client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
        conn = client._connection = _make_connection(
            google.api_core.exceptions.NotFound("dataset not found")
        )

        client.delete_dataset(self.DS_ID, not_found_ok=True)

        conn.api_request.assert_called_with(method="DELETE", path=path, query_params={})

    def test_delete_table(self):
        from google.cloud.bigquery.table import Table

@@ -1836,6 +1992,39 @@ def test_delete_table_w_wrong_type(self):
        with self.assertRaises(TypeError):
            client.delete_table(client.dataset(self.DS_ID))

    def test_delete_table_w_not_found_ok_false(self):
        path = "/projects/{}/datasets/{}/tables/{}".format(
            self.PROJECT, self.DS_ID, self.TABLE_ID
        )
        creds = _make_credentials()
        http = object()
        client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
        conn = client._connection = _make_connection(
            google.api_core.exceptions.NotFound("table not found")
        )

        with self.assertRaises(google.api_core.exceptions.NotFound):
            client.delete_table("{}.{}".format(self.DS_ID, self.TABLE_ID))

        conn.api_request.assert_called_with(method="DELETE", path=path)

    def test_delete_table_w_not_found_ok_true(self):
        path = "/projects/{}/datasets/{}/tables/{}".format(
            self.PROJECT, self.DS_ID, self.TABLE_ID
        )
        creds = _make_credentials()
        http = object()
        client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
        conn = client._connection = _make_connection(
            google.api_core.exceptions.NotFound("table not found")
        )

        client.delete_table(
            "{}.{}".format(self.DS_ID, self.TABLE_ID), not_found_ok=True
        )

        conn.api_request.assert_called_with(method="DELETE", path=path)
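A hypothetical integration-test fixture shows the intended effect of the two flags together: setup and teardown become idempotent, so a suite can be re-run after a crash without manual cleanup. The dataset name and fixture scope here are illustrative, not from this PR.

import pytest

from google.cloud import bigquery


@pytest.fixture
def scratch_dataset():
    client = bigquery.Client()
    # Safe to run even if a previous run left the dataset behind.
    dataset = client.create_dataset("ephemeral_test_dataset", exists_ok=True)
    yield dataset
    # Safe to run even if the test already deleted the dataset.
    client.delete_dataset(dataset, delete_contents=True, not_found_ok=True)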

    def test_job_from_resource_unknown_type(self):
        from google.cloud.bigquery.job import UnknownJob
