Skip to content

Commit

Permalink
BigQuery: Use TableListItem for table listing. (#4427)
Browse files Browse the repository at this point in the history
* BigQuery: Use TableListItem for table listing.

The table list response only includes a subset of all table properties.
This commit adds a new type that documents explicitly which properties are
included and makes it clear that this object should not be used in
place of a full Table object.

* Get bigquery.client imports closer to accepted style.

* Share property code between table and table list item for view_use_legacy_sql

* Clarify TableListItem docs.

* Fix link syntax.

* Shrink property links.
  • Loading branch information
tswast authored Nov 20, 2017
1 parent 4402e40 commit f308bd4
Show file tree
Hide file tree
Showing 4 changed files with 248 additions and 34 deletions.
29 changes: 16 additions & 13 deletions bigquery/google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,26 +31,28 @@
from google.api_core import page_iterator
from google.api_core.exceptions import GoogleAPICallError
from google.api_core.exceptions import NotFound

from google.cloud import exceptions
from google.cloud.client import ClientWithProject

from google.cloud.bigquery._helpers import DEFAULT_RETRY
from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW
from google.cloud.bigquery._helpers import _field_to_index_mapping
from google.cloud.bigquery._helpers import _item_to_row
from google.cloud.bigquery._helpers import _rows_page_start
from google.cloud.bigquery._helpers import _snake_to_camel_case
from google.cloud.bigquery._http import Connection
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.table import _row_from_mapping
from google.cloud.bigquery.job import CopyJob
from google.cloud.bigquery.job import ExtractJob
from google.cloud.bigquery.job import LoadJob
from google.cloud.bigquery.job import QueryJob, QueryJobConfig
from google.cloud.bigquery.query import QueryResults
from google.cloud.bigquery._helpers import _item_to_row
from google.cloud.bigquery._helpers import _rows_page_start
from google.cloud.bigquery._helpers import _field_to_index_mapping
from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW
from google.cloud.bigquery._helpers import DEFAULT_RETRY
from google.cloud.bigquery._helpers import _snake_to_camel_case
from google.cloud.bigquery.table import Table
from google.cloud.bigquery.table import TableListItem
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.table import _TABLE_HAS_NO_SCHEMA
from google.cloud.bigquery.table import _row_from_mapping


_DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB
Expand Down Expand Up @@ -405,8 +407,9 @@ def list_dataset_tables(self, dataset, max_results=None, page_token=None,
:param retry: (Optional) How to retry the RPC.
:rtype: :class:`~google.api_core.page_iterator.Iterator`
:returns: Iterator of :class:`~google.cloud.bigquery.table.Table`
contained within the current dataset.
:returns:
Iterator of :class:`~google.cloud.bigquery.table.TableListItem`
contained within the current dataset.
"""
if not isinstance(dataset, (Dataset, DatasetReference)):
raise TypeError('dataset must be a Dataset or a DatasetReference')
Expand Down Expand Up @@ -1367,7 +1370,7 @@ def _item_to_table(iterator, resource):
:rtype: :class:`~google.cloud.bigquery.table.Table`
:returns: The next table in the page.
"""
return Table.from_api_repr(resource)
return TableListItem(resource)


def _make_job_id(job_id, prefix=None):
Expand Down
164 changes: 147 additions & 17 deletions bigquery/google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,25 @@
_MARKER = object()


def _view_use_legacy_sql_getter(table):
"""Specifies whether to execute the view with Legacy or Standard SQL.
If this table is not a view, None is returned.
Returns:
bool: True if the view is using legacy SQL, or None if not a view
"""
view = table._properties.get('view')
if view is not None:
# The server-side default for useLegacySql is True.
return view.get('useLegacySql', True)
# In some cases, such as in a table list no view object is present, but the
# resource still represents a view. Use the type as a fallback.
if table.table_type == 'VIEW':
# The server-side default for useLegacySql is True.
return True


class TableReference(object):
"""TableReferences are pointers to tables.
Expand Down Expand Up @@ -531,23 +550,7 @@ def view_query(self):
"""Delete SQL query defining the table as a view."""
self._properties.pop('view', None)

@property
def view_use_legacy_sql(self):
"""Specifies whether to execute the view with Legacy or Standard SQL.
The default is False for views (use Standard SQL).
If this table is not a view, None is returned.
:rtype: bool or ``NoneType``
:returns: The boolean for view.useLegacySql, or None if not a view.
"""
view = self._properties.get('view')
if view is not None:
# useLegacySql is never missing from the view dict if this table
# was created client-side, because the view_query setter populates
# it. So a missing or None can only come from the server, whose
# default is True.
return view.get('useLegacySql', True)
view_use_legacy_sql = property(_view_use_legacy_sql_getter)

@view_use_legacy_sql.setter
def view_use_legacy_sql(self, value):
Expand Down Expand Up @@ -713,6 +716,133 @@ def _build_resource(self, filter_fields):
return resource


class TableListItem(object):
    """A read-only table resource from a list operation.

    For performance reasons, the BigQuery API only includes some of the table
    properties when listing tables. Notably,
    :attr:`~google.cloud.bigquery.table.Table.schema` and
    :attr:`~google.cloud.bigquery.table.Table.num_rows` are missing.

    For a full list of the properties that the BigQuery API returns, see the
    `REST documentation for tables.list
    <https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list>`_.

    Args:
        resource (dict):
            A table-like resource object from a table list response.
    """

    def __init__(self, resource):
        # The resource dict is stored as-is (not copied); every property
        # below reads from it on access.
        self._properties = resource

    @property
    def project(self):
        """The project ID of the project this table belongs to.

        Returns:
            str:
                the project ID of the table, or None if the resource has no
                ``tableReference.projectId``.
        """
        return self._properties.get('tableReference', {}).get('projectId')

    @property
    def dataset_id(self):
        """The dataset ID of the dataset this table belongs to.

        Returns:
            str:
                the dataset ID of the table, or None if the resource has no
                ``tableReference.datasetId``.
        """
        return self._properties.get('tableReference', {}).get('datasetId')

    @property
    def table_id(self):
        """The table ID.

        Returns:
            str:
                the table ID, or None if the resource has no
                ``tableReference.tableId``.
        """
        return self._properties.get('tableReference', {}).get('tableId')

    @property
    def reference(self):
        """A :class:`~google.cloud.bigquery.table.TableReference` pointing to
        this table.

        A new reference object is built on every access.

        Returns:
            google.cloud.bigquery.table.TableReference: pointer to this table
        """
        # NOTE(review): imported locally, presumably to avoid a circular
        # import between the table and dataset modules -- confirm.
        from google.cloud.bigquery import dataset

        dataset_ref = dataset.DatasetReference(self.project, self.dataset_id)
        return TableReference(dataset_ref, self.table_id)

    @property
    def labels(self):
        """Labels for the table.

        This property always returns a dict. When the resource carries no
        labels, a new empty dict is returned on each access; that dict is
        not stored on this item, so changes made to it are not retained.

        Returns:
            Map[str, str]: A dictionary of the table's labels
        """
        return self._properties.get('labels', {})

    @property
    def full_table_id(self):
        """ID for the table, in the form ``project_id:dataset_id:table_id``.

        Returns:
            str: The fully-qualified ID of the table
        """
        return self._properties.get('id')

    @property
    def table_type(self):
        """The type of the table.

        Possible values are "TABLE", "VIEW", or "EXTERNAL".

        Returns:
            str: The kind of table
        """
        return self._properties.get('type')

    @property
    def partitioning_type(self):
        """Time partitioning of the table.

        Returns:
            str:
                Type of partitioning if the table is partitioned, None
                otherwise.
        """
        return self._properties.get('timePartitioning', {}).get('type')

    @property
    def partition_expiration(self):
        """Expiration time in ms for a partition.

        Returns:
            int:
                The time in ms for partition expiration, or None if the
                resource has no ``timePartitioning.expirationMs`` value.
        """
        expiration_ms = self._properties.get(
            'timePartitioning', {}).get('expirationMs')
        if expiration_ms is None:
            # ``int(None)`` raises TypeError, so an absent expiration is
            # reported as None, matching how the other properties of this
            # class treat missing fields.
            return None
        # The value may arrive as a string (e.g. '10000'); normalize to int.
        return int(expiration_ms)

    @property
    def friendly_name(self):
        """Title of the table.

        Returns:
            str: The name as set by the user, or None (the default)
        """
        return self._properties.get('friendlyName')

    # Shares the getter logic with other classes that wrap the same function.
    view_use_legacy_sql = property(_view_use_legacy_sql_getter)


def _row_from_mapping(mapping, schema):
"""Convert a mapping to a row tuple using the schema.
Expand Down
8 changes: 4 additions & 4 deletions bigquery/tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1002,7 +1002,7 @@ def test_list_dataset_tables_empty(self):
self.assertEqual(req['path'], '/%s' % PATH)

def test_list_dataset_tables_defaults(self):
from google.cloud.bigquery.table import Table
from google.cloud.bigquery.table import TableListItem

TABLE_1 = 'table_one'
TABLE_2 = 'table_two'
Expand Down Expand Up @@ -1039,7 +1039,7 @@ def test_list_dataset_tables_defaults(self):

self.assertEqual(len(tables), len(DATA['tables']))
for found, expected in zip(tables, DATA['tables']):
self.assertIsInstance(found, Table)
self.assertIsInstance(found, TableListItem)
self.assertEqual(found.full_table_id, expected['id'])
self.assertEqual(found.table_type, expected['type'])
self.assertEqual(token, TOKEN)
Expand All @@ -1050,7 +1050,7 @@ def test_list_dataset_tables_defaults(self):
self.assertEqual(req['path'], '/%s' % PATH)

def test_list_dataset_tables_explicit(self):
from google.cloud.bigquery.table import Table
from google.cloud.bigquery.table import TableListItem

TABLE_1 = 'table_one'
TABLE_2 = 'table_two'
Expand Down Expand Up @@ -1087,7 +1087,7 @@ def test_list_dataset_tables_explicit(self):

self.assertEqual(len(tables), len(DATA['tables']))
for found, expected in zip(tables, DATA['tables']):
self.assertIsInstance(found, Table)
self.assertIsInstance(found, TableListItem)
self.assertEqual(found.full_table_id, expected['id'])
self.assertEqual(found.table_type, expected['type'])
self.assertIsNone(token)
Expand Down
81 changes: 81 additions & 0 deletions bigquery/tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,87 @@ def test__row_from_mapping_w_schema(self):
('Phred Phlyntstone', 32, ['red', 'green'], None))


class TestTableListItem(unittest.TestCase):
    """Unit tests for the read-only ``TableListItem`` resource wrapper."""

    @staticmethod
    def _get_target_class():
        # Imported lazily so the module under test is only loaded when a
        # test actually runs.
        from google.cloud.bigquery.table import TableListItem

        return TableListItem

    def _make_one(self, *args, **kw):
        klass = self._get_target_class()
        return klass(*args, **kw)

    def test_ctor(self):
        project = 'test-project'
        dataset_id = 'test_dataset'
        table_id = 'coffee_table'
        full_id = '{}:{}:{}'.format(project, dataset_id, table_id)
        table_reference = {
            'projectId': project,
            'datasetId': dataset_id,
            'tableId': table_id,
        }
        resource = {
            'kind': 'bigquery#table',
            'id': full_id,
            'tableReference': table_reference,
            'friendlyName': 'Mahogany Coffee Table',
            'type': 'TABLE',
            'timePartitioning': {
                'type': 'DAY',
                'expirationMs': '10000',
            },
            'labels': {
                'some-stuff': 'this-is-a-label',
            },
        }

        table = self._make_one(resource)

        # Identity fields read straight from the resource.
        self.assertEqual(table.project, project)
        self.assertEqual(table.dataset_id, dataset_id)
        self.assertEqual(table.table_id, table_id)
        self.assertEqual(table.full_table_id, full_id)

        # The derived reference must point at the same table.
        reference = table.reference
        self.assertEqual(reference.project, project)
        self.assertEqual(reference.dataset_id, dataset_id)
        self.assertEqual(reference.table_id, table_id)

        # Remaining descriptive properties.
        self.assertEqual(table.friendly_name, 'Mahogany Coffee Table')
        self.assertEqual(table.table_type, 'TABLE')
        self.assertEqual(table.partitioning_type, 'DAY')
        self.assertEqual(table.partition_expiration, 10000)
        self.assertEqual(table.labels['some-stuff'], 'this-is-a-label')
        self.assertIsNone(table.view_use_legacy_sql)

    def test_ctor_view(self):
        project = 'test-project'
        dataset_id = 'test_dataset'
        table_id = 'just_looking'
        full_id = '{}:{}:{}'.format(project, dataset_id, table_id)
        resource = {
            'kind': 'bigquery#table',
            'id': full_id,
            'tableReference': {
                'projectId': project,
                'datasetId': dataset_id,
                'tableId': table_id,
            },
            'type': 'VIEW',
        }

        table = self._make_one(resource)

        self.assertEqual(table.project, project)
        self.assertEqual(table.dataset_id, dataset_id)
        self.assertEqual(table.table_id, table_id)
        self.assertEqual(table.full_table_id, full_id)

        reference = table.reference
        self.assertEqual(reference.project, project)
        self.assertEqual(reference.dataset_id, dataset_id)
        self.assertEqual(reference.table_id, table_id)

        self.assertEqual(table.table_type, 'VIEW')
        # Server default for useLegacySql is True.
        self.assertTrue(table.view_use_legacy_sql)


class TestRow(unittest.TestCase):

def test_row(self):
Expand Down

0 comments on commit f308bd4

Please sign in to comment.