# Patch summary (free-text preamble; everything before the first
# "diff --git" header is commentary and is not part of any hunk).
#
# bigtable/google/cloud/bigtable/row_data.py
#   * Adds three module-level message templates: _MISSING_COLUMN_FAMILY,
#     _MISSING_COLUMN and _MISSING_INDEX.
#   * Adds the private helper _get_cells_no_copy(column_family_id, column),
#     which looks up self._cells[column_family_id][column] and returns the
#     stored list itself (no copy).  A failed lookup at either level is
#     re-raised as KeyError carrying the matching template message.
#   * Adds get_cell(column_family_id, column, index=0), which returns
#     copy.deepcopy() of one cell.  Both IndexError and TypeError raised by
#     cells[index] are reported as IndexError with the _MISSING_INDEX
#     message, so a non-integer index also surfaces as IndexError.
#   * Adds get_cells(column_family_id, column), which returns
#     copy.deepcopy() of the whole list of cells.
#
# bigtable/tests/unit/test_row_data.py
#   * Moves the datetime, json, operator and os imports to module level and
#     removes the function-local imports they replace.
#   * Adds eight tests covering get_cell / get_cells: default index,
#     explicit index, missing column family, missing column, bad index, and
#     the copy-not-original guarantee.
#   * Adds the module-level helper _make_cell(value), which builds a
#     row_data.Cell(value, datetime.datetime.utcnow()).
#
# NOTE(review): the new methods call copy.deepcopy(); no hunk here adds
#   "import copy", so row_data.py presumably imports it already -- confirm.
# NOTE(review): the get_cell docstring lists IndexError only for an index
#   that "cannot be found"; the code also maps TypeError to IndexError.
# NOTE(review): the context line 'Exception raised to to invalid response'
#   shows a doubled word in the target file; fix it in a separate change,
#   because context lines must match the target byte-for-byte.
# NOTE(review): line breaks and indentation below were restored from a
#   whitespace-collapsed copy of this patch; verify the context lines
#   against the target files before applying.  Hunk headers encode exact
#   line counts, so regenerate the patch rather than hand-editing hunks.
diff --git a/bigtable/google/cloud/bigtable/row_data.py b/bigtable/google/cloud/bigtable/row_data.py
index 9bde1c0cb5a3..b86f3331ae3a 100644
--- a/bigtable/google/cloud/bigtable/row_data.py
+++ b/bigtable/google/cloud/bigtable/row_data.py
@@ -22,6 +22,16 @@
 from google.cloud._helpers import _to_bytes
 
 
+_MISSING_COLUMN_FAMILY = (
+    'Column family {} is not among the cells stored in this row.')
+_MISSING_COLUMN = (
+    'Column {} is not among the cells stored in this row in the '
+    'column family {}.')
+_MISSING_INDEX = (
+    'Index {!r} is not valid for the cells stored in this row for column {} '
+    'in the column family {}. There are {} such cells.')
+
+
 class Cell(object):
     """Representation of a Google Cloud Bigtable Cell.
 
@@ -171,6 +181,105 @@ def row_key(self):
         """
         return self._row_key
 
+    def _get_cells_no_copy(self, column_family_id, column):
+        """Get a time series of cells stored on this instance.
+
+        Args:
+            column_family_id (str): The ID of the column family. Must be of the
+                form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``.
+            column (bytes): The column within the column family where the cells
+                are located.
+
+        Returns:
+            List[~google.cloud.bigtable.row_data.Cell]: The cells stored in the
+            specified column.
+
+        Raises:
+            KeyError: If ``column_family_id`` is not among the cells stored
+                in this row.
+            KeyError: If ``column`` is not among the cells stored in this row
+                for the given ``column_family_id``.
+        """
+        try:
+            column_family = self._cells[column_family_id]
+        except KeyError:
+            raise KeyError(_MISSING_COLUMN_FAMILY.format(column_family_id))
+
+        try:
+            cells = column_family[column]
+        except KeyError:
+            raise KeyError(_MISSING_COLUMN.format(column, column_family_id))
+
+        return cells
+
+    def get_cell(self, column_family_id, column, index=0):
+        """Get a single cell stored on this instance.
+
+        .. note::
+
+            This returns a copy of the actual cell (so that the
+            caller cannot mutate internal state).
+
+        Args:
+            column_family_id (str): The ID of the column family. Must be of the
+                form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``.
+            column (bytes): The column within the column family where the cell
+                is located.
+            index (Optional[int]): The offset within the series of values. If
+                not specified, will return the first cell.
+
+        Returns:
+            ~google.cloud.bigtable.row_data.Cell: The cell stored in the
+            specified column.
+
+        Raises:
+            KeyError: If ``column_family_id`` is not among the cells stored
+                in this row.
+            KeyError: If ``column`` is not among the cells stored in this row
+                for the given ``column_family_id``.
+            IndexError: If ``index`` cannot be found within the cells stored
+                in this row for the given ``column_family_id``, ``column``
+                pair.
+        """
+        cells = self._get_cells_no_copy(column_family_id, column)
+
+        try:
+            cell = cells[index]
+        except (TypeError, IndexError):
+            num_cells = len(cells)
+            msg = _MISSING_INDEX.format(
+                index, column, column_family_id, num_cells)
+            raise IndexError(msg)
+
+        return copy.deepcopy(cell)
+
+    def get_cells(self, column_family_id, column):
+        """Get a time series of cells stored on this instance.
+
+        .. note::
+
+            This returns a copy of the actual cells (so that the
+            caller cannot mutate internal state).
+
+        Args:
+            column_family_id (str): The ID of the column family. Must be of the
+                form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``.
+            column (bytes): The column within the column family where the cells
+                are located.
+
+        Returns:
+            List[~google.cloud.bigtable.row_data.Cell]: The cells stored in the
+            specified column.
+
+        Raises:
+            KeyError: If ``column_family_id`` is not among the cells stored
+                in this row.
+            KeyError: If ``column`` is not among the cells stored in this row
+                for the given ``column_family_id``.
+        """
+        cells = self._get_cells_no_copy(column_family_id, column)
+        return copy.deepcopy(cells)
+
 
 class InvalidReadRowsResponse(RuntimeError):
     """Exception raised to to invalid response data from back-end."""
diff --git a/bigtable/tests/unit/test_row_data.py b/bigtable/tests/unit/test_row_data.py
index 7cfb1dc45d4e..c50988c855df 100644
--- a/bigtable/tests/unit/test_row_data.py
+++ b/bigtable/tests/unit/test_row_data.py
@@ -13,6 +13,10 @@
 # limitations under the License.
 
 
+import datetime
+import json
+import operator
+import os
 import unittest
 
 import mock
@@ -30,7 +34,6 @@ def _make_one(self, *args, **kwargs):
         return self._get_target_class()(*args, **kwargs)
 
     def _from_pb_test_helper(self, labels=None):
-        import datetime
         from google.cloud._helpers import _EPOCH
         from google.cloud.bigtable._generated import (
             data_pb2 as data_v2_pb2)
@@ -174,6 +177,138 @@ def test_to_dict(self):
         }
         self.assertEqual(result, expected_result)
 
+    def test_get_cell_defaults(self):
+        family_name = u'name1'
+        qual = b'col1'
+        cell = _make_cell(b'')
+
+        partial_row_data = self._make_one(None)
+        partial_row_data._cells = {
+            family_name: {
+                qual: [cell],
+            },
+        }
+
+        result = partial_row_data.get_cell(family_name, qual)
+        # Make sure we get a copy, not the original.
+        self.assertIsNot(result, cell)
+        self.assertEqual(result, cell)
+
+    def test_get_cell_explicit_index(self):
+        family_name = u'name1'
+        qual = b'col1'
+        cell1 = _make_cell(b'1')
+        cell2 = _make_cell(b'2')
+
+        partial_row_data = self._make_one(None)
+        partial_row_data._cells = {
+            family_name: {
+                qual: [cell1, cell2],
+            },
+        }
+
+        result = partial_row_data.get_cell(family_name, qual, index=1)
+        # Make sure we get a copy, not the original.
+        self.assertIsNot(result, cell2)
+        self.assertEqual(result, cell2)
+
+    def test_get_cell_bad_family(self):
+        from google.cloud.bigtable import row_data
+
+        family_name = u'name1'
+        partial_row_data = self._make_one(None)
+        self.assertEqual(partial_row_data._cells, {})
+
+        with self.assertRaises(KeyError) as exc_info:
+            partial_row_data.get_cell(family_name, None)
+
+        expected_arg = row_data._MISSING_COLUMN_FAMILY.format(family_name)
+        self.assertEqual(exc_info.exception.args, (expected_arg,))
+
+    def test_get_cell_bad_column(self):
+        from google.cloud.bigtable import row_data
+
+        family_name = u'name1'
+        qual = b'col1'
+
+        partial_row_data = self._make_one(None)
+        partial_row_data._cells = {family_name: {}}
+
+        with self.assertRaises(KeyError) as exc_info:
+            partial_row_data.get_cell(family_name, qual)
+
+        expected_arg = row_data._MISSING_COLUMN.format(qual, family_name)
+        self.assertEqual(exc_info.exception.args, (expected_arg,))
+
+    def test_get_cell_bad_index(self):
+        from google.cloud.bigtable import row_data
+
+        family_name = u'name1'
+        qual = b'col1'
+
+        partial_row_data = self._make_one(None)
+        partial_row_data._cells = {
+            family_name: {
+                qual: [],
+            },
+        }
+
+        for index in (5, 'not-int'):
+            with self.assertRaises(IndexError) as exc_info:
+                partial_row_data.get_cell(family_name, qual, index=index)
+
+            expected_arg = row_data._MISSING_INDEX.format(
+                index, qual, family_name, 0)
+            self.assertEqual(exc_info.exception.args, (expected_arg,))
+
+    def test_get_cells(self):
+        family_name = u'name1'
+        qual = b'col1'
+        cell = _make_cell(b'hi-mom')
+
+        partial_row_data = self._make_one(None)
+        cells = [cell]
+        partial_row_data._cells = {
+            family_name: {
+                qual: cells,
+            },
+        }
+
+        result = partial_row_data.get_cells(family_name, qual)
+        # Make sure we get a copy, not the original.
+        self.assertIsNot(result, cells)
+        self.assertEqual(result, cells)
+        self.assertIsNot(result[0], cell)
+        self.assertEqual(result[0], cell)
+
+    def test_get_cells_bad_family(self):
+        from google.cloud.bigtable import row_data
+
+        family_name = u'name1'
+        partial_row_data = self._make_one(None)
+        self.assertEqual(partial_row_data._cells, {})
+
+        with self.assertRaises(KeyError) as exc_info:
+            partial_row_data.get_cells(family_name, None)
+
+        expected_arg = row_data._MISSING_COLUMN_FAMILY.format(family_name)
+        self.assertEqual(exc_info.exception.args, (expected_arg,))
+
+    def test_get_cells_bad_column(self):
+        from google.cloud.bigtable import row_data
+
+        family_name = u'name1'
+        qual = b'col1'
+
+        partial_row_data = self._make_one(None)
+        partial_row_data._cells = {family_name: {}}
+
+        with self.assertRaises(KeyError) as exc_info:
+            partial_row_data.get_cells(family_name, qual)
+
+        expected_arg = row_data._MISSING_COLUMN.format(qual, family_name)
+        self.assertEqual(exc_info.exception.args, (expected_arg,))
+
     def test_cells_property(self):
         partial_row_data = self._make_one(None)
         cells = {1: 2}
@@ -433,8 +568,6 @@ def _make_one(self, *args, **kwargs):
         return self._get_target_class()(*args, **kwargs)
 
     def _load_json_test(self, test_name):
-        import os
-
         if self.__class__._json_tests is None:
             dirname = os.path.dirname(__file__)
             filename = os.path.join(dirname, 'read-rows-acceptance-test.json')
@@ -500,8 +633,6 @@ def test_invalid_commit_with_chunk(self):
     # JSON Error cases: incomplete final row
 
     def _sort_flattend_cells(self, flattened):
-        import operator
-
         key_func = operator.itemgetter('rk', 'fm', 'qual')
         return sorted(flattened, key=key_func)
 
@@ -717,8 +848,6 @@ def _parse_readrows_acceptance_tests(filename):
     test/resources/com/google/cloud/bigtable/grpc/scanner/v2/
     read-rows-acceptance-test.json
     """
-    import json
-
     with open(filename) as json_file:
         test_json = json.load(json_file)
 
@@ -727,3 +856,9 @@ def _parse_readrows_acceptance_tests(filename):
         chunks = _generate_cell_chunks(test['chunks'])
         results = test['results']
         yield name, chunks, results
+
+
+def _make_cell(value):
+    from google.cloud.bigtable import row_data
+
+    return row_data.Cell(value, datetime.datetime.utcnow())