Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BigQuery: Add support of the project ID with org prefix to the Table.from_string() method #9161

33 changes: 29 additions & 4 deletions bigquery/google/cloud/bigquery/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import copy
import datetime
import decimal
import re

from google.cloud._helpers import UTC
from google.cloud._helpers import _date_from_iso8601_date
Expand All @@ -29,6 +30,12 @@
_RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f"
_TIMEONLY_WO_MICROS = "%H:%M:%S"
_TIMEONLY_W_MICROS = "%H:%M:%S.%f"
_PROJECT_PREFIX_PATTERN = re.compile(
r"""
(?P<project_id>\S+\:[^.]+)\.(?P<dataset_id>[^.]+)(?:$|\.(?P<custom_id>[^.]+)$)
""",
plamut marked this conversation as resolved.
Show resolved Hide resolved
re.VERBOSE,
)


def _not_null(value, field):
Expand Down Expand Up @@ -586,24 +593,42 @@ def _str_or_none(value):
return str(value)


def _split_id(full_id):
    """Helper: break a fully-qualified ID into its component parts.

    An ID that matches ``_PROJECT_PREFIX_PATTERN`` (for example
    ``"google.com:my-project.my_dataset.my_table"``) is split using the
    pattern's capture groups, so dots that appear before the colon stay
    inside the project ID. Any other ID is split on every ``"."``.

    Args:
        full_id (str): Fully-qualified ID in standard SQL format.

    Returns:
        List[str]: The ID's components, in order.
    """
    prefixed = _PROJECT_PREFIX_PATTERN.match(full_id)
    if prefixed is not None:
        # The optional trailing group is None for two-part IDs; drop it.
        return [group for group in prefixed.groups() if group]
    return full_id.split(".")


def _parse_3_part_id(full_id, default_project=None, property_name="table_id"):
output_project_id = default_project
output_dataset_id = None
output_resource_id = None
parts = full_id.split(".")
parts = _split_id(full_id)

if len(parts) != 2 and len(parts) != 3:
raise ValueError(
"{property_name} must be a fully-qualified ID in "
'standard SQL format. e.g. "project.dataset.{property_name}", '
'standard SQL format, e.g., "project.dataset.{property_name}", '
"got {}".format(full_id, property_name=property_name)
)

if len(parts) == 2 and not default_project:
raise ValueError(
"When default_project is not set, {property_name} must be a "
"fully-qualified ID in standard SQL format. "
'e.g. "project.dataset_id.{property_name}", got {}'.format(
"fully-qualified ID in standard SQL format, "
'e.g., "project.dataset_id.{property_name}", got {}'.format(
full_id, property_name=property_name
)
)
Expand Down
17 changes: 1 addition & 16 deletions bigquery/google/cloud/bigquery/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import six
import copy
import re

import google.cloud._helpers
from google.cloud.bigquery import _helpers
Expand All @@ -27,14 +26,6 @@
from google.cloud.bigquery.table import TableReference


_PROJECT_PREFIX_PATTERN = re.compile(
r"""
(?P<project_id>\S+\:[^.]+)\.(?P<dataset_id>[^.]+)$
""",
re.VERBOSE,
)


def _get_table_reference(self, table_id):
"""Constructs a TableReference.

Expand Down Expand Up @@ -299,13 +290,7 @@ def from_string(cls, dataset_id, default_project=None):
"""
output_dataset_id = dataset_id
output_project_id = default_project
with_prefix = _PROJECT_PREFIX_PATTERN.match(dataset_id)
if with_prefix is None:
parts = dataset_id.split(".")
else:
project_id = with_prefix.group("project_id")
dataset_id = with_prefix.group("dataset_id")
parts = [project_id, dataset_id]
parts = _helpers._split_id(dataset_id)

if len(parts) == 1 and not default_project:
raise ValueError(
Expand Down
12 changes: 12 additions & 0 deletions bigquery/tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,11 +215,23 @@ def test_from_string(self):
self.assertEqual(got.dataset_id, "string_dataset")
self.assertEqual(got.table_id, "string_table")

def test_from_string_w_prefix(self):
    """A project ID with a "google.com:" prefix is parsed as one piece."""
    target = self._get_target_class()
    full_id = "google.com:string-project.string_dataset.string_table"
    parsed = target.from_string(full_id)
    self.assertEqual(parsed.project, "google.com:string-project")
    self.assertEqual(parsed.dataset_id, "string_dataset")
    self.assertEqual(parsed.table_id, "string_table")

def test_from_string_legacy_string(self):
    """An ID of the form "project:dataset.table" raises ValueError."""
    target = self._get_target_class()
    legacy_id = "string-project:string_dataset.string_table"
    with self.assertRaises(ValueError):
        target.from_string(legacy_id)

def test_from_string_w_incorrect_prefix(self):
    """A dotted prefix with no colon ("google.com.project...") raises ValueError."""
    target = self._get_target_class()
    malformed_id = "google.com.string-project.string_dataset.string_table"
    with self.assertRaises(ValueError):
        target.from_string(malformed_id)

def test_from_string_not_fully_qualified(self):
cls = self._get_target_class()
with self.assertRaises(ValueError):
Expand Down