Adding namespace fides_meta support for BigQuery datasets #5294

Merged: 16 commits, Sep 25, 2024
Changes from 4 commits
2 changes: 1 addition & 1 deletion requirements.txt
@@ -16,7 +16,7 @@ types-defusedxml==0.7.0.20240218
 expandvars==0.9.0
 fastapi[all]==0.111.0
 fastapi-pagination[sqlalchemy]==0.12.25
-fideslang==3.0.4
+fideslang @ git+https://github.com/ethyca/fideslang.git@0d8c203295d6d427b9274db5d9b8815065bdf75b
 fideslog==1.2.10
 firebase-admin==5.3.0
 GitPython==3.1.41
@@ -42,8 +42,8 @@ class BigQuerySchema(ConnectionConfigSecretsSchema):
     )
     dataset: Optional[str] = Field(
         default=None,
-        title="BigQuery Dataset",
-        description="The dataset within your BigQuery project that contains the tables you want to access.",
+        title="Default BigQuery Dataset",
+        description="The default BigQuery dataset that will be used if one isn't provided in the associated Fides datasets.",
     )
 
     _required_components: ClassVar[List[str]] = ["keyfile_creds"]
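
The new field description implies a fallback: a namespace declared on a Fides dataset, when present, takes precedence over this connection-level default. A rough sketch of that semantics, using a hypothetical helper (the actual resolution happens in the connector code below, not in this schema):

```python
from typing import Optional

# Hypothetical helper illustrating the fallback described in the new field
# description; not code from this PR.
def resolve_dataset_id(
    namespace_meta: Optional["BigQueryNamespaceMeta"],
    connection_secrets: dict,
) -> Optional[str]:
    # A namespace declared in the Fides dataset's fides_meta wins...
    if namespace_meta is not None:
        return namespace_meta.dataset_id
    # ...otherwise fall back to the connection-level "dataset" secret.
    return connection_secrets.get("dataset")
```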
Empty file.
18 changes: 18 additions & 0 deletions src/fides/api/schemas/namespace_meta/bigquery_namespace_meta.py
@@ -0,0 +1,18 @@
from typing import Optional

from fides.api.schemas.base_class import FidesSchema


class BigQueryNamespaceMeta(FidesSchema):
    """
    Represents the namespace structure for BigQuery queries.

    Attributes:
        project_id (Optional[str]): The ID of the Google Cloud project.
            This is optional as queries within the same project may omit it.
        dataset_id (str): The ID of the BigQuery dataset. This is required
            for all BigQuery queries to specify the dataset being queried.
    """

    project_id: Optional[str] = None
    dataset_id: str

Review thread on BigQueryNamespaceMeta:

galvana (author): I'm only using this in a limited scope. It'd be nice to use this to validate datasets when we link them to a specific connection config, but I'm removing that from scope for now.

Reviewer: Nice, that's an interesting idea. Somewhat related to my comment on the plus PR here.

Review thread on project_id:

Reviewer: OK, I think this makes sense, though I will say it's a bit surprising to me, since the BQ dataset does need to be associated with a project in reality. I know we may not need that for DSR processing, but that seems to be a bit of an implementation detail rather than a fact about the dataset itself, and I feel like the dataset definition should try to describe the dataset itself as accurately as possible.

That being said, I realize that requiring this may break backward compatibility and in general leave things less flexible, so I'm not strongly recommending we change it. I'm good with it as it is, ultimately; just wanted to throw in my two cents and see what you think.

galvana (author): You're right, I think it'd be better for this to be explicit 👍
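
A quick usage sketch of the schema's contract (hypothetical, assuming FidesSchema is a Pydantic model, which the model_validate call later in this PR suggests): omitting project_id is allowed, omitting dataset_id is not.

```python
from pydantic import ValidationError

# project_id may be omitted; it defaults to None.
meta = BigQueryNamespaceMeta(dataset_id="fidesopstest")
assert meta.project_id is None

# dataset_id is required, so this raises a ValidationError.
try:
    BigQueryNamespaceMeta(project_id="silken-precinct-284918")
except ValidationError as exc:
    print(exc)
```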
40 changes: 34 additions & 6 deletions src/fides/api/service/connectors/query_config.py
@@ -21,6 +21,9 @@
 from fides.api.graph.execution import ExecutionNode
 from fides.api.models.policy import Policy, Rule
 from fides.api.models.privacy_request import ManualAction, PrivacyRequest
+from fides.api.schemas.namespace_meta.bigquery_namespace_meta import (
+    BigQueryNamespaceMeta,
+)
 from fides.api.schemas.policy import ActionType
 from fides.api.service.masking.strategy.masking_strategy import MaskingStrategy
 from fides.api.service.masking.strategy.masking_strategy_nullify import (
@@ -810,14 +813,41 @@ class BigQueryQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig):
     """
     Generates SQL valid for BigQuery
     """
 
+    def __init__(
+        self,
+        node: ExecutionNode,
+        namespace_meta: Optional[BigQueryNamespaceMeta] = None,
+    ):
+        """
+        Accepts an optional namespace_meta param to be able to specify dataset and project IDs for the generated queries.
+        """
+        super().__init__(node)
+        self.namespace_meta = namespace_meta
+
+    def _generate_table_name(self) -> str:
+        """
+        Prepends the dataset ID and project ID to the base table name
+        if the BigQuery namespace meta is provided.
+        """
+
+        table_name = self.node.collection.name
+        if self.namespace_meta:
+            table_name = f"{self.namespace_meta.dataset_id}.{table_name}"
+            if project_id := self.namespace_meta.project_id:
+                table_name = f"{project_id}.{table_name}"
+        return table_name

Review thread on the new namespace_meta parameter:

galvana (author): Updating the init to include an optional namespace_meta object.

Reviewer: Could we make this support a bit more generic? Following the pattern we've taken on the D&D side, it feels like we could support a (less strongly-typed) namespace_meta attribute on the SQLQueryConfig generically, and rely on the implementation/subclass to make use of the namespace_meta as it sees fit, i.e. in the datasource-specific way. The fact that you've already typed Dataset.fides_meta.namespace as a generic Dict should support this pattern pretty well. What do you think? Maybe it doesn't need to be something we cover now, although I'd kinda like to see it, since I feel like it will only get more cumbersome to implement if we don't update it now.

galvana (author): This is valid, I went ahead and made this change. Let me know what you think of my implementation.

     def get_formatted_query_string(
         self,
         field_list: str,
         clauses: List[str],
     ) -> str:
-        """Returns a query string with backtick formatting for tables that have the same names as
-        BigQuery reserved words."""
-        return f'SELECT {field_list} FROM `{self.node.collection.name}` WHERE {" OR ".join(clauses)}'
+        """
+        Returns a query string with backtick formatting for tables that have the same names as
+        BigQuery reserved words.
+        """
+
+        return f'SELECT {field_list} FROM `{self._generate_table_name()}` WHERE {" OR ".join(clauses)}'
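
To make the effect concrete, here is a rough sketch of what the namespace-qualified naming produces. The node object is hypothetical (assume its collection is named "customer"); the namespace values match the test fixtures later in this PR.

```python
meta = BigQueryNamespaceMeta(
    project_id="silken-precinct-284918", dataset_id="fidesopstest"
)
config = BigQueryQueryConfig(node, meta)

config._generate_table_name()
# -> "silken-precinct-284918.fidesopstest.customer"

config.get_formatted_query_string("email, name", ["email = :email"])
# -> 'SELECT email, name FROM `silken-precinct-284918.fidesopstest.customer`
#     WHERE email = :email'

# Without namespace meta, behavior is unchanged from before this change:
BigQueryQueryConfig(node)._generate_table_name()
# -> "customer"
```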

     def generate_update(
         self, row: Row, policy: Policy, request: PrivacyRequest, client: Engine

@@ -843,9 +873,7 @@ def generate_update(
             )
             return None
 
-        table = Table(
-            self.node.address.collection, MetaData(bind=client), autoload=True
-        )
+        table = Table(self._generate_table_name(), MetaData(bind=client), autoload=True)
         pk_clauses: List[ColumnElement] = [
             getattr(table.c, k) == v for k, v in non_empty_primary_keys.items()
         ]
32 changes: 30 additions & 2 deletions src/fides/api/service/connectors/sql_connector.py
@@ -14,7 +14,8 @@
 from google.oauth2 import service_account
 from loguru import logger
 from snowflake.sqlalchemy import URL as Snowflake_URL
-from sqlalchemy import Column, text
+from sqlalchemy import Column, select, text
+from sqlalchemy.dialects.postgresql import JSONB
 from sqlalchemy.engine import ( # type: ignore
     URL,
     Connection,

@@ -24,6 +24,7 @@
     create_engine,
 )
 from sqlalchemy.exc import InternalError, OperationalError
+from sqlalchemy.orm import Session
 from sqlalchemy.sql import Executable # type: ignore
 from sqlalchemy.sql.elements import TextClause
@@ -57,6 +59,9 @@
 from fides.api.schemas.connection_configuration.connection_secrets_mysql import (
     MySQLSchema,
 )
+from fides.api.schemas.namespace_meta.bigquery_namespace_meta import (
+    BigQueryNamespaceMeta,
+)
 from fides.api.service.connectors.base_connector import BaseConnector
 from fides.api.service.connectors.query_config import (
     BigQueryQueryConfig,

@@ -71,6 +76,10 @@
 from fides.api.util.collection_util import Row
 from fides.config import get_config
 
+from fides.api.models.sql_models import ( # type: ignore[attr-defined] # isort: skip
+    Dataset as CtlDataset,
+)
+
 CONFIG = get_config()
 
 sshtunnel.SSH_TIMEOUT = CONFIG.security.bastion_server_ssh_timeout

@@ -115,6 +124,18 @@ def default_cursor_result_to_rows(results: LegacyCursorResult) -> List[Row]:
             rows.append({col[0]: row_tuple[count] for count, col in enumerate(columns)})
         return rows
 
+    @staticmethod
+    def get_namespace_meta(db: Session, dataset: str) -> Optional[Dict[str, Any]]:
+        """
+        Util function to return the namespace meta for a given ctl_dataset.
+        """
+
+        return db.scalar(
+            select(CtlDataset.fides_meta["namespace"].cast(JSONB)).where(
+                CtlDataset.fides_key == dataset
+            )
+        )
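
For example, with a ctl_dataset like the one created in the unit test below, the helper returns the raw namespace dict (the fides_key here is hypothetical):

```python
SQLConnector.get_namespace_meta(db, "my_dataset_key")
# -> {"dataset_id": "public"}, or None if the dataset declares no namespace
```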

     @abstractmethod
     def build_uri(self) -> Optional[str]:
         """Build a database specific uri connection string"""

@@ -529,7 +550,14 @@ def create_client(self) -> Engine:
     # Overrides SQLConnector.query_config
     def query_config(self, node: ExecutionNode) -> BigQueryQueryConfig:
         """Query wrapper corresponding to the input execution_node."""
-        return BigQueryQueryConfig(node)
+
+        db: Session = Session.object_session(self.configuration)
+        namespace_meta: Optional[BigQueryNamespaceMeta] = None
+
+        if raw_meta := SQLConnector.get_namespace_meta(db, node.address.dataset):
+            namespace_meta = BigQueryNamespaceMeta(**raw_meta)
+
+        return BigQueryQueryConfig(node, namespace_meta)

Review thread on casting raw_meta:

Reviewer: Right, so what I'm thinking is that instead of "casting" the raw_meta here, we'd cast it within the scope of the BigQueryQueryConfig, such that we could initialize a generic namespace_meta attribute generically on the SQLConnector base class.

galvana (author), Sep 24, 2024: I updated it to this:

```python
db: Session = Session.object_session(self.configuration)
return BigQueryQueryConfig(
    node, SQLConnector.get_namespace_meta(db, node.address.dataset)
)
```

The SQLLikeQueryConfig super class validates the meta dict based on the namespace_meta_schema defined at the BigQueryQueryConfig:

```python
class SQLLikeQueryConfig(QueryConfig[T], ABC):
    """
    Abstract query config for SQL-like languages (that may not be strictly SQL).
    """

    namespace_meta_schema: Optional[Type[NamespaceMeta]] = None

    def __init__(self, node: ExecutionNode, namespace_meta: Optional[Dict] = None):
        super().__init__(node)
        self.namespace_meta: Optional[NamespaceMeta] = None

        if namespace_meta is not None:
            if self.namespace_meta_schema is None:
                raise MissingNamespaceSchemaException(
                    f"{self.__class__.__name__} must define a namespace_meta_schema when namespace_meta is provided."
                )
            try:
                self.namespace_meta = self.namespace_meta_schema.model_validate(
                    namespace_meta
                )
            except ValidationError as exc:
                raise ValueError(f"Invalid namespace_meta: {exc}")
```
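
Under that generic pattern, the BigQuery-specific config presumably only needs to declare which schema validates its namespace meta. A sketch inferred from the comment above (the final merged code may differ):

```python
# Inferred sketch, not the literal diff: BigQueryQueryConfig opts into the
# generic SQLLikeQueryConfig validation by naming its namespace meta schema.
class BigQueryQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig):
    namespace_meta_schema: Optional[Type[NamespaceMeta]] = BigQueryNamespaceMeta
```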

     # Overrides SQLConnector.test_connection
     def test_connection(self) -> Optional[ConnectionTestStatus]:
17 changes: 17 additions & 0 deletions tests/ctl/core/test_dataset.py
@@ -384,6 +384,23 @@ def test_field_data_categories(db) -> None:
     assert ctl_dataset.field_data_categories
 
 
+@pytest.mark.unit
+def test_namespace_meta(db) -> None:
+    ctl_dataset = CtlDataset.create_from_dataset_dict(
+        db,
+        {
+            "fides_key": f"dataset_key-f{uuid4()}",
+            "fides_meta": {"namespace": {"dataset_id": "public"}},
+            "collections": [],
+        },
+    )
+    assert ctl_dataset.fides_meta == {
+        "resource_id": None,
+        "after": None,
+        "namespace": {"dataset_id": "public"},
+    }
+
+
 # Generate Dataset Database Integration Tests
 
 # These URLs are for the databases in the docker-compose.integration-tests.yml file
112 changes: 112 additions & 0 deletions tests/fixtures/bigquery_fixtures.py
@@ -61,6 +61,30 @@ def bigquery_connection_config(db: Session) -> Generator:
     connection_config.delete(db)
 
 
+@pytest.fixture(scope="function")
+def bigquery_connection_config_without_default_dataset(db: Session) -> Generator:
+    connection_config = ConnectionConfig.create(
+        db=db,
+        data={
+            "name": str(uuid4()),
+            "key": "my_bigquery_config",
+            "connection_type": ConnectionType.bigquery,
+            "access": AccessLevel.write,
+        },
+    )
+    # Pulling from integration config file or GitHub secrets
+    keyfile_creds = integration_config.get("bigquery", {}).get(
+        "keyfile_creds"
+    ) or ast.literal_eval(os.environ.get("BIGQUERY_KEYFILE_CREDS"))
+    if keyfile_creds:
+        schema = BigQuerySchema(keyfile_creds=keyfile_creds)
+        connection_config.secrets = schema.model_dump(mode="json")
+        connection_config.save(db=db)
+
+    yield connection_config
+    connection_config.delete(db)

 @pytest.fixture
 def bigquery_example_test_dataset_config(
     bigquery_connection_config: ConnectionConfig,

@@ -88,6 +112,39 @@ def bigquery_example_test_dataset_config(
     ctl_dataset.delete(db=db)
 

+@pytest.fixture
+def bigquery_example_test_dataset_config_with_namespace_meta(
+    bigquery_connection_config_without_default_dataset: ConnectionConfig,
+    db: Session,
+    example_datasets: List[Dict],
+) -> Generator:
+    bigquery_dataset = example_datasets[7]
+    bigquery_dataset["fides_meta"] = {
+        "namespace": {
+            "project_id": "silken-precinct-284918",
+            "dataset_id": "fidesopstest",
+        }
+    }
+    fides_key = bigquery_dataset["fides_key"]
+    bigquery_connection_config_without_default_dataset.name = fides_key
+    bigquery_connection_config_without_default_dataset.key = fides_key
+    bigquery_connection_config_without_default_dataset.save(db=db)
+
+    ctl_dataset = CtlDataset.create_from_dataset_dict(db, bigquery_dataset)
+
+    dataset = DatasetConfig.create(
+        db=db,
+        data={
+            "connection_config_id": bigquery_connection_config_without_default_dataset.id,
+            "fides_key": fides_key,
+            "ctl_dataset_id": ctl_dataset.id,
+        },
+    )
+    yield dataset
+    dataset.delete(db=db)
+    ctl_dataset.delete(db=db)


 @pytest.fixture(scope="function")
 def bigquery_resources(
     bigquery_example_test_dataset_config,

@@ -140,6 +197,61 @@ def bigquery_resources(
         connection.execute(stmt)
 

+@pytest.fixture(scope="function")
+def bigquery_resources_with_namespace_meta(
+    bigquery_example_test_dataset_config_with_namespace_meta,
+):
+    bigquery_connection_config = (
+        bigquery_example_test_dataset_config_with_namespace_meta.connection_config
+    )
+    connector = BigQueryConnector(bigquery_connection_config)
+    bigquery_client = connector.client()
+    with bigquery_client.connect() as connection:
+        uuid = str(uuid4())
+        customer_email = f"customer-{uuid}@example.com"
+        customer_name = f"{uuid}"
+
+        stmt = "select max(id) from fidesopstest.customer;"
+        res = connection.execute(stmt)
+        customer_id = res.all()[0][0] + 1
+
+        stmt = "select max(id) from fidesopstest.address;"
+        res = connection.execute(stmt)
+        address_id = res.all()[0][0] + 1
+
+        city = "Test City"
+        state = "TX"
+        stmt = f"""
+        insert into fidesopstest.address (id, house, street, city, state, zip)
+        values ({address_id}, '{111}', 'Test Street', '{city}', '{state}', '55555');
+        """
+        connection.execute(stmt)
+
+        stmt = f"""
+        insert into fidesopstest.customer (id, email, name, address_id)
+        values ({customer_id}, '{customer_email}', '{customer_name}', {address_id});
+        """
+        connection.execute(stmt)
+
+        yield {
+            "email": customer_email,
+            "name": customer_name,
+            "id": customer_id,
+            "client": bigquery_client,
+            "address_id": address_id,
+            "city": city,
+            "state": state,
+            "connector": connector,
+            "dataset": bigquery_example_test_dataset_config_with_namespace_meta.fides_key,
+        }
+        # Remove test data and close BigQuery connection in teardown
+        stmt = f"delete from fidesopstest.customer where email = '{customer_email}';"
+        connection.execute(stmt)
+
+        stmt = f"delete from fidesopstest.address where id = {address_id};"
+        connection.execute(stmt)


 @pytest.fixture(scope="session")
 def bigquery_test_engine() -> Generator:
     """Return a connection to a Google BigQuery Warehouse"""