From f4e1d234d892a28919ec3e0a677860ac777cc94d Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Tue, 9 Jul 2024 14:11:14 -0700 Subject: [PATCH 01/10] Migrate remaining data categories --- .../alembic/migrations/helpers/__init__.py | 0 .../helpers/fideslang_migration_functions.py | 348 +++++++++++++++++ ...ea164cee8bc_fideslang_2_data_migrations.py | 360 +----------------- ...cc7dc_migrate_remaining_data_categories.py | 91 +++++ 4 files changed, 452 insertions(+), 347 deletions(-) create mode 100644 src/fides/api/alembic/migrations/helpers/__init__.py create mode 100644 src/fides/api/alembic/migrations/helpers/fideslang_migration_functions.py create mode 100644 src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py diff --git a/src/fides/api/alembic/migrations/helpers/__init__.py b/src/fides/api/alembic/migrations/helpers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/fides/api/alembic/migrations/helpers/fideslang_migration_functions.py b/src/fides/api/alembic/migrations/helpers/fideslang_migration_functions.py new file mode 100644 index 0000000000..abc34aa7d9 --- /dev/null +++ b/src/fides/api/alembic/migrations/helpers/fideslang_migration_functions.py @@ -0,0 +1,348 @@ +import json +from typing import Dict, List, Optional + +from sqlalchemy import text +from sqlalchemy.engine import Connection, ResultProxy +from sqlalchemy.sql.elements import TextClause + + +def _replace_matching_data_label( + data_label: str, data_label_map: Dict[str, str] +) -> str: + """ + Helper function to do string replacement for updated fides_keys. + """ + for old, new in data_label_map.items(): + if data_label and data_label.startswith(old): + return data_label.replace(old, new) + + return data_label + + +def update_privacy_declarations( + bind: Connection, data_use_map: Dict[str, str], data_category_map: Dict[str, str] +) -> None: + """ + Upgrade or downgrade Privacy Declarations for Fideslang 2.0 + + This updates: + - data uses + - data categories + - shared categories + """ + existing_privacy_declarations: ResultProxy = bind.execute( + text( + "SELECT id, data_use, data_categories, shared_categories FROM privacydeclaration;" + ) + ) + for row in existing_privacy_declarations: + data_use: str = _replace_matching_data_label(row["data_use"], data_use_map) + data_categories: List[str] = [ + _replace_matching_data_label(data_category, data_category_map) + for data_category in row["data_categories"] + ] + shared_categories: List[str] = [ + _replace_matching_data_label(data_category, data_category_map) + for data_category in row["shared_categories"] + ] + + update_query: TextClause = text( + "UPDATE privacydeclaration SET data_use = :updated_use, data_categories = :updated_categories, shared_categories = :updated_shared WHERE id= :declaration_id" + ) + bind.execute( + update_query, + { + "declaration_id": row["id"], + "updated_use": data_use, + "updated_categories": data_categories, + "updated_shared": shared_categories, + }, + ) + + +def update_ctl_policies( + bind: Connection, data_use_map: Dict[str, str], data_category_map: Dict[str, str] +) -> None: + """ + Upgrade or downgrade Policy Rules for Fideslang 2.0 + + This updates: + - data uses + - data categories + """ + existing_ctl_policies: ResultProxy = bind.execute( + text("SELECT id, rules FROM ctl_policies;") + ) + + for row in existing_ctl_policies: + rules: List[Dict] = row["rules"] + + for i, rule in enumerate(rules or []): + data_uses: List = rule.get("data_uses", {}).get("values", []) + rules[i]["data_uses"]["values"] = [ + _replace_matching_data_label(use, data_use_map) for use in data_uses + ] + + data_categories: List = rule.get("data_categories", {}).get("values", []) + rules[i]["data_categories"]["values"] = [ + _replace_matching_data_label(category, data_category_map) + for category in data_categories + ] + + update_data_use_query: TextClause = text( + "UPDATE ctl_policies SET rules = :updated_rules WHERE id= :policy_id" + ) + bind.execute( + update_data_use_query, + {"policy_id": row["id"], "updated_rules": json.dumps(rules)}, + ) + + +def update_data_label_tables( + bind: Connection, update_map: Dict[str, str], table_name: str +) -> None: + """ + Upgrade or downgrade Data Labels for Fideslang 2.0 + """ + existing_labels: ResultProxy = bind.execute( + text(f"SELECT fides_key, parent_key FROM {table_name};") + ) + for row in existing_labels: + old_key = row["fides_key"] + new_key = _replace_matching_data_label(old_key, update_map) + + old_parent = row["parent_key"] + new_parent = _replace_matching_data_label(old_parent, update_map) + + update_query: TextClause = text( + f"UPDATE {table_name} SET fides_key = :updated_key, parent_key = :updated_parent WHERE fides_key = :old_key" + ) + bind.execute( + update_query, + { + "updated_key": new_key, + "old_key": old_key, + "updated_parent": new_parent, + }, + ) + + +def update_rule_targets(bind: Connection, data_label_map: Dict[str, str]) -> None: + """Upgrade ruletargets to use the new data categories.""" + + existing_rule_targets: ResultProxy = bind.execute( + text("SELECT id, data_category FROM ruletarget;") + ) + + for row in existing_rule_targets: + data_category = row["data_category"] + + if not data_category: + continue + + updated_category: str = _replace_matching_data_label( + data_category, data_label_map + ) + + update_data_category_query: TextClause = text( + "UPDATE ruletarget SET data_category = :updated_category WHERE id= :target_id" + ) + bind.execute( + update_data_category_query, + {"target_id": row["id"], "updated_category": updated_category}, + ) + + +def update_datasets_data_categories( + bind: Connection, data_label_map: Dict[str, str] +) -> None: + """Upgrade the datasets and their collections/fields in the database to use the new data categories.""" + + # Get all datasets out of the database + existing_datasets: ResultProxy = bind.execute( + text("SELECT id, data_categories, collections FROM ctl_datasets;") + ) + + for row in existing_datasets: + # Update data categories at the top level + dataset_data_categories: Optional[List[str]] = row["data_categories"] + + if dataset_data_categories: + updated_categories: List[str] = [ + _replace_matching_data_label(category, data_label_map) + for category in dataset_data_categories + ] + + update_label_query: TextClause = text( + "UPDATE ctl_datasets SET data_categories = :updated_labels WHERE id= :dataset_id" + ) + bind.execute( + update_label_query, + {"dataset_id": row["id"], "updated_labels": updated_categories}, + ) + + # Update the collections objects + collections: str = json.dumps(row["collections"]) + if collections: + for key, value in data_label_map.items(): + collections = collections.replace(key, value) + + update_collections_query: TextClause = text( + "UPDATE ctl_datasets SET collections = :updated_collections WHERE id= :dataset_id" + ) + bind.execute( + update_collections_query, + {"dataset_id": row["id"], "updated_collections": collections}, + ) + + +def update_system_ingress_egress_data_categories( + bind: Connection, data_label_map: Dict[str, str] +) -> None: + """Upgrade or downgrade data categories on system DataFlow objects (egress/ingress)""" + existing_systems: ResultProxy = bind.execute( + text("SELECT id, egress, ingress FROM ctl_systems;") + ) + + for row in existing_systems: + ingress = row["ingress"] + egress = row["egress"] + + # Do a blunt find/replace + if ingress: + for item in ingress: + if item["data_categories"]: + item["data_categories"] = [ + _replace_matching_data_label(category, data_label_map) + for category in item["data_categories"] + ] + + update_ingress_query: TextClause = text( + "UPDATE ctl_systems SET ingress = :updated_ingress WHERE id= :system_id" + ) + bind.execute( + update_ingress_query, + {"system_id": row["id"], "updated_ingress": json.dumps(ingress)}, + ) + + if egress: + for item in egress: + if item["data_categories"]: + item["data_categories"] = [ + _replace_matching_data_label(category, data_label_map) + for category in item["data_categories"] + ] + + update_egress_query: TextClause = text( + "UPDATE ctl_systems SET egress = :updated_egress WHERE id= :system_id" + ) + bind.execute( + update_egress_query, + {"system_id": row["id"], "updated_egress": json.dumps(egress)}, + ) + + +def update_privacy_notices(bind: Connection, data_use_map: Dict[str, str]) -> None: + """ + Update the Privacy Notice Models. + + This includes the following models: + - PrivacyNotice + - PrivacyNoticeHistory + - PrivacyNoticeTemplate + """ + privacy_notice_tables = [ + "privacynotice", + "privacynoticetemplate", + "privacynoticehistory", + ] + for table in privacy_notice_tables: + existing_notices: ResultProxy = bind.execute( + text(f"SELECT id, data_uses FROM {table};") + ) + + for row in existing_notices: + data_uses = row["data_uses"] + + # Do a blunt find/replace + updated_data_uses = [ + _replace_matching_data_label(use, data_use_map) for use in data_uses + ] + + update_query: TextClause = text( + f"UPDATE {table} SET data_uses= :updated_uses WHERE id= :notice_id" + ) + bind.execute( + update_query, + {"notice_id": row["id"], "updated_uses": updated_data_uses}, + ) + + +def update_consent(bind: Connection, data_use_map: Dict[str, str]) -> None: + """ + Update Consent objects in the database. + """ + + # Update the Consent table + existing_consents: ResultProxy = bind.execute( + text("SELECT provided_identity_id, data_use FROM consent;") + ) + + for row in existing_consents: + updated_use: str = _replace_matching_data_label(row["data_use"], data_use_map) + + update_label_query: TextClause = text( + "UPDATE consent SET data_use= :updated_label WHERE provided_identity_id= :key AND data_use = :old_use" + ) + bind.execute( + update_label_query, + { + "key": row["provided_identity_id"], + "old_use": row["data_use"], + "updated_label": updated_use, + }, + ) + + # Update the Privacy Request Table + existing_privacy_requests: ResultProxy = bind.execute( + text("select id, consent_preferences from privacyrequest;") + ) + + for row in existing_privacy_requests: + preferences: List[Dict] = row["consent_preferences"] + + if preferences: + for index, preference in enumerate(preferences): + preferences[index]["data_use"] = _replace_matching_data_label( + data_label=preference["data_use"], data_label_map=data_use_map + ) + + update_pr_query: TextClause = text( + "UPDATE privacyrequest SET consent_preferences= :updated_preferences WHERE id= :id" + ) + bind.execute( + update_pr_query, + {"id": row["id"], "updated_preferences": json.dumps(preferences)}, + ) + + # Update the Consent Request Table + existing_consent_requests: ResultProxy = bind.execute( + text("select id, preferences from consentrequest;") + ) + + for row in existing_consent_requests: + preferences: List[Dict] = row["preferences"] + + if preferences: + for index, preference in enumerate(preferences): + preferences[index]["data_use"] = _replace_matching_data_label( + data_label=preference["data_use"], data_label_map=data_use_map + ) + + update_cr_query: TextClause = text( + "UPDATE consentrequest SET preferences= :updated_preferences WHERE id= :id" + ) + bind.execute( + update_cr_query, + {"id": row["id"], "updated_preferences": json.dumps(preferences)}, + ) diff --git a/src/fides/api/alembic/migrations/versions/1ea164cee8bc_fideslang_2_data_migrations.py b/src/fides/api/alembic/migrations/versions/1ea164cee8bc_fideslang_2_data_migrations.py index 22f789fd83..294b6a5a63 100644 --- a/src/fides/api/alembic/migrations/versions/1ea164cee8bc_fideslang_2_data_migrations.py +++ b/src/fides/api/alembic/migrations/versions/1ea164cee8bc_fideslang_2_data_migrations.py @@ -6,14 +6,23 @@ """ -import json -from typing import Dict, List, Optional +from typing import Dict from alembic import op from loguru import logger from sqlalchemy import text -from sqlalchemy.engine import Connection, ResultProxy -from sqlalchemy.sql.elements import TextClause +from sqlalchemy.engine import Connection + +from fides.api.alembic.migrations.helpers.fideslang_migration_functions import ( + update_consent, + update_ctl_policies, + update_data_label_tables, + update_datasets_data_categories, + update_privacy_declarations, + update_privacy_notices, + update_rule_targets, + update_system_ingress_egress_data_categories, +) # revision identifiers, used by Alembic. revision = "1ea164cee8bc" @@ -65,349 +74,6 @@ value: key for key, value in data_category_upgrades.items() } - -def _replace_matching_data_label( - data_label: str, data_label_map: Dict[str, str] -) -> str: - """ - Helper function to do string replacement for updated fides_keys. - """ - for old, new in data_label_map.items(): - if data_label and data_label.startswith(old): - return data_label.replace(old, new) - - return data_label - - -def update_privacy_declarations( - bind: Connection, data_use_map: Dict[str, str], data_category_map: Dict[str, str] -) -> None: - """ - Upgrade or downgrade Privacy Declarations for Fideslang 2.0 - - This updates: - - data uses - - data categories - - shared categories - """ - existing_privacy_declarations: ResultProxy = bind.execute( - text( - "SELECT id, data_use, data_categories, shared_categories FROM privacydeclaration;" - ) - ) - for row in existing_privacy_declarations: - data_use: str = _replace_matching_data_label(row["data_use"], data_use_map) - data_categories: List[str] = [ - _replace_matching_data_label(data_category, data_category_map) - for data_category in row["data_categories"] - ] - shared_categories: List[str] = [ - _replace_matching_data_label(data_category, data_category_map) - for data_category in row["shared_categories"] - ] - - update_query: TextClause = text( - "UPDATE privacydeclaration SET data_use = :updated_use, data_categories = :updated_categories, shared_categories = :updated_shared WHERE id= :declaration_id" - ) - bind.execute( - update_query, - { - "declaration_id": row["id"], - "updated_use": data_use, - "updated_categories": data_categories, - "updated_shared": shared_categories, - }, - ) - - -def update_ctl_policies( - bind: Connection, data_use_map: Dict[str, str], data_category_map: Dict[str, str] -) -> None: - """ - Upgrade or downgrade Policy Rules for Fideslang 2.0 - - This updates: - - data uses - - data categories - """ - existing_ctl_policies: ResultProxy = bind.execute( - text("SELECT id, rules FROM ctl_policies;") - ) - - for row in existing_ctl_policies: - rules: List[Dict] = row["rules"] - - for i, rule in enumerate(rules or []): - data_uses: List = rule.get("data_uses", {}).get("values", []) - rules[i]["data_uses"]["values"] = [ - _replace_matching_data_label(use, data_use_map) for use in data_uses - ] - - data_categories: List = rule.get("data_categories", {}).get("values", []) - rules[i]["data_categories"]["values"] = [ - _replace_matching_data_label(category, data_category_map) - for category in data_categories - ] - - update_data_use_query: TextClause = text( - "UPDATE ctl_policies SET rules = :updated_rules WHERE id= :policy_id" - ) - bind.execute( - update_data_use_query, - {"policy_id": row["id"], "updated_rules": json.dumps(rules)}, - ) - - -def update_data_label_tables( - bind: Connection, update_map: Dict[str, str], table_name: str -) -> None: - """ - Upgrade or downgrade Data Labels for Fideslang 2.0 - """ - existing_labels: ResultProxy = bind.execute( - text(f"SELECT fides_key, parent_key FROM {table_name};") - ) - for row in existing_labels: - old_key = row["fides_key"] - new_key = _replace_matching_data_label(old_key, update_map) - - old_parent = row["parent_key"] - new_parent = _replace_matching_data_label(old_parent, update_map) - - update_query: TextClause = text( - f"UPDATE {table_name} SET fides_key = :updated_key, parent_key = :updated_parent WHERE fides_key = :old_key" - ) - bind.execute( - update_query, - { - "updated_key": new_key, - "old_key": old_key, - "updated_parent": new_parent, - }, - ) - - -def update_rule_targets(bind: Connection, data_label_map: Dict[str, str]) -> None: - """Upgrade ruletargets to use the new data categories.""" - - existing_rule_targets: ResultProxy = bind.execute( - text("SELECT id, data_category FROM ruletarget;") - ) - - for row in existing_rule_targets: - data_category = row["data_category"] - - if not data_category: - continue - - updated_category: str = _replace_matching_data_label( - data_category, data_label_map - ) - - update_data_category_query: TextClause = text( - "UPDATE ruletarget SET data_category = :updated_category WHERE id= :target_id" - ) - bind.execute( - update_data_category_query, - {"target_id": row["id"], "updated_category": updated_category}, - ) - - -def update_datasets_data_categories( - bind: Connection, data_label_map: Dict[str, str] -) -> None: - """Upgrade the datasets and their collections/fields in the database to use the new data categories.""" - - # Get all datasets out of the database - existing_datasets: ResultProxy = bind.execute( - text("SELECT id, data_categories, collections FROM ctl_datasets;") - ) - - for row in existing_datasets: - # Update data categories at the top level - dataset_data_categories: Optional[List[str]] = row["data_categories"] - - if dataset_data_categories: - updated_categories: List[str] = [ - _replace_matching_data_label(category, data_label_map) - for category in dataset_data_categories - ] - - update_label_query: TextClause = text( - "UPDATE ctl_datasets SET data_categories = :updated_labels WHERE id= :dataset_id" - ) - bind.execute( - update_label_query, - {"dataset_id": row["id"], "updated_labels": updated_categories}, - ) - - # Update the collections objects - collections: str = json.dumps(row["collections"]) - if collections: - for key, value in data_label_map.items(): - collections = collections.replace(key, value) - - update_collections_query: TextClause = text( - "UPDATE ctl_datasets SET collections = :updated_collections WHERE id= :dataset_id" - ) - bind.execute( - update_collections_query, - {"dataset_id": row["id"], "updated_collections": collections}, - ) - - -def update_system_ingress_egress_data_categories( - bind: Connection, data_label_map: Dict[str, str] -) -> None: - """Upgrade or downgrade data categories on system DataFlow objects (egress/ingress)""" - existing_systems: ResultProxy = bind.execute( - text("SELECT id, egress, ingress FROM ctl_systems;") - ) - - for row in existing_systems: - ingress = row["ingress"] - egress = row["egress"] - - # Do a blunt find/replace - if ingress: - for item in ingress: - if item["data_categories"]: - item["data_categories"] = [ - _replace_matching_data_label(category, data_label_map) - for category in item["data_categories"] - ] - - update_ingress_query: TextClause = text( - "UPDATE ctl_systems SET ingress = :updated_ingress WHERE id= :system_id" - ) - bind.execute( - update_ingress_query, - {"system_id": row["id"], "updated_ingress": json.dumps(ingress)}, - ) - - if egress: - for item in egress: - if item["data_categories"]: - item["data_categories"] = [ - _replace_matching_data_label(category, data_label_map) - for category in item["data_categories"] - ] - - update_egress_query: TextClause = text( - "UPDATE ctl_systems SET egress = :updated_egress WHERE id= :system_id" - ) - bind.execute( - update_egress_query, - {"system_id": row["id"], "updated_egress": json.dumps(egress)}, - ) - - -def update_privacy_notices(bind: Connection, data_use_map: Dict[str, str]) -> None: - """ - Update the Privacy Notice Models. - - This includes the following models: - - PrivacyNotice - - PrivacyNoticeHistory - - PrivacyNoticeTemplate - """ - privacy_notice_tables = [ - "privacynotice", - "privacynoticetemplate", - "privacynoticehistory", - ] - for table in privacy_notice_tables: - existing_notices: ResultProxy = bind.execute( - text(f"SELECT id, data_uses FROM {table};") - ) - - for row in existing_notices: - data_uses = row["data_uses"] - - # Do a blunt find/replace - updated_data_uses = [ - _replace_matching_data_label(use, data_use_map) for use in data_uses - ] - - update_query: TextClause = text( - f"UPDATE {table} SET data_uses= :updated_uses WHERE id= :notice_id" - ) - bind.execute( - update_query, - {"notice_id": row["id"], "updated_uses": updated_data_uses}, - ) - - -def update_consent(bind: Connection, data_use_map: Dict[str, str]) -> None: - """ - Update Consent objects in the database. - """ - - # Update the Consent table - existing_consents: ResultProxy = bind.execute( - text("SELECT provided_identity_id, data_use FROM consent;") - ) - - for row in existing_consents: - updated_use: str = _replace_matching_data_label(row["data_use"], data_use_map) - - update_label_query: TextClause = text( - "UPDATE consent SET data_use= :updated_label WHERE provided_identity_id= :key AND data_use = :old_use" - ) - bind.execute( - update_label_query, - { - "key": row["provided_identity_id"], - "old_use": row["data_use"], - "updated_label": updated_use, - }, - ) - - # Update the Privacy Request Table - existing_privacy_requests: ResultProxy = bind.execute( - text("select id, consent_preferences from privacyrequest;") - ) - - for row in existing_privacy_requests: - preferences: List[Dict] = row["consent_preferences"] - - if preferences: - for index, preference in enumerate(preferences): - preferences[index]["data_use"] = _replace_matching_data_label( - data_label=preference["data_use"], data_label_map=data_use_map - ) - - update_pr_query: TextClause = text( - "UPDATE privacyrequest SET consent_preferences= :updated_preferences WHERE id= :id" - ) - bind.execute( - update_pr_query, - {"id": row["id"], "updated_preferences": json.dumps(preferences)}, - ) - - # Update the Consent Request Table - existing_consent_requests: ResultProxy = bind.execute( - text("select id, preferences from consentrequest;") - ) - - for row in existing_consent_requests: - preferences: List[Dict] = row["preferences"] - - if preferences: - for index, preference in enumerate(preferences): - preferences[index]["data_use"] = _replace_matching_data_label( - data_label=preference["data_use"], data_label_map=data_use_map - ) - - update_cr_query: TextClause = text( - "UPDATE consentrequest SET preferences= :updated_preferences WHERE id= :id" - ) - bind.execute( - update_cr_query, - {"id": row["id"], "updated_preferences": json.dumps(preferences)}, - ) - - def upgrade() -> None: """ Given that our advice is to turn off auto-migrations and make a db copy, diff --git a/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py b/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py new file mode 100644 index 0000000000..af31adebea --- /dev/null +++ b/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py @@ -0,0 +1,91 @@ +"""migrate remaining data categories + +Revision ID: a6d9cdfcc7dc +Revises: 31493e48c1d8 +Create Date: 2024-07-09 17:51:17.542889 + +""" + +from typing import Dict + +from alembic import op +from loguru import logger +from sqlalchemy import text +from sqlalchemy.engine import Connection + +from fides.api.alembic.migrations.helpers.fideslang_migration_functions import ( + update_ctl_policies, + update_data_label_tables, + update_datasets_data_categories, + update_privacy_declarations, + update_rule_targets, + update_system_ingress_egress_data_categories, +) +from fides.api.db.seed import load_default_dsr_policies + +# revision identifiers, used by Alembic. +revision = "a6d9cdfcc7dc" +down_revision = "31493e48c1d8" +branch_labels = None +depends_on = None + +############### +## Data Uses ## +############### +""" +The `key` is the old value, the `value` is the new value +These are ordered specifically so that string replacement works on both parent and child items +""" +data_use_upgrades: Dict[str, str] = {} + +##################### +## Data Categories ## +##################### +""" +The `key` is the old value, the `value` is the new value +These are ordered specifically so that string replacement works on both parent and child items +""" +data_category_upgrades: Dict[str, str] = { + "user.biometric_health": "user.biometric.health", + "user.credentials.biometric_credentials": "user.authorization.biometric", + "user.credentials.password": "user.authorization.password", +} + + +def upgrade() -> None: + """ + Given that our advice is to turn off auto-migrations and make a db copy, + there is no "downgrade" version of this. It also wouldn't be feasible given + it would require an older version of fideslang. + """ + bind: Connection = op.get_bind() + + logger.info("Removing old default data categories") + bind.execute(text("DELETE FROM ctl_data_categories WHERE is_default = TRUE;")) + + logger.info("Upgrading additional Privacy Declarations for Fideslang 2.0") + update_privacy_declarations(bind, data_use_upgrades, data_category_upgrades) + + logger.info("Upgrading additional Policy Rules for Fideslang 2.0") + update_ctl_policies(bind, data_use_upgrades, data_category_upgrades) + + logger.info("Upgrading additional Data Categories in Datasets") + update_datasets_data_categories(bind, data_category_upgrades) + + logger.info("Upgrading additional Data Categories in System egress/ingress") + update_system_ingress_egress_data_categories(bind, data_category_upgrades) + + logger.info("Updating additional Rule Targets") + update_rule_targets(bind, data_category_upgrades) + + logger.info("Upgrading additional Taxonomy Items for Fideslang 2.0") + update_data_label_tables(bind, data_category_upgrades, "ctl_data_categories") + + +def downgrade() -> None: + """ + This migration does not support downgrades. + """ + logger.info( + "Data migrations from Fideslang 2.0 to Fideslang 1.0 are not supported." + ) From f767315dd2d203ffcc1fc9f6236336ce02706547 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Tue, 9 Jul 2024 15:59:40 -0700 Subject: [PATCH 02/10] Adding update_default_dsr_policies --- ...cc7dc_migrate_remaining_data_categories.py | 1 - src/fides/api/db/seed.py | 47 +++++++++++++++---- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py b/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py index af31adebea..90b6ab1b78 100644 --- a/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py +++ b/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py @@ -21,7 +21,6 @@ update_rule_targets, update_system_ingress_egress_data_categories, ) -from fides.api.db.seed import load_default_dsr_policies # revision identifiers, used by Alembic. revision = "a6d9cdfcc7dc" diff --git a/src/fides/api/db/seed.py b/src/fides/api/db/seed.py index 8d0a877995..60d755a7a8 100644 --- a/src/fides/api/db/seed.py +++ b/src/fides/api/db/seed.py @@ -216,11 +216,6 @@ def load_default_access_policy( except KeyOrNameAlreadyExists: # pragma: no cover # This rule target already exists against the Policy pass - else: - log.debug( - f"Skipping {DEFAULT_ACCESS_POLICY_RULE} creation as it already exists in the database" - ) - def load_default_erasure_policy( db_session: Session, client_id: str, default_data_categories: List[str] @@ -281,10 +276,6 @@ def load_default_erasure_policy( except KeyOrNameAlreadyExists: # pragma: no cover # This rule target already exists against the Policy pass - else: - log.debug( - f"Skipping {DEFAULT_ERASURE_POLICY_RULE} creation as it already exists in the database" - ) log.info(f"Creating default policy: {DEFAULT_CONSENT_POLICY}...") consent_policy = Policy.create_or_update( @@ -346,6 +337,43 @@ def load_default_dsr_policies() -> None: log.info("All default policies & rules created") +def update_default_dsr_policies() -> None: + """ + Checks whether DSR execution policies exist in the database, and + inserts them to target a default set of data categories if not. + """ + with sync_session() as db_session: # type: ignore[attr-defined] + new_data_categories = [ + "user.content", + "user.privacy_preferences", + ] + + access_rule: Optional[FidesBase] = Rule.get_by( + db_session, field="key", value=DEFAULT_ACCESS_POLICY_RULE + ) + erasure_rule: Optional[FidesBase] = Rule.get_by( + db_session, field="key", value=DEFAULT_ERASURE_POLICY_RULE + ) + for rule in [access_rule, erasure_rule]: + for target in new_data_categories: + data = { + "data_category": target, + "rule_id": rule.id, + } + compound_key = to_snake_case(RuleTarget.get_compound_key(data=data)) + data["key"] = compound_key + try: + RuleTarget.create( + db=db_session, + data=data, + ) + except KeyOrNameAlreadyExists: # pragma: no cover + # This rule target already exists against the Policy + pass + + log.info("All default policies & rules updated") + + async def load_default_organization(async_session: AsyncSession) -> None: """ Seed the database with a default organization unless @@ -416,6 +444,7 @@ async def load_default_resources(async_session: AsyncSession) -> None: await load_default_organization(async_session) await load_default_taxonomy(async_session) load_default_dsr_policies() + update_default_dsr_policies() async def load_samples(async_session: AsyncSession) -> None: From 7392ede806c7888c9c161597c56bce9a97d38999 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Tue, 9 Jul 2024 16:01:03 -0700 Subject: [PATCH 03/10] Re-adding else statements --- src/fides/api/db/seed.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/fides/api/db/seed.py b/src/fides/api/db/seed.py index 60d755a7a8..140e32216b 100644 --- a/src/fides/api/db/seed.py +++ b/src/fides/api/db/seed.py @@ -216,6 +216,10 @@ def load_default_access_policy( except KeyOrNameAlreadyExists: # pragma: no cover # This rule target already exists against the Policy pass + else: + log.debug( + f"Skipping {DEFAULT_ACCESS_POLICY_RULE} creation as it already exists in the database" + ) def load_default_erasure_policy( db_session: Session, client_id: str, default_data_categories: List[str] @@ -276,6 +280,10 @@ def load_default_erasure_policy( except KeyOrNameAlreadyExists: # pragma: no cover # This rule target already exists against the Policy pass + else: + log.debug( + f"Skipping {DEFAULT_ERASURE_POLICY_RULE} creation as it already exists in the database" + ) log.info(f"Creating default policy: {DEFAULT_CONSENT_POLICY}...") consent_policy = Policy.create_or_update( From 307ebeade8d18624feb4d188f66d52f8982b929e Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Tue, 9 Jul 2024 16:04:15 -0700 Subject: [PATCH 04/10] Re-adding user.behavior category --- src/fides/api/db/seed.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fides/api/db/seed.py b/src/fides/api/db/seed.py index 140e32216b..a1dc54b81e 100644 --- a/src/fides/api/db/seed.py +++ b/src/fides/api/db/seed.py @@ -352,6 +352,7 @@ def update_default_dsr_policies() -> None: """ with sync_session() as db_session: # type: ignore[attr-defined] new_data_categories = [ + "user.behavior", "user.content", "user.privacy_preferences", ] From 166d0b9de744e14c965a021d063cbbdaca26c7cb Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Tue, 16 Jul 2024 15:17:04 -0700 Subject: [PATCH 05/10] Adding logic to remove conflicting rule targets --- .../migrations/helpers/database_functions.py | 6 + .../helpers/fideslang_migration_functions.py | 105 ++++++++++++++++++ ...ea164cee8bc_fideslang_2_data_migrations.py | 1 + ...cc7dc_migrate_remaining_data_categories.py | 9 ++ src/fides/api/db/seed.py | 40 +------ .../test_data_category_migration.py | 55 +++++++++ 6 files changed, 177 insertions(+), 39 deletions(-) create mode 100644 src/fides/api/alembic/migrations/helpers/database_functions.py create mode 100644 tests/ops/migration_tests/test_data_category_migration.py diff --git a/src/fides/api/alembic/migrations/helpers/database_functions.py b/src/fides/api/alembic/migrations/helpers/database_functions.py new file mode 100644 index 0000000000..a6b171f567 --- /dev/null +++ b/src/fides/api/alembic/migrations/helpers/database_functions.py @@ -0,0 +1,6 @@ +import uuid + + +def generate_record_id(prefix): + """Generates an ID that can be used for a database table row ID.""" + return prefix + "_" + str(uuid.uuid4()) diff --git a/src/fides/api/alembic/migrations/helpers/fideslang_migration_functions.py b/src/fides/api/alembic/migrations/helpers/fideslang_migration_functions.py index abc34aa7d9..719ff3caff 100644 --- a/src/fides/api/alembic/migrations/helpers/fideslang_migration_functions.py +++ b/src/fides/api/alembic/migrations/helpers/fideslang_migration_functions.py @@ -1,10 +1,21 @@ import json from typing import Dict, List, Optional +from loguru import logger from sqlalchemy import text from sqlalchemy.engine import Connection, ResultProxy +from sqlalchemy.orm import Session +from sqlalchemy.orm.session import Session from sqlalchemy.sql.elements import TextClause +from fides.api.alembic.migrations.helpers.database_functions import generate_record_id +from fides.api.common_exceptions import KeyOrNameAlreadyExists +from fides.api.db.base_class import FidesBase +from fides.api.db.seed import DEFAULT_ACCESS_POLICY_RULE, DEFAULT_ERASURE_POLICY_RULE +from fides.api.models.policy import Rule, RuleTarget +from fides.api.schemas.policy import ActionType +from fides.api.util.text import to_snake_case + def _replace_matching_data_label( data_label: str, data_label_map: Dict[str, str] @@ -346,3 +357,97 @@ def update_consent(bind: Connection, data_use_map: Dict[str, str]) -> None: update_cr_query, {"id": row["id"], "updated_preferences": json.dumps(preferences)}, ) + + +def remove_conflicting_rule_targets(db: Session): + """ + Iterates through all of the erasure policies and removes level 3 data categories in favor of level 2 data categories. + + For example: user.demographic is preserved over user.demographic.* + """ + erasure_rules = Rule.filter( + db=db, conditions=(Rule.action_type == ActionType.erasure) + ).all() + + for rule in erasure_rules: + all_categories = {target.data_category for target in rule.targets} + + rule_targets_to_remove = [] + for target in rule.targets: + parts = target.data_category.split(".") + if len(parts) == 3: + parent_category = f"{parts[0]}.{parts[1]}" + if parent_category in all_categories: + rule_targets_to_remove.append(target) + + for rule_target in rule_targets_to_remove: + logger.info( + f"Removing conflicting rule target {rule_target.data_category} for rule {rule.key}" + ) + db.delete(rule_target) + + db.commit() + + +def update_default_dsr_policies(db: Session) -> None: + """ + Updates the default policies with new data categories using manual insertion. + """ + + new_data_categories = [ + "user.behavior", + "user.content", + "user.privacy_preferences", + ] + + rules: List[Rule] = ( + db.query(Rule) + .filter(Rule.key.in_([DEFAULT_ACCESS_POLICY_RULE, DEFAULT_ERASURE_POLICY_RULE])) + .all() + ) + + if not rules: + logger.info("No default policies were found to update") + return + + updates_made = False + for rule in rules: + for data_category in new_data_categories: + compound_key = to_snake_case(f"{rule.id}_{data_category}") + + # check if the rule target already exists + existing_target = RuleTarget.filter( + db=db, + conditions=( + (RuleTarget.rule_id == rule.id) + & (RuleTarget.data_category == data_category) + ), + ).first() + + if existing_target is None: + # Insert rule targets directly into the database to bypass validation checks. + # Invalid entries are removed in remove_conflicting_rule_targets + db.execute( + "INSERT INTO ruletarget (id, name, key, data_category, rule_id) " + "VALUES (:id, :name, :key, :data_category, :rule_id)", + { + "id": generate_record_id("rul"), + "name": f"{rule.id}-{data_category}", + "key": compound_key, + "data_category": data_category, + "rule_id": rule.id, + }, + ) + logger.info( + f"Inserted new rule target: {data_category} for rule {rule.key}" + ) + updates_made = True + else: + logger.info( + f"Rule target already exists: {data_category} for rule {rule.key}" + ) + + if updates_made: + logger.info("The default policies have been updated with new data categories") + else: + logger.info("No updates were necessary for the default policies") diff --git a/src/fides/api/alembic/migrations/versions/1ea164cee8bc_fideslang_2_data_migrations.py b/src/fides/api/alembic/migrations/versions/1ea164cee8bc_fideslang_2_data_migrations.py index 294b6a5a63..c11fc227b3 100644 --- a/src/fides/api/alembic/migrations/versions/1ea164cee8bc_fideslang_2_data_migrations.py +++ b/src/fides/api/alembic/migrations/versions/1ea164cee8bc_fideslang_2_data_migrations.py @@ -74,6 +74,7 @@ value: key for key, value in data_category_upgrades.items() } + def upgrade() -> None: """ Given that our advice is to turn off auto-migrations and make a db copy, diff --git a/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py b/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py index 90b6ab1b78..3982c53b78 100644 --- a/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py +++ b/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py @@ -12,11 +12,14 @@ from loguru import logger from sqlalchemy import text from sqlalchemy.engine import Connection +from sqlalchemy.orm.session import Session from fides.api.alembic.migrations.helpers.fideslang_migration_functions import ( + remove_conflicting_rule_targets, update_ctl_policies, update_data_label_tables, update_datasets_data_categories, + update_default_dsr_policies, update_privacy_declarations, update_rule_targets, update_system_ingress_egress_data_categories, @@ -80,6 +83,12 @@ def upgrade() -> None: logger.info("Upgrading additional Taxonomy Items for Fideslang 2.0") update_data_label_tables(bind, data_category_upgrades, "ctl_data_categories") + session = Session(bind=bind) + # insert new rule targets directly into the database for the default policies + update_default_dsr_policies(session) + # remove conflicting rule targets from all erasure policies + remove_conflicting_rule_targets(session) + def downgrade() -> None: """ diff --git a/src/fides/api/db/seed.py b/src/fides/api/db/seed.py index a1dc54b81e..8d0a877995 100644 --- a/src/fides/api/db/seed.py +++ b/src/fides/api/db/seed.py @@ -221,6 +221,7 @@ def load_default_access_policy( f"Skipping {DEFAULT_ACCESS_POLICY_RULE} creation as it already exists in the database" ) + def load_default_erasure_policy( db_session: Session, client_id: str, default_data_categories: List[str] ) -> None: @@ -345,44 +346,6 @@ def load_default_dsr_policies() -> None: log.info("All default policies & rules created") -def update_default_dsr_policies() -> None: - """ - Checks whether DSR execution policies exist in the database, and - inserts them to target a default set of data categories if not. - """ - with sync_session() as db_session: # type: ignore[attr-defined] - new_data_categories = [ - "user.behavior", - "user.content", - "user.privacy_preferences", - ] - - access_rule: Optional[FidesBase] = Rule.get_by( - db_session, field="key", value=DEFAULT_ACCESS_POLICY_RULE - ) - erasure_rule: Optional[FidesBase] = Rule.get_by( - db_session, field="key", value=DEFAULT_ERASURE_POLICY_RULE - ) - for rule in [access_rule, erasure_rule]: - for target in new_data_categories: - data = { - "data_category": target, - "rule_id": rule.id, - } - compound_key = to_snake_case(RuleTarget.get_compound_key(data=data)) - data["key"] = compound_key - try: - RuleTarget.create( - db=db_session, - data=data, - ) - except KeyOrNameAlreadyExists: # pragma: no cover - # This rule target already exists against the Policy - pass - - log.info("All default policies & rules updated") - - async def load_default_organization(async_session: AsyncSession) -> None: """ Seed the database with a default organization unless @@ -453,7 +416,6 @@ async def load_default_resources(async_session: AsyncSession) -> None: await load_default_organization(async_session) await load_default_taxonomy(async_session) load_default_dsr_policies() - update_default_dsr_policies() async def load_samples(async_session: AsyncSession) -> None: diff --git a/tests/ops/migration_tests/test_data_category_migration.py b/tests/ops/migration_tests/test_data_category_migration.py new file mode 100644 index 0000000000..cfe5dd8194 --- /dev/null +++ b/tests/ops/migration_tests/test_data_category_migration.py @@ -0,0 +1,55 @@ +import pytest + +from fides.api.alembic.migrations.helpers.database_functions import generate_record_id +from fides.api.alembic.migrations.helpers.fideslang_migration_functions import ( + remove_conflicting_rule_targets, +) +from fides.api.common_exceptions import KeyOrNameAlreadyExists, PolicyValidationError +from fides.api.db.seed import DEFAULT_ERASURE_POLICY_RULE +from fides.api.models.policy import Rule, RuleTarget + + +class TestDataCategoryMigrationFunctions: + def test_remove_conflicting_rule_targets(self, db): + + # prep the default erasure rule for testing by inserting a conflicting data category + # directly into the database and bypassing the checks on RuleTarget.create + erasure_rule = Rule.get_by(db, field="key", value=DEFAULT_ERASURE_POLICY_RULE) + erasure_rule_id = erasure_rule.id + db.execute( + "INSERT INTO ruletarget (id, name, key, data_category, rule_id) VALUES (:id, :name, :key, :data_category, :rule_id)", + { + "id": generate_record_id("rul"), + "name": f"{erasure_rule_id}-user.biometric.health", + "key": f"{erasure_rule_id}-userbiometrichealth", + "data_category": "user.biometric.health", + "rule_id": erasure_rule_id, + }, + ) + db.commit() + + with pytest.raises(PolicyValidationError) as exc: + RuleTarget.create( + db=db, + data={ + "data_category": "user.biometric", + "rule_id": erasure_rule_id, + }, + ) + assert ( + "Policy rules are invalid, action conflict in erasure rules detected for categories user.biometric.health and user.biometric" + in str(exc) + ) + + remove_conflicting_rule_targets(db) + + # verify we no longer get a validation error and have moved + # instead to a KeyOrNameAlreadyExists exception + with pytest.raises(KeyOrNameAlreadyExists): + RuleTarget.create( + db=db, + data={ + "data_category": "user.biometric", + "rule_id": erasure_rule_id, + }, + ) From a7f1fae4f9b28f624eb2bb5dfcab40c1a257631d Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Tue, 16 Jul 2024 15:20:51 -0700 Subject: [PATCH 06/10] Misc cleanup --- .../migrations/helpers/fideslang_migration_functions.py | 2 -- .../migrations/versions/14acee6f5459_translation_data.py | 8 +++----- ...5f96f13be8ec_property_specific_messaging_migration_.py | 5 +---- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/fides/api/alembic/migrations/helpers/fideslang_migration_functions.py b/src/fides/api/alembic/migrations/helpers/fideslang_migration_functions.py index 719ff3caff..0abcca2eec 100644 --- a/src/fides/api/alembic/migrations/helpers/fideslang_migration_functions.py +++ b/src/fides/api/alembic/migrations/helpers/fideslang_migration_functions.py @@ -9,8 +9,6 @@ from sqlalchemy.sql.elements import TextClause from fides.api.alembic.migrations.helpers.database_functions import generate_record_id -from fides.api.common_exceptions import KeyOrNameAlreadyExists -from fides.api.db.base_class import FidesBase from fides.api.db.seed import DEFAULT_ACCESS_POLICY_RULE, DEFAULT_ERASURE_POLICY_RULE from fides.api.models.policy import Rule, RuleTarget from fides.api.schemas.policy import ActionType diff --git a/src/fides/api/alembic/migrations/versions/14acee6f5459_translation_data.py b/src/fides/api/alembic/migrations/versions/14acee6f5459_translation_data.py index f654495db1..689fc04db4 100644 --- a/src/fides/api/alembic/migrations/versions/14acee6f5459_translation_data.py +++ b/src/fides/api/alembic/migrations/versions/14acee6f5459_translation_data.py @@ -17,6 +17,8 @@ from sqlalchemy import text from sqlalchemy.engine import ResultProxy +from fides.api.alembic.migrations.helpers.database_functions import generate_record_id + # revision identifiers, used by Alembic. revision = "14acee6f5459" @@ -179,10 +181,6 @@ } -def generate_record_id(prefix): - return prefix + "_" + str(uuid.uuid4()) - - # this ties a DB record ID to our logical identifier of the type of OOB experience. # the DB record ID is based on what we've defined in the associated config yml. class DefaultExperienceConfigTypes(Enum): @@ -847,7 +845,7 @@ def migrate_notices(bind): SELECT :record_id, name, description, origin, consent_mechanism, data_uses, :new_version, disabled, enforcement_level, has_gpc_flag, internal_description, notice_key, gpp_field_mapping, framework, :language, title, translation_id, privacy_notice_id FROM privacynoticehistory WHERE version = :current_version AND - privacy_notice_id = :privacy_notice_id + privacy_notice_id = :privacy_notice_id ORDER BY created_at DESC LIMIT 1 """ ) diff --git a/src/fides/api/alembic/migrations/versions/5f96f13be8ec_property_specific_messaging_migration_.py b/src/fides/api/alembic/migrations/versions/5f96f13be8ec_property_specific_messaging_migration_.py index bc8e207283..dd33db23e6 100644 --- a/src/fides/api/alembic/migrations/versions/5f96f13be8ec_property_specific_messaging_migration_.py +++ b/src/fides/api/alembic/migrations/versions/5f96f13be8ec_property_specific_messaging_migration_.py @@ -22,6 +22,7 @@ from sqlalchemy.exc import IntegrityError from sqlalchemy.sql.elements import TextClause +from fides.api.alembic.migrations.helpers.database_functions import generate_record_id from fides.api.schemas.messaging.messaging import MessagingActionType # revision identifiers, used by Alembic. @@ -79,10 +80,6 @@ AUTO_MIGRATED_STRING = "auto-migrated" -def generate_record_id(prefix): - return prefix + "_" + str(uuid.uuid4()) - - def upgrade(): # ### commands auto generated by Alembic - please adjust! ### bind: Connection = op.get_bind() From 64a0d860a700626f76d913bd3ce7199ecada5e2c Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Mon, 22 Jul 2024 10:52:49 -0700 Subject: [PATCH 07/10] Fixing test --- tests/ops/migration_tests/test_data_category_migration.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/ops/migration_tests/test_data_category_migration.py b/tests/ops/migration_tests/test_data_category_migration.py index cfe5dd8194..a7885f0958 100644 --- a/tests/ops/migration_tests/test_data_category_migration.py +++ b/tests/ops/migration_tests/test_data_category_migration.py @@ -5,6 +5,7 @@ remove_conflicting_rule_targets, ) from fides.api.common_exceptions import KeyOrNameAlreadyExists, PolicyValidationError +from fides.api.db import seed from fides.api.db.seed import DEFAULT_ERASURE_POLICY_RULE from fides.api.models.policy import Rule, RuleTarget @@ -14,6 +15,7 @@ def test_remove_conflicting_rule_targets(self, db): # prep the default erasure rule for testing by inserting a conflicting data category # directly into the database and bypassing the checks on RuleTarget.create + seed.load_default_dsr_policies() erasure_rule = Rule.get_by(db, field="key", value=DEFAULT_ERASURE_POLICY_RULE) erasure_rule_id = erasure_rule.id db.execute( From e581f42ad66e78545efb9c62e508d5a72b626c67 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Mon, 22 Jul 2024 12:37:22 -0700 Subject: [PATCH 08/10] Fixing downrev --- .../versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py b/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py index 3982c53b78..e4e73f0d9b 100644 --- a/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py +++ b/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py @@ -27,7 +27,7 @@ # revision identifiers, used by Alembic. revision = "a6d9cdfcc7dc" -down_revision = "31493e48c1d8" +down_revision = "f712aa9429f4" branch_labels = None depends_on = None From 10594780e76fd7ab5f0ee38342bb1d7009984f77 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Thu, 25 Jul 2024 17:32:39 -0700 Subject: [PATCH 09/10] Changes based on PR feedback --- .../helpers/fideslang_migration_functions.py | 74 +++++++++++-------- ...cc7dc_migrate_remaining_data_categories.py | 14 ++-- .../test_data_category_migration.py | 6 +- 3 files changed, 55 insertions(+), 39 deletions(-) diff --git a/src/fides/api/alembic/migrations/helpers/fideslang_migration_functions.py b/src/fides/api/alembic/migrations/helpers/fideslang_migration_functions.py index 0abcca2eec..525d8b414d 100644 --- a/src/fides/api/alembic/migrations/helpers/fideslang_migration_functions.py +++ b/src/fides/api/alembic/migrations/helpers/fideslang_migration_functions.py @@ -4,13 +4,10 @@ from loguru import logger from sqlalchemy import text from sqlalchemy.engine import Connection, ResultProxy -from sqlalchemy.orm import Session -from sqlalchemy.orm.session import Session from sqlalchemy.sql.elements import TextClause from fides.api.alembic.migrations.helpers.database_functions import generate_record_id from fides.api.db.seed import DEFAULT_ACCESS_POLICY_RULE, DEFAULT_ERASURE_POLICY_RULE -from fides.api.models.policy import Rule, RuleTarget from fides.api.schemas.policy import ActionType from fides.api.util.text import to_snake_case @@ -357,37 +354,52 @@ def update_consent(bind: Connection, data_use_map: Dict[str, str]) -> None: ) -def remove_conflicting_rule_targets(db: Session): +def remove_conflicting_rule_targets(bind: Connection): """ Iterates through all of the erasure policies and removes level 3 data categories in favor of level 2 data categories. For example: user.demographic is preserved over user.demographic.* + + This is needed because RuleTarget.create() validates all sibling rule targets to prevent invalid masking scenarios. """ - erasure_rules = Rule.filter( - db=db, conditions=(Rule.action_type == ActionType.erasure) - ).all() + erasure_rules: ResultProxy = bind.execute( + text("SELECT id, key FROM rule WHERE action_type = :action_type"), + {"action_type": ActionType.erasure.value}, + ) for rule in erasure_rules: - all_categories = {target.data_category for target in rule.targets} + all_categories_query: ResultProxy = bind.execute( + text("SELECT data_category FROM ruletarget WHERE rule_id = :rule_id"), + {"rule_id": rule.id}, + ) + all_categories = {row.data_category for row in all_categories_query} + + rule_targets = bind.execute( + text("SELECT id, data_category FROM ruletarget WHERE rule_id = :rule_id"), + {"rule_id": rule.id}, + ) rule_targets_to_remove = [] - for target in rule.targets: + for target in rule_targets: parts = target.data_category.split(".") if len(parts) == 3: parent_category = f"{parts[0]}.{parts[1]}" if parent_category in all_categories: rule_targets_to_remove.append(target) + logger.info( + f"Marking conflicting rule target {target.data_category} for removal from rule {rule.key}" + ) - for rule_target in rule_targets_to_remove: - logger.info( - f"Removing conflicting rule target {rule_target.data_category} for rule {rule.key}" + if rule_targets_to_remove: + target_ids = [target.id for target in rule_targets_to_remove] + bind.execute( + text("DELETE FROM ruletarget WHERE id IN :target_ids"), + {"target_ids": tuple(target_ids)}, ) - db.delete(rule_target) + logger.info(f"Removed {len(target_ids)} conflicting rule targets") - db.commit() - -def update_default_dsr_policies(db: Session) -> None: +def update_default_dsr_policies(bind: Connection) -> None: """ Updates the default policies with new data categories using manual insertion. """ @@ -398,13 +410,17 @@ def update_default_dsr_policies(db: Session) -> None: "user.privacy_preferences", ] - rules: List[Rule] = ( - db.query(Rule) - .filter(Rule.key.in_([DEFAULT_ACCESS_POLICY_RULE, DEFAULT_ERASURE_POLICY_RULE])) - .all() + rules: ResultProxy = bind.execute( + text( + "SELECT id, key FROM rule WHERE key IN (:access_policy, :erasure_policy);" + ), + { + "access_policy": DEFAULT_ACCESS_POLICY_RULE, + "erasure_policy": DEFAULT_ERASURE_POLICY_RULE, + }, ) - if not rules: + if rules.rowcount == 0: logger.info("No default policies were found to update") return @@ -414,20 +430,20 @@ def update_default_dsr_policies(db: Session) -> None: compound_key = to_snake_case(f"{rule.id}_{data_category}") # check if the rule target already exists - existing_target = RuleTarget.filter( - db=db, - conditions=( - (RuleTarget.rule_id == rule.id) - & (RuleTarget.data_category == data_category) + existing_target: ResultProxy = bind.execute( + text( + "SELECT 1 FROM ruletarget WHERE rule_id = :rule_id AND data_category = :data_category" ), + {"rule_id": rule.id, "data_category": data_category}, ).first() if existing_target is None: # Insert rule targets directly into the database to bypass validation checks. # Invalid entries are removed in remove_conflicting_rule_targets - db.execute( - "INSERT INTO ruletarget (id, name, key, data_category, rule_id) " - "VALUES (:id, :name, :key, :data_category, :rule_id)", + bind.execute( + text( + "INSERT INTO ruletarget (id, name, key, data_category, rule_id) VALUES (:id, :name, :key, :data_category, :rule_id)" + ), { "id": generate_record_id("rul"), "name": f"{rule.id}-{data_category}", diff --git a/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py b/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py index e4e73f0d9b..43281c510e 100644 --- a/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py +++ b/src/fides/api/alembic/migrations/versions/a6d9cdfcc7dc_migrate_remaining_data_categories.py @@ -83,17 +83,15 @@ def upgrade() -> None: logger.info("Upgrading additional Taxonomy Items for Fideslang 2.0") update_data_label_tables(bind, data_category_upgrades, "ctl_data_categories") - session = Session(bind=bind) - # insert new rule targets directly into the database for the default policies - update_default_dsr_policies(session) - # remove conflicting rule targets from all erasure policies - remove_conflicting_rule_targets(session) + logger.info("Adding new rule targets to default policies") + update_default_dsr_policies(bind) + + logger.info("Removing conflicting rule targets from all erasure policies") + remove_conflicting_rule_targets(bind) def downgrade() -> None: """ This migration does not support downgrades. """ - logger.info( - "Data migrations from Fideslang 2.0 to Fideslang 1.0 are not supported." - ) + logger.info("Removal of additional Fideslang 2.0 data categories is unsupported.") diff --git a/tests/ops/migration_tests/test_data_category_migration.py b/tests/ops/migration_tests/test_data_category_migration.py index a7885f0958..f178e46c53 100644 --- a/tests/ops/migration_tests/test_data_category_migration.py +++ b/tests/ops/migration_tests/test_data_category_migration.py @@ -11,8 +11,8 @@ class TestDataCategoryMigrationFunctions: - def test_remove_conflicting_rule_targets(self, db): + def test_remove_conflicting_rule_targets(self, db): # prep the default erasure rule for testing by inserting a conflicting data category # directly into the database and bypassing the checks on RuleTarget.create seed.load_default_dsr_policies() @@ -43,7 +43,9 @@ def test_remove_conflicting_rule_targets(self, db): in str(exc) ) - remove_conflicting_rule_targets(db) + remove_conflicting_rule_targets(db.connection()) + + db.commit() # verify we no longer get a validation error and have moved # instead to a KeyOrNameAlreadyExists exception From 957fedbf7a6af9e19e7754167616784b29ca70c8 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Sat, 27 Jul 2024 18:47:46 -0700 Subject: [PATCH 10/10] Updating change log --- CHANGELOG.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3092cf7d88..c45e95d240 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,10 +37,9 @@ The types of changes are: - Fixed typo in the BigQuery integration description [#5120](https://github.com/ethyca/fides/pull/5120) - Fixed default values of Experience config toggles [#5123](https://github.com/ethyca/fides/pull/5123) - Skip indexing Custom Privacy Request Field array values [#5127](https://github.com/ethyca/fides/pull/5127) -- Fixed Admin UI issue where banner would dissapear in Experience Preview with GPC enabled [#5131](https://github.com/ethyca/fides/pull/5131) - -### Fixed +- Fixed Admin UI issue where banner would disappear in Experience Preview with GPC enabled [#5131](https://github.com/ethyca/fides/pull/5131) - Fixed not being able to edit a monitor from scheduled to not scheduled [#5114](https://github.com/ethyca/fides/pull/5114) +- Migrating missing Fideslang 2.0 data categories [#5073](https://github.com/ethyca/fides/pull/5073) ## [2.41.0](https://github.com/ethyca/fides/compare/2.40.0...2.41.0)