Skip to content

Commit

Permalink
Handle Keys and Collections with a double underscore (#3688)
Browse files Browse the repository at this point in the history
  • Loading branch information
SteveDMurphy committed Jul 1, 2023
1 parent 3f573a3 commit f5bf671
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 5 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ The types of changes are:
### Added
- Set `sslmode` to `prefer` if connecting to Redshift via ssh [#3685](https://github.com/ethyca/fides/pull/3685)

### Fixed
- Handle names with a double underscore when processing access and erasure requests [#3688](https://github.com/ethyca/fides/pull/3688)

## [2.15.0](https://github.com/ethyca/fides/compare/2.14.1...2.15.0)

### Added
Expand Down
14 changes: 12 additions & 2 deletions src/fides/api/task/graph_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,13 @@
from fides.api.task.refine_target_path import FieldPathNodeInput
from fides.api.task.task_resources import TaskResources
from fides.api.util.cache import get_cache
from fides.api.util.collection_util import NodeInput, Row, append, partition
from fides.api.util.collection_util import (
NodeInput,
Row,
append,
extract_key_for_address,
partition,
)
from fides.api.util.consent_util import add_errored_system_status_for_consent_reporting
from fides.api.util.logger import Pii
from fides.api.util.saas_util import FIDESOPS_GROUPED_INPUTS
Expand Down Expand Up @@ -733,7 +739,11 @@ def get_cached_data_for_erasures(
value_dict = cache.get_encoded_objects_by_prefix(
f"PLACEHOLDER_RESULTS__{privacy_request_id}"
)
return {k.split("__")[-1]: v for k, v in value_dict.items()}
number_of_leading_strings_to_exclude = 3
return {
extract_key_for_address(k, number_of_leading_strings_to_exclude): v
for k, v in value_dict.items()
}


def update_erasure_mapping_from_cache(
Expand Down
11 changes: 8 additions & 3 deletions src/fides/api/task/task_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
)
from fides.api.service.connectors.base_email_connector import BaseEmailConnector
from fides.api.util.cache import get_cache
from fides.api.util.collection_util import Row
from fides.api.util.collection_util import Row, extract_key_for_address


class Connections:
Expand Down Expand Up @@ -147,7 +147,11 @@ def get_all_cached_objects(self) -> Dict[str, Optional[List[Row]]]:
f"{self.request.id}__access_request"
)
# strip the leading request id and request type so the result maps address -> value
return {k.split("__")[-1]: v for k, v in value_dict.items()}
number_of_leading_strings_to_exclude = 2
return {
extract_key_for_address(k, number_of_leading_strings_to_exclude): v
for k, v in value_dict.items()
}

def cache_erasure(self, key: str, value: int) -> None:
"""Cache that a node's masking is complete. Object will be stored in redis under
Expand All @@ -163,7 +167,8 @@ def get_all_cached_erasures(self) -> Dict[str, int]:
f"{self.request.id}__erasure_request"
)
# strip the leading request id and request type so the result maps address -> value
return {k.split("__")[-1]: v for k, v in value_dict.items()} # type: ignore
number_of_leading_strings_to_exclude = 2
return {extract_key_for_address(k, number_of_leading_strings_to_exclude): v for k, v in value_dict.items()} # type: ignore

def write_execution_log( # pylint: disable=too-many-arguments
self,
Expand Down
18 changes: 18 additions & 0 deletions src/fides/api/util/collection_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,21 @@ def filter_nonempty_values(d: Optional[Dict[Any, Any]]) -> Dict[Any, Any]:
if d:
return {e[0]: e[1] for e in d.items() if e[1]}
return {}


def extract_key_for_address(
    full_request_id: str, number_of_leading_strings_to_exclude: int
) -> str:
    """
    Return the ``dataset:collection`` address embedded in a cache key.

    The key is expected to look like
    ``<leading>__<leading>__...__<dataset>:<collection>``, where the leading
    segments are joined with a double underscore. The number of leading
    segments differs between callers, so it is passed in explicitly.

    Double underscores inside the Dataset ``fides_key`` or the Collection name
    are preserved: only the first ``number_of_leading_strings_to_exclude``
    ``__`` separators are treated as prefix delimiters.

    Args:
        full_request_id: The full cache key, prefix included.
        number_of_leading_strings_to_exclude: How many ``__``-delimited
            leading segments to drop from the front of the key.

    Returns:
        The ``dataset:collection`` portion of the key.

    Raises:
        ValueError: If ``full_request_id`` contains no ``:`` separator.
    """
    # Split on the FIRST colon only, so that any further colons stay part of
    # the collection instead of breaking a two-value unpack.
    request_id_dataset, separator, collection = full_request_id.partition(":")
    if not separator:
        raise ValueError(
            f"Expected a ':' separating dataset and collection in {full_request_id!r}"
        )

    # maxsplit keeps any "__" in the dataset name itself intact: everything
    # after the excluded leading segments lands in the last element.
    dataset = request_id_dataset.split("__", number_of_leading_strings_to_exclude)[-1]
    return f"{dataset}:{collection}"
7 changes: 7 additions & 0 deletions tests/ops/task/test_task_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ def test_cache_object(self, db, privacy_request, policy, integration_manual_conf
"access_request__postgres_example:payment",
[{"id": 2, "ccn": "111-111-1111-1111", "customer_id": 1}],
)
resources.cache_object(
"access_request__postgres__double__underscore__example:double__underscore__collection",
[{"id": 3, "last_name": "Doe"}],
)
resources.cache_erasure("manual_example:filing-cabinet", 2)

# Only access results from "cache_object" are returned
Expand All @@ -24,6 +28,9 @@ def test_cache_object(self, db, privacy_request, policy, integration_manual_conf
{"id": 2, "ccn": "111-111-1111-1111", "customer_id": 1}
],
"postgres_example:customer": [{"id": 1, "last_name": "Doe"}],
"postgres__double__underscore__example:double__underscore__collection": [
{"id": 3, "last_name": "Doe"}
],
}

def test_cache_erasure(
Expand Down

0 comments on commit f5bf671

Please sign in to comment.