Skip to content

Commit

Permalink
tweaks based on code review
Browse files Browse the repository at this point in the history
  • Loading branch information
adamsachs committed Jun 28, 2023
1 parent 87edf48 commit 8565ca3
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 6 deletions.
11 changes: 11 additions & 0 deletions .fides/redis_dataset.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,17 @@ dataset:
data_categories: [system.operations]
fidesops_meta:
data_type: string[] # List of edges between the upstream collection and the current collection
- name: EN_DATA_USE_MAP__<privacy_request_id>
description: This map of traversed `Collection`s to associated `DataUse`s is stored and retrieved to be included in access request output packages.
data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
fidesops_meta:
data_type: object # Dict mapping `Collection` addresses -> set of associated `DataUse`s
fields:
- name: <dataset_name>:<collection_name> # `Collection` address
data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified
data_categories: [system.operations]
fidesops_meta:
data_type: string[] # set of `DataUse`s associated with this `Collection`
- name: EN_EMAIL_INFORMATION__<privacy_request_id>__<current_step>__<dataset_name>__<collection_name> # Usage: For building emails associated with email-connector datasets at the end of the privacy request. This encrypted raw information is retrieved from each relevant email-based collection and used to build a single email per email connector, with instructions on how to mask data on the given dataset.
fidesops_meta:
data_type: object # Stores how to locate and mask records for a given "email" collection.
Expand Down
4 changes: 2 additions & 2 deletions src/fides/api/service/storage/storage_uploader_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def _s3_uploader(
auth_method = config.details[StorageDetails.AUTH_METHOD.value]

return upload_to_s3(
config.secrets, data, bucket_name, file_key, config.format.value, privacy_request, auth_method, data_category_field_mapping # type: ignore
config.secrets, data, bucket_name, file_key, config.format.value, privacy_request, auth_method, data_category_field_mapping, data_use_map # type: ignore
)


Expand All @@ -112,4 +112,4 @@ def _local_uploader(
) -> str:
"""Uploads data to local storage, used for quick-start/demo purposes"""
file_key: str = _construct_file_key(privacy_request.id, config)
return upload_to_local(data, file_key, privacy_request, config.format.value, data_category_field_mapping) # type: ignore
return upload_to_local(data, file_key, privacy_request, config.format.value, data_category_field_mapping, data_use_map) # type: ignore
16 changes: 13 additions & 3 deletions src/fides/api/task/graph_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,15 +671,18 @@ def _format_data_use_map_for_caching(
) -> Dict[str, Set[str]]:
"""
Create a map of `Collection`s mapped to their associated `DataUse`s
to be stored in the cache.
to be stored in the cache. This is done before request execution, so that the map
reflects the _original_ state of the graph, i.e. the state used to execute the request.
The graph may change underneath a request while that request is executing,
but we want to avoid picking up those changes in our data use map.
`DataUse`s are associated with a `Collection` by means of the `System`
that's linked to a `Collection`'s `Connection` definition.
Example:
{
<collection1>: ["data_use_1", "data_use_2"],
<collection2>: ["data_use_1"],
<collection1>: {"data_use_1", "data_use_2"},
<collection2>: {"data_use_1"},
}
"""
return {collection.value: g_task.data_uses for collection, g_task in env.items()}
Expand Down Expand Up @@ -742,7 +745,14 @@ def termination_fn(
privacy_request, env, end_nodes, resources, ActionType.access
)
)

# cache access graph for use in logging/analytics event
privacy_request.cache_access_graph(format_graph_for_caching(env, end_nodes))

# Cache a map of collections -> data uses for the output package of access requests.
# The map is cached here, before request execution, because this is the state of the
# graph used for request execution. The graph could change _during_ request execution,
# but we don't want those changes in our data use map.
privacy_request.cache_data_use_map(_format_data_use_map_for_caching(env))

v = delayed(get(dsk, TERMINATOR_ADDRESS, num_workers=1))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ def test_upload_access_results_has_data_category_field_mapping(
args, kwargs = upload_mock.call_args
data_category_field_mapping = kwargs["data_category_field_mapping"]

# make sure the catergory field mapping generally looks as we expect
# make sure the category field mapping generally looks as we expect
address_mapping = data_category_field_mapping[
CollectionAddress.from_string("postgres_example_test_dataset:address")
]
Expand Down

0 comments on commit 8565ca3

Please sign in to comment.