Skip to content

Commit

Permalink
CLI Issues: YAML Cannot Find Constructors for FidesKeys (#5113)
Browse files Browse the repository at this point in the history
  • Loading branch information
pattisdr committed Jul 22, 2024
1 parent fb095d0 commit 947fee0
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ The types of changes are:
### Fixed
- Fixed bug with unescaped table names in MySQL queries [#5072](https://github.com/ethyca/fides/pull/5072/)
- Fixed bug with unresponsive messaging UI [#5081](https://github.com/ethyca/fides/pull/5081/)
- Fixed FidesKey constructor bugs in CLI [#5113](https://github.com/ethyca/fides/pull/5113)


## [2.40.0](https://github.com/ethyca/fides/compare/2.39.2...2.40.0)
Expand Down
2 changes: 1 addition & 1 deletion src/fides/core/annotate_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def get_data_categories_annotation(
dataset_member, valid_categories
)

return [FidesKey(value) for value in user_response]
return [str(FidesKey(value)) for value in user_response] # type: ignore


def annotate_dataset(
Expand Down
6 changes: 4 additions & 2 deletions src/fides/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,10 @@ def make_dataset_key_unique(
to avoid naming collisions.
"""

dataset.fides_key = FidesKey(
generate_unique_fides_key(dataset.fides_key, database_host, database_name)
dataset.fides_key = str( # type: ignore
FidesKey(
generate_unique_fides_key(dataset.fides_key, database_host, database_name)
)
)
dataset.meta = {"database_host": database_host, "database_name": database_name}
return dataset
Expand Down
63 changes: 63 additions & 0 deletions tests/ctl/cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
from json import dump, loads
from typing import Generator

import click
import pytest
import yaml
from click.testing import CliRunner
from git.repo import Repo
from py._path.local import LocalPath
Expand Down Expand Up @@ -217,6 +219,61 @@ def test_pull_all(
assert result.exit_code == 0


@pytest.mark.integration
class TestAnnotate:
    """Integration tests for the interactive ``annotate dataset`` CLI command."""

    def test_annotate(
        self,
        test_config_path: str,
        test_cli_runner: CliRunner,
    ) -> None:
        """
        Test annotating dataset allowing you to interactively annotate the dataset with data categories

        The command rewrites the fixture file in place, so the original file
        contents are captured up front and restored unconditionally afterwards.
        """
        dataset_path = "tests/ctl/data/dataset_missing_categories.yml"

        # Snapshot the raw text so the fixture can be restored byte-for-byte,
        # rather than re-serialized through yaml.
        with open(dataset_path, "r", encoding="utf-8") as dataset_file:
            original_contents = dataset_file.read()

        # Confirm starting state, that the first field has no data categories
        starting_dataset = yaml.safe_load(original_contents)
        starting_field = starting_dataset["dataset"][0]["collections"][0]["fields"][0]
        assert "data_categories" not in starting_field

        try:
            result = test_cli_runner.invoke(
                cli,
                [
                    "-f",
                    test_config_path,
                    "annotate",
                    "dataset",
                    dataset_path,
                ],
                input="user\n",
            )
            print(result.output)
            # Check the exit code first so a CLI failure is reported as such,
            # instead of surfacing as a confusing KeyError on the yaml below.
            assert result.exit_code == 0

            with open(dataset_path, "r", encoding="utf-8") as dataset_file:
                annotated_dataset = yaml.safe_load(dataset_file)

            # Helps assert that the data category was output correctly
            annotated_field = annotated_dataset["dataset"][0]["collections"][0][
                "fields"
            ][0]
            assert annotated_field["data_categories"] == ["user"]
        finally:
            # Restore the original contents to the file even when an assertion
            # above fails, so later test runs start from a clean fixture.
            with open(dataset_path, "w", encoding="utf-8") as dataset_file:
                dataset_file.write(original_contents)


@pytest.mark.integration
def test_audit(test_config_path: str, test_cli_runner: CliRunner) -> None:
result = test_cli_runner.invoke(cli, ["-f", test_config_path, "evaluate", "-a"])
Expand Down Expand Up @@ -666,6 +723,12 @@ def test_generate_dataset_db_with_connection_string(
print(result.output)
assert result.exit_code == 0

with open(tmp_file, "r") as dataset_yml:
# Helps assert that the file was output correctly, namely, fides_keys were serialized as strings
# and not a FidesKey python object
dataset = yaml.safe_load(dataset_yml).get("dataset", [])
assert isinstance(dataset[0]["fides_key"], str)

@pytest.mark.integration
def test_generate_dataset_db_with_credentials_id(
self,
Expand Down
13 changes: 13 additions & 0 deletions tests/ctl/data/dataset_missing_categories.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
dataset:
- collections:
- description: Organization information
fields:
- name: city
- data_categories:
- account.contact.state
name: state
name: organization
description: Sample dataset to be annotated
fides_key: test_missing_data_categories
name: Sample Dataset
organization_fides_key: default_organization

0 comments on commit 947fee0

Please sign in to comment.