Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move config validation to the CatalogConfigResolver init #4195

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 39 additions & 7 deletions kedro/io/catalog_config_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,43 @@ def _resolve_value(key: str, value: Any) -> Any:

return {k: _resolve_value(k, v) for k, v in config.items()}

@classmethod
def _validate_pattern_config(cls, ds_name: str, ds_config: dict[str, Any]) -> None:
    """Verify that a dataset factory pattern's configuration is consistent.

    Every ``{...}`` placeholder that appears in a string anywhere inside
    ``ds_config`` (nested dicts, lists and tuples are searched) must also
    appear in the pattern name ``ds_name``.

    Args:
        ds_name: Dataset factory pattern name.
        ds_config: Dataset pattern configuration.

    Raises:
        DatasetError: If the configuration uses a placeholder that is not
            present in the dataset factory pattern name.

    """
    # Non-greedy match of each "{...}" span, with the braces included.
    placeholder_regex = r"\{.*?\}"

    def _iter_templated_strings(node: Any):
        """Yield, depth-first, each string leaf that contains a closing brace."""
        if isinstance(node, dict):
            for child in node.values():
                yield from _iter_templated_strings(child)
        elif isinstance(node, (list, tuple)):
            for child in node:
                yield from _iter_templated_strings(child)
        elif isinstance(node, str) and "}" in node:
            yield node

    allowed_placeholders = set(re.findall(placeholder_regex, ds_name))

    used_placeholders = set()
    for text in _iter_templated_strings(ds_config):
        used_placeholders |= set(re.findall(placeholder_regex, text))

    # Placeholders referenced by the config that the pattern name cannot supply.
    unknown_placeholders = used_placeholders - allowed_placeholders
    if not unknown_placeholders:
        return

    raise DatasetError(
        "Incorrect dataset configuration provided. "
        f"Keys used in the configuration {unknown_placeholders} "
        f"should present in the dataset factory pattern name {ds_name}."
    )

@classmethod
def _resolve_dataset_config(
cls,
Expand All @@ -147,13 +184,7 @@ def _resolve_dataset_config(
cls._resolve_dataset_config(ds_name, pattern, value) for value in config
]
elif isinstance(config, str) and "}" in config:
try:
config = config.format_map(resolved_vars.named)
except KeyError as exc:
raise DatasetError(
f"Unable to resolve '{config}' from the pattern '{pattern}'. Keys used in the configuration "
f"should be present in the dataset factory pattern."
) from exc
config = config.format_map(resolved_vars.named)
return config

def list_patterns(self) -> list[str]:
Expand Down Expand Up @@ -192,6 +223,7 @@ def _extract_patterns(

for ds_name, ds_config in config.items():
if cls.is_pattern(ds_name):
cls._validate_pattern_config(ds_name, ds_config)
dataset_patterns[ds_name] = cls._resolve_credentials(
ds_config, credentials
)
Expand Down
7 changes: 3 additions & 4 deletions tests/io/test_data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -946,13 +946,12 @@ def test_unmatched_key_error_when_parsing_config(
self, config_with_dataset_factories_bad_pattern
):
"""Check error raised when key mentioned in the config is not in pattern name"""
catalog = DataCatalog.from_config(**config_with_dataset_factories_bad_pattern)
pattern = (
"Unable to resolve 'data/01_raw/{brand}_plane.pq' from the pattern '{type}@planes'. "
"Keys used in the configuration should be present in the dataset factory pattern."
"Incorrect dataset configuration provided. Keys used in the configuration {'{brand}'} "
"should present in the dataset factory pattern name {type}@planes."
)
with pytest.raises(DatasetError, match=re.escape(pattern)):
catalog._get_dataset("jet@planes")
_ = DataCatalog.from_config(**config_with_dataset_factories_bad_pattern)

def test_factory_config_versioned(
self, config_with_dataset_factories, filepath, dummy_dataframe
Expand Down