diff --git a/CHANGELOG.md b/CHANGELOG.md index 374e44c1976..710146a2def 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - Added native python 're' module for regex in jinja templates [#2851](https://github.com/fishtown-analytics/dbt/pull/2851) - Store resolved node names in manifest ([#2647](https://github.com/fishtown-analytics/dbt/issues/2647), [#2837](https://github.com/fishtown-analytics/dbt/pull/2837)) - Save selectors dictionary to manifest, allow descriptions ([#2693](https://github.com/fishtown-analytics/dbt/issues/2693), [#2866](https://github.com/fishtown-analytics/dbt/pull/2866)) +- Normalize cli-style-strings in manifest selectors dictionary ([#2879](https://github.com/fishtown-analytics/dbt/issues/2879), [#2895](https://github.com/fishtown-analytics/dbt/pull/2895)) ### Fixes - Respect --project-dir in dbt clean command ([#2840](https://github.com/fishtown-analytics/dbt/issues/2840), [#2841](https://github.com/fishtown-analytics/dbt/pull/2841)) diff --git a/core/dbt/config/project.py b/core/dbt/config/project.py index f8ea67fda1c..1de8756a127 100644 --- a/core/dbt/config/project.py +++ b/core/dbt/config/project.py @@ -25,6 +25,7 @@ from dbt.version import get_installed_version from dbt.utils import MultiDict from dbt.node_types import NodeType +from dbt.config.selectors import SelectorDict from dbt.contracts.project import ( Project as ProjectContract, @@ -369,15 +370,13 @@ def create_project(self, rendered: RenderComponents) -> 'Project': query_comment = _query_comment_from_cfg(cfg.query_comment) packages = package_config_from_data(rendered.packages_dict) + selectors = selector_config_from_data(rendered.selectors_dict) manifest_selectors: Dict[str, Any] = {} - if rendered.selectors_dict: + if rendered.selectors_dict and rendered.selectors_dict['selectors']: # this is a dict with a single key 'selectors' pointing to a list # of dicts. 
- if rendered.selectors_dict['selectors']: - # for each selector dict, transform into 'name': { } - for sel in rendered.selectors_dict['selectors']: - manifest_selectors[sel['name']] = sel - selectors = selector_config_from_data(rendered.selectors_dict) + manifest_selectors = SelectorDict.parse_from_selectors_list( + rendered.selectors_dict['selectors']) project = Project( project_name=name, diff --git a/core/dbt/config/selectors.py b/core/dbt/config/selectors.py index ad2604855d4..7b888732196 100644 --- a/core/dbt/config/selectors.py +++ b/core/dbt/config/selectors.py @@ -15,6 +15,7 @@ from dbt.contracts.selection import SelectorFile from dbt.exceptions import DbtSelectorsError, RuntimeException from dbt.graph import parse_from_selectors_definition, SelectionSpec +from dbt.graph.selector_spec import SelectionCriteria MALFORMED_SELECTOR_ERROR = """\ The selectors.yml file in this project is malformed. Please double check @@ -113,3 +114,67 @@ def selector_config_from_data( result_type='invalid_selector', ) from e return selectors + + +# These are utilities to clean up the dictionary created from +# selectors.yml by turning the cli-string format entries into +# normalized dictionary entries. It parallels the flow in +# dbt/graph/cli.py. If changes are made there, it might +# be necessary to make changes here. Ideally it would be +# good to combine the two flows into one at some point. 
+class SelectorDict: + + @classmethod + def parse_dict_definition(cls, definition): + key = list(definition)[0] + value = definition[key] + if isinstance(value, list): + new_values = [] + for sel_def in value: + new_value = cls.parse_from_definition(sel_def) + new_values.append(new_value) + value = new_values + if key == 'exclude': + definition = {key: value} + elif len(definition) == 1: + definition = {'method': key, 'value': value} + return definition + + @classmethod + def parse_a_definition(cls, def_type, definition): + # this definition must be a list + new_dict = {def_type: []} + for sel_def in definition[def_type]: + if isinstance(sel_def, dict): + sel_def = cls.parse_from_definition(sel_def) + new_dict[def_type].append(sel_def) + elif isinstance(sel_def, str): + sel_def = SelectionCriteria.dict_from_single_spec(sel_def) + new_dict[def_type].append(sel_def) + else: + new_dict[def_type].append(sel_def) + return new_dict + + @classmethod + def parse_from_definition(cls, definition): + if isinstance(definition, str): + definition = SelectionCriteria.dict_from_single_spec(definition) + elif 'union' in definition: + definition = cls.parse_a_definition('union', definition) + elif 'intersection' in definition: + definition = cls.parse_a_definition('intersection', definition) + elif isinstance(definition, dict): + definition = cls.parse_dict_definition(definition) + return definition + + # This is the normal entrypoint of this code. Give it the + # list of selectors generated from the selectors.yml file. 
+ @classmethod + def parse_from_selectors_list(cls, selectors): + selector_dict = {} + for selector in selectors: + sel_name = selector['name'] + selector_dict[sel_name] = selector + definition = cls.parse_from_definition(selector['definition']) + selector_dict[sel_name]['definition'] = definition + return selector_dict diff --git a/core/dbt/graph/selector_spec.py b/core/dbt/graph/selector_spec.py index c5d13905bfd..417696b2056 100644 --- a/core/dbt/graph/selector_spec.py +++ b/core/dbt/graph/selector_spec.py @@ -123,6 +123,26 @@ def from_dict(cls, raw: Any, dct: Dict[str, Any]) -> 'SelectionCriteria': children_depth=children_depth, ) + @classmethod + def dict_from_single_spec(cls, raw: str): + result = RAW_SELECTOR_PATTERN.match(raw) + if result is None: + return {'error': 'Invalid selector spec'} + dct: Dict[str, Any] = result.groupdict() + method_name, method_arguments = cls.parse_method(dct) + meth_name = str(method_name) + if method_arguments: + meth_name = meth_name + '.' + '.'.join(method_arguments) + dct['method'] = meth_name + dct = {k: v for k, v in dct.items() if (v is not None and v != '')} + if 'childrens_parents' in dct: + dct['childrens_parents'] = bool(dct.get('childrens_parents')) + if 'parents' in dct: + dct['parents'] = bool(dct.get('parents')) + if 'children' in dct: + dct['children'] = bool(dct.get('children')) + return dct + @classmethod def from_single_spec(cls, raw: str) -> 'SelectionCriteria': result = RAW_SELECTOR_PATTERN.match(raw) diff --git a/test/unit/test_manifest_selectors.py b/test/unit/test_manifest_selectors.py new file mode 100644 index 00000000000..d7e7c3d1fe8 --- /dev/null +++ b/test/unit/test_manifest_selectors.py @@ -0,0 +1,115 @@ +import dbt.exceptions +import textwrap +import yaml +import unittest +from dbt.config.selectors import SelectorDict + + +def get_selector_dict(txt: str) -> dict: + txt = textwrap.dedent(txt) + dct = yaml.safe_load(txt) + return dct + + +class SelectorUnitTest(unittest.TestCase): + + def 
test_compare_cli_non_cli(self): + dct = get_selector_dict('''\ + selectors: + - name: nightly_diet_snowplow + description: "This uses more CLI-style syntax" + definition: + union: + - intersection: + - '@source:snowplow' + - 'tag:nightly' + - 'models/export' + - exclude: + - intersection: + - 'package:snowplow' + - 'config.materialized:incremental' + - export_performance_timing + - name: nightly_diet_snowplow_full + description: "This is a fuller YAML specification" + definition: + union: + - intersection: + - method: source + value: snowplow + childrens_parents: true + - method: tag + value: nightly + - method: path + value: models/export + - exclude: + - intersection: + - method: package + value: snowplow + - method: config.materialized + value: incremental + - method: fqn + value: export_performance_timing + ''') + + sel_dict = SelectorDict.parse_from_selectors_list(dct['selectors']) + assert(sel_dict) + with_strings = sel_dict['nightly_diet_snowplow']['definition'] + no_strings = sel_dict['nightly_diet_snowplow_full']['definition'] + self.assertEqual(with_strings, no_strings) + + def test_single_string_definition(self): + dct = get_selector_dict('''\ + selectors: + - name: nightly_selector + definition: + 'tag:nightly' + ''') + + sel_dict = SelectorDict.parse_from_selectors_list(dct['selectors']) + assert(sel_dict) + expected = {'method': 'tag', 'value': 'nightly'} + definition = sel_dict['nightly_selector']['definition'] + self.assertEqual(expected, definition) + + + def test_single_key_value_definition(self): + dct = get_selector_dict('''\ + selectors: + - name: nightly_selector + definition: + tag: nightly + ''') + + sel_dict = SelectorDict.parse_from_selectors_list(dct['selectors']) + assert(sel_dict) + expected = {'method': 'tag', 'value': 'nightly'} + definition = sel_dict['nightly_selector']['definition'] + self.assertEqual(expected, definition) + + def test_parent_definition(self): + dct = get_selector_dict('''\ + selectors: + - name: 
kpi_nightly_selector + definition: + '+exposure:kpi_nightly' + ''') + + sel_dict = SelectorDict.parse_from_selectors_list(dct['selectors']) + assert(sel_dict) + expected = {'method': 'exposure', 'value': 'kpi_nightly', 'parents': True} + definition = sel_dict['kpi_nightly_selector']['definition'] + self.assertEqual(expected, definition) + + def test_plus_definition(self): + dct = get_selector_dict('''\ + selectors: + - name: my_model_children_selector + definition: + 'my_model+2' + ''') + + sel_dict = SelectorDict.parse_from_selectors_list(dct['selectors']) + assert(sel_dict) + expected = {'method': 'fqn', 'value': 'my_model', 'children': True, 'children_depth': '2'} + definition = sel_dict['my_model_children_selector']['definition'] + self.assertEqual(expected, definition)