@usage_statistics_enabled_method(
    event_name="data_context.run_profiler_on_data",
)
def run_profiler_on_data(
    self,
    batch_request: Union[dict, BatchRequest, RuntimeBatchRequest],
    name: Optional[str] = None,
    ge_cloud_id: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    include_citation: bool = True,
) -> ExpectationSuite:
    """Run a profiler from this context's profiler store on the given data.

    This is a thin wrapper: every argument is forwarded unchanged to
    ``RuleBasedProfiler.run_profiler_on_data``, together with this context
    and its ``profiler_store``. The call is reported through the
    ``data_context.run_profiler_on_data`` usage-statistics event.

    Args:
        batch_request: The data to profile, as a dictionary or a batch
            request object.
        name: Forwarded as the profiler ``name``.
        ge_cloud_id: Forwarded as the profiler ``ge_cloud_id``.
        expectation_suite_name: Forwarded as the name for the resulting suite.
        include_citation: Forwarded to the profiler run.

    Returns:
        The ExpectationSuite returned by the delegated profiler run.
    """
    suite: ExpectationSuite = RuleBasedProfiler.run_profiler_on_data(
        data_context=self,
        profiler_store=self.profiler_store,
        batch_request=batch_request,
        name=name,
        ge_cloud_id=ge_cloud_id,
        expectation_suite_name=expectation_suite_name,
        include_citation=include_citation,
    )
    return suite
@staticmethod
def run_profiler_on_data(
    data_context: "DataContext",  # noqa: F821
    profiler_store: ProfilerStore,
    batch_request: Union[dict, BatchRequest, RuntimeBatchRequest],
    name: Optional[str] = None,
    ge_cloud_id: Optional[str] = None,
    expectation_suite_name: Optional[str] = None,
    include_citation: bool = True,
) -> ExpectationSuite:
    """Retrieve a stored profiler and run it against ``batch_request``.

    The profiler is looked up with ``RuleBasedProfiler.get_profiler`` using
    ``name`` / ``ge_cloud_id``. Its rules are then re-expressed with the
    supplied batch request injected into their builders, and passed to
    ``run()`` as runtime rule overrides.

    Args:
        data_context: The DataContext used to retrieve the profiler.
        profiler_store: The store the profiler is retrieved from.
        batch_request: The data to profile, as a dictionary or a batch
            request object.
        name: Passed to ``get_profiler`` as the profiler name.
        ge_cloud_id: Passed to ``get_profiler`` as the profiler cloud id.
        expectation_suite_name: Passed to ``run()`` as the suite name.
        include_citation: Passed to ``run()``.

    Returns:
        The ExpectationSuite produced by the profiler run.
    """
    profiler: RuleBasedProfiler = RuleBasedProfiler.get_profiler(
        data_context=data_context,
        profiler_store=profiler_store,
        name=name,
        ge_cloud_id=ge_cloud_id,
    )

    rules: Dict[
        str, Dict[str, Any]
    ] = profiler._generate_rule_overrides_from_batch_request(batch_request)

    result: ExpectationSuite = profiler.run(
        rules=rules,
        expectation_suite_name=expectation_suite_name,
        include_citation=include_citation,
    )
    return result


def _generate_rule_overrides_from_batch_request(
    self, batch_request: Union[dict, BatchRequest, RuntimeBatchRequest]
) -> Dict[str, Dict[str, Any]]:
    """Build rule overrides that point the profiler's builders at ``batch_request``.

    Iterates through the profiler's rules and assigns the batch request to
    every ParameterBuilder and to any DomainBuilder whose domain type is
    ``MetricDomainTypes.COLUMN``. The builders held by ``self.rules`` are
    updated in place before each rule is serialized.

    Note that ALL batches of the request are passed to the parameter builders,
    while a COLUMN domain builder additionally gets
    ``data_connector_query = {"index": -1}`` (presumably "most recent batch
    only" -- confirm against the data connector semantics). If not used
    carefully, a bias may creep into the resulting estimates. Callers should
    supply a batch request that either has no notion of a "current/active"
    batch or excludes it.

    Each builder receives its own top-level dictionary. The domain builder's
    query restriction therefore never leaks into the parameter builders, and
    the caller's dictionary is not modified.

    Args:
        batch_request: Data used to override builder attributes. Anything
            that is not already a dict is converted with
            ``get_batch_request_as_dict``.

    Returns:
        The dictionary representation of the rules, keyed by rule name,
        suitable as the ``rules`` runtime argument to ``run()``.
    """
    if not isinstance(batch_request, dict):
        batch_request = get_batch_request_as_dict(batch_request)
        logger.info("Converted batch request to dictionary: %s", batch_request)

    resulting_rules: Dict[str, Dict[str, Any]] = {}

    for rule in self.rules:
        domain_builder = rule.domain_builder
        if domain_builder.domain_type == MetricDomainTypes.COLUMN:
            # Build a fresh dict rather than mutating the shared one: setting
            # the key on the shared object would also restrict every
            # parameter builder (and the caller's dict) to index -1.
            domain_builder.batch_request = {
                **batch_request,
                "data_connector_query": {"index": -1},
            }

        # `or []` keeps the original tolerance for a missing/empty builder list.
        for parameter_builder in rule.parameter_builders or []:
            # A separate top-level copy per builder avoids cross-builder aliasing.
            parameter_builder.batch_request = dict(batch_request)

        resulting_rules[rule.name] = rule.to_dict()

    return resulting_rules
@mock.patch("great_expectations.rule_based_profiler.RuleBasedProfiler.run")
@mock.patch(
    "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit"
)
def test_run_profiler_on_data_emits_proper_usage_stats(
    mock_emit: mock.MagicMock,
    mock_profiler_run: mock.MagicMock,
    empty_data_context_stats_enabled: DataContext,
    populated_profiler_store: ProfilerStore,
    profiler_name: str,
):
    """``DataContext.run_profiler_on_data`` emits exactly one success event.

    The profiler's ``run`` is mocked out, so only the usage-statistics
    emission of the context method itself is observed.
    """
    request_as_dict = {
        "datasource_name": "my_datasource",
        "data_connector_name": "my_data_connector",
        "data_asset_name": "my_data_asset",
    }
    expected_emissions = [
        mock.call(
            {
                "event_payload": {},
                "event": "data_context.run_profiler_on_data",
                "success": True,
            }
        )
    ]

    # Swap the context's profiler_store property for the populated fixture.
    with mock.patch(
        "great_expectations.data_context.DataContext.profiler_store"
    ) as patched_store:
        patched_store.__get__ = mock.Mock(return_value=populated_profiler_store)
        empty_data_context_stats_enabled.run_profiler_on_data(
            name=profiler_name,
            batch_request=request_as_dict,
        )

    assert mock_emit.call_count == 1
    assert mock_emit.call_args_list == expected_emissions
@mock.patch("great_expectations.rule_based_profiler.RuleBasedProfiler.run")
@mock.patch("great_expectations.data_context.data_context.DataContext")
def test_run_profiler_on_data_emits_appropriate_logging(
    mock_data_context: mock.MagicMock,
    mock_profiler_run: mock.MagicMock,
    populated_profiler_store: ProfilerStore,
    profiler_name: str,
    caplog: Any,
):
    """A non-dict batch request triggers the INFO-level conversion log message."""
    request_object: BatchRequest = BatchRequest(
        datasource_name="my_datasource",
        data_connector_name="my_data_connector",
        data_asset_name="my_data_asset",
    )

    with caplog.at_level(logging.INFO):
        RuleBasedProfiler.run_profiler_on_data(
            data_context=mock_data_context,
            profiler_store=populated_profiler_store,
            name=profiler_name,
            batch_request=request_object,
        )

    captured_log = caplog.text
    assert "Converted batch request" in captured_log


@mock.patch("great_expectations.rule_based_profiler.RuleBasedProfiler.run")
@mock.patch("great_expectations.data_context.data_context.DataContext")
def test_run_profiler_on_data_creates_suite_with_dict_arg(
    mock_data_context: mock.MagicMock,
    mock_profiler_run: mock.MagicMock,
    populated_profiler_store: ProfilerStore,
    profiler_name: str,
):
    """A dict batch request reaches the first parameter builder of ``rule_1``."""
    batch_request: Dict[str, str] = {
        "datasource_name": "my_datasource",
        "data_connector_name": "my_data_connector",
        "data_asset_name": "my_data_asset",
    }

    RuleBasedProfiler.run_profiler_on_data(
        data_context=mock_data_context,
        profiler_store=populated_profiler_store,
        name=profiler_name,
        batch_request=batch_request,
    )

    assert mock_profiler_run.called

    # call_args unpacks into (positional args, keyword args).
    _, run_kwargs = mock_profiler_run.call_args
    first_parameter_builder = run_kwargs["rules"]["rule_1"]["parameter_builders"][0]
    assert first_parameter_builder["batch_request"] == batch_request


@mock.patch("great_expectations.rule_based_profiler.RuleBasedProfiler.run")
@mock.patch("great_expectations.data_context.data_context.DataContext")
def test_run_profiler_on_data_creates_suite_with_batch_request_arg(
    mock_data_context: mock.MagicMock,
    mock_profiler_run: mock.MagicMock,
    populated_profiler_store: ProfilerStore,
    profiler_name: str,
):
    """A ``BatchRequest`` object reaches the parameter builder in dict form."""
    batch_request: BatchRequest = BatchRequest(
        datasource_name="my_datasource",
        data_connector_name="my_data_connector",
        data_asset_name="my_data_asset",
    )

    RuleBasedProfiler.run_profiler_on_data(
        data_context=mock_data_context,
        profiler_store=populated_profiler_store,
        name=profiler_name,
        batch_request=batch_request,
    )

    assert mock_profiler_run.called

    # call_args unpacks into (positional args, keyword args).
    _, run_kwargs = mock_profiler_run.call_args
    first_parameter_builder = run_kwargs["rules"]["rule_1"]["parameter_builders"][0]
    assert first_parameter_builder["batch_request"] == batch_request.to_dict()