From a4ac3715bebda9bc292468c5a9ad327fa0e883ca Mon Sep 17 00:00:00 2001 From: Alex Sherstinsky Date: Tue, 22 Feb 2022 10:36:41 -0800 Subject: [PATCH 1/4] WIP --- .../quentin_user_workflow_verbose_profiler_config.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/test_fixtures/rule_based_profiler/quentin_user_workflow_verbose_profiler_config.yml b/tests/test_fixtures/rule_based_profiler/quentin_user_workflow_verbose_profiler_config.yml index 775459065322..cdb57aec9317 100644 --- a/tests/test_fixtures/rule_based_profiler/quentin_user_workflow_verbose_profiler_config.yml +++ b/tests/test_fixtures/rule_based_profiler/quentin_user_workflow_verbose_profiler_config.yml @@ -9,6 +9,7 @@ variables: - 2.5e-1 - 5.0e-1 - 7.5e-1 + allow_relative_error: linear num_bootstrap_samples: 9139 # BatchRequest yielding thirty five (35) batches (January, 2018 -- November, 2020 trip data) @@ -42,7 +43,7 @@ rules: metric_domain_kwargs: $domain.domain_kwargs metric_value_kwargs: quantiles: $variables.quantiles - allow_relative_error: linear + allow_relative_error: $variables.allow_relative_error false_positive_rate: $variables.false_positive_rate num_bootstrap_samples: $variables.num_bootstrap_samples expectation_configuration_builders: @@ -50,7 +51,9 @@ rules: class_name: DefaultExpectationConfigurationBuilder module_name: great_expectations.rule_based_profiler.expectation_configuration_builder column: $domain.domain_kwargs.column - quantiles: $variables.quantiles - value_ranges: $parameter.quantile_value_ranges.value.value_range + quantile_ranges: + quantiles: $variables.quantiles + value_ranges: $parameter.quantile_value_ranges.value.value_range + allow_relative_error: $variables.allow_relative_error meta: profiler_details: $parameter.quantile_value_ranges.details From 52960de4b3bf5f7ff421b5bbbf5b1f51d7dd8365 Mon Sep 17 00:00:00 2001 From: Alex Sherstinsky Date: Tue, 22 Feb 2022 10:43:32 -0800 Subject: [PATCH 2/4] fix quantile test configuration --- 
.../test_profiler_user_workflows.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/integration/profiling/rule_based_profilers/test_profiler_user_workflows.py b/tests/integration/profiling/rule_based_profilers/test_profiler_user_workflows.py index 4943599ecc9f..6d839b0078af 100644 --- a/tests/integration/profiling/rule_based_profilers/test_profiler_user_workflows.py +++ b/tests/integration/profiling/rule_based_profilers/test_profiler_user_workflows.py @@ -1,6 +1,6 @@ import datetime from numbers import Number -from typing import Any, Dict, List, cast +from typing import Any, Dict, List, Tuple, cast import numpy as np import pandas as pd @@ -361,7 +361,7 @@ def test_quentin_profiler_user_workflow_multi_batch_quantiles_value_ranges_rule( expect_column_quantile_values_to_be_between_expectation_configurations_value_ranges_by_column: Dict[ str, List[List[Number]] ] = { - column_name: expectation_kwargs["value_ranges"] + column_name: expectation_kwargs["quantile_ranges"]["value_ranges"] for column_name, expectation_kwargs in expect_column_quantile_values_to_be_between_expectation_configurations_kwargs_dict.items() } @@ -379,7 +379,7 @@ def test_quentin_profiler_user_workflow_multi_batch_quantiles_value_ranges_rule( rtol: float = 1.0e-7 atol: float = 5.0e-2 - value_range: List[Number] + value_ranges: List[Tuple[Tuple[float, float]]] paired_quantiles: zip column_quantiles: List[List[Number]] idx: int @@ -395,12 +395,12 @@ def test_quentin_profiler_user_workflow_multi_batch_quantiles_value_ranges_rule( "expect_column_quantile_values_to_be_between_quantile_ranges_by_column" ][column_name], ) - for value_range in list(paired_quantiles): + for value_ranges in list(paired_quantiles): for idx in range(2): np.testing.assert_allclose( - actual=value_range[0][idx], - desired=value_range[1][idx], + actual=value_ranges[0][idx], + desired=value_ranges[1][idx], rtol=rtol, atol=atol, - err_msg=f"Actual value of {value_range[0][idx]} differs from 
expected value of {value_range[1][idx]} by more than {atol + rtol * abs(value_range[1][idx])} tolerance.", + err_msg=f"Actual value of {value_ranges[0][idx]} differs from expected value of {value_ranges[1][idx]} by more than {atol + rtol * abs(value_ranges[1][idx])} tolerance.", ) From f68529e0d385a95abf09359a6c7d27c99699d26e Mon Sep 17 00:00:00 2001 From: Alex Sherstinsky Date: Tue, 22 Feb 2022 11:04:23 -0800 Subject: [PATCH 3/4] adding docstrings for properties --- .../domain_builder/column_domain_builder.py | 5 +++++ .../domain_builder/domain_builder.py | 12 ++++++++---- .../metric_multi_batch_parameter_builder.py | 4 ++++ ...ric_metric_range_multi_batch_parameter_builder.py | 4 ++++ .../parameter_builder/parameter_builder.py | 12 ++++++++---- .../regex_pattern_string_parameter_builder.py | 4 ++++ .../simple_date_format_string_parameter_builder.py | 4 ++++ 7 files changed, 37 insertions(+), 8 deletions(-) diff --git a/great_expectations/rule_based_profiler/domain_builder/column_domain_builder.py b/great_expectations/rule_based_profiler/domain_builder/column_domain_builder.py index 840a17aabe46..045d8cd7186f 100644 --- a/great_expectations/rule_based_profiler/domain_builder/column_domain_builder.py +++ b/great_expectations/rule_based_profiler/domain_builder/column_domain_builder.py @@ -34,6 +34,11 @@ def __init__( def domain_type(self) -> Union[str, MetricDomainTypes]: return MetricDomainTypes.COLUMN + """ + All DomainBuilder classes, whose "domain_type" property equals "MetricDomainTypes.COLUMN", must extend present class + (ColumnDomainBuilder) in order to provide full getter/setter accessor for "column_names" property (as override). 
+ """ + @property def column_names(self) -> List[str]: return self._column_names diff --git a/great_expectations/rule_based_profiler/domain_builder/domain_builder.py b/great_expectations/rule_based_profiler/domain_builder/domain_builder.py index a78bac08e093..cb43041dc862 100644 --- a/great_expectations/rule_based_profiler/domain_builder/domain_builder.py +++ b/great_expectations/rule_based_profiler/domain_builder/domain_builder.py @@ -68,16 +68,16 @@ def domain_type(self) -> Union[str, MetricDomainTypes]: def batch_request(self) -> Optional[Union[BatchRequest, RuntimeBatchRequest, dict]]: return self._batch_request + """ + Full getter/setter accessors for "batch_request" and "batch" are for configuring DomainBuilder dynamically. + """ + @batch_request.setter def batch_request( self, value: Union[BatchRequest, RuntimeBatchRequest, dict] ) -> None: self._batch_request = value - @property - def data_context(self) -> "DataContext": # noqa: F821 - return self._data_context - @property def batch(self) -> Optional[Batch]: return self._batch @@ -86,6 +86,10 @@ def batch(self) -> Optional[Batch]: def batch(self, value: Batch) -> None: self._batch = value + @property + def data_context(self) -> "DataContext": # noqa: F821 + return self._data_context + @abstractmethod def _get_domains( self, diff --git a/great_expectations/rule_based_profiler/parameter_builder/metric_multi_batch_parameter_builder.py b/great_expectations/rule_based_profiler/parameter_builder/metric_multi_batch_parameter_builder.py index b87ae526bedc..80f3a66e442f 100644 --- a/great_expectations/rule_based_profiler/parameter_builder/metric_multi_batch_parameter_builder.py +++ b/great_expectations/rule_based_profiler/parameter_builder/metric_multi_batch_parameter_builder.py @@ -69,6 +69,10 @@ def __init__( self._reduce_scalar_metric = reduce_scalar_metric + """ + Full getter/setter accessors for needed properties are for configuring MetricMultiBatchParameterBuilder dynamically. 
+ """ + @property def metric_name(self) -> str: return self._metric_name diff --git a/great_expectations/rule_based_profiler/parameter_builder/numeric_metric_range_multi_batch_parameter_builder.py b/great_expectations/rule_based_profiler/parameter_builder/numeric_metric_range_multi_batch_parameter_builder.py index 9b32ba1632c6..51c46d5d46a9 100644 --- a/great_expectations/rule_based_profiler/parameter_builder/numeric_metric_range_multi_batch_parameter_builder.py +++ b/great_expectations/rule_based_profiler/parameter_builder/numeric_metric_range_multi_batch_parameter_builder.py @@ -145,6 +145,10 @@ def __init__( self._truncate_values = truncate_values + """ + Full getter/setter accessors for needed properties are for configuring NumericMetricRangeMultiBatchParameterBuilder dynamically. + """ + @property def metric_name(self) -> str: return self._metric_name diff --git a/great_expectations/rule_based_profiler/parameter_builder/parameter_builder.py b/great_expectations/rule_based_profiler/parameter_builder/parameter_builder.py index 95b3b4ebec84..168fe1d1af7c 100644 --- a/great_expectations/rule_based_profiler/parameter_builder/parameter_builder.py +++ b/great_expectations/rule_based_profiler/parameter_builder/parameter_builder.py @@ -100,6 +100,10 @@ def build_parameters( def name(self) -> str: return self._name + """ + Full getter/setter accessors for "batch_request" and "batch_list" are for configuring ParameterBuilder dynamically. 
+ """ + @property def batch_request(self) -> Optional[Union[BatchRequest, RuntimeBatchRequest, dict]]: return self._batch_request @@ -110,10 +114,6 @@ def batch_request( ) -> None: self._batch_request = value - @property - def data_context(self) -> "DataContext": # noqa: F821 - return self._data_context - @property def batch_list(self) -> Optional[List[Batch]]: return self._batch_list @@ -122,6 +122,10 @@ def batch_list(self) -> Optional[List[Batch]]: def batch_list(self, value: List[Batch]) -> None: self._batch_list = value + @property + def data_context(self) -> "DataContext": # noqa: F821 + return self._data_context + @abstractmethod def _build_parameters( self, diff --git a/great_expectations/rule_based_profiler/parameter_builder/regex_pattern_string_parameter_builder.py b/great_expectations/rule_based_profiler/parameter_builder/regex_pattern_string_parameter_builder.py index a25bc78f3077..01adb8c061f9 100644 --- a/great_expectations/rule_based_profiler/parameter_builder/regex_pattern_string_parameter_builder.py +++ b/great_expectations/rule_based_profiler/parameter_builder/regex_pattern_string_parameter_builder.py @@ -78,6 +78,10 @@ def __init__( self._candidate_regexes = candidate_regexes + """ + Full getter/setter accessors for needed properties are for configuring RegexPatternStringParameterBuilder dynamically. 
+ """ + @property def metric_domain_kwargs(self) -> Optional[Union[str, dict]]: return self._metric_domain_kwargs diff --git a/great_expectations/rule_based_profiler/parameter_builder/simple_date_format_string_parameter_builder.py b/great_expectations/rule_based_profiler/parameter_builder/simple_date_format_string_parameter_builder.py index 99feeccba532..3972fef76b4a 100644 --- a/great_expectations/rule_based_profiler/parameter_builder/simple_date_format_string_parameter_builder.py +++ b/great_expectations/rule_based_profiler/parameter_builder/simple_date_format_string_parameter_builder.py @@ -123,6 +123,10 @@ def __init__( self._candidate_strings = candidate_strings + """ + Full getter/setter accessors for needed properties are for configuring SimpleDateFormatStringParameterBuilder dynamically. + """ + @property def metric_domain_kwargs(self) -> Optional[Union[str, dict]]: return self._metric_domain_kwargs From 127000e33b2198b1a50e5c8844ba1d5d7123b95f Mon Sep 17 00:00:00 2001 From: Alex Sherstinsky Date: Tue, 22 Feb 2022 11:27:44 -0800 Subject: [PATCH 4/4] typo --- .../default_expectation_configuration_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/great_expectations/rule_based_profiler/expectation_configuration_builder/default_expectation_configuration_builder.py b/great_expectations/rule_based_profiler/expectation_configuration_builder/default_expectation_configuration_builder.py index 1a2f6941e27f..3f5f587bffa8 100644 --- a/great_expectations/rule_based_profiler/expectation_configuration_builder/default_expectation_configuration_builder.py +++ b/great_expectations/rule_based_profiler/expectation_configuration_builder/default_expectation_configuration_builder.py @@ -37,7 +37,7 @@ def __init__( if not isinstance(meta, dict): raise ge_exceptions.ProfilerExecutionError( message=f"""Argument "{meta}" in "{self.__class__.__name__}" must be of type "dictionary" \ -(value of type "{str(type())}" was encountered). 
+(value of type "{str(type(meta))}" was encountered). """ )