Skip to content

Commit

Permalink
[MAINTENANCE] Rule-Based Profiler prerequisite: fix quantiles profile…
Browse files Browse the repository at this point in the history
…r configuration and add comments (#4255)
  • Loading branch information
alexsherstinsky authored Feb 22, 2022
1 parent 48bf966 commit 995a2e4
Show file tree
Hide file tree
Showing 10 changed files with 51 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ def __init__(
def domain_type(self) -> Union[str, MetricDomainTypes]:
return MetricDomainTypes.COLUMN

"""
All DomainBuilder classes whose "domain_type" property equals "MetricDomainTypes.COLUMN" must extend the present class
(ColumnDomainBuilder) in order to provide full getter/setter accessors for the "column_names" property (as an override).
"""

@property
def column_names(self) -> List[str]:
return self._column_names
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,16 @@ def domain_type(self) -> Union[str, MetricDomainTypes]:
def batch_request(self) -> Optional[Union[BatchRequest, RuntimeBatchRequest, dict]]:
return self._batch_request

"""
Full getter/setter accessors for "batch_request" and "batch" are for configuring DomainBuilder dynamically.
"""

@batch_request.setter
def batch_request(
self, value: Union[BatchRequest, RuntimeBatchRequest, dict]
) -> None:
self._batch_request = value

@property
def data_context(self) -> "DataContext": # noqa: F821
return self._data_context

@property
def batch(self) -> Optional[Batch]:
return self._batch
Expand All @@ -86,6 +86,10 @@ def batch(self) -> Optional[Batch]:
def batch(self, value: Batch) -> None:
self._batch = value

@property
def data_context(self) -> "DataContext": # noqa: F821
return self._data_context

@abstractmethod
def _get_domains(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __init__(
if not isinstance(meta, dict):
raise ge_exceptions.ProfilerExecutionError(
message=f"""Argument "{meta}" in "{self.__class__.__name__}" must be of type "dictionary" \
(value of type "{str(type())}" was encountered).
(value of type "{str(type(meta))}" was encountered).
"""
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ def __init__(

self._reduce_scalar_metric = reduce_scalar_metric

"""
Full getter/setter accessors for needed properties are for configuring MetricMultiBatchParameterBuilder dynamically.
"""

@property
def metric_name(self) -> str:
return self._metric_name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ def __init__(

self._truncate_values = truncate_values

"""
Full getter/setter accessors for needed properties are for configuring this ParameterBuilder dynamically.
"""

@property
def metric_name(self) -> str:
return self._metric_name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ def build_parameters(
def name(self) -> str:
return self._name

"""
Full getter/setter accessors for "batch_request" and "batch_list" are for configuring ParameterBuilder dynamically.
"""

@property
def batch_request(self) -> Optional[Union[BatchRequest, RuntimeBatchRequest, dict]]:
return self._batch_request
Expand All @@ -110,10 +114,6 @@ def batch_request(
) -> None:
self._batch_request = value

@property
def data_context(self) -> "DataContext": # noqa: F821
return self._data_context

@property
def batch_list(self) -> Optional[List[Batch]]:
return self._batch_list
Expand All @@ -122,6 +122,10 @@ def batch_list(self) -> Optional[List[Batch]]:
def batch_list(self, value: List[Batch]) -> None:
self._batch_list = value

@property
def data_context(self) -> "DataContext": # noqa: F821
return self._data_context

@abstractmethod
def _build_parameters(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ def __init__(

self._candidate_regexes = candidate_regexes

"""
Full getter/setter accessors for needed properties are for configuring this ParameterBuilder dynamically.
"""

@property
def metric_domain_kwargs(self) -> Optional[Union[str, dict]]:
return self._metric_domain_kwargs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ def __init__(

self._candidate_strings = candidate_strings

"""
Full getter/setter accessors for needed properties are for configuring this ParameterBuilder dynamically.
"""

@property
def metric_domain_kwargs(self) -> Optional[Union[str, dict]]:
return self._metric_domain_kwargs
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import datetime
from numbers import Number
from typing import Any, Dict, List, cast
from typing import Any, Dict, List, Tuple, cast

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -361,7 +361,7 @@ def test_quentin_profiler_user_workflow_multi_batch_quantiles_value_ranges_rule(
expect_column_quantile_values_to_be_between_expectation_configurations_value_ranges_by_column: Dict[
str, List[List[Number]]
] = {
column_name: expectation_kwargs["value_ranges"]
column_name: expectation_kwargs["quantile_ranges"]["value_ranges"]
for column_name, expectation_kwargs in expect_column_quantile_values_to_be_between_expectation_configurations_kwargs_dict.items()
}

Expand All @@ -379,7 +379,7 @@ def test_quentin_profiler_user_workflow_multi_batch_quantiles_value_ranges_rule(
rtol: float = 1.0e-7
atol: float = 5.0e-2

value_range: List[Number]
value_ranges: List[Tuple[Tuple[float, float]]]
paired_quantiles: zip
column_quantiles: List[List[Number]]
idx: int
Expand All @@ -395,12 +395,12 @@ def test_quentin_profiler_user_workflow_multi_batch_quantiles_value_ranges_rule(
"expect_column_quantile_values_to_be_between_quantile_ranges_by_column"
][column_name],
)
for value_range in list(paired_quantiles):
for value_ranges in list(paired_quantiles):
for idx in range(2):
np.testing.assert_allclose(
actual=value_range[0][idx],
desired=value_range[1][idx],
actual=value_ranges[0][idx],
desired=value_ranges[1][idx],
rtol=rtol,
atol=atol,
err_msg=f"Actual value of {value_range[0][idx]} differs from expected value of {value_range[1][idx]} by more than {atol + rtol * abs(value_range[1][idx])} tolerance.",
err_msg=f"Actual value of {value_ranges[0][idx]} differs from expected value of {value_ranges[1][idx]} by more than {atol + rtol * abs(value_ranges[1][idx])} tolerance.",
)
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ variables:
- 2.5e-1
- 5.0e-1
- 7.5e-1
allow_relative_error: linear
num_bootstrap_samples: 9139

# BatchRequest yielding thirty-five (35) batches (January, 2018 -- November, 2020 trip data)
Expand Down Expand Up @@ -42,15 +43,17 @@ rules:
metric_domain_kwargs: $domain.domain_kwargs
metric_value_kwargs:
quantiles: $variables.quantiles
allow_relative_error: linear
allow_relative_error: $variables.allow_relative_error
false_positive_rate: $variables.false_positive_rate
num_bootstrap_samples: $variables.num_bootstrap_samples
expectation_configuration_builders:
- expectation_type: expect_column_quantile_values_to_be_between
class_name: DefaultExpectationConfigurationBuilder
module_name: great_expectations.rule_based_profiler.expectation_configuration_builder
column: $domain.domain_kwargs.column
quantiles: $variables.quantiles
value_ranges: $parameter.quantile_value_ranges.value.value_range
quantile_ranges:
quantiles: $variables.quantiles
value_ranges: $parameter.quantile_value_ranges.value.value_range
allow_relative_error: $variables.allow_relative_error
meta:
profiler_details: $parameter.quantile_value_ranges.details

0 comments on commit 995a2e4

Please sign in to comment.