Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MAINTENANCE] Rule-Based Profiler prerequisite: fix quantiles profiler configuration and add comments #4255

Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ def __init__(
def domain_type(self) -> Union[str, MetricDomainTypes]:
return MetricDomainTypes.COLUMN

"""
All DomainBuilder classes whose "domain_type" property equals "MetricDomainTypes.COLUMN" must extend the present class
(ColumnDomainBuilder) in order to provide full getter/setter accessors for the "column_names" property (as an override).
"""

@property
def column_names(self) -> List[str]:
return self._column_names
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,16 @@ def domain_type(self) -> Union[str, MetricDomainTypes]:
def batch_request(self) -> Optional[Union[BatchRequest, RuntimeBatchRequest, dict]]:
return self._batch_request

"""
Full getter/setter accessors for "batch_request" and "batch" are for configuring DomainBuilder dynamically.
"""

@batch_request.setter
def batch_request(
self, value: Union[BatchRequest, RuntimeBatchRequest, dict]
) -> None:
self._batch_request = value

@property
def data_context(self) -> "DataContext": # noqa: F821
return self._data_context

@property
def batch(self) -> Optional[Batch]:
return self._batch
Expand All @@ -86,6 +86,10 @@ def batch(self) -> Optional[Batch]:
def batch(self, value: Batch) -> None:
self._batch = value

@property
def data_context(self) -> "DataContext": # noqa: F821
return self._data_context

@abstractmethod
def _get_domains(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ def __init__(

self._reduce_scalar_metric = reduce_scalar_metric

"""
Full getter/setter accessors for needed properties are for configuring MetricMultiBatchParameterBuilder dynamically.
"""

@property
def metric_name(self) -> str:
return self._metric_name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ def __init__(

self._truncate_values = truncate_values

"""
Full getter/setter accessors for needed properties are for configuring this ParameterBuilder dynamically.
"""

@property
def metric_name(self) -> str:
return self._metric_name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ def build_parameters(
def name(self) -> str:
return self._name

"""
Full getter/setter accessors for "batch_request" and "batch_list" are for configuring ParameterBuilder dynamically.
"""

@property
def batch_request(self) -> Optional[Union[BatchRequest, RuntimeBatchRequest, dict]]:
return self._batch_request
Expand All @@ -110,10 +114,6 @@ def batch_request(
) -> None:
self._batch_request = value

@property
def data_context(self) -> "DataContext": # noqa: F821
return self._data_context

@property
def batch_list(self) -> Optional[List[Batch]]:
return self._batch_list
Expand All @@ -122,6 +122,10 @@ def batch_list(self) -> Optional[List[Batch]]:
def batch_list(self, value: List[Batch]) -> None:
self._batch_list = value

@property
def data_context(self) -> "DataContext": # noqa: F821
return self._data_context

@abstractmethod
def _build_parameters(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ def __init__(

self._candidate_regexes = candidate_regexes

"""
Full getter/setter accessors for needed properties are for configuring this ParameterBuilder dynamically.
"""

@property
def metric_domain_kwargs(self) -> Optional[Union[str, dict]]:
return self._metric_domain_kwargs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ def __init__(

self._candidate_strings = candidate_strings

"""
Full getter/setter accessors for needed properties are for configuring this ParameterBuilder dynamically.
"""

@property
def metric_domain_kwargs(self) -> Optional[Union[str, dict]]:
return self._metric_domain_kwargs
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import datetime
from numbers import Number
from typing import Any, Dict, List, cast
from typing import Any, Dict, List, Tuple, cast

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -361,7 +361,7 @@ def test_quentin_profiler_user_workflow_multi_batch_quantiles_value_ranges_rule(
expect_column_quantile_values_to_be_between_expectation_configurations_value_ranges_by_column: Dict[
str, List[List[Number]]
] = {
column_name: expectation_kwargs["value_ranges"]
column_name: expectation_kwargs["quantile_ranges"]["value_ranges"]
for column_name, expectation_kwargs in expect_column_quantile_values_to_be_between_expectation_configurations_kwargs_dict.items()
}

Expand All @@ -379,7 +379,7 @@ def test_quentin_profiler_user_workflow_multi_batch_quantiles_value_ranges_rule(
rtol: float = 1.0e-7
atol: float = 5.0e-2

value_range: List[Number]
value_ranges: List[Tuple[Tuple[float, float]]]
paired_quantiles: zip
column_quantiles: List[List[Number]]
idx: int
Expand All @@ -395,12 +395,12 @@ def test_quentin_profiler_user_workflow_multi_batch_quantiles_value_ranges_rule(
"expect_column_quantile_values_to_be_between_quantile_ranges_by_column"
][column_name],
)
for value_range in list(paired_quantiles):
for value_ranges in list(paired_quantiles):
for idx in range(2):
np.testing.assert_allclose(
actual=value_range[0][idx],
desired=value_range[1][idx],
actual=value_ranges[0][idx],
desired=value_ranges[1][idx],
rtol=rtol,
atol=atol,
err_msg=f"Actual value of {value_range[0][idx]} differs from expected value of {value_range[1][idx]} by more than {atol + rtol * abs(value_range[1][idx])} tolerance.",
err_msg=f"Actual value of {value_ranges[0][idx]} differs from expected value of {value_ranges[1][idx]} by more than {atol + rtol * abs(value_ranges[1][idx])} tolerance.",
)
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ variables:
- 2.5e-1
- 5.0e-1
- 7.5e-1
allow_relative_error: linear
num_bootstrap_samples: 9139

# BatchRequest yielding thirty-five (35) batches (January, 2018 -- November, 2020 trip data)
Expand Down Expand Up @@ -42,15 +43,17 @@ rules:
metric_domain_kwargs: $domain.domain_kwargs
metric_value_kwargs:
quantiles: $variables.quantiles
allow_relative_error: linear
allow_relative_error: $variables.allow_relative_error
Comment on lines -45 to +46
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @alexsherstinsky, one question about this value: where did this change come from?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From our synchronous discussion: previous version specified configuration incorrectly — detected upon studying https://greatexpectations.io/expectations/expect_column_quantile_values_to_be_between closely

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Shinnnyshinshin In addition: I put that value into the variables section so that it could be changed, if needed, more easily. Thanks!

false_positive_rate: $variables.false_positive_rate
num_bootstrap_samples: $variables.num_bootstrap_samples
expectation_configuration_builders:
- expectation_type: expect_column_quantile_values_to_be_between
class_name: DefaultExpectationConfigurationBuilder
module_name: great_expectations.rule_based_profiler.expectation_configuration_builder
column: $domain.domain_kwargs.column
quantiles: $variables.quantiles
value_ranges: $parameter.quantile_value_ranges.value.value_range
quantile_ranges:
quantiles: $variables.quantiles
value_ranges: $parameter.quantile_value_ranges.value.value_range
allow_relative_error: $variables.allow_relative_error
meta:
profiler_details: $parameter.quantile_value_ranges.details