Skip to content

Commit

Permalink
Revert "Makes data validator API consistent with other type-checking"
Browse files: browse the repository at this point in the history
This reverts commit d903745.
  • Loading branch information
skrawcz committed May 25, 2023
1 parent 5cea4a7 commit 965ce96
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 51 deletions.
24 changes: 5 additions & 19 deletions hamilton/data_quality/base.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -35,27 +35,13 @@ def __init__(self, importance: str):
def importance(self) -> DataValidationLevel:
return self._importance

@classmethod
def applies_to(cls, datatype: Type[Type]) -> bool:
"""Whether or not this data validator can apply to the specified dataset.
Note that overriding this is not the intended API (it was the old one),
but this will be a stable part of the API moving forward, at least until
Hamilton 2.0.
@abc.abstractmethod
def applies_to(self, datatype: Type[Type]) -> bool:
"""Whether or not this data validator can apply to the specified dataset
:param datatype: Datatype to validate.
:param datatype:
:return: True if it can be run on the specified type, false otherwise
"""
for type_ in cls.applicable_types():
if type_ == Any or issubclass(type_, datatype):
return True
return False

@classmethod
def applicable_types(cls) -> List[type]:
"""Returns the list of classes for which this is valid.
:return: List of classes
"""
pass

@abc.abstractmethod
Expand Down Expand Up @@ -132,7 +118,7 @@ def __init__(self, importance: str):

@classmethod
@abc.abstractmethod
def applicable_types(cls) -> List[type]:
def applies_to(cls, datatype: Type[Type]) -> bool:
pass

@abc.abstractmethod
Expand Down
42 changes: 22 additions & 20 deletions hamilton/data_quality/default_validators.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -24,8 +24,8 @@ def arg(cls) -> str:
return "range"

@classmethod
def applicable_types(cls) -> List[type]:
return [pd.Series]
def applies_to(cls, datatype: Type[Type]) -> bool:
return issubclass(datatype, pd.Series) # TODO -- handle dataframes?

def description(self) -> str:
return f"Validates that the datapoint falls within the range ({self.range[0]}, {self.range[1]})"
Expand Down Expand Up @@ -69,8 +69,8 @@ def arg(cls) -> str:
return "values_in"

@classmethod
def applicable_types(cls) -> List[type]:
return [pd.Series]
def applies_to(cls, datatype: Type[Type]) -> bool:
return issubclass(datatype, pd.Series) # TODO -- handle dataframes?

def description(self) -> str:
return f"Validates that all data points are from a fixed set of values: ({self.values}), ignoring NA values."
Expand Down Expand Up @@ -113,8 +113,8 @@ def __init__(self, range: Tuple[numbers.Real, numbers.Real], importance: str):
self.range = range

@classmethod
def applicable_types(cls) -> List[type]:
return [numbers.Real]
def applies_to(cls, datatype: Type[Type]) -> bool:
return issubclass(datatype, numbers.Real)

def description(self) -> str:
return f"Validates that the datapoint falls within the range ({self.range[0]}, {self.range[1]})"
Expand Down Expand Up @@ -151,8 +151,10 @@ def arg(cls) -> str:
return "values_in"

@classmethod
def applicable_types(cls) -> List[type]:
return [numbers.Real, str]
def applies_to(cls, datatype: Type[Type]) -> bool:
return issubclass(datatype, numbers.Real) or issubclass(
datatype, str
) # TODO support list, dict and typing.* variants

def description(self) -> str:
return f"Validates that python values are from a fixed set of values: ({self.values})."
Expand Down Expand Up @@ -187,8 +189,8 @@ def _to_percent(fraction: float):
return "{0:.2%}".format(fraction)

@classmethod
def applicable_types(cls) -> List[type]:
return [pd.Series]
def applies_to(cls, datatype: Type[Type]) -> bool:
return issubclass(datatype, pd.Series)

def description(self) -> str:
return f"Validates that no more than {MaxFractionNansValidatorPandasSeries._to_percent(self.max_fraction_nans)} of the data is Nan."
Expand Down Expand Up @@ -249,8 +251,8 @@ def __init__(self, data_type: Type[Type], importance: str):
self.datatype = data_type

@classmethod
def applicable_types(cls) -> List[type]:
return [pd.Series]
def applies_to(cls, datatype: Type[Type]) -> bool:
return issubclass(datatype, pd.Series)

def description(self) -> str:
return f"Validates that the datatype of the pandas series is a subclass of: {self.datatype}"
Expand Down Expand Up @@ -280,8 +282,8 @@ def __init__(self, data_type: Type[Type], importance: str):
self.datatype = data_type

@classmethod
def applicable_types(cls) -> List[type]:
return [numbers.Real, str, bool, int, float, list, dict]
def applies_to(cls, datatype: Type[Type]) -> bool:
return issubclass(datatype, numbers.Real) or datatype in (str, bool)

def description(self) -> str:
return f"Validates that the datatype of the pandas series is a subclass of: {self.datatype}"
Expand Down Expand Up @@ -310,8 +312,8 @@ def __init__(self, max_standard_dev: float, importance: str):
self.max_standard_dev = max_standard_dev

@classmethod
def applicable_types(cls) -> List[type]:
return [pd.Series]
def applies_to(cls, datatype: Type[Type]) -> bool:
return issubclass(datatype, pd.Series)

def description(self) -> str:
return f"Validates that the standard deviation of a pandas series is no greater than : {self.max_standard_dev}"
Expand All @@ -338,8 +340,8 @@ def __init__(self, mean_in_range: Tuple[float, float], importance: str):
self.mean_in_range = mean_in_range

@classmethod
def applicable_types(cls) -> List[type]:
return [pd.Series]
def applies_to(cls, datatype: Type[Type]) -> bool:
return issubclass(datatype, pd.Series)

def description(self) -> str:
return f"Validates that a pandas series has mean in range [{self.mean_in_range[0]}, {self.mean_in_range[1]}]"
Expand All @@ -366,8 +368,8 @@ def __init__(self, allow_none: bool, importance: str):
self.allow_none = allow_none

@classmethod
def applicable_types(cls) -> List[type]:
return [Any]
def applies_to(cls, datatype: Type[Type]) -> bool:
return True

def description(self) -> str:
if self.allow_none:
Expand Down
14 changes: 9 additions & 5 deletions hamilton/data_quality/pandera_validators.py
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
from typing import List
from typing import Type

import pandas as pd
import pandera as pa
Expand All @@ -14,8 +14,10 @@ def __init__(self, schema: pa.DataFrameSchema, importance: str):
self.schema = schema

@classmethod
def applicable_types(cls) -> List[type]:
return [pd.DataFrame]
def applies_to(cls, datatype: Type[Type]) -> bool:
return issubclass(
datatype, pd.DataFrame
) # TODO -- allow for modin, etc. as they come for free with pandera

def description(self) -> str:
return "Validates that the returned dataframe matches the pander"
Expand Down Expand Up @@ -52,8 +54,10 @@ def __init__(self, schema: pa.SeriesSchema, importance: str):
self.schema = schema

@classmethod
def applicable_types(cls) -> List[type]:
return [pd.Series]
def applies_to(cls, datatype: Type[Type]) -> bool:
return issubclass(
datatype, pd.Series
) # TODO -- allow for modin, etc. as they come for free with pandera

def description(self) -> str:
pass
Expand Down
14 changes: 7 additions & 7 deletions tests/resources/dq_dummy_examples.py
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
from typing import List
from typing import Type

import pandas as pd

Expand All @@ -11,8 +11,8 @@ def __init__(self, equal_to: int, importance: str):
self.equal_to = equal_to

@classmethod
def applicable_types(cls) -> List[type]:
return [int]
def applies_to(cls, datatype: Type[Type]) -> bool:
return datatype == int

def description(self) -> str:
return "Data must be equal to 10 to be valid"
Expand Down Expand Up @@ -60,8 +60,8 @@ def validate(self, dataset: pd.Series) -> ValidationResult:
)

@classmethod
def applicable_types(cls) -> List[type]:
return [pd.Series]
def applies_to(cls, datatype: Type[Type]) -> bool:
return datatype == pd.Series

@classmethod
def arg(cls) -> str:
Expand Down Expand Up @@ -92,8 +92,8 @@ def validate(self, dataset: pd.Series) -> ValidationResult:
)

@classmethod
def applicable_types(cls) -> List[type]:
return [pd.Series]
def applies_to(cls, datatype: Type[Type]) -> bool:
return datatype == pd.Series

@classmethod
def arg(cls) -> str:
Expand Down

0 comments on commit 965ce96

Please sign in to comment.