Skip to content

Commit

Permalink
working ibis schema validator
Browse files Browse the repository at this point in the history
  • Loading branch information
zilto authored and zilto committed Mar 3, 2024
1 parent 63f7d7c commit 769e327
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 18 deletions.
13 changes: 6 additions & 7 deletions examples/ibis/run.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,3 @@
import argparse

from hamilton import driver
from hamilton.execution.executors import SynchronousLocalTaskExecutor
from hamilton.plugins.h_tqdm import ProgressBar


def view_expression(expression, **kwargs):
"""View an Ibis expression
Expand Down Expand Up @@ -76,6 +69,12 @@ def main(level: str, model: str):


if __name__ == "__main__":
import argparse

from hamilton import driver
from hamilton.execution.executors import SynchronousLocalTaskExecutor
from hamilton.plugins.h_tqdm import ProgressBar

parser = argparse.ArgumentParser()
parser.add_argument("--level", choices=["column", "table"])
parser.add_argument("--model", choices=["linear", "random_forest", "boosting"])
Expand Down
18 changes: 12 additions & 6 deletions examples/ibis/table_dataflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,6 @@ def raw_table(raw_data_path: str) -> ir.Table:
return ibis.read_csv(sources=raw_data_path, table_name="absenteism").rename("snake_case")


# @check_output(
# schema=ibis.schema(
# [("has_children", "int"), ("has_pet", "bool")]
# )
# )
def feature_table(raw_table: ir.Table) -> ir.Table:
"""Add to `raw_table` the feature columns `has_children`,
`has_pet`, and `is_summer_brazil`
Expand All @@ -29,7 +24,18 @@ def feature_table(raw_table: ir.Table) -> ir.Table:
is_summer_brazil=ibis._.month_of_absence.isin([1, 2, 12]),
)


@check_output(
schema=ibis.schema({
'has_children': "int",
'has_pet': "bool",
'is_summer_brazil': "bool",
'service_time': "int",
'seasons': "int",
'disciplinary_failure': "int",
'absenteeism_time_in_hours': "int",
}),
importance="fail"
)
def feature_set(
feature_table: ir.Table,
feature_selection: list[str],
Expand Down
12 changes: 7 additions & 5 deletions hamilton/plugins/ibis_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@ def register_types():

class SchemaValidatorIbis(base.DataValidator):
def __init__(self, schema: ibis.expr.schema.Schema, importance: str):
"""
`schema` is an ordered mapping.
"""
"""`schema` is an ordered mapping"""
super(SchemaValidatorIbis, self).__init__(importance)
self.schema = schema

Expand All @@ -51,8 +49,12 @@ def description(self) -> str:

def validate(self, data: ir.Table) -> base.ValidationResult:
passes = data.schema().equals(self.schema)
message = ""
diagnostics = {}
if passes:
message = "Data passes Ibis schema check"
diagnostics = {"schema": self.schema.fields}
else:
message = "Data failed Ibis schema check"
diagnostics = {"schema": self.schema.fields}
return base.ValidationResult(
passes=passes,
message=message,
Expand Down

0 comments on commit 769e327

Please sign in to comment.