Skip to content

Commit

Permalink
Allow using pandas.StringDtype (#2229)
Browse files Browse the repository at this point in the history
Signed-off-by: pyalex <moskalenko.alexey@gmail.com>
  • Loading branch information
pyalex authored Jan 26, 2022
1 parent 396f729 commit d7707c1
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 40 deletions.
77 changes: 40 additions & 37 deletions sdk/python/feast/type_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ def python_type_to_feast_value_type(
type_map = {
"int": ValueType.INT64,
"str": ValueType.STRING,
"string": ValueType.STRING, # pandas.StringDtype
"float": ValueType.DOUBLE,
"bytes": ValueType.BYTES,
"float64": ValueType.DOUBLE,
Expand All @@ -118,48 +119,50 @@ def python_type_to_feast_value_type(
if type_name in type_map:
return type_map[type_name]

if type_name == "ndarray" or isinstance(value, list):
if recurse:

# Convert to list type
list_items = pd.core.series.Series(value)

# This is the final type which we infer from the list
common_item_value_type = None
for item in list_items:
if isinstance(item, ProtoValue):
current_item_value_type: ValueType = _proto_value_to_value_type(
item
)
else:
# Get the type from the current item, only one level deep
current_item_value_type = python_type_to_feast_value_type(
name=name, value=item, recurse=False
)
# Validate whether the type stays consistent
if (
common_item_value_type
and not common_item_value_type == current_item_value_type
):
raise ValueError(
f"List value type for field {name} is inconsistent. "
f"{common_item_value_type} different from "
f"{current_item_value_type}."
)
common_item_value_type = current_item_value_type
if common_item_value_type is None:
return ValueType.UNKNOWN
return ValueType[common_item_value_type.name + "_LIST"]
else:
assert value
if isinstance(value, np.ndarray) and str(value.dtype) in type_map:
item_type = type_map[str(value.dtype)]
return ValueType[item_type.name + "_LIST"]

if isinstance(value, (list, np.ndarray)):
# If the value is an "ndarray" whose item type could not be inferred from "value.dtype",
# it is most probably an array of "object",
# so we need to iterate over the items and try to infer the type of each one.
if not recurse:
raise ValueError(
f"Value type for field {name} is {value.dtype.__str__()} but "
f"Value type for field {name} is {type(value)} but "
f"recursion is not allowed. Array types can only be one level "
f"deep."
)

assert value
return type_map[value.dtype.__str__()]
# This is the final type which we infer from the list
common_item_value_type = None
for item in value:
if isinstance(item, ProtoValue):
current_item_value_type: ValueType = _proto_value_to_value_type(item)
else:
# Get the type from the current item, only one level deep
current_item_value_type = python_type_to_feast_value_type(
name=name, value=item, recurse=False
)
# Validate whether the type stays consistent
if (
common_item_value_type
and not common_item_value_type == current_item_value_type
):
raise ValueError(
f"List value type for field {name} is inconsistent. "
f"{common_item_value_type} different from "
f"{current_item_value_type}."
)
common_item_value_type = current_item_value_type
if common_item_value_type is None:
return ValueType.UNKNOWN
return ValueType[common_item_value_type.name + "_LIST"]

raise ValueError(
f"Value with native type {type_name} "
f"cannot be converted into Feast value type"
)


def python_values_to_feast_value_type(
Expand Down
41 changes: 38 additions & 3 deletions sdk/python/tests/integration/registration/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from feast import Entity, Feature, RepoConfig, ValueType
from feast.data_source import RequestDataSource
from feast.errors import RegistryInferenceFailure
from feast.errors import RegistryInferenceFailure, SpecifiedFeaturesNotPresentError
from feast.feature_view import FeatureView
from feast.inference import (
update_data_sources_with_inferred_event_timestamp_col,
Expand Down Expand Up @@ -86,19 +86,54 @@ def test_update_data_sources_with_inferred_event_timestamp_col(simple_dataset_1)
)


def test_modify_feature_views_success():
def test_on_demand_features_type_inference():
# Create Feature Views
date_request = RequestDataSource(
name="date_request", schema={"some_date": ValueType.UNIX_TIMESTAMP}
)

@on_demand_feature_view(
inputs={"date_request": date_request},
features=[Feature("output", ValueType.UNIX_TIMESTAMP)],
features=[
Feature("output", ValueType.UNIX_TIMESTAMP),
Feature("string_output", ValueType.STRING),
],
)
def test_view(features_df: pd.DataFrame) -> pd.DataFrame:
data = pd.DataFrame()
data["output"] = features_df["some_date"]
data["string_output"] = features_df["some_date"].astype(pd.StringDtype())
return data

test_view.infer_features()

@on_demand_feature_view(
inputs={"date_request": date_request},
features=[
Feature("output", ValueType.UNIX_TIMESTAMP),
Feature("object_output", ValueType.STRING),
],
)
def invalid_test_view(features_df: pd.DataFrame) -> pd.DataFrame:
data = pd.DataFrame()
data["output"] = features_df["some_date"]
data["object_output"] = features_df["some_date"].astype(str)
return data

with pytest.raises(ValueError, match="Value with native type object"):
invalid_test_view.infer_features()

@on_demand_feature_view(
inputs={"date_request": date_request},
features=[
Feature("output", ValueType.UNIX_TIMESTAMP),
Feature("missing", ValueType.STRING),
],
)
def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame:
data = pd.DataFrame()
data["output"] = features_df["some_date"]
return data

with pytest.raises(SpecifiedFeaturesNotPresentError):
test_view_with_missing_feature.infer_features()

0 comments on commit d7707c1

Please sign in to comment.