Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow using pandas.StringDtype to support on-demand features with STRING type #2229

Merged
merged 1 commit into from
Jan 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 40 additions & 37 deletions sdk/python/feast/type_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def python_type_to_feast_value_type(
type_map = {
"int": ValueType.INT64,
"str": ValueType.STRING,
"string": ValueType.STRING, # pandas.StringDtype
"float": ValueType.DOUBLE,
"bytes": ValueType.BYTES,
"float64": ValueType.DOUBLE,
Expand All @@ -119,48 +120,50 @@ def python_type_to_feast_value_type(
if type_name in type_map:
return type_map[type_name]

if type_name == "ndarray" or isinstance(value, list):
if recurse:

# Convert to list type
list_items = pd.core.series.Series(value)

# This is the final type which we infer from the list
common_item_value_type = None
for item in list_items:
if isinstance(item, ProtoValue):
current_item_value_type: ValueType = _proto_value_to_value_type(
item
)
else:
# Get the type from the current item, only one level deep
current_item_value_type = python_type_to_feast_value_type(
name=name, value=item, recurse=False
)
# Validate whether the type stays consistent
if (
common_item_value_type
and not common_item_value_type == current_item_value_type
):
raise ValueError(
f"List value type for field {name} is inconsistent. "
f"{common_item_value_type} different from "
f"{current_item_value_type}."
)
common_item_value_type = current_item_value_type
if common_item_value_type is None:
return ValueType.UNKNOWN
return ValueType[common_item_value_type.name + "_LIST"]
else:
assert value
if isinstance(value, np.ndarray) and str(value.dtype) in type_map:
item_type = type_map[str(value.dtype)]
return ValueType[item_type.name + "_LIST"]

if isinstance(value, (list, np.ndarray)):
# If the value's type is "ndarray" and we couldn't infer the item type from "value.dtype",
# it is most probably an array of "object",
# so we need to iterate over the objects and try to infer the type of each item
if not recurse:
raise ValueError(
f"Value type for field {name} is {value.dtype.__str__()} but "
f"Value type for field {name} is {type(value)} but "
f"recursion is not allowed. Array types can only be one level "
f"deep."
)

assert value
return type_map[value.dtype.__str__()]
# This is the final type which we infer from the list
common_item_value_type = None
for item in value:
if isinstance(item, ProtoValue):
current_item_value_type: ValueType = _proto_value_to_value_type(item)
else:
# Get the type from the current item, only one level deep
current_item_value_type = python_type_to_feast_value_type(
name=name, value=item, recurse=False
)
# Validate whether the type stays consistent
if (
common_item_value_type
and not common_item_value_type == current_item_value_type
):
raise ValueError(
f"List value type for field {name} is inconsistent. "
f"{common_item_value_type} different from "
f"{current_item_value_type}."
)
common_item_value_type = current_item_value_type
if common_item_value_type is None:
return ValueType.UNKNOWN
return ValueType[common_item_value_type.name + "_LIST"]

raise ValueError(
f"Value with native type {type_name} "
f"cannot be converted into Feast value type"
)


def python_values_to_feast_value_type(
Expand Down
41 changes: 38 additions & 3 deletions sdk/python/tests/integration/registration/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from feast import Entity, Feature, RepoConfig, ValueType
from feast.data_source import RequestDataSource
from feast.errors import RegistryInferenceFailure
from feast.errors import RegistryInferenceFailure, SpecifiedFeaturesNotPresentError
from feast.feature_view import FeatureView
from feast.inference import (
update_data_sources_with_inferred_event_timestamp_col,
Expand Down Expand Up @@ -86,19 +86,54 @@ def test_update_data_sources_with_inferred_event_timestamp_col(simple_dataset_1)
)


def test_modify_feature_views_success():
def test_on_demand_features_type_inference():
# Create Feature Views
date_request = RequestDataSource(
name="date_request", schema={"some_date": ValueType.UNIX_TIMESTAMP}
)

@on_demand_feature_view(
inputs={"date_request": date_request},
features=[Feature("output", ValueType.UNIX_TIMESTAMP)],
features=[
Feature("output", ValueType.UNIX_TIMESTAMP),
Feature("string_output", ValueType.STRING),
],
)
def test_view(features_df: pd.DataFrame) -> pd.DataFrame:
data = pd.DataFrame()
data["output"] = features_df["some_date"]
data["string_output"] = features_df["some_date"].astype(pd.StringDtype())
return data

test_view.infer_features()

@on_demand_feature_view(
inputs={"date_request": date_request},
features=[
Feature("output", ValueType.UNIX_TIMESTAMP),
Feature("object_output", ValueType.STRING),
],
)
def invalid_test_view(features_df: pd.DataFrame) -> pd.DataFrame:
data = pd.DataFrame()
data["output"] = features_df["some_date"]
data["object_output"] = features_df["some_date"].astype(str)
return data

with pytest.raises(ValueError, match="Value with native type object"):
invalid_test_view.infer_features()

@on_demand_feature_view(
inputs={"date_request": date_request},
features=[
Feature("output", ValueType.UNIX_TIMESTAMP),
Feature("missing", ValueType.STRING),
],
)
def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame:
data = pd.DataFrame()
data["output"] = features_df["some_date"]
return data

with pytest.raises(SpecifiedFeaturesNotPresentError):
test_view_with_missing_feature.infer_features()