Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Added Redshift and Spark typecheck to data_source event_timestamp_col inference #2389

Merged
merged 8 commits into from
Mar 17, 2022
2 changes: 2 additions & 0 deletions sdk/python/feast/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,9 @@ def update_data_sources_with_inferred_event_timestamp_col(
assert (
isinstance(data_source, FileSource)
or isinstance(data_source, BigQuerySource)
or isinstance(data_source, RedshiftSource)
adchia marked this conversation as resolved.
Show resolved Hide resolved
or isinstance(data_source, SnowflakeSource)
or "SparkSource" == data_source.__class__.__name__
)

# loop through table columns to find singular match
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,9 @@ class UniversalDataSources:
global_ds: DataSource
field_mapping: DataSource

def values(self):
    """Return a view over this dataclass instance's field values, in field order.

    The stored objects themselves are returned, not copies.
    ``dataclasses.asdict`` is deliberately avoided here: it recurses into
    and deep-copies field values, so attribute changes made by a caller on
    the returned objects would never reach the objects held by this
    instance (and would be gone again on the next call).
    """
    return {
        field.name: getattr(self, field.name) for field in dataclasses.fields(self)
    }.values()


def construct_universal_data_sources(
datasets: UniversalDatasets, data_source_creator: DataSourceCreator
Expand Down
26 changes: 25 additions & 1 deletion sdk/python/tests/integration/registration/test_inference.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from copy import deepcopy

import pandas as pd
import pytest

Expand Down Expand Up @@ -111,7 +113,7 @@ def test_infer_datasource_names_dwh():


@pytest.mark.integration
def test_update_data_sources_with_inferred_event_timestamp_col(simple_dataset_1):
def test_update_file_data_source_with_inferred_event_timestamp_col(simple_dataset_1):
df_with_two_viable_timestamp_cols = simple_dataset_1.copy(deep=True)
df_with_two_viable_timestamp_cols["ts_2"] = simple_dataset_1["ts_1"]

Expand All @@ -138,6 +140,28 @@ def test_update_data_sources_with_inferred_event_timestamp_col(simple_dataset_1)
)


@pytest.mark.integration
@pytest.mark.universal
adchia marked this conversation as resolved.
Show resolved Hide resolved
def test_update_data_sources_with_inferred_event_timestamp_col(universal_data_sources):
    """Check that the event timestamp column is inferred for each universal data source.

    The configured ``event_timestamp_column`` is cleared on every source,
    inference is run, and each source is then expected to report
    ``"event_timestamp"`` again.
    """
    (_, _, data_sources) = universal_data_sources
    # Work on a copy so the fixture's own objects are not modified.
    data_sources_copy = deepcopy(data_sources)

    # Take the sources once: clearing, inference and the final check must all
    # act on the very same objects, regardless of whether values() hands back
    # fresh objects on each call.
    sources = list(data_sources_copy.values())

    # remove defined event_timestamp_column to allow for inference
    for data_source in sources:
        data_source.event_timestamp_column = None

    update_data_sources_with_inferred_event_timestamp_col(
        sources, RepoConfig(provider="local", project="test"),
    )
    actual_event_timestamp_cols = [
        source.event_timestamp_column for source in sources
    ]

    assert actual_event_timestamp_cols == ["event_timestamp"] * len(sources)


def test_on_demand_features_type_inference():
# Create Feature Views
date_request = RequestDataSource(
Expand Down