From 2c777818a040ab802ed9d12c16820468dd2350c6 Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Wed, 20 Apr 2022 10:38:51 -0700 Subject: [PATCH 1/3] Switch from `join_key` to `join_keys` in tests and docs Signed-off-by: Felix Wang --- docs/getting-started/concepts/entity.md | 2 +- docs/getting-started/concepts/feature-view.md | 2 +- docs/getting-started/quickstart.md | 4 ++-- docs/tutorials/validating-historical-features.md | 2 +- sdk/python/feast/feature_view.py | 2 +- sdk/python/feast/templates/aws/driver_repo.py | 6 +++--- sdk/python/feast/templates/gcp/driver_repo.py | 6 +++--- sdk/python/feast/templates/local/example.py | 2 +- .../feast/templates/snowflake/driver_repo.py | 6 +++--- .../tests/example_repos/example_feature_repo_1.py | 4 ++-- .../example_feature_repo_with_entity_join_key.py | 2 +- .../feature_repos/universal/entities.py | 2 +- .../test_universal_historical_retrieval.py | 2 +- .../integration/registration/test_feature_store.py | 6 ++++-- .../integration/registration/test_inference.py | 14 +++++++------- 15 files changed, 32 insertions(+), 30 deletions(-) diff --git a/docs/getting-started/concepts/entity.md b/docs/getting-started/concepts/entity.md index bc8aa2ac99..77cfc0aff2 100644 --- a/docs/getting-started/concepts/entity.md +++ b/docs/getting-started/concepts/entity.md @@ -3,7 +3,7 @@ An entity is a collection of semantically related features. Users define entities to map to the domain of their use case. For example, a ride-hailing service could have customers and drivers as their entities, which group related features that correspond to these customers and drivers. ```python -driver = Entity(name='driver', value_type=ValueType.STRING, join_key='driver_id') +driver = Entity(name='driver', value_type=ValueType.STRING, join_keys=['driver_id']) ``` Entities are typically defined as part of feature views. Entity name is used to reference the entity from a feature view definition and join key is used to identify the physical primary key on which feature values should be stored and retrieved. These keys are used during the lookup of feature values from the online store and the join process in point-in-time joins. It is possible to define composite entities \(more than one entity object\) in a feature view. It is also possible for feature views to have zero entities. See [feature view](feature-view.md) for more details. diff --git a/docs/getting-started/concepts/feature-view.md b/docs/getting-started/concepts/feature-view.md index 80fd803d1f..e3decf39c9 100644 --- a/docs/getting-started/concepts/feature-view.md +++ b/docs/getting-started/concepts/feature-view.md @@ -79,7 +79,7 @@ It is suggested that you dynamically specify the new FeatureView name using `.wi from feast import BigQuerySource, Entity, FeatureView, Field, ValueType from feast.types import Int32 -location = Entity(name="location", join_key="location_id", value_type=ValueType.INT64) +location = Entity(name="location", join_keys=["location_id"], value_type=ValueType.INT64) location_stats_fv= FeatureView( name="location_stats", diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index b188e0189d..e9a294d5fc 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -98,7 +98,7 @@ driver_hourly_stats = FileSource( # fetch features. # Entity has a name used for later reference (in a feature view, eg) # and join_key to identify physical field name used in storages -driver = Entity(name="driver", value_type=ValueType.INT64, join_key="driver_id", description="driver id",) +driver = Entity(name="driver", value_type=ValueType.INT64, join_keys=["driver_id"], description="driver id",) # Our parquet files contain sample data that includes a driver_id column, timestamps and # three feature column. Here we define a Feature View that will allow us to serve this @@ -168,7 +168,7 @@ driver_hourly_stats = FileSource( # fetch features. # Entity has a name used for later reference (in a feature view, eg) # and join_key to identify physical field name used in storages -driver = Entity(name="driver", value_type=ValueType.INT64, join_key="driver_id", description="driver id",) +driver = Entity(name="driver", value_type=ValueType.INT64, join_keys=["driver_id"], description="driver id",) # Our parquet files contain sample data that includes a driver_id column, timestamps and # three feature column. Here we define a Feature View that will allow us to serve this diff --git a/docs/tutorials/validating-historical-features.md b/docs/tutorials/validating-historical-features.md index 5f85e66c94..addd309902 100644 --- a/docs/tutorials/validating-historical-features.md +++ b/docs/tutorials/validating-historical-features.md @@ -129,7 +129,7 @@ batch_source = FileSource( ```python -taxi_entity = Entity(name='taxi', join_key='taxi_id') +taxi_entity = Entity(name='taxi', join_keys=['taxi_id']) ``` diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index ea5953e223..7060870780 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -45,7 +45,7 @@ DUMMY_ENTITY_NAME = "__dummy" DUMMY_ENTITY_VAL = "" DUMMY_ENTITY = Entity( - name=DUMMY_ENTITY_NAME, join_key=DUMMY_ENTITY_ID, value_type=ValueType.STRING, + name=DUMMY_ENTITY_NAME, join_keys=[DUMMY_ENTITY_ID], value_type=ValueType.STRING, ) diff --git a/sdk/python/feast/templates/aws/driver_repo.py b/sdk/python/feast/templates/aws/driver_repo.py index 04c3ea3b9c..5188f57cf8 100644 --- a/sdk/python/feast/templates/aws/driver_repo.py +++ b/sdk/python/feast/templates/aws/driver_repo.py @@ -9,10 +9,10 @@ driver = Entity( # Name of the entity. Must be unique within a project name="driver", - # The join key of an entity describes the storage level field/column on which - # features can be looked up. The join key is also used to join feature + # The join keys of an entity describe the storage level field/column on which + # features can be looked up. The join keys are also used to join feature # tables/views when building feature vectors - join_key="driver_id", + join_keys=["driver_id"], # The storage level type for an entity value_type=ValueType.INT64, ) diff --git a/sdk/python/feast/templates/gcp/driver_repo.py b/sdk/python/feast/templates/gcp/driver_repo.py index 7c686cdb6f..7d137f996b 100644 --- a/sdk/python/feast/templates/gcp/driver_repo.py +++ b/sdk/python/feast/templates/gcp/driver_repo.py @@ -9,10 +9,10 @@ driver = Entity( # Name of the entity. Must be unique within a project name="driver", - # The join key of an entity describes the storage level field/column on which - # features can be looked up. The join key is also used to join feature + # The join keys of an entity describe the storage level field/column on which + # features can be looked up. The join keys are also used to join feature # tables/views when building feature vectors - join_key="driver_id", + join_keys=["driver_id"], # The storage level type for an entity value_type=ValueType.INT64, ) diff --git a/sdk/python/feast/templates/local/example.py b/sdk/python/feast/templates/local/example.py index 56d24e60a5..1d441e0e99 100644 --- a/sdk/python/feast/templates/local/example.py +++ b/sdk/python/feast/templates/local/example.py @@ -16,7 +16,7 @@ # Define an entity for the driver. You can think of entity as a primary key used to # fetch features. -driver = Entity(name="driver", join_key="driver_id", value_type=ValueType.INT64,) +driver = Entity(name="driver", join_keys=["driver_id"], value_type=ValueType.INT64,) # Our parquet files contain sample data that includes a driver_id column, timestamps and # three feature column. Here we define a Feature View that will allow us to serve this diff --git a/sdk/python/feast/templates/snowflake/driver_repo.py b/sdk/python/feast/templates/snowflake/driver_repo.py index 55a25d445c..ecccb9863b 100644 --- a/sdk/python/feast/templates/snowflake/driver_repo.py +++ b/sdk/python/feast/templates/snowflake/driver_repo.py @@ -11,10 +11,10 @@ driver = Entity( # Name of the entity. Must be unique within a project name="driver", - # The join key of an entity describes the storage level field/column on which - # features can be looked up. The join key is also used to join feature + # The join keys of an entity describe the storage level field/column on which + # features can be looked up. The join keys are also used to join feature # tables/views when building feature vectors - join_key="driver_id", + join_keys=["driver_id"], ) # Indicates a data source from which feature values can be retrieved. Sources are queried when building training diff --git a/sdk/python/tests/example_repos/example_feature_repo_1.py b/sdk/python/tests/example_repos/example_feature_repo_1.py index bd07100af8..d8b6d7c89b 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_1.py +++ b/sdk/python/tests/example_repos/example_feature_repo_1.py @@ -45,14 +45,14 @@ driver = Entity( name="driver", # The name is derived from this argument, not object name. - join_key="driver_id", + join_keys=["driver_id"], value_type=ValueType.INT64, description="driver id", ) customer = Entity( name="customer", # The name is derived from this argument, not object name. - join_key="customer_id", + join_keys=["customer_id"], value_type=ValueType.STRING, ) diff --git a/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py b/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py index 3e1bbbba77..ba18cf84ba 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py +++ b/sdk/python/tests/example_repos/example_feature_repo_with_entity_join_key.py @@ -15,7 +15,7 @@ name="driver_id", value_type=ValueType.INT64, description="driver id", - join_key="driver", + join_keys=["driver"], ) diff --git a/sdk/python/tests/integration/feature_repos/universal/entities.py b/sdk/python/tests/integration/feature_repos/universal/entities.py index e8e90a6af6..b7a7583f1b 100644 --- a/sdk/python/tests/integration/feature_repos/universal/entities.py +++ b/sdk/python/tests/integration/feature_repos/universal/entities.py @@ -6,7 +6,7 @@ def driver(value_type: ValueType = ValueType.INT64): name="driver", # The name is derived from this argument, not object name. value_type=value_type, description="driver id", - join_key="driver_id", + join_keys=["driver_id"], ) diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index 0d6ef84ff4..b62f7cda24 100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -689,7 +689,7 @@ def test_historical_features_from_bigquery_sources_containing_backfills(environm created_timestamp_column="created", ) - driver = Entity(name="driver", join_key="driver_id", value_type=ValueType.INT64) + driver = Entity(name="driver", join_keys=["driver_id"], value_type=ValueType.INT64) driver_fv = FeatureView( name="driver_stats", entities=["driver"], diff --git a/sdk/python/tests/integration/registration/test_feature_store.py b/sdk/python/tests/integration/registration/test_feature_store.py index 39de7fc688..ca61734c78 100644 --- a/sdk/python/tests/integration/registration/test_feature_store.py +++ b/sdk/python/tests/integration/registration/test_feature_store.py @@ -219,7 +219,9 @@ def test_feature_view_inference_success(test_feature_store, dataframe_source): with prep_file_source( df=dataframe_source, event_timestamp_column="ts_1" ) as file_source: - entity = Entity(name="id", join_key="id_join_key", value_type=ValueType.INT64) + entity = Entity( + name="id", join_keys=["id_join_key"], value_type=ValueType.INT64 + ) fv1 = FeatureView( name="fv1", @@ -436,7 +438,7 @@ def test_reapply_feature_view_success(test_feature_store, dataframe_source): df=dataframe_source, event_timestamp_column="ts_1" ) as file_source: - e = Entity(name="id", join_key="id_join_key", value_type=ValueType.STRING) + e = Entity(name="id", join_keys=["id_join_key"], value_type=ValueType.STRING) # Create Feature View fv1 = FeatureView( diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index 558700dc9c..6e9aff1f03 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -54,8 +54,8 @@ def test_update_entities_with_inferred_types_from_feature_views( name="fv2", entities=["id"], batch_source=file_source_2, ttl=None, ) - actual_1 = Entity(name="id", join_key="id_join_key") - actual_2 = Entity(name="id", join_key="id_join_key") + actual_1 = Entity(name="id", join_keys=["id_join_key"]) + actual_2 = Entity(name="id", join_keys=["id_join_key"]) update_entities_with_inferred_types_from_feature_views( [actual_1], [fv1], RepoConfig(provider="local", project="test") @@ -64,16 +64,16 @@ def test_update_entities_with_inferred_types_from_feature_views( [actual_2], [fv2], RepoConfig(provider="local", project="test") ) assert actual_1 == Entity( - name="id", join_key="id_join_key", value_type=ValueType.INT64 + name="id", join_keys=["id_join_key"], value_type=ValueType.INT64 ) assert actual_2 == Entity( - name="id", join_key="id_join_key", value_type=ValueType.STRING + name="id", join_keys=["id_join_key"], value_type=ValueType.STRING ) with pytest.raises(RegistryInferenceFailure): # two viable data types update_entities_with_inferred_types_from_feature_views( - [Entity(name="id", join_key="id_join_key")], + [Entity(name="id", join_keys=["id_join_key"])], [fv1, fv2], RepoConfig(provider="local", project="test"), ) @@ -289,8 +289,8 @@ def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame: def test_update_feature_views_with_inferred_features(): file_source = FileSource(name="test", path="test path") - entity1 = Entity(name="test1", join_key="test_column_1") - entity2 = Entity(name="test2", join_key="test_column_2") + entity1 = Entity(name="test1", join_keys=["test_column_1"]) + entity2 = Entity(name="test2", join_keys=["test_column_2"]) feature_view_1 = FeatureView( name="test1", entities=[entity1], From 0cc06ca092251ed63c130eb4b2f4143c49c6041c Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Wed, 20 Apr 2022 10:56:28 -0700 Subject: [PATCH 2/3] Convert iterator to list so it can be used repeatedly Signed-off-by: Felix Wang --- sdk/python/feast/inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py index 38f3fa36cf..717e4a607b 100644 --- a/sdk/python/feast/inference.py +++ b/sdk/python/feast/inference.py @@ -32,7 +32,7 @@ def update_entities_with_inferred_types_from_feature_views( if not (incomplete_entities_keys & set(view.entities)): continue # skip if view doesn't contain any entities that need inference - col_names_and_types = view.batch_source.get_table_column_names_and_types(config) + col_names_and_types = list(view.batch_source.get_table_column_names_and_types(config)) for entity_name in view.entities: if entity_name in incomplete_entities: entity = incomplete_entities[entity_name] From 009ea7e1661b2d5e0a69a5282f6837128c052d6d Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Wed, 20 Apr 2022 11:42:32 -0700 Subject: [PATCH 3/3] Format Signed-off-by: Felix Wang --- sdk/python/feast/inference.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py index 717e4a607b..711b37c0b4 100644 --- a/sdk/python/feast/inference.py +++ b/sdk/python/feast/inference.py @@ -32,7 +32,9 @@ def update_entities_with_inferred_types_from_feature_views( if not (incomplete_entities_keys & set(view.entities)): continue # skip if view doesn't contain any entities that need inference - col_names_and_types = list(view.batch_source.get_table_column_names_and_types(config)) + col_names_and_types = list( + view.batch_source.get_table_column_names_and_types(config) + ) for entity_name in view.entities: if entity_name in incomplete_entities: entity = incomplete_entities[entity_name]