forked from feast-dev/feast
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Add support for DynamoDB online_read in batches (feast-dev#2371)
* feat: dynamodb online read in batches Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * run linters and format Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * feat: batch_size parameter Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * docs: typo in batch_size description Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * trailing white space Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * fix: batch_size is last argument Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * test: dynamodb online store online_read in batches Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * test: mock dynamodb behavior Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * feat: batch_size value must be less than 40 Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * feat: batch_size defaults to 40 Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * feat: sort dynamodb responses Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * resolve merge conflicts Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * test online response proto with redshift:dynamodb Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * feat: consistency in batch_size process Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * fix: return batch_size times None Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * remove debug code Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * typo in docstring Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> * batch_size in onlineconfigstore Signed-off-by: Miguel Trejo <armando.trejo.marrufo@gmail.com> Co-authored-by: Danny Chiao <danny@tecton.ai>
- Loading branch information
1 parent
45db6dc
commit 702ec49
Showing
3 changed files
with
170 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
57 changes: 57 additions & 0 deletions
57
sdk/python/tests/unit/online_store/test_dynamodb_online_store.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
from dataclasses import dataclass | ||
|
||
import pytest | ||
from moto import mock_dynamodb2 | ||
|
||
from feast.infra.offline_stores.file import FileOfflineStoreConfig | ||
from feast.infra.online_stores.dynamodb import ( | ||
DynamoDBOnlineStore, | ||
DynamoDBOnlineStoreConfig, | ||
) | ||
from feast.repo_config import RepoConfig | ||
from tests.utils.online_store_utils import ( | ||
_create_n_customer_test_samples, | ||
_create_test_table, | ||
_insert_data_test_table, | ||
) | ||
|
||
REGISTRY = "s3://test_registry/registry.db" | ||
PROJECT = "test_aws" | ||
PROVIDER = "aws" | ||
TABLE_NAME = "dynamodb_online_store" | ||
REGION = "us-west-2" | ||
|
||
|
||
@dataclass
class MockFeatureView:
    """Lightweight stand-in passed as the ``table`` argument of ``online_read``.

    It carries nothing but a name.
    """

    # Table name suffix; the test builds it from TABLE_NAME and the sample count.
    name: str
|
||
|
||
@pytest.fixture
def repo_config():
    """Provide a ``RepoConfig`` wired to a DynamoDB online store in ``REGION``.

    The offline store is a default-constructed ``FileOfflineStoreConfig``.
    """
    online_store_config = DynamoDBOnlineStoreConfig(region=REGION)
    offline_store_config = FileOfflineStoreConfig()
    return RepoConfig(
        registry=REGISTRY,
        project=PROJECT,
        provider=PROVIDER,
        online_store=online_store_config,
        offline_store=offline_store_config,
    )
|
||
|
||
@mock_dynamodb2
@pytest.mark.parametrize("n_samples", [5, 50, 100])
def test_online_read(repo_config, n_samples):
    """Check that ``online_read`` returns every inserted row, in input order."""
    # The table name is suffixed with the parametrized sample count.
    table_name = f"{TABLE_NAME}_{n_samples}"
    _create_test_table(PROJECT, table_name, REGION)
    samples = _create_n_customer_test_samples(n=n_samples)
    _insert_data_test_table(samples, PROJECT, table_name, REGION)

    # Split the (entity_key, features) pairs into two parallel tuples.
    entity_keys, features = zip(*samples)
    store = DynamoDBOnlineStore()
    results = store.online_read(
        config=repo_config,
        table=MockFeatureView(name=table_name),
        entity_keys=entity_keys,
    )
    assert len(results) == len(samples)
    # Element 1 of each returned item is compared against the inserted features.
    returned_features = [row[1] for row in results]
    assert returned_features == list(features)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
from datetime import datetime | ||
|
||
import boto3 | ||
|
||
from feast import utils | ||
from feast.infra.online_stores.helpers import compute_entity_id | ||
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto | ||
from feast.protos.feast.types.Value_pb2 import Value as ValueProto | ||
|
||
|
||
def _create_n_customer_test_samples(n=10):
    """Return ``n`` ``(entity_key, features)`` pairs for synthetic customers.

    Each entity key has the single join key ``"customer"`` whose value is the
    sample index rendered as a string. Every sample carries the same three
    feature values.
    """
    samples = []
    for idx in range(n):
        entity_key = EntityKeyProto(
            join_keys=["customer"],
            entity_values=[ValueProto(string_val=str(idx))],
        )
        feature_values = {
            "avg_orders_day": ValueProto(float_val=1.0),
            "name": ValueProto(string_val="John"),
            "age": ValueProto(int64_val=3),
        }
        samples.append((entity_key, feature_values))
    return samples
|
||
|
||
def _create_test_table(project, tbl_name, region):
    """Create the DynamoDB table ``"{project}.{tbl_name}"`` in ``region``.

    The table uses pay-per-request billing and a single string hash key named
    ``entity_id``.
    """
    dynamodb = boto3.client("dynamodb", region_name=region)
    table_definition = {
        "TableName": f"{project}.{tbl_name}",
        "KeySchema": [{"AttributeName": "entity_id", "KeyType": "HASH"}],
        "AttributeDefinitions": [
            {"AttributeName": "entity_id", "AttributeType": "S"},
        ],
        "BillingMode": "PAY_PER_REQUEST",
    }
    dynamodb.create_table(**table_definition)
|
||
|
||
def _delete_test_table(project, tbl_name, region):
    """Delete the DynamoDB table ``"{project}.{tbl_name}"`` in ``region``."""
    dynamodb = boto3.client("dynamodb", region_name=region)
    full_table_name = f"{project}.{tbl_name}"
    dynamodb.delete_table(TableName=full_table_name)
|
||
|
||
def _insert_data_test_table(data, project, tbl_name, region):
    """Write test rows into the DynamoDB table ``"{project}.{tbl_name}"``.

    Args:
        data: Iterable of ``(entity_key, features)`` pairs, where ``features``
            maps feature names to objects exposing ``SerializeToString()``.
        project: Prefix of the table name.
        tbl_name: Suffix of the table name.
        region: AWS region passed to the boto3 resource.
    """
    dynamodb_resource = boto3.resource("dynamodb", region_name=region)
    table_instance = dynamodb_resource.Table(f"{project}.{tbl_name}")
    # Open a single batch writer around the whole loop. Re-entering it for each
    # row flushes after every put, which sends one request per item and defeats
    # batching. ``overwrite_by_pkeys`` de-duplicates buffered items on the hash
    # key so repeated entity ids keep last-write-wins semantics instead of
    # being rejected as a duplicate-key batch.
    with table_instance.batch_writer(overwrite_by_pkeys=["entity_id"]) as batch:
        for entity_key, features in data:
            batch.put_item(
                Item={
                    "entity_id": compute_entity_id(entity_key),
                    "event_ts": str(utils.make_tzaware(datetime.utcnow())),
                    "values": {
                        k: v.SerializeToString() for k, v in features.items()
                    },
                }
            )