diff --git a/infra/scripts/test-docker-compose-databricks.sh b/infra/scripts/test-docker-compose-databricks.sh
index a5accb34bf..6b96ff2770 100755
--- a/infra/scripts/test-docker-compose-databricks.sh
+++ b/infra/scripts/test-docker-compose-databricks.sh
@@ -17,7 +17,7 @@ clean_up () {
     docker-compose $COMPOSE_ARGS down
 
     # Remove configuration file
-    rm .env
+    rm -f .env
 
     exit $ARG
 }
diff --git a/infra/scripts/test-docker-compose.sh b/infra/scripts/test-docker-compose.sh
index 4cfe33b669..8c054b1a80 100755
--- a/infra/scripts/test-docker-compose.sh
+++ b/infra/scripts/test-docker-compose.sh
@@ -15,7 +15,7 @@ clean_up () {
     docker-compose -f docker-compose.yml -f docker-compose.online.yml down
 
     # Remove configuration file
-    rm .env
+    rm -f .env
 
     exit $ARG
 }
diff --git a/tests/ds_scenarios/ds_fraud_feature_data.py b/tests/ds_scenarios/ds_fraud_feature_data.py
new file mode 100644
index 0000000000..a6047a9f2a
--- /dev/null
+++ b/tests/ds_scenarios/ds_fraud_feature_data.py
@@ -0,0 +1,55 @@
+import datetime
+import numpy as np
+import pandas as pd
+from feast import Feature, FeatureSet, Entity, ValueType
+from pytz import utc
+
+"""
+
+Fraud features: customer counts for different windows of time (15M throughout day):
+- FR1-7: int
+
+"""
+FRAUD_COUNTS_FEATURE_SET = FeatureSet(
+    'fraud_count_features',
+    entities=[Entity('customer_id', ValueType.INT64)],
+    features=[
+        Feature('window_count1', ValueType.INT64),
+        Feature('window_count2', ValueType.INT64),
+        Feature('window_count3', ValueType.INT64),
+        Feature('window_count4', ValueType.INT64),
+        Feature('window_count5', ValueType.INT64),
+        Feature('window_count6', ValueType.INT64),
+        Feature('window_count7', ValueType.INT64),
+    ]
+)
+
+
+def create_fraud_counts_df(initial_customer_id=1, n=1000, dt=None):
+    """Build a DataFrame of random fraud window counts for ``n`` customers.
+
+    Args:
+        initial_customer_id: First customer id; ids increase by one per row.
+        n: Number of rows to generate.
+        dt: Value for the 'datetime' column; defaults to the current UTC time.
+
+    Returns:
+        A pandas DataFrame with 'datetime', 'customer_id' and
+        'window_count1'..'window_count7' columns. Each count is a random
+        integer from 1 up to that column's upper bound, inclusive.
+    """
+    if dt is None:
+        dt = datetime.datetime.now(datetime.timezone.utc)
+    # np.random.random_integers is deprecated; randint excludes its upper
+    # bound, so pass high + 1 to keep the original inclusive [1, high] range.
+    return pd.DataFrame({
+        'datetime': dt,
+        'customer_id': list(range(initial_customer_id, initial_customer_id + n)),
+        'window_count1': list(np.random.randint(1, 10 + 1, size=n)),
+        'window_count2': list(np.random.randint(1, 20 + 1, size=n)),
+        'window_count3': list(np.random.randint(1, 50 + 1, size=n)),
+        'window_count4': list(np.random.randint(1, 100 + 1, size=n)),
+        'window_count5': list(np.random.randint(1, 1000 + 1, size=n)),
+        'window_count6': list(np.random.randint(1, 2000 + 1, size=n)),
+        'window_count7': list(np.random.randint(1, 5000 + 1, size=n)),
+    })
diff --git a/tests/ds_scenarios/test-ingest.py b/tests/ds_scenarios/test-ingest.py
index f9a7c85521..a0190d8874 100644
--- a/tests/ds_scenarios/test-ingest.py
+++ b/tests/ds_scenarios/test-ingest.py
@@ -7,6 +7,9 @@
     PRODUCT_IMAGE_FEATURE_SET, create_product_image_features_df,
     PRODUCT_TEXT_ATTRIBUTE_FEATURE_SET, create_product_text_attributes_df,
 )
+from ds_fraud_feature_data import (
+    FRAUD_COUNTS_FEATURE_SET, create_fraud_counts_df,
+)
 
 PROJECT_NAME = 'ds_' + uuid.uuid4().hex.upper()[0:6]
 
@@ -48,6 +51,7 @@ def client(core_url, serving_url, allow_dirty):
 @pytest.mark.parametrize("data_frame_generator,feature_set", [
     (create_product_image_features_df, PRODUCT_IMAGE_FEATURE_SET),
     (create_product_text_attributes_df, PRODUCT_TEXT_ATTRIBUTE_FEATURE_SET),
+    (create_fraud_counts_df, FRAUD_COUNTS_FEATURE_SET),
 ])
 def test_ingestion(client, data_frame_generator, feature_set):
     client.apply(feature_set)