Skip to content

Commit

Permalink
Algattik/654 fraud scenario (feast-dev#22)
Browse files Browse the repository at this point in the history
Closes KE-654

Added third data science scenario, ingestion of data for fraud detection.
  • Loading branch information
algattik committed Jun 11, 2020
1 parent 24c0486 commit 76deb09
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 2 deletions.
2 changes: 1 addition & 1 deletion infra/scripts/test-docker-compose-databricks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ clean_up () {
docker-compose $COMPOSE_ARGS down

# Remove configuration file
rm .env
rm -f .env

exit $ARG
}
Expand Down
2 changes: 1 addition & 1 deletion infra/scripts/test-docker-compose.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ clean_up () {
docker-compose -f docker-compose.yml -f docker-compose.online.yml down

# Remove configuration file
rm .env
rm -f .env

exit $ARG
}
Expand Down
40 changes: 40 additions & 0 deletions tests/ds_scenarios/ds_fraud_feature_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import datetime
import numpy as np
import pandas as pd
from feast import Feature, FeatureSet, Entity, ValueType
from pytz import utc

"""
Fraud features: customer counts for different windows of time (15M throughout day):
- window_count1 .. window_count7: INT64
"""
# Feature set for the fraud scenario: keyed by an INT64 ``customer_id`` entity
# and carrying seven INT64 count features, window_count1 .. window_count7.
FRAUD_COUNTS_FEATURE_SET = FeatureSet(
    'fraud_count_features',
    entities=[Entity('customer_id', ValueType.INT64)],
    features=[
        Feature(feature_name, ValueType.INT64)
        for feature_name in (
            'window_count1',
            'window_count2',
            'window_count3',
            'window_count4',
            'window_count5',
            'window_count6',
            'window_count7',
        )
    ],
)

def create_fraud_counts_df(initial_customer_id=1, n=1000, dt=None):
    """Build a DataFrame of random fraud window counts for ``n`` customers.

    Args:
        initial_customer_id: First customer id; ids run consecutively from
            it up to ``initial_customer_id + n - 1``.
        n: Number of rows (one per customer) to generate.
        dt: Value placed in the ``datetime`` column of every row. When
            ``None``, the current timezone-aware UTC time is used.

    Returns:
        A ``pandas.DataFrame`` with the columns ``datetime``,
        ``customer_id`` and ``window_count1`` .. ``window_count7``. Each
        ``window_countK`` value is a random integer between 1 and that
        column's upper bound, inclusive.
    """
    if dt is None:
        dt = datetime.datetime.now(datetime.timezone.utc)

    # (column name, inclusive upper bound) in output column order.
    window_bounds = (
        ('window_count1', 10),
        ('window_count2', 20),
        ('window_count3', 50),
        ('window_count4', 100),
        ('window_count5', 1000),
        ('window_count6', 2000),
        ('window_count7', 5000),
    )

    columns = {
        'datetime': dt,
        'customer_id': list(range(initial_customer_id, initial_customer_id + n)),
    }
    for column_name, upper_bound in window_bounds:
        # np.random.random_integers is deprecated; randint excludes its upper
        # bound, so add 1 to keep the same inclusive range 1..upper_bound.
        columns[column_name] = list(
            np.random.randint(1, upper_bound + 1, size=n)
        )
    return pd.DataFrame(columns)
4 changes: 4 additions & 0 deletions tests/ds_scenarios/test-ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
PRODUCT_IMAGE_FEATURE_SET, create_product_image_features_df,
PRODUCT_TEXT_ATTRIBUTE_FEATURE_SET, create_product_text_attributes_df,
)
from ds_fraud_feature_data import (
FRAUD_COUNTS_FEATURE_SET, create_fraud_counts_df,
)

PROJECT_NAME = 'ds_' + uuid.uuid4().hex.upper()[0:6]

Expand Down Expand Up @@ -48,6 +51,7 @@ def client(core_url, serving_url, allow_dirty):
@pytest.mark.parametrize("data_frame_generator,feature_set", [
(create_product_image_features_df, PRODUCT_IMAGE_FEATURE_SET),
(create_product_text_attributes_df, PRODUCT_TEXT_ATTRIBUTE_FEATURE_SET),
(create_fraud_counts_df, FRAUD_COUNTS_FEATURE_SET),
])
def test_ingestion(client, data_frame_generator, feature_set):
client.apply(feature_set)
Expand Down

0 comments on commit 76deb09

Please sign in to comment.