diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
index accd95efbb..ab46d1cb91 100644
--- a/.github/workflows/integration_tests.yml
+++ b/.github/workflows/integration_tests.yml
@@ -62,3 +62,10 @@ jobs:
           env_vars: OS,PYTHON
           fail_ci_if_error: true
           verbose: true
+      - name: Benchmark python
+        run: FEAST_USAGE=False IS_TEST=True pytest --verbose --color=yes sdk/python/tests --integration --benchmark --benchmark-json=./benchmarks.json
+      - name: Upload Benchmark Artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: benchmarks.json
+          path: benchmarks.json
diff --git a/.github/workflows/pr_integration_tests.yml b/.github/workflows/pr_integration_tests.yml
index dda17496b2..101e4c4cbb 100644
--- a/.github/workflows/pr_integration_tests.yml
+++ b/.github/workflows/pr_integration_tests.yml
@@ -73,3 +73,10 @@ jobs:
           env_vars: OS,PYTHON
           fail_ci_if_error: true
           verbose: true
+      - name: Benchmark python
+        run: FEAST_USAGE=False IS_TEST=True pytest --verbose --color=yes sdk/python/tests --integration --benchmark --benchmark-json=./benchmarks.json
+      - name: Upload Benchmark Artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: benchmarks.json
+          path: benchmarks.json
diff --git a/Makefile b/Makefile
index f7fa7cdbee..b59585bf9c 100644
--- a/Makefile
+++ b/Makefile
@@ -52,6 +52,9 @@ compile-protos-python:
 install-python:
 	python -m pip install -e sdk/python -U --use-deprecated=legacy-resolver
 
+benchmark-python:
+	FEAST_USAGE=False IS_TEST=True pytest --integration --benchmark sdk/python/tests
+
 test-python:
 	FEAST_USAGE=False pytest -n 8 sdk/python/tests
 
diff --git a/sdk/python/setup.py b/sdk/python/setup.py
index b776369ca6..48a61a9f39 100644
--- a/sdk/python/setup.py
+++ b/sdk/python/setup.py
@@ -95,6 +95,7 @@
     "pytest==6.0.0",
     "pytest-cov",
     "pytest-xdist",
+    "pytest-benchmark>=3.4.1",
     "pytest-lazy-fixture==0.6.3",
     "pytest-timeout==1.4.2",
     "pytest-ordering==0.6.*",
diff --git a/sdk/python/tests/benchmarks/test_benchmark_universal_online_retrieval.py b/sdk/python/tests/benchmarks/test_benchmark_universal_online_retrieval.py
new file mode 100644
index 0000000000..b124cd5c76
--- /dev/null
+++ b/sdk/python/tests/benchmarks/test_benchmark_universal_online_retrieval.py
@@ -0,0 +1,58 @@
+import random
+
+import pytest
+
+from feast import FeatureService
+from tests.integration.feature_repos.repo_configuration import (
+    construct_universal_feature_views,
+)
+from tests.integration.feature_repos.universal.entities import customer, driver
+
+
+@pytest.mark.benchmark
+@pytest.mark.integration
+def test_online_retrieval(environment, universal_data_sources, benchmark):
+
+    fs = environment.feature_store
+    entities, datasets, data_sources = universal_data_sources
+    feature_views = construct_universal_feature_views(data_sources)
+
+    feature_service = FeatureService(
+        "convrate_plus100",
+        features=[feature_views["driver"][["conv_rate"]], feature_views["driver_odfv"]],
+    )
+
+    feast_objects = []
+    feast_objects.extend(feature_views.values())
+    feast_objects.extend([driver(), customer(), feature_service])
+    fs.apply(feast_objects)
+    fs.materialize(environment.start_date, environment.end_date)
+
+    sample_drivers = random.sample(entities["driver"], 10)
+
+    sample_customers = random.sample(entities["customer"], 10)
+
+    entity_rows = [
+        {"driver": d, "customer_id": c, "val_to_add": 50}
+        for (d, c) in zip(sample_drivers, sample_customers)
+    ]
+
+    feature_refs = [
+        "driver_stats:conv_rate",
+        "driver_stats:avg_daily_trips",
+        "customer_profile:current_balance",
"customer_profile:avg_passenger_count", + "customer_profile:lifetime_trip_count", + "conv_rate_plus_100:conv_rate_plus_100", + "conv_rate_plus_100:conv_rate_plus_val_to_add", + "global_stats:num_rides", + "global_stats:avg_ride_length", + ] + unprefixed_feature_refs = [f.rsplit(":", 1)[-1] for f in feature_refs if ":" in f] + # Remove the on demand feature view output features, since they're not present in the source dataframe + unprefixed_feature_refs.remove("conv_rate_plus_100") + unprefixed_feature_refs.remove("conv_rate_plus_val_to_add") + + benchmark( + fs.get_online_features, features=feature_refs, entity_rows=entity_rows, + ) diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index bf8f4d83a5..7708c3aa55 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -37,6 +37,7 @@ def pytest_configure(config): config.addinivalue_line( "markers", "integration: mark test that has external dependencies" ) + config.addinivalue_line("markers", "benchmark: mark benchmarking tests") def pytest_addoption(parser): @@ -46,17 +47,23 @@ def pytest_addoption(parser): default=False, help="Run tests with external dependencies", ) + parser.addoption( + "--benchmark", action="store_true", default=False, help="Run benchmark tests", + ) def pytest_collection_modifyitems(config, items): - if config.getoption("--integration"): - return + should_run_integration = config.getoption("--integration") is True + should_run_benchmark = config.getoption("--benchmark") is True skip_integration = pytest.mark.skip( reason="not running tests with external dependencies" ) + skip_benchmark = pytest.mark.skip(reason="not running benchmarks") for item in items: - if "integration" in item.keywords: + if "integration" in item.keywords and not should_run_integration: item.add_marker(skip_integration) + if "benchmark" in item.keywords and not should_run_benchmark: + item.add_marker(skip_benchmark) @pytest.fixture diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index 9e2d01a75b..297be1ac59 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -31,7 +31,7 @@ ) -@dataclass(frozen=True, repr=True) +@dataclass(frozen=True) class IntegrationTestRepoConfig: """ This class should hold all possible parameters that may need to be varied by individual tests. @@ -46,6 +46,17 @@ class IntegrationTestRepoConfig: infer_event_timestamp_col: bool = True infer_features: bool = False + def __repr__(self) -> str: + return "-".join( + [ + f"Provider: {self.provider}", + f"{self.offline_store_creator.__name__.split('.')[-1].rstrip('DataSourceCreator')}", + self.online_store + if isinstance(self.online_store, str) + else self.online_store["type"], + ] + ) + DYNAMO_CONFIG = {"type": "dynamodb", "region": "us-west-2"} REDIS_CONFIG = {"type": "redis", "connection_string": "localhost:6379,db=0"}