diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
index accd95efbb..ab46d1cb91 100644
--- a/.github/workflows/integration_tests.yml
+++ b/.github/workflows/integration_tests.yml
@@ -62,3 +62,10 @@ jobs:
           env_vars: OS,PYTHON
           fail_ci_if_error: true
           verbose: true
+      - name: Benchmark python
+        run: FEAST_USAGE=False IS_TEST=True pytest --verbose --color=yes sdk/python/tests --integration --benchmark --benchmark-json=./benchmarks.json
+      - name: Upload Benchmark Artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: benchmarks.json
+          path: benchmarks.json
diff --git a/.github/workflows/pr_integration_tests.yml b/.github/workflows/pr_integration_tests.yml
index dda17496b2..101e4c4cbb 100644
--- a/.github/workflows/pr_integration_tests.yml
+++ b/.github/workflows/pr_integration_tests.yml
@@ -73,3 +73,10 @@ jobs:
           env_vars: OS,PYTHON
           fail_ci_if_error: true
           verbose: true
+      - name: Benchmark python
+        run: FEAST_USAGE=False IS_TEST=True pytest --verbose --color=yes sdk/python/tests --integration --benchmark --benchmark-json=./benchmarks.json
+      - name: Upload Benchmark Artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: benchmarks.json
+          path: benchmarks.json
diff --git a/Makefile b/Makefile
index f7fa7cdbee..b59585bf9c 100644
--- a/Makefile
+++ b/Makefile
@@ -52,6 +52,9 @@ compile-protos-python:
 install-python:
 	python -m pip install -e sdk/python -U --use-deprecated=legacy-resolver
 
+benchmark-python:
+	FEAST_USAGE=False IS_TEST=True pytest --integration --benchmark sdk/python/tests
+
 test-python:
 	FEAST_USAGE=False pytest -n 8 sdk/python/tests
 
diff --git a/sdk/python/setup.py b/sdk/python/setup.py
index b776369ca6..48a61a9f39 100644
--- a/sdk/python/setup.py
+++ b/sdk/python/setup.py
@@ -95,6 +95,7 @@
     "pytest==6.0.0",
     "pytest-cov",
     "pytest-xdist",
+    "pytest-benchmark>=3.4.1",
     "pytest-lazy-fixture==0.6.3",
     "pytest-timeout==1.4.2",
     "pytest-ordering==0.6.*",
diff --git a/sdk/python/tests/benchmarks/test_benchmark_universal_online_retrieval.py b/sdk/python/tests/benchmarks/test_benchmark_universal_online_retrieval.py
new file mode 100644
index 0000000000..b124cd5c76
--- /dev/null
+++ b/sdk/python/tests/benchmarks/test_benchmark_universal_online_retrieval.py
@@ -0,0 +1,58 @@
+import random
+
+import pytest
+
+from feast import FeatureService
+from tests.integration.feature_repos.repo_configuration import (
+    construct_universal_feature_views,
+)
+from tests.integration.feature_repos.universal.entities import customer, driver
+
+
+@pytest.mark.benchmark
+@pytest.mark.integration
+def test_online_retrieval(environment, universal_data_sources, benchmark):
+
+    fs = environment.feature_store
+    entities, datasets, data_sources = universal_data_sources
+    feature_views = construct_universal_feature_views(data_sources)
+
+    feature_service = FeatureService(
+        "convrate_plus100",
+        features=[feature_views["driver"][["conv_rate"]], feature_views["driver_odfv"]],
+    )
+
+    feast_objects = []
+    feast_objects.extend(feature_views.values())
+    feast_objects.extend([driver(), customer(), feature_service])
+    fs.apply(feast_objects)
+    fs.materialize(environment.start_date, environment.end_date)
+
+    sample_drivers = random.sample(entities["driver"], 10)
+
+    sample_customers = random.sample(entities["customer"], 10)
+
+    entity_rows = [
+        {"driver": d, "customer_id": c, "val_to_add": 50}
+        for (d, c) in zip(sample_drivers, sample_customers)
+    ]
+
+    feature_refs = [
+        "driver_stats:conv_rate",
+        "driver_stats:avg_daily_trips",
+        "customer_profile:current_balance",
"customer_profile:avg_passenger_count", + "customer_profile:lifetime_trip_count", + "conv_rate_plus_100:conv_rate_plus_100", + "conv_rate_plus_100:conv_rate_plus_val_to_add", + "global_stats:num_rides", + "global_stats:avg_ride_length", + ] + unprefixed_feature_refs = [f.rsplit(":", 1)[-1] for f in feature_refs if ":" in f] + # Remove the on demand feature view output features, since they're not present in the source dataframe + unprefixed_feature_refs.remove("conv_rate_plus_100") + unprefixed_feature_refs.remove("conv_rate_plus_val_to_add") + + benchmark( + fs.get_online_features, features=feature_refs, entity_rows=entity_rows, + ) diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index bf8f4d83a5..7708c3aa55 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -37,6 +37,7 @@ def pytest_configure(config): config.addinivalue_line( "markers", "integration: mark test that has external dependencies" ) + config.addinivalue_line("markers", "benchmark: mark benchmarking tests") def pytest_addoption(parser): @@ -46,17 +47,23 @@ def pytest_addoption(parser): default=False, help="Run tests with external dependencies", ) + parser.addoption( + "--benchmark", action="store_true", default=False, help="Run benchmark tests", + ) def pytest_collection_modifyitems(config, items): - if config.getoption("--integration"): - return + should_run_integration = config.getoption("--integration") is True + should_run_benchmark = config.getoption("--benchmark") is True skip_integration = pytest.mark.skip( reason="not running tests with external dependencies" ) + skip_benchmark = pytest.mark.skip(reason="not running benchmarks") for item in items: - if "integration" in item.keywords: + if "integration" in item.keywords and not should_run_integration: item.add_marker(skip_integration) + if "benchmark" in item.keywords and not should_run_benchmark: + item.add_marker(skip_benchmark) @pytest.fixture diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index 9e2d01a75b..297be1ac59 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -31,7 +31,7 @@ ) -@dataclass(frozen=True, repr=True) +@dataclass(frozen=True) class IntegrationTestRepoConfig: """ This class should hold all possible parameters that may need to be varied by individual tests. @@ -46,6 +46,17 @@ class IntegrationTestRepoConfig: infer_event_timestamp_col: bool = True infer_features: bool = False + def __repr__(self) -> str: + return "-".join( + [ + f"Provider: {self.provider}", + f"{self.offline_store_creator.__name__.split('.')[-1].rstrip('DataSourceCreator')}", + self.online_store + if isinstance(self.online_store, str) + else self.online_store["type"], + ] + ) + DYNAMO_CONFIG = {"type": "dynamodb", "region": "us-west-2"} REDIS_CONFIG = {"type": "redis", "connection_string": "localhost:6379,db=0"}