From 789d7a1dac0bf22f421de1ffbef2d351a09598e8 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Wed, 18 Aug 2021 14:52:10 -0700
Subject: [PATCH 01/51] Fix API cruft from DataSourceCreator

Signed-off-by: Achal Shah
---
 .../universal/data_source_creator.py          |  2 +-
 .../universal/data_sources/bigquery.py        |  4 +--
 .../universal/data_sources/file.py            | 25 +++++++++++--------
 .../universal/data_sources/redshift.py        |  4 +--
 4 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
index fa5293c06d..6efadaa77f 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
@@ -28,5 +28,5 @@ def teardown(self):
         ...
 
     @abstractmethod
-    def get_prefixed_table_name(self, name: str, suffix: str) -> str:
+    def get_prefixed_table_name(self, table_name: str) -> str:
         ...
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py
index 74804e7512..070b87690f 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py
@@ -69,5 +69,5 @@ def create_data_sources(
             field_mapping=field_mapping or {"ts_1": "ts"},
         )
 
-    def get_prefixed_table_name(self, name: str, suffix: str) -> str:
-        return f"{self.client.project}.{name}.{suffix}"
+    def get_prefixed_table_name(self, suffix: str) -> str:
+        return f"{self.client.project}.{self.project_name}.{suffix}"
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py
index b386c399dc..0c570e8a5f 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py
@@ -1,5 +1,5 @@
 import tempfile
-from typing import Any, Dict
+from typing import Any, Dict, List
 
 import pandas as pd
 
@@ -14,10 +14,11 @@
 
 
 class FileDataSourceCreator(DataSourceCreator):
-    f: Any
+    files: List[Any]
 
-    def __init__(self, _: str):
-        pass
+    def __init__(self, project_name: str):
+        self.project_name = project_name
+        self.files = []
 
     def create_data_sources(
         self,
@@ -27,22 +28,26 @@ def create_data_sources(
         created_timestamp_column="created_ts",
         field_mapping: Dict[str, str] = None,
     ) -> DataSource:
-        self.f = tempfile.NamedTemporaryFile(suffix=".parquet", delete=False)
-        df.to_parquet(self.f.name)
+        f = tempfile.NamedTemporaryFile(
+            prefix=self.project_name, suffix=".parquet", delete=False
+        )
+        df.to_parquet(f.name)
+        self.files.append(f)
         return FileSource(
             file_format=ParquetFormat(),
-            path=f"file://{self.f.name}",
+            path=f"file://{f.name}",
             event_timestamp_column=event_timestamp_column,
             created_timestamp_column=created_timestamp_column,
             date_partition_column="",
             field_mapping=field_mapping or {"ts_1": "ts"},
         )
 
-    def get_prefixed_table_name(self, name: str, suffix: str) -> str:
-        return f"{name}.{suffix}"
+    def get_prefixed_table_name(self, suffix: str) -> str:
+        return f"{self.project_name}.{suffix}"
 
     def create_offline_store_config(self) -> FeastConfigBaseModel:
         return FileOfflineStoreConfig()
 
     def teardown(self):
-        self.f.close()
+        for f in self.files:
+            f.close()
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
index f731b60bb3..3802a49510 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
@@ -65,8 +65,8 @@ def create_data_sources(
     def create_offline_store_config(self) -> FeastConfigBaseModel:
         return self.offline_store_config
 
-    def get_prefixed_table_name(self, name: str, suffix: str) -> str:
-        return f"{name}_{suffix}"
+    def get_prefixed_table_name(self, suffix: str) -> str:
+        return f"{self.project_name}_{suffix}"
 
     def teardown(self):
         for table in self.tables:
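The patch above narrows get_prefixed_table_name from a (name, suffix) pair to a single suffix argument, with the prefix coming from state the creator already holds. A minimal self-contained sketch of the resulting call shape — MiniCreator and the project name below are illustrative stand-ins, not part of the patch:

class MiniCreator:
    def __init__(self, project_name: str):
        # The prefix is captured once at construction instead of being
        # threaded through every call site.
        self.project_name = project_name

    def get_prefixed_table_name(self, suffix: str) -> str:
        # Mirrors the Redshift implementation's underscore-joined form.
        return f"{self.project_name}_{suffix}"

creator = MiniCreator("test_correctness_ab12cd34")
assert creator.get_prefixed_table_name("driver_hourly") == "test_correctness_ab12cd34_driver_hourly"

Dropping the redundant parameter also removes the possibility of callers passing a prefix that disagrees with the creator's own project.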
From 8b7eab57e994390949820b575efaa28807d03c5b Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Wed, 18 Aug 2021 15:16:13 -0700
Subject: [PATCH 02/51] Remove the need for get_prefixed_table_name

Signed-off-by: Achal Shah
---
 .../feature_repos/test_repo_configuration.py  | 22 +++++++------------
 .../universal/data_source_creator.py          |  5 +++--
 .../universal/data_sources/bigquery.py        |  9 ++++++--
 .../universal/data_sources/file.py            | 10 +++++++--
 .../universal/data_sources/redshift.py        |  9 ++++++--
 5 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index f912f4dbdb..e18135520c 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -112,12 +112,9 @@ class Environment:
 
     def customer_feature_view(self) -> FeatureView:
         if self._customer_feature_view is None:
-            customer_table_id = self.data_source_creator.get_prefixed_table_name(
-                self.name, "customer_profile"
-            )
             ds = self.data_source_creator.create_data_sources(
-                customer_table_id,
                 self.customer_df,
+                suffix="customer_profile",
                 event_timestamp_column="event_timestamp",
                 created_timestamp_column="created",
             )
@@ -126,12 +123,9 @@ def customer_feature_view(self) -> FeatureView:
 
     def driver_stats_feature_view(self) -> FeatureView:
         if self._driver_stats_feature_view is None:
-            driver_table_id = self.data_source_creator.get_prefixed_table_name(
-                self.name, "driver_hourly"
-            )
             ds = self.data_source_creator.create_data_sources(
-                driver_table_id,
                 self.driver_df,
+                suffix="driver_hourly",
                 event_timestamp_column="event_timestamp",
                 created_timestamp_column="created",
             )
@@ -142,12 +136,9 @@ def driver_stats_feature_view(self) -> FeatureView:
 
     def orders_table(self) -> Optional[str]:
         if self._orders_table is None:
-            orders_table_id = self.data_source_creator.get_prefixed_table_name(
-                self.name, "orders"
-            )
             ds = self.data_source_creator.create_data_sources(
-                orders_table_id,
                 self.orders_df,
+                suffix="orders",
                 event_timestamp_column="event_timestamp",
                 created_timestamp_column="created",
             )
@@ -210,7 +201,6 @@ def construct_test_environment(
     :param test_repo_config: configuration
     :return: A feature store built using the supplied configuration.
     """
-    df = create_dataset()
 
     project = f"test_correctness_{str(uuid.uuid4()).replace('-', '')[:8]}"
 
@@ -221,9 +211,13 @@ def construct_test_environment(
     offline_creator: DataSourceCreator = importer.get_class_from_type(
         module_name, config_class_name, "DataSourceCreator"
    )(project)
+
+    # This needs to be abstracted away for test_e2e_universal which uses a different dataset.
+    df = create_dataset()
     ds = offline_creator.create_data_sources(
-        project, df, field_mapping={"ts_1": "ts", "id": "driver_id"}
+        df, destination=project, field_mapping={"ts_1": "ts", "id": "driver_id"}
     )
+
     offline_store = offline_creator.create_offline_store_config()
     online_store = test_repo_config.online_store
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
index 6efadaa77f..6c038f1046 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Dict
+from typing import Dict, Optional
 
 import pandas as pd
 
@@ -11,8 +11,9 @@ class DataSourceCreator(ABC):
     @abstractmethod
     def create_data_sources(
         self,
-        destination: str,
         df: pd.DataFrame,
+        destination: Optional[str] = None,
+        suffix: Optional[str] = None,
         event_timestamp_column="ts",
         created_timestamp_column="created_ts",
         field_mapping: Dict[str, str] = None,
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py
index 070b87690f..288951c5e4 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py
@@ -1,4 +1,4 @@
-from typing import Dict
+from typing import Dict, Optional
 
 import pandas as pd
 from google.cloud import bigquery
@@ -42,14 +42,19 @@ def create_offline_store_config(self):
 
     def create_data_sources(
         self,
-        destination: str,
         df: pd.DataFrame,
+        destination: Optional[str] = None,
+        suffix: Optional[str] = None,
         event_timestamp_column="ts",
         created_timestamp_column="created_ts",
         field_mapping: Dict[str, str] = None,
         **kwargs,
     ) -> DataSource:
 
+        assert destination or suffix
+        if not destination:
+            destination = self.get_prefixed_table_name(suffix)
+
         job_config = bigquery.LoadJobConfig()
         if self.gcp_project not in destination:
             destination = f"{self.gcp_project}.{self.project_name}.{destination}"
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py
index 0c570e8a5f..f212a5cfcd 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py
@@ -1,5 +1,5 @@
 import tempfile
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
 
 import pandas as pd
 
@@ -22,12 +22,18 @@ def __init__(self, project_name: str):
 
     def create_data_sources(
         self,
-        destination: str,
         df: pd.DataFrame,
+        destination: Optional[str] = None,
+        suffix: Optional[str] = None,
        event_timestamp_column="ts",
         created_timestamp_column="created_ts",
         field_mapping: Dict[str, str] = None,
     ) -> DataSource:
+
+        assert destination or suffix
+        if not destination:
+            destination = self.get_prefixed_table_name(suffix)
+
         f = tempfile.NamedTemporaryFile(
             prefix=self.project_name, suffix=".parquet", delete=False
         )
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
index 3802a49510..291ef89b27 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
@@ -1,4 +1,4 @@
-from typing import Dict
+from typing import Dict, Optional
 
 import pandas as pd
 
@@ -33,13 +33,18 @@ def __init__(self, project_name: str):
 
     def create_data_sources(
         self,
-        destination: str,
         df: pd.DataFrame,
+        destination: Optional[str] = None,
+        suffix: Optional[str] = None,
         event_timestamp_column="ts",
         created_timestamp_column="created_ts",
         field_mapping: Dict[str, str] = None,
     ) -> DataSource:
+
+        assert destination or suffix
+        if not destination:
+            destination = self.get_prefixed_table_name(suffix)
+
         aws_utils.upload_df_to_redshift(
             self.client,
             self.offline_store_config.cluster_id,

From 0cc21ab1dfefccc834d0b0733370b9d3944c3053 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Wed, 18 Aug 2021 17:09:13 -0700
Subject: [PATCH 03/51] major refactor

Signed-off-by: Achal Shah
---
 .../feature_repos/test_repo_configuration.py  | 167 ++++++++++--------
 .../test_universal_historical_retrieval.py    |  20 ++-
 .../online_store/test_universal_online.py     |  10 +-
 3 files changed, 112 insertions(+), 85 deletions(-)

diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index e18135520c..ac07215653 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -1,11 +1,12 @@
 import tempfile
 import uuid
 from contextlib import contextmanager
-from dataclasses import dataclass, replace
+from dataclasses import dataclass, field, replace
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Dict, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional, Union
 
+import pandas as pd
 import pytest
 
 from feast import FeatureStore, FeatureView, RepoConfig, driver_test_data, importer
@@ -71,9 +72,63 @@ def ds_creator_path(cls: str):
 ]
 
 
-OFFLINE_STORES: List[str] = []
-ONLINE_STORES: List[str] = []
-PROVIDERS: List[str] = []
+def construct_entities() -> Dict[str, List[Any]]:
+    return {"customer": list(range(1001, 1110)), "driver": list(range(5001, 5110))}
+
+
+def construct_datasets(
+    entities: Dict[str, List[Any]], start_time: datetime, end_time: datetime
+) -> Dict[str, pd.DataFrame]:
+    customer_df = driver_test_data.create_customer_daily_profile_df(
+        entities["customer"], start_time, end_time
+    )
+    driver_df = driver_test_data.create_driver_hourly_stats_df(
+        entities["driver"], start_time, end_time
+    )
+    orders_df = driver_test_data.create_orders_df(
+        customers=entities["customer"],
+        drivers=entities["driver"],
+        start_date=end_time - timedelta(days=365),
+        end_date=end_time + timedelta(days=365),
+        order_count=1000,
+    )
+
+    return {"customer": customer_df, "driver": driver_df, "orders": orders_df}
+
+
+def construct_data_sources(
+    datasets: Dict[str, pd.DataFrame], data_source_creator: DataSourceCreator
+) -> Dict[str, DataSource]:
+    customer_ds = data_source_creator.create_data_sources(
+        datasets["customer"],
+        suffix="customer_profile",
+        event_timestamp_column="event_timestamp",
+        created_timestamp_column="created",
+    )
+    driver_ds = data_source_creator.create_data_sources(
+        datasets["driver"],
+        suffix="driver_hourly",
+        event_timestamp_column="event_timestamp",
+        created_timestamp_column="created",
+    )
+    orders_ds = data_source_creator.create_data_sources(
+        datasets["orders"],
+        suffix="orders",
+        event_timestamp_column="event_timestamp",
+        created_timestamp_column="created",
+    )
+    return {"customer": customer_ds, "driver": driver_ds, "orders": orders_ds}
+
+
+def construct_feature_views(
+    data_sources: Dict[str, DataSource]
+) -> Dict[str, FeatureView]:
+    return {
+        "customer": create_customer_daily_profile_feature_view(
+            data_sources["customer"]
+        ),
+        "driver": create_driver_hourly_stats_feature_view(data_sources["driver"]),
+    }
 
 
 @dataclass
@@ -84,69 +139,44 @@ class Environment:
     data_source: DataSource
     data_source_creator: DataSourceCreator
 
-    end_date = datetime.now().replace(microsecond=0, second=0, minute=0)
-    start_date = end_date - timedelta(days=7)
-    before_start_date = end_date - timedelta(days=365)
-    after_end_date = end_date + timedelta(days=365)
-
-    customer_entities = list(range(1001, 1110))
-    customer_df = driver_test_data.create_customer_daily_profile_df(
-        customer_entities, start_date, end_date
+    entites_creator: Callable[[], Dict[str, List[Any]]] = field(
+        default_factory=construct_entities
     )
-    _customer_feature_view: Optional[FeatureView] = None
-
-    driver_entities = list(range(5001, 5110))
-    driver_df = driver_test_data.create_driver_hourly_stats_df(
-        driver_entities, start_date, end_date
+    datasets_creator: Callable[
+        [Dict[str, List[Any]], datetime, datetime], Dict[str, pd.DataFrame]
+    ] = field(default=construct_datasets)
+    datasources_creator: Callable[
+        [Dict[str, pd.DataFrame]], Dict[str, DataSource]
+    ] = field(default=construct_data_sources)
+    feature_views_creator: Callable[
+        [Dict[str, DataSource]], Dict[str, FeatureView]
+    ] = field(default=construct_feature_views)
+
+    entites: Dict[str, List[Any]] = field(default_factory=dict)
+    datasets: Dict[str, pd.DataFrame] = field(default_factory=dict)
+    datasources: Dict[str, DataSource] = field(default_factory=dict)
+    feature_views: List[FeatureView] = field(default_factory=list)
+
+    end_date: datetime = field(
+        default=datetime.now().replace(microsecond=0, second=0, minute=0)
     )
-    _driver_stats_feature_view: Optional[FeatureView] = None
+    start_date: datetime = end_date - timedelta(days=7)
 
-    orders_df = driver_test_data.create_orders_df(
-        customers=customer_entities,
-        drivers=driver_entities,
-        start_date=before_start_date,
-        end_date=after_end_date,
-        order_count=1000,
-    )
-    _orders_table: Optional[str] = None
-
-    def customer_feature_view(self) -> FeatureView:
-        if self._customer_feature_view is None:
-            ds = self.data_source_creator.create_data_sources(
-                self.customer_df,
-                suffix="customer_profile",
-                event_timestamp_column="event_timestamp",
-                created_timestamp_column="created",
-            )
-            self._customer_feature_view = create_customer_daily_profile_feature_view(ds)
-        return self._customer_feature_view
-
-    def driver_stats_feature_view(self) -> FeatureView:
-        if self._driver_stats_feature_view is None:
-            ds = self.data_source_creator.create_data_sources(
-                self.driver_df,
-                suffix="driver_hourly",
-                event_timestamp_column="event_timestamp",
-                created_timestamp_column="created",
-            )
-            self._driver_stats_feature_view = create_driver_hourly_stats_feature_view(
-                ds
-            )
-        return self._driver_stats_feature_view
-
-    def orders_table(self) -> Optional[str]:
-        if self._orders_table is None:
-            ds = self.data_source_creator.create_data_sources(
-                self.orders_df,
-                suffix="orders",
-                event_timestamp_column="event_timestamp",
-                created_timestamp_column="created",
-            )
-            if hasattr(ds, "table_ref"):
-                self._orders_table = ds.table_ref
-            elif hasattr(ds, "table"):
-                self._orders_table = ds.table
-        return self._orders_table
+    def __post_init__(self):
+        self.entites = self.entites_creator()
+        self.datasets = self.datasets_creator(
+            self.entites, self.start_date, self.end_date
+        )
+        self.datasources = self.datasources_creator(self.datasets)
+        self.feature_views = self.feature_views_creator(self.datasources)
+
+
+def table_name_from_data_source(ds: DataSource) -> Optional[str]:
+    if hasattr(ds, "table_ref"):
+        return ds.table_ref
+    elif hasattr(ds, "table"):
+        return ds.table
+    return None
 
 
 def vary_full_feature_names(configs: List[TestRepoConfig]) -> List[TestRepoConfig]:
@@ -244,12 +274,7 @@ def construct_test_environment(
     try:
         if create_and_apply:
             entities.extend([driver(), customer()])
-            fvs.extend(
-                [
-                    environment.driver_stats_feature_view(),
-                    environment.customer_feature_view(),
-                ]
-            )
+            fvs.extend(environment.feature_views)
             fs.apply(fvs + entities)
 
         if materialize:
diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py
index 7379c27a62..6199ebacf8 100644
--- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py
+++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py
@@ -14,6 +14,7 @@
 from tests.integration.feature_repos.test_repo_configuration import (
     Environment,
     parametrize_offline_retrieval_test,
+    table_name_from_data_source,
 )
 
 np.random.seed(0)
@@ -139,20 +140,21 @@ def get_expected_training_df(
 def test_historical_features(environment: Environment):
     store = environment.feature_store
 
-    customer_df, customer_fv = (
-        environment.customer_df,
-        environment.customer_feature_view(),
+    customer_df, driver_df, orders_df = (
+        environment.datasets["customer"],
+        environment.datasets["driver"],
+        environment.datasets["orders"],
     )
-    driver_df, driver_fv = (
-        environment.driver_df,
-        environment.driver_stats_feature_view(),
+    customer_fv, driver_fv = (
+        environment.feature_views["customer"],
+        environment.feature_views["driver"],
     )
-    orders_df = environment.orders_df
     full_feature_names = environment.test_repo_config.full_feature_names
 
     entity_df_query = None
-    if environment.orders_table():
-        entity_df_query = f"SELECT * FROM {environment.orders_table()}"
+    if "orders" in environment.datasources:
+        orders_table = table_name_from_data_source(environment.datasources["orders"])
+        entity_df_query = f"SELECT * FROM {orders_table}"
 
     event_timestamp = (
         DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL
diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py
index a4337a305c..1e3483d38e 100644
--- a/sdk/python/tests/integration/online_store/test_universal_online.py
+++ b/sdk/python/tests/integration/online_store/test_universal_online.py
@@ -14,14 +14,14 @@ def test_online_retrieval(environment: Environment):
     fs = environment.feature_store
     full_feature_names = environment.test_repo_config.full_feature_names
 
-    sample_drivers = random.sample(environment.driver_entities, 10)
-    drivers_df = environment.driver_df[
-        environment.driver_df["driver_id"].isin(sample_drivers)
+    sample_drivers = random.sample(environment.entites["driver"], 10)
+    drivers_df = environment.datasets["driver"][
+        environment.datasets["driver"]["driver_id"].isin(sample_drivers)
     ]
 
     sample_customers = random.sample(environment.customer_entities, 10)
-    customers_df = environment.customer_df[
-        environment.customer_df["customer_id"].isin(sample_customers)
+    customers_df = environment.datasets["customer"][
+        environment.datasets["customer"]["customer_id"].isin(sample_customers)
     ]
 
     entity_rows = [
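The refactor above replaces lazily built per-view attributes with plain dictionaries populated in __post_init__. A runnable toy version of that dataclass shape — MiniEnvironment is a hypothetical stand-in, not Feast code:

from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Dict, List

@dataclass
class MiniEnvironment:
    # Mutable containers need default_factory so each instance gets its own dict.
    entities: Dict[str, List[int]] = field(default_factory=dict)
    # default_factory here is evaluated per instance, not once at class definition.
    end_date: datetime = field(
        default_factory=lambda: datetime.now().replace(microsecond=0, second=0, minute=0)
    )

    def __post_init__(self):
        # Derived attributes are computed after the generated __init__ runs.
        self.start_date = self.end_date - timedelta(days=7)

env = MiniEnvironment(entities={"driver": [5001, 5002]})
assert env.start_date == env.end_date - timedelta(days=7)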
From 6ddd5b0482e44d4e79a7c5c9bc59c6fae8ba761b Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Wed, 18 Aug 2021 17:13:51 -0700
Subject: [PATCH 04/51] move start time

Signed-off-by: Achal Shah
---
 .../integration/feature_repos/test_repo_configuration.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index ac07215653..addd8c981e 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -155,14 +155,15 @@ class Environment:
     entites: Dict[str, List[Any]] = field(default_factory=dict)
     datasets: Dict[str, pd.DataFrame] = field(default_factory=dict)
     datasources: Dict[str, DataSource] = field(default_factory=dict)
-    feature_views: List[FeatureView] = field(default_factory=list)
+    feature_views: Dict[str, FeatureView] = field(default_factory=list)
 
     end_date: datetime = field(
         default=datetime.now().replace(microsecond=0, second=0, minute=0)
     )
-    start_date: datetime = end_date - timedelta(days=7)
 
     def __post_init__(self):
+        self.start_date: datetime = self.end_date - timedelta(days=7)
+
         self.entites = self.entites_creator()
         self.datasets = self.datasets_creator(
             self.entites, self.start_date, self.end_date

From a70074552377c237b840d228ff1c20dc48f9814a Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Wed, 18 Aug 2021 17:21:34 -0700
Subject: [PATCH 05/51] Remove one dimension of variation to be added in later

Signed-off-by: Achal Shah
---
 .../tests/integration/feature_repos/test_repo_configuration.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index addd8c981e..15e253f6eb 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -325,7 +325,6 @@ def parametrize_offline_retrieval_test(offline_retrieval_test):
 
     configs = vary_providers_for_offline_stores(FULL_REPO_CONFIGS)
     configs = vary_full_feature_names(configs)
-    configs = vary_infer_event_timestamp_col(configs)
 
     @pytest.mark.integration
     @pytest.mark.parametrize("config", configs, ids=lambda v: str(v))
@@ -350,7 +349,6 @@ def parametrize_online_test(online_test):
 
     configs = vary_providers_for_offline_stores(FULL_REPO_CONFIGS)
     configs = vary_full_feature_names(configs)
-    configs = vary_infer_event_timestamp_col(configs)
 
     @pytest.mark.integration
     @pytest.mark.parametrize("config", configs, ids=lambda v: str(v))

From c25b6cbaf2cf6de6c32b7fcd01d4b8b70ed1770e Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Wed, 18 Aug 2021 17:32:28 -0700
Subject: [PATCH 06/51] Fix default

Signed-off-by: Achal Shah
---
 .../tests/integration/feature_repos/test_repo_configuration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index 15e253f6eb..5f63ccd94b 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -140,7 +140,7 @@ class Environment:
     data_source_creator: DataSourceCreator
 
     entites_creator: Callable[[], Dict[str, List[Any]]] = field(
-        default_factory=construct_entities
+        default=construct_entities
     )
     datasets_creator: Callable[
         [Dict[str, List[Any]], datetime, datetime], Dict[str, pd.DataFrame]
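The "Fix default" patch hinges on a dataclasses detail: field(default_factory=f) calls f() during __init__ and stores the result, while field(default=f) stores the callable itself — which is what a *_creator field needs. A self-contained demonstration (make_entities and Holder are illustrative names, not Feast code):

from dataclasses import dataclass, field
from typing import Callable, Dict, List

def make_entities() -> Dict[str, List[int]]:
    return {"driver": [5001, 5002]}

@dataclass
class Holder:
    # default_factory is CALLED at __init__ time, so this field holds the dict:
    built: Dict[str, List[int]] = field(default_factory=make_entities)
    # default stores the object as-is, so this field holds the function itself:
    builder: Callable[[], Dict[str, List[int]]] = field(default=make_entities)

h = Holder()
assert isinstance(h.built, dict)
assert callable(h.builder) and h.builder() == h.built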
From a3e44739c29b4f69dd6080c672295bfb39ca1d79 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Wed, 18 Aug 2021 21:31:50 -0700
Subject: [PATCH 07/51] Fixups

Signed-off-by: Achal Shah
---
 .../feature_repos/test_repo_configuration.py  | 14 ++++++++------
 .../online_store/test_universal_online.py     |  2 +-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index 5f63ccd94b..caa180e39f 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -139,20 +139,20 @@ class Environment:
     data_source: DataSource
     data_source_creator: DataSourceCreator
 
-    entites_creator: Callable[[], Dict[str, List[Any]]] = field(
+    entities_creator: Callable[[], Dict[str, List[Any]]] = field(
         default=construct_entities
     )
     datasets_creator: Callable[
         [Dict[str, List[Any]], datetime, datetime], Dict[str, pd.DataFrame]
     ] = field(default=construct_datasets)
     datasources_creator: Callable[
-        [Dict[str, pd.DataFrame]], Dict[str, DataSource]
+        [Dict[str, pd.DataFrame], DataSourceCreator], Dict[str, DataSource]
     ] = field(default=construct_data_sources)
     feature_views_creator: Callable[
         [Dict[str, DataSource]], Dict[str, FeatureView]
     ] = field(default=construct_feature_views)
 
-    entites: Dict[str, List[Any]] = field(default_factory=dict)
+    entities: Dict[str, List[Any]] = field(default_factory=dict)
     datasets: Dict[str, pd.DataFrame] = field(default_factory=dict)
     datasources: Dict[str, DataSource] = field(default_factory=dict)
     feature_views: Dict[str, FeatureView] = field(default_factory=list)
@@ -164,11 +164,13 @@ class Environment:
     def __post_init__(self):
         self.start_date: datetime = self.end_date - timedelta(days=7)
 
-        self.entites = self.entites_creator()
+        self.entities = self.entities_creator()
         self.datasets = self.datasets_creator(
-            self.entites, self.start_date, self.end_date
+            self.entities, self.start_date, self.end_date
+        )
+        self.datasources = self.datasources_creator(
+            self.datasets, self.data_source_creator
         )
-        self.datasources = self.datasources_creator(self.datasets)
         self.feature_views = self.feature_views_creator(self.datasources)
 
diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py
index 1e3483d38e..52973207f6 100644
--- a/sdk/python/tests/integration/online_store/test_universal_online.py
+++ b/sdk/python/tests/integration/online_store/test_universal_online.py
@@ -14,7 +14,7 @@ def test_online_retrieval(environment: Environment):
     fs = environment.feature_store
     full_feature_names = environment.test_repo_config.full_feature_names
 
-    sample_drivers = random.sample(environment.entites["driver"], 10)
+    sample_drivers = random.sample(environment.entities["driver"], 10)
     drivers_df = environment.datasets["driver"][
         environment.datasets["driver"]["driver_id"].isin(sample_drivers)
     ]
From 3f632fd670379bae3fb382c74b1af5d8135c4d10 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Wed, 18 Aug 2021 22:42:29 -0700
Subject: [PATCH 08/51] Fixups

Signed-off-by: Achal Shah
---
 .../tests/integration/feature_repos/test_repo_configuration.py | 2 +-
 .../tests/integration/online_store/test_universal_online.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index caa180e39f..6215986178 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -277,7 +277,7 @@ def construct_test_environment(
     try:
         if create_and_apply:
             entities.extend([driver(), customer()])
-            fvs.extend(environment.feature_views)
+            fvs.extend(environment.feature_views.values())
             fs.apply(fvs + entities)
 
         if materialize:
diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py
index 52973207f6..46b84130b8 100644
--- a/sdk/python/tests/integration/online_store/test_universal_online.py
+++ b/sdk/python/tests/integration/online_store/test_universal_online.py
@@ -19,7 +19,7 @@ def test_online_retrieval(environment: Environment):
         environment.datasets["driver"]["driver_id"].isin(sample_drivers)
     ]
 
-    sample_customers = random.sample(environment.customer_entities, 10)
+    sample_customers = random.sample(environment.entities["customer"], 10)
     customers_df = environment.datasets["customer"][
         environment.datasets["customer"]["customer_id"].isin(sample_customers)
     ]

From 9aeea86a07a81ee7e426e76b3b4e9d98dbfe2ea1 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Thu, 19 Aug 2021 07:31:26 -0700
Subject: [PATCH 09/51] Fix up tests

Signed-off-by: Achal Shah
---
 .../offline_store/test_universal_historical_retrieval.py     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py
index 6199ebacf8..b00818a078 100644
--- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py
+++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py
@@ -152,8 +152,8 @@ def test_historical_features(environment: Environment):
     full_feature_names = environment.test_repo_config.full_feature_names
 
     entity_df_query = None
-    if "orders" in environment.datasources:
-        orders_table = table_name_from_data_source(environment.datasources["orders"])
+    orders_table = table_name_from_data_source(environment.datasources["orders"])
+    if orders_table:
         entity_df_query = f"SELECT * FROM {orders_table}"
 
     event_timestamp = (
From 84cbefde065922b1d8fbb858bf187bfc9f370015 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Thu, 19 Aug 2021 10:02:26 -0700
Subject: [PATCH 10/51] Add retries to execute_redshift_statement_async

Signed-off-by: Achal Shah
---
 sdk/python/feast/infra/utils/aws_utils.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py
index aea460cfb8..81d42dbf38 100644
--- a/sdk/python/feast/infra/utils/aws_utils.py
+++ b/sdk/python/feast/infra/utils/aws_utils.py
@@ -7,7 +7,12 @@
 import pandas as pd
 import pyarrow as pa
 import pyarrow.parquet as pq
-from tenacity import retry, retry_if_exception_type, wait_exponential
+from tenacity import (
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_exponential,
+)
 
 from feast.errors import RedshiftCredentialsError, RedshiftQueryError
 from feast.type_map import pa_to_redshift_value_type
@@ -15,7 +20,7 @@
 try:
     import boto3
     from botocore.config import Config
-    from botocore.exceptions import ClientError
+    from botocore.exceptions import ClientError, ConnectionClosedError
 except ImportError as e:
     from feast.errors import FeastExtrasDependencyImportError
 
@@ -50,6 +55,11 @@ def get_bucket_and_key(s3_path: str) -> Tuple[str, str]:
     return bucket, key
 
 
+@retry(
+    wait=wait_exponential(multiplier=0.1, max=30),
+    retry=retry_if_exception_type(ConnectionClosedError),
+    stop=stop_after_attempt(3),
+)
 def execute_redshift_statement_async(
     redshift_data_client, cluster_id: str, database: str, user: str, query: str
 ) -> dict:
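The decorator added above is tenacity's standard bounded-retry shape: exponential backoff, a predicate on the exception type, and a hard attempt cap. A self-contained demo of the same policy with a stand-in exception — TransientError and flaky_call are hypothetical, not Feast or botocore code:

from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

class TransientError(Exception):
    pass

attempts = {"n": 0}

@retry(
    wait=wait_exponential(multiplier=0.1, max=30),   # backoff roughly doubles, capped at 30s
    retry=retry_if_exception_type(TransientError),   # only this exception type is retried
    stop=stop_after_attempt(3),                      # give up after three attempts total
)
def flaky_call() -> str:
    attempts["n"] += 1
    if attempts["n"] < 3:
        raise TransientError("connection closed")
    return "ok"

assert flaky_call() == "ok"
assert attempts["n"] == 3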
From fe180eb6dbf7f19c88efc0c4db877939f420c362 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Thu, 19 Aug 2021 10:26:23 -0700
Subject: [PATCH 11/51] Add retries to execute_redshift_statement_async

Signed-off-by: Achal Shah
---
 sdk/python/feast/infra/utils/aws_utils.py     |  4 +--
 .../feature_repos/test_repo_configuration.py  | 26 ++++++++++---------
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py
index 81d42dbf38..2567074525 100644
--- a/sdk/python/feast/infra/utils/aws_utils.py
+++ b/sdk/python/feast/infra/utils/aws_utils.py
@@ -56,7 +56,7 @@ def get_bucket_and_key(s3_path: str) -> Tuple[str, str]:
 
 
 @retry(
-    wait=wait_exponential(multiplier=0.1, max=30),
+    wait=wait_exponential(multiplier=1, max=30),
     retry=retry_if_exception_type(ConnectionClosedError),
     stop=stop_after_attempt(3),
 )
@@ -92,7 +92,7 @@ class RedshiftStatementNotFinishedError(Exception):
 
 
 @retry(
-    wait=wait_exponential(multiplier=0.1, max=30),
+    wait=wait_exponential(multiplier=1, max=30),
     retry=retry_if_exception_type(RedshiftStatementNotFinishedError),
 )
 def wait_for_redshift_statement(redshift_data_client, statement: dict) -> None:
diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index 6215986178..840d212cee 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -72,11 +72,11 @@ def ds_creator_path(cls: str):
 ]
 
 
-def construct_entities() -> Dict[str, List[Any]]:
+def construct_universal_entities() -> Dict[str, List[Any]]:
     return {"customer": list(range(1001, 1110)), "driver": list(range(5001, 5110))}
 
 
-def construct_datasets(
+def construct_universal_datasets(
     entities: Dict[str, List[Any]], start_time: datetime, end_time: datetime
 ) -> Dict[str, pd.DataFrame]:
     customer_df = driver_test_data.create_customer_daily_profile_df(
@@ -96,7 +96,7 @@ def construct_datasets(
     return {"customer": customer_df, "driver": driver_df, "orders": orders_df}
 
 
-def construct_data_sources(
+def construct_universal_data_sources(
     datasets: Dict[str, pd.DataFrame], data_source_creator: DataSourceCreator
 ) -> Dict[str, DataSource]:
     customer_ds = data_source_creator.create_data_sources(
@@ -120,7 +120,7 @@ def construct_data_sources(
     return {"customer": customer_ds, "driver": driver_ds, "orders": orders_ds}
 
 
-def construct_feature_views(
+def construct_universal_feature_views(
     data_sources: Dict[str, DataSource]
 ) -> Dict[str, FeatureView]:
     return {
@@ -140,17 +140,17 @@ class Environment:
     data_source_creator: DataSourceCreator
 
     entities_creator: Callable[[], Dict[str, List[Any]]] = field(
-        default=construct_entities
+        default=construct_universal_entities
     )
     datasets_creator: Callable[
         [Dict[str, List[Any]], datetime, datetime], Dict[str, pd.DataFrame]
-    ] = field(default=construct_datasets)
+    ] = field(default=construct_universal_datasets)
     datasources_creator: Callable[
         [Dict[str, pd.DataFrame], DataSourceCreator], Dict[str, DataSource]
-    ] = field(default=construct_data_sources)
+    ] = field(default=construct_universal_data_sources)
     feature_views_creator: Callable[
         [Dict[str, DataSource]], Dict[str, FeatureView]
-    ] = field(default=construct_feature_views)
+    ] = field(default=construct_universal_feature_views)
 
     entities: Dict[str, List[Any]] = field(default_factory=dict)
     datasets: Dict[str, pd.DataFrame] = field(default_factory=dict)
@@ -219,7 +219,7 @@ def vary_providers_for_offline_stores(
 
 
 @contextmanager
-def construct_test_environment(
+def construct_universal_test_environment(
     test_repo_config: TestRepoConfig,
     create_and_apply: bool = False,
     materialize: bool = False,
@@ -305,7 +305,7 @@ def parametrize_e2e_test(e2e_test):
     @pytest.mark.integration
     @pytest.mark.parametrize("config", FULL_REPO_CONFIGS, ids=lambda v: str(v))
     def inner_test(config):
-        with construct_test_environment(config) as environment:
+        with construct_universal_test_environment(config) as environment:
             e2e_test(environment)
 
     return inner_test
@@ -331,7 +331,9 @@ def parametrize_offline_retrieval_test(offline_retrieval_test):
     @pytest.mark.integration
     @pytest.mark.parametrize("config", configs, ids=lambda v: str(v))
     def inner_test(config):
-        with construct_test_environment(config, create_and_apply=True) as environment:
+        with construct_universal_test_environment(
+            config, create_and_apply=True
+        ) as environment:
             offline_retrieval_test(environment)
 
     return inner_test
@@ -355,7 +357,7 @@ def parametrize_online_test(online_test):
     @pytest.mark.integration
     @pytest.mark.parametrize("config", configs, ids=lambda v: str(v))
     def inner_test(config):
-        with construct_test_environment(
+        with construct_universal_test_environment(
             config, create_and_apply=True, materialize=True
         ) as environment:
             online_test(environment)
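The second decorator touched above has a retry= predicate but no stop= clause, so tenacity keeps waiting until the wrapped function stops raising — effectively a polling loop with backoff. A runnable stand-in for that wait_for_redshift_statement pattern (NotFinishedError and wait_until_done are illustrative names):

from tenacity import retry, retry_if_exception_type, wait_exponential

class NotFinishedError(Exception):
    pass

state = {"polls": 0}

@retry(
    wait=wait_exponential(multiplier=0.01, max=0.05),  # tiny waits so the demo runs fast
    retry=retry_if_exception_type(NotFinishedError),   # no stop=: poll until it succeeds
)
def wait_until_done() -> str:
    state["polls"] += 1
    if state["polls"] < 4:
        raise NotFinishedError()   # statement still running: signal "poll again"
    return "FINISHED"

assert wait_until_done() == "FINISHED"
assert state["polls"] == 4

Leaving out stop= is deliberate for a poller: the statement is expected to finish eventually, and an attempt cap would turn slow queries into spurious failures.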
From 0cd08d2d8a6ddb7707a91fa57613b72efc78b116 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Thu, 19 Aug 2021 10:57:29 -0700
Subject: [PATCH 12/51] refactoooor

Signed-off-by: Achal Shah
---
 .../feature_repos/test_repo_configuration.py  | 71 ++++++++++++++-----
 1 file changed, 53 insertions(+), 18 deletions(-)

diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index 840d212cee..a345d2708a 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -131,6 +131,55 @@ def construct_universal_feature_views(
     }
 
 
+def setup_entities(
+    environment: "Environment", entities_override: Optional[Dict[str, List[Any]]] = None
+) -> "Environment":
+    environment.entities = (
+        entities_override if entities_override else construct_universal_entities()
+    )
+    return environment
+
+
+def setup_datasets(
+    environment: "Environment",
+    datasets_override: Optional[Dict[str, pd.DataFrame]] = None,
+) -> "Environment":
+    environment.datasets = (
+        datasets_override
+        if datasets_override
+        else construct_universal_datasets(
+            environment.entities, environment.start_date, environment.end_date
+        )
+    )
+    return environment
+
+
+def setup_data_sources(
+    environment: "Environment",
+    data_sources_override: Optional[Dict[str, DataSource]] = None,
+) -> "Environment":
+    environment.datasources = (
+        data_sources_override
+        if data_sources_override
+        else construct_universal_data_sources(
+            environment.datasets, environment.data_source_creator
+        )
+    )
+    return environment
+
+
+def setup_feature_views(
+    environment: "Environment",
+    feature_views_override: Optional[Dict[str, FeatureView]] = None,
+) -> "Environment":
+    environment.feature_views = (
+        feature_views_override
+        if feature_views_override
+        else construct_universal_feature_views(environment.datasources)
+    )
+    return environment
+
+
 @dataclass
 class Environment:
     name: str
@@ -142,15 +191,6 @@ class Environment:
     entities_creator: Callable[[], Dict[str, List[Any]]] = field(
         default=construct_universal_entities
     )
-    datasets_creator: Callable[
-        [Dict[str, List[Any]], datetime, datetime], Dict[str, pd.DataFrame]
-    ] = field(default=construct_universal_datasets)
-    datasources_creator: Callable[
-        [Dict[str, pd.DataFrame], DataSourceCreator], Dict[str, DataSource]
-    ] = field(default=construct_universal_data_sources)
-    feature_views_creator: Callable[
-        [Dict[str, DataSource]], Dict[str, FeatureView]
-    ] = field(default=construct_universal_feature_views)
 
     entities: Dict[str, List[Any]] = field(default_factory=dict)
     datasets: Dict[str, pd.DataFrame] = field(default_factory=dict)
@@ -164,15 +204,6 @@ class Environment:
     def __post_init__(self):
         self.start_date: datetime = self.end_date - timedelta(days=7)
 
-        self.entities = self.entities_creator()
-        self.datasets = self.datasets_creator(
-            self.entities, self.start_date, self.end_date
-        )
-        self.datasources = self.datasources_creator(
-            self.datasets, self.data_source_creator
-        )
-        self.feature_views = self.feature_views_creator(self.datasources)
-
 
 def table_name_from_data_source(ds: DataSource) -> Optional[str]:
     if hasattr(ds, "table_ref"):
@@ -271,6 +302,10 @@ def construct_universal_test_environment(
         data_source=ds,
         data_source_creator=offline_creator,
     )
+    environment = setup_entities(environment)
+    environment = setup_datasets(environment)
+    environment = setup_data_sources(environment)
+    environment = setup_feature_views(environment)
 
     fvs = []
     entities = []
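Each setup_* helper above follows the same "use the override if supplied, otherwise build the default" shape. A generic, self-contained version of that pattern — resolve is a hypothetical name, and note the real helpers test truthiness rather than comparing against None:

from typing import Callable, Optional, TypeVar

T = TypeVar("T")

def resolve(override: Optional[T], build_default: Callable[[], T]) -> T:
    # Use the caller-supplied value when given, otherwise construct the default.
    return override if override is not None else build_default()

assert resolve(None, lambda: {"driver": [5001]}) == {"driver": [5001]}
assert resolve({"driver": [1]}, lambda: {"driver": [5001]}) == {"driver": [1]}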
From 4701183121e148c7f3a8dffbaa8babf58c094f28 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Thu, 19 Aug 2021 10:59:34 -0700
Subject: [PATCH 13/51] remove retries

Signed-off-by: Achal Shah
---
 sdk/python/feast/infra/utils/aws_utils.py | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py
index 2567074525..84fa611c15 100644
--- a/sdk/python/feast/infra/utils/aws_utils.py
+++ b/sdk/python/feast/infra/utils/aws_utils.py
@@ -7,12 +7,7 @@
 import pandas as pd
 import pyarrow as pa
 import pyarrow.parquet as pq
-from tenacity import (
-    retry,
-    retry_if_exception_type,
-    stop_after_attempt,
-    wait_exponential,
-)
+from tenacity import retry, retry_if_exception_type, wait_exponential
 
 from feast.errors import RedshiftCredentialsError, RedshiftQueryError
 from feast.type_map import pa_to_redshift_value_type
@@ -20,7 +15,7 @@
 try:
     import boto3
     from botocore.config import Config
-    from botocore.exceptions import ClientError, ConnectionClosedError
+    from botocore.exceptions import ClientError
 except ImportError as e:
     from feast.errors import FeastExtrasDependencyImportError
 
@@ -55,11 +50,6 @@ def get_bucket_and_key(s3_path: str) -> Tuple[str, str]:
     return bucket, key
 
 
-@retry(
-    wait=wait_exponential(multiplier=1, max=30),
-    retry=retry_if_exception_type(ConnectionClosedError),
-    stop=stop_after_attempt(3),
-)
 def execute_redshift_statement_async(
     redshift_data_client, cluster_id: str, database: str, user: str, query: str
 ) -> dict:

From 4cde2847549b618bb961d2cc1845313e1a3706e9 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Fri, 20 Aug 2021 12:51:42 -0700
Subject: [PATCH 14/51] Remove provider variation since they don't really play
 a big role

Signed-off-by: Achal Shah
---
 .../integration/feature_repos/test_repo_configuration.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index a345d2708a..cda1f62250 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -360,8 +360,7 @@ def parametrize_offline_retrieval_test(offline_retrieval_test):
     The decorator takes care of tearing down the feature store, as well as the sample data.
     """
 
-    configs = vary_providers_for_offline_stores(FULL_REPO_CONFIGS)
-    configs = vary_full_feature_names(configs)
+    configs = vary_full_feature_names(FULL_REPO_CONFIGS)
 
     @pytest.mark.integration
     @pytest.mark.parametrize("config", configs, ids=lambda v: str(v))
@@ -386,8 +385,7 @@ def parametrize_online_test(online_test):
     The decorator takes care of tearing down the feature store, as well as the sample data.
     """
 
-    configs = vary_providers_for_offline_stores(FULL_REPO_CONFIGS)
-    configs = vary_full_feature_names(configs)
+    configs = vary_full_feature_names(FULL_REPO_CONFIGS)
 
     @pytest.mark.integration
     @pytest.mark.parametrize("config", configs, ids=lambda v: str(v))
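vary_full_feature_names, the one variation kept above, expands a config list by toggling a single flag via dataclasses.replace. A minimal standalone version of that expansion idea — Config here is a stand-in for TestRepoConfig, not the real class:

from dataclasses import dataclass, replace
from typing import List

@dataclass(frozen=True)
class Config:
    provider: str
    full_feature_names: bool = True

def vary_flag(configs: List[Config]) -> List[Config]:
    # Each input config becomes two: one per value of the toggled flag.
    return [replace(c, full_feature_names=flag) for c in configs for flag in (True, False)]

matrix = vary_flag([Config("local"), Config("gcp")])
assert len(matrix) == 4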
From 974bb0bae8a77137b1742dff0dddd56f46856251 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Mon, 23 Aug 2021 09:50:59 -0700
Subject: [PATCH 15/51] Session scoped cache for test datasets and skipping
 older tests whose functionality is present in other universal tests

Signed-off-by: Achal Shah
---
 sdk/python/tests/conftest.py                  | 22 +++++++++
 .../feature_repos/test_repo_configuration.py  | 45 ++++++++++++++-----
 .../test_offline_online_store_consistency.py  |  3 ++
 .../test_historical_retrieval.py              |  6 +++
 4 files changed, 64 insertions(+), 12 deletions(-)

diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py
index 0c94f4d57a..ac5598006b 100644
--- a/sdk/python/tests/conftest.py
+++ b/sdk/python/tests/conftest.py
@@ -87,3 +87,25 @@ def simple_dataset_2() -> pd.DataFrame:
         ],
     }
     return pd.DataFrame.from_dict(data)
+
+
+class DataSourceCache:
+    cache = {}
+
+    def get(self, test_repo_config):
+        return self.cache.get(test_repo_config.offline_store_creator, None)
+
+    def put(self, test_repo_config,
+            entites,
+            datasets,
+            data_sources,
+            data_source_creator):
+        self.cache[test_repo_config.offline_store_creator] = (entites, datasets, data_sources, data_source_creator)
+
+
+@pytest.fixture(scope="session", autouse=True)
+def data_source_cache():
+    dsc = DataSourceCache()
+    yield dsc
+    for _, v in dsc.cache.items():
+        v[3].teardown()
diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index cda1f62250..6cc4a88815 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -4,7 +4,7 @@
 from dataclasses import dataclass, field, replace
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional, Union, Tuple
 
 import pandas as pd
 import pytest
@@ -254,6 +254,7 @@ def construct_universal_test_environment(
     test_repo_config: TestRepoConfig,
     create_and_apply: bool = False,
     materialize: bool = False,
+    data_source_cache = None
 ) -> Environment:
     """
     This method should take in the parameters from the test repo config and created a feature repo, apply it,
@@ -302,9 +303,27 @@ def construct_universal_test_environment(
         data_source=ds,
         data_source_creator=offline_creator,
     )
-    environment = setup_entities(environment)
-    environment = setup_datasets(environment)
-    environment = setup_data_sources(environment)
+    print(f"Data Source Cache: {data_source_cache}")
+    if data_source_cache:
+        fixtures = data_source_cache.get(test_repo_config)
+        print(f"Fixtures: {fixtures}")
+        if fixtures:
+            environment = setup_entities(environment, entities_override=fixtures[0])
+            environment = setup_datasets(environment, datasets_override=fixtures[1])
+            environment = setup_data_sources(environment, data_sources_override=fixtures[2])
+        else:
+            environment = setup_entities(environment)
+            environment = setup_datasets(environment)
+            environment = setup_data_sources(environment)
+            data_source_cache.put(test_repo_config, environment.entities,
+                                  environment.datasets,
+                                  environment.datasources,
+                                  offline_creator)
+    else:
+        environment = setup_entities(environment)
+        environment = setup_datasets(environment)
+        environment = setup_data_sources(environment)
+
     environment = setup_feature_views(environment)
 
     fvs = []
@@ -337,7 +356,8 @@ def construct_universal_test_environment(
         yield environment
     finally:
-        offline_creator.teardown()
+        if not data_source_cache:
+            offline_creator.teardown()
         fs.teardown()
@@ -359,8 +379,8 @@ def parametrize_e2e_test(e2e_test):
 
     @pytest.mark.integration
     @pytest.mark.parametrize("config", FULL_REPO_CONFIGS, ids=lambda v: str(v))
-    def inner_test(config):
-        with construct_universal_test_environment(config) as environment:
+    def inner_test(config, data_source_cache):
+        with construct_universal_test_environment(config, data_source_cache=data_source_cache) as environment:
             e2e_test(environment)
 
     return inner_test
@@ -385,9 +405,10 @@ def parametrize_offline_retrieval_test(offline_retrieval_test):
 
     @pytest.mark.integration
     @pytest.mark.parametrize("config", configs, ids=lambda v: str(v))
-    def inner_test(config):
+    def inner_test(config, data_source_cache):
         with construct_universal_test_environment(
-            config, create_and_apply=True
+            config, create_and_apply=True,
+            data_source_cache=data_source_cache
         ) as environment:
             offline_retrieval_test(environment)
 
@@ -411,10 +432,10 @@ def parametrize_online_test(online_test):
 
     @pytest.mark.integration
     @pytest.mark.parametrize("config", configs, ids=lambda v: str(v))
-    def inner_test(config):
+    def inner_test(config, data_source_cache):
         with construct_universal_test_environment(
-            config, create_and_apply=True, materialize=True
+            config, create_and_apply=True, materialize=True,
+            data_source_cache=data_source_cache
         ) as environment:
             online_test(environment)
-    return inner_test
diff --git a/sdk/python/tests/integration/materialization/test_offline_online_store_consistency.py b/sdk/python/tests/integration/materialization/test_offline_online_store_consistency.py
index e97f7d0d84..ecb4c82bd3 100644
--- a/sdk/python/tests/integration/materialization/test_offline_online_store_consistency.py
+++ b/sdk/python/tests/integration/materialization/test_offline_online_store_consistency.py
@@ -382,6 +382,9 @@ def run_offline_online_store_consistency_test(
     )
 
 
+@pytest.mark.skip(
+    reason="This is the most expensive test and is subsumed by the universal tests."
+)
 @pytest.mark.integration
 @pytest.mark.parametrize(
     "bq_source_type", ["query", "table"],
diff --git a/sdk/python/tests/integration/offline_store/test_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_historical_retrieval.py
index 5d735fcd9d..26b6b335dc 100644
--- a/sdk/python/tests/integration/offline_store/test_historical_retrieval.py
+++ b/sdk/python/tests/integration/offline_store/test_historical_retrieval.py
@@ -394,6 +394,9 @@ def test_historical_features_from_parquet_sources(
     store.teardown()
 
 
+@pytest.mark.skip(
+    reason="This is the most expensive test and is subsumed by the universal tests."
+)
 @pytest.mark.integration
 @pytest.mark.parametrize(
     "provider_type", ["local", "gcp", "gcp_custom_offline_config"],
@@ -655,6 +658,9 @@ def test_historical_features_from_bigquery_sources(
     store.teardown()
 
 
+@pytest.mark.skip(
+    reason="This is the most expensive test and is subsumed by the universal tests."
+)
 @pytest.mark.integration
 @pytest.mark.parametrize(
     "provider_type", ["local", "aws"],
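The data_source_cache fixture introduced in the patch above is a session-scoped pytest fixture: one instance is created for the whole run, every test shares it, and the code after the yield executes once, after the last test. A minimal standalone sketch of the same shape — Cache and shared_cache are illustrative, and the real fixture also sets autouse=True so it runs without being requested explicitly:

import pytest

class Cache:
    def __init__(self):
        self.store = {}

@pytest.fixture(scope="session")
def shared_cache():
    cache = Cache()
    yield cache                         # every test in the session receives this object
    for key, resource in cache.store.items():
        print(f"tearing down {key}")    # teardown runs once, after the whole session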
From dbbd6fb708a4dd9a3cc8d6dccf4fd64631ca5f13 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Mon, 23 Aug 2021 09:51:16 -0700
Subject: [PATCH 16/51] make format

Signed-off-by: Achal Shah
---
 sdk/python/tests/conftest.py                  |  9 +++--
 .../feature_repos/test_repo_configuration.py  | 33 ++++++++++++-------
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py
index ac5598006b..b8ec789019 100644
--- a/sdk/python/tests/conftest.py
+++ b/sdk/python/tests/conftest.py
@@ -95,12 +95,15 @@ class DataSourceCache:
     def get(self, test_repo_config):
         return self.cache.get(test_repo_config.offline_store_creator, None)
 
-    def put(self, test_repo_config,
+    def put(
+        self, test_repo_config, entites, datasets, data_sources, data_source_creator
+    ):
+        self.cache[test_repo_config.offline_store_creator] = (
             entites,
             datasets,
             data_sources,
-            data_source_creator):
-        self.cache[test_repo_config.offline_store_creator] = (entites, datasets, data_sources, data_source_creator)
+            data_source_creator,
+        )
diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index 6cc4a88815..b91c0723a7 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -4,7 +4,7 @@
 from dataclasses import dataclass, field, replace
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Union, Tuple
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import pandas as pd
 import pytest
@@ -254,7 +254,7 @@ def construct_universal_test_environment(
     test_repo_config: TestRepoConfig,
     create_and_apply: bool = False,
     materialize: bool = False,
-    data_source_cache = None
+    data_source_cache=None,
 ) -> Environment:
@@ -310,15 +310,20 @@ def construct_universal_test_environment(
         if fixtures:
             environment = setup_entities(environment, entities_override=fixtures[0])
             environment = setup_datasets(environment, datasets_override=fixtures[1])
-            environment = setup_data_sources(environment, data_sources_override=fixtures[2])
+            environment = setup_data_sources(
+                environment, data_sources_override=fixtures[2]
+            )
         else:
             environment = setup_entities(environment)
             environment = setup_datasets(environment)
             environment = setup_data_sources(environment)
-            data_source_cache.put(test_repo_config, environment.entities,
-                                  environment.datasets,
-                                  environment.datasources,
-                                  offline_creator)
+            data_source_cache.put(
+                test_repo_config,
+                environment.entities,
+                environment.datasets,
+                environment.datasources,
+                offline_creator,
+            )
     else:
         environment = setup_entities(environment)
         environment = setup_datasets(environment)
@@ -360,7 +365,9 @@ def parametrize_e2e_test(e2e_test):
     @pytest.mark.integration
     @pytest.mark.parametrize("config", FULL_REPO_CONFIGS, ids=lambda v: str(v))
     def inner_test(config, data_source_cache):
-        with construct_universal_test_environment(config, data_source_cache=data_source_cache) as environment:
+        with construct_universal_test_environment(
+            config, data_source_cache=data_source_cache
+        ) as environment:
             e2e_test(environment)
 
     return inner_test
@@ -386,8 +393,7 @@ def parametrize_offline_retrieval_test(offline_retrieval_test):
     @pytest.mark.parametrize("config", configs, ids=lambda v: str(v))
     def inner_test(config, data_source_cache):
         with construct_universal_test_environment(
-            config, create_and_apply=True,
-            data_source_cache=data_source_cache
+            config, create_and_apply=True, data_source_cache=data_source_cache
         ) as environment:
             offline_retrieval_test(environment)
 
@@ -412,8 +418,11 @@ def parametrize_online_test(online_test):
     @pytest.mark.parametrize("config", configs, ids=lambda v: str(v))
     def inner_test(config, data_source_cache):
         with construct_universal_test_environment(
-            config, create_and_apply=True, materialize=True,
-            data_source_cache=data_source_cache
+            config,
+            create_and_apply=True,
+            materialize=True,
+            data_source_cache=data_source_cache,
         ) as environment:
             online_test(environment)
+
+    return inner_test

From 473b630297f9bc8715d306d0a19bef7cbd4b91a7 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Mon, 23 Aug 2021 09:56:18 -0700
Subject: [PATCH 17/51] make format

Signed-off-by: Achal Shah
---
 sdk/python/tests/conftest.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py
index b8ec789019..7147e26dee 100644
--- a/sdk/python/tests/conftest.py
+++ b/sdk/python/tests/conftest.py
@@ -14,6 +14,7 @@
 import multiprocessing
 from datetime import datetime, timedelta
 from sys import platform
+from typing import Any, Dict
 
 import pandas as pd
 import pytest
@@ -90,7 +91,7 @@ def simple_dataset_2() -> pd.DataFrame:
 
 
 class DataSourceCache:
-    cache = {}
+    cache: Dict[str, Any] = {}
 
     def get(self, test_repo_config):
         return self.cache.get(test_repo_config.offline_store_creator, None)
From 95982e231cbf17b3a4475f8c4a7a16b15c48402d Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Mon, 23 Aug 2021 09:57:22 -0700
Subject: [PATCH 18/51] remove import

Signed-off-by: Achal Shah
---
 .../tests/integration/feature_repos/test_repo_configuration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index b91c0723a7..f59160a967 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -4,7 +4,7 @@
 from dataclasses import dataclass, field, replace
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Union
 
 import pandas as pd
 import pytest

From df9596b51450d7eb38d0f6b5845664ed3aa03708 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Wed, 25 Aug 2021 13:23:20 -0700
Subject: [PATCH 19/51] fix merge

Signed-off-by: Achal Shah
---
 .../integration/offline_store/test_s3_custom_endpoint.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py b/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py
index 207ebd9732..e5bff828b3 100644
--- a/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py
+++ b/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py
@@ -2,7 +2,7 @@
 
 from tests.integration.feature_repos.test_repo_configuration import (
     TestRepoConfig,
-    construct_test_environment,
+    construct_universal_test_environment,
 )
 
 # TODO: Allow integration tests to run using different credentials.
@@ -27,7 +27,7 @@ def test_registration_and_retrieval_from_custom_s3_endpoint():
     os.environ["AWS_ACCESS_KEY_ID"] = "AKIAIOSFODNN7EXAMPLE"
     os.environ["AWS_SECRET_ACCESS_KEY"] = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
 
-    with construct_test_environment(
+    with construct_universal_test_environment(
         config, create_and_apply=True, materialize=True
     ) as environment:
         fs = environment.feature_store
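construct_universal_test_environment, which the test above enters via with, is a @contextmanager generator: everything before the yield is setup, and the finally block is teardown that runs even when the test body raises. A runnable skeleton of that shape — environment here is a toy stand-in, not the Feast helper:

from contextlib import contextmanager

@contextmanager
def environment(name: str):
    resources = [f"{name}-table"]    # setup: create whatever the test needs
    try:
        yield resources              # the test body runs while suspended here
    finally:
        resources.clear()            # teardown runs even if the test fails

with environment("test_correctness") as res:
    assert res == ["test_correctness-table"]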
data_source_creator=offline_creator, ) - print(f"Data Source Cache: {data_source_cache}") if data_source_cache: fixtures = data_source_cache.get(test_repo_config) - print(f"Fixtures: {fixtures}") if fixtures: environment = setup_entities(environment, entities_override=fixtures[0]) environment = setup_datasets(environment, datasets_override=fixtures[1]) @@ -334,12 +335,13 @@ def construct_universal_test_environment( fvs = [] entities = [] try: - if create_and_apply: + if stop_at_step <= EnvironmentSetupSteps.INIT: + pass + if stop_at_step >= EnvironmentSetupSteps.CREATE_AND_APPLY_OBJECTS: entities.extend([driver(), customer()]) fvs.extend(environment.feature_views.values()) fs.apply(fvs + entities) - - if materialize: + if stop_at_step >= EnvironmentSetupSteps.MATERIALIZE: fs.materialize(environment.start_date, environment.end_date) yield environment @@ -393,7 +395,9 @@ def parametrize_offline_retrieval_test(offline_retrieval_test): @pytest.mark.parametrize("config", configs, ids=lambda v: str(v)) def inner_test(config, data_source_cache): with construct_universal_test_environment( - config, create_and_apply=True, data_source_cache=data_source_cache + config, + stop_at_step=EnvironmentSetupSteps.CREATE_AND_APPLY_OBJECTS, + data_source_cache=data_source_cache, ) as environment: offline_retrieval_test(environment) @@ -419,8 +423,7 @@ def parametrize_online_test(online_test): def inner_test(config, data_source_cache): with construct_universal_test_environment( config, - create_and_apply=True, - materialize=True, + stop_at_step=EnvironmentSetupSteps.MATERIALIZE, data_source_cache=data_source_cache, ) as environment: online_test(environment) From 0daaff0ef03a3ceb09218352dc22c31aff75074b Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Wed, 25 Aug 2021 21:56:47 -0700 Subject: [PATCH 21/51] Fix refs Signed-off-by: Achal Shah --- .../feature_repos/test_repo_configuration.py | 74 ++++++++++--------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py index 62b3e67750..bf9b409fe0 100644 --- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py @@ -3,7 +3,7 @@ from contextlib import contextmanager from dataclasses import dataclass, field, replace from datetime import datetime, timedelta -from enum import Enum +from enum import IntEnum from pathlib import Path from typing import Any, Dict, List, Optional, Union @@ -100,19 +100,19 @@ def construct_universal_datasets( def construct_universal_data_sources( datasets: Dict[str, pd.DataFrame], data_source_creator: DataSourceCreator ) -> Dict[str, DataSource]: - customer_ds = data_source_creator.create_data_sources( + customer_ds = data_source_creator.create_data_source( datasets["customer"], suffix="customer_profile", event_timestamp_column="event_timestamp", created_timestamp_column="created", ) - driver_ds = data_source_creator.create_data_sources( + driver_ds = data_source_creator.create_data_source( datasets["driver"], suffix="driver_hourly", event_timestamp_column="event_timestamp", created_timestamp_column="created", ) - orders_ds = data_source_creator.create_data_sources( + orders_ds = data_source_creator.create_data_source( datasets["orders"], suffix="orders", event_timestamp_column="event_timestamp", @@ -246,10 +246,11 @@ def vary_providers_for_offline_stores( return new_configs -class 
EnvironmentSetupSteps(Enum): +class EnvironmentSetupSteps(IntEnum): INIT = 0 - CREATE_AND_APPLY_OBJECTS = 1 - MATERIALIZE = 2 + CREATE_OBJECTS = 1 + APPLY_OBJECTS = 2 + MATERIALIZE = 3 DEFAULT_STEP = EnvironmentSetupSteps.INIT @@ -306,38 +307,43 @@ def construct_universal_test_environment( data_source=ds, data_source_creator=offline_creator, ) - if data_source_cache: - fixtures = data_source_cache.get(test_repo_config) - if fixtures: - environment = setup_entities(environment, entities_override=fixtures[0]) - environment = setup_datasets(environment, datasets_override=fixtures[1]) - environment = setup_data_sources( - environment, data_sources_override=fixtures[2] - ) - else: - environment = setup_entities(environment) - environment = setup_datasets(environment) - environment = setup_data_sources(environment) - data_source_cache.put( - test_repo_config, - environment.entities, - environment.datasets, - environment.datasources, - offline_creator, - ) - else: - environment = setup_entities(environment) - environment = setup_datasets(environment) - environment = setup_data_sources(environment) - - environment = setup_feature_views(environment) fvs = [] entities = [] try: if stop_at_step <= EnvironmentSetupSteps.INIT: pass - if stop_at_step >= EnvironmentSetupSteps.CREATE_AND_APPLY_OBJECTS: + if stop_at_step >= EnvironmentSetupSteps.CREATE_OBJECTS: + if data_source_cache: + fixtures = data_source_cache.get(test_repo_config) + if fixtures: + environment = setup_entities( + environment, entities_override=fixtures[0] + ) + environment = setup_datasets( + environment, datasets_override=fixtures[1] + ) + environment = setup_data_sources( + environment, data_sources_override=fixtures[2] + ) + else: + environment = setup_entities(environment) + environment = setup_datasets(environment) + environment = setup_data_sources(environment) + data_source_cache.put( + test_repo_config, + environment.entities, + environment.datasets, + environment.datasources, + offline_creator, + ) + else: + environment = setup_entities(environment) + environment = setup_datasets(environment) + environment = setup_data_sources(environment) + + environment = setup_feature_views(environment) + if stop_at_step >= EnvironmentSetupSteps.APPLY_OBJECTS: entities.extend([driver(), customer()]) fvs.extend(environment.feature_views.values()) fs.apply(fvs + entities) @@ -396,7 +402,7 @@ def parametrize_offline_retrieval_test(offline_retrieval_test): def inner_test(config, data_source_cache): with construct_universal_test_environment( config, - stop_at_step=EnvironmentSetupSteps.CREATE_AND_APPLY_OBJECTS, + stop_at_step=EnvironmentSetupSteps.CREATE_OBJECTS, data_source_cache=data_source_cache, ) as environment: offline_retrieval_test(environment) From ef6a6b3bf2b69b759b365af4a9446c3ee20d2e74 Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Wed, 25 Aug 2021 22:41:06 -0700 Subject: [PATCH 22/51] fix step Signed-off-by: Achal Shah --- .../integration/feature_repos/test_repo_configuration.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py index bf9b409fe0..85cdb57422 100644 --- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py @@ -258,7 +258,9 @@ class EnvironmentSetupSteps(IntEnum): @contextmanager def construct_universal_test_environment( - test_repo_config: TestRepoConfig, 
stop_at_step=DEFAULT_STEP, data_source_cache=None, + test_repo_config: TestRepoConfig, + stop_at_step=DEFAULT_STEP, + data_source_cache=None, ) -> Environment: """ This method should take in the parameters from the test repo config and created a feature repo, apply it, @@ -402,7 +404,7 @@ def parametrize_offline_retrieval_test(offline_retrieval_test): def inner_test(config, data_source_cache): with construct_universal_test_environment( config, - stop_at_step=EnvironmentSetupSteps.CREATE_OBJECTS, + stop_at_step=EnvironmentSetupSteps.APPLY_OBJECTS, data_source_cache=data_source_cache, ) as environment: offline_retrieval_test(environment) From d63691d2b902851f0a4d7ff59800100e36502b8f Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Thu, 26 Aug 2021 13:24:08 -0700 Subject: [PATCH 23/51] WIP fixes Signed-off-by: Achal Shah --- sdk/python/feast/feature_view.py | 10 +++++++- .../feast/infra/offline_stores/bigquery.py | 1 + sdk/python/feast/infra/provider.py | 5 +++- sdk/python/feast/registry.py | 1 + sdk/python/feast/type_map.py | 4 ++++ .../integration/e2e/test_universal_e2e.py | 5 +++- .../feature_repos/test_repo_configuration.py | 23 +++++++++++++------ .../feature_repos/universal/feature_views.py | 15 +++++++----- 8 files changed, 48 insertions(+), 16 deletions(-) diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index a833c7a5e5..cbb5a752d3 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -325,6 +325,14 @@ def infer_features_from_batch_source(self, config: RepoConfig): self.batch_source.created_timestamp_column, } | set(self.entities) + if self.batch_source.event_timestamp_column in self.batch_source.field_mapping: + columns_to_exclude.add(self.batch_source.field_mapping[self.batch_source.event_timestamp_column]) + if self.batch_source.created_timestamp_column in self.batch_source.field_mapping: + columns_to_exclude.add(self.batch_source.field_mapping[self.batch_source.created_timestamp_column]) + for e in self.entities: + if e in self.batch_source.field_mapping: + columns_to_exclude.add(self.batch_source.field_mapping[e]) + for ( col_name, col_datatype, @@ -335,7 +343,7 @@ def infer_features_from_batch_source(self, config: RepoConfig): ): feature_name = ( self.batch_source.field_mapping[col_name] - if col_name in self.batch_source.field_mapping.keys() + if col_name in self.batch_source.field_mapping else col_name ) self.features.append( diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index 3463e96898..2bfd863991 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -86,6 +86,7 @@ def pull_latest_from_table_or_query( ) WHERE _feast_row = 1 """ + return BigQueryRetrievalJob(query=query, client=client, config=config) @staticmethod diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py index 5d8a3eeed8..8f450662c9 100644 --- a/sdk/python/feast/infra/provider.py +++ b/sdk/python/feast/infra/provider.py @@ -238,9 +238,12 @@ def _get_column_names( reverse_field_mapping[col] if col in reverse_field_mapping.keys() else col for col in feature_names ] + feature_names = set(feature_names) - set(join_keys) + feature_names = feature_names - {event_timestamp_column} + feature_names = feature_names - {created_timestamp_column} return ( join_keys, - feature_names, + list(feature_names), event_timestamp_column, created_timestamp_column, ) diff --git 
a/sdk/python/feast/registry.py b/sdk/python/feast/registry.py index af04de5f3f..0839453b75 100644 --- a/sdk/python/feast/registry.py +++ b/sdk/python/feast/registry.py @@ -593,6 +593,7 @@ def __init__(self, repo_path: Path, registry_path_string: str): def get_registry_proto(self): registry_proto = RegistryProto() + import pdb; pdb.set_trace() if self._filepath.exists(): registry_proto.ParseFromString(self._filepath.read_bytes()) return registry_proto diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index af09069407..7aee3e5403 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -13,6 +13,7 @@ # limitations under the License. import re +from datetime import datetime from typing import Any, Dict, Union import numpy as np @@ -104,6 +105,7 @@ def python_type_to_feast_value_type( "int8": ValueType.INT32, "bool": ValueType.BOOL, "timedelta": ValueType.UNIX_TIMESTAMP, + "datetime": ValueType.UNIX_TIMESTAMP, "datetime64[ns]": ValueType.UNIX_TIMESTAMP, "datetime64[ns, tz]": ValueType.UNIX_TIMESTAMP, "category": ValueType.STRING, @@ -281,6 +283,8 @@ def _python_value_to_proto_value(feast_value_type, value) -> ProtoValue: elif feast_value_type == ValueType.INT64: return ProtoValue(int64_val=int(value)) elif feast_value_type == ValueType.UNIX_TIMESTAMP: + if isinstance(value, datetime): + return ProtoValue(int64_val=int(value.timestamp())) return ProtoValue(int64_val=int(value)) elif feast_value_type == ValueType.FLOAT: return ProtoValue(float_val=float(value)) diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py index 5c89d9b966..560972294a 100644 --- a/sdk/python/tests/integration/e2e/test_universal_e2e.py +++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py @@ -16,10 +16,13 @@ @parametrize_e2e_test def test_e2e_consistency(test_environment: Environment): + infer_features = test_environment.test_repo_config.infer_features fs, fv = ( test_environment.feature_store, - driver_feature_view(test_environment.data_source), + driver_feature_view(data_source=test_environment.data_source, + infer_features=infer_features), ) + import pdb; pdb.set_trace() entity = driver() fs.apply([fv, entity]) diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py index 85cdb57422..5a93daca9d 100644 --- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py @@ -36,6 +36,7 @@ class TestRepoConfig: full_feature_names: bool = True infer_event_timestamp_col: bool = True + infer_features: bool = False def ds_creator_path(cls: str): @@ -230,6 +231,14 @@ def vary_infer_event_timestamp_col( return new_configs +def vary_infer_feature(configs: List[TestRepoConfig]) -> List[TestRepoConfig]: + new_configs = [] + for c in configs: + true_c = replace(c, infer_features=True) + false_c = replace(c, infer_features=False) + new_configs.extend([true_c, false_c]) + return new_configs + def vary_providers_for_offline_stores( configs: List[TestRepoConfig], ) -> List[TestRepoConfig]: @@ -258,9 +267,8 @@ class EnvironmentSetupSteps(IntEnum): @contextmanager def construct_universal_test_environment( - test_repo_config: TestRepoConfig, - stop_at_step=DEFAULT_STEP, - data_source_cache=None, + test_repo_config: TestRepoConfig, stop_at_step=DEFAULT_STEP, data_source_cache=None, + **kwargs ) -> Environment: """ This method should take 
in the parameters from the test repo config and created a feature repo, apply it, @@ -270,6 +278,8 @@ def construct_universal_test_environment( The user is *not* expected to perform any clean up actions. :param test_repo_config: configuration + :param stop_at_step: The step which should be the last one executed when setting up the test environment. + :param data_source_cache: :return: A feature store built using the supplied configuration. """ @@ -313,8 +323,6 @@ def construct_universal_test_environment( fvs = [] entities = [] try: - if stop_at_step <= EnvironmentSetupSteps.INIT: - pass if stop_at_step >= EnvironmentSetupSteps.CREATE_OBJECTS: if data_source_cache: fixtures = data_source_cache.get(test_repo_config) @@ -354,7 +362,8 @@ def construct_universal_test_environment( yield environment finally: - if not data_source_cache: + import pdb; pdb.set_trace() + if not data_source_cache and stop_at_step >= EnvironmentSetupSteps.CREATE_OBJECTS: offline_creator.teardown() fs.teardown() @@ -373,7 +382,7 @@ def parametrize_e2e_test(e2e_test): """ @pytest.mark.integration - @pytest.mark.parametrize("config", FULL_REPO_CONFIGS, ids=lambda v: str(v)) + @pytest.mark.parametrize("config", vary_infer_feature(FULL_REPO_CONFIGS), ids=lambda v: str(v)) def inner_test(config, data_source_cache): with construct_universal_test_environment( config, data_source_cache=data_source_cache diff --git a/sdk/python/tests/integration/feature_repos/universal/feature_views.py b/sdk/python/tests/integration/feature_repos/universal/feature_views.py index 0306044ecd..176a03f0dc 100644 --- a/sdk/python/tests/integration/feature_repos/universal/feature_views.py +++ b/sdk/python/tests/integration/feature_repos/universal/feature_views.py @@ -5,22 +5,24 @@ def driver_feature_view( - data_source: DataSource, name="test_correctness" + data_source: DataSource, name="test_correctness", + infer_features: bool = False, ) -> FeatureView: return FeatureView( name=name, entities=["driver"], - features=[Feature("value", ValueType.FLOAT)], + features=None if infer_features else [Feature("value", ValueType.FLOAT)], ttl=timedelta(days=5), input=data_source, ) -def create_driver_hourly_stats_feature_view(source): +def create_driver_hourly_stats_feature_view(source, + infer_features: bool = False): driver_stats_feature_view = FeatureView( name="driver_stats", entities=["driver"], - features=[ + features=None if infer_features else [ Feature(name="conv_rate", dtype=ValueType.FLOAT), Feature(name="acc_rate", dtype=ValueType.FLOAT), Feature(name="avg_daily_trips", dtype=ValueType.INT32), @@ -31,11 +33,12 @@ def create_driver_hourly_stats_feature_view(source): return driver_stats_feature_view -def create_customer_daily_profile_feature_view(source): +def create_customer_daily_profile_feature_view(source, + infer_features: bool = False): customer_profile_feature_view = FeatureView( name="customer_profile", entities=["customer_id"], - features=[ + features=None if infer_features else [ Feature(name="current_balance", dtype=ValueType.FLOAT), Feature(name="avg_passenger_count", dtype=ValueType.FLOAT), Feature(name="lifetime_trip_count", dtype=ValueType.INT32), From 3d7597752b1fd1b1280a2ab28f209ef99795e28c Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Thu, 26 Aug 2021 14:06:09 -0700 Subject: [PATCH 24/51] Fix for feature inferencing Signed-off-by: Achal Shah --- sdk/python/feast/feature_view.py | 22 +++++++++++++++---- sdk/python/feast/infra/provider.py | 8 +++---- sdk/python/feast/registry.py | 1 - sdk/python/feast/type_map.py | 4 ++++ 
.../integration/e2e/test_universal_e2e.py | 6 ++--- .../feature_repos/test_repo_configuration.py | 17 +++++++++----- .../universal/data_sources/file.py | 2 +- .../feature_repos/universal/feature_views.py | 17 +++++++------- 8 files changed, 51 insertions(+), 26 deletions(-) diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index cbb5a752d3..801d578e7d 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -325,10 +325,24 @@ def infer_features_from_batch_source(self, config: RepoConfig): self.batch_source.created_timestamp_column, } | set(self.entities) - if self.batch_source.event_timestamp_column in self.batch_source.field_mapping: - columns_to_exclude.add(self.batch_source.field_mapping[self.batch_source.event_timestamp_column]) - if self.batch_source.created_timestamp_column in self.batch_source.field_mapping: - columns_to_exclude.add(self.batch_source.field_mapping[self.batch_source.created_timestamp_column]) + if ( + self.batch_source.event_timestamp_column + in self.batch_source.field_mapping + ): + columns_to_exclude.add( + self.batch_source.field_mapping[ + self.batch_source.event_timestamp_column + ] + ) + if ( + self.batch_source.created_timestamp_column + in self.batch_source.field_mapping + ): + columns_to_exclude.add( + self.batch_source.field_mapping[ + self.batch_source.created_timestamp_column + ] + ) for e in self.entities: if e in self.batch_source.field_mapping: columns_to_exclude.add(self.batch_source.field_mapping[e]) diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py index 8f450662c9..e2c2bb2975 100644 --- a/sdk/python/feast/infra/provider.py +++ b/sdk/python/feast/infra/provider.py @@ -238,12 +238,12 @@ def _get_column_names( reverse_field_mapping[col] if col in reverse_field_mapping.keys() else col for col in feature_names ] - feature_names = set(feature_names) - set(join_keys) - feature_names = feature_names - {event_timestamp_column} - feature_names = feature_names - {created_timestamp_column} + _feature_names = set(feature_names) - set(join_keys) + _feature_names = _feature_names - {event_timestamp_column} + _feature_names = _feature_names - {created_timestamp_column} return ( join_keys, - list(feature_names), + list(_feature_names), event_timestamp_column, created_timestamp_column, ) diff --git a/sdk/python/feast/registry.py b/sdk/python/feast/registry.py index 0839453b75..af04de5f3f 100644 --- a/sdk/python/feast/registry.py +++ b/sdk/python/feast/registry.py @@ -593,7 +593,6 @@ def __init__(self, repo_path: Path, registry_path_string: str): def get_registry_proto(self): registry_proto = RegistryProto() - import pdb; pdb.set_trace() if self._filepath.exists(): registry_proto.ParseFromString(self._filepath.read_bytes()) return registry_proto diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 7aee3e5403..4cc5345262 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -19,6 +19,7 @@ import numpy as np import pandas as pd from google.protobuf.json_format import MessageToDict +from google.protobuf.timestamp_pb2 import Timestamp from feast.protos.feast.types.Value_pb2 import ( BoolList, @@ -105,6 +106,7 @@ def python_type_to_feast_value_type( "int8": ValueType.INT32, "bool": ValueType.BOOL, "timedelta": ValueType.UNIX_TIMESTAMP, + "Timestamp": ValueType.UNIX_TIMESTAMP, "datetime": ValueType.UNIX_TIMESTAMP, "datetime64[ns]": ValueType.UNIX_TIMESTAMP, "datetime64[ns, tz]": ValueType.UNIX_TIMESTAMP, @@ -285,6 +287,8 @@ 
def _python_value_to_proto_value(feast_value_type, value) -> ProtoValue: elif feast_value_type == ValueType.UNIX_TIMESTAMP: if isinstance(value, datetime): return ProtoValue(int64_val=int(value.timestamp())) + elif isinstance(value, Timestamp): + return ProtoValue(int64_val=int(value.ToSeconds())) return ProtoValue(int64_val=int(value)) elif feast_value_type == ValueType.FLOAT: return ProtoValue(float_val=float(value)) diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py index 560972294a..0d2a62322a 100644 --- a/sdk/python/tests/integration/e2e/test_universal_e2e.py +++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py @@ -19,10 +19,10 @@ def test_e2e_consistency(test_environment: Environment): infer_features = test_environment.test_repo_config.infer_features fs, fv = ( test_environment.feature_store, - driver_feature_view(data_source=test_environment.data_source, - infer_features=infer_features), + driver_feature_view( + data_source=test_environment.data_source, infer_features=infer_features + ), ) - import pdb; pdb.set_trace() entity = driver() fs.apply([fv, entity]) diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py index 5a93daca9d..92e90cb122 100644 --- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py @@ -239,6 +239,7 @@ def vary_infer_feature(configs: List[TestRepoConfig]) -> List[TestRepoConfig]: new_configs.extend([true_c, false_c]) return new_configs + def vary_providers_for_offline_stores( configs: List[TestRepoConfig], ) -> List[TestRepoConfig]: @@ -267,8 +268,10 @@ class EnvironmentSetupSteps(IntEnum): @contextmanager def construct_universal_test_environment( - test_repo_config: TestRepoConfig, stop_at_step=DEFAULT_STEP, data_source_cache=None, - **kwargs + test_repo_config: TestRepoConfig, + stop_at_step=DEFAULT_STEP, + data_source_cache=None, + **kwargs, ) -> Environment: """ This method should take in the parameters from the test repo config and created a feature repo, apply it, @@ -362,8 +365,10 @@ def construct_universal_test_environment( yield environment finally: - import pdb; pdb.set_trace() - if not data_source_cache and stop_at_step >= EnvironmentSetupSteps.CREATE_OBJECTS: + if ( + not data_source_cache + and stop_at_step >= EnvironmentSetupSteps.CREATE_OBJECTS + ): offline_creator.teardown() fs.teardown() @@ -382,7 +387,9 @@ def parametrize_e2e_test(e2e_test): """ @pytest.mark.integration - @pytest.mark.parametrize("config", vary_infer_feature(FULL_REPO_CONFIGS), ids=lambda v: str(v)) + @pytest.mark.parametrize( + "config", vary_infer_feature(FULL_REPO_CONFIGS), ids=lambda v: str(v) + ) def inner_test(config, data_source_cache): with construct_universal_test_environment( config, data_source_cache=data_source_cache diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py index 046f40afa0..cd86740e6a 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py @@ -44,7 +44,7 @@ def create_data_source( self.files.append(f) return FileSource( file_format=ParquetFormat(), - path=f"file://{f.name}", + path=f"{f.name}", event_timestamp_column=event_timestamp_column, 
created_timestamp_column=created_timestamp_column,
             date_partition_column="",
diff --git a/sdk/python/tests/integration/feature_repos/universal/feature_views.py b/sdk/python/tests/integration/feature_repos/universal/feature_views.py
index 176a03f0dc..b57e2988ab 100644
--- a/sdk/python/tests/integration/feature_repos/universal/feature_views.py
+++ b/sdk/python/tests/integration/feature_repos/universal/feature_views.py
@@ -5,8 +5,7 @@
 
 
 def driver_feature_view(
-    data_source: DataSource, name="test_correctness",
-    infer_features: bool = False,
+    data_source: DataSource, name="test_correctness", infer_features: bool = False,
 ) -> FeatureView:
     return FeatureView(
         name=name,
@@ -17,12 +16,13 @@ )
 
 
-def create_driver_hourly_stats_feature_view(source,
-                                            infer_features: bool = False):
+def create_driver_hourly_stats_feature_view(source, infer_features: bool = False):
     driver_stats_feature_view = FeatureView(
         name="driver_stats",
         entities=["driver"],
-        features=None if infer_features else [
+        features=None
+        if infer_features
+        else [
             Feature(name="conv_rate", dtype=ValueType.FLOAT),
             Feature(name="acc_rate", dtype=ValueType.FLOAT),
             Feature(name="avg_daily_trips", dtype=ValueType.INT32),
@@ -31,11 +33,12 @@ def create_driver_hourly_stats_feature_view(source,
     return driver_stats_feature_view
 
 
-def create_customer_daily_profile_feature_view(source,
-                                               infer_features: bool = False):
+def create_customer_daily_profile_feature_view(source, infer_features: bool = False):
     customer_profile_feature_view = FeatureView(
         name="customer_profile",
         entities=["customer_id"],
-        features=None if infer_features else [
+        features=None
+        if infer_features
+        else [
             Feature(name="current_balance", dtype=ValueType.FLOAT),
             Feature(name="avg_passenger_count", dtype=ValueType.FLOAT),
             Feature(name="lifetime_trip_count", dtype=ValueType.INT32),

From 1abbaa84d59aaa1774abffa0fe43dbf7573e9b36 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Thu, 26 Aug 2021 14:09:16 -0700
Subject: [PATCH 25/51] C901 '_python_value_to_proto_value' is too complex :(

Signed-off-by: Achal Shah
---
 sdk/python/feast/type_map.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py
index 4cc5345262..3ea6cda968 100644
--- a/sdk/python/feast/type_map.py
+++ b/sdk/python/feast/type_map.py
@@ -164,7 +164,7 @@ def _type_err(item, dtype):
     raise ValueError(f'Value "{item}" is of type {type(item)} not of type {dtype}')
 
 
-def _python_value_to_proto_value(feast_value_type, value) -> ProtoValue:
+def _python_value_to_proto_value(feast_value_type, value) -> ProtoValue:  # noqa: C901
     """
     Converts a Python (native, pandas) value to a Feast Proto Value based
     on a provided value type

From 21996b786a00b816ef0d0926798a1d5640b77a06 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Thu, 26 Aug 2021 23:20:35 -0700
Subject: [PATCH 26/51] Split out construct_test_environment and
 construct_universal_test_environment

Signed-off-by: Achal Shah
---
 sdk/python/feast/infra/provider.py            |   3 +-
 sdk/python/tests/conftest.py                  |  29 ++--
 .../integration/e2e/test_universal_e2e.py     |  47 ++++--
 .../feature_repos/test_repo_configuration.py  | 134 +++++++++---------
 .../universal/data_sources/bigquery.py        |  23 ++-
 .../universal/data_sources/file.py            |   2 +-
 6 files changed, 129 insertions(+), 109 deletions(-)

diff --git
def _get_column_names( for col in feature_names ] _feature_names = set(feature_names) - set(join_keys) - _feature_names = _feature_names - {event_timestamp_column} - _feature_names = _feature_names - {created_timestamp_column} + _feature_names = _feature_names - {event_timestamp_column, created_timestamp_column} return ( join_keys, list(_feature_names), diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index 7147e26dee..ee193ea419 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -14,11 +14,13 @@ import multiprocessing from datetime import datetime, timedelta from sys import platform -from typing import Any, Dict +from typing import Any, Callable, Dict, Tuple import pandas as pd import pytest +from feast.data_source import DataSource + def pytest_configure(config): if platform in ["darwin", "windows"]: @@ -90,26 +92,19 @@ def simple_dataset_2() -> pd.DataFrame: return pd.DataFrame.from_dict(data) -class DataSourceCache: - cache: Dict[str, Any] = {} - - def get(self, test_repo_config): - return self.cache.get(test_repo_config.offline_store_creator, None) - - def put( - self, test_repo_config, entites, datasets, data_sources, data_source_creator - ): - self.cache[test_repo_config.offline_store_creator] = ( - entites, - datasets, - data_sources, - data_source_creator, - ) +class DataSourceCache(dict): + def get_or_create(self, key, c: Callable[[], Any]): + if key in self: + return self[key] + else: + v = c() + self[key] = v + return v @pytest.fixture(scope="session", autouse=True) def data_source_cache(): dsc = DataSourceCache() yield dsc - for _, v in dsc.cache.items(): + for _, v in dsc.items(): v[3].teardown() diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py index 0d2a62322a..2992dcc6d7 100644 --- a/sdk/python/tests/integration/e2e/test_universal_e2e.py +++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py @@ -3,30 +3,49 @@ from typing import Optional import pandas as pd +import pytest from pytz import utc from feast import FeatureStore, FeatureView +from tests.conftest import DataSourceCache +from tests.data.data_creator import create_dataset from tests.integration.feature_repos.test_repo_configuration import ( - Environment, - parametrize_e2e_test, + FULL_REPO_CONFIGS, + construct_test_environment, + vary_infer_feature, ) from tests.integration.feature_repos.universal.entities import driver from tests.integration.feature_repos.universal.feature_views import driver_feature_view -@parametrize_e2e_test -def test_e2e_consistency(test_environment: Environment): - infer_features = test_environment.test_repo_config.infer_features - fs, fv = ( - test_environment.feature_store, - driver_feature_view( - data_source=test_environment.data_source, infer_features=infer_features - ), - ) - entity = driver() - fs.apply([fv, entity]) +@pytest.mark.integration +@pytest.mark.parametrize( + "config", vary_infer_feature(FULL_REPO_CONFIGS), ids=lambda v: str(v) +) +def test_e2e_consistency(config, data_source_cache: DataSourceCache): + df = create_dataset() + test_suite_name = "test_e2e_consistency" + with construct_test_environment(test_suite_name, config) as test_environment: + fs = test_environment.feature_store + infer_features = test_environment.test_repo_config.infer_features + key = f"test_e2e_consistency_{test_environment.test_repo_config.offline_store_creator}" + _, _, data_source, _ = data_source_cache.get_or_create( + key, + lambda: ( + None, + df, + 
test_environment.data_source_creator.create_data_source( + df, fs.project, field_mapping={"ts_1": "ts", "id": "driver_id"} + ), + test_environment.data_source_creator, + ), + ) + fv = driver_feature_view(data_source=data_source, infer_features=infer_features) + + entity = driver() + fs.apply([fv, entity]) - run_offline_online_store_consistency_test(fs, fv) + run_offline_online_store_consistency_test(fs, fv) def check_offline_and_online_features( diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py index 92e90cb122..e6bb9f441b 100644 --- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py @@ -12,7 +12,6 @@ from feast import FeatureStore, FeatureView, RepoConfig, driver_test_data, importer from feast.data_source import DataSource -from tests.data.data_creator import create_dataset from tests.integration.feature_repos.universal.data_source_creator import ( DataSourceCreator, ) @@ -187,7 +186,6 @@ class Environment: name: str test_repo_config: TestRepoConfig feature_store: FeatureStore - data_source: DataSource data_source_creator: DataSourceCreator entities: Dict[str, List[Any]] = field(default_factory=dict) @@ -267,26 +265,10 @@ class EnvironmentSetupSteps(IntEnum): @contextmanager -def construct_universal_test_environment( - test_repo_config: TestRepoConfig, - stop_at_step=DEFAULT_STEP, - data_source_cache=None, - **kwargs, +def construct_test_environment( + test_suite_name: str, test_repo_config: TestRepoConfig, ) -> Environment: - """ - This method should take in the parameters from the test repo config and created a feature repo, apply it, - and return the constructed feature store object to callers. - - This feature store object can be interacted for the purposes of tests. - The user is *not* expected to perform any clean up actions. - - :param test_repo_config: configuration - :param stop_at_step: The step which should be the last one executed when setting up the test environment. - :param data_source_cache: - :return: A feature store built using the supplied configuration. - """ - - project = f"test_correctness_{str(uuid.uuid4()).replace('-', '')[:8]}" + project = f"{test_suite_name}_{str(uuid.uuid4()).replace('-', '')[:8]}" module_name, config_class_name = test_repo_config.offline_store_creator.rsplit( ".", 1 @@ -296,12 +278,6 @@ def construct_universal_test_environment( module_name, config_class_name, "DataSourceCreator" )(project) - # This needs to be abstracted away for test_e2e_universal which uses a different dataset. - df = create_dataset() - ds = offline_creator.create_data_source( - df, project, field_mapping={"ts_1": "ts", "id": "driver_id"} - ) - offline_store = offline_creator.create_offline_store_config() online_store = test_repo_config.online_store @@ -319,16 +295,44 @@ def construct_universal_test_environment( name=project, test_repo_config=test_repo_config, feature_store=fs, - data_source=ds, data_source_creator=offline_creator, ) + try: + yield environment + finally: + fs.teardown() + + +@contextmanager +def construct_universal_test_environment( + test_suite_name: str, + test_repo_config: TestRepoConfig, + stop_at_step=DEFAULT_STEP, + data_source_cache=None, + **kwargs, +) -> Environment: + """ + This method should take in the parameters from the test repo config and created a feature repo, apply it, + and return the constructed feature store object to callers. 
+ + This feature store object can be interacted for the purposes of tests. + The user is *not* expected to perform any clean up actions. + + :param test_suite_name: A name for the test suite. + :param test_repo_config: configuration + :param stop_at_step: The step which should be the last one executed when setting up the test environment. + :param data_source_cache: + :return: A feature store built using the supplied configuration. + """ + with construct_test_environment(test_suite_name, test_repo_config) as environment: + fs = environment.feature_store fvs = [] entities = [] try: if stop_at_step >= EnvironmentSetupSteps.CREATE_OBJECTS: - if data_source_cache: - fixtures = data_source_cache.get(test_repo_config) + if data_source_cache is not None: + fixtures = data_source_cache.get(test_repo_config, None) if fixtures: environment = setup_entities( environment, entities_override=fixtures[0] @@ -340,20 +344,39 @@ def construct_universal_test_environment( environment, data_sources_override=fixtures[2] ) else: - environment = setup_entities(environment) - environment = setup_datasets(environment) - environment = setup_data_sources(environment) - data_source_cache.put( - test_repo_config, + environment = setup_entities( + environment, + entities_override=kwargs.get("entites_override", None), + ) + environment = setup_datasets( + environment, + datasets_override=kwargs.get("datasets_overrides", None), + ) + environment = setup_data_sources( + environment, + data_sources_override=kwargs.get( + "data_sources_override", None + ), + ) + data_source_cache[test_repo_config] = ( environment.entities, environment.datasets, environment.datasources, - offline_creator, + environment.data_source_creator, ) else: - environment = setup_entities(environment) - environment = setup_datasets(environment) - environment = setup_data_sources(environment) + environment = setup_entities( + environment, + entities_override=kwargs.get("entites_override", None), + ) + environment = setup_datasets( + environment, + datasets_override=kwargs.get("datasets_overrides", None), + ) + environment = setup_data_sources( + environment, + data_sources_override=kwargs.get("data_sources_override", None), + ) environment = setup_feature_views(environment) if stop_at_step >= EnvironmentSetupSteps.APPLY_OBJECTS: @@ -366,37 +389,10 @@ def construct_universal_test_environment( yield environment finally: if ( - not data_source_cache + data_source_cache is None and stop_at_step >= EnvironmentSetupSteps.CREATE_OBJECTS ): - offline_creator.teardown() - fs.teardown() - - -def parametrize_e2e_test(e2e_test): - """ - This decorator should be used for end-to-end tests. These tests are expected to be parameterized, - and receive an empty feature repo created for all supported configurations. - - The decorator also ensures that sample data needed for the test is available in the relevant offline store. - - Decorated tests should create and apply the objects needed by the tests, and perform any operations needed - (such as materialization and looking up feature values). - - The decorator takes care of tearing down the feature store, as well as the sample data. 
- """ - - @pytest.mark.integration - @pytest.mark.parametrize( - "config", vary_infer_feature(FULL_REPO_CONFIGS), ids=lambda v: str(v) - ) - def inner_test(config, data_source_cache): - with construct_universal_test_environment( - config, data_source_cache=data_source_cache - ) as environment: - e2e_test(environment) - - return inner_test + environment.data_source_creator.teardown() def parametrize_offline_retrieval_test(offline_retrieval_test): @@ -419,6 +415,7 @@ def parametrize_offline_retrieval_test(offline_retrieval_test): @pytest.mark.parametrize("config", configs, ids=lambda v: str(v)) def inner_test(config, data_source_cache): with construct_universal_test_environment( + offline_retrieval_test.__name__, config, stop_at_step=EnvironmentSetupSteps.APPLY_OBJECTS, data_source_cache=data_source_cache, @@ -446,6 +443,7 @@ def parametrize_online_test(online_test): @pytest.mark.parametrize("config", configs, ids=lambda v: str(v)) def inner_test(config, data_source_cache): with construct_universal_test_environment( + online_test.__name__, config, stop_at_step=EnvironmentSetupSteps.MATERIALIZE, data_source_cache=data_source_cache, diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py index a38e03143b..5c888ca113 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py @@ -2,6 +2,7 @@ import pandas as pd from google.cloud import bigquery +from google.cloud.bigquery import Dataset from feast import BigQuerySource from feast.data_source import DataSource @@ -12,21 +13,27 @@ class BigQueryDataSourceCreator(DataSourceCreator): + dataset: Optional[Dataset] = None + def __init__(self, project_name: str): self.client = bigquery.Client() self.project_name = project_name self.gcp_project = self.client.project self.dataset_id = f"{self.gcp_project}.{project_name}" - self.dataset = bigquery.Dataset(self.dataset_id) - print(f"Creating dataset: {self.dataset_id}") - self.client.create_dataset(self.dataset, exists_ok=True) - self.dataset.default_table_expiration_ms = ( - 1000 * 60 * 60 * 24 * 14 - ) # 2 weeks in milliseconds - self.client.update_dataset(self.dataset, ["default_table_expiration_ms"]) self.tables = [] + def get_dataset(self): + if not self.dataset: + self.dataset = bigquery.Dataset(self.dataset_id) + print(f"Creating dataset: {self.dataset_id}") + self.client.create_dataset(self.dataset, exists_ok=True) + self.dataset.default_table_expiration_ms = ( + 1000 * 60 * 60 * 24 * 14 + ) # 2 weeks in milliseconds + self.client.update_dataset(self.dataset, ["default_table_expiration_ms"]) + return self.dataset + def teardown(self): for table in self.tables: @@ -55,6 +62,8 @@ def create_data_source( if not destination: destination = self.get_prefixed_table_name(suffix) + self.get_dataset() + job_config = bigquery.LoadJobConfig() if self.gcp_project not in destination: destination = f"{self.gcp_project}.{self.project_name}.{destination}" diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py index cd86740e6a..48630dd3f9 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py @@ -38,7 +38,7 @@ def create_data_source( destination = 
self.get_prefixed_table_name(suffix) f = tempfile.NamedTemporaryFile( - prefix=self.project_name, suffix=".parquet", delete=False + prefix=f"{self.project_name}_{destination}", suffix=".parquet", delete=False ) df.to_parquet(f.name) self.files.append(f) From 4541561130fda3910ba7a758ac3d56ac2df490d3 Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Thu, 26 Aug 2021 23:23:11 -0700 Subject: [PATCH 27/51] remove import Signed-off-by: Achal Shah --- sdk/python/tests/conftest.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index ee193ea419..57a47c7f17 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -14,13 +14,11 @@ import multiprocessing from datetime import datetime, timedelta from sys import platform -from typing import Any, Callable, Dict, Tuple +from typing import Any, Callable import pandas as pd import pytest -from feast.data_source import DataSource - def pytest_configure(config): if platform in ["darwin", "windows"]: From 2f20b5c615bfe2a1fbec4938423f5dadf3c00a5a Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Thu, 26 Aug 2021 23:33:01 -0700 Subject: [PATCH 28/51] add unsafe_hash Signed-off-by: Achal Shah --- sdk/python/tests/integration/e2e/test_universal_e2e.py | 2 +- .../tests/integration/feature_repos/test_repo_configuration.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py index 2992dcc6d7..3362b45466 100644 --- a/sdk/python/tests/integration/e2e/test_universal_e2e.py +++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py @@ -28,7 +28,7 @@ def test_e2e_consistency(config, data_source_cache: DataSourceCache): with construct_test_environment(test_suite_name, config) as test_environment: fs = test_environment.feature_store infer_features = test_environment.test_repo_config.infer_features - key = f"test_e2e_consistency_{test_environment.test_repo_config.offline_store_creator}" + key = f"{test_suite_name}_{test_environment.test_repo_config.offline_store_creator}" _, _, data_source, _ = data_source_cache.get_or_create( key, lambda: ( diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py index e6bb9f441b..52d01bcbf7 100644 --- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py @@ -22,7 +22,7 @@ ) -@dataclass(frozen=True, repr=True) +@dataclass(frozen=True, repr=True, unsafe_hash=True) class TestRepoConfig: """ This class should hold all possible parameters that may need to be varied by individual tests. 
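The unsafe_hash=True added above matters because TestRepoConfig is being used as a dictionary key for the data source cache: a dataclass-generated __hash__ delegates to the field values, and the online_store field may hold a plain dict, which Python cannot hash. A minimal sketch of the pitfall, with StubConfig as an illustrative stand-in rather than the real TestRepoConfig:

from dataclasses import dataclass
from typing import Dict, Union


# Stand-in for TestRepoConfig; field names mirror the real class, everything
# else here is illustrative.
@dataclass(frozen=True, repr=True, unsafe_hash=True)
class StubConfig:
    provider: str = "local"
    # A Union field that may hold a dict makes instances unhashable at lookup
    # time, even though the dataclass itself defines __hash__.
    online_store: Union[str, Dict] = "sqlite"


cache = {}
cache[StubConfig()] = "cached"  # str-valued field hashes fine

try:
    cache[StubConfig(online_store={"type": "redis"})] = "cached"
except TypeError as err:
    print(err)  # unhashable type: 'dict'

This is presumably why PATCH 30 below ends up keying the cache by the offline_store_creator string rather than by the config object itself.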
From 6cc32b520ad837e1c89f78e8cca0f8f959733e7d Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Thu, 26 Aug 2021 23:48:18 -0700 Subject: [PATCH 29/51] Update testrepoconfig Signed-off-by: Achal Shah --- .../feature_repos/test_repo_configuration.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py index 52d01bcbf7..ea81bc587a 100644 --- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py @@ -22,14 +22,14 @@ ) -@dataclass(frozen=True, repr=True, unsafe_hash=True) +@dataclass(repr=True) class TestRepoConfig: """ This class should hold all possible parameters that may need to be varied by individual tests. """ provider: str = "local" - online_store: Union[str, Dict] = "sqlite" + online_store: Union[str, Dict] = field(default="sqlite", hash=False) offline_store_creator: str = "tests.integration.feature_repos.universal.data_sources.file.FileDataSourceCreator" @@ -37,6 +37,13 @@ class TestRepoConfig: infer_event_timestamp_col: bool = True infer_features: bool = False + def __post_init__(self): + self.online_store_type = ( + self.online_store + if isinstance(self.online_store, str) + else self.online_store.get("type", "unknown") + ) + def ds_creator_path(cls: str): return f"tests.integration.feature_repos.universal.data_sources.{cls}" From 68f299706ba11f5b9cf918472758554dd38e8ef0 Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Thu, 26 Aug 2021 23:52:41 -0700 Subject: [PATCH 30/51] Update testrepoconfig Signed-off-by: Achal Shah --- .../feature_repos/test_repo_configuration.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py index ea81bc587a..9ce7c91345 100644 --- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py @@ -22,14 +22,14 @@ ) -@dataclass(repr=True) +@dataclass(frozen=True, repr=True) class TestRepoConfig: """ This class should hold all possible parameters that may need to be varied by individual tests. 
""" provider: str = "local" - online_store: Union[str, Dict] = field(default="sqlite", hash=False) + online_store: Union[str, Dict] = "sqlite" offline_store_creator: str = "tests.integration.feature_repos.universal.data_sources.file.FileDataSourceCreator" @@ -37,13 +37,6 @@ class TestRepoConfig: infer_event_timestamp_col: bool = True infer_features: bool = False - def __post_init__(self): - self.online_store_type = ( - self.online_store - if isinstance(self.online_store, str) - else self.online_store.get("type", "unknown") - ) - def ds_creator_path(cls: str): return f"tests.integration.feature_repos.universal.data_sources.{cls}" @@ -339,7 +332,9 @@ def construct_universal_test_environment( try: if stop_at_step >= EnvironmentSetupSteps.CREATE_OBJECTS: if data_source_cache is not None: - fixtures = data_source_cache.get(test_repo_config, None) + fixtures = data_source_cache.get( + test_repo_config.offline_store_creator, None + ) if fixtures: environment = setup_entities( environment, entities_override=fixtures[0] @@ -365,7 +360,7 @@ def construct_universal_test_environment( "data_sources_override", None ), ) - data_source_cache[test_repo_config] = ( + data_source_cache[test_repo_config.offline_store_creator] = ( environment.entities, environment.datasets, environment.datasources, From a73868d5ddc68c40731539e4c80053054d4edaf6 Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Fri, 27 Aug 2021 00:01:14 -0700 Subject: [PATCH 31/51] Remove kwargs from construct_universal_test_environment Signed-off-by: Achal Shah --- .../feature_repos/test_repo_configuration.py | 33 ++++--------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py index 9ce7c91345..fbee272436 100644 --- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py @@ -310,7 +310,6 @@ def construct_universal_test_environment( test_repo_config: TestRepoConfig, stop_at_step=DEFAULT_STEP, data_source_cache=None, - **kwargs, ) -> Environment: """ This method should take in the parameters from the test repo config and created a feature repo, apply it, @@ -346,20 +345,9 @@ def construct_universal_test_environment( environment, data_sources_override=fixtures[2] ) else: - environment = setup_entities( - environment, - entities_override=kwargs.get("entites_override", None), - ) - environment = setup_datasets( - environment, - datasets_override=kwargs.get("datasets_overrides", None), - ) - environment = setup_data_sources( - environment, - data_sources_override=kwargs.get( - "data_sources_override", None - ), - ) + environment = setup_entities(environment) + environment = setup_datasets(environment) + environment = setup_data_sources(environment) data_source_cache[test_repo_config.offline_store_creator] = ( environment.entities, environment.datasets, @@ -367,18 +355,9 @@ def construct_universal_test_environment( environment.data_source_creator, ) else: - environment = setup_entities( - environment, - entities_override=kwargs.get("entites_override", None), - ) - environment = setup_datasets( - environment, - datasets_override=kwargs.get("datasets_overrides", None), - ) - environment = setup_data_sources( - environment, - data_sources_override=kwargs.get("data_sources_override", None), - ) + environment = setup_entities(environment) + environment = setup_datasets(environment) + environment = 
setup_data_sources(environment)
 
             environment = setup_feature_views(environment)
         if stop_at_step >= EnvironmentSetupSteps.APPLY_OBJECTS:

From ff9d49dfd2a4330e766955465d08d97ad31fa1d8 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Fri, 27 Aug 2021 10:42:08 -0700
Subject: [PATCH 32/51] Remove unneeded method

Signed-off-by: Achal Shah
---
 .../integration/feature_repos/test_repo_configuration.py | 4 ++--
 .../feature_repos/universal/data_source_creator.py       | 4 ----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index fbee272436..641017ac76 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -278,7 +278,7 @@ def construct_test_environment(
         module_name, config_class_name, "DataSourceCreator"
     )(project)
 
-    offline_store = offline_creator.create_offline_store_config()
+    offline_store_config = offline_creator.create_offline_store_config()
     online_store = test_repo_config.online_store
 
     with tempfile.TemporaryDirectory() as repo_dir_name:
@@ -286,7 +286,7 @@ def construct_test_environment(
             registry=str(Path(repo_dir_name) / "registry.db"),
             project=project,
             provider=test_repo_config.provider,
-            offline_store=offline_store,
+            offline_store=offline_store_config,
             online_store=online_store,
             repo_path=repo_dir_name,
         )
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
index feb5ae5e98..4385adab85 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
@@ -27,7 +27,3 @@ def create_offline_store_config(self) -> FeastConfigBaseModel:
     @abstractmethod
     def teardown(self):
         ...
-
-    @abstractmethod
-    def get_prefixed_table_name(self, table_name: str) -> str:
-        ...

From b95c3ee389018a17e105e21a83999713e98616be Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Fri, 27 Aug 2021 12:26:46 -0700
Subject: [PATCH 33/51] Docs

Signed-off-by: Achal Shah
---
 sdk/python/tests/conftest.py                  | 15 +++++++++++++++
 .../integration/e2e/test_universal_e2e.py     |  2 +-
 .../feature_repos/test_repo_configuration.py  |  6 +++---
 .../universal/data_source_creator.py          | 19 ++++++++++++++++---
 .../universal/data_sources/bigquery.py        |  5 +----
 .../universal/data_sources/file.py            |  7 ++-----
 .../universal/data_sources/redshift.py        |  6 ++----
 7 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py
index 57a47c7f17..4cc8bcec1a 100644
--- a/sdk/python/tests/conftest.py
+++ b/sdk/python/tests/conftest.py
@@ -91,6 +91,21 @@ def simple_dataset_2() -> pd.DataFrame:
 
 
 class DataSourceCache(dict):
+    """
+    DataSourceCache is meant to cache datasources to be reused across multiple tests.
+
+    Staging data into a remote store is an expensive operation, so we expose a way to share
+    the created datasources between tests.
+    The key for the cache is an arbitrary string, which should be constructed using a combination
+    of the test name and the backing store.
+    The value for the cache is a 4-element tuple:
+    - The entities that are meant to be used with the data source created.
+    - The dataframe that is persisted into the remote store.
+    - The data source object that represents the table/file; created from the dataframe mentioned above.
+    - The data_source_creator object that is used to upload the dataframe and construct the data source object.
+      This object is needed so that we can invoke its teardown method at the end of all our tests.
+    """
+
     def get_or_create(self, key, c: Callable[[], Any]):
         if key in self:
             return self[key]
diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py
index 3362b45466..57d2d361ce 100644
--- a/sdk/python/tests/integration/e2e/test_universal_e2e.py
+++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py
@@ -24,7 +24,7 @@ )
 def test_e2e_consistency(config, data_source_cache: DataSourceCache):
     df = create_dataset()
-    test_suite_name = "test_e2e_consistency"
+    test_suite_name = test_e2e_consistency.__name__
     with construct_test_environment(test_suite_name, config) as test_environment:
         fs = test_environment.feature_store
         infer_features = test_environment.test_repo_config.infer_features
diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index 641017ac76..e5a40c0b0e 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -102,19 +102,19 @@ def construct_universal_data_sources(
 ) -> Dict[str, DataSource]:
     customer_ds = data_source_creator.create_data_source(
         datasets["customer"],
-        suffix="customer_profile",
+        destination="customer_profile",
         event_timestamp_column="event_timestamp",
         created_timestamp_column="created",
     )
     driver_ds = data_source_creator.create_data_source(
         datasets["driver"],
-        suffix="driver_hourly",
+        destination="driver_hourly",
         event_timestamp_column="event_timestamp",
         created_timestamp_column="created",
     )
     orders_ds = data_source_creator.create_data_source(
         datasets["orders"],
-        suffix="orders",
+        destination="orders",
         event_timestamp_column="event_timestamp",
         created_timestamp_column="created",
     )
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
index 4385adab85..d65a91bd2f 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Dict, Optional
+from typing import Dict
 
 import pandas as pd
 
@@ -12,12 +12,25 @@ class DataSourceCreator(ABC):
     def create_data_source(
         self,
         df: pd.DataFrame,
-        destination: Optional[str] = None,
-        suffix: Optional[str] = None,
+        destination: str,
        event_timestamp_column="ts",
         created_timestamp_column="created_ts",
         field_mapping: Dict[str, str] = None,
     ) -> DataSource:
+        """
+        Create a data source based on the dataframe. Implementing this method requires the underlying
+        implementation to persist the dataframe in the offline store, using the destination string as
+        a way to differentiate multiple dataframes and data sources.
+
+        :param df: The dataframe to be used to create the data source.
+        :param destination: The destination that this dataframe is meant for. This string is used by the
+            implementing classes to isolate multiple dataframes from each other.
+        :param event_timestamp_column: Pass through for the underlying data source.
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py
index 5c888ca113..0863d7bd25 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py
@@ -51,16 +51,13 @@ def create_data_source(
         self,
         df: pd.DataFrame,
         destination: Optional[str] = None,
-        suffix: Optional[str] = None,
         event_timestamp_column="ts",
         created_timestamp_column="created_ts",
         field_mapping: Dict[str, str] = None,
         **kwargs,
     ) -> DataSource:
 
-        assert destination or suffix
-        if not destination:
-            destination = self.get_prefixed_table_name(suffix)
+        destination = self.get_prefixed_table_name(destination)
 
         self.get_dataset()
 
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py
index 48630dd3f9..7d204ed7b1 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py
@@ -26,16 +26,13 @@ def __init__(self, project_name: str):
     def create_data_source(
         self,
         df: pd.DataFrame,
-        destination: Optional[str] = None,
-        suffix: Optional[str] = None,
+        destination: str,
         event_timestamp_column="ts",
         created_timestamp_column="created_ts",
         field_mapping: Dict[str, str] = None,
     ) -> DataSource:
 
-        assert destination or suffix
-        if not destination:
-            destination = self.get_prefixed_table_name(suffix)
+        destination = self.get_prefixed_table_name(destination)
 
         f = tempfile.NamedTemporaryFile(
             prefix=f"{self.project_name}_{destination}", suffix=".parquet", delete=False
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
index 34d4cfd4a5..528e0261f8 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
@@ -34,16 +34,14 @@ def __init__(self, project_name: str):
     def create_data_source(
         self,
         df: pd.DataFrame,
-        destination: Optional[str] = None,
+        destination: str,
         suffix: Optional[str] = None,
         event_timestamp_column="ts",
         created_timestamp_column="created_ts",
         field_mapping: Dict[str, str] = None,
     ) -> DataSource:
 
-        assert destination or suffix
-        if not destination:
-            destination = self.get_prefixed_table_name(suffix)
+        destination = self.get_prefixed_table_name(destination)
 
         aws_utils.upload_df_to_redshift(
             self.client,

From 3e5aada09083822ede24d546b86448438f3b92cf Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Fri, 27 Aug 2021 13:04:34 -0700
Subject: [PATCH 34/51] Kill skipped tests

Signed-off-by: Achal Shah
---
 .../test_offline_online_store_consistency.py |  15 -
 .../test_historical_retrieval.py             | 559 ------------------
 2 files changed, 574 deletions(-)

diff --git a/sdk/python/tests/integration/materialization/test_offline_online_store_consistency.py b/sdk/python/tests/integration/materialization/test_offline_online_store_consistency.py
index ecb4c82bd3..fbca6e9856 100644
---
a/sdk/python/tests/integration/materialization/test_offline_online_store_consistency.py +++ b/sdk/python/tests/integration/materialization/test_offline_online_store_consistency.py @@ -382,21 +382,6 @@ def run_offline_online_store_consistency_test( ) -@pytest.mark.skip( - reason="This is the most expensive test and is subsumed by the universal tests." -) -@pytest.mark.integration -@pytest.mark.parametrize( - "bq_source_type", ["query", "table"], -) -@pytest.mark.parametrize("full_feature_names", [True, False]) -def test_bq_offline_online_store_consistency( - bq_source_type: str, full_feature_names: bool -): - with prep_bq_fs_and_fv(bq_source_type) as (fs, fv): - run_offline_online_store_consistency_test(fs, fv, full_feature_names) - - @pytest.mark.parametrize("full_feature_names", [True, False]) @pytest.mark.integration def test_redis_offline_online_store_consistency(full_feature_names: bool): diff --git a/sdk/python/tests/integration/offline_store/test_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_historical_retrieval.py index 26b6b335dc..63b720d3df 100644 --- a/sdk/python/tests/integration/offline_store/test_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_historical_retrieval.py @@ -394,565 +394,6 @@ def test_historical_features_from_parquet_sources( store.teardown() -@pytest.mark.skip( - reason="This is the most expensive test and is subsumed by the universal tests." -) -@pytest.mark.integration -@pytest.mark.parametrize( - "provider_type", ["local", "gcp", "gcp_custom_offline_config"], -) -@pytest.mark.parametrize( - "infer_event_timestamp_col", [False, True], -) -@pytest.mark.parametrize( - "full_feature_names", [False, True], -) -def test_historical_features_from_bigquery_sources( - provider_type, infer_event_timestamp_col, capsys, full_feature_names -): - start_date = datetime.now().replace(microsecond=0, second=0, minute=0) - ( - customer_entities, - driver_entities, - end_date, - orders_df, - start_date, - ) = generate_entities(start_date, infer_event_timestamp_col) - - bigquery_dataset = ( - f"test_hist_retrieval_{int(time.time_ns())}_{random.randint(1000, 9999)}" - ) - - with BigQueryDataSet(bigquery_dataset), TemporaryDirectory() as temp_dir: - gcp_project = bigquery.Client().project - - # Orders Query - table_id = f"{bigquery_dataset}.orders" - stage_orders_bigquery(orders_df, table_id) - entity_df_query = f"SELECT * FROM {gcp_project}.{table_id}" - - # Driver Feature View - driver_df = driver_data.create_driver_hourly_stats_df( - driver_entities, start_date, end_date - ) - driver_table_id = f"{gcp_project}.{bigquery_dataset}.driver_hourly" - stage_driver_hourly_stats_bigquery_source(driver_df, driver_table_id) - driver_source = BigQuerySource( - table_ref=driver_table_id, - event_timestamp_column="event_timestamp", - created_timestamp_column="created", - ) - driver_fv = create_driver_hourly_stats_feature_view(driver_source) - - # Customer Feature View - customer_df = driver_data.create_customer_daily_profile_df( - customer_entities, start_date, end_date - ) - customer_table_id = f"{gcp_project}.{bigquery_dataset}.customer_profile" - - stage_customer_daily_profile_bigquery_source(customer_df, customer_table_id) - customer_source = BigQuerySource( - table_ref=customer_table_id, - event_timestamp_column="event_timestamp", - created_timestamp_column="created", - ) - customer_fv = create_customer_daily_profile_feature_view(customer_source) - - driver = Entity(name="driver", join_key="driver_id", value_type=ValueType.INT64) - 
customer = Entity(name="customer_id", value_type=ValueType.INT64) - - if provider_type == "local": - store = FeatureStore( - config=RepoConfig( - registry=os.path.join(temp_dir, "registry.db"), - project="default", - provider="local", - online_store=SqliteOnlineStoreConfig( - path=os.path.join(temp_dir, "online_store.db"), - ), - offline_store=BigQueryOfflineStoreConfig( - type="bigquery", dataset=bigquery_dataset - ), - ) - ) - elif provider_type == "gcp": - store = FeatureStore( - config=RepoConfig( - registry=os.path.join(temp_dir, "registry.db"), - project="".join( - random.choices(string.ascii_uppercase + string.digits, k=10) - ), - provider="gcp", - offline_store=BigQueryOfflineStoreConfig( - type="bigquery", dataset=bigquery_dataset - ), - ) - ) - elif provider_type == "gcp_custom_offline_config": - store = FeatureStore( - config=RepoConfig( - registry=os.path.join(temp_dir, "registry.db"), - project="".join( - random.choices(string.ascii_uppercase + string.digits, k=10) - ), - provider="gcp", - offline_store=BigQueryOfflineStoreConfig( - type="bigquery", dataset="foo" - ), - ) - ) - else: - raise Exception("Invalid provider used as part of test configuration") - - store.apply([driver, customer, driver_fv, customer_fv]) - - try: - event_timestamp = ( - DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL - if DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL in orders_df.columns - else "e_ts" - ) - expected_df = get_expected_training_df( - customer_df, - customer_fv, - driver_df, - driver_fv, - orders_df, - event_timestamp, - full_feature_names, - ) - - job_from_sql = store.get_historical_features( - entity_df=entity_df_query, - features=[ - "driver_stats:conv_rate", - "driver_stats:avg_daily_trips", - "customer_profile:current_balance", - "customer_profile:avg_passenger_count", - "customer_profile:lifetime_trip_count", - ], - full_feature_names=full_feature_names, - ) - - start_time = datetime.utcnow() - actual_df_from_sql_entities = job_from_sql.to_df() - end_time = datetime.utcnow() - with capsys.disabled(): - print( - str( - f"\nTime to execute job_from_sql.to_df() = '{(end_time - start_time)}'" - ) - ) - - assert sorted(expected_df.columns) == sorted( - actual_df_from_sql_entities.columns - ) - assert_frame_equal( - expected_df.sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ).reset_index(drop=True), - actual_df_from_sql_entities[expected_df.columns] - .sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ) - .reset_index(drop=True), - check_dtype=False, - ) - - table_from_sql_entities = job_from_sql.to_arrow() - assert_frame_equal( - actual_df_from_sql_entities, table_from_sql_entities.to_pandas() - ) - - timestamp_column = ( - "e_ts" - if infer_event_timestamp_col - else DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL - ) - - entity_df_query_with_invalid_join_key = ( - f"select order_id, driver_id, customer_id as customer, " - f"order_is_success, {timestamp_column}, FROM {gcp_project}.{table_id}" - ) - # Rename the join key; this should now raise an error. 
- assertpy.assert_that(store.get_historical_features).raises( - errors.FeastEntityDFMissingColumnsError - ).when_called_with( - entity_df=entity_df_query_with_invalid_join_key, - features=[ - "driver_stats:conv_rate", - "driver_stats:avg_daily_trips", - "customer_profile:current_balance", - "customer_profile:avg_passenger_count", - "customer_profile:lifetime_trip_count", - ], - ) - - job_from_df = store.get_historical_features( - entity_df=orders_df, - features=[ - "driver_stats:conv_rate", - "driver_stats:avg_daily_trips", - "customer_profile:current_balance", - "customer_profile:avg_passenger_count", - "customer_profile:lifetime_trip_count", - ], - full_feature_names=full_feature_names, - ) - - # Rename the join key; this should now raise an error. - orders_df_with_invalid_join_key = orders_df.rename( - {"customer_id": "customer"}, axis="columns" - ) - assertpy.assert_that(store.get_historical_features).raises( - errors.FeastEntityDFMissingColumnsError - ).when_called_with( - entity_df=orders_df_with_invalid_join_key, - features=[ - "driver_stats:conv_rate", - "driver_stats:avg_daily_trips", - "customer_profile:current_balance", - "customer_profile:avg_passenger_count", - "customer_profile:lifetime_trip_count", - ], - ) - - # Make sure that custom dataset name is being used from the offline_store config - if provider_type == "gcp_custom_offline_config": - assertpy.assert_that(job_from_df.query).contains("foo.feast_entity_df") - else: - assertpy.assert_that(job_from_df.query).contains( - f"{bigquery_dataset}.feast_entity_df" - ) - - start_time = datetime.utcnow() - actual_df_from_df_entities = job_from_df.to_df() - end_time = datetime.utcnow() - with capsys.disabled(): - print( - str( - f"Time to execute job_from_df.to_df() = '{(end_time - start_time)}'\n" - ) - ) - - assert sorted(expected_df.columns) == sorted( - actual_df_from_df_entities.columns - ) - assert_frame_equal( - expected_df.sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ).reset_index(drop=True), - actual_df_from_df_entities[expected_df.columns] - .sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ) - .reset_index(drop=True), - check_dtype=False, - ) - - table_from_df_entities = job_from_df.to_arrow() - assert_frame_equal( - actual_df_from_df_entities, table_from_df_entities.to_pandas() - ) - finally: - store.teardown() - - -@pytest.mark.skip( - reason="This is the most expensive test and is subsumed by the universal tests." 
-) -@pytest.mark.integration -@pytest.mark.parametrize( - "provider_type", ["local", "aws"], -) -@pytest.mark.parametrize( - "infer_event_timestamp_col", [False, True], -) -@pytest.mark.parametrize( - "full_feature_names", [False, True], -) -def test_historical_features_from_redshift_sources( - provider_type, infer_event_timestamp_col, capsys, full_feature_names -): - client = aws_utils.get_redshift_data_client("us-west-2") - s3 = aws_utils.get_s3_resource("us-west-2") - - offline_store = RedshiftOfflineStoreConfig( - cluster_id="feast-integration-tests", - region="us-west-2", - user="admin", - database="feast", - s3_staging_location="s3://feast-integration-tests/redshift/tests/ingestion", - iam_role="arn:aws:iam::402087665549:role/redshift_s3_access_role", - ) - - start_date = datetime.now().replace(microsecond=0, second=0, minute=0) - ( - customer_entities, - driver_entities, - end_date, - orders_df, - start_date, - ) = generate_entities(start_date, infer_event_timestamp_col) - - redshift_table_prefix = ( - f"test_hist_retrieval_{int(time.time_ns())}_{random.randint(1000, 9999)}" - ) - - # Stage orders_df to Redshift - table_name = f"{redshift_table_prefix}_orders" - entity_df_query = f"SELECT * FROM {table_name}" - orders_context = aws_utils.temporarily_upload_df_to_redshift( - client, - offline_store.cluster_id, - offline_store.database, - offline_store.user, - s3, - f"{offline_store.s3_staging_location}/copy/{table_name}.parquet", - offline_store.iam_role, - table_name, - orders_df, - ) - - # Stage driver_df to Redshift - driver_df = driver_data.create_driver_hourly_stats_df( - driver_entities, start_date, end_date - ) - driver_table_name = f"{redshift_table_prefix}_driver_hourly" - driver_context = aws_utils.temporarily_upload_df_to_redshift( - client, - offline_store.cluster_id, - offline_store.database, - offline_store.user, - s3, - f"{offline_store.s3_staging_location}/copy/{driver_table_name}.parquet", - offline_store.iam_role, - driver_table_name, - driver_df, - ) - - # Stage customer_df to Redshift - customer_df = driver_data.create_customer_daily_profile_df( - customer_entities, start_date, end_date - ) - customer_table_name = f"{redshift_table_prefix}_customer_profile" - customer_context = aws_utils.temporarily_upload_df_to_redshift( - client, - offline_store.cluster_id, - offline_store.database, - offline_store.user, - s3, - f"{offline_store.s3_staging_location}/copy/{customer_table_name}.parquet", - offline_store.iam_role, - customer_table_name, - customer_df, - ) - - with orders_context, driver_context, customer_context, TemporaryDirectory() as temp_dir: - driver_source = RedshiftSource( - table=driver_table_name, - event_timestamp_column="event_timestamp", - created_timestamp_column="created", - ) - driver_fv = create_driver_hourly_stats_feature_view(driver_source) - - customer_source = RedshiftSource( - table=customer_table_name, - event_timestamp_column="event_timestamp", - created_timestamp_column="created", - ) - customer_fv = create_customer_daily_profile_feature_view(customer_source) - - driver = Entity(name="driver", join_key="driver_id", value_type=ValueType.INT64) - customer = Entity(name="customer_id", value_type=ValueType.INT64) - - if provider_type == "local": - store = FeatureStore( - config=RepoConfig( - registry=os.path.join(temp_dir, "registry.db"), - project="default", - provider="local", - online_store=SqliteOnlineStoreConfig( - path=os.path.join(temp_dir, "online_store.db"), - ), - offline_store=offline_store, - ) - ) - elif provider_type == "aws": - 
store = FeatureStore( - config=RepoConfig( - registry=os.path.join(temp_dir, "registry.db"), - project="".join( - random.choices(string.ascii_uppercase + string.digits, k=10) - ), - provider="aws", - online_store=DynamoDBOnlineStoreConfig(region="us-west-2"), - offline_store=offline_store, - ) - ) - else: - raise Exception("Invalid provider used as part of test configuration") - - store.apply([driver, customer, driver_fv, customer_fv]) - - try: - event_timestamp = ( - DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL - if DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL in orders_df.columns - else "e_ts" - ) - expected_df = get_expected_training_df( - customer_df, - customer_fv, - driver_df, - driver_fv, - orders_df, - event_timestamp, - full_feature_names, - ) - - job_from_sql = store.get_historical_features( - entity_df=entity_df_query, - features=[ - "driver_stats:conv_rate", - "driver_stats:avg_daily_trips", - "customer_profile:current_balance", - "customer_profile:avg_passenger_count", - "customer_profile:lifetime_trip_count", - ], - full_feature_names=full_feature_names, - ) - - start_time = datetime.utcnow() - actual_df_from_sql_entities = job_from_sql.to_df() - end_time = datetime.utcnow() - with capsys.disabled(): - print( - str( - f"\nTime to execute job_from_sql.to_df() = '{(end_time - start_time)}'" - ) - ) - - assert sorted(expected_df.columns) == sorted( - actual_df_from_sql_entities.columns - ) - assert_frame_equal( - expected_df.sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ).reset_index(drop=True), - actual_df_from_sql_entities[expected_df.columns] - .sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ) - .reset_index(drop=True), - check_dtype=False, - ) - - table_from_sql_entities = job_from_sql.to_arrow() - assert_frame_equal( - actual_df_from_sql_entities.sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ).reset_index(drop=True), - table_from_sql_entities.to_pandas() - .sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ) - .reset_index(drop=True), - ) - - timestamp_column = ( - "e_ts" - if infer_event_timestamp_col - else DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL - ) - - entity_df_query_with_invalid_join_key = ( - f"select order_id, driver_id, customer_id as customer, " - f"order_is_success, {timestamp_column} FROM {table_name}" - ) - # Rename the join key; this should now raise an error. - assertpy.assert_that( - store.get_historical_features( - entity_df=entity_df_query_with_invalid_join_key, - features=[ - "driver_stats:conv_rate", - "driver_stats:avg_daily_trips", - "customer_profile:current_balance", - "customer_profile:avg_passenger_count", - "customer_profile:lifetime_trip_count", - ], - ).to_df - ).raises(errors.FeastEntityDFMissingColumnsError).when_called_with() - - job_from_df = store.get_historical_features( - entity_df=orders_df, - features=[ - "driver_stats:conv_rate", - "driver_stats:avg_daily_trips", - "customer_profile:current_balance", - "customer_profile:avg_passenger_count", - "customer_profile:lifetime_trip_count", - ], - full_feature_names=full_feature_names, - ) - - # Rename the join key; this should now raise an error. 
- orders_df_with_invalid_join_key = orders_df.rename( - {"customer_id": "customer"}, axis="columns" - ) - assertpy.assert_that( - store.get_historical_features( - entity_df=orders_df_with_invalid_join_key, - features=[ - "driver_stats:conv_rate", - "driver_stats:avg_daily_trips", - "customer_profile:current_balance", - "customer_profile:avg_passenger_count", - "customer_profile:lifetime_trip_count", - ], - ).to_df - ).raises(errors.FeastEntityDFMissingColumnsError).when_called_with() - - start_time = datetime.utcnow() - actual_df_from_df_entities = job_from_df.to_df() - end_time = datetime.utcnow() - with capsys.disabled(): - print( - str( - f"Time to execute job_from_df.to_df() = '{(end_time - start_time)}'\n" - ) - ) - - assert sorted(expected_df.columns) == sorted( - actual_df_from_df_entities.columns - ) - assert_frame_equal( - expected_df.sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ).reset_index(drop=True), - actual_df_from_df_entities[expected_df.columns] - .sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ) - .reset_index(drop=True), - check_dtype=False, - ) - - table_from_df_entities = job_from_df.to_arrow() - assert_frame_equal( - actual_df_from_df_entities.sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ).reset_index(drop=True), - table_from_df_entities.to_pandas() - .sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ) - .reset_index(drop=True), - ) - finally: - store.teardown() - - def test_feature_name_collision_on_historical_retrieval(): # _validate_feature_refs is the function that checks for colliding feature names From b6abcaa13b05f17fd91397c25477ae5eb7b7d9dd Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Fri, 27 Aug 2021 13:09:39 -0700 Subject: [PATCH 35/51] reorder Signed-off-by: Achal Shah --- .../offline_store/test_historical_retrieval.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/sdk/python/tests/integration/offline_store/test_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_historical_retrieval.py index 63b720d3df..44f9e595e3 100644 --- a/sdk/python/tests/integration/offline_store/test_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_historical_retrieval.py @@ -5,7 +5,6 @@ from datetime import datetime, timedelta from tempfile import TemporaryDirectory -import assertpy import numpy as np import pandas as pd import pytest @@ -14,15 +13,7 @@ from pytz import utc import feast.driver_test_data as driver_data -from feast import ( - BigQuerySource, - FeatureService, - FileSource, - RedshiftSource, - RepoConfig, - errors, - utils, -) +from feast import BigQuerySource, FeatureService, FileSource, RepoConfig, utils from feast.entity import Entity from feast.errors import FeatureNameCollisionError from feast.feature import Feature @@ -32,10 +23,7 @@ from feast.infra.offline_stores.offline_utils import ( DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL, ) -from feast.infra.offline_stores.redshift import RedshiftOfflineStoreConfig -from feast.infra.online_stores.dynamodb import DynamoDBOnlineStoreConfig from feast.infra.online_stores.sqlite import SqliteOnlineStoreConfig -from feast.infra.utils import aws_utils from feast.value_type import ValueType np.random.seed(0) From 95806545ed49dbbb90765191c04a2e801be1b1ff Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Fri, 27 Aug 2021 13:33:01 -0700 Subject: [PATCH 36/51] add todo Signed-off-by: Achal Shah --- sdk/python/feast/type_map.py 
| 1 +
 1 file changed, 1 insertion(+)

diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py
index 3ea6cda968..c65ec6e14c 100644
--- a/sdk/python/feast/type_map.py
+++ b/sdk/python/feast/type_map.py
@@ -164,6 +164,7 @@ def _type_err(item, dtype):
     raise ValueError(f'Value "{item}" is of type {type(item)} not of type {dtype}')
 
 
+# TODO(achals): Simplify this method and remove the noqa.
 def _python_value_to_proto_value(feast_value_type, value) -> ProtoValue:  # noqa: C901
     """
     Converts a Python (native, pandas) value to a Feast Proto Value based

From 171a1efbe15c650cbc68919f45a2fc9683d72766 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Fri, 27 Aug 2021 14:10:15 -0700
Subject: [PATCH 37/51] Split universal vs non data_source_cache

Signed-off-by: Achal Shah
---
 sdk/python/tests/conftest.py                 | 11 ++++++++++-
 .../feature_repos/test_repo_configuration.py |  8 ++++----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py
index 4cc8bcec1a..fce15e987d 100644
--- a/sdk/python/tests/conftest.py
+++ b/sdk/python/tests/conftest.py
@@ -115,9 +115,18 @@ def get_or_create(self, key, c: Callable[[], Any]):
         return v
 
 
-@pytest.fixture(scope="session", autouse=True)
+@pytest.fixture(scope="function", autouse=True)
 def data_source_cache():
     dsc = DataSourceCache()
     yield dsc
     for _, v in dsc.items():
         v[3].teardown()
+
+
+@pytest.fixture(scope="session", autouse=True)
+def universal_data_source_cache():
+    dsc = DataSourceCache()
+    yield dsc
+    for _, v in dsc.items():
+        v[3].teardown()
+
diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index e5a40c0b0e..4679763d1d 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -394,12 +394,12 @@ def parametrize_offline_retrieval_test(offline_retrieval_test):
 
     @pytest.mark.integration
     @pytest.mark.parametrize("config", configs, ids=lambda v: str(v))
-    def inner_test(config, data_source_cache):
+    def inner_test(config, universal_data_source_cache):
         with construct_universal_test_environment(
             offline_retrieval_test.__name__,
             config,
             stop_at_step=EnvironmentSetupSteps.APPLY_OBJECTS,
-            data_source_cache=data_source_cache,
+            data_source_cache=universal_data_source_cache,
         ) as environment:
             offline_retrieval_test(environment)
 
@@ -422,12 +422,12 @@ def parametrize_online_test(online_test):
 
     @pytest.mark.integration
     @pytest.mark.parametrize("config", configs, ids=lambda v: str(v))
-    def inner_test(config, data_source_cache):
+    def inner_test(config, universal_data_source_cache):
         with construct_universal_test_environment(
             online_test.__name__,
             config,
             stop_at_step=EnvironmentSetupSteps.MATERIALIZE,
-            data_source_cache=data_source_cache,
+            data_source_cache=universal_data_source_cache,
         ) as environment:
             online_test(environment)

From de9d8aae39920f0085e1de4bdccdaa1df000dc4f Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Fri, 27 Aug 2021 14:13:31 -0700
Subject: [PATCH 38/51] make format

Signed-off-by: Achal Shah
---
 sdk/python/tests/conftest.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py
index fce15e987d..3647124864 100644
--- a/sdk/python/tests/conftest.py
+++ b/sdk/python/tests/conftest.py
@@ -129,4 +129,3 @@ def universal_data_source_cache():
     yield dsc
     for _, v in dsc.items():
         v[3].teardown()
-
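The split above relies on pytest's fixture scoping: a function-scoped fixture is rebuilt and finalized around every test, while a session-scoped fixture is created once and finalized after the last test in the run. A minimal, self-contained sketch of that pattern (the cache names here are illustrative, not part of the repo):

import pytest


@pytest.fixture(scope="session", autouse=True)
def session_cache():
    cache = {}  # created once, shared by every test in the session
    yield cache
    cache.clear()  # finalizer runs once, after the final test


@pytest.fixture(scope="function", autouse=True)
def per_test_cache():
    cache = {}  # rebuilt for each individual test
    yield cache
    cache.clear()  # finalizer runs after every single test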
From 6454775341efcf19d535c41aec3033466e84099b Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Tue, 31 Aug 2021 16:42:44 -0700
Subject: [PATCH 39/51] WIP fixtures

Signed-off-by: Achal Shah
---
 sdk/python/tests/conftest.py                 | 26 +++++++++++++
 .../integration/e2e/test_universal_e2e.py    |  5 +--
 .../feature_repos/test_repo_configuration.py |  3 +-
 .../online_store/test_universal_online.py    | 32 +++++++++++--------
 4 files changed, 50 insertions(+), 16 deletions(-)

diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py
index 3647124864..414cad492e 100644
--- a/sdk/python/tests/conftest.py
+++ b/sdk/python/tests/conftest.py
@@ -18,6 +18,8 @@
 import pandas as pd
 import pytest
+from tests.integration.feature_repos.test_repo_configuration import FULL_REPO_CONFIGS, construct_test_environment, \
+    construct_universal_entities, construct_universal_datasets, construct_universal_data_sources
 
 
 def pytest_configure(config):
@@ -129,3 +131,27 @@ def universal_data_source_cache():
     yield dsc
     for _, v in dsc.items():
         v[3].teardown()
+
+
+@pytest.fixture(params=FULL_REPO_CONFIGS, scope="session")
+def environment(request):
+    with construct_test_environment(request.param) as e:
+        yield e
+
+
+@pytest.fixture(scope="session")
+def universal_data_sources(environment):
+
+    with environment as environment:
+
+        entities = construct_universal_entities()
+        datasets = construct_universal_datasets(
+            entities, environment.start_date, environment.end_date
+        )
+        datasources = construct_universal_data_sources(
+            datasets, environment.data_source_creator
+        )
+
+        yield (entities, datasets, datasources)
+
+        environment.data_source_creator.teardown()
diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py
index 57d2d361ce..c2b1a77d32 100644
--- a/sdk/python/tests/integration/e2e/test_universal_e2e.py
+++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py
@@ -22,10 +22,11 @@
 @pytest.mark.parametrize(
     "config", vary_infer_feature(FULL_REPO_CONFIGS), ids=lambda v: str(v)
 )
-def test_e2e_consistency(config, data_source_cache: DataSourceCache):
+def test_e2e_consistency(config, data_source_cache: DataSourceCache, environment,
+                         universal_data_sources):
     df = create_dataset()
     test_suite_name = test_e2e_consistency.__name__
-    with construct_test_environment(test_suite_name, config) as test_environment:
+    with construct_test_environment(config, test_suite_name) as test_environment:
         fs = test_environment.feature_store
         infer_features = test_environment.test_repo_config.infer_features
         key = f"{test_suite_name}_{test_environment.test_repo_config.offline_store_creator}"
diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index 4679763d1d..4975285556 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -266,7 +266,8 @@ class EnvironmentSetupSteps(IntEnum):
 
 @contextmanager
 def construct_test_environment(
-    test_suite_name: str, test_repo_config: TestRepoConfig,
+    test_repo_config: TestRepoConfig,
+    test_suite_name: str = "integration_test"
 ) -> Environment:
 
     project = f"{test_suite_name}_{str(uuid.uuid4()).replace('-', '')[:8]}"
diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py
index 46b84130b8..b2a88a4f79 100644
---
a/sdk/python/tests/integration/online_store/test_universal_online.py +++ b/sdk/python/tests/integration/online_store/test_universal_online.py @@ -2,26 +2,32 @@ import unittest import pandas as pd +import pytest -from tests.integration.feature_repos.test_repo_configuration import ( - Environment, - parametrize_online_test, -) +from tests.integration.feature_repos.test_repo_configuration import construct_universal_feature_views +from tests.integration.feature_repos.universal.entities import driver, customer -@parametrize_online_test -def test_online_retrieval(environment: Environment): +@pytest.mark.integration +@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v)) +def test_online_retrieval(environment, universal_data_sources, full_feature_names): fs = environment.feature_store - full_feature_names = environment.test_repo_config.full_feature_names + entities, datasets, data_sources = universal_data_sources + feature_views = construct_universal_feature_views(data_sources) - sample_drivers = random.sample(environment.entities["driver"], 10) - drivers_df = environment.datasets["driver"][ - environment.datasets["driver"]["driver_id"].isin(sample_drivers) + print(f"Created universal feature views {feature_views}") + + fs.apply([feature_views.values()] + [driver(), customer()]) + fs.materialize(environment.start_date, environment.end_date) + + sample_drivers = random.sample(entities["driver"], 10) + drivers_df = datasets["driver"][ + datasets["driver"]["driver_id"].isin(sample_drivers) ] - sample_customers = random.sample(environment.entities["customer"], 10) - customers_df = environment.datasets["customer"][ - environment.datasets["customer"]["customer_id"].isin(sample_customers) + sample_customers = random.sample(entities["customer"], 10) + customers_df = datasets["customer"][ + datasets["customer"]["customer_id"].isin(sample_customers) ] entity_rows = [ From 3fbb2e4f4bdd7a4e083d7608b997faf65da09aa7 Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Tue, 31 Aug 2021 17:17:46 -0700 Subject: [PATCH 40/51] WIP Trying fixtures more effectively Signed-off-by: Achal Shah --- sdk/python/tests/conftest.py | 73 ++----- .../integration/e2e/test_universal_e2e.py | 5 +- .../feature_repos/test_repo_configuration.py | 202 +----------------- .../feature_repos/universal/feature_views.py | 2 +- .../test_universal_historical_retrieval.py | 22 +- .../online_store/test_universal_online.py | 14 +- 6 files changed, 45 insertions(+), 273 deletions(-) diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index 414cad492e..8371ba202f 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -14,12 +14,17 @@ import multiprocessing from datetime import datetime, timedelta from sys import platform -from typing import Any, Callable import pandas as pd import pytest -from tests.integration.feature_repos.test_repo_configuration import FULL_REPO_CONFIGS, construct_test_environment, \ - construct_universal_entities, construct_universal_datasets, construct_universal_data_sources + +from tests.integration.feature_repos.test_repo_configuration import ( + FULL_REPO_CONFIGS, + construct_test_environment, + construct_universal_data_sources, + construct_universal_datasets, + construct_universal_entities, +) def pytest_configure(config): @@ -92,47 +97,6 @@ def simple_dataset_2() -> pd.DataFrame: return pd.DataFrame.from_dict(data) -class DataSourceCache(dict): - """ - DataSourceCache is meant to cache datasources to be reused across multiple tests. 
-
-    Staging data into a remote store is an expensive operation, so we expose a way to share the created
-    data sources between tests.
-    The key for the cache is an arbitrary string, typically built from the test name combined with the
-    backing store.
-    The value for the cache is a 4-tuple:
-    - The entities that are meant to be used with the created data source.
-    - The dataframe that is persisted into the remote store.
-    - The data source object that represents the table/file; created from the dataframe mentioned above.
-    - The data_source_creator object that is used to upload the data frame and construct the data source object.
-    This object is needed so that we can invoke the teardown method at the end of all our tests.
-    """
-
-    def get_or_create(self, key, c: Callable[[], Any]):
-        if key in self:
-            return self[key]
-        else:
-            v = c()
-            self[key] = v
-            return v
-
-
-@pytest.fixture(scope="function", autouse=True)
-def data_source_cache():
-    dsc = DataSourceCache()
-    yield dsc
-    for _, v in dsc.items():
-        v[3].teardown()
-
-
-@pytest.fixture(scope="session", autouse=True)
-def universal_data_source_cache():
-    dsc = DataSourceCache()
-    yield dsc
-    for _, v in dsc.items():
-        v[3].teardown()
 
 
 @pytest.fixture(params=FULL_REPO_CONFIGS, scope="session")
 def environment(request):
     with construct_test_environment(request.param) as e:
         yield e
 
 
 @pytest.fixture(scope="session")
 def universal_data_sources(environment):
-
-    with environment as environment:
-
-        entities = construct_universal_entities()
-        datasets = construct_universal_datasets(
-            entities, environment.start_date, environment.end_date
-        )
-        datasources = construct_universal_data_sources(
-            datasets, environment.data_source_creator
-        )
-
-        yield (entities, datasets, datasources)
-
-        environment.data_source_creator.teardown()
+    entities = construct_universal_entities()
+    datasets = construct_universal_datasets(
+        entities, environment.start_date, environment.end_date
+    )
+    datasources = construct_universal_data_sources(
+        datasets, environment.data_source_creator
+    )
+
+    yield entities, datasets, datasources
+
+    environment.data_source_creator.teardown()
diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py
index c2b1a77d32..7f462db722 100644
--- a/sdk/python/tests/integration/e2e/test_universal_e2e.py
+++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py
@@ -22,8 +22,9 @@
 @pytest.mark.parametrize(
     "config", vary_infer_feature(FULL_REPO_CONFIGS), ids=lambda v: str(v)
 )
-def test_e2e_consistency(config, data_source_cache: DataSourceCache, environment,
-                         universal_data_sources):
+def test_e2e_consistency(
+    config, data_source_cache: DataSourceCache, environment, universal_data_sources
+):
     df = create_dataset()
     test_suite_name = test_e2e_consistency.__name__
     with construct_test_environment(config, test_suite_name) as test_environment:
         fs = test_environment.feature_store
         infer_features = test_environment.test_repo_config.infer_features
diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
index 4975285556..17aecd1909 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
@@ -3,19 +3,16 @@
 from contextlib import contextmanager
 from dataclasses import dataclass, field, replace
 from datetime import datetime, timedelta
-from enum import IntEnum
 from pathlib import Path
 from
typing import Any, Dict, List, Optional, Union import pandas as pd -import pytest from feast import FeatureStore, FeatureView, RepoConfig, driver_test_data, importer from feast.data_source import DataSource from tests.integration.feature_repos.universal.data_source_creator import ( DataSourceCreator, ) -from tests.integration.feature_repos.universal.entities import customer, driver from tests.integration.feature_repos.universal.feature_views import ( create_customer_daily_profile_feature_view, create_driver_hourly_stats_feature_view, @@ -122,7 +119,7 @@ def construct_universal_data_sources( def construct_universal_feature_views( - data_sources: Dict[str, DataSource] + data_sources: Dict[str, DataSource], ) -> Dict[str, FeatureView]: return { "customer": create_customer_daily_profile_feature_view( @@ -132,55 +129,6 @@ def construct_universal_feature_views( } -def setup_entities( - environment: "Environment", entities_override: Optional[Dict[str, List[Any]]] = None -) -> "Environment": - environment.entities = ( - entities_override if entities_override else construct_universal_entities() - ) - return environment - - -def setup_datasets( - environment: "Environment", - datasets_override: Optional[Dict[str, pd.DataFrame]] = None, -) -> "Environment": - environment.datasets = ( - datasets_override - if datasets_override - else construct_universal_datasets( - environment.entities, environment.start_date, environment.end_date - ) - ) - return environment - - -def setup_data_sources( - environment: "Environment", - data_sources_override: Optional[Dict[str, DataSource]] = None, -) -> "Environment": - environment.datasources = ( - data_sources_override - if data_sources_override - else construct_universal_data_sources( - environment.datasets, environment.data_source_creator - ) - ) - return environment - - -def setup_feature_views( - environment: "Environment", - feature_views_override: Optional[Dict[str, FeatureView]] = None, -) -> "Environment": - environment.feature_views = ( - feature_views_override - if feature_views_override - else construct_universal_feature_views(environment.datasources) - ) - return environment - - @dataclass class Environment: name: str @@ -188,11 +136,6 @@ class Environment: feature_store: FeatureStore data_source_creator: DataSourceCreator - entities: Dict[str, List[Any]] = field(default_factory=dict) - datasets: Dict[str, pd.DataFrame] = field(default_factory=dict) - datasources: Dict[str, DataSource] = field(default_factory=dict) - feature_views: Dict[str, FeatureView] = field(default_factory=list) - end_date: datetime = field( default=datetime.now().replace(microsecond=0, second=0, minute=0) ) @@ -254,20 +197,9 @@ def vary_providers_for_offline_stores( return new_configs -class EnvironmentSetupSteps(IntEnum): - INIT = 0 - CREATE_OBJECTS = 1 - APPLY_OBJECTS = 2 - MATERIALIZE = 3 - - -DEFAULT_STEP = EnvironmentSetupSteps.INIT - - @contextmanager def construct_test_environment( - test_repo_config: TestRepoConfig, - test_suite_name: str = "integration_test" + test_repo_config: TestRepoConfig, test_suite_name: str = "integration_test" ) -> Environment: project = f"{test_suite_name}_{str(uuid.uuid4()).replace('-', '')[:8]}" @@ -303,133 +235,3 @@ def construct_test_environment( yield environment finally: fs.teardown() - - -@contextmanager -def construct_universal_test_environment( - test_suite_name: str, - test_repo_config: TestRepoConfig, - stop_at_step=DEFAULT_STEP, - data_source_cache=None, -) -> Environment: - """ - This method should take in the parameters from the test 
repo config and created a feature repo, apply it, - and return the constructed feature store object to callers. - - This feature store object can be interacted for the purposes of tests. - The user is *not* expected to perform any clean up actions. - - :param test_suite_name: A name for the test suite. - :param test_repo_config: configuration - :param stop_at_step: The step which should be the last one executed when setting up the test environment. - :param data_source_cache: - :return: A feature store built using the supplied configuration. - """ - with construct_test_environment(test_suite_name, test_repo_config) as environment: - fs = environment.feature_store - fvs = [] - entities = [] - try: - if stop_at_step >= EnvironmentSetupSteps.CREATE_OBJECTS: - if data_source_cache is not None: - fixtures = data_source_cache.get( - test_repo_config.offline_store_creator, None - ) - if fixtures: - environment = setup_entities( - environment, entities_override=fixtures[0] - ) - environment = setup_datasets( - environment, datasets_override=fixtures[1] - ) - environment = setup_data_sources( - environment, data_sources_override=fixtures[2] - ) - else: - environment = setup_entities(environment) - environment = setup_datasets(environment) - environment = setup_data_sources(environment) - data_source_cache[test_repo_config.offline_store_creator] = ( - environment.entities, - environment.datasets, - environment.datasources, - environment.data_source_creator, - ) - else: - environment = setup_entities(environment) - environment = setup_datasets(environment) - environment = setup_data_sources(environment) - - environment = setup_feature_views(environment) - if stop_at_step >= EnvironmentSetupSteps.APPLY_OBJECTS: - entities.extend([driver(), customer()]) - fvs.extend(environment.feature_views.values()) - fs.apply(fvs + entities) - if stop_at_step >= EnvironmentSetupSteps.MATERIALIZE: - fs.materialize(environment.start_date, environment.end_date) - - yield environment - finally: - if ( - data_source_cache is None - and stop_at_step >= EnvironmentSetupSteps.CREATE_OBJECTS - ): - environment.data_source_creator.teardown() - - -def parametrize_offline_retrieval_test(offline_retrieval_test): - """ - This decorator should be used by tests that rely on the offline store. These tests are expected to be parameterized, - and receive an Environment object that contains a reference to a Feature Store with pre-applied - entities and feature views. - - The decorator also ensures that sample data needed for the test is available in the relevant offline store. - - Decorated tests should interact with the offline store, via the FeatureStore.get_historical_features method. They - may perform more operations as needed. - - The decorator takes care of tearing down the feature store, as well as the sample data. - """ - - configs = vary_full_feature_names(FULL_REPO_CONFIGS) - - @pytest.mark.integration - @pytest.mark.parametrize("config", configs, ids=lambda v: str(v)) - def inner_test(config, universal_data_source_cache): - with construct_universal_test_environment( - offline_retrieval_test.__name__, - config, - stop_at_step=EnvironmentSetupSteps.APPLY_OBJECTS, - data_source_cache=universal_data_source_cache, - ) as environment: - offline_retrieval_test(environment) - - return inner_test - - -def parametrize_online_test(online_test): - """ - This decorator should be used by tests that rely on the offline store. 
These tests are expected to be parameterized, - and receive an Environment object that contains a reference to a Feature Store with pre-applied - entities and feature views. - - The decorator also ensures that sample data needed for the test is available in the relevant offline store. This - data is also materialized into the online store. - - The decorator takes care of tearing down the feature store, as well as the sample data. - """ - - configs = vary_full_feature_names(FULL_REPO_CONFIGS) - - @pytest.mark.integration - @pytest.mark.parametrize("config", configs, ids=lambda v: str(v)) - def inner_test(config, universal_data_source_cache): - with construct_universal_test_environment( - online_test.__name__, - config, - stop_at_step=EnvironmentSetupSteps.MATERIALIZE, - data_source_cache=universal_data_source_cache, - ) as environment: - online_test(environment) - - return inner_test diff --git a/sdk/python/tests/integration/feature_repos/universal/feature_views.py b/sdk/python/tests/integration/feature_repos/universal/feature_views.py index b57e2988ab..e8bc2e0911 100644 --- a/sdk/python/tests/integration/feature_repos/universal/feature_views.py +++ b/sdk/python/tests/integration/feature_repos/universal/feature_views.py @@ -16,7 +16,7 @@ def driver_feature_view( ) -def create_driver_hourly_stats_feature_view(source, infer_features: bool = False): +def create_driver_hourly_stats_feature_view(source, infer_features: bool = True): driver_stats_feature_view = FeatureView( name="driver_stats", entities=["driver"], diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index b00818a078..c23d3b3eea 100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd +import pytest from pandas.testing import assert_frame_equal from pytz import utc @@ -12,8 +13,7 @@ DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL, ) from tests.integration.feature_repos.test_repo_configuration import ( - Environment, - parametrize_offline_retrieval_test, + construct_universal_feature_views, table_name_from_data_source, ) @@ -136,18 +136,22 @@ def get_expected_training_df( return expected_df -@parametrize_offline_retrieval_test -def test_historical_features(environment: Environment): +@pytest.mark.integration +@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v)) +def test_historical_features(environment, universal_data_sources, full_feature_names): store = environment.feature_store + (entities, datasets, data_sources) = universal_data_sources + feature_views = construct_universal_feature_views(data_sources) + customer_df, driver_df, orders_df = ( - environment.datasets["customer"], - environment.datasets["driver"], - environment.datasets["orders"], + datasets["customer"], + datasets["driver"], + datasets["orders"], ) customer_fv, driver_fv = ( - environment.feature_views["customer"], - environment.feature_views["driver"], + feature_views["customer"], + feature_views["driver"], ) full_feature_names = environment.test_repo_config.full_feature_names diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py index b2a88a4f79..3aca0d73d3 100644 --- a/sdk/python/tests/integration/online_store/test_universal_online.py +++ 
b/sdk/python/tests/integration/online_store/test_universal_online.py @@ -4,20 +4,24 @@ import pandas as pd import pytest -from tests.integration.feature_repos.test_repo_configuration import construct_universal_feature_views -from tests.integration.feature_repos.universal.entities import driver, customer +from tests.integration.feature_repos.test_repo_configuration import ( + construct_universal_feature_views, +) +from tests.integration.feature_repos.universal.entities import customer, driver @pytest.mark.integration @pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v)) def test_online_retrieval(environment, universal_data_sources, full_feature_names): + fs = environment.feature_store entities, datasets, data_sources = universal_data_sources feature_views = construct_universal_feature_views(data_sources) - print(f"Created universal feature views {feature_views}") - - fs.apply([feature_views.values()] + [driver(), customer()]) + feast_objects = [] + feast_objects.extend(feature_views.values()) + feast_objects.extend([driver(), customer()]) + fs.apply(feast_objects) fs.materialize(environment.start_date, environment.end_date) sample_drivers = random.sample(entities["driver"], 10) From 2961d58c430829100b474e3e45279d99339b0067 Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Tue, 31 Aug 2021 21:44:19 -0700 Subject: [PATCH 41/51] fix refs Signed-off-by: Achal Shah --- .../offline_store/test_universal_historical_retrieval.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index c23d3b3eea..be2467fb81 100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -153,10 +153,9 @@ def test_historical_features(environment, universal_data_sources, full_feature_n feature_views["customer"], feature_views["driver"], ) - full_feature_names = environment.test_repo_config.full_feature_names entity_df_query = None - orders_table = table_name_from_data_source(environment.datasources["orders"]) + orders_table = table_name_from_data_source(data_sources["orders"]) if orders_table: entity_df_query = f"SELECT * FROM {orders_table}" From 173981571ec2844f14d995b89acb7c5a0839cdd8 Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Tue, 31 Aug 2021 22:04:59 -0700 Subject: [PATCH 42/51] Fix refs Signed-off-by: Achal Shah --- .../integration/e2e/test_universal_e2e.py | 1 - .../offline_store/test_s3_custom_endpoint.py | 20 ++++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py index 7f462db722..424f85468f 100644 --- a/sdk/python/tests/integration/e2e/test_universal_e2e.py +++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py @@ -7,7 +7,6 @@ from pytz import utc from feast import FeatureStore, FeatureView -from tests.conftest import DataSourceCache from tests.data.data_creator import create_dataset from tests.integration.feature_repos.test_repo_configuration import ( FULL_REPO_CONFIGS, diff --git a/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py b/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py index e5bff828b3..7c94d003c3 100644 --- a/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py 
+++ b/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py @@ -2,17 +2,19 @@ from tests.integration.feature_repos.test_repo_configuration import ( TestRepoConfig, - construct_universal_test_environment, + construct_test_environment, + construct_universal_feature_views, ) # TODO: Allow integration tests to run using different credentials. +from tests.integration.feature_repos.universal.entities import customer, driver @pytest.mark.integration @pytest.mark.skip( reason="No way to run this test today. Credentials conflict with real AWS credentials in CI" ) -def test_registration_and_retrieval_from_custom_s3_endpoint(): +def test_registration_and_retrieval_from_custom_s3_endpoint(universal_data_sources): config = TestRepoConfig( offline_store_creator="tests.integration.feature_repos.universal.data_sources.file.S3FileDataSourceCreator" ) @@ -27,10 +29,18 @@ def test_registration_and_retrieval_from_custom_s3_endpoint(): os.environ["AWS_ACCESS_KEY_ID"] = "AKIAIOSFODNN7EXAMPLE" os.environ["AWS_SECRET_ACCESS_KEY"] = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" - with construct_universal_test_environment( - config, create_and_apply=True, materialize=True - ) as environment: + with construct_test_environment(config) as environment: fs = environment.feature_store + + entities, datasets, data_sources = universal_data_sources + feature_views = construct_universal_feature_views(data_sources) + + feast_objects = [] + feast_objects.extend(feature_views.values()) + feast_objects.extend([driver(), customer()]) + fs.apply(feast_objects) + fs.materialize(environment.start_date, environment.end_date) + out = fs.get_online_features( features=["driver_stats:conv_rate"], entity_rows=[{"driver": 5001}] ).to_dict() From fe14ba3b8cd3c49b7c1be2b82eb174ad6c05072c Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Tue, 31 Aug 2021 22:07:41 -0700 Subject: [PATCH 43/51] Fix refs Signed-off-by: Achal Shah --- .../tests/integration/offline_store/test_s3_custom_endpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py b/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py index 7c94d003c3..8f85efd23f 100644 --- a/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py +++ b/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py @@ -5,9 +5,9 @@ construct_test_environment, construct_universal_feature_views, ) +from tests.integration.feature_repos.universal.entities import customer, driver # TODO: Allow integration tests to run using different credentials. 
-from tests.integration.feature_repos.universal.entities import customer, driver @pytest.mark.integration From 43d8e270b808585ac37b39a74e9bf44e474f4244 Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Tue, 31 Aug 2021 22:09:01 -0700 Subject: [PATCH 44/51] Fix refs Signed-off-by: Achal Shah --- .../integration/e2e/test_universal_e2e.py | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py index 424f85468f..6f1805a840 100644 --- a/sdk/python/tests/integration/e2e/test_universal_e2e.py +++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py @@ -21,26 +21,21 @@ @pytest.mark.parametrize( "config", vary_infer_feature(FULL_REPO_CONFIGS), ids=lambda v: str(v) ) -def test_e2e_consistency( - config, data_source_cache: DataSourceCache, environment, universal_data_sources -): +def test_e2e_consistency(config): df = create_dataset() test_suite_name = test_e2e_consistency.__name__ with construct_test_environment(config, test_suite_name) as test_environment: fs = test_environment.feature_store infer_features = test_environment.test_repo_config.infer_features - key = f"{test_suite_name}_{test_environment.test_repo_config.offline_store_creator}" - _, _, data_source, _ = data_source_cache.get_or_create( - key, - lambda: ( - None, - df, - test_environment.data_source_creator.create_data_source( - df, fs.project, field_mapping={"ts_1": "ts", "id": "driver_id"} - ), - test_environment.data_source_creator, + _, _, data_source, _ = ( + None, + df, + test_environment.data_source_creator.create_data_source( + df, fs.project, field_mapping={"ts_1": "ts", "id": "driver_id"} ), + test_environment.data_source_creator, ) + fv = driver_feature_view(data_source=data_source, infer_features=infer_features) entity = driver() From 58c75b04bd0e8b7f1df9051081e9ee7b601689ad Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Tue, 31 Aug 2021 22:20:26 -0700 Subject: [PATCH 45/51] fix historical tests Signed-off-by: Achal Shah --- .../offline_store/test_universal_historical_retrieval.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index be2467fb81..91e3f5edea 100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -16,6 +16,7 @@ construct_universal_feature_views, table_name_from_data_source, ) +from tests.integration.feature_repos.universal.entities import customer, driver np.random.seed(0) @@ -154,6 +155,10 @@ def test_historical_features(environment, universal_data_sources, full_feature_n feature_views["driver"], ) + feast_objects = [] + feast_objects.extend([customer_fv, driver_fv, driver(), customer()]) + store.apply(feast_objects) + entity_df_query = None orders_table = table_name_from_data_source(data_sources["orders"]) if orders_table: From 8b5ba5a0cca2a203258a451a6a2b0e3ac81ddfa7 Mon Sep 17 00:00:00 2001 From: Achal Shah Date: Tue, 31 Aug 2021 22:31:10 -0700 Subject: [PATCH 46/51] renames Signed-off-by: Achal Shah --- sdk/python/tests/conftest.py | 4 +- .../integration/e2e/test_universal_e2e.py | 2 +- ...configuration.py => repo_configuration.py} | 37 +++++++++++-------- .../offline_store/test_s3_custom_endpoint.py | 6 +-- .../test_universal_historical_retrieval.py | 2 +- 
From 8b7eab57e994390949820b575efaa28807d03c5b Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Tue, 31 Aug 2021 22:31:10 -0700
Subject: [PATCH 46/51] renames

Signed-off-by: Achal Shah
---
 sdk/python/tests/conftest.py                  |  4 +-
 .../integration/e2e/test_universal_e2e.py     |  2 +-
 ...configuration.py => repo_configuration.py} | 37 +++++++++++--------
 .../offline_store/test_s3_custom_endpoint.py  |  6 +--
 .../test_universal_historical_retrieval.py    |  2 +-
 .../online_store/test_universal_online.py     |  2 +-
 6 files changed, 29 insertions(+), 24 deletions(-)
 rename sdk/python/tests/integration/feature_repos/{test_repo_configuration.py => repo_configuration.py} (89%)

diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py
index 8371ba202f..d8f34fcb3f 100644
--- a/sdk/python/tests/conftest.py
+++ b/sdk/python/tests/conftest.py
@@ -18,7 +18,7 @@
 import pandas as pd
 import pytest
 
-from tests.integration.feature_repos.test_repo_configuration import (
+from tests.integration.feature_repos.repo_configuration import (
     FULL_REPO_CONFIGS,
     construct_test_environment,
     construct_universal_data_sources,
@@ -113,6 +113,6 @@ def universal_data_sources(environment):
         datasets, environment.data_source_creator
     )
 
-    yield (entities, datasets, datasources)
+    yield entities, datasets, datasources
 
     environment.data_source_creator.teardown()

diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py
index 6f1805a840..c4c2294a18 100644
--- a/sdk/python/tests/integration/e2e/test_universal_e2e.py
+++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py
@@ -8,7 +8,7 @@
 from feast import FeatureStore, FeatureView
 from tests.data.data_creator import create_dataset
-from tests.integration.feature_repos.test_repo_configuration import (
+from tests.integration.feature_repos.repo_configuration import (
     FULL_REPO_CONFIGS,
     construct_test_environment,
     vary_infer_feature,

diff --git a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py
similarity index 89%
rename from sdk/python/tests/integration/feature_repos/test_repo_configuration.py
rename to sdk/python/tests/integration/feature_repos/repo_configuration.py
index 17aecd1909..1acb11ae1c 100644
--- a/sdk/python/tests/integration/feature_repos/test_repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py
@@ -20,7 +20,7 @@
 
 
 @dataclass(frozen=True, repr=True)
-class TestRepoConfig:
+class IntegrationTestRepoConfig:
     """
     This class should hold all possible parameters that may need to be varied by individual tests.
     """
@@ -41,28 +41,28 @@ def ds_creator_path(cls: str):
 DYNAMO_CONFIG = {"type": "dynamodb", "region": "us-west-2"}
 REDIS_CONFIG = {"type": "redis", "connection_string": "localhost:6379,db=0"}
 
-FULL_REPO_CONFIGS: List[TestRepoConfig] = [
+FULL_REPO_CONFIGS: List[IntegrationTestRepoConfig] = [
     # Local configurations
-    TestRepoConfig(),
-    TestRepoConfig(online_store=REDIS_CONFIG),
+    IntegrationTestRepoConfig(),
+    IntegrationTestRepoConfig(online_store=REDIS_CONFIG),
     # GCP configurations
-    TestRepoConfig(
+    IntegrationTestRepoConfig(
         provider="gcp",
         offline_store_creator=ds_creator_path("bigquery.BigQueryDataSourceCreator"),
         online_store="datastore",
     ),
-    TestRepoConfig(
+    IntegrationTestRepoConfig(
         provider="gcp",
         offline_store_creator=ds_creator_path("bigquery.BigQueryDataSourceCreator"),
         online_store=REDIS_CONFIG,
     ),
     # AWS configurations
-    TestRepoConfig(
+    IntegrationTestRepoConfig(
         provider="aws",
         offline_store_creator=ds_creator_path("redshift.RedshiftDataSourceCreator"),
         online_store=DYNAMO_CONFIG,
     ),
-    TestRepoConfig(
+    IntegrationTestRepoConfig(
         provider="aws",
         offline_store_creator=ds_creator_path("redshift.RedshiftDataSourceCreator"),
         online_store=REDIS_CONFIG,
@@ -132,7 +132,7 @@ def construct_universal_feature_views(
 
 @dataclass
 class Environment:
     name: str
-    test_repo_config: TestRepoConfig
+    test_repo_config: IntegrationTestRepoConfig
     feature_store: FeatureStore
     data_source_creator: DataSourceCreator
 
@@ -152,7 +152,9 @@ def table_name_from_data_source(ds: DataSource) -> Optional[str]:
     return None
 
 
-def vary_full_feature_names(configs: List[TestRepoConfig]) -> List[TestRepoConfig]:
+def vary_full_feature_names(
+    configs: List[IntegrationTestRepoConfig],
+) -> List[IntegrationTestRepoConfig]:
     new_configs = []
     for c in configs:
         true_c = replace(c, full_feature_names=True)
         false_c = replace(c, full_feature_names=False)
         new_configs.extend([true_c, false_c])
     return new_configs
 
 
 def vary_infer_event_timestamp_col(
-    configs: List[TestRepoConfig],
-) -> List[TestRepoConfig]:
+    configs: List[IntegrationTestRepoConfig],
+) -> List[IntegrationTestRepoConfig]:
     new_configs = []
     for c in configs:
         true_c = replace(c, infer_event_timestamp_col=True)
         false_c = replace(c, infer_event_timestamp_col=False)
         new_configs.extend([true_c, false_c])
     return new_configs
 
 
-def vary_infer_feature(configs: List[TestRepoConfig]) -> List[TestRepoConfig]:
+def vary_infer_feature(
+    configs: List[IntegrationTestRepoConfig],
+) -> List[IntegrationTestRepoConfig]:
     new_configs = []
     for c in configs:
         true_c = replace(c, infer_features=True)
         false_c = replace(c, infer_features=False)
         new_configs.extend([true_c, false_c])
     return new_configs
 
 
 def vary_providers_for_offline_stores(
-    configs: List[TestRepoConfig],
-) -> List[TestRepoConfig]:
+    configs: List[IntegrationTestRepoConfig],
+) -> List[IntegrationTestRepoConfig]:
     new_configs = []
     for c in configs:
         if "FileDataSourceCreator" in c.offline_store_creator:
             new_configs.append(c)
         elif "RedshiftDataSourceCreator" in c.offline_store_creator:
             for p in ["local", "aws"]:
                 new_configs.append(replace(c, provider=p))
         elif "BigQueryDataSourceCreator" in c.offline_store_creator:
             for p in ["local", "gcp"]:
                 new_configs.append(replace(c, provider=p))
     return new_configs
 
 
 @contextmanager
 def construct_test_environment(
-    test_repo_config: TestRepoConfig, test_suite_name: str = "integration_test"
+    test_repo_config: IntegrationTestRepoConfig,
+    test_suite_name: str = "integration_test",
 ) -> Environment:

diff --git a/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py b/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py
index 8f85efd23f..3eb3f4da6f 100644
--- a/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py
+++ b/sdk/python/tests/integration/offline_store/test_s3_custom_endpoint.py
@@ -1,7 +1,7 @@
 import pytest
 
-from tests.integration.feature_repos.test_repo_configuration import (
-    TestRepoConfig,
+from tests.integration.feature_repos.repo_configuration import (
+    IntegrationTestRepoConfig,
     construct_test_environment,
     construct_universal_feature_views,
 )
@@ -15,7 +15,7 @@
     reason="No way to run this test today. Credentials conflict with real AWS credentials in CI"
 )
 def test_registration_and_retrieval_from_custom_s3_endpoint(universal_data_sources):
-    config = TestRepoConfig(
+    config = IntegrationTestRepoConfig(
         offline_store_creator="tests.integration.feature_repos.universal.data_sources.file.S3FileDataSourceCreator"
     )
     import os

diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py
index 91e3f5edea..75cd5bbf70 100644
--- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py
+++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py
@@ -12,7 +12,7 @@
 from feast.infra.offline_stores.offline_utils import (
     DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL,
 )
-from tests.integration.feature_repos.test_repo_configuration import (
+from tests.integration.feature_repos.repo_configuration import (
     construct_universal_feature_views,
     table_name_from_data_source,
 )

diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py
index 3aca0d73d3..b3bcf688bd 100644
--- a/sdk/python/tests/integration/online_store/test_universal_online.py
+++ b/sdk/python/tests/integration/online_store/test_universal_online.py
@@ -4,7 +4,7 @@
 import pandas as pd
 import pytest
 
-from tests.integration.feature_repos.test_repo_configuration import (
+from tests.integration.feature_repos.repo_configuration import (
     construct_universal_feature_views,
 )
 from tests.integration.feature_repos.universal.entities import customer, driver
From c445f5384e56ccdf7b6f9225dc49d29cc90eef52 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Wed, 1 Sep 2021 10:47:12 -0700
Subject: [PATCH 47/51] CR updates

Signed-off-by: Achal Shah
---
 sdk/python/feast/infra/provider.py            |  6 +-
 sdk/python/tests/conftest.py                  | 16 ++++++
 .../integration/e2e/test_universal_e2e.py     | 36 +++---------
 .../feature_repos/repo_configuration.py       | 57 ++-----------------
 .../universal/data_source_creator.py          | 19 ++++---
 .../universal/data_sources/bigquery.py        | 21 +++----
 .../universal/data_sources/file.py            | 12 ++--
 .../universal/data_sources/redshift.py        | 12 ++--
 8 files changed, 68 insertions(+), 111 deletions(-)

diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py
index e5a03d7ca8..b040b512a0 100644
--- a/sdk/python/feast/infra/provider.py
+++ b/sdk/python/feast/infra/provider.py
@@ -238,11 +238,15 @@ def _get_column_names(
         reverse_field_mapping[col] if col in reverse_field_mapping.keys() else col
         for col in feature_names
     ]
+
+    # We need to exclude join keys and timestamp columns from the list of features, after they are mapped to
+    # their final column names via the `field_mapping` field of the source.
     _feature_names = set(feature_names) - set(join_keys)
     _feature_names = _feature_names - {event_timestamp_column, created_timestamp_column}
+    feature_names = list(_feature_names)
     return (
         join_keys,
-        list(_feature_names),
+        feature_names,
         event_timestamp_column,
         created_timestamp_column,
     )
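One clarification on the comment added in the hunk above, since the ordering is easy to miss: the join-key and timestamp exclusion only matches once feature names have been mapped back to source column names. A toy, self-contained rendering of that logic (the column values are invented; the real code lives in _get_column_names):

    # field_mapping maps source column -> feature view column ("ts_1" -> "ts").
    field_mapping = {"ts_1": "ts"}
    reverse_field_mapping = {v: k for k, v in field_mapping.items()}

    join_keys = ["driver_id"]
    event_timestamp_column, created_timestamp_column = "ts_1", "created_ts"

    feature_names = ["conv_rate", "driver_id", "ts", "created_ts"]
    # Map view-level names back to source column names first...
    feature_names = [reverse_field_mapping.get(col, col) for col in feature_names]
    # ...so that "ts" becomes "ts_1" and the exclusion below actually matches.
    features = set(feature_names) - set(join_keys)
    features = features - {event_timestamp_column, created_timestamp_column}
    print(sorted(features))  # ['conv_rate']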
diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py
index d8f34fcb3f..742b572f8e 100644
--- a/sdk/python/tests/conftest.py
+++ b/sdk/python/tests/conftest.py
@@ -18,8 +18,10 @@
 import pandas as pd
 import pytest
 
+from tests.data.data_creator import create_dataset
 from tests.integration.feature_repos.repo_configuration import (
     FULL_REPO_CONFIGS,
+    Environment,
     construct_test_environment,
     construct_universal_data_sources,
     construct_universal_datasets,
@@ -116,3 +118,17 @@
     yield entities, datasets, datasources
 
     environment.data_source_creator.teardown()
+
+
+@pytest.fixture(scope="session")
+def e2e_data_sources(environment: Environment):
+    df = create_dataset()
+    data_source = environment.data_source_creator.create_data_source(
+        df,
+        environment.feature_store.project,
+        field_mapping={"ts_1": "ts", "id": "driver_id"},
+    )
+
+    yield df, data_source
+
+    environment.data_source_creator.teardown()

diff --git a/sdk/python/tests/integration/e2e/test_universal_e2e.py b/sdk/python/tests/integration/e2e/test_universal_e2e.py
index c4c2294a18..dd09aef19c 100644
--- a/sdk/python/tests/integration/e2e/test_universal_e2e.py
+++ b/sdk/python/tests/integration/e2e/test_universal_e2e.py
@@ -7,41 +7,21 @@
 from pytz import utc
 
 from feast import FeatureStore, FeatureView
-from tests.data.data_creator import create_dataset
-from tests.integration.feature_repos.repo_configuration import (
-    FULL_REPO_CONFIGS,
-    construct_test_environment,
-    vary_infer_feature,
-)
 from tests.integration.feature_repos.universal.entities import driver
 from tests.integration.feature_repos.universal.feature_views import driver_feature_view
 
 
 @pytest.mark.integration
-@pytest.mark.parametrize(
-    "config", vary_infer_feature(FULL_REPO_CONFIGS), ids=lambda v: str(v)
-)
-def test_e2e_consistency(config):
-    df = create_dataset()
-    test_suite_name = test_e2e_consistency.__name__
-    with construct_test_environment(config, test_suite_name) as test_environment:
-        fs = test_environment.feature_store
-        infer_features = test_environment.test_repo_config.infer_features
-        _, _, data_source, _ = (
-            None,
-            df,
-            test_environment.data_source_creator.create_data_source(
-                df, fs.project, field_mapping={"ts_1": "ts", "id": "driver_id"}
-            ),
-            test_environment.data_source_creator,
-        )
-
-        fv = driver_feature_view(data_source=data_source, infer_features=infer_features)
+@pytest.mark.parametrize("infer_features", [True, False])
+def test_e2e_consistency(environment, e2e_data_sources, infer_features):
+    fs = environment.feature_store
+    df, data_source = e2e_data_sources
+    fv = driver_feature_view(data_source=data_source, infer_features=infer_features)
 
-        entity = driver()
-        fs.apply([fv, entity])
+    entity = driver()
+    fs.apply([fv, entity])
 
-        run_offline_online_store_consistency_test(fs, fv)
+    run_offline_online_store_consistency_test(fs, fv)
 
 
 def check_offline_and_online_features(

diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py
index 1acb11ae1c..8a90fd54d6 100644
--- a/sdk/python/tests/integration/feature_repos/repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py
@@ -1,7 +1,7 @@
 import tempfile
 import uuid
 from contextlib import contextmanager
-from dataclasses import dataclass, field, replace
+from dataclasses import dataclass, field
 from datetime import datetime, timedelta
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
@@ -99,19 +99,19 @@
 ) -> Dict[str, DataSource]:
     customer_ds = data_source_creator.create_data_source(
         datasets["customer"],
-        destination="customer_profile",
+        destination_name="customer_profile",
         event_timestamp_column="event_timestamp",
         created_timestamp_column="created",
     )
     driver_ds = data_source_creator.create_data_source(
         datasets["driver"],
-        destination="driver_hourly",
+        destination_name="driver_hourly",
         event_timestamp_column="event_timestamp",
         created_timestamp_column="created",
     )
     orders_ds = data_source_creator.create_data_source(
         datasets["orders"],
-        destination="orders",
+        destination_name="orders",
         event_timestamp_column="event_timestamp",
         created_timestamp_column="created",
     )
@@ -152,55 +152,6 @@
     return None
 
 
-def vary_full_feature_names(
-    configs: List[IntegrationTestRepoConfig],
-) -> List[IntegrationTestRepoConfig]:
-    new_configs = []
-    for c in configs:
-        true_c = replace(c, full_feature_names=True)
-        false_c = replace(c, full_feature_names=False)
-        new_configs.extend([true_c, false_c])
-    return new_configs
-
-
-def vary_infer_event_timestamp_col(
-    configs: List[IntegrationTestRepoConfig],
-) -> List[IntegrationTestRepoConfig]:
-    new_configs = []
-    for c in configs:
-        true_c = replace(c, infer_event_timestamp_col=True)
-        false_c = replace(c, infer_event_timestamp_col=False)
-        new_configs.extend([true_c, false_c])
-    return new_configs
-
-
-def vary_infer_feature(
-    configs: List[IntegrationTestRepoConfig],
-) -> List[IntegrationTestRepoConfig]:
-    new_configs = []
-    for c in configs:
-        true_c = replace(c, infer_features=True)
-        false_c = replace(c, infer_features=False)
-        new_configs.extend([true_c, false_c])
-    return new_configs
-
-
-def vary_providers_for_offline_stores(
-    configs: List[IntegrationTestRepoConfig],
-) -> List[IntegrationTestRepoConfig]:
-    new_configs = []
-    for c in configs:
-        if "FileDataSourceCreator" in c.offline_store_creator:
-            new_configs.append(c)
-        elif "RedshiftDataSourceCreator" in c.offline_store_creator:
-            for p in ["local", "aws"]:
-                new_configs.append(replace(c, provider=p))
-        elif "BigQueryDataSourceCreator" in c.offline_store_creator:
-            for p in ["local", "gcp"]:
-                new_configs.append(replace(c, provider=p))
-    return new_configs
-
-
 @contextmanager
 def construct_test_environment(
     test_repo_config: IntegrationTestRepoConfig,

diff --git a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
index d65a91bd2f..e0d6983bf1 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
@@ -12,7 +12,7 @@
     def create_data_source(
         self,
         df: pd.DataFrame,
-        destination: str,
+        destination_name: str,
         event_timestamp_column="ts",
         created_timestamp_column="created_ts",
         field_mapping: Dict[str, str] = None,
@@ -22,13 +22,16 @@
         persist the dataframe in offline store, using the destination string as a way to differentiate
         multiple dataframes and data sources.
 
-        :param df: The dataframe to be used to create the data source.
-        :param destination: The destination that this data frame is meant for. This str is used by the implementing
-        classes to isolate the multiple dataframes from each other.
-        :param event_timestamp_column: Pass through for the underlying data source.
-        :param created_timestamp_column: Pass through for the underlying data source.
-        :param field_mapping: Pass through for the underlying data source.
-        :return: A Data source object, pointing to a table or file that is uploaded/persisted for the purpose of the
+        Args:
+            df: The dataframe to be used to create the data source.
+            destination_name: This str is used by the implementing classes to
+                isolate the multiple dataframes from each other.
+            event_timestamp_column: Pass through for the underlying data source.
+            created_timestamp_column: Pass through for the underlying data source.
+            field_mapping: Pass through for the underlying data source.
+
+        Returns:
+            A Data source object, pointing to a table or file that is uploaded/persisted for the purpose of the
             test.
         """
         ...

diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py
index 0863d7bd25..9b702ebe6c 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py
@@ -23,7 +23,7 @@
 
         self.tables = []
 
-    def get_dataset(self):
+    def create_dataset(self):
         if not self.dataset:
             self.dataset = bigquery.Dataset(self.dataset_id)
             print(f"Creating dataset: {self.dataset_id}")
@@ -32,7 +32,6 @@
                 1000 * 60 * 60 * 24 * 14
             )  # 2 weeks in milliseconds
             self.client.update_dataset(self.dataset, ["default_table_expiration_ms"])
-        return self.dataset
 
     def teardown(self):
@@ -50,30 +49,32 @@
     def create_data_source(
         self,
         df: pd.DataFrame,
-        destination: Optional[str] = None,
+        destination_name: Optional[str] = None,
         event_timestamp_column="ts",
         created_timestamp_column="created_ts",
         field_mapping: Dict[str, str] = None,
         **kwargs,
     ) -> DataSource:
 
-        destination = self.get_prefixed_table_name(destination)
+        destination_name = self.get_prefixed_table_name(destination_name)
 
-        self.get_dataset()
+        self.create_dataset()
 
         job_config = bigquery.LoadJobConfig()
-        if self.gcp_project not in destination:
-            destination = f"{self.gcp_project}.{self.project_name}.{destination}"
+        if self.gcp_project not in destination_name:
+            destination_name = (
+                f"{self.gcp_project}.{self.project_name}.{destination_name}"
+            )
 
         job = self.client.load_table_from_dataframe(
-            df, destination, job_config=job_config
+            df, destination_name, job_config=job_config
         )
         job.result()
 
-        self.tables.append(destination)
+        self.tables.append(destination_name)
 
         return BigQuerySource(
-            table_ref=destination,
+            table_ref=destination_name,
             event_timestamp_column=event_timestamp_column,
             created_timestamp_column=created_timestamp_column,
             date_partition_column="",

diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py
index 7d204ed7b1..0d402b2314 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py
@@ -26,16 +26,18 @@
     def create_data_source(
         self,
         df: pd.DataFrame,
-        destination: str,
+        destination_name: str,
         event_timestamp_column="ts",
         created_timestamp_column="created_ts",
         field_mapping: Dict[str, str] = None,
     ) -> DataSource:
 
-        destination = self.get_prefixed_table_name(destination)
+        destination_name = self.get_prefixed_table_name(destination_name)
 
         f = tempfile.NamedTemporaryFile(
-            prefix=f"{self.project_name}_{destination}", suffix=".parquet", delete=False
+            prefix=f"{self.project_name}_{destination_name}",
+            suffix=".parquet",
+            delete=False,
         )
 
         df.to_parquet(f.name)
         self.files.append(f)
@@ -102,13 +104,13 @@ def _upload_parquet_file(self, df, file_name, minio_endpoint):
     def create_data_source(
         self,
         df: pd.DataFrame,
-        destination: Optional[str] = None,
+        destination_name: Optional[str] = None,
         suffix: Optional[str] = None,
         event_timestamp_column="ts",
         created_timestamp_column="created_ts",
         field_mapping: Dict[str, str] = None,
     ) -> DataSource:
-        filename = f"{destination}.parquet"
+        filename = f"{destination_name}.parquet"
         port = self.minio.get_exposed_port("9000")
         host = self.minio.get_container_host_ip()
         minio_endpoint = f"{host}:{port}"

diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
index 528e0261f8..88780f07a0 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
@@ -34,14 +34,14 @@
     def create_data_source(
         self,
         df: pd.DataFrame,
-        destination: str,
+        destination_name: str,
         suffix: Optional[str] = None,
         event_timestamp_column="ts",
         created_timestamp_column="created_ts",
         field_mapping: Dict[str, str] = None,
     ) -> DataSource:
 
-        destination = self.get_prefixed_table_name(destination)
+        destination_name = self.get_prefixed_table_name(destination_name)
 
         aws_utils.upload_df_to_redshift(
             self.client,
@@ -49,16 +49,16 @@
             self.offline_store_config.database,
             self.offline_store_config.user,
             self.s3,
-            f"{self.offline_store_config.s3_staging_location}/copy/{destination}.parquet",
+            f"{self.offline_store_config.s3_staging_location}/copy/{destination_name}.parquet",
             self.offline_store_config.iam_role,
-            destination,
+            destination_name,
             df,
         )
 
-        self.tables.append(destination)
+        self.tables.append(destination_name)
 
         return RedshiftSource(
-            table=destination,
+            table=destination_name,
             event_timestamp_column=event_timestamp_column,
             created_timestamp_column=created_timestamp_column,
             date_partition_column="",

From e0ccd9b8babd0f77fee07f25f1760a11a5267c8e Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Wed, 1 Sep 2021 11:15:23 -0700
Subject: [PATCH 48/51] use the actual ref to data source creators

Signed-off-by: Achal Shah
---
 .../feature_repos/repo_configuration.py       | 35 +++++++++----------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py
index 8a90fd54d6..00c439ca89 100644
--- a/sdk/python/tests/integration/feature_repos/repo_configuration.py
+++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py
@@ -4,15 +4,24 @@
 from dataclasses import dataclass, field
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Type, Union
 
 import pandas as pd
 
-from feast import FeatureStore, FeatureView, RepoConfig, driver_test_data, importer
+from feast import FeatureStore, FeatureView, RepoConfig, driver_test_data
 from feast.data_source import DataSource
 from tests.integration.feature_repos.universal.data_source_creator import (
     DataSourceCreator,
 )
+from tests.integration.feature_repos.universal.data_sources.bigquery import (
+    BigQueryDataSourceCreator,
+)
+from tests.integration.feature_repos.universal.data_sources.file import (
+    FileDataSourceCreator,
+)
+from tests.integration.feature_repos.universal.data_sources.redshift import (
+    RedshiftDataSourceCreator,
+)
 from tests.integration.feature_repos.universal.feature_views import (
     create_customer_daily_profile_feature_view,
     create_driver_hourly_stats_feature_view,
@@ -28,17 +37,13 @@ class IntegrationTestRepoConfig:
 
     provider: str = "local"
     online_store: Union[str, Dict] = "sqlite"
 
-    offline_store_creator: str = "tests.integration.feature_repos.universal.data_sources.file.FileDataSourceCreator"
+    offline_store_creator: Type[DataSourceCreator] = FileDataSourceCreator
 
     full_feature_names: bool = True
     infer_event_timestamp_col: bool = True
     infer_features: bool = False
 
 
-def ds_creator_path(cls: str):
-    return f"tests.integration.feature_repos.universal.data_sources.{cls}"
-
-
 DYNAMO_CONFIG = {"type": "dynamodb", "region": "us-west-2"}
 REDIS_CONFIG = {"type": "redis", "connection_string": "localhost:6379,db=0"}
 FULL_REPO_CONFIGS: List[IntegrationTestRepoConfig] = [
@@ -48,23 +53,23 @@
     # GCP configurations
     IntegrationTestRepoConfig(
         provider="gcp",
-        offline_store_creator=ds_creator_path("bigquery.BigQueryDataSourceCreator"),
+        offline_store_creator=BigQueryDataSourceCreator,
         online_store="datastore",
     ),
     IntegrationTestRepoConfig(
         provider="gcp",
-        offline_store_creator=ds_creator_path("bigquery.BigQueryDataSourceCreator"),
+        offline_store_creator=BigQueryDataSourceCreator,
         online_store=REDIS_CONFIG,
     ),
     # AWS configurations
     IntegrationTestRepoConfig(
         provider="aws",
-        offline_store_creator=ds_creator_path("redshift.RedshiftDataSourceCreator"),
+        offline_store_creator=RedshiftDataSourceCreator,
         online_store=DYNAMO_CONFIG,
     ),
     IntegrationTestRepoConfig(
         provider="aws",
-        offline_store_creator=ds_creator_path("redshift.RedshiftDataSourceCreator"),
+        offline_store_creator=RedshiftDataSourceCreator,
         online_store=REDIS_CONFIG,
     ),
 ]
@@ -159,13 +164,7 @@
 ) -> Environment:
     project = f"{test_suite_name}_{str(uuid.uuid4()).replace('-', '')[:8]}"
 
-    module_name, config_class_name = test_repo_config.offline_store_creator.rsplit(
-        ".", 1
-    )
-
-    offline_creator: DataSourceCreator = importer.get_class_from_type(
-        module_name, config_class_name, "DataSourceCreator"
-    )(project)
+    offline_creator: DataSourceCreator = test_repo_config.offline_store_creator(project)
 
     offline_store_config = offline_creator.create_offline_store_config()
     online_store = test_repo_config.online_store
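A note on the change above: replacing the dotted-path string with the class object itself moves failures from test runtime to import time, and lets the Type[DataSourceCreator] annotation be checked statically. Roughly, the removed pattern looked like this (a sketch using importlib in place of feast's importer helper):

    import importlib

    def class_from_path(path: str) -> type:
        # Resolve "pkg.module.ClassName" to the class object at runtime;
        # a typo in the string only surfaces when this line executes.
        module_name, class_name = path.rsplit(".", 1)
        return getattr(importlib.import_module(module_name), class_name)

With direct references, the configuration simply holds FileDataSourceCreator, BigQueryDataSourceCreator, or RedshiftDataSourceCreator, and a misspelled name fails immediately on import.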
From a510ca6029073857cc35e34c5865a7f34714bbc9 Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Wed, 1 Sep 2021 14:27:14 -0700
Subject: [PATCH 49/51] format

Signed-off-by: Achal Shah
---
 sdk/python/tests/conftest.py                  |  8 +--
 .../registration/test_universal_types.py      | 49 ++++++++++++-------
 2 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py
index a25e134343..605787b234 100644
--- a/sdk/python/tests/conftest.py
+++ b/sdk/python/tests/conftest.py
@@ -28,7 +28,9 @@
     construct_universal_datasets,
     construct_universal_entities,
 )
-from tests.integration.feature_repos.universal.data_sources.bigquery import BigQueryDataSourceCreator
+from tests.integration.feature_repos.universal.data_sources.bigquery import (
+    BigQueryDataSourceCreator,
+)
 
 
 def pytest_configure(config):
@@ -126,9 +128,7 @@ def e2e_data_sources(environment: Environment):
     df = create_dataset()
     data_source = environment.data_source_creator.create_data_source(
-        df,
-        environment.feature_store.project,
-        field_mapping={"ts_1": "ts"},
+        df, environment.feature_store.project, field_mapping={"ts_1": "ts"},
     )
 
     yield df, data_source

diff --git a/sdk/python/tests/integration/registration/test_universal_types.py b/sdk/python/tests/integration/registration/test_universal_types.py
index fd18875faf..bfd2cb0292 100644
--- a/sdk/python/tests/integration/registration/test_universal_types.py
+++ b/sdk/python/tests/integration/registration/test_universal_types.py
@@ -2,15 +2,20 @@
 
 import pandas as pd
 import pytest
 
-from tests.data.data_creator import get_feature_values_for_dtype, create_dataset
-from tests.integration.feature_repos.universal.entities import driver
-from tests.integration.feature_repos.universal.feature_views import driver_feature_view
 from feast.infra.offline_stores.offline_store import RetrievalJob
 from feast.type_map import python_type_to_feast_value_type
 from feast.value_type import ValueType
-from tests.integration.feature_repos.repo_configuration import IntegrationTestRepoConfig, construct_test_environment
-from tests.integration.feature_repos.universal.data_sources.bigquery import BigQueryDataSourceCreator
+from tests.data.data_creator import create_dataset, get_feature_values_for_dtype
+from tests.integration.feature_repos.repo_configuration import (
+    IntegrationTestRepoConfig,
+    construct_test_environment,
+)
+from tests.integration.feature_repos.universal.data_sources.bigquery import (
+    BigQueryDataSourceCreator,
+)
+from tests.integration.feature_repos.universal.entities import driver
+from tests.integration.feature_repos.universal.feature_views import driver_feature_view
 
 
 def entity_feature_types_ids(entity_type: ValueType, feature_dtype: str):
@@ -50,7 +55,7 @@ def test_entity_inference_types_match(entity_type, feature_dtype, feature_is_lis
         data_source = environment.data_source_creator.create_data_source(
             df,
             destination_name=environment.feature_store.project,
-            field_mapping={"ts_1": "ts"}
+            field_mapping={"ts_1": "ts"},
         )
         fv = create_feature_view(feature_dtype, feature_is_list, data_source)
         fs = environment.feature_store
@@ -88,13 +93,15 @@
 @pytest.mark.parametrize(
     "feature_is_list", [True, False], ids=lambda v: f"feature_is_list:{str(v)}"
 )
-def test_feature_get_historical_features_types_match(entity_type, feature_dtype, feature_is_list):
+def test_feature_get_historical_features_types_match(
+    entity_type, feature_dtype, feature_is_list
+):
     with construct_test_environment(GCP_CONFIG) as environment:
         df = create_dataset(entity_type, feature_dtype, feature_is_list)
         data_source = environment.data_source_creator.create_data_source(
             df,
             destination_name=environment.feature_store.project,
-            field_mapping={"ts_1": "ts"}
+            field_mapping={"ts_1": "ts"},
         )
         fv = create_feature_view(feature_dtype, feature_is_list, data_source)
         fs = environment.feature_store
@@ -104,16 +111,16 @@
             features = [f"{fv.name}:value"]
 
             df = pd.DataFrame()
-            df["driver_id"] = (
-                ["1", "3"] if entity_type == ValueType.STRING else [1, 3]
-            )
+            df["driver_id"] = ["1", "3"] if entity_type == ValueType.STRING else [1, 3]
             now = datetime.utcnow()
             ts = pd.Timestamp(now).round("ms")
             df["ts"] = [
                 ts - timedelta(hours=4),
                 ts - timedelta(hours=2),
             ]
-            historical_features = fs.get_historical_features(entity_df=df, features=features,)
+            historical_features = fs.get_historical_features(
+                entity_df=df, features=features,
+            )
 
             # TODO(adchia): pandas doesn't play well with nan values in ints. BQ will also coerce to floats if there are NaNs
             historical_features_df = historical_features.to_df()
@@ -121,8 +128,12 @@
             if feature_is_list:
                 assert_feature_list_types(feature_dtype, historical_features_df)
             else:
-                assert_expected_historical_feature_types(feature_dtype, historical_features_df)
-            assert_expected_arrow_types(feature_dtype, feature_is_list, historical_features)
+                assert_expected_historical_feature_types(
+                    feature_dtype, historical_features_df
+                )
+            assert_expected_arrow_types(
+                feature_dtype, feature_is_list, historical_features
+            )
         finally:
             environment.data_source_creator.teardown()
@@ -139,13 +150,15 @@
 @pytest.mark.parametrize(
     "feature_is_list", [False], ids=lambda v: f"feature_is_list:{str(v)}"
 )
-def test_feature_get_online_features_types_match(entity_type, feature_dtype, feature_is_list):
+def test_feature_get_online_features_types_match(
+    entity_type, feature_dtype, feature_is_list
+):
     with construct_test_environment(GCP_CONFIG) as environment:
         df = create_dataset(entity_type, feature_dtype, feature_is_list)
         data_source = environment.data_source_creator.create_data_source(
             df,
             destination_name=environment.feature_store.project,
-            field_mapping={"ts_1": "ts"}
+            field_mapping={"ts_1": "ts"},
         )
         fv = create_feature_view(feature_dtype, feature_is_list, data_source)
         fs = environment.feature_store
@@ -170,7 +183,9 @@
             }
             assert (
                 type(online_features["value"][0]).__name__
-                == feature_list_dtype_to_expected_online_response_value_type[feature_dtype]
+                == feature_list_dtype_to_expected_online_response_value_type[
+                    feature_dtype
+                ]
             )
         finally:
             environment.data_source_creator.teardown()

From 6c105020455e652e07f86220610cc96d1c54d60a Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Wed, 1 Sep 2021 14:28:11 -0700
Subject: [PATCH 50/51] Remove unused imports

Signed-off-by: Achal Shah
---
 sdk/python/tests/conftest.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py
index 605787b234..76812825b3 100644
--- a/sdk/python/tests/conftest.py
+++ b/sdk/python/tests/conftest.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import multiprocessing
-import uuid
 from datetime import datetime, timedelta
 from sys import platform
 
@@ -28,9 +27,6 @@
     construct_universal_datasets,
     construct_universal_entities,
 )
-from tests.integration.feature_repos.universal.data_sources.bigquery import (
-    BigQueryDataSourceCreator,
-)
 
 
 def pytest_configure(config):

From ff1fb8175b3b9d6b27b35e0426bcd1073336cf4f Mon Sep 17 00:00:00 2001
From: Achal Shah
Date: Wed, 1 Sep 2021 14:38:29 -0700
Subject: [PATCH 51/51] Add ids for pytest params

Signed-off-by: Achal Shah
---
 sdk/python/tests/conftest.py                          | 10 +++++++++-
 .../integration/registration/test_universal_types.py  |  1 -
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py
index 76812825b3..55bdeb3a7d 100644
--- a/sdk/python/tests/conftest.py
+++ b/sdk/python/tests/conftest.py
@@ -99,7 +99,9 @@
     return pd.DataFrame.from_dict(data)
 
 
-@pytest.fixture(params=FULL_REPO_CONFIGS, scope="session")
+@pytest.fixture(
+    params=FULL_REPO_CONFIGS, scope="session", ids=[str(c) for c in FULL_REPO_CONFIGS]
+)
 def environment(request):
     with construct_test_environment(request.param) as e:
         yield e
@@ -130,3 +132,9 @@
     yield df, data_source
 
     environment.data_source_creator.teardown()
+
+
+@pytest.fixture(params=FULL_REPO_CONFIGS, scope="session")
+def type_test_environment(request):
+    with construct_test_environment(request.param) as e:
+        yield e

diff --git a/sdk/python/tests/integration/registration/test_universal_types.py b/sdk/python/tests/integration/registration/test_universal_types.py
index bfd2cb0292..5390b72cc0 100644
--- a/sdk/python/tests/integration/registration/test_universal_types.py
+++ b/sdk/python/tests/integration/registration/test_universal_types.py
@@ -33,7 +33,6 @@ def entity_feature_types_ids(entity_type: ValueType, feature_dtype: str):
     offline_store_creator=BigQueryDataSourceCreator,
     online_store="datastore",
 )
-feature_is_list = [True, False]
 
 # TODO: change parametrization to allow for other providers aside from gcp
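On the ids added in this last patch: without explicit ids, pytest labels the params of a session-scoped fixture environment0, environment1, and so on, which makes it hard to tell which provider/online-store combination failed. Because IntegrationTestRepoConfig is declared with repr=True, str(c) yields a readable label for each configuration. A self-contained illustration of the mechanics (toy Config class, not the real FULL_REPO_CONFIGS):

    import pytest

    class Config:
        def __init__(self, provider: str, online_store: str) -> None:
            self.provider = provider
            self.online_store = online_store

        def __repr__(self) -> str:
            return f"{self.provider}:{self.online_store}"

    CONFIGS = [Config("local", "sqlite"), Config("gcp", "datastore")]

    # ids turns test ids into test_provider[local:sqlite] and
    # test_provider[gcp:datastore] instead of test_provider[config0] etc.
    @pytest.fixture(params=CONFIGS, ids=[str(c) for c in CONFIGS])
    def config(request):
        return request.param

    def test_provider(config):
        assert config.provider in {"local", "gcp"}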