diff --git a/.github/workflows/java_master_only.yml b/.github/workflows/java_master_only.yml index f5297615f6..194024a168 100644 --- a/.github/workflows/java_master_only.yml +++ b/.github/workflows/java_master_only.yml @@ -69,6 +69,52 @@ jobs: java-version: '11' java-package: jdk architecture: x64 + - name: Setup Python (to call feast apply) + uses: actions/setup-python@v2 + id: setup-python + with: + python-version: 3.8 + architecture: x64 + - name: Setup Go + id: setup-go + uses: actions/setup-go@v2 + with: + go-version: 1.18.0 + - name: Upgrade pip version + run: | + pip install --upgrade "pip>=21.3.1,<22.1" + - name: Get pip cache dir + id: pip-cache + run: | + echo "::set-output name=dir::$(pip cache dir)" + - name: pip cache + uses: actions/cache@v2 + with: + path: | + ${{ steps.pip-cache.outputs.dir }} + /opt/hostedtoolcache/Python + /Users/runner/hostedtoolcache/Python + key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }} + restore-keys: | + ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- + - name: Install pip-tools + run: pip install pip-tools + - name: Install apache-arrow on ubuntu + run: | + sudo apt update + sudo apt install -y -V ca-certificates lsb-release wget + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt update + sudo apt install -y -V libarrow-dev + - name: Install Python dependencies + run: make install-python-ci-dependencies + - uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-it-maven- - uses: actions/cache@v2 with: path: ~/.m2/repository @@ -95,10 +141,46 @@ jobs: java-version: '11' java-package: jdk 
architecture: x64 - - uses: actions/setup-python@v2 + - name: Setup Python (to call feast apply) + uses: actions/setup-python@v2 + id: setup-python + with: + python-version: 3.8 + architecture: x64 + - name: Setup Go + id: setup-go + uses: actions/setup-go@v2 with: - python-version: '3.8' - architecture: 'x64' + go-version: 1.18.0 + - name: Upgrade pip version + run: | + pip install --upgrade "pip>=21.3.1,<22.1" + - name: Get pip cache dir + id: pip-cache + run: | + echo "::set-output name=dir::$(pip cache dir)" + - name: pip cache + uses: actions/cache@v2 + with: + path: | + ${{ steps.pip-cache.outputs.dir }} + /opt/hostedtoolcache/Python + /Users/runner/hostedtoolcache/Python + key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }} + restore-keys: | + ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- + - name: Install pip-tools + run: pip install pip-tools + - name: Install apache-arrow on ubuntu + run: | + sudo apt update + sudo apt install -y -V ca-certificates lsb-release wget + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt update + sudo apt install -y -V libarrow-dev + - name: Install Python dependencies + run: make install-python-ci-dependencies - uses: actions/cache@v2 with: path: ~/.m2/repository diff --git a/.github/workflows/java_pr.yml b/.github/workflows/java_pr.yml index 328a8e7c7b..c552428664 100644 --- a/.github/workflows/java_pr.yml +++ b/.github/workflows/java_pr.yml @@ -40,6 +40,52 @@ jobs: java-version: '11' java-package: jdk architecture: x64 + - name: Setup Python (to call feast apply) + uses: actions/setup-python@v2 + id: setup-python + with: + python-version: 3.8 + architecture: x64 + - name: Setup Go + id: setup-go 
+ uses: actions/setup-go@v2 + with: + go-version: 1.18.0 + - name: Upgrade pip version + run: | + pip install --upgrade "pip>=21.3.1,<22.1" + - name: Get pip cache dir + id: pip-cache + run: | + echo "::set-output name=dir::$(pip cache dir)" + - name: pip cache + uses: actions/cache@v2 + with: + path: | + ${{ steps.pip-cache.outputs.dir }} + /opt/hostedtoolcache/Python + /Users/runner/hostedtoolcache/Python + key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }} + restore-keys: | + ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- + - name: Install pip-tools + run: pip install pip-tools + - name: Install apache-arrow on ubuntu + run: | + sudo apt update + sudo apt install -y -V ca-certificates lsb-release wget + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt update + sudo apt install -y -V libarrow-dev + - name: Install Python dependencies + run: make install-python-ci-dependencies + - uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-it-maven- - uses: actions/cache@v2 with: path: ~/.m2/repository @@ -129,6 +175,46 @@ jobs: aws-region: us-west-2 - name: Use AWS CLI run: aws sts get-caller-identity + - name: Setup Python (to call feast apply) + uses: actions/setup-python@v2 + id: setup-python + with: + python-version: 3.8 + architecture: x64 + - name: Setup Go + id: setup-go + uses: actions/setup-go@v2 + with: + go-version: 1.18.0 + - name: Upgrade pip version + run: | + pip install --upgrade "pip>=21.3.1,<22.1" + - name: Get pip cache dir + id: pip-cache + run: | + echo "::set-output name=dir::$(pip cache dir)" + - name: pip cache + 
uses: actions/cache@v2 + with: + path: | + ${{ steps.pip-cache.outputs.dir }} + /opt/hostedtoolcache/Python + /Users/runner/hostedtoolcache/Python + key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }} + restore-keys: | + ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- + - name: Install pip-tools + run: pip install pip-tools + - name: Install apache-arrow on ubuntu + run: | + sudo apt update + sudo apt install -y -V ca-certificates lsb-release wget + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt update + sudo apt install -y -V libarrow-dev + - name: Install Python dependencies + run: make install-python-ci-dependencies - name: Run integration tests run: make test-java-integration - name: Save report diff --git a/java/CONTRIBUTING.md b/java/CONTRIBUTING.md index 74549034b9..7ccfe108c0 100644 --- a/java/CONTRIBUTING.md +++ b/java/CONTRIBUTING.md @@ -59,6 +59,7 @@ mvn spotless:apply ### Project Makefile The Project Makefile provides useful shorthands for common development tasks: +> Note: These commands require `feast` (Python) to be installed locally. Run all Unit tests: ``` diff --git a/java/serving/README.md b/java/serving/README.md index a0d87563a9..0a153ceab8 100644 --- a/java/serving/README.md +++ b/java/serving/README.md @@ -136,4 +136,6 @@ Unit & Integration Tests can be used to verify functionality: mvn test -pl serving --also-make # run integration tests mvn verify -pl serving --also-make +# run integration tests with debugger +mvn -Dmaven.failsafe.debug verify -pl serving --also-make ``` \ No newline at end of file diff --git a/java/serving/pom.xml b/java/serving/pom.xml index e597775f9b..9eea11ef96 100644 ---
a/java/serving/pom.xml +++ b/java/serving/pom.xml @@ -82,6 +82,28 @@ + + + org.codehaus.mojo + exec-maven-plugin + 1.6.0 + + + + python + src/test/resources/docker-compose/feast10/ + + setup_it.py + + + feast_test_apply + process-test-resources + + exec + + + + diff --git a/java/serving/src/test/resources/docker-compose/feast10/definitions.py b/java/serving/src/test/resources/docker-compose/feast10/definitions.py index 908f65ce8b..4ac7b807c6 100644 --- a/java/serving/src/test/resources/docker-compose/feast10/definitions.py +++ b/java/serving/src/test/resources/docker-compose/feast10/definitions.py @@ -75,8 +75,9 @@ def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame: entity = Entity(name="entity") -benchmark_feature_views = [ - FeatureView( +benchmark_feature_views = [] +for i in range(25): + fv = FeatureView( name=f"feature_view_{i}", entities=[entity], ttl=timedelta(seconds=86400), @@ -84,8 +85,7 @@ def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame: online=True, source=generated_data_source, ) - for i in range(25) -] + benchmark_feature_views.append(fv) benchmark_feature_service = FeatureService( name=f"benchmark_feature_service", features=benchmark_feature_views, diff --git a/java/serving/src/test/resources/docker-compose/feast10/registry.db b/java/serving/src/test/resources/docker-compose/feast10/registry.db deleted file mode 100644 index 746934e3d0..0000000000 Binary files a/java/serving/src/test/resources/docker-compose/feast10/registry.db and /dev/null differ diff --git a/java/serving/src/test/resources/docker-compose/feast10/setup_it.py b/java/serving/src/test/resources/docker-compose/feast10/setup_it.py new file mode 100644 index 0000000000..733ebdfb49 --- /dev/null +++ b/java/serving/src/test/resources/docker-compose/feast10/setup_it.py @@ -0,0 +1,86 @@ +from pathlib import Path +from feast.repo_config import load_repo_config +from datetime import datetime, timedelta + +import numpy as np +import pandas as pd + +from 
definitions import ( + benchmark_feature_service, + benchmark_feature_views, + driver, + driver_hourly_stats_view, + entity, + transformed_conv_rate, +) + +from feast import FeatureStore + + +def setup_data(): + start = datetime.now() - timedelta(days=10) + + df = pd.DataFrame() + df["driver_id"] = np.arange(1000, 1010) + df["created"] = datetime.now() + df["conv_rate"] = np.arange(0, 1, 0.1) + df["acc_rate"] = np.arange(0.5, 1, 0.05) + df["avg_daily_trips"] = np.arange(0, 1000, 100) + + # some of the rows are older than 7 days, to test the OUTSIDE_MAX_AGE status + df["event_timestamp"] = start + pd.Series(np.arange(0, 10)).map( + lambda days: timedelta(days=days) + ) + + # Store data in parquet files. Parquet is convenient for local development mode. For + # production, you can use your favorite DWH, such as BigQuery. See Feast documentation + # for more info. + df.to_parquet("driver_stats.parquet") + + # For Benchmarks + # Please read more in Feast RFC-031 + # (link https://docs.google.com/document/d/12UuvTQnTTCJhdRgy6h10zSbInNGSyEJkIxpOcgOen1I/edit) + # about this benchmark setup + def generate_data( + num_rows: int, num_features: int, destination: str + ) -> pd.DataFrame: + features = [f"feature_{i}" for i in range(num_features)] + columns = ["entity", "event_timestamp"] + features + df = pd.DataFrame(0, index=np.arange(num_rows), columns=columns) + df["event_timestamp"] = datetime.utcnow() + for column in features: + df[column] = np.random.randint(1, num_rows, num_rows) + + df["entity"] = "key-" + pd.Series(np.arange(1, num_rows + 1)).astype( + pd.StringDtype() + ) + + df.to_parquet(destination) + + generate_data(10**3, 250, "benchmark_data.parquet") + + +def main(): + print("Running setup_it.py") + + setup_data() + existing_repo_config = load_repo_config(Path(".")) + + # Switch to the default online store; otherwise this relies on the Dockerized Redis service + fs = FeatureStore(config=existing_repo_config.copy(update={"online_store": {}})) + fs.apply( + [ +
driver_hourly_stats_view, + transformed_conv_rate, + driver, + entity, + benchmark_feature_service, + *benchmark_feature_views, + ] + ) + + print("setup_it finished") + + +if __name__ == "__main__": + main() diff --git a/sdk/python/feast/diff/registry_diff.py b/sdk/python/feast/diff/registry_diff.py index 56d5b84c71..37c8af9155 100644 --- a/sdk/python/feast/diff/registry_diff.py +++ b/sdk/python/feast/diff/registry_diff.py @@ -144,8 +144,8 @@ def diff_registry_objects( continue elif getattr(current_spec, _field.name) != getattr(new_spec, _field.name): if _field.name == "user_defined_function": - current_spec = cast(OnDemandFeatureViewSpec, current_proto) - new_spec = cast(OnDemandFeatureViewSpec, new_proto) + current_spec = cast(OnDemandFeatureViewSpec, current_spec) + new_spec = cast(OnDemandFeatureViewSpec, new_spec) current_udf = current_spec.user_defined_function new_udf = new_spec.user_defined_function for _udf_field in current_udf.DESCRIPTOR.fields: diff --git a/sdk/python/tests/unit/diff/test_registry_diff.py b/sdk/python/tests/unit/diff/test_registry_diff.py index 61f4f77702..8af6c50a13 100644 --- a/sdk/python/tests/unit/diff/test_registry_diff.py +++ b/sdk/python/tests/unit/diff/test_registry_diff.py @@ -1,9 +1,14 @@ +import pandas as pd + +from feast import Field from feast.diff.registry_diff import ( diff_registry_objects, tag_objects_for_keep_delete_update_add, ) from feast.entity import Entity from feast.feature_view import FeatureView +from feast.on_demand_feature_view import on_demand_feature_view +from feast.types import String from tests.utils.data_source_test_creator import prep_file_source @@ -89,3 +94,54 @@ def test_diff_registry_objects_feature_views(simple_dataset_1): assert feast_object_diffs.feast_object_property_diffs[0].val_declared == { "when": "after" } + + +def test_diff_odfv(simple_dataset_1): + with prep_file_source(df=simple_dataset_1, timestamp_field="ts_1") as file_source: + entity = Entity(name="id", join_keys=["id"]) + fv = 
FeatureView( + name="fv2", + entities=[entity], + source=file_source, + tags={"when": "before"}, + ) + + @on_demand_feature_view( + sources=[fv], + schema=[Field(name="first_char", dtype=String)], + ) + def pre_changed(inputs: pd.DataFrame) -> pd.DataFrame: + df = pd.DataFrame() + df["first_char"] = inputs["string_col"].str[:1].astype("string") + return df + + @on_demand_feature_view( + sources=[fv], + schema=[Field(name="first_char", dtype=String)], + ) + def post_changed(inputs: pd.DataFrame) -> pd.DataFrame: + df = pd.DataFrame() + df["first_char"] = inputs["string_col"].str[:1].astype("string") + "hi" + return df + + feast_object_diffs = diff_registry_objects( + pre_changed, pre_changed, "on demand feature view" + ) + assert len(feast_object_diffs.feast_object_property_diffs) == 0 + + feast_object_diffs = diff_registry_objects( + pre_changed, post_changed, "on demand feature view" + ) + + # Note that user_defined_function.body is excluded because it always changes (dill is non-deterministic), even + # if no code is changed + assert len(feast_object_diffs.feast_object_property_diffs) == 3 + assert feast_object_diffs.feast_object_property_diffs[0].property_name == "name" + assert ( + feast_object_diffs.feast_object_property_diffs[1].property_name + == "user_defined_function.name" + ) + assert ( + feast_object_diffs.feast_object_property_diffs[2].property_name + == "user_defined_function.body_text" + )