diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py index 0e5064ba78..0b873a2091 100644 --- a/sdk/python/feast/infra/offline_stores/file.py +++ b/sdk/python/feast/infra/offline_stores/file.py @@ -4,6 +4,7 @@ from pathlib import Path from typing import Any, Callable, List, Literal, Optional, Tuple, Union +import dask import dask.dataframe as dd import pandas as pd import pyarrow @@ -42,6 +43,11 @@ _run_dask_field_mapping, ) +# FileRetrievalJob will cast string objects to string[pyarrow] from dask version 2023.7.1 +# This is not the desired behavior for our use case, so we set the convert-string option to False +# See (https://github.com/dask/dask/issues/10881#issuecomment-1923327936) +dask.config.set({"dataframe.convert-string": False}) + class FileOfflineStoreConfig(FeastConfigBaseModel): """Offline store config for local (file-based) store""" @@ -366,8 +372,6 @@ def evaluate_offline_job(): source_df[DUMMY_ENTITY_ID] = DUMMY_ENTITY_VAL columns_to_extract.add(DUMMY_ENTITY_ID) - source_df = source_df.persist() - return source_df[list(columns_to_extract)].persist() # When materializing a single feature view, we don't need full feature names. On demand transforms aren't materialized diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index 39d33caa39..e41706f403 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -179,10 +179,6 @@ executing==2.0.1 # via stack-data fastapi==0.109.2 # via feast (setup.py) -fastavro==1.9.4 - # via - # feast (setup.py) - # pandavro fastjsonschema==2.19.1 # via nbformat filelock==3.13.1 @@ -515,7 +511,6 @@ numpy==1.24.4 # great-expectations # ibis-framework # pandas - # pandavro # pyarrow # scipy oauthlib==3.2.2 @@ -543,18 +538,14 @@ packaging==23.2 # pytest # snowflake-connector-python # sphinx -pandas==1.5.3 +pandas==2.2.0 ; python_version >= "3.9" # via # altair # db-dtypes # feast (setup.py) # google-cloud-bigquery # great-expectations - # ibis-framework - # pandavro # snowflake-connector-python -pandavro==1.5.2 - # via feast (setup.py) pandocfilters==1.5.1 # via nbconvert parso==0.8.3 @@ -824,7 +815,6 @@ six==1.16.0 # isodate # kubernetes # mock - # pandavro # python-dateutil # rfc3339-validator # thriftpy2 @@ -975,6 +965,8 @@ typing-extensions==4.9.0 # sqlalchemy2-stubs # typeguard # uvicorn +tzdata==2024.1 + # via pandas tzlocal==5.2 # via # great-expectations diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 7141cd0f25..b5dd9a78be 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -46,10 +46,6 @@ exceptiongroup==1.2.0 # via anyio fastapi==0.109.2 # via feast (setup.py) -fastavro==1.9.4 - # via - # feast (setup.py) - # pandavro fissix==21.11.13 # via bowler fsspec==2024.2.0 @@ -115,17 +111,12 @@ numpy==1.24.4 # via # feast (setup.py) # pandas - # pandavro # pyarrow packaging==23.2 # via # dask # gunicorn -pandas==1.5.3 - # via - # feast (setup.py) - # pandavro -pandavro==1.5.2 +pandas==2.2.0 # via feast (setup.py) partd==1.4.1 # via dask @@ -171,9 +162,7 @@ rpds-py==0.18.0 # jsonschema # referencing six==1.16.0 - # via - # pandavro - # python-dateutil + # via python-dateutil sniffio==1.3.0 # via # anyio diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index 0356b85973..339a6b1c49 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -182,10 +182,6 @@ executing==2.0.1 # via stack-data fastapi==0.109.2 # via feast (setup.py) -fastavro==1.9.4 - # via - # feast (setup.py) - # pandavro fastjsonschema==2.19.1 # via nbformat filelock==3.13.1 @@ -530,7 +526,6 @@ numpy==1.24.4 # great-expectations # ibis-framework # pandas - # pandavro # pyarrow # scipy oauthlib==3.2.2 @@ -558,18 +553,14 @@ packaging==23.2 # pytest # snowflake-connector-python # sphinx -pandas==1.5.3 +pandas==1.5.3 ; python_version < "3.9" # via # altair # db-dtypes # feast (setup.py) # google-cloud-bigquery # great-expectations - # ibis-framework - # pandavro # snowflake-connector-python -pandavro==1.5.2 - # via feast (setup.py) pandocfilters==1.5.1 # via nbconvert parso==0.8.3 diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index 541beecf0d..d00a77ee6f 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -46,10 +46,6 @@ exceptiongroup==1.2.0 # via anyio fastapi==0.109.2 # via feast (setup.py) -fastavro==1.9.4 - # via - # feast (setup.py) - # pandavro fissix==21.11.13 # via bowler fsspec==2024.2.0 @@ -119,17 +115,12 @@ numpy==1.24.4 # via # feast (setup.py) # pandas - # pandavro # pyarrow packaging==23.2 # via # dask # gunicorn -pandas==1.5.3 - # via - # feast (setup.py) - # pandavro -pandavro==1.5.2 +pandas==2.0.3 # via feast (setup.py) partd==1.4.1 # via dask @@ -177,9 +168,7 @@ rpds-py==0.18.0 # jsonschema # referencing six==1.16.0 - # via - # pandavro - # python-dateutil + # via python-dateutil sniffio==1.3.0 # via # anyio diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 6515779481..99f7ee0285 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -179,10 +179,6 @@ executing==2.0.1 # via stack-data fastapi==0.109.2 # via feast (setup.py) -fastavro==1.9.4 - # via - # feast (setup.py) - # pandavro fastjsonschema==2.19.1 # via nbformat filelock==3.13.1 @@ -523,7 +519,6 @@ numpy==1.24.4 # great-expectations # ibis-framework # pandas - # pandavro # pyarrow # scipy oauthlib==3.2.2 @@ -551,18 +546,14 @@ packaging==23.2 # pytest # snowflake-connector-python # sphinx -pandas==1.5.3 +pandas==2.2.0 # via # altair # db-dtypes # feast (setup.py) # google-cloud-bigquery # great-expectations - # ibis-framework - # pandavro # snowflake-connector-python -pandavro==1.5.2 - # via feast (setup.py) pandocfilters==1.5.1 # via nbconvert parso==0.8.3 @@ -834,7 +825,6 @@ six==1.16.0 # isodate # kubernetes # mock - # pandavro # python-dateutil # rfc3339-validator # thriftpy2 @@ -988,6 +978,8 @@ typing-extensions==4.9.0 # starlette # typeguard # uvicorn +tzdata==2024.1 + # via pandas tzlocal==5.2 # via # great-expectations diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 12c14a03ae..4364dc62bf 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -46,10 +46,6 @@ exceptiongroup==1.2.0 # via anyio fastapi==0.109.2 # via feast (setup.py) -fastavro==1.9.4 - # via - # feast (setup.py) - # pandavro fissix==21.11.13 # via bowler fsspec==2024.2.0 @@ -116,17 +112,12 @@ numpy==1.24.4 # via # feast (setup.py) # pandas - # pandavro # pyarrow packaging==23.2 # via # dask # gunicorn -pandas==1.5.3 - # via - # feast (setup.py) - # pandavro -pandavro==1.5.2 +pandas==2.2.0 # via feast (setup.py) partd==1.4.1 # via dask @@ -172,9 +163,7 @@ rpds-py==0.18.0 # jsonschema # referencing six==1.16.0 - # via - # pandavro - # python-dateutil + # via python-dateutil sniffio==1.3.0 # via # anyio diff --git a/sdk/python/tests/integration/e2e/test_validation.py b/sdk/python/tests/integration/e2e/test_validation.py index f49ed80a26..fdf182be57 100644 --- a/sdk/python/tests/integration/e2e/test_validation.py +++ b/sdk/python/tests/integration/e2e/test_validation.py @@ -167,7 +167,7 @@ def test_logged_features_validation(environment, universal_data_sources): { "customer_id": 2000 + i, "driver_id": 6000 + i, - "event_timestamp": datetime.datetime.now(), + "event_timestamp": make_tzaware(datetime.datetime.now()), } ] ), diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index 0abb290563..9baba2397b 100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -340,6 +340,11 @@ def test_historical_features_with_entities_from_query( table_from_sql_entities = job_from_sql.to_arrow().to_pandas() for col in table_from_sql_entities.columns: + # check if col dtype is timezone naive + if pd.api.types.is_datetime64_dtype(table_from_sql_entities[col]): + table_from_sql_entities[col] = table_from_sql_entities[col].dt.tz_localize( + "UTC" + ) expected_df_query[col] = expected_df_query[col].astype( table_from_sql_entities[col].dtype ) diff --git a/setup.py b/setup.py index 57decb8301..484024b206 100644 --- a/setup.py +++ b/setup.py @@ -44,7 +44,6 @@ "click>=7.0.0,<9.0.0", "colorama>=0.3.9,<1", "dill~=0.3.0", - "fastavro>=1.1.0,<2", "grpcio>=1.56.2,<2", "grpcio-tools>=1.56.2,<2", "grpcio-reflection>=1.56.2,<2", @@ -54,9 +53,7 @@ "jsonschema", "mmh3", "numpy>=1.22,<1.25", - "pandas>=1.4.3,<2", - # For some reason pandavro higher than 1.5.* only support pandas less than 1.3. - "pandavro~=1.5.0", + "pandas>=1.4.3,<3", # Higher than 4.23.4 seems to cause a seg fault "protobuf<4.23.4,>3.20", "proto-plus>=1.20.0,<2", @@ -190,6 +187,7 @@ "types-setuptools", "types-tabulate", "virtualenv<20.24.2", + "pandas>=1.4.3,<2; python_version < '3.9'", ] + GCP_REQUIRED + REDIS_REQUIRED