feat: Pandas v2 compatibility (#3957)
* feat: Support pandas v2

Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>

* fix: Prune dependencies

Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>

* chore: Re-compile reqs py310

Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>

* fix: Mark test skip with conditions

Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>

* chore: Re-compile reqs py39

Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>

* chore: Update skip reason

Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>

* chore: Re-compile reqs py38

Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>

* chore: Bump snowflake connector

Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>

* chore: Remove test skip

Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>

---------

Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>
sudohainguyen authored Mar 4, 2024
1 parent 591ba4e commit 64459ad
Showing 10 changed files with 27 additions and 78 deletions.
8 changes: 6 additions & 2 deletions sdk/python/feast/infra/offline_stores/file.py
@@ -4,6 +4,7 @@
from pathlib import Path
from typing import Any, Callable, List, Literal, Optional, Tuple, Union

import dask
import dask.dataframe as dd
import pandas as pd
import pyarrow
@@ -42,6 +43,11 @@
_run_dask_field_mapping,
)

# FileRetrievalJob will cast string objects to string[pyarrow] from dask version 2023.7.1
# This is not the desired behavior for our use case, so we set the convert-string option to False
# See (https://github.com/dask/dask/issues/10881#issuecomment-1923327936)
dask.config.set({"dataframe.convert-string": False})


class FileOfflineStoreConfig(FeastConfigBaseModel):
"""Offline store config for local (file-based) store"""
@@ -366,8 +372,6 @@ def evaluate_offline_job():
source_df[DUMMY_ENTITY_ID] = DUMMY_ENTITY_VAL
columns_to_extract.add(DUMMY_ENTITY_ID)

source_df = source_df.persist()

return source_df[list(columns_to_extract)].persist()

# When materializing a single feature view, we don't need full feature names. On demand transforms aren't materialized
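
For context, a minimal sketch (not part of this diff) of the behavior the new `dask.config.set({"dataframe.convert-string": False})` line guards against, assuming dask >= 2023.7.1 with pyarrow installed:

```python
import dask
import dask.dataframe as dd
import pandas as pd

pdf = pd.DataFrame({"driver_id": [1001, 1002], "name": ["alice", "bob"]})

# With conversion enabled (the dask default since 2023.7.1 when pyarrow is
# available), object string columns are typically cast to string[pyarrow].
with dask.config.set({"dataframe.convert-string": True}):
    print(dd.from_pandas(pdf, npartitions=1)["name"].dtype)

# With conversion disabled, as file.py now configures globally, the column
# keeps the plain object dtype that FileRetrievalJob expects.
with dask.config.set({"dataframe.convert-string": False}):
    print(dd.from_pandas(pdf, npartitions=1)["name"].dtype)
```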
14 changes: 3 additions & 11 deletions sdk/python/requirements/py3.10-ci-requirements.txt
@@ -179,10 +179,6 @@ executing==2.0.1
# via stack-data
fastapi==0.109.2
# via feast (setup.py)
fastavro==1.9.4
# via
# feast (setup.py)
# pandavro
fastjsonschema==2.19.1
# via nbformat
filelock==3.13.1
@@ -515,7 +511,6 @@ numpy==1.24.4
# great-expectations
# ibis-framework
# pandas
# pandavro
# pyarrow
# scipy
oauthlib==3.2.2
@@ -543,18 +538,14 @@ packaging==23.2
# pytest
# snowflake-connector-python
# sphinx
pandas==1.5.3
pandas==2.2.0 ; python_version >= "3.9"
# via
# altair
# db-dtypes
# feast (setup.py)
# google-cloud-bigquery
# great-expectations
# ibis-framework
# pandavro
# snowflake-connector-python
pandavro==1.5.2
# via feast (setup.py)
pandocfilters==1.5.1
# via nbconvert
parso==0.8.3
@@ -824,7 +815,6 @@ six==1.16.0
# isodate
# kubernetes
# mock
# pandavro
# python-dateutil
# rfc3339-validator
# thriftpy2
@@ -975,6 +965,8 @@ typing-extensions==4.9.0
# sqlalchemy2-stubs
# typeguard
# uvicorn
tzdata==2024.1
# via pandas
tzlocal==5.2
# via
# great-expectations
15 changes: 2 additions & 13 deletions sdk/python/requirements/py3.10-requirements.txt
@@ -46,10 +46,6 @@ exceptiongroup==1.2.0
# via anyio
fastapi==0.109.2
# via feast (setup.py)
fastavro==1.9.4
# via
# feast (setup.py)
# pandavro
fissix==21.11.13
# via bowler
fsspec==2024.2.0
@@ -115,17 +111,12 @@ numpy==1.24.4
# via
# feast (setup.py)
# pandas
# pandavro
# pyarrow
packaging==23.2
# via
# dask
# gunicorn
pandas==1.5.3
# via
# feast (setup.py)
# pandavro
pandavro==1.5.2
pandas==2.2.0
# via feast (setup.py)
partd==1.4.1
# via dask
@@ -171,9 +162,7 @@ rpds-py==0.18.0
# jsonschema
# referencing
six==1.16.0
# via
# pandavro
# python-dateutil
# via python-dateutil
sniffio==1.3.0
# via
# anyio
11 changes: 1 addition & 10 deletions sdk/python/requirements/py3.8-ci-requirements.txt
@@ -182,10 +182,6 @@ executing==2.0.1
# via stack-data
fastapi==0.109.2
# via feast (setup.py)
fastavro==1.9.4
# via
# feast (setup.py)
# pandavro
fastjsonschema==2.19.1
# via nbformat
filelock==3.13.1
@@ -530,7 +526,6 @@ numpy==1.24.4
# great-expectations
# ibis-framework
# pandas
# pandavro
# pyarrow
# scipy
oauthlib==3.2.2
@@ -558,18 +553,14 @@ packaging==23.2
# pytest
# snowflake-connector-python
# sphinx
pandas==1.5.3
pandas==1.5.3 ; python_version < "3.9"
# via
# altair
# db-dtypes
# feast (setup.py)
# google-cloud-bigquery
# great-expectations
# ibis-framework
# pandavro
# snowflake-connector-python
pandavro==1.5.2
# via feast (setup.py)
pandocfilters==1.5.1
# via nbconvert
parso==0.8.3
15 changes: 2 additions & 13 deletions sdk/python/requirements/py3.8-requirements.txt
@@ -46,10 +46,6 @@ exceptiongroup==1.2.0
# via anyio
fastapi==0.109.2
# via feast (setup.py)
fastavro==1.9.4
# via
# feast (setup.py)
# pandavro
fissix==21.11.13
# via bowler
fsspec==2024.2.0
@@ -119,17 +115,12 @@ numpy==1.24.4
# via
# feast (setup.py)
# pandas
# pandavro
# pyarrow
packaging==23.2
# via
# dask
# gunicorn
pandas==1.5.3
# via
# feast (setup.py)
# pandavro
pandavro==1.5.2
pandas==2.0.3
# via feast (setup.py)
partd==1.4.1
# via dask
@@ -177,9 +168,7 @@ rpds-py==0.18.0
# jsonschema
# referencing
six==1.16.0
# via
# pandavro
# python-dateutil
# via python-dateutil
sniffio==1.3.0
# via
# anyio
14 changes: 3 additions & 11 deletions sdk/python/requirements/py3.9-ci-requirements.txt
@@ -179,10 +179,6 @@ executing==2.0.1
# via stack-data
fastapi==0.109.2
# via feast (setup.py)
fastavro==1.9.4
# via
# feast (setup.py)
# pandavro
fastjsonschema==2.19.1
# via nbformat
filelock==3.13.1
@@ -523,7 +519,6 @@ numpy==1.24.4
# great-expectations
# ibis-framework
# pandas
# pandavro
# pyarrow
# scipy
oauthlib==3.2.2
@@ -551,18 +546,14 @@ packaging==23.2
# pytest
# snowflake-connector-python
# sphinx
pandas==1.5.3
pandas==2.2.0
# via
# altair
# db-dtypes
# feast (setup.py)
# google-cloud-bigquery
# great-expectations
# ibis-framework
# pandavro
# snowflake-connector-python
pandavro==1.5.2
# via feast (setup.py)
pandocfilters==1.5.1
# via nbconvert
parso==0.8.3
@@ -834,7 +825,6 @@ six==1.16.0
# isodate
# kubernetes
# mock
# pandavro
# python-dateutil
# rfc3339-validator
# thriftpy2
@@ -988,6 +978,8 @@ typing-extensions==4.9.0
# starlette
# typeguard
# uvicorn
tzdata==2024.1
# via pandas
tzlocal==5.2
# via
# great-expectations
15 changes: 2 additions & 13 deletions sdk/python/requirements/py3.9-requirements.txt
@@ -46,10 +46,6 @@ exceptiongroup==1.2.0
# via anyio
fastapi==0.109.2
# via feast (setup.py)
fastavro==1.9.4
# via
# feast (setup.py)
# pandavro
fissix==21.11.13
# via bowler
fsspec==2024.2.0
@@ -116,17 +112,12 @@ numpy==1.24.4
# via
# feast (setup.py)
# pandas
# pandavro
# pyarrow
packaging==23.2
# via
# dask
# gunicorn
pandas==1.5.3
# via
# feast (setup.py)
# pandavro
pandavro==1.5.2
pandas==2.2.0
# via feast (setup.py)
partd==1.4.1
# via dask
@@ -172,9 +163,7 @@ rpds-py==0.18.0
# jsonschema
# referencing
six==1.16.0
# via
# pandavro
# python-dateutil
# via python-dateutil
sniffio==1.3.0
# via
# anyio
2 changes: 1 addition & 1 deletion sdk/python/tests/integration/e2e/test_validation.py
@@ -167,7 +167,7 @@ def test_logged_features_validation(environment, universal_data_sources):
{
"customer_id": 2000 + i,
"driver_id": 6000 + i,
"event_timestamp": datetime.datetime.now(),
"event_timestamp": make_tzaware(datetime.datetime.now()),
}
]
),
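
The test now builds a timezone-aware timestamp instead of a naive `datetime.datetime.now()`. A hedged sketch of the assumed behavior (the real `make_tzaware` is presumably imported from Feast's utils; the stand-in below simply attaches UTC to naive datetimes):

```python
import datetime

def make_tzaware(ts: datetime.datetime) -> datetime.datetime:
    # Stand-in for Feast's helper: treat naive timestamps as UTC.
    return ts.replace(tzinfo=datetime.timezone.utc) if ts.tzinfo is None else ts

naive = datetime.datetime.now()
aware = make_tzaware(naive)
print(naive.tzinfo, aware.tzinfo)  # None vs. datetime.timezone.utc
```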
@@ -340,6 +340,11 @@ def test_historical_features_with_entities_from_query(

table_from_sql_entities = job_from_sql.to_arrow().to_pandas()
for col in table_from_sql_entities.columns:
# check if col dtype is timezone naive
if pd.api.types.is_datetime64_dtype(table_from_sql_entities[col]):
table_from_sql_entities[col] = table_from_sql_entities[col].dt.tz_localize(
"UTC"
)
expected_df_query[col] = expected_df_query[col].astype(
table_from_sql_entities[col].dtype
)
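
Under pandas 2, comparing a timezone-naive `datetime64[ns]` column against a timezone-aware expected frame fails on dtype, which is what the added localization avoids. A minimal sketch of the same step on an assumed `event_timestamp` column:

```python
import pandas as pd

df = pd.DataFrame({"event_timestamp": pd.to_datetime(["2024-03-01 00:00:00"])})

# Mirror of the added test logic: naive datetime columns are localized to UTC
# so their dtype lines up with the timezone-aware expected data.
if pd.api.types.is_datetime64_dtype(df["event_timestamp"]):
    df["event_timestamp"] = df["event_timestamp"].dt.tz_localize("UTC")

print(df["event_timestamp"].dtype)  # datetime64[ns, UTC]
```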
6 changes: 2 additions & 4 deletions setup.py
@@ -44,7 +44,6 @@
"click>=7.0.0,<9.0.0",
"colorama>=0.3.9,<1",
"dill~=0.3.0",
"fastavro>=1.1.0,<2",
"grpcio>=1.56.2,<2",
"grpcio-tools>=1.56.2,<2",
"grpcio-reflection>=1.56.2,<2",
@@ -54,9 +53,7 @@
"jsonschema",
"mmh3",
"numpy>=1.22,<1.25",
"pandas>=1.4.3,<2",
# For some reason pandavro higher than 1.5.* only support pandas less than 1.3.
"pandavro~=1.5.0",
"pandas>=1.4.3,<3",
# Higher than 4.23.4 seems to cause a seg fault
"protobuf<4.23.4,>3.20",
"proto-plus>=1.20.0,<2",
@@ -190,6 +187,7 @@
"types-setuptools",
"types-tabulate",
"virtualenv<20.24.2",
"pandas>=1.4.3,<2; python_version < '3.9'",
]
+ GCP_REQUIRED
+ REDIS_REQUIRED
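
The new dev requirement uses a PEP 508 environment marker so Python 3.8 environments stay on pandas 1.x while newer interpreters pick up pandas 2. A small, illustrative sketch of how such a marker evaluates, using the `packaging` library that pip relies on:

```python
from packaging.requirements import Requirement

req = Requirement("pandas>=1.4.3,<2; python_version < '3.9'")

# The marker decides whether the extra pin applies for a given interpreter.
print(req.marker.evaluate({"python_version": "3.8"}))   # True: keep pandas<2 on 3.8
print(req.marker.evaluate({"python_version": "3.10"}))  # False: pin is skipped
```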
