diff --git a/Makefile b/Makefile
index 4b85c0e448..6736e64078 100644
--- a/Makefile
+++ b/Makefile
@@ -310,7 +310,7 @@ format-python:
 	cd ${ROOT_DIR}/sdk/python; python -m black --target-version py38 feast tests
 
 lint-python:
-	cd ${ROOT_DIR}/sdk/python; python -m mypy
+	cd ${ROOT_DIR}/sdk/python; python -m mypy --exclude=/tests/ --follow-imports=skip feast
 	cd ${ROOT_DIR}/sdk/python; python -m isort feast/ tests/ --check-only
 	cd ${ROOT_DIR}/sdk/python; python -m flake8 feast/ tests/
 	cd ${ROOT_DIR}/sdk/python; python -m black --check feast tests
diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py
index b7ce19aad9..3421fd5d30 100644
--- a/sdk/python/feast/data_source.py
+++ b/sdk/python/feast/data_source.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import enum
 import warnings
 from abc import ABC, abstractmethod
@@ -485,12 +484,12 @@ def to_proto(self) -> DataSourceProto:
         return data_source_proto
 
     def validate(self, config: RepoConfig):
-        pass
+        raise NotImplementedError
 
     def get_table_column_names_and_types(
         self, config: RepoConfig
     ) -> Iterable[Tuple[str, str]]:
-        pass
+        raise NotImplementedError
 
     @staticmethod
     def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]:
@@ -534,12 +533,12 @@ def __init__(
         self.schema = schema
 
     def validate(self, config: RepoConfig):
-        pass
+        raise NotImplementedError
 
     def get_table_column_names_and_types(
         self, config: RepoConfig
     ) -> Iterable[Tuple[str, str]]:
-        pass
+        raise NotImplementedError
 
     def __eq__(self, other):
         if not isinstance(other, RequestSource):
@@ -610,12 +609,12 @@ def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]:
 @typechecked
 class KinesisSource(DataSource):
     def validate(self, config: RepoConfig):
-        pass
+        raise NotImplementedError
 
     def get_table_column_names_and_types(
         self, config: RepoConfig
     ) -> Iterable[Tuple[str, str]]:
-        pass
+        raise NotImplementedError
 
     @staticmethod
     def from_proto(data_source: DataSourceProto):
@@ -639,7 +638,7 @@ def from_proto(data_source: DataSourceProto):
 
     @staticmethod
     def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]:
-        pass
+        raise NotImplementedError
 
     def get_table_query_string(self) -> str:
         raise NotImplementedError
@@ -772,12 +771,12 @@ def __hash__(self):
         return super().__hash__()
 
     def validate(self, config: RepoConfig):
-        pass
+        raise NotImplementedError
 
     def get_table_column_names_and_types(
         self, config: RepoConfig
     ) -> Iterable[Tuple[str, str]]:
-        pass
+        raise NotImplementedError
 
     @staticmethod
     def from_proto(data_source: DataSourceProto):
diff --git a/sdk/python/feast/feature_service.py b/sdk/python/feast/feature_service.py
index c3037a55da..7ec923205a 100644
--- a/sdk/python/feast/feature_service.py
+++ b/sdk/python/feast/feature_service.py
@@ -56,7 +56,7 @@ def __init__(
         *,
         name: str,
         features: List[Union[FeatureView, OnDemandFeatureView]],
-        tags: Dict[str, str] = None,
+        tags: Optional[Dict[str, str]] = None,
         description: str = "",
         owner: str = "",
         logging_config: Optional[LoggingConfig] = None,
diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py
index 67f9662d31..f87ae7ab13 100644
--- a/sdk/python/feast/feature_view.py
+++ b/sdk/python/feast/feature_view.py
@@ -101,7 +101,7 @@ def __init__(
         name: str,
         source: DataSource,
         schema: Optional[List[Field]] = None,
-        entities: List[Entity] = None,
+        entities: Optional[List[Entity]] = None,
         ttl: Optional[timedelta] = timedelta(days=0),
         online: bool = True,
         description: str = "",
diff --git a/sdk/python/feast/importer.py b/sdk/python/feast/importer.py
index bbd592101a..d1d7d62901 100644
--- a/sdk/python/feast/importer.py
+++ b/sdk/python/feast/importer.py
@@ -1,4 +1,5 @@
 import importlib
+from typing import Optional
 
 from feast.errors import (
     FeastClassImportError,
@@ -7,7 +8,7 @@
 )
 
 
-def import_class(module_name: str, class_name: str, class_type: str = None):
+def import_class(module_name: str, class_name: str, class_type: Optional[str] = None):
     """
     Dynamically loads and returns a class from a module.
 
diff --git a/sdk/python/feast/infra/contrib/spark_kafka_processor.py b/sdk/python/feast/infra/contrib/spark_kafka_processor.py
index ea55d89988..bac1c28b06 100644
--- a/sdk/python/feast/infra/contrib/spark_kafka_processor.py
+++ b/sdk/python/feast/infra/contrib/spark_kafka_processor.py
@@ -5,6 +5,7 @@
 from pyspark.sql import DataFrame, SparkSession
 from pyspark.sql.avro.functions import from_avro
 from pyspark.sql.functions import col, from_json
+from pyspark.sql.streaming import StreamingQuery
 
 from feast.data_format import AvroFormat, JsonFormat
 from feast.data_source import KafkaSource, PushMode
@@ -63,7 +64,13 @@ def __init__(
         self.join_keys = [fs.get_entity(entity).join_key for entity in sfv.entities]
         super().__init__(fs=fs, sfv=sfv, data_source=sfv.stream_source)
 
-    def ingest_stream_feature_view(self, to: PushMode = PushMode.ONLINE) -> None:
+        # Type hinting for data_source type.
+        # data_source type has been checked to be an instance of KafkaSource.
+        self.data_source: KafkaSource = self.data_source  # type: ignore
+
+    def ingest_stream_feature_view(
+        self, to: PushMode = PushMode.ONLINE
+    ) -> StreamingQuery:
         ingested_stream_df = self._ingest_stream_data()
         transformed_df = self._construct_transformation_plan(ingested_stream_df)
         online_store_query = self._write_stream_data(transformed_df, to)
@@ -122,7 +129,7 @@ def _ingest_stream_data(self) -> StreamTable:
     def _construct_transformation_plan(self, df: StreamTable) -> StreamTable:
         return self.sfv.udf.__call__(df) if self.sfv.udf else df
 
-    def _write_stream_data(self, df: StreamTable, to: PushMode):
+    def _write_stream_data(self, df: StreamTable, to: PushMode) -> StreamingQuery:
         # Validation occurs at the fs.write_to_online_store() phase against the stream feature view schema.
         def batch_write(row: DataFrame, batch_id: int):
             rows: pd.DataFrame = row.toPandas()
diff --git a/sdk/python/feast/infra/contrib/stream_processor.py b/sdk/python/feast/infra/contrib/stream_processor.py
index 24817c82ea..df4e144f8c 100644
--- a/sdk/python/feast/infra/contrib/stream_processor.py
+++ b/sdk/python/feast/infra/contrib/stream_processor.py
@@ -3,6 +3,7 @@
 from typing import TYPE_CHECKING, Optional
 
 from pyspark.sql import DataFrame
+from typing_extensions import TypeAlias
 
 from feast.data_source import DataSource, PushMode
 from feast.importer import import_class
@@ -17,7 +18,7 @@
 }
 
 # TODO: support more types other than just Spark.
-StreamTable = DataFrame
+StreamTable: TypeAlias = DataFrame
 
 
 class ProcessorConfig(FeastConfigBaseModel):
@@ -54,13 +55,13 @@ def ingest_stream_feature_view(self, to: PushMode = PushMode.ONLINE) -> None:
         Ingests data from the stream source attached to the stream feature view; transforms the data
         and then persists it to the online store and/or offline store, depending on the 'to' parameter.
         """
-        pass
+        raise NotImplementedError
 
     def _ingest_stream_data(self) -> StreamTable:
         """
         Ingests data into a StreamTable.
         """
-        pass
+        raise NotImplementedError
 
     def _construct_transformation_plan(self, table: StreamTable) -> StreamTable:
         """
@@ -68,14 +69,14 @@ def _construct_transformation_plan(self, table: StreamTable) -> StreamTable:
         evaluation, the StreamTable will not be materialized until it is actually evaluated.
         For example: df.collect() in spark or tbl.execute() in Flink.
         """
-        pass
+        raise NotImplementedError
 
     def _write_stream_data(self, table: StreamTable, to: PushMode) -> None:
         """
         Launches a job to persist stream data to the online store and/or offline store, depending
         on the 'to' parameter, and returns a handle for the job.
         """
-        pass
+        raise NotImplementedError
 
 
 def get_stream_processor_object(
diff --git a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena_source.py b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena_source.py
index 8e9e3893f3..0aca42cd68 100644
--- a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena_source.py
+++ b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena_source.py
@@ -297,9 +297,9 @@ class SavedDatasetAthenaStorage(SavedDatasetStorage):
     def __init__(
         self,
         table_ref: str,
-        query: str = None,
-        database: str = None,
-        data_source: str = None,
+        query: Optional[str] = None,
+        database: Optional[str] = None,
+        data_source: Optional[str] = None,
     ):
         self.athena_options = AthenaOptions(
             table=table_ref, query=query, database=database, data_source=data_source
diff --git a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py
index 384ab69e81..f68e109d6c 100644
--- a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py
+++ b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/tests/data_source.py
@@ -51,7 +51,7 @@ def create_data_source(
         suffix: Optional[str] = None,
         timestamp_field="ts",
         created_timestamp_column="created_ts",
-        field_mapping: Dict[str, str] = None,
+        field_mapping: Optional[Dict[str, str]] = None,
     ) -> DataSource:
 
         table_name = destination_name
diff --git a/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/tests/data_source.py
index 9b751d98ef..2604cf7c18 100644
--- a/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/tests/data_source.py
+++ b/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/tests/data_source.py
@@ -1,4 +1,4 @@
-from typing import Dict, List
+from typing import Dict, List, Optional
 
 import pandas as pd
 import pytest
@@ -66,7 +66,7 @@ def create_data_source(
         destination_name: str,
         timestamp_field="ts",
         created_timestamp_column="created_ts",
-        field_mapping: Dict[str, str] = None,
+        field_mapping: Optional[Dict[str, str]] = None,
         **kwargs,
     ) -> DataSource:
         # Make sure the field mapping is correct and convert the datetime datasources.
@@ -99,10 +99,10 @@ def create_data_source(
         )
 
     def create_saved_dataset_destination(self) -> SavedDatasetStorage:
-        pass
+        raise NotImplementedError
 
     def get_prefixed_table_name(self, destination_name: str) -> str:
         return f"{self.project_name}_{destination_name}"
 
     def teardown(self):
-        pass
+        raise NotImplementedError
diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py
index f447950132..224fcea30f 100644
--- a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py
+++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/tests/data_source.py
@@ -85,7 +85,7 @@ def create_data_source(
         suffix: Optional[str] = None,
         timestamp_field="ts",
         created_timestamp_column="created_ts",
-        field_mapping: Dict[str, str] = None,
+        field_mapping: Optional[Dict[str, str]] = None,
     ) -> DataSource:
         destination_name = self.get_prefixed_table_name(destination_name)
 
diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py
index 71c07b20c2..7b4fda3b5f 100644
--- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py
+++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py
@@ -2,7 +2,7 @@
 import shutil
 import tempfile
 import uuid
-from typing import Dict, List
+from typing import Dict, List, Optional
 
 import pandas as pd
 from pyspark import SparkConf
@@ -70,7 +70,7 @@ def create_data_source(
         destination_name: str,
         timestamp_field="ts",
         created_timestamp_column="created_ts",
-        field_mapping: Dict[str, str] = None,
+        field_mapping: Optional[Dict[str, str]] = None,
         **kwargs,
     ) -> DataSource:
         if timestamp_field in df:
diff --git a/sdk/python/feast/infra/offline_stores/file_source.py b/sdk/python/feast/infra/offline_stores/file_source.py
index ac824b359f..887b410079 100644
--- a/sdk/python/feast/infra/offline_stores/file_source.py
+++ b/sdk/python/feast/infra/offline_stores/file_source.py
@@ -183,7 +183,7 @@ def create_filesystem_and_path(
         return None, path
 
     def get_table_query_string(self) -> str:
-        pass
+        raise NotImplementedError
 
 
 class FileOptions:
diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py
index 6141e3c435..30135feccb 100644
--- a/sdk/python/feast/infra/offline_stores/offline_store.py
+++ b/sdk/python/feast/infra/offline_stores/offline_store.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import warnings
-from abc import ABC, abstractmethod
+from abc import ABC
 from datetime import datetime
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Callable, List, Optional, Union
@@ -150,9 +150,8 @@ def to_sql(self) -> str:
         """
         Return RetrievalJob generated SQL statement if applicable.
         """
-        pass
+        raise NotImplementedError
 
-    @abstractmethod
     def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame:
         """
         Synchronously executes the underlying query and returns the result as a pandas dataframe.
@@ -162,9 +161,8 @@ def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame:
         Does not handle on demand transformations or dataset validation. For either of those,
         `to_df` should be used.
         """
-        pass
+        raise NotImplementedError
 
-    @abstractmethod
     def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table:
         """
         Synchronously executes the underlying query and returns the result as an arrow table.
@@ -174,21 +172,18 @@ def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table:
         Does not handle on demand transformations or dataset validation. For either of those,
         `to_arrow` should be used.
         """
-        pass
+        raise NotImplementedError
 
     @property
-    @abstractmethod
     def full_feature_names(self) -> bool:
         """Returns True if full feature names should be applied to the results of the query."""
-        pass
+        raise NotImplementedError
 
     @property
-    @abstractmethod
     def on_demand_feature_views(self) -> List[OnDemandFeatureView]:
         """Returns a list containing all the on demand feature views to be handled."""
-        pass
+        raise NotImplementedError
 
-    @abstractmethod
     def persist(
         self,
         storage: SavedDatasetStorage,
@@ -204,13 +199,12 @@ def persist(
             allow_overwrite: If True, a pre-existing location (e.g. table or file) can be overwritten.
                 Currently not all individual offline store implementations make use of this parameter.
         """
-        pass
+        raise NotImplementedError
 
     @property
-    @abstractmethod
     def metadata(self) -> Optional[RetrievalMetadata]:
         """Returns metadata about the retrieval job."""
-        pass
+        raise NotImplementedError
 
     def supports_remote_storage_export(self) -> bool:
         """Returns True if the RetrievalJob supports `to_remote_storage`."""
@@ -226,7 +220,7 @@ def to_remote_storage(self) -> List[str]:
         Returns:
             A list of parquet file paths in remote storage.
         """
-        raise NotImplementedError()
+        raise NotImplementedError
 
 
 class OfflineStore(ABC):
@@ -239,7 +233,6 @@ class OfflineStore(ABC):
     """
 
     @staticmethod
-    @abstractmethod
     def pull_latest_from_table_or_query(
         config: RepoConfig,
         data_source: DataSource,
@@ -270,10 +263,9 @@ def pull_latest_from_table_or_query(
         Returns:
             A RetrievalJob that can be executed to get the entity rows.
         """
-        pass
+        raise NotImplementedError
 
     @staticmethod
-    @abstractmethod
     def get_historical_features(
         config: RepoConfig,
         feature_views: List[FeatureView],
@@ -302,10 +294,9 @@ def get_historical_features(
         Returns:
             A RetrievalJob that can be executed to get the features.
         """
-        pass
+        raise NotImplementedError
 
     @staticmethod
-    @abstractmethod
     def pull_all_from_table_or_query(
         config: RepoConfig,
         data_source: DataSource,
@@ -334,7 +325,7 @@ def pull_all_from_table_or_query(
         Returns:
             A RetrievalJob that can be executed to get the entity rows.
         """
-        pass
+        raise NotImplementedError
 
     @staticmethod
     def write_logged_features(
@@ -358,7 +349,7 @@ def write_logged_features(
             logging_config: A LoggingConfig object that determines where the logs will be written.
             registry: The registry for the current feature store.
         """
-        raise NotImplementedError()
+        raise NotImplementedError
 
     @staticmethod
     def offline_write_batch(
@@ -377,4 +368,4 @@ def offline_write_batch(
             progress: Function to be called once a portion of the data has been written, used
                 to show progress.
         """
-        raise NotImplementedError()
+        raise NotImplementedError
diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py
index 837cf49655..6034bf5ac7 100644
--- a/sdk/python/feast/infra/offline_stores/redshift.py
+++ b/sdk/python/feast/infra/offline_stores/redshift.py
@@ -51,13 +51,13 @@ class RedshiftOfflineStoreConfig(FeastConfigBaseModel):
     type: Literal["redshift"] = "redshift"
     """ Offline store type selector"""
 
-    cluster_id: Optional[StrictStr]
+    cluster_id: Optional[StrictStr] = None
     """ Redshift cluster identifier, for provisioned clusters """
 
-    user: Optional[StrictStr]
+    user: Optional[StrictStr] = None
     """ Redshift user name, only required for provisioned clusters """
 
-    workgroup: Optional[StrictStr]
+    workgroup: Optional[StrictStr] = None
     """ Redshift workgroup identifier, for serverless """
 
     region: StrictStr
diff --git a/sdk/python/feast/infra/offline_stores/snowflake_source.py b/sdk/python/feast/infra/offline_stores/snowflake_source.py
index 0cbf82dd1c..e29197c68d 100644
--- a/sdk/python/feast/infra/offline_stores/snowflake_source.py
+++ b/sdk/python/feast/infra/offline_stores/snowflake_source.py
@@ -1,5 +1,5 @@
 import warnings
-from typing import Callable, Dict, Iterable, Optional, Tuple
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
 
 from typeguard import typechecked
 
@@ -223,7 +223,7 @@ def get_table_column_names_and_types(
             query = f"SELECT * FROM {self.get_table_query_string()} LIMIT 5"
             cursor = execute_snowflake_statement(conn, query)
 
-        metadata = [
+        metadata: List[Dict[str, Any]] = [
             {
                 "column_name": column.name,
                 "type_code": column.type_code,
diff --git a/sdk/python/feast/infra/online_stores/dynamodb.py b/sdk/python/feast/infra/online_stores/dynamodb.py
index 525978e736..a1eef16f40 100644
--- a/sdk/python/feast/infra/online_stores/dynamodb.py
+++ b/sdk/python/feast/infra/online_stores/dynamodb.py
@@ -288,12 +288,12 @@ def _get_dynamodb_resource(self, region: str, endpoint_url: Optional[str] = None
         )
         return self._dynamodb_resource
 
-    def _sort_dynamodb_response(self, responses: list, order: list):
+    def _sort_dynamodb_response(self, responses: list, order: list) -> Any:
         """DynamoDB Batch Get Item doesn't return items in a particular order."""
         # Assign an index to order
         order_with_index = {value: idx for idx, value in enumerate(order)}
         # Sort table responses by index
-        table_responses_ordered = [
+        table_responses_ordered: Any = [
            (order_with_index[tbl_res["entity_id"]], tbl_res) for tbl_res in responses
         ]
         table_responses_ordered = sorted(
diff --git a/sdk/python/feast/infra/passthrough_provider.py b/sdk/python/feast/infra/passthrough_provider.py
index 28b10c1259..811abe106c 100644
--- a/sdk/python/feast/infra/passthrough_provider.py
+++ b/sdk/python/feast/infra/passthrough_provider.py
@@ -180,7 +180,7 @@ def online_read(
         config: RepoConfig,
         table: FeatureView,
         entity_keys: List[EntityKeyProto],
-        requested_features: List[str] = None,
+        requested_features: Optional[List[str]] = None,
     ) -> List:
         set_usage_attribute("provider", self.__class__.__name__)
         result = []
diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py
index 82879b264a..2a9670cace 100644
--- a/sdk/python/feast/infra/provider.py
+++ b/sdk/python/feast/infra/provider.py
@@ -211,7 +211,7 @@ def online_read(
         config: RepoConfig,
         table: FeatureView,
         entity_keys: List[EntityKeyProto],
-        requested_features: List[str] = None,
+        requested_features: Optional[List[str]] = None,
     ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
         """
         Reads features values for the given entity keys.
diff --git a/sdk/python/feast/infra/registry/base_registry.py b/sdk/python/feast/infra/registry/base_registry.py
index 8928a5800d..f89b079478 100644
--- a/sdk/python/feast/infra/registry/base_registry.py
+++ b/sdk/python/feast/infra/registry/base_registry.py
@@ -503,7 +503,9 @@ def list_validation_references(
         Returns:
             List of request feature views
         """
+        raise NotImplementedError
 
+    @abstractmethod
     def list_project_metadata(
         self, project: str, allow_cache: bool = False
     ) -> List[ProjectMetadata]:
diff --git a/sdk/python/feast/infra/registry/registry_store.py b/sdk/python/feast/infra/registry/registry_store.py
index c42a55cd9d..5151fd74b2 100644
--- a/sdk/python/feast/infra/registry/registry_store.py
+++ b/sdk/python/feast/infra/registry/registry_store.py
@@ -17,7 +17,7 @@ def get_registry_proto(self) -> RegistryProto:
         Returns:
             Returns either the registry proto stored at the registry path, or an empty registry proto.
         """
-        pass
+        raise NotImplementedError
 
     @abstractmethod
     def update_registry_proto(self, registry_proto: RegistryProto):
@@ -40,7 +40,7 @@ def teardown(self):
 
 class NoopRegistryStore(RegistryStore):
     def get_registry_proto(self) -> RegistryProto:
-        pass
+        return RegistryProto()
 
     def update_registry_proto(self, registry_proto: RegistryProto):
         pass
diff --git a/sdk/python/feast/infra/registry/snowflake.py b/sdk/python/feast/infra/registry/snowflake.py
index 56c7bc1f65..c1ebf13d6b 100644
--- a/sdk/python/feast/infra/registry/snowflake.py
+++ b/sdk/python/feast/infra/registry/snowflake.py
@@ -418,7 +418,7 @@ def _delete_object(
         """
         cursor = execute_snowflake_statement(conn, query)
 
-        if cursor.rowcount < 1 and not_found_exception:
+        if cursor.rowcount < 1 and not_found_exception:  # type: ignore
             raise not_found_exception(name, project)
         self._set_last_updated_metadata(datetime.utcnow(), project)
 
diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py
index ef83c6d1c6..c3604ee41f 100644
--- a/sdk/python/feast/infra/utils/aws_utils.py
+++ b/sdk/python/feast/infra/utils/aws_utils.py
@@ -816,7 +816,7 @@ def execute_athena_query(
     database: str,
     workgroup: str,
     query: str,
-    temp_table: str = None,
+    temp_table: Optional[str] = None,
 ) -> str:
     """Execute athena statement synchronously. Waits for the query to finish.
 
diff --git a/sdk/python/feast/infra/utils/hbase_utils.py b/sdk/python/feast/infra/utils/hbase_utils.py
index d44f93f161..72afda2ef3 100644
--- a/sdk/python/feast/infra/utils/hbase_utils.py
+++ b/sdk/python/feast/infra/utils/hbase_utils.py
@@ -1,4 +1,4 @@
-from typing import List
+from typing import List, Optional
 
 from happybase import ConnectionPool
 
@@ -38,9 +38,9 @@ class HBaseConnector:
 
     def __init__(
         self,
-        pool: ConnectionPool = None,
-        host: str = None,
-        port: int = None,
+        pool: Optional[ConnectionPool] = None,
+        host: Optional[str] = None,
+        port: Optional[int] = None,
         connection_pool_size: int = 4,
     ):
         if pool is None:
diff --git a/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py b/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py
index a4cda89a6f..8eb5177ac2 100644
--- a/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py
+++ b/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py
@@ -43,7 +43,11 @@
 
 
 class GetSnowflakeConnection:
-    def __init__(self, config: str, autocommit=True):
+    def __init__(
+        self,
+        config: str,
+        autocommit=True,
+    ):
         self.config = config
         self.autocommit = autocommit
 
diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py
index e51e1e743b..ad3e273d37 100644
--- a/sdk/python/feast/type_map.py
+++ b/sdk/python/feast/type_map.py
@@ -51,7 +51,7 @@
 import pyarrow
 
 # null timestamps get converted to -9223372036854775808
-NULL_TIMESTAMP_INT_VALUE = np.datetime64("NaT").astype(int)
+NULL_TIMESTAMP_INT_VALUE: int = np.datetime64("NaT").astype(int)
 
 
 def feast_value_type_to_python_type(field_value_proto: ProtoValue) -> Any:
@@ -114,7 +114,10 @@ def feast_value_type_to_pandas_type(value_type: ValueType) -> Any:
 
 
 def python_type_to_feast_value_type(
-    name: str, value: Any = None, recurse: bool = True, type_name: Optional[str] = None
+    name: str,
+    value: Optional[Any] = None,
+    recurse: bool = True,
+    type_name: Optional[str] = None,
 ) -> ValueType:
     """
     Finds the equivalent Feast Value Type for a Python value. Both native
@@ -321,7 +324,7 @@ def _python_datetime_to_int_timestamp(
         elif isinstance(value, Timestamp):
             int_timestamps.append(int(value.ToSeconds()))
         elif isinstance(value, np.datetime64):
-            int_timestamps.append(value.astype("datetime64[s]").astype(np.int_))
+            int_timestamps.append(value.astype("datetime64[s]").astype(np.int_))  # type: ignore[attr-defined]
         elif isinstance(value, type(np.nan)):
             int_timestamps.append(NULL_TIMESTAMP_INT_VALUE)
         else:
diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt
index 740356907d..9435a68deb 100644
--- a/sdk/python/requirements/py3.10-ci-requirements.txt
+++ b/sdk/python/requirements/py3.10-ci-requirements.txt
@@ -121,9 +121,7 @@ comm==0.2.0
     #   ipykernel
     #   ipywidgets
 coverage[toml]==7.3.2
-    # via
-    #   coverage
-    #   pytest-cov
+    # via pytest-cov
 cryptography==41.0.6
     # via
     #   azure-identity
@@ -173,7 +171,7 @@ execnet==2.0.2
     # via pytest-xdist
 executing==2.0.1
     # via stack-data
-fastapi==0.99.1
+fastapi==0.109.1
     # via feast (setup.py)
 fastavro==1.9.0
     # via
@@ -226,9 +224,7 @@ google-auth==2.23.4
 google-auth-httplib2==0.1.1
     # via google-api-python-client
 google-cloud-bigquery[pandas]==3.12.0
-    # via
-    #   feast (setup.py)
-    #   google-cloud-bigquery
+    # via feast (setup.py)
 google-cloud-bigquery-storage==2.22.0
     # via feast (setup.py)
 google-cloud-bigtable==2.21.0
@@ -462,7 +458,7 @@ msgpack==1.0.7
     # via cachecontrol
 multiprocess==0.70.15
     # via bytewax
-mypy==0.982
+mypy==1.8.0
     # via
     #   feast (setup.py)
     #   sqlalchemy
@@ -801,9 +797,7 @@ sniffio==1.3.0
 snowballstemmer==2.2.0
     # via sphinx
 snowflake-connector-python[pandas]==3.5.0
-    # via
-    #   feast (setup.py)
-    #   snowflake-connector-python
+    # via feast (setup.py)
 sortedcontainers==2.4.0
     # via snowflake-connector-python
 soupsieve==2.5
@@ -829,14 +823,12 @@ sphinxcontrib-qthelp==1.0.6
 sphinxcontrib-serializinghtml==1.1.9
     # via sphinx
 sqlalchemy[mypy]==1.4.50
-    # via
-    #   feast (setup.py)
-    #   sqlalchemy
+    # via feast (setup.py)
 sqlalchemy2-stubs==0.0.2a37
     # via sqlalchemy
 stack-data==0.6.3
     # via ipython
-starlette==0.27.0
+starlette==0.35.1
     # via fastapi
 tabulate==0.9.0
     # via feast (setup.py)
@@ -961,9 +953,7 @@ urllib3==1.26.18
     #   rockset
     #   snowflake-connector-python
 uvicorn[standard]==0.24.0.post1
-    # via
-    #   feast (setup.py)
-    #   uvicorn
+    # via feast (setup.py)
 uvloop==0.19.0
     # via uvicorn
 virtualenv==20.23.0
diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt
index 18486d7fa9..5d5d451e14 100644
--- a/sdk/python/requirements/py3.10-requirements.txt
+++ b/sdk/python/requirements/py3.10-requirements.txt
@@ -42,7 +42,7 @@ dill==0.3.7
     # via feast (setup.py)
 exceptiongroup==1.1.3
     # via anyio
-fastapi==0.99.1
+fastapi==0.109.1
     # via feast (setup.py)
 fastavro==1.9.0
     # via
@@ -175,12 +175,10 @@ sniffio==1.3.0
     #   anyio
     #   httpx
 sqlalchemy[mypy]==1.4.50
-    # via
-    #   feast (setup.py)
-    #   sqlalchemy
+    # via feast (setup.py)
 sqlalchemy2-stubs==0.0.2a37
     # via sqlalchemy
-starlette==0.27.0
+starlette==0.35.1
     # via fastapi
 tabulate==0.9.0
     # via feast (setup.py)
@@ -210,9 +208,7 @@ typing-extensions==4.8.0
 urllib3==2.1.0
     # via requests
 uvicorn[standard]==0.24.0.post1
-    # via
-    #   feast (setup.py)
-    #   uvicorn
+    # via feast (setup.py)
 uvloop==0.19.0
     # via uvicorn
 volatile==2.1.0
diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt
index 3bda9e72f9..808a58e11b 100644
--- a/sdk/python/requirements/py3.8-ci-requirements.txt
+++ b/sdk/python/requirements/py3.8-ci-requirements.txt
@@ -127,9 +127,7 @@ comm==0.2.0
     #   ipykernel
     #   ipywidgets
 coverage[toml]==7.3.2
-    # via
-    #   coverage
-    #   pytest-cov
+    # via pytest-cov
 cryptography==41.0.6
     # via
     #   azure-identity
@@ -178,7 +176,7 @@ execnet==2.0.2
     # via pytest-xdist
 executing==2.0.1
     # via stack-data
-fastapi==0.99.1
+fastapi==0.109.1
     # via feast (setup.py)
 fastavro==1.9.0
     # via
@@ -231,9 +229,7 @@ google-auth==2.23.4
 google-auth-httplib2==0.1.1
     # via google-api-python-client
 google-cloud-bigquery[pandas]==3.12.0
-    # via
-    #   feast (setup.py)
-    #   google-cloud-bigquery
+    # via feast (setup.py)
 google-cloud-bigquery-storage==2.22.0
     # via feast (setup.py)
 google-cloud-bigtable==2.21.0
@@ -478,7 +474,7 @@ msgpack==1.0.7
     # via cachecontrol
 multiprocess==0.70.15
     # via bytewax
-mypy==0.982
+mypy==1.8.0
     # via
     #   feast (setup.py)
     #   sqlalchemy
@@ -824,9 +820,7 @@ sniffio==1.3.0
 snowballstemmer==2.2.0
     # via sphinx
 snowflake-connector-python[pandas]==3.5.0
-    # via
-    #   feast (setup.py)
-    #   snowflake-connector-python
+    # via feast (setup.py)
 sortedcontainers==2.4.0
     # via snowflake-connector-python
 soupsieve==2.5
@@ -846,14 +840,12 @@ sphinxcontrib-qthelp==1.0.3
 sphinxcontrib-serializinghtml==1.1.5
     # via sphinx
 sqlalchemy[mypy]==1.4.50
-    # via
-    #   feast (setup.py)
-    #   sqlalchemy
+    # via feast (setup.py)
 sqlalchemy2-stubs==0.0.2a37
     # via sqlalchemy
 stack-data==0.6.3
     # via ipython
-starlette==0.27.0
+starlette==0.35.1
     # via fastapi
 tabulate==0.9.0
     # via feast (setup.py)
@@ -981,9 +973,7 @@ urllib3==1.26.18
     #   rockset
     #   snowflake-connector-python
 uvicorn[standard]==0.24.0.post1
-    # via
-    #   feast (setup.py)
-    #   uvicorn
+    # via feast (setup.py)
 uvloop==0.19.0
     # via uvicorn
 virtualenv==20.23.0
diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt
index c180c50c81..163fa4c9a8 100644
--- a/sdk/python/requirements/py3.8-requirements.txt
+++ b/sdk/python/requirements/py3.8-requirements.txt
@@ -42,7 +42,7 @@ dill==0.3.7
     # via feast (setup.py)
 exceptiongroup==1.1.3
     # via anyio
-fastapi==0.99.1
+fastapi==0.109.1
     # via feast (setup.py)
 fastavro==1.9.0
     # via
@@ -180,12 +180,10 @@ sniffio==1.3.0
     #   anyio
     #   httpx
 sqlalchemy[mypy]==1.4.50
-    # via
-    #   feast (setup.py)
-    #   sqlalchemy
+    # via feast (setup.py)
 sqlalchemy2-stubs==0.0.2a37
     # via sqlalchemy
-starlette==0.27.0
+starlette==0.35.1
     # via fastapi
 tabulate==0.9.0
     # via feast (setup.py)
@@ -216,9 +214,7 @@ typing-extensions==4.8.0
 urllib3==2.1.0
     # via requests
 uvicorn[standard]==0.24.0.post1
-    # via
-    #   feast (setup.py)
-    #   uvicorn
+    # via feast (setup.py)
 uvloop==0.19.0
     # via uvicorn
 volatile==2.1.0
diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt
index 6989d5b4cc..f9d7ac3fb9 100644
--- a/sdk/python/requirements/py3.9-ci-requirements.txt
+++ b/sdk/python/requirements/py3.9-ci-requirements.txt
@@ -121,9 +121,7 @@ comm==0.2.0
     #   ipykernel
     #   ipywidgets
 coverage[toml]==7.3.2
-    # via
-    #   coverage
-    #   pytest-cov
+    # via pytest-cov
 cryptography==41.0.6
     # via
     #   azure-identity
@@ -173,7 +171,7 @@ execnet==2.0.2
     # via pytest-xdist
 executing==2.0.1
     # via stack-data
-fastapi==0.99.1
+fastapi==0.109.1
     # via feast (setup.py)
 fastavro==1.9.0
     # via
@@ -226,9 +224,7 @@ google-auth==2.23.4
 google-auth-httplib2==0.1.1
     # via google-api-python-client
 google-cloud-bigquery[pandas]==3.12.0
-    # via
-    #   feast (setup.py)
-    #   google-cloud-bigquery
+    # via feast (setup.py)
 google-cloud-bigquery-storage==2.22.0
     # via feast (setup.py)
 google-cloud-bigtable==2.21.0
@@ -469,7 +465,7 @@ msgpack==1.0.7
     # via cachecontrol
 multiprocess==0.70.15
     # via bytewax
-mypy==0.982
+mypy==1.8.0
     # via
     #   feast (setup.py)
     #   sqlalchemy
@@ -810,9 +806,7 @@ sniffio==1.3.0
 snowballstemmer==2.2.0
     # via sphinx
 snowflake-connector-python[pandas]==3.5.0
-    # via
-    #   feast (setup.py)
-    #   snowflake-connector-python
+    # via feast (setup.py)
 sortedcontainers==2.4.0
     # via snowflake-connector-python
 soupsieve==2.5
@@ -838,14 +832,12 @@ sphinxcontrib-qthelp==1.0.6
 sphinxcontrib-serializinghtml==1.1.9
     # via sphinx
 sqlalchemy[mypy]==1.4.50
-    # via
-    #   feast (setup.py)
-    #   sqlalchemy
+    # via feast (setup.py)
 sqlalchemy2-stubs==0.0.2a37
     # via sqlalchemy
 stack-data==0.6.3
     # via ipython
-starlette==0.27.0
+starlette==0.35.1
     # via fastapi
 tabulate==0.9.0
     # via feast (setup.py)
@@ -973,9 +965,7 @@ urllib3==1.26.18
     #   rockset
     #   snowflake-connector-python
 uvicorn[standard]==0.24.0.post1
-    # via
-    #   feast (setup.py)
-    #   uvicorn
+    # via feast (setup.py)
 uvloop==0.19.0
     # via uvicorn
 virtualenv==20.23.0
diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt
index 3b6f88b4e2..4d9b8f107d 100644
--- a/sdk/python/requirements/py3.9-requirements.txt
+++ b/sdk/python/requirements/py3.9-requirements.txt
@@ -42,7 +42,7 @@ dill==0.3.7
     # via feast (setup.py)
 exceptiongroup==1.1.3
     # via anyio
-fastapi==0.99.1
+fastapi==0.109.1
     # via feast (setup.py)
 fastavro==1.9.0
     # via
@@ -175,12 +175,10 @@ sniffio==1.3.0
     #   anyio
     #   httpx
 sqlalchemy[mypy]==1.4.50
-    # via
-    #   feast (setup.py)
-    #   sqlalchemy
+    # via feast (setup.py)
 sqlalchemy2-stubs==0.0.2a37
     # via sqlalchemy
-starlette==0.27.0
+starlette==0.35.1
     # via fastapi
 tabulate==0.9.0
     # via feast (setup.py)
@@ -211,9 +209,7 @@ typing-extensions==4.8.0
 urllib3==2.1.0
     # via requests
 uvicorn[standard]==0.24.0.post1
-    # via
-    #   feast (setup.py)
-    #   uvicorn
+    # via feast (setup.py)
 uvloop==0.19.0
     # via uvicorn
 volatile==2.1.0
diff --git a/sdk/python/tests/data/data_creator.py b/sdk/python/tests/data/data_creator.py
index 8d5b1979fa..1fc66aee84 100644
--- a/sdk/python/tests/data/data_creator.py
+++ b/sdk/python/tests/data/data_creator.py
@@ -9,7 +9,7 @@
 
 def create_basic_driver_dataset(
     entity_type: FeastType = Int32,
-    feature_dtype: str = None,
+    feature_dtype: Optional[str] = None,
     feature_is_list: bool = False,
     list_has_empty_list: bool = False,
 ) -> pd.DataFrame:
diff --git a/sdk/python/tests/foo_provider.py b/sdk/python/tests/foo_provider.py
index d27e2645d4..ba256a3813 100644
--- a/sdk/python/tests/foo_provider.py
+++ b/sdk/python/tests/foo_provider.py
@@ -71,16 +71,16 @@ def get_historical_features(
         project: str,
         full_feature_names: bool = False,
     ) -> RetrievalJob:
-        pass
+        return RetrievalJob()
 
     def online_read(
         self,
         config: RepoConfig,
         table: FeatureView,
         entity_keys: List[EntityKeyProto],
-        requested_features: List[str] = None,
+        requested_features: Optional[List[str]] = None,
     ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
-        pass
+        return []
 
     def retrieve_saved_dataset(self, config: RepoConfig, dataset: SavedDataset):
         pass
@@ -102,4 +102,4 @@ def retrieve_feature_service_logs(
         config: RepoConfig,
         registry: BaseRegistry,
     ) -> RetrievalJob:
-        pass
+        return RetrievalJob()
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
index b36af0db47..d64463606f 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py
@@ -20,7 +20,7 @@ def create_data_source(
         destination_name: str,
         event_timestamp_column="ts",
         created_timestamp_column="created_ts",
-        field_mapping: Dict[str, str] = None,
+        field_mapping: Optional[Dict[str, str]] = None,
         timestamp_field: Optional[str] = None,
     ) -> DataSource:
         """
@@ -53,7 +53,7 @@ def create_saved_dataset_destination(self) -> SavedDatasetStorage:
         ...
 
     def create_logged_features_destination(self) -> LoggingDestination:
-        pass
+        raise NotImplementedError
 
     @abstractmethod
     def teardown(self):
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py
index 384037eef1..215d19ba7f 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py
@@ -66,7 +66,7 @@ def create_data_source(
         destination_name: str,
         timestamp_field="ts",
         created_timestamp_column="created_ts",
-        field_mapping: Dict[str, str] = None,
+        field_mapping: Optional[Dict[str, str]] = None,
         **kwargs,
     ) -> DataSource:
 
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py
index 124dd4c88d..3263785683 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py
@@ -41,7 +41,7 @@ def create_data_source(
         destination_name: str,
         timestamp_field="ts",
         created_timestamp_column="created_ts",
-        field_mapping: Dict[str, str] = None,
+        field_mapping: Optional[Dict[str, str]] = None,
     ) -> DataSource:
         destination_name = self.get_prefixed_table_name(destination_name)
 
@@ -96,7 +96,7 @@ def create_data_source(
         destination_name: str,
         timestamp_field="ts",
         created_timestamp_column="created_ts",
-        field_mapping: Dict[str, str] = None,
+        field_mapping: Optional[Dict[str, str]] = None,
     ) -> DataSource:
         destination_name = self.get_prefixed_table_name(destination_name)
 
@@ -171,7 +171,7 @@ def create_data_source(
         suffix: Optional[str] = None,
         timestamp_field="ts",
         created_timestamp_column="created_ts",
-        field_mapping: Dict[str, str] = None,
+        field_mapping: Optional[Dict[str, str]] = None,
     ) -> DataSource:
         filename = f"{destination_name}.parquet"
         port = self.minio.get_exposed_port("9000")
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
index dfe8e3d33b..e6f20d6125 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py
@@ -51,7 +51,7 @@ def create_data_source(
         suffix: Optional[str] = None,
         timestamp_field="ts",
         created_timestamp_column="created_ts",
-        field_mapping: Dict[str, str] = None,
+        field_mapping: Optional[Dict[str, str]] = None,
     ) -> DataSource:
         destination_name = self.get_prefixed_table_name(destination_name)
 
diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py
index c14780da97..1414291a18 100644
--- a/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py
+++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py
@@ -51,7 +51,7 @@ def create_data_source(
         suffix: Optional[str] = None,
         timestamp_field="ts",
         created_timestamp_column="created_ts",
-        field_mapping: Dict[str, str] = None,
+        field_mapping: Optional[Dict[str, str]] = None,
     ) -> DataSource:
         destination_name = self.get_prefixed_table_name(destination_name)
 
diff --git a/sdk/python/tests/integration/feature_repos/universal/online_store_creator.py b/sdk/python/tests/integration/feature_repos/universal/online_store_creator.py
index c3872ea697..10a8143739 100644
--- a/sdk/python/tests/integration/feature_repos/universal/online_store_creator.py
+++ b/sdk/python/tests/integration/feature_repos/universal/online_store_creator.py
@@ -8,7 +8,7 @@ def __init__(self, project_name: str, **kwargs):
         self.project_name = project_name
 
     def create_online_store(self) -> FeastConfigBaseModel:
-        ...
+        raise NotImplementedError
 
     def teardown(self):
-        ...
+        raise NotImplementedError
diff --git a/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py b/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py
index ef0cce0470..220bdba0da 100644
--- a/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py
+++ b/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py
@@ -39,6 +39,9 @@
 
 
 class MockRetrievalJob(RetrievalJob):
+    def to_sql(self) -> str:
+        return ""
+
     def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame:
         """
         Synchronously executes the underlying query and returns the result as a pandas dataframe.
@@ -46,7 +49,7 @@ def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame:
         Does not handle on demand transformations or dataset validation. For either of those,
         `to_df` should be used.
         """
-        pass
+        return pd.DataFrame()
 
     def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table:
         """
@@ -55,17 +58,17 @@ def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table:
         Does not handle on demand transformations or dataset validation. For either of those,
         `to_arrow` should be used.
         """
-        pass
+        return pyarrow.Table()
 
     @property
     def full_feature_names(self) -> bool:
         """Returns True if full feature names should be applied to the results of the query."""
-        pass
+        return False
 
     @property
     def on_demand_feature_views(self) -> List[OnDemandFeatureView]:
         """Returns a list containing all the on demand feature views to be handled."""
-        pass
+        return []
 
     def persist(
         self,
@@ -87,7 +90,7 @@ def persist(
     @property
     def metadata(self) -> Optional[RetrievalMetadata]:
         """Returns metadata about the retrieval job."""
-        pass
+        raise NotImplementedError
 
 
 # Since RetreivalJob are not really tested for subclasses we add some tests here.
@@ -208,7 +211,7 @@ def retrieval_job(request, environment):
 
 
 def test_to_sql():
-    assert MockRetrievalJob().to_sql() is None
+    assert MockRetrievalJob().to_sql() == ""
 
 
 @pytest.mark.parametrize("timeout", (None, 30))
diff --git a/setup.py b/setup.py
index 4fb80871b2..81ae63a7a4 100644
--- a/setup.py
+++ b/setup.py
@@ -71,7 +71,7 @@
     "toml>=0.10.0,<1",
     "tqdm>=4,<5",
     "typeguard==2.13.3",
-    "fastapi>=0.68.0,<0.100",
+    "fastapi>=0.68.0",
    "uvicorn[standard]>=0.14.0,<1",
     "gunicorn",
     "dask>=2021.1.0",
@@ -156,7 +156,7 @@
     "minio==7.1.0",
     "mock==2.0.0",
     "moto<5",
-    "mypy>=0.981,<0.990",
+    "mypy>=1.4.1",
     "avro==1.10.0",
     "fsspec<2023.10.0",
     "urllib3>=1.25.4,<3",
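
The two mechanical fixes this patch applies throughout, restated as a standalone
sketch (`make_source` and `BaseSource` are hypothetical names for illustration,
not code from this repository): mypy 0.990 and later rejects the implicit
Optional that signatures like `field_mapping: Dict[str, str] = None` relied on,
per PEP 484, so every None default gets an explicit Optional annotation; and
`raise NotImplementedError` replaces `pass` in unimplemented stubs, so a
subclass that forgets to override them fails loudly instead of silently
returning None.

    from typing import Dict, Optional

    # A None default requires an explicit Optional[...] annotation under
    # modern mypy; the old implicit-Optional form is now a type error.
    def make_source(field_mapping: Optional[Dict[str, str]] = None) -> Dict[str, str]:
        return dict(field_mapping or {})

    class BaseSource:
        def validate(self) -> None:
            # Raising at call time beats `pass`, which would return None and
            # defer the failure to wherever the missing result is consumed.
            raise NotImplementedError

    print(make_source())  # -> {}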