Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Fix default feast apply path without any extras #2373

Merged
merged 2 commits into from
Mar 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions docs/reference/data-sources/spark.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ The spark data source API allows for the retrieval of historical feature values
Using a table reference from SparkSession (for example, either in memory or a Hive Metastore)

```python
from feast import SparkSource
from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
SparkSource,
)

my_spark_source = SparkSource(
table="FEATURE_TABLE",
Expand All @@ -23,7 +25,9 @@ my_spark_source = SparkSource(
Using a query

```python
from feast import SparkSource
from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
SparkSource,
)

my_spark_source = SparkSource(
query="SELECT timestamp as ts, created, f1, f2 "
Expand All @@ -34,7 +38,9 @@ my_spark_source = SparkSource(
Using a file reference

```python
from feast import SparkSource
from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
SparkSource,
)

my_spark_source = SparkSource(
path=f"{CURRENT_DIR}/data/driver_hourly_stats",
Expand Down
4 changes: 0 additions & 4 deletions sdk/python/feast/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
from pkg_resources import DistributionNotFound, get_distribution

from feast.infra.offline_stores.bigquery_source import BigQuerySource
from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
SparkSource,
)
from feast.infra.offline_stores.file_source import FileSource
from feast.infra.offline_stores.redshift_source import RedshiftSource
from feast.infra.offline_stores.snowflake_source import SnowflakeSource
Expand Down Expand Up @@ -50,5 +47,4 @@
"RedshiftSource",
"RequestFeatureView",
"SnowflakeSource",
"SparkSource",
]
2 changes: 0 additions & 2 deletions sdk/python/feast/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
from feast.data_source import DataSource
from feast.diff.infra_diff import InfraDiff, diff_infra_protos
from feast.diff.registry_diff import RegistryDiff, apply_diff_to_registry, diff_between
from feast.dqm.profilers.ge_profiler import GEProfiler
from feast.entity import Entity
from feast.errors import (
EntityNotFoundException,
Expand Down Expand Up @@ -881,7 +880,6 @@ def create_saved_dataset(
storage: SavedDatasetStorage,
tags: Optional[Dict[str, str]] = None,
feature_service: Optional[FeatureService] = None,
profiler: Optional[GEProfiler] = None,
) -> SavedDataset:
"""
Execute provided retrieval job and persist its outcome in given storage.
Expand Down
7 changes: 4 additions & 3 deletions sdk/python/feast/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
FileSource,
RedshiftSource,
SnowflakeSource,
SparkSource,
)
from feast.data_source import DataSource, RequestDataSource
from feast.errors import RegistryInferenceFailure
Expand Down Expand Up @@ -87,8 +86,10 @@ def update_data_sources_with_inferred_event_timestamp_col(
):
# prepare right match pattern for data source
ts_column_type_regex_pattern = ""
if isinstance(data_source, FileSource) or isinstance(
data_source, SparkSource
# TODO(adchia): Move Spark source inference out of this logic
if (
isinstance(data_source, FileSource)
or "SparkSource" == data_source.__class__.__name__
):
ts_column_type_regex_pattern = r"^timestamp"
elif isinstance(data_source, BigQuerySource):
Expand Down