Skip to content

Commit

Permalink
Provide descriptive error on invalid table reference (feast-dev#1627)
Browse files Browse the repository at this point in the history
* Initial commit to catch nonexistent table

Signed-off-by: Cody Lin <codyjlin@yahoo.com>
Signed-off-by: Cody Lin <codyl@twitter.com>

* simplify nonexistent BQ table test

Signed-off-by: Cody Lin <codyl@twitter.com>

* clean up table_exists exception

Signed-off-by: Cody Lin <codyl@twitter.com>

* remove unneeded variable

Signed-off-by: Cody Lin <codyl@twitter.com>

* function name change to _assert_table_exists

Signed-off-by: Cody Lin <codyl@twitter.com>

* Initial commit to catch nonexistent table

Signed-off-by: Cody Lin <codyjlin@yahoo.com>
Signed-off-by: Cody Lin <codyl@twitter.com>

* simplify nonexistent BQ table test

Signed-off-by: Cody Lin <codyl@twitter.com>

* clean up table_exists exception

Signed-off-by: Cody Lin <codyl@twitter.com>

* function name change to _assert_table_exists

Signed-off-by: Cody Lin <codyl@twitter.com>

* fix lint errors and rebase

Signed-off-by: Cody Lin <codyl@twitter.com>

* Fix get_table(None) error

Signed-off-by: Cody Lin <codyl@twitter.com>

* custom exception for both missing file and BQ source

Signed-off-by: Cody Lin <codyl@twitter.com>

* revert FileSource checks

Signed-off-by: Cody Lin <codyl@twitter.com>

* Use DataSourceNotFoundException instead of subclassing

Signed-off-by: Cody Lin <codyl@twitter.com>

* Moved assert_table_exists out of the BQ constructor to apply_total

Signed-off-by: Cody Lin <codyl@twitter.com>

* rename test and test asset

Signed-off-by: Cody Lin <codyl@twitter.com>

* move validate logic back to data_source

Signed-off-by: Cody Lin <codyl@twitter.com>

* fixed tests

Signed-off-by: Cody Lin <codyl@twitter.com>

* Set pytest.integration for tests that access BQ

Signed-off-by: Cody Lin <codyl@twitter.com>

* Import pytest in failed test files

Signed-off-by: Cody Lin <codyl@twitter.com>
Signed-off-by: Mwad22 <51929507+Mwad22@users.noreply.github.com>
  • Loading branch information
codyjlin authored and Mwad22 committed Jul 7, 2021
1 parent dd25ad6 commit cef2869
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 1 deletion.
22 changes: 22 additions & 0 deletions sdk/python/feast/data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from feast import type_map
from feast.data_format import FileFormat, StreamFormat
from feast.errors import DataSourceNotFoundException
from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto
from feast.value_type import ValueType

Expand Down Expand Up @@ -519,6 +520,12 @@ def to_proto(self) -> DataSourceProto:
"""
raise NotImplementedError

def validate(self):
"""
Validates the underlying data source.
"""
raise NotImplementedError


class FileSource(DataSource):
def __init__(
Expand Down Expand Up @@ -615,6 +622,10 @@ def to_proto(self) -> DataSourceProto:

return data_source_proto

def validate(self):
# TODO: validate a FileSource
pass

@staticmethod
def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]:
return type_map.pa_to_feast_value_type
Expand Down Expand Up @@ -692,6 +703,17 @@ def to_proto(self) -> DataSourceProto:

return data_source_proto

def validate(self):
if not self.query:
from google.api_core.exceptions import NotFound
from google.cloud import bigquery

client = bigquery.Client()
try:
client.get_table(self.table_ref)
except NotFound:
raise DataSourceNotFoundException(self.table_ref)

def get_table_query_string(self) -> str:
"""Returns a string that can directly be used to reference this table in SQL"""
if self.table_ref:
Expand Down
7 changes: 7 additions & 0 deletions sdk/python/feast/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
from colorama import Fore, Style


class DataSourceNotFoundException(Exception):
def __init__(self, path):
super().__init__(
f"Unable to find table at '{path}'. Please check that table exists."
)


class FeastObjectNotFoundException(Exception):
pass

Expand Down
3 changes: 2 additions & 1 deletion sdk/python/feast/repo_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,12 @@ def apply_total(repo_config: RepoConfig, repo_path: Path):

data_sources = [t.input for t in repo.feature_views]

# Make sure the data source used by this feature view is supported by
# Make sure the data source used by this feature view is supported by Feast
for data_source in data_sources:
assert_offline_store_supports_data_source(
repo_config.offline_store, data_source
)
data_source.validate()

update_data_sources_with_inferred_event_timestamp_col(data_sources)
for view in repo.feature_views:
Expand Down
20 changes: 20 additions & 0 deletions sdk/python/tests/example_feature_repo_with_missing_bq_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from datetime import timedelta

from feast import BigQuerySource, Entity, Feature, FeatureView, ValueType

nonexistent_source = BigQuerySource(
table_ref="project.dataset.nonexistent_table", event_timestamp_column=""
)

driver = Entity(name="driver", value_type=ValueType.INT64, description="driver id",)

nonexistent_features = FeatureView(
name="driver_locations",
entities=["driver"],
ttl=timedelta(days=1),
features=[
Feature(name="lat", dtype=ValueType.FLOAT),
Feature(name="lon", dtype=ValueType.STRING),
],
input=nonexistent_source,
)
35 changes: 35 additions & 0 deletions sdk/python/tests/test_cli_gcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,38 @@ def test_basic() -> None:

result = runner.run(["teardown"], cwd=repo_path)
assert result.returncode == 0


@pytest.mark.integration
def test_missing_bq_source_fail() -> None:
project_id = "".join(
random.choice(string.ascii_lowercase + string.digits) for _ in range(10)
)
runner = CliRunner()
with tempfile.TemporaryDirectory() as repo_dir_name, tempfile.TemporaryDirectory() as data_dir_name:

repo_path = Path(repo_dir_name)
data_path = Path(data_dir_name)

repo_config = repo_path / "feature_store.yaml"

repo_config.write_text(
dedent(
f"""
project: {project_id}
registry: {data_path / "registry.db"}
provider: gcp
"""
)
)

repo_example = repo_path / "example.py"
repo_example.write_text(
(
Path(__file__).parent / "example_feature_repo_with_missing_bq_source.py"
).read_text()
)

returncode, output = runner.run_with_output(["apply"], cwd=repo_path)
assert returncode == 1
assert b"DataSourceNotFoundException" in output
3 changes: 3 additions & 0 deletions sdk/python/tests/test_cli_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
from textwrap import dedent

import assertpy
import pytest

from feast.feature_store import FeatureStore
from tests.cli_utils import CliRunner
from tests.online_read_write_test import basic_rw_test


@pytest.mark.integration
def test_workflow() -> None:
"""
Test running apply on a sample repo, and make sure the infra gets created.
Expand Down Expand Up @@ -78,6 +80,7 @@ def test_workflow() -> None:
assertpy.assert_that(result.returncode).is_equal_to(0)


@pytest.mark.integration
def test_non_local_feature_repo() -> None:
"""
Test running apply on a sample repo, and make sure the infra gets created.
Expand Down
2 changes: 2 additions & 0 deletions sdk/python/tests/test_online_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from tests.cli_utils import CliRunner, get_example_repo


@pytest.mark.integration
def test_online() -> None:
"""
Test reading from the online store in local mode.
Expand Down Expand Up @@ -247,6 +248,7 @@ def test_online() -> None:
os.rename(store.config.registry + "_fake", store.config.registry)


@pytest.mark.integration
def test_online_to_df():
"""
Test dataframe conversion. Make sure the response columns and rows are
Expand Down
2 changes: 2 additions & 0 deletions sdk/python/tests/test_partial_apply.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import pytest
from google.protobuf.duration_pb2 import Duration

from feast import BigQuerySource, Feature, FeatureView, ValueType
from tests.cli_utils import CliRunner, get_example_repo
from tests.online_read_write_test import basic_rw_test


@pytest.mark.integration
def test_partial() -> None:
"""
Add another table to existing repo using partial apply API. Make sure both the table
Expand Down

0 comments on commit cef2869

Please sign in to comment.