Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve GCP exception handling #1561

Merged
merged 5 commits into from
May 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions sdk/python/feast/errors.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from colorama import Fore, Style


class FeastObjectNotFoundException(Exception):
pass

Expand Down Expand Up @@ -47,3 +50,13 @@ def __init__(self, module_name, class_name):
super().__init__(
f"Could not import provider '{class_name}' from module '{module_name}'"
)


class FeastExtrasDependencyImportError(Exception):
def __init__(self, extras_type: str, nested_error: str):
message = (
nested_error
+ "\n"
+ f"You may need run {Style.BRIGHT + Fore.GREEN}pip install 'feast[{extras_type}]'{Style.RESET_ALL}"
)
super().__init__(message)
19 changes: 10 additions & 9 deletions sdk/python/feast/infra/gcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import mmh3
import pandas
import pyarrow
from google.auth.exceptions import DefaultCredentialsError
from tqdm import tqdm

from feast import FeatureTable, utils
Expand All @@ -27,6 +26,14 @@
from feast.registry import Registry
from feast.repo_config import DatastoreOnlineStoreConfig, RepoConfig

try:
from google.auth.exceptions import DefaultCredentialsError
from google.cloud import bigquery, datastore
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

iirc we didn't have google cloud dependencies imported at the top because Feast will try to import them even when the user is not using gcp (e.g. they're just trying things locally) due to this file being imported somewhere else. If you uninstall the google cloud pip packages in your venv and try the quickstart you can confirm whether this happens

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, but I think its cleaner to conditionally import our providers and stores than their subdependencies. Let me test to validate it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think only this change was necessary. Our tests are failing for other reasons

except ImportError as e:
from feast.errors import FeastExtrasDependencyImportError

raise FeastExtrasDependencyImportError("gcp", str(e))


class GcpProvider(Provider):
_gcp_project_id: Optional[str]
Expand All @@ -39,8 +46,6 @@ def __init__(self, config: RepoConfig):
self._gcp_project_id = None

def _initialize_client(self):
from google.cloud import datastore

try:
if self._gcp_project_id is not None:
return datastore.Client(self._gcp_project_id)
Expand All @@ -49,7 +54,8 @@ def _initialize_client(self):
except DefaultCredentialsError as e:
raise FeastProviderLoginError(
str(e)
+ '\nIt may be necessary to run "gcloud auth application-default login" if you would like to use your local Google Cloud account'
+ '\nIt may be necessary to run "gcloud auth application-default login" if you would like to use your '
"local Google Cloud account "
)

def update_infra(
Expand All @@ -61,7 +67,6 @@ def update_infra(
entities_to_keep: Sequence[Entity],
partial: bool,
):
from google.cloud import datastore

client = self._initialize_client()

Expand Down Expand Up @@ -190,8 +195,6 @@ def materialize_single_feature_view(

@staticmethod
def _pull_query(query: str) -> pyarrow.Table:
from google.cloud import bigquery

client = bigquery.Client()
query_job = client.query(query)
return query_job.to_arrow()
Expand Down Expand Up @@ -248,8 +251,6 @@ def _write_minibatch(
],
progress: Optional[Callable[[int], Any]],
):
from google.cloud import datastore

entities = []
for entity_key, features, timestamp, created_ts in data:
document_id = compute_datastore_entity_id(entity_key)
Expand Down
13 changes: 9 additions & 4 deletions sdk/python/feast/infra/offline_stores/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import pandas
import pyarrow
from google.auth.exceptions import DefaultCredentialsError
from jinja2 import BaseLoader, Environment

from feast.data_source import BigQuerySource, DataSource
Expand All @@ -20,6 +19,15 @@
from feast.registry import Registry
from feast.repo_config import RepoConfig

try:
from google.auth.exceptions import DefaultCredentialsError
from google.cloud import bigquery
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here as well, I think (though it may have been just one of gcp.py and bigquery.py where this happened)


except ImportError as e:
from feast.errors import FeastExtrasDependencyImportError

raise FeastExtrasDependencyImportError("gcp", str(e))


class BigQueryOfflineStore(OfflineStore):
@staticmethod
Expand Down Expand Up @@ -192,7 +200,6 @@ class FeatureViewQueryContext:

def _upload_entity_df_into_bigquery(project, entity_df, client) -> str:
"""Uploads a Pandas entity dataframe into a BigQuery table and returns a reference to the resulting table"""
from google.cloud import bigquery

# First create the BigQuery dataset if it doesn't exist
dataset = bigquery.Dataset(f"{client.project}.feast_{project}")
Expand Down Expand Up @@ -303,8 +310,6 @@ def build_point_in_time_query(

def _get_bigquery_client():
try:
from google.cloud import bigquery

client = bigquery.Client()
except DefaultCredentialsError as e:
raise FeastProviderLoginError(
Expand Down
6 changes: 4 additions & 2 deletions sdk/python/feast/infra/offline_stores/helpers.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from typing import List

from feast.data_source import BigQuerySource, DataSource, FileSource
from feast.infra.offline_stores.bigquery import BigQueryOfflineStore
from feast.infra.offline_stores.file import FileOfflineStore
from feast.infra.offline_stores.offline_store import OfflineStore


Expand All @@ -13,10 +11,14 @@ def get_offline_store_from_sources(sources: List[DataSource]) -> OfflineStore:

# Retrieve features from ParquetOfflineStore
if all(source == FileSource for source in source_types):
from feast.infra.offline_stores.file import FileOfflineStore

return FileOfflineStore()

# Retrieve features from BigQueryOfflineStore
if all(source == BigQuerySource for source in source_types):
from feast.infra.offline_stores.bigquery import BigQueryOfflineStore

return BigQueryOfflineStore()

# Could not map inputs to an OfflineStore implementation
Expand Down
17 changes: 6 additions & 11 deletions sdk/python/feast/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,18 +431,13 @@ def _write_registry(self, registry_proto: RegistryProto):
class GCSRegistryStore(RegistryStore):
def __init__(self, uri: str):
try:
from google.auth.exceptions import DefaultCredentialsError
from google.cloud import storage
except ImportError:
# TODO: Ensure versioning depends on requirements.txt/setup.py and isn't hardcoded
raise ImportError(
"Install package google-cloud-storage==1.20.* for gcs support"
"run ```pip install google-cloud-storage==1.20.*```"
)
try:
self.gcs_client = storage.Client()
except DefaultCredentialsError:
self.gcs_client = storage.Client.create_anonymous_client()
except ImportError as e:
from feast.errors import FeastExtrasDependencyImportError

raise FeastExtrasDependencyImportError("gcp", str(e))

self.gcs_client = storage.Client()
self._uri = urlparse(uri)
self._bucket = self._uri.hostname
self._blob = self._uri.path.lstrip("/")
Expand Down