From d025f61f85d31c62aafafe5de8210dc3358b5aa4 Mon Sep 17 00:00:00 2001 From: Herraj Luhano Date: Tue, 4 Feb 2025 17:22:29 -0500 Subject: [PATCH] [DISCO-3067] [load test: skip] Refactor async gcs client to be more generic --- .../providers/manifest/backends/manifest.py | 13 -------- .../providers/manifest/backends/protocol.py | 9 ------ merino/providers/manifest/provider.py | 9 ------ merino/utils/gcs/async_gcs_client.py | 30 +++++++------------ merino/web/api_v1.py | 5 +--- 5 files changed, 12 insertions(+), 54 deletions(-) diff --git a/merino/providers/manifest/backends/manifest.py b/merino/providers/manifest/backends/manifest.py index 629ba7af..943512f9 100644 --- a/merino/providers/manifest/backends/manifest.py +++ b/merino/providers/manifest/backends/manifest.py @@ -9,7 +9,6 @@ ManifestRemoteFilemanager, ) from merino.providers.manifest.backends.protocol import ManifestData -from merino.utils.gcs.async_gcs_client import AsyncGcsClient logger = logging.getLogger(__name__) @@ -55,15 +54,3 @@ def fetch_manifest_data(self) -> tuple[GetManifestResultCode, ManifestData | Non case GetManifestResultCode.FAIL: logger.error("Failed to fetch manifest from GCS (FAIL).") return result_code, None - - async def fetch_via_async_gcs_client(self) -> ManifestData | None: - """Create an async gcs client and download the same manifest blob - this is temporary redundant logic just to test out the async client downloads from gcs - to mainly test if auth is working properly with this client - """ - async_gcs_client = AsyncGcsClient() - manifest_via_async = await async_gcs_client.get_manifest_from_blob( - bucket_name=settings.image_manifest.gcs_bucket, blob_name=GCS_BLOB_NAME - ) - - return manifest_via_async diff --git a/merino/providers/manifest/backends/protocol.py b/merino/providers/manifest/backends/protocol.py index 6ce42f62..9e54f37b 100644 --- a/merino/providers/manifest/backends/protocol.py +++ b/merino/providers/manifest/backends/protocol.py @@ -50,12 +50,3 @@ async def fetch(self) -> tuple[GetManifestResultCode, ManifestData | None]: ManifestBackendError: If the manifest is unavailable or there is an error reading it. """ ... - - async def fetch_via_async_gcs_client(self) -> ManifestData | None: - """Use an async GCS client to fetch the manifest from bucket. - Returns: - ManifestData or None - Raises: - RuntimeError: If the async storage client fails to initialize - """ - ... diff --git a/merino/providers/manifest/provider.py b/merino/providers/manifest/provider.py index 8289ae06..70ca18b5 100644 --- a/merino/providers/manifest/provider.py +++ b/merino/providers/manifest/provider.py @@ -86,12 +86,3 @@ def _should_fetch(self) -> bool: def get_manifest_data(self) -> ManifestData | None: """Return manifest data""" return self.manifest_data - - async def get_manifest_data_via_async_client(self) -> ManifestData | None: - """Return manifest data""" - try: - manifest_via_async = await self.backend.fetch_via_async_gcs_client() - return manifest_via_async - except Exception: - # We don't want our provider to blow up in case a RuntimeError is thrown by async_gcs_client module - return None diff --git a/merino/utils/gcs/async_gcs_client.py b/merino/utils/gcs/async_gcs_client.py index 358b3549..12c7f5a0 100644 --- a/merino/utils/gcs/async_gcs_client.py +++ b/merino/utils/gcs/async_gcs_client.py @@ -1,18 +1,16 @@ """Async client that usese community maintained gcloud-aio library to interact with Google Cloud Storage""" import logging -import orjson -from orjson import JSONDecodeError - from gcloud.aio.storage import Storage -from merino.providers.manifest.backends.protocol import ManifestData logger = logging.getLogger(__name__) class AsyncGcsClient: - """Class that provides wrapper functions around gcloud-aio-storage functions. More functionality to be added.""" + """Class that provides wrapper functions around gcloud-aio-storage functions. + Raises a RuntimeError if the client initialization fails. + """ storage: Storage @@ -22,26 +20,20 @@ def __init__(self) -> None: except Exception as ex: raise RuntimeError(f"Failed to initialize async GCS client: {ex}") - async def get_manifest_from_blob( - self, bucket_name: str, blob_name: str - ) -> ManifestData | None: - """Download the top pick blob from the bucket and return as Manifest object""" + async def get_object_from_bucket(self, *, bucket_name: str, object_name: str) -> bytes | None: + """Download an object from a bucket""" try: - top_picks_blob = await self.storage.download(bucket_name, blob_name) - - if top_picks_blob is not None: - manifest = ManifestData(**orjson.loads(top_picks_blob)) + object = await self.storage.download(bucket_name, object_name) + if object: # close the client connection await self.storage.close() - logger.info("Succussfully downloaded manifest blob via async client") + return object - return manifest - except JSONDecodeError: - await self.storage.close() - logger.error(f"Tried to load invalid json for blob: {blob_name}") return None except Exception as ex: await self.storage.close() - logger.error(f"Unexpected error when downloading blob: {ex}") + logger.error( + f"Unexpected error when downloading object from bucket {bucket_name}: {ex}" + ) return None diff --git a/merino/web/api_v1.py b/merino/web/api_v1.py index 8648fa67..1740018e 100644 --- a/merino/web/api_v1.py +++ b/merino/web/api_v1.py @@ -341,15 +341,12 @@ async def get_manifest( with metrics_client.timeit("manifest.request.timing"): manifest_data = provider.get_manifest_data() - manifest_data_via_async_client = await provider.get_manifest_data_via_async_client() if manifest_data and manifest_data.domains: metrics_client.increment("manifest.request.success") return ORJSONResponse( - content=jsonable_encoder( - manifest_data_via_async_client if not None else manifest_data - ), + content=jsonable_encoder(manifest_data), headers={ "Cache-Control": ( f"private, max-age={settings.runtime.default_manifest_response_ttl_sec}"