Skip to content

Commit

Permalink
[DISCO-3067] [load test: skip] Refactor async gcs client to be more g…
Browse files Browse the repository at this point in the history
…eneric
  • Loading branch information
Herraj committed Feb 4, 2025
1 parent 381685d commit d025f61
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 54 deletions.
13 changes: 0 additions & 13 deletions merino/providers/manifest/backends/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
ManifestRemoteFilemanager,
)
from merino.providers.manifest.backends.protocol import ManifestData
from merino.utils.gcs.async_gcs_client import AsyncGcsClient

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -55,15 +54,3 @@ def fetch_manifest_data(self) -> tuple[GetManifestResultCode, ManifestData | Non
case GetManifestResultCode.FAIL:
logger.error("Failed to fetch manifest from GCS (FAIL).")
return result_code, None

async def fetch_via_async_gcs_client(self) -> ManifestData | None:
"""Create an async gcs client and download the same manifest blob
this is temporary redundant logic just to test out the async client downloads from gcs
to mainly test if auth is working properly with this client
"""
async_gcs_client = AsyncGcsClient()
manifest_via_async = await async_gcs_client.get_manifest_from_blob(
bucket_name=settings.image_manifest.gcs_bucket, blob_name=GCS_BLOB_NAME
)

return manifest_via_async
9 changes: 0 additions & 9 deletions merino/providers/manifest/backends/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,3 @@ async def fetch(self) -> tuple[GetManifestResultCode, ManifestData | None]:
ManifestBackendError: If the manifest is unavailable or there is an error reading it.
"""
...

async def fetch_via_async_gcs_client(self) -> ManifestData | None:
"""Use an async GCS client to fetch the manifest from bucket.
Returns:
ManifestData or None
Raises:
RuntimeError: If the async storage client fails to initialize
"""
...
9 changes: 0 additions & 9 deletions merino/providers/manifest/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,3 @@ def _should_fetch(self) -> bool:
def get_manifest_data(self) -> ManifestData | None:
"""Return manifest data"""
return self.manifest_data

async def get_manifest_data_via_async_client(self) -> ManifestData | None:
"""Return manifest data"""
try:
manifest_via_async = await self.backend.fetch_via_async_gcs_client()
return manifest_via_async
except Exception:
# We don't want our provider to blow up in case a RuntimeError is thrown by async_gcs_client module
return None
30 changes: 11 additions & 19 deletions merino/utils/gcs/async_gcs_client.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
"""Async client that usese community maintained gcloud-aio library to interact with Google Cloud Storage"""

import logging
import orjson
from orjson import JSONDecodeError

from gcloud.aio.storage import Storage
from merino.providers.manifest.backends.protocol import ManifestData


logger = logging.getLogger(__name__)


class AsyncGcsClient:
"""Class that provides wrapper functions around gcloud-aio-storage functions. More functionality to be added."""
"""Class that provides wrapper functions around gcloud-aio-storage functions.
Raises a RuntimeError if the client initialization fails.
"""

storage: Storage

Expand All @@ -22,26 +20,20 @@ def __init__(self) -> None:
except Exception as ex:
raise RuntimeError(f"Failed to initialize async GCS client: {ex}")

async def get_manifest_from_blob(
self, bucket_name: str, blob_name: str
) -> ManifestData | None:
"""Download the top pick blob from the bucket and return as Manifest object"""
async def get_object_from_bucket(self, *, bucket_name: str, object_name: str) -> bytes | None:
"""Download an object from a bucket"""
try:
top_picks_blob = await self.storage.download(bucket_name, blob_name)

if top_picks_blob is not None:
manifest = ManifestData(**orjson.loads(top_picks_blob))
object = await self.storage.download(bucket_name, object_name)

if object:
# close the client connection
await self.storage.close()
logger.info("Succussfully downloaded manifest blob via async client")
return object

return manifest
except JSONDecodeError:
await self.storage.close()
logger.error(f"Tried to load invalid json for blob: {blob_name}")
return None
except Exception as ex:
await self.storage.close()
logger.error(f"Unexpected error when downloading blob: {ex}")
logger.error(
f"Unexpected error when downloading object from bucket {bucket_name}: {ex}"
)
return None
5 changes: 1 addition & 4 deletions merino/web/api_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,15 +341,12 @@ async def get_manifest(

with metrics_client.timeit("manifest.request.timing"):
manifest_data = provider.get_manifest_data()
manifest_data_via_async_client = await provider.get_manifest_data_via_async_client()

if manifest_data and manifest_data.domains:
metrics_client.increment("manifest.request.success")

return ORJSONResponse(
content=jsonable_encoder(
manifest_data_via_async_client if not None else manifest_data
),
content=jsonable_encoder(manifest_data),
headers={
"Cache-Control": (
f"private, max-age={settings.runtime.default_manifest_response_ttl_sec}"
Expand Down

0 comments on commit d025f61

Please sign in to comment.