From af269db16765e417882849ff64d4df1ab77da619 Mon Sep 17 00:00:00 2001 From: ykeremy Date: Thu, 4 Jul 2024 00:28:37 +0000 Subject: [PATCH 1/3] =?UTF-8?q?=F0=9F=94=84=20synced=20local=20'skyvern/'?= =?UTF-8?q?=20with=20remote=20'skyvern/'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Uploads to: ``` s3://skyvern-uploads/{env}/{organization_id}/{date}/{uuid}_{filename} Ex: s3://skyvern-uploads/local/o_221157791538210890/2024-07-03/2e95b6fd-9dd6-4c1b-a79f-0e8a24ab14c5_cat.avif ``` ---- | :rocket: | This description was created by [Ellipsis](https://www.ellipsis.dev) for commit 9fcc5363e207314ff99811721274db6b757aa4fc | |--------|--------| ### Summary: Added `upload_file` endpoint to upload files to S3 and return presigned URLs, with `aws_client` initialization moved to `skyvern/forge/sdk/api/aws.py`. **Key points**: - Added `upload_file` endpoint in `skyvern/forge/sdk/routes/agent_protocol.py`. - Endpoint uploads file to `s3://{bucket}/{env}/{organization_id}/{date}/{uuid}_{filename}`. - Returns S3 URI and presigned URL for the uploaded file. - Added `aws_client` initialization in `skyvern/forge/sdk/api/aws.py`. - Removed `AsyncAWSClient` initialization from `skyvern/forge/app.py`. - Handles file reading, S3 upload, and presigned URL generation. 
---- Generated with :heart: by [ellipsis.dev](https://www.ellipsis.dev) --- skyvern/forge/sdk/api/aws.py | 3 ++ skyvern/forge/sdk/routes/agent_protocol.py | 47 +++++++++++++++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/skyvern/forge/sdk/api/aws.py b/skyvern/forge/sdk/api/aws.py index 9a9c80615..91c6f0729 100644 --- a/skyvern/forge/sdk/api/aws.py +++ b/skyvern/forge/sdk/api/aws.py @@ -137,3 +137,6 @@ def key(self) -> str: @property def uri(self) -> str: return self._parsed.geturl() + + +aws_client = AsyncAWSClient() diff --git a/skyvern/forge/sdk/routes/agent_protocol.py b/skyvern/forge/sdk/routes/agent_protocol.py index de5602614..892ad0ba5 100644 --- a/skyvern/forge/sdk/routes/agent_protocol.py +++ b/skyvern/forge/sdk/routes/agent_protocol.py @@ -1,8 +1,21 @@ +import datetime +import uuid from typing import Annotated, Any import structlog import yaml -from fastapi import APIRouter, BackgroundTasks, Depends, Header, HTTPException, Query, Request, Response, status +from fastapi import ( + APIRouter, + BackgroundTasks, + Depends, + Header, + HTTPException, + Query, + Request, + Response, + UploadFile, + status, +) from fastapi.responses import ORJSONResponse from pydantic import BaseModel @@ -10,6 +23,7 @@ from skyvern.exceptions import StepNotFound from skyvern.forge import app from skyvern.forge.prompts import prompt_engine +from skyvern.forge.sdk.api.aws import aws_client from skyvern.forge.sdk.api.llm.exceptions import LLMProviderError from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType from skyvern.forge.sdk.core import skyvern_context @@ -736,3 +750,34 @@ async def update_organization( max_steps_per_run=org_update.max_steps_per_run, max_retries_per_step=org_update.max_retries_per_step, ) + + +# Implement an endpoint that gets a single file and uploads it to S3 +@base_router.post("/upload_file/", include_in_schema=False) +@base_router.post("/upload_file") +async def upload_file( + file: UploadFile, + current_org: 
Organization = Depends(org_auth_service.get_current_org), +) -> Response: + bucket = app.SETTINGS_MANAGER.AWS_S3_BUCKET_UPLOADS + todays_date = datetime.datetime.now().strftime("%Y-%m-%d") + uuid_prefixed_filename = f"{str(uuid.uuid4())}_{file.filename}" + s3_uri = ( + f"s3://{bucket}/{app.SETTINGS_MANAGER.ENV}/{current_org.organization_id}/{todays_date}/{uuid_prefixed_filename}" + ) + data: bytes = await file.read() + uploaded_s3_uri = await aws_client.upload_file(s3_uri, data) + if not uploaded_s3_uri: + raise HTTPException(status_code=500, detail="Failed to upload file to S3.") + + # Generate a presigned URL for the uploaded file + presigned_urls = await aws_client.create_presigned_urls([uploaded_s3_uri]) + if not presigned_urls: + raise HTTPException(status_code=500, detail="Failed to generate presigned URL.") + + presigned_url = presigned_urls[0] + return ORJSONResponse( + content={"s3_uri": uploaded_s3_uri, "presigned_url": presigned_url}, + status_code=200, + media_type="application/json", + ) From 0f65662b804262de6e36b3e156dc29fccafb02e5 Mon Sep 17 00:00:00 2001 From: Kerem Yilmaz Date: Wed, 3 Jul 2024 17:50:39 -0700 Subject: [PATCH 2/3] Limit max upload size and stream the file to s3 --- skyvern/config.py | 1 + skyvern/forge/sdk/api/aws.py | 13 ++++++++++++- skyvern/forge/sdk/routes/agent_protocol.py | 17 +++++++++++++---- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/skyvern/config.py b/skyvern/config.py index 8b01afc82..571e20417 100644 --- a/skyvern/config.py +++ b/skyvern/config.py @@ -48,6 +48,7 @@ class Settings(BaseSettings): # S3 bucket settings AWS_REGION: str = "us-east-1" AWS_S3_BUCKET_UPLOADS: str = "skyvern-uploads" + MAX_UPLOAD_FILE_SIZE: int = 100 * 1024 * 1024 # 10 MB SKYVERN_TELEMETRY: bool = True ANALYTICS_ID: str = "anonymous" diff --git a/skyvern/forge/sdk/api/aws.py b/skyvern/forge/sdk/api/aws.py index 91c6f0729..92ed4ac41 100644 --- a/skyvern/forge/sdk/api/aws.py +++ b/skyvern/forge/sdk/api/aws.py @@ -1,5 +1,5 @@ from 
enum import StrEnum -from typing import Any, Callable +from typing import IO, Any, Callable from urllib.parse import urlparse import aioboto3 @@ -55,6 +55,17 @@ async def upload_file(self, uri: str, data: bytes, client: AioBaseClient = None) LOG.exception("S3 upload failed.", uri=uri) return None + @execute_with_async_client(client_type=AWSClientType.S3) + async def upload_file_stream(self, uri: str, file_obj: IO[bytes], client: AioBaseClient = None) -> str | None: + try: + parsed_uri = S3Uri(uri) + await client.upload_fileobj(file_obj, parsed_uri.bucket, parsed_uri.key) + LOG.debug("Upload file stream success", uri=uri) + return uri + except Exception: + LOG.exception("S3 upload stream failed.", uri=uri) + return None + @execute_with_async_client(client_type=AWSClientType.S3) async def upload_file_from_path(self, uri: str, file_path: str, client: AioBaseClient = None) -> None: try: diff --git a/skyvern/forge/sdk/routes/agent_protocol.py b/skyvern/forge/sdk/routes/agent_protocol.py index 892ad0ba5..7fc8d4afc 100644 --- a/skyvern/forge/sdk/routes/agent_protocol.py +++ b/skyvern/forge/sdk/routes/agent_protocol.py @@ -752,11 +752,20 @@ async def update_organization( ) -# Implement an endpoint that gets a single file and uploads it to S3 +async def validate_file_size(file: UploadFile) -> UploadFile: + # Check the file size + if file.size > app.SETTINGS_MANAGER.MAX_UPLOAD_FILE_SIZE: + raise HTTPException( + status_code=413, + detail=f"File size exceeds the maximum allowed size ({app.SETTINGS_MANAGER.MAX_UPLOAD_FILE_SIZE} bytes)", + ) + return file + + @base_router.post("/upload_file/", include_in_schema=False) @base_router.post("/upload_file") async def upload_file( - file: UploadFile, + file: UploadFile = Depends(validate_file_size), current_org: Organization = Depends(org_auth_service.get_current_org), ) -> Response: bucket = app.SETTINGS_MANAGER.AWS_S3_BUCKET_UPLOADS @@ -765,8 +774,8 @@ async def upload_file( s3_uri = ( 
f"s3://{bucket}/{app.SETTINGS_MANAGER.ENV}/{current_org.organization_id}/{todays_date}/{uuid_prefixed_filename}" ) - data: bytes = await file.read() - uploaded_s3_uri = await aws_client.upload_file(s3_uri, data) + # Stream the file to S3 + uploaded_s3_uri = await aws_client.upload_file_stream(s3_uri, file.file) if not uploaded_s3_uri: raise HTTPException(status_code=500, detail="Failed to upload file to S3.") From 726cdcca4fa5bf53295606bf3fbcd24dcd7a162d Mon Sep 17 00:00:00 2001 From: Kerem Yilmaz Date: Wed, 3 Jul 2024 17:53:01 -0700 Subject: [PATCH 3/3] Update max upload size to 10MB --- skyvern/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skyvern/config.py b/skyvern/config.py index 571e20417..ac7c79912 100644 --- a/skyvern/config.py +++ b/skyvern/config.py @@ -48,7 +48,7 @@ class Settings(BaseSettings): # S3 bucket settings AWS_REGION: str = "us-east-1" AWS_S3_BUCKET_UPLOADS: str = "skyvern-uploads" - MAX_UPLOAD_FILE_SIZE: int = 100 * 1024 * 1024 # 10 MB + MAX_UPLOAD_FILE_SIZE: int = 10 * 1024 * 1024 # 10 MB SKYVERN_TELEMETRY: bool = True ANALYTICS_ID: str = "anonymous"