mobiusml · movchan74 · Aug 16, 2024 · Aug 14, 2024 · Aug 14, 2024 · Aug 15, 2024
diff --git a/aana/api/request_handler.py b/aana/api/request_handler.py
@@ -15,6 +15,7 @@
 from aana.api.event_handlers.event_manager import EventManager
 from aana.api.responses import AanaJSONResponse
 from aana.configs.settings import settings as aana_settings
+from aana.core.models.api import DeploymentStatus, SDKStatus, SDKStatusResponse
 from aana.core.models.chat import ChatCompletion, ChatCompletionRequest, ChatDialog
 from aana.core.models.sampling import SamplingParams
 from aana.core.models.task import TaskId, TaskInfo
@@ -39,13 +40,19 @@ class RequestHandler:
 
     ready = False
 
-    def __init__(self, endpoints: list[Endpoint]):
+    def __init__(
+        self, app_name: str, endpoints: list[Endpoint], deployments: list[str]
+    ):
         """Constructor.
 
         Args:
-            endpoints (dict): List of endpoints for the request
+            app_name (str): The name of the application.
+            endpoints (dict): List of endpoints for the request.
+            deployments (list[str]): List of deployment names for the app.
         """
+        self.app_name = app_name
         self.endpoints = endpoints
+        self.deployments = deployments
 
         self.event_manager = EventManager()
         self.custom_schemas: dict[str, dict] = {}
@@ -229,3 +236,64 @@ async def _async_chat_completions(
                 "created": int(time.time()),
                 "choices": [{"index": 0, "message": response["message"]}],
             }
+
+    @app.get("/api/status", response_model=SDKStatusResponse)
+    async def status(self) -> SDKStatusResponse:
+        """Report the aggregated health of this Aana app and its deployments.
+
+        The main app and every deployment name are looked up as Ray Serve
+        applications in `serve.status()`; apps Serve does not report are left out.
+
+        Returns:
+            SDKStatusResponse: UNHEALTHY if any application failed to deploy, is
+                unhealthy or has not started; RUNNING if all are running;
+                DEPLOYING if any is deploying or being deleted; UNHEALTHY with
+                an "Unknown status" message otherwise.
+        """
+        failed_states = ("DEPLOY_FAILED", "UNHEALTHY")
+        # Ray Serve apps that belong to this Aana app.
+        own_apps = {self.app_name, *self.deployments}
+        app_statuses = {
+            name: info
+            for name, info in serve.status().applications.items()
+            if name in own_apps
+        }
+        states = [info.status for info in app_statuses.values()]
+
+        app_status_message = ""
+        if any(state in (*failed_states, "NOT_STARTED") for state in states):
+            sdk_status = SDKStatus.UNHEALTHY
+            # Skip deployments without a message: healthy siblings inside a failed
+            # app would otherwise show up as empty "Error:" lines.
+            app_status_message = "\n".join(
+                f"Error: {dep_name} ({name}): {dep.message}"
+                for name, info in app_statuses.items()
+                if info.status in failed_states
+                for dep_name, dep in info.deployments.items()
+                if dep.message
+            )
+        # `all()` is vacuously true for an empty list, so require at least one app.
+        elif states and all(state == "RUNNING" for state in states):
+            sdk_status = SDKStatus.RUNNING
+        elif any(state in ("DEPLOYING", "DELETING") for state in states):
+            sdk_status = SDKStatus.DEPLOYING
+        else:
+            sdk_status = SDKStatus.UNHEALTHY
+            app_status_message = "Unknown status"
+
+        deployment_statuses = {}
+        for name, info in app_statuses.items():
+            detail = "\n".join(
+                f"{dep_name} ({name}): {dep.message}"
+                for dep_name, dep in info.deployments.items()
+                if dep.message
+            )
+            deployment_statuses[name] = DeploymentStatus(
+                status=info.status, message=detail
+            )
+
+        return SDKStatusResponse(
+            status=sdk_status,
+            message=app_status_message,
+            deployments=deployment_statuses,
+        )
diff --git a/aana/core/models/api.py b/aana/core/models/api.py
@@ -0,0 +1,77 @@
+from enum import Enum
+
+from pydantic import BaseModel, ConfigDict, Field
+from ray.serve.schema import ApplicationStatus
+
+
+class SDKStatus(str, Enum):
+    """The status of the SDK."""
+
+    UNHEALTHY = "UNHEALTHY"  # an app failed, is unhealthy/not started, or state is unknown
+    RUNNING = "RUNNING"  # every app reported by Ray Serve is running
+    DEPLOYING = "DEPLOYING"  # at least one app is deploying or being deleted
+
+
+class DeploymentStatus(BaseModel):
+    """The status of a deployment."""
+
+    status: ApplicationStatus = Field(description="The status of the deployment.")
+    message: str = Field(  # newline-joined deployment messages; empty when there are none
+        description="The message for more information like error message."
+    )
+
+
+class SDKStatusResponse(BaseModel):
+    """The response for the SDK status endpoint.
+
+    Attributes:
+        status (SDKStatus): The status of the SDK.
+        message (str): The message for more information like error message.
+        deployments (dict[str, DeploymentStatus]): The status of each deployment in the Aana app.
+    """
+
+    status: SDKStatus = Field(description="The status of the SDK.")
+    message: str = Field(
+        description="The message for more information like error message."
+    )
+    deployments: dict[str, DeploymentStatus] = Field(
+        description="The status of each deployment in the Aana app."
+    )
+
+    model_config = ConfigDict(
+        json_schema_extra={
+            "description": "The response for the SDK status endpoint.",
+            "examples": [
+                {
+                    "status": "RUNNING",
+                    "message": "",
+                    "deployments": {
+                        "app": {
+                            "status": "RUNNING",
+                            "message": "",
+                        },
+                        "lowercase_deployment": {
+                            "status": "RUNNING",
+                            "message": "",
+                        },
+                    },
+                },
+                {
+                    "status": "UNHEALTHY",
+                    "message": "Error: Lowercase (lowercase_deployment): A replica's health check failed. "
+                    "This deployment will be UNHEALTHY until the replica recovers or a new deploy happens.",
+                    "deployments": {
+                        "app": {"status": "RUNNING", "message": ""},
+                        # Per-deployment messages carry the
+                        # "<deployment> (<app>): " prefix, matching what the
+                        # status endpoint emits.
+                        "lowercase_deployment": {
+                            "status": "UNHEALTHY",
+                            "message": "Lowercase (lowercase_deployment): A replica's health check failed. "
+                            "This deployment will be UNHEALTHY until the replica recovers or a new deploy happens.",
+                        },
+                    },
+                },
+            ],
+        }
+    )
diff --git a/aana/sdk.py b/aana/sdk.py
@@ -223,7 +223,9 @@ def get_main_app(self) -> Application:
             Application: The main application instance.
         """
         return RequestHandler.options(num_replicas=aana_settings.num_workers).bind(
-            endpoints=self.endpoints.values()
+            app_name=self.name,
+            endpoints=self.endpoints.values(),
+            deployments=list(self.deployments.keys()),
         )
 
     def register_endpoint(

diff --git a/aana/tests/conftest.py b/aana/tests/conftest.py
@@ -76,15 +76,11 @@ def create_app():
     app.connect(
         port=portpicker.pick_unused_port(), show_logs=True, num_cpus=10
     )  # pretend we have 10 cpus for testing
+    app.migrate()
 
     def start_app(deployments, endpoints):
         for deployment in deployments:
             deployment_instance = deployment["instance"]
-            if not is_gpu_available():
-                # if GPU is not available then we don't want to request GPU resources
-                deployment_instance = deployment_instance.options(
-                    ray_actor_options={"num_gpus": 0}
-                )
 
             app.register_deployment(
                 name=deployment["name"], instance=deployment_instance

diff --git a/aana/tests/units/test_app.py b/aana/tests/units/test_app.py
@@ -1,9 +1,8 @@
 # ruff: noqa: S101, S113
 import json
-from typing import Annotated, TypedDict
+from typing import TypedDict
 
 import requests
-from pydantic import Field
 from ray import serve
 
 from aana.api.api_generation import Endpoint
@@ -24,16 +23,13 @@ async def lower(self, text: str) -> dict:
         Returns:
             dict: The lowercase text
         """
-        return {"text": [t.lower() for t in text]}
-
-
-TextList = Annotated[list[str], Field(description="List of text to lowercase.")]
+        return {"text": text.lower()}
 
 
 class LowercaseEndpointOutput(TypedDict):
     """The output of the lowercase endpoint."""
 
-    text: list[str]
+    text: str
 
 
 class LowercaseEndpoint(Endpoint):
@@ -46,7 +42,7 @@ async def initialize(self):
         )
         await super().initialize()
 
-    async def run(self, text: TextList) -> LowercaseEndpointOutput:
+    async def run(self, text: str) -> LowercaseEndpointOutput:
         """Lowercase the text.
 
         Args:
@@ -89,11 +85,11 @@ def test_app(create_app):
     assert response.json() == {"ready": True}
 
     # Test lowercase endpoint
-    data = {"text": ["Hello World!", "This is a test."]}
+    data = {"text": "Hello World! This is a test."}
     response = requests.post(
         f"http://localhost:{port}{route_prefix}/lowercase",
         data={"body": json.dumps(data)},
     )
     assert response.status_code == 200
     lowercase_text = response.json().get("text")
-    assert lowercase_text == ["hello world!", "this is a test."]
+    assert lowercase_text == "hello world! this is a test."
diff --git a/aana/tests/units/test_app_deploy.py b/aana/tests/units/test_app_deploy.py
@@ -0,0 +1,65 @@
+# ruff: noqa: S101, S113
+from typing import Any
+
+import pytest
+from ray import serve
+
+from aana.deployments.base_deployment import BaseDeployment
+from aana.exceptions.runtime import FailedDeployment, InsufficientResources
+
+
+@serve.deployment
+class DummyFailingDeployment(BaseDeployment):
+    """Simple deployment that fails on initialization."""
+
+    async def apply_config(self, config: dict[str, Any]):
+        """Raise unconditionally without reading `config`, so initialization always fails."""
+        raise Exception("Dummy exception")  # noqa: TRY002, TRY003
+
+
+@serve.deployment
+class Lowercase(BaseDeployment):
+    """Simple deployment that lowercases the text."""
+
+    async def apply_config(self, config: dict[str, Any]):
+        """Accept the configuration; this deployment has nothing to initialize."""
+        pass
+
+    async def lower(self, text: str) -> dict:
+        """Lowercase the text.
+
+        Args:
+            text (str): The text to lowercase
+
+        Returns:
+            dict: The lowercased text under the "text" key
+        """
+        return {"text": text.lower()}
+
+
+def test_failed_deployment(create_app):
+    """Verify that a deployment failing on initialization raises FailedDeployment.
+
+    DummyFailingDeployment raises from `apply_config`.
+    """
+    failing_instance = DummyFailingDeployment.options(num_replicas=1, user_config={})
+    failing_deployment = {"name": "deployment", "instance": failing_instance}
+    # Only the deployment is registered; no endpoints are passed.
+    with pytest.raises(FailedDeployment):
+        create_app([failing_deployment], [])
+
+
+def test_insufficient_resources(create_app):
+    """Verify that requesting unavailable resources raises InsufficientResources.
+
+    The deployment asks for 100 GPUs through its Ray actor options.
+    """
+    gpu_hungry_instance = Lowercase.options(
+        num_replicas=1,
+        ray_actor_options={"num_gpus": 100},  # requires 100 GPUs
+        user_config={},
+    )
+    gpu_hungry_deployment = {"name": "deployment", "instance": gpu_hungry_instance}
+
+    with pytest.raises(InsufficientResources):
+        create_app([gpu_hungry_deployment], [])