Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Status Endpoint #164

Merged
merged 3 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 70 additions & 2 deletions aana/api/request_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from aana.api.event_handlers.event_manager import EventManager
from aana.api.responses import AanaJSONResponse
from aana.configs.settings import settings as aana_settings
from aana.core.models.api import DeploymentStatus, SDKStatus, SDKStatusResponse
from aana.core.models.chat import ChatCompletion, ChatCompletionRequest, ChatDialog
from aana.core.models.sampling import SamplingParams
from aana.core.models.task import TaskId, TaskInfo
Expand All @@ -39,13 +40,19 @@ class RequestHandler:

ready = False

def __init__(self, endpoints: list[Endpoint]):
def __init__(
self, app_name: str, endpoints: list[Endpoint], deployments: list[str]
):
"""Constructor.

Args:
endpoints (dict): List of endpoints for the request
app_name (str): The name of the application.
endpoints (list[Endpoint]): List of endpoints for the request.
deployments (list[str]): List of deployment names for the app.
"""
self.app_name = app_name
self.endpoints = endpoints
self.deployments = deployments

self.event_manager = EventManager()
self.custom_schemas: dict[str, dict] = {}
Expand Down Expand Up @@ -229,3 +236,64 @@ async def _async_chat_completions(
"created": int(time.time()),
"choices": [{"index": 0, "message": response["message"]}],
}

@app.get("/api/status", response_model=SDKStatusResponse)
async def status(self) -> SDKStatusResponse:
    """The endpoint for checking the status of the application.

    Looks up the Ray Serve applications that belong to this Aana app (the
    main app plus one per name in ``self.deployments``) and folds their
    statuses into a single SDK status.

    Returns:
        SDKStatusResponse: The overall SDK status, an overall message
            (error details or "Unknown status", otherwise empty) and the
            status of each app keyed by its Ray Serve app name.
    """
    # The Ray Serve apps that belong to this Aana app.
    app_names = {self.app_name, *self.deployments}
    serve_status = serve.status()
    app_statuses = {
        app_name: app_status
        for app_name, app_status in serve_status.applications.items()
        if app_name in app_names
    }

    # States for which per-deployment error details are reported.
    failed_states = ("DEPLOY_FAILED", "UNHEALTHY")
    # NOT_STARTED also makes the SDK unhealthy, but contributes no error lines.
    unhealthy_states = (*failed_states, "NOT_STARTED")

    app_status_message = ""
    if any(app.status in unhealthy_states for app in app_statuses.values()):
        sdk_status = SDKStatus.UNHEALTHY
        error_messages = []
        for app_name, app_status in app_statuses.items():
            if app_status.status not in failed_states:
                continue
            for deployment_name, deployment_status in app_status.deployments.items():
                # Skip deployments without a message so that healthy
                # deployments of a failed app do not produce empty
                # "Error: ..." lines.
                if deployment_status.message:
                    error_messages.append(
                        f"Error: {deployment_name} ({app_name}): {deployment_status.message}"
                    )
        app_status_message = "\n".join(error_messages)
    elif app_statuses and all(
        app.status == "RUNNING" for app in app_statuses.values()
    ):
        # The explicit non-empty check prevents ``all()`` from vacuously
        # reporting RUNNING when none of the expected apps were found.
        sdk_status = SDKStatus.RUNNING
    elif any(
        app.status in ("DEPLOYING", "DELETING") for app in app_statuses.values()
    ):
        sdk_status = SDKStatus.DEPLOYING
    else:
        sdk_status = SDKStatus.UNHEALTHY
        app_status_message = "Unknown status"

    # Per-app report: the app status plus every non-empty deployment message.
    deployment_statuses = {}
    for app_name, app_status in app_statuses.items():
        messages = [
            f"{deployment_name} ({app_name}): {deployment_status.message}"
            for deployment_name, deployment_status in app_status.deployments.items()
            if deployment_status.message
        ]
        deployment_statuses[app_name] = DeploymentStatus(
            status=app_status.status, message="\n".join(messages)
        )

    return SDKStatusResponse(
        status=sdk_status,
        message=app_status_message,
        deployments=deployment_statuses,
    )
77 changes: 77 additions & 0 deletions aana/core/models/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from enum import Enum

from pydantic import BaseModel, ConfigDict, Field
from ray.serve.schema import ApplicationStatus


class SDKStatus(str, Enum):
    """The status of the SDK."""

    # Mixing in ``str`` makes every member compare equal to its plain string value.
    UNHEALTHY = "UNHEALTHY"
    RUNNING = "RUNNING"
    DEPLOYING = "DEPLOYING"


class DeploymentStatus(BaseModel):
    """The status of a deployment."""

    # NOTE(review): the field is typed with Ray Serve's ApplicationStatus,
    # i.e. it carries an application-level status value.
    status: ApplicationStatus = Field(description="The status of the deployment.")
    # Required field with no default; callers pass "" when there is nothing to report.
    message: str = Field(
        description="The message for more information like error message."
    )


class SDKStatusResponse(BaseModel):
    """The response for the SDK status endpoint.

    Attributes:
        status (SDKStatus): The status of the SDK.
        message (str): The message for more information like error message.
        deployments (dict[str, DeploymentStatus]): The status of each deployment in the Aana app.
    """

    status: SDKStatus = Field(description="The status of the SDK.")
    message: str = Field(
        description="The message for more information like error message."
    )
    deployments: dict[str, DeploymentStatus] = Field(
        description="The status of each deployment in the Aana app."
    )

    # Extra JSON-schema metadata: a description and two example payloads
    # (one all-RUNNING response and one UNHEALTHY response).
    model_config = ConfigDict(
        json_schema_extra={
            "description": "The response for the SDK status endpoint.",
            "examples": [
                {
                    "status": "RUNNING",
                    "message": "",
                    "deployments": {
                        "app": {
                            "status": "RUNNING",
                            "message": "",
                        },
                        "lowercase_deployment": {
                            "status": "RUNNING",
                            "message": "",
                        },
                    },
                },
                {
                    "status": "UNHEALTHY",
                    "message": "Error: Lowercase (lowercase_deployment): A replica's health check failed. "
                    "This deployment will be UNHEALTHY until the replica recovers or a new deploy happens.",
                    "deployments": {
                        "app": {
                            "status": "RUNNING",
                            "message": "",
                        },
                        "lowercase_deployment": {
                            "status": "UNHEALTHY",
                            "message": "A replica's health check failed. This deployment will be UNHEALTHY "
                            "until the replica recovers or a new deploy happens.",
                        },
                    },
                },
            ],
        }
    )
4 changes: 3 additions & 1 deletion aana/sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,9 @@ def get_main_app(self) -> Application:
Application: The main application instance.
"""
return RequestHandler.options(num_replicas=aana_settings.num_workers).bind(
endpoints=self.endpoints.values()
app_name=self.name,
endpoints=self.endpoints.values(),
deployments=list(self.deployments.keys()),
)

def register_endpoint(
Expand Down
6 changes: 1 addition & 5 deletions aana/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,11 @@ def create_app():
app.connect(
port=portpicker.pick_unused_port(), show_logs=True, num_cpus=10
) # pretend we have 10 cpus for testing
app.migrate()

def start_app(deployments, endpoints):
for deployment in deployments:
deployment_instance = deployment["instance"]
if not is_gpu_available():
# if GPU is not available then we don't want to request GPU resources
deployment_instance = deployment_instance.options(
ray_actor_options={"num_gpus": 0}
)

app.register_deployment(
name=deployment["name"], instance=deployment_instance
Expand Down
16 changes: 6 additions & 10 deletions aana/tests/units/test_app.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
# ruff: noqa: S101, S113
import json
from typing import Annotated, TypedDict
from typing import TypedDict

import requests
from pydantic import Field
from ray import serve

from aana.api.api_generation import Endpoint
Expand All @@ -24,16 +23,13 @@ async def lower(self, text: str) -> dict:
Returns:
dict: The lowercase text
"""
return {"text": [t.lower() for t in text]}


TextList = Annotated[list[str], Field(description="List of text to lowercase.")]
return {"text": text.lower()}


class LowercaseEndpointOutput(TypedDict):
"""The output of the lowercase endpoint."""

text: list[str]
text: str


class LowercaseEndpoint(Endpoint):
Expand All @@ -46,7 +42,7 @@ async def initialize(self):
)
await super().initialize()

async def run(self, text: TextList) -> LowercaseEndpointOutput:
async def run(self, text: str) -> LowercaseEndpointOutput:
"""Lowercase the text.

Args:
Expand Down Expand Up @@ -89,11 +85,11 @@ def test_app(create_app):
assert response.json() == {"ready": True}

# Test lowercase endpoint
data = {"text": ["Hello World!", "This is a test."]}
data = {"text": "Hello World! This is a test."}
response = requests.post(
f"http://localhost:{port}{route_prefix}/lowercase",
data={"body": json.dumps(data)},
)
assert response.status_code == 200
lowercase_text = response.json().get("text")
assert lowercase_text == ["hello world!", "this is a test."]
assert lowercase_text == "hello world! this is a test."
65 changes: 65 additions & 0 deletions aana/tests/units/test_app_deploy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# ruff: noqa: S101, S113
from typing import Any

import pytest
from ray import serve

from aana.deployments.base_deployment import BaseDeployment
from aana.exceptions.runtime import FailedDeployment, InsufficientResources


@serve.deployment
class DummyFailingDeployment(BaseDeployment):
    """Deployment stub whose configuration step always raises."""

    async def apply_config(self, config: dict[str, Any]):
        """Raise unconditionally instead of applying ``config``.

        Args:
            config (dict[str, Any]): The deployment configuration (ignored).

        Raises:
            Exception: Always, with the message "Dummy exception".
        """
        raise Exception("Dummy exception")  # noqa: TRY002, TRY003


@serve.deployment
class Lowercase(BaseDeployment):
    """Simple deployment that lowercases the text."""

    async def apply_config(self, config: dict[str, Any]):
        """Apply the configuration to the deployment; there is nothing to set up.

        Args:
            config (dict[str, Any]): The deployment configuration (unused).
        """

    async def lower(self, text: str) -> dict:
        """Lowercase the text.

        Args:
            text (str): The text to lowercase

        Returns:
            dict: The lowercase text under the "text" key
        """
        # ``text`` is a single string, so lowercase it as a whole; iterating
        # over it would yield a list of single characters.
        return {"text": text.lower()}


def test_failed_deployment(create_app):
    """Check that deploying DummyFailingDeployment raises FailedDeployment."""
    failing_instance = DummyFailingDeployment.options(num_replicas=1, user_config={})
    deployment_specs = [{"name": "deployment", "instance": failing_instance}]
    no_endpoints = []

    with pytest.raises(FailedDeployment):
        create_app(deployment_specs, no_endpoints)


def test_insufficient_resources(create_app):
    """Check that requesting more GPUs than available raises InsufficientResources."""
    # Ask for 100 GPUs for a single replica.
    greedy_instance = Lowercase.options(
        num_replicas=1,
        ray_actor_options={"num_gpus": 100},
        user_config={},
    )
    deployment_specs = [{"name": "deployment", "instance": greedy_instance}]
    no_endpoints = []

    with pytest.raises(InsufficientResources):
        create_app(deployment_specs, no_endpoints)
Loading
Loading