Skip to content
This repository has been archived by the owner on Oct 19, 2024. It is now read-only.

Commit

Permalink
[Serve] Improve logging (#724)
Browse files Browse the repository at this point in the history
  • Loading branch information
merrymercy committed Oct 1, 2022
1 parent 56fb3e6 commit 4a09b8d
Show file tree
Hide file tree
Showing 8 changed files with 110 additions and 23 deletions.
10 changes: 5 additions & 5 deletions examples/llm_serving/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ Serving OPT-175B using Alpa
===========================

This tutorial shows how to set up a serving system to serve the largest available pretrained language model `OPT-175B <https://github.com/facebookresearch/metaseq/tree/main/projects/OPT>`_.
You can also try a live demo at `Alpa-OPT Demo <https://opt.alpa.ai>`_.

👉 Try a live demo at `Alpa-OPT Demo <https://opt.alpa.ai>`_ 👈

Overview
========
Expand Down Expand Up @@ -203,12 +204,11 @@ They will use two ports. The port of the website is defined in the command line

.. code:: shell
# Launch the website
uvicorn launch_website:app --host 0.0.0.0 --port 8001
# Launch the model worker (in a new terminal)
# Launch the model worker
python3 launch_model_worker.py --model alpa/opt-175b
# Launch the website (in a new terminal)
uvicorn launch_website:app --host 0.0.0.0 --port 8001
Then open ``http://[IP-ADDRESS]:8001`` in your browser to try out the model!

Expand Down
2 changes: 1 addition & 1 deletion examples/llm_serving/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import requests

DEFAULT_URL = "https://opt.alpa.ai/"
DEFAULT_URL = "https://opt.alpa.ai"


class Client(object):
Expand Down
1 change: 0 additions & 1 deletion examples/llm_serving/launch_model_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,6 @@ def check_authorization(self, args, request):
parser.add_argument("--host", type=str, default="0.0.0.0")
parser.add_argument("--torch-device", type=str, default="cpu")
parser.add_argument("--no-recaptcha", action="store_true")
parser.add_argument("--keys-file", type=str, default="keys.json")
parser.add_argument("--register-name", type=str, default="default")
parser.add_argument("--ssl-keyfile", type=str)
parser.add_argument("--ssl-certfile", type=str)
Expand Down
71 changes: 70 additions & 1 deletion examples/llm_serving/launch_website.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import logging
from typing import Union

from fastapi import FastAPI, Request
Expand All @@ -19,12 +20,80 @@
else:
sampling_css = ""


# Module-level reCAPTCHA object, created once at import time.
# NOTE(review): USE_RECAPTCHA presumably switches verification on/off inside
# load_recaptcha — confirm against its definition.
recaptcha = load_recaptcha(USE_RECAPTCHA)


def log_scope(request):
    """Log a trimmed snapshot of the request's ASGI scope and return it.

    The snapshot is a shallow copy, so ``request.scope`` itself is left
    untouched and the request stays usable afterwards (for example when it is
    handed to a template response). Keys that are absent are skipped instead
    of raising ``KeyError``.

    Args:
        request: Object exposing a dict-like ``scope`` attribute.

    Returns:
        dict: A copy of the scope without the "app", "fastapi_astack",
        "router", "endpoint" and "route" entries, plus a "tstamp" entry
        holding ``time.time()`` at the moment of logging.
    """
    scope = dict(request.scope)
    # Drop the framework-owned entries; the caller pickles the returned dict,
    # so only plain request data should remain in it.
    for key in ("app", "fastapi_astack", "router", "endpoint", "route"):
        scope.pop(key, None)
    scope["tstamp"] = time.time()
    logging.info(scope)
    return scope


##### Redirect Begin #####
import asyncio
import pickle
import time

from alpa.serve.http_util import HTTPRequestWrapper, make_error_response
import ray
from starlette.responses import JSONResponse
# Connect to Ray when this module is imported, using the "alpa_serve" namespace.
# NOTE(review): address="auto" presumably attaches to an already-running
# cluster rather than starting a new one — confirm against the Ray docs.
ray.init(address="auto", namespace="alpa_serve")

# Handle to the "mesh_group_manager_0" actor. Stays None until
# connect_manager() finds the actor, and redirect() resets it to None when a
# call raises RayActorError.
manager = None

async def connect_manager():
    """Keep the module-level ``manager`` actor handle populated.

    Runs forever: once per second, if ``manager`` is unset, try to look up the
    "mesh_group_manager_0" actor and store the handle. A failed lookup
    (``ValueError``) leaves ``manager`` as ``None`` until the next attempt.
    """
    global manager

    def _lookup():
        # Return the actor handle, or None when the lookup fails.
        try:
            return ray.get_actor("mesh_group_manager_0")
        except ValueError:
            return None

    while True:
        if manager is None:
            manager = _lookup()
        # Sleep on every iteration so the loop always yields to other tasks.
        await asyncio.sleep(1)

# Schedule connect_manager() as a background task on the event loop as soon as
# this module is imported.
# NOTE(review): this relies on asyncio.get_event_loop() returning the loop the
# server will actually run — confirm for the deployed Python/uvicorn versions.
asyncio.get_event_loop().create_task(connect_manager())

async def redirect(request):
    """Forward an HTTP request to the manager actor and return its reply.

    The request body and a trimmed copy of its scope (see ``log_scope``) are
    wrapped in an ``HTTPRequestWrapper``, pickled, and sent to the manager's
    ``handle_request`` method under the name "default".

    Args:
        request: The incoming request; must provide ``await request.body()``
            and a ``scope`` attribute.

    Returns:
        The value returned by the manager actor. If that value is an
        exception, if the manager is not connected yet, or if the actor call
        fails with ``RayActorError``, a ``JSONResponse`` with status 400 built
        from ``make_error_response`` is returned instead.
    """
    global manager

    body = await request.body()
    scope = log_scope(request)
    payload = pickle.dumps(HTTPRequestWrapper(scope, body))

    actor = manager
    if actor is None:
        # connect_manager() has not found the actor (yet); report an error
        # instead of failing with AttributeError on None.
        ret = RuntimeError("The model worker manager is not connected yet.")
    else:
        try:
            ret = await actor.handle_request.remote("default", payload)
        except ray.exceptions.RayActorError as err:
            # Drop the handle so connect_manager() looks the actor up again,
            # and keep the exception so `ret` is always bound below.
            manager = None
            ret = err

    if isinstance(ret, Exception):
        ret = make_error_response(ret)
        ret = JSONResponse(ret, status_code=400)
    return ret


@app.post("/completions")
async def completions(request: Request):
    """Handle POST /completions by delegating to ``redirect``."""
    response = await redirect(request)
    return response


@app.post("/logprobs")
async def logprobs(request: Request):
    """Handle POST /logprobs by delegating to ``redirect``."""
    response = await redirect(request)
    return response


# NOTE(review): this handler serves POST /call but reuses the function name
# `logprobs`, so it rebinds the module-level name of the /logprobs handler.
# The route is registered by the decorator, so both endpoints still work;
# consider renaming this function to `call`.
@app.post("/call")
async def logprobs(request: Request):
    """Handle POST /call by delegating to ``redirect``."""
    response = await redirect(request)
    return response

##### Redirect End #####

@app.get("/")
async def homepage(request: Request):
log_scope(request)
return templates.TemplateResponse("index.html", {
"request": request,
"num_return_sequences": NUM_RETURN_SEQ,
Expand Down
21 changes: 21 additions & 0 deletions examples/llm_serving/log_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Logging configuration in the Python logging dictConfig schema (version 1).
version: 1
formatters:
simple:
# One line per record: timestamp | level | logger name | message.
format: "%(asctime)s | %(levelname)s | %(name)s | %(message)s"
datefmt: "%Y-%m-%d %H:%M:%S"
handlers:
# Console handler: INFO and above, written to stdout.
console:
class : logging.StreamHandler
formatter: simple
level : INFO
stream : ext://sys.stdout
# File handler: INFO and above, rotated on a day-based schedule using UTC.
# NOTE(review): the weblogs/ directory presumably has to exist before startup,
# since the handler opens the file directly — confirm.
file:
class : logging.handlers.TimedRotatingFileHandler
filename: weblogs/llm_serving.website.log
when: "D"
utc: True
formatter: simple
level : INFO
# The root logger sends INFO and above to both handlers.
root:
level: INFO
handlers: [console, file]
1 change: 1 addition & 0 deletions examples/llm_serving/service/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Alpa serve url
ALPA_SERVE_PORT = 20001
ALPA_SERVE_URL = f"window.location.protocol + '//' + window.location.hostname + ':{ALPA_SERVE_PORT}/completions'"
#ALPA_SERVE_URL = f'"completions"'

# Generation params
NUM_BEAMS = 1
Expand Down
17 changes: 9 additions & 8 deletions examples/llm_serving/service/static/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -178,16 +178,17 @@
if ("responseJSON" in xhr) {
msg = "Error: " + xhr.responseJSON.message;
if (msg.includes("No replica of model") ||
msg.includes("is not registered")) {
msg += "\nThe server is probably under maintenance. " +
"Please come back later.";
msg.includes("is not registered") ||
msg.includes("object has no attribute")) {
msg += "\nThe server is probably under regular maintenance. " +
"Please come back 10 minutes later.";
}
$("#error").text(msg);
} else {
$("#error").text(
"Cannot connect to the server due to unknown error. " +
"\nThe server is probably under maintenance. " +
"Please come back later.");
"Cannot connect to the server due to unknown errors. " +
"\nThe server is probably under regular maintenance. " +
"Please come back 10 minutes later.");
}
}
});
Expand Down Expand Up @@ -379,7 +380,7 @@ <h1 class="display-2">Large Model for Everyone</h1>

<div class="d-grid gap-4 d-sm-flex justify-content-sm-center">
<a href="#generation" class="btn btn-primary px-4 btn-lg">Try Live Generation</a>
<a href="https://alpa-projects.github.io/tutorials/llm_serving.html" class="btn btn-outline-primary px-4 btn-lg" target="_blank">Host Your Own Service</a>
<a href="https://alpa-projects.github.io/tutorials/opt_serving.html" class="btn btn-outline-primary px-4 btn-lg" target="_blank">Host Your Own Service</a>
</div>
</div>

Expand Down Expand Up @@ -423,7 +424,7 @@ <h1 class="display-2">Large Model for Everyone</h1>
<div class="form-group row" data-html2canvas-ignore="true" style="{{sampling_css}}">
<label for="temperature_slider" class="col col-form-label text-end fw-bold">Temperature:</label>
<div class="col my-2">
<input type="range" value="0.7" min="0.1" max="1.0" step="0.10" class="form-range"
<input type="range" value="0.7" min="0.0" max="1.0" step="0.10" class="form-range"
oninput="this.parentNode.nextElementSibling.value = this.value" name="temperature" id="temperature_slider">
</div>
<output class='col col-form-label'>0.7</output>
Expand Down
10 changes: 3 additions & 7 deletions examples/llm_serving/service/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,9 @@ def build_logger():
# Add a file handler for all loggers
if handler is None:
os.makedirs(LOGDIR, exist_ok=True)
logfile_path = os.path.join(
LOGDIR,
f"alpa.llm_serving.log.{datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}"
)
handler = logging.handlers.RotatingFileHandler(logfile_path,
maxBytes=1024 * 1024,
backupCount=100000)
filename = os.path.join(LOGDIR, f"llm_serving.worker.log")
handler = logging.handlers.TimedRotatingFileHandler(
filename, when='D', utc=True)
handler.setFormatter(formatter)

for name, item in logging.root.manager.loggerDict.items():
Expand Down

0 comments on commit 4a09b8d

Please sign in to comment.