Skip to content

Commit

Permalink
[lmi] add minimal requestId logging for new requests, and exceptions/…
Browse files Browse the repository at this point in the history
…errors (#2472)
  • Loading branch information
siddvenk authored Oct 23, 2024
1 parent ad5dfe7 commit 0107e30
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 5 deletions.
8 changes: 7 additions & 1 deletion engines/python/setup/djl_python/input_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,20 +78,25 @@ def parse_input_with_formatter(inputs: Input, **kwargs) -> ParsedInput:
input_formatter_function = configs.input_formatter if configs.input_formatter else format_input
for i in range(start_batch_id, len(batch)):
input_item = batch[i]
client_request_id = input_item.get_property("requestId")
try:
# input formatter can be user written as well. We look for model.py and search for the decorator.
request_input = input_formatter_function(input_item, **kwargs)

# populate additional information in request_input
request_id = req_id_counter.next_id() if req_id_counter else i
request_input.request_id = request_id
request_input.client_request_id = client_request_id
request_input.tokenizer = kwargs.get("tokenizer")
request_input.tgi_compat = configs.tgi_compat

# We add server maintained parameters
add_server_maintained_params(request_input, input_item, **kwargs)
request = Request(request_input=request_input)
requests.append(request)
logging.info(
f"[RequestId={client_request_id}] parsed and scheduled for inference"
)
except Exception as e: # pylint: disable=broad-except
err_msg = "Input Parsing failed. Ensure that the request payload is valid. "
# str(e) for KeyError only yields the name of the key, which isn't useful as a response to the client
Expand All @@ -100,7 +105,8 @@ def parse_input_with_formatter(inputs: Input, **kwargs) -> ParsedInput:
else:
err_msg += str(e)
errors[i] = err_msg
logging.warning(err_msg, exc_info=True)
logging.warning(f"[RequestId={client_request_id}" + err_msg,
exc_info=True)
continue

return ParsedInput(errors=errors, requests=requests, batch=batch)
Expand Down
8 changes: 8 additions & 0 deletions engines/python/setup/djl_python/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,11 @@ def set_error_code(self, code: int):
Sets the HTTP Status code to return when inference fails
"""
self.error_code = code

def get_client_request_id(self) -> str:
    """Return the client-supplied requestId for this request.

    The value originates from the ``requestId`` property of the incoming
    HTTP request and was stored on ``request_input`` during input parsing.

    :return: the requestId specified in the HTTP request
    """
    client_id = self.request_input.client_request_id
    return client_id
1 change: 1 addition & 0 deletions engines/python/setup/djl_python/request_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ class RequestInput:
server_parameters: parameters that are modified by the built-in handlers to support backend engines.
"""
request_id: int = None
client_request_id: str = None
output_formatter: Union[Callable, str] = None
parameters: Dict = field(default_factory=lambda: {})
server_parameters: Dict = field(default_factory=lambda: {})
Expand Down
15 changes: 11 additions & 4 deletions engines/python/setup/djl_python/rolling_batch/rolling_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,23 @@ def try_catch_handling(self, *args, **kwargs):
try:
return func(self, *args, **kwargs)
except Exception as e:
logging.exception("Rolling batch inference error")
logging.exception(
f"Rolling batch inference error. There are {len(self.active_requests)} requests impacted. Dumping the impacted requestIds"
)
for request in self.active_requests:
logging.info(
f"[RequestId={request.get_client_request_id()}] impacted by rolling batch error"
)
error_message = "exception occurred during rolling batch inference"
token = Token(-1,
"",
log_prob=-1,
special_token=True,
error_msg=str(e))
error_msg=error_message)
request.set_next_token(token,
last_token=True,
finish_reason="error")
request.set_error_message(str(e))
request.set_error_message(error_message)
# TODO: make configurable
request.set_error_code(424)
response = self.postprocess_results()
Expand Down Expand Up @@ -143,7 +149,8 @@ def postprocess_results(self) -> List[dict]:
res = {
"data": req.get_next_token(),
"last": req.is_last_token(),
"content_type": req.get_content_type()
"content_type": req.get_content_type(),
"request_id": req.get_client_request_id(),
}
if req.get_error_message():
res["error"] = req.get_error_message()
Expand Down
2 changes: 2 additions & 0 deletions engines/python/setup/djl_python/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,13 @@ def rolling_batch_inference(parsed_input, inputs: Input, outputs: Output,
outputs.add_property(f"batch_{i}_Content-Type", "application/json")
else:
content_type = result[idx].get("content_type")
client_request_id = result[idx].get("request_id")
outputs.add(Output.binary_encode(result[idx]),
key="data",
batch_index=i)
if content_type is not None:
outputs.add_property(f"batch_{i}_Content-Type", content_type)
outputs.add_property(f"batch_{i}_requestId", client_request_id)
idx += 1
return outputs

Expand Down

0 comments on commit 0107e30

Please sign in to comment.