-
Notifications
You must be signed in to change notification settings - Fork 4.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix(llm): response stream chunking de-buffering (#14079)
Fixes an issue where "AI streaming responses" were returned to the client all in a single chunk, instead of being returned chunk-by-chunk. Also fixes other parsing issues with Bedrock, where the wrong response content-type was used. FTI-6419
- Loading branch information
Showing
8 changed files
with
63 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
message: "**ai-proxy**: Fixed a bug where response streaming in Gemini and Bedrock providers was returning whole chat responses in one chunk." | ||
type: bugfix | ||
scope: Plugin |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
41 changes: 41 additions & 0 deletions
41
kong/llm/plugin/shared-filters/normalize-response-header.lua
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
-- Filter module table: carries the filter's name, stage and description. | ||
-- NAME is also passed to the ctx accessor factories further down in this file. | ||
local _M = { | ||
NAME = "normalize-response-header", | ||
STAGE = "REQ_POST_PROCESSING", | ||
DESCRIPTION = "normalize upstream response headers", | ||
} | ||
|
||
local ai_plugin_ctx = require("kong.llm.plugin.ctx") | ||
local get_global_ctx, _ = ai_plugin_ctx.get_global_accessors(_M.NAME) | ||
|
||
-- Keys this filter writes into its namespaced context, each mapped to a type name. | ||
-- Handed to the ctx module when the namespaced accessors are created; | ||
-- presumably used there to validate values -- TODO confirm in kong.llm.plugin.ctx. | ||
local FILTER_OUTPUT_SCHEMA = { | ||
stream_content_type = "string", | ||
} | ||
|
||
-- Only the second return value (the setter) is kept; the first one is discarded. | ||
-- NOTE(review): "accesors" is the spelling used at this call site -- check the | ||
-- ctx module's exported name before "correcting" it. | ||
local _, set_ctx = ai_plugin_ctx.get_namespaced_accesors(_M.NAME, FILTER_OUTPUT_SCHEMA) | ||
|
||
--- Normalizes the response headers that are sent back to the client. | ||
-- When the response source is "service": | ||
--   * "stream_mode" set in the global ctx: stores the upstream Content-Type as | ||
--     "stream_content_type" and sets the outgoing Content-Type to text/event-stream; | ||
--   * otherwise, "accept_gzip" set: clears Content-Length and sets Content-Encoding to gzip; | ||
--   * otherwise: clears Content-Encoding. | ||
-- For any other response source: clears Content-Encoding. | ||
-- The single argument is ignored. | ||
-- @return true in every case | ||
function _M:run(_) | ||
-- streaming / gzip handling only applies to responses whose source is "service" | ||
if kong.response.get_source() == "service" then | ||
-- we use openai's streaming mode (SSE) | ||
if get_global_ctx("stream_mode") then | ||
-- we are going to send plaintext event-stream frames for ALL models, | ||
-- but we capture the original incoming content-type for the chunk-parser later. | ||
-- NOTE(review): get_header may yield nil if upstream sent no Content-Type, while the | ||
-- schema declares "string" -- confirm set_ctx tolerates a nil value. | ||
set_ctx("stream_content_type", kong.service.response.get_header("Content-Type")) | ||
kong.response.set_header("Content-Type", "text/event-stream") | ||
|
||
-- TODO: disable gzip for SSE because it needs immediate flush for each chunk | ||
-- and seems nginx doesn't support it | ||
elseif get_global_ctx("accept_gzip") then | ||
-- for gzip response, don't set content-length at all to align with upstream | ||
kong.response.clear_header("Content-Length") | ||
kong.response.set_header("Content-Encoding", "gzip") | ||
else | ||
-- neither streaming nor gzip: drop any Content-Encoding header | ||
kong.response.clear_header("Content-Encoding") | ||
end | ||
else | ||
-- for error and exit response, just use plaintext headers | ||
kong.response.clear_header("Content-Encoding") | ||
end | ||
return true | ||
end | ||
|
||
-- module result: the filter table defined at the top of this file | ||
return _M |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
b7f5ed2
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bazel Build
Docker image available
kong/kong:b7f5ed2c927912e094eb0d58d5ffcdbeb4c55cd9
Artifacts available https://github.com/Kong/kong/actions/runs/12686675764