From 0eb945bf88e397b8fea4f25d2e291ec774c6efee Mon Sep 17 00:00:00 2001 From: Siddharth More Date: Mon, 23 Sep 2024 17:37:22 -0700 Subject: [PATCH 1/6] Add CorrelationID to chat request Signed-off-by: Siddharth More --- .../endpoints/localai/get_token_metrics.go | 23 +++++++++++++++++++ core/http/endpoints/openai/chat.go | 7 ++++++ core/http/endpoints/openai/completion.go | 2 ++ core/http/endpoints/openai/request.go | 13 ++++++++++- 4 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 core/http/endpoints/localai/get_token_metrics.go diff --git a/core/http/endpoints/localai/get_token_metrics.go b/core/http/endpoints/localai/get_token_metrics.go new file mode 100644 index 000000000000..533ccc2e355d --- /dev/null +++ b/core/http/endpoints/localai/get_token_metrics.go @@ -0,0 +1,23 @@ +package localai + +import ( + "github.com/gofiber/fiber/v2" + "github.com/mudler/LocalAI/core/backend" + "github.com/mudler/LocalAI/core/config" + "github.com/rs/zerolog/log" +) + +// GetMetricsEndpoint exposes the GetMetrics method via an HTTP endpoint +func GetMetricsEndpoint(cl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + // Assuming you have logic in the backend for fetching the metrics + metrics, err := backend.TokenMetrics(loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig)(cl *config.BackendConfigLoader, appConfig *config.ApplicationConfig).. // Call your backend method + if err != nil { + log.Err(err).Msg("Failed to get metrics") + return fiber.NewError(fiber.StatusInternalServerError, "Failed to get metrics") + } + + // Return metrics as a JSON response + return c.JSON(metrics) + } +} diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index b937120a3331..7aebbd34054b 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -161,6 +161,12 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup textContentToReturn = "" id = uuid.New().String() created = int(time.Now().Unix()) + // Set CorrelationID + correlationID := c.Get("X-Correlation-ID") + if len(strings.TrimSpace(correlationID)) == 0 { + correlationID = id + } + c.Set("X-correlationID", correlationID) modelFile, input, err := readRequest(c, cl, ml, startupOptions, true) if err != nil { @@ -444,6 +450,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup c.Set("Cache-Control", "no-cache") c.Set("Connection", "keep-alive") c.Set("Transfer-Encoding", "chunked") + c.Set("X-correlationID", id) responses := make(chan schema.OpenAIResponse) diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index b087cc5f8d35..e5de1b3f0296 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -57,6 +57,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a } return func(c *fiber.Ctx) error { + // Add Correlation + c.Set("X-Correlation-ID", id) modelFile, input, err := readRequest(c, cl, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go index e24dd28f2e4b..d6182a391fe8 100644 --- a/core/http/endpoints/openai/request.go +++ b/core/http/endpoints/openai/request.go @@ -6,6 +6,7 @@ import ( "fmt" "github.com/gofiber/fiber/v2" + 
"github.com/google/uuid" "github.com/mudler/LocalAI/core/config" fiberContext "github.com/mudler/LocalAI/core/http/ctx" "github.com/mudler/LocalAI/core/schema" @@ -15,6 +16,11 @@ import ( "github.com/rs/zerolog/log" ) +type correlationIDKeyType string + +// CorrelationIDKey to track request across process boundary +const CorrelationIDKey correlationIDKeyType = "correlationID" + func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) { input := new(schema.OpenAIRequest) @@ -24,9 +30,14 @@ func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLo } received, _ := json.Marshal(input) + // Extract or generate the correlation ID + correlationID := c.Get("X-Correlation-ID", uuid.New().String()) ctx, cancel := context.WithCancel(o.Context) - input.Context = ctx + // Add the correlation ID to the new context + ctxWithCorrelationID := context.WithValue(ctx, CorrelationIDKey, correlationID) + + input.Context = ctxWithCorrelationID input.Cancel = cancel log.Debug().Msgf("Request received: %s", string(received)) From d73497b997b77e4514e57a507e267bb1b2e16e4c Mon Sep 17 00:00:00 2001 From: Siddharth More Date: Mon, 23 Sep 2024 17:41:01 -0700 Subject: [PATCH 2/6] remove get_token_metrics Signed-off-by: Siddharth More --- .../endpoints/localai/get_token_metrics.go | 23 ------------------- 1 file changed, 23 deletions(-) delete mode 100644 core/http/endpoints/localai/get_token_metrics.go diff --git a/core/http/endpoints/localai/get_token_metrics.go b/core/http/endpoints/localai/get_token_metrics.go deleted file mode 100644 index 533ccc2e355d..000000000000 --- a/core/http/endpoints/localai/get_token_metrics.go +++ /dev/null @@ -1,23 +0,0 @@ -package localai - -import ( - "github.com/gofiber/fiber/v2" - "github.com/mudler/LocalAI/core/backend" - "github.com/mudler/LocalAI/core/config" - "github.com/rs/zerolog/log" -) - -// GetMetricsEndpoint exposes the GetMetrics method via an HTTP endpoint -func GetMetricsEndpoint(cl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { - return func(c *fiber.Ctx) error { - // Assuming you have logic in the backend for fetching the metrics - metrics, err := backend.TokenMetrics(loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig)(cl *config.BackendConfigLoader, appConfig *config.ApplicationConfig).. 
// Call your backend method - if err != nil { - log.Err(err).Msg("Failed to get metrics") - return fiber.NewError(fiber.StatusInternalServerError, "Failed to get metrics") - } - - // Return metrics as a JSON response - return c.JSON(metrics) - } -} From 4955557af25907d2e6c613516788eeb1203309f7 Mon Sep 17 00:00:00 2001 From: Siddharth More Date: Mon, 23 Sep 2024 22:13:12 -0700 Subject: [PATCH 3/6] Add CorrelationID to proto Signed-off-by: Siddharth More --- backend/backend.proto | 1 + backend/cpp/llama/grpc-server.cpp | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/backend/backend.proto b/backend/backend.proto index 31bd63e50867..b2d4518e1333 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -136,6 +136,7 @@ message PredictOptions { repeated Message Messages = 44; repeated string Videos = 45; repeated string Audios = 46; + string CorrelationId = 47; } // The response message containing the result diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 56d59d217a7c..2ae1d33b93d1 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -2106,6 +2106,9 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama data["ignore_eos"] = predict->ignoreeos(); data["embeddings"] = predict->embeddings(); + // Add the correlation_id to json data + data["correlation_id"] = predict->correlation_id(); + // for each image in the request, add the image data // for (int i = 0; i < predict->images_size(); i++) { @@ -2344,6 +2347,11 @@ class BackendServiceImpl final : public backend::Backend::Service { int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0); reply.set_prompt_tokens(tokens_evaluated); + // Log Request Correlation Id + LOG_VERBOSE("correlation:", { + { "id", data["correlation_id"] } + }); + // Send the reply writer->Write(reply); @@ -2367,6 +2375,12 @@ class BackendServiceImpl final : public backend::Backend::Service { std::string completion_text; task_result result = llama.queue_results.recv(task_id); if (!result.error && result.stop) { + + // Log Request Correlation Id + LOG_VERBOSE("correlation:", { + { "id", data["correlation_id"] } + }); + completion_text = result.result_json.value("content", ""); int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0); int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0); From ae121d8d0cb791c90d10ec70c2bf276c224b722f Mon Sep 17 00:00:00 2001 From: Siddharth More Date: Wed, 25 Sep 2024 20:28:39 -0700 Subject: [PATCH 4/6] fix correlation method name Signed-off-by: Siddharth More --- backend/cpp/llama/grpc-server.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 2ae1d33b93d1..791612dbcc99 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -2106,8 +2106,8 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama data["ignore_eos"] = predict->ignoreeos(); data["embeddings"] = predict->embeddings(); - // Add the correlation_id to json data - data["correlation_id"] = predict->correlation_id(); + // Add the correlationid to json data + data["correlation_id"] = predict->correlationid(); // for each image in the request, add the image data // From e30cf41559754a09e0c780efdc49c53cc73d6e7e Mon Sep 17 00:00:00 2001 From: Siddharth More Date: Thu, 26 Sep 2024 17:44:29 -0700 Subject: [PATCH 5/6] Update 
core/http/endpoints/openai/chat.go Co-authored-by: Ettore Di Giacinto Signed-off-by: Siddharth More --- core/http/endpoints/openai/chat.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 7aebbd34054b..8cd201745694 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -166,7 +166,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup if len(strings.TrimSpace(correlationID)) == 0 { correlationID = id } - c.Set("X-correlationID", correlationID) + c.Set("X-Correlation-ID", correlationID) modelFile, input, err := readRequest(c, cl, ml, startupOptions, true) if err != nil { From 5418ce6220c85a841f7fd573951160fea85c2b4c Mon Sep 17 00:00:00 2001 From: Siddharth More Date: Fri, 27 Sep 2024 12:19:36 +0200 Subject: [PATCH 6/6] Update core/http/endpoints/openai/chat.go Signed-off-by: Ettore Di Giacinto Signed-off-by: Siddharth More --- core/http/endpoints/openai/chat.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 8cd201745694..1ac1387eed3e 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -450,7 +450,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup c.Set("Cache-Control", "no-cache") c.Set("Connection", "keep-alive") c.Set("Transfer-Encoding", "chunked") - c.Set("X-correlationID", id) + c.Set("X-Correlation-ID", id) responses := make(chan schema.OpenAIResponse)
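
Usage note (illustrative sketch, not part of the patches above): with these changes the handler reads the incoming X-Correlation-ID header, falls back to the generated response id when the header is absent, echoes the value back in the response headers, and stores it in the request context under the exported openai.CorrelationIDKey. A minimal sketch of how downstream Go code could read that value back for logging, assuming the key is imported from core/http/endpoints/openai and using a hypothetical helper name, might look like this:

package example

import (
	"context"

	"github.com/mudler/LocalAI/core/http/endpoints/openai"
	"github.com/rs/zerolog/log"
)

// logCorrelationID is a hypothetical helper: it retrieves the correlation ID
// that readRequest stored via context.WithValue(ctx, openai.CorrelationIDKey, ...)
// and attaches it to a zerolog entry so a single request can be traced
// across process boundaries.
func logCorrelationID(ctx context.Context) {
	if id, ok := ctx.Value(openai.CorrelationIDKey).(string); ok {
		log.Debug().Str("correlation_id", id).Msg("handling request")
	}
}

On the client side, sending the header explicitly (for example curl -H "X-Correlation-ID: <your-id>" against the chat completion endpoint) makes the same id appear in the response headers and, once propagated into the new CorrelationId proto field, in the llama.cpp backend's verbose logs.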