From 0eb945bf88e397b8fea4f25d2e291ec774c6efee Mon Sep 17 00:00:00 2001 From: Siddharth More Date: Mon, 23 Sep 2024 17:37:22 -0700 Subject: [PATCH 1/6] Add CorrelationID to chat request Signed-off-by: Siddharth More --- .../endpoints/localai/get_token_metrics.go | 23 +++++++++++++++++++ core/http/endpoints/openai/chat.go | 7 ++++++ core/http/endpoints/openai/completion.go | 2 ++ core/http/endpoints/openai/request.go | 13 ++++++++++- 4 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 core/http/endpoints/localai/get_token_metrics.go diff --git a/core/http/endpoints/localai/get_token_metrics.go b/core/http/endpoints/localai/get_token_metrics.go new file mode 100644 index 000000000000..533ccc2e355d --- /dev/null +++ b/core/http/endpoints/localai/get_token_metrics.go @@ -0,0 +1,23 @@ +package localai + +import ( + "github.com/gofiber/fiber/v2" + "github.com/mudler/LocalAI/core/backend" + "github.com/mudler/LocalAI/core/config" + "github.com/rs/zerolog/log" +) + +// GetMetricsEndpoint exposes the GetMetrics method via an HTTP endpoint +func GetMetricsEndpoint(cl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { + return func(c *fiber.Ctx) error { + // Assuming you have logic in the backend for fetching the metrics + metrics, err := backend.TokenMetrics(loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig)(cl *config.BackendConfigLoader, appConfig *config.ApplicationConfig).. // Call your backend method + if err != nil { + log.Err(err).Msg("Failed to get metrics") + return fiber.NewError(fiber.StatusInternalServerError, "Failed to get metrics") + } + + // Return metrics as a JSON response + return c.JSON(metrics) + } +} diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index b937120a3331..7aebbd34054b 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -161,6 +161,12 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup textContentToReturn = "" id = uuid.New().String() created = int(time.Now().Unix()) + // Set CorrelationID + correlationID := c.Get("X-Correlation-ID") + if len(strings.TrimSpace(correlationID)) == 0 { + correlationID = id + } + c.Set("X-correlationID", correlationID) modelFile, input, err := readRequest(c, cl, ml, startupOptions, true) if err != nil { @@ -444,6 +450,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup c.Set("Cache-Control", "no-cache") c.Set("Connection", "keep-alive") c.Set("Transfer-Encoding", "chunked") + c.Set("X-correlationID", id) responses := make(chan schema.OpenAIResponse) diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index b087cc5f8d35..e5de1b3f0296 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -57,6 +57,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a } return func(c *fiber.Ctx) error { + // Add Correlation + c.Set("X-Correlation-ID", id) modelFile, input, err := readRequest(c, cl, ml, appConfig, true) if err != nil { return fmt.Errorf("failed reading parameters from request:%w", err) diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go index e24dd28f2e4b..d6182a391fe8 100644 --- a/core/http/endpoints/openai/request.go +++ b/core/http/endpoints/openai/request.go @@ -6,6 +6,7 @@ import ( "fmt" "github.com/gofiber/fiber/v2" + 
"github.com/google/uuid" "github.com/mudler/LocalAI/core/config" fiberContext "github.com/mudler/LocalAI/core/http/ctx" "github.com/mudler/LocalAI/core/schema" @@ -15,6 +16,11 @@ import ( "github.com/rs/zerolog/log" ) +type correlationIDKeyType string + +// CorrelationIDKey to track request across process boundary +const CorrelationIDKey correlationIDKeyType = "correlationID" + func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) { input := new(schema.OpenAIRequest) @@ -24,9 +30,14 @@ func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLo } received, _ := json.Marshal(input) + // Extract or generate the correlation ID + correlationID := c.Get("X-Correlation-ID", uuid.New().String()) ctx, cancel := context.WithCancel(o.Context) - input.Context = ctx + // Add the correlation ID to the new context + ctxWithCorrelationID := context.WithValue(ctx, CorrelationIDKey, correlationID) + + input.Context = ctxWithCorrelationID input.Cancel = cancel log.Debug().Msgf("Request received: %s", string(received)) From d73497b997b77e4514e57a507e267bb1b2e16e4c Mon Sep 17 00:00:00 2001 From: Siddharth More Date: Mon, 23 Sep 2024 17:41:01 -0700 Subject: [PATCH 2/6] remove get_token_metrics Signed-off-by: Siddharth More --- .../endpoints/localai/get_token_metrics.go | 23 ------------------- 1 file changed, 23 deletions(-) delete mode 100644 core/http/endpoints/localai/get_token_metrics.go diff --git a/core/http/endpoints/localai/get_token_metrics.go b/core/http/endpoints/localai/get_token_metrics.go deleted file mode 100644 index 533ccc2e355d..000000000000 --- a/core/http/endpoints/localai/get_token_metrics.go +++ /dev/null @@ -1,23 +0,0 @@ -package localai - -import ( - "github.com/gofiber/fiber/v2" - "github.com/mudler/LocalAI/core/backend" - "github.com/mudler/LocalAI/core/config" - "github.com/rs/zerolog/log" -) - -// GetMetricsEndpoint exposes the GetMetrics method via an HTTP endpoint -func GetMetricsEndpoint(cl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { - return func(c *fiber.Ctx) error { - // Assuming you have logic in the backend for fetching the metrics - metrics, err := backend.TokenMetrics(loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig)(cl *config.BackendConfigLoader, appConfig *config.ApplicationConfig).. 
// Call your backend method - if err != nil { - log.Err(err).Msg("Failed to get metrics") - return fiber.NewError(fiber.StatusInternalServerError, "Failed to get metrics") - } - - // Return metrics as a JSON response - return c.JSON(metrics) - } -} From 4955557af25907d2e6c613516788eeb1203309f7 Mon Sep 17 00:00:00 2001 From: Siddharth More Date: Mon, 23 Sep 2024 22:13:12 -0700 Subject: [PATCH 3/6] Add CorrelationID to proto Signed-off-by: Siddharth More --- backend/backend.proto | 1 + backend/cpp/llama/grpc-server.cpp | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/backend/backend.proto b/backend/backend.proto index 31bd63e50867..b2d4518e1333 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -136,6 +136,7 @@ message PredictOptions { repeated Message Messages = 44; repeated string Videos = 45; repeated string Audios = 46; + string CorrelationId = 47; } // The response message containing the result diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 56d59d217a7c..2ae1d33b93d1 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -2106,6 +2106,9 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama data["ignore_eos"] = predict->ignoreeos(); data["embeddings"] = predict->embeddings(); + // Add the correlation_id to json data + data["correlation_id"] = predict->correlation_id(); + // for each image in the request, add the image data // for (int i = 0; i < predict->images_size(); i++) { @@ -2344,6 +2347,11 @@ class BackendServiceImpl final : public backend::Backend::Service { int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0); reply.set_prompt_tokens(tokens_evaluated); + // Log Request Correlation Id + LOG_VERBOSE("correlation:", { + { "id", data["correlation_id"] } + }); + // Send the reply writer->Write(reply); @@ -2367,6 +2375,12 @@ class BackendServiceImpl final : public backend::Backend::Service { std::string completion_text; task_result result = llama.queue_results.recv(task_id); if (!result.error && result.stop) { + + // Log Request Correlation Id + LOG_VERBOSE("correlation:", { + { "id", data["correlation_id"] } + }); + completion_text = result.result_json.value("content", ""); int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0); int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0); From ae121d8d0cb791c90d10ec70c2bf276c224b722f Mon Sep 17 00:00:00 2001 From: Siddharth More Date: Wed, 25 Sep 2024 20:28:39 -0700 Subject: [PATCH 4/6] fix correlation method name Signed-off-by: Siddharth More --- backend/cpp/llama/grpc-server.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index 2ae1d33b93d1..791612dbcc99 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -2106,8 +2106,8 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama data["ignore_eos"] = predict->ignoreeos(); data["embeddings"] = predict->embeddings(); - // Add the correlation_id to json data - data["correlation_id"] = predict->correlation_id(); + // Add the correlationid to json data + data["correlation_id"] = predict->correlationid(); // for each image in the request, add the image data // From e30cf41559754a09e0c780efdc49c53cc73d6e7e Mon Sep 17 00:00:00 2001 From: Siddharth More Date: Thu, 26 Sep 2024 17:44:29 -0700 Subject: [PATCH 5/6] Update 
core/http/endpoints/openai/chat.go Co-authored-by: Ettore Di Giacinto Signed-off-by: Siddharth More --- core/http/endpoints/openai/chat.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 7aebbd34054b..8cd201745694 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -166,7 +166,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup if len(strings.TrimSpace(correlationID)) == 0 { correlationID = id } - c.Set("X-correlationID", correlationID) + c.Set("X-Correlation-ID", correlationID) modelFile, input, err := readRequest(c, cl, ml, startupOptions, true) if err != nil { From 5418ce6220c85a841f7fd573951160fea85c2b4c Mon Sep 17 00:00:00 2001 From: Siddharth More Date: Fri, 27 Sep 2024 12:19:36 +0200 Subject: [PATCH 6/6] Update core/http/endpoints/openai/chat.go Signed-off-by: Ettore Di Giacinto Signed-off-by: Siddharth More --- core/http/endpoints/openai/chat.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 8cd201745694..1ac1387eed3e 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -450,7 +450,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup c.Set("Cache-Control", "no-cache") c.Set("Connection", "keep-alive") c.Set("Transfer-Encoding", "chunked") - c.Set("X-correlationID", id) + c.Set("X-Correlation-ID", id) responses := make(chan schema.OpenAIResponse)
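
Usage note (illustrative sketch, not part of the patches above): with these changes the handler reads the incoming X-Correlation-ID header, falls back to the generated response id when the header is absent, echoes the value back in the response headers, and stores it in the request context under the exported openai.CorrelationIDKey. A minimal sketch of how downstream Go code could read that value back for logging, assuming the key is imported from core/http/endpoints/openai and using a hypothetical helper name, might look like this:

package example

import (
	"context"

	"github.com/mudler/LocalAI/core/http/endpoints/openai"
	"github.com/rs/zerolog/log"
)

// logCorrelationID is a hypothetical helper: it retrieves the correlation ID
// that readRequest stored via context.WithValue(ctx, openai.CorrelationIDKey, ...)
// and attaches it to a zerolog entry so a single request can be traced
// across process boundaries.
func logCorrelationID(ctx context.Context) {
	if id, ok := ctx.Value(openai.CorrelationIDKey).(string); ok {
		log.Debug().Str("correlation_id", id).Msg("handling request")
	}
}

On the client side, sending the header explicitly (for example curl -H "X-Correlation-ID: <your-id>" against the chat completion endpoint) makes the same id appear in the response headers and, once propagated into the new CorrelationId proto field, in the llama.cpp backend's verbose logs.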